2 # This file is part of Koha.
4 # Koha is free software; you can redistribute it and/or modify it under the
5 # terms of the GNU General Public License as published by the Free Software
6 # Foundation; either version 2 of the License, or (at your option) any later
9 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
10 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License along with
14 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
15 # Suite 330, Boston, MA 02111-1307 USA
25 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
26 use vars qw($ext_dict $select_all @fields);
32 &get_tag &get_tags &get_tag_rows
36 &delete_tag_rows_by_ids
# External dictionary support: the dictionary path is read from the
# TagsExternalDictionary system preference and, when set, replaces
# $Lingua::Ispell::path.
46 $ext_dict = C4::Context->preference('TagsExternalDictionary');
49 import Data::Dumper qw(:DEFAULT);
50 print STDERR __PACKAGE__ . " external dictionary = " . ($ext_dict||'none') . "\n";
# Lingua::Ispell is pulled in with require/import rather than "use".
53 require Lingua::Ispell;
54 import Lingua::Ispell qw(spellcheck add_word_lc save_dictionary);
59 $ext_dict and $Lingua::Ispell::path = $ext_dict;
60 $debug and print STDERR "\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
# Column list for tags_all, and the SELECT prefix built from it; get_tag_rows
# also uses @fields as its list of accepted argument keys.
61 @fields = qw(tag_id borrowernumber biblionumber term language date_created);
62 $select_all = "SELECT " . join(',',@fields) . "\n FROM tags_all\n";
# get_filters([$id]) - read rows from the tags_filters table.
# Returns every fetched row as an arrayref of hashrefs.
# NOTE(review): two statements are prepared, one restricted by filter_id and
# one unrestricted; presumably the restricted one is used when an id argument
# is supplied - confirm.
65 sub get_filters (;$) {
66 my $query = "SELECT * FROM tags_filters ";
69 $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
72 $sth = C4::Context->dbh->prepare($query);
75 return $sth->fetchall_arrayref({});
78 # (SELECT count(*) FROM tags_all ) as tags_all,
79 # (SELECT count(*) FROM tags_index ) as tags_index,
# approval_counts() - count tags_approval rows by moderation state.
# Builds a hashref with approved_count (approved = 1), rejected_count
# (approved = -1), unapproved_count (approved = 0) and approved_total.
81 sub approval_counts () {
83 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
84 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
85 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
87 my $sth = C4::Context->dbh->prepare($query);
89 my $result = $sth->fetchrow_hashref();
# approved_total is computed here as the sum of the three counts; it is not
# a column selected by the query.
90 $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
91 $debug and warn "counts returned: " . Dumper $result;
# remove_tag($tag_id [, $user_id]) - remove one tag row and keep the
# tags_index / tags_approval weights in step.
#   $tag_id  - id of the tags_all row; a false value returns undef.
#   $user_id - optional; when defined, the row must also belong to this
#              borrowernumber.
# Returns undef unless exactly one matching row is found, and 0 if the row
# found does not carry the requested tag_id.
95 sub remove_tag ($;$) {
96 my $tag_id = shift or return undef;
97 my $user_id = (@_) ? shift : undef;
98 my $rows = (defined $user_id) ?
99 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
100 get_tag_rows({tag_id=>$tag_id}) ;
102 (scalar(@$rows) == 1) or return undef; # should never happen (duplicate ids)
103 my $row = shift(@$rows);
104 ($tag_id == $row->{tag_id}) or return 0;
# Look up the index entry for this term on this biblio; only the first row
# returned is used.
105 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
106 my $index = shift(@$tags);
107 $debug and print STDERR
108 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
109 $row->{tag_id}, $row->{biblionumber}, $index->{weight}, $index->{weight_total};
# A weight of 1 or less means this was the last use of the term on the biblio.
110 if ($index->{weight} <= 1) {
111 delete_tag_index($row->{term},$row->{biblionumber});
113 decrement_weight($row->{term},$row->{biblionumber});
# Likewise weight_total of 1 or less means the last use of the term anywhere.
115 if ($index->{weight_total} <= 1) {
116 delete_tag_approval($row->{term});
118 decrement_weight_total($row->{term});
120 delete_tag_row_by_id($tag_id);
# delete_tag_index($term, $biblionumber) - delete at most one tags_index row
# for the given term and biblionumber.
# Returns undef when called without arguments, otherwise the statement's row
# count (0 when none).
123 sub delete_tag_index ($$) {
124 (@_) or return undef;
125 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
127 return $sth->rows || 0;
# delete_tag_approval($term) - delete at most one tags_approval row for $term.
# Returns undef when called without arguments, otherwise the number of rows
# deleted (0 when none).
129 sub delete_tag_approval ($) {
130 (@_) or return undef;
131 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
132 $sth->execute(shift);
133 return $sth->rows || 0;
# delete_tag_row_by_id($tag_id) - delete at most one tags_all row by tag_id.
# Returns undef when called without arguments, otherwise the number of rows
# deleted (0 when none).
135 sub delete_tag_row_by_id ($) {
136 (@_) or return undef;
137 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
138 $sth->execute(shift);
139 return $sth->rows || 0;
# delete_tag_rows_by_ids(@tag_ids) - delete several tags_all rows by id.
# Returns undef when called without arguments. Each id goes through
# delete_tag_row_by_id and the deleted-row counts are summed in $i; a warning
# is emitted when that sum differs from the number of ids passed in.
141 sub delete_tag_rows_by_ids (@) {
142 (@_) or return undef;
145 $i += delete_tag_row_by_id($_);
147 ($i == scalar(@_)) or
148 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
# get_tag_rows(\%args) - fetch rows from tags_all.
#   \%args keys: any column in @fields (tag_id, borrowernumber, biblionumber,
#   term, language, date_created), each becoming a "column = ?" condition
#   joined with AND, plus 'limit' ("N" or "N,M").
# Empty or unknown keys and malformed limit values are reported with carp.
# Returns an arrayref of hashrefs, one per row.
152 sub get_tag_rows ($) {
153 my $hash = shift || {};
154 my @ok_fields = @fields;
155 push @ok_fields, 'limit'; # push the limit! :)
159 foreach my $key (keys %$hash) {
160 $debug and print STDERR "get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
161 unless (length $key) {
162 carp "Empty argument key to get_tag_rows: ignoring!";
# Compare the key literally. The key is later interpolated into the SQL text,
# so it must never be treated as a regex: a pattern-style key such as
# "term|x" would otherwise match exactly one allowed field and be accepted.
165 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
166 carp "get_tag_rows received unreconized argument key '$key'.";
169 if ($key eq 'limit') {
170 my $val = $hash->{$key};
# LIMIT cannot be bound as a placeholder here, so the value is validated as
# digits (optionally "offset,count") before being interpolated.
171 unless ($val =~ /^(\d+,)?\d+$/) {
172 carp "Non-nuerical limit value '$val' ignored!";
175 $limit = " LIMIT $val\n";
177 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
178 push @exe_args, $hash->{$key};
181 my $query = $select_all . ($wheres||'') . $limit;
182 $debug and print STDERR "get_tag_rows query:\n $query\n",
183 "get_tag_rows query args: ", join(',', @exe_args), "\n";
184 my $sth = C4::Context->dbh->prepare($query);
186 $sth->execute(@exe_args);
190 return $sth->fetchall_arrayref({});
# get_tags([\%args]) - fetch rows from tags_index joined to tags_approval.
#   \%args keys:
#     term, biblionumber, weight - WHERE conditions; a value may start with
#         >=, <=, >, = or < to choose the comparison operator (default "=").
#     limit - "N" or "N,M".
#     sort  - comma-separated list of term, biblionumber or weight, each
#             optionally prefixed with "-" (DESC) or "+" (ASC).
# Empty or unknown keys, malformed limits and illegal sort orders are reported
# with carp.
# Returns an arrayref of hashrefs with term, biblionumber, weight and
# weight_total.
193 sub get_tags (;$) { # i.e., from tags_index
194 my $hash = shift || {};
195 my @ok_fields = qw(term biblionumber weight limit sort);
200 foreach my $key (keys %$hash) {
201 $debug and print STDERR "get_tags arg. '$key' = ", $hash->{$key}, "\n";
202 unless (length $key) {
203 carp "Empty argument key to get_tags: ignoring!";
# Compare the key literally. The key is later interpolated into the SQL text,
# so it must never be treated as a regex: a pattern-style key such as
# "term|x" would otherwise match exactly one allowed field and be accepted.
206 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
207 carp "get_tags received unreconized argument key '$key'.";
210 if ($key eq 'limit') {
211 my $val = $hash->{$key};
212 unless ($val =~ /^(\d+,)?\d+$/) {
213 carp "Non-nuerical limit value '$val' ignored!";
216 $limit = " LIMIT $val\n";
217 } elsif ($key eq 'sort') {
218 foreach my $by (split /\,/, $hash->{$key}) {
220 $by =~ /^([-+])?(term)/ or
221 $by =~ /^([-+])?(biblionumber)/ or
222 $by =~ /^([-+])?(weight)/
224 carp "get_tags received illegal sort order '$by'";
230 $order = " ORDER BY ";
# Only the captured column name ($2) and direction ($1) reach the SQL text,
# never the caller's raw string.
232 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
236 my $whereval = $hash->{$key};
# "term" exists in both joined tables, so it is qualified with tags_index.
237 my $longkey = ($key eq 'term') ? 'tags_index.term' : $key;
238 my $op = ($whereval =~ s/^(>=|<=)// or
239 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
240 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
241 push @exe_args, $whereval;
245 SELECT tags_index.term as term,biblionumber,weight,weight_total
247 LEFT JOIN tags_approval
248 ON tags_index.term = tags_approval.term
249 " . ($wheres||'') . $order . $limit;
250 $debug and print STDERR "get_tags query:\n $query\n",
251 "get_tags query args: ", join(',', @exe_args), "\n";
252 my $sth = C4::Context->dbh->prepare($query);
254 $sth->execute(@exe_args);
258 return $sth->fetchall_arrayref({});
# get_approval_rows([\%args]) - fetch rows from tags_approval, with the
# approver's name taken from borrowers.
#   \%args keys:
#     term, approved, date_approved, approved_by, weight_total - WHERE
#         conditions; a value may start with >=, <=, >, = or < to choose the
#         comparison operator (default "=").
#     limit - "N" or "N,M".
#     sort  - comma-separated column names, each optionally prefixed with
#             "-" (DESC) or "+" (ASC).
# Empty or unknown keys, malformed limits and illegal sort orders are reported
# with carp.
# Returns an arrayref of hashrefs with term, approved, date_approved,
# approved_by, weight_total and approved_by_name.
# NOTE(review): the sort check accepts "biblionumber", which is not among the
# columns selected here - confirm that sort key is usable.
261 sub get_approval_rows (;$) { # i.e., from tags_approval
262 my $hash = shift || {};
263 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort);
268 foreach my $key (keys %$hash) {
269 $debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
270 unless (length $key) {
271 carp "Empty argument key to get_approval_rows: ignoring!";
# Compare the key literally. The key is later interpolated into the SQL text,
# so it must never be treated as a regex: a pattern-style key such as
# "term|x" would otherwise match exactly one allowed field and be accepted.
274 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
275 carp "get_approval_rows received unreconized argument key '$key'.";
278 if ($key eq 'limit') {
279 my $val = $hash->{$key};
280 unless ($val =~ /^(\d+,)?\d+$/) {
281 carp "Non-nuerical limit value '$val' ignored!";
284 $limit = " LIMIT $val\n";
285 } elsif ($key eq 'sort') {
286 foreach my $by (split /\,/, $hash->{$key}) {
288 $by =~ /^([-+])?(term)/ or
289 $by =~ /^([-+])?(biblionumber)/ or
290 $by =~ /^([-+])?(weight_total)/ or
291 $by =~ /^([-+])?(approved(_by)?)/ or
292 $by =~ /^([-+])?(date_approved)/
294 carp "get_approval_rows received illegal sort order '$by'";
300 $order = " ORDER BY " unless $order;
# Only the captured column name ($2) and direction ($1) reach the SQL text,
# never the caller's raw string.
302 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
306 my $whereval = $hash->{$key};
307 my $op = ($whereval =~ s/^(>=|<=)// or
308 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
309 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
310 push @exe_args, $whereval;
314 SELECT tags_approval.term AS term,
315 tags_approval.approved AS approved,
316 tags_approval.date_approved AS date_approved,
317 tags_approval.approved_by AS approved_by,
318 tags_approval.weight_total AS weight_total,
319 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
322 ON tags_approval.approved_by = borrowers.borrowernumber ";
323 $query .= ($wheres||'') . $order . $limit;
324 $debug and print STDERR "get_approval_rows query:\n $query\n",
325 "get_approval_rows query args: ", join(',', @exe_args), "\n";
326 my $sth = C4::Context->dbh->prepare($query);
328 $sth->execute(@exe_args);
332 return $sth->fetchall_arrayref({});
# is_approved($term) - look up the moderation state of a term.
# Returns undef for a false $term. When tags_approval has no row for the term
# and an external dictionary is configured, the spellcheck result decides:
# 1 if spellcheck returns nothing (known word), 0 otherwise.
# NOTE(review): DBI documents $sth->rows as unreliable for SELECT statements
# until all rows are fetched; this test relies on driver behaviour - confirm.
335 sub is_approved ($) {
336 my $term = shift or return undef;
337 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
338 $sth->execute($term);
339 unless ($sth->rows) {
340 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
# get_tag_index($term [, $biblionumber]) - fetch one tags_index row.
# Returns undef for a false $term, otherwise the first matching row as a
# hashref.
# NOTE(review): two queries are prepared, one by term and biblionumber and one
# by term only; presumably the first is used when a biblionumber is passed -
# confirm.
346 sub get_tag_index ($;$) {
347 my $term = shift or return undef;
350 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
351 $sth->execute($term,shift);
353 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
354 $sth->execute($term);
356 return $sth->fetchrow_hashref;
360 my $operator = shift;
361 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
364 spellcheck($_) or next;
369 my $aref = get_approval_rows({term=>$_});
370 if ($aref and scalar @$aref) {
371 mod_tag_approval($operator,$_,1);
373 add_tag_approval($_,$operator);
378 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
379 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
380 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
382 my $operator = shift;
383 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
385 my $aref = get_approval_rows({term=>$_});
386 if ($aref and scalar @$aref) {
387 mod_tag_approval($operator,$_,-1);
389 add_tag_approval($_,$operator,-1);
395 my $operator = shift;
396 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
397 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
398 # my $sth = C4::Context->dbh->prepare($query);
402 my $operator = shift;
403 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
404 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
405 # my $sth = C4::Context->dbh->prepare($query);
406 # $sth->execute($term);
# add_tag_approval($term [, $operator [, $approval]]) - record a term in
# tags_approval.
#   $term     - the tag term; a false value returns undef.
#   $operator - borrowernumber stored in approved_by; defaults to 0.
#   $approval - value stored in approved; defaults to 1 (approve).
# If the term already has a row, only its weight_total is incremented and that
# call's result is returned.
# NOTE(review): there are two INSERT forms, one with operator and approval and
# one with the term only; the condition choosing between them should be
# confirmed.
# NOTE(review): DBI documents $sth->rows as unreliable for SELECT statements
# until all rows are fetched; the existence test relies on driver behaviour.
410 sub add_tag_approval ($;$$) { # or disapproval
411 my $term = shift or return undef;
412 my $query = "SELECT * FROM tags_approval WHERE term = ?";
413 my $sth = C4::Context->dbh->prepare($query);
414 $sth->execute($term);
415 ($sth->rows) and return increment_weight_total($term);
416 my $operator = (@_ ? shift : 0);
418 my $approval = (@_ ? shift : 1); # default is to approve
419 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
420 $debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term,$operator,$approval)\n";
421 $sth = C4::Context->dbh->prepare($query);
422 $sth->execute($term,$operator,$approval);
424 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
425 $debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term)\n";
426 $sth = C4::Context->dbh->prepare($query);
427 $sth->execute($term);
# mod_tag_approval($operator, $term [, $approval]) - update the moderation
# state of an existing tags_approval row.
#   $operator - stored in approved_by; must be defined (0 is allowed).
#   $term     - the term to update; a false value returns undef.
#   $approval - stored in approved; defaults to 1 (approve).
# date_approved is set to NOW() by the UPDATE.
432 sub mod_tag_approval ($$$) {
433 my $operator = shift;
434 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
435 my $term = shift or return undef;
436 my $approval = (@_ ? shift : 1); # default is to approve
437 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
438 $debug and print STDERR "mod_tag_approval query:\n$query\nmod_tag_approval args: ($operator,$approval,$term)\n";
439 my $sth = C4::Context->dbh->prepare($query);
440 $sth->execute($operator,$approval,$term);
# add_tag_index($term, $biblionumber [, $extra]) - record a term on a biblio
# in tags_index.
# Returns undef when $term or $biblionumber is false. If the pair already has
# a row, its weight is incremented and that call's result is returned;
# otherwise a new row is inserted. The optional third argument is accepted by
# the prototype but not read in the statements below.
# NOTE(review): DBI documents $sth->rows as unreliable for SELECT statements
# until all rows are fetched; the existence test relies on driver behaviour.
443 sub add_tag_index ($$;$) {
444 my $term = shift or return undef;
445 my $biblionumber = shift or return undef;
446 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
447 my $sth = C4::Context->dbh->prepare($query);
448 $sth->execute($term,$biblionumber);
449 ($sth->rows) and return increment_weight($term,$biblionumber);
450 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
# Debug output goes to STDERR like every other trace in this module, so it
# cannot end up in the program's normal output.
451 $debug and print STDERR "add_tag_index query:\n$query\nadd_tag_index args: ($term,$biblionumber)\n";
452 $sth = C4::Context->dbh->prepare($query);
453 $sth->execute($term,$biblionumber);
# get_tag($tag_id) - fetch a single tags_all row by its tag_id.
# Returns undef when called without arguments, otherwise the row as a hashref
# holding the columns listed in @fields.
457 sub get_tag ($) { # by tag_id
458 (@_) or return undef;
459 my $sth = C4::Context->dbh->prepare("$select_all WHERE tag_id = ?");
460 $sth->execute(shift);
461 return $sth->fetchrow_hashref;
# rectify_weights([$term]) - recompute stored weights from row counts.
# Counts rows grouped by term and biblionumber (limited to one term when an
# argument is given), writes each count with _set_weight, then writes the
# per-term sum of those counts with _set_weight_total.
# Returns undef if no result set is fetched, otherwise the list
# ($results, \%tally): the arrayref of count rows and a hashref mapping each
# term to its total.
464 sub rectify_weights (;$) {
465 my $dbh = C4::Context->dbh;
468 SELECT term,biblionumber,count(*) as count
471 (@_) and $query .= " WHERE term =? ";
472 $query .= " GROUP BY term,biblionumber ";
473 $sth = $dbh->prepare($query);
475 $sth->execute(shift);
479 my $results = $sth->fetchall_arrayref({}) or return undef;
# The counts are passed as the value expression of the weight setters.
481 foreach (@$results) {
482 _set_weight($_->{count},$_->{term},$_->{biblionumber});
483 $tally{$_->{term}} += $_->{count};
485 foreach (keys %tally) {
486 _set_weight_total($tally{$_},$_);
488 return ($results,\%tally);
# increment_weights($term, $biblionumber) - add one to both the per-biblio
# weight and the term's weight_total. The first argument is shifted off for
# the second call.
491 sub increment_weights ($$) {
492 increment_weight(@_);
493 increment_weight_total(shift);
# decrement_weights($term, $biblionumber) - subtract one from both the
# per-biblio weight and the term's weight_total. The first argument is shifted
# off for the second call.
495 sub decrement_weights ($$) {
496 decrement_weight(@_);
497 decrement_weight_total(shift);
# increment_weight_total($term) - set weight_total to weight_total+1 for $term.
499 sub increment_weight_total ($) {
500 _set_weight_total('weight_total+1',shift);
# increment_weight($term, $biblionumber) - set weight to weight+1; both
# arguments are passed on to _set_weight in order.
502 sub increment_weight ($$) {
503 _set_weight('weight+1',shift,shift);
# decrement_weight_total($term) - set weight_total to weight_total-1 for $term.
505 sub decrement_weight_total ($) {
506 _set_weight_total('weight_total-1',shift);
# decrement_weight($term, $biblionumber) - set weight to weight-1; both
# arguments are passed on to _set_weight in order.
508 sub decrement_weight ($$) {
509 _set_weight('weight-1',shift,shift);
# _set_weight_total($expr, $term) - private: set weight_total for a term.
#   $expr - concatenated directly into the SQL text as the new value (for
#           example 'weight_total+1' or a number), so it must never come from
#           untrusted input.
#   $term - bound as the statement's only placeholder.
511 sub _set_weight_total ($$) {
512 my $sth = C4::Context->dbh->prepare("
514 SET weight_total=" . (shift) . "
516 "); # note: CANNOT use "?" for weight_total (see the args above).
517 $sth->execute(shift); # just the term
# _set_weight($expr, $term, $biblionumber) - private: set the weight column.
#   $expr - concatenated directly into the SQL text as the new value (callers
#           in this file pass 'weight+1', 'weight-1' or a count), so it must
#           never come from untrusted input.
# NOTE(review): presumably $term and $biblionumber select the row to update -
# confirm.
519 sub _set_weight ($$$) {
520 my $dbh = C4::Context->dbh;
521 my $sth = $dbh->prepare("
523 SET weight=" . (shift) . "
# add_tag($biblionumber, $term [, $borrowernumber [, $approvernumber]]) -
# attach a tag to a biblio.
#   $biblionumber, $term - required; a false value returns undef.
#   $borrowernumber      - user adding the tag; defaults to 0.
#   $approvernumber      - optional; when given, the tag is treated as
#                          pre-approved by that borrower.
# A tags_all row is always inserted; tags_approval and tags_index are then
# updated through add_tag_approval and add_tag_index.
530 sub add_tag ($$;$$) { # biblionumber,term,[borrowernumber,approvernumber]
531 my $biblionumber = shift or return undef;
532 my $term = shift or return undef;
533 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
535 # first, add to tags regardless of approval
536 my $query = "INSERT INTO tags_all
537 (borrowernumber,biblionumber,term,date_created)
538 VALUES (?,?,?,NOW())";
539 $debug and print STDERR "add_tag query:\n $query\n",
540 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
541 my $sth = C4::Context->dbh->prepare($query);
542 $sth->execute($borrowernumber,$biblionumber,$term);
# Three cases: explicit approver, term already approved (is_approved), or
# neither, in which case no operator is passed on.
545 if (@_) { # if an arg remains, it is the borrowernumber of the approver: tag is pre-approved.
546 my $approver = shift;
547 add_tag_approval($term,$approver);
548 add_tag_index($term,$biblionumber,$approver);
549 } elsif (is_approved($term)) {
550 add_tag_approval($term,1);
551 add_tag_index($term,$biblionumber,1);
553 add_tag_approval($term);
554 add_tag_index($term,$biblionumber);
561 =head1 C4::Tags.pm - Support for user tagging of biblios.
563 More verbose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
565 =head2 add_tag(biblionumber,term[,borrowernumber,approvernumber])
567 =head3 TO DO: Add real perldoc
569 =head2 External Dictionary (Ispell) [Recommended]
571 An external dictionary can be used as a means of "pre-populating" and tracking
572 allowed terms based on the widely available Ispell dictionary. This can be the system
573 dictionary or a personal version, but in order to support whitelisting, it must be
574 editable to the process running Koha.
576 To enable, enter the absolute path to the ispell dictionary in the system
577 preference "TagsExternalDictionary".
579 Using external Ispell is recommended for both ease of use and performance. Note that any
580 language version of Ispell can be installed. It is also possible to modify the dictionary
581 at the command line to affect the desired content.
583 =head2 Table Structure
585 The tables used by tags are:
591 Your first thought may be that this looks a little complicated. It is, but only because
592 it has to be. I'll try to explain.
594 tags_all - This table would be all we really need if we didn't care about moderation or
595 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
596 though, it contains all the relevant info about a given tag:
597 tag_id - unique id number for it
598 borrowernumber - user that entered it
599 biblionumber - book record it is attached to
600 term - tag "term" itself
601 language - perhaps used later to influence weighting
602 date_created - date and time it was created
604 tags_approval - Since we need to provide moderation, this table is used to track it. If no
605 external dictionary is used, this table is the sole reference for approval and rejection.
606 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
607 This could be called an "approved terms" table. See above regarding the External Dictionary.
608 term - tag "term" itself
609 approved - Negative, 0 or positive if tag is rejected, pending or approved.
610 date_approved - date of last action
611 approved_by - staffer performing the last action
612 weight_total - total occurrences of term in any biblio by any user
614 tags_index - This table is for performance, because by far the most common operation will
615 be fetching tags for a list of search results. We will have a set of biblios, and we will
616 want ONLY their approved tags and overall weighting. While we could implement a query that
617 would traverse tags_all filtered against tags_approval, the performance implications of
618 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
619 term - approved term as it appears in tags_approval
620 biblionumber - book record it is attached to
621 weight - number of times tag applied by any user
623 tags_blacklist - TODO
625 So the best way to think about the different tables is that they are each tailored to a certain
626 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
627 the tag population can continue to grow even if a user is removed, along with the corresponding
632 If you want to auto-populate some tags for debugging, do something like this:
634 mysql> select biblionumber from biblio where title LIKE "%Health%";
665 26 rows in set (0.00 sec)
667 Then, take those numbers and type them into this perl command line:
668 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'