3 # Copyright Liblime 2008
4 # Parts Copyright ACPL 2011
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it under the
9 # terms of the GNU General Public License as published by the Free Software
10 # Foundation; either version 2 of the License, or (at your option) any later
13 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with Koha; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
29 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
30 use vars qw($ext_dict $select_all @fields);
33 $VERSION = 3.08.01.002;
36 &get_tag &get_tags &get_tag_rows
40 &delete_tag_rows_by_ids
47 &get_count_by_tag_status
51 $ext_dict = C4::Context->preference('TagsExternalDictionary');
54 import Data::Dumper qw(:DEFAULT);
55 print STDERR __PACKAGE__ . " external dictionary = " . ($ext_dict||'none') . "\n";
58 require Lingua::Ispell;
59 import Lingua::Ispell qw(spellcheck add_word_lc save_dictionary);
64 $ext_dict and $Lingua::Ispell::path = $ext_dict;
65 $debug and print STDERR "\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
66 @fields = qw(tag_id borrowernumber biblionumber term language date_created);
67 $select_all = "SELECT " . join(',',@fields) . "\n FROM tags_all\n";
70 sub get_filters (;$) {
71 my $query = "SELECT * FROM tags_filters ";
74 $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
77 $sth = C4::Context->dbh->prepare($query);
80 return $sth->fetchall_arrayref({});
83 # (SELECT count(*) FROM tags_all ) as tags_all,
84 # (SELECT count(*) FROM tags_index ) as tags_index,
86 sub approval_counts () {
88 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
89 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
90 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
92 my $sth = C4::Context->dbh->prepare($query);
94 my $result = $sth->fetchrow_hashref();
95 $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
96 $debug and warn "counts returned: " . Dumper $result;
100 =head2 get_count_by_tag_status
102 get_count_by_tag_status($status);
104 Takes a status and gets a count of tags with that status
108 sub get_count_by_tag_status {
110 my $dbh = C4::Context->dbh;
112 "SELECT count(*) FROM tags_approval WHERE approved=?";
113 my $sth = $dbh->prepare($query);
114 $sth->execute( $status );
115 return $sth->fetchrow;
118 sub remove_tag ($;$) {
119 my $tag_id = shift or return undef;
120 my $user_id = (@_) ? shift : undef;
121 my $rows = (defined $user_id) ?
122 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
123 get_tag_rows({tag_id=>$tag_id}) ;
125 (scalar(@$rows) == 1) or return undef; # should never happen (duplicate ids)
126 my $row = shift(@$rows);
127 ($tag_id == $row->{tag_id}) or return 0;
128 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
129 my $index = shift(@$tags);
130 $debug and print STDERR
131 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
132 $row->{tag_id}, $row->{biblionumber}, $index->{weight}, $index->{weight_total};
133 if ($index->{weight} <= 1) {
134 delete_tag_index($row->{term},$row->{biblionumber});
136 decrement_weight($row->{term},$row->{biblionumber});
138 if ($index->{weight_total} <= 1) {
139 delete_tag_approval($row->{term});
141 decrement_weight_total($row->{term});
143 delete_tag_row_by_id($tag_id);
146 sub delete_tag_index ($$) {
147 (@_) or return undef;
148 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
150 return $sth->rows || 0;
152 sub delete_tag_approval ($) {
153 (@_) or return undef;
154 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
155 $sth->execute(shift);
156 return $sth->rows || 0;
158 sub delete_tag_row_by_id ($) {
159 (@_) or return undef;
160 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
161 $sth->execute(shift);
162 return $sth->rows || 0;
164 sub delete_tag_rows_by_ids (@) {
165 (@_) or return undef;
168 $i += delete_tag_row_by_id($_);
170 ($i == scalar(@_)) or
171 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
175 sub get_tag_rows ($) {
176 my $hash = shift || {};
177 my @ok_fields = @fields;
178 push @ok_fields, 'limit'; # push the limit! :)
182 foreach my $key (keys %$hash) {
183 $debug and print STDERR "get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
184 unless (length $key) {
185 carp "Empty argument key to get_tag_rows: ignoring!";
188 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
189 carp "get_tag_rows received unreconized argument key '$key'.";
192 if ($key eq 'limit') {
193 my $val = $hash->{$key};
194 unless ($val =~ /^(\d+,)?\d+$/) {
195 carp "Non-nuerical limit value '$val' ignored!";
198 $limit = " LIMIT $val\n";
200 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
201 push @exe_args, $hash->{$key};
204 my $query = $select_all . ($wheres||'') . $limit;
205 $debug and print STDERR "get_tag_rows query:\n $query\n",
206 "get_tag_rows query args: ", join(',', @exe_args), "\n";
207 my $sth = C4::Context->dbh->prepare($query);
209 $sth->execute(@exe_args);
213 return $sth->fetchall_arrayref({});
216 sub get_tags (;$) { # i.e., from tags_index
217 my $hash = shift || {};
218 my @ok_fields = qw(term biblionumber weight limit sort approved);
223 foreach my $key (keys %$hash) {
224 $debug and print STDERR "get_tags arg. '$key' = ", $hash->{$key}, "\n";
225 unless (length $key) {
226 carp "Empty argument key to get_tags: ignoring!";
229 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
230 carp "get_tags received unreconized argument key '$key'.";
233 if ($key eq 'limit') {
234 my $val = $hash->{$key};
235 unless ($val =~ /^(\d+,)?\d+$/) {
236 carp "Non-nuerical limit value '$val' ignored!";
239 $limit = " LIMIT $val\n";
240 } elsif ($key eq 'sort') {
241 foreach my $by (split /\,/, $hash->{$key}) {
243 $by =~ /^([-+])?(term)/ or
244 $by =~ /^([-+])?(biblionumber)/ or
245 $by =~ /^([-+])?(weight)/
247 carp "get_tags received illegal sort order '$by'";
253 $order = " ORDER BY ";
255 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
259 my $whereval = $hash->{$key};
260 my $longkey = ($key eq 'term' ) ? 'tags_index.term' :
261 ($key eq 'approved') ? 'tags_approval.approved' : $key;
262 my $op = ($whereval =~ s/^(>=|<=)// or
263 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
264 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
265 push @exe_args, $whereval;
269 SELECT tags_index.term as term,biblionumber,weight,weight_total
271 LEFT JOIN tags_approval
272 ON tags_index.term = tags_approval.term
273 " . ($wheres||'') . $order . $limit;
274 $debug and print STDERR "get_tags query:\n $query\n",
275 "get_tags query args: ", join(',', @exe_args), "\n";
276 my $sth = C4::Context->dbh->prepare($query);
278 $sth->execute(@exe_args);
282 return $sth->fetchall_arrayref({});
285 sub get_approval_rows (;$) { # i.e., from tags_approval
286 my $hash = shift || {};
287 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
292 foreach my $key (keys %$hash) {
293 $debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
294 unless (length $key) {
295 carp "Empty argument key to get_approval_rows: ignoring!";
298 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
299 carp "get_approval_rows received unreconized argument key '$key'.";
302 if ($key eq 'limit') {
303 my $val = $hash->{$key};
304 unless ($val =~ /^(\d+,)?\d+$/) {
305 carp "Non-numerical limit value '$val' ignored!";
308 $limit = " LIMIT $val\n";
309 } elsif ($key eq 'sort') {
310 foreach my $by (split /\,/, $hash->{$key}) {
312 $by =~ /^([-+])?(term)/ or
313 $by =~ /^([-+])?(biblionumber)/ or
314 $by =~ /^([-+])?(borrowernumber)/ or
315 $by =~ /^([-+])?(weight_total)/ or
316 $by =~ /^([-+])?(approved(_by)?)/ or
317 $by =~ /^([-+])?(date_approved)/
319 carp "get_approval_rows received illegal sort order '$by'";
325 $order = " ORDER BY " unless $order;
327 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
331 my $whereval = $hash->{$key};
332 my $op = ($whereval =~ s/^(>=|<=)// or
333 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
334 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
335 push @exe_args, $whereval;
339 SELECT tags_approval.term AS term,
340 tags_approval.approved AS approved,
341 tags_approval.date_approved AS date_approved,
342 tags_approval.approved_by AS approved_by,
343 tags_approval.weight_total AS weight_total,
344 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
347 ON tags_approval.approved_by = borrowers.borrowernumber ";
348 $query .= ($wheres||'') . $order . $limit;
349 $debug and print STDERR "get_approval_rows query:\n $query\n",
350 "get_approval_rows query args: ", join(',', @exe_args), "\n";
351 my $sth = C4::Context->dbh->prepare($query);
353 $sth->execute(@exe_args);
357 return $sth->fetchall_arrayref({});
360 sub is_approved ($) {
361 my $term = shift or return undef;
362 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
363 $sth->execute($term);
364 unless ($sth->rows) {
365 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
368 return $sth->fetchrow;
371 sub get_tag_index ($;$) {
372 my $term = shift or return undef;
375 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
376 $sth->execute($term,shift);
378 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
379 $sth->execute($term);
381 return $sth->fetchrow_hashref;
385 my $operator = shift;
386 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
389 spellcheck($_) or next;
394 my $aref = get_approval_rows({term=>$_});
395 if ($aref and scalar @$aref) {
396 mod_tag_approval($operator,$_,1);
398 add_tag_approval($_,$operator);
403 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
404 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
405 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
407 my $operator = shift;
408 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
410 my $aref = get_approval_rows({term=>$_});
411 if ($aref and scalar @$aref) {
412 mod_tag_approval($operator,$_,-1);
414 add_tag_approval($_,$operator,-1);
420 my $operator = shift;
421 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
422 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
423 # my $sth = C4::Context->dbh->prepare($query);
427 my $operator = shift;
428 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
429 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
430 # my $sth = C4::Context->dbh->prepare($query);
431 # $sth->execute($term);
435 sub add_tag_approval ($;$$) { # or disapproval
436 $debug and warn "add_tag_approval(" . join(", ",map {defined($_) ? $_ : 'UNDEF'} @_) . ")";
437 my $term = shift or return undef;
438 my $query = "SELECT * FROM tags_approval WHERE term = ?";
439 my $sth = C4::Context->dbh->prepare($query);
440 $sth->execute($term);
441 ($sth->rows) and return increment_weight_total($term);
442 my $operator = shift || 0;
443 my $approval = (@_ ? shift : 0); # default is unapproved
444 my @exe_args = ($term); # all 3 queries will use this argument
446 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
447 push @exe_args, $operator, $approval;
448 } elsif ($approval) {
449 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
450 push @exe_args, $approval;
452 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
454 $debug and print STDERR "add_tag_approval query: $query\nadd_tag_approval args: (" . join(", ", @exe_args) . ")\n";
455 $sth = C4::Context->dbh->prepare($query);
456 $sth->execute(@exe_args);
460 sub mod_tag_approval ($$$) {
461 my $operator = shift;
462 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
463 my $term = shift or return undef;
464 my $approval = (scalar @_ ? shift : 1); # default is to approve
465 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
466 $debug and print STDERR "mod_tag_approval query: $query\nmod_tag_approval args: ($operator,$approval,$term)\n";
467 my $sth = C4::Context->dbh->prepare($query);
468 $sth->execute($operator,$approval,$term);
471 sub add_tag_index ($$;$) {
472 my $term = shift or return undef;
473 my $biblionumber = shift or return undef;
474 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
475 my $sth = C4::Context->dbh->prepare($query);
476 $sth->execute($term,$biblionumber);
477 ($sth->rows) and return increment_weight($term,$biblionumber);
478 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
479 $debug and print STDERR "add_tag_index query: $query\nadd_tag_index args: ($term,$biblionumber)\n";
480 $sth = C4::Context->dbh->prepare($query);
481 $sth->execute($term,$biblionumber);
485 sub get_tag ($) { # by tag_id
486 (@_) or return undef;
487 my $sth = C4::Context->dbh->prepare("$select_all WHERE tag_id = ?");
488 $sth->execute(shift);
489 return $sth->fetchrow_hashref;
492 sub rectify_weights (;$) {
493 my $dbh = C4::Context->dbh;
496 SELECT term,biblionumber,count(*) as count
499 (@_) and $query .= " WHERE term =? ";
500 $query .= " GROUP BY term,biblionumber ";
501 $sth = $dbh->prepare($query);
503 $sth->execute(shift);
507 my $results = $sth->fetchall_arrayref({}) or return undef;
509 foreach (@$results) {
510 _set_weight($_->{count},$_->{term},$_->{biblionumber});
511 $tally{$_->{term}} += $_->{count};
513 foreach (keys %tally) {
514 _set_weight_total($tally{$_},$_);
516 return ($results,\%tally);
519 sub increment_weights ($$) {
520 increment_weight(@_);
521 increment_weight_total(shift);
523 sub decrement_weights ($$) {
524 decrement_weight(@_);
525 decrement_weight_total(shift);
527 sub increment_weight_total ($) {
528 _set_weight_total('weight_total+1',shift);
530 sub increment_weight ($$) {
531 _set_weight('weight+1',shift,shift);
533 sub decrement_weight_total ($) {
534 _set_weight_total('weight_total-1',shift);
536 sub decrement_weight ($$) {
537 _set_weight('weight-1',shift,shift);
539 sub _set_weight_total ($$) {
540 my $sth = C4::Context->dbh->prepare("
542 SET weight_total=" . (shift) . "
544 "); # note: CANNOT use "?" for weight_total (see the args above).
545 $sth->execute(shift); # just the term
547 sub _set_weight ($$$) {
548 my $dbh = C4::Context->dbh;
549 my $sth = $dbh->prepare("
551 SET weight=" . (shift) . "
558 sub add_tag ($$;$$) { # biblionumber,term,[borrowernumber,approvernumber]
559 my $biblionumber = shift or return undef;
560 my $term = shift or return undef;
561 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
564 ($term) or return undef; # must be more than whitespace
565 my $rows = get_tag_rows({biblionumber=>$biblionumber, borrowernumber=>$borrowernumber, term=>$term, limit=>1});
566 my $query = "INSERT INTO tags_all
567 (borrowernumber,biblionumber,term,date_created)
568 VALUES (?,?,?,NOW())";
569 $debug and print STDERR "add_tag query: $query\n",
570 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
572 $debug and carp "Duplicate tag detected. Tag not added.";
575 # add to tags_all regardless of approaval
576 my $sth = C4::Context->dbh->prepare($query);
577 $sth->execute($borrowernumber,$biblionumber,$term);
580 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
581 my $approver = shift;
582 $debug and print STDERR "term '$term' pre-approved by borrower #$approver\n";
583 add_tag_approval($term,$approver,1);
584 add_tag_index($term,$biblionumber,$approver);
585 } elsif (is_approved($term) >= 1) {
586 $debug and print STDERR "term '$term' approved by whitelist\n";
587 add_tag_approval($term,0,1);
588 add_tag_index($term,$biblionumber,1);
590 $debug and print STDERR "term '$term' NOT approved (yet)\n";
591 add_tag_approval($term);
592 add_tag_index($term,$biblionumber);
599 =head1 C4::Tags.pm - Support for user tagging of biblios.
601 More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
603 =head2 add_tag(biblionumber,term[,borrowernumber])
605 =head3 TO DO: Add real perldoc
609 =head2 External Dictionary (Ispell) [Recommended]
611 An external dictionary can be used as a means of "pre-populating" and tracking
612 allowed terms based on the widely available Ispell dictionary. This can be the system
613 dictionary or a personal version, but in order to support whitelisting, it must be
614 editable to the process running Koha.
616 To enable, enter the absolute path to the ispell dictionary in the system
617 preference "TagsExternalDictionary".
619 Using external Ispell is recommended for both ease of use and performance. Note that any
620 language version of Ispell can be installed. It is also possible to modify the dictionary
621 at the command line to affect the desired content.
623 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
624 should build their own wordlist and recompile Ispell based on it. See man ispell for
627 =head2 Table Structure
629 The tables used by tags are:
635 Your first thought may be that this looks a little complicated. It is, but only because
636 it has to be. I'll try to explain.
638 tags_all - This table would be all we really need if we didn't care about moderation or
639 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
640 though, it contains all the relevant info about a given tag:
641 tag_id - unique id number for it
642 borrowernumber - user that entered it
643 biblionumber - book record it is attached to
644 term - tag "term" itself
645 language - perhaps used later to influence weighting
646 date_created - date and time it was created
648 tags_approval - Since we need to provide moderation, this table is used to track it. If no
649 external dictionary is used, this table is the sole reference for approval and rejection.
650 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
651 This could be called an "approved terms" table. See above regarding the External Dictionary.
652 term - tag "term" itself
653 approved - Negative, 0 or positive if tag is rejected, pending or approved.
654 date_approved - date of last action
655 approved_by - staffer performing the last action
656 weight_total - total occurance of term in any biblio by any users
658 tags_index - This table is for performance, because by far the most common operation will
659 be fetching tags for a list of search results. We will have a set of biblios, and we will
660 want ONLY their approved tags and overall weighting. While we could implement a query that
661 would traverse tags_all filtered against tags_approval, the performance implications of
662 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
663 term - approved term as it appears in tags_approval
664 biblionumber - book record it is attached to
665 weight - number of times tag applied by any user
667 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
668 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
669 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
670 if you do not understand their operation and interaction. It is quite easy to define too
671 simple or too complex a regexp and effectively block all terms. The blacklist operation is
672 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
673 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
674 understanding of regular expression syntax and performance.
676 So the best way to think about the different tables is that they are each tailored to a certain
677 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
678 the tag population can continue to grow even if a user (along with their corresponding
679 rows in tags_all) is removed.
683 If you want to auto-populate some tags for debugging, do something like this:
685 mysql> select biblionumber from biblio where title LIKE "%Health%";
716 26 rows in set (0.00 sec)
718 Then, take those numbers and type/pipe them into this perl command line:
719 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
721 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.