3 # Copyright Liblime 2008
4 # Parts Copyright ACPL 2011
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # Koha is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Koha; if not, see <http://www.gnu.org/licenses>.
29 use constant TAG_FIELDS => qw(tag_id borrowernumber biblionumber term language date_created);
30 use constant TAG_SELECT => "SELECT " . join(',', TAG_FIELDS) . "\n FROM tags_all\n";
32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
37 &get_tag &get_tags &get_tag_rows
41 &delete_tag_rows_by_ids
47 &get_count_by_tag_status
52 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
55 import Data::Dumper qw(:DEFAULT);
56 print STDERR __PACKAGE__ . " external dictionary = " . ($ext_dict||'none') . "\n";
59 require Lingua::Ispell;
60 import Lingua::Ispell qw(spellcheck add_word_lc);
61 $Lingua::Ispell::path = $ext_dict;
62 $debug and print STDERR "\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
66 =head1 C4::Tags.pm - Support for user tagging of biblios.
68 More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
73 my $query = "SELECT * FROM tags_filters ";
76 $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
79 $sth = C4::Context->dbh->prepare($query);
82 return $sth->fetchall_arrayref({});
85 # (SELECT count(*) FROM tags_all ) as tags_all,
86 # (SELECT count(*) FROM tags_index ) as tags_index,
90 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
91 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
92 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
94 my $sth = C4::Context->dbh->prepare($query);
96 my $result = $sth->fetchrow_hashref();
97 $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
98 $debug and warn "counts returned: " . Dumper $result;
102 =head2 get_count_by_tag_status
104 get_count_by_tag_status($status);
106 Takes a status and gets a count of tags with that status
110 sub get_count_by_tag_status {
112 my $dbh = C4::Context->dbh;
114 "SELECT count(*) FROM tags_approval WHERE approved=?";
115 my $sth = $dbh->prepare($query);
116 $sth->execute( $status );
117 return $sth->fetchrow;
121 my $tag_id = shift or return;
122 my $user_id = (@_) ? shift : undef;
123 my $rows = (defined $user_id) ?
124 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
125 get_tag_rows({tag_id=>$tag_id}) ;
127 (scalar(@$rows) == 1) or return; # should never happen (duplicate ids)
128 my $row = shift(@$rows);
129 ($tag_id == $row->{tag_id}) or return 0;
130 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
131 my $index = shift(@$tags);
132 $debug and print STDERR
133 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
134 $row->{tag_id}, $row->{biblionumber}, $index->{weight}, $index->{weight_total};
135 if ($index->{weight} <= 1) {
136 delete_tag_index($row->{term},$row->{biblionumber});
138 decrement_weight($row->{term},$row->{biblionumber});
140 if ($index->{weight_total} <= 1) {
141 delete_tag_approval($row->{term});
143 decrement_weight_total($row->{term});
145 delete_tag_row_by_id($tag_id);
148 sub delete_tag_index {
150 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
152 return $sth->rows || 0;
154 sub delete_tag_approval {
156 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
157 $sth->execute(shift);
158 return $sth->rows || 0;
160 sub delete_tag_row_by_id {
162 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
163 $sth->execute(shift);
164 return $sth->rows || 0;
166 sub delete_tag_rows_by_ids {
170 $i += delete_tag_row_by_id($_);
172 ($i == scalar(@_)) or
173 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
178 my $hash = shift || {};
179 my @ok_fields = TAG_FIELDS;
180 push @ok_fields, 'limit'; # push the limit! :)
184 foreach my $key (keys %$hash) {
185 $debug and print STDERR "get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
186 unless (length $key) {
187 carp "Empty argument key to get_tag_rows: ignoring!";
190 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
191 carp "get_tag_rows received unreconized argument key '$key'.";
194 if ($key eq 'limit') {
195 my $val = $hash->{$key};
196 unless ($val =~ /^(\d+,)?\d+$/) {
197 carp "Non-nuerical limit value '$val' ignored!";
200 $limit = " LIMIT $val\n";
202 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
203 push @exe_args, $hash->{$key};
206 my $query = TAG_SELECT . ($wheres||'') . $limit;
207 $debug and print STDERR "get_tag_rows query:\n $query\n",
208 "get_tag_rows query args: ", join(',', @exe_args), "\n";
209 my $sth = C4::Context->dbh->prepare($query);
211 $sth->execute(@exe_args);
215 return $sth->fetchall_arrayref({});
218 sub get_tags { # i.e., from tags_index
219 my $hash = shift || {};
220 my @ok_fields = qw(term biblionumber weight limit sort approved);
225 foreach my $key (keys %$hash) {
226 $debug and print STDERR "get_tags arg. '$key' = ", $hash->{$key}, "\n";
227 unless (length $key) {
228 carp "Empty argument key to get_tags: ignoring!";
231 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
232 carp "get_tags received unreconized argument key '$key'.";
235 if ($key eq 'limit') {
236 my $val = $hash->{$key};
237 unless ($val =~ /^(\d+,)?\d+$/) {
238 carp "Non-nuerical limit value '$val' ignored!";
241 $limit = " LIMIT $val\n";
242 } elsif ($key eq 'sort') {
243 foreach my $by (split /\,/, $hash->{$key}) {
245 $by =~ /^([-+])?(term)/ or
246 $by =~ /^([-+])?(biblionumber)/ or
247 $by =~ /^([-+])?(weight)/
249 carp "get_tags received illegal sort order '$by'";
255 $order = " ORDER BY ";
257 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
261 my $whereval = $hash->{$key};
262 my $longkey = ($key eq 'term' ) ? 'tags_index.term' :
263 ($key eq 'approved') ? 'tags_approval.approved' : $key;
264 my $op = ($whereval =~ s/^(>=|<=)// or
265 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
266 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
267 push @exe_args, $whereval;
271 SELECT tags_index.term as term,biblionumber,weight,weight_total
273 LEFT JOIN tags_approval
274 ON tags_index.term = tags_approval.term
275 " . ($wheres||'') . $order . $limit;
276 $debug and print STDERR "get_tags query:\n $query\n",
277 "get_tags query args: ", join(',', @exe_args), "\n";
278 my $sth = C4::Context->dbh->prepare($query);
280 $sth->execute(@exe_args);
284 return $sth->fetchall_arrayref({});
287 sub get_approval_rows { # i.e., from tags_approval
288 my $hash = shift || {};
289 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
294 foreach my $key (keys %$hash) {
295 $debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
296 unless (length $key) {
297 carp "Empty argument key to get_approval_rows: ignoring!";
300 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
301 carp "get_approval_rows received unreconized argument key '$key'.";
304 if ($key eq 'limit') {
305 my $val = $hash->{$key};
306 unless ($val =~ /^(\d+,)?\d+$/) {
307 carp "Non-numerical limit value '$val' ignored!";
310 $limit = " LIMIT $val\n";
311 } elsif ($key eq 'sort') {
312 foreach my $by (split /\,/, $hash->{$key}) {
314 $by =~ /^([-+])?(term)/ or
315 $by =~ /^([-+])?(biblionumber)/ or
316 $by =~ /^([-+])?(borrowernumber)/ or
317 $by =~ /^([-+])?(weight_total)/ or
318 $by =~ /^([-+])?(approved(_by)?)/ or
319 $by =~ /^([-+])?(date_approved)/
321 carp "get_approval_rows received illegal sort order '$by'";
327 $order = " ORDER BY " unless $order;
329 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
333 my $whereval = $hash->{$key};
334 my $op = ($whereval =~ s/^(>=|<=)// or
335 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
336 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
337 push @exe_args, $whereval;
341 SELECT tags_approval.term AS term,
342 tags_approval.approved AS approved,
343 tags_approval.date_approved AS date_approved,
344 tags_approval.approved_by AS approved_by,
345 tags_approval.weight_total AS weight_total,
346 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
349 ON tags_approval.approved_by = borrowers.borrowernumber ";
350 $query .= ($wheres||'') . $order . $limit;
351 $debug and print STDERR "get_approval_rows query:\n $query\n",
352 "get_approval_rows query args: ", join(',', @exe_args), "\n";
353 my $sth = C4::Context->dbh->prepare($query);
355 $sth->execute(@exe_args);
359 return $sth->fetchall_arrayref({});
363 my $term = shift or return;
364 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
365 $sth->execute($term);
366 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
367 unless ($sth->rows) {
368 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
371 return $sth->fetchrow;
375 my $term = shift or return;
378 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
379 $sth->execute($term,shift);
381 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
382 $sth->execute($term);
384 return $sth->fetchrow_hashref;
388 my $operator = shift;
389 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
390 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
393 spellcheck($_) or next;
398 my $aref = get_approval_rows({term=>$_});
399 if ($aref and scalar @$aref) {
400 mod_tag_approval($operator,$_,1);
402 add_tag_approval($_,$operator);
407 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
408 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
409 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
411 my $operator = shift;
412 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
414 my $aref = get_approval_rows({term=>$_});
415 if ($aref and scalar @$aref) {
416 mod_tag_approval($operator,$_,-1);
418 add_tag_approval($_,$operator,-1);
424 my $operator = shift;
425 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
426 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
427 # my $sth = C4::Context->dbh->prepare($query);
431 my $operator = shift;
432 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
433 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
434 # my $sth = C4::Context->dbh->prepare($query);
435 # $sth->execute($term);
439 sub add_tag_approval { # or disapproval
440 $debug and warn "add_tag_approval(" . join(", ",map {defined($_) ? $_ : 'UNDEF'} @_) . ")";
441 my $term = shift or return;
442 my $query = "SELECT * FROM tags_approval WHERE term = ?";
443 my $sth = C4::Context->dbh->prepare($query);
444 $sth->execute($term);
445 ($sth->rows) and return increment_weight_total($term);
446 my $operator = shift || 0;
447 my $approval = (@_ ? shift : 0); # default is unapproved
448 my @exe_args = ($term); # all 3 queries will use this argument
450 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
451 push @exe_args, $operator, $approval;
452 } elsif ($approval) {
453 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
454 push @exe_args, $approval;
456 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
458 $debug and print STDERR "add_tag_approval query: $query\nadd_tag_approval args: (" . join(", ", @exe_args) . ")\n";
459 $sth = C4::Context->dbh->prepare($query);
460 $sth->execute(@exe_args);
464 sub mod_tag_approval {
465 my $operator = shift;
466 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
467 my $term = shift or return;
468 my $approval = (scalar @_ ? shift : 1); # default is to approve
469 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
470 $debug and print STDERR "mod_tag_approval query: $query\nmod_tag_approval args: ($operator,$approval,$term)\n";
471 my $sth = C4::Context->dbh->prepare($query);
472 $sth->execute($operator,$approval,$term);
476 my $term = shift or return;
477 my $biblionumber = shift or return;
478 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
479 my $sth = C4::Context->dbh->prepare($query);
480 $sth->execute($term,$biblionumber);
481 ($sth->rows) and return increment_weight($term,$biblionumber);
482 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
483 $debug and print STDERR "add_tag_index query: $query\nadd_tag_index args: ($term,$biblionumber)\n";
484 $sth = C4::Context->dbh->prepare($query);
485 $sth->execute($term,$biblionumber);
489 sub get_tag { # by tag_id
491 my $sth = C4::Context->dbh->prepare(TAG_SELECT . "WHERE tag_id = ?");
492 $sth->execute(shift);
493 return $sth->fetchrow_hashref;
496 sub increment_weights {
497 increment_weight(@_);
498 increment_weight_total(shift);
500 sub decrement_weights {
501 decrement_weight(@_);
502 decrement_weight_total(shift);
504 sub increment_weight_total {
505 _set_weight_total('weight_total+1',shift);
507 sub increment_weight {
508 _set_weight('weight+1',shift,shift);
510 sub decrement_weight_total {
511 _set_weight_total('weight_total-1',shift);
513 sub decrement_weight {
514 _set_weight('weight-1',shift,shift);
516 sub _set_weight_total {
517 my $sth = C4::Context->dbh->prepare("
519 SET weight_total=" . (shift) . "
521 "); # note: CANNOT use "?" for weight_total (see the args above).
522 $sth->execute(shift); # just the term
525 my $dbh = C4::Context->dbh;
526 my $sth = $dbh->prepare("
528 SET weight=" . (shift) . "
535 sub add_tag { # biblionumber,term,[borrowernumber,approvernumber]
536 my $biblionumber = shift or return;
537 my $term = shift or return;
538 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
541 ($term) or return; # must be more than whitespace
542 my $rows = get_tag_rows({biblionumber=>$biblionumber, borrowernumber=>$borrowernumber, term=>$term, limit=>1});
543 my $query = "INSERT INTO tags_all
544 (borrowernumber,biblionumber,term,date_created)
545 VALUES (?,?,?,NOW())";
546 $debug and print STDERR "add_tag query: $query\n",
547 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
549 $debug and carp "Duplicate tag detected. Tag not added.";
552 # add to tags_all regardless of approaval
553 my $sth = C4::Context->dbh->prepare($query);
554 $sth->execute($borrowernumber,$biblionumber,$term);
557 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
558 my $approver = shift;
559 $debug and print STDERR "term '$term' pre-approved by borrower #$approver\n";
560 add_tag_approval($term,$approver,1);
561 add_tag_index($term,$biblionumber,$approver);
562 } elsif (is_approved($term) >= 1) {
563 $debug and print STDERR "term '$term' approved by whitelist\n";
564 add_tag_approval($term,0,1);
565 add_tag_index($term,$biblionumber,1);
567 $debug and print STDERR "term '$term' NOT approved (yet)\n";
568 add_tag_approval($term);
569 add_tag_index($term,$biblionumber);
573 # This takes a set of tags, as returned by C<get_approval_rows> and divides
574 # them up into a number of "strata" based on their weight. This is useful
575 # to display them in a number of different sizes.
578 # ($min, $max) = stratify_tags($strata, $tags);
579 # $stratum: the number of divisions you want
580 # $tags: the tags, as provided by get_approval_rows
581 # $min: the minimum stratum value
582 # $max: the maximum stratum value. This may be the same as $min if there
583 # is only one weight. Beware of divide by zeros.
584 # This will add a field to the tag called "stratum" containing the calculated
587 my ( $strata, $tags ) = @_;
588 return (0,0) if !@$tags;
591 my $w = $_->{weight_total};
592 $min = $w if ( !defined($min) || $min > $w );
593 $max = $w if ( !defined($max) || $max < $w );
596 # normalise min to zero
601 # if min and max are the same, just make it 1
602 my $span = ( $strata - 1 ) / ( $max || 1 );
604 my $w = $_->{weight_total};
605 $_->{stratum} = int( ( $w - $orig_min ) * $span );
607 return ( $min, $max );
613 =head2 add_tag(biblionumber,term[,borrowernumber])
615 =head3 TO DO: Add real perldoc
619 =head2 External Dictionary (Ispell) [Recommended]
621 An external dictionary can be used as a means of "pre-populating" and tracking
622 allowed terms based on the widely available Ispell dictionary. This can be the system
623 dictionary or a personal version, but in order to support whitelisting, it must be
624 editable to the process running Koha.
626 To enable, enter the absolute path to the ispell dictionary in the system
627 preference "TagsExternalDictionary".
629 Using external Ispell is recommended for both ease of use and performance. Note that any
630 language version of Ispell can be installed. It is also possible to modify the dictionary
631 at the command line to affect the desired content.
633 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
634 should build their own wordlist and recompile Ispell based on it. See man ispell for
637 =head2 Table Structure
639 The tables used by tags are:
645 Your first thought may be that this looks a little complicated. It is, but only because
646 it has to be. I'll try to explain.
648 tags_all - This table would be all we really need if we didn't care about moderation or
649 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
650 though, it contains all the relevant info about a given tag:
651 tag_id - unique id number for it
652 borrowernumber - user that entered it
653 biblionumber - book record it is attached to
654 term - tag "term" itself
655 language - perhaps used later to influence weighting
656 date_created - date and time it was created
658 tags_approval - Since we need to provide moderation, this table is used to track it. If no
659 external dictionary is used, this table is the sole reference for approval and rejection.
660 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
661 This could be called an "approved terms" table. See above regarding the External Dictionary.
662 term - tag "term" itself
663 approved - Negative, 0 or positive if tag is rejected, pending or approved.
664 date_approved - date of last action
665 approved_by - staffer performing the last action
666 weight_total - total occurrence of term in any biblio by any users
668 tags_index - This table is for performance, because by far the most common operation will
669 be fetching tags for a list of search results. We will have a set of biblios, and we will
670 want ONLY their approved tags and overall weighting. While we could implement a query that
671 would traverse tags_all filtered against tags_approval, the performance implications of
672 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
673 term - approved term as it appears in tags_approval
674 biblionumber - book record it is attached to
675 weight - number of times tag applied by any user
677 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
678 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
679 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
680 if you do not understand their operation and interaction. It is quite easy to define too
681 simple or too complex a regexp and effectively block all terms. The blacklist operation is
682 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
683 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
684 understanding of regular expression syntax and performance.
686 So the best way to think about the different tables is that they are each tailored to a certain
687 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
688 the tag population can continue to grow even if a user (along with their corresponding
689 rows in tags_all) is removed.
693 If you want to auto-populate some tags for debugging, do something like this:
695 mysql> select biblionumber from biblio where title LIKE "%Health%";
726 26 rows in set (0.00 sec)
728 Then, take those numbers and type/pipe them into this perl command line:
729 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
731 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.