3 # Copyright Liblime 2008
4 # Parts Copyright ACPL 2011
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # Koha is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Koha; if not, see <http://www.gnu.org/licenses>.
28 use Module::Load::Conditional qw/check_install/;
30 use constant TAG_FIELDS => qw(tag_id borrowernumber biblionumber term language date_created);
31 use constant TAG_SELECT => "SELECT " . join(',', TAG_FIELDS) . "\n FROM tags_all\n";
33 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
36 $VERSION = 3.07.00.049;
39 &get_tag &get_tags &get_tag_rows
43 &delete_tag_rows_by_ids
49 &get_count_by_tag_status
54 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
55 if ( $ext_dict && ! check_install( module => 'Lingua::Ispell' ) ) {
56 warn "Ignoring TagsExternalDictionary, because Lingua::Ispell is not installed.";
61 import Data::Dumper qw(:DEFAULT);
62 print STDERR __PACKAGE__ . " external dictionary = " . ($ext_dict||'none') . "\n";
65 require Lingua::Ispell;
66 import Lingua::Ispell qw(spellcheck add_word_lc);
67 $Lingua::Ispell::path = $ext_dict;
68 $debug and print STDERR "\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
72 =head1 C4::Tags.pm - Support for user tagging of biblios.
74 More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
79 my $query = "SELECT * FROM tags_filters ";
82 $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
85 $sth = C4::Context->dbh->prepare($query);
88 return $sth->fetchall_arrayref({});
91 # (SELECT count(*) FROM tags_all ) as tags_all,
92 # (SELECT count(*) FROM tags_index ) as tags_index,
96 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
97 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
98 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
100 my $sth = C4::Context->dbh->prepare($query);
102 my $result = $sth->fetchrow_hashref();
103 $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
104 $debug and warn "counts returned: " . Dumper $result;
108 =head2 get_count_by_tag_status
110 get_count_by_tag_status($status);
112 Takes a status and gets a count of tags with that status
116 sub get_count_by_tag_status {
118 my $dbh = C4::Context->dbh;
120 "SELECT count(*) FROM tags_approval WHERE approved=?";
121 my $sth = $dbh->prepare($query);
122 $sth->execute( $status );
123 return $sth->fetchrow;
127 my $tag_id = shift or return;
128 my $user_id = (@_) ? shift : undef;
129 my $rows = (defined $user_id) ?
130 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
131 get_tag_rows({tag_id=>$tag_id}) ;
133 (scalar(@$rows) == 1) or return; # should never happen (duplicate ids)
134 my $row = shift(@$rows);
135 ($tag_id == $row->{tag_id}) or return 0;
136 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
137 my $index = shift(@$tags);
138 $debug and print STDERR
139 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
140 $row->{tag_id}, $row->{biblionumber}, $index->{weight}, $index->{weight_total};
141 if ($index->{weight} <= 1) {
142 delete_tag_index($row->{term},$row->{biblionumber});
144 decrement_weight($row->{term},$row->{biblionumber});
146 if ($index->{weight_total} <= 1) {
147 delete_tag_approval($row->{term});
149 decrement_weight_total($row->{term});
151 delete_tag_row_by_id($tag_id);
154 sub delete_tag_index {
156 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
158 return $sth->rows || 0;
160 sub delete_tag_approval {
162 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
163 $sth->execute(shift);
164 return $sth->rows || 0;
166 sub delete_tag_row_by_id {
168 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
169 $sth->execute(shift);
170 return $sth->rows || 0;
172 sub delete_tag_rows_by_ids {
176 $i += delete_tag_row_by_id($_);
178 ($i == scalar(@_)) or
179 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
184 my $hash = shift || {};
185 my @ok_fields = TAG_FIELDS;
186 push @ok_fields, 'limit'; # push the limit! :)
190 foreach my $key (keys %$hash) {
191 $debug and print STDERR "get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
192 unless (length $key) {
193 carp "Empty argument key to get_tag_rows: ignoring!";
196 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
197 carp "get_tag_rows received unreconized argument key '$key'.";
200 if ($key eq 'limit') {
201 my $val = $hash->{$key};
202 unless ($val =~ /^(\d+,)?\d+$/) {
203 carp "Non-nuerical limit value '$val' ignored!";
206 $limit = " LIMIT $val\n";
208 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
209 push @exe_args, $hash->{$key};
212 my $query = TAG_SELECT . ($wheres||'') . $limit;
213 $debug and print STDERR "get_tag_rows query:\n $query\n",
214 "get_tag_rows query args: ", join(',', @exe_args), "\n";
215 my $sth = C4::Context->dbh->prepare($query);
217 $sth->execute(@exe_args);
221 return $sth->fetchall_arrayref({});
224 sub get_tags { # i.e., from tags_index
225 my $hash = shift || {};
226 my @ok_fields = qw(term biblionumber weight limit sort approved);
231 foreach my $key (keys %$hash) {
232 $debug and print STDERR "get_tags arg. '$key' = ", $hash->{$key}, "\n";
233 unless (length $key) {
234 carp "Empty argument key to get_tags: ignoring!";
237 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
238 carp "get_tags received unreconized argument key '$key'.";
241 if ($key eq 'limit') {
242 my $val = $hash->{$key};
243 unless ($val =~ /^(\d+,)?\d+$/) {
244 carp "Non-nuerical limit value '$val' ignored!";
247 $limit = " LIMIT $val\n";
248 } elsif ($key eq 'sort') {
249 foreach my $by (split /\,/, $hash->{$key}) {
251 $by =~ /^([-+])?(term)/ or
252 $by =~ /^([-+])?(biblionumber)/ or
253 $by =~ /^([-+])?(weight)/
255 carp "get_tags received illegal sort order '$by'";
261 $order = " ORDER BY ";
263 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
267 my $whereval = $hash->{$key};
268 my $longkey = ($key eq 'term' ) ? 'tags_index.term' :
269 ($key eq 'approved') ? 'tags_approval.approved' : $key;
270 my $op = ($whereval =~ s/^(>=|<=)// or
271 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
272 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
273 push @exe_args, $whereval;
277 SELECT tags_index.term as term,biblionumber,weight,weight_total
279 LEFT JOIN tags_approval
280 ON tags_index.term = tags_approval.term
281 " . ($wheres||'') . $order . $limit;
282 $debug and print STDERR "get_tags query:\n $query\n",
283 "get_tags query args: ", join(',', @exe_args), "\n";
284 my $sth = C4::Context->dbh->prepare($query);
286 $sth->execute(@exe_args);
290 return $sth->fetchall_arrayref({});
293 sub get_approval_rows { # i.e., from tags_approval
294 my $hash = shift || {};
295 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
300 foreach my $key (keys %$hash) {
301 $debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
302 unless (length $key) {
303 carp "Empty argument key to get_approval_rows: ignoring!";
306 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
307 carp "get_approval_rows received unreconized argument key '$key'.";
310 if ($key eq 'limit') {
311 my $val = $hash->{$key};
312 unless ($val =~ /^(\d+,)?\d+$/) {
313 carp "Non-numerical limit value '$val' ignored!";
316 $limit = " LIMIT $val\n";
317 } elsif ($key eq 'sort') {
318 foreach my $by (split /\,/, $hash->{$key}) {
320 $by =~ /^([-+])?(term)/ or
321 $by =~ /^([-+])?(biblionumber)/ or
322 $by =~ /^([-+])?(borrowernumber)/ or
323 $by =~ /^([-+])?(weight_total)/ or
324 $by =~ /^([-+])?(approved(_by)?)/ or
325 $by =~ /^([-+])?(date_approved)/
327 carp "get_approval_rows received illegal sort order '$by'";
333 $order = " ORDER BY " unless $order;
335 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
339 my $whereval = $hash->{$key};
340 my $op = ($whereval =~ s/^(>=|<=)// or
341 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
342 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
343 push @exe_args, $whereval;
347 SELECT tags_approval.term AS term,
348 tags_approval.approved AS approved,
349 tags_approval.date_approved AS date_approved,
350 tags_approval.approved_by AS approved_by,
351 tags_approval.weight_total AS weight_total,
352 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
355 ON tags_approval.approved_by = borrowers.borrowernumber ";
356 $query .= ($wheres||'') . $order . $limit;
357 $debug and print STDERR "get_approval_rows query:\n $query\n",
358 "get_approval_rows query args: ", join(',', @exe_args), "\n";
359 my $sth = C4::Context->dbh->prepare($query);
361 $sth->execute(@exe_args);
365 return $sth->fetchall_arrayref({});
369 my $term = shift or return;
370 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
371 $sth->execute($term);
372 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
373 unless ($sth->rows) {
374 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
377 return $sth->fetchrow;
381 my $term = shift or return;
384 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
385 $sth->execute($term,shift);
387 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
388 $sth->execute($term);
390 return $sth->fetchrow_hashref;
394 my $operator = shift;
395 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
396 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
399 spellcheck($_) or next;
404 my $aref = get_approval_rows({term=>$_});
405 if ($aref and scalar @$aref) {
406 mod_tag_approval($operator,$_,1);
408 add_tag_approval($_,$operator);
413 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
414 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
415 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
417 my $operator = shift;
418 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
420 my $aref = get_approval_rows({term=>$_});
421 if ($aref and scalar @$aref) {
422 mod_tag_approval($operator,$_,-1);
424 add_tag_approval($_,$operator,-1);
430 my $operator = shift;
431 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
432 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
433 # my $sth = C4::Context->dbh->prepare($query);
437 my $operator = shift;
438 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
439 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
440 # my $sth = C4::Context->dbh->prepare($query);
441 # $sth->execute($term);
445 sub add_tag_approval { # or disapproval
446 $debug and warn "add_tag_approval(" . join(", ",map {defined($_) ? $_ : 'UNDEF'} @_) . ")";
447 my $term = shift or return;
448 my $query = "SELECT * FROM tags_approval WHERE term = ?";
449 my $sth = C4::Context->dbh->prepare($query);
450 $sth->execute($term);
451 ($sth->rows) and return increment_weight_total($term);
452 my $operator = shift || 0;
453 my $approval = (@_ ? shift : 0); # default is unapproved
454 my @exe_args = ($term); # all 3 queries will use this argument
456 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
457 push @exe_args, $operator, $approval;
458 } elsif ($approval) {
459 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
460 push @exe_args, $approval;
462 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
464 $debug and print STDERR "add_tag_approval query: $query\nadd_tag_approval args: (" . join(", ", @exe_args) . ")\n";
465 $sth = C4::Context->dbh->prepare($query);
466 $sth->execute(@exe_args);
470 sub mod_tag_approval {
471 my $operator = shift;
472 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
473 my $term = shift or return;
474 my $approval = (scalar @_ ? shift : 1); # default is to approve
475 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
476 $debug and print STDERR "mod_tag_approval query: $query\nmod_tag_approval args: ($operator,$approval,$term)\n";
477 my $sth = C4::Context->dbh->prepare($query);
478 $sth->execute($operator,$approval,$term);
482 my $term = shift or return;
483 my $biblionumber = shift or return;
484 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
485 my $sth = C4::Context->dbh->prepare($query);
486 $sth->execute($term,$biblionumber);
487 ($sth->rows) and return increment_weight($term,$biblionumber);
488 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
489 $debug and print STDERR "add_tag_index query: $query\nadd_tag_index args: ($term,$biblionumber)\n";
490 $sth = C4::Context->dbh->prepare($query);
491 $sth->execute($term,$biblionumber);
495 sub get_tag { # by tag_id
497 my $sth = C4::Context->dbh->prepare(TAG_SELECT . "WHERE tag_id = ?");
498 $sth->execute(shift);
499 return $sth->fetchrow_hashref;
502 sub increment_weights {
503 increment_weight(@_);
504 increment_weight_total(shift);
506 sub decrement_weights {
507 decrement_weight(@_);
508 decrement_weight_total(shift);
510 sub increment_weight_total {
511 _set_weight_total('weight_total+1',shift);
513 sub increment_weight {
514 _set_weight('weight+1',shift,shift);
516 sub decrement_weight_total {
517 _set_weight_total('weight_total-1',shift);
519 sub decrement_weight {
520 _set_weight('weight-1',shift,shift);
522 sub _set_weight_total {
523 my $sth = C4::Context->dbh->prepare("
525 SET weight_total=" . (shift) . "
527 "); # note: CANNOT use "?" for weight_total (see the args above).
528 $sth->execute(shift); # just the term
531 my $dbh = C4::Context->dbh;
532 my $sth = $dbh->prepare("
534 SET weight=" . (shift) . "
541 sub add_tag { # biblionumber,term,[borrowernumber,approvernumber]
542 my $biblionumber = shift or return;
543 my $term = shift or return;
544 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
547 ($term) or return; # must be more than whitespace
548 my $rows = get_tag_rows({biblionumber=>$biblionumber, borrowernumber=>$borrowernumber, term=>$term, limit=>1});
549 my $query = "INSERT INTO tags_all
550 (borrowernumber,biblionumber,term,date_created)
551 VALUES (?,?,?,NOW())";
552 $debug and print STDERR "add_tag query: $query\n",
553 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
555 $debug and carp "Duplicate tag detected. Tag not added.";
558 # add to tags_all regardless of approaval
559 my $sth = C4::Context->dbh->prepare($query);
560 $sth->execute($borrowernumber,$biblionumber,$term);
563 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
564 my $approver = shift;
565 $debug and print STDERR "term '$term' pre-approved by borrower #$approver\n";
566 add_tag_approval($term,$approver,1);
567 add_tag_index($term,$biblionumber,$approver);
568 } elsif (is_approved($term) >= 1) {
569 $debug and print STDERR "term '$term' approved by whitelist\n";
570 add_tag_approval($term,0,1);
571 add_tag_index($term,$biblionumber,1);
573 $debug and print STDERR "term '$term' NOT approved (yet)\n";
574 add_tag_approval($term);
575 add_tag_index($term,$biblionumber);
579 # This takes a set of tags, as returned by C<get_approval_rows> and divides
580 # them up into a number of "strata" based on their weight. This is useful
581 # to display them in a number of different sizes.
584 # ($min, $max) = stratify_tags($strata, $tags);
585 # $stratum: the number of divisions you want
586 # $tags: the tags, as provided by get_approval_rows
587 # $min: the minimum stratum value
588 # $max: the maximum stratum value. This may be the same as $min if there
589 # is only one weight. Beware of divide by zeros.
590 # This will add a field to the tag called "stratum" containing the calculated
593 my ( $strata, $tags ) = @_;
594 return (0,0) if !@$tags;
597 my $w = $_->{weight_total};
598 $min = $w if ( !defined($min) || $min > $w );
599 $max = $w if ( !defined($max) || $max < $w );
602 # normalise min to zero
607 # if min and max are the same, just make it 1
608 my $span = ( $strata - 1 ) / ( $max || 1 );
610 my $w = $_->{weight_total};
611 $_->{stratum} = int( ( $w - $orig_min ) * $span );
613 return ( $min, $max );
619 =head2 add_tag(biblionumber,term[,borrowernumber])
621 =head3 TO DO: Add real perldoc
625 =head2 External Dictionary (Ispell) [Recommended]
627 An external dictionary can be used as a means of "pre-populating" and tracking
628 allowed terms based on the widely available Ispell dictionary. This can be the system
629 dictionary or a personal version, but in order to support whitelisting, it must be
630 editable to the process running Koha.
632 To enable, enter the absolute path to the ispell dictionary in the system
633 preference "TagsExternalDictionary".
635 Using external Ispell is recommended for both ease of use and performance. Note that any
636 language version of Ispell can be installed. It is also possible to modify the dictionary
637 at the command line to affect the desired content.
639 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
640 should build their own wordlist and recompile Ispell based on it. See man ispell for
643 =head2 Table Structure
645 The tables used by tags are:
651 Your first thought may be that this looks a little complicated. It is, but only because
652 it has to be. I'll try to explain.
654 tags_all - This table would be all we really need if we didn't care about moderation or
655 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
656 though, it contains all the relevant info about a given tag:
657 tag_id - unique id number for it
658 borrowernumber - user that entered it
659 biblionumber - book record it is attached to
660 term - tag "term" itself
661 language - perhaps used later to influence weighting
662 date_created - date and time it was created
664 tags_approval - Since we need to provide moderation, this table is used to track it. If no
665 external dictionary is used, this table is the sole reference for approval and rejection.
666 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
667 This could be called an "approved terms" table. See above regarding the External Dictionary.
668 term - tag "term" itself
669 approved - Negative, 0 or positive if tag is rejected, pending or approved.
670 date_approved - date of last action
671 approved_by - staffer performing the last action
672 weight_total - total occurrence of term in any biblio by any users
674 tags_index - This table is for performance, because by far the most common operation will
675 be fetching tags for a list of search results. We will have a set of biblios, and we will
676 want ONLY their approved tags and overall weighting. While we could implement a query that
677 would traverse tags_all filtered against tags_approval, the performance implications of
678 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
679 term - approved term as it appears in tags_approval
680 biblionumber - book record it is attached to
681 weight - number of times tag applied by any user
683 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
684 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
685 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
686 if you do not understand their operation and interaction. It is quite easy to define too
687 simple or too complex a regexp and effectively block all terms. The blacklist operation is
688 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
689 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
690 understanding of regular expression syntax and performance.
692 So the best way to think about the different tables is that they are each tailored to a certain
693 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
694 the tag population can continue to grow even if a user (along with their corresponding
695 rows in tags_all) is removed.
699 If you want to auto-populate some tags for debugging, do something like this:
701 mysql> select biblionumber from biblio where title LIKE "%Health%";
732 26 rows in set (0.00 sec)
734 Then, take those numbers and type/pipe them into this perl command line:
735 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
737 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.