3 # Copyright Liblime 2008
4 # Parts Copyright ACPL 2011
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # Koha is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Koha; if not, see <http://www.gnu.org/licenses>.
27 use Module::Load::Conditional qw( check_install );
29 use constant TAG_FIELDS => qw(tag_id borrowernumber biblionumber term language date_created);
30 use constant TAG_SELECT => "SELECT " . join(',', TAG_FIELDS) . "\n FROM tags_all\n";
32 our (@ISA, @EXPORT_OK);
36 get_tag get_tags get_tag_rows
43 delete_tag_rows_by_ids
49 get_count_by_tag_status
53 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
54 if ( $ext_dict && ! check_install( module => 'Lingua::Ispell' ) ) {
55 warn "Ignoring TagsExternalDictionary, because Lingua::Ispell is not installed.";
59 require Lingua::Ispell;
60 import Lingua::Ispell qw(spellcheck add_word_lc);
61 $Lingua::Ispell::path = $ext_dict;
65 =head1 C4::Tags.pm - Support for user tagging of biblios.
70 my $query = "SELECT * FROM tags_filters ";
73 $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
76 $sth = C4::Context->dbh->prepare($query);
79 return $sth->fetchall_arrayref({});
82 # (SELECT count(*) FROM tags_all ) as tags_all,
83 # (SELECT count(*) FROM tags_index ) as tags_index,
87 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
88 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
89 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
91 my $sth = C4::Context->dbh->prepare($query);
93 my $result = $sth->fetchrow_hashref();
94 $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
98 =head2 get_count_by_tag_status
100 get_count_by_tag_status($status);
102 Takes a status and gets a count of tags with that status
106 sub get_count_by_tag_status {
108 my $dbh = C4::Context->dbh;
110 "SELECT count(*) FROM tags_approval WHERE approved=?";
111 my $sth = $dbh->prepare($query);
112 $sth->execute( $status );
113 return $sth->fetchrow;
117 my $tag_id = shift or return;
118 my $user_id = (@_) ? shift : undef;
119 my $rows = (defined $user_id) ?
120 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
121 get_tag_rows({tag_id=>$tag_id}) ;
123 (scalar(@$rows) == 1) or return; # should never happen (duplicate ids)
124 my $row = shift(@$rows);
125 ($tag_id == $row->{tag_id}) or return 0;
126 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
127 my $index = shift(@$tags);
128 if ($index->{weight} <= 1) {
129 delete_tag_index($row->{term},$row->{biblionumber});
131 decrement_weight($row->{term},$row->{biblionumber});
133 if ($index->{weight_total} <= 1) {
134 delete_tag_approval($row->{term});
136 decrement_weight_total($row->{term});
138 delete_tag_row_by_id($tag_id);
141 sub delete_tag_index {
143 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
145 return $sth->rows || 0;
147 sub delete_tag_approval {
149 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
150 $sth->execute(shift);
151 return $sth->rows || 0;
153 sub delete_tag_row_by_id {
155 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
156 $sth->execute(shift);
157 return $sth->rows || 0;
159 sub delete_tag_rows_by_ids {
163 $i += delete_tag_row_by_id($_);
165 ($i == scalar(@_)) or
166 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
171 my $hash = shift || {};
172 my @ok_fields = TAG_FIELDS;
173 push @ok_fields, 'limit'; # push the limit! :)
177 foreach my $key (keys %$hash) {
178 unless (length $key) {
179 carp "Empty argument key to get_tag_rows: ignoring!";
182 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
183 carp "get_tag_rows received unreconized argument key '$key'.";
186 if ($key eq 'limit') {
187 my $val = $hash->{$key};
188 unless ($val =~ /^(\d+,)?\d+$/) {
189 carp "Non-nuerical limit value '$val' ignored!";
192 $limit = " LIMIT $val\n";
194 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
195 push @exe_args, $hash->{$key};
198 my $query = TAG_SELECT . ($wheres||'') . $limit;
199 my $sth = C4::Context->dbh->prepare($query);
201 $sth->execute(@exe_args);
205 return $sth->fetchall_arrayref({});
208 sub get_tags { # i.e., from tags_index
209 my $hash = shift || {};
210 my @ok_fields = qw(term biblionumber weight limit sort approved);
215 foreach my $key (keys %$hash) {
216 unless (length $key) {
217 carp "Empty argument key to get_tags: ignoring!";
220 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
221 carp "get_tags received unreconized argument key '$key'.";
224 if ($key eq 'limit') {
225 my $val = $hash->{$key};
226 unless ($val =~ /^(\d+,)?\d+$/) {
227 carp "Non-nuerical limit value '$val' ignored!";
230 $limit = " LIMIT $val\n";
231 } elsif ($key eq 'sort') {
232 foreach my $by (split /\,/, $hash->{$key}) {
234 $by =~ /^([-+])?(term)/ or
235 $by =~ /^([-+])?(biblionumber)/ or
236 $by =~ /^([-+])?(weight)/
238 carp "get_tags received illegal sort order '$by'";
244 $order = " ORDER BY ";
246 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
250 my $whereval = $hash->{$key};
251 my $longkey = ($key eq 'term' ) ? 'tags_index.term' :
252 ($key eq 'approved') ? 'tags_approval.approved' : $key;
253 my $op = ($whereval =~ s/^(>=|<=)// or
254 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
255 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
256 push @exe_args, $whereval;
260 SELECT tags_index.term as term,biblionumber,weight,weight_total
262 LEFT JOIN tags_approval
263 ON tags_index.term = tags_approval.term
264 " . ($wheres||'') . $order . $limit;
265 my $sth = C4::Context->dbh->prepare($query);
267 $sth->execute(@exe_args);
271 return $sth->fetchall_arrayref({});
274 sub get_approval_rows { # i.e., from tags_approval
275 my $hash = shift || {};
276 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
281 foreach my $key (keys %$hash) {
282 unless (length $key) {
283 carp "Empty argument key to get_approval_rows: ignoring!";
286 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
287 carp "get_approval_rows received unreconized argument key '$key'.";
290 if ($key eq 'limit') {
291 my $val = $hash->{$key};
292 unless ($val =~ /^(\d+,)?\d+$/) {
293 carp "Non-numerical limit value '$val' ignored!";
296 $limit = " LIMIT $val\n";
297 } elsif ($key eq 'sort') {
298 foreach my $by (split /\,/, $hash->{$key}) {
300 $by =~ /^([-+])?(term)/ or
301 $by =~ /^([-+])?(biblionumber)/ or
302 $by =~ /^([-+])?(borrowernumber)/ or
303 $by =~ /^([-+])?(weight_total)/ or
304 $by =~ /^([-+])?(approved(_by)?)/ or
305 $by =~ /^([-+])?(date_approved)/
307 carp "get_approval_rows received illegal sort order '$by'";
313 $order = " ORDER BY " unless $order;
315 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
319 my $whereval = $hash->{$key};
320 my $op = ($whereval =~ s/^(>=|<=)// or
321 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
322 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
323 push @exe_args, $whereval;
327 SELECT tags_approval.term AS term,
328 tags_approval.approved AS approved,
329 tags_approval.date_approved AS date_approved,
330 tags_approval.approved_by AS approved_by,
331 tags_approval.weight_total AS weight_total,
332 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
335 ON tags_approval.approved_by = borrowers.borrowernumber ";
336 $query .= ($wheres||'') . $order . $limit;
337 my $sth = C4::Context->dbh->prepare($query);
339 $sth->execute(@exe_args);
343 return $sth->fetchall_arrayref({});
347 my $term = shift or return;
348 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
349 $sth->execute($term);
350 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
351 unless ($sth->rows) {
352 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
355 return $sth->fetchrow;
359 my $term = shift or return;
362 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
363 $sth->execute($term,shift);
365 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
366 $sth->execute($term);
368 return $sth->fetchrow_hashref;
372 my $operator = shift;
373 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
374 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
377 spellcheck($_) or next;
382 my $aref = get_approval_rows({term=>$_});
383 if ($aref and scalar @$aref) {
384 mod_tag_approval($operator,$_,1);
386 add_tag_approval($_,$operator);
391 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
392 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
393 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
395 my $operator = shift;
396 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
398 my $aref = get_approval_rows({term=>$_});
399 if ($aref and scalar @$aref) {
400 mod_tag_approval($operator,$_,-1);
402 add_tag_approval($_,$operator,-1);
408 my $operator = shift;
409 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
410 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
411 # my $sth = C4::Context->dbh->prepare($query);
415 my $operator = shift;
416 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
417 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
418 # my $sth = C4::Context->dbh->prepare($query);
419 # $sth->execute($term);
423 sub add_tag_approval { # or disapproval
424 my $term = shift or return;
425 my $query = "SELECT * FROM tags_approval WHERE term = ?";
426 my $sth = C4::Context->dbh->prepare($query);
427 $sth->execute($term);
428 ($sth->rows) and return increment_weight_total($term);
429 my $operator = shift || 0;
430 my $approval = (@_ ? shift : 0); # default is unapproved
431 my @exe_args = ($term); # all 3 queries will use this argument
433 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
434 push @exe_args, $operator, $approval;
435 } elsif ($approval) {
436 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
437 push @exe_args, $approval;
439 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
441 $sth = C4::Context->dbh->prepare($query);
442 $sth->execute(@exe_args);
446 sub mod_tag_approval {
447 my $operator = shift;
448 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
449 my $term = shift or return;
450 my $approval = (scalar @_ ? shift : 1); # default is to approve
451 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
452 my $sth = C4::Context->dbh->prepare($query);
453 $sth->execute($operator,$approval,$term);
457 my $term = shift or return;
458 my $biblionumber = shift or return;
459 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
460 my $sth = C4::Context->dbh->prepare($query);
461 $sth->execute($term,$biblionumber);
462 ($sth->rows) and return increment_weight($term,$biblionumber);
463 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
464 $sth = C4::Context->dbh->prepare($query);
465 $sth->execute($term,$biblionumber);
469 sub get_tag { # by tag_id
471 my $sth = C4::Context->dbh->prepare(TAG_SELECT . "WHERE tag_id = ?");
472 $sth->execute(shift);
473 return $sth->fetchrow_hashref;
476 sub increment_weights {
477 increment_weight(@_);
478 increment_weight_total(shift);
480 sub decrement_weights {
481 decrement_weight(@_);
482 decrement_weight_total(shift);
484 sub increment_weight_total {
485 _set_weight_total('weight_total+1',shift);
487 sub increment_weight {
488 _set_weight('weight+1',shift,shift);
490 sub decrement_weight_total {
491 _set_weight_total('weight_total-1',shift);
493 sub decrement_weight {
494 _set_weight('weight-1',shift,shift);
496 sub _set_weight_total {
497 my $sth = C4::Context->dbh->prepare("
499 SET weight_total=" . (shift) . "
501 "); # note: CANNOT use "?" for weight_total (see the args above).
502 $sth->execute(shift); # just the term
505 my $dbh = C4::Context->dbh;
506 my $sth = $dbh->prepare("
508 SET weight=" . (shift) . "
515 sub add_tag { # biblionumber,term,[borrowernumber,approvernumber]
516 my $biblionumber = shift or return;
517 my $term = shift or return;
518 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
521 ($term) or return; # must be more than whitespace
522 my $rows = get_tag_rows({biblionumber=>$biblionumber, borrowernumber=>$borrowernumber, term=>$term, limit=>1});
523 my $query = "INSERT INTO tags_all
524 (borrowernumber,biblionumber,term,date_created)
525 VALUES (?,?,?,NOW())";
529 # add to tags_all regardless of approaval
530 my $sth = C4::Context->dbh->prepare($query);
531 $sth->execute($borrowernumber,$biblionumber,$term);
534 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
535 my $approver = shift;
536 add_tag_approval($term,$approver,1);
537 add_tag_index($term,$biblionumber,$approver);
538 } elsif (is_approved($term) >= 1) {
539 add_tag_approval($term,0,1);
540 add_tag_index($term,$biblionumber,1);
542 add_tag_approval($term);
543 add_tag_index($term,$biblionumber);
547 # This takes a set of tags, as returned by C<get_approval_rows> and divides
548 # them up into a number of "strata" based on their weight. This is useful
549 # to display them in a number of different sizes.
552 # ($min, $max) = stratify_tags($strata, $tags);
553 # $stratum: the number of divisions you want
554 # $tags: the tags, as provided by get_approval_rows
555 # $min: the minimum stratum value
556 # $max: the maximum stratum value. This may be the same as $min if there
557 # is only one weight. Beware of divide by zeros.
558 # This will add a field to the tag called "stratum" containing the calculated
561 my ( $strata, $tags ) = @_;
562 return (0,0) if !@$tags;
565 my $w = $_->{weight_total};
566 $min = $w if ( !defined($min) || $min > $w );
567 $max = $w if ( !defined($max) || $max < $w );
570 # normalise min to zero
575 # if min and max are the same, just make it 1
576 my $span = ( $strata - 1 ) / ( $max || 1 );
578 my $w = $_->{weight_total};
579 $_->{stratum} = int( ( $w - $orig_min ) * $span );
581 return ( $min, $max );
587 =head2 add_tag(biblionumber,term[,borrowernumber])
589 =head3 TO DO: Add real perldoc
593 =head2 External Dictionary (Ispell) [Recommended]
595 An external dictionary can be used as a means of "pre-populating" and tracking
596 allowed terms based on the widely available Ispell dictionary. This can be the system
597 dictionary or a personal version, but in order to support whitelisting, it must be
598 editable to the process running Koha.
600 To enable, enter the absolute path to the ispell dictionary in the system
601 preference "TagsExternalDictionary".
603 Using external Ispell is recommended for both ease of use and performance. Note that any
604 language version of Ispell can be installed. It is also possible to modify the dictionary
605 at the command line to affect the desired content.
607 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
608 should build their own wordlist and recompile Ispell based on it. See man ispell for
611 =head2 Table Structure
613 The tables used by tags are:
619 Your first thought may be that this looks a little complicated. It is, but only because
620 it has to be. I'll try to explain.
622 tags_all - This table would be all we really need if we didn't care about moderation or
623 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
624 though, it contains all the relevant info about a given tag:
625 tag_id - unique id number for it
626 borrowernumber - user that entered it
627 biblionumber - book record it is attached to
628 term - tag "term" itself
629 language - perhaps used later to influence weighting
630 date_created - date and time it was created
632 tags_approval - Since we need to provide moderation, this table is used to track it. If no
633 external dictionary is used, this table is the sole reference for approval and rejection.
634 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
635 This could be called an "approved terms" table. See above regarding the External Dictionary.
636 term - tag "term" itself
637 approved - Negative, 0 or positive if tag is rejected, pending or approved.
638 date_approved - date of last action
639 approved_by - staffer performing the last action
640 weight_total - total occurrence of term in any biblio by any users
642 tags_index - This table is for performance, because by far the most common operation will
643 be fetching tags for a list of search results. We will have a set of biblios, and we will
644 want ONLY their approved tags and overall weighting. While we could implement a query that
645 would traverse tags_all filtered against tags_approval, the performance implications of
646 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
647 term - approved term as it appears in tags_approval
648 biblionumber - book record it is attached to
649 weight - number of times tag applied by any user
651 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
652 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
653 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
654 if you do not understand their operation and interaction. It is quite easy to define too
655 simple or too complex a regexp and effectively block all terms. The blacklist operation is
656 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
657 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
658 understanding of regular expression syntax and performance.
660 So the best way to think about the different tables is that they are each tailored to a certain
661 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
662 the tag population can continue to grow even if a user (along with their corresponding
663 rows in tags_all) is removed.
667 If you want to auto-populate some tags for debugging, do something like this:
669 mysql> select biblionumber from biblio where title LIKE "%Health%";
700 26 rows in set (0.00 sec)
702 Then, take those numbers and type/pipe them into this perl command line:
703 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
705 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.