3 # Copyright Liblime 2008
4 # Parts Copyright ACPL 2011
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # Koha is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Koha; if not, see <http://www.gnu.org/licenses>.
27 use Module::Load::Conditional qw/check_install/;
29 use constant TAG_FIELDS => qw(tag_id borrowernumber biblionumber term language date_created);
30 use constant TAG_SELECT => "SELECT " . join(',', TAG_FIELDS) . "\n FROM tags_all\n";
32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
37 &get_tag &get_tags &get_tag_rows
41 &delete_tag_rows_by_ids
47 &get_count_by_tag_status
52 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
53 if ( $ext_dict && ! check_install( module => 'Lingua::Ispell' ) ) {
54 warn "Ignoring TagsExternalDictionary, because Lingua::Ispell is not installed.";
58 require Lingua::Ispell;
59 import Lingua::Ispell qw(spellcheck add_word_lc);
60 $Lingua::Ispell::path = $ext_dict;
64 =head1 C4::Tags.pm - Support for user tagging of biblios.
69 my $query = "SELECT * FROM tags_filters ";
72 $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
75 $sth = C4::Context->dbh->prepare($query);
78 return $sth->fetchall_arrayref({});
81 # (SELECT count(*) FROM tags_all ) as tags_all,
82 # (SELECT count(*) FROM tags_index ) as tags_index,
86 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
87 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
88 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
90 my $sth = C4::Context->dbh->prepare($query);
92 my $result = $sth->fetchrow_hashref();
93 $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
97 =head2 get_count_by_tag_status
99 get_count_by_tag_status($status);
101 Takes a status and gets a count of tags with that status
105 sub get_count_by_tag_status {
107 my $dbh = C4::Context->dbh;
109 "SELECT count(*) FROM tags_approval WHERE approved=?";
110 my $sth = $dbh->prepare($query);
111 $sth->execute( $status );
112 return $sth->fetchrow;
116 my $tag_id = shift or return;
117 my $user_id = (@_) ? shift : undef;
118 my $rows = (defined $user_id) ?
119 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
120 get_tag_rows({tag_id=>$tag_id}) ;
122 (scalar(@$rows) == 1) or return; # should never happen (duplicate ids)
123 my $row = shift(@$rows);
124 ($tag_id == $row->{tag_id}) or return 0;
125 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
126 my $index = shift(@$tags);
127 if ($index->{weight} <= 1) {
128 delete_tag_index($row->{term},$row->{biblionumber});
130 decrement_weight($row->{term},$row->{biblionumber});
132 if ($index->{weight_total} <= 1) {
133 delete_tag_approval($row->{term});
135 decrement_weight_total($row->{term});
137 delete_tag_row_by_id($tag_id);
140 sub delete_tag_index {
142 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
144 return $sth->rows || 0;
146 sub delete_tag_approval {
148 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
149 $sth->execute(shift);
150 return $sth->rows || 0;
152 sub delete_tag_row_by_id {
154 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
155 $sth->execute(shift);
156 return $sth->rows || 0;
158 sub delete_tag_rows_by_ids {
162 $i += delete_tag_row_by_id($_);
164 ($i == scalar(@_)) or
165 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
170 my $hash = shift || {};
171 my @ok_fields = TAG_FIELDS;
172 push @ok_fields, 'limit'; # push the limit! :)
176 foreach my $key (keys %$hash) {
177 unless (length $key) {
178 carp "Empty argument key to get_tag_rows: ignoring!";
181 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
182 carp "get_tag_rows received unreconized argument key '$key'.";
185 if ($key eq 'limit') {
186 my $val = $hash->{$key};
187 unless ($val =~ /^(\d+,)?\d+$/) {
188 carp "Non-nuerical limit value '$val' ignored!";
191 $limit = " LIMIT $val\n";
193 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
194 push @exe_args, $hash->{$key};
197 my $query = TAG_SELECT . ($wheres||'') . $limit;
198 my $sth = C4::Context->dbh->prepare($query);
200 $sth->execute(@exe_args);
204 return $sth->fetchall_arrayref({});
207 sub get_tags { # i.e., from tags_index
208 my $hash = shift || {};
209 my @ok_fields = qw(term biblionumber weight limit sort approved);
214 foreach my $key (keys %$hash) {
215 unless (length $key) {
216 carp "Empty argument key to get_tags: ignoring!";
219 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
220 carp "get_tags received unreconized argument key '$key'.";
223 if ($key eq 'limit') {
224 my $val = $hash->{$key};
225 unless ($val =~ /^(\d+,)?\d+$/) {
226 carp "Non-nuerical limit value '$val' ignored!";
229 $limit = " LIMIT $val\n";
230 } elsif ($key eq 'sort') {
231 foreach my $by (split /\,/, $hash->{$key}) {
233 $by =~ /^([-+])?(term)/ or
234 $by =~ /^([-+])?(biblionumber)/ or
235 $by =~ /^([-+])?(weight)/
237 carp "get_tags received illegal sort order '$by'";
243 $order = " ORDER BY ";
245 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
249 my $whereval = $hash->{$key};
250 my $longkey = ($key eq 'term' ) ? 'tags_index.term' :
251 ($key eq 'approved') ? 'tags_approval.approved' : $key;
252 my $op = ($whereval =~ s/^(>=|<=)// or
253 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
254 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
255 push @exe_args, $whereval;
259 SELECT tags_index.term as term,biblionumber,weight,weight_total
261 LEFT JOIN tags_approval
262 ON tags_index.term = tags_approval.term
263 " . ($wheres||'') . $order . $limit;
264 my $sth = C4::Context->dbh->prepare($query);
266 $sth->execute(@exe_args);
270 return $sth->fetchall_arrayref({});
273 sub get_approval_rows { # i.e., from tags_approval
274 my $hash = shift || {};
275 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
280 foreach my $key (keys %$hash) {
281 unless (length $key) {
282 carp "Empty argument key to get_approval_rows: ignoring!";
285 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
286 carp "get_approval_rows received unreconized argument key '$key'.";
289 if ($key eq 'limit') {
290 my $val = $hash->{$key};
291 unless ($val =~ /^(\d+,)?\d+$/) {
292 carp "Non-numerical limit value '$val' ignored!";
295 $limit = " LIMIT $val\n";
296 } elsif ($key eq 'sort') {
297 foreach my $by (split /\,/, $hash->{$key}) {
299 $by =~ /^([-+])?(term)/ or
300 $by =~ /^([-+])?(biblionumber)/ or
301 $by =~ /^([-+])?(borrowernumber)/ or
302 $by =~ /^([-+])?(weight_total)/ or
303 $by =~ /^([-+])?(approved(_by)?)/ or
304 $by =~ /^([-+])?(date_approved)/
306 carp "get_approval_rows received illegal sort order '$by'";
312 $order = " ORDER BY " unless $order;
314 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
318 my $whereval = $hash->{$key};
319 my $op = ($whereval =~ s/^(>=|<=)// or
320 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
321 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
322 push @exe_args, $whereval;
326 SELECT tags_approval.term AS term,
327 tags_approval.approved AS approved,
328 tags_approval.date_approved AS date_approved,
329 tags_approval.approved_by AS approved_by,
330 tags_approval.weight_total AS weight_total,
331 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
334 ON tags_approval.approved_by = borrowers.borrowernumber ";
335 $query .= ($wheres||'') . $order . $limit;
336 my $sth = C4::Context->dbh->prepare($query);
338 $sth->execute(@exe_args);
342 return $sth->fetchall_arrayref({});
346 my $term = shift or return;
347 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
348 $sth->execute($term);
349 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
350 unless ($sth->rows) {
351 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
354 return $sth->fetchrow;
358 my $term = shift or return;
361 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
362 $sth->execute($term,shift);
364 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
365 $sth->execute($term);
367 return $sth->fetchrow_hashref;
371 my $operator = shift;
372 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
373 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
376 spellcheck($_) or next;
381 my $aref = get_approval_rows({term=>$_});
382 if ($aref and scalar @$aref) {
383 mod_tag_approval($operator,$_,1);
385 add_tag_approval($_,$operator);
390 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
391 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
392 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
394 my $operator = shift;
395 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
397 my $aref = get_approval_rows({term=>$_});
398 if ($aref and scalar @$aref) {
399 mod_tag_approval($operator,$_,-1);
401 add_tag_approval($_,$operator,-1);
407 my $operator = shift;
408 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
409 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
410 # my $sth = C4::Context->dbh->prepare($query);
414 my $operator = shift;
415 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
416 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
417 # my $sth = C4::Context->dbh->prepare($query);
418 # $sth->execute($term);
422 sub add_tag_approval { # or disapproval
423 my $term = shift or return;
424 my $query = "SELECT * FROM tags_approval WHERE term = ?";
425 my $sth = C4::Context->dbh->prepare($query);
426 $sth->execute($term);
427 ($sth->rows) and return increment_weight_total($term);
428 my $operator = shift || 0;
429 my $approval = (@_ ? shift : 0); # default is unapproved
430 my @exe_args = ($term); # all 3 queries will use this argument
432 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
433 push @exe_args, $operator, $approval;
434 } elsif ($approval) {
435 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
436 push @exe_args, $approval;
438 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
440 $sth = C4::Context->dbh->prepare($query);
441 $sth->execute(@exe_args);
445 sub mod_tag_approval {
446 my $operator = shift;
447 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
448 my $term = shift or return;
449 my $approval = (scalar @_ ? shift : 1); # default is to approve
450 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
451 my $sth = C4::Context->dbh->prepare($query);
452 $sth->execute($operator,$approval,$term);
456 my $term = shift or return;
457 my $biblionumber = shift or return;
458 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
459 my $sth = C4::Context->dbh->prepare($query);
460 $sth->execute($term,$biblionumber);
461 ($sth->rows) and return increment_weight($term,$biblionumber);
462 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
463 $sth = C4::Context->dbh->prepare($query);
464 $sth->execute($term,$biblionumber);
468 sub get_tag { # by tag_id
470 my $sth = C4::Context->dbh->prepare(TAG_SELECT . "WHERE tag_id = ?");
471 $sth->execute(shift);
472 return $sth->fetchrow_hashref;
475 sub increment_weights {
476 increment_weight(@_);
477 increment_weight_total(shift);
479 sub decrement_weights {
480 decrement_weight(@_);
481 decrement_weight_total(shift);
483 sub increment_weight_total {
484 _set_weight_total('weight_total+1',shift);
486 sub increment_weight {
487 _set_weight('weight+1',shift,shift);
489 sub decrement_weight_total {
490 _set_weight_total('weight_total-1',shift);
492 sub decrement_weight {
493 _set_weight('weight-1',shift,shift);
495 sub _set_weight_total {
496 my $sth = C4::Context->dbh->prepare("
498 SET weight_total=" . (shift) . "
500 "); # note: CANNOT use "?" for weight_total (see the args above).
501 $sth->execute(shift); # just the term
504 my $dbh = C4::Context->dbh;
505 my $sth = $dbh->prepare("
507 SET weight=" . (shift) . "
514 sub add_tag { # biblionumber,term,[borrowernumber,approvernumber]
515 my $biblionumber = shift or return;
516 my $term = shift or return;
517 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
520 ($term) or return; # must be more than whitespace
521 my $rows = get_tag_rows({biblionumber=>$biblionumber, borrowernumber=>$borrowernumber, term=>$term, limit=>1});
522 my $query = "INSERT INTO tags_all
523 (borrowernumber,biblionumber,term,date_created)
524 VALUES (?,?,?,NOW())";
528 # add to tags_all regardless of approaval
529 my $sth = C4::Context->dbh->prepare($query);
530 $sth->execute($borrowernumber,$biblionumber,$term);
533 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
534 my $approver = shift;
535 add_tag_approval($term,$approver,1);
536 add_tag_index($term,$biblionumber,$approver);
537 } elsif (is_approved($term) >= 1) {
538 add_tag_approval($term,0,1);
539 add_tag_index($term,$biblionumber,1);
541 add_tag_approval($term);
542 add_tag_index($term,$biblionumber);
546 # This takes a set of tags, as returned by C<get_approval_rows> and divides
547 # them up into a number of "strata" based on their weight. This is useful
548 # to display them in a number of different sizes.
551 # ($min, $max) = stratify_tags($strata, $tags);
552 # $stratum: the number of divisions you want
553 # $tags: the tags, as provided by get_approval_rows
554 # $min: the minimum stratum value
555 # $max: the maximum stratum value. This may be the same as $min if there
556 # is only one weight. Beware of divide by zeros.
557 # This will add a field to the tag called "stratum" containing the calculated
560 my ( $strata, $tags ) = @_;
561 return (0,0) if !@$tags;
564 my $w = $_->{weight_total};
565 $min = $w if ( !defined($min) || $min > $w );
566 $max = $w if ( !defined($max) || $max < $w );
569 # normalise min to zero
574 # if min and max are the same, just make it 1
575 my $span = ( $strata - 1 ) / ( $max || 1 );
577 my $w = $_->{weight_total};
578 $_->{stratum} = int( ( $w - $orig_min ) * $span );
580 return ( $min, $max );
586 =head2 add_tag(biblionumber,term[,borrowernumber])
588 =head3 TO DO: Add real perldoc
592 =head2 External Dictionary (Ispell) [Recommended]
594 An external dictionary can be used as a means of "pre-populating" and tracking
595 allowed terms based on the widely available Ispell dictionary. This can be the system
596 dictionary or a personal version, but in order to support whitelisting, it must be
597 editable to the process running Koha.
599 To enable, enter the absolute path to the ispell dictionary in the system
600 preference "TagsExternalDictionary".
602 Using external Ispell is recommended for both ease of use and performance. Note that any
603 language version of Ispell can be installed. It is also possible to modify the dictionary
604 at the command line to affect the desired content.
606 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
607 should build their own wordlist and recompile Ispell based on it. See man ispell for
610 =head2 Table Structure
612 The tables used by tags are:
618 Your first thought may be that this looks a little complicated. It is, but only because
619 it has to be. I'll try to explain.
621 tags_all - This table would be all we really need if we didn't care about moderation or
622 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
623 though, it contains all the relevant info about a given tag:
624 tag_id - unique id number for it
625 borrowernumber - user that entered it
626 biblionumber - book record it is attached to
627 term - tag "term" itself
628 language - perhaps used later to influence weighting
629 date_created - date and time it was created
631 tags_approval - Since we need to provide moderation, this table is used to track it. If no
632 external dictionary is used, this table is the sole reference for approval and rejection.
633 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
634 This could be called an "approved terms" table. See above regarding the External Dictionary.
635 term - tag "term" itself
636 approved - Negative, 0 or positive if tag is rejected, pending or approved.
637 date_approved - date of last action
638 approved_by - staffer performing the last action
639 weight_total - total occurrence of term in any biblio by any users
641 tags_index - This table is for performance, because by far the most common operation will
642 be fetching tags for a list of search results. We will have a set of biblios, and we will
643 want ONLY their approved tags and overall weighting. While we could implement a query that
644 would traverse tags_all filtered against tags_approval, the performance implications of
645 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
646 term - approved term as it appears in tags_approval
647 biblionumber - book record it is attached to
648 weight - number of times tag applied by any user
650 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
651 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
652 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
653 if you do not understand their operation and interaction. It is quite easy to define too
654 simple or too complex a regexp and effectively block all terms. The blacklist operation is
655 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
656 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
657 understanding of regular expression syntax and performance.
659 So the best way to think about the different tables is that they are each tailored to a certain
660 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
661 the tag population can continue to grow even if a user (along with their corresponding
662 rows in tags_all) is removed.
666 If you want to auto-populate some tags for debugging, do something like this:
668 mysql> select biblionumber from biblio where title LIKE "%Health%";
699 26 rows in set (0.00 sec)
701 Then, take those numbers and type/pipe them into this perl command line:
702 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
704 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.