3 # Copyright Liblime 2008
4 # Parts Copyright ACPL 2011
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # Koha is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Koha; if not, see <http://www.gnu.org/licenses>.
27 use Module::Load::Conditional qw( check_install );
29 use Koha::Tags::Approvals;
30 use Koha::Tags::Indexes;
31 use constant TAG_FIELDS => qw(tag_id borrowernumber biblionumber term language date_created);
32 use constant TAG_SELECT => "SELECT " . join(',', TAG_FIELDS) . "\n FROM tags_all\n";
34 our (@ISA, @EXPORT_OK);
49 get_count_by_tag_status
53 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
54 if ( $ext_dict && ! check_install( module => 'Lingua::Ispell' ) ) {
55 warn "Ignoring TagsExternalDictionary, because Lingua::Ispell is not installed.";
59 require Lingua::Ispell;
60 import Lingua::Ispell qw(spellcheck add_word_lc);
61 $Lingua::Ispell::path = $ext_dict;
65 =head1 C4::Tags.pm - Support for user tagging of biblios.
70 my $query = "SELECT * FROM tags_filters ";
73 $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
76 $sth = C4::Context->dbh->prepare($query);
79 return $sth->fetchall_arrayref({});
82 # (SELECT count(*) FROM tags_all ) as tags_all,
83 # (SELECT count(*) FROM tags_index ) as tags_index,
87 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
88 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
89 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
91 my $sth = C4::Context->dbh->prepare($query);
93 my $result = $sth->fetchrow_hashref();
94 $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
98 =head2 get_count_by_tag_status
100 get_count_by_tag_status($status);
102 Takes a status and gets a count of tags with that status
106 sub get_count_by_tag_status {
108 my $dbh = C4::Context->dbh;
110 "SELECT count(*) FROM tags_approval WHERE approved=?";
111 my $sth = $dbh->prepare($query);
112 $sth->execute( $status );
113 return $sth->fetchrow;
117 my $tag_id = shift or return;
118 my $user_id = (@_) ? shift : undef;
119 my $rows = (defined $user_id) ?
120 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
121 get_tag_rows({tag_id=>$tag_id}) ;
123 (scalar(@$rows) == 1) or return; # should never happen (duplicate ids)
124 my $row = shift(@$rows);
125 ($tag_id == $row->{tag_id}) or return 0;
126 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
127 my $index = shift(@$tags);
128 if ($index->{weight} <= 1) {
129 Koha::Tags::Indexes->search({ term => $row->{term}, biblionumber => $row->{biblionumber} })->delete;
131 decrement_weight($row->{term},$row->{biblionumber});
133 if ($index->{weight_total} <= 1) {
134 Koha::Tags::Approvals->search({ term => $row->{term} })->delete;
136 decrement_weight_total($row->{term});
138 Koha::Tags->search({ tag_id => $tag_id })->delete;
142 my $hash = shift || {};
143 my @ok_fields = TAG_FIELDS;
144 push @ok_fields, 'limit'; # push the limit! :)
148 foreach my $key (keys %$hash) {
149 unless (length $key) {
150 carp "Empty argument key to get_tag_rows: ignoring!";
153 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
154 carp "get_tag_rows received unreconized argument key '$key'.";
157 if ($key eq 'limit') {
158 my $val = $hash->{$key};
159 unless ($val =~ /^(\d+,)?\d+$/) {
160 carp "Non-nuerical limit value '$val' ignored!";
163 $limit = " LIMIT $val\n";
165 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
166 push @exe_args, $hash->{$key};
169 my $query = TAG_SELECT . ($wheres||'') . $limit;
170 my $sth = C4::Context->dbh->prepare($query);
172 $sth->execute(@exe_args);
176 return $sth->fetchall_arrayref({});
179 sub get_tags { # i.e., from tags_index
180 my $hash = shift || {};
181 my @ok_fields = qw(term biblionumber weight limit sort approved);
186 foreach my $key (keys %$hash) {
187 unless (length $key) {
188 carp "Empty argument key to get_tags: ignoring!";
191 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
192 carp "get_tags received unreconized argument key '$key'.";
195 if ($key eq 'limit') {
196 my $val = $hash->{$key};
197 unless ($val =~ /^(\d+,)?\d+$/) {
198 carp "Non-nuerical limit value '$val' ignored!";
201 $limit = " LIMIT $val\n";
202 } elsif ($key eq 'sort') {
203 foreach my $by (split /\,/, $hash->{$key}) {
205 $by =~ /^([-+])?(term)/ or
206 $by =~ /^([-+])?(biblionumber)/ or
207 $by =~ /^([-+])?(weight)/
209 carp "get_tags received illegal sort order '$by'";
215 $order = " ORDER BY ";
217 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
221 my $whereval = $hash->{$key};
222 my $longkey = ($key eq 'term' ) ? 'tags_index.term' :
223 ($key eq 'approved') ? 'tags_approval.approved' : $key;
224 my $op = ($whereval =~ s/^(>=|<=)// or
225 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
226 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
227 push @exe_args, $whereval;
231 SELECT tags_index.term as term,biblionumber,weight,weight_total
233 LEFT JOIN tags_approval
234 ON tags_index.term = tags_approval.term
235 " . ($wheres||'') . $order . $limit;
236 my $sth = C4::Context->dbh->prepare($query);
238 $sth->execute(@exe_args);
242 return $sth->fetchall_arrayref({});
245 sub get_approval_rows { # i.e., from tags_approval
246 my $hash = shift || {};
247 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
252 foreach my $key (keys %$hash) {
253 unless (length $key) {
254 carp "Empty argument key to get_approval_rows: ignoring!";
257 unless (1 == scalar grep { $_ eq $key } @ok_fields) {
258 carp "get_approval_rows received unreconized argument key '$key'.";
261 if ($key eq 'limit') {
262 my $val = $hash->{$key};
263 unless ($val =~ /^(\d+,)?\d+$/) {
264 carp "Non-numerical limit value '$val' ignored!";
267 $limit = " LIMIT $val\n";
268 } elsif ($key eq 'sort') {
269 foreach my $by (split /\,/, $hash->{$key}) {
271 $by =~ /^([-+])?(term)/ or
272 $by =~ /^([-+])?(biblionumber)/ or
273 $by =~ /^([-+])?(borrowernumber)/ or
274 $by =~ /^([-+])?(weight_total)/ or
275 $by =~ /^([-+])?(approved(_by)?)/ or
276 $by =~ /^([-+])?(date_approved)/
278 carp "get_approval_rows received illegal sort order '$by'";
284 $order = " ORDER BY " unless $order;
286 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
290 my $whereval = $hash->{$key};
291 my $op = ($whereval =~ s/^(>=|<=)// or
292 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
293 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
294 push @exe_args, $whereval;
298 SELECT tags_approval.term AS term,
299 tags_approval.approved AS approved,
300 tags_approval.date_approved AS date_approved,
301 tags_approval.approved_by AS approved_by,
302 tags_approval.weight_total AS weight_total,
303 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
306 ON tags_approval.approved_by = borrowers.borrowernumber ";
307 $query .= ($wheres||'') . $order . $limit;
308 my $sth = C4::Context->dbh->prepare($query);
310 $sth->execute(@exe_args);
314 return $sth->fetchall_arrayref({});
318 my $term = shift or return;
319 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
320 $sth->execute($term);
321 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
322 unless ($sth->rows) {
323 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
326 return $sth->fetchrow;
330 my $term = shift or return;
333 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
334 $sth->execute($term,shift);
336 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
337 $sth->execute($term);
339 return $sth->fetchrow_hashref;
343 my $operator = shift;
344 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
345 my $ext_dict = C4::Context->preference('TagsExternalDictionary');
348 spellcheck($_) or next;
353 my $aref = get_approval_rows({term=>$_});
354 if ($aref and scalar @$aref) {
355 mod_tag_approval($operator,$_,1);
357 add_tag_approval($_,$operator);
362 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
363 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
364 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
366 my $operator = shift;
367 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
369 my $aref = get_approval_rows({term=>$_});
370 if ($aref and scalar @$aref) {
371 mod_tag_approval($operator,$_,-1);
373 add_tag_approval($_,$operator,-1);
379 my $operator = shift;
380 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
381 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
382 # my $sth = C4::Context->dbh->prepare($query);
386 my $operator = shift;
387 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
388 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
389 # my $sth = C4::Context->dbh->prepare($query);
390 # $sth->execute($term);
394 sub add_tag_approval { # or disapproval
395 my $term = shift or return;
396 my $query = "SELECT * FROM tags_approval WHERE term = ?";
397 my $sth = C4::Context->dbh->prepare($query);
398 $sth->execute($term);
399 ($sth->rows) and return increment_weight_total($term);
400 my $operator = shift || 0;
401 my $approval = (@_ ? shift : 0); # default is unapproved
402 my @exe_args = ($term); # all 3 queries will use this argument
404 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
405 push @exe_args, $operator, $approval;
406 } elsif ($approval) {
407 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
408 push @exe_args, $approval;
410 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
412 $sth = C4::Context->dbh->prepare($query);
413 $sth->execute(@exe_args);
417 sub mod_tag_approval {
418 my $operator = shift;
419 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
420 my $term = shift or return;
421 my $approval = (scalar @_ ? shift : 1); # default is to approve
422 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
423 my $sth = C4::Context->dbh->prepare($query);
424 $sth->execute($operator,$approval,$term);
428 my $term = shift or return;
429 my $biblionumber = shift or return;
430 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
431 my $sth = C4::Context->dbh->prepare($query);
432 $sth->execute($term,$biblionumber);
433 ($sth->rows) and return increment_weight($term,$biblionumber);
434 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
435 $sth = C4::Context->dbh->prepare($query);
436 $sth->execute($term,$biblionumber);
440 sub increment_weights {
441 increment_weight(@_);
442 increment_weight_total(shift);
444 sub decrement_weights {
445 decrement_weight(@_);
446 decrement_weight_total(shift);
448 sub increment_weight_total {
449 _set_weight_total('weight_total+1',shift);
451 sub increment_weight {
452 _set_weight('weight+1',shift,shift);
454 sub decrement_weight_total {
455 _set_weight_total('weight_total-1',shift);
457 sub decrement_weight {
458 _set_weight('weight-1',shift,shift);
460 sub _set_weight_total {
461 my $sth = C4::Context->dbh->prepare("
463 SET weight_total=" . (shift) . "
465 "); # note: CANNOT use "?" for weight_total (see the args above).
466 $sth->execute(shift); # just the term
469 my $dbh = C4::Context->dbh;
470 my $sth = $dbh->prepare("
472 SET weight=" . (shift) . "
479 sub add_tag { # biblionumber,term,[borrowernumber,approvernumber]
480 my $biblionumber = shift or return;
481 my $term = shift or return;
482 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
485 ($term) or return; # must be more than whitespace
486 my $rows = get_tag_rows({biblionumber=>$biblionumber, borrowernumber=>$borrowernumber, term=>$term, limit=>1});
487 my $query = "INSERT INTO tags_all
488 (borrowernumber,biblionumber,term,date_created)
489 VALUES (?,?,?,NOW())";
493 # add to tags_all regardless of approaval
494 my $sth = C4::Context->dbh->prepare($query);
495 $sth->execute($borrowernumber,$biblionumber,$term);
498 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
499 my $approver = shift;
500 add_tag_approval($term,$approver,1);
501 add_tag_index($term,$biblionumber,$approver);
502 } elsif (is_approved($term) >= 1) {
503 add_tag_approval($term,0,1);
504 add_tag_index($term,$biblionumber,1);
506 add_tag_approval($term);
507 add_tag_index($term,$biblionumber);
511 # This takes a set of tags, as returned by C<get_approval_rows> and divides
512 # them up into a number of "strata" based on their weight. This is useful
513 # to display them in a number of different sizes.
516 # ($min, $max) = stratify_tags($strata, $tags);
517 # $stratum: the number of divisions you want
518 # $tags: the tags, as provided by get_approval_rows
519 # $min: the minimum stratum value
520 # $max: the maximum stratum value. This may be the same as $min if there
521 # is only one weight. Beware of divide by zeros.
522 # This will add a field to the tag called "stratum" containing the calculated
525 my ( $strata, $tags ) = @_;
526 return (0,0) if !@$tags;
529 my $w = $_->{weight_total};
530 $min = $w if ( !defined($min) || $min > $w );
531 $max = $w if ( !defined($max) || $max < $w );
534 # normalise min to zero
539 # if min and max are the same, just make it 1
540 my $span = ( $strata - 1 ) / ( $max || 1 );
542 my $w = $_->{weight_total};
543 $_->{stratum} = int( ( $w - $orig_min ) * $span );
545 return ( $min, $max );
551 =head2 add_tag(biblionumber,term[,borrowernumber])
553 =head3 TO DO: Add real perldoc
557 =head2 External Dictionary (Ispell) [Recommended]
559 An external dictionary can be used as a means of "pre-populating" and tracking
560 allowed terms based on the widely available Ispell dictionary. This can be the system
561 dictionary or a personal version, but in order to support whitelisting, it must be
562 editable to the process running Koha.
564 To enable, enter the absolute path to the ispell dictionary in the system
565 preference "TagsExternalDictionary".
567 Using external Ispell is recommended for both ease of use and performance. Note that any
568 language version of Ispell can be installed. It is also possible to modify the dictionary
569 at the command line to affect the desired content.
571 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
572 should build their own wordlist and recompile Ispell based on it. See man ispell for
575 =head2 Table Structure
577 The tables used by tags are:
583 Your first thought may be that this looks a little complicated. It is, but only because
584 it has to be. I'll try to explain.
586 tags_all - This table would be all we really need if we didn't care about moderation or
587 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
588 though, it contains all the relevant info about a given tag:
589 tag_id - unique id number for it
590 borrowernumber - user that entered it
591 biblionumber - book record it is attached to
592 term - tag "term" itself
593 language - perhaps used later to influence weighting
594 date_created - date and time it was created
596 tags_approval - Since we need to provide moderation, this table is used to track it. If no
597 external dictionary is used, this table is the sole reference for approval and rejection.
598 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
599 This could be called an "approved terms" table. See above regarding the External Dictionary.
600 term - tag "term" itself
601 approved - Negative, 0 or positive if tag is rejected, pending or approved.
602 date_approved - date of last action
603 approved_by - staffer performing the last action
604 weight_total - total occurrence of term in any biblio by any users
606 tags_index - This table is for performance, because by far the most common operation will
607 be fetching tags for a list of search results. We will have a set of biblios, and we will
608 want ONLY their approved tags and overall weighting. While we could implement a query that
609 would traverse tags_all filtered against tags_approval, the performance implications of
610 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
611 term - approved term as it appears in tags_approval
612 biblionumber - book record it is attached to
613 weight - number of times tag applied by any user
615 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
616 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
617 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
618 if you do not understand their operation and interaction. It is quite easy to define too
619 simple or too complex a regexp and effectively block all terms. The blacklist operation is
620 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
621 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
622 understanding of regular expression syntax and performance.
624 So the best way to think about the different tables is that they are each tailored to a certain
625 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
626 the tag population can continue to grow even if a user (along with their corresponding
627 rows in tags_all) is removed.
631 If you want to auto-populate some tags for debugging, do something like this:
633 mysql> select biblionumber from biblio where title LIKE "%Health%";
664 26 rows in set (0.00 sec)
666 Then, take those numbers and type/pipe them into this perl command line:
667 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
669 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.