2 # This file is part of Koha.
4 # Koha is free software; you can redistribute it and/or modify it under the
5 # terms of the GNU General Public License as published by the Free Software
6 # Foundation; either version 2 of the License, or (at your option) any later
9 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
10 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License along with
14 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
15 # Suite 330, Boston, MA 02111-1307 USA
25 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
26 use vars qw($ext_dict $select_all @fields);
32 &get_tag &get_tags &get_tag_rows
36 &delete_tag_rows_by_ids
44 $ext_dict = C4::Context->preference('TagsExternalDictionary');
47 import Data::Dumper qw(:DEFAULT);
48 print STDERR __PACKAGE__ . " external dictionary = " . ($ext_dict||'none') . "\n";
51 require Lingua::Ispell;
52 import Lingua::Ispell qw(spellcheck add_word_lc save_dictionary);
57 $ext_dict and $Lingua::Ispell::path = $ext_dict;
58 $debug and print STDERR "\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
59 @fields = qw(tag_id borrowernumber biblionumber term language date_created);
60 $select_all = "SELECT " . join(',',@fields) . "\n FROM tags_all\n";
65 my $rows = get_tag_rows({tag_id=>$tag_id}) or return 0;
66 (scalar(@$rows) == 1) or return undef;
67 my $row = shift(@$rows);
68 ($tag_id == $row->{tag_id}) or return 0;
69 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
70 my $index = shift(@$tags);
71 $debug and print STDERR
72 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
73 $row->{tag_id}, $row->{biblionumber}, $index->{weight}, $index->{weight_total};
74 if ($index->{weight} <= 1) {
75 delete_tag_index($row->{term},$row->{biblionumber});
77 decrement_weight($row->{term},$row->{biblionumber});
79 if ($index->{weight_total} <= 1) {
80 delete_tag_approval($row->{term});
82 decrement_weight_total($row->{term});
84 delete_tag_row_by_id($tag_id);
87 sub delete_tag_index ($$) {
89 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
91 return $sth->rows || 0;
93 sub delete_tag_approval ($) {
95 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
97 return $sth->rows || 0;
99 sub delete_tag_row_by_id ($) {
100 (@_) or return undef;
101 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
102 $sth->execute(shift);
103 return $sth->rows || 0;
105 sub delete_tag_rows_by_ids (@) {
106 (@_) or return undef;
109 $i += delete_tag_row_by_id($_);
111 ($i == scalar(@_)) or
112 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
116 sub get_tag_rows ($) {
117 my $hash = shift || {};
118 my @ok_fields = @fields;
119 push @ok_fields, 'limit'; # push the limit! :)
123 foreach my $key (keys %$hash) {
124 $debug and print STDERR "get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
125 unless (length $key) {
126 carp "Empty argument key to get_tag_rows: ignoring!";
129 unless (1 == scalar grep {/^ $key $/xi} @ok_fields) {
130 carp "get_tag_rows received unreconized argument key '$key'.";
133 if ($key =~ /^limit$/i) {
134 my $val = $hash->{$key};
135 unless ($val =~ /^(\d+,)?\d+$/) {
136 carp "Non-nuerical limit value '$val' ignored!";
139 $limit = " LIMIT $val\n";
141 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
142 push @exe_args, $hash->{$key};
145 my $query = $select_all . ($wheres||'') . $limit;
146 $debug and print STDERR "get_tag_rows query:\n $query\n",
147 "get_tag_rows query args: ", join(',', @exe_args), "\n";
148 my $sth = C4::Context->dbh->prepare($query);
150 $sth->execute(@exe_args);
154 return $sth->fetchall_arrayref({});
157 sub get_tags (;$) { # i.e., from tags_index
158 my $hash = shift || {};
159 my @ok_fields = qw(term biblionumber weight limit sort);
164 foreach my $key (keys %$hash) {
165 $debug and print STDERR "get_tags arg. '$key' = ", $hash->{$key}, "\n";
166 unless (length $key) {
167 carp "Empty argument key to get_tags: ignoring!";
170 unless (1 == scalar grep {/^ $key $/xi} @ok_fields) {
171 carp "get_tags received unreconized argument key '$key'.";
174 if ($key =~ /^limit$/i) {
175 my $val = $hash->{$key};
176 unless ($val =~ /^(\d+,)?\d+$/) {
177 carp "Non-nuerical limit value '$val' ignored!";
180 $limit = " LIMIT $val\n";
181 } elsif ($key =~ /^sort$/i) {
182 foreach my $by (split /\,/, $hash->{$key}) {
184 $by =~ /^([-+])?(term)/ or
185 $by =~ /^([-+])?(biblionumber)/ or
186 $by =~ /^([-+])?(weight)/
188 carp "get_tags received illegal sort order '$by'";
194 $order = " ORDER BY ";
196 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
201 ($key =~ /^term$/i) and $whereval = 'tags_index.term';
202 $wheres .= ($wheres) ? " AND $whereval = ?\n" : " WHERE $whereval = ?\n";
203 push @exe_args, $hash->{$key};
207 SELECT tags_index.term as term,biblionumber,weight,weight_total
209 LEFT JOIN tags_approval
210 ON tags_index.term = tags_approval.term
211 " . ($wheres||'') . $order . $limit;
212 $debug and print STDERR "get_tags query:\n $query\n",
213 "get_tags query args: ", join(',', @exe_args), "\n";
214 my $sth = C4::Context->dbh->prepare($query);
216 $sth->execute(@exe_args);
220 return $sth->fetchall_arrayref({});
223 sub get_approval_rows (;$) { # i.e., from tags_approval
224 my $hash = shift || {};
225 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort);
230 foreach my $key (keys %$hash) {
231 $debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
232 unless (length $key) {
233 carp "Empty argument key to get_approval_rows: ignoring!";
236 unless (1 == scalar grep {/^ $key $/xi} @ok_fields) {
237 carp "get_approval_rows received unreconized argument key '$key'.";
240 if ($key =~ /^limit$/i) {
241 my $val = $hash->{$key};
242 unless ($val =~ /^(\d+,)?\d+$/) {
243 carp "Non-nuerical limit value '$val' ignored!";
246 $limit = " LIMIT $val\n";
247 } elsif ($key =~ /^sort$/i) {
248 foreach my $by (split /\,/, $hash->{$key}) {
250 $by =~ /^([-+])?(term)/ or
251 $by =~ /^([-+])?(biblionumber)/ or
252 $by =~ /^([-+])?(weight_total)/ or
253 $by =~ /^([-+])?(approved(_by)?)/ or
254 $by =~ /^([-+])?(date_approved)/
256 carp "get_approval_rows received illegal sort order '$by'";
262 $order = " ORDER BY " unless $order;
264 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
269 # ($key =~ /^term$/i) and $whereval = 'tags_index.term';
270 $wheres .= ($wheres) ? " AND $whereval = ?\n" : " WHERE $whereval = ?\n";
271 push @exe_args, $hash->{$key};
275 SELECT tags_approval.term AS term,
276 tags_approval.approved AS approved,
277 tags_approval.date_approved AS date_approved,
278 tags_approval.approved_by AS approved_by,
279 tags_approval.weight_total AS weight_total,
280 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
283 ON tags_approval.approved_by = borrowers.borrowernumber ";
284 $query .= ($wheres||'') . $order . $limit;
285 $debug and print STDERR "get_approval_rows query:\n $query\n",
286 "get_approval_rows query args: ", join(',', @exe_args), "\n";
287 my $sth = C4::Context->dbh->prepare($query);
289 $sth->execute(@exe_args);
293 return $sth->fetchall_arrayref({});
296 sub is_approved ($) {
297 my $term = shift or return undef;
298 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
299 $sth->execute($term);
300 unless ($sth->rows) {
301 $ext_dict and return (spellcheck($term) ? 0 : 1);
307 sub get_tag_index ($;$) {
308 my $term = shift or return undef;
311 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
312 $sth->execute($term,shift);
314 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
315 $sth->execute($term);
317 return $sth->fetchrow_hashref;
321 my $operator = shift;
322 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
325 spellcheck($_) or next;
330 my $aref = get_approval_rows({term=>$_});
331 if ($aref and scalar @$aref) {
332 mod_tag_approval($operator,$_,1);
334 add_tag_approval($_,$operator);
339 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
340 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
341 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
343 my $operator = shift;
344 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
346 my $aref = get_approval_rows({term=>$_});
347 if ($aref and scalar @$aref) {
348 mod_tag_approval($operator,$_,-1);
350 add_tag_approval($_,$operator,-1);
356 my $operator = shift;
357 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
358 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
359 # my $sth = C4::Context->dbh->prepare($query);
363 my $operator = shift;
364 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
365 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
366 # my $sth = C4::Context->dbh->prepare($query);
367 # $sth->execute($term);
371 sub add_tag_approval ($;$$) { # or disapproval
372 my $term = shift or return undef;
373 my $query = "SELECT * FROM tags_approval WHERE term = ?";
374 my $sth = C4::Context->dbh->prepare($query);
375 $sth->execute($term);
376 ($sth->rows) and return increment_weight_total($term);
377 my $operator = (@_ ? shift : 0);
379 my $approval = (@_ ? shift : 1); # default is to approve
380 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
381 $debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term,$operator,$approval)\n";
382 $sth = C4::Context->dbh->prepare($query);
383 $sth->execute($term,$operator,$approval);
385 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
386 $debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term)\n";
387 $sth = C4::Context->dbh->prepare($query);
388 $sth->execute($term);
393 sub mod_tag_approval ($$$) {
394 my $operator = shift or return undef;
395 my $term = shift or return undef;
396 my $approval = (@_ ? shift : 1); # default is to approve
397 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
398 $debug and print STDERR "mod_tag_approval query:\n$query\nmod_tag_approval args: ($operator,$approval,$term)\n";
399 my $sth = C4::Context->dbh->prepare($query);
400 $sth->execute($operator,$approval,$term);
403 sub add_tag_index ($$;$) {
404 my $term = shift or return undef;
405 my $biblionumber = shift or return undef;
406 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
407 my $sth = C4::Context->dbh->prepare($query);
408 $sth->execute($term,$biblionumber);
409 ($sth->rows) and return increment_weight($term,$biblionumber);
410 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
411 $debug and print "add_tag_index query:\n$query\nadd_tag_index args: ($term,$biblionumber)\n";
412 $sth = C4::Context->dbh->prepare($query);
413 $sth->execute($term,$biblionumber);
417 sub get_tag ($) { # by tag_id
418 (@_) or return undef;
419 my $sth = C4::Context->dbh->prepare("$select_all WHERE tag_id = ?");
420 $sth->execute(shift);
421 return $sth->fetchrow_hashref;
424 sub rectify_weights (;$) {
425 my $dbh = C4::Context->dbh;
428 SELECT term,biblionumber,count(*) as count
431 (@_) and $query .= " WHERE term =? ";
432 $query .= " GROUP BY term,biblionumber ";
433 $sth = $dbh->prepare($query);
435 $sth->execute(shift);
439 my $results = $sth->fetchall_arrayref({}) or return undef;
441 foreach (@$results) {
442 _set_weight($_->{count},$_->{term},$_->{biblionumber});
443 $tally{$_->{term}} += $_->{count};
445 foreach (keys %tally) {
446 _set_weight_total($tally{$_},$_);
448 return ($results,\%tally);
451 sub increment_weights ($$) {
452 increment_weight(@_);
453 increment_weight_total(shift);
455 sub decrement_weights ($$) {
456 decrement_weight(@_);
457 decrement_weight_total(shift);
459 sub increment_weight_total ($) {
460 _set_weight_total('weight_total+1',shift);
462 sub increment_weight ($$) {
463 _set_weight('weight+1',shift,shift);
465 sub decrement_weight_total ($) {
466 _set_weight_total('weight_total-1',shift);
468 sub decrement_weight ($$) {
469 _set_weight('weight-1',shift,shift);
471 sub _set_weight_total ($$) {
472 my $sth = C4::Context->dbh->prepare("
474 SET weight_total=" . (shift) . "
476 "); # note: CANNOT use "?" for weight_total (see the args above).
477 $sth->execute(shift); # just the term
479 sub _set_weight ($$$) {
480 my $dbh = C4::Context->dbh;
481 my $sth = $dbh->prepare("
483 SET weight=" . (shift) . "
490 sub add_tag ($$;$$) { # biblionumber,term,[borrowernumber,approvernumber]
491 my $biblionumber = shift or return undef;
492 my $term = shift or return undef;
493 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
495 # first, add to tags regardless of approaval
496 my $query = "INSERT INTO tags_all
497 (borrowernumber,biblionumber,term,date_created)
498 VALUES (?,?,?,NOW())";
499 $debug and print STDERR "add_tag query:\n $query\n",
500 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
501 my $sth = C4::Context->dbh->prepare($query);
502 $sth->execute($borrowernumber,$biblionumber,$term);
505 if (@_) { # if an arg remains, it is the borrowernumber of the approver: tag is pre-approved.
506 my $approver = shift;
507 add_tag_approval($term,$approver);
508 add_tag_index($term,$biblionumber,$approver);
509 } elsif (is_approved($term)) {
510 add_tag_approval($term,1);
511 add_tag_index($term,$biblionumber,1);
513 add_tag_approval($term);
514 add_tag_index($term,$biblionumber);
521 =head1 C4::Tags.pm - Support for user tagging of biblios.
523 More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
525 =head2 add_tag(biblionumber,term[,borrowernumber])
527 =head3 TO DO: Add real perldoc
529 =head2 External Dictionary (Ispell) [Recommended]
531 An external dictionary can be used as a means of "pre-populating" and tracking
532 allowed terms based on the widely available Ispell dictionary. This can be the system
533 dictionary or a personal version, but in order to support whitelisting, it must be
534 editable to the process running Koha.
536 To enable, enter the absolute path to the ispell dictionary in the system
537 preference "TagsExternalDictionary".
539 Using external Ispell is recommended for both ease of use and performance. Note that any
540 language version of Ispell can be installed. It is also possible to modify the dictionary
541 at the command line to affect the desired content.
543 =head2 Table Structure
545 The tables used by tags are:
551 Your first thought may be that this looks a little complicated. It is, but only because
552 it has to be. I'll try to explain.
554 tags_all - This table would be all we really need if we didn't care about moderation or
555 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
556 though, it contains all the relevant info about a given tag:
557 tag_id - unique id number for it
558 borrowernumber - user that entered it
559 biblionumber - book record it is attached to
560 term - tag "term" itself
561 language - perhaps used later to influence weighting
562 date_created - date and time it was created
564 tags_approval - Since we need to provide moderation, this table is used to track it. If no
565 external dictionary is used, this table is the sole reference for approval and rejection.
566 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
567 This could be called an "approved terms" table. See above regarding the External Dictionary.
568 term - tag "term" itself
569 approved - Negative, 0 or positive if tag is rejected, pending or approved.
570 date_approved - date of last action
571 approved_by - staffer performing the last action
572 weight_total - total occurance of term in any biblio by any users
574 tags_index - This table is for performance, because by far the most common operation will
575 be fetching tags for a list of search results. We will have a set of biblios, and we will
576 want ONLY their approved tags and overall weighting. While we could implement a query that
577 would traverse tags_all filtered against tags_approval, the performance implications of
578 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
579 term - approved term as it appears in tags_approval
580 biblionumber - book record it is attached to
581 weight - number of times tag applied by any user
583 tags_blacklist - TODO
585 So the best way to think about the different tabes is that they are each tailored to a certain
586 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
587 the tag population can continue to grow even if a user is removed, along with the corresponding
592 If you want to auto-populate some tags for debugging, do something like this:
594 mysql> select biblionumber from biblio where title LIKE "%Health%";
625 26 rows in set (0.00 sec)
627 Then, take those numbers and type them into this perl command line:
628 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'