synch'ing 2.2 and head
[koha.git] / z3950 / processz3950queue
1 #!/usr/bin/perl
2
3 # $Id$
4
5 use C4::Context;
6 use DBI;
7 use strict;
8 use C4::Biblio;
9 use C4::Output;
10 use C4::Breeding;
11 use Net::Z3950;
12
13 =head1 NAME
14
15 processz3950queue. The script that does z3950 searches.
16
17 =head1 SYNOPSIS
18
19 This script can be used on a console (as normal user) or by the daemon-launch script.
20
21 Don't forget to EXPORT PERL5LIB=/PATH/to/KOHA before executing it if you use console mode.
22
23 It :
24 1- extracts z3950 requests from z3950queue table,
25 2- creates entries in z3950results to store the result
26 3- launch z3950 queries in asynchronous mode, using Unix fork()
27 4- store results in marc_breeding table.
28
29 The z3950 results queries are managed in z3950/search.pl script (poped-up window in MARC editor).
30
31 =head1 DESCRIPTION
32
33 =head2 table z3950servers
34
35 This table stores the differents z3950 servers.
36 A server is used if checked=1. The rank is NOT used as searches are now asynchronous.
37
38 =head2 table z3950queue
39
40 Table use to manage queries. A single line is created  in this table for each z3950 search request.
41 If more than 1 server is called, the C<servers> field containt all of them separated by |.
42
43 z3950 search requests are done by z3950/search.pl script.
44 At this stage, the fields are created with C<startdate> and C<done> empty
45
46 Then, the processz3950queue finds this entry and :
47 1- store date (time()) in C<startdate>
48 2- set C<done> = -1
49
50 when the requests are all sent :
51 2- set C<done> = 1
52 3- set C<enddate> (FIXME: always equal to startdate for me)
53
54 entries are deleted when :
55 - C<startdate> is more than 1 day ago.
56
57 FIXME:
58 - results, numrecords fields are unused
59
60 =head2 table z3950results
61
62 1 entry is created for each request, for each server called.
63 when created :
64 * C<startdate> is filled
65 * C<enddate> is null
66 * active is set to 0. when active is 0, it means the request has not been sent. when set to 1, it means it's on the way.
67
68 When a search is ended, C<enddate> is set, and C<active> is set to -1
69
70 =head1 How it's written
71
72 on every loop :
73 * delete old queries
74 * for each entry in z3950queue table that is not done=1 {
75         for each search request {
76                 for each server {
77                         try to connect
78                         look for results
79                         *      results can be :
80                         - existing and already running on another process (active=1)
81                         - existing & finished (active=-1)
82                         - non existent => create it and run the request.
83                 }
84         }
85 }
86 =over 2
87
88 =cut
89
90
91 if ($< == 0) {
92     # Running as root, switch privs
93     if (-d "/var/run") {
94         open PID, ">/var/run/processz3950queue.pid";
95         print PID $$."\n";
96         close PID;
97     }
98     # Get real apacheuser from koha.conf or reparsing httpd.conf
99     my $apacheuser=C4::Context->config("httpduser");
100     my $uid=0;
101     unless ($uid = (getpwnam($apacheuser))[2]) {
102         die "Attempt to run daemon as non-existent or superuser\n";
103     }
104     $>=$uid;
105     $<=$uid;
106 }
107 my $db_driver = C4::Context->config("db_scheme") || "mysql";
108 my $db_name   = C4::Context->config("database");
109 my $db_host   = C4::Context->config("hostname");
110 my $db_user   = C4::Context->config("user");
111 my $db_passwd = C4::Context->config("pass");
112 my $dbh = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd);
113
114 # we begin the script, so "unactive" every pending request : they will never give anything, the script died :-(
115 my $sth=$dbh->prepare("update z3950results set active=0 where active<>-1");
116 $sth->execute;
117 $sth->finish;
118 $SIG{CHLD}='reap';
119 $SIG{HUP}='checkqueue';
120
121
122 my $logdir=$ARGV[0];
123
124 open PID, ">$logdir/processz3950queue.pid";
125 print PID $$."\n";
126 close PID;
127
128 my $reapcounter=0;
129 my $forkcounter=0;
130 my $checkqueue=1;
131 my $pid=$$;
132 my $lastrun=0;
133 while (1) {
134         if ((time-$lastrun)>5) {
135                 $checkqueue = 1; # FIXME during testing, this line forces the loop. REMOVE it to use SIG{HUP} when "daemonized" !
136 # clean DB
137                 my $now = time();
138                 # delete z3950queue entries that are more than 1 day old
139                 my $sth = $dbh->prepare("delete from z3950queue where ?-startdate > 86400");
140                 $sth->execute($now);
141                 # delete z3950results queries that are more than 1 hour old
142                 $sth = $dbh->prepare("delete from z3950results where ?-startdate > 3600");
143                 $sth->execute($now);
144                 if ($checkqueue) { # everytime a SIG{HUP} is recieved
145                         $checkqueue=0;
146 # parse every entry in QUEUE
147                         $sth=$dbh->prepare("select id,term,type,servers,identifier from z3950queue where done<>1 or done is null order by id");
148                         $sth->execute;
149                         while (my ($id, $term, $type, $servers,$random) = $sth->fetchrow) {
150 # FIXME: there is no "else". So, if more than 12 requests at the same time => requests are lost !
151                                 if ($forkcounter<12) {
152                                         my $now=time();
153 # search for results entries for this request
154                                         my $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=?");
155                                         ($stk->execute($id)) || (next);
156                                         my %serverdone;
157 # if no results => set queue to done = -1, set startdate and begin creating z3950results table entries & z3950 queries
158                                         unless ($stk->rows) {
159                                                 my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=? where id=?");
160                                                 $sti->execute($now,$id);
161                                         }
162 # check which servers calls have already been created (before a crash)
163                                         while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) {
164                                                 if ($r_enddate >0) { # result entry exist & finished
165                                                         $serverdone{$r_server}=1;
166                                                 } elsif ($active) { # result entry exists & on the way (active=1) or already done (active=-1)
167                                                         $serverdone{$r_server}=1;
168                                                 } else { # otherwise
169                                                         $serverdone{$r_server}=-1;
170                                                 }
171                                                 # note that is the entry doesn't exist, the $serverdone{$r_server} is 0 (important later !)
172                                         }
173                                         $stk->finish;
174                                         foreach my $serverinfo (split(/\|/, $servers)) {
175                                                 (next) if ($serverdone{$serverinfo} == 1); #(otherwise, is 0 or -1)
176                                                 my $totalrecords=0;
177                                                 my $globalname;
178                                                 my $globalsyntax;
179                                                 my $globalencoding;
180 # fork a process for this z3950 query
181                                                 if (my $pid=fork()) {
182                                                         $forkcounter++;
183                                                 } else {
184 # and connect to z3950 server
185 #FIXME: why do we need $dbi ? can't we use $dbh ?
186                                                         my $db_driver = C4::Context->config("db_scheme") || "mysql";
187                                                         my $db_name   = C4::Context->config("database");
188                                                         my $db_host   = C4::Context->config("hostname");
189                                                         my $db_user   = C4::Context->config("user");
190                                                         my $db_passwd = C4::Context->config("pass");
191                                                         my $dbi = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd);
192                                                         $dbh->{"InactiveDestroy"} = "true";
193                                                         my ($name, $server, $database, $user, $password,$syntax) = split(/\//, $serverinfo, 6);
194                                                         $globalname=$name;
195                                                         $globalsyntax = $syntax;
196                                                         $server=~/(.*)\:(\d+)/;
197                                                         my $servername=$1;
198                                                         my $port=$2;
199                                                         my $attr='';
200                                                         if ($type eq 'isbn') {
201                                                                 $attr='1=7';
202                                                         } elsif ($type eq 'title') {
203                                                                 $attr='1=4';
204                                                         } elsif ($type eq 'author') {
205                                                                 $attr='1=1003';
206                                                         } elsif ($type eq 'lccn') {
207                                                                 $attr='1=9';
208                                                         } elsif ($type eq 'keyword') {
209                                                                 $attr='1=1016';
210                                                         }
211                                                         my $query="\@attr $attr \"$term\"";
212                                                         print "$$/$id : Processing $type=$term at $name $server $database $syntax (".($forkcounter+1)." forks)\n";
213 # try to connect
214                                                         my $conn;
215                                                         my $noconnection=0;
216                                                         my $error=0;
217 # the z3950 query is builded. Launch it.
218                                                         if ($user) {
219                                                                 $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database, user => $user, password => $password) || ($noconnection=1);
220                                                         } else {
221                                                                 $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database) || ($noconnection=1);
222                                                         }
223                                                         if ($noconnection || $error) {
224 # if connection impossible, don't go further !
225                                                                 print "$$/$id : no connection at $globalname\n";
226                                                                 my $result = MARC::Record->new();
227                                                                 my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"$globalname server is NOT responding","",$random);
228                                                         } else {
229 # else, build z3950 query and do it !
230                                                                 $now=time();
231                                                                 my $resultsid="";
232         # create z3950results entries.
233                                                                 if ($serverdone{$serverinfo}==-1) { # if entry exist, just retrieve it
234                                                                         my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
235                                                                         $stj->execute($serverinfo,$id);
236                                                                         ($resultsid) = $stj->fetchrow;
237                                                                         $stj->finish;
238                                                                         print "$$/$id : 1 >> $resultsid\n";
239                                                                 } else { # else create it : (may be serverdone=1 or 0)
240                                                                         my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
241                                                                         $stj->execute($serverinfo,$id);
242                                                                         ($resultsid) = $stj->fetchrow;
243                                                                         $stj->finish;
244                                                                         print "$$/$id : 2 >> $resultsid\n";
245                                                                         unless ($resultsid) {
246                                                                                 $stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values (?,?,?)");
247                                                                                 $stj->execute($serverinfo, $id, $now);
248                                                                                 $resultsid=$dbi->{'mysql_insertid'};
249                                                                                 $stj->finish;
250                                                                                 print "$$/$id : creating and ";
251                                                                         }
252                                                                 }
253                                                                 print "$$/$id : working on results entry $resultsid\n";
254         # set active to 1 => this request is on the way.
255                                                                 my $stj=$dbi->prepare("update z3950results set active=1 where id=?");
256                                                                 $stj->execute($resultsid);
257 #######
258                                                                 print "$$/$id : connected to $globalname\n";
259                                                                 eval {$conn->option(elementSetName => 'F')};
260                                                                 eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::USMARC);} if ($globalsyntax eq "MARC21");
261                                                                 eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::UNIMARC);} if ($globalsyntax eq "UNIMARC");
262                                                                 if ($@) {
263                                                                         print "$$/$id : $globalname ERROR: $@ for $resultsid\n";
264         # in case pb during connexion, set result to "empty" to avoid everlasting loops
265                                                                         my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
266                                                                         $stj->execute(0,0,$now,$resultsid);
267                                                                 } else {
268                                                                         my $rs=$conn->search($query);
269                                                                         pe();
270         # we have an answer for a query => get results & store them in marc_breeding table
271                                                                         my $numresults=$rs->size();
272                                                                         if ($numresults eq 0) {
273                                                                                 print "$$/$id : $globalname : no records found\n";
274                                                                         } else {
275                                                                                 print "$$/$id : $globalname : $numresults records found, retrieving them (max 80)\n";
276                                                                         }
277                                                                         pe();
278                                                                         my $i;
279                                                                         my $result='';
280                                                                         my $scantimerstart=time();
281                                                                         for ($i=1; $i<=(($numresults<80) ? ($numresults) : (80)); $i++) {
282                                                                                 my $rec=$rs->record($i);
283                                                                                 my $marcdata = $rec->rawdata();
284                                                                                 $globalencoding = ref($rec);
285                                                                                 $result.=$marcdata;
286                                                                         }
287                                                                         my @x=split /::/,$globalencoding;
288                                                                         my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"Z3950-$globalname",$x[3],$random);
289                                                                         my $scantimerend=time();
290                                                                         my $numrecords;
291                                                                         ($numresults<80) ? ($numrecords=$numresults) : ($numrecords=80);
292                                                                         my $elapsed=$scantimerend-$scantimerstart;
293                                                                         if ($elapsed) {
294                                                                                 my $speed=int($numresults/$elapsed*100)/100;
295                                                                                 print "$$/$id : $globalname : $server records retrieved $numrecords SPEED: $speed\n";
296                                                                         }
297                                                                         my $q_result=$dbi->quote($result);
298                                                                         ($q_result) || ($q_result='""');
299                                                                         $now=time();
300                                                                         if ($numresults >0) {
301                                                                                 my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results=?,enddate=? where id=?");
302                                                                                 $stj->execute($numresults,$numrecords,$q_result,$now,$resultsid);
303                                                                         } else { # no results...
304                                                                                 my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
305                                                                                 $stj->execute($numresults,$numrecords,$now,$resultsid);
306                                                                         }
307                                                                 }
308                                                                 $stj=$dbi->prepare("update z3950results set active=-1 where id=?");
309                                                                 $stj->execute($resultsid);
310                                                                 eval {my $stj->finish};
311                                                         }
312 #OK, the search is done inactivate it..
313                                                         print "$$/$id : $server search done.\n";
314                                                         exit;
315                                                 }
316                                         }
317                                 } else {
318 # $forkcounter >=12
319                                 }
320                                 # delete z3950queue entry, as everything is done
321                                 my $sti=$dbh->prepare("update z3950queue set done=1,enddate=? where id=?");
322                                 $now=time();
323                                 $sti->execute($now,$id);
324                         }
325                         $lastrun=time();
326                 }
327                 sleep 10;
328         }
329 }
330
331 sub reap {
332     $forkcounter--;
333     wait;
334 }
335
336
337 sub checkqueue {
338     $checkqueue=1;
339 }
340
341
342 sub pe {
343         return 0;
344 }