Handle the iso8859-1 charset somewhat, so that when the po file is in
[koha.git] / z3950 / processz3950queue
1 #!/usr/bin/perl
2
3 # $Id$
4
5 use C4::Context;
6 use DBI;
7 use strict;
8 use C4::Biblio;
9 use C4::Output;
10 use C4::Breeding;
11 use Net::Z3950;
12
13 =head1 NAME
14
15 processz3950queue. The script that does z3950 searches.
16
17 =head1 SYNOPSIS
18
19 This script can be used on a console (as normal user) or by the daemon-launch script.
20
21 Don't forget to EXPORT PERL5LIB=/PATH/to/KOHA before executing it if you use console mode.
22
23 It :
24 1- extracts z3950 requests from z3950queue table,
25 2- creates entries in z3950results to store the result
26 3- launch z3950 queries in asynchronous mode, using Unix fork()
27 4- store results in marc_breeding table.
28
29 The z3950 results queries are managed in z3950/search.pl script (poped-up window in MARC editor).
30
31 =head1 DESCRIPTION
32
33 =head2 table z3950servers
34
35 This table stores the differents z3950 servers.
36 A server is used if checked=1. The rank is NOT used as searches are now asynchronous.
37
38 =head2 table z3950queue
39
40 Table use to manage queries. A single line is created  in this table for each z3950 search request.
41 If more than 1 server is called, the C<servers> field containt all of them separated by |.
42
43 z3950 search requests are done by z3950/search.pl script.
44 At this stage, the fields are created with C<startdate> and C<done> empty
45
46 Then, the processz3950queue finds this entry and :
47 1- store date (time()) in C<startdate>
48 2- set C<done> = -1
49
50 when the requests are all sent :
51 2- set C<done> = 1
52 3- set C<enddate> (FIXME: always equal to startdate for me)
53
54 entries are deleted when :
55 - C<startdate> is more than 1 day ago.
56
57 FIXME:
58 - results, numrecords fields are unused
59
60 =head2 table z3950results
61
62 1 entry is created for each request, for each server called.
63 when created :
64 * C<startdate> is filled
65 * C<enddate> is null
66 * active is set to 0. when active is 0, it means the request has not been sent. when set to 1, it means it's on the way.
67
68 When a search is ended, C<enddate> is set, and C<active> is set to -1
69
70 =head1 How it's written
71
72 on every loop :
73 * delete old queries
74 * for each entry in z3950queue table that is not done=1 {
75         for each search request {
76                 for each server {
77                         try to connect
78                         look for results
79                         *      results can be :
80                         - existing and already running on another process (active=1)
81                         - existing & finished (active=-1)
82                         - non existent => create it and run the request.
83                 }
84         }
85 }
86 =over 2
87
88 =cut
89
90
91 if ($< == 0) {
92     # Running as root, switch privs
93     if (-d "/var/run") {
94         open PID, ">/var/run/processz3950queue.pid";
95         print PID $$."\n";
96         close PID;
97     }
98     # Get real apacheuser from koha.conf or reparsing httpd.conf
99     my $apacheuser=C4::Context->config("httpduser");
100     my $uid=0;
101     unless ($uid = (getpwnam($apacheuser))[2]) {
102         die "Attempt to run daemon as non-existent or superuser\n";
103     }
104     $>=$uid;
105     $<=$uid;
106 }
107 my $dbh = C4::Context->dbh;
108
109 # we begin the script, so "unactive" every pending request : they will never give anything, the script died :-(
110 my $sth=$dbh->prepare("update z3950results set active=0 where active<>-1");
111 $sth->execute;
112 $sth->finish;
113 $SIG{CHLD}='reap';
114 $SIG{HUP}='checkqueue';
115
116
117 my $logdir=$ARGV[0];
118
119 open PID, ">$logdir/processz3950queue.pid";
120 print PID $$."\n";
121 close PID;
122
123 my $reapcounter=0;
124 my $forkcounter=0;
125 my $checkqueue=1;
126 my $pid=$$;
127 my $lastrun=0;
128 while (1) {
129         if ((time-$lastrun)>5) {
130                 print "starting loop\n";
131                 $checkqueue = 1; # FIXME during testing, this line forces the loop. REMOVE it to use SIG{HUP} when "daemonized" !
132 # clean DB
133                 my $now = time();
134                 # delete z3950queue entries that are more than 1 day old
135                 my $sth = $dbh->prepare("delete from z3950queue where ?-startdate > 86400");
136                 $sth->execute($now);
137                 # delete z3950results queries that are more than 1 hour old
138                 $sth = $dbh->prepare("delete from z3950results where ?-startdate > 3600");
139                 $sth->execute($now);
140                 if ($checkqueue) { # everytime a SIG{HUP} is recieved
141                         $checkqueue=0;
142 # parse every entry in QUEUE
143                         $sth=$dbh->prepare("select id,term,type,servers,identifier from z3950queue where done<>1 or done is null order by id");
144                         $sth->execute;
145                         while (my ($id, $term, $type, $servers,$random) = $sth->fetchrow) {
146 # FIXME: there is no "else". So, if more than 12 requests at the same time => requests are lost !
147                                 if ($forkcounter<12) {
148                                         my $now=time();
149 # search for results entries for this request
150                                         my $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=?");
151                                         ($stk->execute($id)) || (next);
152                                         my %serverdone;
153 # if no results => set queue to done = -1, set startdate and begin creating z3950results table entries & z3950 queries
154                                         unless ($stk->rows) {
155                                                 my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=? where id=?");
156                                                 $sti->execute($now,$id);
157                                         }
158 # check which servers calls have already been created (before a crash)
159                                         while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) {
160                                                 if ($r_enddate >0) { # result entry exist & finished
161                                                         $serverdone{$r_server}=1;
162                                                 } elsif ($active) { # result entry exists & on the way (active=1) or already done (active=-1)
163                                                         $serverdone{$r_server}=1;
164                                                 } else { # otherwise
165                                                         $serverdone{$r_server}=-1;
166                                                 }
167                                                 # note that is the entry doesn't exist, the $serverdone{$r_server} is 0 (important later !)
168                                         }
169                                         $stk->finish;
170                                         foreach my $serverinfo (split(/\|/, $servers)) {
171                                                 (next) if ($serverdone{$serverinfo} == 1); #(otherwise, is 0 or -1)
172                                                 my $totalrecords=0;
173                                                 my $globalname;
174                                                 my $globalsyntax;
175                                                 my $globalencoding;
176 # fork a process for this z3950 query
177                                                 if (my $pid=fork()) {
178                                                         $forkcounter++;
179                                                 } else {
180 # and connect to z3950 server
181 #FIXME: why do we need $dbi ? can't we use $dbh ?
182                                                         my $dbi = C4::Context->dbh;
183                                                         my ($name, $server, $database, $user, $password,$syntax) = split(/\//, $serverinfo, 6);
184                                                         $globalname=$name;
185                                                         $globalsyntax = $syntax;
186                                                         $server=~/(.*)\:(\d+)/;
187                                                         my $servername=$1;
188                                                         my $port=$2;
189                                                         my $attr='';
190                                                         if ($type eq 'isbn') {
191                                                                 $attr='1=7';
192                                                         } elsif ($type eq 'title') {
193                                                                 $attr='1=4';
194                                                         } elsif ($type eq 'author') {
195                                                                 $attr='1=1003';
196                                                         } elsif ($type eq 'lccn') {
197                                                                 $attr='1=9';
198                                                         } elsif ($type eq 'keyword') {
199                                                                 $attr='1=1016';
200                                                         }
201                                                         my $query="\@attr $attr \"$term\"";
202                                                         print "$$/$id : Processing $type=$term at $name $server $database $syntax (".($forkcounter+1)." forks)\n";
203 # try to connect
204                                                         my $conn;
205                                                         my $noconnection=0;
206                                                         my $error=0;
207 # the z3950 query is builded. Launch it.
208                                                         if ($user) {
209                                                                 $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database, user => $user, password => $password) || ($noconnection=1);
210                                                         } else {
211                                                                 $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database) || ($noconnection=1);
212                                                         }
213                                                         if ($noconnection || $error) {
214 # if connection impossible, don't go further !
215                                                                 print "$$/$id : no connection at $globalname\n";
216                                                                 my $result = MARC::Record->new();
217                                                                 my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"$globalname server is NOT responding","",$random);
218                                                         } else {
219 # else, build z3950 query and do it !
220                                                                 $now=time();
221                                                                 my $resultsid="";
222         # create z3950results entries.
223                                                                 if ($serverdone{$serverinfo}==-1) { # if entry exist, just retrieve it
224                                                                         my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
225                                                                         $stj->execute($serverinfo,$id);
226                                                                         ($resultsid) = $stj->fetchrow;
227                                                                         $stj->finish;
228                                                                         print "$$/$id : 1 >> $resultsid\n";
229                                                                 } else { # else create it : (may be serverdone=1 or 0)
230                                                                         my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
231                                                                         $stj->execute($serverinfo,$id);
232                                                                         ($resultsid) = $stj->fetchrow;
233                                                                         $stj->finish;
234                                                                         print "$$/$id : 2 >> $resultsid\n";
235                                                                         unless ($resultsid) {
236                                                                                 $stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values (?,?,?)");
237                                                                                 $stj->execute($serverinfo, $id, $now);
238                                                                                 $resultsid=$dbi->{'mysql_insertid'};
239                                                                                 $stj->finish;
240                                                                                 print "$$/$id : creating and ";
241                                                                         }
242                                                                 }
243                                                                 print "$$/$id : working on results entry $resultsid\n";
244         # set active to 1 => this request is on the way.
245                                                                 my $stj=$dbi->prepare("update z3950results set active=1 where id=?");
246                                                                 $stj->execute($resultsid);
247 #######
248                                                                 print "$$/$id : connected to $globalname\n";
249                                                                 eval {$conn->option(elementSetName => 'F')};
250                                                                 eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::USMARC);} if ($globalsyntax eq "USMARC");
251                                                                 eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::UNIMARC);} if ($globalsyntax eq "UNIMARC");
252                                                                 if ($@) {
253                                                                         print "$$/$id : $globalname ERROR: $@ for $resultsid\n";
254         # in case pb during connexion, set result to "empty" to avoid everlasting loops
255                                                                         my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
256                                                                         $stj->execute(0,0,$now,$resultsid);
257                                                                 } else {
258                                                                         my $rs=$conn->search($query);
259                                                                         pe();
260         # we have an answer for a query => get results & store them in marc_breeding table
261                                                                         my $numresults=$rs->size();
262                                                                         if ($numresults eq 0) {
263                                                                                 print "$$/$id : $globalname : no records found\n";
264                                                                         } else {
265                                                                                 print "$$/$id : $globalname : $numresults records found, retrieving them (max 80)\n";
266                                                                         }
267                                                                         pe();
268                                                                         my $i;
269                                                                         my $result='';
270                                                                         my $scantimerstart=time();
271                                                                         for ($i=1; $i<=(($numresults<80) ? ($numresults) : (80)); $i++) {
272                                                                                 my $rec=$rs->record($i);
273                                                                                 my $marcdata;
274                                                                                 # use render() or rawdata() depending on the type of the returned record
275                                                                                 my $marcrecord;
276                                                                                 if (ref($rec) eq "Net::Z3950::Record::USMARC") {
277                                                                                         $marcdata = $rec->rawdata();
278                                                                                         $marcrecord = MARC::File::USMARC::decode($rec->rawdata())
279                                                                                 }
280                                                                                 if (ref($rec) eq "Net::Z3950::Record::UNIMARC") {
281                                                                                         $marcdata = $rec->render();
282                                                                                         $marcrecord = MARC::File::USMARC::decode($rec->render())
283                                                                                 }
284                                                                                 $globalencoding = ref($rec);
285                                                                                 $result.=$marcdata;
286                                                                         }
287                                                                         my @x=split /::/,$globalencoding;
288                                                                         my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"Z3950-$globalname",$x[3],$random);
289                                                                         my $scantimerend=time();
290                                                                         my $numrecords;
291                                                                         ($numresults<80) ? ($numrecords=$numresults) : ($numrecords=80);
292                                                                         my $elapsed=$scantimerend-$scantimerstart;
293                                                                         if ($elapsed) {
294                                                                                 my $speed=int($numresults/$elapsed*100)/100;
295                                                                                 print "$$/$id : $globalname : $server records retrieved $numrecords SPEED: $speed\n";
296                                                                         }
297                                                                         my $q_result=$dbi->quote($result);
298                                                                         ($q_result) || ($q_result='""');
299                                                                         $now=time();
300                                                                         if ($numresults >0) {
301                                                                                 my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results=?,enddate=? where id=?");
302                                                                                 $stj->execute($numresults,$numrecords,$q_result,$now,$resultsid);
303                                                                         } else { # no results...
304                                                                                 my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
305                                                                                 $stj->execute($numresults,$numrecords,$now,$resultsid);
306                                                                         }
307                                                                 }
308                                                                 $stj=$dbi->prepare("update z3950results set active=-1 where id=?");
309                                                                 $stj->execute($resultsid);
310                                                                 eval {my $stj->finish};
311                                                         }
312 #OK, the search is done inactivate it..
313                                                         print "$$/$id : $server search done.\n";
314                                                         exit;
315                                                 }
316                                         }
317                                 } else {
318 # $forkcounter >=12
319                                 }
320                                 # delete z3950queue entry, as everything is done
321                                 my $sti=$dbh->prepare("update z3950queue set done=1,enddate=? where id=?");
322                                 $now=time();
323                                 $sti->execute($now,$id);
324                         }
325                         $lastrun=time();
326                 }
327                 sleep 10;
328         }
329 }
330
331 sub reap {
332     $forkcounter--;
333     wait;
334 }
335
336
337 sub checkqueue {
338     $checkqueue=1;
339 }
340
341
342 sub pe {
343         return 0;
344 }