Added POD.
[koha.git] / acqui.simple / processz3950queue
1 #!/usr/bin/perl
2 use C4::Database;
3 use DBI;
4 #use strict;
5 use C4::Acquisitions;
6 use C4::Biblio;
7 use C4::Output;
8 use Net::Z3950;
9
10
# Running as root, switch privs.
#
# While still root, record our PID in /var/run (best effort), then give up
# root for good by switching group and user to the web server account.
if ($< == 0) {
    if (-d "/var/run") {
        my $pidfile = "/var/run/processz3950queue.pid";
        if (open(my $pidfh, '>', $pidfile)) {
            print $pidfh "$$\n";
            close($pidfh) or warn "processz3950queue: cannot write $pidfile: $!\n";
        } else {
            # The pid file is a convenience only; keep going without it.
            warn "processz3950queue: cannot open $pidfile: $!\n";
        }
    }

    # Get real apacheuser from koha.conf or reparsing httpd.conf
    my $apacheuser = 'www-data';
    my ($uid, $gid) = (getpwnam($apacheuser))[2, 3];
    unless ($uid) {
        # Covers both an unknown account (undef) and an account with uid 0.
        die "Attempt to run daemon as non-existent or superuser\n";
    }

    # Groups must be changed first: once the uid is dropped we are no longer
    # allowed to change them.  Assigning "gid gid" to $) sets the effective
    # gid and replaces root's supplementary group list with just that gid.
    $) = "$gid $gid";
    $( = $gid;

    $> = $uid;
    $< = $uid;

    # Never keep running with elevated rights if any of the switches failed.
    my ($real_gid) = split ' ', $(;
    my ($eff_gid)  = split ' ', $);
    unless ($< == $uid && $> == $uid && $real_gid == $gid && $eff_gid == $gid) {
        die "Could not drop privileges to $apacheuser: $!\n";
    }
}
27     
28
# ---------------------------------------------------------------------------
# Daemon start-up.
#
# The lexicals declared here ($dbh, $forkcounter, $checkqueue, $lastrun, ...)
# are file-scoped on purpose: the main loop and the reap/checkqueue signal
# handlers further down read and modify them.
# ---------------------------------------------------------------------------
my $dbh = C4Connect();

# No search can still be running when the daemon starts, so clear any
# "active" flags left behind by a previous run.
my $sth = $dbh->prepare("update z3950results set active=0");
$sth->execute;
$sth->finish;

# SIGCHLD: a forked search finished.  SIGHUP: rescan the queue.
$SIG{CHLD} = \&reap;
$SIG{HUP}  = \&checkqueue;

# First command line argument: directory that receives the pid file.
my $logdir = $ARGV[0];

if (defined $logdir && length $logdir) {
    my $pidfile = "$logdir/processz3950queue.pid";
    if (open(my $pidfh, '>', $pidfile)) {
        print $pidfh "$$\n";
        close($pidfh) or warn "processz3950queue: cannot write $pidfile: $!\n";
    } else {
        # Best effort: the daemon is still usable without its pid file.
        warn "processz3950queue: cannot open $pidfile: $!\n";
    }
} else {
    warn "processz3950queue: no log directory given, not writing a pid file\n";
}

my $reapcounter = 0;
my $forkcounter = 0;    # forked searches currently believed to be running
my $checkqueue  = 1;    # true when the queue should be scanned; starts true
my $pid         = $$;
my $lastrun     = 0;    # time() of the last completed queue scan
# ---------------------------------------------------------------------------
# Main daemon loop.
#
# Uses the file-scoped $dbh, $forkcounter, $checkqueue and $lastrun.
#
# When a scan has been requested ($checkqueue is true) and the last scan is
# more than 5 seconds old, every row of z3950queue is examined.  For each
# server listed in the row that has neither finished (enddate set) nor is
# currently active, a child process is forked to run the Z39.50 search and
# store the raw records in z3950results.
#
# NOTE(review): rows are never removed from z3950queue here; presumably
# another component does that - confirm.
# ---------------------------------------------------------------------------
while (1) {
    if ((time - $lastrun) > 5) {
        if ($checkqueue) {
            $checkqueue = 0;
            my $queue_sth = $dbh->prepare("select id,term,type,servers from z3950queue order by id");
            $queue_sth->execute;

            QUERY:
            while (my ($id, $term, $type, $servers) = $queue_sth->fetchrow_array) {
                if ($forkcounter >= 12) {
                    # Too many searches in flight.  Ask for another scan so
                    # this row is picked up later instead of being dropped
                    # until the next SIGHUP.
                    $checkqueue = 1;
                    next QUERY;
                }

                my $now = time();
                my $stk = $dbh->prepare("select server,enddate,active from z3950results where queryid=?");
                $stk->execute($id) or next QUERY;

                # 1 = finished or running (leave alone), -1 = started but
                # neither finished nor running (retry), absent = never tried.
                my %serverdone;
                unless ($stk->rows) {
                    my $sti = $dbh->prepare("update z3950queue set done=-1,startdate=? where id=?");
                    $sti->execute($now, $id);
                }
                while (my ($r_server, $r_enddate, $active) = $stk->fetchrow_array) {
                    $serverdone{$r_server} = (($r_enddate && $r_enddate > 0) || $active) ? 1 : -1;
                }
                $stk->finish;

                my $quoted_term = '"' . $term . '"';
                my $query = _pqf_query($type, $quoted_term);

                SERVER:
                # split ' ' (unlike /\s+/) ignores leading whitespace, so no
                # child is forked for an empty server entry.
                foreach my $serverinfo (split(' ', $servers)) {
                    my $state = $serverdone{$serverinfo} || 0;
                    next SERVER if $state == 1;

                    my $child = fork();
                    unless (defined $child) {
                        # fork failed: we are still the parent.  Do not fall
                        # into the child code (it ends with exit and would
                        # kill the daemon); retry on a later scan.  The
                        # server string is not logged: it may hold a password.
                        warn "processz3950queue: fork failed: $!\n";
                        $checkqueue = 1;
                        last SERVER;
                    }
                    if ($child) {
                        $forkcounter++;
                        next SERVER;
                    }

                    # --- child process from here on ---
                    # The handles inherited from the parent share its
                    # database connection.  Mark them so that destroying our
                    # copies at exit does not disconnect the parent.
                    $dbh->{InactiveDestroy}       = 1;
                    $queue_sth->{InactiveDestroy} = 1;
                    _search_server($serverinfo, $id, $type, $quoted_term, $query,
                                   $state == -1, $forkcounter + 1);
                    exit;
                }
                # TODO: once no server is left to process, the queue row
                # could be marked as finished (never implemented).
            }
            $lastrun = time();
        }
        sleep 10;
    } else {
        # Only reached when a signal cut the sleep above short right after a
        # scan; pause briefly instead of spinning until 5 seconds have passed.
        sleep 1;
    }
}

# Build the PQF query string for a search.
#   $type        - one of isbn, title, author, lccn, keyword
#   $quoted_term - the search term, already wrapped in double quotes
# An unknown type yields an empty attribute, exactly as before.
sub _pqf_query {
    my ($type, $quoted_term) = @_;
    my %attr_for = (
        isbn    => '1=7',
        title   => '1=4',
        author  => '1=1003',
        lccn    => '1=9',
        keyword => '1=1016',
    );
    my $attr = (defined $type && exists $attr_for{$type}) ? $attr_for{$type} : '';
    return "\@attr $attr $quoted_term";
}

# Return the concatenated raw data of records $first..$last (1-based,
# inclusive) of result set $rs.  Dies if a record cannot be fetched.
sub _fetch_raw_records {
    my ($rs, $first, $last) = @_;
    my $raw = '';
    foreach my $i ($first .. $last) {
        my $rec = $rs->record($i)
            or die "could not fetch record $i\n";
        $raw .= $rec->rawdata();
    }
    return $raw;
}

# Print the SPEED line for one download batch.  Nothing is printed when the
# batch took less than a second (this also avoids dividing by zero).
# NOTE(review): the figure is total hits / seconds, not records downloaded /
# seconds; kept as-is so the log output does not change.
sub _report_speed {
    my ($server, $numresults, $numrecords, $elapsed) = @_;
    return unless $elapsed;
    my $speed = int($numresults / $elapsed * 100) / 100;
    print "  SPEED: $speed  $server done $numrecords\n";
    return;
}

# Body of a forked child: run one query against one Z39.50 server and store
# the raw records in z3950results.
#   $serverinfo       - "name/host:port/database[/user/password]"
#   $id               - z3950queue.id of the query
#   $type             - search type (only used in the log line)
#   $quoted_term      - quoted search term (only used in the log line)
#   $query            - PQF query string
#   $known_unfinished - true if a results row exists that never finished
#   $forks            - number of running children, for the log line
# The first 80 records are downloaded at once.  After that the child polls
# highestseen every 5 seconds and fetches 40 more records whenever
# highestseen comes within 30 of what has been downloaded; it gives up after
# 60 polls in a row without a new download.
sub _search_server {
    my ($serverinfo, $id, $type, $quoted_term, $query, $known_unfinished, $forks) = @_;

    # Always use a connection of our own; the parent's must not be shared.
    my $dbi = C4Connect();

    my ($name, $server, $database, $user, $password) = split(/\//, $serverinfo, 5);
    # Capture directly so a failed match gives undef instead of stale $1/$2.
    my ($servername, $port) = (defined $server ? $server : '') =~ /(.*)\:(\d+)/;
    print "Processing $type=$quoted_term at $name $server $database ($forks forks)\n";

    my $find = $dbi->prepare("select id from z3950results where server=? and queryid=?");
    $find->execute($serverinfo, $id);
    my ($resultsid) = $find->fetchrow_array;
    $find->finish;
    if (!$resultsid && !$known_unfinished) {
        my $insert = $dbi->prepare("insert into z3950results (server, queryid, startdate) values (?, ?, ?)");
        $insert->execute($serverinfo, $id, time());
        $resultsid = $dbi->{'mysql_insertid'};
    }
    unless ($resultsid) {
        # Without a row to write to, searching would be wasted work and
        # every later statement would be invalid SQL.
        warn "processz3950queue: no z3950results row for query $id at $name, skipping\n";
        $dbi->disconnect;
        return;
    }

    $dbi->do("update z3950results set active=1 where id=?", undef, $resultsid);

    my @auth = $user ? (user => $user, password => $password) : ();
    my $conn = eval {
        Net::Z3950::Connection->new($servername, $port, databaseName => $database, @auth);
    };

    if ($conn) {
        pe($conn);
        # Everything that can die is inside the eval so that the active flag
        # is always cleared below, whatever happens during the download.
        my $ok = eval {
            print "Q: $query\n";
            my $rs = $conn->search($query);
            pe($conn);
            $rs->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::USMARC());
            pe($conn);
            my $numresults = $rs->size() || 0;
            pe($conn);

            my $numrecords = ($numresults < 80) ? $numresults : 80;
            my $started    = time();
            my $result     = _fetch_raw_records($rs, 1, $numrecords);
            _report_speed($server, $numresults, $numrecords, time() - $started);

            $dbi->do("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results=?,enddate=? where id=?",
                     undef, $numresults, $numrecords, $result, time(), $resultsid);

            my $seen_sth   = $dbi->prepare("select highestseen from z3950results where id=?");
            my $idle_polls = 0;
            while ($idle_polls < 60 && $numrecords < $numresults) {
                $idle_polls++;
                $seen_sth->execute($resultsid);
                my ($highestseen) = $seen_sth->fetchrow_array;
                $seen_sth->finish;
                if (($highestseen || 0) > ($numrecords - 30)) {
                    $idle_polls = 0;
                    print "   $server rescanning\n";
                    my $upto = ($numresults < $numrecords + 40) ? $numresults : $numrecords + 40;
                    my $batch_started = time();
                    $result .= _fetch_raw_records($rs, $numrecords + 1, $upto);
                    $numrecords = $upto;
                    _report_speed($server, $numresults, $numrecords, time() - $batch_started);

                    $dbi->do("update z3950results set numdownloaded=?,results=? where id=?",
                             undef, $numrecords, $result, $resultsid);
                }
                sleep 5;
            }
            1;
        };
        print "ERROR: $@\n" unless $ok;
    } else {
        warn "processz3950queue: cannot connect to $name: " . ($@ || "connection failed\n");
    }

    $dbi->do("update z3950results set active=0 where id=?", undef, $resultsid);
    $dbi->disconnect;
    print "    $server done.\n";
    return;
}

# Print the error state of a Z39.50 connection.  Always returns 0.
# Diagnostics were switched off in the original code; they stay off unless
# the environment variable Z3950_DEBUG is set to a true value.
sub pe {
    my ($conn) = @_;
    return 0 unless $conn && $ENV{Z3950_DEBUG};
    my $code = $conn->errcode();
    my $msg  = $conn->errmsg();
    my $ai   = $conn->addinfo();
    print "CODE:  $code\n", "MSG:   $msg\n", "ADDTL: $ai\n", "\n";
    return 0;
}
248
# getrecord($server, $base, $query, $auth, $id)
#
# Drive the external yaz-client program through a pipe: optionally
# authenticate, open $server, select database $base, send $query and then
# ask for records with ten "s" commands before quitting.  yaz-client is
# started with "-m yaz-$id.mrc"; its stdout and stderr are appended to
# yaz.out and yaz.err in the current directory.
#
# Returns the result of closing the pipe (false if yaz-client exited with an
# error), or nothing if yaz-client could not be started.
sub getrecord {
    my ($server, $base, $query, $auth, $id) = @_;

    # NOTE(review): $id ends up on a shell command line.  Callers must only
    # pass trusted values (presumably a numeric queue id - confirm).
    my $command = "yaz-client -m yaz-$id.mrc >>yaz.out 2>>yaz.err";

    my $yaz;
    unless (open($yaz, '|-', $command)) {
        warn "getrecord: cannot start yaz-client: $!\n";
        return;
    }

    # Unbuffer the pipe, then restore whichever handle was selected before.
    my $previous = select($yaz);
    $| = 1;
    select($previous);

    my $auth_line = $auth ? "authentication $auth\n" : '';

    # Written as a list rather than a here-document: the original "$auth\open"
    # is a compile error on current perls, where \o starts an octal escape.
    print $yaz $auth_line,
               "open $server\n",
               "base $base\n",
               "setnames\n",
               "$query\n",
               ("s\n") x 10,
               "quit\n";

    return close($yaz);
}
# SIGCHLD handler: collect every child that has exited and decrement the
# running-fork counter once per child actually collected.
#
# Several children exiting close together can produce a single SIGCHLD, so
# one blocking wait per signal would leave zombies behind and let
# $forkcounter drift upward.  The non-blocking waitpid loop collects them
# all and never blocks (or decrements) when there is nothing to collect.
sub reap {
    use POSIX qw(WNOHANG);
    # Keep the interrupted code's error and child status intact.
    local ($!, $?);
    while (waitpid(-1, WNOHANG) > 0) {
        $forkcounter--;
    }
    return;
}
283
284
# SIGHUP handler: raise the flag that makes the main loop rescan the queue
# on its next pass.  Returns the new flag value (1).
sub checkqueue {
    return $checkqueue = 1;
}
288
289