#!/usr/bin/perl # $Id$ use MARC::Record; use C4::Context; use DBI; use strict; use C4::Biblio; use C4::Output; use C4::Breeding; use ZOOM; =head1 NAME processz3950queue. The script that does z3950 searches. =head1 SYNOPSIS This script can be used on a console (as normal user) or by the daemon-launch script. Don't forget to EXPORT PERL5LIB=/PATH/to/KOHA before executing it if you use console mode. It : 1- extracts z3950 requests from z3950queue table, 2- creates entries in z3950results to store the result 3- launch z3950 queries in asynchronous mode, using Unix fork() 4- store results in marc_breeding table. The z3950 results queries are managed in z3950/search.pl script (poped-up window in MARC editor). =head1 DESCRIPTION =head2 table z3950servers This table stores the differents z3950 servers. A server is used if checked=1. The rank is NOT used as searches are now asynchronous. =head2 table z3950queue Table use to manage queries. A single line is created in this table for each z3950 search request. If more than 1 server is called, the C field containt all of them separated by |. z3950 search requests are done by z3950/search.pl script. At this stage, the fields are created with C and C empty Then, the processz3950queue finds this entry and : 1- store date (time()) in C 2- set C = -1 when the requests are all sent : 2- set C = 1 3- set C (FIXME: always equal to startdate for me) entries are deleted when : - C is more than 1 day ago. FIXME: - results, numrecords fields are unused =head2 table z3950results 1 entry is created for each request, for each server called. when created : * C is filled * C is null * active is set to 0. when active is 0, it means the request has not been sent. when set to 1, it means it's on the way. When a search is ended, C is set, and C is set to -1 =head1 How it's written on every loop : * delete old queries * for each entry in z3950queue table that is not done=1 { for each search request { for each server { try to connect look for results * results can be : - existing and already running on another process (active=1) - existing & finished (active=-1) - non existent => create it and run the request. } } } =over 2 =cut if ($< == 0) { # Running as root, switch privs if (-d "/var/run") { open PID, ">/var/run/processz3950queue.pid"; print PID $$."\n"; close PID; } # Get real apacheuser from koha.conf or reparsing httpd.conf my $apacheuser=C4::Context->config("httpduser"); my $uid=0; unless ($uid = (getpwnam($apacheuser))[2]) { die "Attempt to run daemon as non-existent or superuser\n"; } $>=$uid; $<=$uid; } my $db_driver = C4::Context->config("db_scheme") || "mysql"; my $db_name = C4::Context->config("database"); my $db_host = C4::Context->config("hostname"); my $db_user = C4::Context->config("user"); my $db_passwd = C4::Context->config("pass"); my $dbh = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd); # we begin the script, so "unactive" every pending request : they will never give anything, the script died :-( my $sth=$dbh->prepare("update z3950results set active=0 where active<>-1"); $sth->execute; $sth->finish; $SIG{CHLD}='reap'; $SIG{HUP}='checkqueue'; my $logdir=$ARGV[0]; open PID, ">$logdir/processz3950queue.pid"; print PID $$."\n"; close PID; my $reapcounter=0; my $forkcounter=0; my $checkqueue=1; my $pid=$$; my $lastrun=0; while (1) { if ((time-$lastrun)>5) { $checkqueue = 1; # FIXME during testing, this line forces the loop. REMOVE it to use SIG{HUP} when "daemonized" ! # clean DB my $now = time(); # delete z3950queue entries that are more than 1 day old my $sth = $dbh->prepare("delete from z3950queue where ?-startdate > 86400"); $sth->execute($now); # delete z3950results queries that are more than 1 hour old $sth = $dbh->prepare("delete from z3950results where ?-startdate > 3600"); $sth->execute($now); if ($checkqueue) { # everytime a SIG{HUP} is recieved $checkqueue=0; # parse every entry in QUEUE $sth=$dbh->prepare("select id,term,type,servers,identifier from z3950queue where done<>1 or done is null order by id"); $sth->execute; while (my ($id, $term, $type, $servers,$random) = $sth->fetchrow) { # FIXME: there is no "else". So, if more than 12 requests at the same time => requests are lost ! if ($forkcounter<12) { my $now=time(); # search for results entries for this request my $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=?"); ($stk->execute($id)) || (next); my %serverdone; # if no results => set queue to done = -1, set startdate and begin creating z3950results table entries & z3950 queries unless ($stk->rows) { my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=? where id=?"); $sti->execute($now,$id); } # check which servers calls have already been created (before a crash) while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) { if ($r_enddate >0) { # result entry exist & finished $serverdone{$r_server}=1; } elsif ($active) { # result entry exists & on the way (active=1) or already done (active=-1) $serverdone{$r_server}=1; } else { # otherwise $serverdone{$r_server}=-1; } # note that is the entry doesn't exist, the $serverdone{$r_server} is 0 (important later !) } $stk->finish; foreach my $serverinfo (split(/\|/, $servers)) { (next) if ($serverdone{$serverinfo} == 1); #(otherwise, is 0 or -1) my $totalrecords=0; my $globalname; my $globalsyntax; my $globalencoding; # fork a process for this z3950 query if (my $pid=fork()) { $forkcounter++; } else { # and connect to z3950 server #FIXME: why do we need $dbi ? can't we use $dbh ? my $db_driver = C4::Context->config("db_scheme") || "mysql"; my $db_name = C4::Context->config("database"); my $db_host = C4::Context->config("hostname"); my $db_user = C4::Context->config("user"); my $db_passwd = C4::Context->config("pass"); my $dbi = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd); $dbh->{"InactiveDestroy"} = "true"; my ($name, $server, $database, $user, $password,$syntax) = split(/\//, $serverinfo, 6); $globalname=$name; $globalsyntax = $syntax; $server=~/(.*)\:(\d+)/; my $servername=$1; my $port=$2; my $attr=''; if ($type eq 'isbn') { $attr='1=7'; } elsif ($type eq 'title') { $attr='1=4'; } elsif ($type eq 'author') { $attr='1=1003'; } elsif ($type eq 'lccn') { $attr='1=9'; } elsif ($type eq 'keyword') { $attr='1=1016'; } my $query = "\@attr $attr \"$term\""; print "$$/$id : Processing $type=$term at $name $server $database $syntax (".($forkcounter+1)." forks)\n"; # try to connect my $conn; my $noconnection=0; my $error=0; # the z3950 query is builded. Launch it. if ($user) { $conn= new ZOOM::Connection($servername, $port, databaseName => $database, user => $user, password => $password) || ($noconnection=1); } else { $conn= new ZOOM::Connection($servername, $port, databaseName => $database) || ($noconnection=1); } if ($noconnection || $error) { # if connection impossible, don't go further ! print "$$/$id : no connection at $globalname\n"; my $result = MARC::Record->new(); my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"$globalname server is NOT responding","",$random); } else { # else, build z3950 query and do it ! $now=time(); my $resultsid=""; # create z3950results entries. if ($serverdone{$serverinfo}==-1) { # if entry exist, just retrieve it my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?"); $stj->execute($serverinfo,$id); ($resultsid) = $stj->fetchrow; $stj->finish; print "$$/$id : 1 >> $resultsid\n"; } else { # else create it : (may be serverdone=1 or 0) my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?"); $stj->execute($serverinfo,$id); ($resultsid) = $stj->fetchrow; $stj->finish; print "$$/$id : 2 >> $resultsid\n"; unless ($resultsid) { $stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values (?,?,?)"); $stj->execute($serverinfo, $id, $now); $resultsid=$dbi->{'mysql_insertid'}; $stj->finish; print "$$/$id : creating and "; } } print "$$/$id : working on results entry $resultsid\n"; # set active to 1 => this request is on the way. my $stj=$dbi->prepare("update z3950results set active=1 where id=?"); $stj->execute($resultsid); ####### print "$$/$id : connected to $globalname\n"; print "Global Syntax =>".$globalsyntax."<="; eval {$conn->option(elementSetName => 'F')}; eval { $conn->option(preferredRecordSyntax => 'USMARC');} if ($globalsyntax eq "USMARC"); eval { $conn->option(preferredRecordSyntax => 'UNIMARC');} if ($globalsyntax eq "UNIMARC"); if ($@) { print "$$/$id : $globalname ERROR: $@ for $resultsid\n"; # in case pb during connexion, set result to "empty" to avoid everlasting loops my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?"); $stj->execute(0,0,$now,$resultsid); } else { my $rs=$conn->search_pqf($query) or warn "Connection Problem:".$conn->errmsg(); pe(); # we have an answer for a query => get results & store them in marc_breeding table my $numresults=$rs->size(); if ($numresults eq 0) { print "$$/$id : $globalname : no records found\n"; } else { print "$$/$id : $globalname : $numresults records found, retrieving them (max 80)\n"; } pe(); my $i; my $result=''; my $scantimerstart=time(); for ($i=1; $i<=(($numresults<80) ? ($numresults) : (80)); $i++) { my $rec=$rs->record($i-1); my $marcdata = $rec->raw(); #my $marcrecord = MARC::Record->new_from_usmarc($marcdata); #warn $marcrecord->as_formatted(); $globalencoding = ref($rec); $result.=$marcdata; } my @x=split /::/,$globalencoding; my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"Z3950-$globalname",$x[3],$random); my $scantimerend=time(); my $numrecords; ($numresults<80) ? ($numrecords=$numresults) : ($numrecords=80); my $elapsed=$scantimerend-$scantimerstart; if ($elapsed) { my $speed=int($numresults/$elapsed*100)/100; print "$$/$id : $globalname : $server records retrieved $numrecords SPEED: $speed\n"; } my $q_result=$dbi->quote($result); ($q_result) || ($q_result='""'); $now=time(); if ($numresults >0) { my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results=?,enddate=? where id=?"); $stj->execute($numresults,$numrecords,$q_result,$now,$resultsid); } else { # no results... my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?"); $stj->execute($numresults,$numrecords,$now,$resultsid); } } $stj=$dbi->prepare("update z3950results set active=-1 where id=?"); $stj->execute($resultsid); eval {my $stj->finish}; } #OK, the search is done inactivate it.. print "$$/$id : $server search done.\n"; exit; } } } else { # $forkcounter >=12 } # delete z3950queue entry, as everything is done my $sti=$dbh->prepare("update z3950queue set done=1,enddate=? where id=?"); $now=time(); $sti->execute($now,$id); } $lastrun=time(); } sleep 10; } } sub reap { $forkcounter--; wait; } sub checkqueue { $checkqueue=1; } sub pe { return 0; }