347 lines
12 KiB
Perl
Executable file
347 lines
12 KiB
Perl
Executable file
#!/usr/bin/perl
|
|
|
|
# $Id$
|
|
use MARC::Record;
|
|
use C4::Context;
|
|
use DBI;
|
|
use strict;
|
|
use C4::Biblio;
|
|
use C4::Output;
|
|
use C4::Breeding;
|
|
use ZOOM;
|
|
|
|
=head1 NAME
|
|
|
|
processz3950queue. The script that does z3950 searches.
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
This script can be used on a console (as normal user) or by the daemon-launch script.
|
|
|
|
Don't forget to EXPORT PERL5LIB=/PATH/to/KOHA before executing it if you use console mode.
|
|
|
|
It :
|
|
1- extracts z3950 requests from z3950queue table,
|
|
2- creates entries in z3950results to store the result
|
|
3- launch z3950 queries in asynchronous mode, using Unix fork()
|
|
4- store results in marc_breeding table.
|
|
|
|
The z3950 results queries are managed in z3950/search.pl script (poped-up window in MARC editor).
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
=head2 table z3950servers
|
|
|
|
This table stores the differents z3950 servers.
|
|
A server is used if checked=1. The rank is NOT used as searches are now asynchronous.
|
|
|
|
=head2 table z3950queue
|
|
|
|
Table use to manage queries. A single line is created in this table for each z3950 search request.
|
|
If more than 1 server is called, the C<servers> field containt all of them separated by |.
|
|
|
|
z3950 search requests are done by z3950/search.pl script.
|
|
At this stage, the fields are created with C<startdate> and C<done> empty
|
|
|
|
Then, the processz3950queue finds this entry and :
|
|
1- store date (time()) in C<startdate>
|
|
2- set C<done> = -1
|
|
|
|
when the requests are all sent :
|
|
2- set C<done> = 1
|
|
3- set C<enddate> (FIXME: always equal to startdate for me)
|
|
|
|
entries are deleted when :
|
|
- C<startdate> is more than 1 day ago.
|
|
|
|
FIXME:
|
|
- results, numrecords fields are unused
|
|
|
|
=head2 table z3950results
|
|
|
|
1 entry is created for each request, for each server called.
|
|
when created :
|
|
* C<startdate> is filled
|
|
* C<enddate> is null
|
|
* active is set to 0. when active is 0, it means the request has not been sent. when set to 1, it means it's on the way.
|
|
|
|
When a search is ended, C<enddate> is set, and C<active> is set to -1
|
|
|
|
=head1 How it's written
|
|
|
|
on every loop :
|
|
* delete old queries
|
|
* for each entry in z3950queue table that is not done=1 {
|
|
for each search request {
|
|
for each server {
|
|
try to connect
|
|
look for results
|
|
* results can be :
|
|
- existing and already running on another process (active=1)
|
|
- existing & finished (active=-1)
|
|
- non existent => create it and run the request.
|
|
}
|
|
}
|
|
}
|
|
=over 2
|
|
|
|
=cut
|
|
|
|
|
|
if ($< == 0) {
|
|
# Running as root, switch privs
|
|
if (-d "/var/run") {
|
|
open PID, ">/var/run/processz3950queue.pid";
|
|
print PID $$."\n";
|
|
close PID;
|
|
}
|
|
# Get real apacheuser from koha.conf or reparsing httpd.conf
|
|
my $apacheuser=C4::Context->config("httpduser");
|
|
my $uid=0;
|
|
unless ($uid = (getpwnam($apacheuser))[2]) {
|
|
die "Attempt to run daemon as non-existent or superuser\n";
|
|
}
|
|
$>=$uid;
|
|
$<=$uid;
|
|
}
|
|
my $db_driver = C4::Context->config("db_scheme") || "mysql";
|
|
my $db_name = C4::Context->config("database");
|
|
my $db_host = C4::Context->config("hostname");
|
|
my $db_user = C4::Context->config("user");
|
|
my $db_passwd = C4::Context->config("pass");
|
|
my $dbh = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd);
|
|
|
|
# we begin the script, so "unactive" every pending request : they will never give anything, the script died :-(
|
|
my $sth=$dbh->prepare("update z3950results set active=0 where active<>-1");
|
|
$sth->execute;
|
|
$sth->finish;
|
|
$SIG{CHLD}='reap';
|
|
$SIG{HUP}='checkqueue';
|
|
|
|
|
|
my $logdir=$ARGV[0];
|
|
|
|
open PID, ">$logdir/processz3950queue.pid";
|
|
print PID $$."\n";
|
|
close PID;
|
|
|
|
my $reapcounter=0;
|
|
my $forkcounter=0;
|
|
my $checkqueue=1;
|
|
my $pid=$$;
|
|
my $lastrun=0;
|
|
while (1) {
|
|
if ((time-$lastrun)>5) {
|
|
$checkqueue = 1; # FIXME during testing, this line forces the loop. REMOVE it to use SIG{HUP} when "daemonized" !
|
|
# clean DB
|
|
my $now = time();
|
|
# delete z3950queue entries that are more than 1 day old
|
|
my $sth = $dbh->prepare("delete from z3950queue where ?-startdate > 86400");
|
|
$sth->execute($now);
|
|
# delete z3950results queries that are more than 1 hour old
|
|
$sth = $dbh->prepare("delete from z3950results where ?-startdate > 3600");
|
|
$sth->execute($now);
|
|
if ($checkqueue) { # everytime a SIG{HUP} is recieved
|
|
$checkqueue=0;
|
|
# parse every entry in QUEUE
|
|
$sth=$dbh->prepare("select id,term,type,servers,identifier from z3950queue where done<>1 or done is null order by id");
|
|
$sth->execute;
|
|
while (my ($id, $term, $type, $servers,$random) = $sth->fetchrow) {
|
|
# FIXME: there is no "else". So, if more than 12 requests at the same time => requests are lost !
|
|
if ($forkcounter<12) {
|
|
my $now=time();
|
|
# search for results entries for this request
|
|
my $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=?");
|
|
($stk->execute($id)) || (next);
|
|
my %serverdone;
|
|
# if no results => set queue to done = -1, set startdate and begin creating z3950results table entries & z3950 queries
|
|
unless ($stk->rows) {
|
|
my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=? where id=?");
|
|
$sti->execute($now,$id);
|
|
}
|
|
# check which servers calls have already been created (before a crash)
|
|
while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) {
|
|
if ($r_enddate >0) { # result entry exist & finished
|
|
$serverdone{$r_server}=1;
|
|
} elsif ($active) { # result entry exists & on the way (active=1) or already done (active=-1)
|
|
$serverdone{$r_server}=1;
|
|
} else { # otherwise
|
|
$serverdone{$r_server}=-1;
|
|
}
|
|
# note that is the entry doesn't exist, the $serverdone{$r_server} is 0 (important later !)
|
|
}
|
|
$stk->finish;
|
|
foreach my $serverinfo (split(/\|/, $servers)) {
|
|
(next) if ($serverdone{$serverinfo} == 1); #(otherwise, is 0 or -1)
|
|
my $totalrecords=0;
|
|
my $globalname;
|
|
my $globalsyntax;
|
|
my $globalencoding;
|
|
# fork a process for this z3950 query
|
|
if (my $pid=fork()) {
|
|
$forkcounter++;
|
|
} else {
|
|
# and connect to z3950 server
|
|
#FIXME: why do we need $dbi ? can't we use $dbh ?
|
|
my $db_driver = C4::Context->config("db_scheme") || "mysql";
|
|
my $db_name = C4::Context->config("database");
|
|
my $db_host = C4::Context->config("hostname");
|
|
my $db_user = C4::Context->config("user");
|
|
my $db_passwd = C4::Context->config("pass");
|
|
my $dbi = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd);
|
|
$dbh->{"InactiveDestroy"} = "true";
|
|
my ($name, $server, $database, $user, $password,$syntax) = split(/\//, $serverinfo, 6);
|
|
$globalname=$name;
|
|
$globalsyntax = $syntax;
|
|
$server=~/(.*)\:(\d+)/;
|
|
my $servername=$1;
|
|
my $port=$2;
|
|
my $attr='';
|
|
if ($type eq 'isbn') {
|
|
$attr='1=7';
|
|
} elsif ($type eq 'title') {
|
|
$attr='1=4';
|
|
} elsif ($type eq 'author') {
|
|
$attr='1=1003';
|
|
} elsif ($type eq 'lccn') {
|
|
$attr='1=9';
|
|
} elsif ($type eq 'keyword') {
|
|
$attr='1=1016';
|
|
}
|
|
my $query = "\@attr $attr \"$term\"";
|
|
print "$$/$id : Processing $type=$term at $name $server $database $syntax (".($forkcounter+1)." forks)\n";
|
|
# try to connect
|
|
my $conn;
|
|
my $noconnection=0;
|
|
my $error=0;
|
|
# the z3950 query is builded. Launch it.
|
|
if ($user) {
|
|
$conn= new ZOOM::Connection($servername, $port, databaseName => $database, user => $user, password => $password) || ($noconnection=1);
|
|
} else {
|
|
$conn= new ZOOM::Connection($servername, $port, databaseName => $database) || ($noconnection=1);
|
|
}
|
|
if ($noconnection || $error) {
|
|
# if connection impossible, don't go further !
|
|
print "$$/$id : no connection at $globalname\n";
|
|
my $result = MARC::Record->new();
|
|
my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"$globalname server is NOT responding","",$random);
|
|
} else {
|
|
# else, build z3950 query and do it !
|
|
$now=time();
|
|
my $resultsid="";
|
|
# create z3950results entries.
|
|
if ($serverdone{$serverinfo}==-1) { # if entry exist, just retrieve it
|
|
my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
|
|
$stj->execute($serverinfo,$id);
|
|
($resultsid) = $stj->fetchrow;
|
|
$stj->finish;
|
|
print "$$/$id : 1 >> $resultsid\n";
|
|
} else { # else create it : (may be serverdone=1 or 0)
|
|
my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
|
|
$stj->execute($serverinfo,$id);
|
|
($resultsid) = $stj->fetchrow;
|
|
$stj->finish;
|
|
print "$$/$id : 2 >> $resultsid\n";
|
|
unless ($resultsid) {
|
|
$stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values (?,?,?)");
|
|
$stj->execute($serverinfo, $id, $now);
|
|
$resultsid=$dbi->{'mysql_insertid'};
|
|
$stj->finish;
|
|
print "$$/$id : creating and ";
|
|
}
|
|
}
|
|
print "$$/$id : working on results entry $resultsid\n";
|
|
# set active to 1 => this request is on the way.
|
|
my $stj=$dbi->prepare("update z3950results set active=1 where id=?");
|
|
$stj->execute($resultsid);
|
|
#######
|
|
print "$$/$id : connected to $globalname\n";
|
|
print "Global Syntax =>".$globalsyntax."<=";
|
|
eval {$conn->option(elementSetName => 'F')};
|
|
eval { $conn->option(preferredRecordSyntax => 'USMARC');} if ($globalsyntax eq "USMARC");
|
|
eval { $conn->option(preferredRecordSyntax => 'UNIMARC');} if ($globalsyntax eq "UNIMARC");
|
|
if ($@) {
|
|
print "$$/$id : $globalname ERROR: $@ for $resultsid\n";
|
|
# in case pb during connexion, set result to "empty" to avoid everlasting loops
|
|
my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
|
|
$stj->execute(0,0,$now,$resultsid);
|
|
} else {
|
|
my $rs=$conn->search_pqf($query) or warn "Connection Problem:".$conn->errmsg();
|
|
pe();
|
|
# we have an answer for a query => get results & store them in marc_breeding table
|
|
my $numresults=$rs->size();
|
|
if ($numresults eq 0) {
|
|
print "$$/$id : $globalname : no records found\n";
|
|
} else {
|
|
print "$$/$id : $globalname : $numresults records found, retrieving them (max 80)\n";
|
|
}
|
|
pe();
|
|
my $i;
|
|
my $result='';
|
|
my $scantimerstart=time();
|
|
for ($i=1; $i<=(($numresults<80) ? ($numresults) : (80)); $i++) {
|
|
my $rec=$rs->record($i-1);
|
|
my $marcdata = $rec->raw();
|
|
#my $marcrecord = MARC::Record->new_from_usmarc($marcdata);
|
|
#warn $marcrecord->as_formatted();
|
|
$globalencoding = ref($rec);
|
|
$result.=$marcdata;
|
|
}
|
|
my @x=split /::/,$globalencoding;
|
|
my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"Z3950-$globalname",$x[3],$random);
|
|
my $scantimerend=time();
|
|
my $numrecords;
|
|
($numresults<80) ? ($numrecords=$numresults) : ($numrecords=80);
|
|
my $elapsed=$scantimerend-$scantimerstart;
|
|
if ($elapsed) {
|
|
my $speed=int($numresults/$elapsed*100)/100;
|
|
print "$$/$id : $globalname : $server records retrieved $numrecords SPEED: $speed\n";
|
|
}
|
|
my $q_result=$dbi->quote($result);
|
|
($q_result) || ($q_result='""');
|
|
$now=time();
|
|
if ($numresults >0) {
|
|
my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results=?,enddate=? where id=?");
|
|
$stj->execute($numresults,$numrecords,$q_result,$now,$resultsid);
|
|
} else { # no results...
|
|
my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
|
|
$stj->execute($numresults,$numrecords,$now,$resultsid);
|
|
}
|
|
}
|
|
$stj=$dbi->prepare("update z3950results set active=-1 where id=?");
|
|
$stj->execute($resultsid);
|
|
eval {my $stj->finish};
|
|
}
|
|
#OK, the search is done inactivate it..
|
|
print "$$/$id : $server search done.\n";
|
|
exit;
|
|
}
|
|
}
|
|
} else {
|
|
# $forkcounter >=12
|
|
}
|
|
# delete z3950queue entry, as everything is done
|
|
my $sti=$dbh->prepare("update z3950queue set done=1,enddate=? where id=?");
|
|
$now=time();
|
|
$sti->execute($now,$id);
|
|
}
|
|
$lastrun=time();
|
|
}
|
|
sleep 10;
|
|
}
|
|
}
|
|
|
|
sub reap {
|
|
$forkcounter--;
|
|
wait;
|
|
}
|
|
|
|
|
|
sub checkqueue {
|
|
$checkqueue=1;
|
|
}
|
|
|
|
|
|
sub pe {
|
|
return 0;
|
|
}
|