From c1fcb1b4e9f6e71e878fc9c73d4493f41e489a8b Mon Sep 17 00:00:00 2001 From: tonnesen Date: Tue, 6 Nov 2001 18:13:59 +0000 Subject: [PATCH] Continuing work on Z39.50 search tool. Daemon now forks up to 12 processes to do Z39.50 searches. Daemon will also wait to see if the user looks beyond the first couple of pages of results, and will download more results if necessary. --- acqui.simple/marcimport.pl | 131 ++++++++++++---- acqui.simple/processz3950queue | 264 +++++++++++++++++++++++++-------- 2 files changed, 305 insertions(+), 90 deletions(-) diff --git a/acqui.simple/marcimport.pl b/acqui.simple/marcimport.pl index 7aae1e9d67..de3fea6ea3 100755 --- a/acqui.simple/marcimport.pl +++ b/acqui.simple/marcimport.pl @@ -133,8 +133,14 @@ if ($input->param('z3950queue')) { } chop $serverlist; my $q_serverlist=$dbh->quote($serverlist); - my $sth=$dbh->prepare("insert into z3950queue (term,type,servers) values ($q_term, '$type', $q_serverlist)"); + my $rand=$input->param('rand'); + my $sth=$dbh->prepare("select identifier from z3950queue where + identifier=$rand"); $sth->execute; + unless ($sth->rows) { + $sth=$dbh->prepare("insert into z3950queue (term,type,servers, identifier) values ($q_term, '$type', $q_serverlist, '$rand')"); + $sth->execute; + } } } @@ -695,26 +701,56 @@ EOF $sth->execute; my ($servers) = $sth->fetchrow; my $serverstring; + my $starttimer=time(); foreach $serverstring (split(/\s+/, $servers)) { my ($name, $server, $database, $auth) = split(/\//, $serverstring, 4); if ($name eq 'MAN') { print "$server/$database
\n"; - } elsif ($name eq 'LOC') { - print "Library of Congress
\n"; - } elsif ($name eq 'NLC') { - print "National Library of Canada
\n"; } else { my $sti=$dbh->prepare("select name from z3950servers where id=$name"); $sti->execute; my ($longname)=$sti->fetchrow; - print "$longname
\n"; + print "\n"; + if ($longname) { + print "$longname \n"; + } else { + print "$server/$database \n"; + } } - print "\n"; @@ -944,15 +1015,17 @@ sub z3950 { } $serverlist.=" \n"; + my $rand=rand(1000000000); print << "EOF";

+ - + diff --git a/acqui.simple/processz3950queue b/acqui.simple/processz3950queue index 51b3feabb7..aad5e4bf38 100755 --- a/acqui.simple/processz3950queue +++ b/acqui.simple/processz3950queue @@ -4,56 +4,206 @@ use DBI; #use strict; use C4::Acquisitions; use C4::Output; +use Net::Z3950; my $dbh=C4Connect; +my $sth=$dbh->prepare("update z3950results set active=0"); +$sth->execute; +$sth->finish; +$SIG{CHLD}='reap'; - +my $reapcounter=0; +my $forkcounter=0; +my $pid=$$; +my $lastrun=0; while (1) { - my $sth=$dbh->prepare("select id,term,type,servers from z3950queue where - isnull(done) || done=-1"); - $sth->execute; - while (my ($id, $term, $type, $servers) = $sth->fetchrow) { - my $now=time(); - my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=$now where id=$id"); - $sti->execute; - my $attr=''; - if ($type eq 'isbn') { - $attr='1=7'; - } elsif ($type eq 'title') { - $attr='1=4'; - } elsif ($type eq 'lccn') { - $attr='1=9'; - } - $term='"'.$term.'"'; - $query="f \@attr $attr $term"; - my $totalrecords=0; - my $serverinfo; - foreach $serverinfo (split(/\s+/, $servers)) { - my ($name, $server, $database, $auth) = split(/\//, $serverinfo, 4); - ($auth eq '/') && ($auth=''); - print "Processing $type=$term at $name $server $database $auth\n"; - $now=time(); - my $q_serverinfo=$dbh->quote($serverinfo); - my $sti=$dbh->prepare("insert into z3950results (server, queryid, startdate) values ($q_serverinfo, $id, $now)"); - $sti->execute; - my $resultsid=$dbh->{'mysql_insertid'}; - getrecord($server, $database, $query, $auth); - my $result=`cat yaz.mrc`; - unlink ('yaz.mrc'); - my $splitchar=chr(29); - my @records=split(/$splitchar/, $result); - my $numrecords=$#records+1; - $totalrecords+=$numrecords; - my $q_result=$dbh->quote($result); - ($q_result) || ($q_result='""'); - $now=time(); - $sti=$dbh->prepare("update z3950results set numrecords=$numrecords,results=$q_result,enddate=$now where id=$resultsid"); - $sti->execute; + if ((time-$lastrun)>5) { + my $sth=$dbh->prepare("select id,term,type,servers from z3950queue order by id"); + $sth->execute; + while (my ($id, $term, $type, $servers) = $sth->fetchrow) { + if ($forkcounter<12) { + my $now=time(); + $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=$id"); + $stk->execute; + my %serverdone; + unless ($stk->rows) { + my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=$now where id=$id"); + $sti->execute; + } + while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) { + if ($r_enddate >0) { + $serverdone{$r_server}=1; + } elsif ($active) { + $serverdone{$r_server}=1; + } else { + $serverdone{$r_server}=-1; + } + } + + $stk->finish; + my $attr=''; + if ($type eq 'isbn') { + $attr='1=7'; + } elsif ($type eq 'title') { + $attr='1=4'; + } elsif ($type eq 'author') { + $attr='1=1003'; + } elsif ($type eq 'lccn') { + $attr='1=9'; + } elsif ($type eq 'keyword') { + $attr='1=1016'; + } + $term='"'.$term.'"'; + $query="\@attr $attr $term"; + my $totalrecords=0; + my $serverinfo; + my $stillprocessing=0; + foreach $serverinfo (split(/\s+/, $servers)) { + (next) if ($serverdone{$serverinfo} == 1); + my $stillprocessing=1; + if (my $pid=fork()) { + $forkcounter++; + } else { + #$sth->finish; + #$sti->finish; + #$dbh->disconnect; + my $dbi=C4Connect; + my ($name, $server, $database, $user, $password) = split(/\//, $serverinfo, 5); + $server=~/(.*)\:(\d+)/; + my $servername=$1; + my $port=$2; + print "Processing $type=$term at $name $server $database (".($forkcounter+1)." forks)\n"; + $now=time(); + my $q_serverinfo=$dbi->quote($serverinfo); + my $resultsid; + if ($serverdone{$serverinfo}==-1) { + my $stj=$dbi->prepare("select id from z3950results where server=$q_serverinfo and queryid=$id"); + $stj->execute; + ($resultsid) = $stj->fetchrow; + } else { + my $stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values ($q_serverinfo, $id, $now)"); + $stj->execute; + $resultsid=$dbi->{'mysql_insertid'}; + } + my $stj=$dbh->prepare("update z3950results set active=1 where id=$resultsid"); + $stj->execute; + my $conn; + my $noconnection=0; + if ($user) { + eval { $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database, user => $user, password => $password); }; + if ($@) { + $noconnection=1; + } + pe(); + } else { + eval { $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database); }; + if ($@) { + $noconnection=1; + } + pe(); + } + if ($noconnection) { + } else { + my $rs=$conn->search($query); + pe(); + $rs->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::USMARC); + pe(); + my $numresults=$rs->size(); + pe(); + my $i; + my $result=''; + my $scantimerstart=time(); + for ($i=1; $i<=(($numresults<80) ? ($numresults) : (80)); $i++) { + my $rec=$rs->record($i); + my $marcdata=$rec->rawdata(); + $result.=$marcdata; + } + my $scantimerend=time(); + my $numrecords; + ($numresults<80) ? ($numrecords=$numresults) : ($numrecords=80); + my $elapsed=$scantimerend-$scantimerstart; + if ($elapsed) { + my $speed=int($numresults/$elapsed*100)/100; + print " SPEED: $speed $server done $numrecords\n"; + } + + my $q_result=$dbi->quote($result); + ($q_result) || ($q_result='""'); + $now=time(); + my $task="update z3950results set numrecords=$numresults,numdownloaded=$numrecords,highestseen=0,results=$q_result,enddate=$now where id=$resultsid"; + my $stj=$dbi->prepare($task); + $stj->execute; + my $counter=0; + while ($counter<60 && $numrecords<$numresults) { + $counter++; + my $stj=$dbi->prepare("select highestseen from z3950results where id=$resultsid"); + $stj->execute; + my ($highestseen) = $stj->fetchrow; + if ($highestseen>($numrecords-30)) { + $counter=0; + print " $server rescanning\n"; + my $scantimerstart=time(); + for ($i=$numrecords+1; $i<=(($numresults<($numrecords+40)) ? ($numresults) : ($numrecords+40)); $i++) { + my $rec=$rs->record($i); + my $marcdata=$rec->rawdata(); + $result.=$marcdata; + } + my $scantimerend=time(); + ($numresults<$numrecords+40) ? ($numrecords=$numresults) : ($numrecords=$numrecords+40); + my $elapsed=$scantimerend-$scantimerstart; + if ($elapsed) { + my $speed=int($numresults/$elapsed*100)/100; + print " SPEED: $speed $server done $numrecords\n"; + } + + my $q_result=$dbi->quote($result); + ($q_result) || ($q_result='""'); + $now=time(); + my $task="update z3950results set numdownloaded=$numrecords,results=$q_result where id=$resultsid"; + my $stj=$dbi->prepare($task); + $stj->execute; + } + sleep 5; + } + } + my $stj=$dbi->prepare("update z3950results set active=0 where id=$resultsid"); + $stj->execute; + eval {$stj->finish}; + $dbi->disconnect; + print " $server done.\n"; + exit; + sub pe { + (return) unless ($code); + my $code=$conn->errcode(); + my $msg=$conn->errmsg(); + my $ai=$conn->addinfo(); + print << "EOF"; + CODE: $code + MSG: $msg + ADDTL: $ai + +EOF + } + } + } unless ($stillprocessing) { + #my $sti=$dbh->prepare("select enddate from z3950queue where id=$id"); + #$sti->execute; + #my ($enddate) = $sti->fetchrow; + #unless ($enddate) { + # my $now=time; +# $sti=$dbh->prepare("update z3950queue set done=1,numrecords=$totalrecords,enddate=$now where id=$id"); +# $sti->execute; +# } + } + } else { +# my $q_serverinfo=$dbh->quote($serverinfo); +# my $stj=$dbh->prepare("insert into z3950results (server, queryid, startdate) values ($q_serverinfo, $id, 0)"); +# $stj->execute; + } } - $sti=$dbh->prepare("update z3950queue set done=1,numrecords=$totalrecords,enddate=$now where id=$id"); - $sti->execute; + $lastrun=time(); } - sleep 15; + sleep 1; } sub getrecord { @@ -61,30 +211,16 @@ sub getrecord { my $base=shift; my $query=shift; my $auth=shift; - open (M, "|yaz-client -m yaz.mrc >>yaz.out 2>>yaz.err"); + my $id=shift; + open (M, "|yaz-client -m yaz-$id.mrc >>yaz.out 2>>yaz.err"); select M; $|=1; select STDOUT; ($auth) && ($auth="authentication $auth\n"); - print << "EOF"; -$auth\open $server -base $base -$query -s -s -s -s -s -s -s -s -s -s -quit -EOF print M << "EOF"; $auth\open $server base $base +setnames $query s s @@ -100,3 +236,9 @@ quit EOF close M; } +sub reap { + $forkcounter--; +} + + + -- 2.39.5
Search for MARC records
Query Term
ISBN LCCN Title
 ISBN  LCCN
 Author  Title  Keyword
$serverlist