From df1f46f9da2b5a5058e616066888f8e21e0c2163 Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Thu, 19 Jun 2008 09:30:59 -0500 Subject: [PATCH] bug 2253: improve rebuild_zebra's handling of zebraqueue Prior to this patch, rebuild_zebra.pl -z was effectively hanging on to a lock on the zebraqueue table, preventing other scripts from inserting new entries into the table. This had the effect of causing circulation operations to time out. Refactored by having rebuild_zebra.pl pull the active queue into memory, then mark entries done by zebraqueue.id. Consequently, rebuild_zebra.pl should no longer block adding new entries into zebraqueue. Signed-off-by: Joshua Ferraro --- misc/migration_tools/rebuild_zebra.pl | 118 +++++++++++++++----------- 1 file changed, 70 insertions(+), 48 deletions(-) diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index 132f68cbd4..38054c3e09 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -106,18 +106,14 @@ if ($do_munge) { munge_config(); } -$dbh->{AutoCommit} = 0; # don't autocommit - want a consistent view of the zebraqueue table - if ($authorities) { index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $do_not_clear_zebraqueue); - $dbh->commit(); # commit changes to zebraqueue, if any } else { print "skipping authorities\n"; } if ($biblios) { index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $do_not_clear_zebraqueue); - $dbh->commit(); # commit changes to zebraqueue, if any } else { print "skipping biblios\n"; } @@ -163,21 +159,21 @@ sub index_records { mkdir "$directory" unless (-d $directory); mkdir "$directory/$record_type" unless (-d "$directory/$record_type"); if ($process_zebraqueue) { - my $sth = select_zebraqueue_records($record_type, 'deleted'); + my $entries = select_zebraqueue_records($record_type, 'deleted'); mkdir "$directory/del_$record_type" unless (-d 
"$directory/del_$record_type"); - $num_records_deleted = generate_deleted_marc_records($record_type, $sth, "$directory/del_$record_type", $as_xml); - mark_zebraqueue_done($record_type, 'deleted'); - $sth = select_zebraqueue_records($record_type, 'updated'); + $num_records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml); + mark_zebraqueue_batch_done($entries); + $entries = select_zebraqueue_records($record_type, 'updated'); mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type"); - $num_records_exported = export_marc_records($record_type, $sth, "$directory/upd_$record_type", $as_xml, $noxml); - mark_zebraqueue_done($record_type, 'updated'); + $num_records_exported = export_marc_records_from_list($record_type, + $entries, "$directory/upd_$record_type", $as_xml, $noxml); + mark_zebraqueue_batch_done($entries); } else { my $sth = select_all_records($record_type); + $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml); unless ($do_not_clear_zebraqueue) { - mark_zebraqueue_done($record_type, 'deleted'); - mark_zebraqueue_done($record_type, 'updated'); + mark_all_zebraqueue_done($record_type); } - $num_records_exported = export_marc_records($record_type, $sth, "$directory/$record_type", $as_xml, $noxml); } } @@ -205,44 +201,37 @@ sub select_zebraqueue_records { my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver'; my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate'; - my $sth = $dbh->prepare("SELECT DISTINCT biblio_auth_number + my $sth = $dbh->prepare("SELECT id, biblio_auth_number FROM zebraqueue WHERE server = ? AND operation = ? 
- AND done = 0"); + AND done = 0 + ORDER BY id DESC"); $sth->execute($server, $op); - return $sth; + my $entries = $sth->fetchall_arrayref({}); } -sub mark_zebraqueue_done { - my ($record_type, $update_type) = @_; +sub mark_all_zebraqueue_done { + my ($record_type) = @_; my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver'; - my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate'; - if ($op eq 'recordDelete') { - my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 - WHERE id IN ( - SELECT id FROM ( - SELECT z1.id - FROM zebraqueue z1 - JOIN zebraqueue z2 ON z2.biblio_auth_number = z1.biblio_auth_number - WHERE z1.done = 0 - AND z1.server = ? - AND z2.done = 0 - AND z2.server = ? - AND z1.operation = ? - ) d2 - ) - "); - $sth->execute($server, $server, $op); # if we've deleted a record, any prior specialUpdates are void - } else { - my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 - WHERE server = ? - AND operation = ? - AND done = 0"); - $sth->execute($server, $op); + my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 + WHERE server = ? 
+ AND done = 0"); + $sth->execute($server); +} + +sub mark_zebraqueue_batch_done { + my ($entries) = @_; + + $dbh->{AutoCommit} = 0; + my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?"); + $dbh->commit(); + foreach my $id (map { $_->{id} } @$entries) { + $sth->execute($id); } + $dbh->{AutoCommit} = 1; } sub select_all_records { @@ -262,7 +251,7 @@ sub select_all_biblios { return $sth; } -sub export_marc_records { +sub export_marc_records_from_sth { my ($record_type, $sth, $directory, $as_xml, $noxml) = @_; my $num_exported = 0; @@ -287,13 +276,41 @@ sub export_marc_records { return $num_exported; } +sub export_marc_records_from_list { + my ($record_type, $entries, $directory, $as_xml, $noxml) = @_; + + my $num_exported = 0; + open (OUT, ">:utf8 ", "$directory/exported_records") or die $!; + my $i = 0; + my %found = (); + foreach my $record_number ( map { $_->{biblio_auth_number} } + grep { !$found{ $_->{biblio_auth_number} }++ } + @$entries ) { + print "."; + print "\r$i" unless ($i++ %100); + my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml); + if (defined $marc) { + # FIXME - when more than one record is exported and $as_xml is true, + # the output file is not valid XML - it's just multiple elements + # strung together with no single root element. zebraidx doesn't seem + # to care, though, at least if you're using the GRS-1 filter. It does + # care if you're using the DOM filter, which requires valid XML file(s). + print OUT ($as_xml) ? 
$marc->as_xml_record() : $marc->as_usmarc(); + $num_exported++; + } + } + print "\nRecords exported: $num_exported\n"; + close OUT; + return $num_exported; +} + sub generate_deleted_marc_records { - my ($record_type, $sth, $directory, $as_xml) = @_; + my ($record_type, $entries, $directory, $as_xml) = @_; my $num_exported = 0; open (OUT, ">:utf8 ", "$directory/exported_records") or die $!; my $i = 0; - while (my ($record_number) = $sth->fetchrow_array) { + foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) { print "\r$i" unless ($i++ %100); print "."; @@ -348,21 +365,26 @@ sub get_raw_marc_record { $fetch_sth->execute($record_number); if (my ($blob) = $fetch_sth->fetchrow_array) { $marc = MARC::Record->new_from_usmarc($blob); + $fetch_sth->finish(); } else { - warn "failed to retrieve biblio $record_number"; + return; # failure to find a bib is not a problem - + # a delete could have been done before + # trying to process a record update } - $fetch_sth->finish(); } else { eval { $marc = GetMarcBiblio($record_number); }; if ($@) { - warn "failed to retrieve biblio $record_number"; + # here we do warn since catching an exception + # means that the bib was found but failed + # to be parsed + warn "error retrieving biblio $record_number"; return; } } } else { eval { $marc = GetAuthority($record_number); }; if ($@) { - warn "failed to retrieve authority $record_number"; + warn "error retrieving authority $record_number"; return; } } -- 2.39.5