From 1b0992e8d5a2bc8f0f74623daa9a7eac4786644a Mon Sep 17 00:00:00 2001
From: Doug Kingston
Date: Sun, 27 Oct 2013 17:18:17 -0700
Subject: [PATCH] Bug 6435: Add daemon mode to rebuild_zebra.pl

This change adds code to check the zebraqueue table with a cheap SQL
query and a daemon loop that checks for new entries and processes them
incrementally before sleeping for a controllable number of seconds.
The default is 5 seconds which provides a near realtime search index
update. This is desirable particularly for libraries that are doing
active catalogue updating.

The query is adjusted based on whether -a, -b, or -a -b are specified.
Help text updated. Tested against a live 3.12 system.

Note that this fix will benefit from the fix to lack of locking
(bug 11078)

Signed-off-by: Chris Cormack
Signed-off-by: Martin Renvoize
Signed-off-by: Galen Charlton
---
Review notes (text in this section is not part of the commit message):

 * -sleep is declared as 'sleep=i' (mandatory integer). With 'sleep:i'
   a bare "-sleep" assigned 0, and the daemon loop then polled the
   database without pausing.
 * The zebraqueue WHERE fragments use single-quoted SQL string literals
   and no trailing semicolon, so the query does not depend on the
   server accepting double-quoted strings.
 * NOTE(review): daemon mode passes $process_zebraqueue through
   unchanged; presumably it should be combined with -z so each pass is
   incremental -- confirm against index_records().

 misc/migration_tools/rebuild_zebra.pl | 65 +++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 9 deletions(-)

diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index a46526d14f..b59935ca32 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -21,6 +21,8 @@ $|=1; # flushes output
 # If the cron job starts us in an unreadable dir, we will break without
 # this.
 chdir $ENV{HOME} if (!(-r '.'));
+my $daemon_mode;
+my $daemon_sleep = 5;
 my $directory;
 my $nosanitize;
 my $skip_export;
@@ -45,6 +47,8 @@ my $run_user = (getpwuid($<))[0];
 my $verbose_logging = 0;
 my $zebraidx_log_opt = " -v none,fatal,warn ";
 my $result = GetOptions(
+    'daemon'        => \$daemon_mode,
+    'sleep=i'       => \$daemon_sleep,
     'd:s'           => \$directory,
     'r|reset'       => \$reset,
     's'             => \$skip_export,
@@ -152,16 +156,13 @@ if ($do_munge) {
 
 my $tester = XML::LibXML->new();
 
-if ($authorities) {
-    index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
-} else {
-    print "skipping authorities\n" if ( $verbose_logging );
-}
-
-if ($biblios) {
-    index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+if ($daemon_mode) {
+    while (1) {
+        do_one_pass() if ( zebraqueue_not_empty() );
+        sleep $daemon_sleep;
+    }
 } else {
-    print "skipping biblios\n" if ( $verbose_logging );
+    do_one_pass();
 }
 
 
@@ -191,6 +192,40 @@ if ($keep_export) {
     }
 }
 
+sub do_one_pass {
+    if ($authorities) {
+        index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
+    } else {
+        print "skipping authorities\n" if ( $verbose_logging );
+    }
+
+    if ($biblios) {
+        index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+    } else {
+        print "skipping biblios\n" if ( $verbose_logging );
+    }
+}
+
+# Check the zebra update queue and return true if there are records to process
+sub zebraqueue_not_empty {
+    my $where_str;
+
+    if ($authorities && $biblios) {
+        $where_str = 'done = 0';
+    } elsif ($biblios) {
+        $where_str = q{server = 'biblioserver' AND done = 0};
+    } else {
+        $where_str = q{server = 'authorityserver' AND done = 0};
+    }
+    my $query =
+        $dbh->prepare( 'SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
+
+    $query->execute;
+    my $count = $query->fetchrow_arrayref->[0];
+    print "queued records: $count\n" if $verbose_logging > 0;
+    return $count > 0;
+}
+
 # This checks to see if the zebra directories exist under the provided path.
 # If they don't, then zebra is likely to spit the dummy. This returns true
 # if the directories had to be created, false otherwise.
@@ -692,6 +727,18 @@ Parameters:
 
     -a                      index authority records
 
+    -daemon                 Run in daemon mode.  The program will loop checking
+                            for entries on the zebraqueue table, processing
+                            them incrementally if present, and then sleep
+                            for a few seconds before repeating the process.
+                            Checking the zebraqueue table is done with a cheap
+                            SQL query.  This allows for near realtime update of
+                            the zebra search index with low system overhead.
+                            Use -sleep to control the checking interval.
+
+    -sleep 10               Seconds to sleep between checks of the zebraqueue
+                            table in daemon mode.  The default is 5 seconds.
+
     -z                      select only updated and deleted
                             records marked in the zebraqueue
                             table.  Cannot be used with -r
-- 
2.39.5