diff --git a/Makefile.PL b/Makefile.PL index 277742ad59..8b896338f4 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -186,7 +186,9 @@ Directory for Zebra configuration files. =item ZEBRA_LOCK_DIR -Directory for Zebra's lock files. +Directory for Zebra's lock files. This includes subdirs for authorities, +biblios, and the zebra rebuild function. Any activity to reindex +zebra from koha should interlock here with rebuild_zebra.pl. =item ZEBRA_DATA_DIR @@ -318,6 +320,7 @@ my $target_map = { './skel/var/lib/koha/zebradb/biblios/register' => { target => 'ZEBRA_DATA_DIR', trimdir => 6 }, './skel/var/lib/koha/zebradb/biblios/shadow' => { target => 'ZEBRA_DATA_DIR', trimdir => 6 }, './skel/var/lib/koha/zebradb/biblios/tmp' => { target => 'ZEBRA_DATA_DIR', trimdir => 6 }, + './skel/var/lock/koha/zebradb/rebuild' => { target => 'ZEBRA_LOCK_DIR', trimdir => 6 }, './skel/var/lib/koha/plugins' => { target => 'PLUGINS_DIR', trimdir => 6 }, './sms' => 'INTRANET_CGI_DIR', './suggestion' => 'INTRANET_CGI_DIR', diff --git a/debian/templates/koha-conf-site.xml.in b/debian/templates/koha-conf-site.xml.in index 9bf090b321..371b1d29d9 100644 --- a/debian/templates/koha-conf-site.xml.in +++ b/debian/templates/koha-conf-site.xml.in @@ -280,6 +280,7 @@ __MEMCACHED_NAMESPACE__ __BIBLIOS_INDEXING_MODE__ __AUTHORITIES_INDEXING_MODE__ + /var/lock/koha/__KOHASITE__ /etc/koha/searchengine/queryparser.yaml diff --git a/etc/koha-conf.xml b/etc/koha-conf.xml index 1da53780c1..72da2de9f8 100644 --- a/etc/koha-conf.xml +++ b/etc/koha-conf.xml @@ -297,6 +297,7 @@ __PAZPAR2_TOGGLE_XML_POST__ 0 __BIB_INDEX_MODE__ __AUTH_INDEX_MODE__ + __ZEBRA_LOCK_DIR__ __KOHA_CONF_DIR__/searchengine/queryparser.yaml diff --git a/misc/bin/koha-zebra-ctl.sh b/misc/bin/koha-zebra-ctl.sh index 397eb19c6e..82b91628dd 100755 --- a/misc/bin/koha-zebra-ctl.sh +++ b/misc/bin/koha-zebra-ctl.sh @@ -65,6 +65,7 @@ case "$1" in mkdir -p $LOCKDIR mkdir -p $LOCKDIR/biblios mkdir -p $LOCKDIR/authorities + mkdir -p $LOCKDIR/rebuild if [[ $EUID -eq 0 ]]; then chown -R $USER:$GROUP $LOCKDIR fi diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index 04cf45428d..ec5f5189f0 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -5,6 +5,7 @@ use strict; use C4::Context; use Getopt::Long; +use Fcntl qw(:flock); use File::Temp qw/ tempdir /; use File::Path; use C4::Biblio; @@ -42,6 +43,8 @@ my $where; my $offset; my $run_as_root; my $run_user = (getpwuid($<))[0]; +my $wait_for_lock = 0; +my $use_flock; my $verbose_logging = 0; my $zebraidx_log_opt = " -v none,fatal,warn "; @@ -62,11 +65,12 @@ my $result = GetOptions( 'x' => \$as_xml, 'y' => \$do_not_clear_zebraqueue, 'z' => \$process_zebraqueue, - 'where:s' => \$where, - 'length:i' => \$length, + 'where:s' => \$where, + 'length:i' => \$length, 'offset:i' => \$offset, - 'v+' => \$verbose_logging, - 'run-as-root' => \$run_as_root, + 'v+' => \$verbose_logging, + 'run-as-root' => \$run_as_root, + 'wait-for-lock' => \$wait_for_lock, ); if (not $result or $want_help) { @@ -151,12 +155,27 @@ my $dbh = C4::Context->dbh; my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber",""); my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber",""); +# Protect again simultaneous update of the zebra index by using a lock file. +# Create our own lock directory if its missing. This shouild be created +# by koha-zebra-ctl.sh or at system installation. If the desired directory +# does not exist and cannot be created, we fall back on /tmp - which will +# always work. + +my $lockdir = C4::Context->config("zebra_lockdir") // "/var/lock"; +$lockdir .= "/rebuild"; +unless (-d $lockdir) { + eval { mkpath($lockdir, 0, oct(755)) }; + $lockdir = "/tmp" if ($@); +} +my $lockfile = $lockdir . "/rebuild..LCK"; + if ( $verbose_logging ) { print "Zebra configuration information\n"; print "================================\n"; print "Zebra biblio directory = $biblioserverdir\n"; print "Zebra authorities directory = $authorityserverdir\n"; print "Koha directory = $kohadir\n"; + print "Lockfile = $lockfile\n"; print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n"; print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n"; print "================================\n"; @@ -164,13 +183,37 @@ if ( $verbose_logging ) { my $tester = XML::LibXML->new(); +# The main work is done here by calling do_one_pass(). We have added locking +# avoid race conditions between Full rebuilds and incremental updates either from +# daemon mode or periodic invocation from cron. The race can lead to an updated +# record being overwritten by a rebuild if the update is applied after the export +# by the rebuild and before the rebuild finishes (more likely to effect large +# catalogs). +# +# We have chosen to exit immediately by default if we cannot obtain the lock +# to prevent the potential for a infinite backlog from cron invocations, but an +# option (wait-for-lock) is provided to let the program wait for the lock. +# See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details. +open my $LockFH, q{>}, $lockfile or die "$lockfile: $!"; if ($daemon_mode) { while (1) { - do_one_pass() if ( zebraqueue_not_empty() ); + # For incremental updates, skip the update if the updates are locked + if (_flock($LockFH, LOCK_EX|LOCK_NB)) { + do_one_pass() if ( zebraqueue_not_empty() ); + _flock($LockFH, LOCK_UN); + } sleep $daemon_sleep; } } else { - do_one_pass(); + # all one-off invocations + my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB; + if (_flock($LockFH, $lock_mode)) { + do_one_pass(); + _flock($LockFH, LOCK_UN); + } else { + # Can't die() here because we have files to dlean up. + print "Aborting rebuild. Unable to flock $lockfile: $!\n"; + } } @@ -228,7 +271,7 @@ sub zebraqueue_not_empty { $where_str = 'server = "authorityserver" AND done = 0;'; } my $query = - $dbh->prepare( 'SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str ); + $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str ); $query->execute; my $count = $query->fetchrow_arrayref->[0]; @@ -724,6 +767,26 @@ sub do_indexing { } +sub _flock { +# test if flock is present; if so, use it; if not, return true +# op refers to the official flock operations incl LOCK_EX, LOCK_UN, etc. +# combining LOCK_EX with LOCK_NB returns immediately + my ($fh, $op)= @_; + if( !defined($use_flock) ) { + #check if flock is present; if not, you will have a fatal error + my $i=eval { flock($fh, $op) }; + #assuming that $fh and $op are fine(..), an undef i means no flock + $use_flock= defined($i)? 1: 0; + print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock; + return 1 if !$use_flock; + return $i; + } + else { + return 1 if !$use_flock; + return flock($fh, $op); + } +} + sub print_usage { print <<_USAGE_; $0: reindex MARC bibs and/or authorities in Zebra. @@ -809,6 +872,12 @@ Parameters: --run-as-root explicitily allow script to run as 'root' user + --wait-for-lock when not running in daemon mode, the default + behavior is to abort a rebuild if the rebuild + lock is busy. This option will cause the program + to wait for the lock to free and then continue + processing the rebuild request, + --help or -h show this message. _USAGE_ } diff --git a/skel/var/lock/koha/zebradb/rebuild/README b/skel/var/lock/koha/zebradb/rebuild/README new file mode 100644 index 0000000000..98b4c5b91c --- /dev/null +++ b/skel/var/lock/koha/zebradb/rebuild/README @@ -0,0 +1 @@ +Zebra rebuild lock dir diff --git a/t/db_dependent/zebra_config.pl b/t/db_dependent/zebra_config.pl index 47854da42b..2a5e99dcb9 100755 --- a/t/db_dependent/zebra_config.pl +++ b/t/db_dependent/zebra_config.pl @@ -31,6 +31,7 @@ if ($indexing_mode eq 'dom') { make_path("$destination/var/lock/zebradb"); make_path("$destination/var/lock/zebradb/biblios"); make_path("$destination/var/lock/zebradb/authorities"); +make_path("$destination/var/lock/zebradb/rebuild"); make_path("$destination/var/lib/zebradb"); make_path("$destination/var/lib/zebradb/biblios"); make_path("$destination/var/lib/zebradb/biblios/key");