Browse Source

Bug 11078: Add locking to rebuild_zebra

This patch adds locking to rebuild_zebra.pl to ensure that simultaneous
changes are prevented (as one is likely to overwrite the other).
Incremental updates in daemon mode will be skipped if the lock is busy
and they will be picked up on the next pass.  Non-daemon mode
invocations will also exit immediately if they cannot get the lock
unless the new flag -wait-for-lock is specified, in which case they
will wait until they get the lock and then proceed.

Supporting changes made to Makefile.PL and templates for the new
locking directory (paralleling the other zebra lock directories).
We stash the zebra_lockdir in koha-conf.xml so rebuild_zebra.pl
can find it.

To address earlier QA concerns we:
1. added code to check if flock is available and ignore locking if
it's missing (from M. de Rooy)

2. changed default for adhoc invocations to abort if they cannot
obtain the lock.  Added option -wait-for-lock if the user prefers
to wait until the lock is free, and then continue processing.

3. added missing entry to t/db_dependent/zebra_config.pl

4. added a fallback locking directory of /tmp

Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl>
Doug merged the original patch with the QA changes.
Just for the record, noting here that the original patch was tested
extensively too by Martin Renvoize.
I have added a followup for some exceptional cases.

Signed-off-by: Galen Charlton <gmc@esilibrary.com>
3.16.x
Doug Kingston 8 years ago
committed by Galen Charlton
parent
commit
88e7faf860
  1. 5
      Makefile.PL
  2. 1
      debian/templates/koha-conf-site.xml.in
  3. 1
      etc/koha-conf.xml
  4. 1
      misc/bin/koha-zebra-ctl.sh
  5. 83
      misc/migration_tools/rebuild_zebra.pl
  6. 1
      skel/var/lock/koha/zebradb/rebuild/README
  7. 1
      t/db_dependent/zebra_config.pl

5
Makefile.PL

@ -186,7 +186,9 @@ Directory for Zebra configuration files.
=item ZEBRA_LOCK_DIR
Directory for Zebra's lock files.
Directory for Zebra's lock files. This includes subdirs for authorities,
biblios, and the zebra rebuild function. Any activity to reindex
zebra from koha should interlock here with rebuild_zebra.pl.
=item ZEBRA_DATA_DIR
@ -318,6 +320,7 @@ my $target_map = {
'./skel/var/lib/koha/zebradb/biblios/register' => { target => 'ZEBRA_DATA_DIR', trimdir => 6 },
'./skel/var/lib/koha/zebradb/biblios/shadow' => { target => 'ZEBRA_DATA_DIR', trimdir => 6 },
'./skel/var/lib/koha/zebradb/biblios/tmp' => { target => 'ZEBRA_DATA_DIR', trimdir => 6 },
'./skel/var/lock/koha/zebradb/rebuild' => { target => 'ZEBRA_LOCK_DIR', trimdir => 6 },
'./skel/var/lib/koha/plugins' => { target => 'PLUGINS_DIR', trimdir => 6 },
'./sms' => 'INTRANET_CGI_DIR',
'./suggestion' => 'INTRANET_CGI_DIR',

1
debian/templates/koha-conf-site.xml.in

@ -280,6 +280,7 @@
<memcached_namespace>__MEMCACHED_NAMESPACE__</memcached_namespace>
<zebra_bib_index_mode>__BIBLIOS_INDEXING_MODE__</zebra_bib_index_mode>
<zebra_auth_index_mode>__AUTHORITIES_INDEXING_MODE__</zebra_auth_index_mode>
<zebra_lockdir>/var/lock/koha/__KOHASITE__</zebra_lockdir>
<queryparser_config>/etc/koha/searchengine/queryparser.yaml</queryparser_config>
</config>

1
etc/koha-conf.xml

@ -297,6 +297,7 @@ __PAZPAR2_TOGGLE_XML_POST__
<useldapserver>0</useldapserver><!-- see C4::Auth_with_ldap for extra configs you must add if you want to turn this on -->
<zebra_bib_index_mode>__BIB_INDEX_MODE__</zebra_bib_index_mode>
<zebra_auth_index_mode>__AUTH_INDEX_MODE__</zebra_auth_index_mode>
<zebra_lockdir>__ZEBRA_LOCK_DIR__</zebra_lockdir>
<queryparser_config>__KOHA_CONF_DIR__/searchengine/queryparser.yaml</queryparser_config>
</config>
</yazgfs>

1
misc/bin/koha-zebra-ctl.sh

@ -65,6 +65,7 @@ case "$1" in
mkdir -p $LOCKDIR
mkdir -p $LOCKDIR/biblios
mkdir -p $LOCKDIR/authorities
mkdir -p $LOCKDIR/rebuild
if [[ $EUID -eq 0 ]]; then
chown -R $USER:$GROUP $LOCKDIR
fi

83
misc/migration_tools/rebuild_zebra.pl

@ -5,6 +5,7 @@ use strict;
use C4::Context;
use Getopt::Long;
use Fcntl qw(:flock);
use File::Temp qw/ tempdir /;
use File::Path;
use C4::Biblio;
@ -42,6 +43,8 @@ my $where;
my $offset;
my $run_as_root;
my $run_user = (getpwuid($<))[0];
my $wait_for_lock = 0;
my $use_flock;
my $verbose_logging = 0;
my $zebraidx_log_opt = " -v none,fatal,warn ";
@ -62,11 +65,12 @@ my $result = GetOptions(
'x' => \$as_xml,
'y' => \$do_not_clear_zebraqueue,
'z' => \$process_zebraqueue,
'where:s' => \$where,
'length:i' => \$length,
'where:s' => \$where,
'length:i' => \$length,
'offset:i' => \$offset,
'v+' => \$verbose_logging,
'run-as-root' => \$run_as_root,
'v+' => \$verbose_logging,
'run-as-root' => \$run_as_root,
'wait-for-lock' => \$wait_for_lock,
);
if (not $result or $want_help) {
@ -151,12 +155,27 @@ my $dbh = C4::Context->dbh;
my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
# Protect against simultaneous update of the zebra index by using a lock file.
# Create our own lock directory if it's missing. This should be created
# by koha-zebra-ctl.sh or at system installation. If the desired directory
# does not exist and cannot be created, we fall back on /tmp - which will
# always work.
my $lockdir = C4::Context->config("zebra_lockdir") // "/var/lock";
$lockdir .= "/rebuild";
unless (-d $lockdir) {
eval { mkpath($lockdir, 0, oct(755)) };
$lockdir = "/tmp" if ($@);
}
my $lockfile = $lockdir . "/rebuild..LCK";
if ( $verbose_logging ) {
print "Zebra configuration information\n";
print "================================\n";
print "Zebra biblio directory = $biblioserverdir\n";
print "Zebra authorities directory = $authorityserverdir\n";
print "Koha directory = $kohadir\n";
print "Lockfile = $lockfile\n";
print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
print "================================\n";
@ -164,13 +183,37 @@ if ( $verbose_logging ) {
my $tester = XML::LibXML->new();
# The main work is done here by calling do_one_pass(). We have added locking
# to avoid race conditions between Full rebuilds and incremental updates either from
# daemon mode or periodic invocation from cron. The race can lead to an updated
# record being overwritten by a rebuild if the update is applied after the export
# by the rebuild and before the rebuild finishes (more likely to affect large
# catalogs).
#
# We have chosen to exit immediately by default if we cannot obtain the lock
# to prevent the potential for an infinite backlog from cron invocations, but an
# option (wait-for-lock) is provided to let the program wait for the lock.
# See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details.
open my $LockFH, q{>}, $lockfile or die "$lockfile: $!";
if ($daemon_mode) {
while (1) {
do_one_pass() if ( zebraqueue_not_empty() );
# For incremental updates, skip the update if the updates are locked
if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
do_one_pass() if ( zebraqueue_not_empty() );
_flock($LockFH, LOCK_UN);
}
sleep $daemon_sleep;
}
} else {
do_one_pass();
# all one-off invocations
my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
if (_flock($LockFH, $lock_mode)) {
do_one_pass();
_flock($LockFH, LOCK_UN);
} else {
# Can't die() here because we have files to clean up.
print "Aborting rebuild. Unable to flock $lockfile: $!\n";
}
}
@ -228,7 +271,7 @@ sub zebraqueue_not_empty {
$where_str = 'server = "authorityserver" AND done = 0;';
}
my $query =
$dbh->prepare( 'SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
$dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
$query->execute;
my $count = $query->fetchrow_arrayref->[0];
@ -724,6 +767,26 @@ sub do_indexing {
}
sub _flock {
    # Portable wrapper around flock().
    #
    # Arguments:
    #   $handle    - an open filehandle to lock or unlock
    #   $operation - a flock operation (LOCK_EX, LOCK_UN, ...); OR-ing in
    #                LOCK_NB makes the call return immediately
    #
    # Returns the result of flock() when flock is usable, and always
    # returns 1 (pretend success) when it is not.
    my ($handle, $operation) = @_;

    # Availability was already probed on an earlier call: either delegate
    # to the real flock or report success unconditionally.
    if ( defined $use_flock ) {
        return $use_flock ? flock($handle, $operation) : 1;
    }

    # First call: probe for flock. On a platform without it the call is a
    # fatal error, which the eval traps, leaving the result undefined.
    my $outcome = eval { flock($handle, $operation) };

    # Assuming the handle and operation are sane, a defined result (true or
    # false) means flock exists; remember that and pass the result through.
    if ( defined $outcome ) {
        $use_flock = 1;
        return $outcome;
    }

    # No flock here: remember that, warn when verbose, and carry on unlocked.
    $use_flock = 0;
    print "Warning: flock could not be used!\n" if $verbose_logging;
    return 1;
}
sub print_usage {
print <<_USAGE_;
$0: reindex MARC bibs and/or authorities in Zebra.
@ -809,6 +872,12 @@ Parameters:
--run-as-root explicitily allow script to run as 'root' user
--wait-for-lock when not running in daemon mode, the default
behavior is to abort a rebuild if the rebuild
lock is busy. This option will cause the program
to wait for the lock to free and then continue
processing the rebuild request,
--help or -h show this message.
_USAGE_
}

1
skel/var/lock/koha/zebradb/rebuild/README

@ -0,0 +1 @@
Zebra rebuild lock dir

1
t/db_dependent/zebra_config.pl

@ -31,6 +31,7 @@ if ($indexing_mode eq 'dom') {
make_path("$destination/var/lock/zebradb");
make_path("$destination/var/lock/zebradb/biblios");
make_path("$destination/var/lock/zebradb/authorities");
make_path("$destination/var/lock/zebradb/rebuild");
make_path("$destination/var/lib/zebradb");
make_path("$destination/var/lib/zebradb/biblios");
make_path("$destination/var/lib/zebradb/biblios/key");

Loading…
Cancel
Save