3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # Koha is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
22 use Getopt::Long qw( GetOptions );
23 use Fcntl qw( LOCK_EX LOCK_NB LOCK_UN );
24 use File::Temp qw( tempdir );
25 use File::Path qw( mkpath rmtree );
26 use C4::Biblio qw( GetXmlBiblio );
27 use C4::AuthoritiesMarc qw( GetAuthority GetAuthorityXML );
28 use C4::Items qw( GetItemsInfo Item2Marc );
29 use Koha::RecordProcessor;
# Start-up section: defines the lock file name, prepares the process
# environment and declares the option variables that GetOptions() fills in.
# Basename of the lock file; _create_lockfile() appends it to a directory path.
33 use constant LOCK_FILENAME => 'rebuild..LCK';
35 # script that checks zebradir structure & create directories & mandatory files if needed
39 $|=1; # flushes output
40 # If the cron job starts us in an unreadable dir, we will break without
42 chdir $ENV{HOME} if (!(-r '.'));
# Option variables populated by the GetOptions() call below.
56 my $process_zebraqueue;
57 my $process_zebraqueue_skip_deletes;
58 my $do_not_clear_zebraqueue;
# Login name for the real user id; compared against 'root' during validation.
63 my $run_user = (getpwuid($<))[0];
64 my $wait_for_lock = 0;
# Default table used to select biblionumbers; overridable with -t|--table.
66 my $table = 'biblioitems';
67 my $is_memcached = Koha::Caches->get_instance->memcached_cache;
69 my $verbose_logging = 0;
# Default zebraidx log levels; replaced further down when -v is given twice.
70 my $zebraidx_log_opt = " -v none,fatal,warn ";
# Parse the command line. 'v+' counts repetitions of -v; ':i' and ':s'
# mark options taking an optional integer / string value.
71 my $result = GetOptions(
72 'daemon' => \$daemon_mode,
73 'sleep:i' => \$daemon_sleep,
78 'I|skip-index' => \$skip_index,
79 'nosanitize' => \$nosanitize,
83 'h|help' => \$want_help,
85 'y' => \$do_not_clear_zebraqueue,
86 'z' => \$process_zebraqueue,
87 'skip-deletes' => \$process_zebraqueue_skip_deletes,
89 'length:i' => \$length,
90 'offset:i' => \$offset,
91 'v+' => \$verbose_logging,
92 'run-as-root' => \$run_as_root,
93 'wait-for-lock' => \$wait_for_lock,
94 't|table:s' => \$table,
# Option validation and configuration lookup. Each check builds a message
# in $msg that points the user at --help.
# Bad options or an explicit -h/--help request.
97 if (not $result or $want_help) {
# -x is still accepted but deprecated; its value is discarded.
103 warn "Warning: You passed -x which is already the default and is now deprecated\n";
104 undef $as_xml; # Should not be used later
# Running as root must be requested explicitly with -run-as-root.
107 if( not defined $run_as_root and $run_user eq 'root') {
108 my $msg = "Warning: You are running this script as the user 'root'.\n";
109 $msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n";
110 $msg .= "Please do '$0 --help' to see usage.\n";
# Queue processing (-z) cannot be combined with skipping the export or resetting the index.
114 if ($process_zebraqueue and ($skip_export or $reset)) {
115 my $msg = "Cannot specify -r or -s if -z is specified\n";
116 $msg .= "Please do '$0 --help' to see usage.\n";
# -y (keep the queue) and -z (process the queue) are mutually exclusive.
120 if ($process_zebraqueue and $do_not_clear_zebraqueue) {
121 my $msg = "Cannot specify both -y and -z\n";
122 $msg .= "Please do '$0 --help' to see usage.\n";
127 # incompatible flags handled above: help, reset, and do_not_clear_zebraqueue
128 if ($skip_export or $keep_export or $skip_index or
129 $where or $length or $offset) {
130 my $msg = "Cannot specify -s, -k, -I, -where, -length, or -offset with -daemon.\n";
131 $msg .= "Please do '$0 --help' to see usage.\n";
134 unless ($is_memcached) {
135 warn "Warning: script running in daemon mode, without recommended caching system (memcached).\n";
# Queue processing is switched on here (the usage text says daemon mode implies -z).
139 $process_zebraqueue = 1;
# At least one record type has to be selected.
142 if (not $biblios and not $authorities) {
143 my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
144 $msg .= "Please do '$0 --help' to see usage.\n";
# Whitelist for -t|--table; the value is later interpolated into SQL by
# select_all_biblios(), which re-checks it against this list.
148 our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio', 'biblio_metadata' );
149 unless ( grep { $_ eq $table } @tables_allowed_for_select ) {
150 die "Cannot specify -t|--table with value '$table'. Only "
151 . ( join ', ', @tables_allowed_for_select )
156 # -v is for verbose, which seems backwards here because of how logging is set
157 # on the CLI of zebraidx. It works this way. The default is to not log much
158 if ($verbose_logging >= 2) {
159 $zebraidx_log_opt = '-v none,fatal,warn,all';
# No export directory given: use a temporary one, removed at exit unless
# $keep_export is set.
163 unless ($directory) {
165 $directory = tempdir(CLEANUP => ($keep_export ? 0 : 1));
# Zebra and Koha locations taken from the Koha configuration.
169 my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory};
170 my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
172 my $kohadir = C4::Context->config('intranetdir');
# MARC tag/subfield pairs mapped to the biblio and biblioitem numbers.
174 my ($biblionumbertagfield,$biblionumbertagsubfield) = C4::Biblio::GetMarcFromKohaField( "biblio.biblionumber" );
175 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = C4::Biblio::GetMarcFromKohaField( "biblioitems.biblioitemnumber" );
# Opening and closing text written around the records in every export file.
177 my $marcxml_open = q{<?xml version="1.0" encoding="UTF-8"?>
178 <collection xmlns="http://www.loc.gov/MARC21/slim">
181 my $marcxml_close = q{
185 # Protect against simultaneous update of the zebra index by using a lock file.
186 # Create our own lock directory if it is missing. This should be created
187 # by koha-zebra-ctl.sh or at system installation. If the desired directory
188 # does not exist and cannot be created, we fall back on /tmp - which will
191 my ($lockfile, $LockFH);
# Candidate lock directories, tried in this order.
193 C4::Context->config("zebra_lockdir"),
194 '/var/lock/zebra_' . C4::Context->config('database'),
195 '/tmp/zebra_' . C4::Context->config('database')
197 #we try three possibilities (we really want to lock :)
# Stop at the first directory in which the lock file could be opened.
199 ($LockFH, $lockfile) = _create_lockfile($_.'/rebuild');
200 last if defined $LockFH;
# No lock file anywhere: report it and carry on without locking.
202 if( !defined $LockFH ) {
203 print "WARNING: Could not create lock file $lockfile: $!\n";
204 print "Please check your koha-conf.xml for ZEBRA_LOCKDIR.\n";
205 print "Verify file permissions for it too.\n";
206 $use_flock = 0; # we disable file locking now and will continue
208 # note that this mimics old behavior (before we used
# Reference time for the elapsed figures reported by pretty_time().
212 my $start_time = time();
# With -v, print the resolved configuration before starting.
213 if ( $verbose_logging ) {
214 my $pretty_time = POSIX::strftime("%H:%M:%S",localtime($start_time));
215 print "Zebra configuration information\n";
216 print "================================\n";
217 print "Zebra biblio directory = $biblioserverdir\n";
218 print "Zebra authorities directory = $authorityserverdir\n";
219 print "Koha directory = $kohadir\n";
220 print "Lockfile = $lockfile\n" if $lockfile;
221 print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
222 print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
223 print "================================\n";
224 print "Job started: $pretty_time\n";
# Shared XML parser used by the export routines to validate records.
227 my $tester = XML::LibXML->new();
230 # The main work is done here by calling do_one_pass(). We have added locking
231 # to avoid race conditions between full rebuilds and incremental updates either from
232 # daemon mode or periodic invocation from cron. The race can lead to an updated
233 # record being overwritten by a rebuild if the update is applied after the export
234 # by the rebuild and before the rebuild finishes (more likely to affect large
237 # We have chosen to exit immediately by default if we cannot obtain the lock
238 # to prevent the potential for an infinite backlog from cron invocations, but an
239 # option (wait-for-lock) is provided to let the program wait for the lock.
240 # See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details.
243 # For incremental updates, skip the update if the updates are locked
# Non-blocking exclusive lock: if it is not obtained, this round is skipped.
244 if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
246 $dbh = C4::Context->dbh;
# Only do work when the queue holds pending entries.
247 if( zebraqueue_not_empty() ) {
248 Koha::Caches->flush_L1_caches() if $is_memcached;
# Errors caught by eval are only reported in verbose mode.
252 if ($@ && $verbose_logging) {
253 warn "Warning : $@\n";
255 _flock($LockFH, LOCK_UN);
260 # all one-off invocations
# --wait-for-lock blocks until the lock is free; otherwise fail immediately.
261 my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
262 if (_flock($LockFH, $lock_mode)) {
263 $dbh = C4::Context->dbh;
265 _flock($LockFH, LOCK_UN);
267 print "Skipping rebuild/update because flock failed on $lockfile: $!\n";
# Closing report and export-directory handling.
272 if ( $verbose_logging ) {
273 print "====================\n";
274 print "Indexing complete: ". pretty_time() . "\n";
275 print "====================\n";
277 print "====================\n";
280 print "NOTHING cleaned : the export $directory has been kept.\n";
281 print "You can re-run this script with the -s ";
283 print " and -d $directory parameters";
288 print "if you just want to rebuild zebra after changing zebra config files\n";
290 unless ($use_tempdir) {
291 # if we're using a temporary directory
292 # created by File::Temp, it will be removed
# A user-supplied directory is removed explicitly here.
294 rmtree($directory, 0, 1);
295 print "directory $directory deleted\n";
# One indexing pass: index_records() is called once per record type with the
# matching Zebra server directory; a type that is not processed is reported
# in verbose mode.
# NOTE(review): the enclosing sub header and the conditions guarding each
# call are not visible in this excerpt - presumably the -a / -b flags.
301 index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
303 print "skipping authorities\n" if ( $verbose_logging );
307 index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
309 print "skipping biblios\n" if ( $verbose_logging );
313 # Check the zebra update queue and return true if there are records to process
314 # This routine will handle each of -ab, -a, or -b, but in practice we force
315 # -ab when in daemon mode.
# Takes no arguments; reads the file-level $authorities, $biblios, $dbh and
# $verbose_logging.
316 sub zebraqueue_not_empty {
# Restrict the count to the server(s) selected on the command line.
319 if ($authorities && $biblios) {
320 $where_str = 'done = 0;';
322 $where_str = 'server = "biblioserver" AND done = 0;';
324 $where_str = 'server = "authorityserver" AND done = 0;';
# $where_str only ever holds one of the fixed literals above.
327 $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
# The query yields a single row holding the number of pending entries.
330 my $count = $query->fetchrow_arrayref->[0];
331 print "queued records: $count\n" if $verbose_logging > 0;
# Ensure the Zebra directory layout exists under the provided path.
#
# Checks the base directory itself and its 'key', 'register', 'shadow' and
# 'tmp' subdirectories, creating every one that is missing. If they don't
# exist, zebra is likely to spit the dummy.
#
# Parameters:
#   $base - path of the Zebra server directory (a '/' is appended here)
# Returns:
#   1 if at least one directory had to be created, 0 otherwise.
# Dies:
#   with "Unable to create '<dir>': <reason>" when a missing directory
#   cannot be created.
sub check_zebra_dirs {
    my ($base) = shift() . '/';
    my $needed_repairing = 0;
    my @dirs = ( '', 'key', 'register', 'shadow', 'tmp' );
    foreach my $dir (@dirs) {
        my $bdir = $base . $dir;
        next if -d $bdir;
        $needed_repairing = 1;
        # 'or' (not '||') so that the test applies to mkdir's result: with
        # '||' it bound to $bdir and a failed mkdir was never reported.
        mkdir($bdir) or die "Unable to create '$bdir': $!\n";
        print "$0: needed to create '$bdir'\n";
    }
    return $needed_repairing;
} # ---------- end of subroutine check_zebra_dirs ----------
# index_records: export the records of one type and feed them to zebraidx.
#
# Parameters (positional): $record_type ('biblio' or 'authority' at the
# visible call sites), $directory (export directory), $skip_export,
# $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue,
# $verbose_logging, $zebraidx_log_opt, $server_dir (Zebra server directory).
# Also reads the file-level $process_zebraqueue_skip_deletes, $reset and
# $noshadow, which are not passed in.
354 my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
356 my $num_records_exported = 0;
357 my $records_deleted = {};
# Create any missing Zebra directories; true when something had to be created.
358 my $need_reset = check_zebra_dirs($server_dir);
360 print "$0: found broken zebra server directories: forcing a rebuild\n";
# --- Export phase ---
363 if ($skip_export && $verbose_logging) {
364 print "====================\n";
365 print "SKIPPING $record_type export\n";
366 print "====================\n";
368 if ( $verbose_logging ) {
369 print "====================\n";
370 print "exporting $record_type " . pretty_time() . "\n";
371 print "====================\n";
373 mkdir "$directory" unless (-d $directory);
374 mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
# Incremental mode: export only what the zebraqueue table lists.
375 if ($process_zebraqueue) {
# Deletions first (unless --skip-deletes): stub records go to del_<type>.
378 unless ( $process_zebraqueue_skip_deletes ) {
379 $entries = select_zebraqueue_records($record_type, 'deleted');
380 mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
381 $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type");
382 mark_zebraqueue_batch_done($entries);
# Then updates; records already exported as deleted are passed along so
# they can be skipped.
385 $entries = select_zebraqueue_records($record_type, 'updated');
386 mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
387 $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $records_deleted);
388 mark_zebraqueue_batch_done($entries);
# Full mode: export every selected record, then clear the queue for this
# record type unless -y was given.
391 my $sth = select_all_records($record_type);
392 $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $nosanitize);
393 unless ($do_not_clear_zebraqueue) {
394 mark_all_zebraqueue_done($record_type);
400 # and reindexing everything
# --- Indexing phase ---
403 if ($verbose_logging) {
404 print "====================\n";
405 print "SKIPPING $record_type indexing\n";
406 print "====================\n";
409 if ( $verbose_logging ) {
410 print "====================\n";
411 print "REINDEXING zebra " . pretty_time() . "\n";
412 print "====================\n";
414 my $record_fmt = 'marcxml';
# Incremental mode runs zebraidx only for directories that received records.
415 if ($process_zebraqueue) {
416 do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
417 if %$records_deleted;
418 do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
419 if $num_records_exported;
# Full mode also indexes when the export was skipped (records exported earlier).
421 do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
422 if ($num_records_exported or $skip_export);
# Fetch zebraqueue rows for one record type and one kind of change.
#
# Parameters:
#   $record_type - 'biblio' selects the biblioserver queue, anything else
#                  the authorityserver queue
#   $update_type - 'deleted' selects recordDelete operations, anything else
#                  specialUpdate operations
# The rows are fetched as an array reference of hash references; callers
# read the 'id' and 'biblio_auth_number' keys.
# NOTE(review): the remainder of the SQL and the return statement are not
# visible in this excerpt.
428 sub select_zebraqueue_records {
429 my ($record_type, $update_type) = @_;
# Both values are passed to execute() as bind parameters.
431 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
432 my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';
434 my $sth = $dbh->prepare("SELECT id, biblio_auth_number
440 $sth->execute($server, $op);
441 my $entries = $sth->fetchall_arrayref({});
# Flag zebraqueue rows of one record type as done.
#
# Parameters:
#   $record_type - 'biblio' targets the biblioserver queue, anything else
#                  the authorityserver queue
# NOTE(review): the WHERE clause of the UPDATE is not visible in this
# excerpt; the server name is its only bind value.
444 sub mark_all_zebraqueue_done {
445 my ($record_type) = @_;
447 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
449 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
452 $sth->execute($server);
# Flag a batch of zebraqueue rows as done, one UPDATE per row id.
#
# Iterates over $entries, an array reference of hash references carrying an
# 'id' key (the shape built by select_zebraqueue_records).
# NOTE(review): the argument unpacking, the loop body and any commit are not
# visible in this excerpt.
455 sub mark_zebraqueue_batch_done {
# AutoCommit is switched off for the duration of the batch ...
458 $dbh->{AutoCommit} = 0;
459 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
461 foreach my $id (map { $_->{id} } @$entries) {
# ... and switched back on afterwards.
464 $dbh->{AutoCommit} = 1;
# Run the "select everything" query matching the record type.
#
# Parameters:
#   $record_type - 'biblio' for bibliographic records; any other value is
#                  treated as authorities
# Returns whatever the type-specific selector returns.
sub select_all_records {
    my ($record_type) = @_;

    if ( $record_type eq 'biblio' ) {
        return select_all_biblios();
    }
    return select_all_authorities();
}
# Prepare the query selecting the authid of every authority record to export.
# Reads the file-level $where, $length, $offset and $dbh.
# NOTE(review): execution of the statement and the return value are not
# visible in this excerpt.
472 sub select_all_authorities {
473 my $strsth=qq{SELECT authid FROM auth_header};
# $where is the text given with --where and is interpolated verbatim.
474 $strsth.=qq{ WHERE $where } if ($where);
# A LIMIT is only added when --length is set; --offset alone adds nothing.
475 $strsth.=qq{ LIMIT $length } if ($length && !$offset);
476 $strsth.=qq{ LIMIT $offset,$length } if ($length && $offset);
477 my $sth = $dbh->prepare($strsth);
482 sub select_all_biblios {
483 $table = 'biblioitems'
484 unless grep { $_ eq $table } @tables_allowed_for_select;
485 my $strsth = qq{ SELECT DISTINCT biblionumber FROM $table };
486 $strsth.=qq{ WHERE $where } if ($where);
487 $strsth.=qq{ LIMIT $length } if ($length && !$offset);
488 $strsth.=qq{ LIMIT $offset,$length } if ($offset);
489 my $sth = $dbh->prepare($strsth);
# Export the records returned by a statement handle into
# "$directory/exported_records" as one MARCXML collection.
#
# Parameters:
#   $record_type - 'biblio' or, at the visible call sites, 'authority'
#   $sth         - statement handle yielding one record number per row
#   $directory   - directory receiving the export file
#   $nosanitize  - flag from the -nosanitize option
# Returns the value of $num_exported.
# Dies if the export file cannot be opened.
# NOTE(review): the tests selecting between the two export paths and the
# counter updates are not visible in this excerpt.
494 sub export_marc_records_from_sth {
495 my ($record_type, $sth, $directory, $nosanitize) = @_;
497 my $num_exported = 0;
# NOTE(review): the mode string ends with a space, unlike the other two
# exporters in this file - confirm it is harmless.
498 open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
500 print {$fh} $marcxml_open;
503 my ( $itemtag, $itemsubfield ) = C4::Biblio::GetMarcFromKohaField( "items.itemnumber" );
504 while (my ($record_number) = $sth->fetchrow_array) {
# Progress output: a dot per record and a running count every 100 records.
505 print "." if ( $verbose_logging );
506 print "\r$i" unless ($i++ %100 or !$verbose_logging);
# Raw path (presumably the -nosanitize branch): take the stored XML as is.
508 my $marcxml = $record_type eq 'biblio'
509 ? GetXmlBiblio( $record_number )
510 : GetAuthorityXML( $record_number );
# Biblios: build a record holding only the item fields and splice its XML
# into the biblio XML.
511 if ($record_type eq 'biblio'){
512 my @items = GetItemsInfo($record_number);
514 my $record = MARC::Record->new;
515 $record->encoding('UTF-8');
517 foreach my $item (@items){
518 my $record = Item2Marc($item, $record_number);
519 push @itemsrecord, $record->field($itemtag);
521 $record->insert_fields_ordered(@itemsrecord);
522 my $itemsxml = $record->as_xml_record();
# Drop the last 10 characters of the biblio XML and append the part of the
# item XML that follows its "</leader>\n".
524 substr($marcxml, 0, length($marcxml)-10) .
525 substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
528 # extra test to ensure that result is valid XML; otherwise
529 # Zebra won't parse it in DOM mode
531 my $doc = $tester->parse_string($marcxml);
534 warn "Error exporting record $record_number ($record_type): $@\n";
# Strip the per-record XML declaration; the collection has its own.
538 $marcxml =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
539 print {$fh} $marcxml;
# Sanitizing path: rebuild the record through the MARC API and validate it.
544 my ($marc) = get_corrected_marc_record($record_type, $record_number);
547 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
549 my $doc = $tester->parse_string($rec);
552 die "invalid XML: $@";
554 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
559 warn "Error exporting record $record_number ($record_type) XML";
560 warn "... specific error is $@" if $verbose_logging;
564 print "\nRecords exported: $num_exported " . pretty_time() . "\n" if ( $verbose_logging );
565 print {$fh} $marcxml_close;
568 return $num_exported;
# Export the records named by a list of zebraqueue entries into
# "$directory/exported_records" as one MARCXML collection.
#
# Parameters:
#   $record_type     - 'biblio' or, at the visible call sites, 'authority'
#   $entries         - array reference of hash references with a
#                      'biblio_auth_number' key
#   $directory       - directory receiving the export file
#   $records_deleted - hash reference keyed by record numbers already
#                      exported as deletions
# Returns the value of $num_exported.
# Dies if the export file cannot be opened.
571 sub export_marc_records_from_list {
572 my ($record_type, $entries, $directory, $records_deleted) = @_;
574 my $num_exported = 0;
575 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
577 print {$fh} $marcxml_open;
581 # Skip any deleted records. We check for this anyway, but this reduces error spam
# %found starts with the deleted numbers; the post-increment in the grep
# also filters out numbers that occur more than once in $entries.
582 my %found = %$records_deleted;
583 foreach my $record_number ( map { $_->{biblio_auth_number} }
584 grep { !$found{ $_->{biblio_auth_number} }++ }
# Progress output: a dot per record and a running count every 100 records.
586 print "." if ( $verbose_logging );
587 print "\r$i" unless ($i++ %100 or !$verbose_logging);
588 my ($marc) = get_corrected_marc_record($record_type, $record_number);
591 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
# Strip the per-record XML declaration; the collection has its own.
592 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
597 warn "Error exporting record $record_number ($record_type) XML";
601 print "\nRecords exported: $num_exported " . pretty_time() . "\n" if ( $verbose_logging );
603 print {$fh} $marcxml_close;
606 return $num_exported;
# Write a stub MARC record for every queue entry marked as deleted into
# "$directory/exported_records".
#
# Parameters:
#   $record_type - 'biblio' or, at the visible call sites, 'authority'
#   $entries     - array reference of hash references with a
#                  'biblio_auth_number' key
#   $directory   - directory receiving the export file
# Returns a hash reference whose keys are the record numbers written.
# Dies if the export file cannot be opened.
609 sub generate_deleted_marc_records {
611 my ($record_type, $entries, $directory) = @_;
613 my $records_deleted = {};
614 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
616 print {$fh} $marcxml_open;
619 foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
620 print "\r$i" unless ($i++ %100 or !$verbose_logging);
621 print "." if ( $verbose_logging );
# Start from an empty record and add only the identifiers.
623 my $marc = MARC::Record->new();
624 if ($record_type eq 'biblio') {
# The record number is passed as both biblionumber and biblioitemnumber.
625 fix_biblio_ids($marc, $record_number, $record_number);
627 fix_authority_id($marc, $record_number);
629 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
630 fix_unimarc_100($marc);
633 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
634 # Remove the record's XML header
635 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
# Remember the number so the update export can skip this record.
638 $records_deleted->{$record_number} = 1;
640 print "\nRecords exported: $i " . pretty_time() . "\n" if ( $verbose_logging );
642 print {$fh} $marcxml_close;
645 return $records_deleted;
# Load one record and apply the corrections needed before indexing.
#
# Parameters:
#   $record_type   - 'authority' or 'biblio'
#   $record_number - authid or biblionumber
# NOTE(review): the return statement is not visible in this excerpt;
# callers take the first element of the result as the MARC record.
648 sub get_corrected_marc_record {
649 my ( $record_type, $record_number ) = @_;
651 my $marc = get_raw_marc_record( $record_type, $record_number );
# Nothing is corrected when the record could not be loaded.
653 if ( defined $marc ) {
655 if ( $record_type eq 'authority' ) {
# Make sure field 001 carries the authid.
656 fix_authority_id( $marc, $record_number );
658 elsif ( $record_type eq 'biblio' ) {
# Record filters run in the order they are pushed; the see-from filter
# depends on the IncludeSeeFromInSearches preference.
661 push @filters, 'EmbedItemsAvailability';
662 push @filters, 'EmbedSeeFromHeadings'
663 if C4::Context->preference('IncludeSeeFromInSearches');
664 push @filters, 'Index880InZebra';
666 my $normalizer = Koha::RecordProcessor->new( { filters => \@filters } );
667 $marc = $normalizer->process($marc);
669 if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
670 fix_unimarc_100($marc);
# Load one record from the database through the Koha APIs.
#
# Parameters:
#   $record_type   - 'biblio' uses C4::Biblio::GetMarcBiblio (items
#                    embedded); the other branch uses GetAuthority
#   $record_number - biblionumber or authid
# Both loaders run inside eval so an exception does not stop the export.
# NOTE(review): the tests leading to the warnings and the return statement
# are not visible in this excerpt.
677 sub get_raw_marc_record {
678 my ($record_type, $record_number) = @_;
681 if ($record_type eq 'biblio') {
682 eval { $marc = C4::Biblio::GetMarcBiblio({ biblionumber => $record_number, embed_items => 1 }); };
684 # here we do warn since catching an exception
685 # means that the bib was found but failed
687 warn "error retrieving biblio $record_number";
691 eval { $marc = GetAuthority($record_number); };
693 warn "error retrieving authority $record_number";
701 # FIXME - this routine is suspect
702 # It blanks the Leader/00-05 and Leader/12-16 to
703 # force them to be recalculated correct when
704 # the $marc->as_usmarc() or $marc->as_xml() is called.
705 # But why is this necessary? It would be a serious bug
706 # in MARC::Record (definitely) and MARC::File::XML (arguably)
707 # if they are emitting incorrect leader values.
# NOTE(review): the sub header and argument unpacking are not visible in
# this excerpt; $marc is used as a MARC::Record-like object.
710 my $leader = $marc->leader;
# Overwrite the first 5 characters of the leader.
711 substr($leader, 0, 5) = ' ';
# Overwrite the 7 characters starting at offset 10.
712 substr($leader, 10, 7) = '22 ';
# Store at most the first 24 characters back on the record.
713 $marc->leader(substr($leader, 0, 24));
717 # FIXME - it is essential to ensure that the biblionumber is present,
718 # otherwise, Zebra will choke on the record. However, this
719 # logic belongs in the relevant C4::Biblio APIs.
# Arguments are taken with shift: the biblionumber, then an optional
# biblioitemnumber. At the visible call site the MARC record comes first.
# NOTE(review): the sub header, the first shift and the test choosing
# between the two ways of getting $biblioitemnumber are not visible in this
# excerpt.
721 my $biblionumber = shift;
722 my $biblioitemnumber;
724 $biblioitemnumber = shift;
# Otherwise look the biblioitemnumber up from the biblionumber.
726 my $sth = $dbh->prepare(
727 "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
728 $sth->execute($biblionumber);
729 ($biblioitemnumber) = $sth->fetchrow_array;
731 unless ($biblioitemnumber) {
732 warn "failed to get biblioitemnumber for biblio $biblionumber";
737 # FIXME - this is cheating on two levels
738 # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
739 # 2. Making sure that the biblionumber and biblioitemnumber are correct and
740 # present in the MARC::Record object ought to be part of GetMarcBiblio.
742 # On the other hand, this is better for now than what rebuild_zebra.pl used to
743 # do, which was duplicate the code for inserting the biblionumber
744 # and biblioitemnumber
745 C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);
# Make sure control field 001 of an authority record holds its authid.
#
# FIXME - as with fix_biblio_ids, the authid must be present for Zebra's
# sake. However, this really belongs in C4::AuthoritiesMarc.
#
# Parameters:
#   $marc   - MARC::Record of the authority
#   $authid - authority id expected in field 001
# When field 001 is missing or holds another value, every 001 field is
# removed and a new one carrying $authid is inserted.
sub fix_authority_id {
    my ($marc, $authid) = @_;

    my $current_001     = $marc->field('001');
    my $already_correct = $current_001 && $current_001->data() eq $authid;

    unless ($already_correct) {
        $marc->delete_field( $marc->field('001') );
        my $replacement = MARC::Field->new( '001', $authid );
        $marc->insert_fields_ordered($replacement);
    }
}
# Normalise UNIMARC field 100$a on a record.
# NOTE(review): the argument unpacking and several branch lines are not
# visible in this excerpt; $marc is used as a MARC::Record-like object.
761 sub fix_unimarc_100 {
762 # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
# A 100$a of exactly 36 characters is taken as the starting value and the
# existing field is removed.
766 my $length_100a = length($marc->subfield( 100, "a" ));
767 if ( $length_100a and $length_100a == 36 ) {
768 $string = $marc->subfield( 100, "a" );
769 my $f100 = $marc->field(100);
770 $marc->delete_field($f100);
# Otherwise the value starts from today's date as YYYYMMDD ...
773 $string = POSIX::strftime( "%Y%m%d", localtime );
# ... left-justified and space-padded to 35 characters.
775 $string = sprintf( "%-*s", 35, $string );
# Overwrite 6 characters at offset 22 with "frey50".
777 substr( $string, 22, 6, "frey50" );
# Replace field 100 unless the record (still) has a 36-character 100$a.
778 $length_100a = length($marc->subfield( 100, "a" ));
779 unless ( $length_100a and $length_100a == 36 ) {
780 $marc->delete_field($marc->field(100));
781 $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
# do_indexing: run zebraidx on one export directory.
#
# Parameters (positional): $record_type ('biblio' selects the biblioserver
# settings, anything else the authorityserver ones), $op (zebraidx command;
# 'update' and 'adelete' at the visible call sites), $record_dir,
# $reset_index, $noshadow, $record_format, $zebraidx_log_opt.
# NOTE(review): the sub header and the body of the $noshadow/$reset_index
# branch are not visible in this excerpt.
786 my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format, $zebraidx_log_opt) = @_;
788 my $zebra_server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
789 my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
790 my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
791 my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
795 if ($noshadow or $reset_index) {
# NOTE(review): each command is one interpolated string handed to the
# shell, and the exit status of system() is not checked.
# 'init' runs only when a reset was requested; 'commit' is skipped when
# $noshadow is set.
799 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name init") if $reset_index;
800 system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
801 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
805 # test if flock is present; if so, use it; if not, return true
806 # op refers to the official flock operations including LOCK_EX,
808 # combining LOCK_EX with LOCK_NB returns immediately
# NOTE(review): the sub header and argument unpacking are not visible in
# this excerpt; $fh and $op are the handle and operation passed by callers.
# First call: $use_flock is still undefined, so probe whether flock works.
810 if( !defined($use_flock) ) {
811 #check if flock is present; if not, you will have a fatal error
812 my $lock_acquired = eval { flock($fh, $op) };
813 # assuming that $fh and $op are fine(..), an undef $lock_acquired
815 $use_flock = defined($lock_acquired) ? 1 : 0;
816 print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock;
# Without a usable flock every request counts as successful.
817 return 1 if !$use_flock;
818 return $lock_acquired;
# Later calls reuse the decision taken by the probe.
820 return 1 if !$use_flock;
821 return flock($fh, $op);
# Create the lock directory if needed and open the lock file inside it.
# Returns the list (filehandle, lock file path) on success.
# NOTE(review): the argument unpacking for $dir and the check following
# mkpath are not visible in this excerpt.
825 sub _create_lockfile { #returns undef on failure
# mkpath runs inside eval so a failure to create the directory is not fatal.
828 eval { mkpath($dir, 0, oct(755)) };
# The lock file is opened for writing; a bare return signals failure.
831 return if !open my $fh, q{>}, $dir.'/'.LOCK_FILENAME;
832 return ( $fh, $dir.'/'.LOCK_FILENAME );
# pretty_time: format the current time followed by the time elapsed since
# $start_time, as "HH:MM:SS [hh:mm:ss]".
# NOTE(review): the sub header and the computation of $now, $h, $m and $s
# are not visible in this excerpt.
838 my $elapsed = $now - $start_time;
847 my $now_pretty = POSIX::strftime("%H:%M:%S",localtime($now));
848 my $elapsed_pretty = sprintf "[%02d:%02d:%02d]",$h,$m,$s;
850 return "$now_pretty $elapsed_pretty";
855 $0: reindex MARC bibs and/or authorities in Zebra.
857 Use this batch job to reindex all biblio or authority
858 records in your Koha database.
862 -b index bibliographic records
864 -a index authority records
866 -daemon Run in daemon mode. The program will loop checking
867 for entries on the zebraqueue table, processing
868 them incrementally if present, and then sleep
869 for a few seconds before repeating the process
870 Checking the zebraqueue table is done with a cheap
871 SQL query. This allows for near realtime update of
872 the zebra search index with low system overhead.
873 Use -sleep to control the checking interval.
875 Daemon mode implies -z, -a, -b. The program will
876 refuse to start if options are present that do not
877 make sense while running as an incremental update
878 daemon (e.g. -r or -offset).
880 -sleep 10 Seconds to sleep between checks of the zebraqueue
881 table in daemon mode. The default is 5 seconds.
883 -z select only updated and deleted
884 records marked in the zebraqueue
885 table. Cannot be used with -r
888 --skip-deletes only select record updates, not record
889 deletions, to avoid potential excessive
890 I/O when zebraidx processes deletions.
891 If this option is used for normal indexing,
892 a cronjob should be set up to run
893 rebuild_zebra.pl -z without --skip-deletes
895 Only effective with -z.
897 -r clear Zebra index before
898 adding records to index. Implies -w.
900 -d Temporary directory for indexing.
901 If not specified, one is automatically
902 created. The export directory
903 is automatically deleted unless
904 you supply the -k switch.
906 -k Do not delete export directory.
908 -s Skip export. Used if you have
909 already exported the records
912 -nosanitize export biblio/authority records directly from DB marcxml
913 field without sanitizing records. It speed up
914 dump process but could fail if DB contains badly
915 encoded records. Works only with -x,
917 -w skip shadow indexing for this batch
919 -y do NOT clear zebraqueue after indexing; normally,
920 after doing batch indexing, zebraqueue should be
921 marked done for the affected record type(s) so that
922 a running zebraqueue_daemon doesn't try to reindex
923 the same records - specify -y to override this.
924 Cannot be used with -z.
926 -v increase the amount of logging. Normally only
927 warnings and errors from the indexing are shown.
928 Use log level 2 (-v -v) to include all Zebra logs.
930 --length 1234 how many biblio you want to export
931 --offset 1243 offset you want to start to
932 example: --offset 500 --length=500 will result in a LIMIT 500,500 (exporting 500 records, starting after the 500th one)
933 note that the numbers are NOT related to biblionumber, that's the intended behaviour.
934 --where let you specify a WHERE query, like itemtype='BOOK'
935 or something like that
937 --run-as-root explicitly allow script to run as 'root' user
939 --wait-for-lock when not running in daemon mode, the default
940 behavior is to abort a rebuild if the rebuild
941 lock is busy. This option will cause the program
942 to wait for the lock to free and then continue
943 processing the rebuild request,
945 --table specify a table (can be items, biblioitems, biblio, biblio_metadata) to retrieve biblionumber to index.
946 biblioitems is the default value.
948 --help or -h show this message.