3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # Koha is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
23 use File::Temp qw/ tempdir /;
26 use C4::AuthoritiesMarc;
28 use Koha::RecordProcessor;
32 use constant LOCK_FILENAME => 'rebuild..LCK';
34 # script that checks zebradir structure & creates directories & mandatory files if needed
38 $|=1; # flushes output
39 # If the cron job starts us in an unreadable dir, we will break without
41 chdir $ENV{HOME} if (!(-r '.'));
55 my $process_zebraqueue;
56 my $process_zebraqueue_skip_deletes;
57 my $do_not_clear_zebraqueue;
62 my $run_user = (getpwuid($<))[0];
63 my $wait_for_lock = 0;
65 my $table = 'biblioitems';
66 my $is_memcached = Koha::Caches->get_instance->memcached_cache;
68 my $verbose_logging = 0;
69 my $zebraidx_log_opt = " -v none,fatal,warn ";
70 my $result = GetOptions(
71 'daemon' => \$daemon_mode,
72 'sleep:i' => \$daemon_sleep,
77 'I|skip-index' => \$skip_index,
78 'nosanitize' => \$nosanitize,
82 'h|help' => \$want_help,
84 'y' => \$do_not_clear_zebraqueue,
85 'z' => \$process_zebraqueue,
86 'skip-deletes' => \$process_zebraqueue_skip_deletes,
88 'length:i' => \$length,
89 'offset:i' => \$offset,
90 'v+' => \$verbose_logging,
91 'run-as-root' => \$run_as_root,
92 'wait-for-lock' => \$wait_for_lock,
93 't|table:s' => \$table,
96 if (not $result or $want_help) {
102 warn "Warning: You passed -x which is already the default and is now deprecated\n";
103 undef $as_xml; # Should not be used later
106 if( not defined $run_as_root and $run_user eq 'root') {
107 my $msg = "Warning: You are running this script as the user 'root'.\n";
108 $msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n";
109 $msg .= "Please do '$0 --help' to see usage.\n";
113 if ($process_zebraqueue and ($skip_export or $reset)) {
114 my $msg = "Cannot specify -r or -s if -z is specified\n";
115 $msg .= "Please do '$0 --help' to see usage.\n";
119 if ($process_zebraqueue and $do_not_clear_zebraqueue) {
120 my $msg = "Cannot specify both -y and -z\n";
121 $msg .= "Please do '$0 --help' to see usage.\n";
126 # incompatible flags handled above: help, reset, and do_not_clear_zebraqueue
127 if ($skip_export or $keep_export or $skip_index or
128 $where or $length or $offset) {
129 my $msg = "Cannot specify -s, -k, -I, -where, -length, or -offset with -daemon.\n";
130 $msg .= "Please do '$0 --help' to see usage.\n";
133 unless ($is_memcached) {
134 warn "Warning: script running in daemon mode, without recommended caching system (memcached).\n";
138 $process_zebraqueue = 1;
141 if (not $biblios and not $authorities) {
142 my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
143 $msg .= "Please do '$0 --help' to see usage.\n";
147 our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio' );
148 unless ( grep { /^$table$/ } @tables_allowed_for_select ) {
149 die "Cannot specify -t|--table with value '$table'. Only "
150 . ( join ', ', @tables_allowed_for_select )
155 # -v is for verbose, which seems backwards here because of how logging is set
156 # on the CLI of zebraidx. It works this way. The default is to not log much
157 if ($verbose_logging >= 2) {
158 $zebraidx_log_opt = '-v none,fatal,warn,all';
162 unless ($directory) {
164 $directory = tempdir(CLEANUP => ($keep_export ? 0 : 1));
168 my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory};
169 my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
171 my $kohadir = C4::Context->config('intranetdir');
172 my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') // 'dom';
173 my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') // 'dom';
175 my ($biblionumbertagfield,$biblionumbertagsubfield) = C4::Biblio::GetMarcFromKohaField("biblio.biblionumber","");
176 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = C4::Biblio::GetMarcFromKohaField("biblioitems.biblioitemnumber","");
178 my $marcxml_open = q{<?xml version="1.0" encoding="UTF-8"?>
179 <collection xmlns="http://www.loc.gov/MARC21/slim">
182 my $marcxml_close = q{
186 # Protect against simultaneous updates of the zebra index by using a lock file.
187 # Create our own lock directory if it is missing. This should be created
188 # by koha-zebra-ctl.sh or at system installation. If the desired directory
189 # does not exist and cannot be created, we fall back on /tmp - which will
192 my ($lockfile, $LockFH);
194 C4::Context->config("zebra_lockdir"),
195 '/var/lock/zebra_' . C4::Context->config('database'),
196 '/tmp/zebra_' . C4::Context->config('database')
198 #we try three possibilities (we really want to lock :)
200 ($LockFH, $lockfile) = _create_lockfile($_.'/rebuild');
201 last if defined $LockFH;
203 if( !defined $LockFH ) {
204 print "WARNING: Could not create lock file $lockfile: $!\n";
205 print "Please check your koha-conf.xml for ZEBRA_LOCKDIR.\n";
206 print "Verify file permissions for it too.\n";
207 $use_flock = 0; # we disable file locking now and will continue
209 # note that this mimics old behavior (before we used
213 if ( $verbose_logging ) {
214 print "Zebra configuration information\n";
215 print "================================\n";
216 print "Zebra biblio directory = $biblioserverdir\n";
217 print "Zebra authorities directory = $authorityserverdir\n";
218 print "Koha directory = $kohadir\n";
219 print "Lockfile = $lockfile\n" if $lockfile;
220 print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
221 print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
222 print "================================\n";
225 my $tester = XML::LibXML->new();
228 # The main work is done here by calling do_one_pass(). We have added locking to
229 # avoid race conditions between full rebuilds and incremental updates either from
230 # daemon mode or periodic invocation from cron. The race can lead to an updated
231 # record being overwritten by a rebuild if the update is applied after the export
232 # by the rebuild and before the rebuild finishes (more likely to affect large
235 # We have chosen to exit immediately by default if we cannot obtain the lock
236 # to prevent the potential for an infinite backlog from cron invocations, but an
237 # option (wait-for-lock) is provided to let the program wait for the lock.
238 # See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details.
241 # For incremental updates, skip the update if the updates are locked
242 if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
244 $dbh = C4::Context->dbh;
245 if( zebraqueue_not_empty() ) {
246 Koha::Caches->flush_L1_caches() if $is_memcached;
250 if ($@ && $verbose_logging) {
251 warn "Warning : $@\n";
253 _flock($LockFH, LOCK_UN);
258 # all one-off invocations
259 my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
260 if (_flock($LockFH, $lock_mode)) {
261 $dbh = C4::Context->dbh;
263 _flock($LockFH, LOCK_UN);
265 print "Skipping rebuild/update because flock failed on $lockfile: $!\n";
270 if ( $verbose_logging ) {
271 print "====================\n";
273 print "====================\n";
276 print "NOTHING cleaned : the export $directory has been kept.\n";
277 print "You can re-run this script with the -s ";
279 print " and -d $directory parameters";
284 print "if you just want to rebuild zebra after changing the record.abs\n";
285 print "or another zebra config file\n";
287 unless ($use_tempdir) {
288 # if we're using a temporary directory
289 # created by File::Temp, it will be removed
291 rmtree($directory, 0, 1);
292 print "directory $directory deleted\n";
298 index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
300 print "skipping authorities\n" if ( $verbose_logging );
304 index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
306 print "skipping biblios\n" if ( $verbose_logging );
310 # Check the zebra update queue and return true if there are records to process
311 # This routine will handle each of -ab, -a, or -b, but in practice we force
312 # -ab when in daemon mode.
313 sub zebraqueue_not_empty {
316 if ($authorities && $biblios) {
317 $where_str = 'done = 0;';
319 $where_str = 'server = "biblioserver" AND done = 0;';
321 $where_str = 'server = "authorityserver" AND done = 0;';
324 $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
327 my $count = $query->fetchrow_arrayref->[0];
328 print "queued records: $count\n" if $verbose_logging > 0;
# check_zebra_dirs($base)
#
# Make sure the directory layout Zebra expects exists under $base: the base
# directory itself plus its 'key', 'register', 'shadow' and 'tmp'
# subdirectories. Any directory that is missing is created and reported on
# STDOUT.
#
# Returns 1 if at least one directory had to be created, 0 otherwise.
# Dies with "Unable to create '<dir>': <errno text>" if a directory is
# missing and cannot be created.
sub check_zebra_dirs {
    my ($base) = shift() . '/';
    my $needed_repairing = 0;

    # The empty entry stands for the base directory itself; it comes first so
    # that the subdirectories have a parent to be created in.
    my @dirs = ( '', 'key', 'register', 'shadow', 'tmp' );
    foreach my $dir (@dirs) {
        my $bdir = $base . $dir;
        next if -d $bdir;

        $needed_repairing = 1;

        # Low-precedence 'or' is required here: with '||' the die() binds to
        # $bdir (always true), so a failed mkdir would go unnoticed.
        mkdir $bdir or die "Unable to create '$bdir': $!\n";
        print "$0: needed to create '$bdir'\n";
    }
    return $needed_repairing;
}    # ---------- end of subroutine check_zebra_dirs ----------
351 my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
353 my $num_records_exported = 0;
354 my $records_deleted = {};
355 my $need_reset = check_zebra_dirs($server_dir);
357 print "$0: found broken zebra server directories: forcing a rebuild\n";
360 if ($skip_export && $verbose_logging) {
361 print "====================\n";
362 print "SKIPPING $record_type export\n";
363 print "====================\n";
365 if ( $verbose_logging ) {
366 print "====================\n";
367 print "exporting $record_type\n";
368 print "====================\n";
370 mkdir "$directory" unless (-d $directory);
371 mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
372 if ($process_zebraqueue) {
375 unless ( $process_zebraqueue_skip_deletes ) {
376 $entries = select_zebraqueue_records($record_type, 'deleted');
377 mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
378 $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type");
379 mark_zebraqueue_batch_done($entries);
382 $entries = select_zebraqueue_records($record_type, 'updated');
383 mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
384 $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $records_deleted);
385 mark_zebraqueue_batch_done($entries);
388 my $sth = select_all_records($record_type);
389 $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $nosanitize);
390 unless ($do_not_clear_zebraqueue) {
391 mark_all_zebraqueue_done($record_type);
397 # and reindexing everything
400 if ($verbose_logging) {
401 print "====================\n";
402 print "SKIPPING $record_type indexing\n";
403 print "====================\n";
406 if ( $verbose_logging ) {
407 print "====================\n";
408 print "REINDEXING zebra\n";
409 print "====================\n";
411 my $record_fmt = 'marcxml';
412 if ($process_zebraqueue) {
413 do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
414 if %$records_deleted;
415 do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
416 if $num_records_exported;
418 do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
419 if ($num_records_exported or $skip_export);
425 sub select_zebraqueue_records {
426 my ($record_type, $update_type) = @_;
428 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
429 my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';
431 my $sth = $dbh->prepare("SELECT id, biblio_auth_number
437 $sth->execute($server, $op);
438 my $entries = $sth->fetchall_arrayref({});
441 sub mark_all_zebraqueue_done {
442 my ($record_type) = @_;
444 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
446 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
449 $sth->execute($server);
452 sub mark_zebraqueue_batch_done {
455 $dbh->{AutoCommit} = 0;
456 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
458 foreach my $id (map { $_->{id} } @$entries) {
461 $dbh->{AutoCommit} = 1;
464 sub select_all_records {
465 my $record_type = shift;
466 return ($record_type eq 'biblio') ? select_all_biblios() : select_all_authorities();
469 sub select_all_authorities {
470 my $strsth=qq{SELECT authid FROM auth_header};
471 $strsth.=qq{ WHERE $where } if ($where);
472 $strsth.=qq{ LIMIT $length } if ($length && !$offset);
473 $strsth.=qq{ LIMIT $offset,$length } if ($length && $offset);
474 my $sth = $dbh->prepare($strsth);
479 sub select_all_biblios {
480 $table = 'biblioitems'
481 unless grep { /^$table$/ } @tables_allowed_for_select;
482 my $strsth = qq{ SELECT biblionumber FROM $table };
483 $strsth.=qq{ WHERE $where } if ($where);
484 $strsth.=qq{ LIMIT $length } if ($length && !$offset);
485 $strsth.=qq{ LIMIT $offset,$length } if ($offset);
486 my $sth = $dbh->prepare($strsth);
491 sub export_marc_records_from_sth {
492 my ($record_type, $sth, $directory, $nosanitize) = @_;
494 my $num_exported = 0;
495 open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
497 print {$fh} $marcxml_open;
500 my ( $itemtag, $itemsubfield ) = C4::Biblio::GetMarcFromKohaField("items.itemnumber",'');
501 while (my ($record_number) = $sth->fetchrow_array) {
502 print "." if ( $verbose_logging );
503 print "\r$i" unless ($i++ %100 or !$verbose_logging);
505 my $marcxml = $record_type eq 'biblio'
506 ? GetXmlBiblio( $record_number )
507 : GetAuthorityXML( $record_number );
508 if ($record_type eq 'biblio'){
509 my @items = GetItemsInfo($record_number);
511 my $record = MARC::Record->new;
512 $record->encoding('UTF-8');
514 foreach my $item (@items){
515 my $record = Item2Marc($item, $record_number);
516 push @itemsrecord, $record->field($itemtag);
518 $record->insert_fields_ordered(@itemsrecord);
519 my $itemsxml = $record->as_xml_record();
521 substr($marcxml, 0, length($marcxml)-10) .
522 substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
525 # extra test to ensure that result is valid XML; otherwise
526 # Zebra won't parse it in DOM mode
528 my $doc = $tester->parse_string($marcxml);
531 warn "Error exporting record $record_number ($record_type): $@\n";
535 $marcxml =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
536 print {$fh} $marcxml;
541 my ($marc) = get_corrected_marc_record($record_type, $record_number);
544 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
546 my $doc = $tester->parse_string($rec);
549 die "invalid XML: $@";
551 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
556 warn "Error exporting record $record_number ($record_type) XML";
557 warn "... specific error is $@" if $verbose_logging;
561 print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
562 print {$fh} $marcxml_close;
565 return $num_exported;
568 sub export_marc_records_from_list {
569 my ($record_type, $entries, $directory, $records_deleted) = @_;
571 my $num_exported = 0;
572 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
574 print {$fh} $marcxml_open;
578 # Skip any deleted records. We check for this anyway, but this reduces error spam
579 my %found = %$records_deleted;
580 foreach my $record_number ( map { $_->{biblio_auth_number} }
581 grep { !$found{ $_->{biblio_auth_number} }++ }
583 print "." if ( $verbose_logging );
584 print "\r$i" unless ($i++ %100 or !$verbose_logging);
585 my ($marc) = get_corrected_marc_record($record_type, $record_number);
588 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
589 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
594 warn "Error exporting record $record_number ($record_type) XML";
598 print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
600 print {$fh} $marcxml_close;
603 return $num_exported;
606 sub generate_deleted_marc_records {
608 my ($record_type, $entries, $directory) = @_;
610 my $records_deleted = {};
611 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
613 print {$fh} $marcxml_open;
616 foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
617 print "\r$i" unless ($i++ %100 or !$verbose_logging);
618 print "." if ( $verbose_logging );
620 my $marc = MARC::Record->new();
621 if ($record_type eq 'biblio') {
622 fix_biblio_ids($marc, $record_number, $record_number);
624 fix_authority_id($marc, $record_number);
626 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
627 fix_unimarc_100($marc);
630 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
631 # Remove the record's XML header
632 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
635 $records_deleted->{$record_number} = 1;
637 print "\nRecords exported: $i\n" if ( $verbose_logging );
639 print {$fh} $marcxml_close;
642 return $records_deleted;
645 sub get_corrected_marc_record {
646 my ( $record_type, $record_number ) = @_;
648 my $marc = get_raw_marc_record( $record_type, $record_number );
650 if ( defined $marc ) {
652 if ( $record_type eq 'authority' ) {
653 fix_authority_id( $marc, $record_number );
655 elsif ( $record_type eq 'biblio' ) {
658 push @filters, 'EmbedItemsAvailability';
659 push @filters, 'EmbedSeeFromHeadings'
660 if C4::Context->preference('IncludeSeeFromInSearches');
662 my $normalizer = Koha::RecordProcessor->new( { filters => \@filters } );
663 $marc = $normalizer->process($marc);
665 if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
666 fix_unimarc_100($marc);
673 sub get_raw_marc_record {
674 my ($record_type, $record_number) = @_;
677 if ($record_type eq 'biblio') {
678 eval { $marc = C4::Biblio::GetMarcBiblio({ biblionumber => $record_number, embed_items => 1 }); };
680 # here we do warn since catching an exception
681 # means that the bib was found but failed
683 warn "error retrieving biblio $record_number";
687 eval { $marc = GetAuthority($record_number); };
689 warn "error retrieving authority $record_number";
697 # FIXME - this routine is suspect
698 # It blanks the Leader/00-05 and Leader/12-16 to
699 # force them to be recalculated correctly when
700 # the $marc->as_usmarc() or $marc->as_xml() is called.
701 # But why is this necessary? It would be a serious bug
702 # in MARC::Record (definitely) and MARC::File::XML (arguably)
703 # if they are emitting incorrect leader values.
706 my $leader = $marc->leader;
707 substr($leader, 0, 5) = ' ';
708 substr($leader, 10, 7) = '22 ';
709 $marc->leader(substr($leader, 0, 24));
713 # FIXME - it is essential to ensure that the biblionumber is present,
714 # otherwise, Zebra will choke on the record. However, this
715 # logic belongs in the relevant C4::Biblio APIs.
717 my $biblionumber = shift;
718 my $biblioitemnumber;
720 $biblioitemnumber = shift;
722 my $sth = $dbh->prepare(
723 "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
724 $sth->execute($biblionumber);
725 ($biblioitemnumber) = $sth->fetchrow_array;
727 unless ($biblioitemnumber) {
728 warn "failed to get biblioitemnumber for biblio $biblionumber";
733 # FIXME - this is cheating on two levels
734 # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
735 # 2. Making sure that the biblionumber and biblioitemnumber are correct and
736 # present in the MARC::Record object ought to be part of GetMarcBiblio.
738 # On the other hand, this is better for now than what rebuild_zebra.pl used to
739 # do, which was duplicate the code for inserting the biblionumber
740 # and biblioitemnumber
741 C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);
746 sub fix_authority_id {
747 # FIXME - as with fix_biblio_ids, the authid must be present
748 # for Zebra's sake. However, this really belongs
749 # in C4::AuthoritiesMarc.
750 my ($marc, $authid) = @_;
751 unless ($marc->field('001') and $marc->field('001')->data() eq $authid){
752 $marc->delete_field($marc->field('001'));
753 $marc->insert_fields_ordered(MARC::Field->new('001',$authid));
757 sub fix_unimarc_100 {
758 # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
762 my $length_100a = length($marc->subfield( 100, "a" ));
763 if ( $length_100a and $length_100a == 36 ) {
764 $string = $marc->subfield( 100, "a" );
765 my $f100 = $marc->field(100);
766 $marc->delete_field($f100);
769 $string = POSIX::strftime( "%Y%m%d", localtime );
771 $string = sprintf( "%-*s", 35, $string );
773 substr( $string, 22, 6, "frey50" );
774 $length_100a = length($marc->subfield( 100, "a" ));
775 unless ( $length_100a and $length_100a == 36 ) {
776 $marc->delete_field($marc->field(100));
777 $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
782 my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format, $zebraidx_log_opt) = @_;
784 my $zebra_server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
785 my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
786 my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
787 my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
791 if ($noshadow or $reset_index) {
795 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name init") if $reset_index;
796 system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
797 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
801 # test if flock is present; if so, use it; if not, return true
802 # op refers to the official flock operations including LOCK_EX,
804 # combining LOCK_EX with LOCK_NB returns immediately
806 if( !defined($use_flock) ) {
807 #check if flock is present; if not, you will have a fatal error
808 my $lock_acquired = eval { flock($fh, $op) };
809 # assuming that $fh and $op are fine(..), an undef $lock_acquired
811 $use_flock = defined($lock_acquired) ? 1 : 0;
812 print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock;
813 return 1 if !$use_flock;
814 return $lock_acquired;
816 return 1 if !$use_flock;
817 return flock($fh, $op);
821 sub _create_lockfile { #returns undef on failure
824 eval { mkpath($dir, 0, oct(755)) };
827 return if !open my $fh, q{>}, $dir.'/'.LOCK_FILENAME;
828 return ( $fh, $dir.'/'.LOCK_FILENAME );
833 $0: reindex MARC bibs and/or authorities in Zebra.
835 Use this batch job to reindex all biblio or authority
836 records in your Koha database.
840 -b index bibliographic records
842 -a index authority records
844 -daemon Run in daemon mode. The program will loop checking
845 for entries on the zebraqueue table, processing
846 them incrementally if present, and then sleep
847 for a few seconds before repeating the process
848 Checking the zebraqueue table is done with a cheap
849 SQL query. This allows for near realtime update of
850 the zebra search index with low system overhead.
851 Use -sleep to control the checking interval.
853 Daemon mode implies -z, -a, -b. The program will
854 refuse to start if options are present that do not
855 make sense while running as an incremental update
856 daemon (e.g. -r or -offset).
858 -sleep 10 Seconds to sleep between checks of the zebraqueue
859 table in daemon mode. The default is 5 seconds.
861 -z select only updated and deleted
862 records marked in the zebraqueue
863 table. Cannot be used with -r
866 --skip-deletes only select record updates, not record
867 deletions, to avoid potential excessive
868 I/O when zebraidx processes deletions.
869 If this option is used for normal indexing,
870 a cronjob should be set up to run
871 rebuild_zebra.pl -z without --skip-deletes
873 Only effective with -z.
875 -r clear Zebra index before
876 adding records to index. Implies -w.
878 -d Temporary directory for indexing.
879 If not specified, one is automatically
880 created. The export directory
881 is automatically deleted unless
882 you supply the -k switch.
884 -k Do not delete export directory.
886 -s Skip export. Used if you have
887 already exported the records
890 -nosanitize export biblio/authority records directly from DB marcxml
891 field without sanitizing records. It speed up
892 dump process but could fail if DB contains badly
893 encoded records. Works only with -x,
895 -w skip shadow indexing for this batch
897 -y do NOT clear zebraqueue after indexing; normally,
898 after doing batch indexing, zebraqueue should be
899 marked done for the affected record type(s) so that
900 a running zebraqueue_daemon doesn't try to reindex
901 the same records - specify -y to override this.
902 Cannot be used with -z.
904 -v increase the amount of logging. Normally only
905 warnings and errors from the indexing are shown.
906 Use log level 2 (-v -v) to include all Zebra logs.
908 --length 1234 how many biblio you want to export
909 --offset 1243 offset you want to start to
910 example: --offset 500 --length=500 will result in a LIMIT 500,1000 (exporting 1000 records, starting by the 500th one)
911 note that the numbers are NOT related to biblionumber, that's the intended behaviour.
912 --where let you specify a WHERE query, like itemtype='BOOK'
913 or something like that
915 --run-as-root explicitily allow script to run as 'root' user
917 --wait-for-lock when not running in daemon mode, the default
918 behavior is to abort a rebuild if the rebuild
919 lock is busy. This option will cause the program
920 to wait for the lock to free and then continue
921 processing the rebuild request,
923 --table specify a table (can be items, biblioitems or biblio) to retrieve biblionumber to index.
924 biblioitems is the default value.
926 --help or -h show this message.