4 #use warnings; FIXME - Bug 2505
9 use File::Temp qw/ tempdir /;
12 use C4::AuthoritiesMarc;
14 use Koha::RecordProcessor;
17 use constant LOCK_FILENAME => 'rebuild..LCK';
19 # script that checks zebradir structure & create directories & mandatory files if needed
23 $|=1; # flushes output
24 # If the cron job starts us in an unreadable dir, we will break without
26 chdir $ENV{HOME} if (!(-r '.'));
41 my $process_zebraqueue;
42 my $process_zebraqueue_skip_deletes;
43 my $do_not_clear_zebraqueue;
48 my $run_user = (getpwuid($<))[0];
49 my $wait_for_lock = 0;
52 my $verbose_logging = 0;
53 my $zebraidx_log_opt = " -v none,fatal,warn ";
54 my $result = GetOptions(
55 'daemon' => \$daemon_mode,
56 'sleep:i' => \$daemon_sleep,
61 'I|skip-index' => \$skip_index,
62 'nosanitize' => \$nosanitize,
67 'h|help' => \$want_help,
69 'y' => \$do_not_clear_zebraqueue,
70 'z' => \$process_zebraqueue,
71 'skip-deletes' => \$process_zebraqueue_skip_deletes,
73 'length:i' => \$length,
74 'offset:i' => \$offset,
75 'v+' => \$verbose_logging,
76 'run-as-root' => \$run_as_root,
77 'wait-for-lock' => \$wait_for_lock,
80 if (not $result or $want_help) {
85 if( not defined $run_as_root and $run_user eq 'root') {
86 my $msg = "Warning: You are running this script as the user 'root'.\n";
87 $msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n";
88 $msg .= "Please do '$0 --help' to see usage.\n";
92 if ( !$as_xml and $nosanitize ) {
93 my $msg = "Cannot specify both -no_xml and -nosanitize\n";
94 $msg .= "Please do '$0 --help' to see usage.\n";
98 if ($process_zebraqueue and ($skip_export or $reset)) {
99 my $msg = "Cannot specify -r or -s if -z is specified\n";
100 $msg .= "Please do '$0 --help' to see usage.\n";
104 if ($process_zebraqueue and $do_not_clear_zebraqueue) {
105 my $msg = "Cannot specify both -y and -z\n";
106 $msg .= "Please do '$0 --help' to see usage.\n";
119 # incompatible flags handled above: help, reset, and do_not_clear_zebraqueue
120 if ($skip_export or $keep_export or $skip_index or
121 $where or $length or $offset) {
122 my $msg = "Cannot specify -s, -k, -I, -where, -length, or -offset with -daemon.\n";
123 $msg .= "Please do '$0 --help' to see usage.\n";
128 $process_zebraqueue = 1;
131 if (not $biblios and not $authorities) {
132 my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
133 $msg .= "Please do '$0 --help' to see usage.\n";
138 # -v is for verbose, which seems backwards here because of how logging is set
139 # on the CLI of zebraidx. It works this way. The default is to not log much
140 if ($verbose_logging >= 2) {
141 $zebraidx_log_opt = '-v none,fatal,warn,all';
145 unless ($directory) {
147 $directory = tempdir(CLEANUP => ($keep_export ? 0 : 1));
151 my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory};
152 my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
154 my $kohadir = C4::Context->config('intranetdir');
155 my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') || 'grs1';
156 my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') || 'dom';
158 my $dbh = C4::Context->dbh;
159 my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
160 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
162 # Protect against simultaneous update of the zebra index by using a lock file.
163 # Create our own lock directory if it's missing. This should be created
164 # by koha-zebra-ctl.sh or at system installation. If the desired directory
165 # does not exist and cannot be created, we fall back on /tmp - which will
168 my ($lockfile, $LockFH);
170 C4::Context->config("zebra_lockdir"),
171 '/var/lock/zebra_' . C4::Context->config('database'),
172 '/tmp/zebra_' . C4::Context->config('database')
174 #we try three possibilities (we really want to lock :)
176 ($LockFH, $lockfile) = _create_lockfile($_.'/rebuild');
177 last if defined $LockFH;
179 if( !defined $LockFH ) {
180 print "WARNING: Could not create lock file $lockfile: $!\n";
181 print "Please check your koha-conf.xml for ZEBRA_LOCKDIR.\n";
182 print "Verify file permissions for it too.\n";
183 $use_flock = 0; # we disable file locking now and will continue
185 # note that this mimics old behavior (before we used
189 if ( $verbose_logging ) {
190 print "Zebra configuration information\n";
191 print "================================\n";
192 print "Zebra biblio directory = $biblioserverdir\n";
193 print "Zebra authorities directory = $authorityserverdir\n";
194 print "Koha directory = $kohadir\n";
195 print "Lockfile = $lockfile\n" if $lockfile;
196 print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
197 print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
198 print "================================\n";
201 my $tester = XML::LibXML->new();
203 # The main work is done here by calling do_one_pass(). We have added locking to
204 # avoid race conditions between full rebuilds and incremental updates either from
205 # daemon mode or periodic invocation from cron. The race can lead to an updated
206 # record being overwritten by a rebuild if the update is applied after the export
207 # by the rebuild and before the rebuild finishes (more likely to affect large
210 # We have chosen to exit immediately by default if we cannot obtain the lock
211 # to prevent the potential for an infinite backlog from cron invocations, but an
212 # option (wait-for-lock) is provided to let the program wait for the lock.
213 # See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details.
216 # For incremental updates, skip the update if the updates are locked
217 if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
218 do_one_pass() if ( zebraqueue_not_empty() );
219 _flock($LockFH, LOCK_UN);
224 # all one-off invocations
225 my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
226 if (_flock($LockFH, $lock_mode)) {
228 _flock($LockFH, LOCK_UN);
230 print "Skipping rebuild/update because flock failed on $lockfile: $!\n";
235 if ( $verbose_logging ) {
236 print "====================\n";
238 print "====================\n";
241 print "NOTHING cleaned : the export $directory has been kept.\n";
242 print "You can re-run this script with the -s ";
244 print " and -d $directory parameters";
249 print "if you just want to rebuild zebra after changing the record.abs\n";
250 print "or another zebra config file\n";
252 unless ($use_tempdir) {
253 # if we're using a temporary directory
254 # created by File::Temp, it will be removed
256 rmtree($directory, 0, 1);
257 print "directory $directory deleted\n";
263 index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
265 print "skipping authorities\n" if ( $verbose_logging );
269 index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
271 print "skipping biblios\n" if ( $verbose_logging );
275 # Check the zebra update queue and return true if there are records to process
276 # This routine will handle each of -ab, -a, or -b, but in practice we force
277 # -ab when in daemon mode.
278 sub zebraqueue_not_empty {
# Choose the queue filter: every pending row when both record types are
# selected, otherwise only the rows of a single server. A row with
# 'done = 0' is one that has not been marked as processed.
281 if ($authorities && $biblios) {
282 $where_str = 'done = 0;';
284 $where_str = 'server = "biblioserver" AND done = 0;';
286 $where_str = 'server = "authorityserver" AND done = 0;';
# Count the zebraqueue rows matching the filter chosen above.
289 $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
# The single result row carries the count in its first column.
292 my $count = $query->fetchrow_arrayref->[0];
# Report the queue depth only when -v was given at least once.
293 print "queued records: $count\n" if $verbose_logging > 0;
297 # This checks to see if the zebra directories exist under the provided path.
298 # If they don't, then zebra is likely to spit the dummy. This returns true
299 # if the directories had to be created, false otherwise.
# check_zebra_dirs($base)
#   $base - path of a Zebra server directory (a trailing '/' is appended here).
#   Returns the $needed_repairing flag: 0 initially, set to 1 when a
#   directory has to be created.
#   Dies with "Unable to create ..." when a directory cannot be created.
300 sub check_zebra_dirs {
301 my ($base) = shift() . '/';
302 my $needed_repairing = 0;
# '' stands for the base directory itself; the rest are its required children.
303 my @dirs = ( '', 'key', 'register', 'shadow', 'tmp' );
304 foreach my $dir (@dirs) {
305 my $bdir = $base . $dir;
307 $needed_repairing = 1;
# Low-precedence 'or' is required here: with '||' the die was attached to
# $bdir (always a true, non-empty path), so a failed mkdir went unnoticed.
308 mkdir $bdir or die "Unable to create '$bdir': $!\n";
309 print "$0: needed to create '$bdir'\n";
312 return $needed_repairing;
313 } # ---------- end of subroutine check_zebra_dirs ----------
316 my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
318 my $num_records_exported = 0;
319 my $records_deleted = {};
320 my $need_reset = check_zebra_dirs($server_dir);
322 print "$0: found broken zebra server directories: forcing a rebuild\n";
325 if ($skip_export && $verbose_logging) {
326 print "====================\n";
327 print "SKIPPING $record_type export\n";
328 print "====================\n";
330 if ( $verbose_logging ) {
331 print "====================\n";
332 print "exporting $record_type\n";
333 print "====================\n";
335 mkdir "$directory" unless (-d $directory);
336 mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
337 if ($process_zebraqueue) {
340 unless ( $process_zebraqueue_skip_deletes ) {
341 $entries = select_zebraqueue_records($record_type, 'deleted');
342 mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
343 $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml);
344 mark_zebraqueue_batch_done($entries);
347 $entries = select_zebraqueue_records($record_type, 'updated');
348 mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
349 $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted);
350 mark_zebraqueue_batch_done($entries);
353 my $sth = select_all_records($record_type);
354 $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize);
355 unless ($do_not_clear_zebraqueue) {
356 mark_all_zebraqueue_done($record_type);
362 # and reindexing everything
365 if ($verbose_logging) {
366 print "====================\n";
367 print "SKIPPING $record_type indexing\n";
368 print "====================\n";
371 if ( $verbose_logging ) {
372 print "====================\n";
373 print "REINDEXING zebra\n";
374 print "====================\n";
376 my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
377 if ($process_zebraqueue) {
378 do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
379 if %$records_deleted;
380 do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
381 if $num_records_exported;
383 do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
384 if ($num_records_exported or $skip_export);
390 sub select_zebraqueue_records {
391 my ($record_type, $update_type) = @_;
393 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
394 my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';
396 my $sth = $dbh->prepare("SELECT id, biblio_auth_number
402 $sth->execute($server, $op);
403 my $entries = $sth->fetchall_arrayref({});
406 sub mark_all_zebraqueue_done {
407 my ($record_type) = @_;
409 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
411 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
414 $sth->execute($server);
# Flag the zebraqueue rows of one batch as processed (done = 1), keyed by
# their id.
417 sub mark_zebraqueue_batch_done {
# AutoCommit is switched off while the batch is updated and restored below.
# NOTE(review): presumably so the updates are committed together - confirm
# how the pending work is committed before AutoCommit is turned back on.
420 $dbh->{AutoCommit} = 0;
421 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
# $entries is dereferenced as an array of hash refs, each with an 'id' key.
423 foreach my $id (map { $_->{id} } @$entries) {
426 $dbh->{AutoCommit} = 1;
# Dispatch to the selector for the requested record type and return its
# result (the caller uses it as a DBI statement handle). Any $record_type
# other than 'biblio' is treated as a request for authorities.
429 sub select_all_records {
430 my $record_type = shift;
431 return ($record_type eq 'biblio') ? select_all_biblios() : select_all_authorities();
# Build and prepare the query listing authority ids (auth_header.authid).
# $where, $length and $offset are declared outside this sub and narrow the
# result set:
#   - $where is appended verbatim as the WHERE clause,
#   - $length alone becomes "LIMIT $length",
#   - $length together with $offset becomes "LIMIT $offset,$length".
# NOTE(review): these values are interpolated into the SQL text unescaped -
# presumably acceptable because they come from the command line; confirm.
434 sub select_all_authorities {
435 my $strsth=qq{SELECT authid FROM auth_header};
436 $strsth.=qq{ WHERE $where } if ($where);
437 $strsth.=qq{ LIMIT $length } if ($length && !$offset);
438 $strsth.=qq{ LIMIT $offset,$length } if ($length && $offset);
439 my $sth = $dbh->prepare($strsth);
# Build and prepare the query listing biblionumbers from biblioitems.
# $where, $length and $offset are declared outside this sub and narrow the
# result set:
#   - $where is appended verbatim as the WHERE clause,
#   - $length without $offset becomes "LIMIT $length",
#   - any true $offset appends "LIMIT $offset,$length".
# NOTE(review): unlike select_all_authorities, the offset clause does not
# require $length, so an offset given without a length yields an incomplete
# LIMIT clause - confirm whether that is intended.
# NOTE(review): these values are interpolated into the SQL text unescaped -
# presumably acceptable because they come from the command line; confirm.
444 sub select_all_biblios {
445 my $strsth = qq{ SELECT biblionumber FROM biblioitems };
446 $strsth.=qq{ WHERE $where } if ($where);
447 $strsth.=qq{ LIMIT $length } if ($length && !$offset);
448 $strsth.=qq{ LIMIT $offset,$length } if ($offset);
449 my $sth = $dbh->prepare($strsth);
# Decide whether an export file needs the XML declaration and <collection>
# root element. Callers invoke it as include_xml_wrapper($as_xml, $record_type).
454 sub include_xml_wrapper {
456 my $record_type = shift;
# No wrapper unless records are exported as XML.
458 return 0 unless $as_xml;
# A wrapper is needed when the index for this record type runs in 'dom' mode.
459 return 1 if $record_type eq 'biblio' and $bib_index_mode eq 'dom';
460 return 1 if $record_type eq 'authority' and $auth_index_mode eq 'dom';
465 sub export_marc_records_from_sth {
466 my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;
468 my $num_exported = 0;
469 open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
470 if (include_xml_wrapper($as_xml, $record_type)) {
471 # include XML declaration and root element
472 print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
475 my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
476 while (my ($record_number) = $sth->fetchrow_array) {
477 print "." if ( $verbose_logging );
478 print "\r$i" unless ($i++ %100 or !$verbose_logging);
480 my $marcxml = $record_type eq 'biblio'
481 ? GetXmlBiblio( $record_number )
482 : GetAuthorityXML( $record_number );
483 if ($record_type eq 'biblio'){
484 my @items = GetItemsInfo($record_number);
486 my $record = MARC::Record->new;
487 $record->encoding('UTF-8');
489 foreach my $item (@items){
490 my $record = Item2Marc($item, $record_number);
491 push @itemsrecord, $record->field($itemtag);
493 $record->insert_fields_ordered(@itemsrecord);
494 my $itemsxml = $record->as_xml_record();
496 substr($marcxml, 0, length($marcxml)-10) .
497 substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
500 # extra test to ensure that result is valid XML; otherwise
501 # Zebra won't parse it in DOM mode
503 my $doc = $tester->parse_string($marcxml);
506 warn "Error exporting record $record_number ($record_type): $@\n";
510 $marcxml =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
511 print {$fh} $marcxml;
516 my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
521 $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
523 my $doc = $tester->parse_string($rec);
526 die "invalid XML: $@";
528 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
530 $rec = $marc->as_usmarc();
536 warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
537 warn "... specific error is $@" if $verbose_logging;
541 print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
542 print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
544 return $num_exported;
547 sub export_marc_records_from_list {
548 my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_;
550 my $num_exported = 0;
551 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
552 if (include_xml_wrapper($as_xml, $record_type)) {
553 # include XML declaration and root element
554 print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
558 # Skip any deleted records. We check for this anyway, but this reduces error spam
559 my %found = %$records_deleted;
560 foreach my $record_number ( map { $_->{biblio_auth_number} }
561 grep { !$found{ $_->{biblio_auth_number} }++ }
563 print "." if ( $verbose_logging );
564 print "\r$i" unless ($i++ %100 or !$verbose_logging);
565 my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
570 $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
571 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
573 $rec = $marc->as_usmarc();
579 warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
583 print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
584 print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
586 return $num_exported;
589 sub generate_deleted_marc_records {
590 my ($record_type, $entries, $directory, $as_xml) = @_;
592 my $records_deleted = {};
593 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
594 if (include_xml_wrapper($as_xml, $record_type)) {
595 # include XML declaration and root element
596 print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
599 foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
600 print "\r$i" unless ($i++ %100 or !$verbose_logging);
601 print "." if ( $verbose_logging );
603 my $marc = MARC::Record->new();
604 if ($record_type eq 'biblio') {
605 fix_biblio_ids($marc, $record_number, $record_number);
607 fix_authority_id($marc, $record_number);
609 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
610 fix_unimarc_100($marc);
615 $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
616 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
618 $rec = $marc->as_usmarc();
622 $records_deleted->{$record_number} = 1;
624 print "\nRecords exported: $i\n" if ( $verbose_logging );
625 print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
627 return $records_deleted;
632 sub get_corrected_marc_record {
633 my ($record_type, $record_number, $noxml) = @_;
635 my $marc = get_raw_marc_record($record_type, $record_number, $noxml);
639 if ($record_type eq 'authority') {
640 fix_authority_id($marc, $record_number);
641 } elsif ($record_type eq 'biblio' && C4::Context->preference('IncludeSeeFromInSearches')) {
642 my $normalizer = Koha::RecordProcessor->new( { filters => 'EmbedSeeFromHeadings' } );
643 $marc = $normalizer->process($marc);
645 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
646 fix_unimarc_100($marc);
653 sub get_raw_marc_record {
654 my ($record_type, $record_number, $noxml) = @_;
657 if ($record_type eq 'biblio') {
659 my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
660 $fetch_sth->execute($record_number);
661 if (my ($blob) = $fetch_sth->fetchrow_array) {
662 $marc = MARC::Record->new_from_usmarc($blob);
664 warn "error creating MARC::Record from $blob";
667 # failure to find a bib is not a problem -
668 # a delete could have been done before
669 # trying to process a record update
671 $fetch_sth->finish();
674 eval { $marc = GetMarcBiblio($record_number, 1); };
676 # here we do warn since catching an exception
677 # means that the bib was found but failed
679 warn "error retrieving biblio $record_number";
684 eval { $marc = GetAuthority($record_number); };
686 warn "error retrieving authority $record_number";
694 # FIXME - this routine is suspect
695 # It blanks the Leader/00-05 and Leader/12-16 to
696 # force them to be recalculated correct when
697 # the $marc->as_usmarc() or $marc->as_xml() is called.
698 # But why is this necessary? It would be a serious bug
699 # in MARC::Record (definitely) and MARC::File::XML (arguably)
700 # if they are emitting incorrect leader values.
703 my $leader = $marc->leader;
704 substr($leader, 0, 5) = ' ';
705 substr($leader, 10, 7) = '22 ';
706 $marc->leader(substr($leader, 0, 24));
710 # FIXME - it is essential to ensure that the biblionumber is present,
711 # otherwise, Zebra will choke on the record. However, this
712 # logic belongs in the relevant C4::Biblio APIs.
714 my $biblionumber = shift;
715 my $biblioitemnumber;
717 $biblioitemnumber = shift;
719 my $sth = $dbh->prepare(
720 "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
721 $sth->execute($biblionumber);
722 ($biblioitemnumber) = $sth->fetchrow_array;
724 unless ($biblioitemnumber) {
725 warn "failed to get biblioitemnumber for biblio $biblionumber";
730 # FIXME - this is cheating on two levels
731 # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
732 # 2. Making sure that the biblionumber and biblioitemnumber are correct and
733 # present in the MARC::Record object ought to be part of GetMarcBiblio.
735 # On the other hand, this is better for now than what rebuild_zebra.pl used to
736 # do, which was duplicate the code for inserting the biblionumber
737 # and biblioitemnumber
738 C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);
743 sub fix_authority_id {
744 # FIXME - as with fix_biblio_ids, the authid must be present
745 # for Zebra's sake. However, this really belongs
746 # in C4::AuthoritiesMarc.
747 my ($marc, $authid) = @_;
# Leave the record alone when field 001 already holds exactly $authid
# (string comparison); otherwise drop any existing 001 and insert a new
# 001 field carrying $authid.
748 unless ($marc->field('001') and $marc->field('001')->data() eq $authid){
749 $marc->delete_field($marc->field('001'));
750 $marc->insert_fields_ordered(MARC::Field->new('001',$authid));
754 sub fix_unimarc_100 {
755 # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
759 if ( length($marc->subfield( 100, "a" )) == 36 ) {
760 $string = $marc->subfield( 100, "a" );
761 my $f100 = $marc->field(100);
762 $marc->delete_field($f100);
765 $string = POSIX::strftime( "%Y%m%d", localtime );
767 $string = sprintf( "%-*s", 35, $string );
769 substr( $string, 22, 6, "frey50" );
770 unless ( length($marc->subfield( 100, "a" )) == 36 ) {
771 $marc->delete_field($marc->field(100));
772 $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
777 my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format, $zebraidx_log_opt) = @_;
779 my $zebra_server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
780 my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
781 my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
782 my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
784 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name init") if $reset_index;
785 system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
786 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
791 # test if flock is present; if so, use it; if not, return true
792 # op refers to the official flock operations including LOCK_EX,
794 # combining LOCK_EX with LOCK_NB returns immediately
796 if( !defined($use_flock) ) {
797 #check if flock is present; if not, you will have a fatal error
798 my $lock_acquired = eval { flock($fh, $op) };
799 # assuming that $fh and $op are fine(..), an undef $lock_acquired
801 $use_flock = defined($lock_acquired) ? 1 : 0;
802 print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock;
803 return 1 if !$use_flock;
804 return $lock_acquired;
806 return 1 if !$use_flock;
807 return flock($fh, $op);
# Create the lock directory and open the lock file (LOCK_FILENAME) inside it.
# On success returns the list (filehandle, lock file path).
811 sub _create_lockfile { #returns undef on failure
# Create $dir with mode 0755; any exception from mkpath is trapped by eval.
814 eval { mkpath($dir, 0, oct(755)) };
# Open the lock file for writing; a bare return signals failure to the caller.
817 return if !open my $fh, q{>}, $dir.'/'.LOCK_FILENAME;
818 return ( $fh, $dir.'/'.LOCK_FILENAME );
823 $0: reindex MARC bibs and/or authorities in Zebra.
825 Use this batch job to reindex all biblio or authority
826 records in your Koha database.
830 -b index bibliographic records
832 -a index authority records
834 -daemon Run in daemon mode. The program will loop checking
835 for entries on the zebraqueue table, processing
836 them incrementally if present, and then sleep
837 for a few seconds before repeating the process
838 Checking the zebraqueue table is done with a cheap
839 SQL query. This allows for near realtime update of
840 the zebra search index with low system overhead.
841 Use -sleep to control the checking interval.
843 Daemon mode implies -z, -a, -b. The program will
844 refuse to start if options are present that do not
845 make sense while running as an incremental update
846 daemon (e.g. -r or -offset).
848 -sleep 10 Seconds to sleep between checks of the zebraqueue
849 table in daemon mode. The default is 5 seconds.
851 -z select only updated and deleted
852 records marked in the zebraqueue
853 table. Cannot be used with -r
856 --skip-deletes only select record updates, not record
857 deletions, to avoid potential excessive
858 I/O when zebraidx processes deletions.
859 If this option is used for normal indexing,
860 a cronjob should be set up to run
861 rebuild_zebra.pl -z without --skip-deletes
863 Only effective with -z.
865 -r clear Zebra index before
866 adding records to index. Implies -w.
868 -d Temporary directory for indexing.
869 If not specified, one is automatically
870 created. The export directory
871 is automatically deleted unless
872 you supply the -k switch.
874 -k Do not delete export directory.
876 -s Skip export. Used if you have
877 already exported the records
880 -noxml index from ISO MARC blob
881 instead of MARC XML. This
882 option is recommended only
885 -x export and index as xml instead of is02709 (biblios only).
886 use this if you might have records > 99,999 chars,
888 -nosanitize export biblio/authority records directly from DB marcxml
889 field without sanitizing records. It speed up
890 dump process but could fail if DB contains badly
891 encoded records. Works only with -x,
893 -w skip shadow indexing for this batch
895 -y do NOT clear zebraqueue after indexing; normally,
896 after doing batch indexing, zebraqueue should be
897 marked done for the affected record type(s) so that
898 a running zebraqueue_daemon doesn't try to reindex
899 the same records - specify -y to override this.
900 Cannot be used with -z.
902 -v increase the amount of logging. Normally only
903 warnings and errors from the indexing are shown.
904 Use log level 2 (-v -v) to include all Zebra logs.
906 --length 1234 how many biblio you want to export
907 --offset 1243 offset you want to start to
908 example: --offset 500 --length=500 will result in a LIMIT 500,1000 (exporting 1000 records, starting by the 500th one)
909 note that the numbers are NOT related to biblionumber, that's the intended behaviour.
910 --where let you specify a WHERE query, like itemtype='BOOK'
911 or something like that
913 --run-as-root explicitily allow script to run as 'root' user
915 --wait-for-lock when not running in daemon mode, the default
916 behavior is to abort a rebuild if the rebuild
917 lock is busy. This option will cause the program
918 to wait for the lock to free and then continue
919 processing the rebuild request,
921 --help or -h show this message.