3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # Koha is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 use File::Temp qw/ tempdir /;
27 use C4::AuthoritiesMarc;
29 use Koha::RecordProcessor;
33 use constant LOCK_FILENAME => 'rebuild..LCK';
35 # Script that checks the zebradir structure and creates directories and mandatory files if needed.
# Enable autoflush on the currently selected output handle so progress output
# is not held back in a buffer.
39 $|=1; # flushes output
40 # If the cron job starts us in an unreadable dir, we will break without
# (the rest of the sentence above is not part of this excerpt). The statement
# below changes to $HOME when the current directory is not readable.
42 chdir $ENV{HOME} if (!(-r '.'));
# Option state. Only some of the option variables are declared in the lines
# visible here; the others used by GetOptions are declared in lines not shown.
56 my $process_zebraqueue;
57 my $process_zebraqueue_skip_deletes;
58 my $do_not_clear_zebraqueue;
# Login name for the real user id ($<); compared with 'root' further down.
63 my $run_user = (getpwuid($<))[0];
64 my $wait_for_lock = 0;
# Default table for selecting biblionumbers; -t/--table overrides it.
66 my $table = 'biblioitems';
# Whatever Koha::Caches reports as its memcached cache; only tested for truth
# in this script.
67 my $is_memcached = Koha::Caches->get_instance->memcached_cache;
69 my $verbose_logging = 0;
# Default zebraidx logging switch; replaced later when -v is given at least twice.
70 my $zebraidx_log_opt = " -v none,fatal,warn ";
# Parse the command line into the variables above. In Getopt::Long notation,
# ':i' / ':s' take an optional integer / string value and '+' counts how many
# times the switch was given. $result is tested afterwards to detect bad options.
71 my $result = GetOptions(
72 'daemon' => \$daemon_mode,
73 'sleep:i' => \$daemon_sleep,
78 'I|skip-index' => \$skip_index,
79 'nosanitize' => \$nosanitize,
83 'h|help' => \$want_help,
85 'y' => \$do_not_clear_zebraqueue,
86 'z' => \$process_zebraqueue,
87 'skip-deletes' => \$process_zebraqueue_skip_deletes,
89 'length:i' => \$length,
90 'offset:i' => \$offset,
91 'v+' => \$verbose_logging,
92 'run-as-root' => \$run_as_root,
93 'wait-for-lock' => \$wait_for_lock,
94 't|table:s' => \$table,
# Sanity checks on the parsed options. Each check builds a message in $msg;
# what is done with the message is in lines not shown in this excerpt.

# Option parsing failed, or help was requested.
97 if (not $result or $want_help) {
# -x is deprecated: XML export is already the default, so only a warning remains.
103 warn "Warning: You passed -x which is already the default and is now deprecated\n";
104 undef $as_xml; # Should not be used later
# Running as root needs the explicit -run-as-root switch.
107 if( not defined $run_as_root and $run_user eq 'root') {
108 my $msg = "Warning: You are running this script as the user 'root'.\n";
109 $msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n";
110 $msg .= "Please do '$0 --help' to see usage.\n";
# -z (process the zebraqueue) cannot be combined with -r or -s.
114 if ($process_zebraqueue and ($skip_export or $reset)) {
115 my $msg = "Cannot specify -r or -s if -z is specified\n";
116 $msg .= "Please do '$0 --help' to see usage.\n";
# -z and -y contradict each other.
120 if ($process_zebraqueue and $do_not_clear_zebraqueue) {
121 my $msg = "Cannot specify both -y and -z\n";
122 $msg .= "Please do '$0 --help' to see usage.\n";
127 # incompatible flags handled above: help, reset, and do_not_clear_zebraqueue
# Switches that the message below declares incompatible with -daemon.
128 if ($skip_export or $keep_export or $skip_index or
129 $where or $length or $offset) {
130 my $msg = "Cannot specify -s, -k, -I, -where, -length, or -offset with -daemon.\n";
131 $msg .= "Please do '$0 --help' to see usage.\n";
# Daemon mode without memcached only produces a warning.
134 unless ($is_memcached) {
135 warn "Warning: script running in daemon mode, without recommended caching system (memcached).\n";
# Queue processing is switched on here; presumably this is still inside the
# daemon-mode branch (the usage text says daemon mode implies -z) -- confirm.
139 $process_zebraqueue = 1;
# At least one of -b / -a is required.
142 if (not $biblios and not $authorities) {
143 my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
144 $msg .= "Please do '$0 --help' to see usage.\n";
# Whitelist of table names accepted for -t/--table. select_all_biblios()
# interpolates $table into SQL, so the value has to be one of these.
148 our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio', 'biblio_metadata' );
149 unless ( grep { $_ eq $table } @tables_allowed_for_select ) {
150 die "Cannot specify -t|--table with value '$table'. Only "
151 . ( join ', ', @tables_allowed_for_select )
156 # -v is for verbose, which seems backwards here because of how logging is set
157 # on the CLI of zebraidx. It works this way. The default is to not log much
# Two or more -v switches turn on full zebraidx logging.
158 if ($verbose_logging >= 2) {
159 $zebraidx_log_opt = '-v none,fatal,warn,all';
# No export directory was supplied: create a temporary one. CLEANUP is 0 when
# $keep_export is set, so File::Temp leaves the directory in place in that case.
163 unless ($directory) {
165 $directory = tempdir(CLEANUP => ($keep_export ? 0 : 1));
# Zebra data directories for the two servers, taken from the Koha configuration.
169 my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory};
170 my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
172 my $kohadir = C4::Context->config('intranetdir');
# MARC tag/subfield mapped to the biblionumber and biblioitemnumber columns;
# in the lines visible here they are only used by the verbose report.
174 my ($biblionumbertagfield,$biblionumbertagsubfield) = C4::Biblio::GetMarcFromKohaField( "biblio.biblionumber" );
175 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = C4::Biblio::GetMarcFromKohaField( "biblioitems.biblioitemnumber" );
# $marcxml_open / $marcxml_close are printed before and after the records in
# every exported_records file. The lines below sit inside q{} string literals
# (their closing lines are not part of this excerpt), so no comments are added
# between them.
177 my $marcxml_open = q{<?xml version="1.0" encoding="UTF-8"?>
178 <collection xmlns="http://www.loc.gov/MARC21/slim">
181 my $marcxml_close = q{
185 # Protect against simultaneous updates of the zebra index by using a lock file.
186 # Create our own lock directory if it is missing. This should be created
187 # by koha-zebra-ctl.sh or at system installation. If the desired directory
188 # does not exist and cannot be created, we fall back on /tmp - which will
# (the rest of the sentence above is not part of this excerpt)
# The three expressions after the declaration are the candidate lock
# directories, tried in the order listed.
191 my ($lockfile, $LockFH);
193 C4::Context->config("zebra_lockdir"),
194 '/var/lock/zebra_' . C4::Context->config('database'),
195 '/tmp/zebra_' . C4::Context->config('database')
197 #we try three possibilities (we really want to lock :)
# _create_lockfile() returns (filehandle, path) on success and an empty list
# on failure; stop at the first candidate that works.
199 ($LockFH, $lockfile) = _create_lockfile($_.'/rebuild');
200 last if defined $LockFH;
# NOTE(review): when every candidate failed, the list assignment above has
# left $lockfile undefined as well, so the first message below interpolates
# an undefined value instead of a path.
202 if( !defined $LockFH ) {
203 print "WARNING: Could not create lock file $lockfile: $!\n";
204 print "Please check your koha-conf.xml for ZEBRA_LOCKDIR.\n";
205 print "Verify file permissions for it too.\n";
206 $use_flock = 0; # we disable file locking now and will continue
208 # note that this mimics old behavior (before we used
# With -v, print the configuration values gathered so far.
212 if ( $verbose_logging ) {
213 print "Zebra configuration information\n";
214 print "================================\n";
215 print "Zebra biblio directory = $biblioserverdir\n";
216 print "Zebra authorities directory = $authorityserverdir\n";
217 print "Koha directory = $kohadir\n";
# The lock file path is only reported when a lock file could be created.
218 print "Lockfile = $lockfile\n" if $lockfile;
219 print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
220 print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
221 print "================================\n";
# XML parser used by export_marc_records_from_sth() to check that generated
# MARCXML parses before it is written out.
224 my $tester = XML::LibXML->new();
227 # The main work is done here by calling do_one_pass(). We have added locking
228 # to avoid race conditions between full rebuilds and incremental updates either from
229 # daemon mode or periodic invocation from cron. The race can lead to an updated
230 # record being overwritten by a rebuild if the update is applied after the export
231 # by the rebuild and before the rebuild finishes (more likely to affect large
234 # We have chosen to exit immediately by default if we cannot obtain the lock
235 # to prevent the potential for an infinite backlog from cron invocations, but an
236 # option (wait-for-lock) is provided to let the program wait for the lock.
237 # See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details.
240 # For incremental updates, skip the update if the updates are locked
# Non-blocking exclusive lock: if another process holds it, this iteration
# does nothing.
241 if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
# The database handle is fetched again on each pass.
243 $dbh = C4::Context->dbh;
244 if( zebraqueue_not_empty() ) {
# Flush the L1 caches before processing, but only when memcached is in use.
245 Koha::Caches->flush_L1_caches() if $is_memcached;
# An error left in $@ (by an eval in lines not shown) is reported only with -v.
249 if ($@ && $verbose_logging) {
250 warn "Warning : $@\n";
252 _flock($LockFH, LOCK_UN);
257 # all one-off invocations
# Block on the lock only when --wait-for-lock was given; otherwise give up
# immediately if it is busy.
258 my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
259 if (_flock($LockFH, $lock_mode)) {
260 $dbh = C4::Context->dbh;
262 _flock($LockFH, LOCK_UN);
264 print "Skipping rebuild/update because flock failed on $lockfile: $!\n";
# Final report and cleanup of the export directory. The conditions that
# choose between the "kept" messages and the deletion are partly in lines not
# shown in this excerpt.
269 if ( $verbose_logging ) {
270 print "====================\n";
272 print "====================\n";
275 print "NOTHING cleaned : the export $directory has been kept.\n";
276 print "You can re-run this script with the -s ";
278 print " and -d $directory parameters";
283 print "if you just want to rebuild zebra after changing zebra config files\n";
# Only a user-supplied directory is removed explicitly here.
285 unless ($use_tempdir) {
286 # if we're using a temporary directory
287 # created by File::Temp, it will be removed
289 rmtree($directory, 0, 1);
290 print "directory $directory deleted\n";
# Presumably the body of do_one_pass() (its `sub` line is not part of this
# excerpt): run index_records() for authorities and for biblios, each against
# its own Zebra server directory. The conditions selecting between indexing
# and the "skipping" messages are in lines not shown.
296 index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
298 print "skipping authorities\n" if ( $verbose_logging );
302 index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
304 print "skipping biblios\n" if ( $verbose_logging );
308 # Check the zebra update queue and return true if there are records to process
309 # This routine will handle each of -ab, -a, or -b, but in practice we force
310 # -ab when in daemon mode.
# Takes no arguments; reads the file-level $authorities, $biblios, $dbh and
# $verbose_logging. The return statement is in lines not shown here.
311 sub zebraqueue_not_empty {
# Pick the WHERE clause: all pending rows, or only those of one server.
314 if ($authorities && $biblios) {
315 $where_str = 'done = 0;';
317 $where_str = 'server = "biblioserver" AND done = 0;';
319 $where_str = 'server = "authorityserver" AND done = 0;';
# $where_str is one of the three fixed strings above, so the concatenation
# does not carry user input.
322 $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
# First column of the single result row is the number of pending entries.
325 my $count = $query->fetchrow_arrayref->[0];
326 print "queued records: $count\n" if $verbose_logging > 0;
330 # This checks to see if the zebra directories exist under the provided path.
331 # If they don't, then zebra is likely to spit the dummy. This returns true
332 # if the directories had to be created, false otherwise.
#
# Argument: the base directory of one Zebra server (a '/' is appended to it).
# Returns:  1 if at least one directory had to be created, 0 otherwise.
# Dies:     with "Unable to create ..." when a directory cannot be created.
# The test that decides whether a directory is missing is in a line that is
# not part of this excerpt.
333 sub check_zebra_dirs {
334 my ($base) = shift() . '/';
335 my $needed_repairing = 0;
# '' stands for the base directory itself; the others are its sub-directories.
336 my @dirs = ( '', 'key', 'register', 'shadow', 'tmp' );
337 foreach my $dir (@dirs) {
338 my $bdir = $base . $dir;
340 $needed_repairing = 1;
# Low-precedence `or` is required here: with `||` the expression parsed as
# mkdir($bdir || die ...), so a failed mkdir was silently ignored.
341 mkdir $bdir or die "Unable to create '$bdir': $!\n";
342 print "$0: needed to create '$bdir'\n";
345 return $needed_repairing;
346 } # ---------- end of subroutine check_zebra_dirs ----------
# Body of index_records() (its `sub` line is not part of this excerpt):
# export the records of one type ('biblio' or 'authority') into $directory
# and hand the export to zebraidx through do_indexing().
# Besides its arguments it reads the file-level $process_zebraqueue_skip_deletes,
# $reset and $noshadow.
349 my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
351 my $num_records_exported = 0;
352 my $records_deleted = {};
# True when Zebra directories were missing and had to be created.
353 my $need_reset = check_zebra_dirs($server_dir);
355 print "$0: found broken zebra server directories: forcing a rebuild\n";
358 if ($skip_export && $verbose_logging) {
359 print "====================\n";
360 print "SKIPPING $record_type export\n";
361 print "====================\n";
363 if ( $verbose_logging ) {
364 print "====================\n";
365 print "exporting $record_type\n";
366 print "====================\n";
# NOTE(review): none of the mkdir calls in this routine check their result.
368 mkdir "$directory" unless (-d $directory);
369 mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
# Incremental mode: export deletions to del_<type> and updates to upd_<type>.
370 if ($process_zebraqueue) {
373 unless ( $process_zebraqueue_skip_deletes ) {
374 $entries = select_zebraqueue_records($record_type, 'deleted');
375 mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
376 $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type");
# NOTE(review): queue rows are marked done here, before do_indexing() runs
# further down, so a failed zebraidx run leaves them flagged as processed.
377 mark_zebraqueue_batch_done($entries);
380 $entries = select_zebraqueue_records($record_type, 'updated');
381 mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
# Records already exported as deletions are passed in so they can be skipped.
382 $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $records_deleted);
383 mark_zebraqueue_batch_done($entries);
# Full mode: export every selected record, then clear the queue unless -y.
386 my $sth = select_all_records($record_type);
387 $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $nosanitize);
388 unless ($do_not_clear_zebraqueue) {
389 mark_all_zebraqueue_done($record_type);
395 # and reindexing everything
398 if ($verbose_logging) {
399 print "====================\n";
400 print "SKIPPING $record_type indexing\n";
401 print "====================\n";
404 if ( $verbose_logging ) {
405 print "====================\n";
406 print "REINDEXING zebra\n";
407 print "====================\n";
409 my $record_fmt = 'marcxml';
# Incremental mode runs zebraidx only for the parts that actually have
# records; full mode also runs it when the export step was skipped (-s).
410 if ($process_zebraqueue) {
411 do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
412 if %$records_deleted;
413 do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
414 if $num_records_exported;
416 do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
417 if ($num_records_exported or $skip_export);
# Fetch zebraqueue rows for one record type and one kind of change.
#   $record_type - 'biblio' selects 'biblioserver', anything else 'authorityserver'
#   $update_type - 'deleted' selects 'recordDelete', anything else 'specialUpdate'
# The rest of the SQL statement and the return are in lines not shown here.
423 sub select_zebraqueue_records {
424 my ($record_type, $update_type) = @_;
426 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
427 my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';
429 my $sth = $dbh->prepare("SELECT id, biblio_auth_number
435 $sth->execute($server, $op);
# With the {} slice argument, each row comes back as a hash reference.
436 my $entries = $sth->fetchall_arrayref({});
# Set done = 1 on zebraqueue rows for the server matching $record_type
# ('biblio' -> 'biblioserver', anything else -> 'authorityserver').
# The WHERE clause of the statement is in lines not shown in this excerpt.
439 sub mark_all_zebraqueue_done {
440 my ($record_type) = @_;
442 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
444 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
447 $sth->execute($server);
# Mark a batch of zebraqueue rows as done, one UPDATE per row id.
# $entries (unpacked in a line not shown) is an array reference of hashes
# that each carry an 'id' key.
450 sub mark_zebraqueue_batch_done {
# AutoCommit is switched off for the duration of the loop and back on at the
# end; presumably this groups the updates into a single transaction
# (see the DBI AutoCommit documentation) -- confirm.
453 $dbh->{AutoCommit} = 0;
454 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
456 foreach my $id (map { $_->{id} } @$entries) {
459 $dbh->{AutoCommit} = 1;
# Dispatch on the record type: 'biblio' uses select_all_biblios(), any other
# value uses select_all_authorities(). Returns whatever the chosen routine returns.
462 sub select_all_records {
463 my $record_type = shift;
464 return ($record_type eq 'biblio') ? select_all_biblios() : select_all_authorities();
# Build and prepare the SELECT that lists the authids to export.
# Reads the file-level $where, $length, $offset and $dbh; the execute and
# return are in lines not shown here.
467 sub select_all_authorities {
468 my $strsth=qq{SELECT authid FROM auth_header};
# $where is the raw --where text from the command line, interpolated as is.
469 $strsth.=qq{ WHERE $where } if ($where);
# LIMIT is only added when a length was given; an offset without a length
# is ignored by these two lines.
470 $strsth.=qq{ LIMIT $length } if ($length && !$offset);
471 $strsth.=qq{ LIMIT $offset,$length } if ($length && $offset);
472 my $sth = $dbh->prepare($strsth);
# Build and prepare the SELECT that lists the biblionumbers to export.
# Reads the file-level $table, $where, $length, $offset and $dbh; the execute
# and return are in lines not shown here.
477 sub select_all_biblios {
# Fall back to 'biblioitems' if $table is not in the whitelist, since the
# name is interpolated into the SQL below.
478 $table = 'biblioitems'
479 unless grep { $_ eq $table } @tables_allowed_for_select;
480 my $strsth = qq{ SELECT DISTINCT biblionumber FROM $table };
# $where is the raw --where text from the command line, interpolated as is.
481 $strsth.=qq{ WHERE $where } if ($where);
482 $strsth.=qq{ LIMIT $length } if ($length && !$offset);
# NOTE(review): unlike select_all_authorities(), this line tests only $offset.
# With --offset but no --length it appends "LIMIT <offset>," with nothing
# after the comma; `if ($length && $offset)` would match the sibling routine.
483 $strsth.=qq{ LIMIT $offset,$length } if ($offset);
484 my $sth = $dbh->prepare($strsth);
# Write every record produced by $sth to "$directory/exported_records" as one
# MARCXML collection and return the number of records exported.
#   $record_type - 'biblio' or 'authority'
#   $sth         - statement handle whose rows yield one record number each
#   $directory   - existing directory that receives the file
#   $nosanitize  - presumably selects the raw-XML path further down (the
#                  branching lines are not part of this excerpt) -- confirm
# Dies if the output file cannot be opened.
489 sub export_marc_records_from_sth {
490 my ($record_type, $sth, $directory, $nosanitize) = @_;
492 my $num_exported = 0;
# NOTE(review): the mode string ends in a space here, unlike the otherwise
# identical open calls in the two sibling export routines.
493 open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
495 print {$fh} $marcxml_open;
498 my ( $itemtag, $itemsubfield ) = C4::Biblio::GetMarcFromKohaField( "items.itemnumber" );
499 while (my ($record_number) = $sth->fetchrow_array) {
# Progress output with -v: a dot per record plus the counter every 100 records.
500 print "." if ( $verbose_logging );
501 print "\r$i" unless ($i++ %100 or !$verbose_logging);
# Raw-XML path: take the stored XML as is.
503 my $marcxml = $record_type eq 'biblio'
504 ? GetXmlBiblio( $record_number )
505 : GetAuthorityXML( $record_number );
# For biblios the item fields are not part of the stored XML and are added here.
506 if ($record_type eq 'biblio'){
507 my @items = GetItemsInfo($record_number);
# Scratch record that collects the item fields of all items.
509 my $record = MARC::Record->new;
510 $record->encoding('UTF-8');
512 foreach my $item (@items){
# This inner $record is a separate lexical that shadows the scratch record.
513 my $record = Item2Marc($item, $record_number);
514 push @itemsrecord, $record->field($itemtag);
516 $record->insert_fields_ordered(@itemsrecord);
517 my $itemsxml = $record->as_xml_record();
# Splice the two documents: drop the last 10 characters of the stored XML
# (presumably its closing "</record>\n") and append the items XML from just
# after its "</leader>\n" line.
519 substr($marcxml, 0, length($marcxml)-10) .
520 substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
523 # extra test to ensure that result is valid XML; otherwise
524 # Zebra won't parse it in DOM mode
526 my $doc = $tester->parse_string($marcxml);
529 warn "Error exporting record $record_number ($record_type): $@\n";
# Strip the per-record XML declaration; the collection wrapper has its own.
533 $marcxml =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
534 print {$fh} $marcxml;
# Sanitizing path: build the record through the Koha APIs and serialise it.
539 my ($marc) = get_corrected_marc_record($record_type, $record_number);
542 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
544 my $doc = $tester->parse_string($rec);
547 die "invalid XML: $@";
549 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
554 warn "Error exporting record $record_number ($record_type) XML";
555 warn "... specific error is $@" if $verbose_logging;
559 print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
560 print {$fh} $marcxml_close;
563 return $num_exported;
# Write the records named in a list of zebraqueue entries to
# "$directory/exported_records" and return the number exported.
#   $record_type     - 'biblio' or 'authority'
#   $entries         - array reference of hashes with a 'biblio_auth_number' key
#   $directory       - existing directory that receives the file
#   $records_deleted - hash reference whose keys are record numbers to skip
# Dies if the output file cannot be opened.
566 sub export_marc_records_from_list {
567 my ($record_type, $entries, $directory, $records_deleted) = @_;
569 my $num_exported = 0;
570 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
572 print {$fh} $marcxml_open;
576 # Skip any deleted records. We check for this anyway, but this reduces error spam
# %found starts as a copy of the deleted set; the post-increment inside the
# grep also drops record numbers that occur more than once in the entries.
577 my %found = %$records_deleted;
578 foreach my $record_number ( map { $_->{biblio_auth_number} }
579 grep { !$found{ $_->{biblio_auth_number} }++ }
# Progress output with -v: a dot per record plus the counter every 100 records.
581 print "." if ( $verbose_logging );
582 print "\r$i" unless ($i++ %100 or !$verbose_logging);
583 my ($marc) = get_corrected_marc_record($record_type, $record_number);
586 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
# Strip the per-record XML declaration; the collection wrapper has its own.
587 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
592 warn "Error exporting record $record_number ($record_type) XML";
596 print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
598 print {$fh} $marcxml_close;
601 return $num_exported;
# Write a stub MARC record for every queue entry to
# "$directory/exported_records" and return a hash reference whose keys are
# the record numbers written.
#   $record_type - 'biblio' or 'authority'
#   $entries     - array reference of hashes with a 'biblio_auth_number' key
#   $directory   - existing directory that receives the file
# Dies if the output file cannot be opened.
604 sub generate_deleted_marc_records {
606 my ($record_type, $entries, $directory) = @_;
608 my $records_deleted = {};
609 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
611 print {$fh} $marcxml_open;
614 foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
615 print "\r$i" unless ($i++ %100 or !$verbose_logging);
616 print "." if ( $verbose_logging );
# Start from an empty record; only the identifying fields are filled in.
618 my $marc = MARC::Record->new();
619 if ($record_type eq 'biblio') {
# The record number is passed for both ids, so fix_biblio_ids() does not need
# to look up the biblioitemnumber.
620 fix_biblio_ids($marc, $record_number, $record_number);
622 fix_authority_id($marc, $record_number);
624 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
625 fix_unimarc_100($marc);
628 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
629 # Remove the record's XML header
630 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
633 $records_deleted->{$record_number} = 1;
# The loop counter $i is reported here as the number of records.
635 print "\nRecords exported: $i\n" if ( $verbose_logging );
637 print {$fh} $marcxml_close;
640 return $records_deleted;
# Fetch one record through get_raw_marc_record() and, if it was found, apply
# the fix-ups Zebra needs. The return statement is in lines not shown here.
#   $record_type   - 'biblio' or 'authority'
#   $record_number - biblionumber or authid
643 sub get_corrected_marc_record {
644 my ( $record_type, $record_number ) = @_;
646 my $marc = get_raw_marc_record( $record_type, $record_number );
# Nothing below runs for a record that could not be retrieved.
648 if ( defined $marc ) {
650 if ( $record_type eq 'authority' ) {
651 fix_authority_id( $marc, $record_number );
653 elsif ( $record_type eq 'biblio' ) {
# Record-processor filters applied to biblios; the see-from filter is only
# added when the IncludeSeeFromInSearches preference is set.
656 push @filters, 'EmbedItemsAvailability';
657 push @filters, 'EmbedSeeFromHeadings'
658 if C4::Context->preference('IncludeSeeFromInSearches');
660 my $normalizer = Koha::RecordProcessor->new( { filters => \@filters } );
661 $marc = $normalizer->process($marc);
663 if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
664 fix_unimarc_100($marc);
# Retrieve one record through the Koha API for its type. Exceptions raised by
# the API are trapped with eval and reported with a warning. The checks on $@
# and the return statement are in lines not shown in this excerpt.
#   $record_type   - 'biblio' uses GetMarcBiblio, the other branch GetAuthority
#   $record_number - biblionumber or authid
671 sub get_raw_marc_record {
672 my ($record_type, $record_number) = @_;
675 if ($record_type eq 'biblio') {
# Items are embedded in the biblio record at retrieval time.
676 eval { $marc = C4::Biblio::GetMarcBiblio({ biblionumber => $record_number, embed_items => 1 }); };
678 # here we do warn since catching an exception
679 # means that the bib was found but failed
681 warn "error retrieving biblio $record_number";
685 eval { $marc = GetAuthority($record_number); };
687 warn "error retrieving authority $record_number";
695 # FIXME - this routine is suspect
696 # It blanks the Leader/00-05 and Leader/12-16 to
697 # force them to be recalculated correctly when
698 # the $marc->as_usmarc() or $marc->as_xml() is called.
699 # But why is this necessary? It would be a serious bug
700 # in MARC::Record (definitely) and MARC::File::XML (arguably)
701 # if they are emitting incorrect leader values.
# The `sub` line and the unpacking of $marc are not part of this excerpt.
704 my $leader = $marc->leader;
# Overwrite leader positions 0-4, then positions 10-16 (starting with '22').
# NOTE(review): as the literals appear in this excerpt they are shorter than
# the spans they replace (5 and 7 characters); confirm against the original
# file that they are padded to full width, otherwise the leader shrinks.
705 substr($leader, 0, 5) = ' ';
706 substr($leader, 10, 7) = '22 ';
# Store the result back, cut to 24 characters.
707 $marc->leader(substr($leader, 0, 24));
711 # FIXME - it is essential to ensure that the biblionumber is present,
712 # otherwise, Zebra will choke on the record. However, this
713 # logic belongs in the relevant C4::Biblio APIs.
# Arguments are taken with shift: the biblionumber, then an optional
# biblioitemnumber. The `sub` line, the unpacking of $marc and the condition
# choosing between the two branches below are in lines not shown here.
715 my $biblionumber = shift;
716 my $biblioitemnumber;
# Either the caller supplied the biblioitemnumber ...
718 $biblioitemnumber = shift;
# ... or it is looked up from the biblioitems table.
720 my $sth = $dbh->prepare(
721 "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
722 $sth->execute($biblionumber);
723 ($biblioitemnumber) = $sth->fetchrow_array;
725 unless ($biblioitemnumber) {
726 warn "failed to get biblioitemnumber for biblio $biblionumber";
731 # FIXME - this is cheating on two levels
732 # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
733 # 2. Making sure that the biblionumber and biblioitemnumber are correct and
734 # present in the MARC::Record object ought to be part of GetMarcBiblio.
736 # On the other hand, this is better for now than what rebuild_zebra.pl used to
737 # do, which was duplicate the code for inserting the biblionumber
738 # and biblioitemnumber
739 C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);
# Make sure field 001 of an authority record holds the given authid.
#   $marc   - MARC::Record to modify in place
#   $authid - authority id that field 001 must contain
744 sub fix_authority_id {
745 # FIXME - as with fix_biblio_ids, the authid must be present
746 # for Zebra's sake. However, this really belongs
747 # in C4::AuthoritiesMarc.
748 my ($marc, $authid) = @_;
# Replace 001 when it is missing or holds a different value.
749 unless ($marc->field('001') and $marc->field('001')->data() eq $authid){
750 $marc->delete_field($marc->field('001'));
751 $marc->insert_fields_ordered(MARC::Field->new('001',$authid));
# Ensure a UNIMARC record has a 100$a subfield of the expected length,
# replacing the field with a generated default when it does not.
# The unpacking of $marc and the declaration of $string are in lines not
# shown in this excerpt.
755 sub fix_unimarc_100 {
756 # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
760 my $length_100a = length($marc->subfield( 100, "a" ));
# An existing 100$a of exactly 36 characters is kept as the value to use.
761 if ( $length_100a and $length_100a == 36 ) {
762 $string = $marc->subfield( 100, "a" );
763 my $f100 = $marc->field(100);
764 $marc->delete_field($f100);
# Otherwise build a default: today's date as YYYYMMDD, left-justified and
# space-padded to 35 characters.
# NOTE(review): the default is padded to 35 characters while the validity
# tests in this routine look for 36 -- confirm which width is intended.
767 $string = POSIX::strftime( "%Y%m%d", localtime );
769 $string = sprintf( "%-*s", 35, $string );
# Positions 22-27 are overwritten with "frey50".
771 substr( $string, 22, 6, "frey50" );
# Re-read the record: insert the 100 field unless a valid one is present.
772 $length_100a = length($marc->subfield( 100, "a" ));
773 unless ( $length_100a and $length_100a == 36 ) {
774 $marc->delete_field($marc->field(100));
775 $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
# Body of do_indexing() (its `sub` line is not part of this excerpt): run
# zebraidx against one exported directory.
#   $record_type       - 'biblio' or anything else (treated as authority)
#   $op                - zebraidx command; callers pass 'update' or 'adelete'
#   $record_dir        - directory holding the exported records
#   $reset_index       - when true, run "zebraidx ... init" first
#   $noshadow          - when true, the final "commit" step is skipped
#   $record_format     - value for zebraidx -g
#   $zebraidx_log_opt  - logging switch inserted into every command line
780 my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format, $zebraidx_log_opt) = @_;
782 my $zebra_server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
783 my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
784 my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
785 my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
# What this branch assigns is in lines not shown; $noshadow is interpolated
# into the second command line below.
789 if ($noshadow or $reset_index) {
# NOTE(review): the commands are single strings, so they go through the shell
# with configuration paths and $record_dir interpolated unquoted, and the
# exit status of system() is not checked.
793 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name init") if $reset_index;
794 system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
795 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
# Wrapper around flock() that degrades to "always succeeds" when locking is
# unavailable or has been disabled through the file-level $use_flock.
# The `sub` line and the unpacking of ($fh, $op) are not part of this excerpt.
799 # test if flock is present; if so, use it; if not, return true
800 # op refers to the official flock operations including LOCK_EX,
802 # combining LOCK_EX with LOCK_NB returns immediately
# First call: $use_flock is still undefined, so probe flock() once.
804 if( !defined($use_flock) ) {
805 #check if flock is present; if not, you will have a fatal error
806 my $lock_acquired = eval { flock($fh, $op) };
807 # assuming that $fh and $op are fine(..), an undef $lock_acquired
# Remember the outcome so later calls skip the probe.
809 $use_flock = defined($lock_acquired) ? 1 : 0;
810 print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock;
811 return 1 if !$use_flock;
812 return $lock_acquired;
# Later calls: pretend success when locking is disabled, otherwise lock for real.
814 return 1 if !$use_flock;
815 return flock($fh, $op);
# Create the lock directory if needed and open the lock file inside it.
# Returns (filehandle, lock file path) on success. On failure the bare
# `return` yields an empty list in the list context used by the caller.
# The unpacking of $dir is in a line not shown in this excerpt.
819 sub _create_lockfile { #returns undef on failure
# Errors from mkpath are trapped by the eval; the open below then decides
# whether the directory is usable.
822 eval { mkpath($dir, 0, oct(755)) };
# The file name comes from the LOCK_FILENAME constant.
825 return if !open my $fh, q{>}, $dir.'/'.LOCK_FILENAME;
826 return ( $fh, $dir.'/'.LOCK_FILENAME );
831 $0: reindex MARC bibs and/or authorities in Zebra.
833 Use this batch job to reindex all biblio or authority
834 records in your Koha database.
838 -b index bibliographic records
840 -a index authority records
842 -daemon Run in daemon mode. The program will loop checking
843 for entries on the zebraqueue table, processing
844 them incrementally if present, and then sleep
845 for a few seconds before repeating the process
846 Checking the zebraqueue table is done with a cheap
847 SQL query. This allows for near realtime update of
848 the zebra search index with low system overhead.
849 Use -sleep to control the checking interval.
851 Daemon mode implies -z, -a, -b. The program will
852 refuse to start if options are present that do not
853 make sense while running as an incremental update
854 daemon (e.g. -r or -offset).
856 -sleep 10 Seconds to sleep between checks of the zebraqueue
857 table in daemon mode. The default is 5 seconds.
859 -z select only updated and deleted
860 records marked in the zebraqueue
861 table. Cannot be used with -r
864 --skip-deletes only select record updates, not record
865 deletions, to avoid potential excessive
866 I/O when zebraidx processes deletions.
867 If this option is used for normal indexing,
868 a cronjob should be set up to run
869 rebuild_zebra.pl -z without --skip-deletes
871 Only effective with -z.
873 -r clear Zebra index before
874 adding records to index. Implies -w.
876 -d Temporary directory for indexing.
877 If not specified, one is automatically
878 created. The export directory
879 is automatically deleted unless
880 you supply the -k switch.
882 -k Do not delete export directory.
884 -s Skip export. Used if you have
885 already exported the records
888 -nosanitize export biblio/authority records directly from DB marcxml
889 field without sanitizing records. It speed up
890 dump process but could fail if DB contains badly
891 encoded records. Works only with -x,
893 -w skip shadow indexing for this batch
895 -y do NOT clear zebraqueue after indexing; normally,
896 after doing batch indexing, zebraqueue should be
897 marked done for the affected record type(s) so that
898 a running zebraqueue_daemon doesn't try to reindex
899 the same records - specify -y to override this.
900 Cannot be used with -z.
902 -v increase the amount of logging. Normally only
903 warnings and errors from the indexing are shown.
904 Use log level 2 (-v -v) to include all Zebra logs.
906 --length 1234 how many biblio you want to export
907 --offset 1243 offset you want to start to
908 example: --offset 500 --length=500 will result in a LIMIT 500,1000 (exporting 1000 records, starting by the 500th one)
909 note that the numbers are NOT related to biblionumber, that's the intended behaviour.
910 --where let you specify a WHERE query, like itemtype='BOOK'
911 or something like that
913 --run-as-root explicitily allow script to run as 'root' user
915 --wait-for-lock when not running in daemon mode, the default
916 behavior is to abort a rebuild if the rebuild
917 lock is busy. This option will cause the program
918 to wait for the lock to free and then continue
919 processing the rebuild request,
921 --table specify a table (can be items, biblioitems, biblio, biblio_metadata) to retrieve biblionumber to index.
922 biblioitems is the default value.
924 --help or -h show this message.