Koha/misc/migration_tools/rebuild_zebra.pl


								#!/usr/bin/perl


								use strict;

								#use warnings; FIXME - Bug 2505


								use C4::Context;

								use Getopt::Long;

								use File::Temp qw/ tempdir /;

								use File::Path;

								use C4::Biblio;

								use C4::AuthoritiesMarc;

								use C4::Items;

								use Koha::RecordProcessor;


								#

								# Script that checks the zebradir structure and creates directories & mandatory files if needed

								#

								#


								# Unbuffer STDOUT so progress output shows up immediately.
								$| = 1;

								# A cron job may start us in a directory we cannot read; move to $HOME
								# in that case, otherwise we will break later on.
								chdir $ENV{HOME} unless -r '.';

								# Command-line switches, filled in by GetOptions() below.
								my (
								    $directory,                  # -d
								    $nosanitize,                 # -nosanitize
								    $skip_export,                # -s
								    $keep_export,                # -k
								    $skip_index,                 # -I / --skip-index
								    $reset,                      # -r / --reset
								    $biblios,                    # -b
								    $authorities,                # -a
								    $noxml,                      # -noxml
								    $noshadow,                   # -w
								    $do_munge,                   # --munge-config
								    $want_help,                  # -h / --help
								    $as_xml,                     # -x
								    $process_zebraqueue,         # -z
								    $do_not_clear_zebraqueue,    # -y
								    $length,                     # --length
								    $where,                      # --where
								    $offset,                     # --offset
								);

								# -v may be repeated; each occurrence bumps the level by one.
								my $verbose_logging = 0;

								# Default zebraidx log selection (quiet: fatal errors and warnings only).
								my $zebraidx_log_opt = " -v none,fatal,warn ";

								my $result = GetOptions(
								    'd:s'          => \$directory,
								    'r|reset'      => \$reset,
								    's'            => \$skip_export,
								    'k'            => \$keep_export,
								    'I|skip-index' => \$skip_index,
								    'nosanitize'   => \$nosanitize,
								    'b'            => \$biblios,
								    'noxml'        => \$noxml,
								    'w'            => \$noshadow,
								    'munge-config' => \$do_munge,
								    'a'            => \$authorities,
								    'h|help'       => \$want_help,
								    'x'            => \$as_xml,
								    'y'            => \$do_not_clear_zebraqueue,
								    'z'            => \$process_zebraqueue,
								    'where:s'      => \$where,
								    'length:i'     => \$length,
								    'offset:i'     => \$offset,
								    'v+'           => \$verbose_logging,
								);


								# Show the usage text on request, or when the command line could not be
								# parsed.  A parse failure exits non-zero so that wrappers and cron can
								# tell it apart from an explicit --help.
								if (not $result or $want_help) {
								    print_usage();
								    exit( $result ? 0 : 1 );
								}

								# At least one record type has to be selected.
								if (not $biblios and not $authorities) {
								    my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
								    $msg   .= "Please do '$0 --help' to see usage.\n";
								    die $msg;
								}

								# -nosanitize dumps the stored MARCXML as-is, so it is only accepted
								# together with an XML export (-x).
								if ( !$as_xml and $nosanitize ) {
								    my $msg = "Cannot specify -nosanitize without -x\n";
								    $msg   .= "Please do '$0 --help' to see usage.\n";
								    die $msg;
								}

								# Queue processing (-z) always exports and never resets the index.
								if ($process_zebraqueue and ($skip_export or $reset)) {
								    my $msg = "Cannot specify -r or -s if -z is specified\n";
								    $msg   .= "Please do '$0 --help' to see usage.\n";
								    die $msg;
								}

								if ($process_zebraqueue and $do_not_clear_zebraqueue) {
								    my $msg = "Cannot specify both -y and -z\n";
								    $msg   .= "Please do '$0 --help' to see usage.\n";
								    die $msg;
								}

								# A reset implies indexing without the shadow register (-w).
								if ($reset) {
								    $noshadow = 1;
								}

								# From here on $noshadow holds the literal zebraidx switch (or stays false).
								if ($noshadow) {
								    $noshadow = ' -n ';
								}

								#  -v is for verbose, which seems backwards here because of how logging is set
								#    on the CLI of zebraidx.  It works this way.  The default is to not log much;
								#    at level 2 and above every zebraidx log category is enabled.
								if ($verbose_logging >= 2) {
								    $zebraidx_log_opt = '-v none,fatal,warn,all';
								}


								# Export into a File::Temp directory when -d was not given.  CLEANUP is
								# switched off when -k asks for the export to be kept.
								my $use_tempdir = 0;
								if ( !$directory ) {
								    $directory   = tempdir( CLEANUP => ( $keep_export ? 0 : 1 ) );
								    $use_tempdir = 1;
								}

								# Zebra data directories for both servers, as configured in Koha.
								my $biblioserverdir    = C4::Context->zebraconfig('biblioserver')->{directory};
								my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};

								my $kohadir = C4::Context->config('intranetdir');

								# Index modes fall back to 'grs1' (biblios) and 'dom' (authorities)
								# when the configuration does not set them.
								my $bib_index_mode  = C4::Context->config('zebra_bib_index_mode')  || 'grs1';
								my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') || 'dom';

								my $dbh = C4::Context->dbh;

								# MARC tag/subfield the biblionumber and biblioitemnumber are mapped to.
								my ( $biblionumbertagfield, $biblionumbertagsubfield ) =
								    GetMarcFromKohaField( "biblio.biblionumber", "" );
								my ( $biblioitemnumbertagfield, $biblioitemnumbertagsubfield ) =
								    GetMarcFromKohaField( "biblioitems.biblioitemnumber", "" );

								if ($verbose_logging) {
								    print "Zebra configuration information\n",
								          "================================\n",
								          "Zebra biblio directory      = $biblioserverdir\n",
								          "Zebra authorities directory = $authorityserverdir\n",
								          "Koha directory              = $kohadir\n",
								          "BIBLIONUMBER in :     $biblionumbertagfield\$$biblionumbertagsubfield\n",
								          "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n",
								          "================================\n";
								}


								munge_config() if $do_munge;

								# Authorities are processed first, then biblios.  Each job is
								# [ selected?, record type, label for the "skipping" message, server dir ].
								foreach my $job (
								    [ $authorities, 'authority', 'authorities', $authorityserverdir ],
								    [ $biblios,     'biblio',    'biblios',     $biblioserverdir ],
								) {
								    my ( $selected, $record_type, $label, $server_dir ) = @$job;
								    if ($selected) {
								        index_records(
								            $record_type, $directory, $skip_export, $skip_index,
								            $process_zebraqueue, $as_xml, $noxml, $nosanitize,
								            $do_not_clear_zebraqueue, $verbose_logging,
								            $zebraidx_log_opt, $server_dir
								        );
								    }
								    elsif ($verbose_logging) {
								        print "skipping $label\n";
								    }
								}

								if ($verbose_logging) {
								    print "====================\n",
								          "CLEANING\n",
								          "====================\n";
								}

								if ($keep_export) {
								    # -k: leave the export in place and tell the user how to reuse it.
								    my $rerun_hint = $use_tempdir
								        ? " and -d $directory parameters"
								        : "parameter";
								    print "NOTHING cleaned : the export $directory has been kept.\n";
								    print "You can re-run this script with the -s ";
								    print $rerun_hint;
								    print "\n";
								    print "if you just want to rebuild zebra after changing the record.abs\n";
								    print "or another zebra config file\n";
								}
								elsif ( !$use_tempdir ) {
								    # A directory given with -d is removed here; a File::Temp directory
								    # is removed automatically and needs no action.
								    rmtree( $directory, 0, 1 );
								    print "directory $directory deleted\n";
								}


								# Make sure the Zebra directory layout exists under the given base path.
								# If a directory is missing, zebra is likely to spit the dummy, so the base
								# directory and its 'key', 'register', 'shadow' and 'tmp' subdirectories are
								# created when absent.
								#
								# Takes the base path; returns 1 if anything had to be created, 0
								# otherwise.  Dies if a missing directory cannot be created.
								sub check_zebra_dirs {
								    my ($base) = shift() . '/';
								    my $needed_repairing = 0;

								    # '' stands for the base directory itself.
								    foreach my $dir ( '', 'key', 'register', 'shadow', 'tmp' ) {
								        my $bdir = $base . $dir;
								        next if -d $bdir;

								        $needed_repairing = 1;

								        # Low-precedence 'or' so the die guards mkdir's result rather
								        # than the (always true) path string.
								        mkdir($bdir) or die "Unable to create '$bdir': $!\n";
								        print "$0: needed to create '$bdir'\n";
								    }
								    return $needed_repairing;
								}	# ----------  end of subroutine check_zebra_dirs  ----------


								# Export the records of one type and feed them to zebraidx.
								#
								# Parameters (all positional):
								#   $record_type             'biblio' or 'authority'
								#   $directory               export directory
								#   $skip_export             true: reuse an existing export
								#   $skip_index              true: export only, do not call zebraidx
								#   $process_zebraqueue      true: handle only records queued in zebraqueue
								#   $as_xml                  true: export MARCXML instead of ISO 2709
								#   $noxml                   passed to the record readers (read the MARC blob)
								#   $nosanitize              passed to the full export (dump stored MARCXML)
								#   $do_not_clear_zebraqueue true: leave zebraqueue untouched after a full export
								#   $verbose_logging         verbosity level
								#   $zebraidx_log_opt        log switch handed to zebraidx
								#   $server_dir              Zebra data directory for this record type
								#
								# Also reads and writes the file-level $reset and reads $noshadow.
								sub index_records {
								    my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;

								    my $num_records_exported = 0;
								    my $records_deleted;

								    # Missing Zebra directories mean the index is unusable: force a reset.
								    my $need_reset = check_zebra_dirs($server_dir);
								    if ($need_reset) {
								        print "$0: found broken zebra server directories: forcing a rebuild\n";
								        $reset = 1;
								    }

								    # The skip decision must not depend on the verbosity level; only the
								    # banner does.
								    if ($skip_export) {
								        if ($verbose_logging) {
								            print "====================\n";
								            print "SKIPPING $record_type export\n";
								            print "====================\n";
								        }
								    } else {
								        if ( $verbose_logging ) {
								            print "====================\n";
								            print "exporting $record_type\n";
								            print "====================\n";
								        }
								        mkdir "$directory" unless (-d $directory);
								        mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
								        if ($process_zebraqueue) {
								            # Deletions first, so that updates for records deleted in the
								            # same batch can be filtered out of the update export.
								            my $entries = select_zebraqueue_records($record_type, 'deleted');
								            mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
								            $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml);
								            mark_zebraqueue_batch_done($entries);
								            $entries = select_zebraqueue_records($record_type, 'updated');
								            mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
								            $num_records_exported = export_marc_records_from_list($record_type,
								                                                                  $entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted);
								            mark_zebraqueue_batch_done($entries);
								        } else {
								            my $sth = select_all_records($record_type);
								            $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize);
								            unless ($do_not_clear_zebraqueue) {
								                mark_all_zebraqueue_done($record_type);
								            }
								        }
								    }

								    #
								    # and reindexing everything
								    #
								    if ($skip_index) {
								        if ($verbose_logging) {
								            print "====================\n";
								            print "SKIPPING $record_type indexing\n";
								            print "====================\n";
								        }
								    } else {
								        if ( $verbose_logging ) {
								            print "====================\n";
								            print "REINDEXING zebra\n";
								            print "====================\n";
								        }
								        my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
								        if ($process_zebraqueue) {
								            # $records_deleted stays undefined when nothing was exported;
								            # check it before dereferencing.
								            do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
								                if ($records_deleted and %$records_deleted);
								            do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
								                if $num_records_exported;
								        } else {
								            # With -s there is no fresh count, so index whatever the
								            # previous export left in place.
								            do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
								                if ($num_records_exported or $skip_export);
								        }
								    }
								}


								# Fetch the pending zebraqueue rows for one record type and operation.
								#
								#   $record_type  'biblio' selects the biblioserver, anything else the
								#                 authorityserver
								#   $update_type  'deleted' selects recordDelete rows, anything else
								#                 specialUpdate rows
								#
								# Returns an arrayref of hashrefs with the keys id and biblio_auth_number,
								# newest queue entry first.
								sub select_zebraqueue_records {
								    my ($record_type, $update_type) = @_;

								    my $server    = 'authorityserver';
								    my $operation = 'specialUpdate';
								    $server    = 'biblioserver' if $record_type eq 'biblio';
								    $operation = 'recordDelete' if $update_type eq 'deleted';

								    my $sth = $dbh->prepare("SELECT id, biblio_auth_number

								                             FROM zebraqueue

								                             WHERE server = ?

								                             AND   operation = ?

								                             AND   done = 0

								                             ORDER BY id DESC");
								    $sth->execute($server, $operation);
								    return $sth->fetchall_arrayref({});
								}


								# Flag every pending zebraqueue row of one server as done.
								#
								#   $record_type  'biblio' selects the biblioserver, anything else the
								#                 authorityserver
								#
								# Returns whatever the statement's execute() returns.
								sub mark_all_zebraqueue_done {
								    my ($record_type) = @_;

								    my $server = 'authorityserver';
								    if ($record_type eq 'biblio') {
								        $server = 'biblioserver';
								    }

								    my $update = $dbh->prepare("UPDATE zebraqueue SET done = 1

								                             WHERE server = ?

								                             AND done = 0");
								    return $update->execute($server);
								}


								# Flag the given zebraqueue rows as done, all in one transaction.
								#
								#   $entries  arrayref of hashrefs, each with an 'id' key (as returned by
								#             select_zebraqueue_records)
								sub mark_zebraqueue_batch_done {
								    my ($entries) = @_;

								    # Batch the updates into a single transaction.
								    $dbh->{AutoCommit} = 0;
								    my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
								    foreach my $entry (@$entries) {
								        $sth->execute( $entry->{id} );
								    }

								    # Commit after the updates have been issued, not before, so the
								    # batch does not depend on the AutoCommit switch below to be saved.
								    $dbh->commit();
								    $dbh->{AutoCommit} = 1;
								}


								# Dispatch to the full-table selector for the given record type:
								# 'biblio' goes to select_all_biblios(), anything else to
								# select_all_authorities().  Returns that selector's statement handle.
								sub select_all_records {
								    my ($record_type) = @_;

								    if ($record_type eq 'biblio') {
								        return select_all_biblios();
								    }
								    return select_all_authorities();
								}


								# Select the authid of every authority record, honouring the file-level
								# --where, --length and --offset options.  A LIMIT clause is only added
								# when a length was given.  Returns the executed statement handle.
								sub select_all_authorities {
								    my $query = qq{SELECT authid FROM auth_header};

								    if ($where) {
								        $query .= qq{ WHERE $where };
								    }
								    if ($length) {
								        $query .= $offset
								                ? qq{ LIMIT $offset,$length }
								                : qq{ LIMIT $length };
								    }

								    my $sth = $dbh->prepare($query);
								    $sth->execute();
								    return $sth;
								}


								# Select the biblionumber of every biblioitems row, honouring the
								# file-level --where, --length and --offset options.  Returns the executed
								# statement handle.
								#
								# As in select_all_authorities(), a LIMIT clause is only added when a
								# length was given; an offset on its own is ignored rather than producing
								# the invalid SQL "LIMIT <offset>,".
								sub select_all_biblios {
								    my $strsth = qq{ SELECT biblionumber FROM biblioitems };
								    $strsth.=qq{ WHERE $where } if ($where);
								    $strsth.=qq{ LIMIT $length } if ($length && !$offset);
								    $strsth.=qq{ LIMIT $offset,$length } if ($length && $offset);
								    my $sth = $dbh->prepare($strsth);
								    $sth->execute();
								    return $sth;
								}


								# Decide whether an export file needs the XML declaration and the
								# <collection> root element.
								#
								# Returns 1 only for an XML export ($as_xml true) of a record type whose
								# configured index mode is 'dom'; returns 0 in every other case.
								sub include_xml_wrapper {
								    my ($as_xml, $record_type) = @_;

								    return 0 unless $as_xml;

								    # Index mode configured for each known record type.
								    my %index_mode_for = (
								        biblio    => $bib_index_mode,
								        authority => $auth_index_mode,
								    );
								    if ( exists $index_mode_for{$record_type}
								        and $index_mode_for{$record_type} eq 'dom' )
								    {
								        return 1;
								    }
								    return 0;
								}


								# Export every record returned by a statement handle into
								# "$directory/exported_records".
								#
								#   $record_type  'biblio' or 'authority'
								#   $sth          executed statement handle yielding one record number per row
								#   $directory    existing directory to write the export file into
								#   $as_xml       true: write MARCXML, false: write ISO 2709
								#   $noxml        passed on to get_corrected_marc_record()
								#   $nosanitize   true: dump the stored MARCXML directly instead of
								#                 round-tripping through MARC::Record
								#
								# Returns the number of records written.  Dies if the export file cannot
								# be opened.
								sub export_marc_records_from_sth {
								    my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;

								    my $num_exported = 0;
								    open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
								    my $with_wrapper = include_xml_wrapper($as_xml, $record_type);
								    if ($with_wrapper) {
								        # include XML declaration and root element
								        print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
								    }
								    my $i = 0;
								    my ($itemtag) = GetMarcFromKohaField("items.itemnumber",'');
								    while (my ($record_number) = $sth->fetchrow_array) {
								        print "." if ( $verbose_logging );
								        print "\r$i" unless ($i++ %100 or !$verbose_logging);
								        if ( $nosanitize ) {
								            my $marcxml = $record_type eq 'biblio'
								                          ? GetXmlBiblio( $record_number )
								                          : GetAuthorityXML( $record_number );

								            # Only splice item fields into a record that actually has
								            # MARCXML; otherwise a dangling fragment would be written.
								            if ($marcxml and $record_type eq 'biblio'){
								                my @items = GetItemsInfo($record_number);
								                if (@items){
								                    # Collect the item fields in a scratch record so they
								                    # can be serialised to XML in one go.
								                    my $record = MARC::Record->new;
								                    $record->encoding('UTF-8');
								                    my @itemsrecord;
								                    foreach my $item (@items){
								                        my $item_record = Item2Marc($item, $record_number);
								                        push @itemsrecord, $item_record->field($itemtag);
								                    }
								                    $record->insert_fields_ordered(@itemsrecord);
								                    my $itemsxml = $record->as_xml_record();

								                    # Drop the last 10 characters of the biblio XML
								                    # (presumably the closing "</record>\n" -- TODO confirm)
								                    # and append everything after the scratch record's
								                    # leader, i.e. the item fields plus its closing tag.
								                    $marcxml =
								                        substr($marcxml, 0, length($marcxml)-10) .
								                        substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
								                }
								            }
								            if ( $marcxml ) {
								                print {$fh} $marcxml;
								                $num_exported++;
								            }
								            next;
								        }
								        my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
								        if (defined $marc) {
								            eval {
								                my $rec;
								                if ($as_xml) {
								                    $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
								                    # One declaration per file is enough; strip the per-record one.
								                    $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
								                } else {
								                    $rec = $marc->as_usmarc();
								                }
								                print {$fh} $rec;
								                $num_exported++;
								            };
								            if ($@) {
								              # Report the underlying error too, not just the record number.
								              warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML").": $@";
								            }
								        }
								    }
								    print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
								    print {$fh} '</collection>' if $with_wrapper;

								    # Buffered write errors only surface at close time.
								    close $fh or warn "Error closing $directory/exported_records: $!";
								    return $num_exported;
								}


								# Export the records named in a list of zebraqueue entries into
								# "$directory/exported_records".
								#
								#   $record_type      'biblio' or 'authority'
								#   $entries          arrayref of hashrefs with a biblio_auth_number key
								#   $directory        existing directory to write the export file into
								#   $as_xml           true: write MARCXML, false: write ISO 2709
								#   $noxml            passed on to get_corrected_marc_record()
								#   $records_deleted  hashref keyed by record numbers already exported as
								#                     deletions; those are skipped
								#
								# Returns the number of records actually written.  Dies if the export
								# file cannot be opened.
								sub export_marc_records_from_list {
								    my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_;

								    my $num_exported = 0;
								    open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
								    my $with_wrapper = include_xml_wrapper($as_xml, $record_type);
								    if ($with_wrapper) {
								        # include XML declaration and root element
								        print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
								    }
								    my $i = 0;

								    # Skip any deleted records. We check for this anyway, but this reduces error spam.
								    # The same hash also filters out repeated queue entries for one record.
								    my %found = $records_deleted ? %$records_deleted : ();
								    foreach my $record_number ( map { $_->{biblio_auth_number} }
								                                grep { !$found{ $_->{biblio_auth_number} }++ }
								                                @$entries ) {
								        print "." if ( $verbose_logging );
								        print "\r$i" unless ($i++ %100 or !$verbose_logging);
								        my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
								        if (defined $marc) {
								            eval {
								                my $rec;
								                if ($as_xml) {
								                    $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
								                    # One declaration per file is enough; strip the per-record one.
								                    $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
								                } else {
								                    $rec = $marc->as_usmarc();
								                }
								                print {$fh} $rec;

								                # Count a record once, and only when it was written.
								                $num_exported++;
								            };
								            if ($@) {
								              warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML").": $@";
								            }
								        }
								    }
								    print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
								    print {$fh} '</collection>' if $with_wrapper;
								    close $fh;
								    return $num_exported;
								}


								# Write one stub MARC record per queued deletion into
								# "$directory/exported_records".  Each stub is an empty record that only
								# carries the identifiers set by fix_biblio_ids()/fix_authority_id().
								#
								#   $record_type  'biblio' or 'authority'
								#   $entries      arrayref of hashrefs with a biblio_auth_number key
								#   $directory    existing directory to write the export file into
								#   $as_xml       true: write MARCXML, false: write ISO 2709
								#
								# Returns a hashref whose keys are the record numbers written.
								sub generate_deleted_marc_records {
								    my ($record_type, $entries, $directory, $as_xml) = @_;

								    my %deleted;
								    open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;

								    # include XML declaration and root element
								    print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>'
								        if include_xml_wrapper($as_xml, $record_type);

								    my $i = 0;
								    my @record_numbers = map { $_->{biblio_auth_number} } @$entries;
								    for my $record_number (@record_numbers) {
								        # The counter is bumped before it is shown, so the progress
								        # figure is one past the zero-based position.
								        my $at_hundred_boundary = ( $i++ % 100 ) == 0;
								        if ($verbose_logging) {
								            print "\r$i" if $at_hundred_boundary;
								            print ".";
								        }

								        my $marc = MARC::Record->new();
								        if ( $record_type eq 'biblio' ) {
								            # The record number doubles as the biblioitemnumber here.
								            fix_biblio_ids( $marc, $record_number, $record_number );
								        }
								        else {
								            fix_authority_id( $marc, $record_number );
								        }
								        fix_unimarc_100($marc)
								            if C4::Context->preference("marcflavour") eq "UNIMARC";

								        my $rec;
								        if ($as_xml) {
								            $rec = $marc->as_xml_record( C4::Context->preference('marcflavour') );
								            $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
								        }
								        else {
								            $rec = $marc->as_usmarc();
								        }
								        print {$fh} $rec;

								        $deleted{$record_number} = 1;
								    }

								    print "\nRecords exported: $i\n" if $verbose_logging;
								    print {$fh} '</collection>' if include_xml_wrapper($as_xml, $record_type);
								    close $fh;
								    return \%deleted;
								}


								# Load one record and apply the fix-ups needed before indexing.
								#
								#   $record_type    'biblio' or 'authority'
								#   $record_number  biblionumber or authid
								#   $noxml          passed on to get_raw_marc_record()
								#
								# Returns the record object, or undef when the record could not be loaded.
								sub get_corrected_marc_record {
								    my ($record_type, $record_number, $noxml) = @_;

								    my $marc = get_raw_marc_record($record_type, $record_number, $noxml);
								    return $marc unless defined $marc;

								    fix_leader($marc);

								    if ( $record_type eq 'authority' ) {
								        fix_authority_id( $marc, $record_number );
								    }
								    elsif ( $record_type eq 'biblio'
								        && C4::Context->preference('IncludeSeeFromInSearches') )
								    {
								        # Run the record through the EmbedSeeFromHeadings filter.
								        my $processor = Koha::RecordProcessor->new( { filters => 'EmbedSeeFromHeadings' } );
								        $marc = $processor->process($marc);
								    }

								    fix_unimarc_100($marc)
								        if C4::Context->preference("marcflavour") eq "UNIMARC";

								    return $marc;
								}


								# Load one MARC record without applying any fix-ups.
								#
								#   $record_type    'biblio' reads a bibliographic record, anything else
								#                   an authority
								#   $record_number  biblionumber or authid
								#   $noxml          biblios only: true reads the ISO 2709 blob from
								#                   biblioitems.marc, false goes through GetMarcBiblio()
								#
								# Returns the record object, or nothing (undef in scalar context) when the
								# record is missing or cannot be parsed.  Parse failures are warned about
								# and never abort the run.
								sub get_raw_marc_record {
								    my ($record_type, $record_number, $noxml) = @_;

								    my $marc;
								    if ($record_type eq 'biblio') {
								        if ($noxml) {
								            my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
								            $fetch_sth->execute($record_number);
								            if (my ($blob) = $fetch_sth->fetchrow_array) {
								                # Trap parser exceptions as the other branches do, so one
								                # bad blob cannot stop the whole export.
								                $marc = eval { MARC::Record->new_from_usmarc($blob) };
								                unless ($marc) {
								                    warn "error creating MARC::Record from $blob";
								                }
								            }
								            # failure to find a bib is not a problem -
								            # a delete could have been done before
								            # trying to process a record update

								            $fetch_sth->finish();
								            return unless $marc;
								        } else {
								            # NOTE(review): the meaning of the second argument (1) is not
								            # visible in this file -- confirm against C4::Biblio.
								            eval { $marc = GetMarcBiblio($record_number, 1); };
								            if ($@ || !$marc) {
								                # here we do warn since catching an exception
								                # means that the bib was found but failed
								                # to be parsed
								                warn "error retrieving biblio $record_number";
								                return;
								            }
								        }
								    } else {
								        eval { $marc = GetAuthority($record_number); };
								        if ($@) {
								            warn "error retrieving authority $record_number";
								            return;
								        }
								    }
								    return $marc;
								}


								# Reset the computed parts of a record's leader.
								#
								# Takes an object with a leader() accessor.  Positions 00-04 are blanked,
								# positions 10-16 are set to '22' followed by five blanks, and the leader
								# is cut to 24 characters before being stored back.
								sub fix_leader {
								    # FIXME - this routine is suspect
								    # It blanks the Leader/00-05 and Leader/12-16 to
								    # force them to be recalculated correct when
								    # the $marc->as_usmarc() or $marc->as_xml() is called.
								    # But why is this necessary?  It would be a serious bug
								    # in MARC::Record (definitely) and MARC::File::XML (arguably)
								    # if they are emitting incorrect leader values.
								    my ($marc) = @_;

								    my $fixed = $marc->leader;
								    substr( $fixed,  0, 5, ' ' x 5 );
								    substr( $fixed, 10, 7, '22' . ' ' x 5 );
								    $marc->leader( substr( $fixed, 0, 24 ) );
								}


								# Make sure a bibliographic record carries its biblionumber and
								# biblioitemnumber.
								#
								#   $marc              record object to update in place
								#   $biblionumber      biblionumber to store
								#   $biblioitemnumber  optional; looked up in biblioitems when omitted
								#
								# Returns 1 on success, 0 (after a warning) when the biblioitemnumber
								# could not be found.
								sub fix_biblio_ids {
								    # FIXME - it is essential to ensure that the biblionumber is present,
								    #         otherwise, Zebra will choke on the record.  However, this
								    #         logic belongs in the relevant C4::Biblio APIs.
								    my ( $marc, $biblionumber, @optional ) = @_;

								    my $biblioitemnumber;
								    if (@optional) {
								        $biblioitemnumber = $optional[0];
								    }
								    else {
								        my $lookup = $dbh->prepare(
								            "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
								        $lookup->execute($biblionumber);
								        ($biblioitemnumber) = $lookup->fetchrow_array;
								        $lookup->finish;
								        if ( !$biblioitemnumber ) {
								            warn "failed to get biblioitemnumber for biblio $biblionumber";
								            return 0;
								        }
								    }

								    # FIXME - this is cheating on two levels
								    # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
								    # 2. Making sure that the biblionumber and biblioitemnumber are correct and
								    #    present in the MARC::Record object ought to be part of GetMarcBiblio.
								    #
								    # On the other hand, this better for now than what rebuild_zebra.pl used to
								    # do, which was duplicate the code for inserting the biblionumber
								    # and biblioitemnumber
								    C4::Biblio::_koha_marc_update_bib_ids( $marc, '', $biblionumber, $biblioitemnumber );

								    return 1;
								}


								# Make sure field 001 of an authority record holds the authid.
								#
								#   $marc    record object to update in place
								#   $authid  authority id expected in field 001
								#
								# Nothing is changed when 001 already holds that id; otherwise any
								# existing 001 is removed and a fresh one is inserted.
								sub fix_authority_id {
								    # FIXME - as with fix_biblio_ids, the authid must be present
								    #         for Zebra's sake.  However, this really belongs
								    #         in C4::AuthoritiesMarc.
								    my ( $marc, $authid ) = @_;

								    my $current_001 = $marc->field('001');
								    return if $current_001 and $current_001->data() eq $authid;

								    # field() is deliberately called in list context here: with no 001
								    # present, nothing at all is passed to delete_field().
								    $marc->delete_field( $marc->field('001') );
								    $marc->insert_fields_ordered( MARC::Field->new( '001', $authid ) );
								}


								# Rewrite field 100$a of a UNIMARC record so that positions 22-27 read
								# "frey50".
								#
								#   $marc  record object to update in place
								#
								# An existing 36-character 100$a is reused as the starting value.
								# Otherwise a new value is built from today's date (YYYYMMDD) padded with
								# blanks to 35 characters.  In both cases field 100 ends up replaced by a
								# new field holding only the patched $a.
								sub fix_unimarc_100 {
								    # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
								    my $marc = shift;

								    my $string;
								    if ( length($marc->subfield( 100, "a" )) == 36 ) {
								        # Keep the existing value and remove the field; it is
								        # re-created below with the patched content.
								        $string = $marc->subfield( 100, "a" );
								        my $f100 = $marc->field(100);
								        $marc->delete_field($f100);
								    }
								    else {
								        # Load POSIX here rather than relying on another module having
								        # loaded it already.
								        require POSIX;
								        $string = POSIX::strftime( "%Y%m%d", localtime );
								        $string =~ s/\-//g;
								        $string = sprintf( "%-*s", 35, $string );
								    }
								    substr( $string, 22, 6, "frey50" );

								    # After the delete above, or when no valid 100$a existed, this check
								    # is true and the new field is inserted.
								    unless ( length($marc->subfield( 100, "a" )) == 36 ) {
								        $marc->delete_field($marc->field(100));
								        $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
								    }
								}


								# Run zebraidx for one record type.
								#
								#   $record_type       'biblio' or 'authority'
								#   $op                zebraidx command for the record directory
								#                      (this file uses 'update' and 'adelete')
								#   $record_dir        directory holding the exported records
								#   $reset_index       true: run "init" first
								#   $noshadow          false, or the literal zebraidx switch(es) that turn
								#                      off the shadow register; when false, "commit" is
								#                      run afterwards
								#   $record_format     record group passed with -g ('marcxml' or 'iso2709')
								#   $zebraidx_log_opt  log switch(es) as one whitespace-separated string
								#
								# Each zebraidx call is made without a shell, so paths containing spaces
								# or shell metacharacters are passed through unchanged.  A failing call is
								# reported with a warning and the remaining calls still run.
								sub do_indexing {
								    my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format, $zebraidx_log_opt) = @_;

								    my $zebra_server  = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
								    my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
								    my $zebra_config  = C4::Context->zebraconfig($zebra_server)->{'config'};

								    # The option strings may hold several switches; split them into
								    # separate arguments for the list form of system().
								    my @log_opts    = split ' ', ( defined $zebraidx_log_opt ? $zebraidx_log_opt : '' );
								    my @shadow_opts = split ' ', ( $noshadow ? $noshadow : '' );

								    my @prefix = ( 'zebraidx', '-c', $zebra_config, @log_opts );
								    my @target = ( '-g', $record_format, '-d', $zebra_db_name );

								    my @commands;
								    push @commands, [ @prefix, @target, 'init' ] if $reset_index;
								    push @commands, [ @prefix, @shadow_opts, @target, $op, $record_dir ];
								    push @commands, [ @prefix, @target, 'commit' ] unless $noshadow;

								    foreach my $command (@commands) {
								        system(@$command) == 0
								            or warn "zebraidx command failed (status $?): @$command\n";
								    }
								}


								# Print the command-line help to STDOUT.
								sub print_usage {
								    print <<_USAGE_;
								$0: reindex MARC bibs and/or authorities in Zebra.

								Use this batch job to reindex all biblio or authority
								records in your Koha database.

								Parameters:
								    -b                      index bibliographic records

								    -a                      index authority records

								    -z                      select only updated and deleted
								                            records marked in the zebraqueue
								                            table.  Cannot be used with -r
								                            or -s.

								    -r or --reset           clear Zebra index before
								                            adding records to index. Implies -w.

								    -d                      Temporary directory for indexing.
								                            If not specified, one is automatically
								                            created.  The export directory
								                            is automatically deleted unless
								                            you supply the -k switch.

								    -k                      Do not delete export directory.

								    -s                      Skip export.  Used if you have
								                            already exported the records
								                            in a previous run.

								    -I or --skip-index      Skip indexing.  Only export the
								                            records.

								    -noxml                  index from ISO MARC blob
								                            instead of MARC XML.  This
								                            option is recommended only
								                            for advanced users.

								    -x                      export and index as xml instead of iso2709 (biblios only).
								                            use this if you might have records > 99,999 chars.

								    -nosanitize             export biblio/authority records directly from DB marcxml
								                            field without sanitizing records. It speeds up
								                            dump process but could fail if DB contains badly
								                            encoded records. Works only with -x.

								    -w                      skip shadow indexing for this batch

								    -y                      do NOT clear zebraqueue after indexing; normally,
								                            after doing batch indexing, zebraqueue should be
								                            marked done for the affected record type(s) so that
								                            a running zebraqueue_daemon doesn't try to reindex
								                            the same records - specify -y to override this.
								                            Cannot be used with -z.

								    -v                      increase the amount of logging.  Normally only
								                            warnings and errors from the indexing are shown.
								                            Use log level 2 (-v -v) to include all Zebra logs.

								    --length   1234         how many records you want to export
								    --offset 1243           offset you want to start from (only used together with --length)
								                                example: --offset 500 --length=1000 will result in a LIMIT 500,1000 (exporting 1000 records, starting after the 500th one)
								                                note that the numbers are NOT related to biblionumber, that's the intended behaviour.
								    --where                 let you specify a WHERE query, like itemtype='BOOK'
								                            or something like that

								    --munge-config          Deprecated option to try
								                            to fix Zebra config files.
								    --help or -h            show this message.
								_USAGE_
								}


								# FIXME: the following routines are deprecated and

								# will be removed once it is determined whether

								# a script to fix Zebra configuration files is

								# actually needed.

								# munge_config()
								#
								# Deprecated helper (see the usage text for --munge-config) that tries to
								# repair a Zebra installation layout.  It
								#   1. locates the zebraidx binary, the Zebra modules directory and the
								#      Zebra "tab" directory among a fixed list of well-known paths,
								#      printing an error block and exiting with status 1 if one of them
								#      cannot be found;
								#   2. for each record type selected through the file-level $authorities /
								#      $biblios flags, creates the missing server sub-directories, copies
								#      the missing mandatory definition files from $kohadir/etc/zebradb and
								#      writes a default Zebra configuration file if none exists.
								#
								# Takes no arguments and returns nothing; it reads the file-level variables
								# $authorities, $biblios, $verbose_logging, $kohadir, $authorityserverdir
								# and $biblioserverdir.  Existing files and directories are never
								# overwritten.
								sub munge_config {

								    # Only printed for information; the prefix is not used afterwards.
								    my $zebraidxdir = _munge_locate_prefix(
								        [
								            qw(/usr/local/bin/zebraidx
								               /opt/bin/zebraidx
								               /usr/bin/zebraidx)
								        ],
								        qr{/bin/.*},
								        [
								            'could not find zebraidx directory',
								            'Either zebra is not installed,',
								            "or it's in a directory I don't checked.",
								            'do a which zebraidx and edit this file to add the result you get',
								        ],
								    );
								    print "Info : zebra is in $zebraidxdir \n";

								    # Prefix such that "$modulesdir/modules/" holds mod-grs-xml.so.
								    my $modulesdir = _munge_locate_prefix(
								        [
								            qw(/usr/local/lib/idzebra-2.0/modules/mod-grs-xml.so
								               /usr/local/lib/idzebra/modules/mod-grs-xml.so
								               /usr/lib/idzebra/modules/mod-grs-xml.so
								               /usr/lib/idzebra-2.0/modules/mod-grs-xml.so)
								        ],
								        qr{/modules/.*},
								        [
								            'could not find mod-grs-xml.so directory',
								            'Either zebra is not properly compiled (libxml2 is not setup and you don t have mod-grs-xml.so,',
								            "or it's in a directory I don't checked.",
								            'find where mod-grs-xml.so is and edit this file to add the result you get',
								        ],
								    );
								    print "Info: zebra modules dir : $modulesdir\n";

								    # Prefix such that "$tabdir/tab/" holds explain.att.
								    my $tabdir = _munge_locate_prefix(
								        [
								            qw(/usr/local/share/idzebra/tab/explain.att
								               /usr/local/share/idzebra-2.0/tab/explain.att
								               /usr/share/idzebra/tab/explain.att
								               /usr/share/idzebra-2.0/tab/explain.att)
								        ],
								        qr{/tab/.*},
								        [
								            'could not find explain.att directory',
								            'Either zebra is not properly compiled,',
								            "or it's in a directory I don't checked.",
								            'find where explain.att is and edit this file to add the result you get',
								        ],
								    );
								    print "Info: tab dir : $tabdir\n";

								    # Per-server settings.  The two servers differ only in their directory,
								    # the sub-directory names used below $kohadir/etc/zebradb, the name of
								    # the attribute-set file and the spacing of the "attset" directive
								    # (kept exactly as it was historically written for each server).
								    my @servers = (
								        {
								            enabled       => $authorities,
								            server        => 'authorityserver',
								            type          => 'authorities',
								            dir           => $authorityserverdir,
								            attset        => 'auth1',
								            attset_prefix => 'attset: ',
								        },
								        {
								            enabled       => $biblios,
								            server        => 'biblioserver',
								            type          => 'biblios',
								            dir           => $biblioserverdir,
								            attset        => 'bib1',
								            attset_prefix => 'attset:',
								        },
								    );

								    # The counter is deliberately shared: when both record types are
								    # processed, the second report includes what the first one created.
								    my $created_dir_or_file = 0;
								    foreach my $server (@servers) {
								        next unless $server->{enabled};

								        if ($verbose_logging) {
								            print "====================\n";
								            print "checking directories & files for $server->{type}\n";
								            print "====================\n";
								        }

								        $created_dir_or_file += _munge_prepare_server(
								            %{$server},
								            kohadir    => $kohadir,
								            tabdir     => $tabdir,
								            modulesdir => $modulesdir,
								        );

								        if ($created_dir_or_file) {
								            print "Info: created : $created_dir_or_file directories & files\n";
								        }
								        else {
								            print "Info: file & directories OK\n";
								        }
								    }

								    return;
								}

								# _munge_locate_prefix(\@candidates, $strip_re, \@error_lines)
								#
								# Returns the installation prefix of the last path in @candidates that is an
								# existing plain file, i.e. that path with everything matched by $strip_re
								# removed.  When no candidate exists, prints @error_lines (each one prefixed
								# with "    ERROR: ") on STDOUT and terminates the script with exit status 1.
								sub _munge_locate_prefix {
								    my ( $candidates, $strip_re, $error_lines ) = @_;

								    # No early exit from the loop: a later candidate overrides an earlier one.
								    my $found;
								    foreach my $candidate ( @{$candidates} ) {
								        $found = $candidate if -f $candidate;
								    }

								    unless ($found) {
								        print "\n";
								        foreach my $line ( @{$error_lines} ) {
								            print "    ERROR: $line\n";
								        }
								        exit 1;    # a missing Zebra installation is a failure, not a success
								    }

								    $found =~ s/$strip_re//;
								    return $found;
								}

								# _munge_prepare_server(%spec)
								#
								# Creates whatever is missing for one Zebra server and returns the number of
								# directories and files actually created.  Keys of %spec:
								#   server        - name handed to C4::Context->zebraconfig()
								#   type          - 'authorities' or 'biblios' (source sub-directory and
								#                   name used in messages)
								#   dir           - server data directory
								#   attset        - base name of the attribute-set file ('auth1' / 'bib1')
								#   attset_prefix - text placed before each attribute-set file name in the
								#                   generated configuration
								#   kohadir       - Koha installation directory
								#   tabdir        - prefix of the Zebra tab directory
								#   modulesdir    - prefix of the Zebra modules directory
								# Failures are reported with warn() and are not counted; processing then
								# continues with the next item.
								sub _munge_prepare_server {
								    my (%spec) = @_;
								    my $dir     = $spec{dir};
								    my $created = 0;

								    # List-form system() keeps the path away from the shell.
								    unless ( -d $dir ) {
								        if ( system( 'mkdir', '-p', $dir ) == 0 ) {
								            print "Info: created $dir\n";
								            $created++;
								        }
								        else {
								            warn "Warning: could not create $dir\n";
								        }
								    }

								    # 'tmp' is referenced by the setTmpDir directive of the generated
								    # configuration, so it is created together with the other directories.
								    foreach my $subdir (qw(lock register shadow tab key etc tmp)) {
								        my $path = "$dir/$subdir";
								        next if -d $path;
								        if ( mkdir $path ) {
								            print "Info: created $path\n";
								            $created++;
								        }
								        else {
								            warn "Warning: could not create $path: $!\n";
								        }
								    }

								    my $zebradb = "$spec{kohadir}/etc/zebradb";

								    # The record model depends on the MARC flavour; the system preference
								    # is only looked up when the file really has to be copied.
								    unless ( -f "$dir/tab/record.abs" ) {
								        my ( $flavour_dir, $flavour_label ) =
								          C4::Context->preference("marcflavour") eq "UNIMARC"
								          ? ( 'unimarc', 'UNIMARC' )
								          : ( 'marc21', 'USMARC' );
								        $created += _munge_copy_file(
								            "$zebradb/marc_defs/$flavour_dir/$spec{type}/record.abs",
								            "$dir/tab/record.abs",
								            "record.abs for $flavour_label",
								        );
								    }

								    # [ source below $zebradb, destination below $dir ]
								    # NOTE(review): word-phrase-utf.chr has always been copied from
								    # sort-string-utf.chr; kept as is, confirm whether this is intended.
								    my @copies = (
								        [ 'lang_defs/fr/sort-string-utf.chr', 'tab/sort-string-utf.chr' ],
								        [ 'lang_defs/fr/sort-string-utf.chr', 'tab/word-phrase-utf.chr' ],
								        [ "$spec{type}/etc/bib1.att",         "tab/$spec{attset}.att" ],
								        [ 'etc/default.idx',                  'tab/default.idx' ],
								        [ 'ccl.properties',                   'etc/ccl.properties' ],
								        [ 'pqf.properties',                   'etc/pqf.properties' ],
								    );
								    foreach my $copy (@copies) {
								        my ( $source, $target ) = @{$copy};
								        next if -f "$dir/$target";
								        ( my $label = $target ) =~ s{.*/}{};
								        $created += _munge_copy_file( "$zebradb/$source", "$dir/$target", $label );
								    }

								    # Default Zebra configuration, written only when the configured file
								    # does not exist yet.
								    my $config_file = C4::Context->zebraconfig( $spec{server} )->{config};
								    unless ( -f $config_file ) {
								        if ( open my $zd, '>:encoding(UTF-8)', $config_file ) {
								            print {$zd} _munge_config_text(%spec);

								            # Buffered write errors only show up when closing.
								            if ( close $zd ) {
								                print "Info: creating zebra-$spec{type}.cfg\n";
								                $created++;
								            }
								            else {
								                warn "Warning: could not write $config_file: $!\n";
								            }
								        }
								        else {
								            warn "Warning: could not create $config_file: $!\n";
								        }
								    }

								    return $created;
								}

								# _munge_copy_file($source, $target, $label)
								#
								# Copies $source to $target with "cp -f" (run without a shell).  Prints
								# "Info: copied $label" and returns 1 on success; warns and returns 0 when
								# cp reports a failure.
								sub _munge_copy_file {
								    my ( $source, $target, $label ) = @_;

								    if ( system( 'cp', '-f', $source, $target ) == 0 ) {
								        print "Info: copied $label\n";
								        return 1;
								    }

								    warn "Warning: could not copy $source to $target\n";
								    return 0;
								}

								# _munge_config_text(%spec)
								#
								# Returns the text of the default Zebra configuration file for one server.
								# Uses the dir, tabdir, modulesdir, attset and attset_prefix keys described
								# for _munge_prepare_server().
								sub _munge_config_text {
								    my (%spec) = @_;
								    my $dir           = $spec{dir};
								    my $tabdir        = $spec{tabdir};
								    my $modulesdir    = $spec{modulesdir};
								    my $attset        = $spec{attset};
								    my $attset_prefix = $spec{attset_prefix};

								    # "\${srcdir:-.}" must reach the file literally, hence the escaped "$".
								    return join "\n",
								      '',
								      '# generated by KOHA/misc/migration_tools/rebuild_zebra.pl',
								      "profilePath:\${srcdir:-.}:$dir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/",
								      '',
								      'encoding: UTF-8',
								      '# Files that describe the attribute sets supported.',
								      $attset_prefix . $attset . '.att',
								      $attset_prefix . 'explain.att',
								      $attset_prefix . 'gils.att',
								      '',
								      "modulePath:$modulesdir/modules/",
								      '# Specify record type',
								      'iso2709.recordType:grs.marcxml.record',
								      'recordType:grs.xml',
								      "recordId: ($attset,Local-Number)",
								      'storeKeys:1',
								      'storeData:1',
								      '',
								      '# Lock File Area',
								      "lockDir: $dir/lock",
								      'perm.anonymous:r',
								      'perm.kohaadmin:rw',
								      "register: $dir/register:4G",
								      "shadow: $dir/shadow:4G",
								      '',
								      '# Temp File area for result sets',
								      "setTmpDir: $dir/tmp",
								      '',
								      '# Temp File area for index program',
								      "keyTmpDir: $dir/key",
								      '',
								      '# Approx. Memory usage during indexing',
								      'memMax: 40M',
								      'rank:rank-1',
								      '';
								}