Koha/misc/migration_tools/rebuild_zebra.pl
2007-03-09 14:52:58 +00:00

572 lines
21 KiB
Prolog
Executable file

#!/usr/bin/perl
use C4::Context;
use Getopt::Long;
use C4::Biblio;
use C4::AuthoritiesMarc;
use strict;
#
# script that checks zebradir structure & create directories & mandatory files if needed
#
#
$|=1; # flushes output
# limit for database dumping
my $limit;# = "LIMIT 500";
my $directory;
my $skip_export;
my $keep_export;
my $reset;
my $biblios;
my $authorities;
GetOptions(
'd:s' => \$directory,
'reset' => \$reset,
's' => \$skip_export,
'k' => \$keep_export,
'b' => \$biblios,
'a' => \$authorities,
);
$directory = "export" unless $directory;
my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory};
my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
my $kohadir = C4::Context->config('intranetdir');
my $dbh = C4::Context->dbh;
my ($biblionumbertagfield,$biblionumbertagsubfield) = &MARCfind_marc_from_kohafield($dbh,"biblio.biblionumber","");
my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &MARCfind_marc_from_kohafield($dbh,"biblioitems.biblioitemnumber","");
print "some informations\n";
print "=================\n";
print "Zebra biblio directory =>$biblioserverdir\n";
print "Zebra authorities directory =>$authorityserverdir\n";
print "Koha directory =>$kohadir\n";
print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
print "=================\n";
#
# creating zebra-biblios.cfg depending on system
#
# getting zebraidx directory
my $zebraidxdir;
foreach (qw(/usr/local/bin/zebraidx
/opt/bin/zebraidx
/usr/bin/zebraidx
)) {
if ( -f $_ ) {
$zebraidxdir=$_;
}
}
unless ($zebraidxdir) {
print qq|
ERROR: could not find zebraidx directory
ERROR: Either zebra is not installed,
ERROR: or it's in a directory I don't checked.
ERROR: do a which zebraidx and edit this file to add the result you get
|;
exit;
}
$zebraidxdir =~ s/\/bin\/.*//;
print "Info : zebra is in $zebraidxdir \n";
# getting modules directory
my $modulesdir;
foreach (qw(/usr/local/lib/idzebra-2.0/modules/mod-grs-xml.so
/usr/local/lib/idzebra/modules/mod-grs-xml.so
/usr/lib/idzebra/modules/mod-grs-xml.so
/usr/lib/idzebra-2.0/modules/mod-grs-xml.so
)) {
if ( -f $_ ) {
$modulesdir=$_;
}
}
unless ($modulesdir) {
print qq|
ERROR: could not find mod-grs-xml.so directory
ERROR: Either zebra is not properly compiled (libxml2 is not setup and you don t have mod-grs-xml.so,
ERROR: or it's in a directory I don't checked.
ERROR: find where mod-grs-xml.so is and edit this file to add the result you get
|;
exit;
}
$modulesdir =~ s/\/modules\/.*//;
print "Info: zebra modules dir : $modulesdir\n";
# getting tab directory
my $tabdir;
foreach (qw(/usr/local/share/idzebra/tab/explain.att
/usr/local/share/idzebra-2.0/tab/explain.att
/usr/share/idzebra/tab/explain.att
/usr/share/idzebra-2.0/tab/explain.att
)) {
if ( -f $_ ) {
$tabdir=$_;
}
}
unless ($tabdir) {
print qq|
ERROR: could not find explain.att directory
ERROR: Either zebra is not properly compiled,
ERROR: or it's in a directory I don't checked.
ERROR: find where explain.att is and edit this file to add the result you get
|;
exit;
}
$tabdir =~ s/\/tab\/.*//;
print "Info: tab dir : $tabdir\n";
#
# AUTHORITIES creating directory structure
#
my $created_dir_or_file = 0;
if ($authorities) {
print "====================\n";
print "checking directories & files for authorities\n";
print "====================\n";
unless (-d "$authorityserverdir") {
system("mkdir -p $authorityserverdir");
print "Info: created $authorityserverdir\n";
$created_dir_or_file++;
}
unless (-d "$authorityserverdir/lock") {
mkdir "$authorityserverdir/lock";
print "Info: created $authorityserverdir/lock\n";
$created_dir_or_file++;
}
unless (-d "$authorityserverdir/register") {
mkdir "$authorityserverdir/register";
print "Info: created $authorityserverdir/register\n";
$created_dir_or_file++;
}
unless (-d "$authorityserverdir/shadow") {
mkdir "$authorityserverdir/shadow";
print "Info: created $authorityserverdir/shadow\n";
$created_dir_or_file++;
}
unless (-d "$authorityserverdir/tab") {
mkdir "$authorityserverdir/tab";
print "Info: created $authorityserverdir/tab\n";
$created_dir_or_file++;
}
unless (-d "$authorityserverdir/key") {
mkdir "$authorityserverdir/key";
print "Info: created $authorityserverdir/key\n";
$created_dir_or_file++;
}
unless (-d "$authorityserverdir/etc") {
mkdir "$authorityserverdir/etc";
print "Info: created $authorityserverdir/etc\n";
$created_dir_or_file++;
}
#
# AUTHORITIES : copying mandatory files
#
# the record model, depending on marc flavour
unless (-f "$authorityserverdir/tab/record.abs") {
if (C4::Context->preference("marcflavour") eq "UNIMARC") {
system("cp -f $kohadir/misc/zebra/record_authorities_unimarc.abs $authorityserverdir/tab/record.abs");
print "Info: copied record.abs for UNIMARC\n";
} else {
system("cp -f $kohadir/misc/zebra/record_authorities_usmarc.abs $authorityserverdir/tab/record.abs");
print "Info: copied record.abs for USMARC\n";
}
$created_dir_or_file++;
}
unless (-f "$authorityserverdir/tab/sort-string-utf_french.chr") {
system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/sort-string-utf.chr");
print "Info: copied sort-string-utf.chr\n";
$created_dir_or_file++;
}
unless (-f "$authorityserverdir/tab/word-phrase-utf.chr") {
system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/word-phrase-utf.chr");
print "Info: copied word-phase-utf.chr\n";
$created_dir_or_file++;
}
unless (-f "$authorityserverdir/tab/auth1.att") {
system("cp -f $kohadir/misc/zebra/bib1_authorities.att $authorityserverdir/tab/auth1.att");
print "Info: copied auth1.att\n";
$created_dir_or_file++;
}
unless (-f "$authorityserverdir/tab/default.idx") {
system("cp -f $kohadir/misc/zebra/default.idx $authorityserverdir/tab/default.idx");
print "Info: copied default.idx\n";
$created_dir_or_file++;
}
unless (-f C4::Context->zebraoptions('authorityserver')->{ccl2rpn}) {
system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraoptions('authorityserver')->{ccl2rpn});
# system("cp -f $kohadir/misc/zebra/ccl.properties $authorityserverdir/etc/ccl.properties");
print "Info: copied ccl.properties\n";
$created_dir_or_file++;
}
unless (-f C4::Context->zebraconfig('authorityserver')->{cql2rpn}) {
system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{cql2rpn});
print "Info: copied pqf.properties\n";
$created_dir_or_file++;
}
#
# AUTHORITIES : copying mandatory files
#
unless (-f C4::Context->zebraconfig('authorityserver')->{config}) {
open ZD,">:utf8 ",C4::Context->zebraconfig('authorityserver')->{config};
print ZD "
# generated by KOHA/misc/migration_tools/rebuild_zebra.pl
profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
encoding: UTF-8
# Files that describe the attribute sets supported.
attset: auth1.att
attset: explain.att
attset: gils.att
modulePath:$modulesdir/modules/
# Specify record type
iso2709.recordType:grs.marcxml.record
recordType:grs.xml
recordId: (auth1,Local-Number)
storeKeys:1
storeData:1
# Lock File Area
lockDir: $authorityserverdir/lock
perm.anonymous:r
perm.kohaadmin:rw
passw.kohalis
shadow
register: $authorityserverdir/register:4G
shadow: $authorityserverdir/shadow:4G
# Temp File area for result sets
setTmpDir: $authorityserverdir/tmp
# Temp File area for index program
keyTmpDir: $authorityserverdir/key
# Approx. Memory usage during indexing
memMax: 40M
rank:rank-1
";
print "Info: creating zebra-authorities.cfg\n";
$created_dir_or_file++;
}
if ($created_dir_or_file) {
print "Info: created : $created_dir_or_file directories & files\n";
} else {
print "Info: file & directories OK\n";
}
#
# exporting authorities
#
if ($skip_export) {
print "====================\n";
print "SKIPPING authorities export\n";
print "====================\n";
} else {
print "====================\n";
print "exporting authorities\n";
print "====================\n";
mkdir "$directory" unless (-d $directory);
mkdir "$directory/authorities" unless (-d "$directory/authorities");
open(OUT,">:utf8","$directory/authorities/authorities.iso2709") or die $!;
my $dbh=C4::Context->dbh;
my $sth;
$sth=$dbh->prepare("select authid from auth_header $limit");
$sth->execute();
my $i=0;
while (my ($authid) = $sth->fetchrow) {
my $record = AUTHgetauthority($dbh,$authid);
print ".";
print "\r$i" unless ($i++ %100);
print OUT $record->as_usmarc();
}
close(OUT);
}
#
# and reindexing everything
#
print "====================\n";
print "REINDEXING zebra\n";
print "====================\n";
system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('authorityserver')->{config}." -d authorities init") if ($reset);
system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('authorityserver')->{config}." -d authorities update $directory/authorities");
system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('authorityserver')->{config}." -d authorities commit");
} else {
print "skipping authorities\n";
}
#################################################################################################################
# BIBLIOS
#################################################################################################################
if ($biblios) {
print "====================\n";
print "checking directories & files for biblios\n";
print "====================\n";
#
# BIBLIOS : creating directory structure
#
unless (-d "$biblioserverdir") {
system("mkdir -p $biblioserverdir");
print "Info: created $biblioserverdir\n";
$created_dir_or_file++;
}
unless (-d "$biblioserverdir/lock") {
mkdir "$biblioserverdir/lock";
print "Info: created $biblioserverdir/lock\n";
$created_dir_or_file++;
}
unless (-d "$biblioserverdir/register") {
mkdir "$biblioserverdir/register";
print "Info: created $biblioserverdir/register\n";
$created_dir_or_file++;
}
unless (-d "$biblioserverdir/shadow") {
mkdir "$biblioserverdir/shadow";
print "Info: created $biblioserverdir/shadow\n";
$created_dir_or_file++;
}
unless (-d "$biblioserverdir/tab") {
mkdir "$biblioserverdir/tab";
print "Info: created $biblioserverdir/tab\n";
$created_dir_or_file++;
}
unless (-d "$biblioserverdir/key") {
mkdir "$biblioserverdir/key";
print "Info: created $biblioserverdir/key\n";
$created_dir_or_file++;
}
unless (-d "$biblioserverdir/etc") {
mkdir "$biblioserverdir/etc";
print "Info: created $biblioserverdir/etc\n";
$created_dir_or_file++;
}
#
# BIBLIOS : copying mandatory files
#
# the record model, depending on marc flavour
unless (-f "$biblioserverdir/tab/record.abs") {
if (C4::Context->preference("marcflavour") eq "UNIMARC") {
system("cp -f $kohadir/misc/zebra/record_biblios_unimarc.abs $biblioserverdir/tab/record.abs");
print "Info: copied record.abs for UNIMARC\n";
} else {
system("cp -f $kohadir/misc/zebra/record_biblios_usmarc.abs $biblioserverdir/tab/record.abs");
print "Info: copied record.abs for USMARC\n";
}
$created_dir_or_file++;
}
unless (-f "$biblioserverdir/tab/sort-string-utf_french.chr") {
system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/sort-string-utf.chr");
print "Info: copied sort-string-utf.chr\n";
$created_dir_or_file++;
}
unless (-f "$biblioserverdir/tab/word-phrase-utf.chr") {
system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/word-phrase-utf.chr");
print "Info: copied word-phase-utf.chr\n";
$created_dir_or_file++;
}
unless (-f "$biblioserverdir/tab/bib1.att") {
system("cp -f $kohadir/misc/zebra/bib1_biblios.att $biblioserverdir/tab/bib1.att");
print "Info: copied bib1.att\n";
$created_dir_or_file++;
}
unless (-f "$biblioserverdir/tab/default.idx") {
system("cp -f $kohadir/misc/zebra/default.idx $biblioserverdir/tab/default.idx");
print "Info: copied default.idx\n";
$created_dir_or_file++;
}
unless (-f C4::Context->zebraoptions('biblioserver')->{ccl2rpn}) {
system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraoptions('biblioserver')->{ccl2rpn});
# system("cp -f $kohadir/misc/zebra/ccl.properties $biblioserverdir/etc/ccl.properties");
print "Info: copied ccl.properties\n";
$created_dir_or_file++;
}
unless (-f C4::Context->zebraconfig('biblioserver')->{cql2rpn}) {
system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{cql2rpn});
print "Info: copied pqf.properties\n";
$created_dir_or_file++;
}
#
# BIBLIOS : copying mandatory files
#
unless (-f C4::Context->zebraconfig('biblioserver')->{config}) {
open ZD,">:utf8 ",C4::Context->zebraconfig('biblioserver')->{config};
print ZD "
# generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl
profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
encoding: UTF-8
# Files that describe the attribute sets supported.
attset:bib1.att
attset:explain.att
attset:gils.att
modulePath:$modulesdir/modules/
# Specify record type
iso2709.recordType:grs.marcxml.record
recordType:grs.xml
recordId: (bib1,Local-Number)
storeKeys:1
storeData:1
# Lock File Area
lockDir: $biblioserverdir/lock
perm.anonymous:r
perm.kohaadmin:rw
passw.kohalis
shadow
register: $biblioserverdir/register:4G
shadow: $biblioserverdir/shadow:4G
# Temp File area for result sets
setTmpDir: $biblioserverdir/tmp
# Temp File area for index program
keyTmpDir: $biblioserverdir/key
# Approx. Memory usage during indexing
memMax: 40M
rank:rank-1
";
print "Info: creating zebra-biblios.cfg\n";
$created_dir_or_file++;
}
if ($created_dir_or_file) {
print "Info: created : $created_dir_or_file directories & files\n";
} else {
print "Info: file & directories OK\n";
}
# die;
#
# exporting biblios
#
if ($skip_export) {
print "====================\n";
print "SKIPPING biblio export\n";
print "====================\n";
} else {
print "====================\n";
print "exporting biblios\n";
print "====================\n";
mkdir "$directory" unless (-d $directory);
mkdir "$directory/biblios" unless (-d "$directory/biblios");
open(OUT,">:utf8 ","$directory/biblios/export") or die $!;
my $dbh=C4::Context->dbh;
my $sth;
$sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber $limit");
$sth->execute();
my $i=0;
while (my ($biblionumber) = $sth->fetchrow) {
my $record = GetMarcBiblio($biblionumber);
# warn $record->as_formatted;
# print $record;
# check that biblionumber & biblioitemnumber are stored in the MARC record, otherwise, add them & update the biblioitems.marcxml data.
my $record_correct=1;
next unless $record->field($biblionumbertagfield);
if ($biblionumbertagfield eq '001') {
unless ($record->field($biblionumbertagfield)->data()) {
$record_correct=0;
my $field;
# if the field where biblionumber is already exist, just update it, otherwise create it
if ($record->field($biblionumbertagfield)) {
$field = $record->field($biblionumbertagfield);
$field->update($biblionumber);
} else {
my $newfield;
$newfield = MARC::Field->new( $biblionumbertagfield, $biblionumber);
$record->append_fields($newfield);
}
}
} else {
unless ($record->subfield($biblionumbertagfield,$biblionumbertagsubfield)) {
$record_correct=0;
my $field;
# if the field where biblionumber is already exist, just update it, otherwise create it
if ($record->field($biblionumbertagfield)) {
$field = $record->field($biblionumbertagfield);
$field->add_subfields($biblionumbertagsubfield => $biblionumber);
} else {
my $newfield;
$newfield = MARC::Field->new( $biblionumbertagfield,'','', $biblionumbertagsubfield => $biblionumber);
$record->append_fields($newfield);
}
}
# warn "FIXED BIBLIONUMBER".$record->as_formatted;
}
unless ($record->subfield($biblioitemnumbertagfield,$biblioitemnumbertagsubfield)) {
$record_correct=0;
# warn "INCORRECT BIBLIOITEMNUMBER :".$record->as_formatted;
my $field;
# if the field where biblionumber is already exist, just update it, otherwise create it
if ($record->field($biblioitemnumbertagfield)) {
$field = $record->field($biblioitemnumbertagfield);
if ($biblioitemnumbertagfield <10) {
$field->update($biblionumber);
} else {
$field->add_subfields($biblioitemnumbertagsubfield => $biblionumber);
}
} else {
my $newfield;
if ($biblioitemnumbertagfield <10) {
$newfield = MARC::Field->new( $biblioitemnumbertagfield, $biblionumber);
} else {
$newfield = MARC::Field->new( $biblioitemnumbertagfield,'','', $biblioitemnumbertagsubfield => $biblionumber);
}
$record->insert_grouped_field($newfield);
}
# warn "FIXED BIBLIOITEMNUMBER".$record->as_formatted;
}
unless ($record_correct) {
my $update_xml = $dbh->prepare("update biblioitems set marcxml=? where biblionumber=?");
warn "UPDATING $biblionumber (missing biblionumber or biblioitemnumber in MARC record : ".$record->as_xml;
$update_xml->execute($record->as_xml,$biblionumber);
}
print ".";
print "\r$i" unless ($i++ %100);
print OUT $record->as_usmarc();
}
close(OUT);
}
#
# and reindexing everything
#
print "====================\n";
print "REINDEXING zebra\n";
print "====================\n";
system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('biblioserver')->{config}." -d biblios init") if ($reset);
system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('biblioserver')->{config}." -d biblios update $directory/biblios");
system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('biblioserver')->{config}." -d biblios commit");
} else {
print "skipping biblios\n";
}
print "====================\n";
print "CLEANING\n";
print "====================\n";
if ($keep_export) {
print "NOTHING cleaned : the $directory has been kept. You can re-run this script with the -s parameter if you just want to rebuild zebra after changing the record.abs or another zebra config file\n";
} else {
system("rm -rf $directory");
print "directory $directory deleted\n";
}