Bug 11081: Port Koha::Contrib::Tamil indexer into Koha code base
Add two scripts for indexing: koha-index and koha-index-daemon. Documentation: perldoc koha-index perldoc koha-index-daemon New dependencies: MooseX::Getopt MooseX::RW AnyEvent::Processor Available as a Debian package: libmoosex-getopt-perl Test plan: - Apply the patch - Install dependencies, if necessary - Reindex a whole biblio catalog: koha-index --verbose - Reindex a whole authorities catalog: koha-index --source authority --verbose - Modify 1 biblio record, and index it: koha-index --select queue - Modifiy 1 authority record, and index it: koha-index --select queue --source authority - Run indexing daemon, with a 15s timeout: koha-index-daemon --timeout 15 - Modify a biblio record: wait 15s, and see - Modify an authority record: wait 15s, and see Signed-off-by: Bernardo Gonzalez Kriegel <bgkriegel@gmail.com> Works well, followed test plan without problems One comment: perhaps an option for koha-index-daemon to work quietly, without printing information. No koha-qa errors, but new files don't have license, fixed in followup Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com> Signed-off-by: Brendan A Gallagher <brendan@bywatersolutions.com>
This commit is contained in:
parent
0dd817178b
commit
6d44b0a91a
7 changed files with 801 additions and 0 deletions
|
@ -552,6 +552,36 @@ our $PERL_DEPS = {
|
|||
'required' => '1',
|
||||
'min_ver' => '0.02',
|
||||
},
|
||||
'MooseX::Storage' => {
|
||||
'usage' => 'Core',
|
||||
'required' => '0',
|
||||
'min_ver' => '0.30',
|
||||
},
|
||||
'MooseX::Types' => {
|
||||
'usage' => 'Core',
|
||||
'required' => '0',
|
||||
'min_ver' => '0.30',
|
||||
},
|
||||
'MooseX::Getopt' => {
|
||||
'usage' => 'Command line scripts',
|
||||
'required' => '0',
|
||||
'min_ver' => '0.46',
|
||||
},
|
||||
'MooseX::RW' => {
|
||||
'usage' => 'Command line scripts',
|
||||
'required' => '0',
|
||||
'min_ver' => '0.003',
|
||||
},
|
||||
'String::RewritePrefix' => {
|
||||
'usage' => 'Core',
|
||||
'required' => '0',
|
||||
'min_ver' => '0.006',
|
||||
},
|
||||
'Time::Progress' => {
|
||||
'usage' => 'Core',
|
||||
'required' => '0',
|
||||
'min_ver' => '1.7',
|
||||
},
|
||||
'DBD::Mock' => {
|
||||
'usage' => 'Core',
|
||||
'required' => '1',
|
||||
|
@ -612,6 +642,16 @@ our $PERL_DEPS = {
|
|||
'required' => '0',
|
||||
'min_ver' => '1',
|
||||
},
|
||||
'AnyEvent::Processor' => {
|
||||
'usage' => 'Command line scripts',
|
||||
'required' => '0',
|
||||
'min_ver' => '0.003',
|
||||
},
|
||||
'Moose' => {
|
||||
'usage' => 'Core',
|
||||
'required' => '0',
|
||||
'min_ver' => '1.09',
|
||||
},
|
||||
'String::Random' => {
|
||||
'usage' => 'OpacSelfRegistration',
|
||||
'required' => '1',
|
||||
|
|
116
Koha/Indexer/Daemon.pm
Normal file
116
Koha/Indexer/Daemon.pm
Normal file
|
@ -0,0 +1,116 @@
|
|||
package Koha::Indexer::Daemon;
|
||||
|
||||
use Moose;
|
||||
|
||||
use Modern::Perl;
|
||||
use utf8;
|
||||
use AnyEvent;
|
||||
use Koha::Indexer::Indexing;
|
||||
use C4::Context;
|
||||
|
||||
with 'MooseX::Getopt';
|
||||
|
||||
|
||||
has name => ( is => 'rw', isa => 'Str' );
|
||||
|
||||
has directory => ( is => 'rw', isa => 'Str' );
|
||||
|
||||
has timeout => (
|
||||
is => 'rw',
|
||||
isa => 'Int',
|
||||
default => 60,
|
||||
);
|
||||
|
||||
has verbose => ( is => 'rw', isa => 'Bool', default => 0 );
|
||||
|
||||
|
||||
sub BUILD {
|
||||
my $self = shift;
|
||||
|
||||
say "Starting Koha Indexer Daemon";
|
||||
|
||||
$self->name( C4::Context->config('database') );
|
||||
|
||||
my $idle = AnyEvent->timer(
|
||||
after => $self->timeout,
|
||||
interval => $self->timeout,
|
||||
cb => sub { $self->index_zebraqueue(); }
|
||||
);
|
||||
AnyEvent->condvar->recv;
|
||||
}
|
||||
|
||||
|
||||
sub index_zebraqueue {
|
||||
my $self = shift;
|
||||
|
||||
my $dbh = C4::Context->dbh();
|
||||
my $sql = " SELECT COUNT(*), server
|
||||
FROM zebraqueue
|
||||
WHERE done = 0
|
||||
GROUP BY server ";
|
||||
my $sth = $dbh->prepare($sql);
|
||||
$sth->execute();
|
||||
my %count = ( biblio => 0, authority => 0 );
|
||||
while ( my ($count, $server) = $sth->fetchrow ) {
|
||||
$server =~ s/server//g;
|
||||
$count{$server} = $count;
|
||||
}
|
||||
|
||||
say "[", $self->name, "] Index biblio (", $count{biblio}, ") authority (",
|
||||
$count{authority}, ")";
|
||||
|
||||
for my $source (qw/biblio authority/) {
|
||||
next unless $count{$source};
|
||||
my $indexer = Koha::Indexer::Indexing->new(
|
||||
source => $source,
|
||||
select => 'queue',
|
||||
blocking => 1,
|
||||
keep => 1,
|
||||
verbose => $self->verbose,
|
||||
);
|
||||
$indexer->directory($self->directory) if $self->directory;
|
||||
$indexer->run();
|
||||
}
|
||||
}
|
||||
|
||||
no Moose;
|
||||
__PACKAGE__->meta->make_immutable;
|
||||
1;
|
||||
|
||||
__END__
|
||||
=pod
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
# Index Koha queued biblio/authority records every minute.
|
||||
# KOHA_CONF environment variable is used to find which Koha
|
||||
# instance to use.
|
||||
# Records are exported from Koha DB into files located in
|
||||
# the current directory
|
||||
my $daemon = Koha::Indexer::Daemon->new();
|
||||
|
||||
my $daemon = Koha::Indexer::Daemon->new(
|
||||
timeout => 20,
|
||||
directory => '/home/koha/mylib/tmp',
|
||||
verbose => 1 );
|
||||
|
||||
=head1 Attributes
|
||||
|
||||
=over
|
||||
|
||||
=item directory($directory_name)
|
||||
|
||||
Location of the directory where to export biblio/authority records before
|
||||
sending them to Zebra indexer.
|
||||
|
||||
=item timeout($seconds)
|
||||
|
||||
Number of seconds between indexing.
|
||||
|
||||
=item verbose(0|1)
|
||||
|
||||
Task verbosity.
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
195
Koha/Indexer/Indexing.pm
Normal file
195
Koha/Indexer/Indexing.pm
Normal file
|
@ -0,0 +1,195 @@
|
|||
package Koha::Indexer::Indexing;
|
||||
|
||||
use Moose;
|
||||
|
||||
use Modern::Perl;
|
||||
use utf8;
|
||||
use Carp;
|
||||
use Koha::Indexer::RecordReader;
|
||||
use Koha::Indexer::RecordWriter;
|
||||
use AnyEvent::Processor::Conversion;
|
||||
use File::Path;
|
||||
use IO::File;
|
||||
use C4::Context;
|
||||
|
||||
|
||||
with 'MooseX::Getopt';
|
||||
|
||||
|
||||
has source => (
|
||||
is => 'rw',
|
||||
isa => 'Koha::RecordType',
|
||||
default => 'biblio'
|
||||
);
|
||||
|
||||
has select => (
|
||||
is => 'rw',
|
||||
isa => 'Koha::RecordSelect',
|
||||
required => 1,
|
||||
default => 'all',
|
||||
);
|
||||
|
||||
has directory => (
|
||||
is => 'rw',
|
||||
isa => 'Str',
|
||||
default => './koha-index',
|
||||
);
|
||||
|
||||
has keep => ( is => 'rw', isa => 'Bool', default => 0 );
|
||||
|
||||
has verbose => ( is => 'rw', isa => 'Bool', default => 0 );
|
||||
|
||||
has help => (
|
||||
is => 'rw',
|
||||
isa => 'Bool',
|
||||
default => 0,
|
||||
traits => [ 'NoGetopt' ],
|
||||
);
|
||||
|
||||
has blocking => (
|
||||
is => 'rw',
|
||||
isa => 'Bool',
|
||||
default => 0,
|
||||
traits => [ 'NoGetopt' ],
|
||||
);
|
||||
|
||||
|
||||
sub run {
|
||||
my $self = shift;
|
||||
|
||||
# Is it a full indexing of all Koha DB records?
|
||||
my $is_full_indexing = $self->select =~ /all/i;
|
||||
|
||||
# Is it biblio indexing (if not it's authority)
|
||||
my $is_biblio_indexing = $self->source =~ /biblio/i;
|
||||
|
||||
# STEP 1: All biblio records are exported in a directory
|
||||
|
||||
unless ( -d $self->directory ) {
|
||||
mkdir $self->directory
|
||||
or die "Unable to create directory: " . $self->directory;
|
||||
}
|
||||
my $from_dir = $self->directory . "/" . $self->source;
|
||||
mkdir $from_dir;
|
||||
for my $dir ( ( "$from_dir/update", "$from_dir/delete") ) {
|
||||
rmtree( $dir ) if -d $dir;
|
||||
mkdir $dir;
|
||||
}
|
||||
|
||||
# DOM indexing? otherwise GRS-1
|
||||
my $is_dom = $self->source eq 'biblio'
|
||||
? 'zebra_bib_index_mode'
|
||||
: 'zebra_auth_index_mode';
|
||||
$is_dom = C4::Context->config($is_dom) || '';
|
||||
$is_dom = $is_dom =~ /dom/i ? 1 : 0;
|
||||
|
||||
# STEP 1.1: Records to update
|
||||
say "Exporting records to update" if $self->verbose;
|
||||
my $exporter = AnyEvent::Processor::Conversion->new(
|
||||
reader => Koha::Indexer::RecordReader->new(
|
||||
source => $self->source,
|
||||
select => $is_full_indexing ? 'all' : 'queue_update',
|
||||
xml => '1'
|
||||
),
|
||||
writer => Koha::Indexer::RecordWriter->new(
|
||||
fh => IO::File->new( "$from_dir/update/records", '>:encoding(utf8)' ),
|
||||
valid => $is_dom ),
|
||||
blocking => $self->blocking,
|
||||
verbose => $self->verbose,
|
||||
);
|
||||
$exporter->run();
|
||||
|
||||
# STEP 1.2: Record to delete, if zebraqueue
|
||||
if ( ! $is_full_indexing ) {
|
||||
say "Exporting records to delete" if $self->verbose;
|
||||
$exporter = AnyEvent::Processor::Conversion->new(
|
||||
reader => Koha::Indexer::RecordReader->new(
|
||||
source => $self->source,
|
||||
select => 'queue_delete',
|
||||
xml => '1'
|
||||
),
|
||||
writer => Koha::Indexer::RecordWriter->new(
|
||||
fh => IO::File->new( "$from_dir/delete/records", '>:encoding(utf8)' ),
|
||||
valid => $is_dom ),
|
||||
blocking => $self->blocking,
|
||||
verbose => $self->verbose,
|
||||
);
|
||||
$exporter->run();
|
||||
}
|
||||
|
||||
# STEP 2: Run zebraidx
|
||||
|
||||
my $cmd;
|
||||
my $zconfig = C4::Context->zebraconfig(
|
||||
$is_biblio_indexing ? 'biblioserver' : 'authorityserver')->{config};
|
||||
my $db_name = $is_biblio_indexing ? 'biblios' : 'authorities';
|
||||
my $cmd_base = "zebraidx -c " . $zconfig;
|
||||
$cmd_base .= " -n" if $is_full_indexing; # No shadow: no indexing daemon
|
||||
$cmd_base .= $self->verbose ? " -v warning,log" : " -v none";
|
||||
$cmd_base .= " -g marcxml";
|
||||
$cmd_base .= " -d $db_name";
|
||||
|
||||
if ( $is_full_indexing ) {
|
||||
$cmd = "$cmd_base init";
|
||||
say $cmd if $self->verbose;
|
||||
system( $cmd );
|
||||
}
|
||||
|
||||
$cmd = "$cmd_base update $from_dir/update";
|
||||
say $cmd if $self->verbose;
|
||||
system( $cmd );
|
||||
|
||||
if ( ! $is_full_indexing ) {
|
||||
$cmd = "$cmd_base adelete $from_dir/delete";
|
||||
say $cmd if $self->verbose;
|
||||
system( $cmd );
|
||||
my $cmd = "$cmd_base commit";
|
||||
say $cmd if $self->verbose;
|
||||
system( $cmd );
|
||||
}
|
||||
|
||||
rmtree( $self->directory ) unless $self->keep;
|
||||
}
|
||||
|
||||
|
||||
no Moose;
|
||||
__PACKAGE__->meta->make_immutable;
|
||||
|
||||
__END__
|
||||
=pod
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
my $indexer = Koha::Indexer->new(
|
||||
source => 'biblio',
|
||||
select => 'queue'
|
||||
);
|
||||
$indexer->run();
|
||||
|
||||
my $indexer = Koha::Indexer->new(
|
||||
source => 'authority',
|
||||
select => 'all',
|
||||
directory => '/tmp',
|
||||
verbose => 1,
|
||||
);
|
||||
$indexer->run();
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Indexes Koha biblio/authority records, full indexing or queued record indexing.
|
||||
|
||||
|
||||
=head1 Methods
|
||||
|
||||
=over
|
||||
|
||||
=item run
|
||||
|
||||
Runs the indexing task.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=cut
|
||||
|
||||
1;
|
263
Koha/Indexer/RecordReader.pm
Normal file
263
Koha/Indexer/RecordReader.pm
Normal file
|
@ -0,0 +1,263 @@
|
|||
package Koha::Indexer::RecordReader;
|
||||
|
||||
use Moose;
|
||||
|
||||
with 'MooseX::RW::Reader';
|
||||
|
||||
|
||||
use Modern::Perl;
|
||||
use utf8;
|
||||
use Moose::Util::TypeConstraints;
|
||||
use MARC::Record;
|
||||
use MARC::File::XML;
|
||||
use C4::Context;
|
||||
use C4::Biblio;
|
||||
use C4::Items;
|
||||
|
||||
|
||||
subtype 'Koha::RecordType'
|
||||
=> as 'Str',
|
||||
=> where { /biblio|authority/i },
|
||||
=> message { "$_ is not a valid Koha::RecordType (biblio or authority" };
|
||||
|
||||
subtype 'Koha::RecordSelect'
|
||||
=> as 'Str',
|
||||
=> where { /all|queue|queue_update|queue_delete/ },
|
||||
=> message {
|
||||
"$_ is not a valide Koha::RecordSelect " .
|
||||
"(all or queue or queue_update or queue_delete)"
|
||||
};
|
||||
|
||||
|
||||
has source => (
|
||||
is => 'rw',
|
||||
isa => 'Koha::RecordType',
|
||||
required => 1,
|
||||
default => 'biblio',
|
||||
);
|
||||
|
||||
has select => (
|
||||
is => 'rw',
|
||||
isa => 'Koha::RecordSelect',
|
||||
required => 1,
|
||||
default => 'all',
|
||||
);
|
||||
|
||||
has xml => ( is => 'rw', isa => 'Bool', default => '0' );
|
||||
|
||||
has sth => ( is => 'rw' );
|
||||
|
||||
# Last returned record biblionumber;
|
||||
has id => ( is => 'rw' );
|
||||
|
||||
# Biblio records normalizer, if necessary
|
||||
has normalizer => ( is => 'rw' );
|
||||
|
||||
# Read all records? (or queued records)
|
||||
has allrecords => ( is => 'rw', isa => 'Bool', default => 1 );
|
||||
|
||||
# Mark as done an entry is Zebra queue
|
||||
has sth_queue_done => ( is => 'rw' );
|
||||
|
||||
# Items tag
|
||||
has itemtag => ( is => 'rw' );
|
||||
|
||||
# Las returned record frameworkcode
|
||||
# FIXME: a KohaRecord class should contain this information
|
||||
has frameworkcode => ( is => 'rw', isa => 'Str' );
|
||||
|
||||
|
||||
sub BUILD {
|
||||
my $self = shift;
|
||||
my $dbh = C4::Context->dbh();
|
||||
|
||||
# Tag containing items
|
||||
my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
|
||||
$self->itemtag($itemtag);
|
||||
|
||||
if ( $self->source =~ /biblio/i &&
|
||||
C4::Context->preference('IncludeSeeFromInSearches') )
|
||||
{
|
||||
require Koha::RecordProcessor;
|
||||
my $normalizer = Koha::RecordProcessor->new( { filters => 'EmbedSeeFromHeadings' } );
|
||||
$self->normalizer($normalizer);
|
||||
# Necessary for as_xml method
|
||||
MARC::File::XML->default_record_format( C4::Context->preference('marcflavour') );
|
||||
}
|
||||
|
||||
my $operation = $self->select =~ /update/i
|
||||
? 'specialUpdate'
|
||||
: 'recordDelete';
|
||||
$self->allrecords( $self->select =~ /all/i ? 1 : 0 );
|
||||
my $sql =
|
||||
$self->source =~ /biblio/i
|
||||
? $self->allrecords
|
||||
? "SELECT NULL, biblionumber FROM biblio"
|
||||
: "SELECT id, biblio_auth_number FROM zebraqueue
|
||||
WHERE server = 'biblioserver'
|
||||
AND operation = '$operation' AND done = 0"
|
||||
: $self->allrecords
|
||||
? "SELECT NULL, authid FROM auth_header"
|
||||
: "SELECT id, biblio_auth_number FROM zebraqueue
|
||||
WHERE server = 'authorityserver'
|
||||
AND operation = '$operation' AND done = 0";
|
||||
my $sth = $dbh->prepare( $sql );
|
||||
$sth->execute();
|
||||
$self->sth( $sth );
|
||||
|
||||
unless ( $self->allrecords ) {
|
||||
$self->sth_queue_done( $dbh->prepare(
|
||||
"UPDATE zebraqueue SET done=1 WHERE id=?" ) );
|
||||
}
|
||||
|
||||
__PACKAGE__->meta->add_method( 'get' =>
|
||||
$self->source =~ /biblio/i
|
||||
? $self->xml && !$self->normalizer
|
||||
? \&get_biblio_xml
|
||||
: \&get_biblio_marc
|
||||
: $self->xml
|
||||
? \&get_auth_xml
|
||||
: \&get_auth_marc
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
sub read {
|
||||
my $self = shift;
|
||||
while ( my ($queue_id, $id) = $self->sth->fetchrow ) {
|
||||
# Suppress entry in zebraqueue table
|
||||
$self->sth_queue_done->execute($queue_id) if $queue_id;
|
||||
if ( my $record = $self->get( $id ) ) {
|
||||
$record = $self->normalizer->process($record) if $self->normalizer;
|
||||
$self->count($self->count+1);
|
||||
$self->id( $id );
|
||||
return $record;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
sub get_biblio_xml {
|
||||
my ( $self, $id ) = @_;
|
||||
my$dbh = C4::Context->dbh();
|
||||
my $sth = $dbh->prepare(
|
||||
"SELECT marcxml FROM biblioitems WHERE biblionumber=? ");
|
||||
$sth->execute( $id );
|
||||
my ($marcxml) = $sth->fetchrow;
|
||||
|
||||
# If biblio isn't found in biblioitems, it is searched in
|
||||
# deletedbilioitems. Usefull for delete Zebra requests
|
||||
unless ( $marcxml ) {
|
||||
$sth = $dbh->prepare(
|
||||
"SELECT marcxml FROM deletedbiblioitems WHERE biblionumber=? ");
|
||||
$sth->execute( $id );
|
||||
($marcxml) = $sth->fetchrow;
|
||||
}
|
||||
|
||||
# Items extraction
|
||||
# FIXME: It slows down drastically biblio records export
|
||||
{
|
||||
my @items = @{ $dbh->selectall_arrayref(
|
||||
"SELECT * FROM items WHERE biblionumber=$id",
|
||||
{Slice => {} } ) };
|
||||
if (@items){
|
||||
my $record = MARC::Record->new;
|
||||
$record->encoding('UTF-8');
|
||||
my @itemsrecord;
|
||||
foreach my $item (@items) {
|
||||
my $record = Item2Marc($item, $id);
|
||||
push @itemsrecord, $record->field($self->itemtag);
|
||||
}
|
||||
$record->insert_fields_ordered(@itemsrecord);
|
||||
my $itemsxml = $record->as_xml_record();
|
||||
$marcxml =
|
||||
substr($marcxml, 0, length($marcxml)-10) .
|
||||
substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
|
||||
}
|
||||
}
|
||||
return $marcxml;
|
||||
}
|
||||
|
||||
|
||||
# Get biblio record, if the record doesn't exist in biblioitems, it is searched
|
||||
# in deletedbiblioitems.
|
||||
sub get_biblio_marc {
|
||||
my ( $self, $id ) = @_;
|
||||
|
||||
my $dbh = C4::Context->dbh();
|
||||
my $sth = $dbh->prepare(
|
||||
"SELECT marcxml FROM biblioitems WHERE biblionumber=? ");
|
||||
$sth->execute( $id );
|
||||
my ($marcxml) = $sth->fetchrow;
|
||||
|
||||
unless ( $marcxml ) {
|
||||
$sth = $dbh->prepare(
|
||||
"SELECT marcxml FROM deletedbiblioitems WHERE biblionumber=? ");
|
||||
$sth->execute( $id );
|
||||
($marcxml) = $sth->fetchrow;
|
||||
}
|
||||
|
||||
$marcxml =~ s/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]//g;
|
||||
my $record = MARC::Record->new();
|
||||
if ($marcxml) {
|
||||
$record = eval {
|
||||
MARC::Record::new_from_xml( $marcxml, "utf8" ) };
|
||||
if ($@) { warn " problem with: $id : $@ \n$marcxml"; }
|
||||
|
||||
# Items extraction if Koha v3.4 and above
|
||||
# FIXME: It slows down drastically biblio records export
|
||||
if ( $self->itemsextraction ) {
|
||||
my @items = @{ $dbh->selectall_arrayref(
|
||||
"SELECT * FROM items WHERE biblionumber=$id",
|
||||
{Slice => {} } ) };
|
||||
if (@items){
|
||||
my @itemsrecord;
|
||||
foreach my $item (@items) {
|
||||
my $record = Item2Marc($item, $id);
|
||||
push @itemsrecord, $record->field($self->itemtag);
|
||||
}
|
||||
$record->insert_fields_ordered(@itemsrecord);
|
||||
}
|
||||
}
|
||||
return $record;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
sub get_auth_xml {
|
||||
my ( $self, $id ) = @_;
|
||||
|
||||
my $dbh = C4::Context->dbh();
|
||||
my $sth = $dbh->prepare(
|
||||
"select marcxml from auth_header where authid=? " );
|
||||
$sth->execute( $id );
|
||||
my ($xml) = $sth->fetchrow;
|
||||
|
||||
# If authority isn't found we build a mimimalist record
|
||||
# Usefull for delete Zebra requests
|
||||
unless ( $xml ) {
|
||||
return
|
||||
"<record
|
||||
xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"
|
||||
xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\"
|
||||
xmlns=\"http://www.loc.gov/MARC21/slim\">
|
||||
<leader> </leader>
|
||||
<controlfield tag=\"001\">$id</controlfield>
|
||||
</record>\n";
|
||||
}
|
||||
|
||||
my $new_xml = '';
|
||||
foreach ( split /\n/, $xml ) {
|
||||
next if /^<collection|^<\/collection/;
|
||||
$new_xml .= "$_\n";
|
||||
}
|
||||
return $new_xml;
|
||||
}
|
||||
|
||||
|
||||
no Moose;
|
||||
1;
|
59
Koha/Indexer/RecordWriter.pm
Normal file
59
Koha/Indexer/RecordWriter.pm
Normal file
|
@ -0,0 +1,59 @@
|
|||
package Koha::Indexer::RecordWriter;
|
||||
use Moose;
|
||||
|
||||
with 'MooseX::RW::Writer::File';
|
||||
|
||||
|
||||
use Carp;
|
||||
use MARC::Batch;
|
||||
use MARC::Record;
|
||||
use MARC::File::XML;
|
||||
|
||||
|
||||
# Is XML Stream a valid marcxml
|
||||
# By default no => no <collection> </collection>
|
||||
has valid => (
|
||||
is => 'rw',
|
||||
isa => 'Bool',
|
||||
default => 0,
|
||||
);
|
||||
|
||||
|
||||
sub begin {
|
||||
my $self = shift;
|
||||
if ( $self->valid ) {
|
||||
my $fh = $self->fh;
|
||||
print $fh '<?xml version="1.0" encoding="UTF-8"?>', "\n", '<collection>', "\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
sub end {
|
||||
my $self = shift;
|
||||
my $fh = $self->fh;
|
||||
if ( $self->valid ) {
|
||||
print $fh '</collection>', "\n";
|
||||
}
|
||||
$fh->flush();
|
||||
}
|
||||
|
||||
|
||||
|
||||
#
|
||||
# Sent record is rather a MARC::Record object or an marcxml string
|
||||
#
|
||||
sub write {
|
||||
my ($self, $record) = @_;
|
||||
|
||||
$self->count( $self->count + 1 );
|
||||
|
||||
my $fh = $self->fh;
|
||||
my $xml = ref($record) eq 'MARC::Record'
|
||||
? $record->as_xml_record() : $record;
|
||||
$xml =~ s/<\?xml version="1.0" encoding="UTF-8"\?>\n//g if $self->valid;
|
||||
print $fh $xml;
|
||||
}
|
||||
|
||||
__PACKAGE__->meta->make_immutable;
|
||||
|
||||
1;
|
79
misc/bin/koha-index
Executable file
79
misc/bin/koha-index
Executable file
|
@ -0,0 +1,79 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
package Main;
|
||||
|
||||
use Modern::Perl;
|
||||
use utf8;
|
||||
use Koha::Indexer::Indexing;
|
||||
use Pod::Usage;
|
||||
|
||||
my $indexer = Koha::Indexer::Indexing->new_with_options();
|
||||
if ( $indexer->help ) {
|
||||
pod2usage( -verbose => 99 );
|
||||
exit;
|
||||
}
|
||||
$indexer->run();
|
||||
|
||||
|
||||
__END__
|
||||
|
||||
=pod
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
koha-index
|
||||
|
||||
koha-index --verbose
|
||||
|
||||
koha-index --source biblio --select queue
|
||||
|
||||
koha-index --source authority --select all
|
||||
|
||||
koha-index --select queue --directory /tmp/koha-index-mylib --keep
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Index queued biblio/autority record, or reindex the whole DB. Koha standard
|
||||
environment variables must ne set appropriately: KOHA_CONF and PERL5LIB.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over
|
||||
|
||||
=item --source
|
||||
|
||||
Select records to be indexed: C<biblio> or C<authority>. If not specified,
|
||||
biblio by default.
|
||||
|
||||
=item --select
|
||||
|
||||
Select record to be indexed: C<all> or C<queue>. If not specified, C<all> is
|
||||
selected. If C<all> is selected, zebra database is reset before indexing.
|
||||
|
||||
=item --directory
|
||||
|
||||
Directory where records to be indexed by Zebra are exported. If not specified,
|
||||
a direcory named C<koha-index> is used, and if necessary created, in the
|
||||
current directory. In this directory, sub-directories are created containing
|
||||
records to be updated or deleted by Zebra. If those subdirectories already
|
||||
exist, they are first emptied. The export directory tree is kept after zebra
|
||||
indexing.
|
||||
|
||||
=item --keep
|
||||
|
||||
Keep the directory, and its content, where biblio/authority records have been
|
||||
exported.
|
||||
|
||||
=item --verbose
|
||||
|
||||
Increase the amount of logging. Normally only warnings and errors from the
|
||||
indexing are shown.
|
||||
|
||||
=back
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=for :list
|
||||
* L<koha-index-daemon>
|
||||
|
||||
=cut
|
49
misc/bin/koha-index-daemon
Executable file
49
misc/bin/koha-index-daemon
Executable file
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
package Main;
|
||||
|
||||
use Modern::Perl;
|
||||
use utf8;
|
||||
use Koha::Indexer::Daemon;
|
||||
use Pod::Usage;
|
||||
|
||||
Koha::Indexer::Daemon->new_with_options();
|
||||
|
||||
__END__
|
||||
|
||||
=pod
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
koha-index-daemon
|
||||
|
||||
koha-index-daemon --timeout 60
|
||||
|
||||
koha-index-daemon --timeout 60 --directory /home/mylib/tmp
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Examine periodicaly zebraqueue table from a Koha instance and index
|
||||
bilbio/authority records.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over
|
||||
|
||||
=item --timeout
|
||||
|
||||
Specify the daemon timeout in seconds.
|
||||
|
||||
=item --directory
|
||||
|
||||
Directory where to write record exported from Koha DB before sending them to
|
||||
Zebra. Subdirectories are created.
|
||||
|
||||
=back
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
=for :list
|
||||
* L<koha-index>
|
||||
|
||||
=cut
|
Loading…
Reference in a new issue