61e7aa374e
Sometimes we need to only re-index a subset of our bibliographic data or authorities. Currently this is only possible by enumerating all ids (-bn or -ai), which does not work well when indexing e.g. 100.000 items of a 2.000.000 DB. Re-indexing everything is also overkill. This patch adds a `--where` flag to misc/search_tools/rebuild_elasticsearch.pl which can take arbitrary SQL (that of course has to match the respective tables) and adds it as an additional param to the resultset to index. To test, start koha-testing-docker with ElasticSearch enabled, for example via `ktd --es7 up`. Before applying the patch, rebuild_elasticsearch will index all data: Biblios: $ misc/search_tools/rebuild_elasticsearch.pl -b -v [12387] Checking state of biblios index [12387] Indexing biblios [12387] Committing final records... [12387] Total 435 records indexed (there might be a warning regarding a broken biblio, which can be ignored) Auth: $ misc/search_tools/rebuild_elasticsearch.pl -a -v [12546] Checking state of authorities index [12546] Indexing authorities [12546] 1000 records processed [12546] Committing final records... [12546] Total 1706 records indexed Now apply the patch. Biblio, limit by range of biblioid: $ misc/search_tools/rebuild_elasticsearch.pl -b -v --where "biblionumber between 100 and 150" [12765] Checking state of biblios index [12765] Indexing biblios [12765] Committing final records... [12765] Total 50 records indexed Note that only 50 records were indexed (instead of the whole set of 435 records) Auth, limit by authtypecode: $ misc/search_tools/rebuild_elasticsearch.pl -a -v --where "authtypecode = 'GEOGR_NAME'" [12848] Checking state of authorities index [12848] Indexing authorities [12848] Committing final records... [12848] Total 142 records indexed Again, only 142 have been indexed. 
Sponsored-by: Steiermärkische Landesbibliothek Sponsored-by: HKS3 / koha-support.eu Signed-off-by: David Nind <david@davidnind.com> Signed-off-by: Nick Clemens <nick@bywatersolutions.com> Signed-off-by: Katrin Fischer <katrin.fischer@bsz-bw.de>
195 lines
4.8 KiB
Perl
195 lines
4.8 KiB
Perl
package Koha::BiblioUtils;
|
|
|
|
# This contains functions to do with managing biblio records.
|
|
|
|
# Copyright 2014 Catalyst IT
|
|
#
|
|
# This file is part of Koha.
|
|
#
|
|
# Koha is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Koha is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Koha; if not, see <http://www.gnu.org/licenses>.
|
|
|
|
=head1 NAME
|
|
|
|
Koha::BiblioUtils - contains fundamental biblio-related functions
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This contains functions for normal operations on biblio records.
|
|
|
|
=cut
|
|
|
|
use Koha::Biblios;
|
|
use Koha::MetadataIterator;
|
|
use Koha::Database;
|
|
use Modern::Perl;
|
|
|
|
|
|
use base qw(Koha::MetadataRecord);
|
|
|
|
# Declare accessors for the four fields that new() hands to the parent
# constructor. NOTE(review): mk_accessors is not defined in this file; it is
# presumably inherited through Koha::MetadataRecord - confirm.
__PACKAGE__->mk_accessors(qw( record schema id datatype ));
|
|
|
|
=head1 FUNCTIONS
|
|
|
|
=head2 new

    my $biblio = Koha::BiblioUtils->new($marc_record, [$biblionumber]);

Constructor. Wraps the given MARC record in a C<Koha::BiblioUtils> object.
The biblionumber is optional; when supplied it is stored as the object's
C<id>. The C<schema> field is set to the lower-cased C<marcflavour>
preference and the C<datatype> field is always C<biblio>.

=cut

sub new {
    my ( $class, $record, $biblionumber ) = @_;

    # NOTE(review): C4::Context is not among this file's visible "use" lines;
    # presumably it is loaded by one of the modules that are - confirm.
    my $marc_flavour = lc C4::Context->preference("marcflavour");

    my $fields = {
        'record'   => $record,
        'schema'   => $marc_flavour,
        'id'       => $biblionumber,
        'datatype' => 'biblio',
    };

    my $self = $class->SUPER::new($fields);

    # Bless into the invoking class before handing the object back.
    return bless $self, $class;
}
|
|
|
|
=head2 get_from_biblionumber

    my $biblio = Koha::BiblioUtils->get_from_biblionumber($biblionumber, %options);

This will give you an instance of L<Koha::BiblioUtils> that is the biblio that
you requested.

Options are passed through unchanged to C<get_marc_biblio>:

=over 4

=item C<item_data>

If true, then the item data will be merged into the record when it's loaded.

=back

Nothing is returned (C<undef> in scalar context) when C<get_marc_biblio>
yields no record for the biblionumber.

=cut

sub get_from_biblionumber {
    my ( $class, $bibnum, %options ) = @_;

    my $marc = $class->get_marc_biblio( $bibnum, %options );

    # Without a record there is nothing to wrap: honour the documented
    # "undef when missing" contract instead of building an object around undef.
    return if !defined $marc;

    return $class->new( $marc, $bibnum );
}
|
|
|
|
=head2 get_all_biblios_iterator

    my $it = Koha::BiblioUtils->get_all_biblios_iterator(%options);

This will provide an iterator object that will, one by one, provide the
Koha::BiblioUtils of each biblio. This will include the item data.

The iterator is a Koha::MetadataIterator object. Records that cannot be
loaded are reported with a warning and skipped.

Possible options are:

=over 4

=item C<slice>

slice may be defined as a hash of two values: index and count. index
is the slice number to process and count is total number of slices.
With this information the iterator returns just the given slice of
records instead of all.

=item C<desc>

If true, the biblios are requested in descending biblionumber order.
Otherwise no explicit ordering is requested.

=item C<where>

A raw SQL condition that is added as an extra filter on the biblio search,
e.g. C<biblionumber between 100 and 150>. The text is inserted into the
query verbatim, without any escaping, so it must only ever come from a
trusted source (such as an administrator on the command line).

=back

=cut

sub get_all_biblios_iterator {
    my ( $class, %options ) = @_;

    # When slicing, keep only rows whose biblionumber modulo the slice count
    # equals the slice index.
    my $search_terms = {};
    if ( my $slice = $options{slice} ) {
        $search_terms = \[ 'mod(biblionumber, ?) = ?', $slice->{count}, $slice->{index} ];
    }

    # Only the key column is selected here; the metadata is loaded row by row
    # inside the iterator below.
    my $search_options = { columns => [qw/ biblionumber /] };
    if ( $options{desc} ) {
        $search_options->{order_by} = { -desc => 'biblionumber' };
    }

    my $rs = Koha::Biblios->search( $search_terms, $search_options );

    if ( my $sql = $options{where} ) {

        # SECURITY: $sql is raw, caller-supplied SQL and cannot be
        # parameterised here; callers must only pass trusted input.
        # It is wrapped in parentheses so that an OR inside the fragment cannot
        # change how it combines with the slice condition, in case the SQL
        # generator does not add its own grouping around literal SQL.
        $rs = $rs->search( \["( $sql )"] );
    }

    my $next_func = sub {

        # Warn and skip bad records, otherwise we break the loop
        while ( my $row = $rs->next() ) {
            my $biblio;

            # The trailing 1 makes success detectable even if $@ ends up
            # empty after a failure.
            my $ok = eval {
                my $marc = $row->metadata->record( { embed_items => 1 } );
                $biblio = $class->new( $marc, $row->biblionumber );
                1;
            };
            return $biblio if $ok;

            my $error = $@ || 'unknown error';
            warn sprintf "Something went wrong reading record for biblio %s: %s\n", $row->biblionumber, $error;
        }

        # Result set exhausted.
        return;
    };

    return Koha::MetadataIterator->new($next_func);
}
|
|
|
|
=head2 get_marc_biblio

    my $marc = Koha::BiblioUtils->get_marc_biblio($bibnum, %options);

This class method fetches the MARC::Record for the given biblio number.
Nothing is returned if the biblionumber couldn't be found (or it somehow has no
MARC data.)

Options are:

=over 4

=item item_data

If set to true, item data is embedded in the record. Default is to not do this.

=back

=cut

sub get_marc_biblio {
    my ( $class, $bibnum, %options ) = @_;

    # The documented contract is to return nothing for an unknown
    # biblionumber; chaining ->metadata directly onto an undefined find()
    # result would die instead.
    my $biblio = Koha::Biblios->find($bibnum);
    return if !defined $biblio;

    my $metadata = $biblio->metadata;
    return if !defined $metadata;

    # Items are embedded only on request.
    my %record_options = $options{item_data} ? ( embed_items => 1 ) : ();

    my $record = $metadata->record( \%record_options );
    return $record;
}
|
|
|
|
1;
|