From 61e7aa374e8b4f85497c55e3741d829123fc4763 Mon Sep 17 00:00:00 2001 From: Thomas Klausner Date: Fri, 29 Mar 2024 09:20:37 +0000 Subject: [PATCH] Bug 35345: Add --where option to rebuild_elasticsearch.pl MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Sometimes we need to only re-index a subset of our bibliographic data or authorities. Currently this is only possible by enumerating all ids (-bn or -ai), which does not work well when indexing e.g. 100.000 items of a 2.000.000 DB. Re-indexing everything is also overkill. This patch adds a `--where` flag to misc/search_tools/rebuild_elasticsearch.pl which can take arbitrary SQL (that of course has to match the respective tables) and adds it as an additional param to the resultset to index. To test, start koha-testing-docker with ElasticSearch enabled, for example via `ktd --es7 up`. Before applying the patch, rebuild_elasticsearch will index all data: Biblios: $ misc/search_tools/rebuild_elasticsearch.pl -b -v [12387] Checking state of biblios index [12387] Indexing biblios [12387] Committing final records... [12387] Total 435 records indexed (there might be a warning regarding a broken biblio, which can be ignored) Auth: $ misc/search_tools/rebuild_elasticsearch.pl -a -v [12546] Checking state of authorities index [12546] Indexing authorities [12546] 1000 records processed [12546] Committing final records... [12546] Total 1706 records indexed Now apply the patch. Biblio, limit by range of biblioid: $ misc/search_tools/rebuild_elasticsearch.pl -b -v --where "biblionumber between 100 and 150" [12765] Checking state of biblios index [12765] Indexing biblios [12765] Committing final records... 
[12765] Total 50 records indexed Note that only 50 records were indexed (instead of the whole set of 435 records) Auth, limit by authtypecode: $ misc/search_tools/rebuild_elasticsearch.pl -a -v --where "authtypecode = 'GEOGR_NAME'" [12848] Checking state of authorities index [12848] Indexing authorities [12848] Committing final records... [12848] Total 142 records indexed Again, only 142 have been indexed. Sponsored-by: Steiermärkische Landesbibliothek Sponsored-by: HKS3 / koha-support.eu Signed-off-by: David Nind Signed-off-by: Nick Clemens Signed-off-by: Katrin Fischer --- Koha/BiblioUtils.pm | 5 +++++ Koha/MetadataRecord/Authority.pm | 5 +++++ misc/search_tools/rebuild_elasticsearch.pl | 12 +++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Koha/BiblioUtils.pm b/Koha/BiblioUtils.pm index cb7baed0db..5f438253f7 100644 --- a/Koha/BiblioUtils.pm +++ b/Koha/BiblioUtils.pm @@ -140,6 +140,11 @@ sub get_all_biblios_iterator { my $rs = Koha::Biblios->search( $search_terms, $search_options ); + + if ( my $sql = $options{where} ) { + $rs = $rs->search( \[$sql] ); + } + my $next_func = sub { # Warn and skip bad records, otherwise we break the loop while (1) { diff --git a/Koha/MetadataRecord/Authority.pm b/Koha/MetadataRecord/Authority.pm index ed0ebfc016..f616415d18 100644 --- a/Koha/MetadataRecord/Authority.pm +++ b/Koha/MetadataRecord/Authority.pm @@ -201,6 +201,11 @@ sub get_all_authorities_iterator { $schema->resultset('AuthHeader')->search( $search_terms, $search_options); + + if ( my $sql = $options{where} ) { + $rs = $rs->search( \[$sql] ); + } + my $next_func = sub { # Warn and skip bad records, otherwise we break the loop while (1) { diff --git a/misc/search_tools/rebuild_elasticsearch.pl b/misc/search_tools/rebuild_elasticsearch.pl index 337ac19dd2..58fa298830 100755 --- a/misc/search_tools/rebuild_elasticsearch.pl +++ b/misc/search_tools/rebuild_elasticsearch.pl @@ -34,6 +34,7 @@ B [B<--desc>] [B<-bn|--bnumber>] [B<-ai|--authid>] 
+[B<-w|--where SQL>] [B<-p|--processes>] [B<-v|--verbose>] [B<-h|--help>] @@ -87,6 +88,10 @@ repeated. Only index the supplied authority id, mostly for testing purposes. May be repeated. +=item B<-w|--where> + +Pass some additional SQL to limit the records to be indexed. + =item B<-p|--processes> Number of processes to use for indexing. This can be used to do more indexing @@ -128,7 +133,7 @@ my $verbose = 0; my $commit = 5000; my ($delete, $reset, $help, $man, $processes); my ($index_biblios, $index_authorities); -my (@biblionumbers,@authids); +my (@biblionumbers,@authids,$where); my $desc; $|=1; # flushes output @@ -142,6 +147,7 @@ GetOptions( 'desc' => \$desc, 'bn|bnumber=i' => \@biblionumbers, 'ai|authid=i' => \@authids, + 'w|where=s' => \$where, 'p|processes=i' => \$processes, 'v|verbose+' => \$verbose, 'h|help' => \$help, @@ -197,6 +203,10 @@ if( $desc ){ $iterator_options{desc} = 1; } +if ($where) { + $iterator_options{where} = $where; +} + my $next; if ($index_biblios) { _log(1, "Indexing biblios\n"); -- 2.39.5