From 2b32aee8258002269ab4d789496ea188465d016a Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Thu, 2 May 2019 12:24:08 +0000 Subject: [PATCH] Bug 22831: Elasticsearch - add a maintenance script for checking DB vs index counts This script uses Array::Utils and adds a dependency To test: 1 - Have Koha with ES running 2 - Delete some records from ES curl -X DELETE "es:9200/koha_kohadev_biblios/data/5" curl -X DELETE "es:9200/koha_kohadev_authorities/data/5" 3 - perl misc/maintenance/compare_es_to_db.pl 4 - Note you are notified of problems in both indexes 5 - perl misc/search_tools/rebuild_elasticsearch.pl -a 6 - perl misc/maintenance/compare_es_to_db.pl 7 - Note you are only notified about problems in biblios (assuming you don't have other issues) 8 - perl misc/search_tools/rebuild_elasticsearch.pl -b 9 - perl misc/maintenance/compare_es_to_db.pl 10 - Both counts match, no problems Signed-off-by: Ere Maijala Signed-off-by: Bouzid Fergani Signed-off-by: Jonathan Druart Signed-off-by: Martin Renvoize --- C4/Installer/PerlDependencies.pm | 5 ++ misc/maintenance/compare_es_to_db.pl | 82 ++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 misc/maintenance/compare_es_to_db.pl diff --git a/C4/Installer/PerlDependencies.pm b/C4/Installer/PerlDependencies.pm index 84f5fe4930..f949bd790c 100644 --- a/C4/Installer/PerlDependencies.pm +++ b/C4/Installer/PerlDependencies.pm @@ -913,6 +913,11 @@ our $PERL_DEPS = { 'required' => '0', 'min_ver' => '1.15', }, + 'Array::Utils' => { + usage => 'Elasticsearch integration', + required => '0', + min_ver => '0.5', + }, }; 1; diff --git a/misc/maintenance/compare_es_to_db.pl b/misc/maintenance/compare_es_to_db.pl new file mode 100644 index 0000000000..9fab08a759 --- /dev/null +++ b/misc/maintenance/compare_es_to_db.pl @@ -0,0 +1,82 @@ +#! /usr/bin/perl +# +# This compares record counts from a Koha database to Elasticsearch + +# Copyright 2019 ByWater Solutions +# +# This file is part of Koha. 
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 3 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with Koha; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+=head1 NAME
+
+compare_es_to_db.pl - compares record counts from a Koha database to Elasticsearch
+
+=head1 SYNOPSIS
+
+B<compare_es_to_db.pl>
+
+=cut
+
+use Modern::Perl;
+use Koha::Items;
+use Koha::SearchEngine::Elasticsearch;
+use Array::Utils qw( array_diff );
+
+use Koha::Biblios;
+use Koha::Authorities;
+
+local $| = 1;    # unbuffer STDOUT so the progress dots show up as ids are fetched
+
+# For each index, print both record counts, then every id found in only one source.
+foreach my $index ( ('biblios','authorities') ){
+    print "=================\n";
+    print "Checking $index\n";
+    my @db_records = $index eq 'biblios' ? Koha::Biblios->search()->get_column('biblionumber') : Koha::Authorities->search()->get_column('authid');
+
+    my $searcher = Koha::SearchEngine::Elasticsearch->new({ index => $index });
+    my $es = $searcher->get_elasticsearch();
+    my $index_name = $searcher->get_elasticsearch_params->{index_name};
+    my $count = $es->indices->stats( index => $index_name )->{_all}{primaries}{docs}{count};
+    print "Count in db for $index is " . scalar @db_records . ", count in Elasticsearch is $count\n";
+
+    # Equal counts do not prove equal ids, so always scroll through every ES document.
+    my $scroll = $es->scroll_helper(
+        index => $index_name,
+        size => 5000,
+        body => {
+            query => { match_all => {} },
+            stored_fields => []
+        },
+        scroll_in_qs => 1,
+    );
+
+    my @es_ids;
+    my $i = 1;
+    print "Fetching Elasticsearch records ids";
+    while (my $doc = $scroll->next ){
+        print "." if !($i % 500);
+        print "\nFetching next 5000" if !($i % 5000);
+        push @es_ids, $doc->{_id};
+        $i++;
+    }
+    print "\nComparing arrays, this may take a while\n";
+
+    # array_diff yields the ids present in exactly one of the two lists.
+    my @diff = array_diff(@db_records, @es_ids );
+    print "All records match\n" unless @diff;
+    foreach my $problem (@diff){
+        print "Record #$problem is not in both sources\n";
+    }
+}
-- 
2.39.5