Nick Clemens
e9f7a0dd1c
Signed-off-by: Bouzid Fergani <bouzid.fergani@inlibro.com> Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org> Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
124 lines
4.2 KiB
Perl
124 lines
4.2 KiB
Perl
#! /usr/bin/perl
|
|
#
|
|
# This compares record counts from a Koha database to Elasticsearch
|
|
|
|
# Copyright 2019 ByWater Solutions
|
|
#
|
|
# This file is part of Koha.
|
|
#
|
|
# Koha is free software; you can redistribute it and/or modify it under the
|
|
# terms of the GNU General Public License as published by the Free Software
|
|
# Foundation; either version 3 of the License, or (at your option) any later
|
|
# version.
|
|
#
|
|
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
|
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with Koha; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
=head1 NAME
|
|
|
|
compare_es_to_db.pl - compares record counts from a Koha database to Elasticsearch
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
B<compare_es_to_db.pl>
|
|
|
|
=cut
|
|
|
|
use Modern::Perl;
|
|
use Array::Utils qw( array_diff );
|
|
|
|
use C4::Context;
|
|
|
|
use Koha::Authorities;
|
|
use Koha::Biblios;
|
|
use Koha::Items;
|
|
use Koha::SearchEngine::Elasticsearch;
|
|
|
|
foreach my $index ( ('biblios','authorities') ){
|
|
print "=================\n";
|
|
print "Checking $index\n";
|
|
my @db_records = $index eq 'biblios' ? Koha::Biblios->search()->get_column('biblionumber') : Koha::Authorities->search()->get_column('authid');
|
|
|
|
my $searcher = Koha::SearchEngine::Elasticsearch->new({ index => $index });
|
|
my $es = $searcher->get_elasticsearch();
|
|
my $count = $es->indices->stats( index => $searcher->get_elasticsearch_params->{index_name} )
|
|
->{_all}{primaries}{docs}{count};
|
|
print "Count in db for $index is " . scalar @db_records . ", count in Elasticsearch is $count\n";
|
|
|
|
# Now we get all the ids from Elasticsearch
|
|
# The scroll lets us iterate through, it fetches chunks of 'size' as we move through
|
|
my $scroll = $es->scroll_helper(
|
|
index => $searcher->get_elasticsearch_params->{index_name},
|
|
size => 5000,
|
|
body => {
|
|
query => {
|
|
match_all => {}
|
|
},
|
|
stored_fields => []
|
|
},
|
|
scroll_in_qs => 1,
|
|
);
|
|
|
|
my @es_ids;
|
|
|
|
# Here is where we actually iterate through
|
|
# Fetching each record, pushing the id into the array
|
|
my $i = 1;
|
|
print "Fetching Elasticsearch records ids";
|
|
while (my $doc = $scroll->next ){
|
|
print "." if !($i % 500);
|
|
print "\n$i records retrieved" if !($i % 5000);
|
|
push @es_ids, $doc->{_id};
|
|
$i++;
|
|
}
|
|
print "\nComparing arrays, this may take a while\n";
|
|
|
|
# And compare the arrays
|
|
# array_diff returns only a list of values which are not common to both arrays
|
|
my @diff = array_diff(@db_records, @es_ids );
|
|
|
|
print "All records match\n" unless @diff;
|
|
|
|
# Fetch values for providing record links
|
|
my $es_params = $searcher->get_elasticsearch_params;
|
|
my $es_base = "$es_params->{nodes}[0]/$es_params->{index_name}";
|
|
my $opac_base = C4::Context->preference('OPACBaseURL');
|
|
|
|
|
|
my @koha_problems;
|
|
my @es_problems;
|
|
foreach my $problem ( sort { $a <=> $b } @diff){
|
|
if ( (grep /^$problem$/, @db_records) ){
|
|
push @koha_problems, $problem;
|
|
} else {
|
|
push @es_problems, $problem;
|
|
}
|
|
}
|
|
|
|
if ( @koha_problems ){
|
|
print "=================\n";
|
|
print "Records that exist in Koha but not in ES\n";
|
|
for my $problem ( @koha_problems ){
|
|
if ( $index eq 'biblios' ) {
|
|
print " #$problem";
|
|
print " Visit here to see record: $opac_base/cgi-bin/koha/opac-detail.pl?biblionumber=$problem\n";
|
|
} elsif ( $index eq 'authorities' ) {
|
|
print "#$problem";
|
|
print " Visit here to see record: $opac_base/cgi-bin/koha/opac-authoritiesdetail.pl?authid=$problem\n";
|
|
}
|
|
}
|
|
}
|
|
if ( @es_problems ){
|
|
print "=================\n";
|
|
print "Records that exist in ES but not in Koha\n";
|
|
for my $problem ( @es_problems ){
|
|
print " #$problem";
|
|
print " Enter this command to view record: curl $es_base/data/$problem?pretty=true\n";
|
|
}
|
|
}
|
|
}
|