Koha/misc/maintenance/compare_es_to_db.pl
Jonathan Druart 7d8b96803f
Bug 24545: Fix license statements
Bug 9978 should have fixed them all, but some were missing.
We want all the license statements part of Koha to be identical, and
using the GPLv3 statement.

Signed-off-by: David Nind <david@davidnind.com>
Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
2020-02-24 13:31:26 +00:00

114 lines
3.9 KiB
Perl

#! /usr/bin/perl
#
# This compares record counts from a Koha database to Elasticsearch
# Copyright 2019 ByWater Solutions
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.
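
=head1 NAME

compare_es_to_db.pl - compares record counts from a Koha database to Elasticsearch

=head1 SYNOPSIS

B<compare_es_to_db.pl>

=head1 DESCRIPTION

For the biblios and authorities indexes in turn, prints the number of records
in the Koha database and the document count reported by Elasticsearch, then
lists the ids that are present on one side but missing on the other.

=cut
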
use Modern::Perl;
use Array::Utils qw( array_minus );
use C4::Context;
use Koha::Authorities;
use Koha::Biblios;
use Koha::Items;
use Koha::SearchEngine::Elasticsearch;
# Compare the record ids stored in the Koha database with the document ids held
# in the matching Elasticsearch index, first for bibliographic records and then
# for authorities, and report every id that is present on one side only.

# The progress dots below are printed without a trailing newline; enable
# autoflush so they appear while the scroll is running instead of in bursts.
local $| = 1;

# OPAC detail page (with its id parameter) for each index; the record id is
# appended to build the "Visit here" link.
my %opac_page_for = (
    biblios     => 'opac-detail.pl?biblionumber=',
    authorities => 'opac-authoritiesdetail.pl?authid=',
);

for my $index (qw( biblios authorities )) {
    print "=================\n";
    print "Checking $index\n";

    # All record ids the database knows about for this index.
    my @db_records =
        $index eq 'biblios'
        ? Koha::Biblios->search()->get_column('biblionumber')
        : Koha::Authorities->search()->get_column('authid');

    my $searcher = Koha::SearchEngine::Elasticsearch->new( { index => $index } );
    my $es       = $searcher->get_elasticsearch();

    # Read the connection parameters once; they are needed for the stats call,
    # the scroll and the curl hint printed for ES-only records.
    my $es_params  = $searcher->get_elasticsearch_params;
    my $index_name = $es_params->{index_name};

    my $count = $es->indices->stats( index => $index_name )->{_all}{primaries}{docs}{count};
    print "Count in db for $index is " . scalar(@db_records) . ", count in Elasticsearch is $count\n";

    # Now we get all the ids from Elasticsearch.
    # The scroll lets us iterate through the whole index; it fetches chunks of
    # 'size' documents as we move through.
    my $scroll = $es->scroll_helper(
        index => $index_name,
        size  => 5000,
        body  => {
            query         => { match_all => {} },
            stored_fields => [],
        },
        scroll_in_qs => 1,
    );

    # Walk the scroll, keeping only each document's id. The number of ids
    # collected so far doubles as the progress counter.
    my @es_ids;
    print "Fetching Elasticsearch records ids";
    while ( my $doc = $scroll->next ) {
        push @es_ids, $doc->{_id};
        my $fetched = scalar @es_ids;
        print "."                            if $fetched % 500 == 0;
        print "\n$fetched records retrieved" if $fetched % 5000 == 0;
    }

    # Base URLs used to build links to the problem records.
    my $es_base = "$es_params->{nodes}[0]/$index_name";

    # The preference may be unset; fall back to an empty string so the links
    # below do not trigger "uninitialized value" warnings, and drop any
    # trailing slash so the path is not joined with a double slash.
    my $opac_base = C4::Context->preference('OPACBaseURL') // '';
    $opac_base =~ s{/+\z}{};

    print "\nComparing arrays, this may take a while\n";

    # Ids on one side only, sorted numerically for stable, readable output.
    my @koha_problems = sort { $a <=> $b } array_minus( @db_records, @es_ids );
    my @es_problems   = sort { $a <=> $b } array_minus( @es_ids,     @db_records );

    print "All records match\n" unless ( @koha_problems || @es_problems );

    if (@koha_problems) {
        print "=================\n";
        print "Records that exist in Koha but not in ES\n";
        my $opac_page = $opac_page_for{$index};
        for my $problem (@koha_problems) {
            # Same " #id" prefix for both indexes (authorities used to lack
            # the leading space).
            print " #$problem";
            print " Visit here to see record: $opac_base/cgi-bin/koha/$opac_page$problem\n";
        }
    }

    if (@es_problems) {
        print "=================\n";
        print "Records that exist in ES but not in Koha\n";
        for my $problem (@es_problems) {
            print " #$problem";
            print " Enter this command to view record: curl $es_base/data/$problem?pretty=true\n";
        }
    }
}