From 0b3b53e4f2044480bff96f1ae77a165e0c0e4691 Mon Sep 17 00:00:00 2001 From: Tomas Cohen Arazi Date: Mon, 16 May 2016 11:30:35 -0300 Subject: [PATCH] Bug 16506: Make rebuild_zebra.pl use XML as default This patch deprecates the -x switch, making XML the default serialization format used by rebuild_zebra.pl. It doesn't remove the option switch, but raises a warning for the end user about the deprecation so they fix their cronjobs. Later we could remove it. To test: - Disable all indexing (daemon/cronjob) - Create 2 records - Edit one of them, delete the other one - Verify they are queued for updates in zebraqueue - sudo koha-mysql kohadev > SELECT * FROM zebraqueue WHERE done=0 ... | 265 | 265 | specialUpdate | biblioserver | 1 | 2016-05-13 14:23:45 | | 266 | 1 | recordDelete | biblioserver | 1 | 2016-05-16 14:14:33 | | 267 | 2 | specialUpdate | biblioserver | 1 | 2016-05-16 14:15:06 | +-----+--------------------+---------------+--------------+------+---------------------+ - Now go to koha-shell $ sudo koha-shell kohadev ; cd kohaclone - Run: $ misc/migration_tools/rebuild_zebra.pl -k -b -z You will get something similar to this: NOTHING cleaned : the export /tmp/jI0OeHy6Tn has been kept. You can re-run this script with the -s and -d /tmp/jI0OeHy6Tn parameters if you just want to rebuild zebra after changing the record.abs or another zebra config file - Verify * less /tmp/jI0OeHy6Tn/del_biblio/exported_records * less /tmp/jI0OeHy6Tn/upd_biblio/exported_records => FAIL: They contain the records you added/modified/deleted but they are in USMARC format - Apply the patch - Mark your records for indexing (in koha-mysql kohadev) > UPDATE zebraqueue SET done=0 WHERE id > 264 - Run: $ misc/migration_tools/rebuild_zebra.pl -k -b -z You will get something similar to this: [1] NOTHING cleaned : the export /tmp/jI0OeHy6Tn has been kept. You can re-run this script with the -s and -d /tmp/jI0OeHy6Tn parameters if you just want to rebuild zebra after changing the record.abs or another zebra config file - Verify * less /tmp/jI0OeHy6Tn/del_biblio/exported_records * less /tmp/jI0OeHy6Tn/upd_biblio/exported_records => SUCCESS: Data is correctly in XML format - Run: $ misc/migration_tools/rebuild_zebra.pl -k -b -z -noxml You will get something similar to this: [1] NOTHING cleaned : the export /tmp/jI0OeHy6Tn has been kept. You can re-run this script with the -s and -d /tmp/jI0OeHy6Tn parameters if you just want to rebuild zebra after changing the record.abs or another zebra config file - Verify * less /tmp/jI0OeHy6Tn/del_biblio/exported_records * less /tmp/jI0OeHy6Tn/upd_biblio/exported_records => SUCCESS: Data is correctly in USMARC format - Sign off :-D [1] Warnings covered by a followup Signed-off-by: Bernardo Gonzalez Kriegel On top of Bug 16505 Work as described following test plan, usmarc default pre patch, post patch xml default and usmarc on request. No errors (all patchset) Signed-off-by: Jonathan Druart Signed-off-by: Kyle M Hall --- misc/migration_tools/rebuild_zebra.pl | 115 ++++++++++++++------------ 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index 45c074a63a..fb06aabf2e 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -1,7 +1,21 @@ #!/usr/bin/perl -use strict; -#use warnings; FIXME - Bug 2505 +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Koha is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Koha; if not, see . + +use Modern::Perl; use C4::Context; use Getopt::Long; @@ -34,10 +48,10 @@ my $skip_index; my $reset; my $biblios; my $authorities; -my $noxml; +my $as_usmarc; +my $as_xml; my $noshadow; my $want_help; -my $as_xml; my $process_zebraqueue; my $process_zebraqueue_skip_deletes; my $do_not_clear_zebraqueue; @@ -62,7 +76,7 @@ my $result = GetOptions( 'I|skip-index' => \$skip_index, 'nosanitize' => \$nosanitize, 'b' => \$biblios, - 'noxml' => \$noxml, + 'noxml' => \$as_usmarc, 'w' => \$noshadow, 'a' => \$authorities, 'h|help' => \$want_help, @@ -84,6 +98,10 @@ if (not $result or $want_help) { exit 0; } +if ( $as_xml ) { + warn "Warning: You passed -x which is already the default and is now deprecated·\n"; +} + if( not defined $run_as_root and $run_user eq 'root') { my $msg = "Warning: You are running this script as the user 'root'.\n"; $msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n"; @@ -91,7 +109,7 @@ if( not defined $run_as_root and $run_user eq 'root') { die $msg; } -if ( !$as_xml and $nosanitize ) { +if ( $as_usmarc and $nosanitize ) { my $msg = "Cannot specify both -no_xml and -nosanitize\n"; $msg .= "Please do '$0 --help' to see usage.\n"; die $msg; @@ -277,13 +295,13 @@ if ($keep_export) { sub do_one_pass { if ($authorities) { - index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir); + index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir); } else { print "skipping authorities\n" if ( $verbose_logging ); } if ($biblios) { - index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir); + index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir); } else { print "skipping biblios\n" if ( $verbose_logging ); } @@ -330,7 +348,7 @@ sub check_zebra_dirs { } # ---------- end of subroutine check_zebra_dirs ---------- sub index_records { - my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_; + my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_; my $num_records_exported = 0; my $records_deleted = {}; @@ -357,18 +375,18 @@ sub index_records { unless ( $process_zebraqueue_skip_deletes ) { $entries = select_zebraqueue_records($record_type, 'deleted'); mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type"); - $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml); + $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_usmarc); mark_zebraqueue_batch_done($entries); } $entries = select_zebraqueue_records($record_type, 'updated'); mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type"); - $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted); + $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_usmarc, $records_deleted); mark_zebraqueue_batch_done($entries); } else { my $sth = select_all_records($record_type); - $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize); + $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_usmarc, $nosanitize); unless ($do_not_clear_zebraqueue) { mark_all_zebraqueue_done($record_type); } @@ -390,7 +408,7 @@ sub index_records { print "REINDEXING zebra\n"; print "====================\n"; } - my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ; + my $record_fmt = ($as_usmarc) ? 'iso2709' : 'marcxml' ; if ($process_zebraqueue) { do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt) if %$records_deleted; @@ -470,25 +488,14 @@ sub select_all_biblios { return $sth; } -sub include_xml_wrapper { - my $as_xml = shift; - my $record_type = shift; - - return 0 unless $as_xml; - return 1 if $record_type eq 'biblio' and $bib_index_mode eq 'dom'; - return 1 if $record_type eq 'authority' and $auth_index_mode eq 'dom'; - return 0; - -} - sub export_marc_records_from_sth { - my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_; + my ($record_type, $sth, $directory, $as_usmarc, $nosanitize) = @_; my $num_exported = 0; open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!; print {$fh} $marcxml_open - if include_xml_wrapper($as_xml, $record_type); + unless $as_usmarc; my $i = 0; my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",''); @@ -532,11 +539,13 @@ sub export_marc_records_from_sth { } next; } - my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml); + my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc); if (defined $marc) { eval { my $rec; - if ($as_xml) { + if ($as_usmarc) { + $rec = $marc->as_usmarc(); + } else { $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); eval { my $doc = $tester->parse_string($rec); @@ -545,33 +554,32 @@ sub export_marc_records_from_sth { die "invalid XML: $@"; } $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; - } else { - $rec = $marc->as_usmarc(); } print {$fh} $rec; $num_exported++; }; if ($@) { - warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML"); + warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML"); warn "... specific error is $@" if $verbose_logging; } } } print "\nRecords exported: $num_exported\n" if ( $verbose_logging ); print {$fh} $marcxml_close - if include_xml_wrapper($as_xml, $record_type); + unless $as_usmarc; + close $fh; return $num_exported; } sub export_marc_records_from_list { - my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_; + my ($record_type, $entries, $directory, $as_usmarc, $records_deleted) = @_; my $num_exported = 0; open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!; print {$fh} $marcxml_open - if include_xml_wrapper($as_xml, $record_type); + unless $as_usmarc; my $i = 0; @@ -582,41 +590,42 @@ sub export_marc_records_from_list { @$entries ) { print "." if ( $verbose_logging ); print "\r$i" unless ($i++ %100 or !$verbose_logging); - my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml); + my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc); if (defined $marc) { eval { my $rec; - if ($as_xml) { + if ( $as_usmarc ) { + $rec = $marc->as_usmarc(); + } else { $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; - } else { - $rec = $marc->as_usmarc(); } print {$fh} $rec; $num_exported++; }; if ($@) { - warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML"); + warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML"); } } } print "\nRecords exported: $num_exported\n" if ( $verbose_logging ); print {$fh} $marcxml_close - if include_xml_wrapper($as_xml, $record_type); + unless $as_usmarc; close $fh; return $num_exported; } sub generate_deleted_marc_records { - my ($record_type, $entries, $directory, $as_xml) = @_; + + my ($record_type, $entries, $directory, $as_usmarc) = @_; my $records_deleted = {}; open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!; print {$fh} $marcxml_open - if include_xml_wrapper($as_xml, $record_type); + unless $as_usmarc; my $i = 0; foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) { @@ -634,11 +643,12 @@ sub generate_deleted_marc_records { } my $rec; - if ($as_xml) { + if ( $as_usmarc ) { + $rec = $marc->as_usmarc(); + } else { $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); + # Remove the record's XML header $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; - } else { - $rec = $marc->as_usmarc(); } print {$fh} $rec; @@ -647,18 +657,16 @@ sub generate_deleted_marc_records { print "\nRecords exported: $i\n" if ( $verbose_logging ); print {$fh} $marcxml_close - if include_xml_wrapper($as_xml, $record_type); + unless $as_usmarc; close $fh; return $records_deleted; - - } sub get_corrected_marc_record { - my ($record_type, $record_number, $noxml) = @_; + my ($record_type, $record_number, $as_usmarc) = @_; - my $marc = get_raw_marc_record($record_type, $record_number, $noxml); + my $marc = get_raw_marc_record($record_type, $record_number, $as_usmarc); if (defined $marc) { fix_leader($marc); @@ -677,11 +685,11 @@ sub get_corrected_marc_record { } sub get_raw_marc_record { - my ($record_type, $record_number, $noxml) = @_; + my ($record_type, $record_number, $as_usmarc) = @_; my $marc; if ($record_type eq 'biblio') { - if ($noxml) { + if ($as_usmarc) { my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?"); $fetch_sth->execute($record_number); if (my ($blob) = $fetch_sth->fetchrow_array) { @@ -908,9 +916,6 @@ Parameters: option is recommended only for advanced user. - -x export and index as xml instead of is02709 (biblios only). - use this if you might have records > 99,999 chars, - -nosanitize export biblio/authority records directly from DB marcxml field without sanitizing records. It speed up dump process but could fail if DB contains badly -- 2.39.5