From 7a2cbfba1f271d307045d867654ff0d7de939aa6 Mon Sep 17 00:00:00 2001 From: Jonathan Druart Date: Tue, 6 Dec 2016 15:37:50 +0100 Subject: [PATCH] Bug 17731: Remove noxml option from rebuild_zebra.pl The removal of the noxml is a logical follow-up of bug 16506 (which make xml the default). Actually this option should have been removed by bug 10455 (it removes the biblioitem.marc field). Test plan: Make sure the rebuild_zebra.pl script works as before. Signed-off-by: Emma Smith Signed-off-by: Kyle M Hall --- misc/migration_tools/rebuild_zebra.pl | 135 ++++++++------------------ 1 file changed, 42 insertions(+), 93 deletions(-) diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index b32f8ba6ad..5d04545e7b 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -48,7 +48,6 @@ my $skip_index; my $reset; my $biblios; my $authorities; -my $as_usmarc; my $as_xml; my $noshadow; my $want_help; @@ -76,7 +75,6 @@ my $result = GetOptions( 'I|skip-index' => \$skip_index, 'nosanitize' => \$nosanitize, 'b' => \$biblios, - 'noxml' => \$as_usmarc, 'w' => \$noshadow, 'a' => \$authorities, 'h|help' => \$want_help, @@ -110,12 +108,6 @@ if( not defined $run_as_root and $run_user eq 'root') { die $msg; } -if ( $as_usmarc and $nosanitize ) { - my $msg = "Cannot specify both -noxml and -nosanitize\n"; - $msg .= "Please do '$0 --help' to see usage.\n"; - die $msg; -} - if ($process_zebraqueue and ($skip_export or $reset)) { my $msg = "Cannot specify -r or -s if -z is specified\n"; $msg .= "Please do '$0 --help' to see usage.\n"; @@ -295,13 +287,13 @@ if ($keep_export) { sub do_one_pass { if ($authorities) { - index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir); + index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir); } else { print "skipping authorities\n" if ( $verbose_logging ); } if ($biblios) { - index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir); + index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir); } else { print "skipping biblios\n" if ( $verbose_logging ); } @@ -348,7 +340,7 @@ sub check_zebra_dirs { } # ---------- end of subroutine check_zebra_dirs ---------- sub index_records { - my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_; + my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_; my $num_records_exported = 0; my $records_deleted = {}; @@ -375,18 +367,18 @@ sub index_records { unless ( $process_zebraqueue_skip_deletes ) { $entries = select_zebraqueue_records($record_type, 'deleted'); mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type"); - $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_usmarc); + $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type"); mark_zebraqueue_batch_done($entries); } $entries = select_zebraqueue_records($record_type, 'updated'); mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type"); - $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_usmarc, $records_deleted); + $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $records_deleted); mark_zebraqueue_batch_done($entries); } else { my $sth = select_all_records($record_type); - $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_usmarc, $nosanitize); + $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $nosanitize); unless ($do_not_clear_zebraqueue) { mark_all_zebraqueue_done($record_type); } @@ -408,7 +400,7 @@ sub index_records { print "REINDEXING zebra\n"; print "====================\n"; } - my $record_fmt = ($as_usmarc) ? 'iso2709' : 'marcxml' ; + my $record_fmt = 'marcxml'; if ($process_zebraqueue) { do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt) if %$records_deleted; @@ -489,13 +481,12 @@ sub select_all_biblios { } sub export_marc_records_from_sth { - my ($record_type, $sth, $directory, $as_usmarc, $nosanitize) = @_; + my ($record_type, $sth, $directory, $nosanitize) = @_; my $num_exported = 0; open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!; - print {$fh} $marcxml_open - unless $as_usmarc; + print {$fh} $marcxml_open; my $i = 0; my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",''); @@ -539,47 +530,40 @@ sub export_marc_records_from_sth { } next; } - my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc); + my ($marc) = get_corrected_marc_record($record_type, $record_number); if (defined $marc) { eval { - my $rec; - if ($as_usmarc) { - $rec = $marc->as_usmarc(); - } else { - $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); - eval { - my $doc = $tester->parse_string($rec); - }; - if ($@) { - die "invalid XML: $@"; - } - $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; + my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); + eval { + my $doc = $tester->parse_string($rec); + }; + if ($@) { + die "invalid XML: $@"; } + $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; print {$fh} $rec; $num_exported++; }; if ($@) { - warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML"); + warn "Error exporting record $record_number ($record_type) XML"; warn "... specific error is $@" if $verbose_logging; } } } print "\nRecords exported: $num_exported\n" if ( $verbose_logging ); - print {$fh} $marcxml_close - unless $as_usmarc; + print {$fh} $marcxml_close; close $fh; return $num_exported; } sub export_marc_records_from_list { - my ($record_type, $entries, $directory, $as_usmarc, $records_deleted) = @_; + my ($record_type, $entries, $directory, $records_deleted) = @_; my $num_exported = 0; open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!; - print {$fh} $marcxml_open - unless $as_usmarc; + print {$fh} $marcxml_open; my $i = 0; @@ -590,28 +574,22 @@ sub export_marc_records_from_list { @$entries ) { print "." if ( $verbose_logging ); print "\r$i" unless ($i++ %100 or !$verbose_logging); - my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc); + my ($marc) = get_corrected_marc_record($record_type, $record_number); if (defined $marc) { eval { - my $rec; - if ( $as_usmarc ) { - $rec = $marc->as_usmarc(); - } else { - $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); - $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; - } + my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); + $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; print {$fh} $rec; $num_exported++; }; if ($@) { - warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML"); + warn "Error exporting record $record_number ($record_type) XML"; } } } print "\nRecords exported: $num_exported\n" if ( $verbose_logging ); - print {$fh} $marcxml_close - unless $as_usmarc; + print {$fh} $marcxml_close; close $fh; return $num_exported; @@ -619,13 +597,12 @@ sub export_marc_records_from_list { sub generate_deleted_marc_records { - my ($record_type, $entries, $directory, $as_usmarc) = @_; + my ($record_type, $entries, $directory) = @_; my $records_deleted = {}; open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!; - print {$fh} $marcxml_open - unless $as_usmarc; + print {$fh} $marcxml_open; my $i = 0; foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) { @@ -642,31 +619,25 @@ sub generate_deleted_marc_records { fix_unimarc_100($marc); } - my $rec; - if ( $as_usmarc ) { - $rec = $marc->as_usmarc(); - } else { - $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); - # Remove the record's XML header - $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; - } + my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); + # Remove the record's XML header + $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; print {$fh} $rec; $records_deleted->{$record_number} = 1; } print "\nRecords exported: $i\n" if ( $verbose_logging ); - print {$fh} $marcxml_close - unless $as_usmarc; + print {$fh} $marcxml_close; close $fh; return $records_deleted; } sub get_corrected_marc_record { - my ($record_type, $record_number, $as_usmarc) = @_; + my ($record_type, $record_number) = @_; - my $marc = get_raw_marc_record($record_type, $record_number, $as_usmarc); + my $marc = get_raw_marc_record($record_type, $record_number); if (defined $marc) { fix_leader($marc); @@ -685,34 +656,17 @@ sub get_corrected_marc_record { } sub get_raw_marc_record { - my ($record_type, $record_number, $as_usmarc) = @_; + my ($record_type, $record_number) = @_; my $marc; if ($record_type eq 'biblio') { - if ($as_usmarc) { - my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?"); - $fetch_sth->execute($record_number); - if (my ($blob) = $fetch_sth->fetchrow_array) { - $marc = MARC::Record->new_from_usmarc($blob); - unless ($marc) { - warn "error creating MARC::Record from $blob"; - } - } - # failure to find a bib is not a problem - - # a delete could have been done before - # trying to process a record update - - $fetch_sth->finish(); - return unless $marc; - } else { - eval { $marc = GetMarcBiblio($record_number, 1); }; - if ($@ || !$marc) { - # here we do warn since catching an exception - # means that the bib was found but failed - # to be parsed - warn "error retrieving biblio $record_number"; - return; - } + eval { $marc = GetMarcBiblio($record_number, 1); }; + if ($@ || !$marc) { + # here we do warn since catching an exception + # means that the bib was found but failed + # to be parsed + warn "error retrieving biblio $record_number"; + return; } } else { eval { $marc = GetAuthority($record_number); }; @@ -918,11 +872,6 @@ Parameters: already exported the records in a previous run. - -noxml index from ISO MARC blob - instead of MARC XML. This - option is recommended only - for advanced user. - -nosanitize export biblio/authority records directly from DB marcxml field without sanitizing records. It speed up dump process but could fail if DB contains badly -- 2.39.5