bug 9496: improve error checking in rebuild_zebra.pl
When using rebuild_zebra to index all records, skip over bibliographic or authority records that don't come out as valid XML. Also, strip extraneous XML declarations when using --nosanitize. Test plans ---------- Note that both plans assume that DOM indexing is turned on. Test plan #1 ============ [1] Run rebuild_zebra.pl with the -x --nosanitize options. Without the patch, zebraidx should terminate early and complain about invalid XML. [2] With the patch, rebuild_zebra.pl should work without error. Test plan #2 ============ [1] Intentionally make a MARCXML record invalid, e.g., by running the following SQL: UPDATE biblioitems SET marcxml = CONCAT(marcxml, 'junk') WHERE biblionumber = 123; [2] Run rebuild_zebra.pl -b -x -r [3] Without the patch, only part of the database will be indexed. [4] With the patch, rebuild_zebra.pl will not export the bad record and will give an error message saying so, but will successfully index the rest of the records. Signed-off-by: Galen Charlton <gmc@esilibrary.com> Signed-off-by: Larry Baerveldt <larry@bywatersolutions.com> Signed-off-by: Mason James <mtj@kohaaloha.com> Signed-off-by: Paul Poulain <paul.poulain@biblibre.com> Signed-off-by: Jared Camins-Esakov <jcamins@cpbibliography.com>
This commit is contained in:
parent
607b4e7745
commit
151e22070a
1 changed files with 22 additions and 2 deletions
|
@ -11,6 +11,7 @@ use C4::Biblio;
|
|||
use C4::AuthoritiesMarc;
|
||||
use C4::Items;
|
||||
use Koha::RecordProcessor;
|
||||
use XML::LibXML;
|
||||
|
||||
#
|
||||
# script that checks zebradir structure & create directories & mandatory files if needed
|
||||
|
@ -140,6 +141,8 @@ if ($do_munge) {
|
|||
munge_config();
|
||||
}
|
||||
|
||||
my $tester = XML::LibXML->new();
|
||||
|
||||
if ($authorities) {
|
||||
index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
|
||||
} else {
|
||||
|
@ -377,8 +380,18 @@ sub export_marc_records_from_sth {
|
|||
substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
|
||||
}
|
||||
}
|
||||
# extra test to ensure that result is valid XML; otherwise
|
||||
# Zebra won't parse it in DOM mode
|
||||
eval {
|
||||
my $doc = $tester->parse_string($marcxml);
|
||||
};
|
||||
if ($@) {
|
||||
warn "Error exporting record $record_number ($record_type): $@\n";
|
||||
next;
|
||||
}
|
||||
if ( $marcxml ) {
|
||||
print {$fh} $marcxml if $marcxml;
|
||||
$marcxml =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
|
||||
print {$fh} $marcxml;
|
||||
$num_exported++;
|
||||
}
|
||||
next;
|
||||
|
@ -389,6 +402,12 @@ sub export_marc_records_from_sth {
|
|||
my $rec;
|
||||
if ($as_xml) {
|
||||
$rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
|
||||
eval {
|
||||
my $doc = $tester->parse_string($rec);
|
||||
};
|
||||
if ($@) {
|
||||
die "invalid XML: $@";
|
||||
}
|
||||
$rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
|
||||
} else {
|
||||
$rec = $marc->as_usmarc();
|
||||
|
@ -397,7 +416,8 @@ sub export_marc_records_from_sth {
|
|||
$num_exported++;
|
||||
};
|
||||
if ($@) {
|
||||
warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
|
||||
warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
|
||||
warn "... specific error is $@" if $verbose_logging;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue