diff --git a/C4/Biblio.pm b/C4/Biblio.pm index 03f0ff1d1f..491f77e951 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -93,6 +93,7 @@ use C4::Charset qw( nsb_clean SetMarcUnicodeFlag SetUTF8Flag + StripNonXmlChars ); use C4::Languages; use C4::Linker; @@ -2231,6 +2232,7 @@ sub TransformHtmlToMarc { ; # between 001 and 009 (included) } elsif ( $fval ne '' ) { + $fval = StripNonXmlChars($fval); #Strip out any non-XML characters like control characters $newfield = MARC::Field->new( $tag, $fval, ); } @@ -2252,6 +2254,7 @@ sub TransformHtmlToMarc { $newfield->add_subfields( $fkey => $fval); } elsif($fval ne '') { + $fval = StripNonXmlChars($fval); #Strip out any non-XML characters like control characters $newfield = MARC::Field->new( $tag, $ind1, $ind2, $fkey => $fval ); } $j += 2; diff --git a/t/db_dependent/Biblio/TransformHtmlToMarc.t b/t/db_dependent/Biblio/TransformHtmlToMarc.t index 708a764ff4..0d54748031 100755 --- a/t/db_dependent/Biblio/TransformHtmlToMarc.t +++ b/t/db_dependent/Biblio/TransformHtmlToMarc.t @@ -22,7 +22,7 @@ Koha::Caches->get_instance->clear_from_cache( "MarcSubfieldStructure-" ); ( $biblionumbertagfield, $biblionumbertagsubfield ) = C4::Biblio::GetMarcFromKohaField( "biblio.biblionumber" ); subtest 'Biblio record' => sub { - plan tests => 17; + plan tests => 20; my $leader = '00203nam a2200097 4500'; my $input = CGI->new; $input->param( -name => 'biblionumber', -value => '42' ); @@ -73,10 +73,16 @@ subtest 'Biblio record' => sub { $input->param( -name => "tag_490_code_b_1123", -value => 'b' ); $input->param( -name => "tag_490_subfield_b_1123", -value => '490b' ); + # A field (900) after 490 + $input->param( -name => "tag_900_indicator1_1123", -value => "" ); + $input->param( -name => "tag_900_indicator2_1123", -value => "" ); + $input->param( -name => "tag_900_code_a_1123", -value => 'a' ); + $input->param( -name => "tag_900_subfield_a_1123", -value => "This string has bad \x{1B}characters in it" ); + my $record = C4::Biblio::TransformHtmlToMarc($input, 1); my @all_fields = $record->fields; - is( @all_fields, 7, 'The record should have been created with 7 fields' ); + is( @all_fields, 8, 'The record should have been created with 8 fields' ); # biblionumber + 2x010 + 100 + 200 + 390 + 490 my @fields_010 = $record->field('010'); is( @fields_010, 2, 'The record should have been created with 2 010' ); @@ -92,6 +98,13 @@ subtest 'Biblio record' => sub { is( @subfields_200_a, 2, 'The record should have been created with 2 200$a' ); is_deeply( \@subfields_200_a, [ 'first title', 'second title' ], 'The 2 titles should have been kept in the correct order' ); + my @fields_900 = $record->field('900'); + is( @fields_900, 1, 'The record should have been created with 1 900' ); + is_deeply( + $fields_900[0]->subfields(), [ 'a', 'This string has bad characters in it' ], + 'Field 900 had its non-XML characters stripped' + ); + my @subfields_biblionumber = $record->subfield( $biblionumbertagfield, $biblionumbertagsubfield ); is( @subfields_biblionumber, 1, 'The record should contain only one biblionumber field' ); @@ -105,6 +118,11 @@ subtest 'Biblio record' => sub { is( $all_fields[4]->tag, '390', 'Fifth field is 390' ); is( $all_fields[5]->subfield('a'), 42, 'Sixth field contains bibnumber' ); is( $all_fields[6]->tag, '490', 'Last field is 490' ); + + my $new_record = eval { MARC::Record::new_from_xml( $record->as_xml(), 'UTF-8' ); }; + my $record_error = $@; + ok( !$record_error, 'No errors parsing MARCXML generated by TransformHtmlToMarc' ); + }; subtest 'Add authority record' => sub {