From 37340e371857549e480c3814b2d0f9e10f58ca43 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fr=C3=A9d=C3=A9rick=20Capovilla?= Date: Wed, 28 Sep 2011 13:58:14 -0400 Subject: [PATCH] Normalize records imported from Z39.50 servers. Some Z39.50 servers may use the MARC-8 encoding, which uses separated diacritics. By forcing a normalization, all imported records will have combined diacritics. Records with separated diacritics might not show up in Zebra searches if the search terms use accented characters. Signed-off-by: Marcel de Rooy http://bugs.koha-community.org/show_bug.cgi?id=8610 Signed-off-by: Paul Poulain checked it still works after the patch with UNIMARC and BNF server (that provides utf-8 records) --- C4/Breeding.pm | 3 +++ C4/ImportBatch.pm | 4 ++++ acqui/z3950_search.pl | 4 ++++ cataloguing/z3950_search.pl | 4 ++++ 4 files changed, 15 insertions(+) diff --git a/C4/Breeding.pm b/C4/Breeding.pm index ea3e90215f..9dedc4d08b 100644 --- a/C4/Breeding.pm +++ b/C4/Breeding.pm @@ -94,6 +94,9 @@ sub ImportBreeding { ($marcrecord, $charset_result, $charset_errors) = MarcToUTF8Record($marcarray[$i]."\x1D", C4::Context->preference("marcflavour"), $encoding); + # Normalize the record so it doesn't have separated diacritics + SetUTF8Flag($marcrecord); + # warn "$i : $marcarray[$i]"; # FIXME - currently this does nothing my @warnings = $marcrecord->warnings(); diff --git a/C4/ImportBatch.pm b/C4/ImportBatch.pm index 648d96e734..15f1acbc4a 100644 --- a/C4/ImportBatch.pm +++ b/C4/ImportBatch.pm @@ -382,6 +382,10 @@ sub BatchStageMarcRecords { if (scalar($marc_record->fields()) == 0) { push @invalid_records, $marc_blob; } else { + + # Normalize the record so it doesn't have separated diacritics + SetUTF8Flag($marc_record); + $num_valid++; if ($record_type eq 'biblio') { $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $encoding, int(rand(99999)), 0); diff --git a/acqui/z3950_search.pl b/acqui/z3950_search.pl index a1cdce4120..8da89b6645 100755 --- 
a/acqui/z3950_search.pl +++ b/acqui/z3950_search.pl @@ -281,6 +281,10 @@ warn "query ".$query if $DEBUG; ## In HEAD i change everything to UTF-8 # In rel2_2 i am not sure what encoding is so no character conversion is done here ##Add necessary encoding changes to here -TG + + # Normalize the record so it doesn't have separated diacritics + SetUTF8Flag($marcrecord); + my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, "" ); $oldbiblio->{isbn} =~ s/ |-|\.//g if $oldbiblio->{isbn}; # pad | and ( with spaces to allow line breaks in the HTML diff --git a/cataloguing/z3950_search.pl b/cataloguing/z3950_search.pl index 5e22636b5a..fe07a7d42c 100755 --- a/cataloguing/z3950_search.pl +++ b/cataloguing/z3950_search.pl @@ -249,6 +249,10 @@ warn "query ".$query if $DEBUG; ## In HEAD i change everything to UTF-8 # In rel2_2 i am not sure what encoding is so no character conversion is done here ##Add necessary encoding changes to here -TG + + # Normalize the record so it doesn't have separated diacritics + SetUTF8Flag($marcrecord); + my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, "" ); $oldbiblio->{isbn} =~ s/ |-|\.//g if $oldbiblio->{isbn}; # pad | and ( with spaces to allow line breaks in the HTML -- 2.39.5