From 80683fad479ca8e886957721018328f748b521b7 Mon Sep 17 00:00:00 2001
From: Henri-Damien LAURENT
Date: Tue, 8 Jan 2008 16:43:21 -0600
Subject: [PATCH] adding encoding management to z3950

FixEncoding() now receives the character set configured for the Z39.50
server. UNIMARC records that are not already UTF-8 are rebuilt as UTF-8,
through Text::Iconv or, for ISO 5426 servers, through MARC::Charset.
z3950_search.pl passes the server's "encoding" column instead of its
"syntax" column.

Signed-off-by: Chris Cormack
Signed-off-by: Joshua Ferraro
---
 C4/Koha.pm                  | 60 +++++++++++++++++++++++-------------
 cataloguing/z3950_search.pl |  4 +-
 2 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/C4/Koha.pm b/C4/Koha.pm
index d7ed41dfe0..8db25fff57 100644
--- a/C4/Koha.pm
+++ b/C4/Koha.pm
@@ -844,46 +844,64 @@ Returns a well encoded marcrecord.
 =cut
 sub FixEncoding {
+    # $marc is the raw ISO 2709 record; $encoding is the charset configured for the
+    # Z39.50 server it came from. UNIMARC records in another charset are rebuilt as
+    # UTF-8; anything else is returned exactly as parsed.
     my $marc=shift;
+    my $encoding=shift;
     my $record = MARC::Record->new_from_usmarc($marc);
-    if (C4::Context->preference("MARCFLAVOUR") eq "UNIMARC"){
-        use Encode::Guess;
-        my $targetcharset="utf8" if (C4::Context->preference("TemplateEncoding") eq "utf-8");
-        $targetcharset="latin1" if (C4::Context->preference("TemplateEncoding") eq "iso-8859-1");
-        my $decoder = guess_encoding($marc, qw/utf8 latin1/);
-#       die $decoder unless ref($decoder);
-        if (ref($decoder)) {
+    if (C4::Context->preference("marcflavour") eq "UNIMARC"){
+        my $targetcharset="utf8";
+        if ($encoding && $targetcharset ne $encoding){
             my $newRecord=MARC::Record->new();
+            # MARC::Record->new() starts from a blank leader; carry the source leader over.
+            $newRecord->leader($record->leader());
+            # Pick one converter for the server charset so a single loop rebuilds the record.
+            my $convert;
+            if ($encoding=~/5426/){
+                # ISO 5426 data goes through MARC::Charset's MARC-8 converter.
+                use MARC::Charset;
+                $convert=sub { return MARC::Charset::marc8_to_utf8($_[0]); };
+            } else {
+                use Text::Iconv;
+                # An unsupported charset makes Text::Iconv->new() fail; hand back the
+                # unconverted record instead of dying in the middle of a search.
+                my $decoder=eval { Text::Iconv->new($encoding,$targetcharset) };
+                unless ($decoder){
+                    warn "FixEncoding: no conversion from $encoding to $targetcharset";
+                    return $record;
+                }
+                $convert=sub { return $decoder->convert($_[0]); };
+            }
             foreach my $field ($record->fields()){
                 if ($field->tag()<'010'){
                     $newRecord->insert_grouped_field($field);
                 } else {
                     my $newField;
-                    my $createdfield=0;
                     foreach my $subfield ($field->subfields()){
-                        if ($createdfield){
-                            if (($newField->tag eq '100')) {
-                                substr($subfield->[1],26,2,"0103") if ($targetcharset eq "latin1");
-                                substr($subfield->[1],26,4,"5050") if ($targetcharset eq "utf8");
-                            }
-                            map {C4::Biblio::char_decode($_,"UNIMARC")} @$subfield;
-                            $newField->add_subfields($subfield->[0]=>$subfield->[1]);
+                        my ($code,$value)=@$subfield;
+                        # UNIMARC 100$a positions 26-29 name the record's character sets ("50" = UTF-8).
+                        # The length test keeps 4-argument substr from dying on a short value.
+                        if ($field->tag() eq '100' && $code eq 'a' && length($value)>=30){
+                            substr($value,26,4,"5050");
+                        }
+                        # Keep the original text when the converter returns undef.
+                        my $converted=$convert->($value);
+                        $value=$converted if defined $converted;
+                        if (defined $newField){
+                            $newField->add_subfields($code=>$value);
                         } else {
-                            map {C4::Biblio::char_decode($_,"UNIMARC")} @$subfield;
-                            $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>$subfield->[1]);
-                            $createdfield=1;
+                            $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$code=>$value);
                         }
                     }
                     $newRecord->insert_grouped_field($newField);
                 }
             }
 #           warn $newRecord->as_formatted();
             return $newRecord;
-        } else {
-            return $record;
         }
-    } else {
         return $record;
     }
+    return $record;
 }
 
 =head2 GetKohaAuthorisedValues
diff --git a/cataloguing/z3950_search.pl b/cataloguing/z3950_search.pl
index 3703ce72d4..7c4e1462a0 100755
--- a/cataloguing/z3950_search.pl
+++ b/cataloguing/z3950_search.pl
@@ -147,7 +147,7 @@ else {
               || $DEBUG
               && warn( "" . $oConnection[$s]->errmsg() );
             $serverhost[$s] = $server->{host};
-            $encoding[$s] = $server->{syntax};
+            $encoding[$s] = $server->{encoding};
             $s++;
         } ## while fetch
     } # foreach
@@ -198,7 +198,7 @@ else {
                     my $rec = $oResult[$k]->record($i);
                     my $marcrecord;
                     $marcdata = $rec->raw();
-                    $marcrecord = FixEncoding($marcdata);
+                    $marcrecord = FixEncoding($marcdata,$encoding[$k]);
 ####WARNING records coming from Z3950 clients are in various character sets MARC8,UTF8,UNIMARC etc
 ## In HEAD i change everything to UTF-8
 # In rel2_2 i am not sure what encoding is so no character conversion is done here
-- 
2.39.5