01d78e1ec7
MARC::Record and MARC::File::* modules sometimes use the position 09 of the leader to detect encoding. A blank character means 'MARC-8' while an 'a' means 'UTF-8'. In a UNIMARC authority this position is used to store the authority type (see https://www.transition-bibliographique.fr/wp-content/uploads/2021/02/AIntroLabel-2004.pdf [FR]). In this case, 'a' means 'Personal Name'. The result is that the import will succeed for a Personal Name authority, but it will fail for all other authority types. Steps to reproduce: 0. Be sure to have a Koha UNIMARC instance. 1. Download the MARCXML for "Honoré de Balzac" curl -o balzac.marcxml https://www.idref.fr/02670305X.xml 2. Verify that it's encoded in UTF-8 file balzac.marcxml (should output "balzac.marcxml: XML 1.0 document, UTF-8 Unicode text") 3. Go to Tools » Stage MARC for import and import balzac.marcxml with the following settings: Record type: Authority Character encoding: UTF-8 Format: MARCXML Do not touch the other settings 4. Once imported, go to the staged MARC management tool and find your batch. Click on the authority title "Balzac Honoré de 1799-1850" to show the MARC inside a modal window. There should be no encoding issue. 5. Write down the imported record id (the number in column '#') and go to the MARC authority editor. Replace all URL parameters by 'breedingid=THE_ID_YOU_WROTE_DOWN' The URL should look like this: /cgi-bin/koha/authorities/authorities.pl?breedingid=198 You should see no encoding issues. Do not save the record. 6. Import the batch into the catalog. Verify that the authority record has no encoding issue. 7. Now download the MARCXML for "Athènes (Grèce)" curl -o athènes.marcxml https://www.idref.fr/027290530.xml 8. Repeat steps 2 to 6 using athènes.marcxml file. At steps 4 and 5 you should see encoding issues and that the position 9 of the leader was rewritten from 'c' to 'a'. 
Strangely, importing this batch fixes the encoding issue, but we still lose the information in position 09 of the leader. This patch makes use of the MARCXML representation of the record instead of the ISO2709 representation because, unlike MARC::Record::new_from_usmarc, MARC::Record::new_from_xml allows us to pass the encoding and the format directly, which prevents data from being double-encoded when position 09 of the leader is different from 'a'. Test plan: - Follow the "steps to reproduce" above and verify that you have no encoding issues. Signed-off-by: David Nind <david@davidnind.com> Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com> Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
105 lines
3.1 KiB
Perl
Executable file
105 lines
3.1 KiB
Perl
Executable file
#!/usr/bin/perl
|
|
|
|
# Koha library project www.koha-community.org
|
|
|
|
# Copyright 2011 Libéo
|
|
#
|
|
# This file is part of Koha.
|
|
#
|
|
# Koha is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Koha is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Koha; if not, see <http://www.gnu.org/licenses>.
|
|
|
|
use Modern::Perl;
|
|
|
|
# standard or CPAN modules used
|
|
use CGI qw(:standard -utf8);
|
|
|
|
# Koha modules used
|
|
use C4::Context;
|
|
use C4::Output qw( output_html_with_http_headers );
|
|
use C4::Auth qw( get_template_and_user );
|
|
use C4::Biblio qw( GetMarcBiblio );
|
|
use C4::Auth qw( get_template_and_user );
|
|
use C4::ImportBatch qw( GetImportBiblios );
|
|
use C4::AuthoritiesMarc qw( GetAuthority );
|
|
|
|
use Koha::Biblios;
|
|
use Koha::Import::Records;
|
|
|
|
# Request parameters:
#   id       - id of the catalog record (biblio or authority, see 'type')
#   importid - id of the import record to compare against
#   batchid  - passed through to the template unchanged
#   type     - record type; the script tests it against 'biblio' and 'auth'
my $input    = CGI->new;
my $recordid = $input->param('id');
my $importid = $input->param('importid');
my $batchid  = $input->param('batchid');
my $type     = $input->param('type');

# Both ids are mandatory: without either one, redirect to the 404 page and stop.
if ( !$recordid || !$importid ) {
    print $input->redirect("/cgi-bin/koha/errors/404.pl");
    exit;
}

# Working variables filled in by the sections below and passed to the template.
my $record;             # catalog-side MARC record
my $recordImportid;
my $recordTitle;        # title shown for the catalog-side record
my $importTitle;        # title shown for the import-side record
my $formatted1;         # as_formatted dump of the catalog-side record
my $formatted2;         # as_formatted dump of the import-side record
my $errorFormatted1;    # set to 1 when the catalog-side record is unavailable
my $errorFormatted2;    # set to 1 when the import-side record is unavailable

# Template setup: intranet page, requesting the tools => manage_staged_marc flag.
my $template_options = {
    template_name => "tools/showdiffmarc.tt",
    query         => $input,
    type          => "intranet",
    flagsrequired => { tools => 'manage_staged_marc' },
};
my ( $template, $loggedinuser, $cookie ) = get_template_and_user($template_options);

# Load the catalog-side record and its display title according to the
# requested record type, then render it with as_formatted.
#
# 'type' may be missing from the request; compare against an empty string in
# that case so the 'eq' tests below do not warn about an undefined value.
my $record_type = $type // '';

if ( $record_type eq 'biblio' ) {
    $record = GetMarcBiblio(
        {
            biblionumber => $recordid,
            embed_items  => 1,
        }
    );

    # find() may return undef (e.g. unknown or deleted biblionumber); only ask
    # for the title when a biblio was found, so that a missing record is
    # reported through ERROR_FORMATTED1 instead of dying here.
    my $biblio = Koha::Biblios->find($recordid);
    $recordTitle = $biblio->title if $biblio;
}
elsif ( $record_type eq 'auth' ) {
    $record      = GetAuthority($recordid);
    $recordTitle = "Authority number " . $recordid;    #FIXME we should get the main heading
}

# No record (unknown id or unsupported type): flag the error for the template.
if ($record) {
    $formatted1 = $record->as_formatted;
}
else {
    $errorFormatted1 = 1;
}

# Load the import-side record, render it with as_formatted and fetch its title.
#
# The lookup result is checked rather than just $importid: find() may return
# undef for an id that does not exist, in which case the error flag is set for
# the template instead of dying on a method call on undef.
my $import_record = $importid ? Koha::Import::Records->find($importid) : undef;

if ($import_record) {
    my $import_marc = $import_record->get_marc_record( { embed_items => 1 } );

    # NOTE(review): get_marc_record is expected to return a MARC::Record;
    # guard anyway so an empty result is reported rather than fatal.
    if ($import_marc) {
        $formatted2 = $import_marc->as_formatted;
    }
    else {
        $errorFormatted2 = 1;
    }

    # The title is read from the first row returned by GetImportBiblios.
    my $import_biblios = GetImportBiblios($importid);
    $importTitle = $import_biblios->[0]->{'title'};
}
else {
    $errorFormatted2 = 1;
}

# Values handed to the template: identifiers, titles, the two formatted
# records and their error flags. Kept as an ordered list of key/value pairs.
my @template_vars = (
    SCRIPT_NAME      => '/cgi-bin/koha/tools/showdiffmarc.pl',
    RECORDID         => $recordid,
    IMPORTID         => $importid,
    RECORDTITLE      => $recordTitle,
    IMPORTTITLE      => $importTitle,
    MARC_FORMATTED1  => $formatted1,
    MARC_FORMATTED2  => $formatted2,
    ERROR_FORMATTED1 => $errorFormatted1,
    ERROR_FORMATTED2 => $errorFormatted2,
    batchid          => $batchid,
);
$template->param(@template_vars);

# Emit the rendered page together with the HTTP headers and session cookie.
output_html_with_http_headers( $input, $cookie, $template->output );