Koha/misc/maintenance/generate_MARC21Languages.pl
Didier Gautheron 10c08b6552 Bug 25381: XSLTs should not define entities
Bug 23290 explicitly disabled entity expansion during XML parsing for security reasons.

However, many XSLTs define the following entity:
<!ENTITY nbsp "&#160;" >

They don't use the entity &nbsp, but its presence could lead to confusion.

Signed-off-by: Eden Bacani <eden.bacani@gmail.com>

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
2021-01-22 13:46:40 +01:00

143 lines
3.5 KiB
Perl
Executable file

#!/usr/bin/perl
#
# Copyright (C) 2018 Koha-Suomi Oy
#
# This file is part of Koha
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.
#
use Modern::Perl;
use XML::Simple;
use Pod::Usage;
use Getopt::Long;
use Carp;
use open ':std', ':encoding(UTF-8)';
# Print this script's POD documentation at verbosity level 2 and stop.
# pod2usage() normally terminates the program on its own; the explicit
# exit is kept as a fallback so this sub never returns to its caller.
sub usage {
    my %pod_options = ( -verbose => 2 );
    pod2usage(%pod_options);
    exit 0;
}
# Options
# Default source: the Library of Congress MARC code list for languages.
my $sourceurl = 'http://www.loc.gov/standards/codelists/languages.xml';
my $help;
my $outfile;

# NOTE(review): fixed, predictable path in a shared directory; consider
# File::Temp if this script can run on a multi-user host.
my $tempfile = '/tmp/languages.xml';

GetOptions(
    'o|output:s' => \$outfile,
    'url:s'      => \$sourceurl,
    'help|h'     => \$help,
);
usage() if $help;

# Download the code list. The list form of system() bypasses the shell, so
# characters such as '&', ';' or spaces in the URL are passed to wget
# verbatim instead of being interpreted as shell syntax. The '--' marks the
# end of wget's options so the URL can never be parsed as one.
system( '/usr/bin/wget', '-O', $tempfile, '--', $sourceurl ) == 0
    or croak "Can't wget $sourceurl ($?)";

# XMLin drops the document's root element, so the language entries are
# reached through the 'languages' and 'language' keys.
my $ref       = XMLin($tempfile);
my $languages = $ref->{'languages'}->{'language'};

# Write to the requested output file, or to a duplicate of STDOUT.
my $out_handle;
if ( defined $outfile ) {
    open( $out_handle, '>', $outfile )
        or croak("Cannot open output file '$outfile': $!");
}
else {
    open( $out_handle, '>&', \*STDOUT )
        or croak("Couldn't duplicate STDOUT: $!");
}

generate_header($out_handle);
generate_body( $out_handle, $languages );
generate_footer($out_handle);

# Buffered write errors (disk full, closed pipe) only surface at close time.
close $out_handle
    or croak("Error closing output: $!");
# Write one <xsl:when> branch per non-obsolete language to a filehandle.
#
# Parameters:
#   $file_handle   - open, writable filehandle
#   $language_list - array reference of hash references, each with a 'code'
#                    and a 'name' entry. Either entry may be a plain string
#                    or a hash reference whose 'content' key holds the text;
#                    a 'code' hash may also carry a 'status' key.
#
# Entries whose code has status 'obsolete' are skipped. Code and name are
# XML-escaped before being written so the generated stylesheet stays
# well-formed. Returns nothing.
sub generate_body {
    my ( $file_handle, $language_list ) = @_;

    foreach my $language ( @{$language_list} ) {
        my $code = $language->{'code'};
        my $name = $language->{'name'};

        if ( ref($code) eq 'HASH' ) {
            next if ( $code->{'status'} // '' ) eq 'obsolete';

            # Use the element text; interpolating the hash reference itself
            # would print "HASH(0x...)" into the stylesheet.
            $code = $code->{'content'};
        }
        $name = $name->{'content'} if ref($name) eq 'HASH';

        my $safe_code = _xml_escape($code);
        my $safe_name = _xml_escape($name);

        print {$file_handle} " <xsl:when test=\"\$code='$safe_code'\">";
        print {$file_handle} "<xsl:text>$safe_name</xsl:text>";
        print {$file_handle} "</xsl:when>";
        print {$file_handle} "\n";
    }
    return;
}

# Escape the characters that are markup-significant in XML text and in
# double-quoted attribute values. Undefined input is returned unchanged.
sub _xml_escape {
    my ($text) = @_;
    return $text if !defined $text;

    # '&' must be handled first so the entities added below are not re-escaped.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}
# Write the opening part of the generated stylesheet to a filehandle: the
# XML declaration, the DOCTYPE, the <xsl:stylesheet> root, the
# "languageCodeText" template with its "code" parameter, and the opening
# <xsl:choose> element.
#
# Parameters:
#   $file_handle - open, writable filehandle
#
# Returns nothing.
sub generate_header {
    my ($file_handle) = @_;

    # Nothing in this text needs interpolation, so it is kept literal.
    my $header_text = <<'HEADER';
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE stylesheet>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- This file generated by generate_MARC21Languages.pl -->
<xsl:template name="languageCodeText">
<xsl:param name="code"/>
<xsl:choose>
HEADER

    print {$file_handle} $header_text;
    return;
}
# Write the closing part of the generated stylesheet to a filehandle: the
# <xsl:otherwise> fallback, which outputs the value of $code itself, followed
# by the closing tags for <xsl:choose>, <xsl:template> and <xsl:stylesheet>.
#
# Parameters:
#   $file_handle - open, writable filehandle
#
# Returns nothing.
sub generate_footer {
    my ($file_handle) = @_;

    # Non-interpolating heredoc: the '$' characters below are literal text
    # in the generated XSL, not Perl variables.
    my $footer_text = <<'FOOTER';
<xsl:otherwise>
<!-- when 041$a is mapped to an authorised value list, this will show the description -->
<xsl:value-of select="$code" />
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>
FOOTER

    print {$file_handle} $footer_text;
    return;
}
=head1 NAME

generate_MARC21Languages.pl - create MARC21Languages.xsl from the loc.gov language code list

=head1 SYNOPSIS

  generate_MARC21Languages.pl

  generate_MARC21Languages.pl --url='http://www.loc.gov/standards/codelists/languages.xml'

=head1 DESCRIPTION

Create MARC21Languages.xsl from the loc.gov MARC21 Code List for Languages.

=over 8

=item B<--help>, B<-h>

Prints this help.

=item B<--url>

Fetch the languages XML from this URL. Defaults to http://www.loc.gov/standards/codelists/languages.xml

=item B<--output>, B<-o>

Writes the output XML into this file. Defaults to STDOUT.

=back

=cut