From 0161eeba1301885658519b7b0c15cd6221d54a3d Mon Sep 17 00:00:00 2001 From: Tomas Cohen Arazi Date: Mon, 18 Sep 2023 09:35:16 -0300 Subject: [PATCH] Bug 34828: Introduce Koha::MetadataExtractor and ->get_normalized_upc This patch introduces a new pattern for the different ->get_ methods we've been adding. The aim is that code will look more like: my $metadata_extractor = Koha::MetadataExtractor->new; while ( my $biblio = $biblios->next ) { my $record = $biblio->record; my $schema = $biblio->record_schema; $data->{$biblio->id}->{normalized_upc} = $metadata_extractor->get_normalized_upc( { record => $record, schema => $schema } ); $data->{$biblio->id}->{normalized_ean} = $metadata_extractor->get_normalized_ean( { record => $record, schema => $schema } ); } The key is that we are actually reusing the MARC::Record, and code for each schema is organized cleanly, so it is easier to maintain. For the class names, I chose to add the 'MARC' name in the path, so we don't need to refactor anything if we want to add support for other serialization formats. To test: 1. Apply this patch 2. Run: $ ktd --shell k$ qa -c 1 => SUCCESS: Tests pass! 3. 
Sign off :-D Signed-off-by: David Nind Signed-off-by: Tomas Cohen Arazi --- Koha/MetadataExtractor.pm | 108 ++++++++++++++++++++++++ Koha/MetadataExtractor/MARC/MARC21.pm | 93 ++++++++++++++++++++ Koha/MetadataExtractor/MARC/UNIMARC.pm | 90 ++++++++++++++++++++ t/Koha/MetadataExtractor.t | 102 ++++++++++++++++++++++ t/Koha/MetadataExtractor/MARC/MARC21.t | 64 ++++++++++++++ t/Koha/MetadataExtractor/MARC/UNIMARC.t | 64 ++++++++++++++ 6 files changed, 521 insertions(+) create mode 100644 Koha/MetadataExtractor.pm create mode 100644 Koha/MetadataExtractor/MARC/MARC21.pm create mode 100644 Koha/MetadataExtractor/MARC/UNIMARC.pm create mode 100755 t/Koha/MetadataExtractor.t create mode 100755 t/Koha/MetadataExtractor/MARC/MARC21.t create mode 100755 t/Koha/MetadataExtractor/MARC/UNIMARC.t diff --git a/Koha/MetadataExtractor.pm b/Koha/MetadataExtractor.pm new file mode 100644 index 0000000000..c4b4f1e9be --- /dev/null +++ b/Koha/MetadataExtractor.pm @@ -0,0 +1,108 @@ +package Koha::MetadataExtractor; + +# Copyright ByWater Solutions 2023 +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Koha is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Koha; if not, see <http://www.gnu.org/licenses>. +# +=head1 NAME + +Koha::MetadataExtractor - Extract specific metadata from MARC::Record objects + +=cut + +use Modern::Perl; + +use Koha::Exceptions; + +=head1 API + +=head2 Class methods + +=head3 new + + my $extractor = Koha::MetadataExtractor->new; + +Constructor for the I<Koha::MetadataExtractor> class. 
+
+=cut
+
+sub new {
+    my ($class) = @_;
+    my $self = { extractors => {} };    # schema name => extractor object, filled lazily by get_extractor
+
+    return
+        bless $self,
+        $class;
+}
+
+=head2 get_normalized_upc
+
+    my $normalized_upc = $extractor->get_normalized_upc( { record => $record, schema => $schema } );
+
+Returns the normalized UPC for the passed I<$record>, delegating to the extractor for I<$schema>. Throws I<Koha::Exceptions::MissingParameter> if no record is passed.
+
+=cut
+
+sub get_normalized_upc {
+    my ( $self, $params ) = @_;
+
+    Koha::Exceptions::MissingParameter->throw( parameter => 'record' )
+        unless $params->{record};
+
+    return $self->get_extractor( { schema => $params->{schema} } )->get_normalized_upc( $params->{record} );
+}
+
+=head2 Internal methods
+
+=head3 get_extractor
+
+    my $extractor = $self->get_extractor( { schema => $schema } );
+
+Returns the cached extractor for the specified I<$schema>, loading and instantiating I<Koha::MetadataExtractor::MARC::$schema> on first use. Throws I<Koha::Exceptions::MissingParameter> if no schema is passed and I<Koha::Exceptions::WrongParameter> if it is neither I<MARC21> nor I<UNIMARC>; dies with the loader error if the extractor class cannot be loaded.
+
+=cut
+
+sub get_extractor {
+    my ( $self, $params ) = @_;
+
+    my $schema = $params->{schema};
+
+    Koha::Exceptions::MissingParameter->throw( parameter => 'schema' )
+        unless $schema;
+
+    my $valid_schemas = { 'MARC21' => 1, 'UNIMARC' => 1 };
+
+    Koha::Exceptions::WrongParameter->throw( name => 'schema', value => $schema )
+        unless $valid_schemas->{$schema};
+    # $schema passed the whitelist above, so interpolating it into the string eval below is safe
+    unless ( $self->{extractors}->{$schema} ) {
+        my $extractor_class = "Koha::MetadataExtractor::MARC::$schema";
+        eval "require $extractor_class; 1" or die "Unable to load $extractor_class: $@";
+        $self->{extractors}->{$schema} = $extractor_class->new;
+    }
+
+    return $self->{extractors}->{$schema};
+}
+
+=head1 AUTHOR
+
+Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt>
+
+=cut
+
+1;
+
+__END__
diff --git a/Koha/MetadataExtractor/MARC/MARC21.pm b/Koha/MetadataExtractor/MARC/MARC21.pm
new file mode 100644
index 0000000000..3917159a58
--- /dev/null
+++ b/Koha/MetadataExtractor/MARC/MARC21.pm
@@ -0,0 +1,93 @@
+package Koha::MetadataExtractor::MARC::MARC21;
+
+# Copyright ByWater Solutions 2023
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
+
+=head1 NAME
+
+Koha::MetadataExtractor::MARC::MARC21 - Extract specific metadata from MARC21 MARC::Record objects
+
+=cut
+
+use Modern::Perl;
+
+use Koha::Exceptions;
+
+=head1 API
+
+=head2 Class methods
+
+=head3 new
+
+    my $extractor = Koha::MetadataExtractor::MARC::MARC21->new;
+
+Constructor for the I<Koha::MetadataExtractor::MARC::MARC21> class.
+
+=cut
+
+sub new {
+    my ($class) = @_;
+    my $self = {};
+
+    return
+        bless $self,
+        $class;
+}
+
+=head2 get_normalized_upc
+
+    my $normalized_upc = $extractor->get_normalized_upc( $record );
+
+Returns the normalized UPC for the passed I<$record>: the value of the first 024$a whose field has first indicator 1 and which is non-empty once hyphens are removed. Returns nothing if no such field exists. Throws I<Koha::Exceptions::MissingParameter> if no record is passed and I<Koha::Exceptions::WrongParameter> if it is not a I<MARC::Record>.
+
+=cut
+
+sub get_normalized_upc {
+    my ( $self, $record ) = @_;
+
+    Koha::Exceptions::MissingParameter->throw( parameter => 'record' )
+        unless $record;
+
+    Koha::Exceptions::WrongParameter->throw( name => 'record', type => ref($record) )
+        unless ref($record) eq 'MARC::Record';
+
+    my @fields = $record->field('024');
+    foreach my $field (@fields) {
+        # Only 024 fields with first indicator 1 and a $a subfield are candidates
+        my $indicator = $field->indicator(1);
+        my $upc       = $field->subfield('a');
+        next unless defined $upc && $indicator eq "1";    # also avoids matching against undef
+        ( my $normalized_upc ) = $upc =~ /([\d-]*[X]*)/;    # leading digits/hyphens plus any X right after
+        $normalized_upc =~ s/-//g;
+
+        if ($normalized_upc) {
+            return $normalized_upc;
+        }
+    }
+
+    return;
+}
+
+=head1 AUTHOR
+
+Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt>
+
+=cut
+
+1;
+
+__END__
diff --git a/Koha/MetadataExtractor/MARC/UNIMARC.pm b/Koha/MetadataExtractor/MARC/UNIMARC.pm
new file mode 100644
index
0000000000..53b8f3855e
--- /dev/null
+++ b/Koha/MetadataExtractor/MARC/UNIMARC.pm
@@ -0,0 +1,90 @@
+package Koha::MetadataExtractor::MARC::UNIMARC;
+
+# Copyright ByWater Solutions 2023
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
+
+=head1 NAME
+
+Koha::MetadataExtractor::MARC::UNIMARC - Extract specific metadata from UNIMARC MARC::Record objects
+
+=cut
+
+use Modern::Perl;
+
+use Koha::Exceptions;
+
+=head1 API
+
+=head2 Class methods
+
+=head3 new
+
+    my $extractor = Koha::MetadataExtractor::MARC::UNIMARC->new;
+
+Constructor for the I<Koha::MetadataExtractor::MARC::UNIMARC> class. Takes no parameters and returns a new extractor object.
+
+=cut
+
+sub new {
+    my $class = shift;
+
+    # The object starts out as an empty hash blessed into $class
+    my $extractor = bless {}, $class;
+
+    return $extractor;
+}
+
+=head2 get_normalized_upc
+
+    my $normalized_upc = $extractor->get_normalized_upc( $record );
+
+Returns the normalized UPC for the passed I<$record>.
+
+=cut
+
+sub get_normalized_upc {
+    my ( $self, $record ) = @_;
+
+    Koha::Exceptions::MissingParameter->throw( parameter => 'record' )
+        unless $record;
+
+    Koha::Exceptions::WrongParameter->throw( name => 'record', type => ref($record) )
+        unless ref($record) eq 'MARC::Record';
+
+    foreach my $field ( $record->field('072') ) {
+        my $upc = $field->subfield('a');
+        next unless defined $upc;    # a 072 without $a has nothing to normalize
+
+        # Keep the leading run of digits/hyphens (plus any X right after it), then drop the hyphens
+        ( my $normalized_upc ) = $upc =~ /([\d-]*[X]*)/;
+        $normalized_upc =~ s/-//g;
+
+        return $normalized_upc if $normalized_upc;
+    }
+
+    return;    # no usable 072$a: undef in scalar context, empty list in list context
+}
+
+=head1 AUTHOR
+
+Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt>
+
+=cut
+
+1;
+
+__END__
diff --git a/t/Koha/MetadataExtractor.t b/t/Koha/MetadataExtractor.t
new file mode 100755
index 0000000000..9e55d96537
--- /dev/null
+++ b/t/Koha/MetadataExtractor.t
@@ -0,0 +1,102 @@
+#!/usr/bin/perl
+
+# Copyright 2023 Koha Development team
+#
+# This file is part of Koha
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
+
+use Modern::Perl;
+
+use Test::More tests => 3;
+use Test::Exception;
+use Test::MockModule;
+
+use MARC::Record;
+
+use Koha::MetadataExtractor;
+
+subtest 'new() tests' => sub {
+    # The constructor must return an object blessed into Koha::MetadataExtractor
+    plan tests => 1;
+
+    my $extractor = Koha::MetadataExtractor->new;
+    is( ref($extractor), 'Koha::MetadataExtractor', 'new() returns a Koha::MetadataExtractor object' );
+};
+
+subtest 'get_extractor() tests' => sub {
+    # Covers per-schema instantiation and caching, plus both parameter exceptions
+    plan tests => 8;
+
+    my $extractor = Koha::MetadataExtractor->new;
+
+    foreach my $schema (qw{ MARC21 UNIMARC }) {
+        my $specific_extractor = $extractor->get_extractor( { schema => $schema } );
+        is(
+            ref($specific_extractor), "Koha::MetadataExtractor::MARC::$schema",
+            "Returns the right extractor library for schema ($schema)"
+        );
+        ok( exists $extractor->{extractors}->{$schema}, "Extractor for $schema cached" );
+    }
+
+    throws_ok { $extractor->get_extractor }
+    'Koha::Exceptions::MissingParameter',
+        'Exception if no schema parameter';
+
+    like(
+        "$@", qr{A required parameter is missing' with parameter => schema},
+        'Exception correctly details missing parameter'
+    );
+
+    throws_ok { $extractor->get_extractor( { schema => 'POTATO' } ) }
+    'Koha::Exceptions::WrongParameter',
+        'Exception if the passed schema is not supported';
+
+    like(
+        "$@", qr{'Parameter has wrong value or type' with name => schema, value => POTATO},
+        'Exception correctly details incorrect parameter value'
+    );
+};
+
+subtest 'get_normalized_upc() tests' => sub {
+    # The schema-specific extractors are mocked, so only the dispatching is exercised here
+    plan tests => 6;
+
+    my $extractor = Koha::MetadataExtractor->new;
+
+    my $record = MARC::Record->new();
+
+    my $mock_marc21 = Test::MockModule->new('Koha::MetadataExtractor::MARC::MARC21');
+    $mock_marc21->mock( 'get_normalized_upc', sub { return 'MARC21' } );
+
+    my $mock_unimarc = Test::MockModule->new('Koha::MetadataExtractor::MARC::UNIMARC');
+    $mock_unimarc->mock( 'get_normalized_upc', sub { return 'UNIMARC' } );
+
+    foreach my $schema (qw{ MARC21 UNIMARC }) {
+        is(
+            $extractor->get_normalized_upc( { record => $record, schema => $schema } ), $schema,
+            "Library for handling $schema called"
+        );
+        ok( exists $extractor->{extractors}->{$schema}, "Extractor for $schema cached" );
+    }
+
+    throws_ok { $extractor->get_normalized_upc() }
+    'Koha::Exceptions::MissingParameter',
+        'Exception if no record parameter';
+
+    like(
+        "$@", qr{A required parameter is missing' with parameter => record},
+        'Exception correctly details missing parameter'
+    );
+};
diff --git a/t/Koha/MetadataExtractor/MARC/MARC21.t b/t/Koha/MetadataExtractor/MARC/MARC21.t
new file mode 100755
index 0000000000..99c0a5a723
--- /dev/null
+++ b/t/Koha/MetadataExtractor/MARC/MARC21.t
@@ -0,0 +1,64 @@
+#!/usr/bin/perl
+
+# Copyright 2023 Koha Development team
+#
+# This file is part of Koha
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
+
+use Modern::Perl;
+
+use Test::More tests => 2;
+use Test::Exception;
+
+use MARC::Record;
+
+use Koha::MetadataExtractor::MARC::MARC21;
+
+subtest 'new() tests' => sub {
+    # The constructor must return an object blessed into the MARC21 extractor class
+    plan tests => 1;
+
+    my $extractor = Koha::MetadataExtractor::MARC::MARC21->new;
+    is( ref($extractor), 'Koha::MetadataExtractor::MARC::MARC21', 'new() returns a MARC21 extractor object' );
+};
+
+subtest 'get_normalized_upc() tests' => sub {
+    # 024 handling depends on the first indicator; bad parameters must raise exceptions
+    plan tests => 6;
+
+    my $extractor = Koha::MetadataExtractor::MARC::MARC21->new;
+
+    my $record = MARC::Record->new();
+    $record->append_fields( MARC::Field->new( '024', '1', ' ', a => "9-123345345X" ) );
+
+    is( $extractor->get_normalized_upc($record), '9123345345X', '024$a with first indicator 1 is returned without hyphens' );
+
+    $record = MARC::Record->new();
+    $record->append_fields( MARC::Field->new( '024', ' ', ' ', a => "9-123345345X" ) );
+
+    is( $extractor->get_normalized_upc($record), undef, '024 without first indicator 1 is ignored' );
+
+    throws_ok { $extractor->get_normalized_upc() }
+    'Koha::Exceptions::MissingParameter',
+        'Exception if no parameter';
+
+    like( "$@", qr{A required parameter is missing' with parameter => record}, 'Exception correctly details missing parameter' );
+
+    throws_ok { $extractor->get_normalized_upc("Some string") }
+    'Koha::Exceptions::WrongParameter',
+        'Exception if the parameter is not a MARC::Record';
+
+    like( "$@", qr{Parameter has wrong value or type}, 'Exception correctly reports the wrong parameter' );
+};
diff --git a/t/Koha/MetadataExtractor/MARC/UNIMARC.t b/t/Koha/MetadataExtractor/MARC/UNIMARC.t
new file mode 100755
index 0000000000..85e17b1512
--- /dev/null
+++ b/t/Koha/MetadataExtractor/MARC/UNIMARC.t
@@ -0,0 +1,64 @@
+#!/usr/bin/perl
+
+# Copyright 2023 Koha Development team
+#
+# This file is part of Koha
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
+
+use Modern::Perl;
+
+use Test::More tests => 2;
+use Test::Exception;
+
+use MARC::Record;
+
+use Koha::MetadataExtractor::MARC::UNIMARC;
+
+subtest 'new() tests' => sub {
+    # The constructor must return an object blessed into the UNIMARC extractor class
+    plan tests => 1;
+
+    my $extractor = Koha::MetadataExtractor::MARC::UNIMARC->new;
+    is( ref($extractor), 'Koha::MetadataExtractor::MARC::UNIMARC', 'new() returns a UNIMARC extractor object' );
+};
+
+subtest 'get_normalized_upc() tests' => sub {
+    # 072$a is used whatever the indicators are; bad parameters must raise exceptions
+    plan tests => 6;
+
+    my $extractor = Koha::MetadataExtractor::MARC::UNIMARC->new;
+
+    my $record = MARC::Record->new();
+    $record->append_fields( MARC::Field->new( '072', '1', ' ', a => "9-123345345X" ) );
+
+    is( $extractor->get_normalized_upc($record), '9123345345X', '072$a is returned without hyphens' );
+
+    $record = MARC::Record->new();
+    $record->append_fields( MARC::Field->new( '072', ' ', ' ', a => "9-123345345X" ) );
+
+    is( $extractor->get_normalized_upc($record), '9123345345X', 'Indicator has no effect' );
+
+    throws_ok { $extractor->get_normalized_upc() }
+    'Koha::Exceptions::MissingParameter',
+        'Exception if no parameter';
+
+    like( "$@", qr{A required parameter is missing' with parameter => record}, 'Exception correctly details missing parameter' );
+
+    throws_ok { $extractor->get_normalized_upc("Some string") }
+    'Koha::Exceptions::WrongParameter',
+        'Exception if the parameter is not a MARC::Record';
+
+    like( "$@", qr{Parameter has wrong value or type}, 'Exception correctly reports the wrong parameter' );
+};
-- 
2.39.5