Bug 34828: Add Koha::Biblio::Metadata::Extractor::MARC

and inherit from it

The new usage is now:

1. With a Koha::Biblio object
my $extractor = Koha::Biblio::Metadata::Extractor->new({biblio => $biblio});
$extractor->get_normalized_upc;

or

2. With a MARC::Record
my $extractor = Koha::Biblio::Metadata::Extractor->new({metadata=> $biblio->metadata->record});
$extractor->get_normalized_upc;

Note that the QA script raises "Inconsistent hierarchy during C3 merge of
class" warnings. We could remove them by replacing 'use' with 'require'
in Koha::Biblio::Metadata::Extractor::MARC (in ->new), but that's
suboptimal.

Signed-off-by: David Nind <david@davidnind.com>
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
This commit is contained in:
Jonathan Druart 2023-10-05 09:44:42 +02:00 committed by Tomas Cohen Arazi
parent 7efeeda29d
commit d5d2f319df
Signed by: tomascohen
GPG key ID: 0A272EA1B2F3C15F
8 changed files with 216 additions and 235 deletions

View file

@ -33,6 +33,7 @@ use base qw(Koha::Object);
use Koha::Acquisition::Orders; use Koha::Acquisition::Orders;
use Koha::ArticleRequests; use Koha::ArticleRequests;
use Koha::Biblio::Metadatas; use Koha::Biblio::Metadatas;
use Koha::Biblio::Metadata::Extractor;
use Koha::Biblio::ItemGroups; use Koha::Biblio::ItemGroups;
use Koha::Biblioitems; use Koha::Biblioitems;
use Koha::Cache::Memory::Lite; use Koha::Cache::Memory::Lite;
@ -1280,6 +1281,22 @@ sub public_read_list {
]; ];
} }
=head3 metadata_extractor
my $extractor = $biblio->metadata_extractor
Return a Koha::Biblio::Metadata::Extractor object to use to extract data from the metadata (i.e. the MARC record, for now).
=cut
sub metadata_extractor {
    my ($self) = @_;

    # Build the extractor on first use only; later calls reuse the cached one.
    # NOTE(review): the extractor receives $self and is then stored on $self,
    # which looks like a reference cycle -- confirm this is acceptable.
    unless ( $self->{metadata_extractor} ) {
        $self->{metadata_extractor} = Koha::Biblio::Metadata::Extractor->new( { biblio => $self } );
    }

    return $self->{metadata_extractor};
}
=head3 normalized_upc =head3 normalized_upc
my $normalized_upc = $biblio->normalized_upc my $normalized_upc = $biblio->normalized_upc
@ -1290,8 +1307,7 @@ Normalizes and returns the UPC value found in the MARC record.
sub normalized_upc { sub normalized_upc {
my ($self) = @_; my ($self) = @_;
my $marc_record = $self->metadata->record; return $self->metadata_extractor->get_normalized_upc;
return C4::Koha::GetNormalizedUPC($marc_record);
} }
=head3 normalized_oclc =head3 normalized_oclc

View file

@ -1,6 +1,6 @@
package Koha::Biblio::Metadata::Extractor; package Koha::Biblio::Metadata::Extractor;
# Copyright ByWater Solutions 2023 # Copyright Koha Development Team 2023
# #
# This file is part of Koha. # This file is part of Koha.
# #
@ -26,6 +26,7 @@ Koha::Biblio::Metadata::Extractor - Extract specific metadata from MARC::Record
use Modern::Perl; use Modern::Perl;
use Koha::Exceptions; use Koha::Exceptions;
use Koha::Biblio::Metadata::Extractor::MARC;
=head1 API =head1 API
@ -33,74 +34,25 @@ use Koha::Exceptions;
=head3 new =head3 new
my $extractor = Koha::Biblio::Metadata::Extractor->new; my $extractor = Koha::Biblio::Metadata::Extractor->new({ biblio => $biblio });
Constructor for the I<Koha::Biblio::Metadata::Extractor> class. Constructor for the I<Koha::Biblio::Metadata::Extractor> class.
=cut =cut
sub new { sub new {
my ($class) = @_; my ( $class, $params ) = @_;
my $self = { extractors => {} };
return # We only support MARC for now, no need to complexify here
bless $self, return Koha::Biblio::Metadata::Extractor::MARC->new($params);
$class;
}
=head2 get_normalized_upc
my $normalized_upc = $extractor->get_normalized_upc( { record => $record, schema => $schema } );
Returns the normalized UPC for the passed I<$record>.
=cut
sub get_normalized_upc {
my ( $self, $params ) = @_;
Koha::Exceptions::MissingParameter->throw( parameter => 'record' )
unless $params->{record};
return $self->get_extractor( { schema => $params->{schema} } )->get_normalized_upc( $params->{record} );
}
=head2 Internal methods
=head3 get_extractor
my $extractor = $self->get_extractor( { schema => $schema } );
Returns the cached extractor for the specified I<$schema>.
=cut
sub get_extractor {
my ( $self, $params ) = @_;
my $schema = $params->{schema};
Koha::Exceptions::MissingParameter->throw( parameter => 'schema' )
unless $schema;
my $valid_schemas = { 'MARC21' => 1, 'UNIMARC' => 1 };
Koha::Exceptions::WrongParameter->throw( name => 'schema', value => $schema )
unless $valid_schemas->{$schema};
unless ( $self->{extractors}->{$schema} ) {
my $extractor_class = "Koha::Biblio::Metadata::Extractor::MARC::$schema";
eval "require $extractor_class";
$self->{extractors}->{$schema} = $extractor_class->new;
}
return $self->{extractors}->{$schema};
} }
=head1 AUTHOR =head1 AUTHOR
Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt> Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt>
Jonathan Druart, E<lt>jonathan.druart@bugs.koha-community.orgE<gt>
=cut =cut
1; 1;

View file

@ -0,0 +1,106 @@
package Koha::Biblio::Metadata::Extractor::MARC;
# Copyright Koha Development Team 2023
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.
=head1 NAME
Koha::Biblio::Metadata::Extractor::MARC - Extract specific metadata from MARC::Record objects
=cut
use Modern::Perl;
use Koha::Exceptions;
use Koha::Biblio::Metadata::Extractor::MARC::MARC21;
use Koha::Biblio::Metadata::Extractor::MARC::UNIMARC;
=head1 API
=head2 Class methods
=head3 new
my $extractor = Koha::Biblio::Metadata::Extractor::MARC->new({ biblio => $biblio });
Constructor for the I<Koha::Biblio::Metadata::Extractor::MARC> class.
=cut
sub new {
    my ( $class, $params ) = @_;

    # At least one source of MARC data (a record or a biblio) is required.
    my $has_source = $params->{metadata} || $params->{biblio};
    Koha::Exceptions::MissingParameter->throw( parameter => 'metadata' )
        if !$has_source;

    # The flavour is read from the system preference rather than from the
    # biblio's metadata, so that biblio_metadata does not have to be fetched.
    my $schema = C4::Context->preference('marcflavour');

    my %is_supported = map { $_ => 1 } qw( MARC21 UNIMARC );
    if ( !$is_supported{$schema} ) {
        Koha::Exceptions::WrongParameter->throw( name => 'schema', value => $schema );
    }

    # Delegate construction to the flavour-specific subclass.
    return "Koha::Biblio::Metadata::Extractor::MARC::$schema"->new($params);
}
=head3 metadata
my $metadata = $marc_extractor->metadata;
Return a MARC record.
=cut
sub metadata {
    my ($self) = @_;

    # Without a biblio there is nothing to fetch: hand back whatever record
    # (possibly none) the extractor was constructed with.
    return $self->{metadata} unless $self->{biblio};

    # Fetch the record from the biblio once and keep it for later calls.
    $self->{metadata} = $self->{biblio}->metadata->record
        unless $self->{metadata};

    return $self->{metadata};
}
=head3 _normalize_string
my $normalized_string = $self->_normalize_string($string);
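Returns the leading run of digits and dashes (optionally followed by "X" characters) found at the start of I<$string>, with the dashes removed.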
=cut
# Extracts the leading identifier-like part of $string -- a run of digits and
# dashes, optionally followed by "X" characters -- and strips the dashes.
#
# Parameters:
#   $string - the raw value to normalize; may be undef.
#
# Returns the normalized string. It is empty when $string is undef or does
# not start with a digit, dash or X.
sub _normalize_string {
    my ( $self, $string ) = @_;

    # Callers pass $field->subfield('a') directly, which may be undef. Return
    # the same empty result the match would give, without the "uninitialized
    # value" warnings.
    return q{} unless defined $string;

    # The pattern can match the empty string, so the capture is always defined.
    my ($normalized_string) = $string =~ /([\d-]*[X]*)/;
    $normalized_string =~ s/-//g;

    return $normalized_string;
}
=head1 AUTHOR
Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt>
Jonathan Druart, E<lt>jonathan.druart@bugs.koha-community.orgE<gt>
=cut
1;
__END__

View file

@ -1,6 +1,6 @@
package Koha::Biblio::Metadata::Extractor::MARC::MARC21; package Koha::Biblio::Metadata::Extractor::MARC::MARC21;
# Copyright ByWater Solutions 2023 # Copyright Koha Development Team 2023
# #
# This file is part of Koha. # This file is part of Koha.
# #
@ -19,12 +19,14 @@ package Koha::Biblio::Metadata::Extractor::MARC::MARC21;
=head1 NAME =head1 NAME
Koha::Biblio::Metadata::Extractor - Extract specific metadata from MARC::Record objects Koha::Biblio::Metadata::Extractor::MARC::MARC21 - Extract specific metadata from MARC21 MARC::Record objects
=cut =cut
use Modern::Perl; use Modern::Perl;
use base qw(Koha::Biblio::Metadata::Extractor::MARC);
use Koha::Exceptions; use Koha::Exceptions;
=head1 API =head1 API
@ -35,57 +37,49 @@ use Koha::Exceptions;
my $extractor = Koha::Biblio::Metadata::Extractor::MARC::MARC21->new; my $extractor = Koha::Biblio::Metadata::Extractor::MARC::MARC21->new;
Constructor for the I<Koha::Biblio::Metadata::Extractor> class. Constructor for the I<Koha::Biblio::Metadata::Extractor::MARC::MARC21> class.
=cut =cut
sub new { sub new {
my ($class) = @_; my ( $class, $params ) = @_;
my $self = {};
return return
bless $self, bless $params,
$class; $class;
} }
=head2 get_normalized_upc =head2 get_normalized_upc
my $normalized_upc = $extractor->get_normalized_upc( $record ); my $normalized_upc = $extractor->get_normalized_upc();
Returns a stringthe COinS (a span) which can be included in a biblio record Returns a normalized UPC.
=cut =cut
sub get_normalized_upc { sub get_normalized_upc {
my ( $self, $record ) = @_; my ($self) = @_;
Koha::Exceptions::MissingParameter->throw( parameter => 'record' )
unless $record;
Koha::Exceptions::WrongParameter->throw( name => 'record', type => ref($record) )
unless ref($record) eq 'MARC::Record';
my $record = $self->metadata;
my @fields = $record->field('024'); my @fields = $record->field('024');
foreach my $field (@fields) { foreach my $field (@fields) {
my $indicator = $field->indicator(1); my $indicator = $field->indicator(1);
my $upc = $field->subfield('a');
( my $normalized_upc ) = $upc =~ /([\d-]*[X]*)/; my $normalized_upc = $self->_normalize_string( $field->subfield('a') );
$normalized_upc =~ s/-//g;
if ( $normalized_upc && $indicator eq "1" ) { if ( $normalized_upc && $indicator eq "1" ) {
return $normalized_upc; return $normalized_upc;
} }
} }
return;
} }
=head1 AUTHOR =head1 AUTHOR
Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt> Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt>
Jonathan Druart, E<lt>jonathan.druart@bugs.koha-community.orgE<gt>
=cut =cut
1; 1;

View file

@ -1,6 +1,6 @@
package Koha::Biblio::Metadata::Extractor::MARC::UNIMARC; package Koha::Biblio::Metadata::Extractor::MARC::UNIMARC;
# Copyright ByWater Solutions 2023 # Copyright Koha Development Team 2023
# #
# This file is part of Koha. # This file is part of Koha.
# #
@ -19,12 +19,14 @@ package Koha::Biblio::Metadata::Extractor::MARC::UNIMARC;
=head1 NAME =head1 NAME
Koha::Biblio::Metadata::Extractor - Extract specific metadata from MARC::Record objects Koha::Biblio::Metadata::Extractor::MARC::UNIMARC - Extract specific metadata from UNIMARC MARC::Record objects
=cut =cut
use Modern::Perl; use Modern::Perl;
use base qw(Koha::Biblio::Metadata::Extractor::MARC);
use Koha::Exceptions; use Koha::Exceptions;
=head1 API =head1 API
@ -40,38 +42,29 @@ Constructor for the I<Koha::Biblio::Metadata::Extractor::MARC::UNIMARC> class.
=cut =cut
sub new { sub new {
my ($class) = @_; my ( $class, $params ) = @_;
my $self = {};
return return
bless $self, bless $params,
$class; $class;
} }
=head2 get_normalized_upc =head2 get_normalized_upc
my $normalized_upc = $extractor->get_normalized_upc( $record ); my $normalized_upc = $extractor->get_normalized_upc();
Returns the normalized UPC for the passed I<$record>. Returns a normalized UPC.
=cut =cut
sub get_normalized_upc { sub get_normalized_upc {
my ( $self, $record ) = @_; my ($self) = @_;
Koha::Exceptions::MissingParameter->throw( parameter => 'record' )
unless $record;
Koha::Exceptions::WrongParameter->throw( name => 'record', type => ref($record) )
unless ref($record) eq 'MARC::Record';
my $record = $self->metadata;
my @fields = $record->field('072'); my @fields = $record->field('072');
foreach my $field (@fields) { foreach my $field (@fields) {
my $upc = $field->subfield('a'); my $normalized_upc = $self->_normalize_string( $field->subfield('a') );
( my $normalized_upc ) = $upc =~ /([\d-]*[X]*)/;
$normalized_upc =~ s/-//g;
if ($normalized_upc) { if ($normalized_upc) {
return $normalized_upc; return $normalized_upc;
@ -83,6 +76,8 @@ sub get_normalized_upc {
Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt> Tomas Cohen Arazi, E<lt>tomascohen@theke.ioE<gt>
Jonathan Druart, E<lt>jonathan.druart@bugs.koha-community.orgE<gt>
=cut =cut
1; 1;

View file

@ -1,102 +0,0 @@
#!/usr/bin/perl
# Copyright 2023 Koha Development team
#
# This file is part of Koha
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.
use Modern::Perl;
use Test::More tests => 3;
use Test::Exception;
use Test::MockModule;
use MARC::Record;
use Koha::Biblio::Metadata::Extractor;
# Checks that the constructor returns an object whose ref() is exactly
# Koha::Biblio::Metadata::Extractor.
subtest 'new() tests' => sub {
    plan tests => 1;

    my $extractor = Koha::Biblio::Metadata::Extractor->new;

    # Named so that a failure is identifiable in the test output.
    is(
        ref($extractor), 'Koha::Biblio::Metadata::Extractor',
        'new() returns a Koha::Biblio::Metadata::Extractor object'
    );
};
# Exercises get_extractor(): one schema-specific extractor per supported
# schema, cached on the object, plus the two parameter-validation errors.
subtest 'get_extractor() tests' => sub {
    plan tests => 8;

    my $generic = Koha::Biblio::Metadata::Extractor->new;

    for my $flavour (qw{ MARC21 UNIMARC }) {
        my $got_class = ref( $generic->get_extractor( { schema => $flavour } ) );

        is(
            $got_class, "Koha::Biblio::Metadata::Extractor::MARC::$flavour",
            "Returns the right extractor library for schema ($flavour)"
        );

        # The instance must now be stored under its schema name.
        my $cache = $generic->{extractors};
        ok( exists $cache->{$flavour}, "Extractor for $flavour cached" );
    }

    # No schema at all.
    throws_ok { $generic->get_extractor }
    'Koha::Exceptions::MissingParameter',
        'Exception if no schema parameter';

    my $missing_message = "$@";
    like(
        $missing_message, qr{A required parameter is missing' with parameter => schema},
        'Exception correctly details missing parameter'
    );

    # A schema outside the supported list.
    throws_ok { $generic->get_extractor( { schema => 'POTATO' } ) }
    'Koha::Exceptions::WrongParameter',
        'Exception if the passed schema is not supported';

    my $wrong_message = "$@";
    like(
        $wrong_message, qr{'Parameter has wrong value or type' with name => schema, value => POTATO},
        'Exception correctly details incorrect parameter value'
    );
};
# Exercises the dispatching get_normalized_upc(): each schema must be routed
# to its own (mocked) library and cached, and a missing record must throw.
subtest 'get_normalized_upc() tests' => sub {
    plan tests => 6;

    my $generic      = Koha::Biblio::Metadata::Extractor->new;
    my $empty_record = MARC::Record->new();

    # Each mocked library answers with its own schema name, so the value
    # returned below shows which library handled the call.
    my $marc21_mock = Test::MockModule->new('Koha::Biblio::Metadata::Extractor::MARC::MARC21');
    $marc21_mock->mock( 'get_normalized_upc', sub { return 'MARC21' } );

    my $unimarc_mock = Test::MockModule->new('Koha::Biblio::Metadata::Extractor::MARC::UNIMARC');
    $unimarc_mock->mock( 'get_normalized_upc', sub { return 'UNIMARC' } );

    for my $flavour (qw{ MARC21 UNIMARC }) {
        my $answer = $generic->get_normalized_upc( { record => $empty_record, schema => $flavour } );

        is(
            $answer, $flavour,
            "Library for handling $flavour called"
        );

        my $cache = $generic->{extractors};
        ok( exists $cache->{$flavour}, "Extractor for $flavour cached" );
    }

    # No parameters at all: the record is reported as missing.
    throws_ok { $generic->get_normalized_upc() }
    'Koha::Exceptions::MissingParameter',
        'Exception if no record parameter';

    my $missing_message = "$@";
    like(
        $missing_message, qr{A required parameter is missing' with parameter => record},
        'Exception correctly details missing parameter'
    );
};

View file

@ -22,43 +22,50 @@ use Modern::Perl;
use Test::More tests => 2; use Test::More tests => 2;
use Test::Exception; use Test::Exception;
use MARC::Record; use t::lib::TestBuilder;
use t::lib::Mocks;
use Koha::Biblio::Metadata::Extractor::MARC::MARC21; use Koha::Biblio::Metadata::Extractor;
my $schema = Koha::Database->schema;
my $builder = t::lib::TestBuilder->new;
t::lib::Mocks::mock_preference( 'marcflavour', 'MARC21' );
subtest 'new() tests' => sub { subtest 'new() tests' => sub {
plan tests => 1; plan tests => 3;
my $extractor = Koha::Biblio::Metadata::Extractor::MARC::MARC21->new; $schema->storage->txn_begin;
throws_ok { Koha::Biblio::Metadata::Extractor->new; }
'Koha::Exceptions::MissingParameter',
'Exception if no parameter';
my $biblio = $builder->build_sample_biblio;
my $extractor = Koha::Biblio::Metadata::Extractor->new( { biblio => $biblio } );
is( ref($extractor), 'Koha::Biblio::Metadata::Extractor::MARC::MARC21' );
my $record = $biblio->metadata->record;
$extractor = Koha::Biblio::Metadata::Extractor->new( { metadata => $record } );
is( ref($extractor), 'Koha::Biblio::Metadata::Extractor::MARC::MARC21' ); is( ref($extractor), 'Koha::Biblio::Metadata::Extractor::MARC::MARC21' );
}; };
subtest 'get_normalized_upc() tests' => sub { subtest 'get_normalized_upc() tests' => sub {
plan tests => 6; plan tests => 2;
my $extractor = Koha::Biblio::Metadata::Extractor::MARC::MARC21->new;
my $record = MARC::Record->new(); my $record = MARC::Record->new();
$record->append_fields( MARC::Field->new( '024', '1', ' ', a => "9-123345345X" ) ); $record->append_fields( MARC::Field->new( '024', '1', ' ', a => "9-123345345X" ) );
is( $extractor->get_normalized_upc($record), '9123345345X' ); my $extractor = Koha::Biblio::Metadata::Extractor->new( { metadata => $record } );
is( $extractor->get_normalized_upc, '9123345345X' );
$record = MARC::Record->new(); $record = MARC::Record->new();
$record->append_fields( MARC::Field->new( '024', ' ', ' ', a => "9-123345345X" ) ); $record->append_fields( MARC::Field->new( '024', ' ', ' ', a => "9-123345345X" ) );
is( $extractor->get_normalized_upc($record), undef ); $extractor = Koha::Biblio::Metadata::Extractor->new( { metadata => $record } );
is( $extractor->get_normalized_upc, "" );
throws_ok { $extractor->get_normalized_upc() }
'Koha::Exceptions::MissingParameter',
'Exception if no parameter';
like( "$@", qr{A required parameter is missing' with parameter => record} );
throws_ok { $extractor->get_normalized_upc("Some string") }
'Koha::Exceptions::WrongParameter',
'Exception if no parameter';
like( "$@", qr{Parameter has wrong value or type} );
}; };

View file

@ -22,43 +22,56 @@ use Modern::Perl;
use Test::More tests => 2; use Test::More tests => 2;
use Test::Exception; use Test::Exception;
use MARC::Record; use t::lib::TestBuilder;
use t::lib::Mocks;
use Koha::Biblio::Metadata::Extractor::MARC::UNIMARC; use Koha::Biblio::Metadata::Extractor;
my $schema = Koha::Database->schema;
my $builder = t::lib::TestBuilder->new;
t::lib::Mocks::mock_preference( 'marcflavour', 'UNIMARC' );
subtest 'new() tests' => sub { subtest 'new() tests' => sub {
plan tests => 1; plan tests => 3;
my $extractor = Koha::Biblio::Metadata::Extractor::MARC::UNIMARC->new; $schema->storage->txn_begin;
throws_ok { Koha::Biblio::Metadata::Extractor->new; }
'Koha::Exceptions::MissingParameter',
'Exception if no parameter';
my $biblio = $builder->build_sample_biblio;
my $extractor = Koha::Biblio::Metadata::Extractor->new( { biblio => $biblio } );
is( ref($extractor), 'Koha::Biblio::Metadata::Extractor::MARC::UNIMARC' ); is( ref($extractor), 'Koha::Biblio::Metadata::Extractor::MARC::UNIMARC' );
my $record = $biblio->metadata->record;
$extractor = Koha::Biblio::Metadata::Extractor->new( { metadata => $record } );
is( ref($extractor), 'Koha::Biblio::Metadata::Extractor::MARC::UNIMARC' );
$schema->storage->txn_rollback;
}; };
subtest 'get_normalized_upc() tests' => sub { subtest 'get_normalized_upc() tests' => sub {
plan tests => 6; plan tests => 2;
my $extractor = Koha::Biblio::Metadata::Extractor::MARC::UNIMARC->new; $schema->storage->txn_begin;
my $record = MARC::Record->new(); my $record = MARC::Record->new();
$record->append_fields( MARC::Field->new( '072', '1', ' ', a => "9-123345345X" ) ); $record->append_fields( MARC::Field->new( '072', '1', ' ', a => "9-123345345X" ) );
is( $extractor->get_normalized_upc($record), '9123345345X' ); my $extractor = Koha::Biblio::Metadata::Extractor->new( { metadata => $record } );
is( $extractor->get_normalized_upc, '9123345345X' );
$record = MARC::Record->new(); $record = MARC::Record->new();
$record->append_fields( MARC::Field->new( '072', ' ', ' ', a => "9-123345345X" ) ); $record->append_fields( MARC::Field->new( '072', ' ', ' ', a => "9-123345345X" ) );
is( $extractor->get_normalized_upc($record), '9123345345X', 'Indicator has no effect' ); is( $extractor->get_normalized_upc($record), '9123345345X', 'Indicator has no effect' );
throws_ok { $extractor->get_normalized_upc() } $schema->storage->txn_rollback;
'Koha::Exceptions::MissingParameter',
'Exception if no parameter';
like( "$@", qr{A required parameter is missing' with parameter => record} );
throws_ok { $extractor->get_normalized_upc("Some string") }
'Koha::Exceptions::WrongParameter',
'Exception if no parameter';
like( "$@", qr{Parameter has wrong value or type} );
}; };