From 01fbe2be9925f8586b9a2cb4dc908fc856b672a8 Mon Sep 17 00:00:00 2001 From: Marcel de Rooy Date: Wed, 19 Jul 2017 16:25:04 +0200 Subject: [PATCH] Bug 10306: Core module changes for multiple mappings In order to allow multiple Koha to MARC mappings (for one kohafield), we need to adjust a few key routines in C4/Biblio.pm. This results in a few changes in dependent modules too. Note: Multiple mappings also include 'alternating' mappings. Such as the case of MARC21 260 and 264: only one of both fields will be used. Sub TransformMarcToKoha will handle that just fine; the opposite transformation is harder, since we do no longer know which field was the source. In that case TransformKohaToMarc will fill both fields. We only use that operation in Koha for items (in Acquisition and Cataloging). Sub GetMarcSubfieldStructure This sub used a selectall_hashref, which is fine as long as we have only one mapping for each kohafield. But as DBI states it: If a row has the same key as an earlier row then it replaces the earlier row. In other words, we lose the first mapping if we have two. This patch uses selectall_arrayref with Slice and rearranges the output so that the returned hash returns an arrayref of hashrefs for each kohafield. In order to improve consistency, we add an order clause to the SQL statement used too. Sub GetMarcFromKohaField This sub just returned one tag and subfield, but in case of multiple mappings we should return them all now. Note: Many calls still expect just one result and will work just fine: my ($tag, $sub) = GetMarcFromKohaField(...) A possible second mapping would be silently ignored. Often the sub is called for biblionumber or itemnumber. I would not recommend the use of multiple mappings for such fields btw. In case the sub is called in scalar context, it will return only the first tag (instead of the number of tags and subfields). Sub GetMarcSubfieldStructureFromKohaField This sub previously returned the hash for one kohafield. In scalar context it will behave like before: it returns the first hashref in the arrayref that comes from GetMarcSubfieldStructure. In list context, it returns an array of all hashrefs (incl. multiple mappings). The sub is not used in C4::Ris. Removed the use statement. Sub TransformKohaToMarc This sub got a second parameter: frameworkcode. Historically, Koha more or less assumes kohafields to be defined across all frameworks (see Koha to MARC mappings). Therefore it falls back to Default when it is not passed. When going thru all mappings in building a MARC record, it also supports multiple mappings. Note that Koha uses this routine in Acquisition and in Cataloging for items. Normally the MARC record is leading however and the Koha fields are derivatives for optimization and reporting. The added third parameter allows for passing a new option no_split => 1. We use this option in C4::Items::Item2Marc; if two item fields are mapped to one kohafield but would have different values (which would be very unusual), these values are glued together. When transforming to MARC again, we do not want to duplicate the item subfields, but we keep the glued value in both subfields. This operation only affects items, since we are not doing this reverse operation for biblio and biblioitem fields. Sub _get_inverted_marc_field_map This sub is a helper routine of TransformMarcToKoha, the opposite transformation. When saving a MARC record, all kohafields are extracted including multiple mappings. Suppose that you had both 260c and 264c in your record (which you won't), than both values get saved initially into copyrightdate like A | B. The additional code for copyrightdate will extract the first year from this string. A small fix in TransformMarcToKoha makes that it only saves a value in a kohafield if it is defined and not empty. (Same for concatenation.) Sub TransformMarcToKohaOneField This sub now just calls TransformMarcToKoha and extracts the requested field. Note that since we are caching the structure, this does not result in additional database access and is therefore performance-wise insignificant. We simplify code and maintenance. Instead of modifying the passed hashref, it simply returns a value. A call in C4::Breeding is adjusted accordingly. The routine getKohaField in Koha::MetadataRecord is redirected to TransformMarcToKohaOneField. NOTE: The fourth patch restructures/optimizes TransformMarcToKoha[OneField]. Sub get_koha_field_from_marc This sub can be removed. A call is replaced by TransformMarcToKohaOneField in C4::XISBN. Note: The commented lines for sub ModZebrafiles are removed (directly under TransformMarcToKohaOneField). Test plan: For unit tests and interface tests, please see follow-ups. Run qa tools in order to verify that the modules still compile well. Read the code changes and verify that they make sense. Signed-off-by: Josef Moravec Signed-off-by: Kyle M Hall Signed-off-by: Jonathan Druart --- C4/Biblio.pm | 198 +++++++++++++---------------------------- C4/Breeding.pm | 4 +- C4/Items.pm | 9 +- C4/Ris.pm | 1 - C4/XISBN.pm | 4 +- Koha/MetadataRecord.pm | 27 ++---- 6 files changed, 80 insertions(+), 163 deletions(-) diff --git a/C4/Biblio.pm b/C4/Biblio.pm index b259176557..5f696a2e5f 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -1089,13 +1089,20 @@ sub GetMarcSubfieldStructure { return $cached if $cached; my $dbh = C4::Context->dbh; - my $subfield_structure = $dbh->selectall_hashref( q| + # We moved to selectall_arrayref since selectall_hashref does not + # keep duplicate mappings on kohafield (like place in 260 vs 264) + my $subfield_aref = $dbh->selectall_arrayref( q| SELECT * FROM marc_subfield_structure WHERE frameworkcode = ? AND kohafield > '' - |, 'kohafield', {}, $frameworkcode ); - + ORDER BY frameworkcode,tagfield,tagsubfield + |, { Slice => {} }, $frameworkcode ); + # Now map the output to a hash structure + my $subfield_structure = {}; + foreach my $row ( @$subfield_aref ) { + push @{ $subfield_structure->{ $row->{kohafield} }}, $row; + } $cache->set_in_cache( $cache_key, $subfield_structure ); return $subfield_structure; } @@ -1113,7 +1120,11 @@ sub GetMarcFromKohaField { my ( $kohafield, $frameworkcode ) = @_; return (0, undef) unless $kohafield; my $mss = GetMarcSubfieldStructure( $frameworkcode ); - return ( $mss->{$kohafield}{tagfield}, $mss->{$kohafield}{tagsubfield} ); + my @retval; + foreach( @{ $mss->{$kohafield} } ) { + push @retval, $_->{tagfield}, $_->{tagsubfield}; + } + return wantarray ? @retval : $retval[0]; } =head2 GetMarcSubfieldStructureFromKohaField @@ -1122,6 +1133,8 @@ sub GetMarcFromKohaField { Returns a hashref where keys are marc_subfield_structure column names for the row where kohafield=$kohafield for the given framework code. +In list context returns a list of those hashrefs in case duplicate Koha to +MARC mappings exist. $frameworkcode is optional. If not given, then the default framework is used. @@ -1133,9 +1146,8 @@ sub GetMarcSubfieldStructureFromKohaField { return unless $kohafield; my $mss = GetMarcSubfieldStructure( $frameworkcode ); - return exists $mss->{$kohafield} - ? $mss->{$kohafield} - : undef; + return unless $mss->{$kohafield}; + return wantarray ? @{$mss->{$kohafield}} : $mss->{$kohafield}->[0]; } =head2 GetMarcBiblio @@ -2158,21 +2170,26 @@ entry from user entry sub TransformKohaToMarc { - my $hash = shift; + my ( $hash, $frameworkcode, $params ) = @_; my $record = MARC::Record->new(); SetMarcUnicodeFlag( $record, C4::Context->preference("marcflavour") ); - # FIXME Do not we want to get the marc subfield structure for the biblio framework? - my $mss = GetMarcSubfieldStructure(); + # NOTE: Many older calls do not include a frameworkcode. In that case + # we default to Default framework. + my $mss = GetMarcSubfieldStructure( $frameworkcode//'' ); my $tag_hr = {}; while ( my ($kohafield, $value) = each %$hash ) { - next unless exists $mss->{$kohafield}; - next unless $mss->{$kohafield}; - my $tagfield = $mss->{$kohafield}{tagfield} . ''; - my $tagsubfield = $mss->{$kohafield}{tagsubfield}; - foreach my $value ( split(/\s?\|\s?/, $value, -1) ) { - next if $value eq ''; - $tag_hr->{$tagfield} //= []; - push @{$tag_hr->{$tagfield}}, [($tagsubfield, $value)]; + foreach my $fld ( @{ $mss->{$kohafield} } ) { + my $tagfield = $fld->{tagfield}; + my $tagsubfield = $fld->{tagsubfield}; + next if !$tagfield; + my @values = $params->{no_split} + ? ( $value ) + : split(/\s?\|\s?/, $value, -1); + foreach my $value ( @values ) { + next if $value eq ''; + $tag_hr->{$tagfield} //= []; + push @{$tag_hr->{$tagfield}}, [($tagsubfield, $value)]; + } } } foreach my $tag (sort keys %$tag_hr) { @@ -2560,7 +2577,7 @@ sub TransformMarcToKoha { return $result; } $limit_table = $limit_table || 0; - $frameworkcode = '' unless defined $frameworkcode; + $frameworkcode //= ''; my $inverted_field_map = _get_inverted_marc_field_map($frameworkcode); @@ -2581,14 +2598,15 @@ sub TransformMarcToKoha { my $kohafields = $inverted_field_map->{$tag}->{list}; ENTRY: foreach my $entry ( @{$kohafields} ) { my ( $subfield, $table, $column ) = @{$entry}; + my $value = $field->data; next ENTRY unless exists $tables{$table}; + next ENTRY if !$value; my $key = _disambiguate( $table, $column ); if ( $result->{$key} ) { - unless ( ( $key eq "biblionumber" or $key eq "biblioitemnumber" ) and ( $field->data() eq "" ) ) { - $result->{$key} .= " | " . $field->data(); - } + $result->{$key} .= " | " . $value + unless $result->{$key} eq $value; } else { - $result->{$key} = $field->data(); + $result->{$key} = $value; } } } else { @@ -2601,11 +2619,11 @@ sub TransformMarcToKoha { SFENTRY: foreach my $entry ( @{ $inverted_field_map->{$tag}->{sfs}->{$code} } ) { my ( $table, $column ) = @{$entry}; next SFENTRY unless exists $tables{$table}; + next SFENTRY if !$value; my $key = _disambiguate( $table, $column ); if ( $result->{$key} ) { - unless ( ( $key eq "biblionumber" or $key eq "biblioitemnumber" ) and ( $value eq "" ) ) { - $result->{$key} .= " | " . $value; - } + $result->{$key} .= " | " . $value + unless $result->{$key} eq $value; } else { $result->{$key} = $value; } @@ -2646,12 +2664,15 @@ sub _get_inverted_marc_field_map { my $mss = GetMarcSubfieldStructure( $frameworkcode ); foreach my $kohafield ( keys %{ $mss } ) { - next unless exists $mss->{$kohafield}; # not all columns are mapped to MARC tag & subfield - my $tag = $mss->{$kohafield}{tagfield}; - my $subfield = $mss->{$kohafield}{tagsubfield}; - my ( $table, $column ) = split /[.]/, $kohafield, 2; - push @{ $field_map->{$tag}->{list} }, [ $subfield, $table, $column ]; - push @{ $field_map->{$tag}->{sfs}->{$subfield} }, [ $table, $column ]; + foreach my $fld ( @{ $mss->{$kohafield} } ) { + my $tag = $fld->{tagfield}; + my $subfield = $fld->{tagsubfield}; + my ( $table, $column ) = split /[.]/, $kohafield, 2; + push @{ $field_map->{$tag}->{list} }, + [ $subfield, $table, $column ]; + push @{ $field_map->{$tag}->{sfs}->{$subfield} }, + [ $table, $column ]; + } } return $field_map; } @@ -2704,118 +2725,23 @@ sub _disambiguate { } -=head2 get_koha_field_from_marc - - $result->{_disambiguate($table, $field)} = - get_koha_field_from_marc($table,$field,$record,$frameworkcode); - -Internal function to map data from the MARC record to a specific non-MARC field. -FIXME: this is meant to replace TransformMarcToKohaOneField after more testing. - -=cut - -sub get_koha_field_from_marc { - my ( $koha_table, $koha_column, $record, $frameworkcode ) = @_; - my ( $tagfield, $subfield ) = GetMarcFromKohaField( $koha_table . '.' . $koha_column, $frameworkcode ); - my $kohafield; - foreach my $field ( $record->field($tagfield) ) { - if ( $field->tag() < 10 ) { - if ($kohafield) { - $kohafield .= " | " . $field->data(); - } else { - $kohafield = $field->data(); - } - } else { - if ( $field->subfields ) { - my @subfields = $field->subfields(); - foreach my $subfieldcount ( 0 .. $#subfields ) { - if ( $subfields[$subfieldcount][0] eq $subfield ) { - if ($kohafield) { - $kohafield .= " | " . $subfields[$subfieldcount][1]; - } else { - $kohafield = $subfields[$subfieldcount][1]; - } - } - } - } - } - } - return $kohafield; -} - =head2 TransformMarcToKohaOneField - $result = TransformMarcToKohaOneField( $kohatable, $kohafield, $record, $result, $frameworkcode ) + $value = TransformMarcToKohaOneField( 'biblio.title', $record, $frameworkcode ); =cut sub TransformMarcToKohaOneField { - - # FIXME ? if a field has a repeatable subfield that is used in old-db, - # only the 1st will be retrieved... - my ( $kohatable, $kohafield, $record, $result, $frameworkcode ) = @_; - my $res = ""; - my ( $tagfield, $subfield ) = GetMarcFromKohaField( $kohatable . "." . $kohafield, $frameworkcode ); - foreach my $field ( $record->field($tagfield) ) { - if ( $field->tag() < 10 ) { - if ( $result->{$kohafield} ) { - $result->{$kohafield} .= " | " . $field->data(); - } else { - $result->{$kohafield} = $field->data(); - } - } else { - if ( $field->subfields ) { - my @subfields = $field->subfields(); - foreach my $subfieldcount ( 0 .. $#subfields ) { - if ( $subfields[$subfieldcount][0] eq $subfield ) { - if ( $result->{$kohafield} ) { - $result->{$kohafield} .= " | " . $subfields[$subfieldcount][1]; - } else { - $result->{$kohafield} = $subfields[$subfieldcount][1]; - } - } - } - } - } - } - return $result; + my ( $kohafield, $marc, $frameworkcode ) = @_; + + # It is not really useful to repeat all code from TransformMarcToKoha here. + # Since it is fast enough (by caching), we just extract the field. + my $koharec = TransformMarcToKoha( $marc, $frameworkcode ); + my @temp = split /\./, $kohafield, 2; + $kohafield = _disambiguate( @temp ) if @temp > 1; + return $koharec ? $koharec->{$kohafield} : undef; } - -#" - -# -# true ModZebra commented until indexdata fixes zebraDB crashes (it seems they occur on multiple updates -# at the same time -# replaced by a zebraqueue table, that is filled with ModZebra to run. -# the table is emptied by misc/cronjobs/zebraqueue_start.pl script -# =head2 ModZebrafiles -# -# &ModZebrafiles( $dbh, $biblionumber, $record, $folder, $server ); -# -# =cut -# -# sub ModZebrafiles { -# -# my ( $dbh, $biblionumber, $record, $folder, $server ) = @_; -# -# my $op; -# my $zebradir = -# C4::Context->zebraconfig($server)->{directory} . "/" . $folder . "/"; -# unless ( opendir( DIR, "$zebradir" ) ) { -# warn "$zebradir not found"; -# return; -# } -# closedir DIR; -# my $filename = $zebradir . $biblionumber; -# -# if ($record) { -# open( OUTPUT, ">", $filename . ".xml" ); -# print OUTPUT $record; -# close OUTPUT; -# } -# } - =head2 ModZebra ModZebra( $biblionumber, $op, $server, $record ); diff --git a/C4/Breeding.pm b/C4/Breeding.pm index ee14c81d7c..98f46c4ac4 100644 --- a/C4/Breeding.pm +++ b/C4/Breeding.pm @@ -332,9 +332,7 @@ sub _add_rowdata { date2 => 'biblioitems.publicationyear', #UNIMARC ); foreach my $k (keys %fetch) { - my ($t, $f)= split '\.', $fetch{$k}; - $row= C4::Biblio::TransformMarcToKohaOneField($t, $f, $record, $row); - $row->{$k}= $row->{$f} if $k ne $f; + $row->{$k} = C4::Biblio::TransformMarcToKohaOneField( $fetch{$k}, $record, q{} ); # default framework } $row->{date}//= $row->{date2}; $row->{isbn}=_isbn_replace($row->{isbn}); diff --git a/C4/Items.pm b/C4/Items.pm index b64aa600cf..6281393693 100644 --- a/C4/Items.pm +++ b/C4/Items.pm @@ -1498,8 +1498,13 @@ sub Item2Marc { defined($itemrecord->{$_}) && $itemrecord->{$_} ne '' ? ("items.$_" => $itemrecord->{$_}) : () } keys %{ $itemrecord } }; - my $itemmarc = C4::Biblio::TransformKohaToMarc($mungeditem); - my ( $itemtag, $itemsubfield ) = C4::Biblio::GetMarcFromKohaField("items.itemnumber",C4::Biblio::GetFrameworkCode($biblionumber)||''); + my $framework = C4::Biblio::GetFrameworkCode( $biblionumber ); + my $itemmarc = C4::Biblio::TransformKohaToMarc( + $mungeditem, $framework, { no_split => 1}, + ); + my ( $itemtag, $itemsubfield ) = C4::Biblio::GetMarcFromKohaField( + "items.itemnumber", $framework, + ); my $unlinked_item_subfields = _parse_unlinked_item_subfields_from_xml($mungeditem->{'items.more_subfields_xml'}); if (defined $unlinked_item_subfields and $#$unlinked_item_subfields > -1) { diff --git a/C4/Ris.pm b/C4/Ris.pm index 5bf8ecc98d..f244bf694d 100644 --- a/C4/Ris.pm +++ b/C4/Ris.pm @@ -65,7 +65,6 @@ use Modern::Perl; use List::MoreUtils qw/uniq/; use vars qw(@ISA @EXPORT); -use C4::Biblio qw(GetMarcSubfieldStructureFromKohaField); use Koha::SimpleMARC qw(read_field); diff --git a/C4/XISBN.pm b/C4/XISBN.pm index cff35c94d7..9cda7f2eac 100644 --- a/C4/XISBN.pm +++ b/C4/XISBN.pm @@ -72,7 +72,9 @@ sub _get_biblio_from_xisbn { return unless ( !$errors && scalar @$results ); my $record = C4::Search::new_record_from_zebra( 'biblioserver', $results->[0] ); - my $biblionumber = C4::Biblio::get_koha_field_from_marc('biblio', 'biblionumber', $record, ''); + my $biblionumber = C4::Biblio::TransformMarcToKohaOneField( + 'biblio.biblionumber', $record, # Default framework used + ); return unless $biblionumber; my $biblio = Koha::Biblios->find( $biblionumber ); diff --git a/Koha/MetadataRecord.pm b/Koha/MetadataRecord.pm index 9899e164c7..e451a2b2bb 100644 --- a/Koha/MetadataRecord.pm +++ b/Koha/MetadataRecord.pm @@ -109,29 +109,16 @@ sub createMergeHash { } } +=head2 getKohaField + + $metadata->{$key} = $record->getKohaField($kohafield); + +=cut + sub getKohaField { my ($self, $kohafield) = @_; - if ($self->schema =~ m/marc/) { - my $frameworkcode = ""; # FIXME Why do we use the default framework? - my $mss = C4::Biblio::GetMarcSubfieldStructure( $frameworkcode ); - my $tagfield = $mss->{$kohafield}; - - return '' if ref($tagfield) ne 'HASH'; - - my ($tag, $subfield) = ( $tagfield->{tagfield}, $tagfield->{tagsubfield} ); - my @kohafield; - foreach my $field ( $self->record->field($tag) ) { - if ( $field->tag() < 10 ) { - push @kohafield, $field->data(); - } else { - foreach my $contents ( $field->subfield($subfield) ) { - push @kohafield, $contents; - } - } - } - - return join ' | ', @kohafield; + return C4::Biblio::TransformMarcToKohaOneField($kohafield, $self->record); # Default framework used } } -- 2.39.5