From 4579f1ad906b28a78c0ef7a1669b0a08138745d1 Mon Sep 17 00:00:00 2001 From: Tomas Cohen Arazi Date: Mon, 19 Sep 2016 12:25:31 -0300 Subject: [PATCH] Bug 17318: Make C4::Matcher::_get_match_keys handle 'norms' param The current implementation doesn't care about that parameter, and applies a default normalization rule that seems counter-productive (in general) for its alleged purpose. This patch makes it handle the following values for 'norms': - upper_case - lower_case - remove_spaces - legacy_default - none They make it call the relevant Koha::Util::Normalize routines. 'legacy_default' is used only for backwards compatibility, but could be removed if there's consensus. To test: - Run: $ prove t/Matcher.t => FAIL: most _get_match_keys tests fail - Apply the patch - Run: $ prove t/Matcher.t => SUCCESS: Tests pass! - Sign off :-D Sponsored-by: FIT Signed-off-by: Mark Tompsett Signed-off-by: Jonathan Druart Signed-off-by: Kyle M Hall --- C4/Matcher.pm | 75 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/C4/Matcher.pm b/C4/Matcher.pm index b51d10e859..3c4c391008 100644 --- a/C4/Matcher.pm +++ b/C4/Matcher.pm @@ -17,14 +17,13 @@ package C4::Matcher; # You should have received a copy of the GNU General Public License # along with Koha; if not, see . -use strict; -use warnings; +use Modern::Perl; -use C4::Context; use MARC::Record; use Koha::SearchEngine; use Koha::SearchEngine::Search; +use Koha::Util::Normalize qw/legacy_default remove_spaces upper_case lower_case/; =head1 NAME @@ -774,6 +773,7 @@ sub _passes_required_checks { } sub _get_match_keys { + my $source_record = shift; my $matchpoint = shift; my $check_only_first_repeat = @_ ? 
shift : 0; @@ -792,7 +792,7 @@ sub _get_match_keys { # If there are two 003s and two 001s, there will be two keys: # first 003 + first 001 # second 003 + second 001 - + my @keys = (); for (my $i = 0; $i <= $#{ $matchpoint->{'components'} }; $i++) { my $component = $matchpoint->{'components'}->[$i]; @@ -801,24 +801,45 @@ sub _get_match_keys { $j++; last FIELD if $j > 0 and $check_only_first_repeat; last FIELD if $i > 0 and $j > $#keys; - my $key = ""; - my $string; - if ($field->is_control_field()) { - $string=$field->data(); + + my $string; + if ( $field->is_control_field() ) { + $string = $field->data(); } else { - foreach my $subfield ($field->subfields()) { - if (exists $component->{'subfields'}->{$subfield->[0]}) { - $string .= " " . $subfield->[1]; #FIXME: It would be better to create an array and join with a space later... - } - } - } + $string = $field->as_string( + join('', keys %{ $component->{ subfields } }), ' ' # ' ' as separator + ); + } + if ($component->{'length'}>0) { - $string= substr($string, $component->{'offset'}, $component->{'length'}); - # FIXME normalize, substr + $string= substr($string, $component->{'offset'}, $component->{'length'}); } elsif ($component->{'offset'}) { - $string= substr($string, $component->{'offset'}); + $string= substr($string, $component->{'offset'}); } - $key = _normalize($string); + + my $norms = $component->{'norms'}; + my $key = $string; + + foreach my $norm ( @{ $norms } ) { + if ( grep { $norm eq $_ } valid_normalization_routines() ) { + if ( $norm eq 'remove_spaces' ) { + $key = remove_spaces($key); + } + elsif ( $norm eq 'upper_case' ) { + $key = upper_case($key); + } + elsif ( $norm eq 'lower_case' ) { + $key = lower_case($key); + } + elsif ( $norm eq 'legacy_default' ) { + $key = legacy_default($key); + } + } else { + warn "Invalid normalization routine required ($norm)" + unless $norm eq 'none'; + } + } + if ($i == 0) { push @keys, $key if $key; } else { @@ -843,16 +864,14 @@ sub _parse_match_component { 
return $component; } -# FIXME - default normalizer -sub _normalize { - my $value = uc shift; - $value =~ s/[.;:,\]\[\)\(\/'"]//g; - $value =~ s/^\s+//; - #$value =~ s/^\s+$//; - $value =~ s/\s+$//; - $value =~ s/\s+/ /g; - #$value =~ s/[.;,\]\[\)\(\/"']//g; - return $value; +sub valid_normalization_routines { + + return ( + 'remove_spaces', + 'upper_case', + 'lower_case', + 'legacy_default' + ); } 1; -- 2.39.5