From ada1c03350eb952928f68b5f1594174a3e25bf98 Mon Sep 17 00:00:00 2001 From: Marcel de Rooy Date: Tue, 21 Sep 2021 12:45:46 +0000 Subject: [PATCH] Bug 24674: (follow-up) Simpler regex The 'Polish notation' actually simplifies things. Signed-off-by: Marcel de Rooy Signed-off-by: Katrin Fischer Signed-off-by: Jonathan Druart --- C4/Biblio.pm | 15 +++++--------- t/db_dependent/Biblio/TransformMarcToKoha.t | 23 +++++++++++---------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/C4/Biblio.pm b/C4/Biblio.pm index e386346825..69e488a1a1 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -2499,16 +2499,11 @@ sub _adjust_pubyear { $retval = $1; } elsif( $retval =~ m/(\d\d\d\d)/ && $1 > 0 ) { $retval = $1; - } elsif( $retval =~ m/ - (?<year>\d)[-]?[.Xx?]{3} - |(?<year>\d{2})[.Xx?]{2} - |(?<year>\d{3})[.Xx?] - |(?<year>\d)[-]{1,3}\?? - |(?<year>\d\d)[-]{1,2}\?? - |(?<year>\d{3})[-]\?? - /xms ) { # the form 198-? occurred in Dutch ISBD rules - my $digits = $+{year}; - $retval = $digits * ( 10 ** ( 4 - length($digits) )); + } elsif( $retval =~ m/(?<year>\d{1,3})[.Xx?-]/ ) { + # See also bug 24674: enough to look at one unknown year char like .Xx-? + # At this point in code 1234? or 1234- already passed the earlier regex + # Things like 2-, 1xx, 1??? are now converted to a four positions-year. 
+ $retval = $+{year} * ( 10 ** (4-length($+{year})) ); } else { $retval = undef; } diff --git a/t/db_dependent/Biblio/TransformMarcToKoha.t b/t/db_dependent/Biblio/TransformMarcToKoha.t index 942c8a14d3..9cd675858b 100755 --- a/t/db_dependent/Biblio/TransformMarcToKoha.t +++ b/t/db_dependent/Biblio/TransformMarcToKoha.t @@ -94,7 +94,7 @@ subtest 'Multiple mappings for one kohafield' => sub { }; subtest 'Testing _adjust_pubyear' => sub { - plan tests => 17; + plan tests => 18; is( C4::Biblio::_adjust_pubyear('2004 c2000 2007'), 2000, 'First cYEAR' ); is( C4::Biblio::_adjust_pubyear('2004 2000 2007'), 2004, 'First year' ); @@ -103,16 +103,17 @@ subtest 'Testing _adjust_pubyear' => sub { is( C4::Biblio::_adjust_pubyear('197X'), 1970, '197X on its own' ); is( C4::Biblio::_adjust_pubyear('1...'), 1000, '1... on its own' ); is( C4::Biblio::_adjust_pubyear('12?? 13xx'), 1200, '12?? first' ); - is( C4::Biblio::_adjust_pubyear('12? 1x'), undef, 'Too short return nothing as data must be int' ); - is( C4::Biblio::_adjust_pubyear('198-'), '1980', '198-' ); - is( C4::Biblio::_adjust_pubyear('19--'), '1900', '19--' ); - is( C4::Biblio::_adjust_pubyear('19-'), '1900', '19-' ); - is( C4::Biblio::_adjust_pubyear('2---'), '2000', '2---' ); - is( C4::Biblio::_adjust_pubyear('2--'), '2000', '2--' ); - is( C4::Biblio::_adjust_pubyear('2-'), '2000', '2-' ); - is( C4::Biblio::_adjust_pubyear('198-?'), '1980', '198-?' ); - is( C4::Biblio::_adjust_pubyear('1981-'), '1981', 'Date range returns first date' ); - is( C4::Biblio::_adjust_pubyear('broken'), undef, 'Non-matchign data returns nothing as the field must be int' ); + is( C4::Biblio::_adjust_pubyear('12? 1x'), 1200, '12? 
first' ); + is( C4::Biblio::_adjust_pubyear('198-'), 1980, '198-' ); + is( C4::Biblio::_adjust_pubyear('19--'), 1900, '19--' ); + is( C4::Biblio::_adjust_pubyear('19-'), 1900, '19-' ); + is( C4::Biblio::_adjust_pubyear('1-'), 1000, '1-' ); + is( C4::Biblio::_adjust_pubyear('2xxx'), 2000, '2xxx' ); + is( C4::Biblio::_adjust_pubyear('2xx'), 2000, '2xx' ); + is( C4::Biblio::_adjust_pubyear('2x'), 2000, '2x' ); + is( C4::Biblio::_adjust_pubyear('198-?'), 1980, '198-?' ); + is( C4::Biblio::_adjust_pubyear('1981-'), 1981, 'Date range returns first date' ); + is( C4::Biblio::_adjust_pubyear('broken'), undef, 'Non-matching data' ); }; subtest 'Test repeatable subfields' => sub { -- 2.39.5