From c25dc92fd190b21d5ce8a7b9d8ac4d3da7ad665a Mon Sep 17 00:00:00 2001 From: Chris Nighswonger Date: Tue, 15 Sep 2009 09:27:13 -0400 Subject: [PATCH] Bug 2500 Correcting incorrect splitting of cutter numbers This patch does two things to improve the call number splitting algorithms: 1. It makes changes to ensure that cutter numbers are split correctly in ddcns 2. It moves custom/fiction/biography call number splitting to a separate algorithm. Before they were incorrectly placed in ddcns. This patch also modifies the call number splitting tests to accept call numbers from the command line to allow quick testing of any give call number against a particular algorithm. Signed-off-by: Galen Charlton --- C4/Labels/Label.pm | 46 ++++++++++++++----------------- t/Labels_split_ccn.t | 63 +++++++++++++++++++++++++++++++++++++++++++ t/Labels_split_ddcn.t | 53 +++++++++++++++++++++++++----------- t/Labels_split_lccn.t | 48 ++++++++++++++++++++++++++------- 4 files changed, 159 insertions(+), 51 deletions(-) create mode 100755 t/Labels_split_ccn.t diff --git a/C4/Labels/Label.pm b/C4/Labels/Label.pm index 9cf50262ca..c3ef20bbed 100644 --- a/C4/Labels/Label.pm +++ b/C4/Labels/Label.pm @@ -6,6 +6,7 @@ use warnings; use Text::Wrap; use Algorithm::CheckDigits; use Text::CSV_XS; +use Data::Dumper; use C4::Context; use C4::Debug; @@ -133,9 +134,9 @@ sub _split_ddcn { $_ = $ddcn; s/\///g; # in theory we should be able to simply remove all segmentation markers and arrive at the correct call number... my (@parts) = m/ - ^([a-zA-Z-]+(?:$possible_decimal)?) # R220.3 # BIO # first example will require extra splitting + ^([-a-zA-Z]*\s?(?:$possible_decimal)?) # R220.3 CD-ROM 787.87 # will require extra splitting \s+ - (.+) # H2793Z H32 c.2 # R5c.1 # everything else (except bracketing spaces) + (.+) # H2793Z H32 c.2 EAS # everything else (except bracketing spaces) \s* /x; unless (scalar @parts) { @@ -143,41 +144,34 @@ sub _split_ddcn { push @parts, $_; # if no match, just push the whole string. } - if ($parts[ 0] =~ /^([a-zA-Z]+)($possible_decimal)$/) { + if ($parts[0] =~ /^([-a-zA-Z]+)\s?($possible_decimal)$/) { shift @parts; # pull off the mathching first element, like example 1 unshift @parts, $1, $2; # replace it with the two pieces } push @parts, split /\s+/, pop @parts; # split the last piece into an arbitrary number of pieces at spaces - - if ($parts[-1] !~ /^.*\d-\d.*$/ && $parts[-1] =~ /^(.*\d+)(\D.*)$/) { - pop @parts; # pull off the mathching last element, like example 2 - push @parts, $1, $2; # replace it with the two pieces - } - $debug and print STDERR "split_ddcn array: ", join(" | ", @parts), "\n"; return @parts; } -sub _split_fcn { +## NOTE: Custom call number types go here. It may be necessary to create additional splitting algorithms if some custom call numbers +## cannot be made to work here. Presently this splits standard non-ddcn, non-lccn fiction and biography call numbers. + +sub _split_ccn { my ($fcn) = @_; - my @fcn_split = (); - # Split fiction call numbers based on spaces - SPLIT_FCN: - while ($fcn) { - if ($fcn =~ m/([A-Za-z0-9]+\.?[0-9]?)(\W?).*?/x) { - push (@fcn_split, $1); - $fcn = $'; - } - else { - last SPLIT_FCN; # No match, break out of the loop - } + my @parts = (); + # Split call numbers based on spaces + push @parts, split /\s+/, $fcn; # split the call number into an arbitrary number of pieces at spaces + if ($parts[-1] !~ /^.*\d-\d.*$/ && $parts[-1] =~ /^(.*\d+)(\D.*)$/) { + pop @parts; # pull off the matching last element + push @parts, $1, $2; # replace it with the two pieces } - unless (scalar @fcn_split) { + unless (scalar @parts) { warn sprintf('regexp failed to match string: %s', $_); - push (@fcn_split, $_); + push (@parts, $_); } - return @fcn_split; + $debug and print STDERR "split_ccn array: ", join(" | ", @parts), "\n"; + return @parts; } sub _get_barcode_data { @@ -414,11 +408,11 @@ sub draw_label_text { if ((grep {$field->{'code'} =~ m/$_/} @callnumber_list) and ($self->{'printing_type'} eq 'BIB') and ($self->{'callnum_split'})) { # If the field contains the call number, we do some sp if ($cn_source eq 'lcc') { @label_lines = _split_lccn($field_data); - @label_lines = _split_fcn($field_data) if !@label_lines; # If it was not a true lccn, try it as a fiction call number + @label_lines = _split_ccn($field_data) if !@label_lines; # If it was not a true lccn, try it as a custom call number push (@label_lines, $field_data) if !@label_lines; # If it was not that, send it on unsplit } elsif ($cn_source eq 'ddc') { @label_lines = _split_ddcn($field_data); - @label_lines = _split_fcn($field_data) if !@label_lines; + @label_lines = _split_ccn($field_data) if !@label_lines; push (@label_lines, $field_data) if !@label_lines; } else { warn sprintf('Call number splitting failed for: %s. Please add this call number to bug #2500 at bugs.koha.org', $field_data); diff --git a/t/Labels_split_ccn.t b/t/Labels_split_ccn.t new file mode 100755 index 0000000000..82248ba4e7 --- /dev/null +++ b/t/Labels_split_ccn.t @@ -0,0 +1,63 @@ +#!/usr/bin/perl +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, +# Suite 330, Boston, MA 02111-1307 USA +# +# for context, see http://bugs.koha.org + +use strict; +use warnings; + +use Test::More; + +BEGIN { + our $ccns = {}; + if ($ARGV[0]) { + BAIL_OUT("USAGE: perl Labels_split_ccn.t 'BIO JP2 R5c.1' 'BIO,JP2,R5c.1'") unless $ARGV[1]; + $ccns = {$ARGV[0] => [split (/,/,$ARGV[1])],}; + } + else { + $ccns = { + 'BIO JP2 R5c.1' => [qw(BIO JP2 R5 c.1)], + 'FIC GIR J5c.1' => [qw(FIC GIR J5 c.1)], + 'J DAR G7c.11' => [qw( J DAR G7 c.11)], + 'MP3-CD F PARKER' => [qw(MP3-CD F PARKER)], + }; + } + my $test_num = 1; + foreach (keys(%$ccns)) { + my $split_num += scalar(@{$ccns->{$_}}); + $test_num += 2 * $split_num; + $test_num += 4; + } + plan tests => $test_num; + use_ok('C4::Labels::Label'); + use vars qw($ccns); +} + +foreach my $ccn (sort keys %$ccns) { + my (@parts, @expected); + ok($ccn, "ddcn: $ccn"); + ok(@expected = @{$ccns->{$ccn}}, "split expected to produce " . scalar(@expected) . " pieces"); + ok(@parts = C4::Labels::Label::_split_ccn($ccn), "C4::Labels::Label::_split_ccn($ccn)"); + ok(scalar(@expected) == scalar(@parts), sprintf("%d of %d pieces produced", scalar(@parts), scalar(@expected))); + my $i = 0; + foreach my $unit (@expected) { + my $part; + ok($part = $parts[$i], "($ccn)[$i] populated: " . (defined($part) ? $part : 'UNDEF')); + ok((defined($part) and $part eq $unit), "($ccn)[$i] matches: $unit"); + $i++; + } +} diff --git a/t/Labels_split_ddcn.t b/t/Labels_split_ddcn.t index 26aad55c4b..f70b29fd76 100755 --- a/t/Labels_split_ddcn.t +++ b/t/Labels_split_ddcn.t @@ -1,32 +1,56 @@ #!/usr/bin/perl # +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, +# Suite 330, Boston, MA 02111-1307 USA +# # for context, see http://bugs.koha.org use strict; use warnings; -use Test::More tests => 82; +use Test::More; BEGIN { - use_ok('C4::Labels'); + our $ddcns = {}; + if ($ARGV[0]) { + BAIL_OUT("USAGE: perl Labels_split_ddcn.t '621.3828 J28l' '621.3828,J28l'") unless $ARGV[1]; + $ddcns = {$ARGV[0] => [split (/,/,$ARGV[1])],}; + } + else { + $ddcns = { + 'R220.3 H2793Z H32 c.2' => [qw(R 220.3 H2793Z H32 c.2)], + 'CD-ROM 787.87 EAS' => [qw(CD-ROM 787.87 EAS)], + '252.051 T147 v.1-2' => [qw(252.051 T147 v.1-2)], + }; + } + my $test_num = 1; + foreach (keys(%$ddcns)) { + my $split_num += scalar(@{$ddcns->{$_}}); + $test_num += 2 * $split_num; + $test_num += 4; + } + plan tests => $test_num; + use_ok('C4::Labels::Label'); + use vars qw($ddcns); } -ok(defined C4::Labels::split_ddcn, 'C4::Labels::split_ddcn defined'); - -my $ddcns = { - 'BIO JP2 R5c.1' => [qw(BIO JP2 R5 c.1 )], - 'FIC GIR J5c.1' => [qw(FIC GIR J5 c.1 )], - 'J DAR G7c.11' => [qw( J DAR G7 c.11)], - 'R220.3 H2793Z H32 c.2' => [qw(R 220.3 H2793Z H32 c.2)], - 'CD-ROM 787.87 EAS' => [qw(CD-ROM 787.87 EAS)], - 'MP3-CD F PARKER' => [qw(MP3-CD F PARKER)], - '252.051 T147 v.1-2' => [qw(252.051 T147 v.1-2)], -}; foreach my $ddcn (sort keys %$ddcns) { my (@parts, @expected); ok($ddcn, "ddcn: $ddcn"); ok(@expected = @{$ddcns->{$ddcn}}, "split expected to produce " . scalar(@expected) . " pieces"); - ok(@parts = C4::Labels::split_ddcn($ddcn), "C4::Labels::split_ddcn($ddcn)"); + ok(@parts = C4::Labels::Label::_split_ddcn($ddcn), "C4::Labels::Label::_split_ddcn($ddcn)"); ok(scalar(@expected) == scalar(@parts), sprintf("%d of %d pieces produced", scalar(@parts), scalar(@expected))); my $i = 0; foreach my $unit (@expected) { @@ -36,4 +60,3 @@ foreach my $ddcn (sort keys %$ddcns) { $i++; } } - diff --git a/t/Labels_split_lccn.t b/t/Labels_split_lccn.t index 1893e9ddab..f29a11347f 100755 --- a/t/Labels_split_lccn.t +++ b/t/Labels_split_lccn.t @@ -1,28 +1,56 @@ #!/usr/bin/perl # +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, +# Suite 330, Boston, MA 02111-1307 USA +# # for context, see http://bugs.koha.org/cgi-bin/bugzilla/show_bug.cgi?id=2691 use strict; use warnings; -use Test::More tests => 44; +use Test::More; BEGIN { - use_ok('C4::Labels'); + our $lccns = {}; + if ($ARGV[0]) { + BAIL_OUT("USAGE: perl Labels_split_lccn.t 'HE 8700.7 .P6 T44 1983' 'HE,8700.7,.P6,T44,1983'") unless $ARGV[1]; + $lccns = {$ARGV[0] => [split (/,/,$ARGV[1])],}; + } + else { + $lccns = { + 'HE8700.7 .P6T44 1983' => [qw(HE 8700.7 .P6 T44 1983)], + 'BS2545.E8 H39 1996' => [qw(BS 2545 .E8 H39 1996)], + 'NX512.S85 A4 2006' => [qw(NX 512 .S85 A4 2006)], + }; + } + my $test_num = 1; + foreach (keys(%$lccns)) { + my $split_num += scalar(@{$lccns->{$_}}); + $test_num += 2 * $split_num; + $test_num += 4; + } + plan tests => $test_num; + use_ok('C4::Labels::Label'); + use vars qw($lccns); } -ok(defined C4::Labels::split_lccn, 'C4::Labels::split_lccn defined'); - -my $lccns = { - 'HE8700.7 .P6T44 1983' => [qw(HE 8700.7 .P6 T44 1983)], - 'BS2545.E8 H39 1996' => [qw(BS 2545 .E8 H39 1996)], - 'NX512.S85 A4 2006' => [qw(NX 512 .S85 A4 2006)], -}; foreach my $lccn (sort keys %$lccns) { my (@parts, @expected); ok($lccn, "lccn: $lccn"); ok(@expected = @{$lccns->{$lccn}}, "split expected to produce " . scalar(@expected) . " pieces"); - ok(@parts = C4::Labels::split_lccn($lccn), "C4::Labels::split_lccn($lccn)"); + ok(@parts = C4::Labels::Label::_split_lccn($lccn), "C4::Labels::Label::_split_lccn($lccn)"); ok(scalar(@expected) == scalar(@parts), sprintf("%d of %d pieces produced", scalar(@parts), scalar(@expected))); my $i = 0; foreach my $unit (@expected) { -- 2.39.5