From 0275f1898088dfce5153a8d1e1ca4bb0c6314fd0 Mon Sep 17 00:00:00 2001 From: tipaul Date: Fri, 12 Aug 2005 13:39:04 +0000 Subject: [PATCH] removing useless package --- C4/SimpleMarc.pm | 612 ----------------------------------------------- 1 file changed, 612 deletions(-) delete mode 100755 C4/SimpleMarc.pm diff --git a/C4/SimpleMarc.pm b/C4/SimpleMarc.pm deleted file mode 100755 index 3a0dc15956..0000000000 --- a/C4/SimpleMarc.pm +++ /dev/null @@ -1,612 +0,0 @@ -package C4::SimpleMarc; - -# $Id$ - -# Routines for handling import of MARC data into Koha db - -# Koha library project www.koha.org - -# Licensed under the GPL - - -# Copyright 2000-2002 Katipo Communications -# -# This file is part of Koha. -# -# Koha is free software; you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any later -# version. -# -# Koha is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with -# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, -# Suite 330, Boston, MA 02111-1307 USA - -use strict; -use DBI; -require Exporter; -use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); - -# set the version for version checking -$VERSION = 0.01; - -=head1 NAME - -C4::SimpleMarc - Functions for parsing MARC records and files - -=head1 SYNOPSIS - - use C4::SimpleMarc; - -=head1 DESCRIPTION - -This module provides functions for parsing MARC records and files. - -=head1 FUNCTIONS - -=over 2 - -=cut - -@ISA = qw(Exporter); -@EXPORT = qw( - &extractmarcfields - &parsemarcfileformat - &taglabel - %tagtext - %tagmap -); - -# FIXME - %tagtext and %tagmap are in both @EXPORT and @EXPORT_OK. -# They should be in one or the other, but not both (though preferably, -# things shouldn't get exported in the first place). -@EXPORT_OK = qw( - %tagtext - %tagmap -); - -#------------------------------------------------ - -#------------------ -# Constants - -# %tagtext maps MARC tags to descriptive names. -my %tagtext = ( - 'LDR' => 'Leader', - '001' => 'Control number', - '003' => 'Control number identifier', - '005' => 'Date and time of latest transaction', - '006' => 'Fixed-length data elements -- additional material characteristics', - '007' => 'Physical description fixed field', - '008' => 'Fixed length data elements', - '010' => 'LCCN', - '015' => 'National library CN', - '020' => 'ISBN', - '022' => 'ISSN', - '024' => 'Other standard ID', - '035' => 'System control number', - '037' => 'Source of acquisition', - '040' => 'Cataloging source', - '041' => 'Language code', - '043' => 'Geographic area code', - '043' => 'Publishing country code', - '050' => 'Library of Congress call number', - '055' => 'Canadian classification number', - '060' => 'National Library of Medicine call number', - '082' => 'Dewey decimal call number', - '100' => 'Main entry -- Personal name', - '110' => 'Main entry -- Corporate name', - '130' => 'Main entry -- Uniform title', - '240' => 'Uniform title', - '245' => 'Title statement', - '246' => 'Varying form of title', - '250' => 'Edition statement', - '256' => 'Computer file characteristics', - '260' => 'Publication, distribution, etc.', - '263' => 'Projected publication date', - '300' => 'Physical description', - '306' => 'Playing time', - '440' => 'Series statement / Added entry -- Title', - '490' => 'Series statement', - '500' => 'General note', - '504' => 'Bibliography, etc. note', - '505' => 'Formatted contents note', - '508' => 'Creation/production credits note', - '510' => 'Citation/references note', - '511' => 'Participant or performer note', - '520' => 'Summary, etc. note', - '521' => 'Target audience note (ie age)', - '530' => 'Additional physical form available note', - '538' => 'System details note', - '586' => 'Awards note', - '600' => 'Subject added entry -- Personal name', - '610' => 'Subject added entry -- Corporate name', - '650' => 'Subject added entry -- Topical term', - '651' => 'Subject added entry -- Geographic name', - '656' => 'Index term -- Occupation', - '700' => 'Added entry -- Personal name', - '710' => 'Added entry -- Corporate name', - '730' => 'Added entry -- Uniform title', - '740' => 'Added entry -- Uncontrolled related/analytical title', - '800' => 'Series added entry -- Personal name', - '830' => 'Series added entry -- Uniform title', - '852' => 'Location', - '856' => 'Electronic location and access', -); - -# tag, subfield, field name, repeats, striptrailingchars -# FIXME - What is this? Can it be explained without a semester-long -# course in MARC? - -# XXX - Maps MARC (field, subfield) tuples to Koha database field -# names (presumably in 'biblioitems'). $tagmap{$field}->{$subfield} is -# an anonymous hash of the form -# { -# name => "title", # Name of Koha field -# rpt => 0, # I don't know what this is, but -# # it's not used. -# striptrail => ',:;/-', # Lists the set of characters that -# # should be stripped from the end -# # of the MARC field. -# } - -my %tagmap=( - '010'=>{'a'=>{name=> 'lccn', rpt=>0, striptrail=>' ' }}, - '015'=>{'a'=>{name=> 'lccn', rpt=>0 }}, - '020'=>{'a'=>{name=> 'isbn', rpt=>0 }}, - '022'=>{'a'=>{name=> 'issn', rpt=>0 }}, - '082'=>{'a'=>{name=> 'dewey', rpt=>0 }}, - '100'=>{'a'=>{name=> 'author', rpt=>0, striptrail=>',:;/-' }}, - '245'=>{'a'=>{name=> 'title', rpt=>0, striptrail=>',:;/' }, - 'b'=>{name=> 'subtitle', rpt=>0, striptrail=>',:;/' }}, - '260'=>{'a'=>{name=> 'place', rpt=>0, striptrail=>',:;/-' }, - 'b'=>{name=> 'publisher', rpt=>0, striptrail=>',:;/-' }, - 'c'=>{name=> 'year' , rpt=>0, striptrail=>'.,:;/-' }}, - '300'=>{'a'=>{name=> 'pages', rpt=>0, striptrail=>',:;/-' }, - 'c'=>{name=> 'size', rpt=>0, striptrail=>',:;/-' }}, - '362'=>{'a'=>{name=> 'volume-number', rpt=>0 }}, - '440'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/' }, - 'v'=>{name=> 'volume-number',rpt=>0 }}, - '490'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/' }, - 'v'=>{name=> 'volume-number',rpt=>0 }}, - '700'=>{'a'=>{name=> 'addtional-author-illus',rpt=>1, striptrail=>',:;/' }}, - '5xx'=>{'a'=>{name=> 'notes', rpt=>1 }}, - '65x'=>{'a'=>{name=> 'subject', rpt=>1, striptrail=>'.,:;/-' }}, -); - - -#------------------ - -=item extractmarcfields - - $biblioitem = &extractmarcfields($marc_record); - -C<$marc_record> is a reference-to-array representing a MARC record; -each element is a reference-to-hash specifying a MARC field (possibly -with subfields). - -C<&extractmarcfields> translates C<$marc_record> into a Koha -biblioitem. C<$biblioitem> is a reference-to-hash whose keys are named -after fields in the biblioitems table of the Koha database. - -=cut -#' -# FIXME - Throughout: -# $foo->{bar}->[baz]->{quux} -# can be rewritten as -# $foo->{bar}[baz]{quux} -sub extractmarcfields { - use strict; - # input - my ( - $record, # pointer to list of MARC field hashes. - # Example: $record->[0]->{'tag'} = '100' # Author - # $record->[0]->{'subfields'}->{'a'} = subfieldvalue - )=@_; - - # return - my $bib; # pointer to hash of named output fields - # Example: $bib->{'author'} = "Twain, Mark"; - - my $debug=0; - - my ( - $field, # hash ref - $value, - $subfield, # Marc subfield [a-z] - $fieldname, # name of field "author", "title", etc. - $strip, # chars to remove from end of field - $stripregex, # reg exp pattern - ); - my ($lccn, $isbn, $issn, - $publicationyear, @subjects, $subject, - $controlnumber, - $notes, $additionalauthors, $illustrator, $copyrightdate, - $s, $subdivision, $subjectsubfield, - ); - - print "
\n" if $debug;
-
-    if ( ref($record) eq "ARRAY" ) {
-        foreach $field (@$record) {
-
-	    # Check each subfield in field
-	    # FIXME - Would this code be more readable with
-	    #	while (($subfieldname, $subfield) = each %{$field->{subfields}})
-	    # ?
-	    foreach $subfield ( keys %{$field->{subfields}} ) {
-		# see if it is defined in our Marc to koha mapping table
-		# FIXME - This if-clause takes up the entire loop.
-		# This would be better rewritten as
-		#	next unless defined($tagmap{...});
-		# Then the body of the loop doesn't have to be
-		# indented as much.
-	    	if ( $fieldname=$tagmap{ $field->{'tag'} }->{$subfield}->{name} ) {
-		    # Yes, so keep the value
-		    if ( ref($field->{'subfields'}->{$subfield} ) eq 'ARRAY' ) {
-		        # if it was an array, just keep first element.
-		        $bib->{$fieldname}=$field->{'subfields'}->{$subfield}[0];
-		    } else {
-		        $bib->{$fieldname}=$field->{'subfields'}->{$subfield};
-		    } # if array
-		    print "$field->{'tag'} $subfield $fieldname=$bib->{$fieldname}\n" if $debug;
-		    # see if this field should have trailing chars dropped
-	    	    if ($strip=$tagmap{ $field->{'tag'} }->{$subfield}->{striptrail} ) {
-			# FIXME - The next three lines can be rewritten as:
-			#	$bib =~ s/[\Q$strip\E]+$//;
-			$strip=~s//\\/; # backquote each char
-			$stripregex='[ ' . $strip . ']+$';  # remove trailing spaces also
-			$bib->{$fieldname}=~s/$stripregex//;
-			# also strip leading spaces
-			$bib->{$fieldname}=~s/^ +//;
-		    } # if strip
-		    print "Found subfield $field->{'tag'} $subfield " .
-			"$fieldname = $bib->{$fieldname}\n" if $debug;
-		} # if tagmap exists
-
-	    } # foreach subfield
-
-	    # Handle special fields and tags
-	    if ($field->{'tag'} eq '001') {
-		$bib->{controlnumber}=$field->{'indicator'};
-	    }
-	    if ($field->{'tag'} eq '015') {
-		# FIXME - I think this can be rewritten as
-		#	$field->{"subfields"}{"a"} =~ /^\s*C?(\S+)/ and
-		#		$bib->{"lccn"} = $1;
-		# This might break with invalid input, though.
-		$bib->{lccn}=$field->{'subfields'}->{'a'};
-		$bib->{lccn}=~s/^\s*//;
-		$bib->{lccn}=~s/^C//;
-		($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0];
-	    }
-
-
-		# FIXME - Fix indentation
-		if ($field->{'tag'} eq '260') {
-
-		    $publicationyear=$field->{'subfields'}->{'c'};
-		    # FIXME - "\d\d\d\d" can be rewritten as "\d{4}"
-		    if ($publicationyear=~/c(\d\d\d\d)/) {
-			$copyrightdate=$1;
-		    }
-		    if ($publicationyear=~/[^c](\d\d\d\d)/) {
-			$publicationyear=$1;
-		    } elsif ($copyrightdate) {
-			$publicationyear=$copyrightdate;
-		    } else {
-			$publicationyear=~/(\d\d\d\d)/;
-			$publicationyear=$1;
-		    }
-		}
-		if ($field->{'tag'} eq '700') {
-		    my $name=$field->{'subfields'}->{'a'};
-		    if ( defined($field->{'subfields'}->{'e'})
-		        and  $field->{'subfields'}->{'e'}=~/ill/) {
-			$illustrator=$name;
-		    } else {
-			$additionalauthors.="$name\n";
-		    }
-		}
-		if ($field->{'tag'} =~/^5/) {
-		    $notes.="$field->{'subfields'}->{'a'}\n";
-		}
-		if ($field->{'tag'} =~/65\d/) {
-		    my $sub;	# FIXME - Never used
-		    my $subject=$field->{'subfields'}->{'a'};
-		    $subject=~s/\.$//;
-		    print "Subject=$subject\n" if $debug;
-		    foreach $subjectsubfield ( 'x','y','z' ) {
-		      # FIXME - $subdivision is only used in this
-		      # loop. Make it 'my' here, rather than in the
-		      # entire function.
-		      # Ditto $subjectsubfield. Make it 'my' in the
-		      # 'foreach' statement.
-		      if ($subdivision=$field->{'subfields'}->{$subjectsubfield}) {
-			if ( ref($subdivision) eq 'ARRAY' ) {
-			    foreach $s (@$subdivision) {
-				$s=~s/\.$//;
-				$subject.=" -- $s";
-			    } # foreach subdivision
-			} else {
-			    $subdivision=~s/\.$//;
-			    $subject.=" -- $subdivision";
-			} # if array
-		      } # if subfield exists
-		    } # foreach subfield
-		    print "Subject=$subject\n" if $debug;
-		    push @subjects, $subject;
-		} # if tag 65x
-
-
-        } # foreach field
-        # FIXME - Why not do this up in the "Handle special fields and
-        # tags" section?
-	($publicationyear	) && ($bib->{publicationyear}=$publicationyear  );
-	($copyrightdate		) && ($bib->{copyrightdate}=$copyrightdate  );
-	($additionalauthors	) && ($bib->{additionalauthors}=$additionalauthors  );
-	($illustrator		) && ($bib->{illustrator}=$illustrator  );
-	($notes			) && ($bib->{notes}=$notes  );
-	($#subjects		) && ($bib->{subject}=\@subjects  );
-		# FIXME - This doesn't look right: for an array with
-		# one element, $#subjects == 0, which is false. For an
-		# array with 0 elements, $#subjects == -1, which is
-		# true.
-
-	# Misc cleanup
-	if ($bib->{dewey}) {
-	    $bib->{dewey}=~s/\///g;	# drop any slashes
-					# FIXME - Why? Don't the
-					# slashes mean something?
-					# The Dewey code is NOT a number,
-					# it's a string.
-	}
-
-	if ($bib->{lccn}) {
-	   ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0]; # only keep first word
-	}
-
-	if ( $bib->{isbn} ) {
-	    $bib->{isbn}=~s/[^\d]*//g;	# drop non-digits
-			# FIXME - "[^\d]" can be rewritten as "\D"
-			# FIXME - Does this include the check digit? If so,
-			# it might be "X".
-	};
-
-	if ( $bib->{issn} ) {
-	    $bib->{issn}=~s/^\s*//;
-	    ($bib->{issn}) = (split(/\s+/, $bib->{issn}))[0];
-	};
-
-	if ( $bib->{'volume-number'} ) {
-	    if ($bib->{'volume-number'}=~/(\d+).*(\d+)/ ) {
-		$bib->{'volume'}=$1;
-		$bib->{'number'}=$2;
-	    } else {
-		$bib->{volume}=$bib->{'volume-number'};
-	    }
-	    delete $bib->{'volume-number'};
-	} # if volume-number
-
-    } else {
-	# FIXME - Style: this sort of error-checking should really go
-	# closer to the actual test, e.g.:
-	#	if (ref($record) ne "ARRAY")
-	#	{
-	#		die "Not an array!"
-	#	}
-	# then the rest of the code which follows can assume that the
-	# input is good, and you don't have to indent as much.
-	print "Error: extractmarcfields: input ref $record is " .
-		ref($record) . " not ARRAY. Contact sysadmin.\n";
-    }
-    print "
\n" if $debug; - - return $bib; - -} # sub extractmarcfields -#--------------------------------- - -#-------------------------- - -=item parsemarcfileformat - - @records = &parsemarcfileformat($marc_data); - -Parses the contents of a MARC file. - -C<$marc_data> is a string, the contents of a MARC file. -C<&parsemarcfileformat> parses this string into individual MARC -records and returns them. - -C<@records> is an array of references-to-hash. Each element is a MARC -record; its keys are the MARC tags. - -=cut -#' -# Parse MARC data in file format with control-character separators -# May be multiple records. -# FIXME - Is the input ever likely to be more than a few Kb? If so, it -# might be worth changing this function to take a (read-only) -# reference-to-string, to avoid unnecessary copying. -sub parsemarcfileformat { - use strict; - # Input is one big text string - my $data=shift; - # Output is list of records. Each record is list of field hashes - my @records; - - my $splitchar=chr(29); # \c] - my $splitchar2=chr(30); # \c^ - my $splitchar3=chr(31); # \c_ - my $debug=0; - my $record; - foreach $record (split(/$splitchar/, $data)) { - my @record; - my $directory=0; - my $tagcounter=0; - my %tag; - my $field; - - my $leader=substr($record,0,24); - print "
parse Leader:$leader
\n" if $debug; - push (@record, { - 'tag' => 'LDR', - 'indicator' => $leader , - } ); - - $record=substr($record,24); - foreach $field (split(/$splitchar2/, $record)) { - my %field; - my $tag; - my $indicator; - unless ($directory) { - # If we didn't already find a directory, extract one. - $directory=$field; - my $itemcounter=1; - my $counter2=0; - my $item; - my $length; - my $start; - while ($item=substr($directory,0,12)) { - # Pull out location of first field - $tag=substr($directory,0,3); - $length=substr($directory,3,4); - $start=substr($directory,7,6); - - # Bump to next directory entry - $directory=substr($directory,12); - $tag{$counter2}=$tag; - $counter2++; - } - $directory=1; - next; - } - $tag=$tag{$tagcounter}; - $tagcounter++; - $field{'tag'}=$tag; - my @subfields=split(/$splitchar3/, $field); - $indicator=$subfields[0]; - $field{'indicator'}=$indicator; - print "
parse indicator:$indicator
\n" if $debug; - my $firstline=1; - unless ($#subfields==0) { - my %subfields; - my @subfieldlist; - my $i; - for ($i=1; $i<=$#subfields; $i++) { - my $text=$subfields[$i]; - my $subfieldcode=substr($text,0,1); - my $subfield=substr($text,1); - # if this subfield already exists, do array - if ($subfields{$subfieldcode}) { - my $subfieldlist=$subfields{$subfieldcode}; - if ( ref($subfieldlist) eq 'ARRAY' ) { - # Already an array, add on to it - print "$tag Adding to array $subfieldcode -- $subfield
\n" if $debug; - @subfieldlist=@$subfieldlist; - push (@subfieldlist, $subfield); - } else { - # Change simple value to array - print "$tag Arraying $subfieldcode -- $subfield
\n" if $debug; - @subfieldlist=($subfields{$subfieldcode}, $subfield); - } - # keep new array - $subfields{$subfieldcode}=\@subfieldlist; - } else { - # subfield doesn't exist yet, keep simple value - $subfields{$subfieldcode}=$subfield; - } - } - $field{'subfields'}=\%subfields; - } - push (@record, \%field); - } # foreach field in record - push (@records, \@record); - # $counter++; - } - print "" if $debug; - return @records; -} # sub parsemarcfileformat - -#---------------------------------------------- - -=item taglabel - - $label = &taglabel($tag); - -Converts a MARC tag (a three-digit number, or "LDR") and returns a -descriptive label. - -Note that although the tag looks like a number, it is treated here as -a string. Be sure to use - - $label = &taglabel("082"); - -and not - - $label = &taglabel(082); # <-- Invalid octal number! - -=cut -#' -# FIXME - Does this function mean that %tagtext doesn't need to be -# exported? -sub taglabel { - my ($tag)=@_; - - return $tagtext{$tag}; - -} # sub taglabel - -1; - -#--------------------------------------------- -# $Log$ -# Revision 1.7 2002/10/13 08:30:38 arensb -# Deleted unused variables. -# Removed trailing whitespace. -# -# Revision 1.6 2002/10/10 04:44:28 arensb -# Added whitespace to make the POD work. -# -# Revision 1.5 2002/10/07 00:51:22 arensb -# Added POD and some comments. -# -# Revision 1.4 2002/10/05 09:53:11 arensb -# Merged with arensb-context branch: use C4::Context->dbh instead of -# &C4Connect, and generally prefer C4::Context over C4::Database. -# -# Revision 1.3.2.1 2002/10/04 02:57:38 arensb -# Removed useless "use C4::Database;" line. -# -# Revision 1.3 2002/08/14 18:12:52 tonnesen -# Added copyright statement to all .pl and .pm files -# -# Revision 1.2 2002/07/02 20:30:15 tonnesen -# Merged SimpleMarc.pm over from rel-1-2 -# -# Revision 1.1.2.4 2002/06/28 14:36:47 amillar -# Fix broken logic on illustrator vs. add'l author -# -# Revision 1.1.2.3 2002/06/26 20:54:32 tonnesen -# use warnings breaks on perl 5.005... -# -# Revision 1.1.2.2 2002/06/26 15:52:55 amillar -# Fix display of marc tag labels and indicators -# -# Revision 1.1.2.1 2002/06/26 07:27:35 amillar -# Moved acqui.simple MARC handling to new module SimpleMarc.pm -# -__END__ - -=back - -=head1 AUTHOR - -Koha Developement team - -=cut -- 2.39.5