From dcfb3b835ca59b0dc3744ed35d11db85f41aaa06 Mon Sep 17 00:00:00 2001
From: tonnesen <tonnesen>
Date: Tue, 2 Jul 2002 20:30:15 +0000
Subject: [PATCH] Merged SimpleMarc.pm over from rel-1-2

---
 C4/SimpleMarc.pm | 464 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 464 insertions(+)
 create mode 100755 C4/SimpleMarc.pm
diff --git a/C4/SimpleMarc.pm b/C4/SimpleMarc.pm
new file mode 100755
index 0000000000..65b7798e2e
--- /dev/null
+++ b/C4/SimpleMarc.pm
@@ -0,0 +1,464 @@
+#!/usr/bin/perl
+
+# $Id$
+
+package C4::SimpleMarc;
+
+# Routines for handling import of MARC data into Koha db
+
+# Koha library project  www.koha.org
+
+# Licensed under the GPL
+
+use strict;
+
+# standard or CPAN modules used
+use DBI;
+
+# Koha modules used
+use C4::Database;
+
+require Exporter;
+
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+
+# set the version for version checking
+$VERSION = 0.01;
+
+@ISA = qw(Exporter);
+@EXPORT = qw(
+	&extractmarcfields 
+	&parsemarcfileformat 
+	&taglabel
+	%tagtext
+	%tagmap
+);
+%EXPORT_TAGS = ( );     # eg: TAG => [ qw!name1 name2! ],
+
+# your exported package globals go here,
+# as well as any optionally exported functions
+
+@EXPORT_OK   = qw(
+	%tagtext
+	%tagmap
+);
+
+# non-exported package globals go here
+use vars qw(@more $stuff);
+
+# initalize package globals, first exported ones
+
+my $Var1   = '';
+my %Hashit = ();
+
+# then the others (which are still accessible as $Some::Module::stuff)
+my $stuff  = '';
+my @more   = ();
+
+# all file-scoped lexicals must be created before
+# the functions below that use them.
+
+# file-private lexicals go here
+my $priv_var    = '';
+my %secret_hash = ();
+
+# here's a file-private function as a closure,
+# callable as &$priv_func;  it cannot be prototyped.
+my $priv_func = sub {
+  # stuff goes here.
+  };
+  
+# make all your functions, whether exported or not;
+#------------------------------------------------
+
+#------------------
+# Constants
+
+my %tagtext = (
+    'LDR' => 'Leader',
+    '001' => 'Control number',
+    '003' => 'Control number identifier',
+    '005' => 'Date and time of latest transaction',
+    '006' => 'Fixed-length data elements -- additional material characteristics',
+    '007' => 'Physical description fixed field',
+    '008' => 'Fixed length data elements',
+    '010' => 'LCCN',
+    '015' => 'National library CN',
+    '020' => 'ISBN',
+    '022' => 'ISSN',
+    '024' => 'Other standard ID',
+    '035' => 'System control number',
+    '037' => 'Source of acquisition',
+    '040' => 'Cataloging source',
+    '041' => 'Language code',
+    '043' => 'Geographic area code',
+    '043' => 'Publishing country code',
+    '050' => 'Library of Congress call number',
+    '055' => 'Canadian classification number',
+    '060' => 'National Library of Medicine call number',
+    '082' => 'Dewey decimal call number',
+    '100' => 'Main entry -- Personal name',
+    '110' => 'Main entry -- Corporate name',
+    '130' => 'Main entry -- Uniform title',
+    '240' => 'Uniform title',
+    '245' => 'Title statement',
+    '246' => 'Varying form of title',
+    '250' => 'Edition statement',
+    '256' => 'Computer file characteristics',
+    '260' => 'Publication, distribution, etc.',
+    '263' => 'Projected publication date',
+    '300' => 'Physical description',
+    '306' => 'Playing time',
+    '440' => 'Series statement / Added entry -- Title',
+    '490' => 'Series statement',
+    '500' => 'General note',
+    '504' => 'Bibliography, etc. note',
+    '505' => 'Formatted contents note',
+    '508' => 'Creation/production credits note',
+    '510' => 'Citation/references note',
+    '511' => 'Participant or performer note',
+    '520' => 'Summary, etc. note',
+    '521' => 'Target audience note (ie age)',
+    '530' => 'Additional physical form available note',
+    '538' => 'System details note',
+    '586' => 'Awards note',
+    '600' => 'Subject added entry -- Personal name',
+    '610' => 'Subject added entry -- Corporate name',
+    '650' => 'Subject added entry -- Topical term',
+    '651' => 'Subject added entry -- Geographic name',
+    '656' => 'Index term -- Occupation',
+    '700' => 'Added entry -- Personal name',
+    '710' => 'Added entry -- Corporate name',
+    '730' => 'Added entry -- Uniform title',
+    '740' => 'Added entry -- Uncontrolled related/analytical title',
+    '800' => 'Series added entry -- Personal name',
+    '830' => 'Series added entry -- Uniform title',
+    '852' => 'Location',
+    '856' => 'Electronic location and access',
+);
+
+# tag, subfield, field name, repeats, striptrailingchars
+my %tagmap=(
+    '010'=>{'a'=>{name=> 'lccn',	rpt=>0, striptrail=>' ' 	}},
+    '015'=>{'a'=>{name=> 'lccn',	rpt=>0	}},
+    '020'=>{'a'=>{name=> 'isbn',	rpt=>0	}},
+    '022'=>{'a'=>{name=> 'issn',	rpt=>0	}},
+    '082'=>{'a'=>{name=> 'dewey',	rpt=>0	}},
+    '100'=>{'a'=>{name=> 'author',	rpt=>0, striptrail=>',:;/-'	}},
+    '245'=>{'a'=>{name=> 'title',	rpt=>0, striptrail=>',:;/'	},
+            'b'=>{name=> 'subtitle',	rpt=>0, striptrail=>',:;/'	}},
+    '260'=>{'a'=>{name=> 'place',	rpt=>0, striptrail=>',:;/-'	},
+            'b'=>{name=> 'publisher',	rpt=>0, striptrail=>',:;/-'	},
+            'c'=>{name=> 'year' ,	rpt=>0, striptrail=>'.,:;/-'	}},
+    '300'=>{'a'=>{name=> 'pages',	rpt=>0, striptrail=>',:;/-'	},
+            'c'=>{name=> 'size',	rpt=>0, striptrail=>',:;/-'	}},
+    '362'=>{'a'=>{name=> 'volume-number',	rpt=>0	}},
+    '440'=>{'a'=>{name=> 'seriestitle',	rpt=>0, striptrail=>',:;/'	},
+            'v'=>{name=> 'volume-number',rpt=>0	}},
+    '490'=>{'a'=>{name=> 'seriestitle',	rpt=>0, striptrail=>',:;/'	},
+            'v'=>{name=> 'volume-number',rpt=>0	}},
+    '700'=>{'a'=>{name=> 'addtional-author-illus',rpt=>1, striptrail=>',:;/'	}},
+    '5xx'=>{'a'=>{name=> 'notes',	rpt=>1	}},
+    '65x'=>{'a'=>{name=> 'subject',	rpt=>1, striptrail=>'.,:;/-'	}},
+);
+
+
+#------------------
+sub extractmarcfields {
+    use strict;
+    # input
+    my (
+	$record,	# pointer to list of MARC field hashes.
+			# Example: $record->[0]->{'tag'} = '100' # Author
+			# 	$record->[0]->{'subfields'}->{'a'} = subfieldvalue
+    )=@_;
+
+    # return 
+    my $bib;		# pointer to hash of named output fields
+			# Example: $bib->{'author'} = "Twain, Mark";
+
+    my $debug=0;
+
+    my (
+	$field, 	# hash ref
+	$value, 
+	$subfield,	# Marc subfield [a-z]
+	$fieldname,	# name of field "author", "title", etc.
+	$strip,		# chars to remove from end of field
+	$stripregex,	# reg exp pattern
+    );
+    my ($lccn, $isbn, $issn,    
+	$publicationyear, @subjects, $subject,
+	$controlnumber, 
+	$notes, $additionalauthors, $illustrator, $copyrightdate, 
+	$s, $subdivision, $subjectsubfield,
+    );
+
+    print "<PRE>\n" if $debug;
+
+    if ( ref($record) eq "ARRAY" ) {
+        foreach $field (@$record) {
+
+	    # Check each subfield in field
+	    foreach $subfield ( keys %{$field->{subfields}} ) {
+		# see if it is defined in our Marc to koha mapping table
+	    	if ( $fieldname=$tagmap{ $field->{'tag'} }->{$subfield}->{name} ) {
+		    # Yes, so keep the value
+		    if ( ref($field->{'subfields'}->{$subfield} ) eq 'ARRAY' ) {
+		        # if it was an array, just keep first element.
+		        $bib->{$fieldname}=$field->{'subfields'}->{$subfield}[0];
+		    } else {
+		        $bib->{$fieldname}=$field->{'subfields'}->{$subfield};
+		    } # if array
+		    print "$field->{'tag'} $subfield $fieldname=$bib->{$fieldname}\n" if $debug;
+		    # see if this field should have trailing chars dropped
+	    	    if ($strip=$tagmap{ $field->{'tag'} }->{$subfield}->{striptrail} ) {
+			$strip=~s//\\/; # backquote each char
+			$stripregex='[ ' . $strip . ']+$';  # remove trailing spaces also
+			$bib->{$fieldname}=~s/$stripregex//;
+			# also strip leading spaces
+			$bib->{$fieldname}=~s/^ +//;
+		    } # if strip
+		    print "Found subfield $field->{'tag'} $subfield " .
+			"$fieldname = $bib->{$fieldname}\n" if $debug;
+		} # if tagmap exists
+
+	    } # foreach subfield
+
+
+	    if ($field->{'tag'} eq '001') {
+		$bib->{controlnumber}=$field->{'indicator'};
+	    }
+	    if ($field->{'tag'} eq '015') {
+		$bib->{lccn}=$field->{'subfields'}->{'a'};
+		$bib->{lccn}=~s/^\s*//;
+		$bib->{lccn}=~s/^C//;
+		($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0];
+	    }
+
+
+		if ($field->{'tag'} eq '260') {
+
+		    $publicationyear=$field->{'subfields'}->{'c'};
+		    if ($publicationyear=~/c(\d\d\d\d)/) {
+			$copyrightdate=$1;
+		    }
+		    if ($publicationyear=~/[^c](\d\d\d\d)/) {
+			$publicationyear=$1;
+		    } elsif ($copyrightdate) {
+			$publicationyear=$copyrightdate;
+		    } else {
+			$publicationyear=~/(\d\d\d\d)/;
+			$publicationyear=$1;
+		    }
+		}
+		if ($field->{'tag'} eq '700') {
+		    my $name=$field->{'subfields'}->{'a'};
+		    if ( defined($field->{'subfields'}->{'e'}) 
+		        and  $field->{'subfields'}->{'e'}=~/ill/) {
+			$illustrator=$name;
+		    } else {
+			$additionalauthors.="$name\n";
+		    }
+		}
+		if ($field->{'tag'} =~/^5/) {
+		    $notes.="$field->{'subfields'}->{'a'}\n";
+		}
+		if ($field->{'tag'} =~/65\d/) {
+		    my $sub;
+		    my $subject=$field->{'subfields'}->{'a'};
+		    $subject=~s/\.$//;
+		    print "Subject=$subject\n" if $debug;
+		    foreach $subjectsubfield ( 'x','y','z' ) {
+		      if ($subdivision=$field->{'subfields'}->{$subjectsubfield}) {
+			if ( ref($subdivision) eq 'ARRAY' ) {
+			    foreach $s (@$subdivision) {
+				$s=~s/\.$//;
+				$subject.=" -- $s";
+			    } # foreach subdivision
+			} else {
+			    $subdivision=~s/\.$//;
+			    $subject.=" -- $subdivision";
+			} # if array
+		      } # if subfield exists
+		    } # foreach subfield
+		    print "Subject=$subject\n" if $debug;
+		    push @subjects, $subject;
+		} # if tag 65x
+
+
+        } # foreach field
+	($publicationyear	) && ($bib->{publicationyear}=$publicationyear  );
+	($copyrightdate		) && ($bib->{copyrightdate}=$copyrightdate  );
+	($additionalauthors	) && ($bib->{additionalauthors}=$additionalauthors  );
+	($illustrator		) && ($bib->{illustrator}=$illustrator  );
+	($notes			) && ($bib->{notes}=$notes  );
+	($#subjects		) && ($bib->{subject}=\@subjects  );
+
+	# Misc cleanup
+	if ($bib->{dewey}) {
+	    $bib->{dewey}=~s/\///g;	# drop any slashes
+	}
+
+	if ($bib->{lccn}) {
+	   ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0]; # only keep first word
+	}
+
+	if ( $bib->{isbn} ) {
+	    $bib->{isbn}=~s/[^\d]*//g;	# drop non-digits
+	};
+
+	if ( $bib->{issn} ) {
+	    $bib->{issn}=~s/^\s*//;
+	    ($bib->{issn}) = (split(/\s+/, $bib->{issn}))[0];
+	};
+
+	if ( $bib->{'volume-number'} ) {
+	    if ($bib->{'volume-number'}=~/(\d+).*(\d+)/ ) {
+		$bib->{'volume'}=$1;
+		$bib->{'number'}=$2;
+	    } else {
+		$bib->{volume}=$bib->{'volume-number'};
+	    }
+	    delete $bib->{'volume-number'};
+	} # if volume-number
+
+    } else {
+	print "Error: extractmarcfields: input ref $record is " .
+		ref($record) . " not ARRAY. Contact sysadmin.\n";
+    }
+    print "</PRE>\n" if $debug;
+
+    return $bib;
+
+} # sub extractmarcfields
+#---------------------------------
+
+#--------------------------
+# Parse MARC data in file format with control-character separators
+#   May be multiple records.
+sub parsemarcfileformat {
+    use strict;
+    # Input is one big text string
+    my $data=shift;
+    # Output is list of records.  Each record is list of field hashes
+    my @records;
+
+    my $splitchar=chr(29);
+    my $splitchar2=chr(30);
+    my $splitchar3=chr(31);
+    my $debug=0;
+    my $record;
+    foreach $record (split(/$splitchar/, $data)) {
+	my @record;
+	my $directory=0;
+	my $tagcounter=0;
+	my %tag;
+	my $field;
+
+	my $leader=substr($record,0,24);
+	print "<pre>parse Leader:$leader</pre>\n" if $debug;
+	push (@record, {
+		'tag' => 'LDR',
+		'indicator' => $leader ,
+	} );
+
+	$record=substr($record,24);
+	foreach $field (split(/$splitchar2/, $record)) {
+	    my %field;
+	    my $tag;
+	    my $indicator;
+	    unless ($directory) {
+		# If we didn't already find a directory, extract one.
+		$directory=$field;
+		my $itemcounter=1;
+		my $counter2=0;
+		my $item;
+		my $length;
+		my $start;
+		while ($item=substr($directory,0,12)) {
+		    # Pull out location of first field
+		    $tag=substr($directory,0,3);
+		    $length=substr($directory,3,4);
+		    $start=substr($directory,7,6);
+
+		    # Bump to next directory entry
+		    $directory=substr($directory,12);
+		    $tag{$counter2}=$tag;
+		    $counter2++;
+		}
+		$directory=1;
+		next;
+	    }
+	    $tag=$tag{$tagcounter};
+	    $tagcounter++;
+	    $field{'tag'}=$tag;
+	    my @subfields=split(/$splitchar3/, $field);
+	    $indicator=$subfields[0];
+	    $field{'indicator'}=$indicator;
+	    print "<pre>parse indicator:$indicator</pre>\n" if $debug;
+	    my $firstline=1;
+	    unless ($#subfields==0) {
+		my %subfields;
+		my @subfieldlist;
+		my $i;
+		for ($i=1; $i<=$#subfields; $i++) {
+		    my $text=$subfields[$i];
+		    my $subfieldcode=substr($text,0,1);
+		    my $subfield=substr($text,1);
+		    # if this subfield already exists, do array
+		    if ($subfields{$subfieldcode}) {
+			my $subfieldlist=$subfields{$subfieldcode};
+			if ( ref($subfieldlist) eq 'ARRAY' ) {
+                            # Already an array, add on to it
+			    print "$tag Adding to array $subfieldcode -- $subfield<br>\n" if $debug;
+			    @subfieldlist=@$subfieldlist;
+			    push (@subfieldlist, $subfield);
+			} else {
+                            # Change simple value to array
+			    print "$tag Arraying $subfieldcode -- $subfield<br>\n" if $debug;
+			    @subfieldlist=($subfields{$subfieldcode}, $subfield);
+			}
+			# keep new array
+			$subfields{$subfieldcode}=\@subfieldlist;
+		    } else {
+			# subfield doesn't exist yet, keep simple value
+			$subfields{$subfieldcode}=$subfield;
+		    }
+		}
+		$field{'subfields'}=\%subfields;
+	    }
+	    push (@record, \%field);
+	} # foreach field in record
+	push (@records, \@record);
+	# $counter++;
+    }
+    print "</pre>" if $debug;
+    return @records;
+} # sub parsemarcfileformat
+
+#----------------------------------------------
+sub taglabel {
+    my ($tag)=@_;
+
+    return $tagtext{$tag};
+
+} # sub taglabel
+
+#---------------------------------------------
+# $Log$
+# Revision 1.2  2002/07/02 20:30:15  tonnesen
+# Merged SimpleMarc.pm over from rel-1-2
+#
+# Revision 1.1.2.4  2002/06/28 14:36:47  amillar
+# Fix broken logic on illustrator vs. add'l author
+#
+# Revision 1.1.2.3  2002/06/26 20:54:32  tonnesen
+# use warnings breaks on perl 5.005...
+#
+# Revision 1.1.2.2  2002/06/26 15:52:55  amillar
+# Fix display of marc tag labels and indicators
+#
+# Revision 1.1.2.1  2002/06/26 07:27:35  amillar
+# Moved acqui.simple MARC handling to new module SimpleMarc.pm
+#
-- 
2.20.1