5 package C4::SimpleMarc;
7 # Routines for handling import of MARC data into Koha db
9 # Koha library project www.koha.org
11 # Licensed under the GPL
15 # standard or CPAN modules used
23 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
25 # set the version for version checking
36 %EXPORT_TAGS = ( ); # eg: TAG => [ qw!name1 name2! ],
38 # your exported package globals go here,
39 # as well as any optionally exported functions
46 # non-exported package globals go here
47 use vars qw(@more $stuff);
49 # initalize package globals, first exported ones
54 # then the others (which are still accessible as $Some::Module::stuff)
58 # all file-scoped lexicals must be created before
59 # the functions below that use them.
61 # file-private lexicals go here
65 # here's a file-private function as a closure,
66 # callable as &$priv_func; it cannot be prototyped.
71 # make all your functions, whether exported or not;
72 #------------------------------------------------
79 '001' => 'Control number',
80 '003' => 'Control number identifier',
81 '005' => 'Date and time of latest transaction',
82 '006' => 'Fixed-length data elements -- additional material characteristics',
83 '007' => 'Physical description fixed field',
84 '008' => 'Fixed length data elements',
86 '015' => 'National library CN',
89 '024' => 'Other standard ID',
90 '035' => 'System control number',
91 '037' => 'Source of acquisition',
92 '040' => 'Cataloging source',
93 '041' => 'Language code',
94 '043' => 'Geographic area code',
95 '043' => 'Publishing country code',
96 '050' => 'Library of Congress call number',
97 '055' => 'Canadian classification number',
98 '060' => 'National Library of Medicine call number',
99 '082' => 'Dewey decimal call number',
100 '100' => 'Main entry -- Personal name',
101 '110' => 'Main entry -- Corporate name',
102 '130' => 'Main entry -- Uniform title',
103 '240' => 'Uniform title',
104 '245' => 'Title statement',
105 '246' => 'Varying form of title',
106 '250' => 'Edition statement',
107 '256' => 'Computer file characteristics',
108 '260' => 'Publication, distribution, etc.',
109 '263' => 'Projected publication date',
110 '300' => 'Physical description',
111 '306' => 'Playing time',
112 '440' => 'Series statement / Added entry -- Title',
113 '490' => 'Series statement',
114 '500' => 'General note',
115 '504' => 'Bibliography, etc. note',
116 '505' => 'Formatted contents note',
117 '508' => 'Creation/production credits note',
118 '510' => 'Citation/references note',
119 '511' => 'Participant or performer note',
120 '520' => 'Summary, etc. note',
121 '521' => 'Target audience note (ie age)',
122 '530' => 'Additional physical form available note',
123 '538' => 'System details note',
124 '586' => 'Awards note',
125 '600' => 'Subject added entry -- Personal name',
126 '610' => 'Subject added entry -- Corporate name',
127 '650' => 'Subject added entry -- Topical term',
128 '651' => 'Subject added entry -- Geographic name',
129 '656' => 'Index term -- Occupation',
130 '700' => 'Added entry -- Personal name',
131 '710' => 'Added entry -- Corporate name',
132 '730' => 'Added entry -- Uniform title',
133 '740' => 'Added entry -- Uncontrolled related/analytical title',
134 '800' => 'Series added entry -- Personal name',
135 '830' => 'Series added entry -- Uniform title',
137 '856' => 'Electronic location and access',
140 # tag, subfield, field name, repeats, striptrailingchars
142 '010'=>{'a'=>{name=> 'lccn', rpt=>0, striptrail=>' ' }},
143 '015'=>{'a'=>{name=> 'lccn', rpt=>0 }},
144 '020'=>{'a'=>{name=> 'isbn', rpt=>0 }},
145 '022'=>{'a'=>{name=> 'issn', rpt=>0 }},
146 '082'=>{'a'=>{name=> 'dewey', rpt=>0 }},
147 '100'=>{'a'=>{name=> 'author', rpt=>0, striptrail=>',:;/-' }},
148 '245'=>{'a'=>{name=> 'title', rpt=>0, striptrail=>',:;/' },
149 'b'=>{name=> 'subtitle', rpt=>0, striptrail=>',:;/' }},
150 '260'=>{'a'=>{name=> 'place', rpt=>0, striptrail=>',:;/-' },
151 'b'=>{name=> 'publisher', rpt=>0, striptrail=>',:;/-' },
152 'c'=>{name=> 'year' , rpt=>0, striptrail=>'.,:;/-' }},
153 '300'=>{'a'=>{name=> 'pages', rpt=>0, striptrail=>',:;/-' },
154 'c'=>{name=> 'size', rpt=>0, striptrail=>',:;/-' }},
155 '362'=>{'a'=>{name=> 'volume-number', rpt=>0 }},
156 '440'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/' },
157 'v'=>{name=> 'volume-number',rpt=>0 }},
158 '490'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/' },
159 'v'=>{name=> 'volume-number',rpt=>0 }},
160 '700'=>{'a'=>{name=> 'addtional-author-illus',rpt=>1, striptrail=>',:;/' }},
161 '5xx'=>{'a'=>{name=> 'notes', rpt=>1 }},
162 '65x'=>{'a'=>{name=> 'subject', rpt=>1, striptrail=>'.,:;/-' }},
167 sub extractmarcfields {
171 $record, # pointer to list of MARC field hashes.
172 # Example: $record->[0]->{'tag'} = '100' # Author
173 # $record->[0]->{'subfields'}->{'a'} = subfieldvalue
177 my $bib; # pointer to hash of named output fields
178 # Example: $bib->{'author'} = "Twain, Mark";
185 $subfield, # Marc subfield [a-z]
186 $fieldname, # name of field "author", "title", etc.
187 $strip, # chars to remove from end of field
188 $stripregex, # reg exp pattern
190 my ($lccn, $isbn, $issn,
191 $publicationyear, @subjects, $subject,
193 $notes, $additionalauthors, $illustrator, $copyrightdate,
194 $s, $subdivision, $subjectsubfield,
197 print "<PRE>\n" if $debug;
199 if ( ref($record) eq "ARRAY" ) {
200 foreach $field (@$record) {
202 # Check each subfield in field
203 foreach $subfield ( keys %{$field->{subfields}} ) {
204 # see if it is defined in our Marc to koha mapping table
205 if ( $fieldname=$tagmap{ $field->{'tag'} }->{$subfield}->{name} ) {
206 # Yes, so keep the value
207 if ( ref($field->{'subfields'}->{$subfield} ) eq 'ARRAY' ) {
208 # if it was an array, just keep first element.
209 $bib->{$fieldname}=$field->{'subfields'}->{$subfield}[0];
211 $bib->{$fieldname}=$field->{'subfields'}->{$subfield};
213 print "$field->{'tag'} $subfield $fieldname=$bib->{$fieldname}\n" if $debug;
214 # see if this field should have trailing chars dropped
215 if ($strip=$tagmap{ $field->{'tag'} }->{$subfield}->{striptrail} ) {
216 $strip=~s//\\/; # backquote each char
217 $stripregex='[ ' . $strip . ']+$'; # remove trailing spaces also
218 $bib->{$fieldname}=~s/$stripregex//;
219 # also strip leading spaces
220 $bib->{$fieldname}=~s/^ +//;
222 print "Found subfield $field->{'tag'} $subfield " .
223 "$fieldname = $bib->{$fieldname}\n" if $debug;
229 if ($field->{'tag'} eq '001') {
230 $bib->{controlnumber}=$field->{'indicator'};
232 if ($field->{'tag'} eq '015') {
233 $bib->{lccn}=$field->{'subfields'}->{'a'};
234 $bib->{lccn}=~s/^\s*//;
235 $bib->{lccn}=~s/^C//;
236 ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0];
240 if ($field->{'tag'} eq '260') {
242 $publicationyear=$field->{'subfields'}->{'c'};
243 if ($publicationyear=~/c(\d\d\d\d)/) {
246 if ($publicationyear=~/[^c](\d\d\d\d)/) {
248 } elsif ($copyrightdate) {
249 $publicationyear=$copyrightdate;
251 $publicationyear=~/(\d\d\d\d)/;
255 if ($field->{'tag'} eq '700') {
256 my $name=$field->{'subfields'}->{'a'};
257 if ( defined($field->{'subfields'}->{'e'})
258 and $field->{'subfields'}->{'e'}=~/ill/) {
261 $additionalauthors.="$name\n";
264 if ($field->{'tag'} =~/^5/) {
265 $notes.="$field->{'subfields'}->{'a'}\n";
267 if ($field->{'tag'} =~/65\d/) {
269 my $subject=$field->{'subfields'}->{'a'};
271 print "Subject=$subject\n" if $debug;
272 foreach $subjectsubfield ( 'x','y','z' ) {
273 if ($subdivision=$field->{'subfields'}->{$subjectsubfield}) {
274 if ( ref($subdivision) eq 'ARRAY' ) {
275 foreach $s (@$subdivision) {
278 } # foreach subdivision
280 $subdivision=~s/\.$//;
281 $subject.=" -- $subdivision";
283 } # if subfield exists
285 print "Subject=$subject\n" if $debug;
286 push @subjects, $subject;
291 ($publicationyear ) && ($bib->{publicationyear}=$publicationyear );
292 ($copyrightdate ) && ($bib->{copyrightdate}=$copyrightdate );
293 ($additionalauthors ) && ($bib->{additionalauthors}=$additionalauthors );
294 ($illustrator ) && ($bib->{illustrator}=$illustrator );
295 ($notes ) && ($bib->{notes}=$notes );
296 ($#subjects ) && ($bib->{subject}=\@subjects );
300 $bib->{dewey}=~s/\///g; # drop any slashes
304 ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0]; # only keep first word
307 if ( $bib->{isbn} ) {
308 $bib->{isbn}=~s/[^\d]*//g; # drop non-digits
311 if ( $bib->{issn} ) {
312 $bib->{issn}=~s/^\s*//;
313 ($bib->{issn}) = (split(/\s+/, $bib->{issn}))[0];
316 if ( $bib->{'volume-number'} ) {
317 if ($bib->{'volume-number'}=~/(\d+).*(\d+)/ ) {
321 $bib->{volume}=$bib->{'volume-number'};
323 delete $bib->{'volume-number'};
327 print "Error: extractmarcfields: input ref $record is " .
328 ref($record) . " not ARRAY. Contact sysadmin.\n";
330 print "</PRE>\n" if $debug;
334 } # sub extractmarcfields
335 #---------------------------------
337 #--------------------------
338 # Parse MARC data in file format with control-character separators
339 # May be multiple records.
340 sub parsemarcfileformat {
342 # Input is one big text string
344 # Output is list of records. Each record is list of field hashes
347 my $splitchar=chr(29);
348 my $splitchar2=chr(30);
349 my $splitchar3=chr(31);
352 foreach $record (split(/$splitchar/, $data)) {
359 my $leader=substr($record,0,24);
360 print "<pre>parse Leader:$leader</pre>\n" if $debug;
363 'indicator' => $leader ,
366 $record=substr($record,24);
367 foreach $field (split(/$splitchar2/, $record)) {
371 unless ($directory) {
372 # If we didn't already find a directory, extract one.
379 while ($item=substr($directory,0,12)) {
380 # Pull out location of first field
381 $tag=substr($directory,0,3);
382 $length=substr($directory,3,4);
383 $start=substr($directory,7,6);
385 # Bump to next directory entry
386 $directory=substr($directory,12);
387 $tag{$counter2}=$tag;
393 $tag=$tag{$tagcounter};
396 my @subfields=split(/$splitchar3/, $field);
397 $indicator=$subfields[0];
398 $field{'indicator'}=$indicator;
399 print "<pre>parse indicator:$indicator</pre>\n" if $debug;
401 unless ($#subfields==0) {
405 for ($i=1; $i<=$#subfields; $i++) {
406 my $text=$subfields[$i];
407 my $subfieldcode=substr($text,0,1);
408 my $subfield=substr($text,1);
409 # if this subfield already exists, do array
410 if ($subfields{$subfieldcode}) {
411 my $subfieldlist=$subfields{$subfieldcode};
412 if ( ref($subfieldlist) eq 'ARRAY' ) {
413 # Already an array, add on to it
414 print "$tag Adding to array $subfieldcode -- $subfield<br>\n" if $debug;
415 @subfieldlist=@$subfieldlist;
416 push (@subfieldlist, $subfield);
418 # Change simple value to array
419 print "$tag Arraying $subfieldcode -- $subfield<br>\n" if $debug;
420 @subfieldlist=($subfields{$subfieldcode}, $subfield);
423 $subfields{$subfieldcode}=\@subfieldlist;
425 # subfield doesn't exist yet, keep simple value
426 $subfields{$subfieldcode}=$subfield;
429 $field{'subfields'}=\%subfields;
431 push (@record, \%field);
432 } # foreach field in record
433 push (@records, \@record);
436 print "</pre>" if $debug;
438 } # sub parsemarcfileformat
440 #----------------------------------------------
444 return $tagtext{$tag};
448 #---------------------------------------------
450 # Revision 1.2 2002/07/02 20:30:15 tonnesen
451 # Merged SimpleMarc.pm over from rel-1-2
453 # Revision 1.1.2.4 2002/06/28 14:36:47 amillar
454 # Fix broken logic on illustrator vs. add'l author
456 # Revision 1.1.2.3 2002/06/26 20:54:32 tonnesen
457 # use warnings breaks on perl 5.005...
459 # Revision 1.1.2.2 2002/06/26 15:52:55 amillar
460 # Fix display of marc tag labels and indicators
462 # Revision 1.1.2.1 2002/06/26 07:27:35 amillar
463 # Moved acqui.simple MARC handling to new module SimpleMarc.pm