5 package C4::SimpleMarc;
7 # Routines for handling import of MARC data into Koha db
9 # Koha library project www.koha.org
11 # Licensed under the GPL
14 # Copyright 2000-2002 Katipo Communications
16 # This file is part of Koha.
18 # Koha is free software; you can redistribute it and/or modify it under the
19 # terms of the GNU General Public License as published by the Free Software
20 # Foundation; either version 2 of the License, or (at your option) any later
23 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
24 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
25 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
27 # You should have received a copy of the GNU General Public License along with
28 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
29 # Suite 330, Boston, MA 02111-1307 USA
33 # standard or CPAN modules used
41 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
43 # set the version for version checking
54 %EXPORT_TAGS = ( ); # eg: TAG => [ qw!name1 name2! ],
56 # your exported package globals go here,
57 # as well as any optionally exported functions
64 # non-exported package globals go here
65 use vars qw(@more $stuff);
67 # initalize package globals, first exported ones
72 # then the others (which are still accessible as $Some::Module::stuff)
76 # all file-scoped lexicals must be created before
77 # the functions below that use them.
79 # file-private lexicals go here
83 # here's a file-private function as a closure,
84 # callable as &$priv_func; it cannot be prototyped.
89 # make all your functions, whether exported or not;
90 #------------------------------------------------
97 '001' => 'Control number',
98 '003' => 'Control number identifier',
99 '005' => 'Date and time of latest transaction',
100 '006' => 'Fixed-length data elements -- additional material characteristics',
101 '007' => 'Physical description fixed field',
102 '008' => 'Fixed length data elements',
104 '015' => 'National library CN',
107 '024' => 'Other standard ID',
108 '035' => 'System control number',
109 '037' => 'Source of acquisition',
110 '040' => 'Cataloging source',
111 '041' => 'Language code',
112 '043' => 'Geographic area code',
113 '043' => 'Publishing country code',
114 '050' => 'Library of Congress call number',
115 '055' => 'Canadian classification number',
116 '060' => 'National Library of Medicine call number',
117 '082' => 'Dewey decimal call number',
118 '100' => 'Main entry -- Personal name',
119 '110' => 'Main entry -- Corporate name',
120 '130' => 'Main entry -- Uniform title',
121 '240' => 'Uniform title',
122 '245' => 'Title statement',
123 '246' => 'Varying form of title',
124 '250' => 'Edition statement',
125 '256' => 'Computer file characteristics',
126 '260' => 'Publication, distribution, etc.',
127 '263' => 'Projected publication date',
128 '300' => 'Physical description',
129 '306' => 'Playing time',
130 '440' => 'Series statement / Added entry -- Title',
131 '490' => 'Series statement',
132 '500' => 'General note',
133 '504' => 'Bibliography, etc. note',
134 '505' => 'Formatted contents note',
135 '508' => 'Creation/production credits note',
136 '510' => 'Citation/references note',
137 '511' => 'Participant or performer note',
138 '520' => 'Summary, etc. note',
139 '521' => 'Target audience note (ie age)',
140 '530' => 'Additional physical form available note',
141 '538' => 'System details note',
142 '586' => 'Awards note',
143 '600' => 'Subject added entry -- Personal name',
144 '610' => 'Subject added entry -- Corporate name',
145 '650' => 'Subject added entry -- Topical term',
146 '651' => 'Subject added entry -- Geographic name',
147 '656' => 'Index term -- Occupation',
148 '700' => 'Added entry -- Personal name',
149 '710' => 'Added entry -- Corporate name',
150 '730' => 'Added entry -- Uniform title',
151 '740' => 'Added entry -- Uncontrolled related/analytical title',
152 '800' => 'Series added entry -- Personal name',
153 '830' => 'Series added entry -- Uniform title',
155 '856' => 'Electronic location and access',
158 # tag, subfield, field name, repeats, striptrailingchars
160 '010'=>{'a'=>{name=> 'lccn', rpt=>0, striptrail=>' ' }},
161 '015'=>{'a'=>{name=> 'lccn', rpt=>0 }},
162 '020'=>{'a'=>{name=> 'isbn', rpt=>0 }},
163 '022'=>{'a'=>{name=> 'issn', rpt=>0 }},
164 '082'=>{'a'=>{name=> 'dewey', rpt=>0 }},
165 '100'=>{'a'=>{name=> 'author', rpt=>0, striptrail=>',:;/-' }},
166 '245'=>{'a'=>{name=> 'title', rpt=>0, striptrail=>',:;/' },
167 'b'=>{name=> 'subtitle', rpt=>0, striptrail=>',:;/' }},
168 '260'=>{'a'=>{name=> 'place', rpt=>0, striptrail=>',:;/-' },
169 'b'=>{name=> 'publisher', rpt=>0, striptrail=>',:;/-' },
170 'c'=>{name=> 'year' , rpt=>0, striptrail=>'.,:;/-' }},
171 '300'=>{'a'=>{name=> 'pages', rpt=>0, striptrail=>',:;/-' },
172 'c'=>{name=> 'size', rpt=>0, striptrail=>',:;/-' }},
173 '362'=>{'a'=>{name=> 'volume-number', rpt=>0 }},
174 '440'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/' },
175 'v'=>{name=> 'volume-number',rpt=>0 }},
176 '490'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/' },
177 'v'=>{name=> 'volume-number',rpt=>0 }},
178 '700'=>{'a'=>{name=> 'addtional-author-illus',rpt=>1, striptrail=>',:;/' }},
179 '5xx'=>{'a'=>{name=> 'notes', rpt=>1 }},
180 '65x'=>{'a'=>{name=> 'subject', rpt=>1, striptrail=>'.,:;/-' }},
185 sub extractmarcfields {
189 $record, # pointer to list of MARC field hashes.
190 # Example: $record->[0]->{'tag'} = '100' # Author
191 # $record->[0]->{'subfields'}->{'a'} = subfieldvalue
195 my $bib; # pointer to hash of named output fields
196 # Example: $bib->{'author'} = "Twain, Mark";
203 $subfield, # Marc subfield [a-z]
204 $fieldname, # name of field "author", "title", etc.
205 $strip, # chars to remove from end of field
206 $stripregex, # reg exp pattern
208 my ($lccn, $isbn, $issn,
209 $publicationyear, @subjects, $subject,
211 $notes, $additionalauthors, $illustrator, $copyrightdate,
212 $s, $subdivision, $subjectsubfield,
215 print "<PRE>\n" if $debug;
217 if ( ref($record) eq "ARRAY" ) {
218 foreach $field (@$record) {
220 # Check each subfield in field
221 foreach $subfield ( keys %{$field->{subfields}} ) {
222 # see if it is defined in our Marc to koha mapping table
223 if ( $fieldname=$tagmap{ $field->{'tag'} }->{$subfield}->{name} ) {
224 # Yes, so keep the value
225 if ( ref($field->{'subfields'}->{$subfield} ) eq 'ARRAY' ) {
226 # if it was an array, just keep first element.
227 $bib->{$fieldname}=$field->{'subfields'}->{$subfield}[0];
229 $bib->{$fieldname}=$field->{'subfields'}->{$subfield};
231 print "$field->{'tag'} $subfield $fieldname=$bib->{$fieldname}\n" if $debug;
232 # see if this field should have trailing chars dropped
233 if ($strip=$tagmap{ $field->{'tag'} }->{$subfield}->{striptrail} ) {
234 $strip=~s//\\/; # backquote each char
235 $stripregex='[ ' . $strip . ']+$'; # remove trailing spaces also
236 $bib->{$fieldname}=~s/$stripregex//;
237 # also strip leading spaces
238 $bib->{$fieldname}=~s/^ +//;
240 print "Found subfield $field->{'tag'} $subfield " .
241 "$fieldname = $bib->{$fieldname}\n" if $debug;
247 if ($field->{'tag'} eq '001') {
248 $bib->{controlnumber}=$field->{'indicator'};
250 if ($field->{'tag'} eq '015') {
251 $bib->{lccn}=$field->{'subfields'}->{'a'};
252 $bib->{lccn}=~s/^\s*//;
253 $bib->{lccn}=~s/^C//;
254 ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0];
258 if ($field->{'tag'} eq '260') {
260 $publicationyear=$field->{'subfields'}->{'c'};
261 if ($publicationyear=~/c(\d\d\d\d)/) {
264 if ($publicationyear=~/[^c](\d\d\d\d)/) {
266 } elsif ($copyrightdate) {
267 $publicationyear=$copyrightdate;
269 $publicationyear=~/(\d\d\d\d)/;
273 if ($field->{'tag'} eq '700') {
274 my $name=$field->{'subfields'}->{'a'};
275 if ( defined($field->{'subfields'}->{'e'})
276 and $field->{'subfields'}->{'e'}=~/ill/) {
279 $additionalauthors.="$name\n";
282 if ($field->{'tag'} =~/^5/) {
283 $notes.="$field->{'subfields'}->{'a'}\n";
285 if ($field->{'tag'} =~/65\d/) {
287 my $subject=$field->{'subfields'}->{'a'};
289 print "Subject=$subject\n" if $debug;
290 foreach $subjectsubfield ( 'x','y','z' ) {
291 if ($subdivision=$field->{'subfields'}->{$subjectsubfield}) {
292 if ( ref($subdivision) eq 'ARRAY' ) {
293 foreach $s (@$subdivision) {
296 } # foreach subdivision
298 $subdivision=~s/\.$//;
299 $subject.=" -- $subdivision";
301 } # if subfield exists
303 print "Subject=$subject\n" if $debug;
304 push @subjects, $subject;
309 ($publicationyear ) && ($bib->{publicationyear}=$publicationyear );
310 ($copyrightdate ) && ($bib->{copyrightdate}=$copyrightdate );
311 ($additionalauthors ) && ($bib->{additionalauthors}=$additionalauthors );
312 ($illustrator ) && ($bib->{illustrator}=$illustrator );
313 ($notes ) && ($bib->{notes}=$notes );
314 ($#subjects ) && ($bib->{subject}=\@subjects );
318 $bib->{dewey}=~s/\///g; # drop any slashes
322 ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0]; # only keep first word
325 if ( $bib->{isbn} ) {
326 $bib->{isbn}=~s/[^\d]*//g; # drop non-digits
329 if ( $bib->{issn} ) {
330 $bib->{issn}=~s/^\s*//;
331 ($bib->{issn}) = (split(/\s+/, $bib->{issn}))[0];
334 if ( $bib->{'volume-number'} ) {
335 if ($bib->{'volume-number'}=~/(\d+).*(\d+)/ ) {
339 $bib->{volume}=$bib->{'volume-number'};
341 delete $bib->{'volume-number'};
345 print "Error: extractmarcfields: input ref $record is " .
346 ref($record) . " not ARRAY. Contact sysadmin.\n";
348 print "</PRE>\n" if $debug;
352 } # sub extractmarcfields
353 #---------------------------------
355 #--------------------------
356 # Parse MARC data in file format with control-character separators
357 # May be multiple records.
358 sub parsemarcfileformat {
360 # Input is one big text string
362 # Output is list of records. Each record is list of field hashes
365 my $splitchar=chr(29);
366 my $splitchar2=chr(30);
367 my $splitchar3=chr(31);
370 foreach $record (split(/$splitchar/, $data)) {
377 my $leader=substr($record,0,24);
378 print "<pre>parse Leader:$leader</pre>\n" if $debug;
381 'indicator' => $leader ,
384 $record=substr($record,24);
385 foreach $field (split(/$splitchar2/, $record)) {
389 unless ($directory) {
390 # If we didn't already find a directory, extract one.
397 while ($item=substr($directory,0,12)) {
398 # Pull out location of first field
399 $tag=substr($directory,0,3);
400 $length=substr($directory,3,4);
401 $start=substr($directory,7,6);
403 # Bump to next directory entry
404 $directory=substr($directory,12);
405 $tag{$counter2}=$tag;
411 $tag=$tag{$tagcounter};
414 my @subfields=split(/$splitchar3/, $field);
415 $indicator=$subfields[0];
416 $field{'indicator'}=$indicator;
417 print "<pre>parse indicator:$indicator</pre>\n" if $debug;
419 unless ($#subfields==0) {
423 for ($i=1; $i<=$#subfields; $i++) {
424 my $text=$subfields[$i];
425 my $subfieldcode=substr($text,0,1);
426 my $subfield=substr($text,1);
427 # if this subfield already exists, do array
428 if ($subfields{$subfieldcode}) {
429 my $subfieldlist=$subfields{$subfieldcode};
430 if ( ref($subfieldlist) eq 'ARRAY' ) {
431 # Already an array, add on to it
432 print "$tag Adding to array $subfieldcode -- $subfield<br>\n" if $debug;
433 @subfieldlist=@$subfieldlist;
434 push (@subfieldlist, $subfield);
436 # Change simple value to array
437 print "$tag Arraying $subfieldcode -- $subfield<br>\n" if $debug;
438 @subfieldlist=($subfields{$subfieldcode}, $subfield);
441 $subfields{$subfieldcode}=\@subfieldlist;
443 # subfield doesn't exist yet, keep simple value
444 $subfields{$subfieldcode}=$subfield;
447 $field{'subfields'}=\%subfields;
449 push (@record, \%field);
450 } # foreach field in record
451 push (@records, \@record);
454 print "</pre>" if $debug;
456 } # sub parsemarcfileformat
458 #----------------------------------------------
462 return $tagtext{$tag};
466 #---------------------------------------------
468 # Revision 1.3 2002/08/14 18:12:52 tonnesen
469 # Added copyright statement to all .pl and .pm files
471 # Revision 1.2 2002/07/02 20:30:15 tonnesen
472 # Merged SimpleMarc.pm over from rel-1-2
474 # Revision 1.1.2.4 2002/06/28 14:36:47 amillar
475 # Fix broken logic on illustrator vs. add'l author
477 # Revision 1.1.2.3 2002/06/26 20:54:32 tonnesen
478 # use warnings breaks on perl 5.005...
480 # Revision 1.1.2.2 2002/06/26 15:52:55 amillar
481 # Fix display of marc tag labels and indicators
483 # Revision 1.1.2.1 2002/06/26 07:27:35 amillar
484 # Moved acqui.simple MARC handling to new module SimpleMarc.pm