5 package C4::SimpleMarc;
7 # Routines for handling import of MARC data into Koha db
9 # Koha library project www.koha.org
11 # Licensed under the GPL
14 # Copyright 2000-2002 Katipo Communications
16 # This file is part of Koha.
18 # Koha is free software; you can redistribute it and/or modify it under the
19 # terms of the GNU General Public License as published by the Free Software
20 # Foundation; either version 2 of the License, or (at your option) any later
23 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
24 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
25 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
27 # You should have received a copy of the GNU General Public License along with
28 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
29 # Suite 330, Boston, MA 02111-1307 USA
33 # standard or CPAN modules used
40 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
42 # set the version for version checking
53 %EXPORT_TAGS = ( ); # eg: TAG => [ qw!name1 name2! ],
55 # your exported package globals go here,
56 # as well as any optionally exported functions
63 # non-exported package globals go here
64 use vars qw(@more $stuff);
66 # initalize package globals, first exported ones
71 # then the others (which are still accessible as $Some::Module::stuff)
75 # all file-scoped lexicals must be created before
76 # the functions below that use them.
78 # file-private lexicals go here
82 # here's a file-private function as a closure,
83 # callable as &$priv_func; it cannot be prototyped.
88 # make all your functions, whether exported or not;
89 #------------------------------------------------
96 '001' => 'Control number',
97 '003' => 'Control number identifier',
98 '005' => 'Date and time of latest transaction',
99 '006' => 'Fixed-length data elements -- additional material characteristics',
100 '007' => 'Physical description fixed field',
101 '008' => 'Fixed length data elements',
103 '015' => 'National library CN',
106 '024' => 'Other standard ID',
107 '035' => 'System control number',
108 '037' => 'Source of acquisition',
109 '040' => 'Cataloging source',
110 '041' => 'Language code',
111 '043' => 'Geographic area code',
112 '043' => 'Publishing country code',
113 '050' => 'Library of Congress call number',
114 '055' => 'Canadian classification number',
115 '060' => 'National Library of Medicine call number',
116 '082' => 'Dewey decimal call number',
117 '100' => 'Main entry -- Personal name',
118 '110' => 'Main entry -- Corporate name',
119 '130' => 'Main entry -- Uniform title',
120 '240' => 'Uniform title',
121 '245' => 'Title statement',
122 '246' => 'Varying form of title',
123 '250' => 'Edition statement',
124 '256' => 'Computer file characteristics',
125 '260' => 'Publication, distribution, etc.',
126 '263' => 'Projected publication date',
127 '300' => 'Physical description',
128 '306' => 'Playing time',
129 '440' => 'Series statement / Added entry -- Title',
130 '490' => 'Series statement',
131 '500' => 'General note',
132 '504' => 'Bibliography, etc. note',
133 '505' => 'Formatted contents note',
134 '508' => 'Creation/production credits note',
135 '510' => 'Citation/references note',
136 '511' => 'Participant or performer note',
137 '520' => 'Summary, etc. note',
138 '521' => 'Target audience note (ie age)',
139 '530' => 'Additional physical form available note',
140 '538' => 'System details note',
141 '586' => 'Awards note',
142 '600' => 'Subject added entry -- Personal name',
143 '610' => 'Subject added entry -- Corporate name',
144 '650' => 'Subject added entry -- Topical term',
145 '651' => 'Subject added entry -- Geographic name',
146 '656' => 'Index term -- Occupation',
147 '700' => 'Added entry -- Personal name',
148 '710' => 'Added entry -- Corporate name',
149 '730' => 'Added entry -- Uniform title',
150 '740' => 'Added entry -- Uncontrolled related/analytical title',
151 '800' => 'Series added entry -- Personal name',
152 '830' => 'Series added entry -- Uniform title',
154 '856' => 'Electronic location and access',
157 # tag, subfield, field name, repeats, striptrailingchars
159 '010'=>{'a'=>{name=> 'lccn', rpt=>0, striptrail=>' ' }},
160 '015'=>{'a'=>{name=> 'lccn', rpt=>0 }},
161 '020'=>{'a'=>{name=> 'isbn', rpt=>0 }},
162 '022'=>{'a'=>{name=> 'issn', rpt=>0 }},
163 '082'=>{'a'=>{name=> 'dewey', rpt=>0 }},
164 '100'=>{'a'=>{name=> 'author', rpt=>0, striptrail=>',:;/-' }},
165 '245'=>{'a'=>{name=> 'title', rpt=>0, striptrail=>',:;/' },
166 'b'=>{name=> 'subtitle', rpt=>0, striptrail=>',:;/' }},
167 '260'=>{'a'=>{name=> 'place', rpt=>0, striptrail=>',:;/-' },
168 'b'=>{name=> 'publisher', rpt=>0, striptrail=>',:;/-' },
169 'c'=>{name=> 'year' , rpt=>0, striptrail=>'.,:;/-' }},
170 '300'=>{'a'=>{name=> 'pages', rpt=>0, striptrail=>',:;/-' },
171 'c'=>{name=> 'size', rpt=>0, striptrail=>',:;/-' }},
172 '362'=>{'a'=>{name=> 'volume-number', rpt=>0 }},
173 '440'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/' },
174 'v'=>{name=> 'volume-number',rpt=>0 }},
175 '490'=>{'a'=>{name=> 'seriestitle', rpt=>0, striptrail=>',:;/' },
176 'v'=>{name=> 'volume-number',rpt=>0 }},
177 '700'=>{'a'=>{name=> 'addtional-author-illus',rpt=>1, striptrail=>',:;/' }},
178 '5xx'=>{'a'=>{name=> 'notes', rpt=>1 }},
179 '65x'=>{'a'=>{name=> 'subject', rpt=>1, striptrail=>'.,:;/-' }},
184 sub extractmarcfields {
188 $record, # pointer to list of MARC field hashes.
189 # Example: $record->[0]->{'tag'} = '100' # Author
190 # $record->[0]->{'subfields'}->{'a'} = subfieldvalue
194 my $bib; # pointer to hash of named output fields
195 # Example: $bib->{'author'} = "Twain, Mark";
202 $subfield, # Marc subfield [a-z]
203 $fieldname, # name of field "author", "title", etc.
204 $strip, # chars to remove from end of field
205 $stripregex, # reg exp pattern
207 my ($lccn, $isbn, $issn,
208 $publicationyear, @subjects, $subject,
210 $notes, $additionalauthors, $illustrator, $copyrightdate,
211 $s, $subdivision, $subjectsubfield,
214 print "<PRE>\n" if $debug;
216 if ( ref($record) eq "ARRAY" ) {
217 foreach $field (@$record) {
219 # Check each subfield in field
220 foreach $subfield ( keys %{$field->{subfields}} ) {
221 # see if it is defined in our Marc to koha mapping table
222 if ( $fieldname=$tagmap{ $field->{'tag'} }->{$subfield}->{name} ) {
223 # Yes, so keep the value
224 if ( ref($field->{'subfields'}->{$subfield} ) eq 'ARRAY' ) {
225 # if it was an array, just keep first element.
226 $bib->{$fieldname}=$field->{'subfields'}->{$subfield}[0];
228 $bib->{$fieldname}=$field->{'subfields'}->{$subfield};
230 print "$field->{'tag'} $subfield $fieldname=$bib->{$fieldname}\n" if $debug;
231 # see if this field should have trailing chars dropped
232 if ($strip=$tagmap{ $field->{'tag'} }->{$subfield}->{striptrail} ) {
233 $strip=~s//\\/; # backquote each char
234 $stripregex='[ ' . $strip . ']+$'; # remove trailing spaces also
235 $bib->{$fieldname}=~s/$stripregex//;
236 # also strip leading spaces
237 $bib->{$fieldname}=~s/^ +//;
239 print "Found subfield $field->{'tag'} $subfield " .
240 "$fieldname = $bib->{$fieldname}\n" if $debug;
246 if ($field->{'tag'} eq '001') {
247 $bib->{controlnumber}=$field->{'indicator'};
249 if ($field->{'tag'} eq '015') {
250 $bib->{lccn}=$field->{'subfields'}->{'a'};
251 $bib->{lccn}=~s/^\s*//;
252 $bib->{lccn}=~s/^C//;
253 ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0];
257 if ($field->{'tag'} eq '260') {
259 $publicationyear=$field->{'subfields'}->{'c'};
260 if ($publicationyear=~/c(\d\d\d\d)/) {
263 if ($publicationyear=~/[^c](\d\d\d\d)/) {
265 } elsif ($copyrightdate) {
266 $publicationyear=$copyrightdate;
268 $publicationyear=~/(\d\d\d\d)/;
272 if ($field->{'tag'} eq '700') {
273 my $name=$field->{'subfields'}->{'a'};
274 if ( defined($field->{'subfields'}->{'e'})
275 and $field->{'subfields'}->{'e'}=~/ill/) {
278 $additionalauthors.="$name\n";
281 if ($field->{'tag'} =~/^5/) {
282 $notes.="$field->{'subfields'}->{'a'}\n";
284 if ($field->{'tag'} =~/65\d/) {
286 my $subject=$field->{'subfields'}->{'a'};
288 print "Subject=$subject\n" if $debug;
289 foreach $subjectsubfield ( 'x','y','z' ) {
290 if ($subdivision=$field->{'subfields'}->{$subjectsubfield}) {
291 if ( ref($subdivision) eq 'ARRAY' ) {
292 foreach $s (@$subdivision) {
295 } # foreach subdivision
297 $subdivision=~s/\.$//;
298 $subject.=" -- $subdivision";
300 } # if subfield exists
302 print "Subject=$subject\n" if $debug;
303 push @subjects, $subject;
308 ($publicationyear ) && ($bib->{publicationyear}=$publicationyear );
309 ($copyrightdate ) && ($bib->{copyrightdate}=$copyrightdate );
310 ($additionalauthors ) && ($bib->{additionalauthors}=$additionalauthors );
311 ($illustrator ) && ($bib->{illustrator}=$illustrator );
312 ($notes ) && ($bib->{notes}=$notes );
313 ($#subjects ) && ($bib->{subject}=\@subjects );
317 $bib->{dewey}=~s/\///g; # drop any slashes
321 ($bib->{lccn}) = (split(/\s+/, $bib->{lccn}))[0]; # only keep first word
324 if ( $bib->{isbn} ) {
325 $bib->{isbn}=~s/[^\d]*//g; # drop non-digits
328 if ( $bib->{issn} ) {
329 $bib->{issn}=~s/^\s*//;
330 ($bib->{issn}) = (split(/\s+/, $bib->{issn}))[0];
333 if ( $bib->{'volume-number'} ) {
334 if ($bib->{'volume-number'}=~/(\d+).*(\d+)/ ) {
338 $bib->{volume}=$bib->{'volume-number'};
340 delete $bib->{'volume-number'};
344 print "Error: extractmarcfields: input ref $record is " .
345 ref($record) . " not ARRAY. Contact sysadmin.\n";
347 print "</PRE>\n" if $debug;
351 } # sub extractmarcfields
352 #---------------------------------
354 #--------------------------
355 # Parse MARC data in file format with control-character separators
356 # May be multiple records.
357 sub parsemarcfileformat {
359 # Input is one big text string
361 # Output is list of records. Each record is list of field hashes
364 my $splitchar=chr(29);
365 my $splitchar2=chr(30);
366 my $splitchar3=chr(31);
369 foreach $record (split(/$splitchar/, $data)) {
376 my $leader=substr($record,0,24);
377 print "<pre>parse Leader:$leader</pre>\n" if $debug;
380 'indicator' => $leader ,
383 $record=substr($record,24);
384 foreach $field (split(/$splitchar2/, $record)) {
388 unless ($directory) {
389 # If we didn't already find a directory, extract one.
396 while ($item=substr($directory,0,12)) {
397 # Pull out location of first field
398 $tag=substr($directory,0,3);
399 $length=substr($directory,3,4);
400 $start=substr($directory,7,6);
402 # Bump to next directory entry
403 $directory=substr($directory,12);
404 $tag{$counter2}=$tag;
410 $tag=$tag{$tagcounter};
413 my @subfields=split(/$splitchar3/, $field);
414 $indicator=$subfields[0];
415 $field{'indicator'}=$indicator;
416 print "<pre>parse indicator:$indicator</pre>\n" if $debug;
418 unless ($#subfields==0) {
422 for ($i=1; $i<=$#subfields; $i++) {
423 my $text=$subfields[$i];
424 my $subfieldcode=substr($text,0,1);
425 my $subfield=substr($text,1);
426 # if this subfield already exists, do array
427 if ($subfields{$subfieldcode}) {
428 my $subfieldlist=$subfields{$subfieldcode};
429 if ( ref($subfieldlist) eq 'ARRAY' ) {
430 # Already an array, add on to it
431 print "$tag Adding to array $subfieldcode -- $subfield<br>\n" if $debug;
432 @subfieldlist=@$subfieldlist;
433 push (@subfieldlist, $subfield);
435 # Change simple value to array
436 print "$tag Arraying $subfieldcode -- $subfield<br>\n" if $debug;
437 @subfieldlist=($subfields{$subfieldcode}, $subfield);
440 $subfields{$subfieldcode}=\@subfieldlist;
442 # subfield doesn't exist yet, keep simple value
443 $subfields{$subfieldcode}=$subfield;
446 $field{'subfields'}=\%subfields;
448 push (@record, \%field);
449 } # foreach field in record
450 push (@records, \@record);
453 print "</pre>" if $debug;
455 } # sub parsemarcfileformat
457 #----------------------------------------------
461 return $tagtext{$tag};
465 #---------------------------------------------
467 # Revision 1.4 2002/10/05 09:53:11 arensb
468 # Merged with arensb-context branch: use C4::Context->dbh instead of
469 # &C4Connect, and generally prefer C4::Context over C4::Database.
471 # Revision 1.3.2.1 2002/10/04 02:57:38 arensb
472 # Removed useless "use C4::Database;" line.
474 # Revision 1.3 2002/08/14 18:12:52 tonnesen
475 # Added copyright statement to all .pl and .pm files
477 # Revision 1.2 2002/07/02 20:30:15 tonnesen
478 # Merged SimpleMarc.pm over from rel-1-2
480 # Revision 1.1.2.4 2002/06/28 14:36:47 amillar
481 # Fix broken logic on illustrator vs. add'l author
483 # Revision 1.1.2.3 2002/06/26 20:54:32 tonnesen
484 # use warnings breaks on perl 5.005...
486 # Revision 1.1.2.2 2002/06/26 15:52:55 amillar
487 # Fix display of marc tag labels and indicators
489 # Revision 1.1.2.1 2002/06/26 07:27:35 amillar
490 # Moved acqui.simple MARC handling to new module SimpleMarc.pm