3 # Tool for importing bulk marc records
7 # Do not use this script on a production system, it is still in development
15 # Copyright 2000-2002 Katipo Communications
17 # This file is part of Koha.
19 # Koha is free software; you can redistribute it and/or modify it under the
20 # terms of the GNU General Public License as published by the Free Software
21 # Foundation; either version 2 of the License, or (at your option) any later
24 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
25 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
26 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
28 # You should have received a copy of the GNU General Public License along with
29 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
30 # Suite 330, Boston, MA 02111-1307 USA
37 print "USAGE: ./bulkmarcimport.pl filename\n";
55 my $dbh = C4::Context->dbh;
56 my $userid=$ENV{'REMOTE_USER'};
58 # FIXME - Wouldn't it be better to use &C4::SimpleMarc::taglabel
59 # instead of duplicating this information?
61 '001' => 'Control number',
62 '003' => 'Control number identifier',
63 '005' => 'Date and time of latest transaction',
64 '006' => 'Fixed-length data elements -- additional material characteristics',
65 '007' => 'Physical description fixed field',
66 '008' => 'Fixed length data elements',
71 '037' => 'Source of acquisition',
72 '040' => 'Cataloging source',
73 '041' => 'Language code',
74 '043' => 'Geographic area code',
75 '050' => 'Library of Congress call number',
76 '060' => 'National Library of Medicine call number',
77 '082' => 'Dewey decimal call number',
78 '100' => 'Main entry -- Personal name',
79 '110' => 'Main entry -- Corporate name',
80 '130' => 'Main entry -- Uniform title',
81 '240' => 'Uniform title',
82 '245' => 'Title statement',
83 '246' => 'Varying form of title',
84 '250' => 'Edition statement',
85 '256' => 'Computer file characteristics',
86 '260' => 'Publication, distribution, etc.',
87 '263' => 'Projected publication date',
88 '300' => 'Physical description',
89 '306' => 'Playing time',
90 '440' => 'Series statement / Added entry -- Title',
91 '490' => 'Series statement',
92 '500' => 'General note',
93 '504' => 'Bibliography, etc. note',
94 '505' => 'Formatted contents note',
95 '508' => 'Creation/production credits note',
96 '510' => 'Citation/references note',
97 '511' => 'Participant or performer note',
98 '520' => 'Summary, etc. note',
99 '521' => 'Target audience note (ie age)',
100 '530' => 'Additional physical form available note',
101 '538' => 'System details note',
102 '586' => 'Awards note',
103 '600' => 'Subject added entry -- Personal name',
104 '610' => 'Subject added entry -- Corporate name',
105 '650' => 'Subject added entry -- Topical term',
106 '651' => 'Subject added entry -- Geographic name',
107 '656' => 'Index term -- Occupation',
108 '700' => 'Added entry -- Personal name',
109 '710' => 'Added entry -- Corporate name',
110 '730' => 'Added entry -- Uniform title',
111 '740' => 'Added entry -- Uncontrolled related/analytical title',
112 '800' => 'Series added entry -- Personal name',
113 '830' => 'Series added entry -- Uniform title',
115 '856' => 'Electronic location and access',
126 # Cycle through all of the records in the file
130 foreach $record (split(/$splitchar/, $data)) {
131 $leader=substr($record,0,24);
132 print "\n\n---------------------------------------------------------------------------\n";
133 print "Leader: $leader\n";
134 $record=substr($record,24);
141 foreach $field (split(/$splitchar2/, $record)) {
143 unless ($directory) {
144 # Parse the MARC directory and store the contents in the %tag hash
148 while ($item=substr($directory,0,12)) { # FIXME - $item never used
149 $tag=substr($directory,0,3);
150 $length=substr($directory,3,4); # FIXME - Unused
151 $start=substr($directory,7,6); # FIXME - Unused
152 $directory=substr($directory,12);
159 $tag=$tag{$tagcounter};
162 printf "%4s %-40s ",$tag, $tagtext{$tag};
164 my @subfields=split(/$splitchar3/, $field);
165 $indicator=$subfields[0];
166 $field{'indicator'}=$indicator;
168 if ($#subfields==0) {
169 print "$indicator\n";
173 for ($i=1; $i<=$#subfields; $i++) {
174 my $text=$subfields[$i];
175 my $subfieldcode=substr($text,0,1);
176 my $subfield=substr($text,1);
177 print " $subfieldcode $subfield\n";
178 if ($subfields{$subfieldcode}) {
179 my $subfieldlist=$subfields{$subfieldcode};
180 my @subfieldlist=@$subfieldlist;
181 if ($#subfieldlist>=0) {
182 push (@subfieldlist, $subfield);
184 @subfieldlist=($subfields{$subfieldcode}, $subfield);
186 $subfields{$subfieldcode}=\@subfieldlist;
188 $subfields{$subfieldcode}=$subfield;
191 $field{'subfields'}=\%subfields;
194 my $fieldlist=$record{$tag};
195 if ($fieldlist->{'tag'}) {
196 @fieldlist=($fieldlist, \%field);
197 $fieldlist=\@fieldlist;
199 push (@$fieldlist,\%field);
201 $record{$tag}=$fieldlist;
203 $record{$tag}=[\%field];
205 push (@record, \%field);
209 my ($lccn, $isbn, $issn, $dewey, $author, $title, $place, $publisher, $publicationyear, $volume, $number, @subjects, $note, $additionalauthors, $illustrator, $copyrightdate, $barcode, $itemtype, $seriestitle, @barcodes);
211 foreach $field (sort {$a->{'tag'} cmp $b->{'tag'}} @$rec) {
212 # LCCN is stored in field 010 a
213 if ($field->{'tag'} eq '010') {
214 $lccn=$field->{'subfields'}->{'a'};
218 ($lccn) = (split(/\s+/, $lccn))[0];
220 # Field 015 a (national bibliography number) is also read into $lccn
221 if ($field->{'tag'} eq '015') {
222 $lccn=$field->{'subfields'}->{'a'};
225 ($lccn) = (split(/\s+/, $lccn))[0];
227 # ISBN is stored in field 020 a
228 if ($field->{'tag'} eq '020') {
229 $isbn=$field->{'subfields'}->{'a'};
231 ($isbn) = (split(/\s+/, $isbn))[0];
233 # ISSN is stored in field 022 a
234 if ($field->{'tag'} eq '022') {
235 $issn=$field->{'subfields'}->{'a'};
237 ($issn) = (split(/\s+/, $issn))[0];
239 # Dewey number stored in field 082 a
240 # If there is more than one dewey number (more than one 'a'
241 # subfield) I just take the first one
242 if ($field->{'tag'} eq '082') {
243 $dewey=$field->{'subfields'}->{'a'};
249 # Author is stored in field 100 a
250 if ($field->{'tag'} eq '100') {
251 $author=$field->{'subfields'}->{'a'};
253 # Title is stored in field 245 a
254 # Subtitle in field 245 b
255 # Illustrator in field 245 c
256 if ($field->{'tag'} eq '245') {
257 $title=$field->{'subfields'}->{'a'};
259 $subtitle=$field->{'subfields'}->{'b'};
261 my $name=$field->{'subfields'}->{'c'};
262 if ($name=~/illustrated by]*\s+(.*)/) {
266 # Publisher Info in field 260
269 # c = publication date
270 # (also store as copyright date if date starts with a 'c' as in c1995)
271 if ($field->{'tag'} eq '260') {
272 $place=$field->{'subfields'}->{'a'};
277 $publisher=$field->{'subfields'}->{'b'};
279 $publisher=$$publisher[0];
281 $publisher=~s/\s*:$//g;
282 $publicationyear=$field->{'subfields'}->{'c'};
283 if ($publicationyear=~/c(\d\d\d\d)/) {
286 if ($publicationyear=~/[^c](\d\d\d\d)/) {
288 } elsif ($copyrightdate) {
289 $publicationyear=$copyrightdate;
291 $publicationyear=~/(\d\d\d\d)/;
295 # Physical Dimensions in field 300
298 if ($field->{'tag'} eq '300') {
299 $pages=$field->{'subfields'}->{'a'};
301 $size=$field->{'subfields'}->{'c'};
305 # Vol/No in field 362 a
306 if ($field->{'tag'} eq '362') {
307 if ($field->{'subfields'}->{'a'}=~/(\d+).*?(\d+)/) { # non-greedy gap: with a greedy .* the second group only ever captured the final digit
312 # Series Title in field 440 a
313 # Vol/No in field 440 v
314 if ($field->{'tag'} eq '440') {
315 $seriestitle=$field->{'subfields'}->{'a'};
316 if ($field->{'subfields'}->{'v'}=~/(\d+).*?(\d+)/) { # non-greedy gap: with a greedy .* the second group only ever captured the final digit
322 # 852 p stores barcodes
323 # 852 h stores dewey field
324 # 852 9 stores replacement price
325 # I check for an itemtype identifier in 852h as well... pb or pbk means PBK
326 # also if $dewey is > 0, then I assign JNF, otherwise JF.
327 # Note that my libraries are school libraries, so I assume Junior.
328 if ($field->{'tag'} eq '852') {
329 $barcode=$field->{'subfields'}->{'p'};
330 push (@barcodes, $barcode);
331 my $q_barcode=$dbh->quote($barcode);
332 my $deweyfield=$field->{'subfields'}->{'h'};
333 $deweyfield=~/^([\d\.]*)/;
335 if (($deweyfield=~/pbk/) || ($deweyfield=~/pb$/)) {
343 $replacementprice=$field->{'subfields'}->{'9'};
345 # 700 a stores additional authors / illustrator info
346 # 700 c will contain 'ill' if it's an illustrator
347 if ($field->{'tag'} eq '700') {
348 my $name=$field->{'subfields'}->{'a'};
349 if ($field->{'subfields'}->{'c'}=~/ill/) {
352 $additionalauthors.="$name\n";
355 # I concatenate all 5XX a entries as notes
356 if ($field->{'tag'} =~/^5/) {
357 $note.="$field->{'subfields'}->{'a'}\n";
359 # 6XX entries are subject entries
360 # Not sure why I'm skipping 691 tags
361 # 691 a contains the subject.
362 # I take subfield a, and append entries from subfield x (general
363 # subdivision) y (Chronological subdivision) and z (geographic
365 if ($field->{'tag'} =~/6\d\d/) {
366 (next) if ($field->{'tag'} eq '691');
367 my $subject=$field->{'subfields'}->{'a'};
368 print "SUBJECT: $subject\n";
370 if ($gensubdivision=$field->{'subfields'}->{'x'}) {
371 my @sub=@$gensubdivision;
378 $gensubdivision=~s/\.$//;
379 $subject.=" -- $gensubdivision";
382 if ($chronsubdivision=$field->{'subfields'}->{'y'}) {
383 my @sub=@$chronsubdivision;
390 $chronsubdivision=~s/\.$//;
391 $subject.=" -- $chronsubdivision";
394 if ($geosubdivision=$field->{'subfields'}->{'z'}) {
395 my @sub=@$geosubdivision;
402 $geosubdivision=~s/\.$//;
403 $subject.=" -- $geosubdivision";
406 push @subjects, $subject;
410 my $q_isbn=$dbh->quote($isbn);
411 my $q_issn=$dbh->quote($issn);
412 my $q_lccn=$dbh->quote($lccn);
413 my $sth=$dbh->prepare("select biblionumber,biblioitemnumber from biblioitems where issn=$q_issn or isbn=$q_isbn or lccn=$q_lccn");
416 my $biblioitemnumber=0;
418 ($biblionumber, $biblioitemnumber) = $sth->fetchrow;
420 #title already in the database
422 my $q_title=$dbh->quote("$title");
423 my $q_subtitle=$dbh->quote("$subtitle");
424 my $q_author=$dbh->quote($author);
425 my $q_copyrightdate=$dbh->quote($copyrightdate);
426 my $q_seriestitle=$dbh->quote($seriestitle);
427 $sth=$dbh->prepare("select biblionumber from biblio where title=$q_title and author=$q_author and copyrightdate=$q_copyrightdate and seriestitle=$q_seriestitle");
430 ($biblionumber) = $sth->fetchrow;
431 #title already in the database
433 $sth=$dbh->prepare("select max(biblionumber) from biblio");
435 ($biblionumber) = $sth->fetchrow;
437 my $q_notes=$dbh->quote($note);
438 $sth=$dbh->prepare("insert into biblio (biblionumber, title, author, copyrightdate, seriestitle, notes) values ($biblionumber, $q_title, $q_author, $q_copyrightdate, $q_seriestitle, $q_notes)");
440 $sth=$dbh->prepare("insert into bibliosubtitle values ($q_subtitle, $biblionumber)");
443 $sth=$dbh->prepare("select max(biblioitemnumber) from biblioitems");
445 ($biblioitemnumber) = $sth->fetchrow;
447 my $q_isbn=$dbh->quote($isbn);
448 my $q_issn=$dbh->quote($issn);
449 my $q_lccn=$dbh->quote($lccn);
450 my $q_volume=$dbh->quote($volume);
451 my $q_number=$dbh->quote($number);
452 my $q_itemtype=$dbh->quote($itemtype);
453 my $q_dewey=$dbh->quote($dewey);
454 $cleanauthor=$author;
455 $cleanauthor=~s/[^A-Za-z]//g;
456 $subclass=uc(substr($cleanauthor,0,3));
457 my $q_subclass=$dbh->quote($subclass);
458 my $q_publicationyear=$dbh->quote($publicationyear);
459 my $q_publishercode=$dbh->quote($publishercode); # FIXME - $publishercode undefined
460 my $q_volumedate=$dbh->quote($volumedate); # FIXME - $volumedate undefined
461 my $q_volumeddesc=$dbh->quote($volumeddesc); # FIXME - $volumeddesc undefined
462 my $q_illus=$dbh->quote($illustrator);
463 my $q_pages=$dbh->quote($pages);
464 my $q_notes=$dbh->quote($note);
465 ($q_notes) || ($q_notes="''");
466 my $q_size=$dbh->quote($size);
467 my $q_place=$dbh->quote($place);
468 my $q_marc=$dbh->quote($marc);
470 $sth=$dbh->prepare("insert into biblioitems (biblioitemnumber, biblionumber, volume, number, itemtype, isbn, issn, dewey, subclass, publicationyear, publishercode, volumedate, volumeddesc, illus, pages, size, place, lccn, marc) values ($biblioitemnumber, $biblionumber, $q_volume, $q_number, $q_itemtype, $q_isbn, $q_issn, $q_dewey, $q_subclass, $q_publicationyear, $q_publishercode, $q_volumedate, $q_volumeddesc, $q_illus, $q_pages, $q_size, $q_place, $q_lccn, $q_marc)");
473 foreach $subjectheading (@subjects) {
474 # convert to upper case
475 $subjectheading=uc($subjectheading);
477 my $q_subjectheading=$dbh->quote($subjectheading);
478 $sth=$dbh->prepare("insert into bibliosubject (biblionumber,subject)
479 values ($biblionumber, $q_subjectheading)");
482 my @additionalauthors=split(/\n/,$additionalauthors);
483 my $additionalauthor;
484 foreach $additionalauthor (@additionalauthors) {
485 # remove any line ending characters (CR = Ctrl-M = octal 015, LF = Ctrl-J = octal 012)
486 $additionalauthor=~s/\015//g; # \015 is CR; the previous \013 was octal for VT, not decimal 13
487 $additionalauthor=~s/\012//g; # \012 is LF; the previous \010 was octal for BS, not decimal 10
488 # convert to upper case
489 $additionalauthor=uc($additionalauthor);
491 my $q_additionalauthor=$dbh->quote($additionalauthor);
492 $sth=$dbh->prepare("insert into additionalauthors (biblionumber,author) values ($biblionumber, $q_additionalauthor)");
496 my $q_barcode=$dbh->quote($barcode);
497 my $q_homebranch="'$branchname'";
499 #my $replacementprice=0;
500 # FIXME - There's already a $sth in this scope.
501 my $sth=$dbh->prepare("select max(itemnumber) from items");
503 my ($itemnumber) = $sth->fetchrow;
505 my @datearr=localtime(time);
506 my $date=(1900+$datearr[5])."-".($datearr[4]+1)."-".$datearr[3];
508 foreach $barcode (@barcodes) {
509 my $q_barcode=$dbh->quote($barcode);
510 my $sti=$dbh->prepare("select barcode from items where barcode=$q_barcode");
513 print "Skipping $barcode\n";
516 $replacementprice=~s/^p//; # drop the leading 'p' marker from the 852 9 value
517 $replacementprice=~s/\$//; # strip the currency sign before deciding whether anything usable is left
518 $replacementprice=0 unless $replacementprice=~/\A(?:\d+\.?\d*|\.\d+)\z/; # value is interpolated unquoted into the insert below, so only a plain number may pass; anything else falls back to 0
519 $task="insert into items (itemnumber, biblionumber, biblioitemnumber, barcode, itemnotes, homebranch, holdingbranch, dateaccessioned, replacementprice) values ($itemnumber, $biblionumber, $biblioitemnumber, $q_barcode, $q_notes, $q_homebranch, '$branchname', '$date', $replacementprice)";
520 $sth=$dbh->prepare($task);