Fixed a few warnings.
[koha.git] / acqui.simple / bulkmarcimport.pl
1 #!/usr/bin/perl
2 #
3 # Tool for importing bulk marc records
4 #
5 # WARNING!!
6 #
7 # Do not use this script on a production system, it is still in development
8 #
9 #
10
11
12
13
# The one and only command-line argument is the path of the MARC file to
# import.  Without it there is nothing to do, so show the usage line and
# stop.
($file) = @ARGV;

if (!$file) {
    print "USAGE: ./bulkmarcimport.pl filename\n";
    exit;
}
20
21
22
23
# Two alternating colour values (HTML hex notation).
my ($lc1, $lc2) = ('#dddddd', '#ddaaaa');
26
27
28 use C4::Database;
29 use CGI;
30 use DBI;
31 #use strict;
32 use C4::Acquisitions;
33 use C4::Output;
# Authenticated user as reported by the environment (undef when the
# script is started from a shell without REMOTE_USER set).
my $userid = $ENV{'REMOTE_USER'};

# Database handle used for every query in this script.
my $dbh = C4Connect();
# Human-readable label for each MARC tag, used when dumping a record.
# The table is kept as plain text, one "TAG label" pair per line, and
# turned into the %tagtext lookup hash below.
my $tag_table = <<'END_OF_TAG_TABLE';
001 Control number
003 Control number identifier
005 Date and time of latest transaction
006 Fixed-length data elements -- additional material characteristics
007 Physical description fixed field
008 Fixed length data elements
010 LCCN
015 LCCN Cdn
020 ISBN
022 ISSN
037 Source of acquisition
040 Cataloging source
041 Language code
043 Geographic area code
050 Library of Congress call number
060 National Library of Medicine call number
082 Dewey decimal call number
100 Main entry -- Personal name
110 Main entry -- Corporate name
130 Main entry -- Uniform title
240 Uniform title
245 Title statement
246 Varying form of title
250 Edition statement
256 Computer file characteristics
260 Publication, distribution, etc.
263 Projected publication date
300 Physical description
306 Playing time
440 Series statement / Added entry -- Title
490 Series statement
500 General note
504 Bibliography, etc. note
505 Formatted contents note
508 Creation/production credits note
510 Citation/references note
511 Participant or performer note
520 Summary, etc. note
521 Target audience note (ie age)
530 Additional physical form available note
538 System details note
586 Awards note
600 Subject added entry -- Personal name
610 Subject added entry -- Corporate name
650 Subject added entry -- Topical term
651 Subject added entry -- Geographic name
656 Index term -- Occupation
700 Added entry -- Personal name
710 Added entry -- Corporate name
730 Added entry -- Uniform title
740 Added entry -- Uncontrolled related/analytical title
800 Series added entry -- Personal name
830 Series added entry -- Uniform title
852 Location
856 Electronic location and access
END_OF_TAG_TABLE

%tagtext = ();
foreach my $row (split /\n/, $tag_table) {
    # The first whitespace-delimited word is the tag, the rest is its label.
    my ($code, $label) = split ' ', $row, 2;
    $tagtext{$code} = $label;
}
93
94
# ---------------------------------------------------------------------------
# Main import pass
#
# Reads the binary MARC file named by $file, prints a dump of every record
# and stores it in the biblio, bibliosubtitle, biblioitems, bibliosubject,
# additionalauthors and items tables through the already-open handle $dbh.
#
# A parsed field is a hash with the keys
#   tag        three-character MARC tag taken from the record directory
#   indicator  text in front of the first subfield delimiter
#   subfields  hash: subfield code => value, or => array ref of values when
#              the code occurs more than once in the field
# ---------------------------------------------------------------------------

# Return the first value stored for a subfield code, whether the slot holds
# a single value or an array ref of repeated values.
sub _marc_first {
    my ($value) = @_;
    return ref($value) eq 'ARRAY' ? $value->[0] : $value;
}

# Return every value stored for a subfield code as a list (empty when the
# code was not present).
sub _marc_all {
    my ($value) = @_;
    return () unless defined $value;
    return ref($value) eq 'ARRAY' ? @$value : ($value);
}

# Return the first whitespace-delimited word of $text, or undef when there
# is none.
sub _marc_first_token {
    my ($text) = @_;
    return undef unless defined $text;
    my ($token) = $text =~ /(\S+)/;
    return $token;
}

if ($file) {
    # Three-argument open on a lexical handle: the file name can no longer
    # be interpreted as an open mode, and a missing file is reported
    # instead of silently importing nothing.
    open(my $marc_fh, '<', $file) or die "Cannot open $file: $!\n";
    binmode($marc_fh);
    # Slurp the whole file.  A plain <FH> stops at the first newline, which
    # would drop every record after a line feed anywhere in the data.
    my $data = do { local $/; <$marc_fh> };
    close($marc_fh);
    $data = '' unless defined $data;

    my $record_end   = chr(29);    # MARC record terminator
    my $field_end    = chr(30);    # MARC field terminator
    my $subfield_sep = chr(31);    # MARC subfield delimiter

    # Cycle through all of the records in the file

RECORD:
    foreach my $chunk (split(/\Q$record_end\E/, $data)) {
        my $record = $chunk;
        # Line breaks between records are not part of the leader.
        $record =~ s/^[\r\n]+//;
        # Anything not longer than the 24-character leader (for example a
        # trailing newline at the end of the file) cannot be a record.
        next RECORD if length($record) <= 24;

        my $leader = substr($record, 0, 24);
        print "\n\n---------------------------------------------------------------------------\n";
        print "Leader: $leader\n";
        $record = substr($record, 24);

        # -------------------------------------------------------------
        # Split the record into fields.  The first chunk is the directory
        # (12 characters per entry, the first three being the tag); the
        # following chunks are the fields in directory order.
        # -------------------------------------------------------------
        my @tags;
        my @fields;
        my $seen_directory = 0;
        my $tagcounter     = 0;
        foreach my $raw_field (split(/\Q$field_end\E/, $record)) {
            unless ($seen_directory) {
                # Parse the MARC directory and remember the tag sequence
                while ($raw_field =~ /\G(.{3}).{9}/gs) {
                    push @tags, $1;
                }
                $seen_directory = 1;
                next;
            }
            my $tag = defined $tags[$tagcounter] ? $tags[$tagcounter] : '';
            $tagcounter++;
            my %field = ('tag' => $tag);
            printf "%4s %-40s ", $tag, (defined $tagtext{$tag} ? $tagtext{$tag} : '');

            my @subfields = split(/\Q$subfield_sep\E/, $raw_field);
            my $indicator = $subfields[0];
            $field{'indicator'} = $indicator;
            if (@subfields == 1) {
                # Control field: no subfields, just the data itself
                print "$indicator\n";
            } else {
                print "\n";
                my %subfields;
                foreach my $text (@subfields[1 .. $#subfields]) {
                    my $subfieldcode = substr($text, 0, 1);
                    my $subfield     = length($text) > 1 ? substr($text, 1) : '';
                    print "   $subfieldcode $subfield\n";
                    if (!exists $subfields{$subfieldcode}) {
                        $subfields{$subfieldcode} = $subfield;
                    } elsif (ref($subfields{$subfieldcode}) eq 'ARRAY') {
                        push @{ $subfields{$subfieldcode} }, $subfield;
                    } else {
                        # Second occurrence: promote the slot to a list
                        $subfields{$subfieldcode} = [ $subfields{$subfieldcode}, $subfield ];
                    }
                }
                $field{'subfields'} = \%subfields;
            }
            push(@fields, \%field);
        }

        # -------------------------------------------------------------
        # Pull the bibliographic data out of the parsed fields.  Every
        # variable is local to this record so that nothing leaks in from
        # the record processed before it.
        # -------------------------------------------------------------
        my (
            $lccn, $isbn, $issn, $dewey, $author, $title, $subtitle,
            $place, $publisher, $publicationyear, $copyrightdate,
            $pages, $size, $volume, $number, $seriestitle,
            $illustrator, $itemtype, $replacementprice, $note,
        );
        # NOTE(review): nothing in this script assigns the next three, so
        # the matching biblioitems columns are always NULL.  $publisher is
        # parsed from 260 b but never stored; it may have been meant to
        # feed publishercode -- confirm before changing.
        my ($publishercode, $volumedate, $volumeddesc);
        my (@subjects, @additionalauthors, @barcodes);
        # NOTE(review): the stored MARC text is the record without its
        # 24-character leader, as before.
        my $marc = $record;

FIELD:
        foreach my $field (sort { $a->{'tag'} cmp $b->{'tag'} } @fields) {
            my $tag = $field->{'tag'};
            my $sub = $field->{'subfields'} || {};

            # LCCN is stored in field 010 a
            if ($tag eq '010') {
                my $text = _marc_first($sub->{'a'});
                if (defined $text) {
                    $text =~ s/^\s*//;
                    $text =~ s/cn //;
                }
                $lccn = _marc_first_token($text);
            }
            # LCCN is stored in field 015 a
            if ($tag eq '015') {
                my $text = _marc_first($sub->{'a'});
                if (defined $text) {
                    $text =~ s/^\s*//;
                    $text =~ s/^C//;
                }
                $lccn = _marc_first_token($text);
            }
            # ISBN is stored in field 020 a
            if ($tag eq '020') {
                $isbn = _marc_first_token(_marc_first($sub->{'a'}));
            }
            # ISSN is stored in field 022 a
            if ($tag eq '022') {
                $issn = _marc_first_token(_marc_first($sub->{'a'}));
            }
            # Dewey number stored in field 082 a
            # If there is more than one dewey number (more than one 'a'
            # subfield) only the first one is used
            if ($tag eq '082') {
                $dewey = _marc_first($sub->{'a'});
                $dewey =~ s/\///g if defined $dewey;
            }
            # Author is stored in field 100 a
            if ($tag eq '100') {
                $author = _marc_first($sub->{'a'});
            }
            # Title is stored in field 245 a
            # Subtitle in field 245 b
            # Illustrator in field 245 c
            if ($tag eq '245') {
                $title = _marc_first($sub->{'a'});
                $title =~ s/ \/$// if defined $title;
                $subtitle = _marc_first($sub->{'b'});
                $subtitle =~ s/ \/$// if defined $subtitle;
                my $name = _marc_first($sub->{'c'});
                if (defined $name && $name =~ /illustrated by]*\s+(.*)/) {
                    $illustrator = $1;
                }
            }
            # Publisher Info in field 260
            #   a = place
            #   b = publisher
            #   c = publication date
            #     (also store as copyright date if date starts with a 'c' as in c1995)
            if ($tag eq '260') {
                $place = _marc_first($sub->{'a'});
                $place =~ s/\s*:$//g if defined $place;
                $publisher = _marc_first($sub->{'b'});
                $publisher =~ s/\s*:$//g if defined $publisher;
                my $year_text = _marc_first($sub->{'c'});
                $year_text = '' unless defined $year_text;
                if ($year_text =~ /c(\d\d\d\d)/) {
                    $copyrightdate = $1;
                }
                if ($year_text =~ /[^c](\d\d\d\d)/) {
                    $publicationyear = $1;
                } elsif ($copyrightdate) {
                    $publicationyear = $copyrightdate;
                } else {
                    # List assignment yields undef when nothing matches,
                    # instead of picking up a stale $1 from an earlier match.
                    ($publicationyear) = $year_text =~ /(\d\d\d\d)/;
                }
            }
            # Physical Dimensions in field 300
            #   a = pages
            #   c = size
            if ($tag eq '300') {
                $pages = _marc_first($sub->{'a'});
                if (defined $pages) {
                    $pages =~ s/ \;$//;
                    $pages =~ s/\s*:$//g;
                }
                $size = _marc_first($sub->{'c'});
                $size =~ s/\s*:$//g if defined $size;
            }
            # Vol/No in field 362 a
            if ($tag eq '362') {
                my $text = _marc_first($sub->{'a'});
                if (defined $text && $text =~ /(\d+).*(\d+)/) {
                    $volume = $1;
                    $number = $2;
                }
            }
            # Series Title in field 440 a
            # Vol/No in field 440 v
            if ($tag eq '440') {
                $seriestitle = _marc_first($sub->{'a'});
                my $text = _marc_first($sub->{'v'});
                if (defined $text && $text =~ /(\d+).*(\d+)/) {
                    $volume = $1;
                    $number = $2;
                }
            }
            # BARCODES!!!
            # 852 p stores barcodes
            # 852 h stores dewey field
            # 852 9 stores replacement price
            #   852 h is also checked for an itemtype identifier: pb or pbk
            #   means PBK; otherwise a non-empty dewey number gives JNF and
            #   an empty one JF (the original author's libraries are school
            #   libraries, hence "Junior").
            if ($tag eq '852') {
                my @found = _marc_all($sub->{'p'});
                # An 852 without a barcode still produces one item row,
                # with a NULL barcode, as it always has.
                push(@barcodes, (@found ? @found : (undef)));
                my $deweyfield = _marc_first($sub->{'h'});
                $deweyfield = '' unless defined $deweyfield;
                ($dewey) = $deweyfield =~ /^([\d\.]*)/;
                if (($deweyfield =~ /pbk/) || ($deweyfield =~ /pb$/)) {
                    $itemtype = 'PBK';
                } elsif ($dewey) {
                    $itemtype = 'JNF';
                } else {
                    $itemtype = 'JF';
                }
                $replacementprice = _marc_first($sub->{'9'});
            }
            # 700 a stores additional authors / illustrator info
            # 700 c will contain 'ill' if it's an illustrator
            if ($tag eq '700') {
                my $name = _marc_first($sub->{'a'});
                my $role = _marc_first($sub->{'c'});
                if (defined $role && $role =~ /ill/) {
                    $illustrator = $name;
                } elsif (defined $name && length $name) {
                    push(@additionalauthors, $name);
                }
            }
            # All 5XX a entries are concatenated as notes, one per line
            if ($tag =~ /^5/) {
                $note .= join("\n", _marc_all($sub->{'a'})) . "\n";
            }
            # 6XX entries are subject entries (691 is skipped; the reason
            # was not recorded by the original author).
            # Subfield a is the heading; entries from subfield x (general
            # subdivision), y (chronological subdivision) and z (geographic
            # subdivision) are appended, separated by " -- ".
            if ($tag =~ /6\d\d/) {
                next FIELD if $tag eq '691';
                my $subject = _marc_first($sub->{'a'});
                $subject = '' unless defined $subject;
                print "SUBJECT: $subject\n";
                $subject =~ s/\.$//;
                foreach my $code ('x', 'y', 'z') {
                    next unless $sub->{$code};
                    foreach my $part (_marc_all($sub->{$code})) {
                        $part =~ s/\.$//;
                        $subject .= " -- $part";
                    }
                }
                push @subjects, $subject;
            }
        }

        # -------------------------------------------------------------
        # Find or create the biblio / biblioitems rows.
        # -------------------------------------------------------------
        my $q_isbn = $dbh->quote($isbn);
        my $q_issn = $dbh->quote($issn);
        my $q_lccn = $dbh->quote($lccn);
        # Fetch the row itself rather than trusting $sth->rows, which DBI
        # does not guarantee to be meaningful for SELECT statements.
        my ($biblionumber, $biblioitemnumber) = $dbh->selectrow_array(
            "select biblionumber,biblioitemnumber from biblioitems where issn=$q_issn or isbn=$q_isbn or lccn=$q_lccn");

        if (!defined $biblionumber) {
            # No edition with this ISSN/ISBN/LCCN yet
            $biblioitemnumber = undef;
            my $q_title         = $dbh->quote(defined $title    ? $title    : '');
            my $q_subtitle      = $dbh->quote(defined $subtitle ? $subtitle : '');
            my $q_author        = $dbh->quote($author);
            my $q_copyrightdate = $dbh->quote($copyrightdate);
            my $q_seriestitle   = $dbh->quote($seriestitle);
            ($biblionumber) = $dbh->selectrow_array(
                "select biblionumber from biblio where title=$q_title and author=$q_author and copyrightdate=$q_copyrightdate and seriestitle=$q_seriestitle");
            if (!defined $biblionumber) {
                # Title not in the database yet: create it
                ($biblionumber) = $dbh->selectrow_array("select max(biblionumber) from biblio");
                $biblionumber++;
                my $q_notes = $dbh->quote($note);
                $dbh->do("insert into biblio (biblionumber, title, author, copyrightdate, seriestitle, notes) values ($biblionumber, $q_title, $q_author, $q_copyrightdate, $q_seriestitle, $q_notes)");
                $dbh->do("insert into bibliosubtitle values ($q_subtitle, $biblionumber)");
            }
            ($biblioitemnumber) = $dbh->selectrow_array("select max(biblioitemnumber) from biblioitems");
            $biblioitemnumber++;

            my $q_volume   = $dbh->quote($volume);
            my $q_number   = $dbh->quote($number);
            my $q_itemtype = $dbh->quote($itemtype);
            my $q_dewey    = $dbh->quote($dewey);
            # Subclass: first three letters of the author, upper-cased
            my $cleanauthor = defined $author ? $author : '';
            $cleanauthor =~ s/[^A-Za-z]//g;
            my $q_subclass        = $dbh->quote(uc(substr($cleanauthor, 0, 3)));
            my $q_publicationyear = $dbh->quote($publicationyear);
            my $q_publishercode   = $dbh->quote($publishercode);
            my $q_volumedate      = $dbh->quote($volumedate);
            my $q_volumeddesc     = $dbh->quote($volumeddesc);
            my $q_illus           = $dbh->quote($illustrator);
            my $q_pages           = $dbh->quote($pages);
            my $q_size            = $dbh->quote($size);
            my $q_place           = $dbh->quote($place);
            my $q_marc            = $dbh->quote($marc);

            $dbh->do("insert into biblioitems (biblioitemnumber, biblionumber, volume, number, itemtype, isbn, issn, dewey, subclass, publicationyear, publishercode, volumedate, volumeddesc, illus, pages, size, place, lccn, marc) values ($biblioitemnumber, $biblionumber, $q_volume, $q_number, $q_itemtype, $q_isbn, $q_issn, $q_dewey, $q_subclass, $q_publicationyear, $q_publishercode, $q_volumedate, $q_volumeddesc, $q_illus, $q_pages, $q_size, $q_place, $q_lccn, $q_marc)");

            foreach my $subjectheading (@subjects) {
                # Subject headings are stored in upper case
                my $q_subjectheading = $dbh->quote(uc($subjectheading));
                $dbh->do("insert into bibliosubject (biblionumber,subject)
                    values ($biblionumber, $q_subjectheading)");
            }
            foreach my $additionalauthor (@additionalauthors) {
                # Remove any line ending characters (CR or LF).  The
                # escapes \013 and \010 used previously are octal and
                # denote vertical tab and backspace.
                $additionalauthor =~ tr/\r\n//d;
                # Additional authors are stored in upper case
                my $q_additionalauthor = $dbh->quote(uc($additionalauthor));
                $dbh->do("insert into additionalauthors (biblionumber,author) values ($biblionumber, $q_additionalauthor)");
            }
        }

        # -------------------------------------------------------------
        # One items row per barcode that is not in the database yet.
        # -------------------------------------------------------------
        my $q_homebranch = "'MAIN'";
        my $q_notes      = "''";
        my ($itemnumber) = $dbh->selectrow_array("select max(itemnumber) from items");
        $itemnumber++;
        my @datearr = localtime(time);
        my $date = (1900+$datearr[5])."-".($datearr[4]+1)."-".$datearr[3];

        # The price is interpolated into the statement as a bare number, so
        # strip the decorations seen in the data (leading 'p', '$') and
        # accept the rest only if it really is numeric; otherwise use 0.
        my $price = defined $replacementprice ? $replacementprice : '';
        $price =~ s/^\s+//;
        $price =~ s/\s+$//;
        $price =~ s/^p//;
        $price =~ s/\$//;
        $price = 0 unless $price =~ /^(?:\d+\.?\d*|\.\d+)$/;

BARCODE:
        foreach my $barcode (@barcodes) {
            my $q_barcode = $dbh->quote($barcode);
            my ($existing) = $dbh->selectrow_array("select barcode from items where barcode=$q_barcode");
            if (defined $existing) {
                print "Skipping $barcode\n";
                next BARCODE;
            }
            my $task = "insert into items (itemnumber, biblionumber, biblioitemnumber, barcode, itemnotes, homebranch, holdingbranch, dateaccessioned, replacementprice) values ($itemnumber, $biblionumber, $biblioitemnumber, $q_barcode, $q_notes, $q_homebranch, 'MAIN', '$date', $price)";
            print "$task\n";
            $dbh->do($task);
            # Each copy needs its own item number
            $itemnumber++;
        }
    }
}
$dbh->disconnect;