47 files to go, 47 files to go, take one down, pass it around...
[koha.git] / acqui.simple / bulkmarcimport.pl
1 #!/usr/bin/perl
2 #
3 # Tool for importing bulk marc records
4 #
5 # WARNING!!
6 #
7 # Do not use this script on a production system, it is still in development
8 #
9 #
10
11
12
13
# Path of the MARC file to import: the first command-line argument.
# Declared with "our" so it remains the package variable that the rest of the
# script refers to as plain $file.
our $file = $ARGV[0];

# Branch code written as both homebranch and holdingbranch of every item the
# import creates.
our $branchname = 'MAIN';

# A missing file name is a usage error: report it on STDERR and exit with a
# non-zero status so that calling scripts can detect the failure.  The test
# is on definedness/length (not truth) so a file literally named "0" works.
unless (defined $file && length $file) {
    print STDERR "USAGE: ./bulkmarcimport.pl filename\n";
    exit 1;
}

# Two colour values.  Nothing in the visible script reads them; they are kept
# in case code outside this view does.
# NOTE(review): presumably alternating row colours left over from a CGI
# version of this tool -- confirm before removing.
my $lc1 = '#dddddd';
my $lc2 = '#ddaaaa';
28
29
30 use C4::Database;
31 use CGI;
32 use DBI;
33 #use strict;
34 use C4::Acquisitions;
35 use C4::Biblio;
36 use C4::Output;
# No database connection is opened here: the script connects (C4Connect)
# immediately before the import loop further down.  Connecting here as well
# only created a second handle that was never used and never disconnected.

# Login name supplied by the web server, if any (undefined on the command
# line).
my $userid = $ENV{'REMOTE_USER'};
# Human-readable description for each MARC tag, used when dumping a record's
# fields to STDOUT.  Tags that are missing here are printed without a
# description.  Declared with "our" so it stays the package variable that the
# rest of the script refers to as plain %tagtext.
our %tagtext = (
    '001' => 'Control number',
    '003' => 'Control number identifier',
    '005' => 'Date and time of latest transaction',
    '006' => 'Fixed-length data elements -- additional material characteristics',
    '007' => 'Physical description fixed field',
    '008' => 'Fixed length data elements',
    '010' => 'LCCN',
    '015' => 'LCCN Cdn',
    '020' => 'ISBN',
    '022' => 'ISSN',
    '037' => 'Source of acquisition',
    '040' => 'Cataloging source',
    '041' => 'Language code',
    '043' => 'Geographic area code',
    '050' => 'Library of Congress call number',
    '060' => 'National Library of Medicine call number',
    '082' => 'Dewey decimal call number',
    '100' => 'Main entry -- Personal name',
    '110' => 'Main entry -- Corporate name',
    '130' => 'Main entry -- Uniform title',
    '240' => 'Uniform title',
    '245' => 'Title statement',
    '246' => 'Varying form of title',
    '250' => 'Edition statement',
    '256' => 'Computer file characteristics',
    '260' => 'Publication, distribution, etc.',
    '263' => 'Projected publication date',
    '300' => 'Physical description',
    '306' => 'Playing time',
    '440' => 'Series statement / Added entry -- Title',
    '490' => 'Series statement',
    '500' => 'General note',
    '504' => 'Bibliography, etc. note',
    '505' => 'Formatted contents note',
    '508' => 'Creation/production credits note',
    '510' => 'Citation/references note',
    '511' => 'Participant or performer note',
    '520' => 'Summary, etc. note',
    '521' => 'Target audience note (ie age)',
    '530' => 'Additional physical form available note',
    '538' => 'System details note',
    '586' => 'Awards note',
    '600' => 'Subject added entry -- Personal name',
    '610' => 'Subject added entry -- Corporate name',
    '650' => 'Subject added entry -- Topical term',
    '651' => 'Subject added entry -- Geographic name',
    '656' => 'Index term -- Occupation',
    '700' => 'Added entry -- Personal name',
    '710' => 'Added entry -- Corporate name',
    '730' => 'Added entry -- Uniform title',
    '740' => 'Added entry -- Uncontrolled related/analytical title',
    '800' => 'Series added entry -- Personal name',
    '830' => 'Series added entry -- Uniform title',
    '852' => 'Location',
    '856' => 'Electronic location and access',
);
96
97
# Database handle used for the whole import run; released at the very end.
my $dbh = C4Connect();

# $file and $branchname are set near the top of the script.
if (defined $file && length $file) {
    _import_marc_file($dbh, $file, $branchname);
}
$dbh->disconnect;

# ---------------------------------------------------------------------------
# Private helpers for the import above.  They live in their own block so that
# "use strict" and "use warnings" apply to them without affecting the rest of
# the script.
# ---------------------------------------------------------------------------
{
    use strict;
    use warnings;

    # tag => description table, filled in near the top of the script.
    our %tagtext;

    # _import_marc_file($dbh, $path, $branch)
    #
    # Reads the MARC file at $path, dumps every record to STDOUT and stores
    # it as biblio / biblioitem / item rows through $dbh.  $branch is written
    # as home and holding branch of every new item.  Dies if the file cannot
    # be opened.
    sub _import_marc_file {
        my ($dbh, $path, $branch) = @_;
        my $data = _slurp_file($path);

        # Cycle through all of the records in the file; each record ends with
        # the record terminator 0x1D.
      RECORD:
        foreach my $raw (split /\x1d/, $data) {
            # Tolerate line breaks between records, and skip chunks that are
            # too short to hold a 24-character leader plus any data (e.g. a
            # trailing newline after the last record), which would otherwise
            # be stored as an empty title.
            $raw =~ s/^[\r\n]+//;
            next RECORD if length($raw) <= 24;

            my $leader = substr($raw, 0, 24);
            print "\n\n---------------------------------------------------------------------------\n";
            print "Leader: $leader\n";

            my $body   = substr($raw, 24);
            my $fields = _parse_fields($body);
            my $bib    = _extract_biblio($fields);

            # NOTE(review): as before, the stored MARC text is the record
            # without its leader -- confirm whether the leader should be kept.
            $bib->{'marc'} = $body;

            my ($biblionumber, $biblioitemnumber) = _store_biblio($dbh, $bib);
            _store_items($dbh, $bib->{'items'}, $biblionumber,
                $biblioitemnumber, $branch);
        }
        return;
    }

    # _slurp_file($path)
    #
    # Returns the complete contents of $path as one string, read in binary
    # mode.  Dies with the system error if the file cannot be opened.
    sub _slurp_file {
        my ($path) = @_;
        open(my $fh, '<', $path)
            or die "bulkmarcimport: cannot open '$path': $!\n";
        binmode($fh);

        # Without an input record separator <$fh> returns the whole file;
        # with the default separator it would stop at the first newline.
        local $/;
        my $data = <$fh>;
        close($fh);
        return defined $data ? $data : '';
    }

    # _parse_fields($body)
    #
    # Splits a record (leader already removed) into its fields, prints a
    # readable dump of them and returns a reference to a list of hashes:
    #   tag       => three-character tag taken from the directory
    #   indicator => the text before the first subfield delimiter
    #   subfields => { code => [ value, ... ] }, repeated codes in file order
    sub _parse_fields {
        my ($body) = @_;

        # Fields end with the field terminator 0x1E; the first chunk is the
        # directory.
        my ($directory, @raw_fields) = split /\x1e/, $body;
        return [] unless defined $directory;

        # Parse the MARC directory: one 12-character entry per field, of
        # which only the leading three-character tag is used.  The length
        # and offset parts are ignored; fields are matched to tags purely by
        # their position.
        my @tags = map { substr($_, 0, 3) } ($directory =~ /(.{1,12})/gs);

        my @fields;
        foreach my $index (0 .. $#raw_fields) {
            my $tag   = defined $tags[$index]  ? $tags[$index]  : '';
            my $label = defined $tagtext{$tag} ? $tagtext{$tag} : '';
            printf "%4s %-40s ", $tag, $label;

            # Subfields are introduced by the delimiter 0x1F followed by a
            # one-character code.
            my ($indicator, @pieces) = split /\x1f/, $raw_fields[$index];
            my %subfields;
            if (@pieces) {
                print "\n";
                foreach my $piece (@pieces) {
                    next unless length $piece;
                    my $code  = substr($piece, 0, 1);
                    my $value = substr($piece, 1);
                    print "   $code $value\n";
                    push @{ $subfields{$code} }, $value;
                }
            }
            else {
                # No subfields: the whole field is printed as its value.
                print defined $indicator ? "$indicator\n" : "\n";
            }

            push @fields, {
                tag       => $tag,
                indicator => $indicator,
                subfields => \%subfields,
            };
        }
        return \@fields;
    }

    # _all_subfields($field, $code)
    #
    # Returns every value of subfield $code in $field (possibly none).
    sub _all_subfields {
        my ($field, $code) = @_;
        my $values = $field->{'subfields'}{$code};
        return $values ? @$values : ();
    }

    # _first_subfield($field, $code)
    #
    # Returns the first value of subfield $code in $field, or undef.
    sub _first_subfield {
        my ($field, $code) = @_;
        my ($first) = _all_subfields($field, $code);
        return $first;
    }

    # _first_token($text)
    #
    # Returns the first run of non-blank characters in $text, or undef if
    # there is none.
    sub _first_token {
        my ($text) = @_;
        return unless defined $text;
        my ($token) = $text =~ /(\S+)/;
        return $token;
    }

    # _clean_price($raw)
    #
    # Reduces a replacement price such as "p$12.50" to its numeric part.
    # Returns 0 when no number can be found.
    sub _clean_price {
        my ($raw) = @_;
        return 0 unless defined $raw;
        $raw =~ s/^p//;
        $raw =~ s/\$//;
        return $raw =~ /(\d*\.?\d+)/ ? $1 : 0;
    }

    # _extract_biblio($fields)
    #
    # Walks the parsed fields in tag order and returns a hash reference with
    # the bibliographic values found (lccn, isbn, issn, dewey, author, title,
    # subtitle, illustrator, place, publisher, publicationyear,
    # copyrightdate, pages, size, volume, number, seriestitle, itemtype,
    # note) plus three lists: subjects, additionalauthors and items (one
    # { barcode, replacementprice } hash per 852 field).  Everything is
    # local to the record, so nothing carries over from the previous one.
    sub _extract_biblio {
        my ($fields) = @_;
        my %bib = (
            subjects          => [],
            additionalauthors => [],
            items             => [],
        );

        foreach my $field (sort { $a->{'tag'} cmp $b->{'tag'} } @$fields) {
            my $tag   = $field->{'tag'};
            my $sub_a = _first_subfield($field, 'a');

            if ($tag eq '010') {
                # LCCN is stored in field 010 a
                my $lccn = defined $sub_a ? $sub_a : '';
                $lccn =~ s/cn //;
                $bib{'lccn'} = _first_token($lccn);
            }
            elsif ($tag eq '015') {
                # LCCN is stored in field 015 a
                my $lccn = defined $sub_a ? $sub_a : '';
                $lccn =~ s/^\s*C?//;
                $bib{'lccn'} = _first_token($lccn);
            }
            elsif ($tag eq '020') {
                # ISBN is stored in field 020 a
                $bib{'isbn'} = _first_token($sub_a);
            }
            elsif ($tag eq '022') {
                # ISSN is stored in field 022 a
                $bib{'issn'} = _first_token($sub_a);
            }
            elsif ($tag eq '082') {
                # Dewey number stored in field 082 a.  If there is more than
                # one 'a' subfield only the first one is taken; slashes are
                # removed from that value.
                my $dewey = $sub_a;
                $dewey =~ s{/}{}g if defined $dewey;
                $bib{'dewey'} = $dewey;
            }
            elsif ($tag eq '100') {
                # Author is stored in field 100 a
                $bib{'author'} = $sub_a;
            }
            elsif ($tag eq '245') {
                # Title is stored in field 245 a, subtitle in 245 b and the
                # illustrator in 245 c
                my $title = $sub_a;
                $title =~ s{ /$}{} if defined $title;
                my $subtitle = _first_subfield($field, 'b');
                $subtitle =~ s{ /$}{} if defined $subtitle;
                $bib{'title'}    = $title;
                $bib{'subtitle'} = $subtitle;

                my $statement = _first_subfield($field, 'c');
                if (defined $statement
                    && $statement =~ /illustrated by]*\s+(.*)/)
                {
                    $bib{'illustrator'} = $1;
                }
            }
            elsif ($tag eq '260') {
                # Publisher info in field 260
                #   a = place
                #   b = publisher
                #   c = publication date
                #     (also stored as copyright date if the date starts with
                #      a 'c' as in c1995)
                my $place = $sub_a;
                $place =~ s/\s*:$// if defined $place;
                my $publisher = _first_subfield($field, 'b');
                $publisher =~ s/\s*:$// if defined $publisher;
                $bib{'place'}     = $place;
                $bib{'publisher'} = $publisher;

                my $date = _first_subfield($field, 'c');
                if (defined $date) {
                    if ($date =~ /c(\d\d\d\d)/) {
                        $bib{'copyrightdate'} = $1;
                    }
                    # The year is only assigned from a match that actually
                    # succeeded, never from a stale capture.
                    if ($date =~ /[^c](\d\d\d\d)/) {
                        $bib{'publicationyear'} = $1;
                    }
                    elsif ($bib{'copyrightdate'}) {
                        $bib{'publicationyear'} = $bib{'copyrightdate'};
                    }
                    elsif ($date =~ /(\d\d\d\d)/) {
                        $bib{'publicationyear'} = $1;
                    }
                    else {
                        $bib{'publicationyear'} = undef;
                    }
                }
            }
            elsif ($tag eq '300') {
                # Physical dimensions in field 300
                #   a = pages
                #   c = size
                my $pages = $sub_a;
                if (defined $pages) {
                    $pages =~ s/ ;$//;
                    $pages =~ s/\s*:$//;
                }
                my $size = _first_subfield($field, 'c');
                $size =~ s/\s*:$// if defined $size;
                $bib{'pages'} = $pages;
                $bib{'size'}  = $size;
            }
            elsif ($tag eq '362') {
                # Vol/No in field 362 a: the first two numbers found.
                if (defined $sub_a && $sub_a =~ /(\d+)\D+(\d+)/) {
                    $bib{'volume'} = $1;
                    $bib{'number'} = $2;
                }
            }
            elsif ($tag eq '440') {
                # Series title in field 440 a, Vol/No in field 440 v
                $bib{'seriestitle'} = $sub_a;
                my $enumeration = _first_subfield($field, 'v');
                if (defined $enumeration
                    && $enumeration =~ /(\d+)\D+(\d+)/)
                {
                    $bib{'volume'} = $1;
                    $bib{'number'} = $2;
                }
            }
            elsif ($tag eq '700') {
                # 700 a stores additional authors / illustrator info
                # 700 c will contain 'ill' if it's an illustrator
                my $role = _first_subfield($field, 'c');
                if (defined $role && $role =~ /ill/) {
                    $bib{'illustrator'} = $sub_a;
                }
                elsif (defined $sub_a && length $sub_a) {
                    push @{ $bib{'additionalauthors'} }, $sub_a;
                }
            }
            elsif ($tag eq '852') {
                # BARCODES!!!
                # 852 p stores barcodes
                # 852 h stores dewey field
                # 852 9 stores replacement price
                #   An itemtype identifier is looked for in 852 h as well:
                #   pb or pbk means PBK; otherwise JNF if a dewey number was
                #   found there and JF if not.  (The original author's
                #   libraries are school libraries, hence "Junior".)
                my $call = _first_subfield($field, 'h');
                $call = '' unless defined $call;
                my ($dewey) = $call =~ /^([\d.]*)/;
                $bib{'dewey'} = $dewey;

                if ($call =~ /pbk/ || $call =~ /pb$/) {
                    $bib{'itemtype'} = 'PBK';
                }
                elsif ($dewey) {
                    $bib{'itemtype'} = 'JNF';
                }
                else {
                    $bib{'itemtype'} = 'JF';
                }

                # Each copy keeps its own price.
                my $barcode = _first_subfield($field, 'p');
                my $price   = _clean_price(_first_subfield($field, '9'));
                push @{ $bib{'items'} },
                    { barcode => $barcode, replacementprice => $price };
            }
            elsif ($tag =~ /^5/) {
                # All 5XX a entries are concatenated as notes
                $bib{'note'} .= "$sub_a\n" if defined $sub_a;
            }
            elsif ($tag =~ /^6\d\d$/) {
                # 6XX entries are subject entries (691 is skipped, as in the
                # original script, which gave no reason for it).
                # Subfield a is taken, followed by the entries from subfield
                # x (general subdivision), y (chronological subdivision) and
                # z (geographic subdivision).
                next if $tag eq '691';
                my $subject = defined $sub_a ? $sub_a : '';
                print "SUBJECT: $subject\n";
                $subject =~ s/\.$//;
                foreach my $code (qw(x y z)) {
                    foreach my $part (_all_subfields($field, $code)) {
                        next unless length $part;
                        (my $clean = $part) =~ s/\.$//;
                        $subject .= " -- $clean";
                    }
                }
                push @{ $bib{'subjects'} }, $subject;
            }
        }
        return \%bib;
    }

    # _run_sql($dbh, $sql, @binds)
    #
    # Executes a non-SELECT statement with bound values.  A failure is
    # reported with warn() and does not stop the import.  Returns the result
    # of DBI's do().
    sub _run_sql {
        my ($dbh, $sql, @binds) = @_;
        my $ok = $dbh->do($sql, undef, @binds);
        unless ($ok) {
            my $error = $dbh->errstr || 'unknown error';
            warn "bulkmarcimport: '$sql' failed: $error\n";
        }
        return $ok;
    }

    # _store_biblio($dbh, $bib)
    #
    # Finds or creates the biblio and biblioitem rows for one record and
    # returns (biblionumber, biblioitemnumber).
    #
    #  1. If a biblioitem with the same ISSN, ISBN or LCCN exists, its
    #     numbers are returned and nothing is inserted.
    #  2. Otherwise a biblio with the same title, author, copyright date and
    #     series title is reused, or a new biblio (plus subtitle) is added.
    #  3. A new biblioitem, the subjects and the additional authors are then
    #     inserted.
    #
    # New numbers are taken as max(existing)+1, which is not safe if anything
    # else is inserting at the same time.
    sub _store_biblio {
        my ($dbh, $bib) = @_;

        # Only identifiers that are present take part in the lookup.
        my (@tests, @binds);
        foreach my $column (qw(issn isbn lccn)) {
            my $value = $bib->{$column};
            next unless defined $value && length $value;
            push @tests, "$column=?";
            push @binds, $value;
        }
        if (@tests) {
            my ($biblionumber, $biblioitemnumber) = $dbh->selectrow_array(
                'select biblionumber,biblioitemnumber from biblioitems where '
                    . join(' or ', @tests),
                undef, @binds
            );
            # title already in the database
            return ($biblionumber, $biblioitemnumber)
                if defined $biblionumber;
        }

        # Title and subtitle are stored as empty strings, never NULL.
        my $title    = defined $bib->{'title'}    ? $bib->{'title'}    : '';
        my $subtitle = defined $bib->{'subtitle'} ? $bib->{'subtitle'} : '';

        # A missing value is stored as NULL, so it has to be matched with
        # "is null": "column = NULL" is never true in SQL.
        my %key = (
            title         => $title,
            author        => $bib->{'author'},
            copyrightdate => $bib->{'copyrightdate'},
            seriestitle   => $bib->{'seriestitle'},
        );
        my (@where, @args);
        foreach my $column (qw(title author copyrightdate seriestitle)) {
            if (defined $key{$column}) {
                push @where, "$column=?";
                push @args,  $key{$column};
            }
            else {
                push @where, "$column is null";
            }
        }
        my ($biblionumber) = $dbh->selectrow_array(
            'select biblionumber from biblio where ' . join(' and ', @where),
            undef, @args
        );

        unless (defined $biblionumber) {
            ($biblionumber)
                = $dbh->selectrow_array('select max(biblionumber) from biblio');
            $biblionumber = 0 unless defined $biblionumber;
            $biblionumber++;
            _run_sql(
                $dbh,
                'insert into biblio (biblionumber, title, author, copyrightdate, seriestitle, notes) values (?, ?, ?, ?, ?, ?)',
                $biblionumber, $title, $bib->{'author'},
                $bib->{'copyrightdate'}, $bib->{'seriestitle'},
                $bib->{'note'}
            );
            _run_sql($dbh, 'insert into bibliosubtitle values (?, ?)',
                $subtitle, $biblionumber);
        }

        my ($biblioitemnumber) = $dbh->selectrow_array(
            'select max(biblioitemnumber) from biblioitems');
        $biblioitemnumber = 0 unless defined $biblioitemnumber;
        $biblioitemnumber++;

        # The subclass is the first three letters of the author, upper-cased.
        my $letters = defined $bib->{'author'} ? $bib->{'author'} : '';
        $letters =~ s/[^A-Za-z]//g;
        my $subclass = uc(substr($letters, 0, 3));

        # NOTE(review): publishercode, volumedate and volumeddesc are never
        # filled in by this script and are stored as NULL, as before.  The
        # publisher parsed from 260 b is not stored anywhere -- confirm
        # whether it belongs in publishercode.
        my %row = (
            biblioitemnumber => $biblioitemnumber,
            biblionumber     => $biblionumber,
            volume           => $bib->{'volume'},
            number           => $bib->{'number'},
            itemtype         => $bib->{'itemtype'},
            isbn             => $bib->{'isbn'},
            issn             => $bib->{'issn'},
            dewey            => $bib->{'dewey'},
            subclass         => $subclass,
            publicationyear  => $bib->{'publicationyear'},
            publishercode    => undef,
            volumedate       => undef,
            volumeddesc      => undef,
            illus            => $bib->{'illustrator'},
            pages            => $bib->{'pages'},
            size             => $bib->{'size'},
            place            => $bib->{'place'},
            lccn             => $bib->{'lccn'},
            marc             => $bib->{'marc'},
        );
        my @columns = qw(
            biblioitemnumber biblionumber volume number itemtype isbn issn
            dewey subclass publicationyear publishercode volumedate
            volumeddesc illus pages size place lccn marc
        );
        _run_sql(
            $dbh,
            'insert into biblioitems (' . join(', ', @columns) . ') values ('
                . join(', ', ('?') x @columns) . ')',
            @row{@columns}
        );

        foreach my $subject (@{ $bib->{'subjects'} }) {
            # subjects are stored in upper case
            _run_sql(
                $dbh,
                'insert into bibliosubject (biblionumber,subject) values (?, ?)',
                $biblionumber, uc($subject)
            );
        }

        foreach my $author (@{ $bib->{'additionalauthors'} }) {
            # remove any line ending characters (CR or LF), then store in
            # upper case
            (my $clean = $author) =~ s/[\r\n]//g;
            _run_sql(
                $dbh,
                'insert into additionalauthors (biblionumber,author) values (?, ?)',
                $biblionumber, uc($clean)
            );
        }

        return ($biblionumber, $biblioitemnumber);
    }

    # _store_items($dbh, $items, $biblionumber, $biblioitemnumber, $branch)
    #
    # Inserts one row into "items" for every entry of @$items whose barcode
    # is not in the table yet; known barcodes are reported and skipped.
    # Every inserted item gets its own item number (max(existing)+1, counted
    # up per item), today's date as accession date and $branch as home and
    # holding branch.
    sub _store_items {
        my ($dbh, $items, $biblionumber, $biblioitemnumber, $branch) = @_;
        return unless @$items;

        my ($itemnumber)
            = $dbh->selectrow_array('select max(itemnumber) from items');
        $itemnumber = 0 unless defined $itemnumber;

        my @now  = localtime(time);
        my $date = sprintf('%04d-%02d-%02d',
            $now[5] + 1900, $now[4] + 1, $now[3]);

        my $insert
            = 'insert into items (itemnumber, biblionumber, biblioitemnumber, barcode, itemnotes, homebranch, holdingbranch, dateaccessioned, replacementprice) values (?, ?, ?, ?, ?, ?, ?, ?, ?)';

      BARCODE:
        foreach my $item (@$items) {
            my $barcode = $item->{'barcode'};
            if (defined $barcode) {
                my ($existing) = $dbh->selectrow_array(
                    'select barcode from items where barcode=?',
                    undef, $barcode);
                if (defined $existing) {
                    print "Skipping $barcode\n";
                    next BARCODE;
                }
            }

            $itemnumber++;
            my @values = (
                $itemnumber, $biblionumber, $biblioitemnumber, $barcode,
                '', $branch, $branch, $date, $item->{'replacementprice'},
            );
            print "$insert ["
                . join(', ', map { defined $_ ? $_ : 'NULL' } @values)
                . "]\n";
            _run_sql($dbh, $insert, @values);
        }
        return;
    }
}