Added POD.
[koha.git] / acqui.simple / bulkmarcimport.pl
1 #!/usr/bin/perl
2 #
3 # Tool for importing bulk marc records
4 #
5 # WARNING!!
6 #
7 # Do not use this script on a production system, it is still in development
8 #
9 #
10
11
12
13
14
15 # Copyright 2000-2002 Katipo Communications
16 #
17 # This file is part of Koha.
18 #
19 # Koha is free software; you can redistribute it and/or modify it under the
20 # terms of the GNU General Public License as published by the Free Software
21 # Foundation; either version 2 of the License, or (at your option) any later
22 # version.
23 #
24 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
25 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
26 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
27 #
28 # You should have received a copy of the GNU General Public License along with
29 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
30 # Suite 330, Boston, MA  02111-1307 USA
31
# Path of the MARC file to import, taken from the first command-line argument.
my $file = $ARGV[0];

# Branch code written as both home branch and holding branch of every item
# this script inserts.
my $branchname = 'MAIN';

# Without a file name there is nothing to import.  Report the problem on
# STDERR and exit with a non-zero status so that calling shells and scripts
# can tell the run failed.  The test is on definedness and length rather than
# truth so that a file literally named "0" is still accepted.
unless (defined $file && length $file) {
    print STDERR "USAGE: ./bulkmarcimport.pl filename\n";
    exit 1;
}
40
41
42
43
# Two colour codes; nothing else in this script reads them.
my $lc1='#dddddd';
my $lc2='#ddaaaa';


use C4::Database;
use CGI;
use DBI;
#use strict;
use C4::Catalogue;
use C4::Biblio;
use C4::Output;
# No database connection is opened here: the import section further down
# declares and opens its own $dbh immediately before it starts reading the
# file, so connecting at this point as well would only create a second,
# unused handle that is never disconnected.

# Remote user name as supplied by the environment (undef when the script is
# run from a shell); nothing else in this script reads it.
my $userid=$ENV{'REMOTE_USER'};
# Human-readable label for each MARC tag that the record dump knows about.
# The table is kept as plain text, one "TAG label" pair per line; the first
# three digits become the hash key and the rest of the line the value.
%tagtext = map { /^(\d{3}) (.*)$/ } split /\n/, <<'END_OF_TAG_LABELS';
001 Control number
003 Control number identifier
005 Date and time of latest transaction
006 Fixed-length data elements -- additional material characteristics
007 Physical description fixed field
008 Fixed length data elements
010 LCCN
015 LCCN Cdn
020 ISBN
022 ISSN
037 Source of acquisition
040 Cataloging source
041 Language code
043 Geographic area code
050 Library of Congress call number
060 National Library of Medicine call number
082 Dewey decimal call number
100 Main entry -- Personal name
110 Main entry -- Corporate name
130 Main entry -- Uniform title
240 Uniform title
245 Title statement
246 Varying form of title
250 Edition statement
256 Computer file characteristics
260 Publication, distribution, etc.
263 Projected publication date
300 Physical description
306 Playing time
440 Series statement / Added entry -- Title
490 Series statement
500 General note
504 Bibliography, etc. note
505 Formatted contents note
508 Creation/production credits note
510 Citation/references note
511 Participant or performer note
520 Summary, etc. note
521 Target audience note (ie age)
530 Additional physical form available note
538 System details note
586 Awards note
600 Subject added entry -- Personal name
610 Subject added entry -- Corporate name
650 Subject added entry -- Topical term
651 Subject added entry -- Geographic name
656 Index term -- Occupation
700 Added entry -- Personal name
710 Added entry -- Corporate name
730 Added entry -- Uniform title
740 Added entry -- Uncontrolled related/analytical title
800 Series added entry -- Personal name
830 Series added entry -- Uniform title
852 Location
856 Electronic location and access
END_OF_TAG_LABELS
114
115
# ---------------------------------------------------------------------------
# Import every record of the MARC file named on the command line.
#
# The file is read as a sequence of records terminated by chr(29); inside a
# record, fields are terminated by chr(30) and subfields are introduced by
# chr(31).  For each record the script
#   1. prints the 24-character leader and a dump of every field,
#   2. extracts the bibliographic values it understands from the fields,
#   3. reuses or creates the matching biblio / biblioitems rows, and
#   4. inserts one items row for every barcode found in the 852 fields.
# ---------------------------------------------------------------------------

# Return the first value stored for a subfield code.  A code that occurs once
# in a field is stored as a plain string, a repeated code as an array
# reference (see _bmi_parse_record); undef is passed through unchanged.
sub _bmi_first_value {
    my ($value) = @_;
    return ref($value) eq 'ARRAY' ? $value->[0] : $value;
}

# Return every value stored for a subfield code as a (possibly empty) list.
sub _bmi_all_values {
    my ($value) = @_;
    return () unless defined $value;
    return ref($value) eq 'ARRAY' ? @$value : ($value);
}

# Strip leading whitespace from $text and return its first
# whitespace-delimited token, or undef when there is none.  Always call this
# in scalar context.
sub _bmi_first_token {
    my ($text) = @_;
    return unless defined $text;
    $text =~ s/^\s+//;
    my ($token) = split /\s+/, $text;
    return $token;
}

# Execute one SQL statement that returns no rows.  A failure is reported with
# warn() instead of being ignored, and the import carries on with the next
# statement.  Returns the result of $dbh->do().
sub _bmi_run_sql {
    my ($dbh, $sql) = @_;
    my $ok = $dbh->do($sql);
    unless ($ok) {
        my $reason = $dbh->errstr;
        $reason = 'unknown error' unless defined $reason;
        warn "SQL statement failed ($reason):\n  $sql\n";
    }
    return $ok;
}

# Split the body of one record (everything after the leader) into fields and
# print a dump of them.
#
#   $body - record text: the directory followed by the fields
#
# Returns a reference to a list of field hashes in file order, each with
#   tag       - three-character tag taken from the directory ('' when the
#               directory has fewer entries than the record has fields)
#   indicator - text in front of the first subfield delimiter (for a field
#               without subfields this is the whole field)
#   subfields - hash of subfield code => value, or => array reference of
#               values when the code is repeated within the field
sub _bmi_parse_record {
    my ($body) = @_;
    my $field_end      = chr(30);
    my $subfield_start = chr(31);

    # The first chunk is the directory: 12-character entries whose first
    # three characters are the tag.  Fields are paired with directory entries
    # by position; an incomplete trailing entry is ignored.
    my ($directory, @raw_fields) = split /$field_end/, $body;
    $directory = '' unless defined $directory;
    my @tags = map { substr($_, 0, 3) } ($directory =~ /(.{12})/gs);

    my @fields;
    my $position = 0;
    foreach my $raw_field (@raw_fields) {
        my $tag = $tags[$position++];
        $tag = '' unless defined $tag;
        my $label = defined $tagtext{$tag} ? $tagtext{$tag} : '';
        printf "%4s %-40s ", $tag, $label;

        my @parts     = split /$subfield_start/, $raw_field;
        my $indicator = $parts[0];
        my %subfields;
        if (@parts == 1) {
            # No subfield delimiter at all: show the content on the tag line.
            print "$indicator\n";
        } else {
            print "\n";
            foreach my $text (@parts[1 .. $#parts]) {
                my $code  = substr($text, 0, 1);
                my $value = substr($text, 1);
                print "   $code $value\n";
                if (!exists $subfields{$code}) {
                    $subfields{$code} = $value;
                } elsif (ref($subfields{$code}) eq 'ARRAY') {
                    push @{ $subfields{$code} }, $value;
                } else {
                    # Second occurrence: promote the string to a list.
                    $subfields{$code} = [ $subfields{$code}, $value ];
                }
            }
        }
        push @fields, {
            'tag'       => $tag,
            'indicator' => $indicator,
            'subfields' => \%subfields,
        };
    }
    return \@fields;
}

# Collect the values this script stores from the parsed fields of one record.
# Fields are visited in tag order; where a subfield is repeated only its
# first value is used unless noted otherwise.  Prints a "SUBJECT:" line for
# every 6XX field it uses.
#
#   $fields - list reference as returned by _bmi_parse_record
#
# Returns a hash reference with the keys lccn, isbn, issn, dewey, author,
# title, subtitle, illustrator, place, publisher, publicationyear,
# copyrightdate, pages, size, volume, number, seriestitle, itemtype,
# replacementprice and note (absent or undef when the record has no such
# value) plus the list references subjects, additionalauthors and barcodes.
sub _bmi_extract_biblio_data {
    my ($fields) = @_;
    my %data = (
        'subjects'          => [],
        'additionalauthors' => [],
        'barcodes'          => [],
    );

    foreach my $field (sort { $a->{'tag'} cmp $b->{'tag'} } @$fields) {
        my $tag = $field->{'tag'};
        my $sf  = $field->{'subfields'} || {};

        if ($tag eq '010') {
            # LCCN is stored in field 010 a
            my $lccn = _bmi_first_value($sf->{'a'});
            if (defined $lccn) {
                $lccn =~ s/^\s*//;
                $lccn =~ s/cn //;
            }
            $data{'lccn'} = _bmi_first_token($lccn);
        } elsif ($tag eq '015') {
            # LCCN is stored in field 015 a; because fields are visited in
            # tag order this replaces a value taken from 010.
            my $lccn = _bmi_first_value($sf->{'a'});
            if (defined $lccn) {
                $lccn =~ s/^\s*//;
                $lccn =~ s/^C//;
            }
            $data{'lccn'} = _bmi_first_token($lccn);
        } elsif ($tag eq '020') {
            # ISBN is stored in field 020 a
            $data{'isbn'} = _bmi_first_token(_bmi_first_value($sf->{'a'}));
        } elsif ($tag eq '022') {
            # ISSN is stored in field 022 a
            $data{'issn'} = _bmi_first_token(_bmi_first_value($sf->{'a'}));
        } elsif ($tag eq '082') {
            # Dewey number is stored in field 082 a.  Pick the first value
            # BEFORE removing the slashes: substituting on an array reference
            # would turn it into the string "ARRAY(0x...)".
            my $dewey = _bmi_first_value($sf->{'a'});
            $dewey =~ s/\///g if defined $dewey;
            $data{'dewey'} = $dewey;
        } elsif ($tag eq '100') {
            # Author is stored in field 100 a
            $data{'author'} = _bmi_first_value($sf->{'a'});
        } elsif ($tag eq '245') {
            # Title in 245 a, subtitle in 245 b, illustrator named in 245 c
            my $title = _bmi_first_value($sf->{'a'});
            $title =~ s/ \/$// if defined $title;
            $data{'title'} = $title;
            my $subtitle = _bmi_first_value($sf->{'b'});
            $subtitle =~ s/ \/$// if defined $subtitle;
            $data{'subtitle'} = $subtitle;
            my $responsibility = _bmi_first_value($sf->{'c'});
            if (defined $responsibility
                && $responsibility =~ /illustrated by]*\s+(.*)/) {
                $data{'illustrator'} = $1;
            }
        } elsif ($tag eq '260') {
            # Publisher info: a = place, b = publisher, c = publication date
            my $place = _bmi_first_value($sf->{'a'});
            $place =~ s/\s*:$//g if defined $place;
            $data{'place'} = $place;
            my $publisher = _bmi_first_value($sf->{'b'});
            $publisher =~ s/\s*:$//g if defined $publisher;
            $data{'publisher'} = $publisher;

            my $date = _bmi_first_value($sf->{'c'});
            $date = '' unless defined $date;
            # A year written as c1995 is (also) the copyright date.
            if ($date =~ /c(\d\d\d\d)/) {
                $data{'copyrightdate'} = $1;
            }
            # The publication year is the first four-digit number that is not
            # introduced by a "c", including one at the very start of the
            # text; failing that, fall back on the copyright date.
            if ($date =~ /(?:^|[^c\d])(\d\d\d\d)/) {
                $data{'publicationyear'} = $1;
            } elsif ($data{'copyrightdate'}) {
                $data{'publicationyear'} = $data{'copyrightdate'};
            } else {
                $data{'publicationyear'} = undef;
            }
        } elsif ($tag eq '300') {
            # Physical dimensions: a = pages, c = size
            my $pages = _bmi_first_value($sf->{'a'});
            if (defined $pages) {
                $pages =~ s/ \;$//;
                $pages =~ s/\s*:$//g;
            }
            $data{'pages'} = $pages;
            my $size = _bmi_first_value($sf->{'c'});
            $size =~ s/\s*:$//g if defined $size;
            $data{'size'} = $size;
        } elsif ($tag eq '362') {
            # Vol/No in field 362 a: the first two numbers in the text.
            # \D+ between them keeps every digit of the second number.
            my $designation = _bmi_first_value($sf->{'a'});
            if (defined $designation && $designation =~ /(\d+)\D+(\d+)/) {
                $data{'volume'} = $1;
                $data{'number'} = $2;
            }
        } elsif ($tag eq '440') {
            # Series title in field 440 a, Vol/No in field 440 v
            $data{'seriestitle'} = _bmi_first_value($sf->{'a'});
            my $designation = _bmi_first_value($sf->{'v'});
            if (defined $designation && $designation =~ /(\d+)\D+(\d+)/) {
                $data{'volume'} = $1;
                $data{'number'} = $2;
            }
        } elsif ($tag eq '700') {
            # 700 a stores additional authors / illustrator info;
            # 700 c contains 'ill' if the person is an illustrator.
            my $name = _bmi_first_value($sf->{'a'});
            my $role = _bmi_first_value($sf->{'c'});
            if (defined $role && $role =~ /ill/) {
                $data{'illustrator'} = $name;
            } elsif (defined $name && length $name) {
                push @{ $data{'additionalauthors'} }, $name;
            }
        } elsif ($tag eq '852') {
            # 852 p stores the barcode, 852 h the dewey number and
            # 852 9 the replacement price.
            push @{ $data{'barcodes'} }, _bmi_first_value($sf->{'p'});
            my $deweyfield = _bmi_first_value($sf->{'h'});
            $deweyfield = '' unless defined $deweyfield;
            # The leading run of digits and dots (possibly empty) replaces
            # any dewey number taken from field 082.
            my ($dewey) = $deweyfield =~ /^([\d\.]*)/;
            $data{'dewey'} = $dewey;
            # Item type: 'pbk' anywhere or a trailing 'pb' in 852 h means
            # PBK; otherwise JNF when there is a dewey number, else JF.
            if (($deweyfield =~ /pbk/) || ($deweyfield =~ /pb$/)) {
                $data{'itemtype'} = 'PBK';
            } elsif ($dewey) {
                $data{'itemtype'} = 'JNF';
            } else {
                $data{'itemtype'} = 'JF';
            }
            $data{'replacementprice'} = _bmi_first_value($sf->{'9'});
        } elsif ($tag =~ /^5/) {
            # All 5XX a entries are concatenated as notes, one per line.
            my $text = _bmi_first_value($sf->{'a'});
            $text = '' unless defined $text;
            $data{'note'} = '' unless defined $data{'note'};
            $data{'note'} .= "$text\n";
        } elsif ($tag =~ /^6\d\d$/ && $tag ne '691') {
            # 6XX entries (691 excepted) are subjects: subfield a followed by
            # the x (general), y (chronological) and z (geographic)
            # subdivisions, each with a trailing full stop removed.
            my $subject = _bmi_first_value($sf->{'a'});
            $subject = '' unless defined $subject;
            print "SUBJECT: $subject\n";
            $subject =~ s/\.$//;
            foreach my $code ('x', 'y', 'z') {
                foreach my $division (_bmi_all_values($sf->{$code})) {
                    $division =~ s/\.$//;
                    $subject .= " -- $division";
                }
            }
            push @{ $data{'subjects'} }, $subject;
        }
    }
    return \%data;
}

# Read the whole file in one go.  The default line-oriented read would stop
# at the first newline and silently drop every record after it.
open(my $marc_fh, '<', $file) or die "Cannot open $file: $!\n";
binmode $marc_fh;
my $data = do { local $/; <$marc_fh> };
close $marc_fh;
$data = '' unless defined $data;

my $dbh=C4Connect;
my $record_end = chr(29);

# Cycle through all of the records in the file
RECORD:
foreach my $chunk (split /$record_end/, $data) {
    my $raw = $chunk;
    # Ignore line breaks between records, and skip anything too short to
    # hold more than a leader (for example a newline after the last record),
    # which would otherwise be imported as an empty title.
    $raw =~ s/^[\r\n]+//;
    next RECORD if length($raw) <= 24;

    my $leader = substr($raw, 0, 24);
    print "\n\n---------------------------------------------------------------------------\n";
    print "Leader: $leader\n";

    # The record without its leader is what gets stored in biblioitems.marc.
    my $marc = substr($raw, 24);
    my $bib  = _bmi_extract_biblio_data(_bmi_parse_record($marc));

    my $q_isbn = $dbh->quote($bib->{'isbn'});
    my $q_issn = $dbh->quote($bib->{'issn'});
    my $q_lccn = $dbh->quote($bib->{'lccn'});

    # A biblioitem sharing the ISSN, ISBN or LCCN means the title is already
    # in the database.  Missing values are quoted as NULL, and "column=NULL"
    # never matches, so they cannot cause a false hit.
    my ($biblionumber, $biblioitemnumber) = $dbh->selectrow_array(
        "select biblionumber,biblioitemnumber from biblioitems where issn=$q_issn or isbn=$q_isbn or lccn=$q_lccn");

    unless (defined $biblionumber) {
        my $title    = defined $bib->{'title'}    ? $bib->{'title'}    : '';
        my $subtitle = defined $bib->{'subtitle'} ? $bib->{'subtitle'} : '';
        my $q_title         = $dbh->quote($title);
        my $q_subtitle      = $dbh->quote($subtitle);
        my $q_author        = $dbh->quote($bib->{'author'});
        my $q_copyrightdate = $dbh->quote($bib->{'copyrightdate'});
        my $q_seriestitle   = $dbh->quote($bib->{'seriestitle'});
        my $q_notes         = $dbh->quote($bib->{'note'});

        # Reuse a biblio with the same title, author, copyright date and
        # series title.  A record lacking any of the last three compares
        # against NULL and therefore always gets a new biblio.
        ($biblionumber) = $dbh->selectrow_array(
            "select biblionumber from biblio where title=$q_title and author=$q_author and copyrightdate=$q_copyrightdate and seriestitle=$q_seriestitle");
        unless (defined $biblionumber) {
            ($biblionumber) = $dbh->selectrow_array("select max(biblionumber) from biblio");
            $biblionumber++;
            _bmi_run_sql($dbh, "insert into biblio (biblionumber, title, author, copyrightdate, seriestitle, notes) values ($biblionumber, $q_title, $q_author, $q_copyrightdate, $q_seriestitle, $q_notes)");
            _bmi_run_sql($dbh, "insert into bibliosubtitle values ($q_subtitle, $biblionumber)");
        }

        ($biblioitemnumber) = $dbh->selectrow_array("select max(biblioitemnumber) from biblioitems");
        $biblioitemnumber++;

        # The subclass is the first three letters of the author, upper-cased.
        my $cleanauthor = defined $bib->{'author'} ? $bib->{'author'} : '';
        $cleanauthor =~ s/[^A-Za-z]//g;
        my $q_subclass = $dbh->quote(uc(substr($cleanauthor, 0, 3)));

        my $q_volume          = $dbh->quote($bib->{'volume'});
        my $q_number          = $dbh->quote($bib->{'number'});
        my $q_itemtype        = $dbh->quote($bib->{'itemtype'});
        my $q_dewey           = $dbh->quote($bib->{'dewey'});
        my $q_publicationyear = $dbh->quote($bib->{'publicationyear'});
        # The publisher taken from 260 b goes into the publishercode column.
        my $q_publishercode   = $dbh->quote($bib->{'publisher'});
        # Nothing in the record is mapped to these two columns.
        my $q_volumedate      = $dbh->quote(undef);
        my $q_volumeddesc     = $dbh->quote(undef);
        my $q_illus           = $dbh->quote($bib->{'illustrator'});
        my $q_pages           = $dbh->quote($bib->{'pages'});
        my $q_size            = $dbh->quote($bib->{'size'});
        my $q_place           = $dbh->quote($bib->{'place'});
        my $q_marc            = $dbh->quote($marc);

        _bmi_run_sql($dbh, "insert into biblioitems (biblioitemnumber, biblionumber, volume, number, itemtype, isbn, issn, dewey, subclass, publicationyear, publishercode, volumedate, volumeddesc, illus, pages, size, place, lccn, marc) values ($biblioitemnumber, $biblionumber, $q_volume, $q_number, $q_itemtype, $q_isbn, $q_issn, $q_dewey, $q_subclass, $q_publicationyear, $q_publishercode, $q_volumedate, $q_volumeddesc, $q_illus, $q_pages, $q_size, $q_place, $q_lccn, $q_marc)");

        # Subject headings are stored in upper case.
        foreach my $subjectheading (@{ $bib->{'subjects'} }) {
            my $q_subjectheading = $dbh->quote(uc($subjectheading));
            _bmi_run_sql($dbh, "insert into bibliosubject (biblionumber,subject) values ($biblionumber, $q_subjectheading)");
        }

        # Additional authors are stored in upper case without any line
        # ending characters (carriage return or line feed).
        foreach my $additionalauthor (@{ $bib->{'additionalauthors'} }) {
            my $name = uc($additionalauthor);
            $name =~ s/[\r\n]//g;
            my $q_additionalauthor = $dbh->quote($name);
            _bmi_run_sql($dbh, "insert into additionalauthors (biblionumber,author) values ($biblionumber, $q_additionalauthor)");
        }
    }

    # One item per barcode of the record.
    my $q_branch = $dbh->quote($branchname);
    my @now      = localtime(time);
    my $q_date   = $dbh->quote(sprintf("%04d-%02d-%02d", 1900 + $now[5], $now[4] + 1, $now[3]));

    # Replacement price: drop a leading "p" and a dollar sign, default to 0.
    # The value comes straight from the file, so it is quoted like every
    # other value instead of being pasted into the statement as-is.
    my $replacementprice = $bib->{'replacementprice'};
    $replacementprice = '' unless defined $replacementprice;
    $replacementprice =~ s/^p//;
    $replacementprice = 0 unless $replacementprice;
    $replacementprice =~ s/\$//;
    my $q_replacementprice = $dbh->quote($replacementprice);

    my ($itemnumber) = $dbh->selectrow_array("select max(itemnumber) from items");
    $itemnumber++;

BARCODE:
    foreach my $barcode (@{ $bib->{'barcodes'} }) {
        my $q_barcode = $dbh->quote($barcode);
        my ($existing) = $dbh->selectrow_array("select barcode from items where barcode=$q_barcode");
        if (defined $existing) {
            print "Skipping $barcode\n";
            next BARCODE;
        }
        my $task = "insert into items (itemnumber, biblionumber, biblioitemnumber, barcode, itemnotes, homebranch, holdingbranch, dateaccessioned, replacementprice) values ($itemnumber, $biblionumber, $biblioitemnumber, $q_barcode, '', $q_branch, $q_branch, $q_date, $q_replacementprice)";
        print "$task\n";
        _bmi_run_sql($dbh, $task);
        # Each further barcode of the same record needs its own item number.
        $itemnumber++;
    }
}
$dbh->disconnect;