acqui.simple/bulkmarcimport.pl

   1 #!/usr/bin/perl
   2 #
   3 # Tool for importing bulk marc records
   4 #
   5 # WARNING!!
   6 #
   7 # Do not use this script on a production system, it is still in development
   8 #
   9 #
  10
  11
  12
  13
  14
  15 # Copyright 2000-2002 Katipo Communications
  16 #
  17 # This file is part of Koha.
  18 #
  19 # Koha is free software; you can redistribute it and/or modify it under the
  20 # terms of the GNU General Public License as published by the Free Software
  21 # Foundation; either version 2 of the License, or (at your option) any later
  22 # version.
  23 #
  24 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  25 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  26 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  27 #
  28 # You should have received a copy of the GNU General Public License along with
  29 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  30 # Suite 330, Boston, MA  02111-1307 USA
  31
  32 $file=$ARGV[0];
  33
  34 $branchname='MAIN';
  35
  36 unless ($file) {
  37     print "USAGE: ./bulkmarcimport.pl filename\n";
  38     exit;
  39 }
  40
  41
  42
  43
  44 my $lc1='#dddddd';
  45 my $lc2='#ddaaaa';
  46
  47
  48 use C4::Context;
  49 use CGI;
  50 use DBI;
  51 #use strict;
  52 use C4::Catalogue;
  53 use C4::Biblio;
  54 use C4::Output;
  55 my $dbh = C4::Context->dbh;
  56 my $userid=$ENV{'REMOTE_USER'};
  57 %tagtext = (
  58     '001' => 'Control number',
  59     '003' => 'Control number identifier',
  60     '005' => 'Date and time of latest transaction',
  61     '006' => 'Fixed-length data elements -- additional material characteristics',
  62     '007' => 'Physical description fixed field',
  63     '008' => 'Fixed length data elements',
  64     '010' => 'LCCN',
  65     '015' => 'LCCN Cdn',
  66     '020' => 'ISBN',
  67     '022' => 'ISSN',
  68     '037' => 'Source of acquisition',
  69     '040' => 'Cataloging source',
  70     '041' => 'Language code',
  71     '043' => 'Geographic area code',
  72     '050' => 'Library of Congress call number',
  73     '060' => 'National Library of Medicine call number',
  74     '082' => 'Dewey decimal call number',
  75     '100' => 'Main entry -- Personal name',
  76     '110' => 'Main entry -- Corporate name',
  77     '130' => 'Main entry -- Uniform title',
  78     '240' => 'Uniform title',
  79     '245' => 'Title statement',
  80     '246' => 'Varying form of title',
  81     '250' => 'Edition statement',
  82     '256' => 'Computer file characteristics',
  83     '260' => 'Publication, distribution, etc.',
  84     '263' => 'Projected publication date',
  85     '300' => 'Physical description',
  86     '306' => 'Playing time',
  87     '440' => 'Series statement / Added entry -- Title',
  88     '490' => 'Series statement',
  89     '500' => 'General note',
  90     '504' => 'Bibliography, etc. note',
  91     '505' => 'Formatted contents note',
  92     '508' => 'Creation/production credits note',
  93     '510' => 'Citation/references note',
  94     '511' => 'Participant or performer note',
  95     '520' => 'Summary, etc. note',
  96     '521' => 'Target audience note (ie age)',
  97     '530' => 'Additional physical form available note',
  98     '538' => 'System details note',
  99     '586' => 'Awards note',
 100     '600' => 'Subject added entry -- Personal name',
 101     '610' => 'Subject added entry -- Corporate name',
 102     '650' => 'Subject added entry -- Topical term',
 103     '651' => 'Subject added entry -- Geographic name',
 104     '656' => 'Index term -- Occupation',
 105     '700' => 'Added entry -- Personal name',
 106     '710' => 'Added entry -- Corporate name',
 107     '730' => 'Added entry -- Uniform title',
 108     '740' => 'Added entry -- Uncontrolled related/analytical title',
 109     '800' => 'Series added entry -- Personal name',
 110     '830' => 'Series added entry -- Uniform title',
 111     '852' => 'Location',
 112     '856' => 'Electronic location and access',
 113 );
 114
 115
 116 if ($file) {
 117     open (F, "$file");
 118     my $data=<F>;
 119     close F;
 120     $splitchar=chr(29);
 121
 122
 123 # Cycle through all of the records in the file
 124
 125
 126 RECORD:
 127     foreach $record (split(/$splitchar/, $data)) {
 128         $leader=substr($record,0,24);
 129         print "\n\n---------------------------------------------------------------------------\n";
 130         print "Leader: $leader\n";
 131         $record=substr($record,24);
 132         $splitchar2=chr(30);
 133         my $directory=0;
 134         my $tagcounter=0;
 135         my %tag;
 136         my @record;
 137         my %record;
 138         foreach $field (split(/$splitchar2/, $record)) {
 139             my %field;
 140             unless ($directory) {
 141                 # Parse the MARC directory and store the cotents in the %tag hash
 142                 $directory=$field;
 143                 my $itemcounter=1;
 144                 $counter=0;
 145                 while ($item=substr($directory,0,12)) { # FIXME - $item never used
 146                     $tag=substr($directory,0,3);
 147                     $length=substr($directory,3,4);     # FIXME - Unused
 148                     $start=substr($directory,7,6);      # FIXME - Unused
 149                     $directory=substr($directory,12);
 150                     $tag{$counter}=$tag;
 151                     $counter++;
 152                 }
 153                 $directory=1;
 154                 next;
 155             }
 156             $tag=$tag{$tagcounter};
 157             $tagcounter++;
 158             $field{'tag'}=$tag;
 159             printf "%4s %-40s ",$tag, $tagtext{$tag};
 160             $splitchar3=chr(31);
 161             my @subfields=split(/$splitchar3/, $field);
 162             $indicator=$subfields[0];
 163             $field{'indicator'}=$indicator;
 164             my $firstline=1;
 165             if ($#subfields==0) {
 166                 print "$indicator\n";
 167             } else {
 168                 print "\n";
 169                 my %subfields;
 170                 for ($i=1; $i<=$#subfields; $i++) {
 171                     my $text=$subfields[$i];
 172                     my $subfieldcode=substr($text,0,1);
 173                     my $subfield=substr($text,1);
 174                     print "   $subfieldcode $subfield\n";
 175                     if ($subfields{$subfieldcode}) {
 176                         my $subfieldlist=$subfields{$subfieldcode};
 177                         my @subfieldlist=@$subfieldlist;
 178                         if ($#subfieldlist>=0) {
 179                             push (@subfieldlist, $subfield);
 180                         } else {
 181                             @subfieldlist=($subfields{$subfieldcode}, $subfield);
 182                         }
 183                         $subfields{$subfieldcode}=\@subfieldlist;
 184                     } else {
 185                         $subfields{$subfieldcode}=$subfield;
 186                     }
 187                 }
 188                 $field{'subfields'}=\%subfields;
 189             }
 190             if ($record{$tag}) {
 191                 my $fieldlist=$record{$tag};
 192                 if ($fieldlist->{'tag'}) {
 193                     @fieldlist=($fieldlist, \%field);
 194                     $fieldlist=\@fieldlist;
 195                 } else {
 196                     push (@$fieldlist,\%field);
 197                 }
 198                 $record{$tag}=$fieldlist;
 199             } else {
 200                 $record{$tag}=[\%field];
 201             }
 202             push (@record, \%field);
 203         }
 204         $rec=\@record;
 205         $counter++;
 206         my ($lccn, $isbn, $issn, $dewey, $author, $title, $place, $publisher, $publicationyear, $volume, $number, @subjects, $note, $additionalauthors, $illustrator, $copyrightdate, $barcode, $itemtype, $seriestitle, @barcodes);
 207         my $marc=$record;
 208         foreach $field (sort {$a->{'tag'} cmp $b->{'tag'}} @$rec) {
 209             # LCCN is stored in field 010 a
 210             if ($field->{'tag'} eq '010') {
 211                 $lccn=$field->{'subfields'}->{'a'};
 212                 $lccn=~s/^\s*//;
 213                 $lccn=~s/cn //;
 214                 $lccn=~s/^\s*//;
 215                 ($lccn) = (split(/\s+/, $lccn))[0];
 216             }
 217             # LCCN is stored in field 015 a
 218             if ($field->{'tag'} eq '015') {
 219                 $lccn=$field->{'subfields'}->{'a'};
 220                 $lccn=~s/^\s*//;
 221                 $lccn=~s/^C//;
 222                 ($lccn) = (split(/\s+/, $lccn))[0];
 223             }
 224             # ISBN is stored in field 020 a
 225             if ($field->{'tag'} eq '020') {
 226                 $isbn=$field->{'subfields'}->{'a'};
 227                 $isbn=~s/^\s*//;
 228                 ($isbn) = (split(/\s+/, $isbn))[0];
 229             }
 230             # ISSN is stored in field 022 a
 231             if ($field->{'tag'} eq '022') {
 232                 $issn=$field->{'subfields'}->{'a'};
 233                 $issn=~s/^\s*//;
 234                 ($issn) = (split(/\s+/, $issn))[0];
 235             }
 236             # Dewey number stored in field 082 a
 237             # If there is more than one dewey number (more than one 'a'
 238             # subfield) I just take the first one
 239             if ($field->{'tag'} eq '082') {
 240                 $dewey=$field->{'subfields'}->{'a'};
 241                 $dewey=~s/\///g;
 242                 if (@$dewey) {
 243                     $dewey=$$dewey[0];
 244                 }
 245             }
 246             # Author is stored in field 100 a
 247             if ($field->{'tag'} eq '100') {
 248                 $author=$field->{'subfields'}->{'a'};
 249             }
 250             # Title is stored in field 245 a
 251             # Subtitle in field 245 b
 252             # Illustrator in field 245 c
 253             if ($field->{'tag'} eq '245') {
 254                 $title=$field->{'subfields'}->{'a'};
 255                 $title=~s/ \/$//;
 256                 $subtitle=$field->{'subfields'}->{'b'};
 257                 $subtitle=~s/ \/$//;
 258                 my $name=$field->{'subfields'}->{'c'};
 259                 if ($name=~/illustrated by]*\s+(.*)/) {
 260                     $illustrator=$1;
 261                 }
 262             }
 263             # Publisher Info in field 260
 264             #   a = place
 265             #   b = publisher
 266             #   c = publication date
 267             #     (also store as copyright date if date starts with a 'c' as in c1995)
 268             if ($field->{'tag'} eq '260') {
 269                 $place=$field->{'subfields'}->{'a'};
 270                 if (@$place) {
 271                     $place=$$place[0];
 272                 }
 273                 $place=~s/\s*:$//g;
 274                 $publisher=$field->{'subfields'}->{'b'};
 275                 if (@$publisher) {
 276                     $publisher=$$publisher[0];
 277                 }
 278                 $publisher=~s/\s*:$//g;
 279                 $publicationyear=$field->{'subfields'}->{'c'};
 280                 if ($publicationyear=~/c(\d\d\d\d)/) {
 281                     $copyrightdate=$1;
 282                 }
 283                 if ($publicationyear=~/[^c](\d\d\d\d)/) {
 284                     $publicationyear=$1;
 285                 } elsif ($copyrightdate) {
 286                     $publicationyear=$copyrightdate;
 287                 } else {
 288                     $publicationyear=~/(\d\d\d\d)/;
 289                     $publicationyear=$1;
 290                 }
 291             }
 292             # Physical Dimensions in field 300
 293             #   a = pages
 294             #   c = size
 295             if ($field->{'tag'} eq '300') {
 296                 $pages=$field->{'subfields'}->{'a'};
 297                 $pages=~s/ \;$//;
 298                 $size=$field->{'subfields'}->{'c'};
 299                 $pages=~s/\s*:$//g;
 300                 $size=~s/\s*:$//g;
 301             }
 302             # Vol/No in field 362 a
 303             if ($field->{'tag'} eq '362') {
 304                 if ($field->{'subfields'}->{'a'}=~/(\d+).*(\d+)/) {
 305                     $volume=$1;
 306                     $number=$2;
 307                 }
 308             }
 309             # Series Title in field 440 a
 310             # Vol/No in field 440 v
 311             if ($field->{'tag'} eq '440') {
 312                 $seriestitle=$field->{'subfields'}->{'a'};
 313                 if ($field->{'subfields'}->{'v'}=~/(\d+).*(\d+)/) {
 314                     $volume=$1;
 315                     $number=$2;
 316                 }
 317             }
 318             # BARCODES!!!
 319             # 852 p stores barcodes
 320             # 852 h stores dewey field
 321             # 852 9 stores replacement price
 322             #   I check for an itemtype identifier in 852h as well... pb or pbk means PBK
 323             #   also if $dewey is > 0, then I assign JNF, otherwise JF.
 324             #   Note that my libraries are school libraries, so I assume Junior.
 325             if ($field->{'tag'} eq '852') {
 326                 $barcode=$field->{'subfields'}->{'p'};
 327                 push (@barcodes, $barcode);
 328                 my $q_barcode=$dbh->quote($barcode);
 329                 my $deweyfield=$field->{'subfields'}->{'h'};
 330                 $deweyfield=~/^([\d\.]*)/;
 331                 $dewey=$1;
 332                 if (($deweyfield=~/pbk/) || ($deweyfield=~/pb$/)) {
 333                     $itemtype='PBK';
 334                 } elsif ($dewey) {
 335                     $itemtype='JNF';
 336                 } else {
 337                     $itemtype='JF';
 338                 }
 339
 340                 $replacementprice=$field->{'subfields'}->{'9'};
 341             }
 342             # 700 a stores additional authors / illustrator info
 343             # 700 c will contain 'ill' if it's an illustrator
 344             if ($field->{'tag'} eq '700') {
 345                 my $name=$field->{'subfields'}->{'a'};
 346                 if ($field->{'subfields'}->{'c'}=~/ill/) {
 347                     $illustrator=$name;
 348                 } else {
 349                     $additionalauthors.="$name\n";
 350                 }
 351             }
 352             # I concatenate all 5XX a entries as notes
 353             if ($field->{'tag'} =~/^5/) {
 354                 $note.="$field->{'subfields'}->{'a'}\n";
 355             }
 356             # 6XX entries are subject entries
 357             #   Not sure why I'm skipping 691 tags
 358             #   691 a contains the subject.
 359             # I take subfield a, and append entries from subfield x (general
 360             # subdivision) y (Chronological subdivision) and z (geographic
 361             # subdivision)
 362             if ($field->{'tag'} =~/6\d\d/) {
 363                 (next) if ($field->{'tag'} eq '691');
 364                 my $subject=$field->{'subfields'}->{'a'};
 365                 print "SUBJECT: $subject\n";
 366                 $subject=~s/\.$//;
 367                 if ($gensubdivision=$field->{'subfields'}->{'x'}) {
 368                     my @sub=@$gensubdivision;
 369                     if ($#sub>=0) {
 370                         foreach $s (@sub) {
 371                             $s=~s/\.$//;
 372                             $subject.=" -- $s";
 373                         }
 374                     } else {
 375                         $gensubdivision=~s/\.$//;
 376                         $subject.=" -- $gensubdivision";
 377                     }
 378                 }
 379                 if ($chronsubdivision=$field->{'subfields'}->{'y'}) {
 380                     my @sub=@$chronsubdivision;
 381                     if ($#sub>=0) {
 382                         foreach $s (@sub) {
 383                             $s=~s/\.$//;
 384                             $subject.=" -- $s";
 385                         }
 386                     } else {
 387                         $chronsubdivision=~s/\.$//;
 388                         $subject.=" -- $chronsubdivision";
 389                     }
 390                 }
 391                 if ($geosubdivision=$field->{'subfields'}->{'z'}) {
 392                     my @sub=@$geosubdivision;
 393                     if ($#sub>=0) {
 394                         foreach $s (@sub) {
 395                             $s=~s/\.$//;
 396                             $subject.=" -- $s";
 397                         }
 398                     } else {
 399                         $geosubdivision=~s/\.$//;
 400                         $subject.=" -- $geosubdivision";
 401                     }
 402                 }
 403                 push @subjects, $subject;
 404             }
 405         }
 406
 407         my $q_isbn=$dbh->quote($isbn);
 408         my $q_issn=$dbh->quote($issn);
 409         my $q_lccn=$dbh->quote($lccn);
 410         my $sth=$dbh->prepare("select biblionumber,biblioitemnumber from biblioitems where issn=$q_issn or isbn=$q_isbn or lccn=$q_lccn");
 411         $sth->execute;
 412         my $biblionumber=0;
 413         my $biblioitemnumber=0;
 414         if ($sth->rows) {
 415             ($biblionumber, $biblioitemnumber) = $sth->fetchrow;
 416             my $title=$title;
 417 #title already in the database
 418         } else {
 419             my $q_title=$dbh->quote("$title");
 420             my $q_subtitle=$dbh->quote("$subtitle");
 421             my $q_author=$dbh->quote($author);
 422             my $q_copyrightdate=$dbh->quote($copyrightdate);
 423             my $q_seriestitle=$dbh->quote($seriestitle);
 424             $sth=$dbh->prepare("select biblionumber from biblio where title=$q_title and author=$q_author and copyrightdate=$q_copyrightdate and seriestitle=$q_seriestitle");
 425             $sth->execute;
 426             if ($sth->rows) {
 427                 ($biblionumber) = $sth->fetchrow;
 428 #title already in the database
 429             } else {
 430                 $sth=$dbh->prepare("select max(biblionumber) from biblio");
 431                 $sth->execute;
 432                 ($biblionumber) = $sth->fetchrow;
 433                 $biblionumber++;
 434                 my $q_notes=$dbh->quote($note);
 435                 $sth=$dbh->prepare("insert into biblio (biblionumber, title, author, copyrightdate, seriestitle, notes) values ($biblionumber, $q_title, $q_author, $q_copyrightdate, $q_seriestitle, $q_notes)");
 436                 $sth->execute;
 437                 $sth=$dbh->prepare("insert into bibliosubtitle values ($q_subtitle, $biblionumber)");
 438                 $sth->execute;
 439             }
 440             $sth=$dbh->prepare("select max(biblioitemnumber) from biblioitems");
 441             $sth->execute;
 442             ($biblioitemnumber) = $sth->fetchrow;
 443             $biblioitemnumber++;
 444             my $q_isbn=$dbh->quote($isbn);
 445             my $q_issn=$dbh->quote($issn);
 446             my $q_lccn=$dbh->quote($lccn);
 447             my $q_volume=$dbh->quote($volume);
 448             my $q_number=$dbh->quote($number);
 449             my $q_itemtype=$dbh->quote($itemtype);
 450             my $q_dewey=$dbh->quote($dewey);
 451             $cleanauthor=$author;
 452             $cleanauthor=~s/[^A-Za-z]//g;
 453             $subclass=uc(substr($cleanauthor,0,3));
 454             my $q_subclass=$dbh->quote($subclass);
 455             my $q_publicationyear=$dbh->quote($publicationyear);
 456             my $q_publishercode=$dbh->quote($publishercode);    # FIXME - $publishercode undefined
 457             my $q_volumedate=$dbh->quote($volumedate);  # FIXME - $volumedate undefined
 458             my $q_volumeddesc=$dbh->quote($volumeddesc);        # FIXME - $volumeddesc undefined
 459             my $q_illus=$dbh->quote($illustrator);
 460             my $q_pages=$dbh->quote($pages);
 461             my $q_notes=$dbh->quote($note);
 462             ($q_notes) || ($q_notes="''");
 463             my $q_size=$dbh->quote($size);
 464             my $q_place=$dbh->quote($place);
 465             my $q_marc=$dbh->quote($marc);
 466
 467             $sth=$dbh->prepare("insert into biblioitems (biblioitemnumber, biblionumber, volume, number, itemtype, isbn, issn, dewey, subclass, publicationyear, publishercode, volumedate, volumeddesc, illus, pages, size, place, lccn, marc) values ($biblioitemnumber, $biblionumber, $q_volume, $q_number, $q_itemtype, $q_isbn, $q_issn, $q_dewey, $q_subclass, $q_publicationyear, $q_publishercode, $q_volumedate, $q_volumeddesc, $q_illus, $q_pages, $q_size, $q_place, $q_lccn, $q_marc)");
 468             $sth->execute;
 469             my $subjectheading;
 470             foreach $subjectheading (@subjects) {
 471                 # convert to upper case
 472                 $subjectheading=uc($subjectheading);
 473                 # quote value
 474                 my $q_subjectheading=$dbh->quote($subjectheading);
 475                 $sth=$dbh->prepare("insert into bibliosubject (biblionumber,subject)
 476                     values ($biblionumber, $q_subjectheading)");
 477                 $sth->execute;
 478             }
 479             my @additionalauthors=split(/\n/,$additionalauthors);
 480             my $additionalauthor;
 481             foreach $additionalauthor (@additionalauthors) {
 482                 # remove any line ending characters (Ctrl-L or Ctrl-M)
 483                 $additionalauthor=~s/\013//g;
 484                 $additionalauthor=~s/\010//g;
 485                 # convert to upper case
 486                 $additionalauthor=uc($additionalauthor);
 487                 # quote value
 488                 my $q_additionalauthor=$dbh->quote($additionalauthor);
 489                 $sth=$dbh->prepare("insert into additionalauthors (biblionumber,author) values ($biblionumber, $q_additionalauthor)");
 490                 $sth->execute;
 491             }
 492         }
 493         my $q_barcode=$dbh->quote($barcode);
 494         my $q_homebranch="'$branchname'";
 495         my $q_notes="''";
 496         #my $replacementprice=0;
 497         # FIXME - There's already a $sth in this scope.
 498         my $sth=$dbh->prepare("select max(itemnumber) from items");
 499         $sth->execute;
 500         my ($itemnumber) = $sth->fetchrow;
 501         $itemnumber++;
 502         my @datearr=localtime(time);
 503         my $date=(1900+$datearr[5])."-".($datearr[4]+1)."-".$datearr[3];
 504 BARCODE:
 505         foreach $barcode (@barcodes) {
 506             my $q_barcode=$dbh->quote($barcode);
 507             my $sti=$dbh->prepare("select barcode from items where barcode=$q_barcode");
 508             $sti->execute;
 509             if ($sti->rows) {
 510                 print "Skipping $barcode\n";
 511                 next BARCODE;
 512             }
 513             $replacementprice=~s/^p//;
 514             ($replacementprice) || ($replacementprice=0);
 515             $replacementprice=~s/\$//;
 516             $task="insert into items (itemnumber, biblionumber, biblioitemnumber, barcode, itemnotes, homebranch, holdingbranch, dateaccessioned, replacementprice) values ($itemnumber, $biblionumber, $biblioitemnumber, $q_barcode, $q_notes, $q_homebranch, '$branchname', '$date', $replacementprice)";
 517             $sth=$dbh->prepare($task);
 518             print "$task\n";
 519             $sth->execute;
 520         }
 521     }
 522 }