#!/usr/bin/perl # # Tool for importing bulk marc records # # WARNING!! # # Do not use this script on a production system, it is still in development # # $file=$ARGV[0]; unless ($file) { print "USAGE: ./bulkmarcimport.pl filename\n"; exit; } my $lc1='#dddddd'; my $lc2='#ddaaaa'; use C4::Database; use CGI; use DBI; #use strict; use C4::Acquisitions; use C4::Output; my $dbh=C4Connect; my $userid=$ENV{'REMOTE_USER'}; %tagtext = ( '001' => 'Control number', '003' => 'Control number identifier', '005' => 'Date and time of latest transaction', '006' => 'Fixed-length data elements -- additional material characteristics', '007' => 'Physical description fixed field', '008' => 'Fixed length data elements', '010' => 'LCCN', '015' => 'LCCN Cdn', '020' => 'ISBN', '022' => 'ISSN', '037' => 'Source of acquisition', '040' => 'Cataloging source', '041' => 'Language code', '043' => 'Geographic area code', '050' => 'Library of Congress call number', '060' => 'National Library of Medicine call number', '082' => 'Dewey decimal call number', '100' => 'Main entry -- Personal name', '110' => 'Main entry -- Corporate name', '130' => 'Main entry -- Uniform title', '240' => 'Uniform title', '245' => 'Title statement', '246' => 'Varying form of title', '250' => 'Edition statement', '256' => 'Computer file characteristics', '260' => 'Publication, distribution, etc.', '263' => 'Projected publication date', '300' => 'Physical description', '306' => 'Playing time', '440' => 'Series statement / Added entry -- Title', '490' => 'Series statement', '500' => 'General note', '504' => 'Bibliography, etc. note', '505' => 'Formatted contents note', '508' => 'Creation/production credits note', '510' => 'Citation/references note', '511' => 'Participant or performer note', '520' => 'Summary, etc. note', '521' => 'Target audience note (ie age)', '530' => 'Additional physical form available note', '538' => 'System details note', '586' => 'Awards note', '600' => 'Subject added entry -- Personal name', '610' => 'Subject added entry -- Corporate name', '650' => 'Subject added entry -- Topical term', '651' => 'Subject added entry -- Geographic name', '656' => 'Index term -- Occupation', '700' => 'Added entry -- Personal name', '710' => 'Added entry -- Corporate name', '730' => 'Added entry -- Uniform title', '740' => 'Added entry -- Uncontrolled related/analytical title', '800' => 'Series added entry -- Personal name', '830' => 'Series added entry -- Uniform title', '852' => 'Location', '856' => 'Electronic location and access', ); my $dbh=C4Connect; if ($file) { open (F, "$file"); my $data=; close F; $splitchar=chr(29); # Cycle through all of the records in the file RECORD: foreach $record (split(/$splitchar/, $data)) { $leader=substr($record,0,24); print "\n\n---------------------------------------------------------------------------\n"; print "Leader: $leader\n"; $record=substr($record,24); $splitchar2=chr(30); my $directory=0; my $tagcounter=0; my %tag; my @record; my %record; foreach $field (split(/$splitchar2/, $record)) { my %field; unless ($directory) { # Parse the MARC directory and store the cotents in the %tag hash $directory=$field; my $itemcounter=1; $counter=0; while ($item=substr($directory,0,12)) { $tag=substr($directory,0,3); $length=substr($directory,3,4); $start=substr($directory,7,6); $directory=substr($directory,12); $tag{$counter}=$tag; $counter++; } $directory=1; next; } $tag=$tag{$tagcounter}; $tagcounter++; $field{'tag'}=$tag; printf "%4s %-40s ",$tag, $tagtext{$tag}; $splitchar3=chr(31); my @subfields=split(/$splitchar3/, $field); $indicator=$subfields[0]; $field{'indicator'}=$indicator; my $firstline=1; if ($#subfields==0) { print "$indicator\n"; } else { print "\n"; my %subfields; for ($i=1; $i<=$#subfields; $i++) { my $text=$subfields[$i]; my $subfieldcode=substr($text,0,1); my $subfield=substr($text,1); print " $subfieldcode $subfield\n"; if ($subfields{$subfieldcode}) { my $subfieldlist=$subfields{$subfieldcode}; my @subfieldlist=@$subfieldlist; if ($#subfieldlist>=0) { push (@subfieldlist, $subfield); } else { @subfieldlist=($subfields{$subfieldcode}, $subfield); } $subfields{$subfieldcode}=\@subfieldlist; } else { $subfields{$subfieldcode}=$subfield; } } $field{'subfields'}=\%subfields; } if ($record{$tag}) { my $fieldlist=$record{$tag}; if ($fieldlist->{'tag'}) { @fieldlist=($fieldlist, \%field); $fieldlist=\@fieldlist; } else { push (@$fieldlist,\%field); } $record{$tag}=$fieldlist; } else { $record{$tag}=[\%field]; } push (@record, \%field); } $rec=\@record; $counter++; my ($lccn, $isbn, $issn, $dewey, $author, $title, $place, $publisher, $publicationyear, $volume, $number, @subjects, $note, $additionalauthors, $illustrator, $copyrightdate, $barcode, $itemtype, $seriestitle, @barcodes); my $marc=$record; foreach $field (sort {$a->{'tag'} cmp $b->{'tag'}} @$rec) { # LCCN is stored in field 010 a if ($field->{'tag'} eq '010') { $lccn=$field->{'subfields'}->{'a'}; $lccn=~s/^\s*//; $lccn=~s/cn //; $lccn=~s/^\s*//; ($lccn) = (split(/\s+/, $lccn))[0]; } # LCCN is stored in field 015 a if ($field->{'tag'} eq '015') { $lccn=$field->{'subfields'}->{'a'}; $lccn=~s/^\s*//; $lccn=~s/^C//; ($lccn) = (split(/\s+/, $lccn))[0]; } # ISBN is stored in field 020 a if ($field->{'tag'} eq '020') { $isbn=$field->{'subfields'}->{'a'}; $isbn=~s/^\s*//; ($isbn) = (split(/\s+/, $isbn))[0]; } # ISSN is stored in field 022 a if ($field->{'tag'} eq '022') { $issn=$field->{'subfields'}->{'a'}; $issn=~s/^\s*//; ($issn) = (split(/\s+/, $issn))[0]; } # Dewey number stored in field 082 a # If there is more than one dewey number (more than one 'a' # subfield) I just take the first one if ($field->{'tag'} eq '082') { $dewey=$field->{'subfields'}->{'a'}; $dewey=~s/\///g; if (@$dewey) { $dewey=$$dewey[0]; } } # Author is stored in field 100 a if ($field->{'tag'} eq '100') { $author=$field->{'subfields'}->{'a'}; } # Title is stored in field 245 a # Subtitle in field 245 b # Illustrator in field 245 c if ($field->{'tag'} eq '245') { $title=$field->{'subfields'}->{'a'}; $title=~s/ \/$//; $subtitle=$field->{'subfields'}->{'b'}; $subtitle=~s/ \/$//; my $name=$field->{'subfields'}->{'c'}; if ($name=~/illustrated by]*\s+(.*)/) { $illustrator=$1; } } # Publisher Info in field 260 # a = place # b = publisher # c = publication date # (also store as copyright date if date starts with a 'c' as in c1995) if ($field->{'tag'} eq '260') { $place=$field->{'subfields'}->{'a'}; if (@$place) { $place=$$place[0]; } $place=~s/\s*:$//g; $publisher=$field->{'subfields'}->{'b'}; if (@$publisher) { $publisher=$$publisher[0]; } $publisher=~s/\s*:$//g; $publicationyear=$field->{'subfields'}->{'c'}; if ($publicationyear=~/c(\d\d\d\d)/) { $copyrightdate=$1; } if ($publicationyear=~/[^c](\d\d\d\d)/) { $publicationyear=$1; } elsif ($copyrightdate) { $publicationyear=$copyrightdate; } else { $publicationyear=~/(\d\d\d\d)/; $publicationyear=$1; } } # Physical Dimensions in field 300 # a = pages # c = size if ($field->{'tag'} eq '300') { $pages=$field->{'subfields'}->{'a'}; $pages=~s/ \;$//; $size=$field->{'subfields'}->{'c'}; $pages=~s/\s*:$//g; $size=~s/\s*:$//g; } # Vol/No in field 362 a if ($field->{'tag'} eq '362') { if ($field->{'subfields'}->{'a'}=~/(\d+).*(\d+)/) { $volume=$1; $number=$2; } } # Series Title in field 440 a # Vol/No in field 440 v if ($field->{'tag'} eq '440') { $seriestitle=$field->{'subfields'}->{'a'}; if ($field->{'subfields'}->{'v'}=~/(\d+).*(\d+)/) { $volume=$1; $number=$2; } } # BARCODES!!! # 852 p stores barcodes # 852 h stores dewey field # 852 9 stores replacement price # I check for an itemtype identifier in 852h as well... pb or pbk means PBK # also if $dewey is > 0, then I assign JNF, otherwise JF. # Note that my libraries are school libraries, so I assume Junior. if ($field->{'tag'} eq '852') { $barcode=$field->{'subfields'}->{'p'}; push (@barcodes, $barcode); my $q_barcode=$dbh->quote($barcode); my $deweyfield=$field->{'subfields'}->{'h'}; $deweyfield=~/^([\d\.]*)/; $dewey=$1; if (($deweyfield=~/pbk/) || ($deweyfield=~/pb$/)) { $itemtype='PBK'; } elsif ($dewey) { $itemtype='JNF'; } else { $itemtype='JF'; } $replacementprice=$field->{'subfields'}->{'9'}; } # 700 a stores additional authors / illustrator info # 700 c will contain 'ill' if it's an illustrator if ($field->{'tag'} eq '700') { my $name=$field->{'subfields'}->{'a'}; if ($field->{'subfields'}->{'c'}=~/ill/) { $illustrator=$name; } else { $additionalauthors.="$name\n"; } } # I concatenate all 5XX a entries as notes if ($field->{'tag'} =~/^5/) { $note.="$field->{'subfields'}->{'a'}\n"; } # 6XX entries are subject entries # Not sure why I'm skipping 691 tags # 691 a contains the subject. # I take subfield a, and append entries from subfield x (general # subdivision) y (Chronological subdivision) and z (geographic # subdivision) if ($field->{'tag'} =~/6\d\d/) { (next) if ($field->{'tag'} eq '691'); my $subject=$field->{'subfields'}->{'a'}; print "SUBJECT: $subject\n"; $subject=~s/\.$//; if ($gensubdivision=$field->{'subfields'}->{'x'}) { my @sub=@$gensubdivision; if ($#sub>=0) { foreach $s (@sub) { $s=~s/\.$//; $subject.=" -- $s"; } } else { $gensubdivision=~s/\.$//; $subject.=" -- $gensubdivision"; } } if ($chronsubdivision=$field->{'subfields'}->{'y'}) { my @sub=@$chronsubdivision; if ($#sub>=0) { foreach $s (@sub) { $s=~s/\.$//; $subject.=" -- $s"; } } else { $chronsubdivision=~s/\.$//; $subject.=" -- $chronsubdivision"; } } if ($geosubdivision=$field->{'subfields'}->{'z'}) { my @sub=@$geosubdivision; if ($#sub>=0) { foreach $s (@sub) { $s=~s/\.$//; $subject.=" -- $s"; } } else { $geosubdivision=~s/\.$//; $subject.=" -- $geosubdivision"; } } push @subjects, $subject; } } my $q_isbn=$dbh->quote($isbn); my $q_issn=$dbh->quote($issn); my $q_lccn=$dbh->quote($lccn); my $sth=$dbh->prepare("select biblionumber,biblioitemnumber from biblioitems where issn=$q_issn or isbn=$q_isbn or lccn=$q_lccn"); $sth->execute; my $biblionumber=0; my $biblioitemnumber=0; if ($sth->rows) { ($biblionumber, $biblioitemnumber) = $sth->fetchrow; my $title=$title; #title already in the database } else { my $q_title=$dbh->quote("$title"); my $q_subtitle=$dbh->quote("$subtitle"); my $q_author=$dbh->quote($author); my $q_copyrightdate=$dbh->quote($copyrightdate); my $q_seriestitle=$dbh->quote($seriestitle); $sth=$dbh->prepare("select biblionumber from biblio where title=$q_title and author=$q_author and copyrightdate=$q_copyrightdate and seriestitle=$q_seriestitle"); $sth->execute; if ($sth->rows) { ($biblionumber) = $sth->fetchrow; #title already in the database } else { $sth=$dbh->prepare("select max(biblionumber) from biblio"); $sth->execute; ($biblionumber) = $sth->fetchrow; $biblionumber++; my $q_notes=$dbh->quote($note); $sth=$dbh->prepare("insert into biblio (biblionumber, title, author, copyrightdate, seriestitle, notes) values ($biblionumber, $q_title, $q_author, $q_copyrightdate, $q_seriestitle, $q_notes)"); $sth->execute; $sth=$dbh->prepare("insert into bibliosubtitle values ($q_subtitle, $biblionumber)"); $sth->execute; } $sth=$dbh->prepare("select max(biblioitemnumber) from biblioitems"); $sth->execute; ($biblioitemnumber) = $sth->fetchrow; $biblioitemnumber++; my $q_isbn=$dbh->quote($isbn); my $q_issn=$dbh->quote($issn); my $q_lccn=$dbh->quote($lccn); my $q_volume=$dbh->quote($volume); my $q_number=$dbh->quote($number); my $q_itemtype=$dbh->quote($itemtype); my $q_dewey=$dbh->quote($dewey); $cleanauthor=$author; $cleanauthor=~s/[^A-Za-z]//g; $subclass=uc(substr($cleanauthor,0,3)); my $q_subclass=$dbh->quote($subclass); my $q_publicationyear=$dbh->quote($publicationyear); my $q_publishercode=$dbh->quote($publishercode); my $q_volumedate=$dbh->quote($volumedate); my $q_volumeddesc=$dbh->quote($volumeddesc); my $q_illus=$dbh->quote($illustrator); my $q_pages=$dbh->quote($pages); my $q_notes=$dbh->quote($note); ($q_notes) || ($q_notes="''"); my $q_size=$dbh->quote($size); my $q_place=$dbh->quote($place); my $q_marc=$dbh->quote($marc); $sth=$dbh->prepare("insert into biblioitems (biblioitemnumber, biblionumber, volume, number, itemtype, isbn, issn, dewey, subclass, publicationyear, publishercode, volumedate, volumeddesc, illus, pages, size, place, lccn, marc) values ($biblioitemnumber, $biblionumber, $q_volume, $q_number, $q_itemtype, $q_isbn, $q_issn, $q_dewey, $q_subclass, $q_publicationyear, $q_publishercode, $q_volumedate, $q_volumeddesc, $q_illus, $q_pages, $q_size, $q_place, $q_lccn, $q_marc)"); $sth->execute; my $subjectheading; foreach $subjectheading (@subjects) { # convert to upper case $subjectheading=uc($subjectheading); # quote value my $q_subjectheading=$dbh->quote($subjectheading); $sth=$dbh->prepare("insert into bibliosubject (biblionumber,subject) values ($biblionumber, $q_subjectheading)"); $sth->execute; } my @additionalauthors=split(/\n/,$additionalauthors); my $additionalauthor; foreach $additionalauthor (@additionalauthors) { # remove any line ending characters (Ctrl-L or Ctrl-M) $additionalauthor=~s/\013//g; $additionalauthor=~s/\010//g; # convert to upper case $additionalauthor=uc($additionalauthor); # quote value my $q_additionalauthor=$dbh->quote($additionalauthor); $sth=$dbh->prepare("insert into additionalauthors (biblionumber,author) values ($biblionumber, $q_additionalauthor)"); $sth->execute; } } my $q_barcode=$dbh->quote($barcode); my $q_homebranch="'MAIN'"; my $q_notes="''"; #my $replacementprice=0; my $sth=$dbh->prepare("select max(itemnumber) from items"); $sth->execute; my ($itemnumber) = $sth->fetchrow; $itemnumber++; my @datearr=localtime(time); my $date=(1900+$datearr[5])."-".($datearr[4]+1)."-".$datearr[3]; BARCODE: foreach $barcode (@barcodes) { my $q_barcode=$dbh->quote($barcode); my $sti=$dbh->prepare("select barcode from items where barcode=$q_barcode"); $sti->execute; if ($sti->rows) { print "Skipping $barcode\n"; next BARCODE; } $replacementprice=~s/^p//; ($replacementprice) || ($replacementprice=0); $replacementprice=~s/\$//; $task="insert into items (itemnumber, biblionumber, biblioitemnumber, barcode, itemnotes, homebranch, holdingbranch, dateaccessioned, replacementprice) values ($itemnumber, $biblionumber, $biblioitemnumber, $q_barcode, $q_notes, $q_homebranch, 'MAIN', '$date', $replacementprice)"; $sth=$dbh->prepare($task); print "$task\n"; $sth->execute; } } } $dbh->disconnect;