From 2a37c19dac0747745787dd8160fd17fc701a4a2a Mon Sep 17 00:00:00 2001 From: Joshua Ferraro Date: Fri, 4 Jan 2008 20:59:43 -0500 Subject: [PATCH] Rudimentary import of MARC21 authorities Also adding support for ingesting format MARCXML in bulkmarcimport and bulkauthimport Signed-off-by: Joshua Ferraro --- misc/migration_tools/bulkauthimport.pl | 135 ++++++++++++++----------- misc/migration_tools/bulkmarcimport.pl | 10 +- 2 files changed, 85 insertions(+), 60 deletions(-) diff --git a/misc/migration_tools/bulkauthimport.pl b/misc/migration_tools/bulkauthimport.pl index cd80aba024..730707521c 100755 --- a/misc/migration_tools/bulkauthimport.pl +++ b/misc/migration_tools/bulkauthimport.pl @@ -10,7 +10,9 @@ BEGIN { } # Koha modules used +use Unicode::Normalize; use MARC::File::USMARC; +use MARC::File::XML; use MARC::Record; use MARC::Batch; use C4::Context; @@ -19,7 +21,7 @@ use Time::HiRes qw(gettimeofday); use Getopt::Long; my ( $input_marc_file, $number) = ('',0); -my ($version, $delete, $test_parameter,$char_encoding, $verbose); +my ($version, $delete, $test_parameter,$char_encoding, $verbose, $format); GetOptions( 'file:s' => \$input_marc_file, 'n' => \$number, @@ -28,24 +30,23 @@ GetOptions( 't' => \$test_parameter, 'c:s' => \$char_encoding, 'v:s' => \$verbose, + 'm:s' => \$format, ); if ($version || ($input_marc_file eq '')) { - print <dbh; if ($delete) { - print "deleting authorities\n"; - $dbh->do("delete from auth_header"); + print "deleting authorities\n"; + $dbh->do("truncate auth_header"); } if ($test_parameter) { - print "TESTING MODE ONLY\n DOING NOTHING\n===============\n"; + print "TESTING MODE ONLY\n DOING NOTHING\n===============\n"; } $char_encoding = 'MARC21' unless ($char_encoding); print "CHAR : $char_encoding\n" if $verbose; my $starttime = gettimeofday; -my $batch = MARC::Batch->new( 'USMARC', $input_marc_file ); +my $batch; +if ($format =~ /XML/i) { + $batch = MARC::Batch->new( 'XML', $input_marc_file ); +} else { + $batch = MARC::Batch->new( 'USMARC', $input_marc_file ); +} $batch->warnings_off(); $batch->strict_off(); my $i=0; while ( my $record = $batch->next() ) { - $i++; - #now, parse the record, extract the item fields, and store them in somewhere else. + $i++; + #now, parse the record, extract the item fields, and store them in somewhere else. ## create an empty record object to populate my $newRecord = MARC::Record->new(); - $newRecord->leader($record->leader); + $newRecord->leader($record->leader); # go through each field in the existing record foreach my $oldField ( $record->fields() ) { - # just reproduce tags < 010 in our new record - if ( $oldField->tag() < 10 ) { - $newRecord->append_fields( $oldField ); - next(); - } - # store our new subfield data in this list - my @newSubfields = (); - - # go through each subfield code/data pair - foreach my $pair ( $oldField->subfields() ) { - $pair->[1] =~ s/\[1] =~ s/\>//g; - push( @newSubfields, $pair->[0], char_decode($pair->[1],$char_encoding) ); - } - - # add the new field to our new record - my $newField = MARC::Field->new( - $oldField->tag(), - $oldField->indicator(1), - $oldField->indicator(2), - @newSubfields - ); - $newRecord->append_fields( $newField ); + # just reproduce tags < 010 in our new record + if ( $oldField->tag() < 10 ) { + $newRecord->append_fields( $oldField ); + next(); + } + # store our new subfield data in this list + my @newSubfields = (); + + # go through each subfield code/data pair + foreach my $pair ( $oldField->subfields() ) { + $pair->[1] =~ s/\[1] =~ s/\>//g; + push( @newSubfields, $pair->[0], $pair->[1], $char_encoding); + } + + # add the new field to our new record + my $newField = MARC::Field->new( + $oldField->tag(), + $oldField->indicator(1), + $oldField->indicator(2), + @newSubfields + ); + $newRecord->append_fields( $newField ); + } + warn "$i ==>".$newRecord->as_formatted() if $verbose eq 2; + my $authtypecode; + if (C4::Context->preference('marcflavour') eq 'MARC21') { + $authtypecode="PERSO_NAME" if ($newRecord->field('100')); + $authtypecode="CORPO_NAME" if ($newRecord->field('110')); + $authtypecode="MEETI_NAME" if ($newRecord->field('111')); + $authtypecode="UNIF_TITLE" if ($newRecord->field('130')); + $authtypecode="CHRON_TERM" if ($newRecord->field('148')); + $authtypecode="TOPIC_TERM" if ($newRecord->field('150')); + $authtypecode="GEOGR_NAME" if ($newRecord->field('151')); + $authtypecode="GENRE/FORM" if ($newRecord->field('155')); + next unless $authtypecode; # skip invalid records FIXME: far too simplistic + } + else { + $authtypecode=substr($newRecord->leader(),9,1); + $authtypecode="NP" if ($authtypecode eq 'a'); # personnes + $authtypecode="CO" if ($authtypecode eq 'b'); # collectivit� + $authtypecode="NG" if ($authtypecode eq 'c'); # g�graphique + $authtypecode="NM" if ($authtypecode eq 'd'); # marque + $authtypecode="NF" if ($authtypecode eq 'e'); # famille + $authtypecode="TI" if ($authtypecode eq 'f'); # Titre uniforme + $authtypecode="TI" if ($authtypecode eq 'h'); # auteur/titre + $authtypecode="MM" if ($authtypecode eq 'j'); # mot mati�e + } + # now, create biblio and items with NEWnewXX call. + unless ($test_parameter) { + my ($authid) = AddAuthority($newRecord,0,$authtypecode); + warn "ADDED authority NB $authid in DB\n" if $verbose; } - warn "$i ==>".$newRecord->as_formatted() if $verbose eq 2; - my $authtypecode=substr($newRecord->leader(),9,1); - $authtypecode="NP" if ($authtypecode eq 'a'); # personnes - $authtypecode="CO" if ($authtypecode eq 'b'); # collectivit� - $authtypecode="NG" if ($authtypecode eq 'c'); # g�graphique - $authtypecode="NM" if ($authtypecode eq 'd'); # marque - $authtypecode="NF" if ($authtypecode eq 'e'); # famille - $authtypecode="TI" if ($authtypecode eq 'f'); # Titre uniforme - $authtypecode="TI" if ($authtypecode eq 'h'); # auteur/titre - $authtypecode="MM" if ($authtypecode eq 'j'); # mot mati�e - warn "XX => $authtypecode"; - # now, create biblio and items with NEWnewXX call. - unless ($test_parameter) { - my ($authid) = AddAuthority($newRecord,0,$authtypecode); - warn "ADDED authority NB $authid in DB\n" if $verbose; - } } # $dbh->do("unlock tables"); my $timeneeded = gettimeofday - $starttime; diff --git a/misc/migration_tools/bulkmarcimport.pl b/misc/migration_tools/bulkmarcimport.pl index 7033668e08..5ecb0600e5 100755 --- a/misc/migration_tools/bulkmarcimport.pl +++ b/misc/migration_tools/bulkmarcimport.pl @@ -40,7 +40,7 @@ binmode(STDOUT, ":utf8"); use Getopt::Long; my ( $input_marc_file, $number) = ('',0); -my ($version, $delete, $test_parameter, $skip_marc8_conversion, $char_encoding, $verbose, $commit, $fk_off); +my ($version, $delete, $test_parameter, $skip_marc8_conversion, $char_encoding, $verbose, $commit, $fk_off,$format); $|=1; @@ -55,6 +55,7 @@ GetOptions( 'c:s' => \$char_encoding, 'v:s' => \$verbose, 'fk' => \$fk_off, + 'm:s' => \$format, ); # FIXME: Management of error conditions needed for record parsing problems @@ -203,7 +204,12 @@ my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21'; print "Characteristic MARC flavour: $marcFlavour\n" if $verbose; my $starttime = gettimeofday; -my $batch = MARC::Batch->new( 'USMARC', $input_marc_file ); +my $batch; +if ($format =~ /XML/i) { + $batch = MARC::Batch->new( 'XML', $input_marc_file ); +} else { + $batch = MARC::Batch->new( 'USMARC', $input_marc_file ); +} $batch->warnings_off(); $batch->strict_off(); my $i=0; -- 2.39.5