From aa5bf4d3cc0e4d57b958e665ad1c6efa8a76a9fb Mon Sep 17 00:00:00 2001 From: Olli-Antti Kivilahti Date: Fri, 17 Apr 2015 20:12:49 +0300 Subject: [PATCH] Bug 10407: Allow MARCXML records to be imported via GUI (groundwork) Patch from Olli, manual rebase by Marcel (July 7, 2016). Signed-off-by: Marcel de Rooy Needs follow-up. Test plan in the third patch. Signed-off-by: Josef Moravec Signed-off-by: Jonathan Druart Signed-off-by: Kyle M Hall --- C4/ImportBatch.pm | 79 +++++++++++++++++++++++++++++++++----- misc/stage_file.pl | 36 ++++++++--------- tools/stage-marc-import.pl | 20 ++++------ 3 files changed, 94 insertions(+), 41 deletions(-) diff --git a/C4/ImportBatch.pm b/C4/ImportBatch.pm index 046b8590e3..19853b3383 100644 --- a/C4/ImportBatch.pm +++ b/C4/ImportBatch.pm @@ -414,24 +414,17 @@ sub BatchStageMarcRecords { my $num_items = 0; # FIXME - for now, we're dealing only with bibs my $rec_num = 0; - foreach my $marc_blob (split(/\x1D/, $marc_records)) { - $marc_blob =~ s/^\s+//g; - $marc_blob =~ s/\s+$//g; - next unless $marc_blob; + foreach my $marc_record (@$marc_records) { $rec_num++; if ($progress_interval and (0 == ($rec_num % $progress_interval))) { &$progress_callback($rec_num); } - my ($marc_record, $charset_guessed, $char_errors) = - MarcToUTF8Record($marc_blob, $marc_type, $encoding); - - $encoding = $charset_guessed unless $encoding; ModifyRecordWithTemplate( $marc_modification_template, $marc_record ) if ( $marc_modification_template ); my $import_record_id; if (scalar($marc_record->fields()) == 0) { - push @invalid_records, $marc_blob; + push @invalid_records, $marc_record; } else { # Normalize the record so it doesn't have separated diacritics @@ -1467,7 +1460,6 @@ sub GetImportRecordMatches { } - =head2 SetImportRecordMatches SetImportRecordMatches($import_record_id, @matches); @@ -1490,6 +1482,73 @@ sub SetImportRecordMatches { } } +=head2 RecordsFromISO2709File + + my ($errors, $records) = C4::ImportBatch::RecordsFromISO2709File($input_file, 
$record_type, $encoding); + +Reads ISO2709 binary porridge from the given file and creates MARC::Record-objects out of it. + +@PARAM1, String, absolute path to the ISO2709 file. +@PARAM2, String, see stage_file.pl +@PARAM3, String, should be utf8 + +=cut + +sub RecordsFromISO2709File { + my ($input_file, $record_type, $encoding) = @_; + my $errors; + + my $marc_type = C4::Context->preference('marcflavour'); + $marc_type .= 'AUTH' if ($marc_type eq 'UNIMARC' && $record_type eq 'auth'); + + open IN, "<$input_file" or die "$0: cannot open input file $input_file: $!\n"; + my @marc_records; + $/ = "\035"; + while () { + s/^\s+//; + s/\s+$//; + next unless $_; # skip if record has only whitespace, as might occur + # if file includes newlines between each MARC record + my ($marc_record, $charset_guessed, $char_errors) = MarcToUTF8Record($_, $marc_type, $encoding); + push @marc_records, $marc_record; + if ($charset_guessed ne $encoding) { + $errors = '' unless $errors; + $errors .= "Unexpected charset $charset_guessed, expecting $encoding\n"; + } + } + close IN; + return ($errors, \@marc_records); +} + +=head2 RecordsFromMARCXMLFile + + my ($errors, $records) = C4::ImportBatch::RecordsFromMARCXMLFile($input_file, $encoding); + + + +Creates MARC::Record-objects out of the given MARCXML-file. + +@PARAM1, String, absolute path to the MARCXML file. 
+@PARAM2, String, should be utf8 + +=cut + +sub RecordsFromMARCXMLFile { + my ( $filename, $encoding ) = @_; + my $batch = MARC::File::XML->in( $filename ); + my @marcRecords; + my @errors; + do { + eval { + my $record = $batch->next($encoding); + push @marcRecords, $record if $record; + }; + if ($@) { + push @errors, $@; + } + } while( $record ); + return (\@errors, \@marcRecords); +} # internal functions diff --git a/misc/stage_file.pl b/misc/stage_file.pl index 5fc70b72f7..95ac9414c5 100755 --- a/misc/stage_file.pl +++ b/misc/stage_file.pl @@ -44,13 +44,15 @@ my $add_items = 0; my $input_file = ""; my $batch_comment = ""; my $want_help = 0; -my $no_replace ; +my $no_replace; +my $format = 'ISO2709'; my $no_create; my $item_action = 'always_add'; my $result = GetOptions( 'encoding:s' => \$encoding, 'file:s' => \$input_file, + 'format:s' => \$format, 'match|match-bibs:s' => \$match, 'add-items' => \$add_items, 'item-action:s' => \$item_action, @@ -71,6 +73,11 @@ if (not $result or $input_file eq "" or $want_help) { print_usage(); exit 0; } +if ( $format !~ /^(MARCXML|ISO2709)$/i ) { + print "\n --format must be MARCXML or ISO2709\n"; + print_usage(); + exit 0; +} unless (-r $input_file) { die "$0: cannot open input file $input_file: $!\n"; @@ -78,28 +85,18 @@ unless (-r $input_file) { my $dbh = C4::Context->dbh; $dbh->{AutoCommit} = 0; -process_batch($input_file, $record_type, $match, $add_items, $batch_comment); +process_batch($format, $input_file, $record_type, $match, $add_items, $batch_comment); $dbh->commit(); exit 0; sub process_batch { - my ($input_file, $record_type, $match, $add_items, $batch_comment) = @_; - - open IN, "<$input_file" or die "$0: cannot open input file $input_file: $!\n"; - my $marc_records = ""; - $/ = "\035"; - my $num_input_records = 0; - while () { - s/^\s+//; - s/\s+$//; - next unless $_; # skip if record has only whitespace, as might occur - # if file includes newlines between each MARC record - $marc_records .= $_; # FIXME - this 
sort of string concatenation - # is probably rather inefficient - $num_input_records++; - } - close IN; + my ($format, $input_file, $record_type, $match, $add_items, $batch_comment) = @_; + + my ($errors, $marc_records) = C4::ImportBatch::RecordsFromISO2709File($input_file, $record_type, $encoding) if $format eq 'ISO2709'; + warn $errors if $errors; + $marc_records = C4::ImportBatch::RecordsFromMARCXMLFile($input_file, $encoding) if $format eq 'MARCXML'; + my $num_input_records = ($marc_records) ? scalar(@$marc_records) : 0; print "... staging MARC records -- please wait\n"; #FIXME: We should really allow the use of marc modification frameworks and to_marc plugins here if possible @@ -182,6 +179,9 @@ Parameters: --encoding encoding of MARC records, default is utf8. Other possible options are: MARC-8, ISO_5426, ISO_6937, ISO_8859-1, EUC-KR + --format The MARC transport format to use? + Defaults to ISO2709. + Available values, MARCXML, ISO2709. --match use this option to match records in the file with records already in the database for future overlay. 
diff --git a/tools/stage-marc-import.pl b/tools/stage-marc-import.pl index 8d94cddb58..e7e9954308 100755 --- a/tools/stage-marc-import.pl +++ b/tools/stage-marc-import.pl @@ -56,7 +56,8 @@ my $parse_items = $input->param('parse_items'); my $item_action = $input->param('item_action'); my $comments = $input->param('comments'); my $record_type = $input->param('record_type'); -my $encoding = $input->param('encoding'); +my $encoding = $input->param('encoding') || 'utf8'; +my $format = $input->param('format') || 'ISO2709'; my $to_marc_plugin = $input->param('to_marc_plugin'); my $marc_modification_template = $input->param('marc_modification_template_id'); @@ -84,22 +85,15 @@ if ($completedJobID) { my $results = $job->results(); $template->param(map { $_ => $results->{$_} } keys %{ $results }); } elsif ($fileID) { - my $upload = Koha::Upload->new->get({ id => $fileID, filehandle => 1 }); - my $fh = $upload->{fh}; - my $filename = $upload->{name}; # filename only, no path + my $upload = Koha::Upload->new->get({ id => $fileID }); + my $filename = $upload->{path}; my $marcrecord=''; - $/ = "\035"; - while (<$fh>) { - s/^\s+//; - s/\s+$//; - $marcrecord.=$_; - } - $fh->close; + my ($errors, $marcrecords) = C4::ImportBatch::RecordsFromISO2709File($uploaded_file->filename(), $record_type, $encoding); my $job = undef; my $dbh; if ($runinbackground) { - my $job_size = () = $marcrecord =~ /\035/g; + my $job_size = scalar(@$marcrecords); # if we're matching, job size is doubled $job_size *= 2 if ($matcher_id ne ""); $job = C4::BackgroundJob->new($sessionID, $filename, '/cgi-bin/koha/tools/stage-marc-import.pl', $job_size); @@ -137,7 +131,7 @@ if ($completedJobID) { my ( $batch_id, $num_valid, $num_items, @import_errors ) = BatchStageMarcRecords( $record_type, $encoding, - $marcrecord, $filename, + $marcrecords, $filename, $to_marc_plugin, $marc_modification_template, $comments, '', $parse_items, 0, -- 2.39.5