From 3badc57c9339e37e5d6ec660ff0b323614ab045d Mon Sep 17 00:00:00 2001 From: Marcel de Rooy Date: Thu, 7 Jul 2016 14:13:16 +0200 Subject: [PATCH] Bug 10407: Add marcxml import (follow-up) This patch makes the following changes: [1] Based on the groundwork of the former patch, add call to RecordsFromMARCXMLFile in stage-marc-import. Use format param. [2] Add format to the template. Use the file extension to determine the format. If you use .xml or .marcxml as extension, MARCXML is selected. [3] In stage-marc-import.tt mark UTF-8 encoding as UTF-8 not as utf8. [4] BatchStageMarcRecords: do not call plugin if you have no records. [5] RecordsFromISO2709File: also return errors in an array. [6] In misc/stage_file.pl also use UTF-8. Handling of errors from [5]. Test plan: [1] Import an empty file as MARC or MARCXML (with Tools/Stage..import). [2] Import a non-empty file with invalid contents as MARC or MARCXML. [3] Export a few records with Tools/Export as MARC and MARCXML. [4] Import these two files. Check selected format versus file extension. [5] Import a MARCXML file with misc/stage_file.pl. 
Signed-off-by: Marcel de Rooy Signed-off-by: Josef Moravec Signed-off-by: Jonathan Druart Signed-off-by: Kyle M Hall --- C4/ImportBatch.pm | 25 +++++++++---------- .../en/modules/tools/stage-marc-import.tt | 13 +++++++++- misc/stage_file.pl | 15 +++++------ tools/stage-marc-import.pl | 15 ++++++++--- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/C4/ImportBatch.pm b/C4/ImportBatch.pm index 19853b3383..51fcb7674f 100644 --- a/C4/ImportBatch.pm +++ b/C4/ImportBatch.pm @@ -405,7 +405,7 @@ sub BatchStageMarcRecords { method => 'to_marc', params => { data => $marc_records } } - ) if $to_marc_plugin; + ) if $to_marc_plugin && @$marc_records; my $marc_type = C4::Context->preference('marcflavour'); $marc_type .= 'AUTH' if ($marc_type eq 'UNIMARC' && $record_type eq 'auth'); @@ -1492,11 +1492,13 @@ Reads ISO2709 binary porridge from the given file and creates MARC::Record-objec @PARAM2, String, see stage_file.pl @PARAM3, String, should be utf8 +Returns two array refs. + =cut sub RecordsFromISO2709File { my ($input_file, $record_type, $encoding) = @_; - my $errors; + my @errors; my $marc_type = C4::Context->preference('marcflavour'); $marc_type .= 'AUTH' if ($marc_type eq 'UNIMARC' && $record_type eq 'auth'); @@ -1512,40 +1514,37 @@ sub RecordsFromISO2709File { my ($marc_record, $charset_guessed, $char_errors) = MarcToUTF8Record($_, $marc_type, $encoding); push @marc_records, $marc_record; if ($charset_guessed ne $encoding) { - $errors = '' unless $errors; - $errors .= "Unexpected charset $charset_guessed, expecting $encoding\n"; + push @errors, + "Unexpected charset $charset_guessed, expecting $encoding"; } } close IN; - return ($errors, \@marc_records); + return ( \@errors, \@marc_records ); } =head2 RecordsFromMARCXMLFile my ($errors, $records) = C4::ImportBatch::RecordsFromMARCXMLFile($input_file, $encoding); - - Creates MARC::Record-objects out of the given MARCXML-file. @PARAM1, String, absolute path to the ISO2709 file. 
@PARAM2, String, should be utf8 +Returns two array refs. + =cut sub RecordsFromMARCXMLFile { my ( $filename, $encoding ) = @_; my $batch = MARC::File::XML->in( $filename ); - my @marcRecords; - my @errors; + my ( @marcRecords, @errors, $record ); do { - eval { - my $record = $batch->next($encoding); - push @marcRecords, $record if $record; - }; + eval { $record = $batch->next( $encoding ); }; if ($@) { push @errors, $@; } + push @marcRecords, $record if $record; } while( $record ); return (\@errors, \@marcRecords); } diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tt b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tt index 45aeac9858..4d57a13fe8 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tt +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tt @@ -66,6 +66,10 @@ function cbUpload( status, fileid ) { $('#fileuploadbutton').prop('disabled',true); $('#fileuploadbutton').show(); $("#fileuploadcancel").hide(); + var filename=$('#fileToUpload').prop('files')[0].name; + if( filename.match( new RegExp(/\.[^.]+xml$/) ) ) { + $('#format').val('MARCXML'); + } $("#processfile").show(); } else { $('#fileuploadbutton').show(); @@ -181,7 +185,7 @@ function cbUpload( status, fileid ) {
  • +
  • + + +
  • [% IF plugins %] diff --git a/misc/stage_file.pl b/misc/stage_file.pl index 95ac9414c5..3e55dba259 100755 --- a/misc/stage_file.pl +++ b/misc/stage_file.pl @@ -37,7 +37,7 @@ $| = 1; # command-line parameters my $record_type = "biblio"; -my $encoding = ""; +my $encoding = "UTF-8"; my $authorities = 0; my $match = 0; my $add_items = 0; @@ -65,10 +65,6 @@ my $result = GetOptions( $record_type = 'auth' if ($authorities); -if ($encoding eq "") { - $encoding = "utf8"; -} - if (not $result or $input_file eq "" or $want_help) { print_usage(); exit 0; @@ -93,9 +89,10 @@ exit 0; sub process_batch { my ($format, $input_file, $record_type, $match, $add_items, $batch_comment) = @_; - my ($errors, $marc_records) = C4::ImportBatch::RecordsFromISO2709File($input_file, $record_type, $encoding) if $format eq 'ISO2709'; - warn $errors if $errors; - $marc_records = C4::ImportBatch::RecordsFromMARCXMLFile($input_file, $encoding) if $format eq 'MARCXML'; + my ( $errors, $marc_records ); + ( $errors, $marc_records ) = C4::ImportBatch::RecordsFromISO2709File($input_file, $record_type, $encoding) if $format eq 'ISO2709'; + ( $errors, $marc_records ) = C4::ImportBatch::RecordsFromMARCXMLFile($input_file, $encoding) if $format eq 'MARCXML'; + warn ( join ',', @$errors ) if @$errors; my $num_input_records = ($marc_records) ? scalar(@$marc_records) : 0; print "... staging MARC records -- please wait\n"; @@ -176,7 +173,7 @@ records into the main Koha database. Parameters: --file name of input MARC bib file --authorities stage authority records instead of bibs - --encoding encoding of MARC records, default is utf8. + --encoding encoding of MARC records, default is UTF-8. Other possible options are: MARC-8, ISO_5426, ISO_6937, ISO_8859-1, EUC-KR --format The MARC transport format to use? 
diff --git a/tools/stage-marc-import.pl b/tools/stage-marc-import.pl index f353abd39e..a5f60bd1a2 100755 --- a/tools/stage-marc-import.pl +++ b/tools/stage-marc-import.pl @@ -56,7 +56,7 @@ my $parse_items = $input->param('parse_items'); my $item_action = $input->param('item_action'); my $comments = $input->param('comments'); my $record_type = $input->param('record_type'); -my $encoding = $input->param('encoding') || 'utf8'; +my $encoding = $input->param('encoding') || 'UTF-8'; my $format = $input->param('format') || 'ISO2709'; my $to_marc_plugin = $input->param('to_marc_plugin'); my $marc_modification_template = $input->param('marc_modification_template_id'); @@ -86,9 +86,16 @@ if ($completedJobID) { $template->param(map { $_ => $results->{$_} } keys %{ $results }); } elsif ($fileID) { my $upload = Koha::Upload->new->get({ id => $fileID }); - my $filename = $upload->{path}; - my $marcrecord=''; - my ($errors, $marcrecords) = C4::ImportBatch::RecordsFromISO2709File($uploaded_file->filename(), $record_type, $encoding); + my ( $file, $filename ) = ( $upload->{path}, $upload->{name} ); + my ( $errors, $marcrecords ); + if( $format eq 'MARCXML' ) { + ( $errors, $marcrecords ) = C4::ImportBatch::RecordsFromMARCXMLFile( $file, $encoding); + } else { + ( $errors, $marcrecords ) = C4::ImportBatch::RecordsFromISO2709File( $file, $record_type, $encoding ); + } + warn "$filename: " . ( join ',', @$errors ) if @$errors; + # no need to exit if we have no records (or only errors) here + # BatchStageMarcRecords can handle that my $job = undef; my $dbh; -- 2.39.5