Bug 27421: Use Background job for staging MARC records for import

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>

Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl>
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
This commit is contained in:
Jonathan Druart 2022-06-13 14:29:45 +02:00 committed by Tomas Cohen Arazi
parent a80a96b933
commit 8497ed67b7
Signed by: tomascohen
GPG key ID: 0A272EA1B2F3C15F
8 changed files with 341 additions and 212 deletions

View file

@ -390,6 +390,10 @@ sub BatchStageMarcRecords {
}
# FIXME branch_code, number of bibs, number of items
_update_batch_record_counts($batch_id);
if ($progress_interval){
&$progress_callback($rec_num);
}
return ($batch_id, $num_valid, $num_items, @invalid_records);
}
@ -494,6 +498,11 @@ sub BatchFindDuplicates {
SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'no_match');
}
}
if ($progress_interval){
&$progress_callback($rec_num);
}
$sth->finish();
return $num_with_matches;
}
@ -690,7 +699,13 @@ sub BatchCommitRecords {
SetImportRecordStatus($rowref->{'import_record_id'}, 'ignored');
}
}
if ($progress_interval){
&$progress_callback($rec_num);
}
$schema->txn_commit; # Commit final records that may not have hit callback threshold
$sth->finish();
if ( @biblio_ids ) {

View file

@ -421,6 +421,7 @@ sub core_types_to_classes {
batch_hold_cancel => 'Koha::BackgroundJob::BatchCancelHold',
update_elastic_index => 'Koha::BackgroundJob::UpdateElasticIndex',
update_holds_queue_for_biblios => 'Koha::BackgroundJob::BatchUpdateBiblioHoldsQueue',
stage_marc_for_import => 'Koha::BackgroundJob::StageMARCForImport',
};
}

View file

@ -0,0 +1,198 @@
package Koha::BackgroundJob::StageMARCForImport;
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.
use Modern::Perl;
use Try::Tiny;
use base 'Koha::BackgroundJob';
use Koha::Database;
use Koha::ImportBatches;
use C4::Matcher;
use C4::ImportBatch qw(
RecordsFromMARCXMLFile
RecordsFromISO2709File
RecordsFromMarcPlugin
BatchStageMarcRecords
BatchFindDuplicates
SetImportBatchMatcher
SetImportBatchOverlayAction
SetImportBatchNoMatchAction
SetImportBatchItemAction
);
=head1 NAME
Koha::BackgroundJob::StageMARCForImport - Stage MARC records for import
This is a subclass of Koha::BackgroundJob.
=head1 API
=head2 Class methods
=head3 job_type

Returns the job type string that identifies this class of background job:
stage_marc_for_import

=cut

sub job_type {
    my $type = 'stage_marc_for_import';
    return $type;
}
=head3 process

Stage the MARC records for import.

Reads the records from C<filepath> (MARCXML, ISO2709 or a plugin-provided
format), stages them in a new import batch and, when C<matcher_id> is given,
looks for duplicates using that matching rule. Everything happens inside a
single database transaction, which is rolled back if the matching rule cannot
be fetched or if any step throws.

The hashref passed as C<$args> is read for the following keys: record_type,
encoding, format, filepath, filename, marc_modification_template, comments,
parse_items, matcher_id, overlay_action, nomatch_action, item_action,
vendor_id, basket_id and profile_id.

The job is always finished with a C<report> hashref (counters, matcher status,
import_batch_id, record_type, vendor_id, basket_id) and a C<messages> arrayref
stored in the job data.

=cut

sub process {
    my ( $self, $args ) = @_;

    $self->start;

    my $record_type                = $args->{record_type};
    my $encoding                   = $args->{encoding};
    my $format                     = $args->{format};
    my $filepath                   = $args->{filepath};
    my $filename                   = $args->{filename};
    my $marc_modification_template = $args->{marc_modification_template};
    my $comments                   = $args->{comments};
    my $parse_items                = $args->{parse_items};
    my $matcher_id                 = $args->{matcher_id};
    my $overlay_action             = $args->{overlay_action};
    my $nomatch_action             = $args->{nomatch_action};
    my $item_action                = $args->{item_action};
    my $vendor_id                  = $args->{vendor_id};
    my $basket_id                  = $args->{basket_id};
    my $profile_id                 = $args->{profile_id};

    my @messages;
    my ( $batch_id, $num_valid, $num_items, @import_errors );
    my $num_with_matches = 0;
    my $checked_matches  = 0;
    my $matcher_failed   = 0;
    my $matcher_code     = "";

    # Declared outside the try block so the catch block can roll back a
    # transaction that was opened but never closed.
    my $schema;
    my $txn_open = 0;

    try {
        $schema = Koha::Database->new->schema;
        $schema->storage->txn_begin;
        $txn_open = 1;

        # NOTE: $errors is collected but not reported anywhere at the moment.
        my ( $errors, $marcrecords );
        if ( $format eq 'MARCXML' ) {
            ( $errors, $marcrecords ) =
              C4::ImportBatch::RecordsFromMARCXMLFile( $filepath, $encoding );
        }
        elsif ( $format eq 'ISO2709' ) {
            ( $errors, $marcrecords ) =
              C4::ImportBatch::RecordsFromISO2709File( $filepath, $record_type,
                $encoding );
        }
        else {    # plugin based
            $errors = [];
            $marcrecords =
              C4::ImportBatch::RecordsFromMarcPlugin( $filepath, $format,
                $encoding );
        }

        my $record_count = scalar @$marcrecords;
        $self->size($record_count)->store;

        ( $batch_id, $num_valid, $num_items, @import_errors ) =
          BatchStageMarcRecords(
            $record_type,                $encoding,
            $marcrecords,                $filename,
            $marc_modification_template, $comments,
            '',                          $parse_items,
            0,                           50,
            sub {
                my $job_progress = shift;

                # When matching follows, staging only fills the first half
                # of the progress bar.
                if ($matcher_id) {
                    $job_progress /= 2;
                }
                $self->progress( int($job_progress) )->store;
            }
          );

        if ($profile_id) {
            my $ibatch = Koha::ImportBatches->find($batch_id);
            $ibatch->set( { profile_id => $profile_id } )->store;
        }

        if ($matcher_id) {
            my $matcher = C4::Matcher->fetch($matcher_id);
            if ( defined $matcher ) {
                $checked_matches = 1;
                $matcher_code    = $matcher->code();
                $num_with_matches = BatchFindDuplicates(
                    $batch_id, $matcher, 10, 50,
                    sub {
                        # The callback receives the running record count, so
                        # it must be offset from a fixed base instead of being
                        # added to the current progress on every call.
                        # Matching fills the second half of the progress bar.
                        my $job_progress = shift;
                        $self->progress(
                            int( ( $record_count + $job_progress ) / 2 ) )
                          ->store;
                    }
                );
                SetImportBatchMatcher( $batch_id, $matcher_id );
                SetImportBatchOverlayAction( $batch_id, $overlay_action );
                SetImportBatchNoMatchAction( $batch_id, $nomatch_action );
                SetImportBatchItemAction( $batch_id, $item_action );
                $schema->storage->txn_commit;
                $txn_open = 0;
            }
            else {
                $matcher_failed = 1;

                # We are closing the transaction ourselves; do not retry the
                # rollback from the catch block if this one throws.
                $txn_open = 0;
                $schema->storage->txn_rollback;
            }
        }
        else {
            $schema->storage->txn_commit;
            $txn_open = 0;
        }
    }
    catch {
        my $error = $_;
        warn $error;

        # Do not leave the transaction open on the worker's database handle.
        if ($txn_open) {
            try {
                $schema->storage->txn_rollback;
            }
            catch {
                warn $_;
                die "Something terrible has happened!";
            };
        }

        die "Something terrible has happened!"
          if ( $error =~ /Rollback failed/ );    # Rollback failed
    };

    my $report = {
        staged        => $num_valid,
        matched       => $num_with_matches,
        num_items     => $num_items,
        import_errors => scalar(@import_errors),

        # $num_valid is undefined when staging failed before producing counts.
        total           => ( $num_valid // 0 ) + scalar(@import_errors),
        checked_matches => $checked_matches,
        matcher_failed  => $matcher_failed,
        matcher_code    => $matcher_code,
        import_batch_id => $batch_id,
        record_type     => $record_type,
        vendor_id       => $vendor_id,
        basket_id       => $basket_id,
    };

    my $data = $self->decoded_data;
    $data->{messages} = \@messages;
    $data->{report}   = $report;

    $self->finish($data);
}
=head3 enqueue

Enqueue a new staging job. C<$args> is handed to the parent class unchanged as
the job arguments; the job size is set to 0 at this point.

=cut

sub enqueue {
    my ( $self, $args ) = @_;

    my %job_params = (
        job_size => 0,       # unknown for now
        job_args => $args,
    );

    return $self->SUPER::enqueue( \%job_params );
}
1;

View file

@ -15,7 +15,6 @@
ProxyPass "/cgi-bin/koha/tools/background-job-progress.pl" "!"
ProxyPass "/cgi-bin/koha/tools/export.pl" "!"
ProxyPass "/cgi-bin/koha/tools/manage-marc-import.pl" "!"
ProxyPass "/cgi-bin/koha/tools/stage-marc-import.pl" "!"
ProxyPass "/cgi-bin/koha/tools/upload-cover-image.pl" "!"
ProxyPass "/cgi-bin/koha/svc/cataloguing/metasearch" "!"

View file

@ -0,0 +1,46 @@
[% USE Koha %]
[% BLOCK report %]
    [%# Summary of a MARC staging job, rendered from the report stored on the job. %]
    [% SET report = job.report %]
    [% IF report %]
        <h2>MARC staging results</h2>
        [% SWITCH (record_type) %]
        [% CASE 'biblio' %]
            <h3>Processing bibliographic records</h3>
        [% CASE 'auth' %]
            <h3>Processing authority records</h3>
        [% END %]
        <ul>
            <li>[% report.total | html %] records in file</li>
            <li>[% report.import_errors | html %] records not staged because of MARC error</li>
            <li>[% report.staged | html %] records staged</li>
            [% IF ( report.checked_matches ) %]
                <li>[% report.matched | html %] records with at least one match in catalog per matching rule
                &quot;[% report.matcher_code | html %]&quot;</li>
            [% ELSE %]
                [% IF ( report.matcher_failed ) %]
                    <li>Record matching failed -- unable to retrieve selected matching rule.</li>
                [% ELSE %]
                    <li>Did not check for matches with existing records in catalog</li>
                [% END %]
            [% END %]
            [% IF report.record_type == 'biblio' %]
                <li>[% report.num_items | html %] item records found and staged</li>
            [% END %]
            [% IF ( report.label_batch ) %]
                <li>New label batch created: # [% report.label_batch | html %] </li>
            [% END %]
        </ul>
        [%# The job report stores the acquisition context as basket_id / vendor_id. %]
        [% IF report.basket_id && report.vendor_id %]
            <p>
                <a id="addtobasket" class="btn btn-default" href="/cgi-bin/koha/acqui/addorderiso2709.pl?import_batch_id=[% report.import_batch_id | html %]&basketno=[% report.basket_id | html %]&booksellerid=[% report.vendor_id | html %]">Add staged files to basket</a>
            </p>
        [% END %]
    [% END %]
[% END %]
[%# Empty block: this job type defines no markup for the detail section. %]
[% BLOCK detail %]
[% END %]
[%# Empty block: this job type defines no JavaScript of its own. %]
[% BLOCK js %]
[% END %]

View file

@ -41,6 +41,8 @@
<span>Update Elasticsearch index</span>
[% CASE 'update_holds_queue_for_biblios' %]
<span>Holds queue update</span>
[% CASE 'stage_marc_for_import' %]
<span>Staged MARC records for import</span>
[% CASE %]<span>Unknown job type '[% job_type | html %]'</span>
[% END %]

View file

@ -48,54 +48,41 @@
<div class="col-sm-10 col-sm-push-2">
<main>
[% IF ( uploadmarc ) %]
<div id="toolbar" class="btn-toolbar">
<a class="btn btn-default" href="/cgi-bin/koha/tools/stage-marc-import.pl"><i class="fa fa-plus"></i> Stage MARC records</a>
<a class="btn btn-default" href="/cgi-bin/koha/tools/manage-marc-import.pl?import_batch_id=[% import_batch_id | html %]"><i class="fa fa-list-ul"></i> Manage staged records</a>
</div>
[% END %]
[% IF ( uploadmarc ) %]
<h1>MARC staging results</h1>
<ul>
[% SWITCH (record_type) %]
[% CASE 'biblio' %]
<li>Processing bibliographic records</li>
[% CASE 'auth' %]
<li>Processing authority records</li>
[% END %]
<li>[% total | html %] records in file</li>
<li>[% import_errors | html %] records not staged because of MARC error</li>
<li>[% staged | html %] records staged</li>
[% IF ( checked_matches ) %]
<li>[% matched | html %] records with at least one match in catalog per matching rule
&quot;[% matcher_code | html %]&quot;</li>
[% ELSE %]
[% IF ( matcher_failed ) %]
<li>Record matching failed -- unable to retrieve selected matching rule.</li>
[% ELSE %]
<li>Did not check for matches with existing records in catalog</li>
[% FOREACH message IN messages %]
[% IF message.type == 'success' %]
<div class="dialog message">
[% ELSIF message.type == 'warning' %]
<div class="dialog alert">
[% ELSIF message.type == 'error' %]
<div class="dialog alert" style="margin:auto;">
[% END %]
[% IF message.code == 'cannot_enqueue_job' %]
<span>Cannot enqueue this job.</span>
[% END %]
[% IF message.error %]
<span>(The error was: [% message.error | html %], see the Koha log file for more information).</span>
[% END %]
</div>
[% END %]
[% END %]
[% IF record_type == 'biblio' %]
<li>[% num_items | html %] item records found and staged</li>
[% END %]
[% IF ( label_batch ) %]
<li>New label batch created: # [% label_batch | html %] </li>
[% END %]
</ul>
[% IF basketno && booksellerid %]
<p>
<a id="addtobasket" class="btn btn-default" href="/cgi-bin/koha/acqui/addorderiso2709.pl?import_batch_id=[% import_batch_id | html %]&basketno=[% basketno | html %]&booksellerid=[% booksellerid | html %]">Add staged files to basket</a>
</p>
[% END %]
[% ELSE %]
[% IF job_enqueued %]
<div id="toolbar" class="btn-toolbar">
<a class="btn btn-default" href="/cgi-bin/koha/tools/stage-marc-import.pl"><i class="fa fa-plus"></i> Stage MARC records</a>
<a class="btn btn-default" href="/cgi-bin/koha/tools/manage-marc-import.pl?import_batch_id=[% import_batch_id | html %]"><i class="fa fa-list-ul"></i> Manage staged records</a>
</div>
<h1>MARC staging</h1>
<div class="dialog message">
    <p>The job has been enqueued! It will be processed as soon as possible.</p>
    <p><a href="/cgi-bin/koha/admin/background_jobs.pl?op=view&id=[% job_id | uri %]" title="View detail of the enqueued job">View detail of the enqueued job</a></p>
</div>
[% ELSE %]
<h1>Stage MARC records for import</h1>
<ul>
<li>Select a MARC file to stage in the import reservoir. It will be parsed, and each valid record staged for later import into the catalog.</li>
<li>You can enter a name for this import. It may be useful, when creating a record, to remember where the suggested MARC data comes from!</li>
</ul>
<form method="post" action="[% SCRIPT_NAME | html %]" id="uploadfile" enctype="multipart/form-data">
<form method="post" id="uploadfile" enctype="multipart/form-data">
<fieldset class="rows" id="uploadform">
<legend>Upload a file to stage</legend>
<ol>
@ -136,7 +123,7 @@
</ol>
</fieldset>
<form method="post" id="processfile" action="[% SCRIPT_NAME | html %]" enctype="multipart/form-data">
<form method="post" id="processfile" enctype="multipart/form-data">
[% IF basketno && booksellerid %]
<input type="hidden" name="basketno" id="basketno" value="[% basketno | html %]" />
<input type="hidden" name="booksellerid" id="booksellerid" value="[% booksellerid | html %]" />
@ -253,10 +240,7 @@
<fieldset class="action">
<input type="button" id="mainformsubmit" value="Stage for import" />
</fieldset>
<div id="jobpanel"><div id="jobstatus" class="progress_panel">Job progress: <div id="jobprogress"></div> <span id="jobprogresspercent">0</span>%</div>
<div id="jobfailed"></div></div>
</form>
[% END %]
@ -273,7 +257,6 @@
[% MACRO jsinclude BLOCK %]
[% Asset.js("js/tools-menu.js") | $raw %]
[% Asset.js("lib/jquery/plugins/humanmsg.js") | $raw %]
[% Asset.js("js/background-job-progressbar.js") | $raw %]
[% Asset.js("js/file-upload.js") | $raw %]
<script>
var xhr;
@ -297,9 +280,17 @@
e.preventDefault();
CancelUpload();
});
$("#mainformsubmit").on("click",function(){
return CheckForm( document.getElementById("processfile"));
// Submit the staging form, but only once a file has been selected.
$("#mainformsubmit").on("click",function(e){
    e.preventDefault();
    // Use .val(): a jQuery object has no .value property, so comparing
    // $(...).value with '' is always false and the check never fired.
    if ($("#fileToUpload").val() == '') {
        alert(_("Please upload a file first."));
        return false;
    } else {
        $("#processfile").submit();
        return true;
    }
});
getProfiles();
$('#profile').change(function(){
if(this.value=='') {
@ -432,14 +423,6 @@
});
});
function CheckForm(f) {
if ($("#fileToUpload").value == '') {
alert(_("Please upload a file first."));
} else {
return submitBackgroundJob(f);
}
return false;
}
function StartUpload() {
if( $('#fileToUpload').prop('files').length == 0 ) return;
$('#fileuploadbutton').hide();

View file

@ -30,24 +30,22 @@ use Modern::Perl;
use CGI qw ( -utf8 );
use CGI::Cookie;
use MARC::File::USMARC;
use Try::Tiny;
# Koha modules used
use C4::Context;
use C4::Auth qw( get_template_and_user );
use C4::Output qw( output_html_with_http_headers );
use C4::ImportBatch qw( RecordsFromMARCXMLFile RecordsFromISO2709File RecordsFromMarcPlugin BatchStageMarcRecords BatchFindDuplicates SetImportBatchMatcher SetImportBatchOverlayAction SetImportBatchNoMatchAction SetImportBatchItemAction );
use C4::Matcher;
use Koha::UploadedFiles;
use C4::BackgroundJob;
use C4::MarcModificationTemplates qw( GetModificationTemplates );
use Koha::Plugins;
use Koha::ImportBatches;
use Koha::BackgroundJob::StageMARCForImport;
my $input = CGI->new;
my $fileID = $input->param('uploadedfileid');
my $runinbackground = $input->param('runinbackground');
my $completedJobID = $input->param('completedJobID');
my $matcher_id = $input->param('matcher');
my $overlay_action = $input->param('overlay_action');
my $nomatch_action = $input->param('nomatch_action');
@ -61,6 +59,7 @@ my $marc_modification_template = $input->param('marc_modification_template_id');
my $basketno = $input->param('basketno');
my $booksellerid = $input->param('booksellerid');
my $profile_id = $input->param('profile_id');
my @messages;
my ( $template, $loggedinuser, $cookie ) = get_template_and_user(
{
@ -72,144 +71,47 @@ my ( $template, $loggedinuser, $cookie ) = get_template_and_user(
);
$template->param(
SCRIPT_NAME => '/cgi-bin/koha/tools/stage-marc-import.pl',
uploadmarc => $fileID,
record_type => $record_type,
basketno => $basketno,
basketno => $basketno,
booksellerid => $booksellerid,
);
my %cookies = CGI::Cookie->fetch();
my $sessionID = $cookies{'CGISESSID'}->value;
if ($completedJobID) {
my $job = C4::BackgroundJob->fetch($sessionID, $completedJobID);
my $results = $job->results();
$template->param(map { $_ => $results->{$_} } keys %{ $results });
} elsif ($fileID) {
if ($fileID) {
my $upload = Koha::UploadedFiles->find( $fileID );
my $file = $upload->full_path;
my $filepath = $upload->full_path;
my $filename = $upload->filename;
my ( $errors, $marcrecords );
if( $format eq 'MARCXML' ) {
( $errors, $marcrecords ) = C4::ImportBatch::RecordsFromMARCXMLFile( $file, $encoding);
} elsif( $format eq 'ISO2709' ) {
( $errors, $marcrecords ) = C4::ImportBatch::RecordsFromISO2709File( $file, $record_type, $encoding );
} else { # plugin based
$errors = [];
$marcrecords = C4::ImportBatch::RecordsFromMarcPlugin( $file, $format, $encoding );
}
warn "$filename: " . ( join ',', @$errors ) if @$errors;
# no need to exit if we have no records (or only errors) here
# BatchStageMarcRecords can handle that
my $job = undef;
if ($runinbackground) {
my $job_size = scalar(@$marcrecords);
# if we're matching, job size is doubled
$job_size *= 2 if ($matcher_id ne "");
$job = C4::BackgroundJob->new($sessionID, $filename, '/cgi-bin/koha/tools/stage-marc-import.pl', $job_size);
my $jobID = $job->id();
# fork off
if (my $pid = fork) {
# parent
# return job ID as JSON
my $reply = CGI->new("");
print $reply->header(-type => 'text/html');
print '{"jobID":"' . $jobID . '"}';
exit 0;
} elsif (defined $pid) {
# child
# close STDOUT/STDERR to signal to end CGI session with Apache
# Otherwise, the AJAX request to this script won't return properly
close STDOUT;
close STDERR;
} else {
# fork failed, so exit immediately
warn "fork failed while attempting to run tools/stage-marc-import.pl as a background job: $!";
exit 0;
}
# if we get here, we're a child that has detached
# itself from Apache
}
my $schema = Koha::Database->new->schema;
$schema->storage->txn_begin;
# FIXME branch code
my ( $batch_id, $num_valid, $num_items, @import_errors ) =
BatchStageMarcRecords(
$record_type, $encoding,
$marcrecords, $filename,
$marc_modification_template,
$comments, '',
$parse_items, 0,
50, staging_progress_callback( $job )
);
if($profile_id) {
my $ibatch = Koha::ImportBatches->find($batch_id);
$ibatch->set({profile_id => $profile_id})->store;
}
my $num_with_matches = 0;
my $checked_matches = 0;
my $matcher_failed = 0;
my $matcher_code = "";
if ($matcher_id ne "") {
my $matcher = C4::Matcher->fetch($matcher_id);
if (defined $matcher) {
$checked_matches = 1;
$matcher_code = $matcher->code();
$num_with_matches =
BatchFindDuplicates( $batch_id, $matcher, 10, 50,
matching_progress_callback($job) );
SetImportBatchMatcher($batch_id, $matcher_id);
SetImportBatchOverlayAction($batch_id, $overlay_action);
SetImportBatchNoMatchAction($batch_id, $nomatch_action);
SetImportBatchItemAction($batch_id, $item_action);
$schema->storage->txn_commit;
} else {
$matcher_failed = 1;
$schema->storage->txn_rollback;
}
} else {
$schema->storage->txn_commit;
}
my $results = {
staged => $num_valid,
matched => $num_with_matches,
num_items => $num_items,
import_errors => scalar(@import_errors),
total => $num_valid + scalar(@import_errors),
checked_matches => $checked_matches,
matcher_failed => $matcher_failed,
matcher_code => $matcher_code,
import_batch_id => $batch_id,
booksellerid => $booksellerid,
basketno => $basketno
# Arguments handed to the staging background job. The job reads the
# acquisition context as vendor_id / basket_id and the import profile as
# profile_id, so they must be passed along with the form values.
my $params = {
    record_type                => $record_type,
    encoding                   => $encoding,
    format                     => $format,
    filepath                   => $filepath,
    filename                   => $filename,
    marc_modification_template => $marc_modification_template,
    comments                   => $comments,
    parse_items                => $parse_items,
    matcher_id                 => $matcher_id,
    overlay_action             => $overlay_action,
    nomatch_action             => $nomatch_action,
    item_action                => $item_action,
    vendor_id                  => $booksellerid,
    basket_id                  => $basketno,
    profile_id                 => $profile_id,
};
if ($runinbackground) {
$job->finish($results);
exit 0;
} else {
$template->param(staged => $num_valid,
matched => $num_with_matches,
num_items => $num_items,
import_errors => scalar(@import_errors),
total => $num_valid + scalar(@import_errors),
checked_matches => $checked_matches,
matcher_failed => $matcher_failed,
matcher_code => $matcher_code,
import_batch_id => $batch_id,
booksellerid => $booksellerid,
basketno => $basketno
);
try {
my $job_id = Koha::BackgroundJob::StageMARCForImport->new->enqueue( $params );
if ($job_id) {
$template->param(
job_enqueued => 1,
job_id => $job_id,
);
}
}
catch {
warn $_;
push @messages,
{
type => 'error',
code => 'cannot_enqueue_job',
error => $_,
};
};
} else {
# initial form
@ -231,23 +133,6 @@ if ($completedJobID) {
}
}
$template->param( messages => \@messages );
output_html_with_http_headers $input, $cookie, $template->output;
exit 0;
sub staging_progress_callback {
my $job = shift;
return sub {
my $progress = shift;
$job->progress($progress);
}
}
sub matching_progress_callback {
my $job = shift;
my $start_progress = $job->progress();
return sub {
my $progress = shift;
$job->progress($start_progress + $progress);
}
}