From d8fc079eee7e6226af7e76a2eadcc1e2e31ff7b5 Mon Sep 17 00:00:00 2001 From: Kyle M Hall Date: Mon, 9 Aug 2021 13:46:00 -0400 Subject: [PATCH] Bug 28833: Speed up holds queue builder via parallel processing The holds queue builder can take a very long time to run on systems with many holds. For example, a partner with 124,784 unfilled ( not found ) holds, is taking about 64 minutes to run. If we run that same number of holds in 5 parallel chunks ( splitting the number of records as evenly as possible, but *not* taking into account the holds per bib ), it takes 21.5 minutes. If we use 10 loops, it takes less then 14 minutes. Test Plan: 0) Install the Perl library Parallel::ForkManager 1) Generate a huge number of holds ( a few thousand at the minimum ) 2) Run the holds queue builder, use the `time` utility to track how much time it took to run 3) Set HoldsQueueParallelLoopsCount to 10 4) Repeat step 2, note the improvement in speed 5) Experiment with other values for HoldsQueueParallelLoopsCount 6) prove t/db_dependent/HoldsQueue.t Signed-off-by: Martin Renvoize Signed-off-by: Katrin Fischer --- C4/HoldsQueue.pm | 53 ++++++++++++++----- .../data/mysql/atomicupdate/bug_28833.perl | 9 ++++ installer/data/mysql/mandatory/sysprefs.sql | 1 + .../admin/preferences/circulation.pref | 5 ++ misc/cronjobs/holds/build_holds_queue.pl | 3 +- 5 files changed, 57 insertions(+), 14 deletions(-) create mode 100644 installer/data/mysql/atomicupdate/bug_28833.perl diff --git a/C4/HoldsQueue.pm b/C4/HoldsQueue.pm index 052c095121..3f3f3d22f9 100644 --- a/C4/HoldsQueue.pm +++ b/C4/HoldsQueue.pm @@ -170,6 +170,7 @@ Top level function that turns reserves into tmp_holdsqueue and hold_fill_targets sub CreateQueue { my $params = shift; my $unallocated = $params->{unallocated}; + my $loops = $params->{loops} || 1; my $dbh = C4::Context->dbh; unless ($unallocated) { @@ -187,7 +188,7 @@ sub CreateQueue { my $use_transport_cost_matrix = C4::Context->preference("UseTransportCostMatrix"); if ($use_transport_cost_matrix) { $transport_cost_matrix = TransportCostMatrix(); - unless (keys %$transport_cost_matrix) { + unless ( keys %$transport_cost_matrix ) { warn "UseTransportCostMatrix set to yes, but matrix not populated"; undef $transport_cost_matrix; } @@ -197,25 +198,51 @@ sub CreateQueue { my $bibs_with_pending_requests = GetBibsWithPendingHoldRequests(); - foreach my $biblionumber (@$bibs_with_pending_requests) { + # Split the list of bibs into chunks to run in parallel + my @chunks; + if ( $loops > 1 ) { + my $i = 0; + while (@$bibs_with_pending_requests) { + push( @{ $chunks[$i] }, pop(@$bibs_with_pending_requests) ); - $total_bibs++; + $i++; + $i = 0 if $i >= $loops; + } - my $result = update_queue_for_biblio( - { - biblio_id => $biblionumber, - branches_to_use => $branches_to_use, - transport_cost_matrix => $transport_cost_matrix, - unallocated => $unallocated + my $pm = Parallel::ForkManager->new($loops); + + DATA_LOOP: + foreach my $chunk (@chunks) { + my $pid = $pm->start and next DATA_LOOP; + foreach my $biblionumber (@$chunk) { + update_queue_for_biblio( + { + biblio_id => $biblionumber, + branches_to_use => $branches_to_use, + transport_cost_matrix => $transport_cost_matrix, + unallocated => $unallocated + } + ); } - ); + $pm->finish; + } - $total_requests += $result->{requests}; - $total_available_items += $result->{available_items}; - $num_items_mapped += $result->{mapped_items}; + $pm->wait_all_children; + } else { + foreach my $biblionumber (@$bibs_with_pending_requests) { + update_queue_for_biblio( + { + biblio_id => $biblionumber, + branches_to_use => $branches_to_use, + transport_cost_matrix => $transport_cost_matrix, + unallocated => $unallocated + } + ); + } } } + =head2 GetBibsWithPendingHoldRequests my $biblionumber_aref = GetBibsWithPendingHoldRequests(); diff --git a/installer/data/mysql/atomicupdate/bug_28833.perl b/installer/data/mysql/atomicupdate/bug_28833.perl new file mode 100644 index 0000000000..1d0c1adcca --- /dev/null +++ b/installer/data/mysql/atomicupdate/bug_28833.perl @@ -0,0 +1,9 @@ +$DBversion = 'XXX'; # will be replaced by the RM +if( CheckVersion( $DBversion ) ) { + $dbh->do(q{ + INSERT IGNORE INTO systempreferences ( `variable`, `value`, `options`, `explanation`, `type` ) VALUES + ('HoldsQueueParallelLoopsCount', '1', NULL, 'Number of parallel loops to use when running the holds queue builder', 'Integer'); + }); + + NewVersion( $DBversion, 28833, "Speed up holds queue builder via parallel processing"); +} diff --git a/installer/data/mysql/mandatory/sysprefs.sql b/installer/data/mysql/mandatory/sysprefs.sql index ffc90d0919..ede74e6d3f 100644 --- a/installer/data/mysql/mandatory/sysprefs.sql +++ b/installer/data/mysql/mandatory/sysprefs.sql @@ -302,6 +302,7 @@ INSERT INTO systempreferences ( `variable`, `value`, `options`, `explanation`, ` ('HoldsAutoFillPrintSlip','0',NULL,'If on, hold slip print dialog will be displayed automatically','YesNo'), ('HoldsLog','0',NULL,'If ON, log create/cancel/suspend/resume actions on holds.','YesNo'), ('HoldsNeedProcessingSIP', '0', NULL, 'Require staff to check-in before hold is set to waiting state', 'YesNo' ), +('HoldsQueueParallelLoopsCount', '1', NULL, 'Number of parallel loops to use when running the holds queue builder', 'Integer'), ('HoldsQueuePrioritizeBranch','homebranch','holdingbranch|homebranch','Decides if holds queue builder patron home library match to home or holding branch','Choice'), ('HoldsQueueSkipClosed', '0', NULL, 'If enabled, any libraries that are closed when the holds queue is built will be ignored for the purpose of filling holds.', 'YesNo'), ('HoldsSplitQueue','nothing','nothing|branch|itemtype|branch_itemtype','In the staff interface, split the holds view by the given criteria','Choice'), diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/circulation.pref b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/circulation.pref index 1ed473bcfa..f08873bbd0 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/circulation.pref +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/circulation.pref @@ -74,6 +74,11 @@ Circulation: - pref: HoldsToPullStartDate class: integer - day(s) ago. Note that the default end date is controlled by the system preference ConfirmFutureHolds. + - + - When building the holds queue, calculate hold matches using + - pref: HoldsQueueParallelLoopsCount + class: integer + - parallel loop(s). The more loops used, the faster it will calculate and the more computing resources it will use. - - pref: AllowAllMessageDeletion choices: diff --git a/misc/cronjobs/holds/build_holds_queue.pl b/misc/cronjobs/holds/build_holds_queue.pl index 600d0091bd..1906b5723b 100755 --- a/misc/cronjobs/holds/build_holds_queue.pl +++ b/misc/cronjobs/holds/build_holds_queue.pl @@ -85,6 +85,7 @@ if ( $rthq && !$force ) { cronlogaction( { info => $command_line_options } ); -CreateQueue( { unallocated => $unallocated } ); +my $loops = C4::Context->preference('HoldsQueueParallelLoopsCount'); +CreateQueue( { loops => $loops, unallocated => $unallocated } ); cronlogaction( { action => 'End', info => "COMPLETED" } ); -- 2.39.5