From da11c85d7d407c1476d2fcae24341c36fd96b490 Mon Sep 17 00:00:00 2001 From: Kyle M Hall Date: Thu, 26 Sep 2013 12:47:13 -0400 Subject: [PATCH] Bug 10955 - Add ability to skip deletions in zebraqueue It seems that record deletions can cause extreme slowdowns for Koha installations with extremely large numbers of records. It would be helpful to be able to skip record deletions when processing the zebraqueue with rebuild_zebra.pl so the deletions can be processed with a lower frequency. Test Plan: 1) Disable any zebra indexing cronjobs you may have 2) Delete a record 3) Note the operation recordDelete in the zebraqueue table having done = 0 4) Run misc/migration_tools/rebuild_zebra.pl -b -z --skip-deletes 5) Note the delete still has done = 0 6) Run misc/migration_tools/rebuild_zebra.pl -b -z 7) Note the delete now has done = 1 Signed-off-by: Jonathan Druart Signed-off-by: Katrin Fischer Passes all tests and QA script. Also tested for authorities, no problems found. Signed-off-by: Galen Charlton RM note: this is at best a work-around, and I will emphasize that --skip-deletes should be used only when absolutely necessary. I hope that --skip-deletes can go away at some point soon, but that may depend on changes to Zebra. (cherry picked from commit b0870311e1b8fae10a6ab17d0e132e911c3ab3aa) Signed-off-by: Fridolin Somers --- misc/migration_tools/rebuild_zebra.pl | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index 83e32c4f79..b613a776d9 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -39,6 +39,7 @@ my $noshadow; my $want_help; my $as_xml; my $process_zebraqueue; +my $process_zebraqueue_skip_deletes; my $do_not_clear_zebraqueue; my $length; my $where; @@ -67,6 +68,7 @@ my $result = GetOptions( 'x' => \$as_xml, 'y' => \$do_not_clear_zebraqueue, 'z' => \$process_zebraqueue, + 'skip-deletes' => \$process_zebraqueue_skip_deletes, 'where:s' => \$where, 'length:i' => \$length, 'offset:i' => \$offset, @@ -314,7 +316,7 @@ sub index_records { my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_; my $num_records_exported = 0; - my $records_deleted; + my $records_deleted = {}; my $need_reset = check_zebra_dirs($server_dir); if ($need_reset) { print "$0: found broken zebra server directories: forcing a rebuild\n"; @@ -333,15 +335,20 @@ sub index_records { mkdir "$directory" unless (-d $directory); mkdir "$directory/$record_type" unless (-d "$directory/$record_type"); if ($process_zebraqueue) { - my $entries = select_zebraqueue_records($record_type, 'deleted'); - mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type"); - $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml); - mark_zebraqueue_batch_done($entries); + my $entries; + + unless ( $process_zebraqueue_skip_deletes ) { + $entries = select_zebraqueue_records($record_type, 'deleted'); + mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type"); + $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml); + mark_zebraqueue_batch_done($entries); + } + $entries = select_zebraqueue_records($record_type, 'updated'); mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type"); - $num_records_exported = export_marc_records_from_list($record_type, - $entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted); + $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted); mark_zebraqueue_batch_done($entries); + } else { my $sth = select_all_records($record_type); $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize); @@ -846,6 +853,10 @@ Parameters: table. Cannot be used with -r or -s. + --skip-deletes select only updated records marked + in the zebraqueue table, not deletes. + Only effective with -z. + -r clear Zebra index before adding records to index. Implies -w. -- 2.39.5