From 155c3f18244a415812346127304493ec20165ecc Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Thu, 30 Sep 2021 22:53:15 +0300 Subject: [PATCH] Bug 29135: Fix handling of deleted items in OAI-PMH provider When the OAI-PMH provider was configured to include item information in the returned records, deleting an item would cause the record to be reported as deleted. The provider also did some useless checks when determining the timestamp of a deleted record. These checks only had a performance impact. To test: 1) Create /var/lib/koha/kohadev/OAI.yaml with: format: marcxml: metadataPrefix: marcxml metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd include_items: 1 2) Set preference OAI-PMH to Enable 3) Set preference OAI-PMH:ConfFile to /var/lib/koha/kohadev/OAI.yaml 4) Delete an item from a record 5) View the page: http://localhost:8080/cgi-bin/koha/oai.pl?verb=ListRecords&metadataPrefix=marcxml 6) Note the record is now listed as deleted 7) Run prove -v t/db_dependent/OAI/ Sponsored-by: The National Library of Finland Signed-off-by: David Nind Signed-off-by: Nick Clemens Signed-off-by: Jonathan Druart --- Koha/OAI/Server/GetRecord.pm | 51 ++-- Koha/OAI/Server/ListBase.pm | 87 +++---- t/db_dependent/OAI/Server.t | 343 +++++++++++++++++++++++++- t/db_dependent/OAI/oaiconf_items.yaml | 11 + 4 files changed, 412 insertions(+), 80 deletions(-) create mode 100644 t/db_dependent/OAI/oaiconf_items.yaml diff --git a/Koha/OAI/Server/GetRecord.pm b/Koha/OAI/Server/GetRecord.pm index 4e4985ea8e..5e51b2fb99 100644 --- a/Koha/OAI/Server/GetRecord.pm +++ b/Koha/OAI/Server/GetRecord.pm @@ -35,56 +35,48 @@ sub new { my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/; my $items_included = $repository->items_included( $args{metadataPrefix} ); my $dbh = C4::Context->dbh; - my $sql = " - SELECT timestamp - FROM biblioitems - WHERE biblionumber=? - "; + my $sql; my @bind_params = ($biblionumber); if ( $items_included ) { # Take latest timestamp of biblio and any items # Or timestamp of deleted items where bib not deleted $sql .= " - UNION + SELECT timestamp + FROM biblio_metadata + WHERE biblionumber=? + UNION SELECT deleteditems.timestamp FROM deleteditems JOIN biblio USING (biblionumber) - WHERE deleteditems.biblionumber=? - UNION + WHERE deleteditems.biblionumber=? + UNION SELECT timestamp from items - WHERE biblionumber=? + WHERE biblionumber=? "; push @bind_params, $biblionumber; push @bind_params, $biblionumber; $sql = " - SELECT max(timestamp) + SELECT max(timestamp) as timestamp FROM ($sql) bib "; + } else { + $sql = " + SELECT max(timestamp) as timestamp + FROM biblio_metadata + WHERE biblionumber=? + "; } my $sth = $dbh->prepare( $sql ) || die( 'Could not prepare statement: ' . $dbh->errstr ); $sth->execute( @bind_params ) || die( 'Could not execute statement: ' . $sth->errstr ); my ($timestamp, $deleted); + # If there are no rows in biblio_metadata, try deletedbiblio_metadata unless ( ($timestamp = $sth->fetchrow) ) { $sql = " - SELECT timestamp - FROM deletedbiblio - WHERE biblionumber=? + SELECT max(timestamp) + FROM deletedbiblio_metadata + WHERE biblionumber=? "; @bind_params = ($biblionumber); - if ( $items_included ) { - # Take latest timestamp among biblio and items - $sql .= " - UNION - SELECT timestamp from deleteditems - WHERE biblionumber=? - "; - push @bind_params, $biblionumber; - $sql = " - SELECT max(timestamp) - FROM ($sql) bib - "; - } - $sth = $dbh->prepare($sql) || die('Could not prepare statement: ' . $dbh->errstr); $sth->execute( @bind_params ) || die('Could not execute statement: ' . $sth->errstr); unless ( ($timestamp = $sth->fetchrow) ) @@ -113,10 +105,7 @@ sub new { } else { # We fetch it using this method, rather than the database directly, # so it'll include the item data - my $marcxml; - $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix}) - unless $deleted; - + my $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix}); $self->record( Koha::OAI::Server::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args) ); diff --git a/Koha/OAI/Server/ListBase.pm b/Koha/OAI/Server/ListBase.pm index 877f7c137d..4f1092d146 100644 --- a/Koha/OAI/Server/ListBase.pm +++ b/Koha/OAI/Server/ListBase.pm @@ -62,75 +62,70 @@ sub GetRecords { my @part_bind_params = ($next_id); - my $where = "biblionumber >= ?"; + # Note: "main" is alias of the main table in the SELECT statement to avoid ambiquity with joined tables + my $where = "main.biblionumber >= ?"; if ( $from ) { - $where .= " AND timestamp >= ?"; + $where .= " AND main.timestamp >= ?"; push @part_bind_params, $from; } if ( $until ) { - $where .= " AND timestamp <= ?"; + $where .= " AND main.timestamp <= ?"; push @part_bind_params, $until; } if ( defined $set ) { - $where .= " AND biblionumber in (SELECT osb.biblionumber FROM oai_sets_biblios osb WHERE osb.set_id = ?)"; + $where .= " AND main.biblionumber in (SELECT osb.biblionumber FROM oai_sets_biblios osb WHERE osb.set_id = ?)"; push @part_bind_params, $set->{'id'}; } my @bind_params = @part_bind_params; - my $criteria = "WHERE $where ORDER BY biblionumber LIMIT " . ($max + 1); + my $order_limit = 'ORDER BY main.biblionumber LIMIT ' . ($max + 1); - my $sql = " - SELECT biblionumber - FROM $table - $criteria - "; + my $sql; + my $ts_sql; # If items are included, fetch a set of potential biblionumbers from items tables as well. # Then merge them, sort them and take the required number of them from the resulting list. # This may seem counter-intuitive as in worst case we fetch 3 times the biblionumbers needed, # but avoiding joins or subqueries makes this so much faster that it does not matter. - if ( $include_items ) { - $sql = "($sql) UNION (SELECT DISTINCT(biblionumber) FROM deleteditems $criteria)"; + if ( $include_items && !$deleted ) { + $sql = " + (SELECT biblionumber + FROM $table main + WHERE $where $order_limit) + UNION + (SELECT DISTINCT(biblionumber) FROM deleteditems main JOIN biblio USING (biblionumber) WHERE $where + $order_limit) + UNION + (SELECT DISTINCT(biblionumber) FROM items main WHERE $where $order_limit)"; push @bind_params, @part_bind_params; - if (!$deleted) { - $sql .= " UNION (SELECT DISTINCT(biblionumber) FROM items $criteria)"; - push @bind_params, @part_bind_params; - } - $sql = "SELECT biblionumber FROM ($sql) bibnos ORDER BY biblionumber LIMIT " . ($max + 1); + push @bind_params, @part_bind_params; + $sql = "SELECT biblionumber FROM ($sql) main $order_limit"; + + $ts_sql = " + SELECT MAX(timestamp) + FROM ( + SELECT timestamp FROM biblio_metadata WHERE biblionumber = ? + UNION + SELECT timestamp FROM deleteditems WHERE biblionumber = ? + UNION + SELECT timestamp FROM items WHERE biblionumber = ? + ) bi + "; + } else { + $sql = " + SELECT biblionumber + FROM $table main + WHERE $where + "; + + $ts_sql = "SELECT max(timestamp) FROM $table WHERE biblionumber = ?"; } my $sth = $dbh->prepare( $sql ) || die( 'Could not prepare statement: ' . $dbh->errstr ); - $sth->execute( @bind_params ) || die( 'Could not execute statement: ' . $sth->errstr ); - - my $ts_sql; - if ( $include_items ) { - if ( $deleted ) { - $ts_sql = " - SELECT MAX(timestamp) - FROM ( - SELECT timestamp FROM deletedbiblio_metadata WHERE biblionumber = ? - UNION - SELECT timestamp FROM deleteditems WHERE biblionumber = ? - ) bis - "; - } else { - $ts_sql = " - SELECT MAX(timestamp) - FROM ( - SELECT timestamp FROM biblio_metadata WHERE biblionumber = ? - UNION - SELECT timestamp FROM deleteditems WHERE biblionumber = ? - UNION - SELECT timestamp FROM items WHERE biblionumber = ? - ) bi - "; - } - } else { - $ts_sql = "SELECT timestamp FROM $table WHERE biblionumber = ?"; - } my $ts_sth = $dbh->prepare( $ts_sql ) || die( 'Could not prepare statement: ' . $dbh->errstr ); + $sth->execute( @bind_params ) || die( 'Could not execute statement: ' . $sth->errstr ); foreach my $row (@{ $sth->fetchall_arrayref() }) { my $biblionumber = $row->[0]; $count++; @@ -149,7 +144,7 @@ sub GetRecords { last STAGELOOP; } my @params = ($biblionumber); - if ( $include_items ) { + if ( $include_items && !$deleted ) { push @params, $deleted ? ( $biblionumber ) : ( $biblionumber, $biblionumber ); } $ts_sth->execute( @params ) || die( 'Could not execute statement: ' . $ts_sth->errstr ); diff --git a/t/db_dependent/OAI/Server.t b/t/db_dependent/OAI/Server.t index 8f52645edb..22fb77ed57 100755 --- a/t/db_dependent/OAI/Server.t +++ b/t/db_dependent/OAI/Server.t @@ -19,9 +19,9 @@ use Modern::Perl; use Test::Deep qw( cmp_deeply re ); -use Test::MockTime qw/set_fixed_time restore_time/; +use Test::MockTime qw/set_fixed_time set_relative_time restore_time/; -use Test::More tests => 32; +use Test::More tests => 33; use DateTime; use File::Basename; use File::Spec; @@ -31,8 +31,9 @@ use XML::Simple; use YAML::XS; use t::lib::Mocks; +use t::lib::TestBuilder; -use C4::Biblio qw( AddBiblio GetMarcBiblio ModBiblio ); +use C4::Biblio qw( AddBiblio GetMarcBiblio ModBiblio DelBiblio ); use C4::Context; use C4::OAI::Sets qw(AddOAISet); @@ -510,3 +511,339 @@ subtest 'ListSets tests' => sub { $schema->storage->txn_rollback; }; + +subtest 'Tests for timestamp handling' => sub { + + plan tests => 27; + + t::lib::Mocks::mock_preference( 'OAI::PMH' => 1 ); + t::lib::Mocks::mock_preference( 'OAI-PMH:MaxCount' => 3 ); + t::lib::Mocks::mock_preference( 'OAI-PMH:ConfFile' => File::Spec->rel2abs(dirname(__FILE__)) . '/oaiconf_items.yaml' ); + + $schema->storage->txn_begin; + + my $sth_metadata = $dbh->prepare('UPDATE biblio_metadata SET timestamp=? WHERE biblionumber=?'); + my $sth_del_metadata = $dbh->prepare('UPDATE deletedbiblio_metadata SET timestamp=? WHERE biblionumber=?'); + my $sth_item = $dbh->prepare('UPDATE items SET timestamp=? WHERE itemnumber=?'); + my $sth_del_item = $dbh->prepare('UPDATE deleteditems SET timestamp=? WHERE itemnumber=?'); + + my $builder = t::lib::TestBuilder->new; + + set_fixed_time(CORE::time()); + + my $utc_datetime = dt_from_string(undef, undef, 'UTC'); + my $utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z'; + my $timestamp = dt_from_string(undef, 'sql'); + + # Test a bib with one item + my $biblio1 = $builder->build_sample_biblio(); + $sth_metadata->execute($timestamp, $biblio1->biblionumber); + my $item1 = $builder->build_sample_item( + { + biblionumber => $biblio1->biblionumber + } + ); + $sth_item->execute($timestamp, $item1->itemnumber); + + my $list_items = { + verb => 'ListRecords', + metadataPrefix => 'marc21', + from => $utc_timestamp + }; + my $list_no_items = { + verb => 'ListRecords', + metadataPrefix => 'marcxml', + from => $utc_timestamp + }; + + my $get_items = { + verb => 'GetRecord', + metadataPrefix => 'marc21', + identifier => 'TEST:' . $biblio1->biblionumber + }; + my $get_no_items = { + verb => 'GetRecord', + metadataPrefix => 'marcxml', + identifier => 'TEST:' . $biblio1->biblionumber + }; + + my $expected = { + record => { + header => { + datestamp => $utc_timestamp, + identifier => 'TEST:' . $biblio1->biblionumber + }, + metadata => { + record => XMLin( + GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 1, opac => 1 })->as_xml_record() + ) + } + } + }; + my $expected_no_items = { + record => { + header => { + datestamp => $utc_timestamp, + identifier => 'TEST:' . $biblio1->biblionumber + }, + metadata => { + record => XMLin( + GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 0, opac => 1 })->as_xml_record() + ) + } + } + }; + + test_query( + 'ListRecords - biblio with a single item', + $list_items, + { ListRecords => $expected } + ); + test_query( + 'ListRecords - biblio with a single item (items not returned)', + $list_no_items, + { ListRecords => $expected_no_items } + ); + test_query( + 'GetRecord - biblio with a single item', + $get_items, + { GetRecord => $expected } + ); + test_query( + 'GetRecord - biblio with a single item (items not returned)', + $get_no_items, + { GetRecord => $expected_no_items } + ); + + # Add an item 10 seconds later and check results + set_relative_time(10); + + $utc_datetime = dt_from_string(undef, undef, 'UTC'); + $utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z'; + $timestamp = dt_from_string(undef, 'sql'); + + my $item2 = $builder->build_sample_item( + { + biblionumber => $biblio1->biblionumber + } + ); + $sth_item->execute($timestamp, $item2->itemnumber); + + $expected->{record}{header}{datestamp} = $utc_timestamp; + $expected->{record}{metadata}{record} = XMLin( + GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 1, opac => 1 })->as_xml_record() + ); + + test_query( + 'ListRecords - biblio with two items', + $list_items, + { ListRecords => $expected } + ); + test_query( + 'ListRecords - biblio with two items (items not returned)', + $list_no_items, + { ListRecords => $expected_no_items } + ); + test_query( + 'GetRecord - biblio with a two items', + $get_items, + { GetRecord => $expected } + ); + test_query( + 'GetRecord - biblio with a two items (items not returned)', + $get_no_items, + { GetRecord => $expected_no_items } + ); + + # Set biblio timestamp 10 seconds later and check results + set_relative_time(10); + $utc_datetime = dt_from_string(undef, undef, 'UTC'); + $utc_timestamp= $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z'; + $timestamp = dt_from_string(undef, 'sql'); + + $sth_metadata->execute($timestamp, $biblio1->biblionumber); + + $expected->{record}{header}{datestamp} = $utc_timestamp; + $expected_no_items->{record}{header}{datestamp} = $utc_timestamp; + + test_query( + "ListRecords - biblio with timestamp higher than item's", + $list_items, + { ListRecords => $expected } + ); + test_query( + "ListRecords - biblio with timestamp higher than item's (items not returned)", + $list_no_items, + { ListRecords => $expected_no_items } + ); + test_query( + "GetRecord - biblio with timestamp higher than item's", + $get_items, + { GetRecord => $expected } + ); + test_query( + "GetRecord - biblio with timestamp higher than item's (items not returned)", + $get_no_items, + { GetRecord => $expected_no_items } + ); + + # Delete an item 10 seconds later and check results + set_relative_time(10); + $utc_datetime = dt_from_string(undef, undef, 'UTC'); + $utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z'; + + $item1->safe_delete({ skip_record_index =>1 }); + $sth_del_item->execute($timestamp, $item1->itemnumber); + + $expected->{record}{header}{datestamp} = $utc_timestamp; + $expected->{record}{metadata}{record} = XMLin( + GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 1, opac => 1 })->as_xml_record() + ); + + test_query( + 'ListRecords - biblio with existing and deleted item', + $list_items, + { ListRecords => $expected } + ); + test_query( + 'ListRecords - biblio with existing and deleted item (items not returned)', + $list_no_items, + { ListRecords => $expected_no_items } + ); + test_query( + 'GetRecord - biblio with existing and deleted item', + $get_items, + { GetRecord => $expected } + ); + test_query( + 'GetRecord - biblio with existing and deleted item (items not returned)', + $get_no_items, + { GetRecord => $expected_no_items } + ); + + # Delete also the second item and verify results + $item2->safe_delete({ skip_record_index =>1 }); + $sth_del_item->execute($timestamp, $item2->itemnumber); + + $expected->{record}{metadata}{record} = XMLin( + GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 1, opac => 1 })->as_xml_record() + ); + + test_query( + 'ListRecords - biblio with two deleted items', + $list_items, + { ListRecords => $expected } + ); + test_query( + 'ListRecords - biblio with two deleted items (items not returned)', + $list_no_items, + { ListRecords => $expected_no_items } + ); + test_query( + 'GetRecord - biblio with two deleted items', + $get_items, + { GetRecord => $expected } + ); + test_query( + 'GetRecord - biblio with two deleted items (items not returned)', + $get_no_items, + { GetRecord => $expected_no_items } + ); + + # Delete the biblio 10 seconds later and check results + set_relative_time(10); + $utc_datetime = dt_from_string(undef, undef, 'UTC'); + $utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z'; + $timestamp = dt_from_string(undef, 'sql'); + + is(undef, DelBiblio($biblio1->biblionumber, { skip_record_index =>1 }), 'Biblio deleted'); + $sth_del_metadata->execute($timestamp, $biblio1->biblionumber); + + my $expected_header = { + record => { + header => { + datestamp => $utc_timestamp, + identifier => 'TEST:' . $biblio1->biblionumber, + status => 'deleted' + } + } + }; + + test_query( + 'ListRecords - deleted biblio with two deleted items', + $list_items, + { ListRecords => $expected_header } + ); + test_query( + 'ListRecords - deleted biblio with two deleted items (items not returned)', + $list_no_items, + { ListRecords => $expected_header } + ); + test_query( + 'GetRecord - deleted biblio with two deleted items', + $get_items, + { GetRecord => $expected_header } + ); + test_query( + 'GetRecord - deleted biblio with two deleted items (items not returned)', + $get_no_items, + { GetRecord => $expected_header } + ); + + # Add a second biblio 10 seconds later and check that both are returned properly + set_relative_time(10); + $utc_datetime = dt_from_string(undef, undef, 'UTC'); + $utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z'; + $timestamp = dt_from_string(undef, 'sql'); + + my $biblio2 = $builder->build_sample_biblio(); + $sth_metadata->execute($timestamp, $biblio2->biblionumber); + + my $expected2 = { + record => [ + $expected_header->{record}, + { + header => { + datestamp => $utc_timestamp, + identifier => 'TEST:' . $biblio2->biblionumber + }, + metadata => { + record => XMLin( + GetMarcBiblio({ biblionumber => $biblio2->biblionumber, embed_items => 1, opac => 1 })->as_xml_record() + ) + } + } + ] + }; + my $expected2_no_items = { + record => [ + $expected_header->{record}, + { + header => { + datestamp => $utc_timestamp, + identifier => 'TEST:' . $biblio2->biblionumber + }, + metadata => { + record => XMLin( + GetMarcBiblio({ biblionumber => $biblio2->biblionumber, embed_items => 0, opac => 1 })->as_xml_record() + ) + } + } + ] + }; + + test_query( + 'ListRecords - deleted biblio and normal biblio', + $list_items, + { ListRecords => $expected2 } + ); + test_query( + 'ListRecords - deleted biblio and normal biblio (items not returned)', + $list_no_items, + { ListRecords => $expected2_no_items } + ); + + restore_time(); + + $schema->storage->txn_rollback; +}; diff --git a/t/db_dependent/OAI/oaiconf_items.yaml b/t/db_dependent/OAI/oaiconf_items.yaml new file mode 100644 index 0000000000..299c79e5fc --- /dev/null +++ b/t/db_dependent/OAI/oaiconf_items.yaml @@ -0,0 +1,11 @@ +format: + marcxml: + metadataPrefix: marcxml + metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim + schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd + include_items: 0 + marc21: + metadataPrefix: marc21 + metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim + schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd + include_items: 1 -- 2.39.5