Bug 29135: Fix handling of deleted items in OAI-PMH provider

When the OAI-PMH provider was configured to include item information
in the returned records, deleting an item would cause the record to
be reported as deleted. The provider also did some useless checks when
determining the timestamp of a deleted record. These checks only had
a performance impact.

To test:
1) Create /var/lib/koha/kohadev/OAI.yaml with:

format:
  marcxml:
    metadataPrefix: marcxml
    metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
    schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
    include_items: 1

2) Set preference OAI-PMH to Enable
3) Set preference OAI-PMH:ConfFile to /var/lib/koha/kohadev/OAI.yaml
4) Delete an item from a record
5) View the page: http://localhost:8080/cgi-bin/koha/oai.pl?verb=ListRecords&metadataPrefix=marcxml
6) Note the record is now listed as deleted
7) Run prove -v t/db_dependent/OAI/

Sponsored-by: The National Library of Finland

Signed-off-by: David Nind <david@davidnind.com>

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>

Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>
This commit is contained in:
Ere Maijala 2021-09-30 22:53:15 +03:00 committed by Kyle Hall
parent 663d9e3c1d
commit ae75b3c4b0
4 changed files with 411 additions and 79 deletions

View file

@ -36,56 +36,48 @@ sub new {
my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
my $items_included = $repository->items_included( $args{metadataPrefix} );
my $dbh = C4::Context->dbh;
my $sql = "
SELECT timestamp
FROM biblioitems
WHERE biblionumber=?
";
my $sql;
my @bind_params = ($biblionumber);
if ( $items_included ) {
# Take latest timestamp of biblio and any items
# Or timestamp of deleted items where bib not deleted
$sql .= "
UNION
SELECT timestamp
FROM biblio_metadata
WHERE biblionumber=?
UNION
SELECT deleteditems.timestamp FROM deleteditems JOIN biblio USING (biblionumber)
WHERE deleteditems.biblionumber=?
UNION
WHERE deleteditems.biblionumber=?
UNION
SELECT timestamp from items
WHERE biblionumber=?
WHERE biblionumber=?
";
push @bind_params, $biblionumber;
push @bind_params, $biblionumber;
$sql = "
SELECT max(timestamp)
SELECT max(timestamp) as timestamp
FROM ($sql) bib
";
} else {
$sql = "
SELECT max(timestamp) as timestamp
FROM biblio_metadata
WHERE biblionumber=?
";
}
my $sth = $dbh->prepare( $sql ) || die( 'Could not prepare statement: ' . $dbh->errstr );
$sth->execute( @bind_params ) || die( 'Could not execute statement: ' . $sth->errstr );
my ($timestamp, $deleted);
# If there are no rows in biblio_metadata, try deletedbiblio_metadata
unless ( ($timestamp = $sth->fetchrow) ) {
$sql = "
SELECT timestamp
FROM deletedbiblio
WHERE biblionumber=?
SELECT max(timestamp)
FROM deletedbiblio_metadata
WHERE biblionumber=?
";
@bind_params = ($biblionumber);
if ( $items_included ) {
# Take latest timestamp among biblio and items
$sql .= "
UNION
SELECT timestamp from deleteditems
WHERE biblionumber=?
";
push @bind_params, $biblionumber;
$sql = "
SELECT max(timestamp)
FROM ($sql) bib
";
}
$sth = $dbh->prepare($sql) || die('Could not prepare statement: ' . $dbh->errstr);
$sth->execute( @bind_params ) || die('Could not execute statement: ' . $sth->errstr);
unless ( ($timestamp = $sth->fetchrow) )
@ -114,10 +106,7 @@ sub new {
} else {
# We fetch it using this method, rather than the database directly,
# so it'll include the item data
my $marcxml;
$marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix})
unless $deleted;
my $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix});
$self->record(
Koha::OAI::Server::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args)
);

View file

@ -63,75 +63,70 @@ sub GetRecords {
my @part_bind_params = ($next_id);
my $where = "biblionumber >= ?";
# Note: "main" is alias of the main table in the SELECT statement to avoid ambiquity with joined tables
my $where = "main.biblionumber >= ?";
if ( $from ) {
$where .= " AND timestamp >= ?";
$where .= " AND main.timestamp >= ?";
push @part_bind_params, $from;
}
if ( $until ) {
$where .= " AND timestamp <= ?";
$where .= " AND main.timestamp <= ?";
push @part_bind_params, $until;
}
if ( defined $set ) {
$where .= " AND biblionumber in (SELECT osb.biblionumber FROM oai_sets_biblios osb WHERE osb.set_id = ?)";
$where .= " AND main.biblionumber in (SELECT osb.biblionumber FROM oai_sets_biblios osb WHERE osb.set_id = ?)";
push @part_bind_params, $set->{'id'};
}
my @bind_params = @part_bind_params;
my $criteria = "WHERE $where ORDER BY biblionumber LIMIT " . ($max + 1);
my $order_limit = 'ORDER BY main.biblionumber LIMIT ' . ($max + 1);
my $sql = "
SELECT biblionumber
FROM $table
$criteria
";
my $sql;
my $ts_sql;
# If items are included, fetch a set of potential biblionumbers from items tables as well.
# Then merge them, sort them and take the required number of them from the resulting list.
# This may seem counter-intuitive as in worst case we fetch 3 times the biblionumbers needed,
# but avoiding joins or subqueries makes this so much faster that it does not matter.
if ( $include_items ) {
$sql = "($sql) UNION (SELECT DISTINCT(biblionumber) FROM deleteditems $criteria)";
if ( $include_items && !$deleted ) {
$sql = "
(SELECT biblionumber
FROM $table main
WHERE $where $order_limit)
UNION
(SELECT DISTINCT(biblionumber) FROM deleteditems main JOIN biblio USING (biblionumber) WHERE $where
$order_limit)
UNION
(SELECT DISTINCT(biblionumber) FROM items main WHERE $where $order_limit)";
push @bind_params, @part_bind_params;
if (!$deleted) {
$sql .= " UNION (SELECT DISTINCT(biblionumber) FROM items $criteria)";
push @bind_params, @part_bind_params;
}
$sql = "SELECT biblionumber FROM ($sql) bibnos ORDER BY biblionumber LIMIT " . ($max + 1);
push @bind_params, @part_bind_params;
$sql = "SELECT biblionumber FROM ($sql) main $order_limit";
$ts_sql = "
SELECT MAX(timestamp)
FROM (
SELECT timestamp FROM biblio_metadata WHERE biblionumber = ?
UNION
SELECT timestamp FROM deleteditems WHERE biblionumber = ?
UNION
SELECT timestamp FROM items WHERE biblionumber = ?
) bi
";
} else {
$sql = "
SELECT biblionumber
FROM $table main
WHERE $where
";
$ts_sql = "SELECT max(timestamp) FROM $table WHERE biblionumber = ?";
}
my $sth = $dbh->prepare( $sql ) || die( 'Could not prepare statement: ' . $dbh->errstr );
$sth->execute( @bind_params ) || die( 'Could not execute statement: ' . $sth->errstr );
my $ts_sql;
if ( $include_items ) {
if ( $deleted ) {
$ts_sql = "
SELECT MAX(timestamp)
FROM (
SELECT timestamp FROM deletedbiblio_metadata WHERE biblionumber = ?
UNION
SELECT timestamp FROM deleteditems WHERE biblionumber = ?
) bis
";
} else {
$ts_sql = "
SELECT MAX(timestamp)
FROM (
SELECT timestamp FROM biblio_metadata WHERE biblionumber = ?
UNION
SELECT timestamp FROM deleteditems WHERE biblionumber = ?
UNION
SELECT timestamp FROM items WHERE biblionumber = ?
) bi
";
}
} else {
$ts_sql = "SELECT timestamp FROM $table WHERE biblionumber = ?";
}
my $ts_sth = $dbh->prepare( $ts_sql ) || die( 'Could not prepare statement: ' . $dbh->errstr );
$sth->execute( @bind_params ) || die( 'Could not execute statement: ' . $sth->errstr );
foreach my $row (@{ $sth->fetchall_arrayref() }) {
my $biblionumber = $row->[0];
$count++;
@ -150,7 +145,7 @@ sub GetRecords {
last STAGELOOP;
}
my @params = ($biblionumber);
if ( $include_items ) {
if ( $include_items && !$deleted ) {
push @params, $deleted ? ( $biblionumber ) : ( $biblionumber, $biblionumber );
}
$ts_sth->execute( @params ) || die( 'Could not execute statement: ' . $ts_sth->errstr );

View file

@ -19,9 +19,9 @@
use Modern::Perl;
use Test::Deep qw( cmp_deeply re );
use Test::MockTime qw/set_fixed_time restore_time/;
use Test::MockTime qw/set_fixed_time set_relative_time restore_time/;
use Test::More tests => 32;
use Test::More tests => 33;
use DateTime;
use File::Basename;
use File::Spec;
@ -31,6 +31,7 @@ use XML::Simple;
use YAML::XS;
use t::lib::Mocks;
use t::lib::TestBuilder;
use C4::Biblio;
use C4::Context;
@ -510,3 +511,339 @@ subtest 'ListSets tests' => sub {
$schema->storage->txn_rollback;
};
subtest 'Tests for timestamp handling' => sub {
plan tests => 27;
t::lib::Mocks::mock_preference( 'OAI::PMH' => 1 );
t::lib::Mocks::mock_preference( 'OAI-PMH:MaxCount' => 3 );
t::lib::Mocks::mock_preference( 'OAI-PMH:ConfFile' => File::Spec->rel2abs(dirname(__FILE__)) . '/oaiconf_items.yaml' );
$schema->storage->txn_begin;
my $sth_metadata = $dbh->prepare('UPDATE biblio_metadata SET timestamp=? WHERE biblionumber=?');
my $sth_del_metadata = $dbh->prepare('UPDATE deletedbiblio_metadata SET timestamp=? WHERE biblionumber=?');
my $sth_item = $dbh->prepare('UPDATE items SET timestamp=? WHERE itemnumber=?');
my $sth_del_item = $dbh->prepare('UPDATE deleteditems SET timestamp=? WHERE itemnumber=?');
my $builder = t::lib::TestBuilder->new;
set_fixed_time(CORE::time());
my $utc_datetime = dt_from_string(undef, undef, 'UTC');
my $utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z';
my $timestamp = dt_from_string(undef, 'sql');
# Test a bib with one item
my $biblio1 = $builder->build_sample_biblio();
$sth_metadata->execute($timestamp, $biblio1->biblionumber);
my $item1 = $builder->build_sample_item(
{
biblionumber => $biblio1->biblionumber
}
);
$sth_item->execute($timestamp, $item1->itemnumber);
my $list_items = {
verb => 'ListRecords',
metadataPrefix => 'marc21',
from => $utc_timestamp
};
my $list_no_items = {
verb => 'ListRecords',
metadataPrefix => 'marcxml',
from => $utc_timestamp
};
my $get_items = {
verb => 'GetRecord',
metadataPrefix => 'marc21',
identifier => 'TEST:' . $biblio1->biblionumber
};
my $get_no_items = {
verb => 'GetRecord',
metadataPrefix => 'marcxml',
identifier => 'TEST:' . $biblio1->biblionumber
};
my $expected = {
record => {
header => {
datestamp => $utc_timestamp,
identifier => 'TEST:' . $biblio1->biblionumber
},
metadata => {
record => XMLin(
GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 1, opac => 1 })->as_xml_record()
)
}
}
};
my $expected_no_items = {
record => {
header => {
datestamp => $utc_timestamp,
identifier => 'TEST:' . $biblio1->biblionumber
},
metadata => {
record => XMLin(
GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 0, opac => 1 })->as_xml_record()
)
}
}
};
test_query(
'ListRecords - biblio with a single item',
$list_items,
{ ListRecords => $expected }
);
test_query(
'ListRecords - biblio with a single item (items not returned)',
$list_no_items,
{ ListRecords => $expected_no_items }
);
test_query(
'GetRecord - biblio with a single item',
$get_items,
{ GetRecord => $expected }
);
test_query(
'GetRecord - biblio with a single item (items not returned)',
$get_no_items,
{ GetRecord => $expected_no_items }
);
# Add an item 10 seconds later and check results
set_relative_time(10);
$utc_datetime = dt_from_string(undef, undef, 'UTC');
$utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z';
$timestamp = dt_from_string(undef, 'sql');
my $item2 = $builder->build_sample_item(
{
biblionumber => $biblio1->biblionumber
}
);
$sth_item->execute($timestamp, $item2->itemnumber);
$expected->{record}{header}{datestamp} = $utc_timestamp;
$expected->{record}{metadata}{record} = XMLin(
GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 1, opac => 1 })->as_xml_record()
);
test_query(
'ListRecords - biblio with two items',
$list_items,
{ ListRecords => $expected }
);
test_query(
'ListRecords - biblio with two items (items not returned)',
$list_no_items,
{ ListRecords => $expected_no_items }
);
test_query(
'GetRecord - biblio with a two items',
$get_items,
{ GetRecord => $expected }
);
test_query(
'GetRecord - biblio with a two items (items not returned)',
$get_no_items,
{ GetRecord => $expected_no_items }
);
# Set biblio timestamp 10 seconds later and check results
set_relative_time(10);
$utc_datetime = dt_from_string(undef, undef, 'UTC');
$utc_timestamp= $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z';
$timestamp = dt_from_string(undef, 'sql');
$sth_metadata->execute($timestamp, $biblio1->biblionumber);
$expected->{record}{header}{datestamp} = $utc_timestamp;
$expected_no_items->{record}{header}{datestamp} = $utc_timestamp;
test_query(
"ListRecords - biblio with timestamp higher than item's",
$list_items,
{ ListRecords => $expected }
);
test_query(
"ListRecords - biblio with timestamp higher than item's (items not returned)",
$list_no_items,
{ ListRecords => $expected_no_items }
);
test_query(
"GetRecord - biblio with timestamp higher than item's",
$get_items,
{ GetRecord => $expected }
);
test_query(
"GetRecord - biblio with timestamp higher than item's (items not returned)",
$get_no_items,
{ GetRecord => $expected_no_items }
);
# Delete an item 10 seconds later and check results
set_relative_time(10);
$utc_datetime = dt_from_string(undef, undef, 'UTC');
$utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z';
$item1->safe_delete({ skip_record_index =>1 });
$sth_del_item->execute($timestamp, $item1->itemnumber);
$expected->{record}{header}{datestamp} = $utc_timestamp;
$expected->{record}{metadata}{record} = XMLin(
GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 1, opac => 1 })->as_xml_record()
);
test_query(
'ListRecords - biblio with existing and deleted item',
$list_items,
{ ListRecords => $expected }
);
test_query(
'ListRecords - biblio with existing and deleted item (items not returned)',
$list_no_items,
{ ListRecords => $expected_no_items }
);
test_query(
'GetRecord - biblio with existing and deleted item',
$get_items,
{ GetRecord => $expected }
);
test_query(
'GetRecord - biblio with existing and deleted item (items not returned)',
$get_no_items,
{ GetRecord => $expected_no_items }
);
# Delete also the second item and verify results
$item2->safe_delete({ skip_record_index =>1 });
$sth_del_item->execute($timestamp, $item2->itemnumber);
$expected->{record}{metadata}{record} = XMLin(
GetMarcBiblio({ biblionumber => $biblio1->biblionumber, embed_items => 1, opac => 1 })->as_xml_record()
);
test_query(
'ListRecords - biblio with two deleted items',
$list_items,
{ ListRecords => $expected }
);
test_query(
'ListRecords - biblio with two deleted items (items not returned)',
$list_no_items,
{ ListRecords => $expected_no_items }
);
test_query(
'GetRecord - biblio with two deleted items',
$get_items,
{ GetRecord => $expected }
);
test_query(
'GetRecord - biblio with two deleted items (items not returned)',
$get_no_items,
{ GetRecord => $expected_no_items }
);
# Delete the biblio 10 seconds later and check results
set_relative_time(10);
$utc_datetime = dt_from_string(undef, undef, 'UTC');
$utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z';
$timestamp = dt_from_string(undef, 'sql');
is(undef, DelBiblio($biblio1->biblionumber, { skip_record_index =>1 }), 'Biblio deleted');
$sth_del_metadata->execute($timestamp, $biblio1->biblionumber);
my $expected_header = {
record => {
header => {
datestamp => $utc_timestamp,
identifier => 'TEST:' . $biblio1->biblionumber,
status => 'deleted'
}
}
};
test_query(
'ListRecords - deleted biblio with two deleted items',
$list_items,
{ ListRecords => $expected_header }
);
test_query(
'ListRecords - deleted biblio with two deleted items (items not returned)',
$list_no_items,
{ ListRecords => $expected_header }
);
test_query(
'GetRecord - deleted biblio with two deleted items',
$get_items,
{ GetRecord => $expected_header }
);
test_query(
'GetRecord - deleted biblio with two deleted items (items not returned)',
$get_no_items,
{ GetRecord => $expected_header }
);
# Add a second biblio 10 seconds later and check that both are returned properly
set_relative_time(10);
$utc_datetime = dt_from_string(undef, undef, 'UTC');
$utc_timestamp = $utc_datetime->ymd . 'T' . $utc_datetime->hms . 'Z';
$timestamp = dt_from_string(undef, 'sql');
my $biblio2 = $builder->build_sample_biblio();
$sth_metadata->execute($timestamp, $biblio2->biblionumber);
my $expected2 = {
record => [
$expected_header->{record},
{
header => {
datestamp => $utc_timestamp,
identifier => 'TEST:' . $biblio2->biblionumber
},
metadata => {
record => XMLin(
GetMarcBiblio({ biblionumber => $biblio2->biblionumber, embed_items => 1, opac => 1 })->as_xml_record()
)
}
}
]
};
my $expected2_no_items = {
record => [
$expected_header->{record},
{
header => {
datestamp => $utc_timestamp,
identifier => 'TEST:' . $biblio2->biblionumber
},
metadata => {
record => XMLin(
GetMarcBiblio({ biblionumber => $biblio2->biblionumber, embed_items => 0, opac => 1 })->as_xml_record()
)
}
}
]
};
test_query(
'ListRecords - deleted biblio and normal biblio',
$list_items,
{ ListRecords => $expected2 }
);
test_query(
'ListRecords - deleted biblio and normal biblio (items not returned)',
$list_no_items,
{ ListRecords => $expected2_no_items }
);
restore_time();
$schema->storage->txn_rollback;
};

View file

@ -0,0 +1,11 @@
format:
marcxml:
metadataPrefix: marcxml
metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
include_items: 0
marc21:
metadataPrefix: marc21
metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
include_items: 1