From d92eb0373e5cb96a20dd7efa170102738e22b986 Mon Sep 17 00:00:00 2001
From: Galen Charlton
Date: Thu, 7 Feb 2008 00:11:47 -0600
Subject: [PATCH] experiment: use PazPar2 to group related works

The approach is to use PazPar2 to search just one target, the biblio
Zebra database. The results of each set are merged by PazPar2 to
generate a hitlist that combines related bibs together; as an example,
if a library has the first Harry Potter book in three languages and an
audiobook format, the hitlist should ideally return one result for the
work that includes links to the individual bibs.

The new module C4::Search::PazPar2 implements a simple client for
PazPar2's XML-over-HTTP API. It is designed to be generic, and thus may
end up getting moved out of Koha to become a stand-alone CPAN module.

Signed-off-by: Chris Cormack
Signed-off-by: Joshua Ferraro
---
 C4/Search.pm                    |  99 +++++++++++
 C4/Search/PazPar2.pm            | 168 +++++++++++++++++++
 etc/pazpar2/koha-biblios.xml    |  30 ++++
 etc/pazpar2/marc21.xsl          | 288 ++++++++++++++++++++++++++++++++
 etc/pazpar2/pazpar2.xml         |  98 +++++++++++
 etc/pazpar2/pz2-ourl-base.xsl   | 169 +++++++++++++++++++
 etc/pazpar2/pz2-ourl-marc21.xsl |  97 +++++++++++
 7 files changed, 949 insertions(+)
 create mode 100644 C4/Search/PazPar2.pm
 create mode 100644 etc/pazpar2/koha-biblios.xml
 create mode 100644 etc/pazpar2/marc21.xsl
 create mode 100644 etc/pazpar2/pazpar2.xml
 create mode 100644 etc/pazpar2/pz2-ourl-base.xsl
 create mode 100644 etc/pazpar2/pz2-ourl-marc21.xsl

diff --git a/C4/Search.pm b/C4/Search.pm
index 43b9d46c67..4027011ca6 100644
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -611,6 +611,105 @@ sub getRecords {
     return ( undef, $results_hashref, \@facets_loop );
 }
 
+use C4::Search::PazPar2;
+use XML::Simple;
+use Data::Dumper;
+
+# Parse a PazPar2 XML response with XML::Simple.  Returns an empty hashref
+# when the request failed (undef or empty body), when the body is not
+# well-formed XML, or when it does not parse to a hash, so callers can
+# always dereference the result.  Extra arguments are passed on to XMLin.
+sub _paz_parse_xml {
+    my ( $xml, @xmlin_opts ) = @_;
+
+    return {} unless defined $xml && length $xml;
+    my $parsed = eval { XMLin( $xml, @xmlin_opts ) };
+    if ($@) {
+        warn "unable to parse PazPar2 response: $@";
+        return {};
+    }
+    return ref($parsed) eq 'HASH' ? $parsed : {};
+}
+
+# pazGetRecords - experimental counterpart to getRecords that searches through
+# PazPar2, which merges related bibs into a single hit.
+#
+# Takes the ten positional arguments unpacked below; only $simple_query,
+# $results_per_page and $offset are used at present.
+#
+# Returns ( undef, $results_hashref, \@facets_loop ) where
+#   $results_hashref->{'biblioserver'}->{'hits'} is the "hits" value of the
+#       PazPar2 stat response (undef if that request failed);
+#   $results_hashref->{'biblioserver'}->{'RECORDS'} holds the body of the
+#       PazPar2 record response for every member of every hit on the page;
+#   @facets_loop has one { type_label, facets } entry per termlist, each
+#       facet being { facet_label_value => $term_name }.
+sub pazGetRecords {
+    my (
+        $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
+        $results_per_page, $offset,       $expanded_facet, $branches,
+        $query_type,       $scan
+    ) = @_;
+
+    my $paz = C4::Search::PazPar2->new('http://localhost:10006/search.pz2');
+    $paz->init();
+    $paz->search($simple_query);
+    sleep 1;    # crude pause so PazPar2 has results before "stat" is polled
+
+    # do results
+    my $results_hashref = {};
+    my $stat_xml        = $paz->stat;
+    my $stats           = _paz_parse_xml($stat_xml);
+    $results_hashref->{'biblioserver'}->{'hits'} = $stats->{'hits'};
+
+    # Take each response into a scalar first: the client methods return an
+    # empty list on failure, which would shift the XMLin options into the
+    # XML argument if the call were made inline.
+    my $show_xml = $paz->show( $offset, $results_per_page );
+    my $results  = _paz_parse_xml( $show_xml, forcearray => 1 );
+
+    HIT: foreach my $hit ( @{ $results->{'hit'} || [] } ) {
+        my $recid = $hit->{recid}->[0];
+        next HIT unless defined $recid;
+
+        # Experimental guard kept from the prototype: a matching record ID
+        # stops processing of all remaining hits.
+        #if ($recid =~ /[\200-\377]/) {
+        if ( $recid =~ /sodot/ ) {
+            #probably do not want non-ASCII in record ID
+            last HIT;
+        }
+
+        my $count = exists $hit->{count} ? $hit->{count}->[0] : 1;
+        for my $i ( 0 .. $count - 1 ) {
+
+            # Fetch each member exactly once; a failed fetch is skipped.
+            my $rec = $paz->record( $recid, $i );
+            next unless defined $rec;
+            push @{ $results_hashref->{'biblioserver'}->{'RECORDS'} }, $rec;
+        }
+    }
+
+    # pass through facets
+    my $termlist_xml = $paz->termlist('author,subject');
+    my $terms        = _paz_parse_xml( $termlist_xml, forcearray => 1 );
+    my $lists = ref( $terms->{'list'} ) eq 'HASH' ? $terms->{'list'} : {};
+    my @facets_loop = ();
+    foreach my $list ( sort keys %{$lists} ) {
+
+        # Keep the terms in the order PazPar2 returned them; sorting the
+        # term hashrefs themselves only orders them by memory address.
+        my @facets = map { +{ facet_label_value => $_->{'name'}->[0] } }
+          @{ $lists->{$list}->{'term'} || [] };
+        push @facets_loop, {
+            type_label => $list,
+            facets     => \@facets,
+        };
+    }
+
+    return ( undef, $results_hashref, \@facets_loop );
+}
+
 # STOPWORDS
 sub _remove_stopwords {
     my ( $operand, $index ) = @_;
diff --git a/C4/Search/PazPar2.pm b/C4/Search/PazPar2.pm
new file mode 100644
index 0000000000..f543c96d10
--- /dev/null
+++ b/C4/Search/PazPar2.pm
@@ -0,0 +1,168 @@
+package C4::Search::PazPar2;
+
+# Copyright (C) 2007 LibLime
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+
+use strict;
+use warnings;
+
+use LWP::UserAgent;
+use URI;
+use URI::QueryParam;
+use XML::Simple;
+
+=head1 NAME
+
+C4::Search::PazPar2 - implement client for PazPar2
+
+[Note: may rename to Net::PazPar2 or somesuch if decide to put on CPAN separate
+ from Koha]
+
+=head1 SYNOPSIS
+
+    my $paz = C4::Search::PazPar2->new('http://localhost:10006/search.pz2');
+    $paz->init();
+    $paz->search($query);
+    my $hits_xml = $paz->show(0, 20);
+
+=head1 DESCRIPTION
+
+Minimal client for PazPar2's XML-over-HTTP API.  Each command is an HTTP GET
+to the endpoint given to C<new>.  The C<stat>, C<show>, C<record> and
+C<termlist> methods return the response body, or nothing (after a C<warn>) on failure.
+
+=head2 new($endpoint)
+
+Create a client for the PazPar2 service at C<$endpoint>.
+
+=cut
+
+sub new {
+    my ($class, $endpoint) = @_;
+    my $self = {
+        endpoint => $endpoint,
+        session  => '',
+        ua       => LWP::UserAgent->new,
+    };
+    return bless $self, $class;
+}
+
+# Send one PazPar2 command as an HTTP GET.  @params is an ordered list of
+# name => value pairs; the session ID is appended when $with_session is true.
+# Returns the response body, or warns with the status line and returns nothing.
+sub _request {
+    my ($self, $command, $with_session, @params) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => $command);
+    while (my ($name, $value) = splice @params, 0, 2) {
+        $uri->query_param($name => $value);
+    }
+    $uri->query_param(session => $self->{'session'}) if $with_session;
+
+    my $response = $self->{'ua'}->get($uri);
+    return $response->content if $response->is_success;
+    warn $response->status_line;
+    return;
+}
+
+=head2 init
+
+Open a PazPar2 session and store its ID; returns true on status C<OK>.
+
+=cut
+
+sub init {
+    my $self = shift;
+
+    my $content = $self->_request('init', 0);
+    return 0 unless defined $content;
+    my $message = eval { XMLin($content) };
+    if (ref($message) eq 'HASH' && ($message->{'status'} || '') eq 'OK') {
+        $self->{'session'} = $message->{'session'};
+        return 1;
+    }
+    warn 'PazPar2 init did not return an OK status';
+    return 0;
+}
+
+=head2 search($query)
+
+Start a search for C<$query>; returns true if the HTTP request succeeded.
+
+=cut
+
+sub search {
+    my ($self, $query) = @_;
+
+    my $content = $self->_request('search', 1, query => $query);
+    return defined $content ? 1 : 0;
+}
+
+=head2 stat
+
+Return the body of the C<stat> response for the current session.
+
+=cut
+
+sub stat {
+    my $self = shift;
+    return $self->_request('stat', 1);
+}
+
+=head2 show($start, $count)
+
+Return the C<show> response body (sends C<start>, C<num> and C<block=1>).
+
+=cut
+
+sub show {
+    my ($self, $start, $count) = @_;
+    return $self->_request('show', 1, start => $start, num => $count, block => 1);
+}
+
+=head2 record($id, $offset)
+
+Return the C<record> response body (sends C<id>, C<offset> and C<binary=1>).
+
+=cut
+
+sub record {
+    my ($self, $id, $offset) = @_;
+    return $self->_request('record', 1, id => $id, offset => $offset, binary => 1);
+}
+
+=head2 termlist($name)
+
+Return the C<termlist> response body for the termlist name(s) in C<$name>.
+
+=cut
+
+sub termlist {
+    my ($self, $name) = @_;
+    return $self->_request('termlist', 1, name => $name);
+}
+
+1;
+
+=head1 AUTHOR
+
+Koha Development Team
+
+Galen Charlton
+
+=cut
diff --git a/etc/pazpar2/koha-biblios.xml b/etc/pazpar2/koha-biblios.xml
new file mode 100644
index 0000000000..2872b2a8ea
--- /dev/null
+++ b/etc/pazpar2/koha-biblios.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + diff --git a/etc/pazpar2/marc21.xsl b/etc/pazpar2/marc21.xsl new file mode 100644 index 0000000000..f47642cc0a --- /dev/null +++ b/etc/pazpar2/marc21.xsl @@ -0,0 +1,288 @@ + + + + + + + + + + + + + + + + + + + + + + + electronic resource + + + electronic resource + + + electronic resource + + + article + + + book + + + + + + title + + + + + + + + + author + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + , + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/pazpar2/pazpar2.xml b/etc/pazpar2/pazpar2.xml new file mode 100644 index 0000000000..81a887b606 --- /dev/null +++ b/etc/pazpar2/pazpar2.xml @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/pazpar2/pz2-ourl-base.xsl b/etc/pazpar2/pz2-ourl-base.xsl new file mode 100644 index 0000000000..260dc9f760 --- /dev/null +++ b/etc/pazpar2/pz2-ourl-base.xsl @@ -0,0 +1,169 @@ + + + + + + + + + ?generatedby=pz2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + &aulast= + + + + + &aufirst= + + + + + + + + + + + + + + + + &volume= + + + + + &issue= + + + + + &spage= + + + + + + + + + + + + + + + &date= + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + & + + = + + + + + + + + diff --git a/etc/pazpar2/pz2-ourl-marc21.xsl b/etc/pazpar2/pz2-ourl-marc21.xsl new file mode 100644 index 0000000000..1c8efde457 --- /dev/null +++ b/etc/pazpar2/pz2-ourl-marc21.xsl @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- 2.39.2