Koha/C4/External/Syndetics.pm
Chris Cormack 509d673f10 Bug 7941 : Fix version numbers in modules
Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>
Signed-off-by: Paul Poulain <paul.poulain@biblibre.com>
2012-06-11 17:29:38 +02:00

280 lines
8.7 KiB
Perl

package C4::External::Syndetics;
# Copyright (C) 2006 LibLime
# <jmf at liblime dot com>
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with Koha; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
use XML::Simple;
use XML::LibXML;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request::Common;
use strict;
use warnings;
# Predeclare the package globals used by Exporter so they pass `use strict`.
use vars qw($VERSION @ISA @EXPORT);
# Configure the Exporter interface at compile time.
BEGIN {
require Exporter;
# NOTE(review): a bare literal with two or more dots is parsed by Perl as a
# v-string, not as a number - confirm this is what the version handling
# that reads $VERSION expects.
$VERSION = 3.07.00.049;
@ISA = qw(Exporter);
# All seven public get_syndetics_* functions defined in this file are
# exported by default.
@EXPORT = qw(
&get_syndetics_index
&get_syndetics_summary
&get_syndetics_toc
&get_syndetics_editions
&get_syndetics_excerpt
&get_syndetics_reviews
&get_syndetics_anotes
);
}
# package-level variable
# One XML::LibXML parser instance, reused by get_syndetics_summary and
# get_syndetics_reviews via $parser->parse_string.
my $parser = XML::LibXML->new();
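=head1 NAME

C4::External::Syndetics - Functions for retrieving Syndetics content in Koha

=head1 FUNCTIONS

This module provides facilities for retrieving Syndetics.com content in Koha.

=head2 get_syndetics_summary

    my $syndetics_summary = get_syndetics_summary( $isbn, $upc, $oclc, $syndetics_elements );

Get summary data from Syndetics. When the optional C<$syndetics_elements>
hash reference contains an C<AVSUMMARY> key, C<AVSUMMARY.XML> is requested
instead of C<SUMMARY.XML>.

=cut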
# get_syndetics_index( $isbn, $upc, $oclc )
#
# Fetches INDEX.XML for the given identifiers and reports which of the known
# Syndetics content types the index lists.
#
# Returns a hash reference mapping each listed type name to itself
# (e.g. { SUMMARY => 'SUMMARY', TOC => 'TOC' }), or nothing when the response
# is not XML, has an empty body, or lists none of the known types.
sub get_syndetics_index {
    my ( $isbn, $upc, $oclc ) = @_;

    my $response = _fetch_syndetics_content('INDEX.XML', $isbn, $upc, $oclc);

    # Same guard as the other get_syndetics_* functions: a non-XML body
    # (for instance an error page) must not be handed to the XML parser.
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    return unless $content;

    # Keep the parsed document in its own variable so the lookups below can
    # never end up probing the HTTP response object by accident.
    my $index = XML::Simple->new()->XMLin($content);
    return unless ref($index) eq 'HASH';

    my @known_types = qw(
        SUMMARY TOC FICTION AWARDS1 SERIES1 SPSUMMARY SPREVIEW
        AVPROFILE AVSUMMARY DBCHAPTER
        LJREVIEW PWREVIEW SLJREVIEW CHREVIEW BLREVIEW HBREVIEW KIREVIEW
        CRITICASREVIEW ANOTES
    );

    my %syndetics_elements;
    for my $type (@known_types) {
        next unless exists $index->{$type};

        # An element only counts when its value names the type itself.
        my $value = $index->{$type};
        next unless defined $value && $value =~ /$type/;

        $syndetics_elements{$type} = $type;
    }

    return unless %syndetics_elements;
    return \%syndetics_elements;
}
# get_syndetics_summary( $isbn, $upc, $oclc, $syndetics_elements )
#
# Fetches the summary document for the given identifiers and returns the
# text of its Fld520 node(s). AVSUMMARY.XML is requested when
# $syndetics_elements has an AVSUMMARY key, SUMMARY.XML otherwise.
#
# A non-XML response yields an empty return; a parse failure is reported
# with warn and leaves the summary unset.
sub get_syndetics_summary {
    my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_;

    my $summary_type = 'SUMMARY.XML';
    if ( exists $syndetics_elements->{'AVSUMMARY'} ) {
        $summary_type = 'AVSUMMARY.XML';
    }

    my $response = _fetch_syndetics_content($summary_type, $isbn, $upc, $oclc);
    return if $response->content_type !~ /xml/;

    my $xml = $response->content;
    my $summary;

    # parse_string dies on malformed XML; trap that and only warn.
    eval {
        $summary = $parser->parse_string($xml)->findvalue('//Fld520');
    };
    warn "Error parsing Syndetics $summary_type" if $@;

    return $summary if $summary;
}
# get_syndetics_toc( $isbn, $upc, $oclc )
#
# Fetches TOC.XML for the given identifiers and returns an array reference
# holding the Fld970 entries found under VarFlds/VarDFlds/SSIFlds, as
# produced by XML::Simple.
#
# Returns nothing when the response is not XML, has an empty body, or does
# not contain any Fld970 entry.
sub get_syndetics_toc {
    my ( $isbn, $upc, $oclc ) = @_;

    my $response = _fetch_syndetics_content('TOC.XML', $isbn, $upc, $oclc);
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    return unless $content;

    # The parsed document gets its own variable: reusing $response meant an
    # empty body left the HTTP response object in place, and the lookup
    # below then autovivified keys inside it and returned a bogus empty list.
    my $parsed = XML::Simple->new()->XMLin(
        $content,
        forcearray => [ qw(Fld970) ],
    );

    # Walk the path one level at a time so a missing or non-hash level ends
    # the lookup instead of autovivifying or dying.
    my $node = $parsed;
    for my $key (qw(VarFlds VarDFlds SSIFlds Fld970)) {
        return unless ref($node) eq 'HASH' && exists $node->{$key};
        $node = $node->{$key};
    }

    return unless ref($node) eq 'ARRAY' && @$node;
    return $node;
}
# get_syndetics_excerpt( $isbn, $upc, $oclc )
#
# Fetches DBCHAPTER.XML for the given identifiers, collects the Fld520
# entries found under VarFlds/VarDFlds/Notes and returns them serialised
# back to an XML string by XMLout with NoEscape => 1.
#
# Returns nothing when the response is not XML, has an empty body, or does
# not contain any Fld520 entry.
sub get_syndetics_excerpt {
    my ( $isbn, $upc, $oclc ) = @_;

    my $response = _fetch_syndetics_content('DBCHAPTER.XML', $isbn, $upc, $oclc);
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    return unless $content;

    # The parsed document gets its own variable: reusing $response meant an
    # empty body left the HTTP response object in place, and the lookup
    # below then autovivified an empty list that XMLout serialised into a
    # non-empty "excerpt".
    my $parsed = XML::Simple->new()->XMLin(
        $content,
        forcearray => [ qw(Fld520) ],
    );

    # Walk the path one level at a time so a missing or non-hash level ends
    # the lookup instead of autovivifying or dying.
    my $node = $parsed;
    for my $key (qw(VarFlds VarDFlds Notes Fld520)) {
        return unless ref($node) eq 'HASH' && exists $node->{$key};
        $node = $node->{$key};
    }

    return unless ref($node) eq 'ARRAY' && @$node;
    return XMLout($node, NoEscape => 1);
}
# get_syndetics_reviews( $isbn, $upc, $oclc, $syndetics_elements )
#
# Collects the reviews available for the given identifiers. A review source
# is only queried when $syndetics_elements has a true value under the
# source's element name and that value, used as a pattern, matches the name.
#
# Always returns an array reference; each entry has the shape
#   { title => <source title>, reviews => [ { content => <review text> } ] }
sub get_syndetics_reviews {
    my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_;

    # [ display title, index element name, file to request ]
    my @review_sources = (
        [ 'Library Journal Review',        'LJREVIEW',       'LJREVIEW.XML' ],
        [ 'Publishers Weekly Review',      'PWREVIEW',       'PWREVIEW.XML' ],
        [ 'School Library Journal Review', 'SLJREVIEW',      'SLJREVIEW.XML' ],
        [ 'CHOICE Review',                 'CHREVIEW',       'CHREVIEW.XML' ],
        [ 'Booklist Review',               'BLREVIEW',       'BLREVIEW.XML' ],
        [ 'Horn Book Review',              'HBREVIEW',       'HBREVIEW.XML' ],
        [ 'Kirkus Book Review',            'KIREVIEW',       'KIREVIEW.XML' ],
        [ 'Criticas Review',               'CRITICASREVIEW', 'CRITICASREVIEW.XML' ],
        [ 'Spanish Review',                'SPREVIEW',       'SPREVIEW.XML' ],
    );

    my @reviews;

    SOURCE:
    for my $source (@review_sources) {
        my ( $title, $element, $file ) = @$source;

        # Skip sources the caller's element map does not list.
        my $listed = $syndetics_elements->{$element};
        next SOURCE unless $listed;
        next SOURCE unless $element =~ $listed;

        my $response = _fetch_syndetics_content($file, $isbn, $upc, $oclc);
        next SOURCE if $response->content_type !~ /xml/;

        my $xml = $response->content;

        # findvalue returns text only, so any markup embedded in the review
        # (including <B>/<I> formatting) is dropped. That smooths over
        # differences between the old and new Syndetics output and removes
        # questionable HTML.
        eval {
            my $text = $parser->parse_string($xml)->findvalue('//Fld520');
            if ($text) {
                push @reviews, {
                    title   => $title,
                    reviews => [ { content => $text } ],
                };
            }
        };
        warn "Error parsing Syndetics $title review" if $@;
    }

    return \@reviews;
}
# get_syndetics_editions( $isbn, $upc, $oclc )
#
# Fetches FICTION.XML for the given identifiers and returns an array
# reference holding the Fld020 entries found under
# VarFlds/VarDFlds/NumbCode, as produced by XML::Simple.
#
# Returns nothing when the response is not XML, has an empty body, or does
# not contain any Fld020 entry.
sub get_syndetics_editions {
    my ( $isbn, $upc, $oclc ) = @_;

    my $response = _fetch_syndetics_content('FICTION.XML', $isbn, $upc, $oclc);
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    return unless $content;

    # The parsed document gets its own variable: reusing $response meant an
    # empty body left the HTTP response object in place, and the lookup
    # below then autovivified keys inside it and returned a bogus empty list.
    my $parsed = XML::Simple->new()->XMLin(
        $content,
        forcearray => [ qw(Fld020) ],
    );

    # Walk the path one level at a time so a missing or non-hash level ends
    # the lookup instead of autovivifying or dying.
    my $node = $parsed;
    for my $key (qw(VarFlds VarDFlds NumbCode Fld020)) {
        return unless ref($node) eq 'HASH' && exists $node->{$key};
        $node = $node->{$key};
    }

    return unless ref($node) eq 'ARRAY' && @$node;
    return $node;
}
# get_syndetics_anotes( $isbn, $upc, $oclc )
#
# Fetches ANOTES.XML for the given identifiers and returns an array
# reference of { content => ... } hashes, one per piece of content found in
# the "a" subfield of each Fld980 under VarFlds/VarDFlds/SSIFlds.
#
# Returns nothing when the response is not XML, and a reference to an empty
# array when the body is empty or holds no Fld980 entry.
sub get_syndetics_anotes {
    my ( $isbn, $upc, $oclc ) = @_;

    my $response = _fetch_syndetics_content('ANOTES.XML', $isbn, $upc, $oclc);
    return unless $response->content_type =~ /xml/;

    my @anotes;

    my $content = $response->content;
    return \@anotes unless $content;

    # The parsed document gets its own variable so that an empty body can no
    # longer cause the HTTP response object to be walked as if it were XML.
    my $parsed = XML::Simple->new()->XMLin(
        $content,
        forcearray   => [ qw(Fld980) ],
        ForceContent => 1,
    );

    # Walk the path one level at a time so a missing or non-hash level ends
    # the lookup instead of autovivifying or dying.
    my $node = $parsed;
    for my $key (qw(VarFlds VarDFlds SSIFlds Fld980)) {
        return \@anotes unless ref($node) eq 'HASH' && exists $node->{$key};
        $node = $node->{$key};
    }
    return \@anotes unless ref($node) eq 'ARRAY';

    for my $fld980 (@$node) {
        my $note = $fld980->{a}->{content};

        # The content sometimes arrives as a list of pieces rather than a
        # single value; flatten it into one entry per piece.
        if ( ref($note) eq 'ARRAY' ) {
            push @anotes, map { { content => $_ } } @$note;
        }
        else {
            push @anotes, { content => $note };
        }
    }

    return \@anotes;
}
# _fetch_syndetics_content( $element, $isbn, $upc, $oclc )
#
# Builds the Syndetics request URL for $element (a file name such as
# 'TOC.XML') and the given identifiers, performs an HTTP GET with a
# 10 second timeout, honouring proxy settings from the environment, and
# returns the response object. Undefined identifiers are sent as empty
# strings.
#
# A warning is emitted when the request fails or comes back with an empty
# body; the response object is returned either way so callers can inspect it.
#
# NOTE(review): C4::Context is called here but is not loaded by this file -
# presumably callers have already loaded it; confirm.
sub _fetch_syndetics_content {
    my ( $element, $isbn, $upc, $oclc ) = @_;

    $isbn = '' unless defined $isbn;
    $upc  = '' unless defined $upc;
    $oclc = '' unless defined $oclc;

    # Default an unset preference the same way as the identifiers, so the
    # URL below never interpolates an undefined value.
    my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
    $syndetics_client_code = '' unless defined $syndetics_client_code;

    my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$element&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;

    my $response = $ua->get($url);

    # Error responses usually carry a body, so checking the body alone
    # would let HTTP failures go unreported.
    warn "could not retrieve $url"
        unless $response->is_success && $response->content;

    return $response;
}
1;
__END__
=head1 NOTES
=cut
=head1 AUTHOR
Joshua Ferraro <jmf@liblime.com>
=cut