1 package C4::External::Syndetics;
2 # Copyright (C) 2006 LibLime
3 # <jmf at liblime dot com>
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along with
17 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
18 # Suite 330, Boston, MA 02111-1307 USA
24 use HTTP::Request::Common;
29 use vars qw($VERSION @ISA @EXPORT);
37 &get_syndetics_summary
39 &get_syndetics_editions
40 &get_syndetics_excerpt
41 &get_syndetics_reviews
46 # Package-level XML::LibXML parser instance.
# get_syndetics_reviews calls parse_string() on it for each review document
# it downloads.
47 my $parser = XML::LibXML->new();
51 C4::External::Syndetics - Functions for retrieving Syndetics content in Koha
55 This module provides facilities for retrieving Syndetics.com content in Koha
57 =head2 get_syndetics_summary
61 my $syndetics_summary = get_syndetics_summary( $isbn, $upc, $oclc );
65 Get Summary data from Syndetics
# get_syndetics_index( $isbn, $upc, $oclc )
#
# Requests INDEX.XML from syndetics.com for the given identifiers, parses the
# body with XML::Simple and builds a hashref whose keys are the content types
# (SUMMARY, TOC, LJREVIEW, ...) found in the parsed response.
# The hashref is returned only when at least one type was found.
#
# NOTE(review): this listing carries a leading number on every line and some
# lines appear to be missing (block bodies, XMLin arguments, closing braces).
# The comments below describe only the statements that are visible.
# TODO confirm against the upstream file.
69 sub get_syndetics_index {
70 my ( $isbn,$upc,$oclc ) = @_;
72 # The Syndetics client code is read from the SyndeticsClientCode preference
73 my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
# NOTE(review): $isbn, $upc, $oclc and the client code are interpolated into
# the query string as-is (no URI escaping is visible here).
75 my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/INDEX.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
77 my $ua = LWP::UserAgent->new;
80 my $response = $ua->get($url);
# Guard for responses whose Content-Type does not mention xml; the statements
# inside this block are not visible in this listing.
81 unless ($response->content_type =~ /xml/) {
85 my $content = $response->content;
# Emit a warning when the response body is empty
86 warn "could not retrieve $url" unless $content;
87 my $xmlsimple = XML::Simple->new();
# $response is reused from here on to hold the XMLin result
# (the XMLin arguments are not visible in this listing)
88 $response = $xmlsimple->XMLin(
92 my $syndetics_elements;
# Candidate content types to look for in the parsed index
93 for my $available_type ('SUMMARY','TOC','FICTION','AWARDS1','SERIES1','SPSUMMARY','SPREVIEW','AVSUMMARY','DBCHAPTER','LJREVIEW','PWREVIEW','SLJREVIEW','CHREVIEW','BLREVIEW','HBREVIEW','KIREVIEW','CRITICASREVIEW','ANOTES') {
# A type counts as available when the response has that key and its value
# matches the type name (the name is interpolated as a regex pattern)
94 if (exists $response->{$available_type} && $response->{$available_type} =~ /$available_type/) {
# The stored value is the type name itself, not the response value
95 $syndetics_elements->{$available_type} = $available_type;
96 #warn "RESPONSE: $available_type : $response->{$available_type}";
# $syndetics_elements stays undef when no type matched, so this return is
# skipped in that case
99 return $syndetics_elements if $syndetics_elements;
# get_syndetics_summary( $isbn, $upc, $oclc )
#
# Requests SUMMARY.XML from syndetics.com for the given identifiers, parses it
# with XML::Simple (Fld520 is forced to an array) and returns a reference to
# the Fld520 entries when one is available.
#
# NOTE(review): this listing carries a leading number on every line and some
# lines appear to be missing (block bodies, part of the XMLin arguments, the
# declaration of $summary, closing braces). The comments below describe only
# the statements that are visible. TODO confirm against the upstream file.
102 sub get_syndetics_summary {
103 my ( $isbn,$upc,$oclc ) = @_;
105 # The Syndetics client code is read from the SyndeticsClientCode preference
106 my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
# NOTE(review): identifiers are interpolated into the query string unescaped
108 my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/SUMMARY.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
109 my $ua = LWP::UserAgent->new;
112 my $response = $ua->get($url);
# Guard for responses whose Content-Type does not mention xml; the statements
# inside this block are not visible in this listing.
113 unless ($response->content_type =~ /xml/) {
117 my $content = $response->content;
# Emit a warning when the response body is empty
119 warn "could not retrieve $url" unless $content;
120 my $xmlsimple = XML::Simple->new();
# $response is reused from here on to hold the XMLin result
121 $response = $xmlsimple->XMLin(
# Fld520 is always represented as an array, even for a single element
123 forcearray => [ qw(Fld520) ],
125 # Drill into the parsed response: VarFlds -> VarDFlds -> Notes -> Fld520
127 $summary = \@{$response->{VarFlds}->{VarDFlds}->{Notes}->{Fld520}} if $response;
128 return $summary if $summary;
# get_syndetics_toc( $isbn, $upc, $oclc )
#
# Requests TOC.XML from syndetics.com for the given identifiers, parses it
# with XML::Simple (Fld970 is forced to an array) and takes a reference to the
# Fld970 entries of the parsed response.
#
# NOTE(review): this listing carries a leading number on every line and some
# lines appear to be missing (block bodies, part of the XMLin arguments, the
# declaration of $toc, the return statement, closing braces). The comments
# below describe only the statements that are visible.
# TODO confirm against the upstream file.
131 sub get_syndetics_toc {
132 my ( $isbn,$upc,$oclc ) = @_;
134 # The Syndetics client code is read from the SyndeticsClientCode preference
135 my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
# NOTE(review): identifiers are interpolated into the query string unescaped
137 my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/TOC.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
138 my $ua = LWP::UserAgent->new;
142 my $response = $ua->get($url);
# Guard for responses whose Content-Type does not mention xml; the statements
# inside this block are not visible in this listing.
143 unless ($response->content_type =~ /xml/) {
147 my $content = $response->content;
# Emit a warning when the response body is empty
148 warn "could not retrieve $url" unless $content;
149 my $xmlsimple = XML::Simple->new();
# $response is reused from here on to hold the XMLin result
150 $response = $xmlsimple->XMLin(
# Fld970 is always represented as an array, even for a single element
152 forcearray => [ qw(Fld970) ],
154 # Drill into the parsed response: VarFlds -> VarDFlds -> SSIFlds -> Fld970
156 $toc = \@{$response->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld970}} if $response;
# get_syndetics_excerpt( $isbn, $upc, $oclc )
#
# Requests DBCHAPTER.XML from syndetics.com for the given identifiers, parses
# it with XML::Simple (Fld520 is forced to an array) and, when an excerpt is
# available, returns it serialised back to a string with XMLout.
#
# NOTE(review): this listing carries a leading number on every line and some
# lines appear to be missing (block bodies, part of the XMLin arguments, the
# declaration of $excerpt, closing braces). The comments below describe only
# the statements that are visible. TODO confirm against the upstream file.
160 sub get_syndetics_excerpt {
161 my ( $isbn,$upc,$oclc ) = @_;
163 # The Syndetics client code is read from the SyndeticsClientCode preference
164 my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
# NOTE(review): identifiers are interpolated into the query string unescaped
166 my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/DBCHAPTER.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
167 my $ua = LWP::UserAgent->new;
170 my $response = $ua->get($url);
# Guard for responses whose Content-Type does not mention xml; the statements
# inside this block are not visible in this listing.
171 unless ($response->content_type =~ /xml/) {
175 my $content = $response->content;
# Emit a warning when the response body is empty
176 warn "could not retrieve $url" unless $content;
177 my $xmlsimple = XML::Simple->new();
# $response is reused from here on to hold the XMLin result
178 $response = $xmlsimple->XMLin(
# Fld520 is always represented as an array, even for a single element
180 forcearray => [ qw(Fld520) ],
182 # Drill into the parsed response: VarFlds -> VarDFlds -> Notes -> Fld520
184 $excerpt = \@{$response->{VarFlds}->{VarDFlds}->{Notes}->{Fld520}} if $response;
# Unlike the sibling getters, the result is returned as an XMLout string;
# NoEscape => 1 turns off XML::Simple's escaping of the content
185 return XMLout($excerpt, NoEscape => 1) if $excerpt;
# get_syndetics_reviews( $isbn, $upc, $oclc, $syndetics_elements )
#
# Walks a fixed list of review sources and, for each one, downloads the
# matching *.XML document from syndetics.com, parses it with the package-level
# XML::LibXML parser and collects the text of its Fld520 element(s) in @reviews
# as { title => ..., reviews => [ { content => ... } ] }.
# $syndetics_elements is a hashref keyed by element name (the shape built by
# get_syndetics_index) and is consulted to decide which sources apply.
#
# NOTE(review): this listing carries a leading number on every line and some
# lines appear to be missing (the declaration of @reviews, branch bodies, any
# eval/error handling around the parse, the return statement, closing braces).
# The comments below describe only the statements that are visible.
# TODO confirm against the upstream file.
188 sub get_syndetics_reviews {
189 my ( $isbn,$upc,$oclc,$syndetics_elements ) = @_;
191 # The Syndetics client code is read from the SyndeticsClientCode preference
192 my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
# One entry per review source: display title, file to request, and the
# element name used as the key into $syndetics_elements
194 my $review_sources = [
195 {title => 'Library Journal Review', file => 'LJREVIEW.XML', element => 'LJREVIEW'},
196 {title => 'Publishers Weekly Review', file => 'PWREVIEW.XML', element => 'PWREVIEW'},
197 {title => 'School Library Journal Review', file => 'SLJREVIEW.XML', element => 'SLJREVIEW'},
198 {title => 'CHOICE Review', file => 'CHREVIEW.XML', element => 'CHREVIEW'},
199 {title => 'Booklist Review', file => 'BLREVIEW.XML', element => 'BLREVIEW'},
200 {title => 'Horn Book Review', file => 'HBREVIEW.XML', element => 'HBREVIEW'},
201 {title => 'Kirkus Book Review', file => 'KIREVIEW.XML', element => 'KIREVIEW'},
202 {title => 'Criticas Review', file => 'CRITICASREVIEW.XML', element => 'CRITICASREVIEW'},
203 {title => 'Spanish Review', file => 'SPREVIEW.XML', element => 'SPREVIEW'},
206 for my $source (@$review_sources) {
# Availability check: the source's element must have a true entry in
# $syndetics_elements, and that entry (used as a regex pattern) must match
# the element name. The bodies of the branches are not visible here.
207 if ($syndetics_elements->{$source->{element}} and $source->{element} =~ $syndetics_elements->{$source->{element}}) {
210 #warn "Skipping $source->{element} doesn't match $syndetics_elements->{$source->{element}} \n";
# NOTE(review): identifiers are interpolated into the query string unescaped
213 my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$source->{file}&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
# A new user agent is created on every iteration of the loop
215 my $ua = LWP::UserAgent->new;
219 my $response = $ua->get($url);
# Guard for responses whose Content-Type does not mention xml; the statements
# inside this block are not visible in this listing.
220 unless ($response->content_type =~ /xml/) {
224 my $content = $response->content;
# Emit a warning when the response body is empty
225 warn "could not retrieve $url" unless $content;
# Parse with the shared XML::LibXML parser declared at package level
228 my $doc = $parser->parse_string($content);
230 # note that using findvalue strips any HTML elements embedded
231 # in that review. That helps us handle slight differences
232 # in the output provided by Syndetics 'old' and 'new' versions
233 # of their service and cleans any questionable HTML that
234 # may be present in the reviews, but does mean that any
235 # <B> and <I> tags used to format the review are also gone.
236 my $result = $doc->findvalue('//Fld520');
# Sources that yield an empty string are skipped
237 push @reviews, {title => $source->{title}, reviews => [ { content => $result } ]} if $result;
# Presumably reached only when parsing failed; the enclosing condition is not
# visible in this listing -- TODO confirm
240 warn "Error parsing response from $url";
# get_syndetics_editions( $isbn, $upc, $oclc )
#
# Requests FICTION.XML from syndetics.com for the given identifiers, parses it
# with XML::Simple (Fld020 is forced to an array) and returns a reference to
# the Fld020 entries when one is available.
#
# NOTE(review): this listing carries a leading number on every line and some
# lines appear to be missing (block bodies, part of the XMLin arguments, the
# declaration of $similar_items, closing braces). The comments below describe
# only the statements that are visible. TODO confirm against the upstream file.
246 sub get_syndetics_editions {
247 my ( $isbn,$upc,$oclc ) = @_;
249 # The Syndetics client code is read from the SyndeticsClientCode preference
250 my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
# NOTE(review): identifiers are interpolated into the query string unescaped
252 my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/FICTION.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
253 my $ua = LWP::UserAgent->new;
257 my $response = $ua->get($url);
# Guard for responses whose Content-Type does not mention xml; the statements
# inside this block are not visible in this listing.
258 unless ($response->content_type =~ /xml/) {
262 my $content = $response->content;
# Emit a warning when the response body is empty
264 warn "could not retrieve $url" unless $content;
265 my $xmlsimple = XML::Simple->new();
# $response is reused from here on to hold the XMLin result
266 $response = $xmlsimple->XMLin(
# Fld020 is always represented as an array, even for a single element
268 forcearray => [ qw(Fld020) ],
270 # Drill into the parsed response: VarFlds -> VarDFlds -> NumbCode -> Fld020
272 $similar_items = \@{$response->{VarFlds}->{VarDFlds}->{NumbCode}->{Fld020}} if $response;
273 return $similar_items if $similar_items;
# get_syndetics_anotes( $isbn, $upc, $oclc )
#
# Requests ANOTES.XML from syndetics.com for the given identifiers, parses it
# with XML::Simple (Fld980 is forced to an array) and collects the content of
# each Fld980 'a' subfield into @anotes as { content => ... } hashes.
#
# NOTE(review): this listing carries a leading number on every line and some
# lines appear to be missing (block bodies, part of the XMLin arguments, the
# declaration of @anotes, the return statement, closing braces). The comments
# below describe only the statements that are visible.
# TODO confirm against the upstream file.
276 sub get_syndetics_anotes {
277 my ( $isbn,$upc,$oclc) = @_;
279 # The Syndetics client code is read from the SyndeticsClientCode preference
280 my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
# NOTE(review): identifiers are interpolated into the query string unescaped
282 my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/ANOTES.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
283 my $ua = LWP::UserAgent->new;
287 my $response = $ua->get($url);
# Guard for responses whose Content-Type does not mention xml; the statements
# inside this block are not visible in this listing.
288 unless ($response->content_type =~ /xml/) {
292 my $content = $response->content;
# Emit a warning when the response body is empty
294 warn "could not retrieve $url" unless $content;
295 my $xmlsimple = XML::Simple->new();
# $response is reused from here on to hold the XMLin result
296 $response = $xmlsimple->XMLin(
# Fld980 is always represented as an array, even for a single element
298 forcearray => [ qw(Fld980) ],
# Walk the entries at VarFlds -> VarDFlds -> SSIFlds -> Fld980
302 for my $fld980 (@{$response->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld980}}) {
303 # The 'a' subfield's content is sometimes an array ref and sometimes a
# single value, so both shapes are handled
304 if(ref($fld980->{a}->{content}) eq 'ARRAY') {
# Array case: one note per element (this $content is a new loop variable,
# distinct from the response body above)
305 for my $content (@{$fld980->{a}->{content}}) {
306 push @anotes, {content => $content};
# Single-value case: the content is stored as one note
311 push @anotes, {content => $fld980->{a}->{content}};
324 Joshua Ferraro <jmf@liblime.com>