From 3feade167f2713e3dc3e47fda35b423443af9f07 Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Wed, 27 Jan 2021 19:53:21 +0000 Subject: [PATCH] Bug 27563: Remove check-url.pl in favor of check-url-quick.pl The check-url.pl script has been deprecated since Koha 3.8 - this patch removes it. If any users are still referencing that script in their cronjobs they will need to update to the new script upon upgrade. To test: 1 - verify the script is gone 2 - verify check-url-quick.pl works Signed-off-by: Lucas Gass Signed-off-by: Katrin Fischer Signed-off-by: Jonathan Druart --- misc/cronjobs/check-url.pl | 301 ------------------------------------- 1 file changed, 301 deletions(-) delete mode 100755 misc/cronjobs/check-url.pl diff --git a/misc/cronjobs/check-url.pl b/misc/cronjobs/check-url.pl deleted file mode 100755 index 429a99110c..0000000000 --- a/misc/cronjobs/check-url.pl +++ /dev/null @@ -1,301 +0,0 @@ -#!/usr/bin/perl - -# -# Copyright 2009 Tamil s.a.r.l. -# -# This file is part of Koha. -# -# Koha is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# Koha is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Koha; if not, see . - - - -package C4::URL::Checker; - -=head1 NAME - -C4::URL::Checker - base object for checking URL stored in Koha DB - -=head1 SYNOPSIS - - use C4::URL::Checker; - - my $checker = C4::URL::Checker->new( ); - $checker->{ host_default } = 'http://mylib.kohalibrary.com'; - my $checked_urls = $checker->check_biblio( 123 ); - foreach my $url ( @$checked_urls ) { - print "url: ", $url->{ url  }, "\n", - "is_success: ", $url->{ is_success }, "\n", - "status: ", $url->{ status }, "\n"; - } - -=head1 FUNCTIONS - -=head2 new - -Create a URL Checker. The returned object can be used to set -default host variable : - - my $checker = C4::URL::Checker->new( ); - $checker->{ host_default } = 'http://mylib.kohalibrary.com'; - -=head2 check_biblio - -Check all URL from a biblio record. Returns a pointer to an array -containing all URLs with checking for each of them. - - my $checked_urls = $checker->check_biblio( 123 ); - -With 2 URLs, the returned array will look like that: - - [ - { - 'url' => 'http://mylib.tamil.fr/img/62265_0055B.JPG', - 'is_success' => 1, - 'status' => 'ok' - }, - { - 'url' => 'http://mylib.tamil.fr//img/62265_0055C.JPG', - 'is_success' => 0, - 'status' => '404 - Page not found' - } - ], - - -=cut - -use strict; -use warnings; -use LWP::UserAgent; -use HTTP::Request; -use Koha::Script -cron; -use C4::Biblio; - - - -sub new { - - my $self = {}; - my ($class, $timeout, $agent) = @_; - - my $uagent = LWP::UserAgent->new; - $uagent->agent( $agent ) if $agent; - $uagent->timeout( $timeout) if $timeout; - $self->{ user_agent } = $uagent; - $self->{ bad_url } = { }; - - bless $self, $class; - return $self; -} - - -sub check_biblio { - my $self = shift; - my $biblionumber = shift; - my $uagent = $self->{ user_agent }; - my $host = $self->{ host_default }; - my $bad_url = $self->{ bad_url }; - - my $record = GetMarcBiblio({ biblionumber => $biblionumber }); - return unless $record->field('856'); - - my @urls = (); - foreach my $field ( $record->field('856') ) { - my $url = $field->subfield('u'); - next unless $url; - $url = "$host/$url" unless $url =~ /^http/; - my $check = { url => $url }; - if ( $bad_url->{ $url } ) { - $check->{ is_success } = 1; - $check->{ status } = '500 Site already checked'; - } - else { - my $req = HTTP::Request->new( GET => $url ); - my $res = $uagent->request( $req, sub { die }, 1 ); - if ( $res->is_success ) { - $check->{ is_success } = 1; - $check->{ status } = 'ok'; - } - else { - $check->{ is_success } = 0; - $check->{ status } = $res->status_line; - $bad_url->{ $url } = 1; - } - } - push @urls, $check; - } - return \@urls; -} - - - -package Main; - -use strict; -use warnings; -use diagnostics; -use Carp; - -use Pod::Usage; -use Getopt::Long; -use Koha::Script -cron; -use C4::Context; - - - -my $verbose = 0; -my $help = 0; -my $host = ''; -my $host_pro = ''; -my $html = 0; -my $uriedit = "/cgi-bin/koha/cataloguing/addbiblio.pl?biblionumber="; -my $agent = ''; -my $timeout = 15; -GetOptions( - 'verbose' => \$verbose, - 'html' => \$html, - 'help' => \$help, - 'host=s' => \$host, - 'host-pro=s' => \$host_pro, - 'agent=s' => \$agent, - 'timeout=i', => \$timeout, -); - - -sub usage { - pod2usage( -verbose => 2 ); - exit; -} - - -sub bibediturl { - my $biblionumber = shift; - my $html = "$biblionumber"; - return $html; -} - - -# -# Check all URLs from all current Koha biblio records -# -sub check_all_url { - my $checker = C4::URL::Checker->new($timeout,$agent); - $checker->{ host_default } = $host; - - my $context = C4::Context->new( ); - my $dbh = $context->dbh; - my $sth = $dbh->prepare( - "SELECT biblionumber FROM biblioitems WHERE url <> ''" ); - $sth->execute; - if ( $html ) { - print < - - -EOS - } - while ( my ($biblionumber) = $sth->fetchrow ) { - my $result = $checker->check_biblio( $biblionumber ); - next unless $result; # No URL - foreach my $url ( @$result ) { - if ( ! $url->{ is_success } || $verbose ) { - print $html - ? "\n\n\n\n\n\n" - : "$biblionumber\t" . $url->{ url } . "\t" . - $url->{ status } . "\n"; - } - } - } - print "
" . bibediturl( $biblionumber ) . - "" . $url->{url} . "" . - $url->{status} . "
\n\n\n" if $html; -} - - -# BEGIN - -usage() if $help; - -if ( $html && !$host_pro ) { - if ( $host ) { - $host_pro = $host; - } - else { - print "Error: host-pro parameter or host must be provided in html mode\n"; - exit; - } -} - -check_all_url(); - - - -=head1 NAME - -check-url.pl - Check URLs from 856$u field. - -=head1 USAGE - -=over - -=item check-url.pl [--verbose|--help] [--agent=agent-string] [--host=http://default.tld] - -Scan all URLs found in 856$u of bib records -and display if resources are available or not. -This script is deprecated. You should rather use check-url-quick.pl. - -=back - -=head1 PARAMETERS - -=over - -=item B<--host=http://default.tld> - -Server host used when URL doesn't have one, ie doesn't begin with 'http:'. -For example, if --host=http://www.mylib.com, then when 856$u contains -'img/image.jpg', the url checked is: http://www.mylib.com/image.jpg'. - -=item B<--verbose|-v> - -Outputs both successful and failed URLs. - -=item B<--html> - -Formats output in HTML. The result can be redirected to a file -accessible by http. This way, it's possible to link directly to biblio -record in edit mode. With this parameter B<--host-pro> is required. - -=item B<--host-pro=http://koha-pro.tld> - -Server host used to link to biblio record editing page. - -=item B<--agent=agent-string> - -Change default libwww user-agent string to custom. Some sites do -not like libwww user-agent and return false 40x failure codes, -so this allows Koha to report itself as Koha, or a browser. - -=item B<--timeout=15> - -Timeout for fetching URLs. By default 15 seconds. - -=item B<--help|-h> - -Print this help page. - -=back - -=cut - - -- 2.39.5