Koha/misc/cronjobs/update_totalissues.pl

#!/usr/bin/perl

# Copyright 2012 C & P Bibliography Services
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with Koha; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

use strict;
use warnings;

# Compile-time setup: a BEGIN block runs as soon as it has been parsed, i.e.
# before the "use" statements that follow it in this file are processed.
BEGIN {

    # find Koha's Perl modules
    # test carefully before changing this
    use FindBin;

    # Load kohalib.pl from the directory above the one holding this script.
    # The eval discards any failure ($@ is never inspected), so a missing or
    # broken kohalib.pl does not abort the script at this point.
    # NOTE(review): presumably kohalib.pl adjusts @INC for non-standard
    # installs -- confirm against that file.
    eval { require "$FindBin::Bin/../kohalib.pl" };
}

use Getopt::Long;
use Pod::Usage;
use C4::Context;
use C4::Biblio;
use DateTime;
use DateTime::Format::MySQL;
use Time::HiRes qw/time/;
use POSIX qw/strftime ceil/;

# Show this script's POD documentation at verbosity level 2 and stop the
# program. Takes no arguments and does not return.
sub usage {
    my %pod_args = ( -verbose => 2 );
    pod2usage(%pod_args);

    # Only reached if pod2usage() hands control back instead of exiting.
    exit;
}

# Turn on autoflush for the currently selected output handle so that
# progress messages are written as soon as they are printed.
$| = 1;

# command-line parameters
#   $verbose     - print one line per bib processed
#   $test_only   - run the queries but skip the UpdateTotalIssues() calls
#   $want_help   - set by --help or by any validation failure below
#   $since       - lower datetime bound for the statistics query
#   $interval    - relative form of $since (e.g. "30d"); converted into
#                  $since by process_stats()
#   $usestats    - take counts from the statistics table
#   $useitems    - take counts from items.issues
#   $incremental - add the counts found to the existing totals
#   $commit      - commit after this many bibs
#   $unit        - unit suffix parsed out of $interval by process_stats()
my $verbose   = 0;
my $test_only = 0;
my $want_help = 0;
my $since;
my $interval;
my $usestats    = 0;
my $useitems    = 0;
my $incremental = 0;
my $commit      = 100;
my $unit;

# $result is false when GetOptions() rejected the command line.
my $result = GetOptions(
    'v|verbose'    => \$verbose,
    't|test'       => \$test_only,
    's|since=s'    => \$since,
    'i|interval=s' => \$interval,
    'use-stats'    => \$usestats,
    'use-items'    => \$useitems,
    'incremental'  => \$incremental,
    'c|commit=i'   => \$commit,
    'h|help'       => \$want_help
);

binmode( STDOUT, ":utf8" );

# Option validation. Each failed check prints its own message and then
# falls through to the shared usage() call further down.
if ( defined $since && defined $interval ) {
    print "The --since and --interval options are mutually exclusive.\n\n";
    $want_help = 1;
}

if ( $useitems && $incremental ) {
    print
      "The --use-items and --incremental options are mutually exclusive.\n\n";
    $want_help = 1;
}

# Incremental runs with no explicit window default to the last 24 hours.
if ( $incremental && !( defined $since || defined $interval ) ) {
    $interval = '24h';
}

unless ( $usestats || $useitems ) {
    print "You must specify either --use-stats and/or --use-items.\n\n";
    $want_help = 1;
}

if ( not $result or $want_help ) {
    usage();
}

# Shared database handle. AutoCommit is switched off, so nothing is written
# permanently until one of the explicit $dbh->commit() calls in the subs.
my $dbh = C4::Context->dbh;
$dbh->{AutoCommit} = 0;

# Running count of result rows handled by process_query(), across all passes.
my $num_bibs_processed = 0;

# time() is the Time::HiRes version imported above (fractional seconds).
my $starttime = time();

# When both sources are requested, the items pass runs first and the
# statistics pass runs second.
process_items() if $useitems;
process_stats() if $usestats;

report();

exit 0;

# Recalculate totals from the per-item counters: asks for one row per
# biblionumber holding the sum of items.issues and hands the statement to
# process_query() without a date limit.
sub process_items {
    my $sql = join ' ',
      'SELECT items.biblionumber, SUM(items.issues)',
      'FROM items',
      'GROUP BY items.biblionumber;';
    return process_query($sql);
}

# Recalculate total issues from the statistics table, or add to the
# existing totals when --incremental is set.
#
# Reads the file-level options $interval, $since and $incremental. When
# --interval was given it is converted into a MySQL datetime string and
# stored in $since (the parsed unit suffix is stored in $unit).
#
# Dies if --interval is not a number optionally followed by one of
# h, d, w, m or y.
sub process_stats {

    # Test for "defined and non-empty" rather than truth, so that an
    # interval of "0" is still converted into a cut-off instead of silently
    # removing the date limit altogether.
    if ( defined $interval && length $interval ) {
        my %units = (
            h => 'hours',
            d => 'days',
            w => 'weeks',
            m => 'months',
            y => 'years'
        );

        # Anchor both ends and require at least one digit, so that typos
        # such as "h" or "abc" are rejected instead of being read as an
        # empty amount.
        my ( $amount, $suffix ) = $interval =~ m/\A([0-9]+)([hdwmy]?)\z/
          or die "Invalid --interval value '$interval': expected a number "
          . "optionally followed by one of h, d, w, m or y.\n";

        $unit = $suffix || 'd';

        # NOTE(review): DateTime->now is in UTC; if statistics.datetime is
        # stored in server-local time the window is shifted by the UTC
        # offset -- confirm.
        my $cutoff = DateTime->now->subtract( $units{$unit} => $amount );
        $since = DateTime::Format::MySQL->format_datetime($cutoff);
    }

    # process_query() binds $since only when it is true, so use the same
    # test to decide whether the placeholder goes into the statement.
    my $limit = '';
    $limit = " AND statistics.datetime >= ?" if $since;

    my $query =
"SELECT biblio.biblionumber, COUNT(statistics.itemnumber) FROM biblio LEFT JOIN items ON (biblio.biblionumber=items.biblionumber) LEFT JOIN statistics ON (items.itemnumber=statistics.itemnumber) WHERE statistics.type = 'issue' $limit GROUP BY biblio.biblionumber;";
    process_query( $query, $limit );

    unless ($incremental) {

        # Reset every biblio that has no 'issue' statistic at all,
        # including biblios with no items. The rows are grouped first and
        # filtered with HAVING afterwards: filtering the joined rows with
        # "statistics.itemnumber IS NULL" would also match a biblio that
        # owns one circulated and one never-circulated item, and would
        # overwrite the count just stored by the query above with 0.
        $query =
            "SELECT biblio.biblionumber, 0 FROM biblio "
          . "LEFT JOIN items ON (biblio.biblionumber=items.biblionumber) "
          . "LEFT JOIN statistics ON (items.itemnumber=statistics.itemnumber "
          . "AND statistics.type = 'issue') "
          . "GROUP BY biblio.biblionumber "
          . "HAVING COUNT(statistics.itemnumber) = 0;";
        process_query( $query, '' );
    }

    $dbh->commit();
}

# Run one "biblionumber, issue count" query and apply every row returned.
#
# Parameters:
#   $query    - SQL statement returning two columns per row: a biblionumber
#               and an issue count.
#   $uselimit - true when $query contains the datetime placeholder; the
#               file-level $since is then bound to it.
#
# Every row increments the file-level $num_bibs_processed. Unless --test
# was given, UpdateTotalIssues() is called for the row: with --incremental
# and a positive count the count is passed as the second argument,
# otherwise 0 is passed second and the count third.
# NOTE(review): presumably these are "increment by" and "set to"
# respectively -- confirm against C4::Biblio.
#
# Work is committed after every $commit rows and once more at the end.
sub process_query {
    my ( $query, $uselimit ) = @_;

    my $sth = $dbh->prepare($query);

    # Bind $since only when the statement actually carries the placeholder.
    my @bind = ( $since && $uselimit ) ? ($since) : ();
    $sth->execute(@bind);

    while ( my ( $biblionumber, $totalissues ) = $sth->fetchrow_array() ) {
        $num_bibs_processed++;
        $totalissues ||= 0;
        print "Processing bib $biblionumber ($totalissues issues)\n"
          if $verbose;

        # Test mode: count and report only.
        next if $test_only;

        if ( $incremental && $totalissues > 0 ) {
            UpdateTotalIssues( $biblionumber, $totalissues );
        }
        else {
            UpdateTotalIssues( $biblionumber, 0, $totalissues );
        }

        # --commit=0 would otherwise die here with "Illegal modulus zero";
        # treat it as "no intermediate commits".
        if ( $commit > 0 && ( $num_bibs_processed % $commit ) == 0 ) {
            print_progress_and_commit($num_bibs_processed);
        }
    }

    $dbh->commit();
}

# Print the end-of-run summary: start and end timestamps, elapsed time in
# milliseconds (rounded up) and the number of rows handled, followed by a
# marker line when the script ran with --test.
#
# Reads the file-level $starttime, $num_bibs_processed and $test_only.
sub report {
    my $endtime   = time();
    my $totaltime = ceil( ( $endtime - $starttime ) * 1000 );

    # Format into fresh lexicals so the file-level $starttime keeps its
    # numeric epoch value and this sub can safely be called more than once.
    my $started_at = strftime( '%D %T', localtime($starttime) );
    my $ended_at   = strftime( '%D %T', localtime($endtime) );

    my $summary = <<_SUMMARY_;

Update total issues count script report
=======================================================
Run started at:                         $started_at
Run ended at:                           $ended_at
Total run time:                         $totaltime ms
Number of bibs modified:                $num_bibs_processed
_SUMMARY_
    $summary .= "\n****  Ran in test mode only  ****\n" if $test_only;
    print $summary;
}

# Commit the open transaction, then print how many records have been
# handled so far.
#
# Parameters:
#   the running record count to show in the progress line.
sub print_progress_and_commit {
    my ($processed) = @_;
    $dbh->commit();
    print "... processed $processed records\n";
}

=head1 NAME

update_totalissues.pl

=head1 SYNOPSIS

  update_totalissues.pl --use-stats
  update_totalissues.pl --use-items
  update_totalissues.pl --use-stats --commit=1000
  update_totalissues.pl --use-stats --since='2012-01-01'
  update_totalissues.pl --use-stats --interval=30d

=head1 DESCRIPTION

This batch job populates bibliographic records' total issues count based
on historical issue statistics.

=over 8

=item B<-h|--help>

Print this help message and exit.

=item B<-v|--verbose>

Provide verbose log information (list every bib modified).

=item B<--use-stats>

Use the data in the statistics table for populating total issues.

=item B<--use-items>

Use items.issues data for populating total issues. Note that the query
against the items table takes no date limit, so the --since and
--interval options have no effect on it. Also note that if both
--use-stats and --use-items are specified, the count of biblios
processed will be misleading, because a biblio handled by both passes
is counted more than once.

=item B<-s|--since=DATE>

Only process issues recorded in the statistics table since DATE.

=item B<-i|--interval=S>

Only process issues recorded in the statistics table in the last S
units of time, where S is a number followed by an optional one-letter
unit suffix (for example, 30d). The valid suffixes are h (hours),
d (days), w (weeks), m (months), and y (years). If the suffix is
omitted, the unit defaults to days. Cannot be combined with --since.

=item B<--incremental>

Add the number of issues found in the statistics table to the existing
total issues count. Intended so that this script can be used as a cron
job to update popularity information during low-usage periods. If neither
--since nor --interval is specified, incremental mode will default to
processing the last twenty-four hours. Cannot be combined with --use-items.

=item B<-c|--commit=N>

Commit the results to the database after every N records are processed.
The default is 100.

=item B<-t|--test>

Run in test mode: execute the queries and report which bibs would be
processed, but do not update any total issues counts.

=back

=head1 WARNING

If the time on your database server does not match the time on your Koha
server you will need to take that into account, and probably use the
--since argument instead of the --interval argument for incremental
updating.

=head1 CREDITS

This patch to Koha was sponsored by the Arcadia Public Library and the
Arcadia Public Library Foundation in honor of Jackie Faust-Moreno, late
director of the Arcadia Public Library.

=head1 AUTHOR

Jared Camins-Esakov <jcamins AT cpbibliography DOT com>

=cut