Koha/tools/import_borrowers.pl
Chris Nighswonger ae48ebbcaa Bug 5379 - import_borrowers.pl fails with db insert/update errors
Some spreadsheet programs use smart quotes which causes the db to throw
an error when an insert/update is attempted due to improper processing
of the CSV file. This patch adds code to check for smart quotes and change
them to "dumb" quotes.

This patch also adds more logging of errors and a notice to the user to check
the logs for errors when they occur.

Signed-off-by: Liz Rea <wizzyrea@gmail.com>
Signed-off-by: Chris Cormack <chrisc@catalyst.net.nz>
2011-07-05 11:15:00 +12:00

315 lines
14 KiB
Perl
Executable file

#!/usr/bin/perl
# Copyright 2007 Liblime
# Parts copyright 2010 BibLibre
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with Koha; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# Script to take some borrowers data in a known format and load it into Koha
#
# File format
#
# cardnumber,surname,firstname,title,othernames,initials,streetnumber,streettype,
# address line , address line 2, city, zipcode, contry, email, phone, mobile, fax, work email, work phone,
# alternate streetnumber, alternate streettype, alternate address line 1, alternate city,
# alternate zipcode, alternate country, alternate email, alternate phone, date of birth, branchcode,
# categorycode, enrollment date, expiry date, noaddress, lost, debarred, contact surname,
# contact firstname, contact title, borrower notes, contact relationship, ethnicity, ethnicity notes
# gender, username, opac note, contact note, password, sort one, sort two
#
# any fields except cardnumber can be blank but the number of fields must match
# dates should be in the format you have set up Koha to expect
# branchcode and categorycode need to be valid
use strict;
use warnings;
use C4::Auth;
use C4::Output;
use C4::Dates qw(format_date_in_iso);
use C4::Context;
use C4::Branch qw(GetBranchName);
use C4::Members;
use C4::Members::Attributes qw(:all);
use C4::Members::AttributeTypes;
use C4::Members::Messaging;
use Text::CSV;
# Text::CSV::Unicode, even in binary mode, fails to parse lines with these diacriticals:
# ė
# č
use CGI;
# use encoding 'utf8'; # don't do this
my (@errors, @feedback);
my $extended = C4::Context->preference('ExtendedPatronAttributes');
my $set_messaging_prefs = C4::Context->preference('EnhancedMessagingPreferences');
my @columnkeys = C4::Members->columns;
if ($extended) {
push @columnkeys, 'patron_attributes';
}
my $columnkeystpl = [ map { {'key' => $_} } grep {$_ ne 'borrowernumber' && $_ ne 'cardnumber'} @columnkeys ]; # ref. to array of hashrefs.
my $input = CGI->new();
our $csv = Text::CSV->new({binary => 1}); # binary needed for non-ASCII Unicode
#push @feedback, {feedback=>1, name=>'backend', value=>$csv->backend, backend=>$csv->backend}; #XXX
my ( $template, $loggedinuser, $cookie ) = get_template_and_user({
template_name => "tools/import_borrowers.tmpl",
query => $input,
type => "intranet",
authnotrequired => 0,
flagsrequired => { tools => 'import_patrons' },
debug => 1,
});
$template->param(columnkeys => $columnkeystpl);
if ($input->param('sample')) {
print $input->header(
-type => 'application/vnd.sun.xml.calc', # 'application/vnd.ms-excel' ?
-attachment => 'patron_import.csv',
);
$csv->combine(@columnkeys);
print $csv->string, "\n";
exit 1;
}
my $uploadborrowers = $input->param('uploadborrowers');
my $matchpoint = $input->param('matchpoint');
if ($matchpoint) {
$matchpoint =~ s/^patron_attribute_//;
}
my $overwrite_cardnumber = $input->param('overwrite_cardnumber');
$template->param( SCRIPT_NAME => $ENV{'SCRIPT_NAME'} );
($extended) and $template->param(ExtendedPatronAttributes => 1);
if ( $uploadborrowers && length($uploadborrowers) > 0 ) {
push @feedback, {feedback=>1, name=>'filename', value=>$uploadborrowers, filename=>$uploadborrowers};
my $handle = $input->upload('uploadborrowers');
my $uploadinfo = $input->uploadInfo($uploadborrowers);
foreach (keys %$uploadinfo) {
push @feedback, {feedback=>1, name=>$_, value=>$uploadinfo->{$_}, $_=>$uploadinfo->{$_}};
}
my $imported = 0;
my $alreadyindb = 0;
my $overwritten = 0;
my $invalid = 0;
my $matchpoint_attr_type;
my %defaults = $input->Vars;
# use header line to construct key to column map
my $borrowerline = <$handle>;
my $status = $csv->parse($borrowerline);
($status) or push @errors, {badheader=>1,line=>$., lineraw=>$borrowerline};
my @csvcolumns = $csv->fields();
my %csvkeycol;
my $col = 0;
foreach my $keycol (@csvcolumns) {
# columnkeys don't contain whitespace, but some stupid tools add it
$keycol =~ s/ +//g;
$csvkeycol{$keycol} = $col++;
}
#warn($borrowerline);
my $ext_preserve = $input->param('ext_preserve') || 0;
if ($extended) {
$matchpoint_attr_type = C4::Members::AttributeTypes->fetch($matchpoint);
}
push @feedback, {feedback=>1, name=>'headerrow', value=>join(', ', @csvcolumns)};
my $today_iso = C4::Dates->new()->output('iso');
my @criticals = qw(surname branchcode categorycode); # there probably should be others
my @bad_dates; # I've had a few.
my $date_re = C4::Dates->new->regexp('syspref');
my $iso_re = C4::Dates->new->regexp('iso');
LINE: while ( my $borrowerline = <$handle> ) {
my %borrower;
my @missing_criticals;
my $patron_attributes;
my $status = $csv->parse($borrowerline);
my @columns = $csv->fields();
if (! $status) {
push @missing_criticals, {badparse=>1, line=>$., lineraw=>$borrowerline};
} elsif (@columns == @columnkeys) {
@borrower{@columnkeys} = @columns;
# MJR: try to fill blanks gracefully by using default values
foreach my $key (@columnkeys) {
if ($borrower{$key} !~ /\S/) {
$borrower{$key} = $defaults{$key};
}
}
} else {
# MJR: try to recover gracefully by using default values
foreach my $key (@columnkeys) {
if (defined($csvkeycol{$key}) and $columns[$csvkeycol{$key}] =~ /\S/) {
$borrower{$key} = $columns[$csvkeycol{$key}];
} elsif ( $defaults{$key} ) {
$borrower{$key} = $defaults{$key};
} elsif ( scalar grep {$key eq $_} @criticals ) {
# a critical field is undefined
push @missing_criticals, {key=>$key, line=>$., lineraw=>$borrowerline};
} else {
$borrower{$key} = '';
}
}
}
#warn join(':',%borrower);
if ($borrower{categorycode}) {
push @missing_criticals, {key=>'categorycode', line=>$. , lineraw=>$borrowerline, value=>$borrower{categorycode}, category_map=>1}
unless GetBorrowercategory($borrower{categorycode});
} else {
push @missing_criticals, {key=>'categorycode', line=>$. , lineraw=>$borrowerline};
}
if ($borrower{branchcode}) {
push @missing_criticals, {key=>'branchcode', line=>$. , lineraw=>$borrowerline, value=>$borrower{branchcode}, branch_map=>1}
unless GetBranchName($borrower{branchcode});
} else {
push @missing_criticals, {key=>'branchcode', line=>$. , lineraw=>$borrowerline};
}
if (@missing_criticals) {
foreach (@missing_criticals) {
$_->{borrowernumber} = $borrower{borrowernumber} || 'UNDEF';
$_->{surname} = $borrower{surname} || 'UNDEF';
}
$invalid++;
(25 > scalar @errors) and push @errors, {missing_criticals=>\@missing_criticals};
# The first 25 errors are enough. Keeping track of 30,000+ would destroy performance.
next LINE;
}
if ($extended) {
my $attr_str = $borrower{patron_attributes};
$attr_str =~ s/\xe2\x80\x9c/"/g; # fixup double quotes in case we are passed smart quotes
$attr_str =~ s/\xe2\x80\x9d/"/g;
push @feedback, {feedback=>1, name=>'attribute string', value=>$attr_str, filename=>$uploadborrowers};
delete $borrower{patron_attributes}; # not really a field in borrowers, so we don't want to pass it to ModMember.
$patron_attributes = extended_attributes_code_value_arrayref($attr_str);
}
# Popular spreadsheet applications make it difficult to force date outputs to be zero-padded, but we require it.
foreach (qw(dateofbirth dateenrolled dateexpiry)) {
my $tempdate = $borrower{$_} or next;
if ($tempdate =~ /$date_re/) {
$borrower{$_} = format_date_in_iso($tempdate);
} elsif ($tempdate =~ /$iso_re/) {
$borrower{$_} = $tempdate;
} else {
$borrower{$_} = '';
push @missing_criticals, {key=>$_, line=>$. , lineraw=>$borrowerline, bad_date=>1};
}
}
$borrower{dateenrolled} = $today_iso unless $borrower{dateenrolled};
$borrower{dateexpiry} = GetExpiryDate($borrower{categorycode},$borrower{dateenrolled}) unless $borrower{dateexpiry};
my $borrowernumber;
my $member;
if ( ($matchpoint eq 'cardnumber') && ($borrower{'cardnumber'}) ) {
$member = GetMember( 'cardnumber' => $borrower{'cardnumber'} );
if ($member) {
$borrowernumber = $member->{'borrowernumber'};
}
} elsif ($extended) {
if (defined($matchpoint_attr_type)) {
foreach my $attr (@$patron_attributes) {
if ($attr->{code} eq $matchpoint and $attr->{value} ne '') {
my @borrowernumbers = $matchpoint_attr_type->get_patrons($attr->{value});
$borrowernumber = $borrowernumbers[0] if scalar(@borrowernumbers) == 1;
last;
}
}
}
}
if ($borrowernumber) {
# borrower exists
unless ($overwrite_cardnumber) {
$alreadyindb++;
$template->param('lastalreadyindb'=>$borrower{'surname'}.' / '.$borrowernumber);
next LINE;
}
$borrower{'borrowernumber'} = $borrowernumber;
for my $col (keys %borrower) {
# use values from extant patron unless our csv file includes this column or we provided a default.
# FIXME : You cannot update a field with a perl-evaluated false value using the defaults.
unless(exists($csvkeycol{$col}) || $defaults{$col}) {
$borrower{$col} = $member->{$col} if($member->{$col}) ;
}
}
unless (ModMember(%borrower)) {
$invalid++;
# untill we have better error trapping, we have no way of knowing why ModMember errored out...
push @errors, {unknown_error => 1};
$template->param('lastinvalid'=>$borrower{'surname'}.' / '.$borrowernumber);
next LINE;
}
if ($extended) {
if ($ext_preserve) {
my $old_attributes = GetBorrowerAttributes($borrowernumber);
$patron_attributes = extended_attributes_merge($old_attributes, $patron_attributes); #TODO: expose repeatable options in template
}
push @errors, {unknown_error => 1} unless SetBorrowerAttributes($borrower{'borrowernumber'}, $patron_attributes);
}
$overwritten++;
$template->param('lastoverwritten'=>$borrower{'surname'}.' / '.$borrowernumber);
} else {
# FIXME: fixup_cardnumber says to lock table, but the web interface doesn't so this doesn't either.
# At least this is closer to AddMember than in members/memberentry.pl
if (!$borrower{'cardnumber'}) {
$borrower{'cardnumber'} = fixup_cardnumber(undef);
}
if ($borrowernumber = AddMember(%borrower)) {
if ($extended) {
SetBorrowerAttributes($borrowernumber, $patron_attributes);
}
if ($set_messaging_prefs) {
C4::Members::Messaging::SetMessagingPreferencesFromDefaults({ borrowernumber => $borrowernumber,
categorycode => $borrower{categorycode} });
}
$imported++;
$template->param('lastimported'=>$borrower{'surname'}.' / '.$borrowernumber);
} else {
$invalid++;
push @errors, {unknown_error => 1};
$template->param('lastinvalid'=>$borrower{'surname'}.' / AddMember');
}
}
}
(@errors ) and $template->param( ERRORS=>\@errors );
(@feedback) and $template->param(FEEDBACK=>\@feedback);
$template->param(
'uploadborrowers' => 1,
'imported' => $imported,
'overwritten' => $overwritten,
'alreadyindb' => $alreadyindb,
'invalid' => $invalid,
'total' => $imported + $alreadyindb + $invalid + $overwritten,
);
} else {
if ($extended) {
my @matchpoints = ();
my @attr_types = C4::Members::AttributeTypes::GetAttributeTypes();
foreach my $type (@attr_types) {
my $attr_type = C4::Members::AttributeTypes->fetch($type->{code});
if ($attr_type->unique_id()) {
push @matchpoints, { code => "patron_attribute_" . $attr_type->code(), description => $attr_type->description() };
}
}
$template->param(matchpoints => \@matchpoints);
}
}
output_html_with_http_headers $input, $cookie, $template->output;