C4/Ris.pm

   1 package C4::Ris;
   2
   3 # Original script :
   4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
   5 ##           See comments below for compliance with other MARC dialects
   6 ##
   7 ## usage: perl marc2ris < infile.marc > outfile.ris
   8 ##
   9 ## Dependencies: perl 5.6.0 or later
  10 ##               MARC::Record
  11 ##               MARC::Charset
  12 ##
  13 ## markus@mhoenicka.de 2002-11-16
  14
  15 ##   This program is free software; you can redistribute it and/or modify
  16 ##   it under the terms of the GNU General Public License as published by
  17 ##   the Free Software Foundation; either version 2 of the License, or
  18 ##   (at your option) any later version.
  19 ##
  20 ##   This program is distributed in the hope that it will be useful,
  21 ##   but WITHOUT ANY WARRANTY; without even the implied warranty of
  22 ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  23 ##   GNU General Public License for more details.
  24
  25 ##   You should have received a copy of the GNU General Public License
  26 ##   along with this program; if not, write to the Free Software
  27 ##   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  28
  29 ## Some background about MARC as understood by this script
  30 ## The default input format used in this script is MARC21, which
  31 ## superseded USMARC and CANMARC. The specification can be found at:
  32 ## http://lcweb.loc.gov/marc/
  33 ## UNIMARC follows the specification at:
  34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
  35 ## UKMARC support is a bit shaky because there is no specification available
  36 ## for free. The wisdom used in this script was taken from a PDF document
  37 ## comparing UKMARC to MARC21 found at:
  38 ## www.bl.uk/services/bibliographic/marcchange.pdf
  39
  40
  41 # Modified 2008 by BibLibre for Koha
  42 # Modified 2011 by Catalyst
  43 # Modified 2011 by Equinox Software, Inc.
  44 #
  45 # This file is part of Koha.
  46 #
  47 # Koha is free software; you can redistribute it and/or modify it
  48 # under the terms of the GNU General Public License as published by
  49 # the Free Software Foundation; either version 3 of the License, or
  50 # (at your option) any later version.
  51 #
  52 # Koha is distributed in the hope that it will be useful, but
  53 # WITHOUT ANY WARRANTY; without even the implied warranty of
  54 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  55 # GNU General Public License for more details.
  56 #
  57 # You should have received a copy of the GNU General Public License
  58 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  59 #
  60 #
  61
  62 use Modern::Perl;
  63
  64 use List::MoreUtils qw/uniq/;
  65 use vars qw($VERSION @ISA @EXPORT);
  66
  67 use C4::Biblio qw(GetMarcSubfieldStructureFromKohaField);
  68 use Koha::SimpleMARC qw(read_field);
  69
  70 # set the version for version checking
  71 $VERSION = 3.07.00.049;
  72
  73 @ISA = qw(Exporter);
  74
  75 # only export API methods
  76
  77 @EXPORT = qw(
  78   &marc2ris
  79 );
  80
  81 our $utf;
  82 our $intype;
  83 our $marcprint;
  84 our $protoyear;
  85
  86
  87 =head1 marc2bibtex - Convert from UNIMARC to RIS
  88
  89   my ($ris) = marc2ris($record);
  90
  91 Returns a RIS scalar
  92
  93 C<$record> - a MARC::Record object
  94
  95 =cut
  96
  97 my $intype;
  98
  99 sub marc2ris {
 100     my ($record) = @_;
 101     my $output;
 102
 103     my $marcflavour = C4::Context->preference("marcflavour");
 104     $intype = lc($marcflavour);
 105     my $marcprint = 0; # Debug flag;
 106
 107     # Let's redirect stdout
 108     open my $oldout, ">&STDOUT";
 109     my $outvar;
 110     close STDOUT;
 111     open STDOUT,'>', \$outvar;
 112
 113     ## First we should check the character encoding. This may be
 114     ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
 115     ## by 'a' at position 09 (zero-based) of the leader
 116     my $leader = $record->leader();
 117     if ( $intype eq "marc21" ) {
 118         if ( $leader =~ /^.{9}a/ ) {
 119             print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
 120             $utf = 1;
 121         }
 122         else {
 123             print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
 124         }
 125     }
 126     ## else: other MARC formats do not specify the character encoding
 127     ## we assume it's *not* UTF-8
 128
 129     my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
 130     my $ris_additional_fields;
 131     if ($RisExportAdditionalFields) {
 132         $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
 133         $ris_additional_fields = eval { YAML::Load($RisExportAdditionalFields); };
 134         if ($@) {
 135             warn "Unable to parse RisExportAdditionalFields : $@";
 136             $ris_additional_fields = undef;
 137         }
 138     }
 139
 140     ## start RIS dataset
 141     if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
 142         my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
 143         my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
 144         if ($type) {
 145             print "TY  - $type\r\n";
 146         }
 147         else {
 148             &print_typetag($leader);
 149         }
 150     }
 151     else {
 152         &print_typetag($leader);
 153     }
 154
 155         ## retrieve all author fields and collect them in a list
 156         my @author_fields;
 157
 158         if ($intype eq "unimarc") {
 159             ## Fields 700, 701, and 702 can contain author names
 160             @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
 161         }
 162         else {  ## marc21, ukmarc
 163             ## Field 100 sometimes carries main author
 164             ## Field(s) 700 carry added entries - personal names
 165             @author_fields = ($record->field('100'), $record->field('700'));
 166         }
 167
 168         ## loop over all author fields
 169         foreach my $field (@author_fields) {
 170             if (length($field)) {
 171                 my $author = &get_author($field);
 172                 print "AU  - ",&charconv($author),"\r\n";
 173             }
 174         }
 175
 176         # ToDo: should we specify anonymous as author if we didn't find
 177         # one? or use one of the corporate/meeting names below?
 178
 179         ## add corporate names or meeting names as editors ??
 180         my @editor_fields;
 181
 182         if ($intype eq "unimarc") {
 183             ## Fields 710, 711, and 712 can carry corporate names
 184             ## Field(s) 720, 721, 722, 730 have additional candidates
 185             @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
 186         }
 187         else { ## marc21, ukmarc
 188             ## Fields 110 and 111 carry the main entries - corporate name and
 189             ## meeting name, respectively
 190             ## Field(s) 710, 711 carry added entries - personal names
 191             @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
 192         }
 193
 194         ## loop over all editor fields
 195         foreach my $field (@editor_fields) {
 196             if (length($field)) {
 197                 my $editor = &get_editor($field);
 198                 print "ED  - ",&charconv($editor),"\r\n";
 199             }
 200         }
 201
 202         ## get info from the title field
 203         if ($intype eq "unimarc") {
 204             &print_title($record->field('200'));
 205         }
 206         else { ## marc21, ukmarc
 207             &print_title($record->field('245'));
 208         }
 209
 210         ## series title
 211         if ($intype eq "unimarc") {
 212             &print_stitle($record->field('225'));
 213         }
 214         else { ## marc21, ukmarc
 215             &print_stitle($record->field('490'));
 216         }
 217
 218         ## ISBN/ISSN
 219         if ($intype eq "unimarc") {
 220             &print_isbn($record->field('010'));
 221             &print_issn($record->field('011'));
 222         }
 223         elsif ($intype eq "ukmarc") {
 224             &print_isbn($record->field('021'));
 225             ## this is just an assumption
 226             &print_issn($record->field('022'));
 227         }
 228         else { ## assume marc21
 229             &print_isbn($record->field('020'));
 230             &print_issn($record->field('022'));
 231         }
 232
 233         if ($intype eq "marc21") {
 234             &print_loc_callno($record->field('050'));
 235             &print_dewey($record->field('082'));
 236         }
 237         ## else: unimarc, ukmarc do not seem to store call numbers?
 238
 239         ## publication info
 240         if ($intype eq "unimarc") {
 241             &print_pubinfo($record->field('210'));
 242         }
 243         else { ## marc21, ukmarc
 244             if ($record->field('264')) {
 245                  &print_pubinfo($record->field('264'));
 246             }
 247             else {
 248             &print_pubinfo($record->field('260'));
 249             }
 250         }
 251
 252         ## 6XX fields contain KW candidates. We add all of them to a
 253
 254     my @field_list;
 255     if ($intype eq "unimarc") {
 256         @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
 257     } elsif ($intype eq "ukmarc") {
 258         @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
 259     } else { ## assume marc21
 260         @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
 261     }
 262
 263     my @kwpool;
 264     for my $f ( @field_list ) {
 265         my @fields = $record->field($f);
 266         push @kwpool, ( get_keywords("$f",$record->field($f)) );
 267     }
 268
 269     # Remove duplicate
 270     @kwpool = uniq @kwpool;
 271
 272     for my $kw ( @kwpool ) {
 273         print "KW  - ", &charconv($kw), "\r\n";
 274     }
 275
 276         ## 5XX have various candidates for notes and abstracts. We pool
 277         ## all notes-like stuff in one list.
 278         my @notepool;
 279
 280         ## these fields have notes candidates
 281         if ($intype eq "unimarc") {
 282             foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
 283                 &pool_subx(\@notepool, $_, $record->field($_));
 284             }
 285         }
 286         elsif ($intype eq "ukmarc") {
 287             foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
 288                 &pool_subx(\@notepool, $_, $record->field($_));
 289         }
 290         }
 291         else { ## assume marc21
 292             foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
 293                 &pool_subx(\@notepool, $_, $record->field($_));
 294             }
 295         }
 296
 297         my $allnotes = join "; ", @notepool;
 298
 299         if (length($allnotes) > 0) {
 300             print "N1  - ", &charconv($allnotes), "\r\n";
 301         }
 302
 303         ## 320/520 have the abstract
 304         if ($intype eq "unimarc") {
 305             &print_abstract($record->field('320'));
 306         }
 307         elsif ($intype eq "ukmarc") {
 308             &print_abstract($record->field('512'), $record->field('513'));
 309         }
 310         else { ## assume marc21
 311             &print_abstract($record->field('520'));
 312         }
 313
 314     # 856u has the URI
 315     if ($record->field('856')) {
 316         print_uri($record->field('856'));
 317     }
 318
 319     if ($ris_additional_fields) {
 320         foreach my $ris_tag ( keys %$ris_additional_fields ) {
 321             next if $ris_tag eq 'TY';
 322
 323             my @fields =
 324               ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
 325               ? @{ $ris_additional_fields->{$ris_tag} }
 326               : $ris_additional_fields->{$ris_tag};
 327
 328             for my $tag (@fields) {
 329                 my ( $f, $sf ) = split( /\$/, $tag );
 330                 my @values = read_field( { record => $record, field => $f, subfield => $sf } );
 331                 foreach my $v (@values) {
 332                     print "$ris_tag  - $v\r\n";
 333                 }
 334             }
 335         }
 336     }
 337
 338         ## end RIS dataset
 339         print "ER  - \r\n";
 340
 341     # Let's re-redirect stdout
 342     close STDOUT;
 343     open STDOUT, ">&", $oldout;
 344
 345     return $outvar;
 346
 347 }
 348
 349
 350 ##********************************************************************
 351 ## print_typetag(): prints the first line of a RIS dataset including
 352 ## the preceding newline
 353 ## Argument: the leader of a MARC dataset
 354 ## Returns: the value at leader position 06
 355 ##********************************************************************
 356 sub print_typetag {
 357   my ($leader)= @_;
 358     ## the keys of typehash are the allowed values at position 06
 359     ## of the leader of a MARC record, the values are the RIS types
 360     ## that might appropriately represent these types.
 361     my %ustypehash = (
 362                     "a" => "BOOK",
 363                     "c" => "MUSIC",
 364                     "d" => "MUSIC",
 365                     "e" => "MAP",
 366                     "f" => "MAP",
 367                     "g" => "ADVS",
 368                     "i" => "SOUND",
 369                     "j" => "SOUND",
 370                     "k" => "ART",
 371                     "m" => "DATA",
 372                     "o" => "GEN",
 373                     "p" => "GEN",
 374                     "r" => "ART",
 375                     "t" => "GEN",
 376                 );
 377
 378     my %unitypehash = (
 379                     "a" => "BOOK",
 380                     "b" => "BOOK",
 381                     "c" => "MUSIC",
 382                     "d" => "MUSIC",
 383                     "e" => "MAP",
 384                     "f" => "MAP",
 385                     "g" => "ADVS",
 386                     "i" => "SOUND",
 387                     "j" => "SOUND",
 388                     "k" => "ART",
 389                     "l" => "ELEC",
 390                     "m" => "ADVS",
 391                     "r" => "ART",
 392                 );
 393
 394     ## The type of a MARC record is found at position 06 of the leader
 395     my $typeofrecord = substr($leader, 6, 1);
 396
 397     ## ToDo: for books, field 008 positions 24-27 might have a few more
 398     ## hints
 399
 400     my %typehash;
 401
 402     ## the ukmarc here is just a guess
 403     if ($intype eq "marc21" || $intype eq "ukmarc") {
 404         %typehash = %ustypehash;
 405     }
 406     elsif ($intype eq "unimarc") {
 407         %typehash = %unitypehash;
 408     }
 409     else {
 410         ## assume MARC21 as default
 411         %typehash = %ustypehash;
 412     }
 413
 414     if (!exists $typehash{$typeofrecord}) {
 415         print "TY  - BOOK\r\n"; ## most reasonable default
 416         warn ("no type found - assume BOOK") if $marcprint;
 417     }
 418     else {
 419         print "TY  - $typehash{$typeofrecord}\r\n";
 420     }
 421
 422     ## use $typeofrecord as the return value, just in case
 423     $typeofrecord;
 424 }
 425
 426 ##********************************************************************
 427 ## normalize_author(): normalizes an authorname
 428 ## Arguments: authorname subfield a
 429 ##            authorname subfield b
 430 ##            authorname subfield c
 431 ##            name type if known: 0=direct order
 432 ##                               1=only surname or full name in
 433 ##                                 inverted order
 434 ##                               3=family, clan, dynasty name
 435 ## Returns: the normalized authorname
 436 ##********************************************************************
 437 sub normalize_author {
 438     my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
 439
 440     if ($nametype == 0) {
 441         # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
 442         warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
 443         return $rawauthora;
 444     }
 445     elsif ($nametype == 1) {
 446         ## start munging subfield a (the real name part)
 447         ## remove spaces after separators
 448         $rawauthora =~ s%([,.]+) *%$1%g;
 449
 450         ## remove trailing separators after spaces
 451         $rawauthora =~ s% *[,;:/]*$%%;
 452
 453         ## remove periods after a non-abbreviated name
 454         $rawauthora =~ s%(\w{2,})\.%$1%g;
 455
 456         ## start munging subfield b (something like the suffix)
 457         ## remove trailing separators after spaces
 458         $rawauthorb =~ s% *[,;:/]*$%%;
 459
 460         ## we currently ignore subfield c until someone complains
 461         if (length($rawauthorb) > 0) {
 462         return join ", ", ($rawauthora, $rawauthorb);
 463         }
 464         else {
 465             return $rawauthora;
 466         }
 467     }
 468     elsif ($nametype == 3) {
 469         return $rawauthora;
 470     }
 471 }
 472
 473 ##********************************************************************
 474 ## get_author(): gets authorname info from MARC fields 100, 700
 475 ## Argument: field (100 or 700)
 476 ## Returns: an author string in the format found in the record
 477 ##********************************************************************
 478 sub get_author {
 479     my ($authorfield) = @_;
 480     my ($indicator);
 481
 482     ## the sequence of the name parts is encoded either in indicator
 483     ## 1 (marc21) or 2 (unimarc)
 484     if ($intype eq "unimarc") {
 485         $indicator = 2;
 486     }
 487     else { ## assume marc21
 488         $indicator = 1;
 489     }
 490
 491     print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
 492     print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
 493     print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
 494     print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
 495     print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
 496     if ($intype eq "ukmarc") {
 497         my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
 498         normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
 499     }
 500     else {
 501         normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
 502     }
 503 }
 504
 505 ##********************************************************************
 506 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
 507 ## Argument: field (110, 111, 710, or 711)
 508 ## Returns: an author string in the format found in the record
 509 ##********************************************************************
 510 sub get_editor {
 511     my ($editorfield) = @_;
 512
 513     if (!$editorfield) {
 514         return;
 515     }
 516     else {
 517         print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
 518         print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
 519         print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
 520         return $editorfield->subfield('a');
 521     }
 522 }
 523
 524 ##********************************************************************
 525 ## print_title(): gets info from MARC field 245
 526 ## Arguments: field (245)
 527 ## Returns:
 528 ##********************************************************************
 529 sub print_title {
 530     my ($titlefield) = @_;
 531     if (!$titlefield) {
 532         print "<marc>empty title field (245)\r\n" if $marcprint;
 533         warn("empty title field (245)") if $marcprint;
 534     }
 535     else {
 536         print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
 537         print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
 538         print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
 539
 540         ## The title is usually written in a very odd notation. The title
 541         ## proper ($a) often ends with a space followed by a separator like
 542         ## a slash or a colon. The subtitle ($b) doesn't start with a space
 543         ## so simple concatenation looks odd. We have to conditionally remove
 544         ## the separator and make sure there's a space between title and
 545         ## subtitle
 546
 547         my $clean_title = $titlefield->subfield('a');
 548
 549         my $clean_subtitle = $titlefield->subfield('b');
 550 $clean_subtitle ||= q{};
 551         $clean_title =~ s% *[/:;.]$%%;
 552         $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
 553
 554         if (length($clean_title) > 0
 555             || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
 556             print "TI  - ", &charconv($clean_title);
 557
 558             ## subfield $b is relevant only for marc21/ukmarc
 559             if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
 560                 print ": ",&charconv($clean_subtitle);
 561             }
 562             print "\r\n";
 563         }
 564
 565         ## The statement of responsibility is just this: horrors. There is
 566         ## no formal definition how authors, editors and the like should
 567         ## be written and designated. The field is free-form and resistant
 568         ## to all parsing efforts, so this information is lost on me
 569     }
 570 }
 571
 572 ##********************************************************************
 573 ## print_stitle(): prints info from series title field
 574 ## Arguments: field
 575 ## Returns:
 576 ##********************************************************************
 577 sub print_stitle {
 578     my ($titlefield) = @_;
 579
 580     if (!$titlefield) {
 581         print "<marc>empty series title field\r\n" if $marcprint;
 582     }
 583     else {
 584         print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
 585         my $clean_title = $titlefield->subfield('a');
 586
 587         $clean_title =~ s% *[/:;.]$%%;
 588
 589         if (length($clean_title) > 0) {
 590             print "T2  - ", &charconv($clean_title),"\r\n";
 591         }
 592
 593         if ($intype eq "unimarc") {
 594             print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
 595             if (length($titlefield->subfield('v')) > 0) {
 596                 print "VL  - ", &charconv($titlefield->subfield('v')),"\r\n";
 597             }
 598         }
 599     }
 600 }
 601
 602 ##********************************************************************
 603 ## print_isbn(): gets info from MARC field 020
 604 ## Arguments: field (020)
 605 ##********************************************************************
 606 sub print_isbn {
 607     my($isbnfield) = @_;
 608
 609     if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
 610         print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
 611         warn("no isbn found") if $marcprint;
 612     }
 613     else {
 614         if (length ($isbnfield->subfield('a')) < 10) {
 615             print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
 616             warn("truncated isbn") if $marcprint;
 617         }
 618
 619         my $isbn = substr($isbnfield->subfield('a'), 0, 10);
 620         print "SN  - ", &charconv($isbn), "\r\n";
 621     }
 622 }
 623
 624 ##********************************************************************
 625 ## print_issn(): gets info from MARC field 022
 626 ## Arguments: field (022)
 627 ##********************************************************************
 628 sub print_issn {
 629     my($issnfield) = @_;
 630
 631     if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
 632         print "<marc>no issn found (022\$a)\r\n" if $marcprint;
 633         warn("no issn found") if $marcprint;
 634     }
 635     else {
 636         if (length ($issnfield->subfield('a')) < 9) {
 637             print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
 638             warn("truncated issn") if $marcprint;
 639         }
 640
 641         my $issn = substr($issnfield->subfield('a'), 0, 9);
 642         print "SN  - ", &charconv($issn), "\r\n";
 643     }
 644 }
 645
 646 ###
 647 # print_uri() prints info from 856 u
 648 ###
 649 sub print_uri {
 650     my @f856s = @_;
 651
 652     foreach my $f856 (@f856s) {
 653         if (my $uri = $f856->subfield('u')) {
 654                 print "UR  - ", charconv($uri), "\r\n";
 655         }
 656     }
 657 }
 658
 659 ##********************************************************************
 660 ## print_loc_callno(): gets info from MARC field 050
 661 ## Arguments: field (050)
 662 ##********************************************************************
 663 sub print_loc_callno {
 664     my($callnofield) = @_;
 665
 666     if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
 667         print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
 668         warn("no LOC call number found") if $marcprint;
 669     }
 670     else {
 671         print "AV  - ", &charconv($callnofield->subfield('a')), " ", &charconv($callnofield->subfield('b')), "\r\n";
 672     }
 673 }
 674
 675 ##********************************************************************
 676 ## print_dewey(): gets info from MARC field 082
 677 ## Arguments: field (082)
 678 ##********************************************************************
 679 sub print_dewey {
 680     my($deweyfield) = @_;
 681
 682     if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
 683         print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
 684         warn("no Dewey number found") if $marcprint;
 685     }
 686     else {
 687         print "U1  - ", &charconv($deweyfield->subfield('a')), " ", &charconv($deweyfield->subfield('2')), "\r\n";
 688     }
 689 }
 690
 691 ##********************************************************************
 692 ## print_pubinfo(): gets info from MARC field 260
 693 ## Arguments: field (260)
 694 ##********************************************************************
 695 sub print_pubinfo {
 696     my($pubinfofield) = @_;
 697
 698     if (!$pubinfofield) {
 699     print "<marc>no publication information found (260/264)\r\n" if $marcprint;
 700         warn("no publication information found") if $marcprint;
 701     }
 702     else {
 703         ## the following information is available in MARC21:
 704         ## $a place -> CY
 705         ## $b publisher -> PB
 706         ## $c date -> PY
 707         ## the corresponding subfields for UNIMARC:
 708         ## $a place -> CY
 709         ## $c publisher -> PB
 710         ## $d date -> PY
 711
 712         ## all of them are repeatable. We pool all places into a
 713         ## comma-separated list in CY. We also pool all publishers
 714         ## into a comma-separated list in PB.  We break the rule with
 715         ## the date field because this wouldn't make much sense. In
 716         ## this case, we use the first occurrence for PY, the second
 717         ## for Y2, and ignore the rest
 718
 719         my @pubsubfields = $pubinfofield->subfields();
 720         my @cities;
 721         my @publishers;
 722         my $pycounter = 0;
 723
 724         my $pubsub_place;
 725         my $pubsub_publisher;
 726         my $pubsub_date;
 727
 728         if ($intype eq "unimarc") {
 729             $pubsub_place = "a";
 730             $pubsub_publisher = "c";
 731             $pubsub_date = "d";
 732         }
 733         else { ## assume marc21
 734             $pubsub_place = "a";
 735             $pubsub_publisher = "b";
 736             $pubsub_date = "c";
 737         }
 738
 739         ## loop over all subfield list entries
 740         for my $tuple (@pubsubfields) {
 741             ## each tuple consists of the subfield code and the value
 742             if (@$tuple[0] eq $pubsub_place) {
 743                 ## strip any trailing crap
 744                 $_ = @$tuple[1];
 745                 s% *[,;:/]$%%;
 746                 ## pool all occurrences in a list
 747                 push (@cities, $_);
 748             }
 749             elsif (@$tuple[0] eq $pubsub_publisher) {
 750                 ## strip any trailing crap
 751                 $_ = @$tuple[1];
 752                 s% *[,;:/]$%%;
 753                 ## pool all occurrences in a list
 754                 push (@publishers, $_);
 755             }
 756             elsif (@$tuple[0] eq $pubsub_date) {
 757                 ## the dates are free-form, so we want to extract
 758                 ## a four-digit year and leave the rest as
 759                 ## "other info"
 760                 $protoyear = @$tuple[1];
 761                 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
 762
 763                 ## strip any separator chars at the end
 764                 $protoyear =~ s% *[\.;:/]*$%%;
 765
 766                 ## isolate a four-digit year. We discard anything
 767         ## preceding the year, but keep everything after
 768                 ## the year as other info.
 769                 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
 770
 771                 ## check what we've got. If there is no four-digit
 772                 ## year, make it up. If digits are replaced by '-',
 773                 ## replace those with 0s
 774
 775                 if (index($protoyear, "/") == 4) {
 776                     ## have year info
 777                     ## replace all '-' in the four-digit year
 778                     ## by '0'
 779                     substr($protoyear,0,4) =~ s!-!0!g;
 780                 }
 781                 else {
 782                     ## have no year info
 783                     print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
 784                     $protoyear = "0000///$protoyear";
 785                     warn("no four-digit year found, use 0000") if $marcprint;
 786                 }
 787
 788                 if ($pycounter == 0 && length($protoyear)) {
 789                     print "PY  - $protoyear\r\n";
 790                 }
 791                 elsif ($pycounter == 1 && length($_)) {
 792                     print "Y2  - $protoyear\r\n";
 793                 }
 794                 ## else: discard
 795             }
 796             ## else: discard
 797         }
 798
 799         ## now dump the collected CY and PB lists
 800         if (@cities > 0) {
 801             print "CY  - ", &charconv(join(", ", @cities)), "\r\n";
 802         }
 803         if (@publishers > 0) {
 804             print "PB  - ", &charconv(join(", ", @publishers)), "\r\n";
 805         }
 806     }
 807 }
 808
 809 ##********************************************************************
 810 ## get_keywords(): prints info from MARC fields 6XX
 811 ## Arguments: list of fields (6XX)
 812 ##********************************************************************
 813 sub get_keywords {
 814     my($fieldname, @keywords) = @_;
 815
 816     my @kw;
 817     ## a list of all possible subfields
 818     my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
 819
 820     ## loop over all 6XX fields
 821     foreach my $kwfield (@keywords) {
 822         if ($kwfield != undef) {
 823             ## authornames get special treatment
 824             if ($fieldname eq "600") {
 825                 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
 826                 push @kw, $val;
 827                 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
 828             }
 829             else {
 830                 ## retrieve all available subfields
 831                 my @kwsubfields = $kwfield->subfields();
 832
 833                 ## loop over all available subfield tuples
 834                 foreach my $kwtuple (@kwsubfields) {
 835                     ## loop over all subfields to check
 836                     foreach my $subfield (@subfields) {
 837                         ## [0] contains subfield code
 838                         if (@$kwtuple[0] eq $subfield) {
 839                             ## [1] contains value, remove trailing separators
 840                             @$kwtuple[1] =~ s% *[,;.:/]*$%%;
 841                             if (length(@$kwtuple[1]) > 0) {
 842                                 push @kw, @$kwtuple[1];
 843                                 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
 844                             }
 845                             ## we can leave the subfields loop here
 846                             last;
 847                         }
 848                     }
 849                 }
 850             }
 851         }
 852     }
 853     return @kw;
 854 }
 855
 856 ##********************************************************************
 857 ## pool_subx(): adds contents of several subfields to a list
 858 ## Arguments: reference to a list
 859 ##            field name
 860 ##            list of fields (5XX)
 861 ##********************************************************************
 862 sub pool_subx {
 863     my($aref, $fieldname, @notefields) = @_;
 864
 865     ## we use a list that contains the interesting subfields
 866     ## for each field
 867     # ToDo: this is apparently correct only for marc21
 868     my @subfields;
 869
 870     if ($fieldname eq "500") {
 871         @subfields = ('a');
 872     }
 873     elsif ($fieldname eq "501") {
 874         @subfields = ('a');
 875     }
 876     elsif ($fieldname eq "502") {
 877         @subfields = ('a');
 878             }
 879     elsif ($fieldname eq "504") {
 880         @subfields = ('a', 'b');
 881     }
 882     elsif ($fieldname eq "505") {
 883         @subfields = ('a', 'g', 'r', 't', 'u');
 884     }
 885     elsif ($fieldname eq "506") {
 886         @subfields = ('a', 'b', 'c', 'd', 'e');
 887     }
 888     elsif ($fieldname eq "507") {
 889         @subfields = ('a', 'b');
 890     }
 891     elsif ($fieldname eq "508") {
 892         @subfields = ('a');
 893     }
 894     elsif ($fieldname eq "510") {
 895         @subfields = ('a', 'b', 'c', 'x', '3');
 896     }
 897     elsif ($fieldname eq "511") {
 898         @subfields = ('a');
 899     }
 900     elsif ($fieldname eq "513") {
 901         @subfields = ('a', 'b');
 902     }
 903     elsif ($fieldname eq "514") {
 904         @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
 905     }
 906     elsif ($fieldname eq "515") {
 907         @subfields = ('a');
 908     }
 909     elsif ($fieldname eq "516") {
 910         @subfields = ('a');
 911     }
 912     elsif ($fieldname eq "518") {
 913         @subfields = ('a', '3');
 914     }
 915     elsif ($fieldname eq "521") {
 916         @subfields = ('a', 'b', '3');
 917     }
 918     elsif ($fieldname eq "522") {
 919         @subfields = ('a');
 920     }
 921     elsif ($fieldname eq "524") {
 922         @subfields = ('a', '2', '3');
 923     }
 924     elsif ($fieldname eq "525") {
 925         @subfields = ('a');
 926     }
 927     elsif ($fieldname eq "526") {
 928         @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
 929     }
 930     elsif ($fieldname eq "530") {
 931         @subfields = ('a', 'b', 'c', 'd', 'u', '3');
 932     }
 933     elsif ($fieldname eq "533") {
 934         @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
 935     }
 936     elsif ($fieldname eq "534") {
 937         @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
 938     }
 939     elsif ($fieldname eq "535") {
 940         @subfields = ('a', 'b', 'c', 'd', 'g', '3');
 941     }
 942
 943     ## loop over all notefields
 944     foreach my $notefield (@notefields) {
 945         if (defined $notefield) {
 946             ## retrieve all available subfield tuples
 947             my @notesubfields = $notefield->subfields();
 948
 949             ## loop over all subfield tuples
 950             foreach my $notetuple (@notesubfields) {
 951                 ## loop over all subfields to check
 952                 foreach my $subfield (@subfields) {
 953                     ## [0] contains subfield code
 954                     if (@$notetuple[0] eq $subfield) {
 955                         ## [1] contains value, remove trailing separators
 956                         print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
 957                         @$notetuple[1] =~ s% *[,;.:/]*$%%;
 958                         if (length(@$notetuple[1]) > 0) {
 959                             ## add to list
 960                             push @{$aref}, @$notetuple[1];
 961                         }
 962                         last;
 963                     }
 964                 }
 965             }
 966         }
 967     }
 968 }
 969
 970 ##********************************************************************
 971 ## print_abstract(): prints abstract fields
 972 ## Arguments: list of fields (520)
 973 ##********************************************************************
 974 sub print_abstract {
 975     # ToDo: take care of repeatable subfields
 976     my(@abfields) = @_;
 977
 978     ## we check the following subfields
 979     my @subfields = ('a', 'b');
 980
 981     ## we generate a list for all useful strings
 982     my @abstrings;
 983
 984     ## loop over all abfields
 985     foreach my $abfield (@abfields) {
 986         foreach my $field (@subfields) {
 987             if ( length( $abfield->subfield($field) ) > 0 ) {
 988                 my $ab = $abfield->subfield($field);
 989
 990                 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
 991
 992                 ## strip trailing separators
 993                 $ab =~ s% *[;,:./]*$%%;
 994
 995                 ## add string to the list
 996                 push( @abstrings, $ab );
 997             }
 998         }
 999     }
1000
1001     my $allabs = join "; ", @abstrings;
1002
1003     if (length($allabs) > 0) {
1004         print "N2  - ", &charconv($allabs), "\r\n";
1005     }
1006
1007 }
1008
1009
1010
1011 ##********************************************************************
1012 ## charconv(): converts to a different charset based on a global var
1013 ## Arguments: string
1014 ## Returns: string
1015 ##********************************************************************
1016 sub charconv {
1017     if ($utf) {
1018         ## return unaltered if already utf-8
1019         return @_;
1020     }
1021     elsif (my $uniout eq "t") {
1022         ## convert to utf-8
1023         return marc8_to_utf8("@_");
1024     }
1025     else {
1026         ## return unaltered if no utf-8 requested
1027         return @_;
1028     }
1029 }
1030 1;