4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5 ## See comments below for compliance with other MARC dialects
7 ## usage: perl marc2ris < infile.marc > outfile.ris
9 ## Dependencies: perl 5.6.0 or later
13 ## markus@mhoenicka.de 2002-11-16
15 ## This program is free software; you can redistribute it and/or modify
16 ## it under the terms of the GNU General Public License as published by
17 ## the Free Software Foundation; either version 2 of the License, or
18 ## (at your option) any later version.
20 ## This program is distributed in the hope that it will be useful,
21 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ## GNU General Public License for more details.
25 ## You should have received a copy of the GNU General Public License
26 ## along with this program; if not, write to the Free Software
27 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ## Some background about MARC as understood by this script
30 ## The default input format used in this script is MARC21, which
31 ## superseded USMARC and CANMARC. The specification can be found at:
32 ## http://lcweb.loc.gov/marc/
33 ## UNIMARC follows the specification at:
34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35 ## UKMARC support is a bit shaky because there is no specification available
36 ## for free. The wisdom used in this script was taken from a PDF document
37 ## comparing UKMARC to MARC21 found at:
38 ## www.bl.uk/services/bibliographic/marcchange.pdf
41 # Modified 2008 by BibLibre for Koha
42 # Modified 2011 by Catalyst
43 # Modified 2011 by Equinox Software, Inc.
44 # Modified 2016 by Universidad de El Salvador
46 # This file is part of Koha.
48 # Koha is free software; you can redistribute it and/or modify it
49 # under the terms of the GNU General Public License as published by
50 # the Free Software Foundation; either version 3 of the License, or
51 # (at your option) any later version.
53 # Koha is distributed in the hope that it will be useful, but
54 # WITHOUT ANY WARRANTY; without even the implied warranty of
55 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 # GNU General Public License for more details.
58 # You should have received a copy of the GNU General Public License
59 # along with Koha; if not, see <http://www.gnu.org/licenses>.
65 use List::MoreUtils qw/uniq/;
66 use vars qw($VERSION @ISA @EXPORT);
68 use C4::Biblio qw(GetMarcSubfieldStructureFromKohaField);
69 use Koha::SimpleMARC qw(read_field);
71 # set the version for version checking
72 $VERSION = 3.07.00.049;
76 # only export API methods
82 our $marcprint = 0; # Debug flag;
84 =head1 marc2bibtex - Convert from UNIMARC to RIS
86 my ($ris) = marc2ris($record);
90 C<$record> - a MARC::Record object
98 my $marcflavour = C4::Context->preference("marcflavour");
99 my $intype = lc($marcflavour);
101 # Let's redirect stdout
102 open my $oldout, ">&STDOUT";
105 open STDOUT,'>:encoding(utf8)', \$outvar;
107 ## First we should check the character encoding. This may be
108 ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
109 ## by 'a' at position 09 (zero-based) of the leader
110 my $leader = $record->leader();
111 if ( $intype eq "marc21" ) {
112 if ( $leader =~ /^.{9}a/ ) {
113 print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
116 print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
119 ## else: other MARC formats do not specify the character encoding
120 ## we assume it's *not* UTF-8
122 my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
123 my $ris_additional_fields;
124 if ($RisExportAdditionalFields) {
125 $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
126 $ris_additional_fields = eval { YAML::Load($RisExportAdditionalFields); };
128 warn "Unable to parse RisExportAdditionalFields : $@";
129 $ris_additional_fields = undef;
134 if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
135 my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
136 my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
138 print "TY - $type\r\n";
141 &print_typetag($leader);
145 &print_typetag($leader);
148 ## retrieve all author fields and collect them in a list
151 if ($intype eq "unimarc") {
152 ## Fields 700, 701, and 702 can contain author names
153 @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
155 else { ## marc21, ukmarc
156 ## Field 100 sometimes carries main author
157 ## Field(s) 700 carry added entries - personal names
158 @author_fields = ($record->field('100'), $record->field('700'));
161 ## loop over all author fields
162 foreach my $field (@author_fields) {
163 if (length($field)) {
164 my $author = &get_author($field);
165 print "AU - ",$author,"\r\n";
169 # ToDo: should we specify anonymous as author if we didn't find
170 # one? or use one of the corporate/meeting names below?
172 ## add corporate names or meeting names as editors ??
175 if ($intype eq "unimarc") {
176 ## Fields 710, 711, and 712 can carry corporate names
177 ## Field(s) 720, 721, 722, 730 have additional candidates
178 @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
180 else { ## marc21, ukmarc
181 ## Fields 110 and 111 carry the main entries - corporate name and
182 ## meeting name, respectively
183 ## Field(s) 710, 711 carry added entries - personal names
184 @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
187 ## loop over all editor fields
188 foreach my $field (@editor_fields) {
189 if (length($field)) {
190 my $editor = &get_editor($field);
191 print "ED - ",$editor,"\r\n";
195 ## get info from the title field
196 if ($intype eq "unimarc") {
197 &print_title($record->field('200'));
199 else { ## marc21, ukmarc
200 &print_title($record->field('245'));
204 if ($intype eq "unimarc") {
205 &print_stitle($record->field('225'));
207 else { ## marc21, ukmarc
208 &print_stitle($record->field('490'));
212 if ($intype eq "unimarc") {
213 &print_isbn($record->field('010'));
214 &print_issn($record->field('011'));
216 elsif ($intype eq "ukmarc") {
217 &print_isbn($record->field('021'));
218 ## this is just an assumption
219 &print_issn($record->field('022'));
221 else { ## assume marc21
222 &print_isbn($record->field('020'));
223 &print_issn($record->field('022'));
226 if ($intype eq "marc21") {
227 &print_loc_callno($record->field('050'));
228 &print_dewey($record->field('082'));
230 ## else: unimarc, ukmarc do not seem to store call numbers?
233 if ($intype eq "unimarc") {
234 &print_pubinfo($record->field('210'));
236 else { ## marc21, ukmarc
237 if ($record->field('264')) {
238 &print_pubinfo($record->field('264'));
241 &print_pubinfo($record->field('260'));
245 ## 6XX fields contain KW candidates. We add all of them to a
248 if ($intype eq "unimarc") {
249 @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
250 } elsif ($intype eq "ukmarc") {
251 @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
252 } else { ## assume marc21
253 @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
257 for my $f ( @field_list ) {
258 my @fields = $record->field($f);
259 push @kwpool, ( get_keywords("$f",$record->field($f)) );
263 @kwpool = uniq @kwpool;
265 for my $kw ( @kwpool ) {
266 print "KW - ", $kw, "\r\n";
269 ## 5XX have various candidates for notes and abstracts. We pool
270 ## all notes-like stuff in one list.
273 ## these fields have notes candidates
274 if ($intype eq "unimarc") {
275 foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
276 &pool_subx(\@notepool, $_, $record->field($_));
279 elsif ($intype eq "ukmarc") {
280 foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
281 &pool_subx(\@notepool, $_, $record->field($_));
284 else { ## assume marc21
285 foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
286 &pool_subx(\@notepool, $_, $record->field($_));
290 my $allnotes = join "; ", @notepool;
292 if (length($allnotes) > 0) {
293 print "N1 - ", $allnotes, "\r\n";
296 ## 320/520 have the abstract
297 if ($intype eq "unimarc") {
298 &print_abstract($record->field('320'));
300 elsif ($intype eq "ukmarc") {
301 &print_abstract($record->field('512'), $record->field('513'));
303 else { ## assume marc21
304 &print_abstract($record->field('520'));
308 if ($record->field('856')) {
309 print_uri($record->field('856'));
312 if ($ris_additional_fields) {
313 foreach my $ris_tag ( keys %$ris_additional_fields ) {
314 next if $ris_tag eq 'TY';
317 ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
318 ? @{ $ris_additional_fields->{$ris_tag} }
319 : $ris_additional_fields->{$ris_tag};
321 for my $tag (@fields) {
322 my ( $f, $sf ) = split( /\$/, $tag );
323 my @values = read_field( { record => $record, field => $f, subfield => $sf } );
324 foreach my $v (@values) {
325 print "$ris_tag - $v\r\n";
334 # Let's re-redirect stdout
336 open STDOUT, ">&", $oldout;
343 ##********************************************************************
344 ## print_typetag(): prints the first line of a RIS dataset including
345 ## the preceding newline
346 ## Argument: the leader of a MARC dataset
347 ## Returns: the value at leader position 06
348 ##********************************************************************
351 ## the keys of typehash are the allowed values at position 06
352 ## of the leader of a MARC record, the values are the RIS types
353 ## that might appropriately represent these types.
387 ## The type of a MARC record is found at position 06 of the leader
388 my $typeofrecord = defined($leader) && length $leader >=6 ?
389 substr($leader, 6, 1): undef;
390 ## Pos 07 == Bibliographic level
391 my $biblevel = defined($leader) && length $leader >=7 ?
392 substr($leader, 7, 1): '';
394 ## TODO: for books, field 008 positions 24-27 might have a few more
398 my $marcflavour = C4::Context->preference("marcflavour");
399 my $intype = lc($marcflavour);
400 if ($intype eq "unimarc") {
401 %typehash = %unitypehash;
404 %typehash = %ustypehash;
407 if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
408 print "TY - GEN\r\n"; ## most reasonable default
409 warn ("no type found - assume GEN") if $marcprint;
410 } elsif ( $typeofrecord =~ "a" ) {
411 if ( $biblevel eq 'a' ) {
412 print "TY - GEN\r\n"; ## monographic component part
413 } elsif ( $biblevel eq 'b' || $biblevel eq 's' ) {
414 print "TY - SER\r\n"; ## serial or serial component part
415 } elsif ( $biblevel eq 'm' ) {
416 print "TY - $typehash{$typeofrecord}\r\n"; ## book
417 } elsif ( $biblevel eq 'c' || $biblevel eq 'd' ) {
418 print "TY - GEN\r\n"; ## collections, part of collections or made-up collections
419 } elsif ( $biblevel eq 'i' ) {
420 print "TY - DATA\r\n"; ## updating loose-leafe as Dataset
423 print "TY - $typehash{$typeofrecord}\r\n";
426 ## use $typeofrecord as the return value, just in case
430 ##********************************************************************
431 ## normalize_author(): normalizes an authorname
432 ## Arguments: authorname subfield a
433 ## authorname subfield b
434 ## authorname subfield c
435 ## name type if known: 0=direct order
436 ## 1=only surname or full name in
438 ## 3=family, clan, dynasty name
439 ## Returns: the normalized authorname
440 ##********************************************************************
441 sub normalize_author {
442 my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
444 if ($nametype == 0) {
445 # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
446 warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
449 elsif ($nametype == 1) {
450 ## start munging subfield a (the real name part)
451 ## remove spaces after separators
452 $rawauthora =~ s%([,.]+) *%$1%g;
454 ## remove trailing separators after spaces
455 $rawauthora =~ s% *[,;:/]*$%%;
457 ## remove periods after a non-abbreviated name
458 $rawauthora =~ s%(\w{2,})\.%$1%g;
460 ## start munging subfield b (something like the suffix)
461 ## remove trailing separators after spaces
462 $rawauthorb =~ s% *[,;:/]*$%%;
464 ## we currently ignore subfield c until someone complains
465 if (length($rawauthorb) > 0) {
466 return join ", ", ($rawauthora, $rawauthorb);
472 elsif ($nametype == 3) {
477 ##********************************************************************
478 ## get_author(): gets authorname info from MARC fields 100, 700
479 ## Argument: field (100 or 700)
480 ## Returns: an author string in the format found in the record
481 ##********************************************************************
483 my ($authorfield) = @_;
486 ## the sequence of the name parts is encoded either in indicator
487 ## 1 (marc21) or 2 (unimarc)
488 my $marcflavour = C4::Context->preference("marcflavour");
489 my $intype = lc($marcflavour);
490 if ($intype eq "unimarc") {
493 else { ## assume marc21
497 print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
498 print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
499 print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
500 print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
501 print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
502 if ($intype eq "ukmarc") {
503 my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
504 normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
507 normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
511 ##********************************************************************
512 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
513 ## Argument: field (110, 111, 710, or 711)
514 ## Returns: an author string in the format found in the record
515 ##********************************************************************
517 my ($editorfield) = @_;
523 print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
524 print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
525 print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
526 return $editorfield->subfield('a');
530 ##********************************************************************
531 ## print_title(): gets info from MARC field 245
532 ## Arguments: field (245)
534 ##********************************************************************
536 my ($titlefield) = @_;
538 print "<marc>empty title field (245)\r\n" if $marcprint;
539 warn("empty title field (245)") if $marcprint;
542 print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
543 print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
544 print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
546 ## The title is usually written in a very odd notation. The title
547 ## proper ($a) often ends with a space followed by a separator like
548 ## a slash or a colon. The subtitle ($b) doesn't start with a space
549 ## so simple concatenation looks odd. We have to conditionally remove
550 ## the separator and make sure there's a space between title and
553 my $clean_title = $titlefield->subfield('a');
555 my $clean_subtitle = $titlefield->subfield('b');
556 $clean_subtitle ||= q{};
557 $clean_title =~ s% *[/:;.]$%%;
558 $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
560 my $marcflavour = C4::Context->preference("marcflavour");
561 my $intype = lc($marcflavour);
562 if (length($clean_title) > 0
563 || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
564 print "TI - ", $clean_title;
566 ## subfield $b is relevant only for marc21/ukmarc
567 if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
568 print ": ",$clean_subtitle;
573 ## The statement of responsibility is just this: horrors. There is
574 ## no formal definition how authors, editors and the like should
575 ## be written and designated. The field is free-form and resistant
576 ## to all parsing efforts, so this information is lost on me
581 ##********************************************************************
582 ## print_stitle(): prints info from series title field
585 ##********************************************************************
587 my ($titlefield) = @_;
590 print "<marc>empty series title field\r\n" if $marcprint;
593 print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
594 my $clean_title = $titlefield->subfield('a');
596 $clean_title =~ s% *[/:;.]$%%;
598 if (length($clean_title) > 0) {
599 print "T2 - ", $clean_title,"\r\n";
602 my $marcflavour = C4::Context->preference("marcflavour");
603 my $intype = lc($marcflavour);
604 if ($intype eq "unimarc") {
605 print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
606 if (length($titlefield->subfield('v')) > 0) {
607 print "VL - ", $titlefield->subfield('v'),"\r\n";
614 ##********************************************************************
615 ## print_isbn(): gets info from MARC field 020
616 ## Arguments: field (020)
617 ##********************************************************************
621 if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
622 print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
623 warn("no isbn found") if $marcprint;
626 if (length ($isbnfield->subfield('a')) < 10) {
627 print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
628 warn("truncated isbn") if $marcprint;
631 my $isbn = $isbnfield->subfield('a');
632 print "SN - ", $isbn, "\r\n";
636 ##********************************************************************
637 ## print_issn(): gets info from MARC field 022
638 ## Arguments: field (022)
639 ##********************************************************************
643 if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
644 print "<marc>no issn found (022\$a)\r\n" if $marcprint;
645 warn("no issn found") if $marcprint;
648 if (length ($issnfield->subfield('a')) < 9) {
649 print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
650 warn("truncated issn") if $marcprint;
653 my $issn = substr($issnfield->subfield('a'), 0, 9);
654 print "SN - ", $issn, "\r\n";
659 # print_uri() prints info from 856 u
664 foreach my $f856 (@f856s) {
665 if (my $uri = $f856->subfield('u')) {
666 print "UR - ", $uri, "\r\n";
671 ##********************************************************************
672 ## print_loc_callno(): gets info from MARC field 050
673 ## Arguments: field (050)
674 ##********************************************************************
675 sub print_loc_callno {
676 my($callnofield) = @_;
678 if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
679 print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
680 warn("no LOC call number found") if $marcprint;
683 print "AV - ", $callnofield->subfield('a'), " ", $callnofield->subfield('b'), "\r\n";
687 ##********************************************************************
688 ## print_dewey(): gets info from MARC field 082
689 ## Arguments: field (082)
690 ##********************************************************************
692 my($deweyfield) = @_;
694 if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
695 print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
696 warn("no Dewey number found") if $marcprint;
699 print "U1 - ", $deweyfield->subfield('a'), " ", $deweyfield->subfield('2'), "\r\n";
703 ##********************************************************************
704 ## print_pubinfo(): gets info from MARC field 260
705 ## Arguments: field (260)
706 ##********************************************************************
708 my($pubinfofield) = @_;
710 if (!$pubinfofield) {
711 print "<marc>no publication information found (260/264)\r\n" if $marcprint;
712 warn("no publication information found") if $marcprint;
715 ## the following information is available in MARC21:
717 ## $b publisher -> PB
719 ## the corresponding subfields for UNIMARC:
721 ## $c publisher -> PB
724 ## all of them are repeatable. We pool all places into a
725 ## comma-separated list in CY. We also pool all publishers
726 ## into a comma-separated list in PB. We break the rule with
727 ## the date field because this wouldn't make much sense. In
728 ## this case, we use the first occurrence for PY, the second
729 ## for Y2, and ignore the rest
731 my @pubsubfields = $pubinfofield->subfields();
737 my $pubsub_publisher;
740 my $marcflavour = C4::Context->preference("marcflavour");
741 my $intype = lc($marcflavour);
742 if ($intype eq "unimarc") {
744 $pubsub_publisher = "c";
747 else { ## assume marc21
749 $pubsub_publisher = "b";
753 ## loop over all subfield list entries
754 for my $tuple (@pubsubfields) {
755 ## each tuple consists of the subfield code and the value
756 if (@$tuple[0] eq $pubsub_place) {
757 ## strip any trailing crap
760 ## pool all occurrences in a list
763 elsif (@$tuple[0] eq $pubsub_publisher) {
764 ## strip any trailing crap
767 ## pool all occurrences in a list
768 push (@publishers, $_);
770 elsif (@$tuple[0] eq $pubsub_date) {
771 ## the dates are free-form, so we want to extract
772 ## a four-digit year and leave the rest as
774 my $protoyear = @$tuple[1];
775 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
777 ## strip any separator chars at the end
778 $protoyear =~ s% *[\.;:/]*$%%;
780 ## isolate a four-digit year. We discard anything
781 ## preceding the year, but keep everything after
782 ## the year as other info.
783 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
785 ## check what we've got. If there is no four-digit
786 ## year, make it up. If digits are replaced by '-',
787 ## replace those with 0s
789 if (index($protoyear, "/") == 4) {
791 ## replace all '-' in the four-digit year
793 substr($protoyear,0,4) =~ s!-!0!g;
797 print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
798 $protoyear = "0000///$protoyear";
799 warn("no four-digit year found, use 0000") if $marcprint;
802 if ($pycounter == 0 && length($protoyear)) {
803 print "PY - $protoyear\r\n";
805 elsif ($pycounter == 1 && length($_)) {
806 print "Y2 - $protoyear\r\n";
813 ## now dump the collected CY and PB lists
815 print "CY - ", join(", ", @cities), "\r\n";
817 if (@publishers > 0) {
818 print "PB - ", join(", ", @publishers), "\r\n";
823 ##********************************************************************
824 ## get_keywords(): prints info from MARC fields 6XX
825 ## Arguments: list of fields (6XX)
826 ##********************************************************************
828 my($fieldname, @keywords) = @_;
831 ## a list of all possible subfields
832 my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
834 ## loop over all 6XX fields
835 foreach my $kwfield (@keywords) {
836 if ($kwfield != undef) {
837 ## authornames get special treatment
838 if ($fieldname eq "600") {
839 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
841 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
844 ## retrieve all available subfields
845 my @kwsubfields = $kwfield->subfields();
847 ## loop over all available subfield tuples
848 foreach my $kwtuple (@kwsubfields) {
849 ## loop over all subfields to check
850 foreach my $subfield (@subfields) {
851 ## [0] contains subfield code
852 if (@$kwtuple[0] eq $subfield) {
853 ## [1] contains value, remove trailing separators
854 @$kwtuple[1] =~ s% *[,;.:/]*$%%;
855 if (length(@$kwtuple[1]) > 0) {
856 push @kw, @$kwtuple[1];
857 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
859 ## we can leave the subfields loop here
870 ##********************************************************************
871 ## pool_subx(): adds contents of several subfields to a list
872 ## Arguments: reference to a list
874 ## list of fields (5XX)
875 ##********************************************************************
877 my($aref, $fieldname, @notefields) = @_;
879 ## we use a list that contains the interesting subfields
881 # ToDo: this is apparently correct only for marc21
884 if ($fieldname eq "500") {
887 elsif ($fieldname eq "501") {
890 elsif ($fieldname eq "502") {
893 elsif ($fieldname eq "504") {
894 @subfields = ('a', 'b');
896 elsif ($fieldname eq "505") {
897 @subfields = ('a', 'g', 'r', 't', 'u');
899 elsif ($fieldname eq "506") {
900 @subfields = ('a', 'b', 'c', 'd', 'e');
902 elsif ($fieldname eq "507") {
903 @subfields = ('a', 'b');
905 elsif ($fieldname eq "508") {
908 elsif ($fieldname eq "510") {
909 @subfields = ('a', 'b', 'c', 'x', '3');
911 elsif ($fieldname eq "511") {
914 elsif ($fieldname eq "513") {
915 @subfields = ('a', 'b');
917 elsif ($fieldname eq "514") {
918 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
920 elsif ($fieldname eq "515") {
923 elsif ($fieldname eq "516") {
926 elsif ($fieldname eq "518") {
927 @subfields = ('a', '3');
929 elsif ($fieldname eq "521") {
930 @subfields = ('a', 'b', '3');
932 elsif ($fieldname eq "522") {
935 elsif ($fieldname eq "524") {
936 @subfields = ('a', '2', '3');
938 elsif ($fieldname eq "525") {
941 elsif ($fieldname eq "526") {
942 @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
944 elsif ($fieldname eq "530") {
945 @subfields = ('a', 'b', 'c', 'd', 'u', '3');
947 elsif ($fieldname eq "533") {
948 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
950 elsif ($fieldname eq "534") {
951 @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
953 elsif ($fieldname eq "535") {
954 @subfields = ('a', 'b', 'c', 'd', 'g', '3');
957 ## loop over all notefields
958 foreach my $notefield (@notefields) {
959 if (defined $notefield) {
960 ## retrieve all available subfield tuples
961 my @notesubfields = $notefield->subfields();
963 ## loop over all subfield tuples
964 foreach my $notetuple (@notesubfields) {
965 ## loop over all subfields to check
966 foreach my $subfield (@subfields) {
967 ## [0] contains subfield code
968 if (@$notetuple[0] eq $subfield) {
969 ## [1] contains value, remove trailing separators
970 print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
971 @$notetuple[1] =~ s% *[,;.:/]*$%%;
972 if (length(@$notetuple[1]) > 0) {
974 push @{$aref}, @$notetuple[1];
984 ##********************************************************************
985 ## print_abstract(): prints abstract fields
986 ## Arguments: list of fields (520)
987 ##********************************************************************
989 # ToDo: take care of repeatable subfields
992 ## we check the following subfields
993 my @subfields = ('a', 'b');
995 ## we generate a list for all useful strings
998 ## loop over all abfields
999 foreach my $abfield (@abfields) {
1000 foreach my $field (@subfields) {
1001 if ( length( $abfield->subfield($field) ) > 0 ) {
1002 my $ab = $abfield->subfield($field);
1004 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
1006 ## strip trailing separators
1007 $ab =~ s% *[;,:./]*$%%;
1009 ## add string to the list
1010 push( @abstrings, $ab );
1015 my $allabs = join "; ", @abstrings;
1017 if (length($allabs) > 0) {
1018 print "N2 - ", $allabs, "\r\n";