4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5 ## See comments below for compliance with other MARC dialects
7 ## usage: perl marc2ris < infile.marc > outfile.ris
9 ## Dependencies: perl 5.6.0 or later
13 ## markus@mhoenicka.de 2002-11-16
15 ## This program is free software; you can redistribute it and/or modify
16 ## it under the terms of the GNU General Public License as published by
17 ## the Free Software Foundation; either version 2 of the License, or
18 ## (at your option) any later version.
20 ## This program is distributed in the hope that it will be useful,
21 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ## GNU General Public License for more details.
25 ## You should have received a copy of the GNU General Public License
26 ## along with this program; if not, write to the Free Software
27 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ## Some background about MARC as understood by this script
30 ## The default input format used in this script is MARC21, which
31 ## superseded USMARC and CANMARC. The specification can be found at:
32 ## http://lcweb.loc.gov/marc/
33 ## UNIMARC follows the specification at:
34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35 ## UKMARC support is a bit shaky because there is no specification available
36 ## for free. The wisdom used in this script was taken from a PDF document
37 ## comparing UKMARC to MARC21 found at:
38 ## www.bl.uk/services/bibliographic/marcchange.pdf
41 # Modified 2008 by BibLibre for Koha
42 # Modified 2011 by Catalyst
43 # Modified 2011 by Equinox Software, Inc.
44 # Modified 2016 by Universidad de El Salvador
46 # This file is part of Koha.
48 # Koha is free software; you can redistribute it and/or modify it
49 # under the terms of the GNU General Public License as published by
50 # the Free Software Foundation; either version 3 of the License, or
51 # (at your option) any later version.
53 # Koha is distributed in the hope that it will be useful, but
54 # WITHOUT ANY WARRANTY; without even the implied warranty of
55 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 # GNU General Public License for more details.
58 # You should have received a copy of the GNU General Public License
59 # along with Koha; if not, see <http://www.gnu.org/licenses>.
65 use List::MoreUtils qw/uniq/;
66 use vars qw($VERSION @ISA @EXPORT);
68 use C4::Biblio qw(GetMarcSubfieldStructureFromKohaField);
69 use Koha::SimpleMARC qw(read_field);
71 # set the version for version checking
72 $VERSION = 3.07.00.049;
76 # only export API methods
82 our $marcprint = 0; # Debug flag;
84 =head1 marc2bibtex - Convert from UNIMARC to RIS
86 my ($ris) = marc2ris($record);
90 C<$record> - a MARC::Record object
98 my $marcflavour = C4::Context->preference("marcflavour");
99 my $intype = lc($marcflavour);
101 # Let's redirect stdout
102 open my $oldout, ">&STDOUT";
105 open STDOUT,'>:encoding(utf8)', \$outvar;
107 ## First we should check the character encoding. This may be
108 ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
109 ## by 'a' at position 09 (zero-based) of the leader
110 my $leader = $record->leader();
111 if ( $intype eq "marc21" ) {
112 if ( $leader =~ /^.{9}a/ ) {
113 print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
116 print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
119 ## else: other MARC formats do not specify the character encoding
120 ## we assume it's *not* UTF-8
122 my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
123 my $ris_additional_fields;
124 if ($RisExportAdditionalFields) {
125 $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
126 $ris_additional_fields = eval { YAML::Load($RisExportAdditionalFields); };
128 warn "Unable to parse RisExportAdditionalFields : $@";
129 $ris_additional_fields = undef;
134 if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
135 my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
136 my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
138 print "TY - $type\r\n";
141 &print_typetag($leader);
145 &print_typetag($leader);
148 ## retrieve all author fields and collect them in a list
151 if ($intype eq "unimarc") {
152 ## Fields 700, 701, and 702 can contain author names
153 @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
155 else { ## marc21, ukmarc
156 ## Field 100 sometimes carries main author
157 ## Field(s) 700 carry added entries - personal names
158 @author_fields = ($record->field('100'), $record->field('700'));
161 ## loop over all author fields
162 foreach my $field (@author_fields) {
163 if (length($field)) {
164 my $author = &get_author($field);
165 print "AU - ",$author,"\r\n";
169 # ToDo: should we specify anonymous as author if we didn't find
170 # one? or use one of the corporate/meeting names below?
172 ## add corporate names or meeting names as editors ??
175 if ($intype eq "unimarc") {
176 ## Fields 710, 711, and 712 can carry corporate names
177 ## Field(s) 720, 721, 722, 730 have additional candidates
178 @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
180 else { ## marc21, ukmarc
181 ## Fields 110 and 111 carry the main entries - corporate name and
182 ## meeting name, respectively
183 ## Field(s) 710, 711 carry added entries - personal names
184 @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
187 ## loop over all editor fields
188 foreach my $field (@editor_fields) {
189 if (length($field)) {
190 my $editor = &get_editor($field);
191 print "ED - ",$editor,"\r\n";
195 ## get info from the title field
196 if ($intype eq "unimarc") {
197 &print_title($record->field('200'));
199 else { ## marc21, ukmarc
200 &print_title($record->field('245'));
204 if ($intype eq "unimarc") {
205 &print_stitle($record->field('225'));
207 else { ## marc21, ukmarc
208 &print_stitle($record->field('490'));
212 if ($intype eq "unimarc") {
213 &print_isbn($record->field('010'));
214 &print_issn($record->field('011'));
216 elsif ($intype eq "ukmarc") {
217 &print_isbn($record->field('021'));
218 ## this is just an assumption
219 &print_issn($record->field('022'));
221 else { ## assume marc21
222 &print_isbn($record->field('020'));
223 &print_issn($record->field('022'));
226 if ($intype eq "marc21") {
227 &print_loc_callno($record->field('050'));
228 &print_dewey($record->field('082'));
230 ## else: unimarc, ukmarc do not seem to store call numbers?
233 if ($intype eq "unimarc") {
234 &print_pubinfo($record->field('210'));
236 else { ## marc21, ukmarc
237 if ($record->field('264')) {
238 &print_pubinfo($record->field('264'));
241 &print_pubinfo($record->field('260'));
245 ## 6XX fields contain KW candidates. We add all of them to a
248 if ($intype eq "unimarc") {
249 @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
250 } elsif ($intype eq "ukmarc") {
251 @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
252 } else { ## assume marc21
253 @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
257 for my $f ( @field_list ) {
258 my @fields = $record->field($f);
259 push @kwpool, ( get_keywords("$f",$record->field($f)) );
263 @kwpool = uniq @kwpool;
265 for my $kw ( @kwpool ) {
266 print "KW - ", $kw, "\r\n";
269 ## 5XX have various candidates for notes and abstracts. We pool
270 ## all notes-like stuff in one list.
273 ## these fields have notes candidates
274 if ($intype eq "unimarc") {
275 foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
276 &pool_subx(\@notepool, $_, $record->field($_));
279 elsif ($intype eq "ukmarc") {
280 foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
281 &pool_subx(\@notepool, $_, $record->field($_));
284 else { ## assume marc21
285 foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
286 &pool_subx(\@notepool, $_, $record->field($_));
290 my $allnotes = join "; ", @notepool;
292 if (length($allnotes) > 0) {
293 print "N1 - ", $allnotes, "\r\n";
296 ## 320/520 have the abstract
297 if ($intype eq "unimarc") {
298 &print_abstract($record->field('320'));
300 elsif ($intype eq "ukmarc") {
301 &print_abstract($record->field('512'), $record->field('513'));
303 else { ## assume marc21
304 &print_abstract($record->field('520'));
308 if ($record->field('856')) {
309 print_uri($record->field('856'));
312 if ($ris_additional_fields) {
313 foreach my $ris_tag ( keys %$ris_additional_fields ) {
314 next if $ris_tag eq 'TY';
317 ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
318 ? @{ $ris_additional_fields->{$ris_tag} }
319 : $ris_additional_fields->{$ris_tag};
321 for my $tag (@fields) {
322 my ( $f, $sf ) = split( /\$/, $tag );
323 my @values = read_field( { record => $record, field => $f, subfield => $sf } );
324 foreach my $v (@values) {
325 print "$ris_tag - $v\r\n";
334 # Let's re-redirect stdout
336 open STDOUT, ">&", $oldout;
343 ##********************************************************************
344 ## print_typetag(): prints the first line of a RIS dataset including
345 ## the preceding newline
346 ## Argument: the leader of a MARC dataset
347 ## Returns: the value at leader position 06
348 ##********************************************************************
351 ## the keys of typehash are the allowed values at position 06
352 ## of the leader of a MARC record, the values are the RIS types
353 ## that might appropriately represent these types.
387 ## The type of a MARC record is found at position 06 of the leader
388 my $typeofrecord = defined($leader) && length $leader >=6 ?
389 substr($leader, 6, 1): undef;
390 my $typeofrecord2 = defined($leader) && length $leader >=6 ?
391 substr($leader, 7, 1): undef;
393 ## ToDo: for books, field 008 positions 24-27 might have a few more
397 my $marcflavour = C4::Context->preference("marcflavour");
398 my $intype = lc($marcflavour);
399 if ($intype eq "unimarc") {
400 %typehash = %unitypehash;
403 %typehash = %ustypehash;
406 if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
407 print "TY - GEN\r\n"; ## most reasonable default
408 warn ("no type found - assume GEN") if $marcprint;
409 } elsif ( $typeofrecord =~ "a" ) {
410 if ( $typeofrecord2 =~ "a" ) {
411 print "TY - GEN\r\n"; ## monographic component part
412 } elsif ( $typeofrecord2 =~ "b" || $typeofrecord2 =~ "s" ) {
413 print "TY - SER\r\n"; ## serial or serial component part
414 } elsif ( $typeofrecord2 =~ "m" ) {
415 print "TY - $typehash{$typeofrecord}\r\n"; ## book
416 } elsif ( $typeofrecord2 =~ "c" || $typeofrecord2 =~ "d" ) {
417 print "TY - GEN\r\n"; ## collections, part of collections or made-up collections
418 } elsif ( $typeofrecord2 =~ "i" ) {
419 print "TY - DATA\r\n"; ## updating loose-leafe as Dataset
422 print "TY - $typehash{$typeofrecord}\r\n";
425 ## use $typeofrecord as the return value, just in case
429 ##********************************************************************
430 ## normalize_author(): normalizes an authorname
431 ## Arguments: authorname subfield a
432 ## authorname subfield b
433 ## authorname subfield c
434 ## name type if known: 0=direct order
435 ## 1=only surname or full name in
437 ## 3=family, clan, dynasty name
438 ## Returns: the normalized authorname
439 ##********************************************************************
440 sub normalize_author {
441 my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
443 if ($nametype == 0) {
444 # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
445 warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
448 elsif ($nametype == 1) {
449 ## start munging subfield a (the real name part)
450 ## remove spaces after separators
451 $rawauthora =~ s%([,.]+) *%$1%g;
453 ## remove trailing separators after spaces
454 $rawauthora =~ s% *[,;:/]*$%%;
456 ## remove periods after a non-abbreviated name
457 $rawauthora =~ s%(\w{2,})\.%$1%g;
459 ## start munging subfield b (something like the suffix)
460 ## remove trailing separators after spaces
461 $rawauthorb =~ s% *[,;:/]*$%%;
463 ## we currently ignore subfield c until someone complains
464 if (length($rawauthorb) > 0) {
465 return join ", ", ($rawauthora, $rawauthorb);
471 elsif ($nametype == 3) {
476 ##********************************************************************
477 ## get_author(): gets authorname info from MARC fields 100, 700
478 ## Argument: field (100 or 700)
479 ## Returns: an author string in the format found in the record
480 ##********************************************************************
482 my ($authorfield) = @_;
485 ## the sequence of the name parts is encoded either in indicator
486 ## 1 (marc21) or 2 (unimarc)
487 my $marcflavour = C4::Context->preference("marcflavour");
488 my $intype = lc($marcflavour);
489 if ($intype eq "unimarc") {
492 else { ## assume marc21
496 print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
497 print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
498 print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
499 print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
500 print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
501 if ($intype eq "ukmarc") {
502 my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
503 normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
506 normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
510 ##********************************************************************
511 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
512 ## Argument: field (110, 111, 710, or 711)
513 ## Returns: an author string in the format found in the record
514 ##********************************************************************
516 my ($editorfield) = @_;
522 print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
523 print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
524 print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
525 return $editorfield->subfield('a');
529 ##********************************************************************
530 ## print_title(): gets info from MARC field 245
531 ## Arguments: field (245)
533 ##********************************************************************
535 my ($titlefield) = @_;
537 print "<marc>empty title field (245)\r\n" if $marcprint;
538 warn("empty title field (245)") if $marcprint;
541 print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
542 print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
543 print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
545 ## The title is usually written in a very odd notation. The title
546 ## proper ($a) often ends with a space followed by a separator like
547 ## a slash or a colon. The subtitle ($b) doesn't start with a space
548 ## so simple concatenation looks odd. We have to conditionally remove
549 ## the separator and make sure there's a space between title and
552 my $clean_title = $titlefield->subfield('a');
554 my $clean_subtitle = $titlefield->subfield('b');
555 $clean_subtitle ||= q{};
556 $clean_title =~ s% *[/:;.]$%%;
557 $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
559 my $marcflavour = C4::Context->preference("marcflavour");
560 my $intype = lc($marcflavour);
561 if (length($clean_title) > 0
562 || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
563 print "TI - ", $clean_title;
565 ## subfield $b is relevant only for marc21/ukmarc
566 if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
567 print ": ",$clean_subtitle;
572 ## The statement of responsibility is just this: horrors. There is
573 ## no formal definition how authors, editors and the like should
574 ## be written and designated. The field is free-form and resistant
575 ## to all parsing efforts, so this information is lost on me
580 ##********************************************************************
581 ## print_stitle(): prints info from series title field
584 ##********************************************************************
586 my ($titlefield) = @_;
589 print "<marc>empty series title field\r\n" if $marcprint;
592 print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
593 my $clean_title = $titlefield->subfield('a');
595 $clean_title =~ s% *[/:;.]$%%;
597 if (length($clean_title) > 0) {
598 print "T2 - ", $clean_title,"\r\n";
601 my $marcflavour = C4::Context->preference("marcflavour");
602 my $intype = lc($marcflavour);
603 if ($intype eq "unimarc") {
604 print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
605 if (length($titlefield->subfield('v')) > 0) {
606 print "VL - ", $titlefield->subfield('v'),"\r\n";
613 ##********************************************************************
614 ## print_isbn(): gets info from MARC field 020
615 ## Arguments: field (020)
616 ##********************************************************************
620 if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
621 print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
622 warn("no isbn found") if $marcprint;
625 if (length ($isbnfield->subfield('a')) < 10) {
626 print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
627 warn("truncated isbn") if $marcprint;
630 my $isbn = $isbnfield->subfield('a');
631 print "SN - ", $isbn, "\r\n";
635 ##********************************************************************
636 ## print_issn(): gets info from MARC field 022
637 ## Arguments: field (022)
638 ##********************************************************************
642 if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
643 print "<marc>no issn found (022\$a)\r\n" if $marcprint;
644 warn("no issn found") if $marcprint;
647 if (length ($issnfield->subfield('a')) < 9) {
648 print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
649 warn("truncated issn") if $marcprint;
652 my $issn = substr($issnfield->subfield('a'), 0, 9);
653 print "SN - ", $issn, "\r\n";
658 # print_uri() prints info from 856 u
663 foreach my $f856 (@f856s) {
664 if (my $uri = $f856->subfield('u')) {
665 print "UR - ", $uri, "\r\n";
670 ##********************************************************************
671 ## print_loc_callno(): gets info from MARC field 050
672 ## Arguments: field (050)
673 ##********************************************************************
674 sub print_loc_callno {
675 my($callnofield) = @_;
677 if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
678 print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
679 warn("no LOC call number found") if $marcprint;
682 print "AV - ", $callnofield->subfield('a'), " ", $callnofield->subfield('b'), "\r\n";
686 ##********************************************************************
687 ## print_dewey(): gets info from MARC field 082
688 ## Arguments: field (082)
689 ##********************************************************************
691 my($deweyfield) = @_;
693 if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
694 print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
695 warn("no Dewey number found") if $marcprint;
698 print "U1 - ", $deweyfield->subfield('a'), " ", $deweyfield->subfield('2'), "\r\n";
702 ##********************************************************************
703 ## print_pubinfo(): gets info from MARC field 260
704 ## Arguments: field (260)
705 ##********************************************************************
707 my($pubinfofield) = @_;
709 if (!$pubinfofield) {
710 print "<marc>no publication information found (260/264)\r\n" if $marcprint;
711 warn("no publication information found") if $marcprint;
714 ## the following information is available in MARC21:
716 ## $b publisher -> PB
718 ## the corresponding subfields for UNIMARC:
720 ## $c publisher -> PB
723 ## all of them are repeatable. We pool all places into a
724 ## comma-separated list in CY. We also pool all publishers
725 ## into a comma-separated list in PB. We break the rule with
726 ## the date field because this wouldn't make much sense. In
727 ## this case, we use the first occurrence for PY, the second
728 ## for Y2, and ignore the rest
730 my @pubsubfields = $pubinfofield->subfields();
736 my $pubsub_publisher;
739 my $marcflavour = C4::Context->preference("marcflavour");
740 my $intype = lc($marcflavour);
741 if ($intype eq "unimarc") {
743 $pubsub_publisher = "c";
746 else { ## assume marc21
748 $pubsub_publisher = "b";
752 ## loop over all subfield list entries
753 for my $tuple (@pubsubfields) {
754 ## each tuple consists of the subfield code and the value
755 if (@$tuple[0] eq $pubsub_place) {
756 ## strip any trailing crap
759 ## pool all occurrences in a list
762 elsif (@$tuple[0] eq $pubsub_publisher) {
763 ## strip any trailing crap
766 ## pool all occurrences in a list
767 push (@publishers, $_);
769 elsif (@$tuple[0] eq $pubsub_date) {
770 ## the dates are free-form, so we want to extract
771 ## a four-digit year and leave the rest as
773 my $protoyear = @$tuple[1];
774 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
776 ## strip any separator chars at the end
777 $protoyear =~ s% *[\.;:/]*$%%;
779 ## isolate a four-digit year. We discard anything
780 ## preceding the year, but keep everything after
781 ## the year as other info.
782 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
784 ## check what we've got. If there is no four-digit
785 ## year, make it up. If digits are replaced by '-',
786 ## replace those with 0s
788 if (index($protoyear, "/") == 4) {
790 ## replace all '-' in the four-digit year
792 substr($protoyear,0,4) =~ s!-!0!g;
796 print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
797 $protoyear = "0000///$protoyear";
798 warn("no four-digit year found, use 0000") if $marcprint;
801 if ($pycounter == 0 && length($protoyear)) {
802 print "PY - $protoyear\r\n";
804 elsif ($pycounter == 1 && length($_)) {
805 print "Y2 - $protoyear\r\n";
812 ## now dump the collected CY and PB lists
814 print "CY - ", join(", ", @cities), "\r\n";
816 if (@publishers > 0) {
817 print "PB - ", join(", ", @publishers), "\r\n";
822 ##********************************************************************
823 ## get_keywords(): prints info from MARC fields 6XX
824 ## Arguments: list of fields (6XX)
825 ##********************************************************************
827 my($fieldname, @keywords) = @_;
830 ## a list of all possible subfields
831 my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
833 ## loop over all 6XX fields
834 foreach my $kwfield (@keywords) {
835 if ($kwfield != undef) {
836 ## authornames get special treatment
837 if ($fieldname eq "600") {
838 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
840 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
843 ## retrieve all available subfields
844 my @kwsubfields = $kwfield->subfields();
846 ## loop over all available subfield tuples
847 foreach my $kwtuple (@kwsubfields) {
848 ## loop over all subfields to check
849 foreach my $subfield (@subfields) {
850 ## [0] contains subfield code
851 if (@$kwtuple[0] eq $subfield) {
852 ## [1] contains value, remove trailing separators
853 @$kwtuple[1] =~ s% *[,;.:/]*$%%;
854 if (length(@$kwtuple[1]) > 0) {
855 push @kw, @$kwtuple[1];
856 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
858 ## we can leave the subfields loop here
869 ##********************************************************************
870 ## pool_subx(): adds contents of several subfields to a list
871 ## Arguments: reference to a list
873 ## list of fields (5XX)
874 ##********************************************************************
876 my($aref, $fieldname, @notefields) = @_;
878 ## we use a list that contains the interesting subfields
880 # ToDo: this is apparently correct only for marc21
883 if ($fieldname eq "500") {
886 elsif ($fieldname eq "501") {
889 elsif ($fieldname eq "502") {
892 elsif ($fieldname eq "504") {
893 @subfields = ('a', 'b');
895 elsif ($fieldname eq "505") {
896 @subfields = ('a', 'g', 'r', 't', 'u');
898 elsif ($fieldname eq "506") {
899 @subfields = ('a', 'b', 'c', 'd', 'e');
901 elsif ($fieldname eq "507") {
902 @subfields = ('a', 'b');
904 elsif ($fieldname eq "508") {
907 elsif ($fieldname eq "510") {
908 @subfields = ('a', 'b', 'c', 'x', '3');
910 elsif ($fieldname eq "511") {
913 elsif ($fieldname eq "513") {
914 @subfields = ('a', 'b');
916 elsif ($fieldname eq "514") {
917 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
919 elsif ($fieldname eq "515") {
922 elsif ($fieldname eq "516") {
925 elsif ($fieldname eq "518") {
926 @subfields = ('a', '3');
928 elsif ($fieldname eq "521") {
929 @subfields = ('a', 'b', '3');
931 elsif ($fieldname eq "522") {
934 elsif ($fieldname eq "524") {
935 @subfields = ('a', '2', '3');
937 elsif ($fieldname eq "525") {
940 elsif ($fieldname eq "526") {
941 @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
943 elsif ($fieldname eq "530") {
944 @subfields = ('a', 'b', 'c', 'd', 'u', '3');
946 elsif ($fieldname eq "533") {
947 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
949 elsif ($fieldname eq "534") {
950 @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
952 elsif ($fieldname eq "535") {
953 @subfields = ('a', 'b', 'c', 'd', 'g', '3');
956 ## loop over all notefields
957 foreach my $notefield (@notefields) {
958 if (defined $notefield) {
959 ## retrieve all available subfield tuples
960 my @notesubfields = $notefield->subfields();
962 ## loop over all subfield tuples
963 foreach my $notetuple (@notesubfields) {
964 ## loop over all subfields to check
965 foreach my $subfield (@subfields) {
966 ## [0] contains subfield code
967 if (@$notetuple[0] eq $subfield) {
968 ## [1] contains value, remove trailing separators
969 print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
970 @$notetuple[1] =~ s% *[,;.:/]*$%%;
971 if (length(@$notetuple[1]) > 0) {
973 push @{$aref}, @$notetuple[1];
983 ##********************************************************************
984 ## print_abstract(): prints abstract fields
985 ## Arguments: list of fields (520)
986 ##********************************************************************
988 # ToDo: take care of repeatable subfields
991 ## we check the following subfields
992 my @subfields = ('a', 'b');
994 ## we generate a list for all useful strings
997 ## loop over all abfields
998 foreach my $abfield (@abfields) {
999 foreach my $field (@subfields) {
1000 if ( length( $abfield->subfield($field) ) > 0 ) {
1001 my $ab = $abfield->subfield($field);
1003 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
1005 ## strip trailing separators
1006 $ab =~ s% *[;,:./]*$%%;
1008 ## add string to the list
1009 push( @abstrings, $ab );
1014 my $allabs = join "; ", @abstrings;
1016 if (length($allabs) > 0) {
1017 print "N2 - ", $allabs, "\r\n";