1 package MARC::File::USMARC;
4 # Copyright 2000-2002 Katipo Communications
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it under the
9 # terms of the GNU General Public License as published by the Free Software
10 # Foundation; either version 2 of the License, or (at your option) any later
13 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along with
18 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 # Suite 330, Boston, MA 02111-1307 USA
23 MARC::File::USMARC - USMARC-specific file handling
30 use vars qw( $VERSION $ERROR );
40 our $VERSION = '0.93';
43 our @ISA = qw( MARC::File );
45 use MARC::Record qw( LEADER_LEN );
46 use constant SUBFIELD_INDICATOR => "\x1F";
47 use constant END_OF_FIELD => "\x1E";
48 use constant END_OF_RECORD => "\x1D";
49 use constant DIRECTORY_ENTRY_LEN => 12;
53 use MARC::File::USMARC;
55 my $file = MARC::File::USMARC::in( $filename );
57 while ( my $marc = $file->next() ) {
71 Internal function to get the next raw record out of a file.
83 read( $fh, $reclen, 5 )
84 or return $self->_gripe( "Error reading record length: $!" );
87 or return $self->_gripe( "Invalid record length \"$reclen\"" );
89 read( $fh, substr($usmarc,5), $reclen-5 )
90 or return $self->_gripe( "Error reading $reclen byte record: $!" );
93 } elsif (defined($self->{data})) {
94 my $data=$self->{data};
95 my $pointer=$self->{pointer};
97 $reclen=substr($data,$pointer,5);
99 or return $self->_gripe( "Invalid record length \"$reclen\"" );
100 my $usmarc=substr($data,$pointer,$reclen);
101 $self->{pointer}=$pointer+$reclen;
108 Constructor for handling data from a USMARC file. This function takes care of all
109 the tag directory parsing & mangling.
111 Any warnings or coercions can be checked in the C<warnings()> function.
117 $text = shift if (ref($text)||$text) =~ /^MARC::File/;
119 my $marc = MARC::Record->new();
121 # Check for an all-numeric record length
122 ($text =~ /^(\d{5})/)
123 or return $marc->_gripe( "Record length \"", substr( $text, 0, 5 ), "\" is not numeric" );
126 ($reclen == length($text))
127 or return $marc->_gripe( "Invalid record length: Leader says $reclen bytes, but it's actually ", length( $text ) );
129 $marc->leader( substr( $text, 0, LEADER_LEN ) );
130 my @fields = split( END_OF_FIELD, substr( $text, LEADER_LEN ) );
131 my $dir = shift @fields or return _gripe( "No directory found" );
133 (length($dir) % 12 == 0)
134 or return $marc->_gripe( "Invalid directory length" );
135 my $nfields = length($dir)/12;
137 my $finalfield = pop @fields;
138 # Check for the record terminator, and ignore it
139 ($finalfield eq END_OF_RECORD)
140 or $marc->_warn( "Invalid record terminator: \"$finalfield\"" );
142 # Walk thru the directories, and shift off the fields while we're at it
143 # Shouldn't be any non-digits anywhere in any directory entry
144 my @directory = unpack( "A3 A4 A5" x $nfields, $dir );
145 my @bad = grep /\D/, @directory;
147 return $marc->_gripe( "Non-numeric entries in the tag directory: ", join( ", ", map { "\"$_\"" } @bad ) );
150 my $databytesused = 0;
151 while ( @directory ) {
152 my $tagno = shift @directory;
153 my $len = shift @directory;
154 my $offset = shift @directory;
155 my $tagdata = shift @fields;
157 # Check directory validity
158 ($tagno =~ /^\d\d\d$/)
159 or return $marc->_gripe( "Invalid field number in directory: \"$tagno\"" );
161 ($len == length($tagdata) + 1)
162 or $marc->_warn( "Invalid length in the directory for tag $tagno" );
164 ($offset == $databytesused)
165 or $marc->_warn( "Directory offsets are out of whack" );
166 $databytesused += $len;
169 $marc->add_fields( $tagno, $tagdata )
170 or return undef; # We're relying on add_fields() having set $MARC::Record::ERROR
172 my @subfields = split( SUBFIELD_INDICATOR, $tagdata );
173 my $indicators = shift @subfields
174 or return $marc->_gripe( "No subfields found." );
176 if ( $indicators =~ /^([0-9 ])([0-9 ])$/ ) {
177 ($ind1,$ind2) = ($1,$2);
179 $marc->_warn( "Invalid indicators \"$indicators\" forced to blanks\n" );
180 ($ind1,$ind2) = (" "," ");
183 # Split the subfield data into subfield name and data pairs
184 my @subfield_data = map { (substr($_,0,1),substr($_,1)) } @subfields;
185 $marc->add_fields( $tagno, $ind1, $ind2, @subfield_data )
190 # Once we're done, there shouldn't be any fields left over: They should all have shifted off.
192 or return $marc->_gripe( "I've got leftover fields that weren't in the directory" );
197 =head2 update_leader()
199 If any changes get made to the MARC record, the first 5 bytes of the
200 leader (the length) will be invalid. This function updates the
201 leader with the correct length of the record as it would be if
202 written out to a file.
206 sub update_leader() {
209 my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory();
211 $self->_set_leader_lengths( $reclen, $baseaddress );
214 =head2 _build_tag_directory()
216 Function for internal use only: Builds the tag directory that gets
217 put in front of the data in a MARC record.
219 Returns two array references, and two lengths: The tag directory, and the data fields themselves,
220 the length of all data (including the Leader that we expect will be added),
221 and the size of the Leader and tag directory.
225 sub _build_tag_directory {
227 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
228 die "Wanted a MARC::Record but got a ", ref($marc) unless ref($marc) eq "MARC::Record";
234 for my $field ( $marc->fields() ) {
235 # Dump data into proper format
236 my $str = $field->as_usmarc;
237 push( @fields, $str );
239 # Create directory entry
240 my $len = length $str;
241 my $direntry = sprintf( "%03d%04d%05d", $field->tag, $len, $dataend );
242 push( @directory, $direntry );
247 LEADER_LEN + # better be 24
248 ( @directory * DIRECTORY_ENTRY_LEN ) +
249 # all the directory entries
250 1; # end-of-field marker
254 $baseaddress + # stuff before first field
255 $dataend + # Length of the fields
256 1; # End-of-record marker
260 return (\@fields, \@directory, $total, $baseaddress);
265 Returns a string of characters suitable for writing out to a USMARC file,
266 including the leader, directory and all the fields.
272 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
274 my ($fields,$directory,$reclen,$baseaddress) = _build_tag_directory($marc);
275 $marc->set_leader_lengths( $reclen, $baseaddress );
277 # Glomp it all together
278 return join("",$marc->leader, @$directory, END_OF_FIELD, @$fields, END_OF_RECORD);
285 =head1 RELATED MODULES
291 Make some sort of autodispatch so that you don't have to explicitly
292 specify the MARC::File::X subclass, sort of like how DBI knows to
293 use DBD::Oracle or DBD::Mysql.
297 This code may be distributed under the same terms as Perl itself.
299 Please note that these modules are not products of or supported by the
300 employers of the various contributors to the code.
304 Andy Lester, E<lt>marc@petdance.comE<gt> or E<lt>alester@flr.follett.comE<gt>