1 package MARC::File::USMARC;
4 # Copyright 2000-2002 Katipo Communications
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it under the
9 # terms of the GNU General Public License as published by the Free Software
10 # Foundation; either version 2 of the License, or (at your option) any later
13 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along with
18 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 # Suite 330, Boston, MA 02111-1307 USA
21 # FIXME - Fix the POD to conform to Perl style. In particular,
22 # functions get an =item, not a =head2.
26 MARC::File::USMARC - USMARC-specific file handling
33 use vars qw( $VERSION $ERROR );
43 our $VERSION = '0.93';
46 our @ISA = qw( MARC::File );
48 use MARC::Record qw( LEADER_LEN );
49 use constant SUBFIELD_INDICATOR => "\x1F";
50 use constant END_OF_FIELD => "\x1E";
51 use constant END_OF_RECORD => "\x1D";
52 use constant DIRECTORY_ENTRY_LEN => 12;
56 use MARC::File::USMARC;
58 my $file = MARC::File::USMARC::in( $filename );
60 while ( my $marc = $file->next() ) {
74 Internal function to get the next raw record out of a file.
86 read( $fh, $reclen, 5 )
87 or return $self->_gripe( "Error reading record length: $!" );
90 or return $self->_gripe( "Invalid record length \"$reclen\"" );
92 read( $fh, substr($usmarc,5), $reclen-5 )
93 or return $self->_gripe( "Error reading $reclen byte record: $!" );
96 } elsif (defined($self->{data})) {
97 my $data=$self->{data};
98 my $pointer=$self->{pointer};
100 $reclen=substr($data,$pointer,5);
102 or return $self->_gripe( "Invalid record length \"$reclen\"" );
103 my $usmarc=substr($data,$pointer,$reclen);
104 $self->{pointer}=$pointer+$reclen;
111 Constructor for handling data from a USMARC file. This function takes care of all
112 the tag directory parsing & mangling.
114 Any warnings or coercions can be checked in the C<warnings()> function.
120 $text = shift if (ref($text)||$text) =~ /^MARC::File/;
122 my $marc = MARC::Record->new();
124 # Check for an all-numeric record length
125 ($text =~ /^(\d{5})/)
126 or return $marc->_gripe( "Record length \"", substr( $text, 0, 5 ), "\" is not numeric" );
129 ($reclen == length($text))
130 or return $marc->_gripe( "Invalid record length: Leader says $reclen bytes, but it's actually ", length( $text ) );
132 $marc->leader( substr( $text, 0, LEADER_LEN ) );
133 my @fields = split( END_OF_FIELD, substr( $text, LEADER_LEN ) );
134 my $dir = shift @fields or return _gripe( "No directory found" );
136 (length($dir) % 12 == 0)
137 or return $marc->_gripe( "Invalid directory length" );
138 my $nfields = length($dir)/12;
140 my $finalfield = pop @fields;
141 # Check for the record terminator, and ignore it
142 ($finalfield eq END_OF_RECORD)
143 or $marc->_warn( "Invalid record terminator: \"$finalfield\"" );
145 # Walk thru the directories, and shift off the fields while we're at it
146 # Shouldn't be any non-digits anywhere in any directory entry
147 my @directory = unpack( "A3 A4 A5" x $nfields, $dir );
148 my @bad = grep /\D/, @directory;
150 return $marc->_gripe( "Non-numeric entries in the tag directory: ", join( ", ", map { "\"$_\"" } @bad ) );
153 my $databytesused = 0;
154 while ( @directory ) {
155 my $tagno = shift @directory;
156 my $len = shift @directory;
157 my $offset = shift @directory;
158 my $tagdata = shift @fields;
160 # Check directory validity
161 ($tagno =~ /^\d\d\d$/)
162 or return $marc->_gripe( "Invalid field number in directory: \"$tagno\"" );
164 ($len == length($tagdata) + 1)
165 or $marc->_warn( "Invalid length in the directory for tag $tagno" );
167 ($offset == $databytesused)
168 or $marc->_warn( "Directory offsets are out of whack" );
169 $databytesused += $len;
172 $marc->add_fields( $tagno, $tagdata )
173 or return undef; # We're relying on add_fields() having set $MARC::Record::ERROR
175 my @subfields = split( SUBFIELD_INDICATOR, $tagdata );
176 my $indicators = shift @subfields
177 or return $marc->_gripe( "No subfields found." );
179 if ( $indicators =~ /^([0-9 ])([0-9 ])$/ ) {
180 ($ind1,$ind2) = ($1,$2);
182 $marc->_warn( "Invalid indicators \"$indicators\" forced to blanks\n" );
183 ($ind1,$ind2) = (" "," ");
186 # Split the subfield data into subfield name and data pairs
187 my @subfield_data = map { (substr($_,0,1),substr($_,1)) } @subfields;
188 $marc->add_fields( $tagno, $ind1, $ind2, @subfield_data )
193 # Once we're done, there shouldn't be any fields left over: They should all have shifted off.
195 or return $marc->_gripe( "I've got leftover fields that weren't in the directory" );
200 =head2 update_leader()
202 If any changes get made to the MARC record, the first 5 bytes of the
203 leader (the length) will be invalid. This function updates the
204 leader with the correct length of the record as it would be if
205 written out to a file.
209 sub update_leader() {
212 my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory();
214 $self->_set_leader_lengths( $reclen, $baseaddress );
217 =head2 _build_tag_directory()
219 Function for internal use only: Builds the tag directory that gets
220 put in front of the data in a MARC record.
222 Returns two array references, and two lengths: The tag directory, and the data fields themselves,
223 the length of all data (including the Leader that we expect will be added),
224 and the size of the Leader and tag directory.
228 sub _build_tag_directory {
230 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
231 die "Wanted a MARC::Record but got a ", ref($marc) unless ref($marc) eq "MARC::Record";
237 for my $field ( $marc->fields() ) {
238 # Dump data into proper format
239 my $str = $field->as_usmarc;
240 push( @fields, $str );
242 # Create directory entry
243 my $len = length $str;
244 my $direntry = sprintf( "%03d%04d%05d", $field->tag, $len, $dataend );
245 push( @directory, $direntry );
250 LEADER_LEN + # better be 24
251 ( @directory * DIRECTORY_ENTRY_LEN ) +
252 # all the directory entries
253 1; # end-of-field marker
257 $baseaddress + # stuff before first field
258 $dataend + # Length of the fields
259 1; # End-of-record marker
263 return (\@fields, \@directory, $total, $baseaddress);
268 Returns a string of characters suitable for writing out to a USMARC file,
269 including the leader, directory and all the fields.
275 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
277 my ($fields,$directory,$reclen,$baseaddress) = _build_tag_directory($marc);
278 $marc->set_leader_lengths( $reclen, $baseaddress );
280 # Glomp it all together
281 return join("",$marc->leader, @$directory, END_OF_FIELD, @$fields, END_OF_RECORD);
288 =head1 RELATED MODULES
294 Make some sort of autodispatch so that you don't have to explicitly
295 specify the MARC::File::X subclass, sort of like how DBI knows to
296 use DBD::Oracle or DBD::Mysql.
300 This code may be distributed under the same terms as Perl itself.
302 Please note that these modules are not products of or supported by the
303 employers of the various contributors to the code.
307 Andy Lester, E<lt>marc@petdance.comE<gt> or E<lt>alester@flr.follett.comE<gt>