2 # small script that import an iso2709 file into koha 2.0
7 # find Koha's Perl modules
8 # test carefully before changing this
10 eval { require "$FindBin::Bin/../kohalib.pl" };
14 use MARC::File::USMARC;
15 # Uncomment the line below and use MARC::File::XML again when it works better.
17 # use MARC::File::XML;
22 # According to kados, an undocumented feature of setting MARC::Charset to
23 # ignore_errors(1) is that errors are not ignored. Instead of deleting the
24 # whole subfield when a character does not translate properly from MARC8 into
25 # UTF-8, just the problem characters are deleted. This should solve at least
26 # some of the fixme problems for fMARC8ToUTF8().
28 # Problems remain if there are MARC 21 records where 000/09 is set incorrectly.
30 # MARC::Charset->ignore_errors(1);
34 use Time::HiRes qw(gettimeofday);
36 binmode(STDOUT, ":utf8");
40 my ( $input_marc_file, $number) = ('',0);
41 my ($version, $delete, $test_parameter,$char_encoding, $verbose, $commit,$fk_off);
46 'commit:f' => \$commit,
47 'file:s' => \$input_marc_file,
51 't' => \$test_parameter,
52 'c:s' => \$char_encoding,
57 # FIXME: Management of error conditions needed for record parsing problems
58 # and MARC8 character sets with mappings to Unicode not yet included in
59 # MARC::Charset. The real world rarity of these problems is not fully tested.
60 # Unmapped character sets will throw a warning currently and processing will
61 # continue with the error condition. A fairly trivial correction should
62 # address some record parsing and unmapped character set problems but I need
63 # time to implement a test and correction for undef subfields and revert to
64 # MARC8 if mappings are missing. -- thd
65 sub fMARC8ToUTF8($$) {
67 my ($verbose) = shift;
70 my $leader = $record->leader();
72 print "\n000 " . $leader;
75 foreach my $field ($record->fields()) {
76 if ($field->is_control_field()) {
79 my $fieldName = $field->tag();
80 my $fieldValue = $field->data();
81 $fieldValue =~ s/ /#/g;
82 print "\n" . $fieldName;
83 print ' ' . $fieldValue;
88 my $fieldName = $field->tag();
89 my $indicator1Value = $field->indicator(1);
90 my $indicator2Value = $field->indicator(2);
93 $indicator1Value =~ s/ /#/;
94 $indicator2Value =~ s/ /#/;
95 print "\n" . $fieldName . ' ' .
100 foreach my $subfield ($field->subfields()) {
101 my $subfieldName = $subfield->[0];
102 my $subfieldValue = $subfield->[1];
103 $subfieldValue = MARC::Charset::marc8_to_utf8($subfieldValue);
105 # Alas, MARC::Field::update() does not work correctly.
106 ## push (@subfieldsArray, $subfieldName, $subfieldValue);
108 push @subfieldsArray, [$subfieldName, $subfieldValue];
111 print " \$" . $subfieldName . ' ' . $subfieldValue;
116 # Alas, MARC::Field::update() does not work correctly.
118 # The first instance in the field of a of a repeated subfield
119 # overwrites the content from later instances with the content
120 # from the first instance.
121 ## $field->update(@subfieldsArray);
123 foreach my $subfieldRow(@subfieldsArray) {
124 my $subfieldName = $subfieldRow->[0];
125 $field->delete_subfields($subfieldName);
127 foreach my $subfieldRow(@subfieldsArray) {
128 $field->add_subfields(@$subfieldRow);
133 # Reading the indicator values again is not necessary.
134 # They were not converted.
135 # $indicator1Value = $field->indicator(1);
136 # $indicator2Value = $field->indicator(2);
137 # $indicator1Value =~ s/ /#/;
138 # $indicator2Value =~ s/ /#/;
139 print "\nCONVERTED TO UTF-8:\n" . $fieldName . ' ' .
142 foreach my $subfield ($field->subfields()) {
143 my $subfieldName = $subfield->[0];
144 my $subfieldValue = $subfield->[1];
145 print " \$" . $subfieldName . ' ' . $subfieldValue;
151 print "\n" if $verbose;
156 $record->encoding('UTF-8');
161 if ($version || ($input_marc_file eq '')) {
163 small script to import an iso2709 file into Koha.
165 \th : this version/help screen
166 \tfile /path/to/file/to/dump : the file to import
167 \tv : verbose mode. 1 means "some infos", 2 means "MARC dumping"
168 \tfk : Turn off foreign key checks during import.
169 \tn : the number of records to import. If missing, all the file is imported
170 \tcommit : the number of records to wait before performing a 'commit' operation
171 \tt : test mode : parses the file, saying what he would do, but doing nothing.
172 \tc : the characteristic MARC flavour. At the moment, only MARC21 and UNIMARC
173 \tsupported. MARC21 by default.
174 \td : delete EVERYTHING related to biblio in koha-DB before import :tables :
175 \t\tbiblio, \tbiblioitems,\titems
176 IMPORTANT : don't use this script before you've entered and checked your MARC parameters tables twice (or more!).
177 Otherwise, the import won't work correctly and you will get invalid data.
180 \t\$ export KOHA_CONF=/etc/koha.conf
181 \t\$ perl misc/migration_tools/bulkmarcimport.pl -d -commit 1000 -file /home/jmf/koha.mrc -n 3000
187 my $dbh = C4::Context->dbh;
189 # save the CataloguingLog property : we don't want to log a bulkmarcimport. It will slow the import &
190 # will create problems in the action_logs table, that can't handle more than 1 entry per second per user.
191 my $CataloguingLog = C4::Context->preference('CataloguingLog');
192 $dbh->do("UPDATE systempreferences SET value=0 WHERE variable='CataloguingLog'");
195 print "deleting biblios\n";
196 $dbh->do("truncate biblio");
197 $dbh->do("truncate biblioitems");
198 $dbh->do("truncate items");
201 $dbh->do("SET FOREIGN_KEY_CHECKS = 0");
203 if ($test_parameter) {
204 print "TESTING MODE ONLY\n DOING NOTHING\n===============\n";
207 my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21';
209 print "Characteristic MARC flavour: $marcFlavour\n" if $verbose;
211 my $starttime = gettimeofday;
212 my $batch = MARC::Batch->new( 'USMARC', $input_marc_file );
213 $batch->warnings_off();
214 $batch->strict_off();
220 $commitnum = $commit;
224 #1st of all, find item MARC tag.
225 my ($tagfield,$tagsubfield) = &GetMarcFromKohaField("items.itemnumber",'');
226 # $dbh->do("lock tables biblio write, biblioitems write, items write, marc_biblio write, marc_subfield_table write, marc_blob_subfield write, marc_word write, marc_subfield_structure write, stopwords write");
227 while ( my $record = $batch->next() ) {
228 # warn "=>".$record->as_formatted;
230 # warn "NUM:".$number;
233 print "\r$i" unless $i % 100;
235 # z3950_extended_services('commit',set_service_options('commit'));
236 # print "COMMIT OPERATION SUCCESSFUL\n";
238 # my $timeneeded = gettimeofday - $starttime;
239 # die "$i MARC records imported in $timeneeded seconds\n";
241 # # perform the commit operation ever so often
243 # z3950_extended_services('commit',set_service_options('commit'));
244 # $commit+=$commitnum;
245 # print "COMMIT OPERATION SUCCESSFUL\n";
247 #now, parse the record, extract the item fields, and store them in somewhere else.
249 ## create an empty record object to populate
250 my $newRecord = MARC::Record->new();
251 $newRecord->leader($record->leader());
253 # go through each field in the existing record
254 foreach my $oldField ( $record->fields() ) {
256 # just reproduce tags < 010 in our new record
258 # Fields are not necessarily only numeric in the actual world of records
259 # nor in what I would recommend for additonal safe non-interfering local
260 # use fields. The following regular expression match is much safer than
261 # a numeric evaluation. -- thd
262 if ( $oldField->tag() =~ m/^00/ ) {
263 $newRecord->append_fields( $oldField );
267 # store our new subfield data in this list
268 my @newSubfields = ();
270 # go through each subfield code/data pair
271 foreach my $pair ( $oldField->subfields() ) {
272 #$pair->[1] =~ s/\<//g;
273 #$pair->[1] =~ s/\>//g;
274 push( @newSubfields, $pair->[0], $pair->[1] ); #char_decode($pair->[1],$char_encoding) );
277 # add the new field to our new record
278 my $newField = MARC::Field->new(
280 $oldField->indicator(1),
281 $oldField->indicator(2),
285 $newRecord->append_fields( $newField );
289 warn "$i ==>".$newRecord->as_formatted() if $verbose eq 2;
290 my @fields = $newRecord->field($tagfield);
294 foreach my $field (@fields) {
295 my $item = MARC::Record->new();
296 $item->append_fields($field);
298 $newRecord->delete_field($field);
301 print "$i : $nbitems items found\n" if $verbose;
302 # now, create biblio and items with Addbiblio call.
304 unless ($test_parameter) {
305 my ( $bibid, $oldbibitemnum );
306 eval { ( $bibid, $oldbibitemnum ) = AddBiblio( $newRecord, '' ); };
309 warn "ERROR: Adding biblio $bibid failed\n" if $verbose
311 warn "ADDED biblio NB $bibid in DB\n" if $verbose;
312 for ( my $it = 0 ; $it <= $#items ; $it++ ) {
313 # FIXME - duplicate barcode check needs to become part of AddItem()
314 my $itemhash = TransformMarcToKoha($dbh, $items[$it]);
315 my $duplicate_barcode = exists($itemhash->{'barcode'}) && GetItemnumberFromBarcode($itemhash->{'barcode'});
316 if ($duplicate_barcode) {
317 warn "ERROR: cannot add item $itemhash->{'barcode'} for biblio $bibid: duplicate barcode\n" if $verbose;
319 eval { AddItem( $items[$it], $bibid, $oldbibitemnum ); };
320 warn "ERROR: Adding item $it, rec $i failed\n" if ($@);
325 last if $i == $number;
330 $dbh->do("SET FOREIGN_KEY_CHECKS = 1");
332 # final commit of the changes
333 #z3950_extended_services('commit',set_service_options('commit'));
334 #print "COMMIT OPERATION SUCCESSFUL\n";
336 # restore CataloguingLog
337 $dbh->do("UPDATE systempreferences SET value=$CataloguingLog WHERE variable='CataloguingLog'");
339 my $timeneeded = gettimeofday - $starttime;
340 print "$i MARC records done in $timeneeded seconds\n";