2 # small script that import an iso2709 file into koha 2.0
8 use MARC::File::USMARC;
9 # Uncomment the line below and use MARC::File::XML again when it works better.
11 # use MARC::File::XML;
16 # According to kados, an undocumented feature of setting MARC::Charset to
17 # ignore_errors(1) is that errors are not ignored. Instead of deleting the
18 # whole subfield when a character does not translate properly from MARC8 into
19 # UTF-8, just the problem characters are deleted. This should solve at least
20 # some of the fixme problems for fMARC8ToUTF8().
22 # Problems remain if there are MARC 21 records where 000/09 is set incorrectly.
24 # MARC::Charset->ignore_errors(1);
28 use Time::HiRes qw(gettimeofday);
30 binmode(STDOUT, ":utf8");
34 my ( $input_marc_file, $number) = ('',0);
35 my ($version, $delete, $test_parameter,$char_encoding, $verbose, $commit,$fk_off);
40 'commit:f' => \$commit,
41 'file:s' => \$input_marc_file,
45 't' => \$test_parameter,
46 'c:s' => \$char_encoding,
51 # FIXME: Management of error conditions needed for record parsing problems
52 # and MARC8 character sets with mappings to Unicode not yet included in
53 # MARC::Charset. The real world rarity of these problems is not fully tested.
54 # Unmapped character sets will throw a warning currently and processing will
55 # continue with the error condition. A fairly trivial correction should
56 # address some record parsing and unmapped character set problems but I need
57 # time to implement a test and correction for undef subfields and revert to
58 # MARC8 if mappings are missing. -- thd
59 sub fMARC8ToUTF8($$) {
61 my ($verbose) = shift;
64 my $leader = $record->leader();
66 print "\n000 " . $leader;
69 foreach my $field ($record->fields()) {
70 if ($field->is_control_field()) {
73 my $fieldName = $field->tag();
74 my $fieldValue = $field->data();
75 $fieldValue =~ s/ /#/g;
76 print "\n" . $fieldName;
77 print ' ' . $fieldValue;
82 my $fieldName = $field->tag();
83 my $indicator1Value = $field->indicator(1);
84 my $indicator2Value = $field->indicator(2);
87 $indicator1Value =~ s/ /#/;
88 $indicator2Value =~ s/ /#/;
89 print "\n" . $fieldName . ' ' .
94 foreach my $subfield ($field->subfields()) {
95 my $subfieldName = $subfield->[0];
96 my $subfieldValue = $subfield->[1];
97 $subfieldValue = MARC::Charset::marc8_to_utf8($subfieldValue);
99 # Alas, MARC::Field::update() does not work correctly.
100 ## push (@subfieldsArray, $subfieldName, $subfieldValue);
102 push @subfieldsArray, [$subfieldName, $subfieldValue];
105 print " \$" . $subfieldName . ' ' . $subfieldValue;
110 # Alas, MARC::Field::update() does not work correctly.
112 # The first instance in the field of a of a repeated subfield
113 # overwrites the content from later instances with the content
114 # from the first instance.
115 ## $field->update(@subfieldsArray);
117 foreach my $subfieldRow(@subfieldsArray) {
118 my $subfieldName = $subfieldRow->[0];
119 $field->delete_subfields($subfieldName);
121 foreach my $subfieldRow(@subfieldsArray) {
122 $field->add_subfields(@$subfieldRow);
127 # Reading the indicator values again is not necessary.
128 # They were not converted.
129 # $indicator1Value = $field->indicator(1);
130 # $indicator2Value = $field->indicator(2);
131 # $indicator1Value =~ s/ /#/;
132 # $indicator2Value =~ s/ /#/;
133 print "\nCONVERTED TO UTF-8:\n" . $fieldName . ' ' .
136 foreach my $subfield ($field->subfields()) {
137 my $subfieldName = $subfield->[0];
138 my $subfieldValue = $subfield->[1];
139 print " \$" . $subfieldName . ' ' . $subfieldValue;
145 print "\n" if $verbose;
150 $record->encoding('UTF-8');
155 if ($version || ($input_marc_file eq '')) {
157 small script to import an iso2709 file into Koha.
159 \th : this version/help screen
160 \tfile /path/to/file/to/dump : the file to import
161 \tv : verbose mode. 1 means "some infos", 2 means "MARC dumping"
162 \tfk : Turn off foreign key checks during import.
163 \tn : the number of records to import. If missing, all the file is imported
164 \tcommit : the number of records to wait before performing a 'commit' operation
165 \tt : test mode : parses the file, saying what he would do, but doing nothing.
166 \tc : the characteristic MARC flavour. At the moment, only MARC21 and UNIMARC
167 \tsupported. MARC21 by default.
168 \td : delete EVERYTHING related to biblio in koha-DB before import :tables :
169 \t\tbiblio, \tbiblioitems,\titems
170 IMPORTANT : don't use this script before you've entered and checked your MARC parameters tables twice (or more!).
171 Otherwise, the import won't work correctly and you will get invalid data.
174 \t\$ export KOHA_CONF=/etc/koha.conf
175 \t\$ perl misc/migration_tools/bulkmarcimport.pl -d -commit 1000 -file /home/jmf/koha.mrc -n 3000
181 my $dbh = C4::Context->dbh;
183 # save the CataloguingLog property : we don't want to log a bulkmarcimport. It will slow the import &
184 # will create problems in the action_logs table, that can't handle more than 1 entry per second per user.
185 my $CataloguingLog = C4::Context->preference('CataloguingLog');
186 $dbh->do("UPDATE systempreferences SET value=0 WHERE variable='CataloguingLog'");
189 print "deleting biblios\n";
190 $dbh->do("truncate biblio");
191 $dbh->do("truncate biblioitems");
192 $dbh->do("truncate items");
195 $dbh->do("SET FOREIGN_KEY_CHECKS = 0");
197 if ($test_parameter) {
198 print "TESTING MODE ONLY\n DOING NOTHING\n===============\n";
201 my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21';
203 print "Characteristic MARC flavour: $marcFlavour\n" if $verbose;
205 my $starttime = gettimeofday;
206 my $batch = MARC::Batch->new( 'USMARC', $input_marc_file );
207 $batch->warnings_off();
208 $batch->strict_off();
214 $commitnum = $commit;
218 #1st of all, find item MARC tag.
219 my ($tagfield,$tagsubfield) = &GetMarcFromKohaField("items.itemnumber",'');
220 # $dbh->do("lock tables biblio write, biblioitems write, items write, marc_biblio write, marc_subfield_table write, marc_blob_subfield write, marc_word write, marc_subfield_structure write, stopwords write");
221 while ( my $record = $batch->next() ) {
222 # warn "=>".$record->as_formatted;
224 # warn "NUM:".$number;
227 print "\r$i" unless $i % 100;
229 # z3950_extended_services('commit',set_service_options('commit'));
230 # print "COMMIT OPERATION SUCCESSFUL\n";
232 # my $timeneeded = gettimeofday - $starttime;
233 # die "$i MARC records imported in $timeneeded seconds\n";
235 # # perform the commit operation ever so often
237 # z3950_extended_services('commit',set_service_options('commit'));
238 # $commit+=$commitnum;
239 # print "COMMIT OPERATION SUCCESSFUL\n";
241 #now, parse the record, extract the item fields, and store them in somewhere else.
243 ## create an empty record object to populate
244 my $newRecord = MARC::Record->new();
245 $newRecord->leader($record->leader());
247 # go through each field in the existing record
248 foreach my $oldField ( $record->fields() ) {
250 # just reproduce tags < 010 in our new record
252 # Fields are not necessarily only numeric in the actual world of records
253 # nor in what I would recommend for additonal safe non-interfering local
254 # use fields. The following regular expression match is much safer than
255 # a numeric evaluation. -- thd
256 if ( $oldField->tag() =~ m/^00/ ) {
257 $newRecord->append_fields( $oldField );
261 # store our new subfield data in this list
262 my @newSubfields = ();
264 # go through each subfield code/data pair
265 foreach my $pair ( $oldField->subfields() ) {
266 #$pair->[1] =~ s/\<//g;
267 #$pair->[1] =~ s/\>//g;
268 push( @newSubfields, $pair->[0], $pair->[1] ); #char_decode($pair->[1],$char_encoding) );
271 # add the new field to our new record
272 my $newField = MARC::Field->new(
274 $oldField->indicator(1),
275 $oldField->indicator(2),
279 $newRecord->append_fields( $newField );
283 warn "$i ==>".$newRecord->as_formatted() if $verbose eq 2;
284 my @fields = $newRecord->field($tagfield);
288 foreach my $field (@fields) {
289 my $item = MARC::Record->new();
290 $item->append_fields($field);
292 $newRecord->delete_field($field);
295 print "$i : $nbitems items found\n" if $verbose;
296 # now, create biblio and items with Addbiblio call.
298 unless ($test_parameter) {
299 my ( $bibid, $oldbibitemnum );
300 eval { ( $bibid, $oldbibitemnum ) = AddBiblio( $newRecord, '' ); };
303 warn "ERROR: Adding biblio $bibid failed\n" if $verbose
305 warn "ADDED biblio NB $bibid in DB\n" if $verbose;
306 for ( my $it = 0 ; $it <= $#items ; $it++ ) {
307 eval { AddItem( $items[$it], $bibid, $oldbibitemnum ); };
308 warn "ERROR: Adding item $it, rec $i failed\n" if ($@);
312 last if $i == $number;
317 $dbh->do("SET FOREIGN_KEY_CHECKS = 1");
319 # final commit of the changes
320 #z3950_extended_services('commit',set_service_options('commit'));
321 #print "COMMIT OPERATION SUCCESSFUL\n";
323 # restore CataloguingLog
324 $dbh->do("UPDATE systempreferences SET value=$CataloguingLog WHERE variable='CataloguingLog'");
326 my $timeneeded = gettimeofday - $starttime;
327 print "$i MARC records done in $timeneeded seconds\n";