2 # small script that import an iso2709 file into koha 2.0
8 use MARC::File::USMARC;
9 # Uncomment the line below and use MARC::File::XML again when it works better.
11 # use MARC::File::XML;
16 # According to kados, an undocumented feature of setting MARC::Charset to
17 # ignore_errors(1) is that errors are not ignored. Instead of deleting the
18 # whole subfield when a character does not translate properly from MARC8 into
19 # UTF-8, just the problem characters are deleted. This should solve at least
20 # some of the fixme problems for fMARC8ToUTF8().
22 # Problems remain if there are MARC 21 records where 000/09 is set incorrectly.
24 # MARC::Charset->ignore_errors(1);
28 use Time::HiRes qw(gettimeofday);
30 binmode(STDOUT, ":utf8");
34 my ( $input_marc_file, $number) = ('',0);
35 my ($version, $delete, $test_parameter,$char_encoding, $verbose, $commit,$fk_off);
40 'commit:f' => \$commit,
41 'file:s' => \$input_marc_file,
45 't' => \$test_parameter,
46 'c:s' => \$char_encoding,
51 # FIXME: Management of error conditions needed for record parsing problems
52 # and MARC8 character sets with mappings to Unicode not yet included in
53 # MARC::Charset. The real world rarity of these problems is not fully tested.
54 # Unmapped character sets will throw a warning currently and processing will
55 # continue with the error condition. A fairly trivial correction should
56 # address some record parsing and unmapped character set problems but I need
57 # time to implement a test and correction for undef subfields and revert to
58 # MARC8 if mappings are missing. -- thd
59 sub fMARC8ToUTF8($$) {
61 my ($verbose) = shift;
64 my $leader = $record->leader();
66 print "\n000 " . $leader;
69 foreach my $field ($record->fields()) {
70 if ($field->is_control_field()) {
73 my $fieldName = $field->tag();
74 my $fieldValue = $field->data();
75 $fieldValue =~ s/ /#/g;
76 print "\n" . $fieldName;
77 print ' ' . $fieldValue;
82 my $fieldName = $field->tag();
83 my $indicator1Value = $field->indicator(1);
84 my $indicator2Value = $field->indicator(2);
87 $indicator1Value =~ s/ /#/;
88 $indicator2Value =~ s/ /#/;
89 print "\n" . $fieldName . ' ' .
94 foreach my $subfield ($field->subfields()) {
95 my $subfieldName = $subfield->[0];
96 my $subfieldValue = $subfield->[1];
97 $subfieldValue = MARC::Charset::marc8_to_utf8($subfieldValue);
99 # Alas, MARC::Field::update() does not work correctly.
100 ## push (@subfieldsArray, $subfieldName, $subfieldValue);
102 push @subfieldsArray, [$subfieldName, $subfieldValue];
105 print " \$" . $subfieldName . ' ' . $subfieldValue;
110 # Alas, MARC::Field::update() does not work correctly.
112 # The first instance in the field of a of a repeated subfield
113 # overwrites the content from later instances with the content
114 # from the first instance.
115 ## $field->update(@subfieldsArray);
117 foreach my $subfieldRow(@subfieldsArray) {
118 my $subfieldName = $subfieldRow->[0];
119 $field->delete_subfields($subfieldName);
121 foreach my $subfieldRow(@subfieldsArray) {
122 $field->add_subfields(@$subfieldRow);
127 # Reading the indicator values again is not necessary.
128 # They were not converted.
129 # $indicator1Value = $field->indicator(1);
130 # $indicator2Value = $field->indicator(2);
131 # $indicator1Value =~ s/ /#/;
132 # $indicator2Value =~ s/ /#/;
133 print "\nCONVERTED TO UTF-8:\n" . $fieldName . ' ' .
136 foreach my $subfield ($field->subfields()) {
137 my $subfieldName = $subfield->[0];
138 my $subfieldValue = $subfield->[1];
139 print " \$" . $subfieldName . ' ' . $subfieldValue;
145 print "\n" if $verbose;
150 $record->encoding('UTF-8');
155 if ($version || ($input_marc_file eq '')) {
157 small script to import an iso2709 file into Koha.
159 \th : this version/help screen
160 \tfile /path/to/file/to/dump : the file to dump
161 \tv : verbose mode. 1 means "some infos", 2 means "MARC dumping"
162 \tfk : Turn off foreign key checks during import.
163 \tn : the number of records to import. If missing, all the file is imported
164 \tcommit : the number of records to wait before performing a 'commit' operation
165 \tt : test mode : parses the file, saying what he would do, but doing nothing.
166 \tc : the characteristic MARC flavour. At the moment, only MARC21 and UNIMARC
167 \tsupported. MARC21 by default.
168 \td : delete EVERYTHING related to biblio in koha-DB before import :tables :
169 \t\tbiblio, \t\tbiblioitems, \t\tsubjects,\titems
171 \t\tmarc_subfield_table, \tmarc_word, \t\tmarc_blob_subfield
172 IMPORTANT : don't use this script before you've entered and checked your MARC parameters tables twice (or more!).
173 Otherwise, the import won't work correctly and you will get invalid data.
176 \t\$ export KOHA_CONF=/etc/koha.conf
177 \t\$ perl misc/migration_tools/bulkmarcimport.pl -d -commit 1000 -file /home/jmf/koha.mrc -n 3000
183 my $dbh = C4::Context->dbh;
186 print "deleting biblios\n";
187 $dbh->do("truncate biblio");
188 $dbh->do("truncate biblioitems");
189 $dbh->do("truncate items");
192 $dbh->do("SET FOREIGN_KEY_CHECKS = 0");
194 if ($test_parameter) {
195 print "TESTING MODE ONLY\n DOING NOTHING\n===============\n";
198 my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21';
200 print "Characteristic MARC flavour: $marcFlavour\n" if $verbose;
202 my $starttime = gettimeofday;
203 my $batch = MARC::Batch->new( 'USMARC', $input_marc_file );
204 $batch->warnings_off();
205 $batch->strict_off();
211 $commitnum = $commit;
215 #1st of all, find item MARC tag.
216 my ($tagfield,$tagsubfield) = &GetMarcFromKohaField("items.itemnumber",'');
217 # $dbh->do("lock tables biblio write, biblioitems write, items write, marc_biblio write, marc_subfield_table write, marc_blob_subfield write, marc_word write, marc_subfield_structure write, stopwords write");
218 while ( my $record = $batch->next() ) {
219 # warn "=>".$record->as_formatted;
221 # warn "NUM:".$number;
224 print "\r$i" unless $i % 100;
226 # z3950_extended_services('commit',set_service_options('commit'));
227 # print "COMMIT OPERATION SUCCESSFUL\n";
229 # my $timeneeded = gettimeofday - $starttime;
230 # die "$i MARC records imported in $timeneeded seconds\n";
232 # # perform the commit operation ever so often
234 # z3950_extended_services('commit',set_service_options('commit'));
235 # $commit+=$commitnum;
236 # print "COMMIT OPERATION SUCCESSFUL\n";
238 #now, parse the record, extract the item fields, and store them in somewhere else.
240 ## create an empty record object to populate
241 my $newRecord = MARC::Record->new();
242 $newRecord->leader($record->leader());
244 # go through each field in the existing record
245 foreach my $oldField ( $record->fields() ) {
247 # just reproduce tags < 010 in our new record
249 # Fields are not necessarily only numeric in the actual world of records
250 # nor in what I would recommend for additonal safe non-interfering local
251 # use fields. The following regular expression match is much safer than
252 # a numeric evaluation. -- thd
253 if ( $oldField->tag() =~ m/^00/ ) {
254 $newRecord->append_fields( $oldField );
258 # store our new subfield data in this list
259 my @newSubfields = ();
261 # go through each subfield code/data pair
262 foreach my $pair ( $oldField->subfields() ) {
263 #$pair->[1] =~ s/\<//g;
264 #$pair->[1] =~ s/\>//g;
265 push( @newSubfields, $pair->[0], $pair->[1] ); #char_decode($pair->[1],$char_encoding) );
268 # add the new field to our new record
269 my $newField = MARC::Field->new(
271 $oldField->indicator(1),
272 $oldField->indicator(2),
276 $newRecord->append_fields( $newField );
280 warn "$i ==>".$newRecord->as_formatted() if $verbose eq 2;
281 my @fields = $newRecord->field($tagfield);
285 foreach my $field (@fields) {
286 my $item = MARC::Record->new();
287 $item->append_fields($field);
289 $newRecord->delete_field($field);
292 print "$i : $nbitems items found\n" if $verbose;
293 # now, create biblio and items with Addbiblio call.
294 unless ($test_parameter) {
295 warn "NEWREC : ".$newRecord->as_formatted;
296 my ($bibid,$oldbibitemnum) = AddBiblio($newRecord,'');
297 warn "ADDED biblio NB $bibid in DB\n" if $verbose;
298 for (my $i=0;$i<=$#items;$i++) {
299 # warn "here is the biblioitemnumber $oldbibitemnum";
300 AddItem($items[$i],$bibid,$oldbibitemnum);
305 $dbh->do("SET FOREIGN_KEY_CHECKS = 1");
307 # final commit of the changes
308 #z3950_extended_services('commit',set_service_options('commit'));
309 #print "COMMIT OPERATION SUCCESSFUL\n";
311 my $timeneeded = gettimeofday - $starttime;
312 print "$i MARC records done in $timeneeded seconds\n";