From 3584c4426bbe7687f4bbcf5fe9658a85e4021add Mon Sep 17 00:00:00 2001 From: Henri-Damien LAURENT Date: Fri, 3 Sep 2010 12:16:52 +0200 Subject: [PATCH] Bug 5579: remove items from MARC bib This is a squash of four patches by Henri-Damien Laurent starting work on removing the copy of item record information in the 9XX field of bibliographic records. The reason for doing this is primarily to improve performance, in particular, the expense of having to add/modify the bib record whenever an item changes. Now, whenever an item changes, the bib record is put in the queue to be reindexed; when the bib is indexed, the 9XX fields are inserted into the version of the bib that Zebra indexes. Since rebuild_zebra.pl runs in a separate process, the processing of the bib record will not delay (e.g.) circulation. As part of upgrading to 3.4, the following batch script should be run: misc/maintenance/remove_items_from_biblioitems.pl --run This should be followed by a complete reindexing of the bib records, e.g., misc/migration_tools/rebuild_zebra.pl -b -r Signed-off-by: Galen Charlton Signed-off-by: Claire Hernandez Signed-off-by: Chris Cormack --- C4/Biblio.pm | 24 +-------- C4/Items.pm | 28 ++++------- .../remove_items_from_biblioitems.pl | 50 +++++++++++++++++++ misc/migration_tools/rebuild_zebra.pl | 31 ++++++++++-- 4 files changed, 89 insertions(+), 44 deletions(-) create mode 100644 misc/maintenance/remove_items_from_biblioitems.pl diff --git a/C4/Biblio.pm b/C4/Biblio.pm index 7be1b3e22e..ff4a24ed38 100755 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -313,29 +313,6 @@ sub ModBiblio { $record->delete_field($field); } - # parse each item, and, for an unknown reason, re-encode each subfield - # if you don't do that, the record will have encoding mixed - # and the biblio will be re-encoded. - # strange, I (Paul P.) searched more than 1 day to understand what happends - # but could only solve the problem this way... 
- my @fields = $oldRecord->field($itemtag); - foreach my $fielditem (@fields) { - my $field; - foreach ( $fielditem->subfields() ) { - # re-encode the subfield only if it isn't already in utf-8. - my ($tag, $value) = @$_; - $tag = Encode::encode('utf-8', $tag) unless utf8::is_utf8($tag); - $value = Encode::encode('utf-8', $value) unless utf8::is_utf8($value); - - if ($field) { - $field->add_subfields( $tag => $value ); - } else { - $field = MARC::Field->new( "$itemtag", '', '', $tag => $value ); - } - } - $record->append_fields($field); - } - foreach my $field ($record->fields()) { if (! $field->is_control_field()) { if (scalar($field->subfields()) == 0) { @@ -1067,6 +1044,7 @@ sub GetMarcBiblio { if ($marcxml) { $record = eval { MARC::Record::new_from_xml( $marcxml, "utf8", C4::Context->preference('marcflavour') ) }; if ($@) { warn " problem with :$biblionumber : $@ \n$marcxml"; } + return unless $record; # $record = MARC::Record::new_from_usmarc( $marc) if $marc; return $record; diff --git a/C4/Items.pm b/C4/Items.pm index 01509c2974..2da21cdb89 100644 --- a/C4/Items.pm +++ b/C4/Items.pm @@ -265,6 +265,7 @@ sub AddItem { # create MARC tag representing item and add to bib my $new_item_marc = _marc_from_item_hash($item, $frameworkcode, $unlinked_item_subfields); _add_item_field_to_biblio($new_item_marc, $item->{'biblionumber'}, $frameworkcode ); + #_add_item_field_to_biblio($new_item_marc, $item->{'biblionumber'}, $frameworkcode ); logaction("CATALOGUING", "ADD", $itemnumber, "item") if C4::Context->preference("CataloguingLog"); @@ -370,7 +371,7 @@ sub AddItemBatchFromMarc { } # update the MARC biblio - $biblionumber = ModBiblioMarc( $record, $biblionumber, $frameworkcode ); + # $biblionumber = ModBiblioMarc( $record, $biblionumber, $frameworkcode ); return (\@itemnumbers, \@errors); } @@ -515,7 +516,7 @@ sub ModItem { my $new_item_marc = _marc_from_item_hash($whole_item, $frameworkcode, $unlinked_item_subfields) or die "FAILED _marc_from_item_hash($whole_item, 
$frameworkcode)"; - _replace_item_field_in_biblio($new_item_marc, $biblionumber, $itemnumber, $frameworkcode); + #_replace_item_field_in_biblio($new_item_marc, $biblionumber, $itemnumber, $frameworkcode); ($new_item_marc eq '0') and die "$new_item_marc is '0', not hashref"; # logaction line would crash anyway logaction("CATALOGUING", "MODIFY", $itemnumber, $new_item_marc->as_formatted) if C4::Context->preference("CataloguingLog"); } @@ -578,23 +579,13 @@ sub DelItem { # get the MARC record my $record = GetMarcBiblio($biblionumber); - my $frameworkcode = GetFrameworkCode($biblionumber); + ModZebra( $biblionumber, "specialUpdate", "biblioserver", undef, undef ); # backup the record my $copy2deleted = $dbh->prepare("UPDATE deleteditems SET marc=? WHERE itemnumber=?"); $copy2deleted->execute( $record->as_usmarc(), $itemnumber ); #search item field code - my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",$frameworkcode); - my @fields = $record->field($itemtag); - - # delete the item specified - foreach my $field (@fields) { - if ( $field->subfield($itemsubfield) eq $itemnumber ) { - $record->delete_field($field); - } - } - &ModBiblioMarc( $record, $biblionumber, $frameworkcode ); logaction("CATALOGUING", "DELETE", $itemnumber, "item") if C4::Context->preference("CataloguingLog"); } @@ -2119,16 +2110,16 @@ sub MoveItemFromBiblio { } # Saving the modification - ModBiblioMarc($record, $frombiblio, $frameworkcode); + #ModBiblioMarc($record, $frombiblio, $frameworkcode); # Getting the record we want to move the item to - $record = GetMarcBiblio($tobiblio); + #$record = GetMarcBiblio($tobiblio); # Inserting the previously saved item - $record->insert_fields_ordered($item); + #$record->insert_fields_ordered($item); # Saving the modification - ModBiblioMarc($record, $tobiblio, $frameworkcode); + #ModBiblioMarc($record, $tobiblio, $frameworkcode); } else { return undef; @@ -2212,6 +2203,7 @@ sub _koha_modify_item { $error.="ERROR in _koha_modify_item 
$query".$dbh->errstr; warn $error; } + ModZebra( $item->{biblionumber}, "specialUpdate", "biblioserver", undef, undef ); return ($item->{'itemnumber'},$error); } @@ -2355,7 +2347,7 @@ sub _replace_item_field_in_biblio { } # save the record - ModBiblioMarc($completeRecord, $biblionumber, $frameworkcode); + #ModBiblioMarc($completeRecord, $biblionumber, $frameworkcode); } =head2 _repack_item_errors diff --git a/misc/maintenance/remove_items_from_biblioitems.pl b/misc/maintenance/remove_items_from_biblioitems.pl new file mode 100644 index 0000000000..1a50f629de --- /dev/null +++ b/misc/maintenance/remove_items_from_biblioitems.pl @@ -0,0 +1,50 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use C4::Context; +use C4::Biblio; +use Getopt::Long; + +my ($wherestring,$run,$want_help); +my $result = GetOptions( + 'where:s' => \$wherestring, + '--run' => \$run, + 'help|h' => \$want_help, +); +if ( not $result or $want_help ) { + print_usage(); + exit 0; +} + + +my $dbh=C4::Context->dbh; +my $querysth=qq{SELECT biblionumber from biblioitems }; +$querysth.=" WHERE $wherestring " if ($wherestring); +my $query=$dbh->prepare($querysth); + +$query->execute; +while (my $biblionumber=$query->fetchrow){ + my $record=GetMarcBiblio($biblionumber); + + if ($record){ + ModBiblio($record,$biblionumber,GetFrameworkCode($biblionumber)) ; + } + else { + print "error in $biblionumber : can't parse biblio"; + } +} +sub print_usage { + print <<_USAGE_; +$0: removes items from selected biblios + + +Parameters: + -where use this to limit modifications to some biblios + --run run the command + --help or -h show this message. 
+_USAGE_ +} + +# diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index 682c280e1d..74c73a1460 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -9,6 +9,7 @@ use File::Temp qw/ tempdir /; use File::Path; use C4::Biblio; use C4::AuthoritiesMarc; +use C4::Items; # # script that checks zebradir structure & create directories & mandatory files if needed @@ -310,6 +311,7 @@ sub export_marc_records_from_sth { my $num_exported = 0; open (OUT, ">:utf8 ", "$directory/exported_records") or die $!; my $i = 0; + my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",''); while (my ($record_number) = $sth->fetchrow_array) { print "." if ( $verbose_logging ); print "\r$i" unless ($i++ %100 or !$verbose_logging); @@ -317,6 +319,29 @@ sub export_marc_records_from_sth { my $marcxml = $record_type eq 'biblio' ? GetXmlBiblio( $record_number ) : GetAuthorityXML( $record_number ); + if ($record_type eq 'biblio'){ + #CALL sub ProcessItems + my @items=GetItemsInfo($record_number,'intra',30); + if (@items){ + my $record=MARC::Record->new; + my @itemsrecord; + foreach my $item (@items){ + my $record=Item2Marc($item, $record_number); + push @itemsrecord, $record->field($itemtag); + #if xml then print itemfield as xml + # and update marcxml + # else push field + } + $record->insert_fields_ordered(@itemsrecord); + my $itemsxml=$record->as_xml_record(); + my $searchstring='<record>\n'; + my $index=index($itemsxml,'<record>\n',0); + $itemsxml=substr($itemsxml,$index+length($searchstring)); + $searchstring='</record>'; + $marcxml=substr($marcxml,0,index($marcxml,$searchstring)); + $marcxml.=$itemsxml; + } + } if ( $marcxml ) { print OUT $marcxml if $marcxml; $num_exported++; @@ -330,7 +355,7 @@ sub export_marc_records_from_sth { # strung together with no single root element. zebraidx doesn't seem # to care, though, at least if you're using the GRS-1 filter. 
It does # care if you're using the DOM filter, which requires valid XML file(s). - print OUT ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc(); + print OUT ($as_xml) ? $marc->as_xml_record(C4::Context->preference('marcflavour')) : $marc->as_usmarc(); $num_exported++; } } @@ -358,7 +383,7 @@ sub export_marc_records_from_list { # strung together with no single root element. zebraidx doesn't seem # to care, though, at least if you're using the GRS-1 filter. It does # care if you're using the DOM filter, which requires valid XML file(s). - print OUT ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc(); + print OUT ($as_xml) ? $marc->as_xml_record(C4::Context->preference('marcflavour')) : $marc->as_usmarc(); $num_exported++; } } @@ -387,7 +412,7 @@ sub generate_deleted_marc_records { fix_unimarc_100($marc); } - print OUT ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc(); + print OUT ($as_xml) ? $marc->as_xml_record(C4::Context->preference("marcflavour")) : $marc->as_usmarc(); $num_exported++; } print "\nRecords exported: $num_exported\n" if ( $verbose_logging ); -- 2.20.1