From ca201e36afb70782dde4463a33856d1e54630686 Mon Sep 17 00:00:00 2001 From: tipaul Date: Thu, 10 May 2007 14:45:15 +0000 Subject: [PATCH] Koha NoZebra : - support for authorities - some bugfixes in ordering and "CCL" parsing - support for authorities <=> biblios walking Seems I can do what I want now, so I consider its done, except for bugfixes that will be needed i m sure ! --- C4/AuthoritiesMarc.pm | 469 +++++++++++++++--------- C4/Biblio.pm | 123 ++++--- C4/Search.pm | 78 ++-- authorities/authorities.pl | 1 - misc/migration_tools/rebuild_nozebra.pl | 124 ++++++- misc/migration_tools/rebuild_zebra.pl | 2 +- opac/opac-rss.pl | 2 +- 7 files changed, 525 insertions(+), 274 deletions(-) diff --git a/C4/AuthoritiesMarc.pm b/C4/AuthoritiesMarc.pm index 9e670ea42f..bbde916956 100644 --- a/C4/AuthoritiesMarc.pm +++ b/C4/AuthoritiesMarc.pm @@ -92,128 +92,221 @@ returns ref to array result and count of results returned =cut sub SearchAuthorities { - my ($tags, $and_or, $excluding, $operator, $value, $offset,$length,$authtypecode,$sortby) = @_; - my $dbh=C4::Context->dbh; - my $query; - my $attr; - # the marclist may contain "mainentry". In this case, search the tag_to_report, that depends on - # the authtypecode. Then, search on $a of this tag_to_report - # also store main entry MARC tag, to extract it at end of search - my $mainentrytag; - ##first set the authtype search and may be multiple authorities - my $n=0; - my @authtypecode; - my @auths=split / /,$authtypecode ; - foreach my $auth (@auths){ - $query .=" \@attr 1=Authority/format-id \@attr 5=100 ".$auth; ##No truncation on authtype - push @authtypecode ,$auth; - $n++; - } - if ($n>1){ - $query= "\@or ".$query; - } - - my $dosearch; - my $and; - my $q2; - for(my $i = 0 ; $i <= $#{$value} ; $i++) - { - if (@$value[$i]){ - ##If mainentry search $a tag - if (@$tags[$i] eq "mainmainentry") { - $attr =" \@attr 1=Heading "; - }elsif (@$tags[$i] eq "mainentry") { - $attr =" \@attr 1=Heading-Entity "; - }else{ - $attr =" \@attr 1=Any "; + my ($tags, $and_or, $excluding, $operator, $value, $offset,$length,$authtypecode,$sortby) = @_; +# warn "CALL : $tags, $and_or, $excluding, $operator, $value, $offset,$length,$authtypecode,$sortby"; + my $dbh=C4::Context->dbh; + if (C4::Context->preference('NoZebra')) { + + # + # build the query + # + my $query; + my @auths=split / /,$authtypecode ; + foreach my $auth (@auths){ + $query .="AND auth_type= $auth "; } - if (@$operator[$i] eq 'is') { - $attr.=" \@attr 4=1 \@attr 5=100 ";##Phrase, No truncation,all of subfield field must match - }elsif (@$operator[$i] eq "="){ - $attr.=" \@attr 4=107 "; #Number Exact match - }elsif (@$operator[$i] eq "start"){ - $attr.=" \@attr 4=1 \@attr 5=1 ";#Phrase, Right truncated + $query =~ s/^AND //; + my $dosearch; + for(my $i = 0 ; $i <= $#{$value} ; $i++) + { + if (@$value[$i]){ + if (@$tags[$i] eq "mainmainentry") { + $query .=" AND mainmainentry"; + }elsif (@$tags[$i] eq "mainentry") { + $query .=" AND mainentry"; + } else { + $query .=" AND "; + } + if (@$operator[$i] eq 'is') { + $query.=(@$tags[$i]?"=":""). '"'.@$value[$i].'"'; + }elsif (@$operator[$i] eq "="){ + $query.=(@$tags[$i]?"=":""). '"'.@$value[$i].'"'; + }elsif (@$operator[$i] eq "start"){ + $query.=(@$tags[$i]?"=":"").'"'.@$value[$i].'%"'; + } else { + $query.=(@$tags[$i]?"=":"").'"'.@$value[$i].'%"'; + } + $dosearch=1; + }#if value + } + # + # do the query (if we had some search term + # + if ($dosearch) { +# warn "QUERY : $query"; + my $result = C4::Search::NZanalyse($query,'authorityserver'); +# warn "result : $result"; + my %result; + foreach (split /;/,$result) { + my ($authid,$title) = split /,/,$_; + # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title + # and we don't want to get only 1 result for each of them !!! + # hint & speed improvement : we can order without reading the record + # so order, and read records only for the requested page ! + $result{$title.$authid}=$authid; + } + # sort the hash and return the same structure as GetRecords (Zebra querying) + my @finalresult = (); + my $numbers=0; + if ($sortby eq 'HeadingDsc') { # sort by mainmainentry desc + foreach my $key (sort {$b cmp $a} (keys %result)) { + push @finalresult, $result{$key}; +# warn "push..."$#finalresult; + $numbers++; + } + } else { # sort by mainmainentry ASC + foreach my $key (sort (keys %result)) { + push @finalresult, $result{$key}; +# warn "push..."$#finalresult; + $numbers++; + } + } + # limit the $results_per_page to result size if it's more + $length = $numbers-1 if $numbers < $length; + # for the requested page, replace authid by the complete record + # speed improvement : avoid reading too much things + for (my $counter=$offset;$counter<=$offset+$length;$counter++) { +# $finalresult[$counter] = GetAuthority($finalresult[$counter])->as_usmarc; + my $separator=C4::Context->preference('authoritysep'); + my $authrecord = MARC::File::USMARC::decode(GetAuthority($finalresult[$counter])->as_usmarc); + my $authid=$authrecord->field('001')->data(); + my $summary=BuildSummary($authrecord,$authid,$authtypecode); + my $query_auth_tag = "SELECT auth_tag_to_report FROM auth_types WHERE authtypecode=?"; + my $sth = $dbh->prepare($query_auth_tag); + $sth->execute($authtypecode); + my $auth_tag_to_report = $sth->fetchrow; + my %newline; + $newline{used}=CountUsage($authid); + $newline{summary} = $summary; + $newline{authid} = $authid; + $newline{even} = $counter % 2; + $finalresult[$counter]= \%newline; + } + return (\@finalresult, $numbers); } else { - $attr .=" \@attr 5=1 ";## Word list, right truncated, anywhere + return; } - $and .=" \@and " ; - $attr =$attr."\"".@$value[$i]."\""; - $q2 .=$attr; - $dosearch=1; - }#if value - } - ##Add how many queries generated - $query= $and.$query.$q2; - ## Adding order - $query=' @or @attr 7=1 @attr 1=Heading 0 @or @attr 7=1 @attr 1=Heading-Entity 1'.$query if ($sortby eq "HeadingAsc"); - $query=' @or @attr 7=2 @attr 1=Heading 0 @or @attr 7=1 @attr 1=Heading-Entity 1'.$query if ($sortby eq "HeadingDsc"); - warn $query; - - $offset=0 unless $offset; - my $counter = $offset; - $length=10 unless $length; - my @oAuth; - my $i; - $oAuth[0]=C4::Context->Zconn("authorityserver" , 1); - my $Anewq= new ZOOM::Query::PQF($query,$oAuth[0]); - my $oAResult; - $oAResult= $oAuth[0]->search($Anewq) ; - while (($i = ZOOM::event(\@oAuth)) != 0) { - my $ev = $oAuth[$i-1]->last_event(); - last if $ev == ZOOM::Event::ZEND; - } - my($error, $errmsg, $addinfo, $diagset) = $oAuth[0]->error_x(); - if ($error) { - warn "oAuth error: $errmsg ($error) $addinfo $diagset\n"; - goto NOLUCK; - } - - my $nbresults; - $nbresults=$oAResult->size(); - my $nremains=$nbresults; - my @result = (); - my @finalresult = (); - - if ($nbresults>0){ - - ##Find authid and linkid fields - ##we may be searching multiple authoritytypes. - ## FIXME this assumes that all authid and linkid fields are the same for all authority types - # my ($authidfield,$authidsubfield)=GetAuthMARCFromKohaField($dbh,"auth_header.authid",$authtypecode[0]); - # my ($linkidfield,$linkidsubfield)=GetAuthMARCFromKohaField($dbh,"auth_header.linkid",$authtypecode[0]); - while (($counter < $nbresults) && ($counter < ($offset + $length))) { - - ##Here we have to extract MARC record and $authid from ZEBRA AUTHORITIES - my $rec=$oAResult->record($counter); - my $marcdata=$rec->raw(); - my $authrecord; - my $separator=C4::Context->preference('authoritysep'); - $authrecord = MARC::File::USMARC::decode($marcdata); - my $authid=$authrecord->field('001')->data(); - my $summary=BuildSummary($authrecord,$authid,$authtypecode); - my $query_auth_tag = "SELECT auth_tag_to_report FROM auth_types WHERE authtypecode=?"; - my $sth = $dbh->prepare($query_auth_tag); - $sth->execute($authtypecode); - my $auth_tag_to_report = $sth->fetchrow; - my %newline; - $newline{summary} = $summary; - $newline{authid} = $authid; - $newline{even} = $counter % 2; - $counter++; - push @finalresult, \%newline; - }## while counter - ### - for (my $z=0; $z<@finalresult; $z++){ - my $count=CountUsage($finalresult[$z]{authid}); - $finalresult[$z]{used}=$count; - }# all $z's - - }## if nbresult -NOLUCK: -# $oAResult->destroy(); -# $oAuth[0]->destroy(); - - return (\@finalresult, $nbresults); + } else { + my $query; + my $attr; + # the marclist may contain "mainentry". In this case, search the tag_to_report, that depends on + # the authtypecode. Then, search on $a of this tag_to_report + # also store main entry MARC tag, to extract it at end of search + my $mainentrytag; + ##first set the authtype search and may be multiple authorities + my $n=0; + my @authtypecode; + my @auths=split / /,$authtypecode ; + foreach my $auth (@auths){ + $query .=" \@attr 1=Authority/format-id \@attr 5=100 ".$auth; ##No truncation on authtype + push @authtypecode ,$auth; + $n++; + } + if ($n>1){ + $query= "\@or ".$query; + } + + my $dosearch; + my $and; + my $q2; + for(my $i = 0 ; $i <= $#{$value} ; $i++) + { + if (@$value[$i]){ + ##If mainentry search $a tag + if (@$tags[$i] eq "mainmainentry") { + $attr =" \@attr 1=Heading "; + }elsif (@$tags[$i] eq "mainentry") { + $attr =" \@attr 1=Heading-Entity "; + }else{ + $attr =" \@attr 1=Any "; + } + if (@$operator[$i] eq 'is') { + $attr.=" \@attr 4=1 \@attr 5=100 ";##Phrase, No truncation,all of subfield field must match + }elsif (@$operator[$i] eq "="){ + $attr.=" \@attr 4=107 "; #Number Exact match + }elsif (@$operator[$i] eq "start"){ + $attr.=" \@attr 4=1 \@attr 5=1 ";#Phrase, Right truncated + } else { + $attr .=" \@attr 5=1 ";## Word list, right truncated, anywhere + } + $and .=" \@and " ; + $attr =$attr."\"".@$value[$i]."\""; + $q2 .=$attr; + $dosearch=1; + }#if value + } + ##Add how many queries generated + $query= $and.$query.$q2; + ## Adding order + $query=' @or @attr 7=1 @attr 1=Heading 0 @or @attr 7=1 @attr 1=Heading-Entity 1'.$query if ($sortby eq "HeadingAsc"); + $query=' @or @attr 7=2 @attr 1=Heading 0 @or @attr 7=1 @attr 1=Heading-Entity 1'.$query if ($sortby eq "HeadingDsc"); + + $offset=0 unless $offset; + my $counter = $offset; + $length=10 unless $length; + my @oAuth; + my $i; + $oAuth[0]=C4::Context->Zconn("authorityserver" , 1); + my $Anewq= new ZOOM::Query::PQF($query,$oAuth[0]); + my $oAResult; + $oAResult= $oAuth[0]->search($Anewq) ; + while (($i = ZOOM::event(\@oAuth)) != 0) { + my $ev = $oAuth[$i-1]->last_event(); + last if $ev == ZOOM::Event::ZEND; + } + my($error, $errmsg, $addinfo, $diagset) = $oAuth[0]->error_x(); + if ($error) { + warn "oAuth error: $errmsg ($error) $addinfo $diagset\n"; + goto NOLUCK; + } + + my $nbresults; + $nbresults=$oAResult->size(); + my $nremains=$nbresults; + my @result = (); + my @finalresult = (); + + if ($nbresults>0){ + + ##Find authid and linkid fields + ##we may be searching multiple authoritytypes. + ## FIXME this assumes that all authid and linkid fields are the same for all authority types + # my ($authidfield,$authidsubfield)=GetAuthMARCFromKohaField($dbh,"auth_header.authid",$authtypecode[0]); + # my ($linkidfield,$linkidsubfield)=GetAuthMARCFromKohaField($dbh,"auth_header.linkid",$authtypecode[0]); + while (($counter < $nbresults) && ($counter < ($offset + $length))) { + + ##Here we have to extract MARC record and $authid from ZEBRA AUTHORITIES + my $rec=$oAResult->record($counter); + my $marcdata=$rec->raw(); + my $authrecord; + my $separator=C4::Context->preference('authoritysep'); + $authrecord = MARC::File::USMARC::decode($marcdata); + my $authid=$authrecord->field('001')->data(); + my $summary=BuildSummary($authrecord,$authid,$authtypecode); + my $query_auth_tag = "SELECT auth_tag_to_report FROM auth_types WHERE authtypecode=?"; + my $sth = $dbh->prepare($query_auth_tag); + $sth->execute($authtypecode); + my $auth_tag_to_report = $sth->fetchrow; + my %newline; + $newline{summary} = $summary; + $newline{authid} = $authid; + $newline{even} = $counter % 2; + $counter++; + push @finalresult, \%newline; + }## while counter + ### + for (my $z=0; $z<@finalresult; $z++){ + my $count=CountUsage($finalresult[$z]{authid}); + $finalresult[$z]{used}=$count; + }# all $z's + + }## if nbresult + NOLUCK: + # $oAResult->destroy(); + # $oAuth[0]->destroy(); + + return (\@finalresult, $nbresults); + } } =head2 CountUsage @@ -227,21 +320,26 @@ counts Usage of Authid in bibliorecords. =cut sub CountUsage { - my ($authid) = @_; - ### try ZOOM search here - my $oConnection=C4::Context->Zconn("biblioserver",1); - my $query; - $query= "an=".$authid; - - my $oResult = $oConnection->search(new ZOOM::Query::CCL2RPN( $query, $oConnection )); - my $result; - while ((my $i = ZOOM::event([ $oConnection ])) != 0) { - my $ev = $oConnection->last_event(); - if ($ev == ZOOM::Event::ZEND) { - $result = $oResult->size(); - } - } - return ($result); + my ($authid) = @_; + if (C4::Context->preference('NoZebra')) { + # Read the index Koha-Auth-Number for this authid and count the lines + my $result = C4::Search::NZanalyse("an=$authid"); + return scalar split /;/,$result; + } else { + ### ZOOM search here + my $oConnection=C4::Context->Zconn("biblioserver",1); + my $query; + $query= "an=".$authid; + my $oResult = $oConnection->search(new ZOOM::Query::CCL2RPN( $query, $oConnection )); + my $result; + while ((my $i = ZOOM::event([ $oConnection ])) != 0) { + my $ev = $oConnection->last_event(); + if ($ev == ZOOM::Event::ZEND) { + $result = $oResult->size(); + } + } + return ($result); + } } =head2 CountUsageChildren @@ -402,7 +500,7 @@ sub AddAuthority { ##Both authid and authtypecode is expected to be in the same field. Modify if other requirements arise $record->add_fields('001',$authid) unless $record->field('001'); $record->add_fields('152','','','b'=>$authtypecode) unless $record->field('152'); - warn $record->as_formatted; +# warn $record->as_formatted; $dbh->do("lock tables auth_header WRITE"); $sth=$dbh->prepare("insert into auth_header (authid,datecreated,authtypecode,marc) values (?,now(),?,?)"); $sth->execute($authid,$authtypecode,$record->as_usmarc); @@ -417,8 +515,7 @@ sub AddAuthority { $sth->finish; } $dbh->do("unlock tables"); - ModZebra($authid,'specialUpdate',"authorityserver"); - + ModZebra($authid,'specialUpdate',"authorityserver",$record); return ($authid); } @@ -439,7 +536,7 @@ sub DelAuthority { my ($authid) = @_; my $dbh=C4::Context->dbh; - ModZebra($authid,"recordDelete","authorityserver"); + ModZebra($authid,"recordDelete","authorityserver",GetAuthority($authid)); $dbh->do("delete from auth_header where authid=$authid") ; } @@ -950,50 +1047,54 @@ sub merge { push @tags_using_authtype,$tagfield."9" ; } - # now, find every biblio using this authority - my $oConnection=C4::Context->Zconn("biblioserver"); - my $query; - $query= "an= ".$mergefrom; - my $oResult = $oConnection->search(new ZOOM::Query::CCL2RPN( $query, $oConnection )); - my $count=$oResult->size() if ($oResult); - my @reccache; - my $z=0; - while ( $z<$count ) { - my $rec; - $rec=$oResult->record($z); - my $marcdata = $rec->raw(); - push @reccache, $marcdata; - $z++; - } - $oResult->destroy(); - foreach my $marc(@reccache){ - my $update; - my $marcrecord; - $marcrecord = MARC::File::USMARC::decode($marc); - foreach my $tagfield (@tags_using_authtype){ - $tagfield=substr($tagfield,0,3); - my @tags = $marcrecord->field($tagfield); - foreach my $tag (@tags){ - my $tagsubs=$tag->subfield("9"); - #warn "$tagfield:$tagsubs:$mergefrom"; - if ($tagsubs== $mergefrom) { - $tag->update("9" =>$mergeto); - foreach my $subfield (@record_to) { - # warn "$subfield,$subfield->[0],$subfield->[1]"; - $tag->update($subfield->[0] =>$subfield->[1]); - }#for $subfield + if (C4::Context->preference('NoZebra')) { + warn "MERGE TO DO"; + } else { + # now, find every biblio using this authority + my $oConnection=C4::Context->Zconn("biblioserver"); + my $query; + $query= "an= ".$mergefrom; + my $oResult = $oConnection->search(new ZOOM::Query::CCL2RPN( $query, $oConnection )); + my $count=$oResult->size() if ($oResult); + my @reccache; + my $z=0; + while ( $z<$count ) { + my $rec; + $rec=$oResult->record($z); + my $marcdata = $rec->raw(); + push @reccache, $marcdata; + $z++; } - $marcrecord->delete_field($tag); - $marcrecord->add_fields($tag); - $update=1; - }#for each tag - }#foreach tagfield - my $oldbiblio = TransformMarcToKoha($dbh,$marcrecord,"") ; - if ($update==1){ - &ModBiblio($marcrecord,$oldbiblio->{'biblionumber'},GetFrameworkCode($oldbiblio->{'biblionumber'})) ; + $oResult->destroy(); + foreach my $marc(@reccache){ + my $update; + my $marcrecord; + $marcrecord = MARC::File::USMARC::decode($marc); + foreach my $tagfield (@tags_using_authtype){ + $tagfield=substr($tagfield,0,3); + my @tags = $marcrecord->field($tagfield); + foreach my $tag (@tags){ + my $tagsubs=$tag->subfield("9"); + #warn "$tagfield:$tagsubs:$mergefrom"; + if ($tagsubs== $mergefrom) { + $tag->update("9" =>$mergeto); + foreach my $subfield (@record_to) { + # warn "$subfield,$subfield->[0],$subfield->[1]"; + $tag->update($subfield->[0] =>$subfield->[1]); + }#for $subfield + } + $marcrecord->delete_field($tag); + $marcrecord->add_fields($tag); + $update=1; + }#for each tag + }#foreach tagfield + my $oldbiblio = TransformMarcToKoha($dbh,$marcrecord,"") ; + if ($update==1){ + &ModBiblio($marcrecord,$oldbiblio->{'biblionumber'},GetFrameworkCode($oldbiblio->{'biblionumber'})) ; + } + + }#foreach $marc } - - }#foreach $marc # now, find every other authority linked with this authority # my $oConnection=C4::Context->Zconn("authorityserver"); # my $query; @@ -1056,6 +1157,14 @@ Paul POULAIN paul.poulain@free.fr # $Id$ # $Log$ +# Revision 1.46 2007/05/10 14:45:15 tipaul +# Koha NoZebra : +# - support for authorities +# - some bugfixes in ordering and "CCL" parsing +# - support for authorities <=> biblios walking +# +# Seems I can do what I want now, so I consider its done, except for bugfixes that will be needed i m sure ! +# # Revision 1.45 2007/04/06 14:48:45 hdl # Code Cleaning : AuthoritiesMARC. # diff --git a/C4/Biblio.pm b/C4/Biblio.pm index 23d4d99684..ef1066e788 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -2712,6 +2712,7 @@ ModZebra( $biblionumber, $op, $server, $newRecord ); sub ModZebra { ###Accepts a $server variable thus we can use it for biblios authorities or other zebra dbs my ( $biblionumber, $op, $server, $newRecord ) = @_; +# warn "ModZebra with : ( $biblionumber, $op, $server, $newRecord )"; my $dbh=C4::Context->dbh; #warn "SERVER:".$server; # @@ -2724,36 +2725,42 @@ sub ModZebra { # lock the nozebra table : we will read index lines, update them in Perl process # and write everything in 1 transaction. # lock the table to avoid someone else overwriting what we are doing - $dbh->do('LOCK TABLES nozebra WRITE,biblio WRITE,biblioitems WRITE, systempreferences WRITE'); + $dbh->do('LOCK TABLES nozebra WRITE,biblio WRITE,biblioitems WRITE, systempreferences WRITE, auth_types WRITE, auth_header WRITE'); my %result; # the result hash that will be builded by deletion / add, and written on mySQL at the end, to improve speed - my $record= GetMarcBiblio($biblionumber); + my $record; + if ($server eq 'biblioserver') { + $record= GetMarcBiblio($biblionumber); + } else { + $record= C4::AuthoritiesMarc::GetAuthority($biblionumber); + } if ($op eq 'specialUpdate') { # OK, we have to add or update the record # 1st delete (virtually, in indexes) ... - %result = _DelBiblioNoZebra($biblionumber,$record); + %result = _DelBiblioNoZebra($biblionumber,$record,$server); # ... add the record - %result=_AddBiblioNoZebra($biblionumber,$newRecord, %result); + %result=_AddBiblioNoZebra($biblionumber,$newRecord, $server, %result); } else { # it's a deletion, delete the record... - %result=_DelBiblioNoZebra($biblionumber,$record); +# warn "DELETE the record $biblionumber on $server".$record->as_formatted; + %result=_DelBiblioNoZebra($biblionumber,$record,$server); } # ok, now update the database... - my $sth = $dbh->prepare("UPDATE nozebra SET biblionumbers=? WHERE indexname=? AND value=?"); + my $sth = $dbh->prepare("UPDATE nozebra SET biblionumbers=? WHERE server=? AND indexname=? AND value=?"); foreach my $key (keys %result) { foreach my $index (keys %{$result{$key}}) { - warn "UPDATING : $key , $index with :".$result{$key}->{$index}; - $sth->execute($result{$key}->{$index},$key,$index); +# warn "UPDATING : $server $key , $index with :".$result{$key}->{$index}; + $sth->execute($result{$key}->{$index}, $server, $key, $index); } } $dbh->do('UNLOCK TABLES'); } else { - # - # we use zebra, just fill zebraqueue table - # - my $sth=$dbh->prepare("insert into zebraqueue (biblio_auth_number ,server,operation) values(?,?,?)"); - $sth->execute($biblionumber,$server,$op); - $sth->finish; + # + # we use zebra, just fill zebraqueue table + # + my $sth=$dbh->prepare("insert into zebraqueue (biblio_auth_number ,server,operation) values(?,?,?)"); + $sth->execute($biblionumber,$server,$op); + $sth->finish; } } @@ -2777,7 +2784,7 @@ sub GetNoZebraIndexes { =head1 INTERNAL FUNCTIONS -=head2 _DelBiblioNoZebra($biblionumber,$record); +=head2 _DelBiblioNoZebra($biblionumber,$record,$server); function to delete a biblio in NoZebra indexes This function does NOT delete anything in database : it reads all the indexes entries @@ -2785,21 +2792,33 @@ sub GetNoZebraIndexes { The SQL part is done either : - after the Add if we are modifying a biblio (delete + add again) - immediatly after this sub if we are doing a true deletion. + $server can be 'biblioserver' or 'authorityserver' : it indexes biblios or authorities (in the same table, $server being part of the table itself =cut sub _DelBiblioNoZebra { - my ($biblionumber,$record)=@_; + my ($biblionumber, $record, $server)=@_; - warn "DELETING".$record->as_formatted; # Get the indexes my $dbh = C4::Context->dbh; # Get the indexes - my %index=GetNoZebraIndexes; - # get title of the record (to store the 10 first letters with the index) - my ($titletag,$titlesubfield) = GetMarcFromKohaField('biblio.title'); - my $title = lc($record->subfield($titletag,$titlesubfield)); + my %index; + my $title; + if ($server eq 'biblioserver') { + %index=GetNoZebraIndexes; + # get title of the record (to store the 10 first letters with the index) + my ($titletag,$titlesubfield) = GetMarcFromKohaField('biblio.title'); + $title = lc($record->subfield($titletag,$titlesubfield)); + } else { + # for authorities, the "title" is the $a mainentry + my $authref = C4::AuthoritiesMarc::GetAuthType($record->subfield(152,'b')); + warn "ERROR : authtype undefined for ".$record->as_formatted unless $authref; + $title = $record->subfield($authref->{auth_tag_to_report},'a'); + $index{'mainmainentry'}= $authref->{'auth_tag_to_report'}.'a'; + $index{'mainentry'} = $authref->{'auth_tag_to_report'}.'*'; + $index{'auth_type'} = '152b'; + } my %result; # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values @@ -2807,7 +2826,7 @@ sub _DelBiblioNoZebra { # limit to 10 char, should be enough, and limit the DB size $title = substr($title,0,10); #parse each field - my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE indexname=? AND value=?'); + my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE server=? AND indexname=? AND value=?'); foreach my $field ($record->fields()) { #parse each subfield next if $field->tag <10; @@ -2823,22 +2842,20 @@ sub _DelBiblioNoZebra { $indexed=1; my $line= lc $subfield->[1]; # remove meaningless value in the field... - $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g; + $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g; # ... and split in words - warn "DELETING : $key / $tag / $subfieldcode / $line"; foreach (split / /,$line) { next unless $_; # skip empty values (multiple spaces) # if the entry is already here, do nothing, the biblionumber has already be removed unless ($result{$key}->{$_} =~ /$biblionumber,$title\-(\d);/) { # get the index value if it exist in the nozebra table and remove the entry, otherwise, do nothing - $sth2->execute($key,$_); + $sth2->execute($server,$key,$_); my $existing_biblionumbers = $sth2->fetchrow; # it exists if ($existing_biblionumbers) { # warn " existing for $key $_: $existing_biblionumbers"; $result{$key}->{$_} =$existing_biblionumbers; $result{$key}->{$_} =~ s/$biblionumber,$title\-(\d);//; - warn "after cleaning : $key / $_ = ".$result{$key}->{$_}; } } } @@ -2847,18 +2864,17 @@ sub _DelBiblioNoZebra { # the subfield is not indexed, store it in __RAW__ index anyway unless ($indexed) { my $line= lc $subfield->[1]; - $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g; + $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g; # ... and split in words foreach (split / /,$line) { next unless $_; # skip empty values (multiple spaces) # if the entry is already here, do nothing, the biblionumber has already be removed unless ($result{'__RAW__'}->{$_} =~ /$biblionumber,$title\-(\d);/) { # get the index value if it exist in the nozebra table and remove the entry, otherwise, do nothing - $sth2->execute('__RAW__',$_); + $sth2->execute($server,'__RAW__',$_); my $existing_biblionumbers = $sth2->fetchrow; # it exists if ($existing_biblionumbers) { - warn " existing for __RAW__ $_ : $existing_biblionumbers"; $result{'__RAW__'}->{$_} =$existing_biblionumbers; $result{'__RAW__'}->{$_} =~ s/$biblionumber,$title\-(\d);//; } @@ -2878,20 +2894,33 @@ sub _DelBiblioNoZebra { sub _AddBiblioNoZebra { - my ($biblionumber,$record,%result)=@_; + my ($biblionumber, $record, $server, %result)=@_; my $dbh = C4::Context->dbh; # Get the indexes - my %index=GetNoZebraIndexes; - # get title of the record (to store the 10 first letters with the index) - my ($titletag,$titlesubfield) = GetMarcFromKohaField('biblio.title'); - my $title = lc($record->subfield($titletag,$titlesubfield)); + my %index; + my $title; + if ($server eq 'biblioserver') { + %index=GetNoZebraIndexes; + # get title of the record (to store the 10 first letters with the index) + my ($titletag,$titlesubfield) = GetMarcFromKohaField('biblio.title'); + $title = lc($record->subfield($titletag,$titlesubfield)); + } else { +# warn "server : $server"; + # for authorities, the "title" is the $a mainentry + my $authref = C4::AuthoritiesMarc::GetAuthType($record->subfield(152,'b')); + warn "ERROR : authtype undefined for ".$record->as_formatted unless $authref; + $title = $record->subfield($authref->{auth_tag_to_report},'a'); + $index{'mainmainentry'}=$authref->{auth_tag_to_report}.'a'; + $index{'mainentry'} = $authref->{auth_tag_to_report}.'*'; + $index{'auth_type'} = '152b'; + } # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values $title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g; # limit to 10 char, should be enough, and limit the DB size $title = substr($title,0,10); #parse each field - my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE indexname=? AND value=?'); + my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE server=? AND indexname=? AND value=?'); foreach my $field ($record->fields()) { #parse each subfield next if $field->tag <10; @@ -2907,7 +2936,7 @@ sub _AddBiblioNoZebra { $indexed=1; my $line= lc $subfield->[1]; # remove meaningless value in the field... - $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g; + $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g; # ... and split in words foreach (split / /,$line) { next unless $_; # skip empty values (multiple spaces) @@ -2919,18 +2948,18 @@ sub _AddBiblioNoZebra { $result{$key}->{$_} .= "$biblionumber,$title-$weight;"; } else { # get the value if it exist in the nozebra table, otherwise, create it - $sth2->execute($key,$_); + $sth2->execute($server,$key,$_); my $existing_biblionumbers = $sth2->fetchrow; # it exists if ($existing_biblionumbers) { - warn" existing : $existing_biblionumbers"; $result{$key}->{$_} =$existing_biblionumbers; my $weight=$1+1; $result{$key}->{$_} =~ s/$biblionumber,$title\-(\d);//; $result{$key}->{$_} .= "$biblionumber,$title-$weight;"; # create a new ligne for this entry } else { - $dbh->do('INSERT INTO nozebra SET indexname='.$dbh->quote($key).',value='.$dbh->quote($_)); +# warn "INSERT : $server / $key / $_"; + $dbh->do('INSERT INTO nozebra SET server='.$dbh->quote($server).', indexname='.$dbh->quote($key).',value='.$dbh->quote($_)); $result{$key}->{$_}.="$biblionumber,$title-1;"; } } @@ -2940,7 +2969,7 @@ sub _AddBiblioNoZebra { # the subfield is not indexed, store it in __RAW__ index anyway unless ($indexed) { my $line= lc $subfield->[1]; - $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g; + $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g; # ... and split in words foreach (split / /,$line) { next unless $_; # skip empty values (multiple spaces) @@ -2951,7 +2980,7 @@ sub _AddBiblioNoZebra { $result{'__RAW__'}->{$_} .= "$biblionumber,$title-$weight;"; } else { # get the value if it exist in the nozebra table, otherwise, create it - $sth2->execute('__RAW__',$_); + $sth2->execute($server,'__RAW__',$_); my $existing_biblionumbers = $sth2->fetchrow; # it exists if ($existing_biblionumbers) { @@ -2961,7 +2990,7 @@ sub _AddBiblioNoZebra { $result{'__RAW__'}->{$_} .= "$biblionumber,$title-$weight;"; # create a new ligne for this entry } else { - $dbh->do('INSERT INTO nozebra SET indexname="__RAW__",value='.$dbh->quote($_)); + $dbh->do('INSERT INTO nozebra SET server='.$dbh->quote($server).', indexname="__RAW__",value='.$dbh->quote($_)); $result{'__RAW__'}->{$_}.="$biblionumber,$title-1;"; } } @@ -3174,7 +3203,7 @@ sub _koha_modify_biblioitem { $dbh->do($query); if ( $dbh->errstr ) { - warn "$query"; + warn "ERROR in _koha_modify_biblioitem $query"; } } @@ -3868,6 +3897,14 @@ Joshua Ferraro jmf@liblime.com # $Id$ # $Log$ +# Revision 1.204 2007/05/10 14:45:15 tipaul +# Koha NoZebra : +# - support for authorities +# - some bugfixes in ordering and "CCL" parsing +# - support for authorities <=> biblios walking +# +# Seems I can do what I want now, so I consider its done, except for bugfixes that will be needed i m sure ! +# # Revision 1.203 2007/05/03 15:16:02 tipaul # BUGFIX for : NoZebra # - NoZebra features : seems they work fine now (adding, modifying, deleting) diff --git a/C4/Search.pm b/C4/Search.pm index 839d2315e8..2d15e7f604 100755 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -1145,7 +1145,10 @@ sub NZgetRecords { =cut sub NZanalyse { - my ($string) = @_; + my ($string,$server) = @_; + # $server contains biblioserver or authorities, depending on what we search on. + warn "querying : $string on $server"; + $server='biblioserver' unless $server; # if we have a ", replace the content to discard temporarily any and/or/not inside my $commacontent; if ($string =~/"/) { @@ -1156,32 +1159,32 @@ sub NZanalyse { # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y" # then, call again NZanalyse with $left and $right # (recursive until we find a leaf (=> something without and/or/not) - $string =~ /(.*)( and | or | not )(.*)/; + $string =~ /(.*)( and | or | not | AND | OR | NOT )(.*)/; my $left = $1; my $right = $3; - my $operand = $2; + my $operand = lc($2); # it's not a leaf, we have a and/or/not if ($operand) { # reintroduce comma content if needed $right =~ s/__X__/"$commacontent"/ if $commacontent; $left =~ s/__X__/"$commacontent"/ if $commacontent; -# print "noeud : $left / $operand / $right\n"; - my $leftresult = NZanalyse($left); - my $rightresult = NZanalyse($right); +# warn "node : $left / $operand / $right\n"; + my $leftresult = NZanalyse($left,$server); + my $rightresult = NZanalyse($right,$server); # OK, we have the results for right and left part of the query # depending of operand, intersect, union or exclude both lists # to get a result list if ($operand eq ' and ') { - my @leftresult = split /,/, $leftresult; -# my @rightresult = split /,/,$leftresult; + my @leftresult = split /;/, $leftresult; +# my @rightresult = split /;/,$leftresult; my $finalresult; # parse the left results, and if the biblionumber exist in the right result, save it in finalresult # the result is stored twice, to have the same weight for AND than OR. # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64 foreach (@leftresult) { - if ($rightresult =~ "$_,") { - $finalresult .= "$_,$_,"; + if ($rightresult =~ "$_;") { + $finalresult .= "$_;$_;"; } } return $finalresult; @@ -1189,12 +1192,12 @@ sub NZanalyse { # just merge the 2 strings return $leftresult.$rightresult; } elsif ($operand eq ' not ') { - my @leftresult = split /,/, $leftresult; -# my @rightresult = split /,/,$leftresult; + my @leftresult = split /;/, $leftresult; +# my @rightresult = split /;/,$leftresult; my $finalresult; foreach (@leftresult) { - unless ($rightresult =~ "$_,") { - $finalresult .= "$_,"; + unless ($rightresult =~ "$_;") { + $finalresult .= "$_;"; } } return $finalresult; @@ -1206,28 +1209,32 @@ sub NZanalyse { } else { $string =~ s/__X__/"$commacontent"/ if $commacontent; $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\// /g; -# print "feuille : $string\n"; +# warn "leaf : $string\n"; # parse the string in in operator/operand/value again $string =~ /(.*)(=|>|>=|<|<=)(.*)/; my $left = $1; my $operator = $2; my $right = $3; my $results; - # automatic replace for short operator - $left='title' if $left eq 'ti'; - $left='author' if $left eq 'au'; + # automatic replace for short operators + $left='title' if $left eq 'ti'; + $left='author' if $left eq 'au'; + $left='koha-Auth-Number' if $left eq 'an'; if ($operator) { #do a specific search my $dbh = C4::Context->dbh; $operator='LIKE' if $operator eq '=' and $right=~ /%/; - my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE indexname=? AND value $operator ?"); -# print "$left / $operator / $right\n"; + my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"); + warn "$left / $operator / $right\n"; # split each word, query the DB and build the biblionumbers result foreach (split / /,$right) { my $biblionumbers; - $sth->execute($left,$_); + next unless $_; +# warn "EXECUTE : $server, $left, $_"; + $sth->execute($server, $left, $_); while (my $line = $sth->fetchrow) { $biblionumbers .= $line; +# warn "result : $line"; } # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list if ($results) { @@ -1246,17 +1253,19 @@ sub NZanalyse { } else { #do a complete search (all indexes) my $dbh = C4::Context->dbh; - my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE value LIKE ?"); + my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"); # split each word, query the DB and build the biblionumbers result foreach (split / /,$string) { +# warn "search on all indexes on $_"; my $biblionumbers; - $sth->execute($_); + next unless $_; + $sth->execute($server, $_); while (my $line = $sth->fetchrow) { $biblionumbers .= $line; } # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list if ($results) { - my @leftresult = split /,/, $biblionumbers; + my @leftresult = split /;/, $biblionumbers; my $temp; foreach (@leftresult) { if ($results =~ "$_;") { @@ -1269,6 +1278,7 @@ sub NZanalyse { } } } +# warn "return : $results for LEAF : $string"; return $results; } } @@ -1303,7 +1313,7 @@ sub NZorder { my $result_hash; my $numbers=0; if ($ordering eq '1=9523 >i') { # sort popularity DESC - foreach my $key (sort {$b <=> $a} (keys %popularity)) { + foreach my $key (sort {$b cmp $a} (keys %popularity)) { $result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc(); } } else { # sort popularity ASC @@ -1337,12 +1347,12 @@ sub NZorder { # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; my $numbers=0; - if ($ordering eq '1=1003 {'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); } - } else { # sort by title ASC - foreach my $key (sort { $a <=> $b } (keys %result)) { + } else { # sort by author ASC + foreach my $key (sort { $a cmp $b } (keys %result)) { $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); } } @@ -1378,7 +1388,7 @@ sub NZorder { $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); } } else { # sort by title ASC - foreach my $key (sort { $a <=> $b } (keys %result)) { + foreach my $key (sort { $a cmp $b } (keys %result)) { $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); } } @@ -1400,12 +1410,12 @@ sub NZorder { # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; my $numbers=0; - if ($ordering eq '1=31 {'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); } - } else { # sort by title ASC - foreach my $key (sort { $a <=> $b } (keys %result)) { + } else { # sort by pub year ASC + foreach my $key (sort { $b cmp $a } (keys %result)) { $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); } } @@ -1435,7 +1445,7 @@ sub NZorder { $result_hash->{'RECORDS'}[$numbers++] = $result{$key}; } } else { # sort by title ASC - foreach my $key (sort { $a <=> $b } (keys %result)) { + foreach my $key (sort { $b cmp $a } (keys %result)) { $result_hash->{'RECORDS'}[$numbers++] = $result{$key}; } } @@ -1476,7 +1486,7 @@ sub NZorder { # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; my $numbers=0; - foreach my $key (sort {$b <=> $a} (keys %result)) { + foreach my $key (sort {$b cmp $a} (keys %result)) { $result_hash->{'RECORDS'}[$numbers++] = $result{$key}; } # limit the $results_per_page to result size if it's more diff --git a/authorities/authorities.pl b/authorities/authorities.pl index 5c04e0911a..293be4a523 100755 --- a/authorities/authorities.pl +++ b/authorities/authorities.pl @@ -392,7 +392,6 @@ if ($op eq "add") { #warn $record->as_formatted; # check for a duplicate my ($duplicateauthid,$duplicateauthvalue) = FindDuplicateAuthority($record,$authtypecode) if ($op eq "add") && (!$is_a_modif); -warn "duplicate:$duplicateauthid,$duplicateauthvalue"; my $confirm_not_duplicate = $input->param('confirm_not_duplicate'); # it is not a duplicate (determined either by Koha itself or by user checking it's not a duplicate) if (!$duplicateauthid or $confirm_not_duplicate) { diff --git a/misc/migration_tools/rebuild_nozebra.pl b/misc/migration_tools/rebuild_nozebra.pl index 4d3989e3cb..471b5a5371 100755 --- a/misc/migration_tools/rebuild_nozebra.pl +++ b/misc/migration_tools/rebuild_nozebra.pl @@ -34,18 +34,15 @@ $directory = "export" unless $directory; my $dbh=C4::Context->dbh; $dbh->do("update systempreferences set value=1 where variable='NoZebra'"); $dbh->do("CREATE TABLE `nozebra` ( - `indexname` varchar(40) character set latin1 NOT NULL, - `value` varchar(250) character set latin1 NOT NULL, - `biblionumbers` longtext character set latin1 NOT NULL, - KEY `indexname` (`indexname`), - KEY `value` (`value`)) + `server` varchar(20) NOT NULL, + `indexname` varchar(40) NOT NULL, + `value` varchar(250) NOT NULL, + `biblionumbers` longtext NOT NULL, + KEY `indexname` (`server`,`indexname`), + KEY `value` (`server`,`value`)) ENGINE=InnoDB DEFAULT CHARSET=utf8"); + $dbh->do("truncate nozebra"); -my $sth; -$sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber $limit"); -$sth->execute(); -my $i=0; -my %result; my %index = GetNoZebraIndexes(); @@ -57,11 +54,11 @@ unless (%index) { 'issn' => '011a', 'biblionumber' =>'0909', 'itemtype' => '200b', - 'language' => '010a', + 'language' => '101a', 'publisher' => '210x', 'date' => '210d', 'note' => '300a,301a,302a,303a,304a,305a,306az,307a,308a,309a,310a,311a,312a,313a,314a,315a,316a,317a,318a,319a,320a,321a,322a,323a,324a,325a,326a,327a,328a,330a,332a,333a,336a,337a,345a', - 'Koha-Auth-Number' => '6009,6019,6029,6039,6049,6059,6069,6109', + 'Koha-Auth-Number' => '6009,6019,6029,6039,6049,6059,6069,6109,7009,7019,7029,7109,7119,7129', 'subject' => '600*,601*,606*,610*', 'dewey' => '676a', 'host-item' => '995a,995c',\" where variable='NoZebraIndexes'"); @@ -71,6 +68,15 @@ unless (%index) { } } $|=1; + +print "***********************************\n"; +print "***** building BIBLIO indexes *****\n"; +print "***********************************\n"; +my $sth; +$sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber $limit"); +$sth->execute(); +my $i=0; +my %result; while (my ($biblionumber) = $sth->fetchrow) { $i++; print "\r$i"; @@ -99,7 +105,7 @@ while (my ($biblionumber) = $sth->fetchrow) { $indexed=1; my $line= lc $subfield->[1]; # remove meaningless value in the field... - $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g; + $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g; # ... and split in words foreach (split / /,$line) { next unless $_; # skip empty values (multiple spaces) @@ -135,7 +141,97 @@ while (my ($biblionumber) = $sth->fetchrow) { } } } -my $sth = $dbh->prepare("INSERT INTO nozebra (indexname,value,biblionumbers) VALUES (?,?,?)"); +my $sth = $dbh->prepare("INSERT INTO nozebra (server,indexname,value,biblionumbers) VALUES ('biblioserver',?,?,?)"); +foreach my $key (keys %result) { + foreach my $index (keys %{$result{$key}}) { + if (length($result{$key}->{$index}) > 1000000) { + print "very long index (".length($result{$key}->{$index}).")for $key / $index. update mySQL config file if you have an error just after this warning (max_paquet_size parameter)\n"; + } + $sth->execute($key,$index,$result{$key}->{$index}); + } +} + +print "\n***********************************\n"; +print "***** building AUTHORITIES indexes *****\n"; +print "***********************************\n"; + +my $sth; +$sth=$dbh->prepare("select authid from auth_header order by authid $limit"); +$sth->execute(); +my $i=0; +my %result; +while (my ($authid) = $sth->fetchrow) { + $i++; + print "\r$i"; + my $record = GetAuthority($authid); + + my %index; + # for authorities, the "title" is the $a mainentry + my $authref = C4::AuthoritiesMarc::GetAuthType($record->subfield(152,'b')); + use Data::Dumper; +# warn "for $authid / ".$record->as_formatted. "Dumper : ".Dumper($authref); + warn "ERROR : authtype undefined for ".$record->as_formatted unless $authref; + my $title = $record->subfield($authref->{auth_tag_to_report},'a'); + $index{'mainmainentry'}= $authref->{'auth_tag_to_report'}.'a'; + $index{'mainentry'} = $authref->{'auth_tag_to_report'}.'*'; + $index{'auth_type'} = '152b'; + + # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values + $title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g; + # limit to 10 char, should be enough, and limit the DB size + $title = substr($title,0,10); + #parse each field + foreach my $field ($record->fields()) { + #parse each subfield + next if $field->tag <10; + foreach my $subfield ($field->subfields()) { + my $tag = $field->tag(); + my $subfieldcode = $subfield->[0]; + my $indexed=0; + # check each index to see if the subfield is stored somewhere + # otherwise, store it in __RAW__ index + foreach my $key (keys %index) { + if ($index{$key} =~ /$tag\*/ or $index{$key} =~ /$tag$subfieldcode/) { + $indexed=1; + my $line= lc $subfield->[1]; + # remove meaningless value in the field... + $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g; + # ... and split in words + foreach (split / /,$line) { + next unless $_; # skip empty values (multiple spaces) + # if the entry is already here, improve weight + if ($result{$key}->{$_} =~ /$authid,$title\-(\d);/) { + my $weight=$1+1; + $result{$key}->{$_} =~ s/$authid,$title\-(\d);//; + $result{$key}->{$_} .= "$authid,$title-$weight;"; + # otherwise, create it, with weight=1 + } else { + $result{$key}->{$_}.="$authid,$title-1;"; + } + } + } + } + # the subfield is not indexed, store it in __RAW__ index anyway + unless ($indexed) { + my $line= lc $subfield->[1]; + $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g; + foreach (split / /,$line) { + next unless $_; +# warn $record->as_formatted."$_ =>".$title; + if ($result{__RAW__}->{$_} =~ /$authid,$title\-(\d);/) { + my $weight=$1+1; +# $weight++; + $result{__RAW__}->{$_} =~ s/$authid,$title\-(\d);//; + $result{__RAW__}->{$_} .= "$authid,$title-$weight;"; + } else { + $result{__RAW__}->{$_}.="$authid,$title-1;"; + } + } + } + } + } +} +my $sth = $dbh->prepare("INSERT INTO nozebra (server,indexname,value,biblionumbers) VALUES ('authorityserver',?,?,?)"); foreach my $key (keys %result) { foreach my $index (keys %{$result{$key}}) { if (length($result{$key}->{$index}) > 1000000) { diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index 5269a63366..211e92b17b 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -14,7 +14,7 @@ use strict; $|=1; # flushes output # limit for database dumping -my $limit = "LIMIT 500"; +my $limit;# = "LIMIT 500"; my $directory; my $skip_export; my $keep_export; diff --git a/opac/opac-rss.pl b/opac/opac-rss.pl index 62467da233..eb4fb891e2 100755 --- a/opac/opac-rss.pl +++ b/opac/opac-rss.pl @@ -68,7 +68,7 @@ my $query = $cgi->param('q'); $query =~ s/:/=/g; # the number of lines to retrieve -my $size=$cgi->param('size') || 20; +my $size=$cgi->param('size') || 50; # the filename of the cached rdf file. my $filename = md5_base64($query); -- 2.39.5