From fcc3986cfdf716533af335808403ff7a5aa0a370 Mon Sep 17 00:00:00 2001 From: Joshua Ferraro Date: Mon, 17 Dec 2007 11:54:54 -0600 Subject: [PATCH] Updates to date indexing and search processing Summary of Koha 3.0 date indexing for MARC21: Index Expected format Notes ----------------------------------------------------- date-entered-on-file [yymmdd] (008/0-5, indexed in word and sort indexes) copydate [yyyy] (260$c, indexed in word and sort indexes) acqdate [yyyy-mm-dd] (952$d, indexed in date,word,sort indexes) pubdate [yyyy] (008/7-10, indexed in year,word,sort indexes) Template Search Parameters Tested: limit-yr (either yyyy or yyyy-yyyy) (added processing for ge le, structure attribute st-numeric, etc.) yr pubdate (yyyy) acqdate,st-date-normalized (yyyy-mm-dd) Template Sort Parameters Tested: pubdate_dsc pubdate_asc acqdate_dsc acqdate_asc Signed-off-by: Joshua Ferraro --- C4/Search.pm | 41 +++++++++++++------ catalogue/search.pl | 15 ++++++- etc/zebradb/biblios/etc/bib1.att | 7 ++-- etc/zebradb/biblios/etc/record.abs | 18 +++++--- etc/zebradb/ccl.properties | 32 +++++++-------- .../prog/en/includes/search_indexes.inc | 5 ++- .../prog/en/modules/labels/search.tmpl | 4 +- .../prog/en/modules/opac-advsearch.tmpl | 4 +- opac/opac-search.pl | 13 +++++- 9 files changed, 90 insertions(+), 49 deletions(-) diff --git a/C4/Search.pm b/C4/Search.pm index 72121a8036..b3bf8f32b5 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -660,9 +660,9 @@ sub _build_weighted_query { $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title #$weighted_query .= " or any,ext,r4=$operand"; # exact any #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any - $weighted_query .= " or wrd,fuzzy,r8=\"$operand\"" if $fuzzy_enabled; # add fuzzy, word list - $weighted_query .= " or wrd,right-Truncation,r9=\"$stemmed_operand\"" if ($stemming and $stemmed_operand); # add stemming, right truncation - $weighted_query .= " or wrd,r9=\"$operand\""; + $weighted_query .= " or 
wrdl,fuzzy,r8=\"$operand\"" if $fuzzy_enabled; # add fuzzy, word list + $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\"" if ($stemming and $stemmed_operand); # add stemming, right truncation + $weighted_query .= " or wrdl,r9=\"$operand\""; # embedded sorting: 0 a-z; 1 z-a # $weighted_query .= ") or (sort1,aut=1"; @@ -680,7 +680,7 @@ sub _build_weighted_query { $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)"; $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index - $weighted_query .= " or $index,rt,wrd,r3=\"$operand\""; # word list index + $weighted_query .= " or $index,rt,wrdl,r3=\"$operand\""; # word list index } $weighted_query .= "))"; # close rank specification return $weighted_query; @@ -753,7 +753,8 @@ sub buildQuery { # a flag to determine whether or not to add the index to the query my $indexes_set; - # if the user is sophisticated enough to specify an index, turn off some defaults + + # if the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling if ($operands[$i] =~ /(:|=)/ || $scan) { $weight_fields = 0; $stemming = 0; @@ -761,15 +762,30 @@ sub buildQuery { } my $operand = $operands[$i]; my $index = $indexes[$i]; - $DEBUG=1; - # some helpful index modifs - my $wrdl; - unless (!$index || $index =~ /(phr|ext)/) { - $wrdl = ",wrdl"; + # add some attributes for certain index types + # Date of Publication + if ($index eq 'yr') { + $index .=",st-numeric"; + $indexes_set++; + ($stemming,$auto_truncation,$weight_fields, $fuzzy_enabled, $remove_stopwords) = (0,0,0,0,0); + } + # Date of Acquisition + elsif ($index eq 'acqdate') { + $index.=",st-date-normalized"; + $indexes_set++; + ($stemming,$auto_truncation,$weight_fields, $fuzzy_enabled, $remove_stopwords) = (0,0,0,0,0); + } - my $index_plus = $index.$wrdl.":" if $index; - my $index_plus_comma=$index.$wrdl."," if $index; 
+ + # set default structure attribute (word list) + my $struct_attr; + unless (!$index || $index =~ /(st-|phr|ext|wrdl)/) { + $struct_attr = ",wrdl"; + } + # some helpful index modifs + my $index_plus = $index.$struct_attr.":" if $index; + my $index_plus_comma=$index.$struct_attr."," if $index; # Remove Stopwords if ($remove_stopwords) { @@ -885,7 +901,6 @@ sub buildQuery { $limit_cgi .="&limit=$this_limit"; $limit_desc .= "$this_limit"; } - # regular old limits else { $limit .= " and " if $limit || $query; diff --git a/catalogue/search.pl b/catalogue/search.pl index 40be2cd478..c9c21adc55 100755 --- a/catalogue/search.pl +++ b/catalogue/search.pl @@ -387,7 +387,18 @@ foreach my $limit(@limits) { $template->param(available => $available); # append year limits if they exist -push @limits, map "yr:".$_, split("\0",$params->{'limit-yr'}) if $params->{'limit-yr'}; +if ($params->{'limit-yr'}) { + if ($params->{'limit-yr'} =~ /\d{4}-\d{4}/) { + my ($yr1,$yr2) = split(/-/, $params->{'limit-yr'}); + push @limits, "yr,st-numeric,ge=$yr1 and yr,st-numeric,le=$yr2"; + } + elsif ($params->{'limit-yr'} =~ /\d{4}/) { + push @limits, "yr,st-numeric=$params->{'limit-yr'}"; + } + else { + #FIXME: Should return an error to the user, incorrect date format specified + } +} # Params that can only have one value my $scan = $params->{'scan'}; @@ -546,7 +557,7 @@ for (my $i=0;$i<=@servers;$i++) { $template->param( PAGE_NUMBERS => \@page_numbers, previous_page_offset => $previous_page_offset) unless $pages < 2; $template->param(next_page_offset => $next_page_offset) unless $pages eq $current_page_number; - } + } } # end of the if local else { # check if it's a z3950 or opensearch source diff --git a/etc/zebradb/biblios/etc/bib1.att b/etc/zebradb/biblios/etc/bib1.att index 3fa27219df..e4c584b972 100644 --- a/etc/zebradb/biblios/etc/bib1.att +++ b/etc/zebradb/biblios/etc/bib1.att @@ -32,7 +32,7 @@ att 26 PA-subject att 27 LC-subject-heading att 28 RVM-subject-heading att 29 Local-subject-index 
-att 30 Date +att 30 copydate att 31 pubdate att 32 Date-of-acquisition att 33 Title-key @@ -78,7 +78,7 @@ att 1007 Identifier-standard att 1008 Subject-LC-childrens att 1009 Subject-name-personal att 1010 Body-of-text -att 1011 dateaddeddb +att 1011 date-entered-on-file att 1012 Date/time-last-modified att 1013 Authority/format-id att 1014 Concept-text @@ -131,7 +131,8 @@ att 8010 itemnumber att 8011 homebranch att 8012 holdingbranch att 8013 location -att 8014 Date-of-acquisition +# handled in bib1 attr 1=32 +#att 8014 Date-of-acquisition att 8015 acqsource att 8016 coded-location-qualifier att 8017 price diff --git a/etc/zebradb/biblios/etc/record.abs b/etc/zebradb/biblios/etc/record.abs index 30f612c6c9..0eed7b4aad 100644 --- a/etc/zebradb/biblios/etc/record.abs +++ b/etc/zebradb/biblios/etc/record.abs @@ -16,7 +16,7 @@ esetname B @ marc usmarc.mar systag sysno rank xpath enable - +# Some notes: # pl = Published Place # ta = Target Audience 002/22 # ff8-23 @@ -27,6 +27,13 @@ xpath enable # ctype = Content type: review, catalog, encyclopedia, dictionary # pubdate Publication Date # rtype = Record type (leader 06) +# +# Date indexing in Koha 3.0 for MARC21: +# Index Expected format Notes +# date-entered-on-file [yymmdd] (008/0-5, indexed in word and sort indexes) +# copydate [yyyy] (260$c, indexed in word and sort indexes) +# acqdate [yyyy-mm-dd] (952$d, indexed in date,word,sort indexes) +# pubdate [yyyy] (008/7-10, indexed in year,word,num,sort indexes) all any # melm 000 rtype:n:range(data,06,1),Bib-level:w:range(data,07,01) @@ -34,10 +41,10 @@ xelm /record/leader llength:w:range(data,0,5),rtype:w:range(data,6,1),Bib-level: # example: xelm /record/leader l1:w:range(data,0,5),l2:w:range(data,10,2) melm 001 Control-number -melm 005 Date,Date/time-last-modified +melm 005 Date/time-last-modified melm 007 Microform-generation:n:range(data,11,1),Material-type,ff7-00:w:range(data,0,1),ff7-01:w:range(data,1,1),ff7-02:w:range(data,2,1),ff7-01-02:w:range(data,0,2) -melm 
008 ln:n:range(data,35,3),ctype:w:range(data,24,4),Date:n:range(data,0,5),Date:s:range(data,0,5),Date:n:range(data,7,4),Date:s:range(data,7,4),Date:n:range(data,11,4),Date:s:range(data,11,4),pubdate:n:range(data,7,4),pubdate:s:range(data,7,4),dateaddeddb:n:range(data,0,5),dateaddeddb:s:range(data,0,5),pl:w:range(data,15,3),ta:w:range(data,22,1),ff8-23:w:range(data,23,1),ff8-29:w:range(data,29,1),lf:w:range(data,33,1),bio:w:range(data,34,1),Record-source:w:range(data,39,0) +melm 008 date-entered-on-file:n:range(data,0,5),date-entered-on-file:s:range(data,0,5),pubdate:w:range(data,7,4),pubdate:n:range(data,7,4),pubdate:y:range(data,7,4),pubdate:s:range(data,7,4),pl:w:range(data,15,3),ta:w:range(data,22,1),ff8-23:w:range(data,23,1),ff8-29:w:range(data,29,1),lf:w:range(data,33,1),bio:w:range(data,34,1),ln:n:range(data,35,3),ctype:w:range(data,24,4),Record-source:w:range(data,39,0) melm 010 LC-card-number,Identifier-standard melm 011 LC-card-number,Identifier-standard @@ -54,7 +61,7 @@ melm 025 Identifier-standard melm 027 Report-number,Identifier-standard melm 028 Number-music-publisher,Identifier-standard melm 030 CODEN,Identifier-standard -melm 033 Date +#melm 033 Date melm 034 Map-scale #melm 035 Local-number,Identifier-standard melm 037 Identifier-standard,Stock-number @@ -107,8 +114,7 @@ melm 246 Title,Title:p,Title-abbreviated,Title-expanded,Title-former melm 247 Title,Title:p,Title-former,Title-other-variant,Related-periodical melm 260$a pl:w,pl:p melm 260$b Publisher:w,Publisher:p -melm 260$c Date,Date:s,Date:y -#,pubdate,pubdate:s +melm 260$c copydate,copydate:s melm 260 pl melm 300 Extent:w,Extent:p melm 400$a Name-and-title diff --git a/etc/zebradb/ccl.properties b/etc/zebradb/ccl.properties index 8643e2aaba..7e49986831 100644 --- a/etc/zebradb/ccl.properties +++ b/etc/zebradb/ccl.properties @@ -183,7 +183,7 @@ aut 1=1003 # number from a system not # specified elsewhere in this # list of attributes. 
-Local-classification 4=1 1=20 +Local-classification 1=20 lcn Local-classification callnum Local-classification #Local-classification cc callnum dewey @@ -326,18 +326,19 @@ Local-number 1=12 #Date 30 The point of time at which 005, 008/00-05, # a transaction or event 008/07-10, 260$c, # takes place. 008/11-14, 033,etc. -Date 1=30 4=109 r=r -#yr Date +# interpreting this as the copyright date in 260$c +copydate 1=30 r=r #Date-publication 31 The date (usually year) in 008/07-10, 260$c # which a document is published. 046, 533$d -Date-of-publication 1=31 4=109 r=r +Date-of-publication 1=pubdate r=r #dp Date-of-publication yr Date-of-publication +pubdate Date-of-publication #Date-acquisition 32 The date when a document was 541$d # acquired. -Date-of-acquisition 1=32 +Date-of-acquisition 1=Date-of-acquisition acqdate Date-of-acquisition #da Date-of-acquisition @@ -847,12 +848,7 @@ st-key 4=3 st-year 4=4 st-date-normalized 4=5 st-word-list 4=6 -wrdl 4=6 - -# there was a reason I didn't want to use this but it's -# escaped me -- JF -wrd 4=6 - +wrdl st-word-list #st-word st-date-un-normalized 4=100 st-name-normalized 4=101 @@ -861,7 +857,7 @@ st-structure 4=103 st-urx 4=104 st-free-form-text 4=105 st-document-text 4=106 -st-local number 4=107 +st-local-number 4=107 st-string 4=108 st-numeric 4=109 #string 109 @@ -908,6 +904,7 @@ cn-item 1=9008 cn-prefix 1=9009 cn-suffix 1=9010 Suppress 1=9011 +date-entered-on-file 1=date-entered-on-file # Items Index withdrawn 1=8001 @@ -923,14 +920,14 @@ itemnumber 1=8010 Code-institution 1=8011 holdingbranch 1=8012 location 1=8013 -Date-of-acquisition 1=8014 +#Date-of-acquisition 1=8014 acqsource 1=8015 coded-location-qualifier 1=8016 price 1=8017 -stack 1=8018 4=109 -issues 1=8019 4=109 -renewals 1=8020 4=109 -reserves 1=8021 4=109 +stack 1=8018 +issues 1=8019 +renewals 1=8020 +reserves 1=8021 Local-classification 1=8022 barcode 1=8023 bc barcode @@ -954,7 +951,6 @@ pl Place-publication #att 8900 #Call-Number 1=8900 
-#date-entered-on-file 1=8800 #date1 1=8801 #date2 1=8802 #language 8805 diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc index 5f255dcba0..21788b6b1a 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc +++ b/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc @@ -12,11 +12,12 @@ - + + - + diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/labels/search.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/labels/search.tmpl index a024ee8521..c1160d8658 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/labels/search.tmpl +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/labels/search.tmpl @@ -27,7 +27,7 @@ - + @@ -44,7 +44,7 @@ - + diff --git a/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tmpl b/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tmpl index b7251a48d9..d70291a197 100644 --- a/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tmpl +++ b/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tmpl @@ -147,9 +147,9 @@ - + - + diff --git a/opac/opac-search.pl b/opac/opac-search.pl index fb0e46d660..54838e9c4d 100755 --- a/opac/opac-search.pl +++ b/opac/opac-search.pl @@ -394,7 +394,18 @@ foreach my $limit(@limits) { $template->param(available => $available); # append year limits if they exist -push @limits, map "yr:".$_, split("\0",$params->{'limit-yr'}) if $params->{'limit-yr'}; +if ($params->{'limit-yr'}) { + if ($params->{'limit-yr'} =~ /\d{4}-\d{4}/) { + my ($yr1,$yr2) = split(/-/, $params->{'limit-yr'}); + push @limits, "yr,st-numeric,ge=$yr1 and yr,st-numeric,le=$yr2"; + } + elsif ($params->{'limit-yr'} =~ /\d{4}/) { + push @limits, "yr,st-numeric=$params->{'limit-yr'}"; + } + else { + #FIXME: Should return an error to the user, incorrect date format specified + } +} # Params that can only have one value my $scan = $params->{'scan'}; -- 2.39.5