6 use C4::AuthoritiesMarc;
10 # script that fills the nozebra table
14 $|=1; # autoflush the currently selected output handle so output is not held in a buffer
16 # LIMIT clause that is interpolated into the biblioitems select issued further down
17 my $limit = "LIMIT 100"; # NOTE(review): caps the select at 100 biblionumbers - looks like a testing leftover, confirm before relying on a full rebuild
33 $directory = "export" unless $directory; # default to export when $directory is unset or false
34 my $dbh=C4::Context->dbh; # database handle used for every statement below
35 $dbh->do("update systempreferences set value=1 where variable='NoZebra'"); # switch the NoZebra system preference on
36 $dbh->do("CREATE TABLE IF NOT EXISTS `nozebra` (
37 `indexname` varchar(40) character set latin1 NOT NULL,
38 `value` varchar(250) character set latin1 NOT NULL,
39 `biblionumbers` longtext character set latin1 NOT NULL,
40 KEY `indexname` (`indexname`),
41 KEY `value` (`value`))
42 ENGINE=InnoDB DEFAULT CHARSET=utf8"); # IF NOT EXISTS keeps a re-run from failing on an existing table; NOTE(review): columns are latin1 while the table default is utf8 - confirm this is intended
43 $dbh->do("truncate nozebra"); # empty the table so the index is rebuilt from scratch on every run
45 $sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber $limit"); # $limit is pasted in as a raw SQL fragment
50 my %index = GetNoZebraIndexes(); # index name => string that is matched below against tag* or tag followed by a subfield code
53 while (my ($biblionumber) = $sth->fetchrow) {
56 my $record = GetMarcBiblio($biblionumber); # record object; its subfield() and fields() methods are used below
58 # get the title of the record (its first 10 letters are stored with every index entry)
59 my ($titletag,$titlesubfield) = GetMarcFromKohaField('biblio.title');
60 my $title = lc($record->subfield($titletag,$titlesubfield));
62 # remove blanks, commas (which could cause problems when decoding the string for CQL retrieval) and some regexp-specific characters
63 $title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g;
64 # limit to 10 chars: should be enough, and keeps the DB size down
65 $title = substr($title,0,10);
67 foreach my $field ($record->fields()) {
69 next if $field->tag <10; # tags below 10 are skipped (presumably the MARC control fields - confirm)
70 foreach my $subfield ($field->subfields()) {
71 my $tag = $field->tag();
72 my $subfieldcode = $subfield->[0]; # element 0 is the subfield code, element 1 (read below) is its text
74 # check each index to see whether this subfield belongs to it
75 # otherwise, store it in the __RAW__ index
76 foreach my $key (keys %index) {
77 if ($index{$key} =~ /$tag\*/ or $index{$key} =~ /$tag$subfieldcode/) {
79 my $line= lc $subfield->[1];
80 # replace punctuation and other meaningless characters in the value with spaces...
81 $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
82 # ... and split into words
83 foreach (split / /,$line) {
84 next unless $_; # skip empty values (multiple spaces); a word that is exactly 0 is skipped too
85 # if this biblio is already listed for the word, increase its weight. NOTE(review): $title is interpolated into the pattern unescaped, $biblionumber is not anchored (12 also matches inside 112) and (\d) captures a single digit only
86 if ($result{$key}->{$_} =~ /$biblionumber,$title\-(\d);/) {
88 $result{$key}->{$_} =~ s/$biblionumber,$title\-(\d);//;
89 $result{$key}->{$_} .= "$biblionumber,$title-$weight;";
90 # otherwise, create the entry with weight=1
92 $result{$key}->{$_}.="$biblionumber,$title-1;";
97 # the subfield is not covered by any index: store it in the __RAW__ index anyway
99 my $line= lc $subfield->[1];
100 $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
101 foreach (split / /,$line) {
103 # warn $record->as_formatted."$_ =>".$title;
104 if ($result{__RAW__}->{$_} =~ /$biblionumber,$title\-(\d);/) {
107 $result{__RAW__}->{$_} =~ s/$biblionumber,$title\-(\d);//;
108 $result{__RAW__}->{$_} .= "$biblionumber,$title-$weight;";
110 $result{__RAW__}->{$_}.="$biblionumber,$title-1;";
117 my $sth = $dbh->prepare("INSERT INTO nozebra (indexname,value,biblionumbers) VALUES (?,?,?)"); # one row is written per (index name, word) pair
118 foreach my $key (keys %result) {
119 foreach my $index (keys %{$result{$key}}) { # despite its name, $index holds the indexed word here; $key is the index name
120 $sth->execute($key,$index,$result{$key}->{$index});
121 if (length($result{$key}->{$index}) > 40000) { # report entries whose biblionumber list is longer than 40000 characters
122 print length($result{$key}->{$index})."\n for $key / $index\n";