improving NOzebra search :
[koha.git] / misc / migration_tools / buildEDITORS.pl
1 #!/usr/bin/perl
2 # script that rebuild EDITORS
3
4 use strict;
5
6 # Koha modules used
7 use MARC::File::USMARC;
8 use MARC::Record;
9 use MARC::Batch;
10 use C4::Context;
11 use C4::Biblio;
12 use C4::AuthoritiesMarc;
13 use Time::HiRes qw(gettimeofday);
14
15 use Getopt::Long;
# Command-line switches; every one of them is a plain boolean flag.
my ( $version, $verbose, $test_parameter, $confirm, $delete );

# GetOptions returns false when it meets a switch it does not know; keep the
# result so that a mistyped command line shows the help instead of running.
my $options_ok = GetOptions(
    'h' => \$version,
    'd' => \$delete,
    't' => \$test_parameter,
    'v' => \$verbose,
    'c' => \$confirm,
);

# The script only does real work when -c is given; -h, a missing -c or an
# invalid switch all end here with the help screen.
if ( !$options_ok or $version or !$confirm ) {
    print <<EOF;
small script to recreate a authority table into Koha.
This will parse all your biblios to recreate isbn / editor / collections for the unimarc_210c and unimarc_225a plugins.

Remember those plugins will work only if you have an EDITORS authority type, with
\t200a being the first 2 parts of an ISBN
\t200b being the editor name
\t200c (repeatable) being the series title

parameters :
\t-h : show this help screen and exit
\t-c : confirmation flag. the script will run only with this flag. Otherwise, it will just show this help screen.
\t-d : delete existing EDITORS before rebuilding them
\t-t : test parameters : run the script but don't create really the EDITORS
\t-v : verbose : print each ISBN start with its editor and collections
EOF

    # Exit status 0 for a requested help screen, 1 for a bad command line.
    exit( $options_ok ? 0 : 1 );
}
45
# Database handle used by every later step of the script.
my $dbh = C4::Context->dbh;

# -d : purge all authorities of type EDITORS before rebuilding them.
# -t promises that nothing is really changed, so in test mode the purge is
# only announced and no row is deleted.
if ($delete) {
    if ($test_parameter) {
        print "TESTING MODE : existing EDITORS are NOT deleted\n";
    }
    else {
        print "deleting EDITORS\n";
        my $del_subfields =
          $dbh->prepare("delete from auth_subfield_table where authid=?");
        my $del_words = $dbh->prepare("delete from auth_word where authid=?");
        my $editors   = $dbh->prepare(
            "select authid from auth_header where authtypecode='EDITORS'");
        $editors->execute;

        # Remove the dependent rows of each authority first ...
        while ( my ($authid) = $editors->fetchrow ) {
            $del_subfields->execute($authid);
            $del_words->execute($authid);
        }

        # ... then the authority headers themselves.
        $dbh->do("delete from auth_header where authtypecode='EDITORS'");
    }
}
59
if ($test_parameter) {
    print "TESTING MODE ONLY\n    DOING NOTHING\n===============\n";
}
$| = 1;    # flushes output
my $starttime = gettimeofday;
my $sth       = $dbh->prepare("select bibid from marc_biblio");
$sth->execute;
my $counter = 0;

# %hash maps an ISBN start (first two ISBN segments, concatenated) to
#   { editors => first non-empty 210$c seen, collections => { 225$a => count } }
my %hash;
while ( my ($bibid) = $sth->fetchrow ) {
    my $record = GetMarcBiblio($bibid);

    # Skip ids for which no MARC record comes back; calling ->field on an
    # undefined value would abort the whole run.
    next unless $record;
    my $isbnField = $record->field('010');
    next unless $isbnField;
    my $isbn = $isbnField->subfield('a');

    # A 010 without a usable $a cannot be mapped to an ISBN start.
    next unless defined $isbn and length $isbn;

    my $isbnstart = _editors_isbn_start($isbn);

    # Progress display: one dot per record, a timing line every 100 records.
    $counter++;
    print ".";
    my $timeneeded = gettimeofday - $starttime;
    print "$counter in $timeneeded s\n" unless ( $counter % 100 );

    # Editor name comes from the first 210, series title from the first 225.
    my $field = $record->field('210');
    my $editor;
    $editor = $field->subfield('c') if $field;

    $field = $record->field('225');
    my $collection;
    $collection = $field->subfield('a') if $field;

    # The first non-empty editor seen for an ISBN start wins; every series
    # title is counted.
    $hash{$isbnstart}->{editors} = $editor
      unless ( $hash{$isbnstart}->{editors} );
    $hash{$isbnstart}->{collections}->{$collection}++ if $collection;
}

# Return the first two segments of $isbn, concatenated.
# NOTE(review): the segmentation works on leading characters compared as
# numbers, so it presumably expects a bare 10-digit ISBN without hyphens or
# an EAN prefix -- confirm against the data and the plugins that read 200$a.
sub _editors_isbn_start {
    my ($isbn) = @_;

    # First segment: 1 to 5 characters, chosen by numeric range.
    my $seg1 = _editors_leading_segment( $isbn, 5,
        [ 1, 7 ], [ 2, 94 ], [ 3, 995 ], [ 4, 9989 ] );

    # Second segment: 2 to 7 characters taken from what follows the first.
    # The 6-character limit is 949999: the former 9499999 was always
    # satisfied, which made the 7-character case unreachable.
    my $rest = substr( $isbn, length($seg1) );
    my $seg2 = _editors_leading_segment( $rest, 7,
        [ 2, 19 ], [ 3, 699 ], [ 4, 8399 ], [ 5, 89999 ], [ 6, 949999 ] );

    return $seg1 . $seg2;
}

# Try each [ length, maximum ] pair in order and return the first prefix of
# $string whose numeric value does not exceed its maximum; when none fits,
# return the prefix of $fallback_length characters.
sub _editors_leading_segment {
    my ( $string, $fallback_length, @ranges ) = @_;
    foreach my $range (@ranges) {
        my ( $length, $max ) = @$range;
        my $candidate = substr( $string, 0, $length );
        return $candidate if $candidate <= $max;
    }
    return substr( $string, 0, $fallback_length );
}
122
# Build one EDITORS authority per ISBN start collected in %hash:
#   200$a = ISBN start, 200$b = editor name, 200$c = series titles, each
#   followed by "|".
foreach my $isbnstart ( sort keys %hash ) {
    print "$isbnstart -- " . $hash{$isbnstart}->{editors} if $verbose;

    # An ISBN start may have no series at all; use an empty list then.
    my $collections     = $hash{$isbnstart}->{collections} || {};
    my $seriestitlelist = '';
    foreach my $collection ( sort keys %$collections ) {
        print " CC $collection : " . $collections->{$collection} if $verbose;
        $seriestitlelist .= $collection . "|";
    }

    # The "" . prefixes force plain strings, so a missing editor becomes an
    # empty subfield value.
    my $authorityRecord = MARC::Record->new();
    my $newfield        = MARC::Field->new(
        200, '', '',
        'a' => "" . $isbnstart,
        'b' => "" . $hash{$isbnstart}->{editors},
        'c' => "" . $seriestitlelist
    );
    $authorityRecord->insert_fields_ordered($newfield);

    # -t : test mode builds the record but must not store it.
    AUTHaddauthority( $dbh, $authorityRecord, '', 'EDITORS' )
      unless $test_parameter;

    #   print $authorityRecord->as_formatted."\n";
    print "\n" if $verbose;
}
exit;
142
143 #       my $timeneeded = gettimeofday - $starttime;
144 #       print "$i in $timeneeded s\n" unless ($i % 50);
145 #       foreach my $field ($record->field(995)) {
146 #               $record->delete_field($field);
147 #       }
148 #       my $totdone=0;
149 #       my $authid;
150 #       foreach my $fieldnumber (('710','711','712')) {
151 #               foreach my $field ($record->field($fieldnumber)) {
152 #       #               print "=>".$field->as_formatted."\n";
153 #                       foreach my $authentry ($field->subfield("a")) {
154 #                               my $hashentry = $authentry;
155 #                               # la particularité de ce script là, c'est que l'entrée dans la table d'autorité est $a -- $b (et pas $x -- $x -- $x -- $a comme pour les autorités NC)
156 #                               # si nécessaire, compléter avec le $c (n'existe pas dans le fichier que j'ai migré avec cette moulinette)
157 #                               # supprimer les accents, certaines entrées sont sans, d'autres avec !
158 #                               # mysql ne différencie pas, mais les hash perl oui !
159 #                               $hashentry =~ s/é|è|ê/e/g;
160 #                               $hashentry =~ s/à|â/a/g;
161 #                               $hashentry =~ s/î/i/g;
162 #                               $hashentry =~ s/ô/o/g;
163 #                               $hashentry =~ s/|/u/g;
164 #                               $hashentry = uc($hashentry);
165 #                               print "==>$hashentry" if $hashentry =~ /.*ETATS.*/;
166 #                               $totdone++;
167 #                               if ($alreadydone{$hashentry}) {
168 #                                       $authid = $alreadydone{$hashentry};
169 #                                       print ".";
170 #                               } else {
171 #                                       print "*";
172 #                                       #create authority.
173 #                                       my $authorityRecord = MARC::Record->new();
174 #                                       my $newfield = MARC::Field->new(210,'','','a' => "".$authentry, 
175 #                                                                                               'b' => "".$field->subfield('b'),
176 #                                                                                               'c' => "".$field->subfield('c'),
177 #                                                                                               );
178 #                                       $authorityRecord->insert_fields_ordered($newfield);
179 #                                       $authid=AUTHaddauthority($dbh,$authorityRecord,'','CO');
180 #                                       $alreadydone{$hashentry} = $authid;
181 #                                       # OK, on garde la notice d'autorité, on cherche les notices biblio et on les met à jour...
182 #                                       if ($fieldnumber eq '710') {
183 #                                               $sthBIBLIOS710->execute($authentry);
184 #                                               while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOS710->fetchrow) {
185 #                                                       my $inbiblio = GetMarcBiblio($bibid);
186 #                                                       my $isOK = 0;
187 #                                                       foreach my $in7xx ($inbiblio->field($fieldnumber)) {
188 #                                                               # !!!!! ici, il faut reconstruire l'entrée de la table de hachage comme ci dessus
189 #                                                               # sinon, 
190 #                                                               my $inEntry = $in7xx->subfield('a');
191 #                                                               $inEntry =~ s/���e/g;
192 #                                                               $inEntry =~ s/��a/g;
193 #                                                               $inEntry =~ s/�i/g;
194 #                                                               $inEntry =~ s/�o/g;
195 #                                                               $inEntry =~ s/|/u/g;
196 #                                                               $inEntry = uc($inEntry);
197 #                                                               $isOK=1 if $inEntry eq $hashentry;
198 #                                                       }
199 #                                                       C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
200 #                                               }
201 #                                       }
202 #                                       if ($fieldnumber eq '711') {
203 #                                               $sthBIBLIOS711->execute($authentry);
204 #                                               while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOS711->fetchrow) {
205 #                                                       my $inbiblio = GetMarcBiblio($bibid);
206 #                                                       my $isOK = 0;
207 #                                                       foreach my $in7xx ($inbiblio->field($fieldnumber)) {
208 #                                                               # !!!!! ici, il faut reconstruire l'entrée de la table de hachage comme ci dessus
209 #                                                               # sinon, 
210 #                                                               my $inEntry = $in7xx->subfield('a');
211 #                                                               $inEntry =~ s/���e/g;
212 #                                                               $inEntry =~ s/��a/g;
213 #                                                               $inEntry =~ s/�i/g;
214 #                                                               $inEntry =~ s/�o/g;
215 #                                                               $inEntry =~ s/|/u/g;
216 #                                                               $inEntry = uc($inEntry);
217 #                                                               $isOK=1 if $inEntry eq $hashentry;
218 #                                                       }
219 #                                                       C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
220 #                                               }
221 #                                       }
222 #                                       if ($fieldnumber eq '712') {
223 #                                               $sthBIBLIOS712->execute($authentry);
224 #                                               while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOS712->fetchrow) {
225 #                                                       my $inbiblio = GetMarcBiblio($bibid);
226 #                                                       my $isOK = 0;
227 #                                                       foreach my $in7xx ($inbiblio->field($fieldnumber)) {
228 #                                                               # !!!!! ici, il faut reconstruire l'entrée de la table de hachage comme ci dessus
229 #                                                               # sinon, 
230 #                                                               my $inEntry = $in7xx->subfield('a');
231 #                                                               $inEntry =~ s/���e/g;
232 #                                                               $inEntry =~ s/��a/g;
233 #                                                               $inEntry =~ s/�i/g;
234 #                                                               $inEntry =~ s/�o/g;
235 #                                                               $inEntry =~ s/|/u/g;
236 #                                                               $inEntry = uc($inEntry);
237 #                                                               $isOK=1 if $inEntry eq $hashentry;
238 #                                                       }
239 #                                                       C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
240 #                                               }
241 #                                       }
242 #                               }
243 #                       }
244 #               }
245 #       }
246 #       $i++;
247 # }
248 # my $timeneeded = gettimeofday - $starttime;
249 # print "$i entries done in $timeneeded seconds (".($i/$timeneeded)." per second)\n";