2 # script that rebuild thesaurus from biblio table.
4 # delete FROM `marc_subfield_table` WHERE tag = "606" AND subfieldcode = 9;
8 use MARC::File::USMARC;
13 use C4::AuthoritiesMarc;
14 use Time::HiRes qw(gettimeofday);
17 my ( $input_marc_file, $number) = ('',0);
18 my ($version, $verbose, $test_parameter, $field,$delete,$category,$subfields);
22 't' => \$test_parameter,
28 if ($version || ($category eq '')) {
30 small script to recreate a authority table into Koha.
32 \tc : thesaurus category. Can be filled with anything, the NC is hardcoded. But mandatory to confirm that you want to rebuild 6xx
33 \d : delete every entry of the selected category before doing work.
36 ./build6xx.pl -c NC -d
42 my $dbh = C4::Context->dbh;
43 my @subf = $subfields =~ /(##\d\d\d##.)/g;
45 print "deleting thesaurus\n";
46 my $del1 = $dbh->prepare("delete from auth_subfield_table where authid=?");
47 my $del2 = $dbh->prepare("delete from auth_word where authid=?");
48 my $sth = $dbh->prepare("select authid from auth_header where authtypecode='NC'");
50 while (my ($authid) = $sth->fetchrow) {
51 $del1->execute($authid);
52 $del2->execute($authid);
54 $dbh->do("delete from auth_header where authtypecode='NC'");
55 $dbh->do("delete from marc_subfield_table where tag='606' and subfieldcode='9'");
56 $dbh->do("delete from marc_word where tagsubfield='6069'");
59 if ($test_parameter) {
60 print "TESTING MODE ONLY\n DOING NOTHING\n===============\n";
62 $|=1; # flushes output
63 my $starttime = gettimeofday;
64 my $sth = $dbh->prepare("select bibid from marc_biblio");
69 # search biblios to "connect" to an authority with any number of $x (limited to 4 $x in this script)
70 my $sthBIBLIOS = $dbh->prepare("select distinct m1.bibid,m1.tag,m1.tagorder,m1.subfieldorder from marc_subfield_table as m1 where tag in (606) and subfieldcode='a' and subfieldvalue=?");
71 my $sthBIBLIOSx = $dbh->prepare("select distinct m1.bibid,m1.tag,m1.tagorder,m1.subfieldorder from marc_subfield_table as m1 left join marc_subfield_table as m2 on m1.bibid=m2.bibid where m1.tag in (606) and m1.subfieldcode='a' and m2.subfieldcode='x' and m1.subfieldvalue=? and m2.subfieldvalue=?");
72 my $sthBIBLIOSxx = $dbh->prepare("select distinct m1.bibid,m1.tag,m1.tagorder,m1.subfieldorder from marc_subfield_table as m1 left join marc_subfield_table as m2 on m1.bibid=m2.bibid left join marc_subfield_table as m3 on m1.bibid=m3.bibid where m1.tag in (606) and m1.subfieldcode='a' and m2.subfieldcode='x' and m3.subfieldcode='x' and m1.subfieldvalue=? and m2.subfieldvalue=? and m3.subfieldvalue=?");
73 my $sthBIBLIOSxxx = $dbh->prepare("select distinct m1.bibid,m1.tag,m1.tagorder,m1.subfieldorder from marc_subfield_table as m1 left join marc_subfield_table as m2 on m1.bibid=m2.bibid left join marc_subfield_table as m3 on m1.bibid=m4.bibid left join marc_subfield_table as m4 on m1.bibid=m4.bibid where m1.tag in (606) and m1.subfieldcode='a' and m2.subfieldcode='x' and m3.subfieldcode='x' and m4.subfieldcode='x' and m1.subfieldvalue=? and m2.subfieldvalue=? and m3.subfieldvalue=? and m4.subfieldvalue=?");
74 my $sthBIBLIOSxxxx = $dbh->prepare("select distinct m1.bibid,m1.tag,m1.tagorder,m1.subfieldorder from marc_subfield_table as m1 left join marc_subfield_table as m2 on m1.bibid=m2.bibid left join marc_subfield_table as m3 on m1.bibid=m4.bibid left join marc_subfield_table as m4 on m1.bibid=m4.bibid left join marc_subfield_table as m5 on m1.bibid=m5.bibid where m1.tag in (606) and m1.subfieldcode='a' and m2.subfieldcode='x' and m3.subfieldcode='x' and m4.subfieldcode='x' and m5.subfieldcode='x' and m1.subfieldvalue=? and m2.subfieldvalue=? and m3.subfieldvalue=? and m4.subfieldvalue=? and m5.subfieldvalue=?");
76 # loop through each biblio
77 while (my ($bibid) = $sth->fetchrow) {
78 my $record = MARCgetbiblio($dbh,$bibid);
79 my $timeneeded = gettimeofday - $starttime;
80 print "$i in $timeneeded s\n" unless ($i % 50);
81 foreach my $field ($record->field(995)) {
82 $record->delete_field($field);
86 # search the 606 field(s)
87 foreach my $field ($record->field("606")) {
88 foreach my $authentry ($field->subfield("a")) {
89 # the hashentry variable contains all $x fields and the $a in a single string. Used to differenciate
90 # $xsomething$aelse and $asomething else
91 my $hashentry = $authentry;
92 foreach my $x ($field->subfield('x')) {
96 # all the same for mysql, but NOT for perl hashes !
97 # without those lines, tôt is not tot and patée is not patee
98 $hashentry =~ s/é|ê|è/e/g;
99 $hashentry =~ s/â|à/a/g;
100 $hashentry =~ s/î/i/g;
101 $hashentry =~ s/ô/o/g;
102 $hashentry =~ s/ù|û/u/g;
103 # uppercase all, in case of typing error.
104 $hashentry = uc($hashentry);
106 if ($alreadydone{$hashentry}) {
107 $authid = $alreadydone{$hashentry};
112 my $authorityRecord = MARC::Record->new();
113 my $newfield = MARC::Field->new(250,'','','a' => "".$authentry);
114 foreach my $x ($field->subfield('x')) {
115 $newfield->add_subfields('x' => $x);
117 foreach my $z ($field->subfield('z')) {
118 $newfield->add_subfields('z' => $z);
120 $authorityRecord->insert_fields_ordered($newfield);
121 $authid=AUTHaddauthority($dbh,$authorityRecord,'','NC');
122 $alreadydone{$hashentry} = $authid;
123 # we have the authority number, now we update all biblios that use this authority...
124 my @x = $field->subfield('x'); # depending on the number of $x in the subfield
125 if ($#x eq -1) { # no $x
126 $sthBIBLIOS->execute($authentry);
127 while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOS->fetchrow) {
128 # check that the field does not already have a $x (if it has, it will or has been managed by another authority
129 my $inbiblio = MARCgetbiblio($dbh,$bibid);
131 # loop in each 606 field
132 foreach my $in606 ($inbiblio->field('606')) {
133 my $inEntry = $in606->subfield('a');
134 # and rebuild the $x -- $x -- $a string (like for $hashentry, few lines before)
135 foreach my $x ($in606->subfield('x')) {
138 $inEntry =~ s/é|ê|è/e/g;
139 $inEntry =~ s/â|à/a/g;
142 $inEntry =~ s/ù|û/u/g;
143 $inEntry = uc($inEntry);
144 # ok, it's confirmed that we must add the $9 subfield for this biblio, so...
145 $isOK=1 if $inEntry eq $hashentry;
148 C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
151 if ($#x eq 0) { # one $x
152 $sthBIBLIOSx->execute($authentry,$x[0]);
153 while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOSx->fetchrow) {
154 my $inbiblio = MARCgetbiblio($dbh,$bibid);
156 foreach my $in606 ($inbiblio->field('606')) {
157 my $inEntry = $in606->subfield('a');
158 foreach my $x ($in606->subfield('x')) {
161 $inEntry =~ s/é|ê|è/e/g;
162 $inEntry =~ s/â|à/a/g;
165 $inEntry =~ s/ù|û/u/g;
166 $inEntry = uc($inEntry);
167 $isOK=1 if $inEntry eq $hashentry;
169 C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
172 if ($#x eq 1) { # two $x
173 $sthBIBLIOSxx->execute($authentry,$x[0],$x[1]);
174 while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOSxx->fetchrow) {
175 my $inbiblio = MARCgetbiblio($dbh,$bibid);
177 foreach my $in606 ($inbiblio->field('606')) {
178 my $inEntry = $in606->subfield('a');
179 foreach my $x ($in606->subfield('x')) {
182 $inEntry =~ s/é|ê|è/e/g;
183 $inEntry =~ s/â|à/a/g;
186 $inEntry =~ s/ù|û/u/g;
187 $inEntry = uc($inEntry);
188 $isOK=1 if $inEntry eq $hashentry;
190 C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
193 if ($#x eq 2) { # 3 $x
194 $sthBIBLIOSxxx->execute($authentry,$x[0],$x[1],$x[2]);
195 while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOSxxx->fetchrow) {
196 my $inbiblio = MARCgetbiblio($dbh,$bibid);
198 foreach my $in606 ($inbiblio->field('606')) {
199 my $inEntry = $in606->subfield('a');
200 foreach my $x ($in606->subfield('x')) {
203 $inEntry =~ s/é|ê|è/e/g;
204 $inEntry =~ s/â|à/a/g;
207 $inEntry =~ s/ù|û/u/g;
208 $inEntry = uc($inEntry);
209 $isOK=1 if $inEntry eq $hashentry;
211 C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
214 if ($#x eq 3) { # 3 $x
215 $sthBIBLIOSxxxx->execute($authentry,$x[0],$x[1],$x[2],$x[3]);
216 while (my ($bibid,$tag,$tagorder,$subfieldorder) = $sthBIBLIOSxxxx->fetchrow) {
217 my $inbiblio = MARCgetbiblio($dbh,$bibid);
219 foreach my $in606 ($inbiblio->field('606')) {
220 my $inEntry = $in606->subfield('a');
221 foreach my $x ($in606->subfield('x')) {
224 $inEntry =~ s/é|ê|è/e/g;
225 $inEntry =~ s/â|à/a/g;
228 $inEntry =~ s/ù|û/u/g;
229 $inEntry = uc($inEntry);
230 $isOK=1 if $inEntry eq $hashentry;
232 C4::Biblio::MARCaddsubfield($dbh,$bibid,$tag,'',$tagorder,9,$subfieldorder,$authid) if $isOK;
236 # too many $x, not handled, warn the developper that tries to migrate
237 print "warning there is ".$#x.'$x values';
244 my $timeneeded = gettimeofday - $starttime;
245 print "$i entries done in $timeneeded seconds (".($i/$timeneeded)." per second)\n";