2 #-----------------------------------
3 # Script Name: build_marc_word.pl
4 # Script Version: 0.1.0
6 # Author: Joshua Ferraro [jmf at kados dot org]
7 # Description: This script builds a new marc_word
8 # table with a reduced number of tags (only those
9 # tags that should be searched) allowing for
10 # faster and more accurate searching when used
11 # with the SearchMarc routines. Make sure that
12 # the MARCaddword routine in Biblio.pm will index
13 # words of >= 1 char; otherwise, searches like
14 # "O'brian, Patrick" will fail as the search
15 # routines will separate that query into "o",
16 # "brian", and "patrick". (If "o" is not in the
17 # database the search will fail)
18 # Usage: build_marc_word.pl
20 # 0.1.0 2004/06/11: first working version.
21 # Thanks to Chris Cormack
22 # for helping with the $data object
23 # and Stephen Hedges for providing
24 # the list of MARC tags.
26 # *Should add a few parameters like 'delete from
27 # marc_word' or make script ask user whether to
28 # perform that task ...
29 # *Add a 'status' report as the data is loaded ...
30 #-----------------------------------
31 use lib '/usr/local/koha/intranet/modules/';
35 my $dbh=C4::Context->dbh; # database handle: used below to prepare the marc_subfield_table query and passed on to MARCaddword
37 #Here is where you name the tags that you wish to index. If you
38 # are using MARC21 this set of default tags should be fine but you
39 # may need to add holdings tags specific to your library (e.g., holding
40 # branch for Nelsonville is 942k but that may not be the case for your
44 #Tag documentation from http://lcweb.loc.gov/marc/bibliographic/ecbdhome.html
46 "020a", # INTERNATIONAL STANDARD BOOK NUMBER
47 "022a", # INTERNATIONAL STANDARD SERIAL NUMBER
48 "100a", # MAIN ENTRY--PERSONAL NAME
49 "110a", # MAIN ENTRY--CORPORATE NAME
50 "110b", # Subordinate unit
51 "110c", # Location of meeting
52 "111a", # MAIN ENTRY--MEETING NAME
53 "111c", # Location of meeting
54 "130a", # MAIN ENTRY--UNIFORM TITLE
55 "240a", # UNIFORM TITLE
56 "245a", # TITLE STATEMENT
57 "245b", # Remainder of title
58 "245c", # Statement of responsibility, etc.
59 "245p", # Name of part/section of a work
60 "246a", # VARYING FORM OF TITLE
61 "246b", # Remainder of title
62 "260b", # PUBLICATION, DISTRIBUTION, ETC. (IMPRINT)
63 "440a", # SERIES STATEMENT/ADDED ENTRY--TITLE
64 "440p", # Name of part/section of a work
65 "500a", # GENERAL NOTE
66 "505t", # FORMATTED CONTENTS NOTE (t is Title)
67 "511a", # PARTICIPANT OR PERFORMER NOTE
68 "520a", # SUMMARY, ETC.
69 "534a", # ORIGINAL VERSION NOTE
70 "534k", # Key title of original
71 "534t", # Title statement of original
73 "600a", # SUBJECT ADDED ENTRY--PERSONAL NAME
74 "610a", # SUBJECT ADDED ENTRY--CORPORATE NAME
75 "611a", # SUBJECT ADDED ENTRY--MEETING NAME
76 "630a", # SUBJECT ADDED ENTRY--UNIFORM TITLE
77 "650a", # SUBJECT ADDED ENTRY--TOPICAL TERM
78 "651a", # SUBJECT ADDED ENTRY--GEOGRAPHIC NAME
79 "700a", # ADDED ENTRY--PERSONAL NAME
80 "710a", # ADDED ENTRY--CORPORATE NAME
81 "711a", # ADDED ENTRY--MEETING NAME
82 "720a", # ADDED ENTRY--UNCONTROLLED NAME
83 "730a", # ADDED ENTRY--UNIFORM TITLE
84 "740a", # ADDED ENTRY--UNCONTROLLED RELATED/ANALYTICAL TITLE
85 "752a", # ADDED ENTRY--HIERARCHICAL PLACE NAME
86 "800a", # SERIES ADDED ENTRY--PERSONAL NAME
87 "810a", # SERIES ADDED ENTRY--CORPORATE NAME
88 "811a", # SERIES ADDED ENTRY--MEETING NAME
89 "830a", # SERIES ADDED ENTRY--UNIFORM TITLE
90 "942k" # Holdings Branch ?? Unique to NPL??
93 #note that subfieldcode in marc_subfield_table is subfieldid in marc_word ... even
94 #though there is another subfieldid in marc_subfield_table--very confusing naming conventions!
96 #For each tag we run a search to find the necessary data for building the marc_word table
97 foreach my $this_tagid(@tags) {
98 my $query="SELECT bibid,tag,tagorder,subfieldcode,subfieldorder,subfieldvalue FROM marc_subfield_table WHERE tag=? AND subfieldcode=?";
99 my $sth=$dbh->prepare($query);
101 my ($tag, $subfieldid);
103 #split the tag into tag, subfield
104 if ($this_tagid =~ s/(\D+)//) {
108 #Then we pass this information on to MARCaddword in Biblio.pm to actually perform the import into marc_word
109 $sth->execute($tag, $subfieldid);
110 while (my $data=$sth->fetchrow_hashref()){
111 MARCaddword($dbh,$data->{'bibid'},$data->{'tag'},$data->{'tagorder'},$data->{'subfieldcode'},$data->{'subfieldorder'},$data->{'subfieldvalue'});