#!/usr/bin/perl -w
#-----------------------------------
# Script Name: build_marc_word.pl
# Script Version: 0.1.0
# Date: 2004/06/05
# Author: Joshua Ferraro [jmf at kados dot org]
# Description: This script builds a new marc_word
#    table with a reduced number of tags (only those
#    tags that should be searched) allowing for
#    faster and more accurate searching when used
#    with the SearchMarc routines. Make sure that
#    the MARCaddword routine in Biblio.pm will index
#    characters >= 1 char; otherwise, searches like
#    "O'brian, Patrick" will fail as the search
#    routines will separate that query into "o",
#    "brian", and "patrick". (If "o" is not in the
#    database the search will fail)
# Usage: build_marc_word.pl
# Revision History:
#    0.1.0  2004/06/11:  first working version.
#                        Thanks to Chris Cormack
#                        for helping with the $data object
#                        and Stephen Hedges for providing
#                        the list of MARC tags.
# FixMe:
#    *Should add a few parameters like 'delete from
#     marc_word' or make script ask user whether to
#     perform that task ...
#    *Add a 'status' report as the data is loaded ...
#-----------------------------------
use lib '/usr/local/koha/intranet/modules/';
use strict;
use warnings;
use C4::Context;
use C4::Biblio;

my $dbh = C4::Context->dbh;

# Here is where you name the tags that you wish to index. If you
# are using MARC21 this set of default tags should be fine but you
# may need to add holdings tags specific to your library (e.g., holding
# branch for Nelsonville is 942k but that may not be the case for your
# library).
# Each entry is the tag number immediately followed by the subfield code.
my @tags = (
    # Tag documentation from http://lcweb.loc.gov/marc/bibliographic/ecbdhome.html
    "020a",    # INTERNATIONAL STANDARD BOOK NUMBER
    "022a",    # INTERNATIONAL STANDARD SERIAL NUMBER
    "100a",    # MAIN ENTRY--PERSONAL NAME
    "110a",    # MAIN ENTRY--CORPORATE NAME
    "110b",    #   Subordinate unit
    "110c",    #   Location of meeting
    "111a",    # MAIN ENTRY--MEETING NAME
    "111c",    #   Location of meeting
    "130a",    # MAIN ENTRY--UNIFORM TITLE
    "240a",    # UNIFORM TITLE
    "245a",    # TITLE STATEMENT
    "245b",    #   Remainder of title
    "245c",    #   Statement of responsibility, etc.
    "245p",    #   Name of part/section of a work
    "246a",    # VARYING FORM OF TITLE
    "246b",    #   Remainder of title
    "260b",    # PUBLICATION, DISTRIBUTION, ETC. (IMPRINT)
    "440a",    # SERIES STATEMENT/ADDED ENTRY--TITLE
    "440p",    #   Name of part/section of a work
    "500a",    # GENERAL NOTE
    "505t",    # FORMATTED CONTENTS NOTE (t is Title)
    "511a",    # PARTICIPANT OR PERFORMER NOTE
    "520a",    # SUMMARY, ETC.
    "534a",    # ORIGINAL VERSION NOTE
    "534k",    #   Key title of original
    "534t",    #   Title statement of original
    "586a",    # AWARDS NOTE
    "600a",    # SUBJECT ADDED ENTRY--PERSONAL NAME
    "610a",    # SUBJECT ADDED ENTRY--CORPORATE NAME
    "611a",    # SUBJECT ADDED ENTRY--MEETING NAME
    "630a",    # SUBJECT ADDED ENTRY--UNIFORM TITLE
    "650a",    # SUBJECT ADDED ENTRY--TOPICAL TERM
    "651a",    # SUBJECT ADDED ENTRY--GEOGRAPHIC NAME
    "700a",    # ADDED ENTRY--PERSONAL NAME
    "710a",    # ADDED ENTRY--CORPORATE NAME
    "711a",    # ADDED ENTRY--MEETING NAME
    "720a",    # ADDED ENTRY--UNCONTROLLED NAME
    "730a",    # ADDED ENTRY--UNIFORM TITLE
    "740a",    # ADDED ENTRY--UNCONTROLLED RELATED/ANALYTICAL TITLE
    "752a",    # ADDED ENTRY--HIERARCHICAL PLACE NAME
    "800a",    # SERIES ADDED ENTRY--PERSONAL NAME
    "810a",    # SERIES ADDED ENTRY--CORPORATE NAME
    "811a",    # SERIES ADDED ENTRY--MEETING NAME
    "830a",    # SERIES ADDED ENTRY--UNIFORM TITLE
    "942k"     # Holdings Branch ?? Unique to NPL??
);

# Note that subfieldcode in marc_subfield_table is subfieldid in marc_word ...
# even though there is another subfieldid in marc_subfield_table--very
# confusing naming conventions!

# The same query is run for every tag/subfield pair, so prepare it once
# up front instead of once per loop iteration.
my $query = "SELECT bibid,tag,tagorder,subfieldcode,subfieldorder,subfieldvalue FROM marc_subfield_table WHERE tag=? AND subfieldcode=?";
my $sth = $dbh->prepare($query)
    or die "Cannot prepare marc_subfield_table query: " . $dbh->errstr;

# For each tag we run a search to find the necessary data for building
# the marc_word table.
foreach my $this_tagid (@tags) {

    # Split the entry into tag and subfield. Work on a copy: the foreach
    # variable aliases the @tags element, so substituting on it directly
    # would rewrite the configuration list as a side effect.
    my $tag = $this_tagid;
    unless ($tag =~ s/(\D+)//) {
        # No subfield code present; there is nothing sensible to query for.
        warn "Skipping malformed tag entry '$this_tagid' (expected tag number followed by subfield code)\n";
        next;
    }
    my $subfieldid = $1;

    # Then we pass this information on to MARCaddword in Biblio.pm to
    # actually perform the import into marc_word.
    $sth->execute($tag, $subfieldid) or do {
        warn "Query failed for tag $tag subfield $subfieldid: " . $sth->errstr . "\n";
        next;
    };

    while (my $data = $sth->fetchrow_hashref()) {
        MARCaddword(
            $dbh,
            $data->{'bibid'},
            $data->{'tag'},
            $data->{'tagorder'},
            $data->{'subfieldcode'},
            $data->{'subfieldorder'},
            $data->{'subfieldvalue'},
        );
    }
}

$dbh->disconnect();