From 1b33f5f63a002a377afe383ea37cb448211f11ea Mon Sep 17 00:00:00 2001 From: joshferraro Date: Fri, 11 Jun 2004 15:07:48 +0000 Subject: [PATCH] This script builds a new marc_word table with a reduced number of tags (only those tags that should be searched) allowing for faster and more accurate searching when used with the SearchMarc routines. Make sure that the MARCaddword routine in Biblio.pm will index characters >= 1 char; otherwise, searches like "O'brian, Patrick" will fail as the search routines will seperate that query into "o", "brian", and "patrick". (If "o" is not in the database the search will fail) --- misc/build_marc_word.pl | 114 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100755 misc/build_marc_word.pl diff --git a/misc/build_marc_word.pl b/misc/build_marc_word.pl new file mode 100755 index 0000000000..d17df6f553 --- /dev/null +++ b/misc/build_marc_word.pl @@ -0,0 +1,114 @@ +#!/usr/bin/perl -w +#----------------------------------- +# Script Name: build_marc_word.pl +# Script Version: 0.1.0 +# Date: 2004/06/05 +# Author: Joshua Ferraro [jmf at kados dot org] +# Description: This script builds a new marc_word +# table with a reduced number of tags (only those +# tags that should be searched) allowing for +# faster and more accurate searching when used +# with the SearchMarc routines. Make sure that +# the MARCaddword routine in Biblio.pm will index +# characters >= 1 char; otherwise, searches like +# "O'brian, Patrick" will fail as the search +# routines will seperate that query into "o", +# "brian", and "patrick". (If "o" is not in the +# database the search will fail) +# Usage: build_marc_word.pl +# Revision History: +# 0.1.0 2004/06/11: first working version. +# Thanks to Chris Cormack +# for helping with the $data object +# and Stephen Hedges for providing +# the list of MARC tags. +# FixMe: +# *Should add a few parameters like 'delete from +# marc_word' or make script ask user whether to +# perform that task ... +# *Add a 'status' report as the data is loaded ... +#----------------------------------- +use lib '/usr/local/koha/intranet/modules/'; +use strict; +use C4::Context; +use C4::Biblio; +my $dbh=C4::Context->dbh; + +#Here is where you name the tags that you wish to index. If you +# are using MARC21 this set of default tags should be fine but you +# may need to add holdings tags specific to your library (e.g., holding +# branch for Nelsonville is 942k but that may not be the case for your +# library). +my @tags=( + +#Tag documentation from http://lcweb.loc.gov/marc/bibliographic/ecbdhome.html + +"020a", # INTERNATIONAL STANDARD BOOK NUMBER +"022a", # INTERNATIONAL STANDARD SERIAL NUMBER +"100a", # MAIN ENTRY--PERSONAL NAME +"110a", # MAIN ENTRY--CORPORATE NAME +"110b", # Subordinate unit +"110c", # Location of meeting +"111a", # MAIN ENTRY--MEETING NAME +"111c", # Location of meeting +"130a", # MAIN ENTRY--UNIFORM TITLE +"240a", # UNIFORM TITLE +"245a", # TITLE STATEMENT +"245b", # Remainder of title +"245c", # Statement of responsibility, etc. +"245p", # Name of part/section of a work +"246a", # VARYING FORM OF TITLE +"246b", # Remainder of title +"260b", # PUBLICATION, DISTRIBUTION, ETC. (IMPRINT) +"440a", # SERIES STATEMENT/ADDED ENTRY--TITLE +"440p", # Name of part/section of a work +"500a", # GENERAL NOTE +"505t", # FORMATTED CONTENTS NOTE (t is Title) +"511a", # PARTICIPANT OR PERFORMER NOTE +"520a", # SUMMARY, ETC. +"534a", # ORIGINAL VERSION NOTE +"534k", # Key title of original +"534t", # Title statement of original +"586a", # AWARDS NOTE +"600a", # SUBJECT ADDED ENTRY--PERSONAL NAME +"610a", # SUBJECT ADDED ENTRY--CORPORATE NAME +"611a", # SUBJECT ADDED ENTRY--MEETING NAME +"630a", # SUBJECT ADDED ENTRY--UNIFORM TITLE +"650a", # SUBJECT ADDED ENTRY--TOPICAL TERM +"651a", # SUBJECT ADDED ENTRY--GEOGRAPHIC NAME +"700a", # ADDED ENTRY--PERSONAL NAME +"710a", # ADDED ENTRY--CORPORATE NAME +"711a", # ADDED ENTRY--MEETING NAME +"720a", # ADDED ENTRY--UNCONTROLLED NAME +"730a", # ADDED ENTRY--UNIFORM TITLE +"740a", # ADDED ENTRY--UNCONTROLLED RELATED/ANALYTICAL TITLE +"752a", # ADDED ENTRY--HIERARCHICAL PLACE NAME +"800a", # SERIES ADDED ENTRY--PERSONAL NAME +"810a", # SERIES ADDED ENTRY--CORPORATE NAME +"811a", # SERIES ADDED ENTRY--MEETING NAME +"830a", # SERIES ADDED ENTRY--UNIFORM TITLE +"942k" # Holdings Branch ?? Unique to NPL?? +); + +#note that subfieldcode in marc_subfield_table is subfieldid in marc_word ... even +#though there is another subfieldid in marc_subfield_table--very confusing naming conventions! + +#For each tag we run a search to find the necessary data for building the marc_word table +foreach my $this_tagid(@tags) { + my $query="SELECT bibid,tag,tagorder,subfieldcode,subfieldorder,subfieldvalue FROM marc_subfield_table WHERE tag=? AND subfieldcode=?"; + my $sth=$dbh->prepare($query); + + my ($tag, $subfieldid); + +#split the tag into tag, subfield + if ($this_tagid =~ s/(\D+)//) { + $subfieldid = $1; + $tag = $this_tagid; + } +#Then we pass this information on to MARCaddword in Biblio.pm to actually perform the import into marc_word + $sth->execute($tag, $subfieldid); + while (my $data=$sth->fetchrow_hashref()){ + MARCaddword($dbh,$data->{'bibid'},$data->{'tag'},$data->{'tagorder'},$data->{'subfieldcode'},$data->{'subfieldorder'},$data->{'subfieldvalue'}); + } +} +$dbh->disconnect(); -- 2.39.5