misc/build_marc_word.pl

   1 #!/usr/bin/perl -w
   2 #-----------------------------------
   3 # Script Name: build_marc_word.pl
   4 # Script Version: 0.1.0
   5 # Date:  2004/06/05
   6 # Author:  Joshua Ferraro [jmf at kados dot org]
   7 # Description: This script builds a new marc_word
   8 #  table with a reduced number of tags (only those
   9 #  tags that should be searched) allowing for
  10 #  faster and more accurate searching when used
  11 #  with the SearchMarc routines.  Make sure that
  12 #  the MARCaddword routine in Biblio.pm will index
  13 #  characters >= 1 char; otherwise, searches like
  14 #  "O'brian, Patrick" will fail as the search
  15 #  routines will separate that query into "o",
  16 #  "brian", and "patrick".  (If "o" is not in the
  17 #  database the search will fail)
  18 # Usage: build_marc_word.pl
  19 # Revision History:
  20 #    0.1.0  2004/06/11:  first working version.
  21 #                        Thanks to Chris Cormack
  22 #                        for helping with the $data object
  23 #                        and Stephen Hedges for providing
  24 #                        the list of MARC tags.
  25 # FixMe:
  26 #   *Should add a few parameters like 'delete from
  27 #    marc_word' or make script ask user whether to
  28 #    perform that task ...
  29 #   *Add a 'status' report as the data is loaded ...
  30 #-----------------------------------
  31 use lib '/usr/local/koha/intranet/modules/';
  32 use strict;
  33 use C4::Context;
  34 use C4::Biblio;
  35 my $dbh = C4::Context->dbh;
  36
  37 # Tag/subfield pairs to index, each written as the tag digits followed by
  38 # the subfield code.  If you are using MARC21 these defaults should be
  39 # fine, but you may need to add holdings tags specific to your library
  40 # (e.g., holding branch for Nelsonville is 942k but that may not be the
  41 # case for your library).
  42 my @tags = (
  43
  44     # Tag documentation from http://lcweb.loc.gov/marc/bibliographic/ecbdhome.html
  45
  46     '020a',    # INTERNATIONAL STANDARD BOOK NUMBER
  47     '022a',    # INTERNATIONAL STANDARD SERIAL NUMBER
  48     '100a',    # MAIN ENTRY--PERSONAL NAME
  49     '110a',    # MAIN ENTRY--CORPORATE NAME
  50     '110b',    #   Subordinate unit
  51     '110c',    #   Location of meeting
  52     '111a',    # MAIN ENTRY--MEETING NAME
  53     '111c',    #   Location of meeting
  54     '130a',    # MAIN ENTRY--UNIFORM TITLE
  55     '240a',    # UNIFORM TITLE
  56     '245a',    # TITLE STATEMENT
  57     '245b',    #   Remainder of title
  58     '245c',    #   Statement of responsibility, etc.
  59     '245p',    #   Name of part/section of a work
  60     '246a',    # VARYING FORM OF TITLE
  61     '246b',    #   Remainder of title
  62     '260b',    # PUBLICATION, DISTRIBUTION, ETC. (IMPRINT)
  63     '440a',    # SERIES STATEMENT/ADDED ENTRY--TITLE
  64     '440p',    #   Name of part/section of a work
  65     '500a',    # GENERAL NOTE
  66     '505t',    # FORMATTED CONTENTS NOTE (t is Title)
  67     '511a',    # PARTICIPANT OR PERFORMER NOTE
  68     '520a',    # SUMMARY, ETC.
  69     '534a',    # ORIGINAL VERSION NOTE
  70     '534k',    #   Key title of original
  71     '534t',    #   Title statement of original
  72     '586a',    # AWARDS NOTE
  73     '600a',    # SUBJECT ADDED ENTRY--PERSONAL NAME
  74     '610a',    # SUBJECT ADDED ENTRY--CORPORATE NAME
  75     '611a',    # SUBJECT ADDED ENTRY--MEETING NAME
  76     '630a',    # SUBJECT ADDED ENTRY--UNIFORM TITLE
  77     '650a',    # SUBJECT ADDED ENTRY--TOPICAL TERM
  78     '651a',    # SUBJECT ADDED ENTRY--GEOGRAPHIC NAME
  79     '700a',    # ADDED ENTRY--PERSONAL NAME
  80     '710a',    # ADDED ENTRY--CORPORATE NAME
  81     '711a',    # ADDED ENTRY--MEETING NAME
  82     '720a',    # ADDED ENTRY--UNCONTROLLED NAME
  83     '730a',    # ADDED ENTRY--UNIFORM TITLE
  84     '740a',    # ADDED ENTRY--UNCONTROLLED RELATED/ANALYTICAL TITLE
  85     '752a',    # ADDED ENTRY--HIERARCHICAL PLACE NAME
  86     '800a',    # SERIES ADDED ENTRY--PERSONAL NAME
  87     '810a',    # SERIES ADDED ENTRY--CORPORATE NAME
  88     '811a',    # SERIES ADDED ENTRY--MEETING NAME
  89     '830a',    # SERIES ADDED ENTRY--UNIFORM TITLE
  90     '942k',    # Holdings Branch ?? Unique to NPL??
  91 );
  92
  93 #note that subfieldcode in marc_subfield_table is subfieldid in marc_word ... even
  94 #though there is another subfieldid in marc_subfield_table--very confusing naming conventions!
  95
  96 #For each tag we run a search to find the necessary data for building the marc_word table.
  97 #The query is prepared once, outside the loop, and re-executed with new bind values per tag.
  98 my $query="SELECT bibid,tag,tagorder,subfieldcode,subfieldorder,subfieldvalue FROM marc_subfield_table WHERE tag=? AND subfieldcode=?";
  99 my $sth=$dbh->prepare($query);
 100
 101 foreach my $this_tagid (@tags) {
 102
 103 #split the entry into tag (leading digits) and subfield (trailing non-digits).
 104 #A capturing match is used instead of s/// because foreach aliases $this_tagid
 105 #to the @tags element, so substituting in place would rewrite the list itself.
 106         my ($tag, $subfieldid) = $this_tagid =~ /\A(\d+)(\D+)\z/;
 107
 108 #An entry that does not split would otherwise be executed with undef bind values
 109 #and silently index nothing, so report it and move on to the next entry.
 110         unless (defined $tag) {
 111                 warn "build_marc_word.pl: skipping malformed tag entry '$this_tagid'\n";
 112                 next;
 113         }
 114
 115 #Then we pass this information on to MARCaddword in Biblio.pm to actually perform the import into marc_word
 116         $sth->execute($tag, $subfieldid);
 117         while (my $data=$sth->fetchrow_hashref()) {
 118                 MARCaddword($dbh,$data->{'bibid'},$data->{'tag'},$data->{'tagorder'},$data->{'subfieldcode'},$data->{'subfieldorder'},$data->{'subfieldvalue'});
 119         }
 120 }
 121 $dbh->disconnect();