From 84bf354eea295fe7cac3d4fa69df877c162e3491 Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Fri, 23 Mar 2012 18:52:27 -0400 Subject: [PATCH] Bug 7818: update installer for biblio DOM indexing Adds the necessary bits to enable DOM indexing for bib records as an option during installation from source. Signed-off-by: Galen Charlton Signed-off-by: Jared Camins-Esakov Signed-off-by: Paul Poulain Signed-off-by: Chris Cormack --- Makefile.PL | 22 +++++++++ etc/koha-conf.xml | 16 ++++-- etc/zebradb/biblios/etc/dom-config-marc.xml | 54 ++++++++++++++++++++ etc/zebradb/biblios/etc/dom-config.xml | 54 ++++++++++++++++++++ etc/zebradb/retrieval-info-bib-dom.xml | 22 +++++++++ etc/zebradb/retrieval-info-bib-grs1.xml | 40 +++++++++++++++ etc/zebradb/zebra-biblios-dom.cfg | 55 +++++++++++++++++++++ misc/koha-install-log | 4 +- rewrite-config.PL | 3 ++ 9 files changed, 266 insertions(+), 4 deletions(-) create mode 100644 etc/zebradb/biblios/etc/dom-config-marc.xml create mode 100644 etc/zebradb/biblios/etc/dom-config.xml create mode 100644 etc/zebradb/retrieval-info-bib-dom.xml create mode 100644 etc/zebradb/retrieval-info-bib-grs1.xml create mode 100644 etc/zebradb/zebra-biblios-dom.cfg diff --git a/Makefile.PL b/Makefile.PL index 3c1504f3a9..9ac147405e 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -409,6 +409,7 @@ my %config_defaults = ( 'INSTALL_SRU' => 'yes', 'INSTALL_PAZPAR2' => 'no', 'AUTH_INDEX_MODE' => 'dom', + 'BIB_INDEX_MODE' => 'dom', 'ZEBRA_MARC_FORMAT' => 'marc21', 'ZEBRA_LANGUAGE' => 'en', 'ZEBRA_TOKENIZER' => 'chr', @@ -461,6 +462,7 @@ my %valid_config_values = ( 'INSTALL_ZEBRA' => { 'yes' => 1, 'no' => 1 }, 'INSTALL_SRU' => { 'yes' => 1, 'no' => 1 }, 'AUTH_INDEX_MODE' => { 'grs1' => 1, 'dom' => 1 }, + 'BIB_INDEX_MODE' => { 'grs1' => 1, 'dom' => 1 }, 'ZEBRA_MARC_FORMAT' => { 'marc21' => 1, 'normarc' => 1, 'unimarc' => 1 }, # FIXME should generate from contents of distributation 'ZEBRA_LANGUAGE' => { 'en' => 1, 'fr' => 1, 'nb' => 1 }, # FIXME should generate 
from contents of distribution 'ZEBRA_TOKENIZER' => { chr => 1, icu => 1 }, @@ -502,12 +504,15 @@ if ($config{'INSTALL_ZEBRA'} eq "yes") { push @{ $pl_files->{'rewrite-config.PL'} }, ( 'blib/ZEBRA_CONF_DIR/etc/passwd', 'blib/ZEBRA_CONF_DIR/zebra-biblios.cfg', + 'blib/ZEBRA_CONF_DIR/zebra-biblios-dom.cfg', 'blib/ZEBRA_CONF_DIR/zebra-authorities.cfg', 'blib/ZEBRA_CONF_DIR/zebra-authorities-dom.cfg', 'blib/ZEBRA_CONF_DIR/explain-authorities.xml', 'blib/ZEBRA_CONF_DIR/explain-biblios.xml', 'blib/ZEBRA_CONF_DIR/retrieval-info-auth-grs1.xml', 'blib/ZEBRA_CONF_DIR/retrieval-info-auth-dom.xml', + 'blib/ZEBRA_CONF_DIR/retrieval-info-bib-grs1.xml', + 'blib/ZEBRA_CONF_DIR/retrieval-info-bib-dom.xml', ); push @{ $pl_files->{'rewrite-config.PL'} }, ( 'blib/SCRIPT_DIR/koha-zebra-ctl.sh', @@ -521,8 +526,11 @@ if ($config{'INSTALL_ZEBRA'} eq "yes") { ); } $config{'ZEBRA_AUTH_CFG'} = $config{'AUTH_INDEX_MODE'} eq 'dom' ? 'zebra-authorities-dom.cfg' : 'zebra-authorities.cfg'; + $config{'ZEBRA_BIB_CFG'} = $config{'BIB_INDEX_MODE'} eq 'dom' ? 'zebra-biblios-dom.cfg' : 'zebra-biblios.cfg'; $config{'AUTH_RETRIEVAL_CFG'} = $config{'AUTH_INDEX_MODE'} eq 'dom' ? 'retrieval-info-auth-dom.xml' : 'retrieval-info-auth-grs1.xml'; + $config{'BIB_RETRIEVAL_CFG'} = + $config{'BIB_INDEX_MODE'} eq 'dom' ? 'retrieval-info-bib-dom.xml' : 'retrieval-info-bib-grs1.xml'; } if ($config{'INSTALL_MODE'} ne "dev") { @@ -941,6 +949,20 @@ Primary language for Zebra indexing); $msg .= _add_valid_values_disp('ZEBRA_LANGUAGE', $valid_values); $config{'ZEBRA_LANGUAGE'} = _get_value('ZEBRA_LANGUAGE', $msg, $defaults->{'ZEBRA_LANGUAGE'}, $valid_values, $install_log_values); + $msg = q( +Koha can use one of two different indexing modes +for the MARC bibliographic records: + +grs1 - uses the Zebra GRS-1 filter, available + for legacy support +dom - uses the DOM XML filter; offers improved + functionality. 
+ +Bibliographic indexing mode); + $msg .= _add_valid_values_disp('BIB_INDEX_MODE', $valid_values); + $config{'BIB_INDEX_MODE'} = _get_value('BIB_INDEX_MODE', $msg, $defaults->{'BIB_INDEX_MODE'}, $valid_values, $install_log_values); + + $msg = q( Koha can use one of two different indexing modes for the MARC authorities records: diff --git a/etc/koha-conf.xml b/etc/koha-conf.xml index f5e2c0ffd6..e593fd5d85 100644 --- a/etc/koha-conf.xml +++ b/etc/koha-conf.xml @@ -27,7 +27,7 @@ __PAZPAR2_TOGGLE_XML_PRE__ tcp:@:__MERGE_SERVER_PORT__ __ZEBRA_DATA_DIR__/biblios - __ZEBRA_CONF_DIR__/zebra-biblios.cfg + __ZEBRA_CONF_DIR__/__ZEBRA_BIB_CFG__ __ZEBRA_CONF_DIR__/pqf.properties __PAZPAR2_TOGGLE_XML_POST__ @@ -35,11 +35,14 @@ __PAZPAR2_TOGGLE_XML_POST__ __ZEBRA_DATA_DIR__/biblios - __ZEBRA_CONF_DIR__/zebra-biblios.cfg + __ZEBRA_CONF_DIR__/__ZEBRA_BIB_CFG__ __ZEBRA_CONF_DIR__/pqf.properties + + @@ -88,6 +91,8 @@ __PAZPAR2_TOGGLE_XML_POST__ + + @@ -182,8 +187,11 @@ __PAZPAR2_TOGGLE_XML_POST__ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/zebradb/biblios/etc/dom-config.xml b/etc/zebradb/biblios/etc/dom-config.xml new file mode 100644 index 0000000000..9e09834635 --- /dev/null +++ b/etc/zebradb/biblios/etc/dom-config.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/zebradb/retrieval-info-bib-dom.xml b/etc/zebradb/retrieval-info-bib-dom.xml new file mode 100644 index 0000000000..3093c722b8 --- /dev/null +++ b/etc/zebradb/retrieval-info-bib-dom.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + diff --git a/etc/zebradb/retrieval-info-bib-grs1.xml b/etc/zebradb/retrieval-info-bib-grs1.xml new file mode 100644 index 0000000000..58f319df87 --- /dev/null +++ b/etc/zebradb/retrieval-info-bib-grs1.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/zebradb/zebra-biblios-dom.cfg b/etc/zebradb/zebra-biblios-dom.cfg new file mode 100644 index 
0000000000..a748ddd4b4 --- /dev/null +++ b/etc/zebradb/zebra-biblios-dom.cfg @@ -0,0 +1,55 @@ +# Simple Zebra configuration file that defines +# a database with MARCXML records. +# $Id: zebra.cfg,v 1.1.2.2 2006/05/09 12:03:16 rangi Exp $ +# +# Where are the config files located? +profilePath:__ZEBRA_CONF_DIR__/biblios/etc:__ZEBRA_CONF_DIR__/etc:__ZEBRA_CONF_DIR__/marc_defs/__ZEBRA_MARC_FORMAT__/biblios:__ZEBRA_CONF_DIR__/lang_defs/__ZEBRA_LANGUAGE__:__ZEBRA_CONF_DIR__/xsl +# modulePath - where to look for loadable zebra modules +modulePath: /usr/lib/idzebra-2.0/modules + +encoding: UTF-8 +# Files that describe the attribute sets supported. +attset: bib1.att +attset: explain.att +attset: gils.att + +# systag sysno rank + +# Specify record type +# group .recordType[ .name ]: type +# type is split into fundamental type. file-read-type . argument +# http://www.indexdata.dk/zebra/doc/zebra-cfg.tkl +# http://www.indexdata.dk/zebra/doc/grs.tkl + +recordtype: dom.__ZEBRA_CONF_DIR__/biblios/etc/dom-config.xml +marcxml.recordtype: dom.__ZEBRA_CONF_DIR__/biblios/etc/dom-config.xml +iso2709.recordtype: dom.__ZEBRA_CONF_DIR__/biblios/etc/dom-config-marc.xml + +recordId: (bib1,Local-number) +storeKeys:1 +storeData:1 + + +# Lock File Area +lockDir: __ZEBRA_LOCK_DIR__/biblios +perm.anonymous:ar +perm.__ZEBRA_USER__:rw +passwd: __ZEBRA_CONF_DIR__/etc/passwd +register: __ZEBRA_DATA_DIR__/biblios/register:20G +shadow: __ZEBRA_DATA_DIR__/biblios/shadow:20G + +# Temp File area for result sets +setTmpDir: __ZEBRA_DATA_DIR__/biblios/tmp + +# Temp File area for index program +keyTmpDir: __ZEBRA_DATA_DIR__/biblios/key + +# Approx. Memory usage during indexing +memMax: 50M +rank:rank-1 +truncmax: 1000000000 + +# Specifies the maximum number of records that will be sorted in a result set. +# If the result set contains more than that limit, the records after the limit +# will not be sorted. If omitted, the default value is 1,000. 
+sortmax: 1000 diff --git a/misc/koha-install-log b/misc/koha-install-log index 6f6a0eba19..d842484060 100644 --- a/misc/koha-install-log +++ b/misc/koha-install-log @@ -44,7 +44,9 @@ ZEBRA_RUN_DIR=__ZEBRA_RUN_DIR__ ZEBRA_MARC_FORMAT=__ZEBRA_MARC_FORMAT__ ZEBRA_LANGUAGE=__ZEBRA_LANGUAGE__ ZEBRA_AUTH_CFG=__ZEBRA_AUTH_CFG__ +ZEBRA_BIB_CFG=__ZEBRA_BIB_CFG__ AUTH_RETRIEVAL_CFG=__AUTH_RETRIEVAL_CFG__ +BIB_RETRIEVAL_CFG=__BIB_RETRIEVAL_CFG__ MERGE_SERVER_HOST=__MERGE_SERVER_HOST__ MERGE_SERVER_PORT=__MERGE_SERVER_PORT__ PAZPAR2_HOST=__PAZPAR2_HOST__ @@ -54,7 +56,8 @@ INSTALL_BASE=__INSTALL_BASE__ INSTALL_ZEBRA=__INSTALL_ZEBRA__ INSTALL_SRU=__INSTALL_SRU__ INSTALL_PAZPAR2=__INSTALL_PAZPAR2__ AUTH_INDEX_MODE=__AUTH_INDEX_MODE__ +BIB_INDEX_MODE=__BIB_INDEX_MODE__ RUN_DATABASE_TESTS=__RUN_DATABASE_TESTS__ PATH_TO_ZEBRA=__PATH_TO_ZEBRA__ USE_MEMCACHED=__USE_MEMCACHED__ diff --git a/rewrite-config.PL b/rewrite-config.PL index 967a546c29..083b3a898f 100644 --- a/rewrite-config.PL +++ b/rewrite-config.PL @@ -122,7 +122,9 @@ $prefix = $ENV{'INSTALL_BASE'} || "/usr"; '__ZEBRA_LANGUAGE__' => 'en', '__ZEBRA_TOKENIZER_STMT__' => 'charmap word-phrase-utf.chr', '__ZEBRA_AUTH_CFG__' => 'zebra-authorities.cfg', + '__ZEBRA_BIB_CFG__' => 'zebra-biblios.cfg', '__AUTH_RETRIEVAL_CFG__' => 'retrieval-info-auth-grs1.xml', + '__BIB_RETRIEVAL_CFG__' => 'retrieval-info-bib-grs1.xml', "__MERGE_SERVER_HOST__" => $myhost, "__MERGE_SERVER_PORT__" => '11001', "__PAZPAR2_HOST__" => $myhost, @@ -135,6 +137,7 @@ $prefix = $ENV{'INSTALL_BASE'} || "/usr"; "__PAZPAR2_TOGGLE_XML_PRE__" => '<!--', "__PAZPAR2_TOGGLE_XML_POST__" => '-->', "__AUTH_INDEX_MODE__" => 'grs1', + "__BIB_INDEX_MODE__" => 'grs1', "__RUN_DATABASE_TESTS__" => 'no', "__PATH_TO_ZEBRA__" => "", "__USE_MEMCACHED__" => 'no', -- 2.39.5