From 7b64e4a8aff07a94b3cc7e98a22a2ef9d5a0f2f3 Mon Sep 17 00:00:00 2001 From: tgarip1957 Date: Fri, 25 Aug 2006 21:17:20 +0000 Subject: [PATCH] New indexing stylsheets for zebra we now use stylesheets for indexing and retrieval --- misc/zebra/usmarc/bib1.att | 110 ----- misc/zebra/usmarc/collection.abs | 106 ----- misc/zebra/usmarc/explain.att | 27 -- misc/zebra/usmarc/explain.xml | 11 + misc/zebra/usmarc/filter_alvis_conf.xml | 37 ++ misc/zebra/usmarc/identity.xsl | 37 ++ misc/zebra/usmarc/koha2MARC21slimbiblios.xsl | 25 ++ misc/zebra/usmarc/koha2MARC21slimholdings.xsl | 28 ++ misc/zebra/usmarc/koha2index.xsl | 400 ++++++++++++++++++ misc/zebra/usmarc/record.abs | 105 ----- misc/zebra/usmarc/usmarc.abs | 46 -- misc/zebra/usmarc/usmarc.mar | 3 - misc/zebra/usmarc/usmarc.tag | 13 - misc/zebra/usmarc/zebra.cfg | 78 ++-- misc/zebra/usmarc/zebra.xsl | 49 +++ 15 files changed, 614 insertions(+), 461 deletions(-) delete mode 100644 misc/zebra/usmarc/bib1.att delete mode 100644 misc/zebra/usmarc/collection.abs delete mode 100644 misc/zebra/usmarc/explain.att create mode 100644 misc/zebra/usmarc/explain.xml create mode 100644 misc/zebra/usmarc/filter_alvis_conf.xml create mode 100644 misc/zebra/usmarc/identity.xsl create mode 100644 misc/zebra/usmarc/koha2MARC21slimbiblios.xsl create mode 100644 misc/zebra/usmarc/koha2MARC21slimholdings.xsl create mode 100644 misc/zebra/usmarc/koha2index.xsl delete mode 100644 misc/zebra/usmarc/record.abs delete mode 100644 misc/zebra/usmarc/usmarc.abs delete mode 100644 misc/zebra/usmarc/usmarc.mar delete mode 100644 misc/zebra/usmarc/usmarc.tag create mode 100644 misc/zebra/usmarc/zebra.xsl diff --git a/misc/zebra/usmarc/bib1.att b/misc/zebra/usmarc/bib1.att deleted file mode 100644 index 506bea940e..0000000000 --- a/misc/zebra/usmarc/bib1.att +++ /dev/null @@ -1,110 +0,0 @@ -# $Id$ -# Bib-1 Attribute Set -name bib1 -reference Bib-1 - -att 1 Personal-name -att 2 Corporate-name -att 3 Conference-name -att 4 Title -att 5 Title-series -att 6 Title-uniform -att 7 ISBN -att 8 ISSN -att 9 LC-card-number -att 10 BNB-card-number -att 11 BGF-number -att 12 Local-number -att 13 Dewey-classification -att 14 UDC-classification -att 15 Bliss-classification -att 16 LC-call-number -att 17 NLM-call-number -att 18 NAL-call-number -att 19 MOS-call-number -att 20 Local-classification -att 21 Subject-heading -att 22 Subject-Rameau -att 23 BDI-index-subject -att 24 INSPEC-subject -att 25 MESH-subject -att 26 PA-subject -att 27 LC-subject-heading -att 28 RVM-subject-heading -att 29 Local-subject-index -att 30 Date -att 31 Date-of-publication -att 32 Date-of-acquisition -att 33 Title-key -att 34 Title-collective -att 35 Title-parallel -att 36 Title-cover -att 37 Title-added-title-page -att 38 Title-caption -att 39 Title-running -att 40 Title-spine -att 41 Title-other-variant -att 42 Title-former -att 43 Title-abbreviated -att 44 Title-expanded -att 45 Subject-precis -att 46 Subject-rswk -att 47 Subject-subdivision -att 48 Number-natl-biblio -att 49 Number-legal-deposit -att 50 Number-govt-pub -att 51 Number-music-publisher -att 52 Number-db -att 53 Identifier-local-call -att 54 Code-language -att 55 Code-geographic -att 56 Code-institution -att 57 Name-and-title -att 58 Name-geographic -att 59 Place-publication -att 60 CODEN -att 61 Microform-generation -att 62 Abstract -att 63 Note -att 1000 Author-title -att 1001 Record-type -att 1002 Name -att 1003 Author -att 1004 Author-name-personal -att 1005 Author-name-corporate -att 1006 Author-name-conference -att 1007 Identifier-standard -att 1008 Subject-LC-childrens -att 1009 Subject-name-personal -att 1010 Body-of-text -att 1011 Date/time-added-to-db -att 1012 Date/time-last-modified -att 1013 Authority/format-id -att 1014 Concept-text -att 1015 Concept-reference -att 1016 Any 1016,4,1005,62 -att 1017 Server-choice -att 1018 Publisher -att 1019 Record-source -att 1020 Editor -att 1021 Bib-level -att 1022 Geographic-class -att 1023 Indexed-by -att 1024 Map-scale -att 1025 Music-key -att 1026 Related-periodical -att 1027 Report-number -att 1028 Stock-number -att 1030 Thematic-number -att 1031 Material-type -att 1032 Doc-id -att 1033 Host-item -att 1034 Content-type -att 1035 Anywhere -att 1036 Author-Title-Subject - -## My extra definitions to handle LC sorting and other holdings data -att 8000 Holding-branch -att 8001 LCalpha -att 8002 LCnumber -att 8005 Shelving \ No newline at end of file diff --git a/misc/zebra/usmarc/collection.abs b/misc/zebra/usmarc/collection.abs deleted file mode 100644 index af90628b56..0000000000 --- a/misc/zebra/usmarc/collection.abs +++ /dev/null @@ -1,106 +0,0 @@ -# $Id$ - -# This is a fairly simple example of a set of MARC21 indexing rules. It -# results in a server which provides a passable Bath level 0 and 1 service -# (author, title, subject, keyword and exact services). Feel free to -# elaborate on it, and if you do, please consider sharing your additions. -# NOTE: This is designed to be used with the grs.marcxml input filter -# for ISO2709 (ANSI Z39.2) or grs.xml for MARCXML-formatted records. It -# won't work for the old grs.marc input filter, which yields a different -# internal structure. -encoding utf-8 -name marc21 -attset bib1.att -attset gils.att -esetname F @ -esetname B @ - -marc usmarc.mar - -xpath disable - -all any -melm 090$c identifier-standard,identifier-standard:p -melm 020$a ISBN:w,ISBN:s -melm 022$a ISSN:w,ISBN:w -melm 050 LC-call-number,LC-call-number:s -melm 082 Dewey-classification:w,Dewey-classification:s -melm 090$c Local-number:w -melm 100$9 Cross-Reference !:w -melm 100 author,author:p,author:s -melm 110 author -melm 111 author -melm 130 title -melm 240 title,title:p -melm 242 title,title:p -melm 243 title,title:p -melm 245$a title:w,title:s -melm 245$c author -melm 245 title,title:p -melm 246 title,title:p -melm 247 title,title:p -melm 260$a Place-publication !:w,!:p -melm 260$b Publisher,Publisher:w,Publisher:p,Publisher:s -melm 260$c Date,Date:y,Date:s -melm 260 Place-publication -melm 300$f Material-type,Material-type:w -melm 400$t title,author -melm 400 author -melm 410$t title,author -melm 410 author -melm 411$t title,author -melm 411 author -melm 440$a title,title:p -melm 440 title -melm 490$a title,title:p -melm 490 title -melm 600$t title -melm 600 subject-heading,subject-heading:p -melm 610$t title -melm 610 subject-heading -melm 611$t title -melm 611 subject-heading -melm 630 subject-heading -melm 650$9 Controlled-Subject-Index -melm 650 subject-heading,subject-heading:p -melm 651$9 Controlled-Subject-Index -melm 651 subject-heading,subject-heading:p -melm 652$9 Controlled-Subject-Index -melm 652 subject-heading,subject-heading:p -melm 653$9 Controlled-Subject-Index -melm 653 subject-heading,subject-heading:p -melm 654$9 Controlled-Subject-Index -melm 654 subject-heading -melm 655$9 Controlled-Subject-Index -melm 655 subject-heading -melm 656$9 Controlled-Subject-Index -melm 656 subject-heading -melm 657$9 Controlled-Subject-Index -melm 657 subject-heading -melm 650$9 Controlled-Subject-Index - -melm 700$9 Cross-Reference -melm 700$t title,author -melm 700$a author,author:p -melm 700 author -melm 710$t title,author -melm 710$a author,author:p -melm 710 author -melm 711$t title,author -melm 711 author -melm 730 title -melm 740 title -melm 800$t title,author -melm 800 author -melm 810$t title,author -melm 810 author -melm 811$t title,author -melm 811 author -melm 830 title -melm 942$c Material-type -melm 952$a Local-classification -melm 952$b Record-source -melm 952$d Host-item -melm 952$p Stock-number -melm 952$v Date-of-acquisition,Date-of-acquisition:d,Date-of-acquisition:s -melm 952 Local-classification diff --git a/misc/zebra/usmarc/explain.att b/misc/zebra/usmarc/explain.att deleted file mode 100644 index 04eb034c76..0000000000 --- a/misc/zebra/usmarc/explain.att +++ /dev/null @@ -1,27 +0,0 @@ -# The Explain Attribute Set -# -# $Id$ -name explain -reference Exp-1 - -att 1 ExplainCategory -att 2 HumanStringLanguage -att 3 DatabaseName -att 4 Targetname -att 5 AttributeSetOID -att 6 RecordSyntaxOID -att 7 TagSetOID -att 8 ExtendedServicesOID -att 9 DateAdded -att 10 DateChanged -att 11 DateExpired -att 12 ElementSetName -att 13 ProcessingContext -att 14 ProcessingName -att 15 TermListName -att 16 SchemaOID -att 17 Producer -att 18 Supplier -att 19 Availability -att 20 Proprietary -att 21 UserFee diff --git a/misc/zebra/usmarc/explain.xml b/misc/zebra/usmarc/explain.xml new file mode 100644 index 0000000000..0358a2c3af --- /dev/null +++ b/misc/zebra/usmarc/explain.xml @@ -0,0 +1,11 @@ + + + localhost + 9999 + biblios + + + My Database + My database description + + diff --git a/misc/zebra/usmarc/filter_alvis_conf.xml b/misc/zebra/usmarc/filter_alvis_conf.xml new file mode 100644 index 0000000000..80ba38c210 --- /dev/null +++ b/misc/zebra/usmarc/filter_alvis_conf.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + diff --git a/misc/zebra/usmarc/identity.xsl b/misc/zebra/usmarc/identity.xsl new file mode 100644 index 0000000000..e26373c8d9 --- /dev/null +++ b/misc/zebra/usmarc/identity.xsl @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + diff --git a/misc/zebra/usmarc/koha2MARC21slimbiblios.xsl b/misc/zebra/usmarc/koha2MARC21slimbiblios.xsl new file mode 100644 index 0000000000..173bb96899 --- /dev/null +++ b/misc/zebra/usmarc/koha2MARC21slimbiblios.xsl @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/misc/zebra/usmarc/koha2MARC21slimholdings.xsl b/misc/zebra/usmarc/koha2MARC21slimholdings.xsl new file mode 100644 index 0000000000..6bdc515161 --- /dev/null +++ b/misc/zebra/usmarc/koha2MARC21slimholdings.xsl @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/misc/zebra/usmarc/koha2index.xsl b/misc/zebra/usmarc/koha2index.xsl new file mode 100644 index 0000000000..15fbcb481f --- /dev/null +++ b/misc/zebra/usmarc/koha2index.xsl @@ -0,0 +1,400 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/misc/zebra/usmarc/record.abs b/misc/zebra/usmarc/record.abs deleted file mode 100644 index 932035b198..0000000000 --- a/misc/zebra/usmarc/record.abs +++ /dev/null @@ -1,105 +0,0 @@ -# $Id$ - -# This is a fairly simple example of a set of MARC21 indexing rules. It -# results in a server which provides a passable Bath level 0 and 1 service -# (author, title, subject, keyword and exact services). Feel free to -# elaborate on it, and if you do, please consider sharing your additions. -# NOTE: This is designed to be used with the grs.marcxml input filter -# for ISO2709 (ANSI Z39.2) or grs.xml for MARCXML-formatted records. It -# won't work for the old grs.marc input filter, which yields a different -# internal structure. -encoding utf-8 -name marc21 -attset bib1.att -attset gils.att -esetname F @ -esetname B @ - -marc usmarc.mar -systag sysno rank -xpath disable - -all any -melm 020$a ISBN:w -melm 022$a ISSN:w,ISBN:w -melm 050$b LC-call-number:s -melm 050 LC-call-number -melm 090$a LCalpha:s -melm 090$b LCnumber:s -melm 090$c Identifier-standard:w -melm 100$9 Cross-Reference !:w -melm 100$a author,author:p,author:s -melm 100 author -melm 110 author -melm 111 author -melm 130 title -melm 240 title,title:p -melm 242 title,title:p -melm 243 title,title:p -melm 245$a title:w,title:p,title:s -melm 245$c author -melm 245 title,title:p -melm 246 title,title:p -melm 247 title,title:p -melm 260$a Place-publication !:w,!:p -melm 260$b Publisher !:w,!:p -melm 260$c Date !:y -melm 260 Place-publication -melm 400$t title,author -melm 400 author -melm 410$t title,author -melm 410 author -melm 411$t title,author -melm 411 author -melm 440$a title,title:p -melm 440 title -melm 490$a title,title:p -melm 490 title -melm 600$t title -melm 600 subject-heading,subject-heading:p -melm 610$t title -melm 610 subject-heading -melm 611$t title -melm 611 subject-heading -melm 630 subject-heading -melm 650$9 Controlled-Subject-Index -melm 650 subject-heading,subject-heading:p -melm 651$9 Controlled-Subject-Index -melm 651 subject-heading,subject-heading:p -melm 652$9 Controlled-Subject-Index -melm 652 subject-heading,subject-heading:p -melm 653$9 Controlled-Subject-Index -melm 653 subject-heading,subject-heading:p -melm 654$9 Controlled-Subject-Index -melm 654 subject-heading -melm 655$9 Controlled-Subject-Index -melm 655 subject-heading -melm 656$9 Controlled-Subject-Index -melm 656 subject-heading -melm 657$9 Controlled-Subject-Index -melm 657 subject-heading - -melm 700$9 Cross-Reference -melm 700$t title,author -melm 700$a author,author:p -melm 700 author -melm 710$t title,author -melm 710$a author,author:p -melm 710 author -melm 711$t title,author -melm 711 author -melm 730 title -melm 740 title -melm 800$t title,author -melm 800 author -melm 810$t title,author -melm 810 author -melm 811$t title,author -melm 811 author -melm 830 title -melm 952$a Local-classification -melm 952$b Holding-branch -melm 952$d Shelving -melm 952$p Stock-number -melm 952$v Date-of-acquisition:d - diff --git a/misc/zebra/usmarc/usmarc.abs b/misc/zebra/usmarc/usmarc.abs deleted file mode 100644 index f000cc2417..0000000000 --- a/misc/zebra/usmarc/usmarc.abs +++ /dev/null @@ -1,46 +0,0 @@ -# $Id$ -name usmarc -reference USmarc -attset bib1.att -tagset usmarc.tag - -marc usmarc.mar - -esetname B usmarc-b.est -esetname F @ - -# 020a is isbn -elm 020 isbn - -elm 020/? isbn !:w -elm 020/?/a isbn !:w,!:p - -# 090 is mapped to biblionumber in Koha -elm 090 Local-number - -elm 090/? Local-number - -elm 090/?/c Local-number !:w - -# All 245 subfields mapped to title (word) and -# 245 subfield a mapped to tile (phrase). -elm 245 title - -elm 245/? title !:w -elm 245/?/a title !:w,!:p - -# 100 mapped to Author-name-personal and Author. -elm 100 Author-name-personal - -elm 100/? Author-name-personal !:w,!:p,Author:w,Author:p - -# 110 mapped to Author-name-corporate and Author -elm 110 Author-name-corporate - -elm 110/? Author-name-corporate !:w,!:p,Author:w,Author:p - -# 111 mapped to Author-name-conference and Author -elm 111 Author-name-conference - -elm 111/? Author-name-conference !:w,!:p,Author:w,Author:p - -# Tag 260 subfield a mapped to Place-publication -elm 260 Place-publication - -elm 260/? Place-publication - -elm 260/?/a Place-publication !:w -elm 260/?/b Publisher !:w -elm 260/?/c Date !:w - diff --git a/misc/zebra/usmarc/usmarc.mar b/misc/zebra/usmarc/usmarc.mar deleted file mode 100644 index 25559682a7..0000000000 --- a/misc/zebra/usmarc/usmarc.mar +++ /dev/null @@ -1,3 +0,0 @@ -# $Id$ -name usmarc -reference USmarc diff --git a/misc/zebra/usmarc/usmarc.tag b/misc/zebra/usmarc/usmarc.tag deleted file mode 100644 index 4a0b9cd54d..0000000000 --- a/misc/zebra/usmarc/usmarc.tag +++ /dev/null @@ -1,13 +0,0 @@ -# Pseudo-tagset for USMARC -# -# $Id$ - -name usmarc -type 4 -include tagsetm.tag - -#tag 1 a string -#tag 2 b string -tag 245 245 string -tag 100 100 string -tag 090 c string diff --git a/misc/zebra/usmarc/zebra.cfg b/misc/zebra/usmarc/zebra.cfg index 31607c0dca..eac709359e 100644 --- a/misc/zebra/usmarc/zebra.cfg +++ b/misc/zebra/usmarc/zebra.cfg @@ -1,65 +1,41 @@ # Simple Zebra configuration file that defines -# a database with USMARC records for Koha -# Joshua Ferraro < jmf at liblime dot com > +# a database with MARCXML records. +# $Id$ # +# Where are the config files located? +profilePath: ${srcdir:-.}:${srcdir:-.}/tab +encoding: UTF-8 +# Files that describe the attribute sets supported. +attset: bib1.att +attset: explain.att +attset:gils.att +###Zebra path to index folder +modulePath: /zebra/index/.libs +# Specify record type -## EDIT TO MATCH YOUR LOCAL CONFIG -# name of the database (should match koha.conf's zebradb entry) -database: kohaplugin +recordtype: alvis.filter_alvis_conf.xml -# Specifies the location of the various register files that Zebra # uses to represent your databases. See: # http://indexdata.dk/zebra/doc/register-location.tkl +#database:kohalis +storeKeys:1 +storeData:1 -register: /home/kohaplugin/register:4g -# Enables the safe update facility of Zebra, and tells the system # where to place the required, temporary files. -shadow /home/kohaplugin/shadow:4g # Lock File Area -lockDir: /home/kohaplugin/lock +lockDir: lock +perm.anonymous:r +perm.author:rw +passw.c:kohalis +#shadow +#register: /register:500M +#shadow: /zebrashadow:1G # Temp File area for result sets -setTmpDir: /home/kohaplugin/tmp +setTmpDir:tmp # Temp File area for index program -keyTmpDir: /home/kohaplugin/tmp +keyTmpDir:tmp # Approx. Memory usage during indexing -memMax: 400 - -# Where are the config files located? -profilePath: ${srcdir:-.}:/usr/share/idzebra/tab/ - -# set our encoding to utf-8 -encoding: UTF-8 - -# we're using ranking -systag rank rank -systag sysno sysno - -# permissions (add passwords eventually) -perm.anonymous: rw - -# specifies how the records are to be identified when updating -recordId: (bib1,Identifier-standard) - -# key info is stored so we can update/delete records (File Record IDs) -storeKeys:1 - -# records are stored internally -storeData:1 - -# Approx. Memory usage during indexing -memMax: 400 - -# Files that describe the attribute sets supported. -attset: bib1.att -attset: explain.att -attset: gils.att - -# Specify record type -# the syntax is: group.recordType[.name]:type -# so in this case, iso2709 is the group name -iso2709.recordType:grs.marcxml.record -recordType: grs.xml -#iso2079.recordType:grs.marcxml.collection -#recordTyle: grs.xml.collection +memMax: 200M +rank:rank-1 \ No newline at end of file diff --git a/misc/zebra/usmarc/zebra.xsl b/misc/zebra/usmarc/zebra.xsl new file mode 100644 index 0000000000..8f7bbd9b62 --- /dev/null +++ b/misc/zebra/usmarc/zebra.xsl @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + -- 2.20.1