# Help/usage section. The enclosing function header and the construct that
# prints the text below are not visible in this excerpt.
# scriptname is the basename of $0 and is interpolated into the synopsis line.
# NOTE(review): $0 is unquoted, so a script path containing spaces would be
# word-split before reaching basename.
4 local scriptname=$(basename $0)
8 Index Koha records by chunks. It is useful when a record causes errors and
9 stops the indexing process. With this script, if indexing of one chunk fails,
10 that chunk is split into two or more chunks, and indexing continues on these chunks.
11 rebuild_zebra.pl is called only once to export records. Splitting and indexing
12 is handled by this script (using yaz-marcdump and zebraidx).
15 $scriptname -t type -l X [-o X] [-s X] [-d /export/dir] [-L /log/dir] [-r] [-f]
18 -o | --offset Offset parameter of rebuild_zebra.pl
19 -l | --length Length parameter of rebuild_zebra.pl
20 -s | --chunks-size Initial chunk size (number of records indexed at once)
21 -d | --export-dir Where rebuild_zebra.pl will export data
22 -L | --log-dir Log directory
23 -r | --remove-logs Clean log directory before start
24 -t | --type Record type ('biblios' or 'authorities')
25 -f | --force Don't ask for confirmation before start
26 -h | --help Display this help message
# Body of the recursive chunk indexer (it calls itself as `indexfile` below).
# The function header, the assignments of $file / $chunkssize and the closing
# then/else/fi/done keywords are not visible in this excerpt.
#
# Recursion floor: the recursive call halves the chunk size with integer
# division, so a failing chunk of size 1 arrives here with size 0 and is
# reported as a failure on that file.
34 if [ $chunkssize -lt 1 ]; then
35 echo "Fail on file $file"
# Chunk files share the prefix "<file>_<chunkssize>_"; the prefix is handed to
# yaz-marcdump via -s and the chunk size via -C.
38 local prefix="${file}_${chunkssize}_"
39 echo "Splitting file in chunks of $chunkssize records"
# The command is only built here as a string; where it is executed is not
# visible in this excerpt.
40 YAZMARCDUMP_CMD="$YAZMARCDUMP -n -s $prefix -C $chunkssize $file"
# Collect every path under the prefix's directory that matches the prefix
# followed by digits, sorted, as one space-separated list.
# NOTE(review): $dir and $prefix are unquoted and $prefix is used as a regex,
# so paths with spaces, globs or regex metacharacters will misbehave.
# NOTE(review): `dir` (and size, logfile, chunkfile below) are not declared
# local here, so they are shared across recursive calls - TODO confirm intent.
43 dir=$(dirname $prefix)
44 local files="$(find $dir -regex $prefix[0-9]+ | sort | tr '\n' ' ')"
45 for chunkfile in $files; do
46 echo "Indexing $chunkfile"
# Expected record count: number of "<!-- Record N offset ... -->" lines that
# yaz-marcdump -p prints for this chunk.
47 size=$($YAZMARCDUMP -p $chunkfile | grep '<!-- Record [0-9]\+ offset .* -->' | wc -l)
# One log file per chunk, named after the chunk file.
48 logfile="$LOGDIR/zebraidx.$(basename $chunkfile).log"
49 ZEBRAIDX_CMD="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g iso2709 update $chunkfile"
50 $ZEBRAIDX_CMD >$logfile 2>&1
# Success criterion: the zebraidx log must contain "Records: <size>".
# The test of grep's exit status is presumably on the line not shown here.
51 grep "Records: $size" $logfile >/dev/null 2>&1
53 echo "Indexing failed. Split file and continue..."
# Retry only the failing chunk, with half the chunk size (integer division).
54 indexfile $chunkfile $(($chunkssize/2))
# Commit step: the variable is reused for the commit command and its output is
# appended to whatever $logfile currently names.
56 ZEBRAIDX_CMD="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g iso2709 commit"
57 $ZEBRAIDX_CMD >> $logfile 2>&1
# Default export and log directories. The help text lists -d/--export-dir and
# -L/--log-dir; presumably those options override these values (the option
# parsing is not visible in this excerpt).
# NOTE(review): fixed, predictable paths under /tmp.
66 EXPORTDIR=/tmp/rebuild/export
67 LOGDIR=/tmp/rebuild/logs
# Argument validation and external tool discovery. The bodies of these tests
# (then/exit/fi) are not visible in this excerpt.
# NOTE(review): $HELP is unquoted - if it is empty or unset, `[ = "yes" ]` is
# a test syntax error rather than a clean false. TODO confirm HELP always gets
# a default in the option-parsing code not shown here.
116 if [ $HELP = "yes" ]; then
# --length is required. With LENGTH empty the unquoted test collapses to
# `[ -z ]`, which is true, so the check still fires.
121 if [ -z $LENGTH ]; then
122 echo "--length parameter is mandatory"
# Message for an unrecognised TYPE; the fallback assignment itself is not
# visible in this excerpt.
135 echo "'$TYPE' is an unknown type. Defaulting to 'biblios'"
# Resolve the external tools from PATH; an empty result is treated as
# "not found".
140 ZEBRAIDX=`which zebraidx`
141 if [ -z $ZEBRAIDX ]; then
142 echo "zebraidx not found"
146 YAZMARCDUMP=`which yaz-marcdump`
147 if [ -z $YAZMARCDUMP ]; then
148 echo "yaz-marcdump not found"
# rebuild_zebra.pl is looked up in the same directory as this script ($0).
# NOTE(review): $0 and $REBUILDZEBRA are unquoted; a path with spaces breaks
# both dirname and the -f test.
152 REBUILDZEBRA="`dirname $0`/rebuild_zebra.pl"
153 if [ ! -f $REBUILDZEBRA ]; then
154 echo "$REBUILDZEBRA: file not found"
# Print a summary of the effective settings (offset, length, chunk size,
# directories, log cleanup flag, record type) followed by the resolved paths
# of the three external programs.
160 echo "========================================================================="
161 echo "Start at offset: $OFFSET"
162 echo "Total number of records to index: $LENGTH"
163 echo "Initial chunk size: $CHUNKSSIZE"
164 echo "Export directory: $EXPORTDIR"
165 echo "Log directory: $LOGDIR"
166 echo "Remove logs before start? $RMLOGS"
167 echo "Type of record: $TYPE"
168 echo "-------------------------------------------------------------------------"
169 echo "zebraidx path: $ZEBRAIDX"
170 echo "yaz-marcdump path: $YAZMARCDUMP"
171 echo "rebuild_zebra path: $REBUILDZEBRA"
172 echo "========================================================================="
# Interactive confirmation, skipped when NOCONFIRM is "yes" (presumably set by
# -f/--force; the option parsing is not visible in this excerpt).
# NOTE(review): $NOCONFIRM is unquoted - an empty value makes this test a
# syntax error.
174 if [ $NOCONFIRM != "yes" ]; then
176 echo -n "Confirm ? [Y/n] "
# $response is presumably read from stdin on the line not shown here.
# An empty response does not enter this branch (default is yes); any non-empty
# response other than "yes" or "y" does.
# NOTE(review): an uppercase "Y", as advertised by the prompt, is not in the
# accepted list, and a multi-word response breaks the unquoted tests.
178 if [ $response ] && [ $response != "yes" ] && [ $response != "y" ]; then
# $confirm is assigned on lines not visible in this excerpt.
182 if [ $confirm = "n" ]; then
# Each $? below is the exit status of the immediately preceding command, which
# is not visible in this excerpt (presumably the creation of the directory
# named in the message).
188 if [ $? -ne 0 ]; then
189 echo "Failed to create directory $EXPORTDIR. Aborting."
194 if [ $? -ne 0 ]; then
195 echo "Failed to create directory $LOGDIR. Aborting."
# Optional log cleanup (see -r/--remove-logs in the help text); the removal
# command itself is not visible in this excerpt.
# NOTE(review): $RMLOGS is unquoted - an empty value makes this test a syntax
# error.
199 if [ $RMLOGS = "yes" ]; then
# Build the rebuild_zebra.pl command line: export to $EXPORTDIR for the given
# offset/length and skip indexing (--skip-index); indexing is done by this
# script. $TYPESWITCH is set on lines not visible in this excerpt.
# NOTE(review): the command is assembled as a single string, so any value
# containing spaces will be word-split when it is executed.
203 REBUILDZEBRA_CMD="$REBUILDZEBRA $TYPESWITCH -v -k -d $EXPORTDIR --offset $OFFSET --length $LENGTH --skip-index"
# NOTE(review): whether "\n" prints as a newline depends on the shell's echo
# (bash's builtin prints it literally without -e).
204 echo "\n$REBUILDZEBRA_CMD"
# Pick the exported records file under $EXPORTDIR: biblio/ or authority/.
# The branching on $TYPE is not visible in this excerpt; any other type is
# reported as unsupported.
210 EXPORTFILE="$EXPORTDIR/biblio/exported_records"
213 EXPORTFILE="$EXPORTDIR/authority/exported_records"
216 echo "Error: TYPE '$TYPE' is not supported"
# The Zebra config file is taken from the zebradb/ directory next to
# $KOHA_CONF. KOHA_CONF is not assigned in the visible lines - presumably it
# comes from the environment; TODO confirm it is checked before use.
220 CONFIGFILE="$(dirname $KOHA_CONF)/zebradb/zebra-$TYPE.cfg"
# Start chunked indexing of the exported file with the initial chunk size.
223 indexfile $EXPORTFILE $CHUNKSSIZE