57424a9fdc
Complete rewrite of rebuild_zebra_sliced.zsh (renamed to .sh). Main improvements are: - both biblio and authority records are handled - records are exported only once. It also adds a --skip-index option to rebuild_zebra.pl that allows rebuild_zebra.pl to be used as an 'export only' script. Description: Index Koha records by chunks. This is useful when some records cause errors and stop the indexing process. With this script, if indexing of one chunk fails, the chunk is split into 2 (or 3) chunks, and indexing continues on these chunks. rebuild_zebra.pl is called only once to export records. Splitting and indexing are handled by this script (using yaz-marcdump and zebraidx). Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com> Signed-off-by: Paul Poulain <paul.poulain@biblibre.com>
223 lines
5.6 KiB
Bash
Executable file
223 lines
5.6 KiB
Bash
Executable file
#!/bin/sh
|
|
|
|
# usage
#
# Print the help text (description, synopsis and option list) on stdout.
# The script name shown in the text is the last path component of $0.
usage() {
    local scriptname
    # Parameter expansion instead of an unquoted `basename $0`, so a script
    # path containing spaces is handled correctly.
    scriptname=${0##*/}
    cat <<EOF
$scriptname

Index Koha records by chunks. It is useful when some record causes errors and
stop the indexation process. With this script, if indexation of one chunk fails,
chunk is splitted in two or more chunks, and indexation continue on these chunks.
rebuild_zebra.pl is called only once to export records. Splitting and indexing
is handled by this script (using yaz-marcdump and zebraidx).

Usage:
$scriptname -t type -l X [-o X] [-s X] [-d /export/dir] [-L /log/dir] [-r] [-f]
$scriptname -h

    -o | --offset         Offset parameter of rebuild_zebra.pl
    -l | --length         Length parameter of rebuild_zebra.pl
    -s | --chunks-size    Initial chunk size (number of records indexed at once)
    -d | --export-dir     Where rebuild_zebra.pl will export data
    -L | --log-dir        Log directory
    -r | --remove-logs    Clean log directory before start
    -t | --type           Record type ('biblios' or 'authorities')
    -f | --force          Don't ask for confirmation before start
    -h | --help           Display this help message
EOF
}
|
|
|
|
# indexfile FILE CHUNKSSIZE
#
# Split FILE into chunks of CHUNKSSIZE records with yaz-marcdump, then run
# "zebraidx update" on every chunk. A chunk counts as indexed when the
# zebraidx log contains "Records: N", N being the number of records that
# yaz-marcdump finds in the chunk; the update is then committed. Otherwise
# the chunk is re-split with half the chunk size and indexed recursively.
# Once the chunk size drops below 1 the file is reported as failed.
#
# Globals read: YAZMARCDUMP, ZEBRAIDX, CONFIGFILE, TYPE, LOGDIR
# Arguments:    $1 - file holding the exported records
#               $2 - number of records per chunk
# Outputs:      progress messages on stdout; one zebraidx log per chunk in
#               $LOGDIR
indexfile() {
    local file="$1"
    local chunkssize="$2"
    local prefix chunkfile size logfile

    if [ "$chunkssize" -lt 1 ]; then
        echo "Fail on file $file"
        return
    fi

    prefix="${file}_${chunkssize}_"
    echo "Splitting file in chunks of $chunkssize records"
    "$YAZMARCDUMP" -n -s "$prefix" -C "$chunkssize" "$file"

    for chunkfile in "$prefix"[0-9]*; do
        # Keep only "<prefix><digits>". This skips the unexpanded pattern
        # when nothing matched, and the sub-chunks ("<prefix>NNN_<size>_NNN")
        # left behind by recursive splits.
        case ${chunkfile#"$prefix"} in
            '' | *[!0-9]*) continue ;;
        esac

        echo "Indexing $chunkfile"
        # grep -c yields a bare number; "wc -l" pads its output on some
        # systems, which would break the "Records: N" match below.
        size=$("$YAZMARCDUMP" -p "$chunkfile" \
            | grep -c '<!-- Record [0-9][0-9]* offset .* -->')
        logfile="$LOGDIR/zebraidx.$(basename "$chunkfile").log"

        "$ZEBRAIDX" -c "$CONFIGFILE" -d "$TYPE" -g iso2709 update "$chunkfile" \
            >"$logfile" 2>&1

        if grep -q "Records: $size" "$logfile" 2>/dev/null; then
            "$ZEBRAIDX" -c "$CONFIGFILE" -d "$TYPE" -g iso2709 commit \
                >>"$logfile" 2>&1
        else
            echo "Indexing failed. Split file and continue..."
            indexfile "$chunkfile" $((chunkssize / 2))
        fi
    done
}
|
|
|
|
# Default settings; each one can be overridden from the command line.
OFFSET=0
LENGTH=
CHUNKSSIZE=10000
EXPORTDIR=/tmp/rebuild/export
LOGDIR=/tmp/rebuild/logs
RMLOGS=no
NOCONFIRM=no
TYPE=biblios
HELP=no

# _require_value OPTION ARGCOUNT
#
# Abort with an error and the help text when OPTION is the last word on the
# command line, i.e. the value it expects is missing (ARGCOUNT < 2).
_require_value() {
    if [ "$2" -lt 2 ]; then
        echo "Option $1 requires a value" >&2
        usage
        exit 1
    fi
}

# parse_args ARGS...
#
# Get parameters: store the command line options in the globals above.
# An unknown argument prints the help text and exits with status 1.
parse_args() {
    # Loop on the argument count rather than on "[ $1 ]", which stops
    # silently on an empty argument and breaks on one containing spaces.
    while [ $# -gt 0 ]; do
        case "$1" in
            -o | --offset)
                _require_value "$1" $#
                OFFSET="$2"
                shift
                ;;
            -l | --length)
                _require_value "$1" $#
                LENGTH="$2"
                shift
                ;;
            -s | --chunks-size)
                _require_value "$1" $#
                CHUNKSSIZE="$2"
                shift
                ;;
            -d | --export-dir)
                _require_value "$1" $#
                EXPORTDIR="$2"
                shift
                ;;
            -L | --log-dir)
                _require_value "$1" $#
                LOGDIR="$2"
                shift
                ;;
            -r | --remove-logs)
                RMLOGS=yes
                ;;
            -t | --type)
                _require_value "$1" $#
                TYPE="$2"
                shift
                ;;
            -f | --force)
                NOCONFIRM=yes
                ;;
            -h | --help)
                HELP=yes
                ;;
            *)
                usage
                exit 1
                ;;
        esac
        shift
    done
}

parse_args "$@"
|
|
|
|
# Show the help text and stop when it was requested.
if [ "$HELP" = "yes" ]; then
    usage
    exit 0
fi

# The number of records to export has no default.
if [ -z "$LENGTH" ]; then
    echo "--length parameter is mandatory" >&2
    exit 1
fi

# Select the rebuild_zebra.pl switch matching the record type; an unknown
# type falls back to biblios.
TYPESWITCH=
case "$TYPE" in
    biblios)
        TYPESWITCH=-b
        ;;
    authorities)
        TYPESWITCH=-a
        ;;
    *)
        echo "'$TYPE' is an unknown type. Defaulting to 'biblios'"
        TYPESWITCH=-b
        TYPE=biblios
        ;;
esac

# Locate the external tools. "command -v" is the POSIX lookup; "which" is
# not standardized.
ZEBRAIDX=$(command -v zebraidx)
if [ -z "$ZEBRAIDX" ]; then
    echo "zebraidx not found" >&2
    exit 1
fi

YAZMARCDUMP=$(command -v yaz-marcdump)
if [ -z "$YAZMARCDUMP" ]; then
    echo "yaz-marcdump not found" >&2
    exit 1
fi

# rebuild_zebra.pl is looked up in the directory of this script.
REBUILDZEBRA="$(dirname "$0")/rebuild_zebra.pl"
if [ ! -f "$REBUILDZEBRA" ]; then
    echo "$REBUILDZEBRA: file not found" >&2
    exit 1
fi
|
|
|
|
# Show the effective settings before anything is written.
echo ""
echo "Configuration"
echo "========================================================================="
echo "Start at offset: $OFFSET"
echo "Total number of records to index: $LENGTH"
echo "Initial chunk size: $CHUNKSSIZE"
echo "Export directory: $EXPORTDIR"
echo "Log directory: $LOGDIR"
echo "Remove logs before start? $RMLOGS"
echo "Type of record: $TYPE"
echo "-------------------------------------------------------------------------"
echo "zebraidx path: $ZEBRAIDX"
echo "yaz-marcdump path: $YAZMARCDUMP"
echo "rebuild_zebra path: $REBUILDZEBRA"
echo "========================================================================="

# Ask for confirmation unless --force was given. An empty answer means yes;
# anything other than y/yes (in the usual capitalizations) stops the script
# with status 0.
if [ "$NOCONFIRM" != "yes" ]; then
    # printf instead of "echo -n", which is not portable under /bin/sh.
    printf 'Confirm ? [Y/n] '
    read -r response
    case "$response" in
        '' | y | Y | yes | Yes | YES)
            # The prompt advertises "Y", so the capitalized forms are
            # accepted as well.
            ;;
        *)
            exit 0
            ;;
    esac
fi
|
|
|
|
# The zebraidx configuration file is derived from $KOHA_CONF, so stop before
# doing any work when it is not set.
if [ -z "$KOHA_CONF" ]; then
    echo "KOHA_CONF is not set. Aborting." >&2
    exit 1
fi

if ! mkdir -p "$EXPORTDIR"; then
    echo "Failed to create directory $EXPORTDIR. Aborting." >&2
    exit 1
fi

if ! mkdir -p "$LOGDIR"; then
    echo "Failed to create directory $LOGDIR. Aborting." >&2
    exit 1
fi

if [ "$RMLOGS" = "yes" ]; then
    # ${LOGDIR:?} aborts rather than expanding to "/*.log" if LOGDIR is empty.
    rm -f "${LOGDIR:?}"/*.log
fi

# Export the records once, without indexing them (--skip-index). The command
# is shown first; printf is used because the handling of "\n" by echo
# differs between shells.
printf '\n%s\n' "$REBUILDZEBRA $TYPESWITCH -v -k -d $EXPORTDIR --offset $OFFSET --length $LENGTH --skip-index"
if ! "$REBUILDZEBRA" "$TYPESWITCH" -v -k -d "$EXPORTDIR" \
    --offset "$OFFSET" --length "$LENGTH" --skip-index; then
    echo "Export of $TYPE records failed. Aborting." >&2
    exit 1
fi

# File under the export directory that is handed to indexfile for this
# record type.
EXPORTFILE=
case "$TYPE" in
    biblios)
        EXPORTFILE="$EXPORTDIR/biblio/exported_records"
        ;;
    authorities)
        EXPORTFILE="$EXPORTDIR/authority/exported_records"
        ;;
    *)
        echo "Error: TYPE '$TYPE' is not supported" >&2
        exit 1
        ;;
esac

# zebraidx configuration for this record type, located next to $KOHA_CONF.
CONFIGFILE="$(dirname "$KOHA_CONF")/zebradb/zebra-$TYPE.cfg"

indexfile "$EXPORTFILE" "$CHUNKSSIZE"
|