3 # This inserts records from a Koha database into Elasticsearch
5 # Copyright 2014 Catalyst IT
7 # This file is part of Koha.
9 # Koha is free software; you can redistribute it and/or modify it under the
10 # terms of the GNU General Public License as published by the Free Software
11 # Foundation; either version 3 of the License, or (at your option) any later
14 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License along
19 # with Koha; if not, write to the Free Software Foundation, Inc.,
20 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 rebuild_elastic_search.pl - inserts records from a Koha database into Elasticsearch
28 B<rebuild_elastic_search.pl>
29 [B<-c|--commit>=C<count>]
36 Inserts records from a Koha database into Elasticsearch.
42 =item B<-c|--commit>=C<count>
44 Specify how many records will be batched up before they're added to Elasticsearch.
45 A higher value should be faster, but will use more RAM. Default is 5000.
49 Delete the index and recreate it before indexing.
51 =item B<-a|--authorities>
53 Index the authorities only. Combining this with B<-b> is the same as
54 specifying neither and so both get indexed.
58 Index the biblios only. Combining this with B<-a> is the same as
59 specifying neither and so both get indexed.
61 =item B<-bn|--bnumber>
63 Only index the supplied biblionumber, mostly for testing purposes. May be
64 repeated. This also applies to authorities via authid, so if you're using it,
65 you probably only want to do one or the other at a time.
69 By default, this program only emits warnings and errors. This option makes it
70 more talkative. Repeat it for even more detail, in particular when debugging.
87 use Koha::BiblioUtils;
88 use Koha::ElasticSearch::Indexer;
94 use Data::Dumper; # TODO remove
98 my ($delete, $help, $man);
99 my ($index_biblios, $index_authorities);
103 'c|commit=i' => \$commit,
104 'd|delete' => \$delete,
105 'a|authorities' => \$index_authorities,
106 'b|biblios' => \$index_biblios,
107 'bn|bnumber=i' => \@biblionumbers,
108 'v|verbose+' => \$verbose,
113 # Default is to do both
114 unless ($index_authorities || $index_biblios) {
115 $index_authorities = $index_biblios = 1;
118 pod2usage(1) if $help;
119 pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;
122 if ($index_biblios) {
123 _log(1, "Indexing biblios\n");
124 if (@biblionumbers) {
126 my $r = shift @biblionumbers;
127 return () unless defined $r;
128 return ($r, Koha::BiblioUtils->get_from_biblionumber($r, item_data => 1 ));
131 my $records = Koha::BiblioUtils->get_all_biblios_iterator();
136 do_reindex($next, $Koha::ElasticSearch::BIBLIOS_INDEX);
138 if ($index_authorities) {
139 _log(1, "Indexing authorities\n");
140 if (@biblionumbers) {
142 my $r = shift @biblionumbers;
143 return () unless defined $r;
144 my $a = Koha::Authority->get_from_authid($r);
145 return ($r, $a->record);
148 my $records = Koha::Authority->get_all_authorities_iterator();
153 do_reindex($next, $Koha::ElasticSearch::AUTHORITIES_INDEX);
157 my ( $next, $index_name ) = @_;
159 my $indexer = Koha::ElasticSearch::Indexer->new( { index => $index_name } );
162 # We know it's safe to not recreate the indexer because update_index
163 # hasn't been called yet.
164 $indexer->drop_index();
168 my $commit_count = $commit;
169 my ( @id_buffer, @commit_buffer );
170 while ( my $record = $next->() ) {
171 my $id = $record->id;
172 my $record = $record->record;
176 push @id_buffer, $id;
177 push @commit_buffer, $record;
178 if ( !( --$commit_count ) ) {
179 _log( 2, "Committing...\n" );
180 $indexer->update_index( \@id_buffer, \@commit_buffer );
181 $commit_count = $commit;
187 # There are probably uncommitted records
188 $indexer->update_index( \@id_buffer, \@commit_buffer );
189 _log( 1, "$count records indexed.\n" );
192 # Output progress information.
194 # _log($level, $msg);
196 # Outputs $msg if the verbosity setting is $level or more. No trailing
197 # newline is appended, so include one in $msg if it is wanted.
199 my ($level, $msg) = @_;
201 print $msg if ($verbose >= $level);