From 4bed77da08730d4e9c66d55d898531d39cb47b8a Mon Sep 17 00:00:00 2001 From: Mark Hofstetter Date: Thu, 11 Apr 2024 09:14:06 +0000 Subject: [PATCH] Bug 31652: Add geo-search MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch adds geosearch to Koha (using Elasticsearch 7). Elasticsearch search_mappings get new types to store lat/lon, which can be indexed from MARC 034$s and 034$t. There is a small change to the DB to allow a new value in search_field.type ENUM. The QueryBuilder is extended to allow for building advanced Elasticsearch queries (eg geo_distance) that cannot be represented in a simple string query. The UI for searching (including showing the results on an OSM/Leaflet map) is implemented in a separate plugin (https://github.com/HKS3/HKS3GeoSearch) Test Plan: * make sure you're running Elasticsearch 7 (eg via `curl http://es:9200?pretty | grep number`) * apply patch * go to a Framework, check Editor for 034$s and 034$t and save * go to some books (in the correct framework) and enter some lat and lon into 034$s and 034$t (for example lat=48.216, lon=16.395) * Run the elasticsearch indexer, maybe limited to the books you edited (-bn 123 -bn 456): misc/search_tools/rebuild_elasticsearch.pl -b -v * You can check if the indexing worked by inspecting the document in elasticsearch: * get the biblionumber (eg 123) * curl http://es:9200/koha_kohadev_biblios/_doc/123?pretty | grep -A5 geolocation * You should get back a JSON fragment containing the lat/lon you stored * You can query elasticsearch directly: * Run the following curl command, but adapt the value for lat/lng and/or the distance (in meters) * curl -X GET "http://es:9200/koha_kohadev_biblios/_search?pretty" -H 'Content-Type: application/json' -d '{"query": {"bool":{"must":{"match_all":{}},"filter":{"geo_distance":{"distance":100000,"geolocation":{"lat":48.2,"lon":16.4}}}}}}' * To run the search via Koha, you need to either install and use 
https://github.com/HKS3/HKS3GeoSearch or create a handcrafted query string: * handcrafted query string: * /cgi-bin/koha/opac-search.pl?advsearch=1&idx=geolocation&q=lat:48.25+lng:18.35+distance:100km&do=Search * HKS3GeoSearch * install the plugin and enable it * go to OPAC / Advanced Search * There is a new input box "Geographic Search" where you can enter lat/long/radius * On the search result page a map is shown with pins for each found biblioitem Sponsored-by: ZAMG - Zentralanstalt für Meteorologie und Geodynamik, Austria - https://www.zamg.ac.at/ Sponsored-by: Geosphere - https://www.geosphere.at/ Signed-off-by: Martin Renvoize Signed-off-by: Nick Clemens Additional finetuning: - Fix update and remove fixed fixme - Update test count as well - fix last small issues raised in Comment 23 Signed-off-by: Katrin Fischer --- Koha/SearchEngine/Elasticsearch.pm | 22 ++++++++ .../Elasticsearch/QueryBuilder.pm | 51 ++++++++++++++++++- Koha/SearchEngine/Elasticsearch/Search.pm | 1 + .../elasticsearch/field_config.yaml | 2 + .../searchengine/elasticsearch/mappings.yaml | 18 +++++++ .../atomicupdate/bug_31652_add_geo_search.pl | 15 ++++++ installer/data/mysql/kohastructure.sql | 2 +- .../searchengine/elasticsearch/mappings.tt | 5 ++ .../SearchEngine/Elasticsearch/QueryBuilder.t | 39 +++++++++++++- 9 files changed, 152 insertions(+), 3 deletions(-) create mode 100755 installer/data/mysql/atomicupdate/bug_31652_add_geo_search.pl diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 3f26cbc027..54f3ea8f72 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -212,6 +212,12 @@ sub get_elasticsearch_mappings { $es_type = 'year'; } elsif ($type eq 'callnumber') { $es_type = 'cn_sort'; + } elsif ($type eq 'geo_point') { + $es_type = 'geo_point'; + } + + if ($type eq 'geo_point') { + $name =~ s/_(lat|lon)$//; } if ($search) { @@ -735,6 +741,19 @@ sub marc_records_to_documents { } } + foreach my $field 
(@{$rules->{geo_point}}) { + next unless $record_document->{$field}; + my $geofield = $field; + next unless $geofield =~ s/_(lat|lon)$//; # no axis suffix: $1 would be stale, leave the field untouched + my $axis = $1; + my $vals = $record_document->{$field}; + for my $i (0 .. @$vals - 1) { + my $val = $record_document->{$field}[$i]; + $record_document->{$geofield}[$i]{$axis} = $val; + } + delete $record_document->{$field}; + } + # Remove duplicate values and collapse sort fields foreach my $field (keys %{$record_document}) { if (ref($record_document->{$field}) eq 'ARRAY') { @@ -1070,6 +1089,9 @@ sub _get_marc_mapping_rules { elsif ($type eq 'isbn') { push @{$rules->{isbn}}, $name; } + elsif ($type eq 'geo_point') { + push @{$rules->{geo_point}}, $name; + } elsif ($type eq 'boolean') { # boolean gets special handling, if value doesn't exist for a field, # it is set to false diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index edde18cc09..53541c4022 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -137,6 +137,7 @@ our %index_field_convert = ( ); my $field_name_pattern = '[\w\-]+'; my $multi_field_pattern = "(?:\\.$field_name_pattern)*"; +my $es_advanced_searches = []; =head2 get_index_field_convert @@ -245,6 +246,8 @@ sub build_query { or $display_library_facets eq 'holding' ) { $res->{aggregations}{holdingbranch} = { terms => { field => "holdingbranch__facet", size => $size } }; } + + $res = _rebuild_to_es_advanced_query($res) if @$es_advanced_searches; return $res; } @@ -930,6 +933,17 @@ operand. sub _create_query_string { my ( $self, @queries ) = @_; + $es_advanced_searches = []; + my @string_queries; + foreach my $q (@queries) { + if ($q->{field} && $q->{field} eq 'geolocation') { + push(@$es_advanced_searches, $q); + } else { + push(@string_queries, $q); + } + } + + @queries = @string_queries; map { my $otor = $_->{operator} ? $_->{operator} . 
' ' : ''; @@ -1083,7 +1097,6 @@ sub _fix_limit_special_cases { my @new_lim; foreach my $l (@$limits) { - # This is set up by opac-search.pl if ( $l =~ /^yr,st-numeric,ge[=:]/ ) { my ( $start, $end ) = @@ -1407,4 +1420,40 @@ sub _is_safe_to_auto_truncate { return 1; } +sub _rebuild_to_es_advanced_query { + my ($res) = @_; + my $query_string = $res->{query}->{query_string}; + $query_string->{query} = '*' unless $query_string->{query}; + delete $res->{query}->{query_string}; + + my %filter; + for my $advanced_query (@$es_advanced_searches) { + if ( $advanced_query->{field} eq 'geolocation') { + my ($lat, $lon, $distance) = map { $_ =~ /:(.*?)\*?$/ } split('\s+', $advanced_query->{operand}); # values are positional (lat, lon, distance); the trailing '*' is optional + $filter{geo_distance} = { + distance => $distance, + geolocation => { + lat => $lat, + lon => $lon, + } + }; + } + else { + warn "unknown advanced ElasticSearch query: ".join(', ',%$advanced_query); + } + } + + $res->{query} = { + bool => { + must => { + query_string => $query_string + }, + filter => \%filter, + } + }; + + return $res; +} + + 1; diff --git a/Koha/SearchEngine/Elasticsearch/Search.pm b/Koha/SearchEngine/Elasticsearch/Search.pm index 933b52f79d..12b90cd0ef 100644 --- a/Koha/SearchEngine/Elasticsearch/Search.pm +++ b/Koha/SearchEngine/Elasticsearch/Search.pm @@ -91,6 +91,7 @@ sub search { $query->{from} = $page * $query->{size}; } my $elasticsearch = $self->get_elasticsearch(); + my $results = eval { $elasticsearch->search( index => $self->index_name, diff --git a/admin/searchengine/elasticsearch/field_config.yaml b/admin/searchengine/elasticsearch/field_config.yaml index c87cbcae8d..41adcef7ed 100644 --- a/admin/searchengine/elasticsearch/field_config.yaml +++ b/admin/searchengine/elasticsearch/field_config.yaml @@ -41,6 +41,8 @@ search: ci_raw: type: keyword normalizer: icu_folding_normalizer + geo_point: + type: geo_point default: type: text analyzer: analyzer_standard diff --git a/admin/searchengine/elasticsearch/mappings.yaml 
b/admin/searchengine/elasticsearch/mappings.yaml index 9cd3db5c96..6011923d73 100644 --- a/admin/searchengine/elasticsearch/mappings.yaml +++ b/admin/searchengine/elasticsearch/mappings.yaml @@ -1855,6 +1855,24 @@ biblios: opac: 1 staff_client: 1 type: '' + geolocation_lat: + label: geolocation_lat + mappings: + - facet: '' + marc_field: 034s + marc_type: marc21 + sort: 0 + suggestible: '' + type: geo_point + geolocation_lon: + label: geolocation_lon + mappings: + - facet: '' + marc_field: 034t + marc_type: marc21 + sort: 0 + suggestible: '' + type: geo_point holdingbranch: facet_order: 8 label: holdinglibrary diff --git a/installer/data/mysql/atomicupdate/bug_31652_add_geo_search.pl b/installer/data/mysql/atomicupdate/bug_31652_add_geo_search.pl new file mode 100755 index 0000000000..f996d47c13 --- /dev/null +++ b/installer/data/mysql/atomicupdate/bug_31652_add_geo_search.pl @@ -0,0 +1,15 @@ +use Modern::Perl; + +return { + bug_number => "31652", + description => "Add geo-search: new value for search_field.type enum", + up => sub { + my ($args) = @_; + my ( $dbh, $out ) = @$args{qw(dbh out)}; + + $dbh->do( + q{ ALTER TABLE search_field MODIFY COLUMN type enum('','string','date','number','boolean','sum','isbn','stdno','year','callnumber','geo_point') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine' } + ); # MODIFY COLUMN replaces the whole column definition, so NOT NULL and COMMENT are restated to match kohastructure.sql + say $out "Added new value 'geo_point' to search_field.type enum"; + }, +}; diff --git a/installer/data/mysql/kohastructure.sql b/installer/data/mysql/kohastructure.sql index 7c805d536b..cfcce6f665 100644 --- a/installer/data/mysql/kohastructure.sql +++ b/installer/data/mysql/kohastructure.sql @@ -5644,7 +5644,7 @@ CREATE TABLE `search_field` ( `id` int(11) NOT NULL AUTO_INCREMENT, `name` varchar(255) NOT NULL COMMENT 'the name of the field as it will be stored in the search engine', `label` varchar(255) NOT NULL COMMENT 'the human readable name of the field, for display', - `type` enum('','string','date','number','boolean','sum','isbn','stdno','year','callnumber') NOT NULL COMMENT 'what type of data this 
holds, relevant when storing it in the search engine', + `type` enum('','string','date','number','boolean','sum','isbn','stdno','year','callnumber','geo_point') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine', `weight` decimal(5,2) DEFAULT NULL, `facet_order` tinyint(4) DEFAULT NULL COMMENT 'the order place of the field in facet list if faceted', `staff_client` tinyint(1) NOT NULL DEFAULT 1, diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt index 1821af2515..18baa39ad2 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt @@ -225,6 +225,11 @@ a.add, a.delete { [% ELSE %] [% END %] + [% IF search_field.type == "geo_point" %] + + [% ELSE %] + + [% END %] diff --git a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t index c2938000e4..b6b670d7d5 100755 --- a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t +++ b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t @@ -17,7 +17,7 @@ use Modern::Perl; -use Test::More tests => 7; +use Test::More tests => 8; use t::lib::Mocks; use_ok('Koha::SearchEngine::Elasticsearch::QueryBuilder'); @@ -333,4 +333,41 @@ subtest '_join_queries' => sub { is($query, '(homebranch:foo) AND itype:(BOOK OR EBOOK) AND location:(SHELF)', 'should join "mc-" parts with AND if not the same field'); }; +subtest '_create_query_string' => sub { + plan tests => 2; + + my $params = { + index => $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX, + }; + my $qb = Koha::SearchEngine::Elasticsearch::QueryBuilder->new($params); + + my @queries; + my $normal_query = [ + { + 'operand' => 'perl*', + 'operator' => undef + } + ]; + + @queries = $qb->_create_query_string(@$normal_query); + my $expect = 
['(perl*)']; + + is_deeply( \@queries, $expect, 'expected search structure' ); # compare contents; is() on arrays only compares element counts + + my $geo_query = [ + { + 'operator' => undef, + 'field' => 'geolocation', + 'type' => undef, + 'operand' => 'lat:48.25* lng:16.35* distance:100km*' + } + ]; + + @queries = $qb->_create_query_string(@$geo_query); + my $expect_geo = []; + is_deeply( \@queries, $expect_geo, 'expected geo search structure => empty normal search string' ); + +}; + + 1; -- 2.39.5