From 8c7230b8024828edb96838f87cea0101155fdccb Mon Sep 17 00:00:00 2001 From: Frank Hansen Date: Tue, 15 Mar 2022 15:50:18 +0000 Subject: [PATCH] Bug 30280: Add support for subject headings from different thesaurus when using Elasticsearch MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch allows subject headings from different thesaurus to co-exist in Koha. Test plan: 1. Enable Elasticsearch as the search engine (SearchEngine system preference = Elasticsearch) and reindex (koha-elasticsearch --rebuild -d -b -a kohadev). 2. Load sample authority records attached to the bug (in this case the authorities.mrc file was saved in the koha directory - adjust file location as appropriate): misc/migration_tools/bulkmarcimport.pl -v -a -file authorities.mrc -insert -c=MARC21 3. Load the sample bibliographic record attached to the bug: misc/migration_tools/bulkmarcimport.pl -v -b -file biblios.mrc -insert -c=MARC21 4. Search for ISBN 0704328623 and verify that the subject terms are not linked to any authority records (if you hover over the link terms, all links should look like ../cgi-bin/koha/catalogue/search.pl?q=su:"subjectterm" - none should have any ?q=an:XXX (where XXX = authority record ids) in the link). 5. Apply the patches. 6. Reset the Elasticsearch mappings (Administration > Catalog > Search engine configuration (Elasticsearch)). 7. Reindex: koha-elasticsearch --rebuild -d -b -a kohadev 8. Link bibliographic records to authority records: misc/link_bibs_to_authorities.pl -v -l 9. Repeat the search in step 4 (or refresh the record details page) and verify that the first three Feminism headings[1] are linked to an authority record (should have ?q=an:XXX (where XXX = different authority record IDs for the various feminism authority records linked to a specific thesaurus)). 10. 
Reindex: koha-elasticsearch --rebuild -d -b -a kohadev [1] Links for subject terms for step 9 in order are (the authority ids may be different depending on how you are testing): Subject(s): Feminism | feminism | Feminism | Feminism | Idéhistoria | Litteratur http://127.0.0.1:8081/cgi-bin/koha/catalogue/search.pl?q=an:1709 http://127.0.0.1:8081/cgi-bin/koha/catalogue/search.pl?q=an:1710 http://127.0.0.1:8081/cgi-bin/koha/catalogue/search.pl?q=an:1708 http://127.0.0.1:8081/cgi-bin/koha/catalogue/search.pl?q=su:"Feminism" http://127.0.0.1:8081/cgi-bin/koha/catalogue/search.pl?q=su:"Idéhistoria" http://127.0.0.1:8081/cgi-bin/koha/catalogue/search.pl?q=su:"Litteratur" MARC info: 650 #7 - SUBJECT ADDED ENTRY--TOPICAL TERM a Topical term or geographic name entry element Feminism 0 Authority record control number or standard number https://id.kb.se/term/sao/Feminism 2 Source of heading or term sao 650 #7 - SUBJECT ADDED ENTRY--TOPICAL TERM a Topical term or geographic name entry element feminism 2 Source of heading or term bnb 650 #0 - SUBJECT ADDED ENTRY--TOPICAL TERM a Topical term or geographic name entry element Feminism 650 #4 - SUBJECT ADDED ENTRY--TOPICAL TERM a Topical term or geographic name entry element Feminism 650 #4 - SUBJECT ADDED ENTRY--TOPICAL TERM a Topical term or geographic name entry element Idéhistoria 650 #4 - SUBJECT ADDED ENTRY--TOPICAL TERM a Topical term or geographic name entry element Litteratur Note: The fourth Feminism term has second indicator 4 which means "Source not specified". So this term is only a local term and does not belong to any specific thesaurus and is thus not linked. To specify a source of a term, the second indicator must be set to 7. The source of heading is then specified in subfield 2. 
Sponsored-by: Lund University Library, Sweden Signed-off-by: David Nind Signed-off-by: Katrin Fischer Signed-off-by: Tomas Cohen Arazi --- C4/Heading.pm | 41 +++++++++++++++---- C4/Linker/Default.pm | 24 ++++++----- .../Elasticsearch/QueryBuilder.pm | 1 + 3 files changed, 48 insertions(+), 18 deletions(-) diff --git a/C4/Heading.pm b/C4/Heading.pm index 4d67cd5624..3384c3a7f0 100644 --- a/C4/Heading.pm +++ b/C4/Heading.pm @@ -194,6 +194,8 @@ sub _search { my $self = shift; my $index = shift || undef; my $skipmetadata = shift || undef; + my $ind2 = $self->{field}->{_ind2}; + my $subject_heading_thesaurus = ''; my @marclist; my @and_or; my @excluding = []; @@ -207,13 +209,38 @@ sub _search { push @value, $self->{'search_form'}; } - # if ($self->{'thesaurus'}) { - # push @marclist, 'thesaurus'; - # push @and_or, 'AND'; - # push @excluding, ''; - # push @operator, 'is'; - # push @value, $self->{'thesaurus'}; - # } + if ($self->{'thesaurus'}) { + if ($ind2 eq '0') { + $subject_heading_thesaurus = 'a'; + } elsif ($ind2 eq '1') { + $subject_heading_thesaurus = 'b'; + } elsif ($ind2 eq '2') { + $subject_heading_thesaurus = 'c'; + } elsif ($ind2 eq '3') { + $subject_heading_thesaurus = 'd'; + } elsif ($ind2 eq '4') { + $subject_heading_thesaurus = 'n'; + } elsif ($ind2 eq '5') { + $subject_heading_thesaurus = 'k'; + } elsif ($ind2 eq '6') { + $subject_heading_thesaurus = 'v'; + } else { + $subject_heading_thesaurus = 'z'; + } + push @marclist, 'thesaurus'; + push @and_or, 'and'; + push @excluding, ''; + push @operator, 'is'; + push @value, $subject_heading_thesaurus; + } + + if ($ind2 eq '7') { + push @marclist, 'thesaurus-conventions'; + push @and_or, 'and'; + push @excluding, ''; + push @operator, 'is'; + push @value, $self->{'thesaurus'}; + } require Koha::SearchEngine::QueryBuilder; require Koha::SearchEngine::Search; diff --git a/C4/Linker/Default.pm b/C4/Linker/Default.pm index 88d75ff1be..9ce64fd5d0 100644 --- a/C4/Linker/Default.pm +++ b/C4/Linker/Default.pm @@ 
-30,14 +30,15 @@ sub get_link { my $behavior = shift || 'default'; my $search_form = $heading->search_form(); my $auth_type = $heading->auth_type(); + my $thesaurus = $heading->{thesaurus} || 'notdefined'; my $authid; my $fuzzy = 0; my $match_count; - if ( $self->{'cache'}->{$search_form.$auth_type}->{'cached'} ) { - $authid = $self->{'cache'}->{$search_form.$auth_type}->{'authid'}; - $fuzzy = $self->{'cache'}->{$search_form.$auth_type}->{'fuzzy'}; - $match_count = $self->{'cache'}->{$search_form.$auth_type}->{'match_count'}; + if ( $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'cached'} ) { + $authid = $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'authid'}; + $fuzzy = $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'fuzzy'}; + $match_count = $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'match_count'}; } else { @@ -75,10 +76,10 @@ sub get_link { } } - $self->{'cache'}->{$search_form.$auth_type}->{'cached'} = 1; - $self->{'cache'}->{$search_form.$auth_type}->{'authid'} = $authid; - $self->{'cache'}->{$search_form.$auth_type}->{'fuzzy'} = $fuzzy; - $self->{'cache'}->{$search_form.$auth_type}->{'match_count'} = $match_count; + $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'cached'} = 1; + $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'authid'} = $authid; + $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'fuzzy'} = $fuzzy; + $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'match_count'} = $match_count; } return $self->SUPER::_handle_auth_limit($authid), $fuzzy, $match_count; } @@ -89,11 +90,12 @@ sub update_cache { my $authid = shift; my $search_form = $heading->search_form(); my $auth_type = $heading->auth_type(); + my $thesaurus = $heading->{thesaurus} || 'notdefined'; my $fuzzy = 0; - $self->{'cache'}->{$search_form.$auth_type}->{'cached'} = 1; - $self->{'cache'}->{$search_form.$auth_type}->{'authid'} = $authid; - $self->{'cache'}->{$search_form.$auth_type}->{'fuzzy'} = $fuzzy; + 
$self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'cached'} = 1; + $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'authid'} = $authid; + $self->{'cache'}->{$search_form.$auth_type.$thesaurus}->{'fuzzy'} = $fuzzy; } sub flip_heading { diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index a6ebecfe10..1791670382 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -559,6 +559,7 @@ our $koha_to_index_name = { 'match-heading' => 'match-heading', 'see-from' => 'match-heading-see-from', thesaurus => 'subject-heading-thesaurus', + 'thesaurus-conventions' => 'subject-heading-thesaurus-conventions', any => '', all => '' }; -- 2.39.5