Bug 35455: Remove = in ICU for indexing/searching

This change more closely aligns ICU and CHR so that ICU also removes the = character.

This fixes issues in ICU when searching with a : which gets transformed into a =.

Without this change, the Analytics features won't work for titles with a colon in them.

Test plan:
0. Apply the patch and import bibs from Bugzilla (using Staged MARC tools)
1. cp ./etc/zebradb/etc/phrases-icu.xml /etc/koha/zebradb/etc/phrases-icu.xml
2. cp ./etc/zebradb/etc/words-icu.xml /etc/koha/zebradb/etc/words-icu.xml
3. vi /etc/koha/zebradb/etc/default.idx
   Change "charmap word-phrase-utf.chr" to "icuchain words-icu.xml" for "index w" and "icuchain phrases-icu.xml" for "index p"
4. koha-zebra --stop kohadev
5. pkill zebrasrv
6. koha-zebra --start kohadev
7. koha-rebuild-zebra -a -b -f -v kohadev
8. Search for "Awesome title" and open the detail page
9. Note that the "Analytics: Show analytics" line shows up
10. Click that link
11. Note that it opens the "Cool article" record and it displays "In: Awesome title: awesome subtitle"
12. Click that link
13. Note that it opens the "Awesome title" record

Signed-off-by: David Nind <david@davidnind.com>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
Signed-off-by: Katrin Fischer <katrin.fischer@bsz-bw.de>
(cherry picked from commit 7375d82c40)
Signed-off-by: Fridolin Somers <fridolin.somers@biblibre.com>
2023-12-01 02:36:56 +00:00 · 2023-12-01 02:36:56 +00:00 · 5035e12440
commit 5035e12440
parent 83c18ad1c6
2 changed files with 2 additions and 0 deletions
--- a/etc/zebradb/etc/phrases-icu.xml
+++ b/etc/zebradb/etc/phrases-icu.xml
@@ -6,6 +6,7 @@
  <!-- Remove control characters except \t\n\r -->
  <transform rule="[\x00-\x08\x0B\x0C\x0E-\x1F\x7F] Any-Remove"/>
  <transform rule="[:Punctuation:] Remove"/>
+  <transform rule="[=] Remove"/>
  <transform rule="NFD"/>
  <transform rule="[:Nonspacing Mark:] Remove"/>
  <transform rule="NFC"/>
--- a/etc/zebradb/etc/words-icu.xml
+++ b/etc/zebradb/etc/words-icu.xml
@@ -9,6 +9,7 @@
  <transliterate rule="[:Number:] { '-' > '' "/>
  <!-- Remove control characters except \t\n\r -->
  <transform rule="[\x00-\x08\x0B\x0C\x0E-\x1F\x7F] Any-Remove"/>
+  <transform rule="[=] Remove"/>
  <tokenize rule="l"/>
  <transform rule="[[:WhiteSpace:][:Punctuation:]] Remove"/>
  <transform rule="NFD"/>