From d74c5393edf35d6cb3728ef109125bff05869a02 Mon Sep 17 00:00:00 2001 From: Tomas Cohen Arazi Date: Mon, 22 Jan 2018 13:16:26 -0300 Subject: [PATCH] Bug 18336: Full stack tests for supplemental UTF-8 chars This patch introduces tests for Koha's support for 4-byte supplemental UTF-8 chars. The encoding/decoding tools handle this gracefully. The missing piece is the MySQL DB backend. The tests in this patch: - Adds a couple records for each flavour (MARC21 and UNIMARC) so search_utf8.t tests 4-byte chars are handled correctly - Adds emoji testing in auth_values_input_www.t To test: - Apply this patch - Run: $ kshell k$ prove t/db_dependent/www/search_utf8.t \ t/db_dependent/www/auth_values_input_www.t => FAIL: It should fail if the DB hasn't been migrated into using utf8mb4 Sponsored-by: Hotchkiss School Signed-off-by: Mark Tompsett Signed-off-by: Katrin Fischer Signed-off-by: Tomas Cohen Arazi Signed-off-by: Kyle M Hall Signed-off-by: Jonathan Druart --- t/db_dependent/www/auth_values_input_www.t | 7 ++++--- t/db_dependent/www/data/marc21utf8supprec.mrc | 1 + t/db_dependent/www/data/unimarcutf8supprec.mrc | 1 + t/db_dependent/www/search_utf8.t | 18 +++++++++++++++++- 4 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 t/db_dependent/www/data/marc21utf8supprec.mrc create mode 100644 t/db_dependent/www/data/unimarcutf8supprec.mrc diff --git a/t/db_dependent/www/auth_values_input_www.t b/t/db_dependent/www/auth_values_input_www.t index 27e7310ae1..c4f10fc538 100644 --- a/t/db_dependent/www/auth_values_input_www.t +++ b/t/db_dependent/www/auth_values_input_www.t @@ -49,7 +49,7 @@ elsif (not defined $intranet) { plan skip_all => "Tests skip. You must set env. 
variable KOHA_INTRANET_URL to do tests\n"; } else { - plan tests => 33; + plan tests => 34; } my $dbh = C4::Context->dbh; @@ -71,9 +71,9 @@ $agent->field( 'branch', '' ); $agent->click_ok( '', 'login to staff client' ); $agent->get_ok( "$intranet/cgi-bin/koha/mainpage.pl", 'load main page' ); -#--------------------------------------------------- Test with corean and greek chars +#---------------------------------------- Test with corean, greek and emoji chars -$category = '学協会μμ'; +$category = '学協会μμ😀'; $dbh->do(q|DELETE FROM authorised_values WHERE category = ?|, undef, $category); $dbh->do(q|DELETE FROM authorised_value_categories WHERE category_name = ?|, undef, $category); @@ -123,6 +123,7 @@ my $text = $agent->text() ; ok ( ( length(Encode::encode('UTF-8', $text)) != length($text) ) , 'UTF-8 are multi-byte. Good') ; ok ($text =~ m/学協会μμ/, 'UTF-8 (Asia) chars are correctly present. Good'); ok ($text =~ m/επιμεq/, 'UTF-8 (Greek) chars are correctly present. Good'); +ok ($text =~ m/😀/, 'UTF-8 (emoji) chars are correctly present. Good'); my @links = $agent->links; my $id_to_del =''; $delete_re = q|op=delete\&searchfield=| . uri_escape_utf8($category) . 
'\&id=(\d+)'; diff --git a/t/db_dependent/www/data/marc21utf8supprec.mrc b/t/db_dependent/www/data/marc21utf8supprec.mrc new file mode 100644 index 0000000000..9fa2d16572 --- /dev/null +++ b/t/db_dependent/www/data/marc21utf8supprec.mrc @@ -0,0 +1 @@ +00219cam a2200097 a 4500001000800000005001700008008004100025245002500066260002100091650000900112100880920140204215200.0140204m2005 gr a 001 0 gre d02aA tiny record (😀) a𠻺tomasito𠻺 4a😀00219cam a2200097 a 4500001000800000005001700008008004100025245002500066260002100091650000900112100880920140204215200.0140204m2005 gr a 001 0 gre d02aA tiny record (😀) a𠻺tomasito𠻺 4a😀 \ No newline at end of file diff --git a/t/db_dependent/www/data/unimarcutf8supprec.mrc b/t/db_dependent/www/data/unimarcutf8supprec.mrc new file mode 100644 index 0000000000..6c183928bb --- /dev/null +++ b/t/db_dependent/www/data/unimarcutf8supprec.mrc @@ -0,0 +1 @@ +00650nam0a22002293i 4500001000200000005001700002071002100019090000600040100004100046101000800087200002500095210004300120215002400163600002000187601002000207606000900227702004700236702005200283710005500335801002300390942000700413220141116143328.0 b𠻺tomasito𠻺 a2 a20130409d2005 ||||0itay50 ba aita1 aA tiny record (😀) aAthinacEvraico Mouseio Ellathosd2005 a73 p.cill.d27 cm. 2学協会. μμ 2学協会. μμ 2😀 1aMenexiadisb, Alexios3IT\ICCU\LO1V\404551 1aAndrianopouloub, Panayota3IT\ICCU\LO1V\40455202aEvraico Mouseio Ellathos3IT\ICCU\LO1V\4045534070 3aITbICCUc20141104 cBK00650nam0a22002293i 4500001000200000005001700002071002100019090000600040100004100046101000800087200002500095210004300120215002400163600002000187601002000207606000900227702004700236702005200283710005500335801002300390942000700413320141116143328.0 b𠻺tomasito𠻺 a2 a20130409d2005 ||||0itay50 ba aita1 aA tiny record (😀) aAthinacEvraico Mouseio Ellathosd2005 a73 p.cill.d27 cm. 2学協会. μμ 2学協会. 
μμ 2😀 1aMenexiadisb, Alexios3IT\ICCU\LO1V\404551 1aAndrianopouloub, Panayota3IT\ICCU\LO1V\40455202aEvraico Mouseio Ellathos3IT\ICCU\LO1V\4045534070 3aITbICCUc20141104 cBK \ No newline at end of file diff --git a/t/db_dependent/www/search_utf8.t b/t/db_dependent/www/search_utf8.t index 2ad7b9388b..3b3c22f9b5 100644 --- a/t/db_dependent/www/search_utf8.t +++ b/t/db_dependent/www/search_utf8.t @@ -55,6 +55,11 @@ my $file2 = ? "$testdir/data/unimarclatin1utf8rec.mrc" : "$testdir/data/marc21latin1utf8rec.mrc"; +my $file3 = + $marcflavour eq 'UNIMARC' + ? "$testdir/data/unimarcutf8supprec.mrc" + : "$testdir/data/marc21utf8supprec.mrc"; + my $user = $ENV{KOHA_USER} || $xml->{config}->{user}; my $password = $ENV{KOHA_PASS} || $xml->{config}->{pass}; my $intranet = $ENV{KOHA_INTRANET_URL}; @@ -70,7 +75,7 @@ elsif ( not defined $opac ) { plan skip_all => "Tests skip. You must set env. variable KOHA_OPAC_URL to do tests\n"; } else { - plan tests => 66; + plan tests => 99; } $intranet =~ s#/$##; @@ -104,6 +109,17 @@ if ( not defined $indexer_pid ) { my $utf8_reg2 = qr/Tòmas/; test_search($file2,'Ramòn', 'Tòmas',$utf8_reg2); +#--------------------------------- Test with supplementary utf-8 chars; +launch_zebra( $datadir, $koha_conf ); +if ( not defined $zebra_pid ) { + plan skip_all => "Tests skip. Error starting Zebra Server to do those tests\n"; +} +launch_indexer( ); +if ( not defined $indexer_pid ) { + plan skip_all => "Tests skip. Error starting the indexer daemon to do those tests\n"; +} +my $utf8_reg3 = qr/😀/; +test_search($file3, "𠻺tomasito𠻺", 'A tiny record', $utf8_reg3); sub test_search{ #Params -- 2.39.5