Bug 18336: Full stack tests for supplemental UTF-8 chars

This patch introduces tests for Koha's support for 4-byte supplemental UTF-8 chars. Encoding/decoding tools handle this gracefully. The missing piece is the MySQL DB backend. The tests in this patch: - Add a couple of records for each flavour (MARC21 and UNIMARC) so search_utf8.t tests that 4-byte chars are handled correctly - Add emoji testing in auth_values_input_www.t To test: - Apply this patch - Run: $ kshell k$ prove t/db_dependent/www/search_utf8.t \ t/db_dependent/www/auth_values_input_www.t => FAIL: It should fail if the DB hasn't been migrated to use utf8mb4 Sponsored-by: Hotchkiss School Signed-off-by: Mark Tompsett <mtompset@hotmail.com> Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de> Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io> Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com> Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
2018-01-22 13:16:26 -03:00 · 2018-01-22 13:16:26 -03:00 · d74c5393ed
commit d74c5393ed
parent d1ccd5ed35
4 changed files with 23 additions and 4 deletions
--- a/t/db_dependent/www/auth_values_input_www.t
+++ b/t/db_dependent/www/auth_values_input_www.t
@ -49,7 +49,7 @@ elsif (not defined $intranet) {
    plan skip_all => "Tests skip. You must set env. variable KOHA_INTRANET_URL to do tests\n";
 }
 else {
-    plan tests => 33;
+    plan tests => 34;
 }

 my $dbh = C4::Context->dbh;
@ -71,9 +71,9 @@ $agent->field( 'branch',   '' );
 $agent->click_ok( '', 'login to staff client' );
 $agent->get_ok( "$intranet/cgi-bin/koha/mainpage.pl", 'load main page' );

-#--------------------------------------------------- Test with corean and greek chars
+#---------------------------------------- Test with corean, greek and emoji chars

-$category = '学協会μμ';
+$category = '学協会μμ😀';
 $dbh->do(q|DELETE FROM authorised_values WHERE category = ?|, undef, $category);
 $dbh->do(q|DELETE FROM authorised_value_categories WHERE category_name = ?|, undef, $category);

@ -123,6 +123,7 @@ my $text = $agent->text() ;
 ok ( ( length(Encode::encode('UTF-8', $text)) != length($text) ) , 'UTF-8 are multi-byte. Good') ;
 ok ($text =~  m/学協会μμ/, 'UTF-8 (Asia) chars are correctly present. Good');
 ok ($text =~  m/επιμεq/, 'UTF-8 (Greek) chars are correctly present. Good');
+ok ($text =~  m/😀/, 'UTF-8 (emoji) chars are correctly present. Good');
 my @links = $agent->links;
 my $id_to_del ='';
 $delete_re = q|op=delete\&searchfield=| . uri_escape_utf8($category) . '\&id=(\d+)';
--- a/t/db_dependent/www/data/marc21utf8supprec.mrc
+++ b/t/db_dependent/www/data/marc21utf8supprec.mrc
@ -0,0 +1 @@
+00219cam a2200097 a 4500001000800000005001700008008004100025245002500066260002100091650000900112100880920140204215200.0140204m2005    gr a          001 0 gre d02aA tiny record (😀)  a𠻺tomasito𠻺 4a😀00219cam a2200097 a 4500001000800000005001700008008004100025245002500066260002100091650000900112100880920140204215200.0140204m2005    gr a          001 0 gre d02aA tiny record (😀)  a𠻺tomasito𠻺 4a😀
--- a/t/db_dependent/www/data/unimarcutf8supprec.mrc
+++ b/t/db_dependent/www/data/unimarcutf8supprec.mrc
@ -0,0 +1 @@
+00650nam0a22002293i 4500001000200000005001700002071002100019090000600040100004100046101000800087200002500095210004300120215002400163600002000187601002000207606000900227702004700236702005200283710005500335801002300390942000700413220141116143328.0  b𠻺tomasito𠻺  a2  a20130409d2005    ||||0itay50      ba  aita1 aA tiny record (😀)  aAthinacEvraico Mouseio Ellathosd2005  a73 p.cill.d27 cm.  2学協会. μμ  2学協会. μμ  2😀 1aMenexiadisb, Alexios3IT\ICCU\LO1V\404551 1aAndrianopouloub, Panayota3IT\ICCU\LO1V\40455202aEvraico Mouseio Ellathos3IT\ICCU\LO1V\4045534070 3aITbICCUc20141104  cBK00650nam0a22002293i 4500001000200000005001700002071002100019090000600040100004100046101000800087200002500095210004300120215002400163600002000187601002000207606000900227702004700236702005200283710005500335801002300390942000700413320141116143328.0  b𠻺tomasito𠻺  a2  a20130409d2005    ||||0itay50      ba  aita1 aA tiny record (😀)  aAthinacEvraico Mouseio Ellathosd2005  a73 p.cill.d27 cm.  2学協会. μμ  2学協会. μμ  2😀 1aMenexiadisb, Alexios3IT\ICCU\LO1V\404551 1aAndrianopouloub, Panayota3IT\ICCU\LO1V\40455202aEvraico Mouseio Ellathos3IT\ICCU\LO1V\4045534070 3aITbICCUc20141104  cBK
--- a/t/db_dependent/www/search_utf8.t
+++ b/t/db_dependent/www/search_utf8.t
@ -55,6 +55,11 @@ my $file2 =
  ? "$testdir/data/unimarclatin1utf8rec.mrc"
  : "$testdir/data/marc21latin1utf8rec.mrc";

+my $file3 =
+  $marcflavour eq 'UNIMARC'
+  ? "$testdir/data/unimarcutf8supprec.mrc"
+  : "$testdir/data/marc21utf8supprec.mrc";
+
 my $user     = $ENV{KOHA_USER} || $xml->{config}->{user};
 my $password = $ENV{KOHA_PASS} || $xml->{config}->{pass};
 my $intranet = $ENV{KOHA_INTRANET_URL};
@ -70,7 +75,7 @@ elsif ( not defined $opac ) {
   plan skip_all => "Tests skip. You must set env. variable KOHA_OPAC_URL to do tests\n";
 }
 else {
-    plan tests => 66;
+    plan tests => 99;
 }

 $intranet =~ s#/$##;
@ -104,6 +109,17 @@ if ( not defined $indexer_pid ) {
 my $utf8_reg2 = qr/Tòmas/;
 test_search($file2,'Ramòn', 'Tòmas',$utf8_reg2);

+#--------------------------------- Test with supplementary utf-8 chars;
+launch_zebra( $datadir, $koha_conf );
+if ( not defined $zebra_pid ) {
+    plan skip_all => "Tests skip. Error starting Zebra Server to do those tests\n";
+}
+launch_indexer( );
+if ( not defined $indexer_pid ) {
+    plan skip_all => "Tests skip. Error starting the indexer daemon to do those tests\n";
+}
+my $utf8_reg3 = qr/😀/;
+test_search($file3, "𠻺tomasito𠻺", 'A tiny record', $utf8_reg3);

 sub test_search{
    #Params
				`@ -0,0 +1 @@`
				`00219cam a2200097 a 4500001000800000005001700008008004100025245002500066260002100091650000900112100880920140204215200.0140204m2005 gr a 001 0 gre d02aA tiny record (😀) a𠻺tomasito𠻺 4a😀00219cam a2200097 a 4500001000800000005001700008008004100025245002500066260002100091650000900112100880920140204215200.0140204m2005 gr a 001 0 gre d02aA tiny record (😀) a𠻺tomasito𠻺 4a😀`
				`@ -0,0 +1 @@`
				00650nam0a22002293i 4500001000200000005001700002071002100019090000600040100004100046101000800087200002500095210004300120215002400163600002000187601002000207606000900227702004700236702005200283710005500335801002300390942000700413220141116143328.0 b𠻺tomasito𠻺 a2 a20130409d2005 \|\|\|\|0itay50 ba aita1 aA tiny record (😀) aAthinacEvraico Mouseio Ellathosd2005 a73 p.cill.d27 cm. 2学協会. μμ 2学協会. μμ 2😀 1aMenexiadisb, Alexios3IT\ICCU\LO1V\404551 1aAndrianopouloub, Panayota3IT\ICCU\LO1V\40455202aEvraico Mouseio Ellathos3IT\ICCU\LO1V\4045534070 3aITbICCUc20141104 cBK00650nam0a22002293i 4500001000200000005001700002071002100019090000600040100004100046101000800087200002500095210004300120215002400163600002000187601002000207606000900227702004700236702005200283710005500335801002300390942000700413320141116143328.0 b𠻺tomasito𠻺 a2 a20130409d2005 \|\|\|\|0itay50 ba aita1 aA tiny record (😀) aAthinacEvraico Mouseio Ellathosd2005 a73 p.cill.d27 cm. 2学協会. μμ 2学協会. μμ 2😀 1aMenexiadisb, Alexios3IT\ICCU\LO1V\404551 1aAndrianopouloub, Panayota3IT\ICCU\LO1V\40455202aEvraico Mouseio Ellathos3IT\ICCU\LO1V\4045534070 3aITbICCUc20141104 cBK