Bug 18336: Full stack tests for supplemental UTF-8 chars

This patch introduces tests for Koha's support for 4-byte supplemental
UTF-8 chars. Encoding/decoding tools handle this gracefully. The missing
piece is the MySQL DB backend.

The tests in this patch:
- Add a couple of records for each flavour (MARC21 and UNIMARC) so
  search_utf8.t tests that 4-byte chars are handled correctly
- Add emoji testing in auth_values_input_www.t

To test:
- Apply this patch
- Run:
  $ kshell
 k$ prove t/db_dependent/www/search_utf8.t \
          t/db_dependent/www/auth_values_input_www.t
=> FAIL: The tests should fail if the DB hasn't been migrated to
utf8mb4

Sponsored-by: Hotchkiss School

Signed-off-by: Mark Tompsett <mtompset@hotmail.com>

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>

Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
This commit is contained in:
Tomás Cohen Arazi 2018-01-22 13:16:26 -03:00 committed by Jonathan Druart
parent d1ccd5ed35
commit d74c5393ed
4 changed files with 23 additions and 4 deletions

View file

@ -49,7 +49,7 @@ elsif (not defined $intranet) {
plan skip_all => "Tests skip. You must set env. variable KOHA_INTRANET_URL to do tests\n";
}
else {
plan tests => 33;
plan tests => 34;
}
my $dbh = C4::Context->dbh;
@ -71,9 +71,9 @@ $agent->field( 'branch', '' );
$agent->click_ok( '', 'login to staff client' );
$agent->get_ok( "$intranet/cgi-bin/koha/mainpage.pl", 'load main page' );
#--------------------------------------------------- Test with corean and greek chars
#---------------------------------------- Test with corean, greek and emoji chars
$category = '学協会μμ';
$category = '学協会μμ😀';
$dbh->do(q|DELETE FROM authorised_values WHERE category = ?|, undef, $category);
$dbh->do(q|DELETE FROM authorised_value_categories WHERE category_name = ?|, undef, $category);
@ -123,6 +123,7 @@ my $text = $agent->text() ;
ok ( ( length(Encode::encode('UTF-8', $text)) != length($text) ) , 'UTF-8 are multi-byte. Good') ;
ok ($text =~ m/学協会μμ/, 'UTF-8 (Asia) chars are correctly present. Good');
ok ($text =~ m/επιμεq/, 'UTF-8 (Greek) chars are correctly present. Good');
ok ($text =~ m/😀/, 'UTF-8 (emoji) chars are correctly present. Good');
my @links = $agent->links;
my $id_to_del ='';
$delete_re = q|op=delete\&searchfield=| . uri_escape_utf8($category) . '\&id=(\d+)';

View file

@ -0,0 +1 @@
00219cam a2200097 a 4500001000800000005001700008008004100025245002500066260002100091650000900112100880920140204215200.0140204m2005 gr a 001 0 gre d02aA tiny record (😀) a𠻺tomasito𠻺 4a😀00219cam a2200097 a 4500001000800000005001700008008004100025245002500066260002100091650000900112100880920140204215200.0140204m2005 gr a 001 0 gre d02aA tiny record (😀) a𠻺tomasito𠻺 4a😀

View file

@ -0,0 +1 @@
00650nam0a22002293i 4500001000200000005001700002071002100019090000600040100004100046101000800087200002500095210004300120215002400163600002000187601002000207606000900227702004700236702005200283710005500335801002300390942000700413220141116143328.0 b𠻺tomasito𠻺 a2 a20130409d2005 ||||0itay50 ba aita1 aA tiny record (😀) aAthinacEvraico Mouseio Ellathosd2005 a73 p.cill.d27 cm. 2学協会. μμ 2学協会. μμ 2😀 1aMenexiadisb, Alexios3IT\ICCU\LO1V\404551 1aAndrianopouloub, Panayota3IT\ICCU\LO1V\40455202aEvraico Mouseio Ellathos3IT\ICCU\LO1V\4045534070 3aITbICCUc20141104 cBK00650nam0a22002293i 4500001000200000005001700002071002100019090000600040100004100046101000800087200002500095210004300120215002400163600002000187601002000207606000900227702004700236702005200283710005500335801002300390942000700413320141116143328.0 b𠻺tomasito𠻺 a2 a20130409d2005 ||||0itay50 ba aita1 aA tiny record (😀) aAthinacEvraico Mouseio Ellathosd2005 a73 p.cill.d27 cm. 2学協会. μμ 2学協会. μμ 2😀 1aMenexiadisb, Alexios3IT\ICCU\LO1V\404551 1aAndrianopouloub, Panayota3IT\ICCU\LO1V\40455202aEvraico Mouseio Ellathos3IT\ICCU\LO1V\4045534070 3aITbICCUc20141104 cBK

View file

@ -55,6 +55,11 @@ my $file2 =
? "$testdir/data/unimarclatin1utf8rec.mrc"
: "$testdir/data/marc21latin1utf8rec.mrc";
my $file3 =
$marcflavour eq 'UNIMARC'
? "$testdir/data/unimarcutf8supprec.mrc"
: "$testdir/data/marc21utf8supprec.mrc";
my $user = $ENV{KOHA_USER} || $xml->{config}->{user};
my $password = $ENV{KOHA_PASS} || $xml->{config}->{pass};
my $intranet = $ENV{KOHA_INTRANET_URL};
@ -70,7 +75,7 @@ elsif ( not defined $opac ) {
plan skip_all => "Tests skip. You must set env. variable KOHA_OPAC_URL to do tests\n";
}
else {
plan tests => 66;
plan tests => 99;
}
$intranet =~ s#/$##;
@ -104,6 +109,17 @@ if ( not defined $indexer_pid ) {
my $utf8_reg2 = qr/Tòmas/;
test_search($file2,'Ramòn', 'Tòmas',$utf8_reg2);
#--------------------------------- Test with supplementary utf-8 chars;
launch_zebra( $datadir, $koha_conf );
if ( not defined $zebra_pid ) {
plan skip_all => "Tests skip. Error starting Zebra Server to do those tests\n";
}
launch_indexer( );
if ( not defined $indexer_pid ) {
plan skip_all => "Tests skip. Error starting the indexer daemon to do those tests\n";
}
my $utf8_reg3 = qr/😀/;
test_search($file3, "𠻺tomasito𠻺", 'A tiny record', $utf8_reg3);
sub test_search{
#Params