From 556b15f7258eb63bf57ac9116088aede8c18201d Mon Sep 17 00:00:00 2001 From: Zeno Tajoli Date: Mon, 29 Dec 2014 11:37:50 +0100 Subject: [PATCH] Bug 13264: More tests for Latin-1 vs. UTF-8 interpretation New tests in search_utf8.t: records containing only UTF-8 chars that could also be interpreted as Latin-1 chars are now tested as well. http://bugs.koha-community.org/show_bug.cgi?id=13264 Signed-off-by: Tomas Cohen Arazi Signed-off-by: Katrin Fischer --- t/db_dependent/www/search_utf8.t | 394 +++++++++++++++++-------------- 1 file changed, 214 insertions(+), 180 deletions(-) diff --git a/t/db_dependent/www/search_utf8.t b/t/db_dependent/www/search_utf8.t index 605e21acc6..737ed39017 100644 --- a/t/db_dependent/www/search_utf8.t +++ b/t/db_dependent/www/search_utf8.t @@ -18,7 +18,7 @@ use Modern::Perl; use utf8; -use Test::More tests => 32; +use Test::More tests => 64; use Test::WWW::Mechanize; use Data::Dumper; use XML::Simple; @@ -45,26 +45,22 @@ my $xml = XMLin($koha_conf); my $marcflavour = C4::Context->preference('marcflavour') || 'MARC21'; # For the purpose of this test, we can reasonably take MARC21 and NORMARC to be the same -my $file = +my $file1 = $marcflavour eq 'UNIMARC' ? "$testdir/data/unimarcutf8record.mrc" : "$testdir/data/marc21utf8record.mrc"; +my $file2 = + $marcflavour eq 'UNIMARC' + ? "$testdir/data/unimarclatin1utf8rec.mrc" + : "$testdir/data/marc21latin1utf8rec.mrc"; + my $user = $ENV{KOHA_USER} || $xml->{config}->{user}; my $password = $ENV{KOHA_PASS} || $xml->{config}->{pass}; my $intranet = $ENV{KOHA_INTRANET_URL}; my $opac = $ENV{KOHA_OPAC_URL}; -# launch the zebra process -launch_zebra( $datadir, $koha_conf ); -if ( not defined $zebra_pid ) { - plan skip_all => "Tests skip. Error starting Zebra Server to do those tests\n"; -} -# launch the zebra process -launch_indexer( ); -if ( not defined $indexer_pid ) { - plan skip_all => "Tests skip. 
Error starting the indexer daemon to do those tests\n"; -} + # test KOHA_INTRANET_URL is set if ( not defined $intranet ) { plan skip_all => "Tests skip. You must set env. variable KOHA_INTRANET_URL to do tests\n"; @@ -77,182 +73,220 @@ if ( not defined $opac ) { $intranet =~ s#/$##; $opac =~ s#/$##; -my $agent = Test::WWW::Mechanize->new( autocheck => 1 ); -my $jsonresponse; - -# -------------------------------------------------- LOAD RECORD - -$agent->get_ok( "$intranet/cgi-bin/koha/mainpage.pl", 'connect to intranet' ); -$agent->form_name('loginform'); -$agent->field( 'password', $password ); -$agent->field( 'userid', $user ); -$agent->field( 'branch', '' ); -$agent->click_ok( '', 'login to staff client' ); - -$agent->get_ok( "$intranet/cgi-bin/koha/mainpage.pl", 'load main page' ); - -$agent->follow_link_ok( { url_regex => qr/tools-home/i }, 'open tools module' ); -$agent->follow_link_ok( { text => 'Stage MARC records for import' }, - 'go to stage MARC' ); - -$agent->post( - "$intranet/cgi-bin/koha/tools/upload-file.pl", - [ 'fileToUpload' => [$file], ], - 'Content_Type' => 'form-data', -); -ok( $agent->success, 'uploaded file' ); - -$jsonresponse = decode_json $agent->content(); -is( $jsonresponse->{'status'}, 'done', 'upload succeeded' ); -my $fileid = $jsonresponse->{'fileid'}; - -$agent->get_ok( "$intranet/cgi-bin/koha/tools/stage-marc-import.pl", - 'reopen stage MARC page' ); -$agent->submit_form_ok( - { - form_number => 5, - fields => { - 'uploadedfileid' => $fileid, - 'nomatch_action' => 'create_new', - 'overlay_action' => 'replace', - 'item_action' => 'always_add', - 'matcher' => '', - 'comments' => '', - 'encoding' => 'utf8', - 'parse_items' => '1', - 'runinbackground' => '1', - } - }, - 'stage MARC' -); +#-------------------------------- Test with Greek and Korean chars; +# launch the zebra search process +launch_zebra( $datadir, $koha_conf ); +if ( not defined $zebra_pid ) { + plan skip_all => "Tests skip. 
Error starting Zebra Server to do those tests\n"; +} +# launch the zebra index process +launch_indexer( ); +if ( not defined $indexer_pid ) { + plan skip_all => "Tests skip. Error starting the indexer daemon to do those tests\n"; +} + +my $utf8_reg1 = qr/学協会. μμ/; +test_search($file1,'Αθήνα', 'deuteros', $utf8_reg1); + + +#--------------------------------- Test with only utf-8 chars in the latin-1 range; +launch_zebra( $datadir, $koha_conf ); +if ( not defined $zebra_pid ) { + plan skip_all => "Tests skip. Error starting Zebra Server to do those tests\n"; +} +launch_indexer( ); +if ( not defined $indexer_pid ) { + plan skip_all => "Tests skip. Error starting the indexer daemon to do those tests\n"; +} +my $utf8_reg2 = qr/Tòmas/; +test_search($file2,'Ramòn', 'Tòmas',$utf8_reg2); + -$jsonresponse = decode_json $agent->content(); -my $jobID = $jsonresponse->{'jobID'}; -ok( $jobID, 'have job ID' ); +sub test_search{ + #Params + my $file = $_[0]; + my $publisher = $_[1]; + my $search_key = $_[2]; + my $utf8_reg = $_[3]; -my $completed = 0; + my $agent = Test::WWW::Mechanize->new( autocheck => 1 ); + my $jsonresponse; -# if we haven't completed the batch in two minutes, it's not happening -for my $counter ( 1 .. 
24 ) { - $agent->get( - "$intranet/cgi-bin/koha/tools/background-job-progress.pl?jobID=$jobID", - "get job progress" + # -------------------------------------------------- LOAD RECORD + + $agent->get_ok( "$intranet/cgi-bin/koha/mainpage.pl", 'connect to intranet' ); + $agent->form_name('loginform'); + $agent->field( 'password', $password ); + $agent->field( 'userid', $user ); + $agent->field( 'branch', '' ); + $agent->click_ok( '', 'login to staff client' ); + + $agent->get_ok( "$intranet/cgi-bin/koha/mainpage.pl", 'load main page' ); + + $agent->follow_link_ok( { url_regex => qr/tools-home/i }, 'open tools module' ); + $agent->follow_link_ok( { text => 'Stage MARC records for import' }, + 'go to stage MARC' ); + + $agent->post( + "$intranet/cgi-bin/koha/tools/upload-file.pl", + [ 'fileToUpload' => [$file], ], + 'Content_Type' => 'form-data', ); + ok( $agent->success, 'uploaded file' ); + + $jsonresponse = decode_json $agent->content(); + is( $jsonresponse->{'status'}, 'done', 'upload succeeded' ); + my $fileid = $jsonresponse->{'fileid'}; + + $agent->get_ok( "$intranet/cgi-bin/koha/tools/stage-marc-import.pl", + 'reopen stage MARC page' ); + $agent->submit_form_ok( + { + form_number => 5, + fields => { + 'uploadedfileid' => $fileid, + 'nomatch_action' => 'create_new', + 'overlay_action' => 'replace', + 'item_action' => 'always_add', + 'matcher' => '', + 'comments' => '', + 'encoding' => 'utf8', + 'parse_items' => '1', + 'runinbackground' => '1', + } + }, + 'stage MARC' + ); + $jsonresponse = decode_json $agent->content(); - if ( $jsonresponse->{'job_status'} eq 'completed' ) { - $completed = 1; - last; + my $jobID = $jsonresponse->{'jobID'}; + ok( $jobID, 'have job ID' ); + + my $completed = 0; + + # if we haven't completed the batch in two minutes, it's not happening + for my $counter ( 1 .. 
24 ) { + $agent->get( + "$intranet/cgi-bin/koha/tools/background-job-progress.pl?jobID=$jobID", + "get job progress" + ); + $jsonresponse = decode_json $agent->content(); + if ( $jsonresponse->{'job_status'} eq 'completed' ) { + $completed = 1; + last; + } + warn( + ( + $jsonresponse->{'job_size'} + ? floor( + 100 * $jsonresponse->{'progress'} / $jsonresponse->{'job_size'} + ) + : '100' + ) + . "% completed" + ); + sleep 5; } - warn( - ( - $jsonresponse->{'job_size'} - ? floor( - 100 * $jsonresponse->{'progress'} / $jsonresponse->{'job_size'} - ) - : '100' - ) - . "% completed" + is( $jsonresponse->{'job_status'}, 'completed', 'job was completed' ); + + $agent->get_ok( + "$intranet/cgi-bin/koha/tools/stage-marc-import.pl", + 'reopen stage MARC page at end of upload' ); - sleep 5; + $agent->submit_form_ok( + { + form_number => 5, + fields => { + 'uploadedfileid' => $fileid, + 'nomatch_action' => 'create_new', + 'overlay_action' => 'replace', + 'item_action' => 'always_add', + 'matcher' => '1', + 'comments' => '', + 'encoding' => 'utf8', + 'parse_items' => '1', + 'runinbackground' => '1', + 'completedJobID' => $jobID, + } + }, + 'stage MARC' + ); + + $agent->follow_link_ok( { text => 'Manage staged records' }, 'view batch' ); + + + $agent->form_number(5); + $agent->field( 'framework', '' ); + $agent->click_ok( 'mainformsubmit', "imported records into catalog" ); + my $webpage = $agent->{content}; + + $webpage =~ /(.*.*?)(\d{1,})(.*<\/title>)/sx; + my $id_batch = $2; + my $id_bib_number = GetBiblionumberFromImport($id_batch); + + # wait enough time for the indexer + sleep 10; + + # --------------------------------- TEST INTRANET SEARCH + + + $agent->get_ok( "$intranet/cgi-bin/koha/catalogue/search.pl" , "got search on intranet"); + $agent->form_number(1); + $agent->field('idx', 'kw'); + $agent->field('q', $search_key); + $agent->click(); + my $intra_text = $agent->text() ; + like( $intra_text, qr|Publisher: $publisher|, ); + + $agent->get_ok( 
"$intranet/cgi-bin/koha/catalogue/search.pl" , "got search on intranet"); + $agent->form_number(1); + $agent->field('idx', 'kw'); + $agent->field('q', $publisher); + $agent->click(); + $intra_text = $agent->text(); + + like( $intra_text, qr|Publisher: $publisher|, ); + my $expected_base = q|search.pl\?idx=kw&q=| . uri_escape_utf8( $publisher ); + $agent->base_like(qr|$expected_base|, ); + + ok ( ( length(Encode::encode('UTF-8', $intra_text)) != length($intra_text) ) , 'UTF-8 are multi-byte. Goog') ; + ok ($intra_text =~ $utf8_reg, 'UTF-8 chars are correctly present. Good'); + # -------------------------------------------------- TEST ON OPAC + + $agent->get_ok( "$opac" , "got opac"); + $agent->form_name('searchform'); + $agent->field( 'q', $search_key ); + $agent->field( 'idx', '' ); + $agent->click( ); + my $opac_text = $agent->text() ; + like( $opac_text, qr|Publisher: $publisher|, ); + + $agent->get_ok( "$opac" , "got opac"); + $agent->form_name('searchform'); + $agent->field('q', $publisher); + $agent->field( 'idx', '' ); + $agent->click(); + $opac_text = $agent->text(); + + like( $opac_text, qr|Publisher: $publisher|, ); + $expected_base = q|opac-search.pl\?idx=&q=| . uri_escape_utf8( $publisher ); + $agent->base_like(qr|$expected_base|, ); + + ok ( ( length(Encode::encode('UTF-8', $opac_text)) != length($opac_text) ) , 'UTF-8 are multi-byte. Goog') ; + ok ($opac_text =~ $utf8_reg, 'UTF-8 chars are correctly present. 
Good'); + + #-------------------------------------------------- REVERT + + $agent->get_ok( "$intranet/cgi-bin/koha/tools/manage-marc-import.pl", 'view and clean batch' ); + $agent->form_name('clean_batch_'.$id_batch); + $agent->click(); + $agent->get_ok( "$intranet/cgi-bin/koha/catalogue/detail.pl?biblionumber=$id_bib_number", 'biblio on intranet' ); + $agent->get_ok( "$intranet/cgi-bin/koha/cataloguing/addbiblio.pl?op=delete&biblionumber=$id_bib_number", 'biblio deleted' ); + + # clean + cleanup(); } -is( $jsonresponse->{'job_status'}, 'completed', 'job was completed' ); - -$agent->get_ok( - "$intranet/cgi-bin/koha/tools/stage-marc-import.pl", - 'reopen stage MARC page at end of upload' -); -$agent->submit_form_ok( - { - form_number => 5, - fields => { - 'uploadedfileid' => $fileid, - 'nomatch_action' => 'create_new', - 'overlay_action' => 'replace', - 'item_action' => 'always_add', - 'matcher' => '1', - 'comments' => '', - 'encoding' => 'utf8', - 'parse_items' => '1', - 'runinbackground' => '1', - 'completedJobID' => $jobID, - } - }, - 'stage MARC' -); - -$agent->follow_link_ok( { text => 'Manage staged records' }, 'view batch' ); - - -$agent->form_number(5); -$agent->field( 'framework', '' ); -$agent->click_ok( 'mainformsubmit', "imported records into catalog" ); -my $webpage = $agent->{content}; - -$webpage =~ /(.*<title>.*?)(\d{1,})(.*<\/title>)/sx; -my $id_batch = $2; -my $id_bib_number = GetBiblionumberFromImport($id_batch); - -# wait enough time for the indexer -sleep 10; - -# --------------------------------- TEST INTRANET SEARCH - -my $publisher = 'Αθήνα'; -$agent->get_ok( "$intranet/cgi-bin/koha/catalogue/search.pl" , "got search on intranet"); -$agent->form_number(1); -$agent->field('idx', 'kw'); -$agent->field('q', 'deuteros'); -$agent->click(); -my $intra_text = $agent->text() ; -like( $intra_text, qr|Publisher: $publisher|, ); - -$agent->get_ok( "$intranet/cgi-bin/koha/catalogue/search.pl" , "got search on intranet"); -$agent->form_number(1); 
-$agent->field('idx', 'kw'); -$agent->field('q', $publisher); -$agent->click(); -$intra_text = $agent->text(); - -like( $intra_text, qr|Publisher: $publisher|, ); -my $expected_base = q|search.pl\?idx=kw&q=| . uri_escape_utf8( $publisher ); -$agent->base_like(qr|$expected_base|, ); - -ok ( ( length(Encode::encode('UTF-8', $intra_text)) != length($intra_text) ) , 'UTF-8 are multi-byte. Goog') ; -ok ($intra_text =~ m/学協会. μμ/, 'UTF-8 chars are correctly present. Good'); -# -------------------------------------------------- TEST ON OPAC - -$agent->get_ok( "$opac" , "got opac"); -$agent->form_name('searchform'); -$agent->field( 'q', 'deuteros' ); -$agent->field( 'idx', '' ); -$agent->click( ); -my $opac_text = $agent->text() ; -like( $opac_text, qr|Publisher: $publisher|, ); - -$agent->get_ok( "$opac" , "got opac"); -$agent->form_name('searchform'); -$agent->field('q', $publisher); -$agent->field( 'idx', '' ); -$agent->click(); -$opac_text = $agent->text(); - -like( $opac_text, qr|Publisher: $publisher|, ); -$expected_base = q|opac-search.pl\?idx=&q=| . uri_escape_utf8( $publisher ); -$agent->base_like(qr|$expected_base|, ); - -ok ( ( length(Encode::encode('UTF-8', $opac_text)) != length($opac_text) ) , 'UTF-8 are multi-byte. Goog') ; -ok ($opac_text =~ m/学協会. μμ/, 'UTF-8 chars are correctly present. Good'); - -#-------------------------------------------------- REVERT - -$agent->get_ok( "$intranet/cgi-bin/koha/tools/manage-marc-import.pl", 'view and clean batch' ); -$agent->form_name('clean_batch_'.$id_batch); -$agent->click(); -$agent->get_ok( "$intranet/cgi-bin/koha/catalogue/detail.pl?biblionumber=$id_bib_number", 'biblio on intranet' ); -$agent->get_ok( "$intranet/cgi-bin/koha/cataloguing/addbiblio.pl?op=delete&biblionumber=$id_bib_number", 'biblio deleted' ); - -# clean -cleanup(); + # function that launches the zebra daemon sub launch_zebra { -- 2.39.5