3 # Copyright 2000-2002 Katipo Communications
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along with
17 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
18 # Suite 330, Boston, MA 02111-1307 USA
24 use vars qw($VERSION @ISA @EXPORT $DEBUG);
33 &subfield_is_koha_internal_p
34 &GetPrinters &GetPrinter
35 &GetItemTypes &getitemtypeinfo
38 &getframeworks &getframeworkinfo
39 &getauthtypes &getauthtype
44 &getitemtypeimagesrcfromurl
46 &get_notforloan_label_of
51 &GetKohaAuthorisedValues
53 &GetManagedTagSubfields
62 C4::Koha - Perl Module containing convenience functions for Koha scripts
71 Koha.pm provides many functions for Koha scripts.
80 $slash_date = &slashifyDate($dash_date);
82 Takes a string of the form "DD-MM-YYYY" (or anything separated by
83 dashes), converts it to the form "YYYY/MM/DD", and returns the result.
89 # accepts a date of the form xx-xx-xx[xx] and returns it in the
91 my @dateOut = split( '-', shift );
92 return ("$dateOut[2]/$dateOut[1]/$dateOut[0]");
98 my $string = DisplayISBN( $isbn );
104 if (length ($isbn)<13){
106 if ( substr( $isbn, 0, 1 ) <= 7 ) {
107 $seg1 = substr( $isbn, 0, 1 );
109 elsif ( substr( $isbn, 0, 2 ) <= 94 ) {
110 $seg1 = substr( $isbn, 0, 2 );
112 elsif ( substr( $isbn, 0, 3 ) <= 995 ) {
113 $seg1 = substr( $isbn, 0, 3 );
115 elsif ( substr( $isbn, 0, 4 ) <= 9989 ) {
116 $seg1 = substr( $isbn, 0, 4 );
119 $seg1 = substr( $isbn, 0, 5 );
121 my $x = substr( $isbn, length($seg1) );
123 if ( substr( $x, 0, 2 ) <= 19 ) {
125 # if(sTmp2 < 10) sTmp2 = "0" sTmp2;
126 $seg2 = substr( $x, 0, 2 );
128 elsif ( substr( $x, 0, 3 ) <= 699 ) {
129 $seg2 = substr( $x, 0, 3 );
131 elsif ( substr( $x, 0, 4 ) <= 8399 ) {
132 $seg2 = substr( $x, 0, 4 );
134 elsif ( substr( $x, 0, 5 ) <= 89999 ) {
135 $seg2 = substr( $x, 0, 5 );
137 elsif ( substr( $x, 0, 6 ) <= 9499999 ) {
138 $seg2 = substr( $x, 0, 6 );
141 $seg2 = substr( $x, 0, 7 );
143 my $seg3 = substr( $x, length($seg2) );
144 $seg3 = substr( $seg3, 0, length($seg3) - 1 );
145 my $seg4 = substr( $x, -1, 1 );
146 return "$seg1-$seg2-$seg3-$seg4";
149 $seg1 = substr( $isbn, 0, 3 );
151 if ( substr( $isbn, 3, 1 ) <= 7 ) {
152 $seg2 = substr( $isbn, 3, 1 );
154 elsif ( substr( $isbn, 3, 2 ) <= 94 ) {
155 $seg2 = substr( $isbn, 3, 2 );
157 elsif ( substr( $isbn, 3, 3 ) <= 995 ) {
158 $seg2 = substr( $isbn, 3, 3 );
160 elsif ( substr( $isbn, 3, 4 ) <= 9989 ) {
161 $seg2 = substr( $isbn, 3, 4 );
164 $seg2 = substr( $isbn, 3, 5 );
166 my $x = substr( $isbn, length($seg2) +3);
168 if ( substr( $x, 0, 2 ) <= 19 ) {
170 # if(sTmp2 < 10) sTmp2 = "0" sTmp2;
171 $seg3 = substr( $x, 0, 2 );
173 elsif ( substr( $x, 0, 3 ) <= 699 ) {
174 $seg3 = substr( $x, 0, 3 );
176 elsif ( substr( $x, 0, 4 ) <= 8399 ) {
177 $seg3 = substr( $x, 0, 4 );
179 elsif ( substr( $x, 0, 5 ) <= 89999 ) {
180 $seg3 = substr( $x, 0, 5 );
182 elsif ( substr( $x, 0, 6 ) <= 9499999 ) {
183 $seg3 = substr( $x, 0, 6 );
186 $seg3 = substr( $x, 0, 7 );
188 my $seg4 = substr( $x, length($seg3) );
189 $seg4 = substr( $seg4, 0, length($seg4) - 1 );
190 my $seg5 = substr( $x, -1, 1 );
191 return "$seg1-$seg2-$seg3-$seg4-$seg5";
195 # FIXME.. this should be moved to a MARC-specific module
196 sub subfield_is_koha_internal_p ($) {
199 # We could match on 'lib' and 'tab' (and 'mandatory', & more to come!)
200 # But real MARC subfields are always single-character
201 # so it really is safer just to check the length
203 return length $subfield != 1;
208 $itemtypes = &GetItemTypes();
210 Returns information about existing itemtypes.
212 build a HTML select with the following code :
214 =head3 in PERL SCRIPT
216 my $itemtypes = GetItemTypes;
218 foreach my $thisitemtype (sort keys %$itemtypes) {
219 my $selected = 1 if $thisitemtype eq $itemtype;
220 my %row =(value => $thisitemtype,
221 selected => $selected,
222 description => $itemtypes->{$thisitemtype}->{'description'},
224 push @itemtypesloop, \%row;
226 $template->param(itemtypeloop => \@itemtypesloop);
230 <form action='<!-- TMPL_VAR name="script_name" -->' method=post>
231 <select name="itemtype">
232 <option value="">Default</option>
233 <!-- TMPL_LOOP name="itemtypeloop" -->
234 <option value="<!-- TMPL_VAR name="value" -->" <!-- TMPL_IF name="selected" -->selected<!-- /TMPL_IF -->><!-- TMPL_VAR name="description" --></option>
237 <input type=text name=searchfield value="<!-- TMPL_VAR name="searchfield" -->">
238 <input type="submit" value="OK" class="button">
245 # returns a reference to a hash of references to branches...
247 my $dbh = C4::Context->dbh;
252 my $sth = $dbh->prepare($query);
254 while ( my $IT = $sth->fetchrow_hashref ) {
255 $itemtypes{ $IT->{'itemtype'} } = $IT;
257 return ( \%itemtypes );
260 sub get_itemtypeinfos_of {
269 WHERE itemtype IN (' . join( ',', map( { "'" . $_ . "'" } @itemtypes ) ) . ')
272 return get_infos_of( $query, 'itemtype' );
275 # this is temporary until we separate collection codes and item types
279 my $dbh = C4::Context->dbh;
282 "SELECT * FROM authorised_values ORDER BY authorised_value");
284 while ( my $data = $sth->fetchrow_hashref ) {
285 if ( $data->{category} eq "CCODE" ) {
287 $results[$count] = $data;
293 return ( $count, @results );
298 $authtypes = &getauthtypes();
300 Returns information about existing authtypes.
302 build a HTML select with the following code :
304 =head3 in PERL SCRIPT
306 my $authtypes = getauthtypes;
308 foreach my $thisauthtype (keys %$authtypes) {
309 my $selected = 1 if $thisauthtype eq $authtype;
310 my %row =(value => $thisauthtype,
311 selected => $selected,
312 authtypetext => $authtypes->{$thisauthtype}->{'authtypetext'},
314 push @authtypesloop, \%row;
316 $template->param(itemtypeloop => \@itemtypesloop);
320 <form action='<!-- TMPL_VAR name="script_name" -->' method=post>
321 <select name="authtype">
322 <!-- TMPL_LOOP name="authtypeloop" -->
323 <option value="<!-- TMPL_VAR name="value" -->" <!-- TMPL_IF name="selected" -->selected<!-- /TMPL_IF -->><!-- TMPL_VAR name="authtypetext" --></option>
326 <input type=text name=searchfield value="<!-- TMPL_VAR name="searchfield" -->">
327 <input type="submit" value="OK" class="button">
335 # returns a reference to a hash of references to authtypes...
337 my $dbh = C4::Context->dbh;
338 my $sth = $dbh->prepare("select * from auth_types order by authtypetext");
340 while ( my $IT = $sth->fetchrow_hashref ) {
341 $authtypes{ $IT->{'authtypecode'} } = $IT;
343 return ( \%authtypes );
347 my ($authtypecode) = @_;
349 # returns a reference to a hash of references to authtypes...
351 my $dbh = C4::Context->dbh;
352 my $sth = $dbh->prepare("select * from auth_types where authtypecode=?");
353 $sth->execute($authtypecode);
354 my $res = $sth->fetchrow_hashref;
360 $frameworks = &getframework();
362 Returns information about existing frameworks
364 build a HTML select with the following code :
366 =head3 in PERL SCRIPT
368 my $frameworks = frameworks();
370 foreach my $thisframework (keys %$frameworks) {
371 my $selected = 1 if $thisframework eq $frameworkcode;
372 my %row =(value => $thisframework,
373 selected => $selected,
374 description => $frameworks->{$thisframework}->{'frameworktext'},
376 push @frameworksloop, \%row;
378 $template->param(frameworkloop => \@frameworksloop);
382 <form action='<!-- TMPL_VAR name="script_name" -->' method=post>
383 <select name="frameworkcode">
384 <option value="">Default</option>
385 <!-- TMPL_LOOP name="frameworkloop" -->
386 <option value="<!-- TMPL_VAR name="value" -->" <!-- TMPL_IF name="selected" -->selected<!-- /TMPL_IF -->><!-- TMPL_VAR name="frameworktext" --></option>
389 <input type=text name=searchfield value="<!-- TMPL_VAR name="searchfield" -->">
390 <input type="submit" value="OK" class="button">
398 # returns a reference to a hash of references to branches...
400 my $dbh = C4::Context->dbh;
401 my $sth = $dbh->prepare("select * from biblio_framework");
403 while ( my $IT = $sth->fetchrow_hashref ) {
404 $itemtypes{ $IT->{'frameworkcode'} } = $IT;
406 return ( \%itemtypes );
409 =head2 getframeworkinfo
411 $frameworkinfo = &getframeworkinfo($frameworkcode);
413 Returns information about an frameworkcode.
417 sub getframeworkinfo {
418 my ($frameworkcode) = @_;
419 my $dbh = C4::Context->dbh;
421 $dbh->prepare("select * from biblio_framework where frameworkcode=?");
422 $sth->execute($frameworkcode);
423 my $res = $sth->fetchrow_hashref;
427 =head2 getitemtypeinfo
429 $itemtype = &getitemtype($itemtype);
431 Returns information about an itemtype.
435 sub getitemtypeinfo {
437 my $dbh = C4::Context->dbh;
438 my $sth = $dbh->prepare("select * from itemtypes where itemtype=?");
439 $sth->execute($itemtype);
440 my $res = $sth->fetchrow_hashref;
442 $res->{imageurl} = getitemtypeimagesrcfromurl( $res->{imageurl} );
447 sub getitemtypeimagesrcfromurl {
450 if ( defined $imageurl and $imageurl !~ m/^http/ ) {
451 $imageurl = getitemtypeimagesrc() . '/' . $imageurl;
457 sub getitemtypeimagedir {
458 return C4::Context->opachtdocs . '/'
459 . C4::Context->preference('template')
463 sub getitemtypeimagesrc {
464 return '/opac-tmpl' . '/'
465 . C4::Context->preference('template')
471 $printers = &GetPrinters();
472 @queues = keys %$printers;
474 Returns information about existing printer queues.
476 C<$printers> is a reference-to-hash whose keys are the print queues
477 defined in the printers table of the Koha database. The values are
478 references-to-hash, whose keys are the fields in the printers table.
484 my $dbh = C4::Context->dbh;
485 my $sth = $dbh->prepare("select * from printers");
487 while ( my $printer = $sth->fetchrow_hashref ) {
488 $printers{ $printer->{'printqueue'} } = $printer;
490 return ( \%printers );
495 $printer = GetPrinter( $query, $printers );
499 sub GetPrinter ($$) {
500 my ( $query, $printers ) = @_; # get printer for this query from printers
501 my $printer = $query->param('printer');
502 my %cookie = $query->cookie('userenv');
503 ($printer) || ( $printer = $cookie{'printer'} ) || ( $printer = '' );
504 ( $printers->{$printer} ) || ( $printer = ( keys %$printers )[0] );
510 Returns the number of pages to display in a pagination bar, given the number
511 of items and the number of items per page.
516 my ( $nb_items, $nb_items_per_page ) = @_;
518 return int( ( $nb_items - 1 ) / $nb_items_per_page ) + 1;
523 (@themes) = &getallthemes('opac');
524 (@themes) = &getallthemes('intranet');
526 Returns an array of all available themes.
534 if ( $type eq 'intranet' ) {
535 $htdocs = C4::Context->config('intrahtdocs');
538 $htdocs = C4::Context->config('opachtdocs');
540 opendir D, "$htdocs";
541 my @dirlist = readdir D;
542 foreach my $directory (@dirlist) {
543 -d "$htdocs/$directory/en" and push @themes, $directory;
550 if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
553 link_value => 'su-to',
554 label_value => 'Topics',
556 [ '600', '601', '602', '603', '604', '605', '606', '610' ],
560 link_value => 'su-geo',
561 label_value => 'Places',
566 link_value => 'su-ut',
567 label_value => 'Titles',
568 tags => [ '500', '501', '502', '503', '504', ],
573 label_value => 'Authors',
574 tags => [ '700', '701', '702', ],
579 label_value => 'Series',
584 link_value => 'branch',
585 label_value => 'Libraries',
595 link_value => 'su-to',
596 label_value => 'Topics',
602 # link_value => 'su-na',
603 # label_value => 'People and Organizations',
604 # tags => ['600', '610', '611'],
608 link_value => 'su-geo',
609 label_value => 'Places',
614 link_value => 'su-ut',
615 label_value => 'Titles',
621 label_value => 'Authors',
622 tags => [ '100', '110', '700', ],
627 label_value => 'Series',
628 tags => [ '440', '490', ],
632 link_value => 'branch',
633 label_value => 'Libraries',
645 Return a href where a key is associated to a href. You give a query, the
646 name of the key among the fields returned by the query. If you also give as
647 third argument the name of the value, the function returns a href of scalar.
656 # generic href of any information on the item, href of href.
657 my $iteminfos_of = get_infos_of($query, 'itemnumber');
658 print $iteminfos_of->{$itemnumber}{barcode};
660 # specific information, href of scalar
661 my $barcode_of_item = get_infos_of($query, 'itemnumber', 'barcode');
662 print $barcode_of_item->{$itemnumber};
667 my ( $query, $key_name, $value_name ) = @_;
669 my $dbh = C4::Context->dbh;
671 my $sth = $dbh->prepare($query);
675 while ( my $row = $sth->fetchrow_hashref ) {
676 if ( defined $value_name ) {
677 $infos_of{ $row->{$key_name} } = $row->{$value_name};
680 $infos_of{ $row->{$key_name} } = $row;
688 =head2 get_notforloan_label_of
690 my $notforloan_label_of = get_notforloan_label_of();
692 Each authorised value of notforloan (information available in items and
693 itemtypes) is link to a single label.
695 Returns a href where keys are authorised values and values are corresponding
698 foreach my $authorised_value (keys %{$notforloan_label_of}) {
700 "authorised_value: %s => %s\n",
702 $notforloan_label_of->{$authorised_value}
708 # FIXME - why not use GetAuthorisedValues ??
710 sub get_notforloan_label_of {
711 my $dbh = C4::Context->dbh;
714 SELECT authorised_value
715 FROM marc_subfield_structure
716 WHERE kohafield = \'items.notforloan\'
719 my $sth = $dbh->prepare($query);
721 my ($statuscode) = $sth->fetchrow_array();
726 FROM authorised_values
729 $sth = $dbh->prepare($query);
730 $sth->execute($statuscode);
731 my %notforloan_label_of;
732 while ( my $row = $sth->fetchrow_hashref ) {
733 $notforloan_label_of{ $row->{authorised_value} } = $row->{lib};
737 return \%notforloan_label_of;
741 my ( $position, $type ) = @_;
742 my $dbh = C4::Context->dbh;
743 my $strsth = "SELECT * FROM z3950servers where 1";
744 $strsth .= " AND position=\"$position\"" if ($position);
745 $strsth .= " AND type=\"$type\"" if ($type);
746 my $rq = $dbh->prepare($strsth);
748 my @primaryserverloop;
750 while ( my $data = $rq->fetchrow_hashref ) {
752 $cell{label} = $data->{'description'};
753 $cell{id} = $data->{'name'};
756 . ( $data->{port} ? ":" . $data->{port} : "" ) . "/"
758 if ( $data->{host} );
759 $cell{checked} = $data->{checked};
760 push @primaryserverloop,
762 label => $data->{description},
765 value => $data->{host} . ":"
766 . $data->{port} . "/"
768 checked => "checked",
769 icon => $data->{icon},
770 zed => $data->{type} eq 'zed',
771 opensearch => $data->{type} eq 'opensearch'
774 return \@primaryserverloop;
777 sub displaySecondaryServers {
779 # my $secondary_servers_loop = [
780 # { inner_sup_servers_loop => [
781 # {label => "Google", id=>"GOOG", value=>"google",icon => "google.ico",opensearch => "1"},
782 # {label => "Yahoo", id=>"YAH", value=>"yahoo", icon =>"yahoo.ico", zed => "1"},
783 # {label => "Worldcat", id=>"WCT", value=>"worldcat", icon => "worldcat.gif", zed => "1"},
784 # {label => "Library of Congress", id=>"LOC", name=> "server", value=>"z3950.loc.gov:7090/Voyager", icon =>"loc.ico", zed => "1"},
788 return; #$secondary_servers_loop;
791 =head2 GetAuthValCode
793 $authvalcode = GetAuthValCode($kohafield,$frameworkcode);
798 my ($kohafield,$fwcode) = @_;
799 my $dbh = C4::Context->dbh;
800 $fwcode='' unless $fwcode;
801 my $sth = $dbh->prepare('select authorised_value from marc_subfield_structure where kohafield=? and frameworkcode=?');
802 $sth->execute($kohafield,$fwcode);
803 my ($authvalcode) = $sth->fetchrow_array;
807 =head2 GetAuthorisedValues
809 $authvalues = GetAuthorisedValues($category);
811 this function get all authorised values from 'authosied_value' table into a reference to array which
812 each value containt an hashref.
814 Set C<$category> on input args if you want to limits your query to this one. This params is not mandatory.
818 sub GetAuthorisedValues {
819 my ($category,$selected) = @_;
822 my $dbh = C4::Context->dbh;
823 my $query = "SELECT * FROM authorised_values";
824 $query .= " WHERE category = '" . $category . "'" if $category;
826 my $sth = $dbh->prepare($query);
828 while (my $data=$sth->fetchrow_hashref) {
829 if ($selected eq $data->{'authorised_value'} ) {
830 $data->{'selected'} = 1;
832 $results[$count] = $data;
835 #my $data = $sth->fetchall_arrayref({});
836 return \@results; #$data;
839 =head2 GetKohaAuthorisedValues
841 Takes $dbh , $kohafield as parameters.
842 returns hashref of authvalCode => liblibrarian
843 or undef if no authvals defined for kohafield.
847 sub GetKohaAuthorisedValues {
848 my ($kohafield,$fwcode) = @_;
849 $fwcode='' unless $fwcode;
851 my $dbh = C4::Context->dbh;
852 my $avcode = GetAuthValCode($kohafield,$fwcode);
854 my $sth = $dbh->prepare("select authorised_value, lib from authorised_values where category=? ");
855 $sth->execute($avcode);
856 while ( my ($val, $lib) = $sth->fetchrow_array ) {
863 =head2 GetManagedTagSubfields
867 $res = GetManagedTagSubfields();
871 Returns a reference to a big hash of hash, with the Marc structure fro the given frameworkcode
873 NOTE: This function is used only by the (incomplete) bulk editing feature. Since
874 that feature currently does not deal with items and biblioitems changes
875 correctly, those tags are specifically excluded from the list prepared
878 For future reference, if a bulk item editing feature is implemented at some point, it
879 needs some design thought -- for example, circulation status fields should not
880 be changed willy-nilly.
884 sub GetManagedTagSubfields{
885 my $dbh=C4::Context->dbh;
886 my $rq=$dbh->prepare(qq|
888 DISTINCT CONCAT( marc_subfield_structure.tagfield, tagsubfield ) AS tagsubfield,
889 marc_subfield_structure.liblibrarian as subfielddesc,
890 marc_tag_structure.liblibrarian as tagdesc
891 FROM marc_subfield_structure
892 LEFT JOIN marc_tag_structure
893 ON marc_tag_structure.tagfield = marc_subfield_structure.tagfield
894 AND marc_tag_structure.frameworkcode = marc_subfield_structure.frameworkcode
895 WHERE marc_subfield_structure.tab>=0
896 AND marc_tag_structure.tagfield NOT IN (SELECT tagfield FROM marc_subfield_structure WHERE kohafield like 'items.%')
897 AND marc_tag_structure.tagfield NOT IN (SELECT tagfield FROM marc_subfield_structure WHERE kohafield = 'biblioitems.itemtype')
898 AND marc_subfield_structure.kohafield <> 'biblio.biblionumber'
899 AND marc_subfield_structure.kohafield <> 'biblioitems.biblioitemnumber'
900 ORDER BY marc_subfield_structure.tagfield, tagsubfield|);
902 my $data=$rq->fetchall_arrayref({});
909 $marcrecord = &fixEncoding($marcblob);
911 Returns a well encoded marcrecord.
917 my $record = MARC::Record->new_from_usmarc($marc);
918 if (C4::Context->preference("marcflavour") eq "UNIMARC"){
919 my $targetcharset="utf8";
920 if ($encoding && $targetcharset ne $encoding){
921 my $newRecord=MARC::Record->new();
922 if ($encoding!~/5426/){
924 my $decoder = Text::Iconv->new($encoding,$targetcharset);
925 my $newRecord=MARC::Record->new();
926 foreach my $field ($record->fields()){
927 if ($field->tag()<'010'){
928 $newRecord->insert_grouped_field($field);
932 foreach my $subfield ($field->subfields()){
934 if (($newField->tag eq '100')) {
935 substr($subfield->[1],26,4,"5050") if ($targetcharset eq "utf8");
937 map {$decoder->convert($_)} @$subfield;
938 $newField->add_subfields($subfield->[0]=>$subfield->[1]);
940 map {$decoder->convert($_)} @$subfield;
941 $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>$subfield->[1]);
945 $newRecord->insert_grouped_field($newField);
948 }elsif ($encoding=~/5426/){
949 foreach my $field ($record->fields()){
950 if ($field->tag()<'010'){
951 $newRecord->insert_grouped_field($field);
955 foreach my $subfield ($field->subfields()){
956 # my $utf8=eval{MARC::Charset::marc8_to_utf8($subfield->[1])};
957 # if ($@) {warn "z3950 character conversion error $@ ";$utf8=$subfield->[1]};
958 my $utf8=char_decode5426($subfield->[1]);
959 if (($field->tag eq '100')) {
960 substr($utf8,26,4,"5050");
963 $newField->add_subfields($subfield->[0]=>$utf8);
965 $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>$utf8);
969 $newRecord->insert_grouped_field($newField);
973 # warn $newRecord->as_formatted();
982 sub char_decode5426 {
986 $chars{0xb0}=0x0101;#3/0ayn[ain]
987 $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
988 #$chars{0xb2}=0x00e0;#'Ã ';
989 $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
990 #$chars{0xb3}=0x00e7;#'ç';
991 $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
996 $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
997 $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
998 $chars{0xfa}=0x0153;#oe
999 $chars{0x81d1}=0x00b0;
1002 ## combined characters iso5426
1004 $chars{0xc041}=0x1ea2; # capital a with hook above
1005 $chars{0xc045}=0x1eba; # capital e with hook above
1006 $chars{0xc049}=0x1ec8; # capital i with hook above
1007 $chars{0xc04f}=0x1ece; # capital o with hook above
1008 $chars{0xc055}=0x1ee6; # capital u with hook above
1009 $chars{0xc059}=0x1ef6; # capital y with hook above
1010 $chars{0xc061}=0x1ea3; # small a with hook above
1011 $chars{0xc065}=0x1ebb; # small e with hook above
1012 $chars{0xc069}=0x1ec9; # small i with hook above
1013 $chars{0xc06f}=0x1ecf; # small o with hook above
1014 $chars{0xc075}=0x1ee7; # small u with hook above
1015 $chars{0xc079}=0x1ef7; # small y with hook above
1018 $chars{0xc141}=0x00c0; # capital a with grave accent
1019 $chars{0xc145}=0x00c8; # capital e with grave accent
1020 $chars{0xc149}=0x00cc; # capital i with grave accent
1021 $chars{0xc14f}=0x00d2; # capital o with grave accent
1022 $chars{0xc155}=0x00d9; # capital u with grave accent
1023 $chars{0xc157}=0x1e80; # capital w with grave
1024 $chars{0xc159}=0x1ef2; # capital y with grave
1025 $chars{0xc161}=0x00e0; # small a with grave accent
1026 $chars{0xc165}=0x00e8; # small e with grave accent
1027 $chars{0xc169}=0x00ec; # small i with grave accent
1028 $chars{0xc16f}=0x00f2; # small o with grave accent
1029 $chars{0xc175}=0x00f9; # small u with grave accent
1030 $chars{0xc177}=0x1e81; # small w with grave
1031 $chars{0xc179}=0x1ef3; # small y with grave
1033 $chars{0xc241}=0x00c1; # capital a with acute accent
1034 $chars{0xc243}=0x0106; # capital c with acute accent
1035 $chars{0xc245}=0x00c9; # capital e with acute accent
1036 $chars{0xc247}=0x01f4; # capital g with acute
1037 $chars{0xc249}=0x00cd; # capital i with acute accent
1038 $chars{0xc24b}=0x1e30; # capital k with acute
1039 $chars{0xc24c}=0x0139; # capital l with acute accent
1040 $chars{0xc24d}=0x1e3e; # capital m with acute
1041 $chars{0xc24e}=0x0143; # capital n with acute accent
1042 $chars{0xc24f}=0x00d3; # capital o with acute accent
1043 $chars{0xc250}=0x1e54; # capital p with acute
1044 $chars{0xc252}=0x0154; # capital r with acute accent
1045 $chars{0xc253}=0x015a; # capital s with acute accent
1046 $chars{0xc255}=0x00da; # capital u with acute accent
1047 $chars{0xc257}=0x1e82; # capital w with acute
1048 $chars{0xc259}=0x00dd; # capital y with acute accent
1049 $chars{0xc25a}=0x0179; # capital z with acute accent
1050 $chars{0xc261}=0x00e1; # small a with acute accent
1051 $chars{0xc263}=0x0107; # small c with acute accent
1052 $chars{0xc265}=0x00e9; # small e with acute accent
1053 $chars{0xc267}=0x01f5; # small g with acute
1054 $chars{0xc269}=0x00ed; # small i with acute accent
1055 $chars{0xc26b}=0x1e31; # small k with acute
1056 $chars{0xc26c}=0x013a; # small l with acute accent
1057 $chars{0xc26d}=0x1e3f; # small m with acute
1058 $chars{0xc26e}=0x0144; # small n with acute accent
1059 $chars{0xc26f}=0x00f3; # small o with acute accent
1060 $chars{0xc270}=0x1e55; # small p with acute
1061 $chars{0xc272}=0x0155; # small r with acute accent
1062 $chars{0xc273}=0x015b; # small s with acute accent
1063 $chars{0xc275}=0x00fa; # small u with acute accent
1064 $chars{0xc277}=0x1e83; # small w with acute
1065 $chars{0xc279}=0x00fd; # small y with acute accent
1066 $chars{0xc27a}=0x017a; # small z with acute accent
1067 $chars{0xc2e1}=0x01fc; # capital ae with acute
1068 $chars{0xc2f1}=0x01fd; # small ae with acute
1069 # 4/3 circumflex accent
1070 $chars{0xc341}=0x00c2; # capital a with circumflex accent
1071 $chars{0xc343}=0x0108; # capital c with circumflex
1072 $chars{0xc345}=0x00ca; # capital e with circumflex accent
1073 $chars{0xc347}=0x011c; # capital g with circumflex
1074 $chars{0xc348}=0x0124; # capital h with circumflex
1075 $chars{0xc349}=0x00ce; # capital i with circumflex accent
1076 $chars{0xc34a}=0x0134; # capital j with circumflex
1077 $chars{0xc34f}=0x00d4; # capital o with circumflex accent
1078 $chars{0xc353}=0x015c; # capital s with circumflex
1079 $chars{0xc355}=0x00db; # capital u with circumflex
1080 $chars{0xc357}=0x0174; # capital w with circumflex
1081 $chars{0xc359}=0x0176; # capital y with circumflex
1082 $chars{0xc35a}=0x1e90; # capital z with circumflex
1083 $chars{0xc361}=0x00e2; # small a with circumflex accent
1084 $chars{0xc363}=0x0109; # small c with circumflex
1085 $chars{0xc365}=0x00ea; # small e with circumflex accent
1086 $chars{0xc367}=0x011d; # small g with circumflex
1087 $chars{0xc368}=0x0125; # small h with circumflex
1088 $chars{0xc369}=0x00ee; # small i with circumflex accent
1089 $chars{0xc36a}=0x0135; # small j with circumflex
1090 $chars{0xc36e}=0x00f1; # small n with tilde
1091 $chars{0xc36f}=0x00f4; # small o with circumflex accent
1092 $chars{0xc373}=0x015d; # small s with circumflex
1093 $chars{0xc375}=0x00fb; # small u with circumflex
1094 $chars{0xc377}=0x0175; # small w with circumflex
1095 $chars{0xc379}=0x0177; # small y with circumflex
1096 $chars{0xc37a}=0x1e91; # small z with circumflex
1098 $chars{0xc441}=0x00c3; # capital a with tilde
1099 $chars{0xc445}=0x1ebc; # capital e with tilde
1100 $chars{0xc449}=0x0128; # capital i with tilde
1101 $chars{0xc44e}=0x00d1; # capital n with tilde
1102 $chars{0xc44f}=0x00d5; # capital o with tilde
1103 $chars{0xc455}=0x0168; # capital u with tilde
1104 $chars{0xc456}=0x1e7c; # capital v with tilde
1105 $chars{0xc459}=0x1ef8; # capital y with tilde
1106 $chars{0xc461}=0x00e3; # small a with tilde
1107 $chars{0xc465}=0x1ebd; # small e with tilde
1108 $chars{0xc469}=0x0129; # small i with tilde
1109 $chars{0xc46e}=0x00f1; # small n with tilde
1110 $chars{0xc46f}=0x00f5; # small o with tilde
1111 $chars{0xc475}=0x0169; # small u with tilde
1112 $chars{0xc476}=0x1e7d; # small v with tilde
1113 $chars{0xc479}=0x1ef9; # small y with tilde
1115 $chars{0xc541}=0x0100; # capital a with macron
1116 $chars{0xc545}=0x0112; # capital e with macron
1117 $chars{0xc547}=0x1e20; # capital g with macron
1118 $chars{0xc549}=0x012a; # capital i with macron
1119 $chars{0xc54f}=0x014c; # capital o with macron
1120 $chars{0xc555}=0x016a; # capital u with macron
1121 $chars{0xc561}=0x0101; # small a with macron
1122 $chars{0xc565}=0x0113; # small e with macron
1123 $chars{0xc567}=0x1e21; # small g with macron
1124 $chars{0xc569}=0x012b; # small i with macron
1125 $chars{0xc56f}=0x014d; # small o with macron
1126 $chars{0xc575}=0x016b; # small u with macron
1127 $chars{0xc572}=0x0159; # small r with macron
1128 $chars{0xc5e1}=0x01e2; # capital ae with macron
1129 $chars{0xc5f1}=0x01e3; # small ae with macron
1131 $chars{0xc641}=0x0102; # capital a with breve
1132 $chars{0xc645}=0x0114; # capital e with breve
1133 $chars{0xc647}=0x011e; # capital g with breve
1134 $chars{0xc649}=0x012c; # capital i with breve
1135 $chars{0xc64f}=0x014e; # capital o with breve
1136 $chars{0xc655}=0x016c; # capital u with breve
1137 $chars{0xc661}=0x0103; # small a with breve
1138 $chars{0xc665}=0x0115; # small e with breve
1139 $chars{0xc667}=0x011f; # small g with breve
1140 $chars{0xc669}=0x012d; # small i with breve
1141 $chars{0xc66f}=0x014f; # small o with breve
1142 $chars{0xc675}=0x016d; # small u with breve
1144 $chars{0xc7b0}=0x01e1; # Ain with dot above
1145 $chars{0xc742}=0x1e02; # capital b with dot above
1146 $chars{0xc743}=0x010a; # capital c with dot above
1147 $chars{0xc744}=0x1e0a; # capital d with dot above
1148 $chars{0xc745}=0x0116; # capital e with dot above
1149 $chars{0xc746}=0x1e1e; # capital f with dot above
1150 $chars{0xc747}=0x0120; # capital g with dot above
1151 $chars{0xc748}=0x1e22; # capital h with dot above
1152 $chars{0xc749}=0x0130; # capital i with dot above
1153 $chars{0xc74d}=0x1e40; # capital m with dot above
1154 $chars{0xc74e}=0x1e44; # capital n with dot above
1155 $chars{0xc750}=0x1e56; # capital p with dot above
1156 $chars{0xc752}=0x1e58; # capital r with dot above
1157 $chars{0xc753}=0x1e60; # capital s with dot above
1158 $chars{0xc754}=0x1e6a; # capital t with dot above
1159 $chars{0xc757}=0x1e86; # capital w with dot above
1160 $chars{0xc758}=0x1e8a; # capital x with dot above
1161 $chars{0xc759}=0x1e8e; # capital y with dot above
1162 $chars{0xc75a}=0x017b; # capital z with dot above
1163 $chars{0xc761}=0x0227; # small b with dot above
1164 $chars{0xc762}=0x1e03; # small b with dot above
1165 $chars{0xc763}=0x010b; # small c with dot above
1166 $chars{0xc764}=0x1e0b; # small d with dot above
1167 $chars{0xc765}=0x0117; # small e with dot above
1168 $chars{0xc766}=0x1e1f; # small f with dot above
1169 $chars{0xc767}=0x0121; # small g with dot above
1170 $chars{0xc768}=0x1e23; # small h with dot above
1171 $chars{0xc76d}=0x1e41; # small m with dot above
1172 $chars{0xc76e}=0x1e45; # small n with dot above
1173 $chars{0xc770}=0x1e57; # small p with dot above
1174 $chars{0xc772}=0x1e59; # small r with dot above
1175 $chars{0xc773}=0x1e61; # small s with dot above
1176 $chars{0xc774}=0x1e6b; # small t with dot above
1177 $chars{0xc777}=0x1e87; # small w with dot above
1178 $chars{0xc778}=0x1e8b; # small x with dot above
1179 $chars{0xc779}=0x1e8f; # small y with dot above
1180 $chars{0xc77a}=0x017c; # small z with dot above
1181 # 4/8 trema, diaresis
1182 $chars{0xc820}=0x00a8; # diaeresis
1183 $chars{0xc841}=0x00c4; # capital a with diaeresis
1184 $chars{0xc845}=0x00cb; # capital e with diaeresis
1185 $chars{0xc848}=0x1e26; # capital h with diaeresis
1186 $chars{0xc849}=0x00cf; # capital i with diaeresis
1187 $chars{0xc84f}=0x00d6; # capital o with diaeresis
1188 $chars{0xc855}=0x00dc; # capital u with diaeresis
1189 $chars{0xc857}=0x1e84; # capital w with diaeresis
1190 $chars{0xc858}=0x1e8c; # capital x with diaeresis
1191 $chars{0xc859}=0x0178; # capital y with diaeresis
1192 $chars{0xc861}=0x00e4; # small a with diaeresis
1193 $chars{0xc865}=0x00eb; # small e with diaeresis
1194 $chars{0xc868}=0x1e27; # small h with diaeresis
1195 $chars{0xc869}=0x00ef; # small i with diaeresis
1196 $chars{0xc86f}=0x00f6; # small o with diaeresis
1197 $chars{0xc874}=0x1e97; # small t with diaeresis
1198 $chars{0xc875}=0x00fc; # small u with diaeresis
1199 $chars{0xc877}=0x1e85; # small w with diaeresis
1200 $chars{0xc878}=0x1e8d; # small x with diaeresis
1201 $chars{0xc879}=0x00ff; # small y with diaeresis
1203 $chars{0xc920}=0x00a8; # [diaeresis]
1204 $chars{0xc961}=0x00e4; # a with umlaut
1205 $chars{0xc965}=0x00eb; # e with umlaut
1206 $chars{0xc969}=0x00ef; # i with umlaut
1207 $chars{0xc96f}=0x00f6; # o with umlaut
1208 $chars{0xc975}=0x00fc; # u with umlaut
1210 $chars{0xca41}=0x00c5; # capital a with ring above
1211 $chars{0xcaad}=0x016e; # capital u with ring above
1212 $chars{0xca61}=0x00e5; # small a with ring above
1213 $chars{0xca75}=0x016f; # small u with ring above
1214 $chars{0xca77}=0x1e98; # small w with ring above
1215 $chars{0xca79}=0x1e99; # small y with ring above
1216 # 4/11 high comma off centre
1217 # 4/12 inverted high comma centred
1218 # 4/13 double acute accent
1219 $chars{0xcd4f}=0x0150; # capital o with double acute
1220 $chars{0xcd55}=0x0170; # capital u with double acute
1221 $chars{0xcd6f}=0x0151; # small o with double acute
1222 $chars{0xcd75}=0x0171; # small u with double acute
1224 $chars{0xce54}=0x01a0; # latin capital letter o with horn
1225 $chars{0xce55}=0x01af; # latin capital letter u with horn
1226 $chars{0xce74}=0x01a1; # latin small letter o with horn
1227 $chars{0xce75}=0x01b0; # latin small letter u with horn
1229 $chars{0xcf41}=0x01cd; # capital a with caron
1230 $chars{0xcf43}=0x010c; # capital c with caron
1231 $chars{0xcf44}=0x010e; # capital d with caron
1232 $chars{0xcf45}=0x011a; # capital e with caron
1233 $chars{0xcf47}=0x01e6; # capital g with caron
1234 $chars{0xcf49}=0x01cf; # capital i with caron
1235 $chars{0xcf4b}=0x01e8; # capital k with caron
1236 $chars{0xcf4c}=0x013d; # capital l with caron
1237 $chars{0xcf4e}=0x0147; # capital n with caron
1238 $chars{0xcf4f}=0x01d1; # capital o with caron
1239 $chars{0xcf52}=0x0158; # capital r with caron
1240 $chars{0xcf53}=0x0160; # capital s with caron
1241 $chars{0xcf54}=0x0164; # capital t with caron
1242 $chars{0xcf55}=0x01d3; # capital u with caron
1243 $chars{0xcf5a}=0x017d; # capital z with caron
1244 $chars{0xcf61}=0x01ce; # small a with caron
1245 $chars{0xcf63}=0x010d; # small c with caron
1246 $chars{0xcf64}=0x010f; # small d with caron
1247 $chars{0xcf65}=0x011b; # small e with caron
1248 $chars{0xcf67}=0x01e7; # small g with caron
1249 $chars{0xcf69}=0x01d0; # small i with caron
1250 $chars{0xcf6a}=0x01f0; # small j with caron
1251 $chars{0xcf6b}=0x01e9; # small k with caron
1252 $chars{0xcf6c}=0x013e; # small l with caron
1253 $chars{0xcf6e}=0x0148; # small n with caron
1254 $chars{0xcf6f}=0x01d2; # small o with caron
1255 $chars{0xcf72}=0x0159; # small r with caron
1256 $chars{0xcf73}=0x0161; # small s with caron
1257 $chars{0xcf74}=0x0165; # small t with caron
1258 $chars{0xcf75}=0x01d4; # small u with caron
1259 $chars{0xcf7a}=0x017e; # small z with caron
1261 $chars{0xd020}=0x00b8; # cedilla
1262 $chars{0xd043}=0x00c7; # capital c with cedilla
1263 $chars{0xd044}=0x1e10; # capital d with cedilla
1264 $chars{0xd047}=0x0122; # capital g with cedilla
1265 $chars{0xd048}=0x1e28; # capital h with cedilla
1266 $chars{0xd04b}=0x0136; # capital k with cedilla
1267 $chars{0xd04c}=0x013b; # capital l with cedilla
1268 $chars{0xd04e}=0x0145; # capital n with cedilla
1269 $chars{0xd052}=0x0156; # capital r with cedilla
1270 $chars{0xd053}=0x015e; # capital s with cedilla
1271 $chars{0xd054}=0x0162; # capital t with cedilla
1272 $chars{0xd063}=0x00e7; # small c with cedilla
1273 $chars{0xd064}=0x1e11; # small d with cedilla
1274 $chars{0xd065}=0x0119; # small e with cedilla
1275 $chars{0xd067}=0x0123; # small g with cedilla
1276 $chars{0xd068}=0x1e29; # small h with cedilla
1277 $chars{0xd06b}=0x0137; # small k with cedilla
1278 $chars{0xd06c}=0x013c; # small l with cedilla
1279 $chars{0xd06e}=0x0146; # small n with cedilla
1280 $chars{0xd072}=0x0157; # small r with cedilla
1281 $chars{0xd073}=0x015f; # small s with cedilla
1282 $chars{0xd074}=0x0163; # small t with cedilla
1285 # 5/3 ogonek (hook to right
1286 $chars{0xd320}=0x02db; # ogonek
1287 $chars{0xd341}=0x0104; # capital a with ogonek
1288 $chars{0xd345}=0x0118; # capital e with ogonek
1289 $chars{0xd349}=0x012e; # capital i with ogonek
1290 $chars{0xd34f}=0x01ea; # capital o with ogonek
1291 $chars{0xd355}=0x0172; # capital u with ogonek
1292 $chars{0xd361}=0x0105; # small a with ogonek
1293 $chars{0xd365}=0x0119; # small e with ogonek
1294 $chars{0xd369}=0x012f; # small i with ogonek
1295 $chars{0xd36f}=0x01eb; # small o with ogonek
1296 $chars{0xd375}=0x0173; # small u with ogonek
1298 $chars{0xd441}=0x1e00; # capital a with ring below
1299 $chars{0xd461}=0x1e01; # small a with ring below
1300 # 5/5 half circle below
1301 $chars{0xf948}=0x1e2a; # capital h with breve below
1302 $chars{0xf968}=0x1e2b; # small h with breve below
1304 $chars{0xd641}=0x1ea0; # capital a with dot below
1305 $chars{0xd642}=0x1e04; # capital b with dot below
1306 $chars{0xd644}=0x1e0c; # capital d with dot below
1307 $chars{0xd645}=0x1eb8; # capital e with dot below
1308 $chars{0xd648}=0x1e24; # capital h with dot below
1309 $chars{0xd649}=0x1eca; # capital i with dot below
1310 $chars{0xd64b}=0x1e32; # capital k with dot below
1311 $chars{0xd64c}=0x1e36; # capital l with dot below
1312 $chars{0xd64d}=0x1e42; # capital m with dot below
1313 $chars{0xd64e}=0x1e46; # capital n with dot below
1314 $chars{0xd64f}=0x1ecc; # capital o with dot below
1315 $chars{0xd652}=0x1e5a; # capital r with dot below
1316 $chars{0xd653}=0x1e62; # capital s with dot below
1317 $chars{0xd654}=0x1e6c; # capital t with dot below
1318 $chars{0xd655}=0x1ee4; # capital u with dot below
1319 $chars{0xd656}=0x1e7e; # capital v with dot below
1320 $chars{0xd657}=0x1e88; # capital w with dot below
1321 $chars{0xd659}=0x1ef4; # capital y with dot below
1322 $chars{0xd65a}=0x1e92; # capital z with dot below
1323 $chars{0xd661}=0x1ea1; # small a with dot below
1324 $chars{0xd662}=0x1e05; # small b with dot below
1325 $chars{0xd664}=0x1e0d; # small d with dot below
1326 $chars{0xd665}=0x1eb9; # small e with dot below
1327 $chars{0xd668}=0x1e25; # small h with dot below
1328 $chars{0xd669}=0x1ecb; # small i with dot below
1329 $chars{0xd66b}=0x1e33; # small k with dot below
1330 $chars{0xd66c}=0x1e37; # small l with dot below
1331 $chars{0xd66d}=0x1e43; # small m with dot below
1332 $chars{0xd66e}=0x1e47; # small n with dot below
1333 $chars{0xd66f}=0x1ecd; # small o with dot below
1334 $chars{0xd672}=0x1e5b; # small r with dot below
1335 $chars{0xd673}=0x1e63; # small s with dot below
1336 $chars{0xd674}=0x1e6d; # small t with dot below
1337 $chars{0xd675}=0x1ee5; # small u with dot below
1338 $chars{0xd676}=0x1e7f; # small v with dot below
1339 $chars{0xd677}=0x1e89; # small w with dot below
1340 $chars{0xd679}=0x1ef5; # small y with dot below
1341 $chars{0xd67a}=0x1e93; # small z with dot below
1342 # 5/7 double dot below
1343 $chars{0xd755}=0x1e72; # capital u with diaeresis below
1344 $chars{0xd775}=0x1e73; # small u with diaeresis below
1346 $chars{0xd820}=0x005f; # underline
1347 # 5/9 double underline
1348 $chars{0xd920}=0x2017; # double underline
1349 # 5/10 small low vertical bar
1350 $chars{0xda20}=0x02cc; #
1351 # 5/11 circumflex below
1352 # 5/12 (this position shall not be used)
1353 # 5/13 left half of ligature sign and of double tilde
1354 # 5/14 right half of ligature sign
1355 # 5/15 right half of double tilde
1356 # map {printf "%x :%x\n",$_,$chars{$_};}keys %chars;
1357 my @data = unpack("C*", $string);
1359 my $length=scalar(@data);
1360 for (my $i = 0; $i < scalar(@data); $i++) {
1361 my $char= $data[$i];
1362 if ($char >= 0x00 && $char <= 0x7F){
1365 push @characters,$char unless ($char<0x02 ||$char== 0x0F);
1366 }elsif (($char >= 0xC0 && $char <= 0xDF)) {
1369 if ($chars{$char*256+$data[$i+1]}) {
1370 $convchar= $chars{$char * 256 + $data[$i+1]};
1372 # printf "char %x $char, char to convert %x , converted %x\n",$char,$char * 256 + $data[$i - 1],$convchar;
1373 } elsif ($chars{$char}) {
1374 $convchar= $chars{$char};
1375 # printf "0xC char %x, converted %x\n",$char,$chars{$char};
1379 push @characters,$convchar;
1382 if ($chars{$char}) {
1383 $convchar= $chars{$char};
1384 # printf "char %x, converted %x\n",$char,$chars{$char};
1386 # printf "char %x $char\n",$char;
1389 push @characters,$convchar;
1392 $result=pack "U*",@characters;
1393 # $result=~s/\x01//;
1394 # $result=~s/\x00//;
1398 $result=~s/\x1b\x5b//;
1399 # map{printf "%x",$_} @characters;