3 # Copyright 2000-2002 Katipo Communications
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along with
17 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
18 # Suite 330, Boston, MA 02111-1307 USA
24 use vars qw($VERSION @ISA @EXPORT $DEBUG);
33 &subfield_is_koha_internal_p
34 &GetPrinters &GetPrinter
35 &GetItemTypes &getitemtypeinfo
38 &getframeworks &getframeworkinfo
39 &getauthtypes &getauthtype
44 &getitemtypeimagesrcfromurl
46 &get_notforloan_label_of
51 &GetKohaAuthorisedValues
53 &GetManagedTagSubfields
62 C4::Koha - Perl Module containing convenience functions for Koha scripts
71 Koha.pm provides many functions for Koha scripts.
80 $slash_date = &slashifyDate($dash_date);
82 Takes a string of the form "DD-MM-YYYY" (or anything separated by
83 dashes), converts it to the form "YYYY/MM/DD", and returns the result.
89 # accepts a date of the form xx-xx-xx[xx] and returns it in the
91 my @dateOut = split( '-', shift );
92 return ("$dateOut[2]/$dateOut[1]/$dateOut[0]");
98 my $string = DisplayISBN( $isbn );
104 if (length ($isbn)<13){
106 if ( substr( $isbn, 0, 1 ) <= 7 ) {
107 $seg1 = substr( $isbn, 0, 1 );
109 elsif ( substr( $isbn, 0, 2 ) <= 94 ) {
110 $seg1 = substr( $isbn, 0, 2 );
112 elsif ( substr( $isbn, 0, 3 ) <= 995 ) {
113 $seg1 = substr( $isbn, 0, 3 );
115 elsif ( substr( $isbn, 0, 4 ) <= 9989 ) {
116 $seg1 = substr( $isbn, 0, 4 );
119 $seg1 = substr( $isbn, 0, 5 );
121 my $x = substr( $isbn, length($seg1) );
123 if ( substr( $x, 0, 2 ) <= 19 ) {
125 # if(sTmp2 < 10) sTmp2 = "0" sTmp2;
126 $seg2 = substr( $x, 0, 2 );
128 elsif ( substr( $x, 0, 3 ) <= 699 ) {
129 $seg2 = substr( $x, 0, 3 );
131 elsif ( substr( $x, 0, 4 ) <= 8399 ) {
132 $seg2 = substr( $x, 0, 4 );
134 elsif ( substr( $x, 0, 5 ) <= 89999 ) {
135 $seg2 = substr( $x, 0, 5 );
137 elsif ( substr( $x, 0, 6 ) <= 9499999 ) {
138 $seg2 = substr( $x, 0, 6 );
141 $seg2 = substr( $x, 0, 7 );
143 my $seg3 = substr( $x, length($seg2) );
144 $seg3 = substr( $seg3, 0, length($seg3) - 1 );
145 my $seg4 = substr( $x, -1, 1 );
146 return "$seg1-$seg2-$seg3-$seg4";
149 $seg1 = substr( $isbn, 0, 3 );
151 if ( substr( $isbn, 3, 1 ) <= 7 ) {
152 $seg2 = substr( $isbn, 3, 1 );
154 elsif ( substr( $isbn, 3, 2 ) <= 94 ) {
155 $seg2 = substr( $isbn, 3, 2 );
157 elsif ( substr( $isbn, 3, 3 ) <= 995 ) {
158 $seg2 = substr( $isbn, 3, 3 );
160 elsif ( substr( $isbn, 3, 4 ) <= 9989 ) {
161 $seg2 = substr( $isbn, 3, 4 );
164 $seg2 = substr( $isbn, 3, 5 );
166 my $x = substr( $isbn, length($seg2) +3);
168 if ( substr( $x, 0, 2 ) <= 19 ) {
170 # if(sTmp2 < 10) sTmp2 = "0" sTmp2;
171 $seg3 = substr( $x, 0, 2 );
173 elsif ( substr( $x, 0, 3 ) <= 699 ) {
174 $seg3 = substr( $x, 0, 3 );
176 elsif ( substr( $x, 0, 4 ) <= 8399 ) {
177 $seg3 = substr( $x, 0, 4 );
179 elsif ( substr( $x, 0, 5 ) <= 89999 ) {
180 $seg3 = substr( $x, 0, 5 );
182 elsif ( substr( $x, 0, 6 ) <= 9499999 ) {
183 $seg3 = substr( $x, 0, 6 );
186 $seg3 = substr( $x, 0, 7 );
188 my $seg4 = substr( $x, length($seg3) );
189 $seg4 = substr( $seg4, 0, length($seg4) - 1 );
190 my $seg5 = substr( $x, -1, 1 );
191 return "$seg1-$seg2-$seg3-$seg4-$seg5";
195 # FIXME.. this should be moved to a MARC-specific module
196 sub subfield_is_koha_internal_p ($) {
199 # We could match on 'lib' and 'tab' (and 'mandatory', & more to come!)
200 # But real MARC subfields are always single-character
201 # so it really is safer just to check the length
203 return length $subfield != 1;
208 $itemtypes = &GetItemTypes();
210 Returns information about existing itemtypes.
212 build a HTML select with the following code :
214 =head3 in PERL SCRIPT
216 my $itemtypes = GetItemTypes;
218 foreach my $thisitemtype (sort keys %$itemtypes) {
219 my $selected = 1 if $thisitemtype eq $itemtype;
220 my %row =(value => $thisitemtype,
221 selected => $selected,
222 description => $itemtypes->{$thisitemtype}->{'description'},
224 push @itemtypesloop, \%row;
226 $template->param(itemtypeloop => \@itemtypesloop);
230 <form action='<!-- TMPL_VAR name="script_name" -->' method=post>
231 <select name="itemtype">
232 <option value="">Default</option>
233 <!-- TMPL_LOOP name="itemtypeloop" -->
234 <option value="<!-- TMPL_VAR name="value" -->" <!-- TMPL_IF name="selected" -->selected<!-- /TMPL_IF -->><!-- TMPL_VAR name="description" --></option>
237 <input type=text name=searchfield value="<!-- TMPL_VAR name="searchfield" -->">
238 <input type="submit" value="OK" class="button">
245 # returns a reference to a hash of references to branches...
247 my $dbh = C4::Context->dbh;
252 my $sth = $dbh->prepare($query);
254 while ( my $IT = $sth->fetchrow_hashref ) {
255 $itemtypes{ $IT->{'itemtype'} } = $IT;
257 return ( \%itemtypes );
260 sub get_itemtypeinfos_of {
269 WHERE itemtype IN (' . join( ',', map( { "'" . $_ . "'" } @itemtypes ) ) . ')
272 return get_infos_of( $query, 'itemtype' );
275 # this is temporary until we separate collection codes and item types
279 my $dbh = C4::Context->dbh;
282 "SELECT * FROM authorised_values ORDER BY authorised_value");
284 while ( my $data = $sth->fetchrow_hashref ) {
285 if ( $data->{category} eq "CCODE" ) {
287 $results[$count] = $data;
293 return ( $count, @results );
298 $authtypes = &getauthtypes();
300 Returns information about existing authtypes.
302 build a HTML select with the following code :
304 =head3 in PERL SCRIPT
306 my $authtypes = getauthtypes;
308 foreach my $thisauthtype (keys %$authtypes) {
309 my $selected = 1 if $thisauthtype eq $authtype;
310 my %row =(value => $thisauthtype,
311 selected => $selected,
312 authtypetext => $authtypes->{$thisauthtype}->{'authtypetext'},
314 push @authtypesloop, \%row;
316 $template->param(itemtypeloop => \@itemtypesloop);
320 <form action='<!-- TMPL_VAR name="script_name" -->' method=post>
321 <select name="authtype">
322 <!-- TMPL_LOOP name="authtypeloop" -->
323 <option value="<!-- TMPL_VAR name="value" -->" <!-- TMPL_IF name="selected" -->selected<!-- /TMPL_IF -->><!-- TMPL_VAR name="authtypetext" --></option>
326 <input type=text name=searchfield value="<!-- TMPL_VAR name="searchfield" -->">
327 <input type="submit" value="OK" class="button">
335 # returns a reference to a hash of references to authtypes...
337 my $dbh = C4::Context->dbh;
338 my $sth = $dbh->prepare("select * from auth_types order by authtypetext");
340 while ( my $IT = $sth->fetchrow_hashref ) {
341 $authtypes{ $IT->{'authtypecode'} } = $IT;
343 return ( \%authtypes );
347 my ($authtypecode) = @_;
349 # returns a reference to a hash of references to authtypes...
351 my $dbh = C4::Context->dbh;
352 my $sth = $dbh->prepare("select * from auth_types where authtypecode=?");
353 $sth->execute($authtypecode);
354 my $res = $sth->fetchrow_hashref;
360 $frameworks = &getframework();
362 Returns information about existing frameworks
364 build a HTML select with the following code :
366 =head3 in PERL SCRIPT
368 my $frameworks = frameworks();
370 foreach my $thisframework (keys %$frameworks) {
371 my $selected = 1 if $thisframework eq $frameworkcode;
372 my %row =(value => $thisframework,
373 selected => $selected,
374 description => $frameworks->{$thisframework}->{'frameworktext'},
376 push @frameworksloop, \%row;
378 $template->param(frameworkloop => \@frameworksloop);
382 <form action='<!-- TMPL_VAR name="script_name" -->' method=post>
383 <select name="frameworkcode">
384 <option value="">Default</option>
385 <!-- TMPL_LOOP name="frameworkloop" -->
386 <option value="<!-- TMPL_VAR name="value" -->" <!-- TMPL_IF name="selected" -->selected<!-- /TMPL_IF -->><!-- TMPL_VAR name="frameworktext" --></option>
389 <input type=text name=searchfield value="<!-- TMPL_VAR name="searchfield" -->">
390 <input type="submit" value="OK" class="button">
398 # returns a reference to a hash of references to branches...
400 my $dbh = C4::Context->dbh;
401 my $sth = $dbh->prepare("select * from biblio_framework");
403 while ( my $IT = $sth->fetchrow_hashref ) {
404 $itemtypes{ $IT->{'frameworkcode'} } = $IT;
406 return ( \%itemtypes );
409 =head2 getframeworkinfo
411 $frameworkinfo = &getframeworkinfo($frameworkcode);
413 Returns information about an frameworkcode.
417 sub getframeworkinfo {
418 my ($frameworkcode) = @_;
419 my $dbh = C4::Context->dbh;
421 $dbh->prepare("select * from biblio_framework where frameworkcode=?");
422 $sth->execute($frameworkcode);
423 my $res = $sth->fetchrow_hashref;
427 =head2 getitemtypeinfo
429 $itemtype = &getitemtype($itemtype);
431 Returns information about an itemtype.
435 sub getitemtypeinfo {
437 my $dbh = C4::Context->dbh;
438 my $sth = $dbh->prepare("select * from itemtypes where itemtype=?");
439 $sth->execute($itemtype);
440 my $res = $sth->fetchrow_hashref;
442 $res->{imageurl} = getitemtypeimagesrcfromurl( $res->{imageurl} );
447 sub getitemtypeimagesrcfromurl {
450 if ( defined $imageurl and $imageurl !~ m/^http/ ) {
451 $imageurl = getitemtypeimagesrc() . '/' . $imageurl;
457 sub getitemtypeimagedir {
458 return C4::Context->opachtdocs . '/'
459 . C4::Context->preference('template')
463 sub getitemtypeimagesrc {
464 return '/opac-tmpl' . '/'
465 . C4::Context->preference('template')
471 $printers = &GetPrinters();
472 @queues = keys %$printers;
474 Returns information about existing printer queues.
476 C<$printers> is a reference-to-hash whose keys are the print queues
477 defined in the printers table of the Koha database. The values are
478 references-to-hash, whose keys are the fields in the printers table.
484 my $dbh = C4::Context->dbh;
485 my $sth = $dbh->prepare("select * from printers");
487 while ( my $printer = $sth->fetchrow_hashref ) {
488 $printers{ $printer->{'printqueue'} } = $printer;
490 return ( \%printers );
495 $printer = GetPrinter( $query, $printers );
499 sub GetPrinter ($$) {
500 my ( $query, $printers ) = @_; # get printer for this query from printers
501 my $printer = $query->param('printer');
502 my %cookie = $query->cookie('userenv');
503 ($printer) || ( $printer = $cookie{'printer'} ) || ( $printer = '' );
504 ( $printers->{$printer} ) || ( $printer = ( keys %$printers )[0] );
510 Returns the number of pages to display in a pagination bar, given the number
511 of items and the number of items per page.
516 my ( $nb_items, $nb_items_per_page ) = @_;
518 return int( ( $nb_items - 1 ) / $nb_items_per_page ) + 1;
523 (@themes) = &getallthemes('opac');
524 (@themes) = &getallthemes('intranet');
526 Returns an array of all available themes.
534 if ( $type eq 'intranet' ) {
535 $htdocs = C4::Context->config('intrahtdocs');
538 $htdocs = C4::Context->config('opachtdocs');
540 opendir D, "$htdocs";
541 my @dirlist = readdir D;
542 foreach my $directory (@dirlist) {
543 -d "$htdocs/$directory/en" and push @themes, $directory;
550 if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
553 link_value => 'su-to',
554 label_value => 'Topics',
556 [ '600', '601', '602', '603', '604', '605', '606', '610' ],
560 link_value => 'su-geo',
561 label_value => 'Places',
566 link_value => 'su-ut',
567 label_value => 'Titles',
568 tags => [ '500', '501', '502', '503', '504', ],
573 label_value => 'Authors',
574 tags => [ '700', '701', '702', ],
579 label_value => 'Series',
584 link_value => 'branch',
585 label_value => 'Libraries',
595 link_value => 'su-to',
596 label_value => 'Topics',
602 # link_value => 'su-na',
603 # label_value => 'People and Organizations',
604 # tags => ['600', '610', '611'],
608 link_value => 'su-geo',
609 label_value => 'Places',
614 link_value => 'su-ut',
615 label_value => 'Titles',
621 label_value => 'Authors',
622 tags => [ '100', '110', '700', ],
627 label_value => 'Series',
628 tags => [ '440', '490', ],
632 link_value => 'branch',
633 label_value => 'Libraries',
645 Return a href where a key is associated to a href. You give a query, the
646 name of the key among the fields returned by the query. If you also give as
647 third argument the name of the value, the function returns a href of scalar.
656 # generic href of any information on the item, href of href.
657 my $iteminfos_of = get_infos_of($query, 'itemnumber');
658 print $iteminfos_of->{$itemnumber}{barcode};
660 # specific information, href of scalar
661 my $barcode_of_item = get_infos_of($query, 'itemnumber', 'barcode');
662 print $barcode_of_item->{$itemnumber};
667 my ( $query, $key_name, $value_name ) = @_;
669 my $dbh = C4::Context->dbh;
671 my $sth = $dbh->prepare($query);
675 while ( my $row = $sth->fetchrow_hashref ) {
676 if ( defined $value_name ) {
677 $infos_of{ $row->{$key_name} } = $row->{$value_name};
680 $infos_of{ $row->{$key_name} } = $row;
688 =head2 get_notforloan_label_of
690 my $notforloan_label_of = get_notforloan_label_of();
692 Each authorised value of notforloan (information available in items and
693 itemtypes) is link to a single label.
695 Returns a href where keys are authorised values and values are corresponding
698 foreach my $authorised_value (keys %{$notforloan_label_of}) {
700 "authorised_value: %s => %s\n",
702 $notforloan_label_of->{$authorised_value}
708 # FIXME - why not use GetAuthorisedValues ??
710 sub get_notforloan_label_of {
711 my $dbh = C4::Context->dbh;
714 SELECT authorised_value
715 FROM marc_subfield_structure
716 WHERE kohafield = \'items.notforloan\'
719 my $sth = $dbh->prepare($query);
721 my ($statuscode) = $sth->fetchrow_array();
726 FROM authorised_values
729 $sth = $dbh->prepare($query);
730 $sth->execute($statuscode);
731 my %notforloan_label_of;
732 while ( my $row = $sth->fetchrow_hashref ) {
733 $notforloan_label_of{ $row->{authorised_value} } = $row->{lib};
737 return \%notforloan_label_of;
741 my ( $position, $type ) = @_;
742 my $dbh = C4::Context->dbh;
743 my $strsth = "SELECT * FROM z3950servers where 1";
744 $strsth .= " AND position=\"$position\"" if ($position);
745 $strsth .= " AND type=\"$type\"" if ($type);
746 my $rq = $dbh->prepare($strsth);
748 my @primaryserverloop;
750 while ( my $data = $rq->fetchrow_hashref ) {
752 $cell{label} = $data->{'description'};
753 $cell{id} = $data->{'name'};
756 . ( $data->{port} ? ":" . $data->{port} : "" ) . "/"
758 if ( $data->{host} );
759 $cell{checked} = $data->{checked};
760 push @primaryserverloop,
762 label => $data->{description},
765 value => $data->{host} . ":"
766 . $data->{port} . "/"
768 encoding => ($data->{encoding}?$data->{encoding}:"iso-5426"),
769 checked => "checked",
770 icon => $data->{icon},
771 zed => $data->{type} eq 'zed',
772 opensearch => $data->{type} eq 'opensearch'
775 return \@primaryserverloop;
778 sub displaySecondaryServers {
780 # my $secondary_servers_loop = [
781 # { inner_sup_servers_loop => [
782 # {label => "Google", id=>"GOOG", value=>"google",icon => "google.ico",opensearch => "1"},
783 # {label => "Yahoo", id=>"YAH", value=>"yahoo", icon =>"yahoo.ico", zed => "1"},
784 # {label => "Worldcat", id=>"WCT", value=>"worldcat", icon => "worldcat.gif", zed => "1"},
785 # {label => "Library of Congress", id=>"LOC", name=> "server", value=>"z3950.loc.gov:7090/Voyager", icon =>"loc.ico", zed => "1"},
789 return; #$secondary_servers_loop;
792 =head2 GetAuthValCode
794 $authvalcode = GetAuthValCode($kohafield,$frameworkcode);
799 my ($kohafield,$fwcode) = @_;
800 my $dbh = C4::Context->dbh;
801 $fwcode='' unless $fwcode;
802 my $sth = $dbh->prepare('select authorised_value from marc_subfield_structure where kohafield=? and frameworkcode=?');
803 $sth->execute($kohafield,$fwcode);
804 my ($authvalcode) = $sth->fetchrow_array;
808 =head2 GetAuthorisedValues
810 $authvalues = GetAuthorisedValues($category);
812 this function get all authorised values from 'authosied_value' table into a reference to array which
813 each value containt an hashref.
815 Set C<$category> on input args if you want to limits your query to this one. This params is not mandatory.
819 sub GetAuthorisedValues {
820 my ($category,$selected) = @_;
823 my $dbh = C4::Context->dbh;
824 my $query = "SELECT * FROM authorised_values";
825 $query .= " WHERE category = '" . $category . "'" if $category;
827 my $sth = $dbh->prepare($query);
829 while (my $data=$sth->fetchrow_hashref) {
830 if ($selected eq $data->{'authorised_value'} ) {
831 $data->{'selected'} = 1;
833 $results[$count] = $data;
836 #my $data = $sth->fetchall_arrayref({});
837 return \@results; #$data;
840 =head2 GetKohaAuthorisedValues
842 Takes $dbh , $kohafield as parameters.
843 returns hashref of authvalCode => liblibrarian
844 or undef if no authvals defined for kohafield.
848 sub GetKohaAuthorisedValues {
849 my ($kohafield,$fwcode) = @_;
850 $fwcode='' unless $fwcode;
852 my $dbh = C4::Context->dbh;
853 my $avcode = GetAuthValCode($kohafield,$fwcode);
855 my $sth = $dbh->prepare("select authorised_value, lib from authorised_values where category=? ");
856 $sth->execute($avcode);
857 while ( my ($val, $lib) = $sth->fetchrow_array ) {
864 =head2 GetManagedTagSubfields
868 $res = GetManagedTagSubfields();
872 Returns a reference to a big hash of hash, with the Marc structure fro the given frameworkcode
874 NOTE: This function is used only by the (incomplete) bulk editing feature. Since
875 that feature currently does not deal with items and biblioitems changes
876 correctly, those tags are specifically excluded from the list prepared
879 For future reference, if a bulk item editing feature is implemented at some point, it
880 needs some design thought -- for example, circulation status fields should not
881 be changed willy-nilly.
885 sub GetManagedTagSubfields{
886 my $dbh=C4::Context->dbh;
887 my $rq=$dbh->prepare(qq|
889 DISTINCT CONCAT( marc_subfield_structure.tagfield, tagsubfield ) AS tagsubfield,
890 marc_subfield_structure.liblibrarian as subfielddesc,
891 marc_tag_structure.liblibrarian as tagdesc
892 FROM marc_subfield_structure
893 LEFT JOIN marc_tag_structure
894 ON marc_tag_structure.tagfield = marc_subfield_structure.tagfield
895 AND marc_tag_structure.frameworkcode = marc_subfield_structure.frameworkcode
896 WHERE marc_subfield_structure.tab>=0
897 AND marc_tag_structure.tagfield NOT IN (SELECT tagfield FROM marc_subfield_structure WHERE kohafield like 'items.%')
898 AND marc_tag_structure.tagfield NOT IN (SELECT tagfield FROM marc_subfield_structure WHERE kohafield = 'biblioitems.itemtype')
899 AND marc_subfield_structure.kohafield <> 'biblio.biblionumber'
900 AND marc_subfield_structure.kohafield <> 'biblioitems.biblioitemnumber'
901 ORDER BY marc_subfield_structure.tagfield, tagsubfield|);
903 my $data=$rq->fetchall_arrayref({});
910 $marcrecord = &fixEncoding($marcblob);
912 Returns a well encoded marcrecord.
918 my $record = MARC::Record->new_from_usmarc($marc);
919 # if (C4::Context->preference("marcflavour") eq "UNIMARC"){
920 my $targetcharset="utf8";
921 if ($encoding && $targetcharset ne $encoding){
922 my $newRecord=MARC::Record->new();
923 if ($encoding!~/5426/){
925 my $decoder = Text::Iconv->new($encoding,$targetcharset);
926 my $newRecord=MARC::Record->new();
927 foreach my $field ($record->fields()){
928 if ($field->tag()<'010'){
929 $newRecord->insert_grouped_field($field);
933 foreach my $subfield ($field->subfields()){
935 if ((C4::Context->preference("marcflavour") eq "UNIMARC") && ($newField->tag eq '100')) {
936 substr($subfield->[1],26,4,"5050") if ($targetcharset eq "utf8");
937 } elsif (C4::Context->preference("marcflavour") eq "USMARC"){
938 $newRecord->encoding("UTF-8");
940 map {$decoder->convert($_)} @$subfield;
941 $newField->add_subfields($subfield->[0]=>$subfield->[1]);
943 map {$decoder->convert($_)} @$subfield;
944 $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>$subfield->[1]);
948 $newRecord->insert_grouped_field($newField);
951 }elsif ($encoding=~/5426/){
952 foreach my $field ($record->fields()){
953 if ($field->tag()<'010'){
954 $newRecord->insert_grouped_field($field);
958 foreach my $subfield ($field->subfields()){
959 # my $utf8=eval{MARC::Charset::marc8_to_utf8($subfield->[1])};
960 # if ($@) {warn "z3950 character conversion error $@ ";$utf8=$subfield->[1]};
961 my $utf8=char_decode5426($subfield->[1]);
962 if ((C4::Context->preference("marcflavour") eq "UNIMARC") && ($field->tag eq '100')) {
963 substr($utf8,26,4,"5050");
964 } elsif (C4::Context->preference("marcflavour") eq "USMARC"){
965 $newRecord->encoding("UTF-8");
968 $newField->add_subfields($subfield->[0]=>$utf8);
970 $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>$utf8);
974 $newRecord->insert_grouped_field($newField);
978 # warn $newRecord->as_formatted();
987 sub char_decode5426 {
991 $chars{0xb0}=0x0101;#3/0ayn[ain]
992 $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
993 #$chars{0xb2}=0x00e0;#'Ã ';
994 $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
995 #$chars{0xb3}=0x00e7;#'ç';
996 $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
1000 $chars{0xb5}=0x00e9;
1001 $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
1002 $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
1003 $chars{0xfa}=0x0153;#oe
1004 $chars{0x81d1}=0x00b0;
1007 ## combined characters iso5426
1009 $chars{0xc041}=0x1ea2; # capital a with hook above
1010 $chars{0xc045}=0x1eba; # capital e with hook above
1011 $chars{0xc049}=0x1ec8; # capital i with hook above
1012 $chars{0xc04f}=0x1ece; # capital o with hook above
1013 $chars{0xc055}=0x1ee6; # capital u with hook above
1014 $chars{0xc059}=0x1ef6; # capital y with hook above
1015 $chars{0xc061}=0x1ea3; # small a with hook above
1016 $chars{0xc065}=0x1ebb; # small e with hook above
1017 $chars{0xc069}=0x1ec9; # small i with hook above
1018 $chars{0xc06f}=0x1ecf; # small o with hook above
1019 $chars{0xc075}=0x1ee7; # small u with hook above
1020 $chars{0xc079}=0x1ef7; # small y with hook above
1023 $chars{0xc141}=0x00c0; # capital a with grave accent
1024 $chars{0xc145}=0x00c8; # capital e with grave accent
1025 $chars{0xc149}=0x00cc; # capital i with grave accent
1026 $chars{0xc14f}=0x00d2; # capital o with grave accent
1027 $chars{0xc155}=0x00d9; # capital u with grave accent
1028 $chars{0xc157}=0x1e80; # capital w with grave
1029 $chars{0xc159}=0x1ef2; # capital y with grave
1030 $chars{0xc161}=0x00e0; # small a with grave accent
1031 $chars{0xc165}=0x00e8; # small e with grave accent
1032 $chars{0xc169}=0x00ec; # small i with grave accent
1033 $chars{0xc16f}=0x00f2; # small o with grave accent
1034 $chars{0xc175}=0x00f9; # small u with grave accent
1035 $chars{0xc177}=0x1e81; # small w with grave
1036 $chars{0xc179}=0x1ef3; # small y with grave
1038 $chars{0xc241}=0x00c1; # capital a with acute accent
1039 $chars{0xc243}=0x0106; # capital c with acute accent
1040 $chars{0xc245}=0x00c9; # capital e with acute accent
1041 $chars{0xc247}=0x01f4; # capital g with acute
1042 $chars{0xc249}=0x00cd; # capital i with acute accent
1043 $chars{0xc24b}=0x1e30; # capital k with acute
1044 $chars{0xc24c}=0x0139; # capital l with acute accent
1045 $chars{0xc24d}=0x1e3e; # capital m with acute
1046 $chars{0xc24e}=0x0143; # capital n with acute accent
1047 $chars{0xc24f}=0x00d3; # capital o with acute accent
1048 $chars{0xc250}=0x1e54; # capital p with acute
1049 $chars{0xc252}=0x0154; # capital r with acute accent
1050 $chars{0xc253}=0x015a; # capital s with acute accent
1051 $chars{0xc255}=0x00da; # capital u with acute accent
1052 $chars{0xc257}=0x1e82; # capital w with acute
1053 $chars{0xc259}=0x00dd; # capital y with acute accent
1054 $chars{0xc25a}=0x0179; # capital z with acute accent
1055 $chars{0xc261}=0x00e1; # small a with acute accent
1056 $chars{0xc263}=0x0107; # small c with acute accent
1057 $chars{0xc265}=0x00e9; # small e with acute accent
1058 $chars{0xc267}=0x01f5; # small g with acute
1059 $chars{0xc269}=0x00ed; # small i with acute accent
1060 $chars{0xc26b}=0x1e31; # small k with acute
1061 $chars{0xc26c}=0x013a; # small l with acute accent
1062 $chars{0xc26d}=0x1e3f; # small m with acute
1063 $chars{0xc26e}=0x0144; # small n with acute accent
1064 $chars{0xc26f}=0x00f3; # small o with acute accent
1065 $chars{0xc270}=0x1e55; # small p with acute
1066 $chars{0xc272}=0x0155; # small r with acute accent
1067 $chars{0xc273}=0x015b; # small s with acute accent
1068 $chars{0xc275}=0x00fa; # small u with acute accent
1069 $chars{0xc277}=0x1e83; # small w with acute
1070 $chars{0xc279}=0x00fd; # small y with acute accent
1071 $chars{0xc27a}=0x017a; # small z with acute accent
1072 $chars{0xc2e1}=0x01fc; # capital ae with acute
1073 $chars{0xc2f1}=0x01fd; # small ae with acute
1074 # 4/3 circumflex accent
1075 $chars{0xc341}=0x00c2; # capital a with circumflex accent
1076 $chars{0xc343}=0x0108; # capital c with circumflex
1077 $chars{0xc345}=0x00ca; # capital e with circumflex accent
1078 $chars{0xc347}=0x011c; # capital g with circumflex
1079 $chars{0xc348}=0x0124; # capital h with circumflex
1080 $chars{0xc349}=0x00ce; # capital i with circumflex accent
1081 $chars{0xc34a}=0x0134; # capital j with circumflex
1082 $chars{0xc34f}=0x00d4; # capital o with circumflex accent
1083 $chars{0xc353}=0x015c; # capital s with circumflex
1084 $chars{0xc355}=0x00db; # capital u with circumflex
1085 $chars{0xc357}=0x0174; # capital w with circumflex
1086 $chars{0xc359}=0x0176; # capital y with circumflex
1087 $chars{0xc35a}=0x1e90; # capital z with circumflex
1088 $chars{0xc361}=0x00e2; # small a with circumflex accent
1089 $chars{0xc363}=0x0109; # small c with circumflex
1090 $chars{0xc365}=0x00ea; # small e with circumflex accent
1091 $chars{0xc367}=0x011d; # small g with circumflex
1092 $chars{0xc368}=0x0125; # small h with circumflex
1093 $chars{0xc369}=0x00ee; # small i with circumflex accent
1094 $chars{0xc36a}=0x0135; # small j with circumflex
1095 $chars{0xc36e}=0x00f1; # small n with tilde
1096 $chars{0xc36f}=0x00f4; # small o with circumflex accent
1097 $chars{0xc373}=0x015d; # small s with circumflex
1098 $chars{0xc375}=0x00fb; # small u with circumflex
1099 $chars{0xc377}=0x0175; # small w with circumflex
1100 $chars{0xc379}=0x0177; # small y with circumflex
1101 $chars{0xc37a}=0x1e91; # small z with circumflex
1103 $chars{0xc441}=0x00c3; # capital a with tilde
1104 $chars{0xc445}=0x1ebc; # capital e with tilde
1105 $chars{0xc449}=0x0128; # capital i with tilde
1106 $chars{0xc44e}=0x00d1; # capital n with tilde
1107 $chars{0xc44f}=0x00d5; # capital o with tilde
1108 $chars{0xc455}=0x0168; # capital u with tilde
1109 $chars{0xc456}=0x1e7c; # capital v with tilde
1110 $chars{0xc459}=0x1ef8; # capital y with tilde
1111 $chars{0xc461}=0x00e3; # small a with tilde
1112 $chars{0xc465}=0x1ebd; # small e with tilde
1113 $chars{0xc469}=0x0129; # small i with tilde
1114 $chars{0xc46e}=0x00f1; # small n with tilde
1115 $chars{0xc46f}=0x00f5; # small o with tilde
1116 $chars{0xc475}=0x0169; # small u with tilde
1117 $chars{0xc476}=0x1e7d; # small v with tilde
1118 $chars{0xc479}=0x1ef9; # small y with tilde
1120 $chars{0xc541}=0x0100; # capital a with macron
1121 $chars{0xc545}=0x0112; # capital e with macron
1122 $chars{0xc547}=0x1e20; # capital g with macron
1123 $chars{0xc549}=0x012a; # capital i with macron
1124 $chars{0xc54f}=0x014c; # capital o with macron
1125 $chars{0xc555}=0x016a; # capital u with macron
1126 $chars{0xc561}=0x0101; # small a with macron
1127 $chars{0xc565}=0x0113; # small e with macron
1128 $chars{0xc567}=0x1e21; # small g with macron
1129 $chars{0xc569}=0x012b; # small i with macron
1130 $chars{0xc56f}=0x014d; # small o with macron
1131 $chars{0xc575}=0x016b; # small u with macron
1132 $chars{0xc572}=0x0159; # small r with macron
1133 $chars{0xc5e1}=0x01e2; # capital ae with macron
1134 $chars{0xc5f1}=0x01e3; # small ae with macron
1136 $chars{0xc641}=0x0102; # capital a with breve
1137 $chars{0xc645}=0x0114; # capital e with breve
1138 $chars{0xc647}=0x011e; # capital g with breve
1139 $chars{0xc649}=0x012c; # capital i with breve
1140 $chars{0xc64f}=0x014e; # capital o with breve
1141 $chars{0xc655}=0x016c; # capital u with breve
1142 $chars{0xc661}=0x0103; # small a with breve
1143 $chars{0xc665}=0x0115; # small e with breve
1144 $chars{0xc667}=0x011f; # small g with breve
1145 $chars{0xc669}=0x012d; # small i with breve
1146 $chars{0xc66f}=0x014f; # small o with breve
1147 $chars{0xc675}=0x016d; # small u with breve
1149 $chars{0xc7b0}=0x01e1; # Ain with dot above
1150 $chars{0xc742}=0x1e02; # capital b with dot above
1151 $chars{0xc743}=0x010a; # capital c with dot above
1152 $chars{0xc744}=0x1e0a; # capital d with dot above
1153 $chars{0xc745}=0x0116; # capital e with dot above
1154 $chars{0xc746}=0x1e1e; # capital f with dot above
1155 $chars{0xc747}=0x0120; # capital g with dot above
1156 $chars{0xc748}=0x1e22; # capital h with dot above
1157 $chars{0xc749}=0x0130; # capital i with dot above
1158 $chars{0xc74d}=0x1e40; # capital m with dot above
1159 $chars{0xc74e}=0x1e44; # capital n with dot above
1160 $chars{0xc750}=0x1e56; # capital p with dot above
1161 $chars{0xc752}=0x1e58; # capital r with dot above
1162 $chars{0xc753}=0x1e60; # capital s with dot above
1163 $chars{0xc754}=0x1e6a; # capital t with dot above
1164 $chars{0xc757}=0x1e86; # capital w with dot above
1165 $chars{0xc758}=0x1e8a; # capital x with dot above
1166 $chars{0xc759}=0x1e8e; # capital y with dot above
1167 $chars{0xc75a}=0x017b; # capital z with dot above
1168 $chars{0xc761}=0x0227; # small b with dot above
1169 $chars{0xc762}=0x1e03; # small b with dot above
1170 $chars{0xc763}=0x010b; # small c with dot above
1171 $chars{0xc764}=0x1e0b; # small d with dot above
1172 $chars{0xc765}=0x0117; # small e with dot above
1173 $chars{0xc766}=0x1e1f; # small f with dot above
1174 $chars{0xc767}=0x0121; # small g with dot above
1175 $chars{0xc768}=0x1e23; # small h with dot above
1176 $chars{0xc76d}=0x1e41; # small m with dot above
1177 $chars{0xc76e}=0x1e45; # small n with dot above
1178 $chars{0xc770}=0x1e57; # small p with dot above
1179 $chars{0xc772}=0x1e59; # small r with dot above
1180 $chars{0xc773}=0x1e61; # small s with dot above
1181 $chars{0xc774}=0x1e6b; # small t with dot above
1182 $chars{0xc777}=0x1e87; # small w with dot above
1183 $chars{0xc778}=0x1e8b; # small x with dot above
1184 $chars{0xc779}=0x1e8f; # small y with dot above
1185 $chars{0xc77a}=0x017c; # small z with dot above
1186 # 4/8 trema, diaresis
1187 $chars{0xc820}=0x00a8; # diaeresis
1188 $chars{0xc841}=0x00c4; # capital a with diaeresis
1189 $chars{0xc845}=0x00cb; # capital e with diaeresis
1190 $chars{0xc848}=0x1e26; # capital h with diaeresis
1191 $chars{0xc849}=0x00cf; # capital i with diaeresis
1192 $chars{0xc84f}=0x00d6; # capital o with diaeresis
1193 $chars{0xc855}=0x00dc; # capital u with diaeresis
1194 $chars{0xc857}=0x1e84; # capital w with diaeresis
1195 $chars{0xc858}=0x1e8c; # capital x with diaeresis
1196 $chars{0xc859}=0x0178; # capital y with diaeresis
1197 $chars{0xc861}=0x00e4; # small a with diaeresis
1198 $chars{0xc865}=0x00eb; # small e with diaeresis
1199 $chars{0xc868}=0x1e27; # small h with diaeresis
1200 $chars{0xc869}=0x00ef; # small i with diaeresis
1201 $chars{0xc86f}=0x00f6; # small o with diaeresis
1202 $chars{0xc874}=0x1e97; # small t with diaeresis
1203 $chars{0xc875}=0x00fc; # small u with diaeresis
1204 $chars{0xc877}=0x1e85; # small w with diaeresis
1205 $chars{0xc878}=0x1e8d; # small x with diaeresis
1206 $chars{0xc879}=0x00ff; # small y with diaeresis
1208 $chars{0xc920}=0x00a8; # [diaeresis]
1209 $chars{0xc961}=0x00e4; # a with umlaut
1210 $chars{0xc965}=0x00eb; # e with umlaut
1211 $chars{0xc969}=0x00ef; # i with umlaut
1212 $chars{0xc96f}=0x00f6; # o with umlaut
1213 $chars{0xc975}=0x00fc; # u with umlaut
1215 $chars{0xca41}=0x00c5; # capital a with ring above
1216 $chars{0xcaad}=0x016e; # capital u with ring above
1217 $chars{0xca61}=0x00e5; # small a with ring above
1218 $chars{0xca75}=0x016f; # small u with ring above
1219 $chars{0xca77}=0x1e98; # small w with ring above
1220 $chars{0xca79}=0x1e99; # small y with ring above
1221 # 4/11 high comma off centre
1222 # 4/12 inverted high comma centred
1223 # 4/13 double acute accent
1224 $chars{0xcd4f}=0x0150; # capital o with double acute
1225 $chars{0xcd55}=0x0170; # capital u with double acute
1226 $chars{0xcd6f}=0x0151; # small o with double acute
1227 $chars{0xcd75}=0x0171; # small u with double acute
1229 $chars{0xce54}=0x01a0; # latin capital letter o with horn
1230 $chars{0xce55}=0x01af; # latin capital letter u with horn
1231 $chars{0xce74}=0x01a1; # latin small letter o with horn
1232 $chars{0xce75}=0x01b0; # latin small letter u with horn
1234 $chars{0xcf41}=0x01cd; # capital a with caron
1235 $chars{0xcf43}=0x010c; # capital c with caron
1236 $chars{0xcf44}=0x010e; # capital d with caron
1237 $chars{0xcf45}=0x011a; # capital e with caron
1238 $chars{0xcf47}=0x01e6; # capital g with caron
1239 $chars{0xcf49}=0x01cf; # capital i with caron
1240 $chars{0xcf4b}=0x01e8; # capital k with caron
1241 $chars{0xcf4c}=0x013d; # capital l with caron
1242 $chars{0xcf4e}=0x0147; # capital n with caron
1243 $chars{0xcf4f}=0x01d1; # capital o with caron
1244 $chars{0xcf52}=0x0158; # capital r with caron
1245 $chars{0xcf53}=0x0160; # capital s with caron
1246 $chars{0xcf54}=0x0164; # capital t with caron
1247 $chars{0xcf55}=0x01d3; # capital u with caron
1248 $chars{0xcf5a}=0x017d; # capital z with caron
1249 $chars{0xcf61}=0x01ce; # small a with caron
1250 $chars{0xcf63}=0x010d; # small c with caron
1251 $chars{0xcf64}=0x010f; # small d with caron
1252 $chars{0xcf65}=0x011b; # small e with caron
1253 $chars{0xcf67}=0x01e7; # small g with caron
1254 $chars{0xcf69}=0x01d0; # small i with caron
1255 $chars{0xcf6a}=0x01f0; # small j with caron
1256 $chars{0xcf6b}=0x01e9; # small k with caron
1257 $chars{0xcf6c}=0x013e; # small l with caron
1258 $chars{0xcf6e}=0x0148; # small n with caron
1259 $chars{0xcf6f}=0x01d2; # small o with caron
1260 $chars{0xcf72}=0x0159; # small r with caron
1261 $chars{0xcf73}=0x0161; # small s with caron
1262 $chars{0xcf74}=0x0165; # small t with caron
1263 $chars{0xcf75}=0x01d4; # small u with caron
1264 $chars{0xcf7a}=0x017e; # small z with caron
1266 $chars{0xd020}=0x00b8; # cedilla
1267 $chars{0xd043}=0x00c7; # capital c with cedilla
1268 $chars{0xd044}=0x1e10; # capital d with cedilla
1269 $chars{0xd047}=0x0122; # capital g with cedilla
1270 $chars{0xd048}=0x1e28; # capital h with cedilla
1271 $chars{0xd04b}=0x0136; # capital k with cedilla
1272 $chars{0xd04c}=0x013b; # capital l with cedilla
1273 $chars{0xd04e}=0x0145; # capital n with cedilla
1274 $chars{0xd052}=0x0156; # capital r with cedilla
1275 $chars{0xd053}=0x015e; # capital s with cedilla
1276 $chars{0xd054}=0x0162; # capital t with cedilla
1277 $chars{0xd063}=0x00e7; # small c with cedilla
1278 $chars{0xd064}=0x1e11; # small d with cedilla
1279 $chars{0xd065}=0x0119; # small e with cedilla
1280 $chars{0xd067}=0x0123; # small g with cedilla
1281 $chars{0xd068}=0x1e29; # small h with cedilla
1282 $chars{0xd06b}=0x0137; # small k with cedilla
1283 $chars{0xd06c}=0x013c; # small l with cedilla
1284 $chars{0xd06e}=0x0146; # small n with cedilla
1285 $chars{0xd072}=0x0157; # small r with cedilla
1286 $chars{0xd073}=0x015f; # small s with cedilla
1287 $chars{0xd074}=0x0163; # small t with cedilla
1290 # 5/3 ogonek (hook to right
1291 $chars{0xd320}=0x02db; # ogonek
1292 $chars{0xd341}=0x0104; # capital a with ogonek
1293 $chars{0xd345}=0x0118; # capital e with ogonek
1294 $chars{0xd349}=0x012e; # capital i with ogonek
1295 $chars{0xd34f}=0x01ea; # capital o with ogonek
1296 $chars{0xd355}=0x0172; # capital u with ogonek
1297 $chars{0xd361}=0x0105; # small a with ogonek
1298 $chars{0xd365}=0x0119; # small e with ogonek
1299 $chars{0xd369}=0x012f; # small i with ogonek
1300 $chars{0xd36f}=0x01eb; # small o with ogonek
1301 $chars{0xd375}=0x0173; # small u with ogonek
1303 $chars{0xd441}=0x1e00; # capital a with ring below
1304 $chars{0xd461}=0x1e01; # small a with ring below
1305 # 5/5 half circle below
1306 $chars{0xf948}=0x1e2a; # capital h with breve below
1307 $chars{0xf968}=0x1e2b; # small h with breve below
1309 $chars{0xd641}=0x1ea0; # capital a with dot below
1310 $chars{0xd642}=0x1e04; # capital b with dot below
1311 $chars{0xd644}=0x1e0c; # capital d with dot below
1312 $chars{0xd645}=0x1eb8; # capital e with dot below
1313 $chars{0xd648}=0x1e24; # capital h with dot below
1314 $chars{0xd649}=0x1eca; # capital i with dot below
1315 $chars{0xd64b}=0x1e32; # capital k with dot below
1316 $chars{0xd64c}=0x1e36; # capital l with dot below
1317 $chars{0xd64d}=0x1e42; # capital m with dot below
1318 $chars{0xd64e}=0x1e46; # capital n with dot below
1319 $chars{0xd64f}=0x1ecc; # capital o with dot below
1320 $chars{0xd652}=0x1e5a; # capital r with dot below
1321 $chars{0xd653}=0x1e62; # capital s with dot below
1322 $chars{0xd654}=0x1e6c; # capital t with dot below
1323 $chars{0xd655}=0x1ee4; # capital u with dot below
1324 $chars{0xd656}=0x1e7e; # capital v with dot below
1325 $chars{0xd657}=0x1e88; # capital w with dot below
1326 $chars{0xd659}=0x1ef4; # capital y with dot below
1327 $chars{0xd65a}=0x1e92; # capital z with dot below
1328 $chars{0xd661}=0x1ea1; # small a with dot below
1329 $chars{0xd662}=0x1e05; # small b with dot below
1330 $chars{0xd664}=0x1e0d; # small d with dot below
1331 $chars{0xd665}=0x1eb9; # small e with dot below
1332 $chars{0xd668}=0x1e25; # small h with dot below
1333 $chars{0xd669}=0x1ecb; # small i with dot below
1334 $chars{0xd66b}=0x1e33; # small k with dot below
1335 $chars{0xd66c}=0x1e37; # small l with dot below
1336 $chars{0xd66d}=0x1e43; # small m with dot below
1337 $chars{0xd66e}=0x1e47; # small n with dot below
1338 $chars{0xd66f}=0x1ecd; # small o with dot below
1339 $chars{0xd672}=0x1e5b; # small r with dot below
1340 $chars{0xd673}=0x1e63; # small s with dot below
1341 $chars{0xd674}=0x1e6d; # small t with dot below
1342 $chars{0xd675}=0x1ee5; # small u with dot below
1343 $chars{0xd676}=0x1e7f; # small v with dot below
1344 $chars{0xd677}=0x1e89; # small w with dot below
1345 $chars{0xd679}=0x1ef5; # small y with dot below
1346 $chars{0xd67a}=0x1e93; # small z with dot below
1347 # 5/7 double dot below
1348 $chars{0xd755}=0x1e72; # capital u with diaeresis below
1349 $chars{0xd775}=0x1e73; # small u with diaeresis below
1351 $chars{0xd820}=0x005f; # underline
1352 # 5/9 double underline
1353 $chars{0xd920}=0x2017; # double underline
1354 # 5/10 small low vertical bar
1355 $chars{0xda20}=0x02cc; #
1356 # 5/11 circumflex below
1357 # 5/12 (this position shall not be used)
1358 # 5/13 left half of ligature sign and of double tilde
1359 # 5/14 right half of ligature sign
1360 # 5/15 right half of double tilde
1361 # map {printf "%x :%x\n",$_,$chars{$_};}keys %chars;
1362 my @data = unpack("C*", $string);
1364 my $length=scalar(@data);
1365 for (my $i = 0; $i < scalar(@data); $i++) {
1366 my $char= $data[$i];
1367 if ($char >= 0x00 && $char <= 0x7F){
1370 push @characters,$char unless ($char<0x02 ||$char== 0x0F);
1371 }elsif (($char >= 0xC0 && $char <= 0xDF)) {
1374 if ($chars{$char*256+$data[$i+1]}) {
1375 $convchar= $chars{$char * 256 + $data[$i+1]};
1377 # printf "char %x $char, char to convert %x , converted %x\n",$char,$char * 256 + $data[$i - 1],$convchar;
1378 } elsif ($chars{$char}) {
1379 $convchar= $chars{$char};
1380 # printf "0xC char %x, converted %x\n",$char,$chars{$char};
1384 push @characters,$convchar;
1387 if ($chars{$char}) {
1388 $convchar= $chars{$char};
1389 # printf "char %x, converted %x\n",$char,$chars{$char};
1391 # printf "char %x $char\n",$char;
1394 push @characters,$convchar;
1397 $result=pack "U*",@characters;
1398 # $result=~s/\x01//;
1399 # $result=~s/\x00//;
1403 $result=~s/\x1b\x5b//;
1404 # map{printf "%x",$_} @characters;