Main Koha release repository https://koha-community.org
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

337 lines
8.1 KiB

package Koha::Edifact;
# Copyright 2014,2015 PTFS-Europe Ltd
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.
use strict;
use warnings;
use File::Slurp;
use Carp;
use Encode qw( from_to );
use Koha::Edifact::Segment;
use Koha::Edifact::Message;
my $separator = {
component => q{\:},
data => q{\+},
decimal => q{.},
release => q{\?},
reserved => q{ },
segment => q{\'},
};
sub new {
my ( $class, $param_hashref ) = @_;
my $transmission;
my $self = ();
if ( $param_hashref->{filename} ) {
if ( $param_hashref->{transmission} ) {
carp
"Cannot instantiate $class : both filename and transmission passed";
return;
}
$transmission = read_file( $param_hashref->{filename} );
}
else {
$transmission = $param_hashref->{transmission};
}
$self->{transmission} = _init($transmission);
bless $self, $class;
return $self;
}
sub interchange_header {
my ( $self, $field ) = @_;
my %element = (
sender => 1,
recipient => 2,
datetime => 3,
interchange_control_reference => 4,
application_reference => 6,
);
if ( !exists $element{$field} ) {
carp "No interchange header field $field available";
return;
}
my $data = $self->{transmission}->[0]->elem( $element{$field} );
return $data;
}
sub interchange_trailer {
my ( $self, $field ) = @_;
my $trailer = $self->{transmission}->[-1];
if ( $field eq 'interchange_control_count' ) {
return $trailer->elem(0);
}
elsif ( $field eq 'interchange_control_reference' ) {
return $trailer->elem(1);
}
carp "Trailer field $field not recognized";
return;
}
sub new_data_iterator {
my $self = shift;
my $offset = 0;
while ( $self->{transmission}->[$offset]->tag() ne 'UNH' ) {
++$offset;
if ( $offset == @{ $self->{transmission} } ) {
carp 'Cannot find message start';
return;
}
}
$self->{data_iterator} = $offset;
return 1;
}
sub next_segment {
my $self = shift;
if ( defined $self->{data_iterator} ) {
my $seg = $self->{transmission}->[ $self->{data_iterator} ];
if ( $seg->tag eq 'UNH' ) {
$self->{msg_type} = $seg->elem( 1, 0 );
}
elsif ( $seg->tag eq 'LIN' ) {
$self->{msg_type} = 'detail';
}
if ( $seg->tag ne 'UNZ' ) {
$self->{data_iterator}++;
}
else {
$self->{data_iterator} = undef;
}
return $seg;
}
return;
}
# for debugging return whole transmission
sub get_transmission {
my $self = shift;
return $self->{transmission};
}
sub message_type {
my $self = shift;
return $self->{msg_type};
}
sub _init {
my $msg = shift;
if ( !$msg ) {
return;
}
if ( $msg =~ s/^UNA(.{6})// ) {
if ( service_string_advice($1) ) {
return segmentize($msg);
}
return;
}
else {
my $s = substr $msg, 10;
croak "File does not start with a Service string advice :$s";
}
}
# return an array of Message objects
sub message_array {
my $self = shift;
# return an array of array_refs 1 ref to a message
my $msg_arr = [];
my $msg = [];
my $in_msg = 0;
foreach my $seg ( @{ $self->{transmission} } ) {
if ( $seg->tag eq 'UNH' ) {
$in_msg = 1;
push @{$msg}, $seg;
}
elsif ( $seg->tag eq 'UNT' ) {
$in_msg = 0;
if ( @{$msg} ) {
push @{$msg_arr}, Koha::Edifact::Message->new($msg);
$msg = [];
}
}
elsif ($in_msg) {
push @{$msg}, $seg;
}
}
return $msg_arr;
}
#
# internal parsing routines used in _init
#
sub service_string_advice {
my $ssa = shift;
# At present this just validates that the ssa
# is standard Edifact
# TBD reset the seps if non standard
if ( $ssa ne q{:+.? '} ) {
carp " Non standard Service String Advice [$ssa]";
return;
}
# else use default separators
return 1;
}
sub segmentize {
my $raw = shift;
# In practice edifact uses latin-1 but check
# Transport now converts to utf-8 on ingest
# Do not convert here
#my $char_set = 'iso-8859-1';
#if ( $raw =~ m/^UNB[+]UNO(.)/ ) {
# $char_set = msgcharset($1);
#}
#from_to( $raw, $char_set, 'utf8' );
my $re = qr{
(?> # dont backtrack into this group
[?]. # either the escape character
# followed by any other character
| # or
[^'?] # a character that is neither escape
# nor split
)+
}x;
my @segmented;
while ( $raw =~ /($re)/g ) {
push @segmented, Koha::Edifact::Segment->new( { seg_string => $1 } );
}
return \@segmented;
}
sub msgcharset {
my $code = shift;
if ( $code =~ m/^[^ABCDEF]$/ ) {
$code = 'default';
}
my %encoding_map = (
A => 'ascii',
B => 'ascii',
C => 'iso-8859-1',
D => 'iso-8859-1',
E => 'iso-8859-1',
F => 'iso-8859-1',
default => 'iso-8859-1',
);
return $encoding_map{$code};
}
1;
__END__
=head1 NAME
Edifact - Edifact message handler
=head1 DESCRIPTION
Koha module for parsing Edifact messages
=head1 SUBROUTINES
=head2 new
my $e = Koha::Edifact->new( { filename => 'myfilename' } );
or
my $e = Koha::Edifact->new( { transmission => $msg_variable } );
instantiate the Edifact parser, requires either to be passed an in-memory
edifact message as transmission or a filename which it will read on creation
=head2 interchange_header
will return the data in the header field designated by the parameter
specified. Valid parameters are: 'sender', 'recipient', 'datetime',
'interchange_control_reference', and 'application_reference'
=head2 interchange_trailer
called either with the string 'interchange_control_count' or
'interchange_control_reference' will return the corresponding field from
the interchange trailer
=head2 new_data_iterator
Sets the object's data_iterator to point to the UNH segment
=head2 next_segment
Returns the next segment pointed to by the data_iterator. Increments the
data_iterator member or destroys it if segment UNZ has been reached
=head2 get_transmission
This method is useful in debugg:ing. Call on an Edifact object
it returns the object's transmission member
=head2 message_type
return the object's message type
=head2 message_array
return an array of Message objects contained in the Edifact transmission
=head1 Internal Methods
=head2 _init
Called by the constructor to do the parsing of the transmission
=head2 service_string_advice
Examines the Service String Advice returns 1 if the default separartors are in use
undef otherwise
=head2 segmentize
takes a raw Edifact message and returns a reference to an array of
its segments
=head2 msgcharset
Return the character set the message was encoded in. The default is iso-8859-1
We preserve this info but will have converted to utf-8 on ingest
=head1 AUTHOR
Colin Campbell <colin.campbell@ptfs-europe.com>
=head1 COPYRIGHT
Copyright 2014,2015, PTFS-Europe Ltd
This program is free software, You may redistribute it under
under the terms of the GNU General Public License
=cut