Main Koha release repository
https://koha-community.org
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
337 lines
8.1 KiB
337 lines
8.1 KiB
package Koha::Edifact;
|
|
|
|
# Copyright 2014,2015 PTFS-Europe Ltd
|
|
#
|
|
# This file is part of Koha.
|
|
#
|
|
# Koha is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Koha is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Koha; if not, see <http://www.gnu.org/licenses>.
|
|
|
|
use strict;
|
|
use warnings;
|
|
use File::Slurp;
|
|
use Carp;
|
|
use Encode qw( from_to );
|
|
use Koha::Edifact::Segment;
|
|
use Koha::Edifact::Message;
|
|
|
|
my $separator = {
|
|
component => q{\:},
|
|
data => q{\+},
|
|
decimal => q{.},
|
|
release => q{\?},
|
|
reserved => q{ },
|
|
segment => q{\'},
|
|
};
|
|
|
|
sub new {
|
|
my ( $class, $param_hashref ) = @_;
|
|
my $transmission;
|
|
my $self = ();
|
|
|
|
if ( $param_hashref->{filename} ) {
|
|
if ( $param_hashref->{transmission} ) {
|
|
carp
|
|
"Cannot instantiate $class : both filename and transmission passed";
|
|
return;
|
|
}
|
|
$transmission = read_file( $param_hashref->{filename} );
|
|
}
|
|
else {
|
|
$transmission = $param_hashref->{transmission};
|
|
}
|
|
$self->{transmission} = _init($transmission);
|
|
|
|
bless $self, $class;
|
|
return $self;
|
|
}
|
|
|
|
sub interchange_header {
|
|
my ( $self, $field ) = @_;
|
|
|
|
my %element = (
|
|
sender => 1,
|
|
recipient => 2,
|
|
datetime => 3,
|
|
interchange_control_reference => 4,
|
|
application_reference => 6,
|
|
);
|
|
if ( !exists $element{$field} ) {
|
|
carp "No interchange header field $field available";
|
|
return;
|
|
}
|
|
my $data = $self->{transmission}->[0]->elem( $element{$field} );
|
|
return $data;
|
|
}
|
|
|
|
sub interchange_trailer {
|
|
my ( $self, $field ) = @_;
|
|
my $trailer = $self->{transmission}->[-1];
|
|
if ( $field eq 'interchange_control_count' ) {
|
|
return $trailer->elem(0);
|
|
}
|
|
elsif ( $field eq 'interchange_control_reference' ) {
|
|
return $trailer->elem(1);
|
|
}
|
|
carp "Trailer field $field not recognized";
|
|
return;
|
|
}
|
|
|
|
sub new_data_iterator {
|
|
my $self = shift;
|
|
my $offset = 0;
|
|
while ( $self->{transmission}->[$offset]->tag() ne 'UNH' ) {
|
|
++$offset;
|
|
if ( $offset == @{ $self->{transmission} } ) {
|
|
carp 'Cannot find message start';
|
|
return;
|
|
}
|
|
}
|
|
$self->{data_iterator} = $offset;
|
|
return 1;
|
|
}
|
|
|
|
sub next_segment {
|
|
my $self = shift;
|
|
if ( defined $self->{data_iterator} ) {
|
|
my $seg = $self->{transmission}->[ $self->{data_iterator} ];
|
|
if ( $seg->tag eq 'UNH' ) {
|
|
|
|
$self->{msg_type} = $seg->elem( 1, 0 );
|
|
}
|
|
elsif ( $seg->tag eq 'LIN' ) {
|
|
$self->{msg_type} = 'detail';
|
|
}
|
|
|
|
if ( $seg->tag ne 'UNZ' ) {
|
|
$self->{data_iterator}++;
|
|
}
|
|
else {
|
|
$self->{data_iterator} = undef;
|
|
}
|
|
return $seg;
|
|
}
|
|
return;
|
|
}
|
|
|
|
# for debugging return whole transmission
|
|
sub get_transmission {
|
|
my $self = shift;
|
|
|
|
return $self->{transmission};
|
|
}
|
|
|
|
sub message_type {
|
|
my $self = shift;
|
|
return $self->{msg_type};
|
|
}
|
|
|
|
sub _init {
|
|
my $msg = shift;
|
|
if ( !$msg ) {
|
|
return;
|
|
}
|
|
if ( $msg =~ s/^UNA(.{6})// ) {
|
|
if ( service_string_advice($1) ) {
|
|
return segmentize($msg);
|
|
|
|
}
|
|
return;
|
|
}
|
|
else {
|
|
my $s = substr $msg, 10;
|
|
croak "File does not start with a Service string advice :$s";
|
|
}
|
|
}
|
|
|
|
# return an array of Message objects
|
|
sub message_array {
|
|
my $self = shift;
|
|
|
|
# return an array of array_refs 1 ref to a message
|
|
my $msg_arr = [];
|
|
my $msg = [];
|
|
my $in_msg = 0;
|
|
foreach my $seg ( @{ $self->{transmission} } ) {
|
|
if ( $seg->tag eq 'UNH' ) {
|
|
$in_msg = 1;
|
|
push @{$msg}, $seg;
|
|
}
|
|
elsif ( $seg->tag eq 'UNT' ) {
|
|
$in_msg = 0;
|
|
if ( @{$msg} ) {
|
|
push @{$msg_arr}, Koha::Edifact::Message->new($msg);
|
|
$msg = [];
|
|
}
|
|
}
|
|
elsif ($in_msg) {
|
|
push @{$msg}, $seg;
|
|
}
|
|
}
|
|
return $msg_arr;
|
|
}
|
|
|
|
#
|
|
# internal parsing routines used in _init
|
|
#
|
|
sub service_string_advice {
|
|
my $ssa = shift;
|
|
|
|
# At present this just validates that the ssa
|
|
# is standard Edifact
|
|
# TBD reset the seps if non standard
|
|
if ( $ssa ne q{:+.? '} ) {
|
|
carp " Non standard Service String Advice [$ssa]";
|
|
return;
|
|
}
|
|
|
|
# else use default separators
|
|
return 1;
|
|
}
|
|
|
|
sub segmentize {
|
|
my $raw = shift;
|
|
|
|
# In practice edifact uses latin-1 but check
|
|
# Transport now converts to utf-8 on ingest
|
|
# Do not convert here
|
|
#my $char_set = 'iso-8859-1';
|
|
#if ( $raw =~ m/^UNB[+]UNO(.)/ ) {
|
|
# $char_set = msgcharset($1);
|
|
#}
|
|
#from_to( $raw, $char_set, 'utf8' );
|
|
|
|
my $re = qr{
|
|
(?> # dont backtrack into this group
|
|
[?]. # either the escape character
|
|
# followed by any other character
|
|
| # or
|
|
[^'?] # a character that is neither escape
|
|
# nor split
|
|
)+
|
|
}x;
|
|
my @segmented;
|
|
while ( $raw =~ /($re)/g ) {
|
|
push @segmented, Koha::Edifact::Segment->new( { seg_string => $1 } );
|
|
}
|
|
return \@segmented;
|
|
}
|
|
|
|
sub msgcharset {
|
|
my $code = shift;
|
|
if ( $code =~ m/^[^ABCDEF]$/ ) {
|
|
$code = 'default';
|
|
}
|
|
my %encoding_map = (
|
|
A => 'ascii',
|
|
B => 'ascii',
|
|
C => 'iso-8859-1',
|
|
D => 'iso-8859-1',
|
|
E => 'iso-8859-1',
|
|
F => 'iso-8859-1',
|
|
default => 'iso-8859-1',
|
|
);
|
|
return $encoding_map{$code};
|
|
}
|
|
|
|
1;
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
Edifact - Edifact message handler
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Koha module for parsing Edifact messages
|
|
|
|
=head1 SUBROUTINES
|
|
|
|
=head2 new
|
|
|
|
my $e = Koha::Edifact->new( { filename => 'myfilename' } );
|
|
or
|
|
my $e = Koha::Edifact->new( { transmission => $msg_variable } );
|
|
|
|
instantiate the Edifact parser, requires either to be passed an in-memory
|
|
edifact message as transmission or a filename which it will read on creation
|
|
|
|
=head2 interchange_header
|
|
|
|
will return the data in the header field designated by the parameter
|
|
specified. Valid parameters are: 'sender', 'recipient', 'datetime',
|
|
'interchange_control_reference', and 'application_reference'
|
|
|
|
=head2 interchange_trailer
|
|
|
|
called either with the string 'interchange_control_count' or
|
|
'interchange_control_reference' will return the corresponding field from
|
|
the interchange trailer
|
|
|
|
=head2 new_data_iterator
|
|
|
|
Sets the object's data_iterator to point to the UNH segment
|
|
|
|
=head2 next_segment
|
|
|
|
Returns the next segment pointed to by the data_iterator. Increments the
|
|
data_iterator member or destroys it if segment UNZ has been reached
|
|
|
|
=head2 get_transmission
|
|
|
|
This method is useful in debugg:ing. Call on an Edifact object
|
|
it returns the object's transmission member
|
|
|
|
=head2 message_type
|
|
|
|
return the object's message type
|
|
|
|
=head2 message_array
|
|
|
|
return an array of Message objects contained in the Edifact transmission
|
|
|
|
=head1 Internal Methods
|
|
|
|
=head2 _init
|
|
|
|
Called by the constructor to do the parsing of the transmission
|
|
|
|
=head2 service_string_advice
|
|
|
|
Examines the Service String Advice returns 1 if the default separartors are in use
|
|
undef otherwise
|
|
|
|
=head2 segmentize
|
|
|
|
takes a raw Edifact message and returns a reference to an array of
|
|
its segments
|
|
|
|
=head2 msgcharset
|
|
|
|
Return the character set the message was encoded in. The default is iso-8859-1
|
|
|
|
We preserve this info but will have converted to utf-8 on ingest
|
|
|
|
=head1 AUTHOR
|
|
|
|
Colin Campbell <colin.campbell@ptfs-europe.com>
|
|
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
Copyright 2014,2015, PTFS-Europe Ltd
|
|
This program is free software, You may redistribute it under
|
|
under the terms of the GNU General Public License
|
|
|
|
|
|
=cut
|
|
|