Koha/Koha/Edifact.pm


								package Koha::Edifact;


								# Copyright 2014,2015 PTFS-Europe Ltd

								#

								# This file is part of Koha.

								#

								# Koha is free software; you can redistribute it and/or modify it

								# under the terms of the GNU General Public License as published by

								# the Free Software Foundation; either version 3 of the License, or

								# (at your option) any later version.

								#

								# Koha is distributed in the hope that it will be useful, but

								# WITHOUT ANY WARRANTY; without even the implied warranty of

								# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

								# GNU General Public License for more details.

								#

								# You should have received a copy of the GNU General Public License

								# along with Koha; if not, see <http://www.gnu.org/licenses>.


								use strict;

								use warnings;

								use File::Slurp;

								use Carp;

								use Encode qw( from_to );

								use Koha::Edifact::Segment;

								use Koha::Edifact::Message;


								my $separator = {

								    component => q{\:},

								    data      => q{\+},

								    decimal   => q{.},

								    release   => q{\?},

								    reserved  => q{ },

								    segment   => q{\'},

								};


								sub new {

								    my ( $class, $param_hashref ) = @_;

								    my $transmission;

								    my $self = ();


								    if ( $param_hashref->{filename} ) {

								        if ( $param_hashref->{transmission} ) {

								            carp

								"Cannot instantiate $class : both filename and transmission passed";

								            return;

								        }

								        $transmission = read_file( $param_hashref->{filename} );

								    }

								    else {

								        $transmission = $param_hashref->{transmission};

								    }

								    $self->{transmission} = _init($transmission);


								    bless $self, $class;

								    return $self;

								}


								sub interchange_header {

								    my ( $self, $field ) = @_;


								    my %element = (

								        sender                        => 1,

								        recipient                     => 2,

								        datetime                      => 3,

								        interchange_control_reference => 4,

								        application_reference         => 6,

								    );

								    if ( !exists $element{$field} ) {

								        carp "No interchange header field $field available";

								        return;

								    }

								    my $data = $self->{transmission}->[0]->elem( $element{$field} );

								    return $data;

								}


								sub interchange_trailer {

								    my ( $self, $field ) = @_;

								    my $trailer = $self->{transmission}->[-1];

								    if ( $field eq 'interchange_control_count' ) {

								        return $trailer->elem(0);

								    }

								    elsif ( $field eq 'interchange_control_reference' ) {

								        return $trailer->elem(1);

								    }

								    carp "Trailer field $field not recognized";

								    return;

								}


								sub new_data_iterator {

								    my $self   = shift;

								    my $offset = 0;

								    while ( $self->{transmission}->[$offset]->tag() ne 'UNH' ) {

								        ++$offset;

								        if ( $offset == @{ $self->{transmission} } ) {

								            carp 'Cannot find message start';

								            return;

								        }

								    }

								    $self->{data_iterator} = $offset;

								    return 1;

								}


								sub next_segment {

								    my $self = shift;

								    if ( defined $self->{data_iterator} ) {

								        my $seg = $self->{transmission}->[ $self->{data_iterator} ];

								        if ( $seg->tag eq 'UNH' ) {


								            $self->{msg_type} = $seg->elem( 1, 0 );

								        }

								        elsif ( $seg->tag eq 'LIN' ) {

								            $self->{msg_type} = 'detail';

								        }


								        if ( $seg->tag ne 'UNZ' ) {

								            $self->{data_iterator}++;

								        }

								        else {

								            $self->{data_iterator} = undef;

								        }

								        return $seg;

								    }

								    return;

								}


								# for debugging return whole transmission

								sub get_transmission {

								    my $self = shift;


								    return $self->{transmission};

								}


								sub message_type {

								    my $self = shift;

								    return $self->{msg_type};

								}


								sub _init {

								    my $msg = shift;

								    if ( !$msg ) {

								        return;

								    }

								    if ( $msg =~ s/^UNA(.{6})// ) {

								        if ( service_string_advice($1) ) {

								            return segmentize($msg);


								        }

								        return;

								    }

								    else {

								        my $s = substr $msg, 10;

								        croak "File does not start with a Service string advice :$s";

								    }

								}


								# return an array of Message objects

								sub message_array {

								    my $self = shift;


								    # return an array of array_refs 1 ref to a message

								    my $msg_arr = [];

								    my $msg     = [];

								    my $in_msg  = 0;

								    foreach my $seg ( @{ $self->{transmission} } ) {

								        if ( $seg->tag eq 'UNH' ) {

								            $in_msg = 1;

								            push @{$msg}, $seg;

								        }

								        elsif ( $seg->tag eq 'UNT' ) {

								            $in_msg = 0;

								            if ( @{$msg} ) {

								                push @{$msg_arr}, Koha::Edifact::Message->new($msg);

								                $msg = [];

								            }

								        }

								        elsif ($in_msg) {

								            push @{$msg}, $seg;

								        }

								    }

								    return $msg_arr;

								}


								#

								# internal parsing routines used in _init

								#

								sub service_string_advice {

								    my $ssa = shift;


								    # At present this just validates that the ssa

								    # is standard Edifact

								    # TBD reset the seps if non standard

								    if ( $ssa ne q{:+.? '} ) {

								        carp " Non standard Service String Advice [$ssa]";

								        return;

								    }


								    # else use default separators

								    return 1;

								}


								sub segmentize {

								    my $raw = shift;


								    # In practice edifact uses latin-1 but check

								    # Transport now converts to utf-8 on ingest

								    # Do not convert here

								    #my $char_set = 'iso-8859-1';

								    #if ( $raw =~ m/^UNB[+]UNO(.)/ ) {

								    #    $char_set = msgcharset($1);

								    #}

								    #from_to( $raw, $char_set, 'utf8' );


								    my $re = qr{

								(?>         # dont backtrack into this group

								    [?].     # either the escape character

								            # followed by any other character

								     |      # or

								     [^'?]   # a character that is neither escape

								             # nor split

								             )+

								}x;

								    my @segmented;

								    while ( $raw =~ /($re)/g ) {

								        push @segmented, Koha::Edifact::Segment->new( { seg_string => $1 } );

								    }

								    return \@segmented;

								}


								sub msgcharset {

								    my $code = shift;

								    if ( $code =~ m/^[^ABCDEF]$/ ) {

								        $code = 'default';

								    }

								    my %encoding_map = (

								        A       => 'ascii',

								        B       => 'ascii',

								        C       => 'iso-8859-1',

								        D       => 'iso-8859-1',

								        E       => 'iso-8859-1',

								        F       => 'iso-8859-1',

								        default => 'iso-8859-1',

								    );

								    return $encoding_map{$code};

								}


								1;

								__END__


								=head1 NAME


								Edifact - Edifact message handler


								=head1 DESCRIPTION


								   Koha module for parsing Edifact messages


								=head1 SUBROUTINES


								=head2 new


								     my $e = Koha::Edifact->new( { filename => 'myfilename' } );

								     or

								     my $e = Koha::Edifact->new( { transmission => $msg_variable } );


								     instantiate the Edifact parser, requires either to be passed an in-memory

								     edifact message as transmission or a filename which it will read on creation


								=head2 interchange_header


								     will return the data in the header field designated by the parameter

								     specified. Valid parameters are: 'sender', 'recipient', 'datetime',

								    'interchange_control_reference', and 'application_reference'


								=head2 interchange_trailer


								     called either with the string 'interchange_control_count' or

								     'interchange_control_reference' will return the corresponding field from

								     the interchange trailer


								=head2 new_data_iterator


								     Sets the object's data_iterator to point to the UNH segment


								=head2 next_segment


								     Returns the next segment pointed to by the data_iterator. Increments the

								     data_iterator member or destroys it if segment UNZ has been reached


								=head2 get_transmission


								     This method is useful in debugg:ing. Call on an Edifact object

								     it returns the object's transmission member


								=head2 message_type


								     return the object's message type


								=head2 message_array


								     return an array of Message objects contained in the Edifact transmission


								=head1 Internal Methods


								=head2 _init


								  Called by the constructor to do the parsing of the transmission


								=head2 service_string_advice


								  Examines the Service String Advice returns 1 if the default separartors are in use

								  undef otherwise


								=head2 segmentize


								   takes a raw Edifact message and returns a reference to an array of

								   its segments


								=head2 msgcharset


								    Return the character set the message was encoded in. The default is iso-8859-1


								    We preserve this info but will have converted to utf-8 on ingest


								=head1 AUTHOR


								   Colin Campbell <colin.campbell@ptfs-europe.com>


								=head1 COPYRIGHT


								   Copyright 2014,2015, PTFS-Europe Ltd

								   This program is free software, You may redistribute it under

								   under the terms of the GNU General Public License


								=cut