From f6a5960f83d02d3b97f2f49aba65c8b418637856 Mon Sep 17 00:00:00 2001
From: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
Date: Mon, 1 Feb 2021 14:50:52 +0100
Subject: [PATCH] Bug 27569: Fix CSV export for biblio frameworks

The CSV import of the biblio frameworks is broken.
A change in LibreOffice resulted in CSV files that are not formatted the
way the C4::ImportExportFramework code expects.
This code is quite broken: it expects all the CSV cells to be quoted.

The "add tests" patch shows the difference between what we expected
before this patch and the new version generated by LibreOffice.

Test plan:
Export a biblio framework in CSV
Open it with LibreOffice and save it under a different name
Create a new biblio framework and use the new file to construct it
Compare the 2 frameworks and make sure they are identical

Signed-off-by: Ron Houk <rhouk@ottumwapubliclibrary.org>

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
(cherry picked from commit ad5de74cbc43e500c34ae6fa50958c6848d1586f)
Signed-off-by: Fridolin Somers <fridolin.somers@biblibre.com>
(cherry picked from commit 69ff54b1945142da58f73a80bb3d2890339013b6)

Signed-off-by: Andrew Fuerste-Henry <andrew@bywatersolutions.com>
(cherry picked from commit b8410536dcdbb772fe593b2c3cf7a3985437c977)
Signed-off-by: Victor Grousset/tuxayo <victor@tuxayo.net>
---
 C4/ImportExportFramework.pm | 107 +++++++++++++++++-------------------
 1 file changed, 51 insertions(+), 56 deletions(-)

diff --git a/C4/ImportExportFramework.pm b/C4/ImportExportFramework.pm
index 8886ff30cf..2350cfa446 100644
--- a/C4/ImportExportFramework.pm
+++ b/C4/ImportExportFramework.pm
@@ -23,6 +23,8 @@ use XML::LibXML;
 use XML::LibXML::XPathContext;
 use Digest::MD5 qw();
 use POSIX qw(strftime);
+use Text::CSV_XS;
+use List::MoreUtils qw(indexes);
 
 use C4::Context;
 use C4::Debug;
@@ -927,70 +929,63 @@ sub _import_table_csv
     my $numFields = @$fields;
     my $fieldsNameRead = 0;
     my @arrData;
-    my ($fieldsStr, $dataStr, $updateStr);
+    my ($fieldsStr, $dataStr, $updateStr, @empty_indexes);
     my @fieldsPK = @$PKArray;
     shift @fieldsPK;
     my $ok = 0;
-    my $numRow = 0;
     my $pos = 0;
-    while (<$dom>) {
-        $row = $_;
-        # Check whether the line has an unfinished field, i.e., a field with CR/LF in its data
-        if ($row =~ /,"[^"]*[\r\n]+$/ || $row =~ /^[^"]+[\r\n]+$/) {
-            $row =~ s/[\r\n]+$//;
-            $partialRow .= $row;
-            next;
-        }
-        if ($partialRow) {
-            $row = $partialRow . $row;
-            $partialRow = '';
-        }
-        # Line OK, process it
-        if ($row =~ /(?:".*?",?)+/) {
-            @arrData = split('","', $row);
-            $arrData[0] = substr($arrData[0], 1) if ($arrData[0] =~ /^"/);
-            $arrData[$#arrData] =~ s/[\r\n]+$//;
-            chop $arrData[$#arrData] if ($arrData[$#arrData] =~ /"$/);
-            if (@arrData) {
-                if ($arrData[0] eq '#-#' && $arrData[$#arrData] eq '#-#') {
-                    # Change of table with separators #-#
-                    return 1;
-                } elsif ($fieldsNameRead && $arrData[0] eq 'tagfield') {
-                    # Change of table because we begin with field name with former field names read
-                    seek($dom, $pos, 0);
-                    return 1;
-                }
-                if (!$fieldsNameRead) {
-                    # New table, we read the field names
-                    $fieldsNameRead = 1;
-                    $fields = [@arrData];
-                    $fieldsStr = join(',', @$fields);
-                    $dataStr = '';
-                    map { $dataStr .= '?,';} @$fields;
-                    chop($dataStr) if ($dataStr);
-                    $updateStr = '';
-                    map { $updateStr .= $_ . '=?,';} @$fields;
-                    chop($updateStr) if ($updateStr);
-                } else {
-                    # Read data
-                    my $j = 0;
-                    my %dataFields = ();
-                    for (@arrData) {
-                        if ($fields->[$j] eq 'frameworkcode' && $_ ne $frameworkcode) {
-                            $dataFields{$fields->[$j]} = $frameworkcode;
-                            $arrData[$j] = $frameworkcode;
-                        } else {
-                            $dataFields{$fields->[$j]} = $_;
-                        }
-                        $j++
+    my $csv = Text::CSV_XS->new ({ binary => 1 });
+    while ( my $row = $csv->getline($dom) ) {
+        my @fields = @$row;
+        @arrData = @fields;
+        next if scalar @arrData == grep { $_ eq '' } @arrData; # Emtpy lines
+        #$arrData[0] = substr($arrData[0], 1) if ($arrData[0] =~ /^"/);
+        #$arrData[$#arrData] =~ s/[\r\n]+$//;
+        #chop $arrData[$#arrData] if ($arrData[$#arrData] =~ /"$/);
+        if (@arrData) {
+            if ($arrData[0] eq '#-#' && $arrData[$#arrData] eq '#-#') {
+                # Change of table with separators #-#
+                return 1;
+            } elsif ($fieldsNameRead && $arrData[0] eq 'tagfield') {
+                # Change of table because we begin with field name with former field names read
+                seek($dom, $pos, 0);
+                return 1;
+            }
+            if (!$fieldsNameRead) {
+                # New table, we read the field names
+                $fieldsNameRead = 1;
+                $fields = [@arrData];
+                my $non_empty_fields = [ grep { $_ ne '' } @$fields ];
+                @empty_indexes = indexes { $_ eq '' } @$fields;
+                $fieldsStr = join(',', @$non_empty_fields);
+                $dataStr = '';
+                map { $dataStr .= '?,';} @$non_empty_fields;
+                chop($dataStr) if ($dataStr);
+                $updateStr = '';
+                map { $updateStr .= $_ . '=?,';} @$non_empty_fields;
+                chop($updateStr) if ($updateStr);
+            } else {
+                # Read data
+                my $j = 0;
+                my %dataFields = ();
+                my @values;
+                for my $value (@arrData) {
+                    if ( grep { $_ == $j } @empty_indexes ) {
+                        # empty field
+                    } elsif ($fields->[$j] eq 'frameworkcode' && $value ne $frameworkcode) {
+                        $dataFields{$fields->[$j]} = $frameworkcode;
+                        push @values, $frameworkcode;
+                    } else {
+                        $dataFields{$fields->[$j]} = $value;
+                        push @values, $value;
                     }
-                    $ok = _processRow_DB($dbh, $table, $fieldsStr, $dataStr, $updateStr, \@arrData, \%dataFields, $PKArray, \@fieldsPK, $fields2Delete);
+                    $j++
                 }
-                $pos = tell($dom);
+                $ok = _processRow_DB($dbh, $table, $fieldsStr, $dataStr, $updateStr, \@values, \%dataFields, $PKArray, \@fieldsPK, $fields2Delete);
             }
-            @arrData = ();
+            $pos = tell($dom);
         }
-        $numRow++;
+        @arrData = ();
     }
     return $ok;
 }#_import_table_csv
-- 
2.39.5