From f6a5960f83d02d3b97f2f49aba65c8b418637856 Mon Sep 17 00:00:00 2001 From: Jonathan Druart Date: Mon, 1 Feb 2021 14:50:52 +0100 Subject: [PATCH] Bug 27569: Fix CSV export for biblio frameworks The CSV import of the biblio frameworks is broken. A change in LibreOffice resulted in CSV not formatted the way the C4::ImportExportFramework code expects. This code is quite broken, it expects all the CSV cells to be quoted. The "add tests" patch shows the difference between what we expected before this patch, and what is the new version generated by LibreOffice. Test plan: Export a biblio framework in CSV Open it with LibreOffice and save it under a different name Create a new biblio framework and use the new file to construct it Compare the 2 frameworks and make sure they are identical Signed-off-by: Ron Houk Signed-off-by: Katrin Fischer Signed-off-by: Jonathan Druart (cherry picked from commit ad5de74cbc43e500c34ae6fa50958c6848d1586f) Signed-off-by: Fridolin Somers (cherry picked from commit 69ff54b1945142da58f73a80bb3d2890339013b6) Signed-off-by: Andrew Fuerste-Henry (cherry picked from commit b8410536dcdbb772fe593b2c3cf7a3985437c977) Signed-off-by: Victor Grousset/tuxayo --- C4/ImportExportFramework.pm | 107 +++++++++++++++++------------------- 1 file changed, 51 insertions(+), 56 deletions(-) diff --git a/C4/ImportExportFramework.pm b/C4/ImportExportFramework.pm index 8886ff30cf..2350cfa446 100644 --- a/C4/ImportExportFramework.pm +++ b/C4/ImportExportFramework.pm @@ -23,6 +23,8 @@ use XML::LibXML; use XML::LibXML::XPathContext; use Digest::MD5 qw(); use POSIX qw(strftime); +use Text::CSV_XS; +use List::MoreUtils qw(indexes); use C4::Context; use C4::Debug; @@ -927,70 +929,63 @@ sub _import_table_csv my $numFields = @$fields; my $fieldsNameRead = 0; my @arrData; - my ($fieldsStr, $dataStr, $updateStr); + my ($fieldsStr, $dataStr, $updateStr, @empty_indexes); my @fieldsPK = @$PKArray; shift @fieldsPK; my $ok = 0; - my $numRow = 0; my $pos = 0; - while (<$dom>) { 
$row = $_; - # Check whether the line has an unfinished field, i.e., a field with CR/LF in its data - if ($row =~ /,"[^"]*[\r\n]+$/ || $row =~ /^[^"]+[\r\n]+$/) { - $row =~ s/[\r\n]+$//; - $partialRow .= $row; - next; - } - if ($partialRow) { - $row = $partialRow . $row; - $partialRow = ''; - } - # Line OK, process it - if ($row =~ /(?:".*?",?)+/) { - @arrData = split('","', $row); - $arrData[0] = substr($arrData[0], 1) if ($arrData[0] =~ /^"/); - $arrData[$#arrData] =~ s/[\r\n]+$//; - chop $arrData[$#arrData] if ($arrData[$#arrData] =~ /"$/); - if (@arrData) { - if ($arrData[0] eq '#-#' && $arrData[$#arrData] eq '#-#') { - # Change of table with separators #-# - return 1; - } elsif ($fieldsNameRead && $arrData[0] eq 'tagfield') { - # Change of table because we begin with field name with former field names read - seek($dom, $pos, 0); - return 1; - } - if (!$fieldsNameRead) { - # New table, we read the field names - $fieldsNameRead = 1; - $fields = [@arrData]; - $fieldsStr = join(',', @$fields); - $dataStr = ''; - map { $dataStr .= '?,';} @$fields; - chop($dataStr) if ($dataStr); - $updateStr = ''; - map { $updateStr .= $_ . 
'=?,';} @$fields; - chop($updateStr) if ($updateStr); - } else { - # Read data - my $j = 0; - my %dataFields = (); - for (@arrData) { - if ($fields->[$j] eq 'frameworkcode' && $_ ne $frameworkcode) { - $dataFields{$fields->[$j]} = $frameworkcode; - $arrData[$j] = $frameworkcode; - } else { - $dataFields{$fields->[$j]} = $_; - } - $j++ + my $csv = Text::CSV_XS->new ({ binary => 1 }); + while ( my $row = $csv->getline($dom) ) { + my @fields = @$row; + @arrData = @fields; + next if scalar @arrData == grep { $_ eq '' } @arrData; # Emtpy lines + #$arrData[0] = substr($arrData[0], 1) if ($arrData[0] =~ /^"/); + #$arrData[$#arrData] =~ s/[\r\n]+$//; + #chop $arrData[$#arrData] if ($arrData[$#arrData] =~ /"$/); + if (@arrData) { + if ($arrData[0] eq '#-#' && $arrData[$#arrData] eq '#-#') { + # Change of table with separators #-# + return 1; + } elsif ($fieldsNameRead && $arrData[0] eq 'tagfield') { + # Change of table because we begin with field name with former field names read + seek($dom, $pos, 0); + return 1; + } + if (!$fieldsNameRead) { + # New table, we read the field names + $fieldsNameRead = 1; + $fields = [@arrData]; + my $non_empty_fields = [ grep { $_ ne '' } @$fields ]; + @empty_indexes = indexes { $_ eq '' } @$fields; + $fieldsStr = join(',', @$non_empty_fields); + $dataStr = ''; + map { $dataStr .= '?,';} @$non_empty_fields; + chop($dataStr) if ($dataStr); + $updateStr = ''; + map { $updateStr .= $_ . 
'=?,';} @$non_empty_fields; + chop($updateStr) if ($updateStr); + } else { + # Read data + my $j = 0; + my %dataFields = (); + my @values; + for my $value (@arrData) { + if ( grep { $_ == $j } @empty_indexes ) { + # empty field + } elsif ($fields->[$j] eq 'frameworkcode' && $value ne $frameworkcode) { + $dataFields{$fields->[$j]} = $frameworkcode; + push @values, $frameworkcode; + } else { + $dataFields{$fields->[$j]} = $value; + push @values, $value; } - $ok = _processRow_DB($dbh, $table, $fieldsStr, $dataStr, $updateStr, \@arrData, \%dataFields, $PKArray, \@fieldsPK, $fields2Delete); + $j++ } - $pos = tell($dom); + $ok = _processRow_DB($dbh, $table, $fieldsStr, $dataStr, $updateStr, \@values, \%dataFields, $PKArray, \@fieldsPK, $fields2Delete); } - @arrData = (); + $pos = tell($dom); } - $numRow++; + @arrData = (); } return $ok; }#_import_table_csv -- 2.39.5