Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions src/AmiLoDService.php
Original file line number Diff line number Diff line change
Expand Up @@ -571,29 +571,6 @@ public function invokeCustomLoD(string $query, string $lod_custom_lod_id):array
return $results_processed;
}


/**
 * Tells whether a value decodes as JSON without errors.
 *
 * @param $string
 *   The candidate value handed to json_decode().
 *
 * @return bool
 *   TRUE when json_last_error() reports JSON_ERROR_NONE after decoding,
 *   FALSE otherwise.
 */
public function isJson($string) {
  // Only the decoder's error state matters here; the decoded value is unused.
  json_decode($string);
  $decode_status = json_last_error();
  return JSON_ERROR_NONE === $decode_status;
}

/**
 * Inverse of ::isJson.
 *
 * @param $string
 *   The candidate value, passed through to ::isJson unchanged.
 *
 * @return bool
 *   TRUE when ::isJson reports FALSE for the value, FALSE otherwise.
 */
public function isNotJson($string) {
  $is_json = $this->isJson($string);
  return !$is_json;
}

public function getCustomLoDEndpoints($as_arguments = FALSE) {
$active_plugins = [];
/* @var $plugin_config_entities \Drupal\webform_strawberryfield\Entity\LoDendpointEntity[] */
Expand Down
203 changes: 159 additions & 44 deletions src/AmiUtilityService.php
Original file line number Diff line number Diff line change
Expand Up @@ -661,9 +661,9 @@ public function retrieve_remote_file(
* @param \Drupal\file\Entity\File $zip_file
* A Zip file with that may contain the $uri
*
* @return mixed
* @return false|string
* One of these possibilities:
* - If it succeeds an managed file object
* - If it succeeds, the path to a managed file object.
* - If it fails or NULL, FALSE.
*/
public function retrieve_fromzip_file($uri, $destination = NULL, $replace = FileExists::Rename, File $zip_file = NULL) {
Expand Down Expand Up @@ -1088,7 +1088,6 @@ public function csv_touch(string $filename = NULL, ?string $subpath = NULL, bool
return $file->id();
}


/**
* Creates an CSV from array and returns file.
*
Expand All @@ -1102,11 +1101,14 @@ public function csv_touch(string $filename = NULL, ?string $subpath = NULL, bool
* @param boolean $auto_uuid
* Defines if we are going to generate UUIDs when not valid/not present
* Or leave the $uuid_key field as it is and let this fail/if later.
* @param bool $permanent
* @param bool $escape_character
* If TRUE, a backslash is used as the escape character, so the CSV might not end up being RFC 4180 compliant.
* @param string $logger_channel
*
* @return int|string|null
* @throws \Drupal\Core\Entity\EntityStorageException
*/
public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE, $permanent = TRUE, $logger_channel = 'ami') {
public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE, $permanent = TRUE, bool $escape_character = FALSE, $logger_channel = 'ami') {

//$temporary_directory = $this->fileSystem->getTempDirectory();
// We should be allowing downloads for this from temp
Expand Down Expand Up @@ -1160,9 +1162,12 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE
if ($haskey === FALSE) {
array_unshift($data['headers'], $uuid_key);
}

$fh->fputcsv($data['headers']);

if ($escape_character) {
$fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: "\\");
}
else {
$fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: "");
}
foreach ($data['data'] as $row) {
if ($haskey === FALSE) {
array_unshift($row, $uuid_key);
Expand All @@ -1185,8 +1190,13 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE
}
}
}
if ($escape_character) {
$fh->fputcsv($row, separator: ',', enclosure: '"', escape: "\\");
}
else {
$fh->fputcsv($row, separator: ',', enclosure: '"', escape: "");
}

$fh->fputcsv($row);
}
// PHP Bug! This should happen automatically
clearstatcache(TRUE, $url);
Expand All @@ -1213,7 +1223,6 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE
return $file->id();
}


/**
* Appends CSV from array and returns file.
*
Expand All @@ -1224,20 +1233,19 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE
*
* @param \Drupal\file\Entity\File $file
*
* @param string|null $uuid_key
* @param string $uuid_key
* IF NULL then no attempt of using UUIDS will be made.
* Needed for LoD Reconciling CSVs
* @param bool $append_header
*
* @param bool $escape_characters
* Defaults to internal PHP mechanism for escaping characters (a "/")
* Set to FALSE if you are passing JSON encoded strings into cells.
* NOTE: Make sure you also disable it IF reading back from files generated through this
* @param bool $escape_character
* If TRUE, a backslash is used as the escape character, so the CSV might not end up being RFC 4180 compliant.
* @param bool $auto_uuid
*
* @return int|string|null
* @throws \Drupal\Core\Entity\EntityStorageException
*/
public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', bool $append_header = TRUE, $escape_characters = TRUE, $auto_uuid = TRUE) {
public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', bool $append_header = TRUE, bool $escape_character = TRUE, $auto_uuid = TRUE) {

$wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri());
if (!$wrapper) {
Expand All @@ -1259,7 +1267,12 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo
}
}
if ($append_header) {
$fh->fputcsv($data['headers']);
if ($escape_character) {
$fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: "\\");
}
else {
$fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: "");
}
}

foreach ($data['data'] as $row) {
Expand All @@ -1282,11 +1295,11 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo
}
}
}
if ($escape_characters) {
$fh->fputcsv($row);
if ($escape_character) {
$fh->fputcsv($row, separator: ',', enclosure: '"', escape: "\\");
}
else {
$fh->fputcsv($row, ',', '"', "");
$fh->fputcsv($row, separator: ',', enclosure: '"', escape: "");
}
}
// PHP Bug! This should happen automatically
Expand All @@ -1310,32 +1323,32 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo
* @param bool $always_include_header
* Always return header even with an offset.
*
* @param bool $escape_characters
*
* @param bool $escape_character
* If used, the CSV might not end up being RFC 4180 compliant.
* @return array|null
* Returning array will be in this form:
* 'headers' => $rowHeaders_utf8 or [] if $always_include_header == FALSE
* 'data' => $table,
* 'totalrows' => $maxRow,
*/
public function csv_read(File $file, int $offset = 0, int $count = 0, bool $always_include_header = TRUE, bool $escape_characters = TRUE) {
public function csv_read(File $file, int $offset = 0, int $count = 0, bool $always_include_header = TRUE, bool $escape_character = FALSE) {
// 1.6.0: wrapper around this function now that we moved it to strawberryfield so webform module does not depend on AMI for CSV reading
// This was needed bc if not we would have an undeclared circular dependency/or would have to change all the code (many parts)
// there this service was used to call csv_read(). Only thing new is that the logging/caller_module is used for logging now.
return $this->strawberryfieldUtility->csv_read($file, $offset, $count, $always_include_header, $escape_characters, 'ami');
return $this->strawberryfieldUtility->csv_read($file, $offset, $count, $always_include_header, $escape_character, 'ami');
}


/**
* Removes columns from an existing CSV.
*
* @param \Drupal\file\Entity\File $file
* @param array $headerwithdata
*
* @param bool $escape_character
* If TRUE, a backslash is used as the escape character, so the CSV might not end up being RFC 4180 compliant.
* @return int|mixed|string|null
* @throws \Drupal\Core\Entity\EntityStorageException
*/
public function csv_clean(File $file, array $headerwithdata = []) {
public function csv_clean(File $file, array $headerwithdata = [], bool $escape_character = FALSE) {
$wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri());
if (!$wrapper) {
return NULL;
Expand Down Expand Up @@ -1382,7 +1395,12 @@ public function csv_clean(File $file, array $headerwithdata = []) {
unset($data[$key]);
}
$data = array_values($data);
$spltmp->fputcsv($data);
if ($escape_character) {
$spltmp->fputcsv($data, separator: ',', enclosure: '"', escape: "\\");
}
else {
$spltmp->fputcsv($data, separator: ',', enclosure: '"', escape: "");
}
$i++;
}
$size = $spltmp->getSize();
Expand All @@ -1399,11 +1417,11 @@ public function csv_clean(File $file, array $headerwithdata = []) {
/**
* @param \Drupal\file\Entity\File $file
*
* @param bool $escape_characters
*
* @param bool $escape_character
* If TRUE, a backslash is treated as the escape character while reading, which is not RFC 4180 compliant.
* @return int
*/
public function csv_count(File $file, $escape_characters = TRUE) {
public function csv_count(File $file, $escape_character = FALSE) {
$wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri());
if (!$wrapper) {
return NULL;
Expand All @@ -1419,11 +1437,11 @@ public function csv_count(File $file, $escape_characters = TRUE) {
SplFileObject::DROP_NEW_LINE
);
while (!$spl->eof()) {
if (!$escape_characters) {
if (!$escape_character) {
$spl->fgetcsv( ',', '"', "");
}
else {
$spl->fgetcsv();
$spl->fgetcsv( ',', '"', "\\");
}
$key = $spl->key();
}
Expand Down Expand Up @@ -1536,15 +1554,17 @@ public function provideDifferentColumnValuesFromCSV(File $file, array $columns):
$data = $this->csv_read($file);
$column_keys = $data['headers'] ?? [];
$alldifferent = [];
$alldifferent_json = [];
foreach ($columns as $column) {
$column_index = array_search($column, $column_keys);
if ($column_index !== FALSE) {
// New for 1.7.0/2.1.0 We process both. Strings and JSON. More expensive
// but also more precise.
$alldifferent[$column] = $this->getDifferentValuesfromColumnSplit($data,
$column_index);
if (empty($alldifferent[$column])) {
$alldifferent[$column] = $this->getDifferentValuesfromColumnJSON($data,
$column_index);
}
$alldifferent_json[$column] = $this->getDifferentValuesfromColumnJSON($data,
$column_index);
$alldifferent[$column] = array_unique(array_merge($alldifferent[$column], $alldifferent_json[$column]));
}
}
return $alldifferent;
Expand Down Expand Up @@ -2601,10 +2621,12 @@ public function processMetadataDisplay(\stdClass $data, array $additional_contex
$data_to_clean['data'][0] = [$context['data'][$source_column]];
$labels = $this->getDifferentValuesfromColumnSplit($data_to_clean,
0);
if (empty($labels)) {
$labels = $this->getDifferentValuesfromColumnJSON($data_to_clean,
0);
}
// New for 1.7.0/2.1.0 We process both. Strings and JSON. More expensive
// but also more precise. The Preview does the same now
$labels_json= $this->getDifferentValuesfromColumnJSON($data_to_clean,
0);
// WE merge both results and make them unique
$labels = array_unique(array_merge($labels, $labels_json));
foreach ($labels as $label) {
$lod_for_label = $this->AmiLoDService->getKeyValuePerAmiSet($label, $set_id);
if (is_array($lod_for_label) && count($lod_for_label) > 0) {
Expand Down Expand Up @@ -2868,15 +2890,27 @@ public function getDifferentValuesfromColumnJSON(array $data, int $key, array $v


/**
* Checks if a string is valid JSON
* Checks if a string is valid RFC JSON (object or array).
* Returns FALSE if it is only a valid JSON scalar, like a plain string
* or a number.
*
* @param $string
*
* @return bool
*/
public function isJson($string) {
json_decode($string);
return json_last_error() === JSON_ERROR_NONE;
try {
$decoded = json_decode($string, TRUE, 512, JSON_THROW_ON_ERROR);
if (is_array($decoded) ) {
return TRUE;
}
else {
return FALSE;
}
}
catch (\Throwable $e) {
return FALSE;
}
}

/**
Expand Down Expand Up @@ -2964,4 +2998,85 @@ public static function invalidateAmiSetDeleteAdosAccessCache(EntityInterface $en
\Drupal::cache()->invalidate($cache_id);
}

/**
 * Checks if a CSV has escaped special characters
 *
 * Old PHP defaults used "\" as escaping mechanism
 * Which can break amongst other JSON encoded ROWS
 * Starting with AMI 1.1.0 and 2.1.0 we read/write CSV unescaped
 * following RFC 4180 to ensure a safe round trip and also
 * Excel and Google Sheets editing and export compatibility
 *
 * @param \Drupal\file\Entity\File $file
 *   The CSV file to inspect. It is only read, never modified.
 *
 * @return bool
 *   TRUE means it has escaping or some other sanity issue
 *   FALSE means all is good. FALSE is also returned when no stream wrapper
 *   exists for the file URI or when the file can not be opened (in the
 *   latter case an error message is shown to the user).
 */
public function csv_check_escaped(File $file): bool {
  $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri());
  if (!$wrapper) {
    return FALSE;
  }
  $url = $wrapper->getUri();
  // \SplFileObject never evaluates to FALSE: when the file can not be opened
  // the constructor throws, so the failure has to be caught instead of
  // being tested with a falsy check.
  try {
    $fh = new \SplFileObject($url, 'r');
  }
  catch (\RuntimeException | \LogicException $e) {
    $this->messenger()->addError(
      $this->t('Error reading the CSV file!.')
    );
    return FALSE;
  }

  $needs_review = FALSE;
  if (!$fh->eof()) {
    // Read the header unescaped. We do not support headers with double quotes
    $header = $fh->fgetcsv(',', '"', "");
    $header_count = is_array($header) ? count($header) : FALSE;
    if (!$header_count) {
      // No usable header means the CSV is malformed. Bail out right away:
      // nothing else would advance the file pointer, so looping until EOF
      // here could never terminate.
      $fh = NULL;
      return TRUE;
    }

    // Instead of using PHP's CSV read line, we get the complete raw lines.
    // Each line is parsed unescaped twice: once as is, and once after a
    // temporary fix that turns backslash-escaped double quotes into RFC 4180
    // doubled quotes. If both parses differ, or if the fixed row does not
    // have exactly the same number of columns as the header, the file needs
    // review. Since we are only checking, we bail out on the first
    // encountered abnormality. If all is OK we still iterate over all rows.
    while (!$fh->eof()) {
      $row_string = $fh->fgets();
      if (empty($row_string)) {
        continue;
      }
      $row_unescaped = str_getcsv($row_string, ',', '"', "");
      // Try replacing \" with \""
      // but only if:
      // - not before a comma.
      // - 2 x double quote.
      // - or a double quote followed by a space.
      $pattern = '}\\\\"(?!(,|""|"\s))}';
      $replacement = '\""';
      $row_string_replaced = preg_replace($pattern, $replacement, $row_string);
      if (!is_string($row_string_replaced)) {
        // preg_replace() returns NULL on a PCRE error. The row could not be
        // verified, so flag the file instead of parsing a NULL value.
        $needs_review = TRUE;
        break;
      }
      $row_unescaped_after_replace = str_getcsv($row_string_replaced, ',', '"', "");
      if (count($row_unescaped_after_replace) != $header_count) {
        $needs_review = TRUE;
        break;
      }
      // Strict comparison of the joined cells. Comparing md5() digests with
      // a loose operator can treat two different "0e..." digests as equal
      // numbers, and hashing adds nothing over comparing the strings.
      if (implode(";", $row_unescaped) !== implode(";", $row_unescaped_after_replace)) {
        $needs_review = TRUE;
        break;
      }
    }
  }
  // Closes the SPL File Object.
  $fh = NULL;
  return $needs_review;
}
}
Loading