diff --git a/src/AmiLoDService.php b/src/AmiLoDService.php index 43509cd..3602eee 100644 --- a/src/AmiLoDService.php +++ b/src/AmiLoDService.php @@ -571,29 +571,6 @@ public function invokeCustomLoD(string $query, string $lod_custom_lod_id):array return $results_processed; } - - /** - * Checks if a string is valid JSON - * - * @param $string - * - * @return bool - */ - public function isJson($string) { - json_decode($string); - return json_last_error() === JSON_ERROR_NONE; - } - - /** - * Helper function that negates ::isJson. - * @param $string - * - * @return bool - */ - public function isNotJson($string) { - return !$this->isJson($string); - } - public function getCustomLoDEndpoints($as_arguments = FALSE) { $active_plugins = []; /* @var $plugin_config_entities \Drupal\webform_strawberryfield\Entity\LoDendpointEntity[] */ diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index b22345d..898c9f5 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -661,9 +661,9 @@ public function retrieve_remote_file( * @param \Drupal\file\Entity\File $zip_file * A Zip file with that may contain the $uri * - * @return mixed + * @return false|string * One of these possibilities: - * - If it succeeds an managed file object + * - If it succeeds a Path to a managed file object * - If it fails or NULL, FALSE. */ public function retrieve_fromzip_file($uri, $destination = NULL, $replace = FileExists::Rename, File $zip_file = NULL) { @@ -1088,7 +1088,6 @@ public function csv_touch(string $filename = NULL, ?string $subpath = NULL, bool return $file->id(); } - /** * Creates an CSV from array and returns file. * @@ -1102,11 +1101,14 @@ public function csv_touch(string $filename = NULL, ?string $subpath = NULL, bool * @param boolean $auto_uuid * Defines if we are going to generate UUIDs when not valid/not present * Or leave the $uuid_key field as it is and let this fail/if later. + * @param bool $permanent + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. + * @param string $logger_channel * * @return int|string|null - * @throws \Drupal\Core\Entity\EntityStorageException */ - public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE, $permanent = TRUE, $logger_channel = 'ami') { + public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE, $permanent = TRUE, bool $escape_character = FALSE, $logger_channel = 'ami') { //$temporary_directory = $this->fileSystem->getTempDirectory(); // We should be allowing downloads for this from temp @@ -1160,9 +1162,12 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE if ($haskey === FALSE) { array_unshift($data['headers'], $uuid_key); } - - $fh->fputcsv($data['headers']); - + if ($escape_character) { + $fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: "\\"); + } + else { + $fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: ""); + } foreach ($data['data'] as $row) { if ($haskey === FALSE) { array_unshift($row, $uuid_key); @@ -1185,8 +1190,13 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE } } } + if ($escape_character) { + $fh->fputcsv($row, separator: ',', enclosure: '"', escape: "\\"); + } + else { + $fh->fputcsv($row, separator: ',', enclosure: '"', escape: ""); + } - $fh->fputcsv($row); } // PHP Bug! This should happen automatically clearstatcache(TRUE, $url); @@ -1213,7 +1223,6 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE return $file->id(); } - /** * Appends CSV from array and returns file. * @@ -1224,20 +1233,19 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE * * @param \Drupal\file\Entity\File $file * - * @param string|null $uuid_key + * @param string $uuid_key * IF NULL then no attempt of using UUIDS will be made. * Needed for LoD Reconciling CSVs * @param bool $append_header * - * @param bool $escape_characters - * Defaults to internal PHP mechanism for escaping characters (a "/") - * Set to FALSE if you are passing JSON encoded strings into cells. - * NOTE: Make sure you also disable it IF reading back from files generated through this + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. + * @param bool $auto_uuid * * @return int|string|null * @throws \Drupal\Core\Entity\EntityStorageException */ - public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', bool $append_header = TRUE, $escape_characters = TRUE, $auto_uuid = TRUE) { + public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', bool $append_header = TRUE, bool $escape_character = TRUE, $auto_uuid = TRUE) { $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); if (!$wrapper) { @@ -1259,7 +1267,12 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo } } if ($append_header) { - $fh->fputcsv($data['headers']); + if ($escape_character) { + $fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: "\\"); + } + else { + $fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: ""); + } } foreach ($data['data'] as $row) { @@ -1282,11 +1295,11 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo } } } - if ($escape_characters) { - $fh->fputcsv($row); + if ($escape_character) { + $fh->fputcsv($row, separator: ',', enclosure: '"', escape: "\\"); } else { - $fh->fputcsv($row, ',', '"', ""); + $fh->fputcsv($row, separator: ',', enclosure: '"', escape: ""); } } // PHP Bug! This should happen automatically @@ -1310,32 +1323,32 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo * @param bool $always_include_header * Always return header even with an offset. * - * @param bool $escape_characters - * + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. * @return array|null * Returning array will be in this form: * 'headers' => $rowHeaders_utf8 or [] if $always_include_header == FALSE * 'data' => $table, * 'totalrows' => $maxRow, */ - public function csv_read(File $file, int $offset = 0, int $count = 0, bool $always_include_header = TRUE, bool $escape_characters = TRUE) { + public function csv_read(File $file, int $offset = 0, int $count = 0, bool $always_include_header = TRUE, bool $escape_character = FALSE) { // 1.6.0: wrapper around this function now that we moved it to strawberryfield so webform module does not depend on AMI for CSV reading // This was needed bc if not we would have an undeclared circular dependency/or would have to change all the code (many parts) // there this service was used to call csv_read(). Only thing new is that the logging/caller_module is used for logging now. - return $this->strawberryfieldUtility->csv_read($file, $offset, $count, $always_include_header, $escape_characters, 'ami'); + return $this->strawberryfieldUtility->csv_read($file, $offset, $count, $always_include_header, $escape_character, 'ami'); } - /** * Removes columns from an existing CSV. * * @param \Drupal\file\Entity\File $file * @param array $headerwithdata - * + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. * @return int|mixed|string|null * @throws \Drupal\Core\Entity\EntityStorageException */ - public function csv_clean(File $file, array $headerwithdata = []) { + public function csv_clean(File $file, array $headerwithdata = [], bool $escape_character = FALSE) { $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); if (!$wrapper) { return NULL; @@ -1382,7 +1395,12 @@ public function csv_clean(File $file, array $headerwithdata = []) { unset($data[$key]); } $data = array_values($data); - $spltmp->fputcsv($data); + if ($escape_character) { + $spltmp->fputcsv($data, separator: ',', enclosure: '"', escape: "\\"); + } + else { + $spltmp->fputcsv($data, separator: ',', enclosure: '"', escape: ""); + } $i++; } $size = $spltmp->getSize(); @@ -1399,11 +1417,11 @@ public function csv_clean(File $file, array $headerwithdata = []) { /** * @param \Drupal\file\Entity\File $file * - * @param bool $escape_characters - * + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. * @return int */ - public function csv_count(File $file, $escape_characters = TRUE) { + public function csv_count(File $file, $escape_character = FALSE) { $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); if (!$wrapper) { return NULL; @@ -1419,11 +1437,11 @@ public function csv_count(File $file, $escape_characters = TRUE) { SplFileObject::DROP_NEW_LINE ); while (!$spl->eof()) { - if (!$escape_characters) { + if (!$escape_character) { $spl->fgetcsv( ',', '"', ""); } else { - $spl->fgetcsv(); + $spl->fgetcsv( ',', '"', "\\"); } $key = $spl->key(); } @@ -1536,15 +1554,17 @@ public function provideDifferentColumnValuesFromCSV(File $file, array $columns): $data = $this->csv_read($file); $column_keys = $data['headers'] ?? []; $alldifferent = []; + $alldifferent_json = []; foreach ($columns as $column) { $column_index = array_search($column, $column_keys); if ($column_index !== FALSE) { + // New for 1.7.0/2.1.0 We process both. Strings and JSON. More expensive + // but also more precise. $alldifferent[$column] = $this->getDifferentValuesfromColumnSplit($data, $column_index); - if (empty($alldifferent[$column])) { - $alldifferent[$column] = $this->getDifferentValuesfromColumnJSON($data, - $column_index); - } + $alldifferent_json[$column] = $this->getDifferentValuesfromColumnJSON($data, + $column_index); + $alldifferent[$column] = array_unique(array_merge($alldifferent[$column], $alldifferent_json[$column])); } } return $alldifferent; @@ -2601,10 +2621,12 @@ public function processMetadataDisplay(\stdClass $data, array $additional_contex $data_to_clean['data'][0] = [$context['data'][$source_column]]; $labels = $this->getDifferentValuesfromColumnSplit($data_to_clean, 0); - if (empty($labels)) { - $labels = $this->getDifferentValuesfromColumnJSON($data_to_clean, - 0); - } + // New for 1.7.0/2.1.0 We process both. Strings and JSON. More expensive + // but also more precise. The Preview does the same now + $labels_json= $this->getDifferentValuesfromColumnJSON($data_to_clean, + 0); + // WE merge both results and make them unique + $labels = array_unique(array_merge($labels, $labels_json)); foreach ($labels as $label) { $lod_for_label = $this->AmiLoDService->getKeyValuePerAmiSet($label, $set_id); if (is_array($lod_for_label) && count($lod_for_label) > 0) { @@ -2868,15 +2890,27 @@ public function getDifferentValuesfromColumnJSON(array $data, int $key, array $v /** - * Checks if a string is valid JSON + * Checks if a string is valid RFC JSON (object or array) + * Skips if its a valid JSON-y-fable native, like a pure string + * or a number * * @param $string * * @return bool */ public function isJson($string) { - json_decode($string); - return json_last_error() === JSON_ERROR_NONE; + try { + $decoded = json_decode($string, TRUE, 512, JSON_THROW_ON_ERROR); + if (is_array($decoded) ) { + return TRUE; + } + else { + return FALSE; + } + } + catch (\Throwable $e) { + return FALSE; + } } /** @@ -2964,4 +2998,85 @@ public static function invalidateAmiSetDeleteAdosAccessCache(EntityInterface $en \Drupal::cache()->invalidate($cache_id); } + /** + * Checks if a CSV has escaped special characters + * + * Old PHP defaults used "\" as escaping mechanis + * Which can break amongst other JSON encoded ROWS + * Starting with AMI 1.1.0 and 2.1.0 we read/write CSV unescaped + * following RFC 4180 to ensure a safe round trip and also + * Excel and Google Sheets editing and export compatibility + * + * @param \Drupal\file\Entity\File $file + * + * @return bool + * TRUE means it has escaping or some other sanity issue + * FALSE means all is good + */ + public function csv_check_escaped(File $file): bool { + $needs_review = FALSE; + $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); + if (!$wrapper) { + return FALSE; + } + $url = $wrapper->getUri(); + $fh = new \SplFileObject($url, 'r'); + if (!$fh) { + $this->messenger()->addError( + $this->t('Error reading the CSV file!.') + ); + return FALSE; + } + // Instead or using PHP's CSV read line, we will get the complete lines first + // Then decode and apply a temporary fix to output unescaped. + // We compare the original read with the unescaped generation using md5 + // ,and we also compare that each ROW has exactly the same columns + // as the header. + // In this case, since we are only checking, we bail out on the first encountered + // abnormality. Of course if all is OK we have to still iterate over all of them + $i = 0; + $header_count = FALSE; + while (!$fh->eof()) { + if ($i == 0) { + // Read the header unescaped. We do not support headers with double quotes + $header = $fh->fgetcsv(',', '"', ""); + $header_count = is_array($header) ? count($header) : FALSE; + $needs_review = !($header_count); + $i++; + } + elseif ($header_count) { + $row_string = $fh->fgets(); + if (!empty($row_string)) { + $row_unescaped = str_getcsv($row_string, ',', '"', ""); + // Try replacing \" with \"" + // but only if: + // - not before a comma. + // - 2 x double quote. + // - or a double quote followed by a space. + $pattern = '}\\\\"(?!(,|""|"\s))}'; + $replacement = '\""'; + $row_string_replaced = preg_replace($pattern, $replacement, $row_string); + $row_unescaped_after_replace = str_getcsv($row_string_replaced, ',', '"', ""); + if (count($row_unescaped_after_replace) != $header_count) { + $needs_review = TRUE; + break; + } + if (md5(implode(";", $row_unescaped)) != md5(implode(";", $row_unescaped_after_replace))) { + $needs_review = TRUE; + break; + } + else { + $needs_review = FALSE; + } + } + } + else { + // $header_count is FALSE, means the CSV is malformed. + $needs_review = TRUE; + } + } + // Closes the SPL File Object. + $fh = NULL; + return $needs_review; + } } diff --git a/src/Controller/AmiRowAutocompleteHandler.php b/src/Controller/AmiRowAutocompleteHandler.php index c3066da..fb08a08 100644 --- a/src/Controller/AmiRowAutocompleteHandler.php +++ b/src/Controller/AmiRowAutocompleteHandler.php @@ -213,10 +213,12 @@ public static function ajaxPreviewAmiSet($form, FormStateInterface $form_state) $labels = \Drupal::service('ami.utility') ->getDifferentValuesfromColumnSplit($data_to_clean, 0); - if (empty($labels)) { - $labels = \Drupal::service('ami.utility')->getDifferentValuesfromColumnJSON($data_to_clean, + // New for 1.7.0/2.1.0 We process both. Strings and JSON. More expensive + // but also more precise. + $labels_from_json = \Drupal::service('ami.utility')->getDifferentValuesfromColumnJSON($data_to_clean, 0); - } + $labels = array_unique(array_merge($labels, $labels_from_json)); + foreach ($labels as $label) { $lod_for_label = \Drupal::service('ami.lod') ->getKeyValuePerAmiSet($label, $id); @@ -463,7 +465,7 @@ public static function ajaxPreviewAmiSet($form, FormStateInterface $form_state) else { $message = !$file ? 'The AMI set has no CSV File. The AMI set is empty.': 'The AMI set has no data for chosen row. The AMI set is empty.'; if (!empty($message)) { - $preview_error = MetadataDisplayForm::buildAjaxPreviewError($message); + $preview_error = MetadataDisplayForm::buildAjaxPreviewError($message, TRUE); $output['preview_error'] = $preview_error; } $response->addCommand(new OpenOffCanvasDialogCommand(t('Preview'), $output, ['width' => '50%'])); diff --git a/src/Entity/amiSetEntity.php b/src/Entity/amiSetEntity.php index f2bfff8..15c852c 100644 --- a/src/Entity/amiSetEntity.php +++ b/src/Entity/amiSetEntity.php @@ -147,6 +147,7 @@ class amiSetEntity extends ContentEntityBase implements amiSetEntityInterface { public const STATUS_READY = 'READY'; + public const STATUS_NEEDS_REVIEW = 'NEEDS_REVIEW'; public const STATUS_NOT_READY = 'NOT_READY'; public const STATUS_PROCESSING = 'PROCESSING'; public const STATUS_PROCESSED = 'PROCESSED'; @@ -166,6 +167,7 @@ class amiSetEntity extends ContentEntityBase implements amiSetEntityInterface { amiSetEntity::STATUS_PROCESSED_WITH_ERRORS => 'Processed with errors', amiSetEntity::STATUS_FAILED => 'Failed', amiSetEntity::STATUS_ENTITIES_DELETED => 'ADOs Deleted', + amiSetEntity::STATUS_NEEDS_REVIEW => 'CSV associated data needs review' ]; /** diff --git a/src/Form/amiSetEntityReconcileForm.php b/src/Form/amiSetEntityReconcileForm.php index 074e6f0..f917246 100644 --- a/src/Form/amiSetEntityReconcileForm.php +++ b/src/Form/amiSetEntityReconcileForm.php @@ -258,7 +258,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#title' => $this->t('Choose a Column to Preview'), '#options' => array_combine($source_options, $source_options), '#default_value' => $form_state->getValue(['lod_options','select_preview']), - '#description' => $this->t('We will attempt to fetch first cells holding a string of delimited values (by "|@|" or ";"). If no luck, and the selected column cell\'s holds a valid JSON, any simple lists of values (e.g ["pup","dog","canine"], and/or any property where the JSON key name contains one of the following strings: "label, value, name". Any URL/URN or URI will be not taken in account'), + '#description' => $this->t('We will attempt to fetch first cells holding a string of delimited values (by "|@|" or ";"). Additionally, if any selected column cell\'s holds a valid JSON, e.g. any simple lists of values (e.g ["pup","dog","canine"], and/or an object with any property where the JSON key name contains one of the following strings: "label, value, type, name". URL/URN or URI will be not taken in account.'), ]; $form['lod_options']['preview'] = [ '#type' => 'button', diff --git a/src/Plugin/Action/AmiStrawberryfieldCSVexport.php b/src/Plugin/Action/AmiStrawberryfieldCSVexport.php index 0d565eb..94a772d 100644 --- a/src/Plugin/Action/AmiStrawberryfieldCSVexport.php +++ b/src/Plugin/Action/AmiStrawberryfieldCSVexport.php @@ -5,7 +5,6 @@ use Drupal\Core\Render\RendererInterface; use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface; use Drupal\Core\Url; -use Drupal\Core\Link; use Drupal\Core\Access\AccessResult; use Drupal\Core\Action\ConfigurableActionBase; use Drupal\Core\Entity\EntityTypeManagerInterface; @@ -24,7 +23,6 @@ use Drupal\views_bulk_operations\Action\ViewsBulkOperationsPreconfigurationInterface; use Psr\Log\LoggerInterface; use Symfony\Component\DependencyInjection\ContainerInterface; -use Symfony\Component\HttpFoundation\RedirectResponse; /** * Provides an action that export SBFs to CSV. @@ -372,8 +370,11 @@ protected function sendToFile($output) { if ($this->configuration['create_ami_set'] && $this->context['sandbox']['ado_type_exists']) { $ami_set = TRUE; } - $logger_channel = (string) $this->context['sandbox']['logger_channel'] ?? 'ami'; - $file_id = $this->AmiUtilityService->csv_save($data, 'node_uuid', TRUE, $ami_set, $logger_channel); + $logger_channel = 'ami'; + if (isset($this->context['sandbox']['logger_channel']) && !empty($this->context['sandbox']['logger_channel'])) { + $logger_channel = (string) $this->context['sandbox']['logger_channel']; + } + $file_id = $this->AmiUtilityService->csv_save($data, 'node_uuid', TRUE, $ami_set, FALSE, $logger_channel); if ($file_id && $this->configuration['create_ami_set'] && $this->context['sandbox']['ado_type_exists']) { $amisetdata = new \stdClass(); $amisetdata->plugin = 'spreadsheet'; @@ -408,17 +409,17 @@ protected function sendToFile($output) { if ($amiset_id) { $url = Url::fromRoute('entity.ami_set_entity.canonical', ['ami_set_entity' => $amiset_id]); - $message = $this->t('Well Done! New AMI Set was created and you can see it here', - ['@url' => $url->toString()]); + $message = $this->t('Well Done! New AMI Set was created and you can see it here', + [':url' => $url->toString()]); $this->messenger() ->addStatus($message); } return $message; } else if ($this->configuration['create_ami_set'] && !$this->context['sandbox']['ado_type_exists']) { - $message = $this->t('AMI Set could not be created because object(s) are missing the type key.'); + $message = $this->t('AMI Set could not be created because object(s) are missing the "type" key.'); $this->messenger() - ->addStatus($message); + ->addWarning($message); return $message; } } @@ -578,21 +579,6 @@ protected function getCid() { return $this->context['sandbox']['cid_prefix'] . $this->context['sandbox']['current_batch']; } - /** - * Prepares sandbox data (header and cache ID). - * - * @return array - * Table header. - */ - protected function getHeader() { - // Build output header array. - $header = &$this->context['sandbox']['header']; - if (!empty($header)) { - return $header; - } - return $this->setHeader(); - } - public function getConfiguration() { return parent::getConfiguration(); // TODO: Change the autogenerated stub } diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index bf34e23..88d6729 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -952,6 +952,7 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { // Ignore status for updates if status_keep == TRUE. if ($status && is_string($status) && $status_keep == FALSE) { $node->set('moderation_state', $status); + $nodeValues['moderation_state'] = $status; $status = 0; } /** @var \Drupal\strawberryfield\Field\StrawberryFieldItemList $field */ diff --git a/src/Plugin/QueueWorker/LoDQueueWorker.php b/src/Plugin/QueueWorker/LoDQueueWorker.php index aa7a9ef..4f92e8a 100644 --- a/src/Plugin/QueueWorker/LoDQueueWorker.php +++ b/src/Plugin/QueueWorker/LoDQueueWorker.php @@ -202,9 +202,9 @@ public function processItem($data) { } } - $newdata['data'][0][$lod_route_column_name] = json_encode($lod, JSON_PRETTY_PRINT|JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE) ?? ''; + $newdata['data'][0][$lod_route_column_name] = json_encode($lod, JSON_PRETTY_PRINT|JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE|JSON_HEX_QUOT) ?? ''; $newdata['data'][0]['original'] = (string) $data->info['label']; - $newdata['data'][0]['csv_columns'] = json_encode((array)$data->info['csv_columns']) ?? ''; + $newdata['data'][0]['csv_columns'] = json_encode((array)$data->info['csv_columns'], JSON_HEX_QUOT) ?? ''; // Adds a "Checked" column used to mark manually reconciliated elements. $newdata['data'][0]['checked'] = FALSE; // Context data is simpler