diff --git a/src/AmiLoDService.php b/src/AmiLoDService.php index 43509cd4..3602eee5 100644 --- a/src/AmiLoDService.php +++ b/src/AmiLoDService.php @@ -571,29 +571,6 @@ public function invokeCustomLoD(string $query, string $lod_custom_lod_id):array return $results_processed; } - - /** - * Checks if a string is valid JSON - * - * @param $string - * - * @return bool - */ - public function isJson($string) { - json_decode($string); - return json_last_error() === JSON_ERROR_NONE; - } - - /** - * Helper function that negates ::isJson. - * @param $string - * - * @return bool - */ - public function isNotJson($string) { - return !$this->isJson($string); - } - public function getCustomLoDEndpoints($as_arguments = FALSE) { $active_plugins = []; /* @var $plugin_config_entities \Drupal\webform_strawberryfield\Entity\LoDendpointEntity[] */ diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index b22345db..99e984fc 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -1088,7 +1088,6 @@ public function csv_touch(string $filename = NULL, ?string $subpath = NULL, bool return $file->id(); } - /** * Creates an CSV from array and returns file. * @@ -1102,11 +1101,14 @@ public function csv_touch(string $filename = NULL, ?string $subpath = NULL, bool * @param boolean $auto_uuid * Defines if we are going to generate UUIDs when not valid/not present * Or leave the $uuid_key field as it is and let this fail/if later. + * @param bool $permanent + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. + * @param string $logger_channel * * @return int|string|null - * @throws \Drupal\Core\Entity\EntityStorageException */ - public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE, $permanent = TRUE, $logger_channel = 'ami') { + public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE, $permanent = TRUE, bool $escape_character = FALSE, $logger_channel = 'ami') { //$temporary_directory = $this->fileSystem->getTempDirectory(); // We should be allowing downloads for this from temp @@ -1160,9 +1162,12 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE if ($haskey === FALSE) { array_unshift($data['headers'], $uuid_key); } - - $fh->fputcsv($data['headers']); - + if ($escape_character) { + $fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: "\\"); + } + else { + $fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: ""); + } foreach ($data['data'] as $row) { if ($haskey === FALSE) { array_unshift($row, $uuid_key); @@ -1185,8 +1190,13 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE } } } + if ($escape_character) { + $fh->fputcsv($row, separator: ',', enclosure: '"', escape: "\\"); + } + else { + $fh->fputcsv($row, separator: ',', enclosure: '"', escape: ""); + } - $fh->fputcsv($row); } // PHP Bug! This should happen automatically clearstatcache(TRUE, $url); @@ -1213,7 +1223,6 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE return $file->id(); } - /** * Appends CSV from array and returns file. * @@ -1224,20 +1233,19 @@ public function csv_save(array $data, $uuid_key = 'node_uuid', $auto_uuid = TRUE * * @param \Drupal\file\Entity\File $file * - * @param string|null $uuid_key + * @param string $uuid_key * IF NULL then no attempt of using UUIDS will be made. * Needed for LoD Reconciling CSVs * @param bool $append_header * - * @param bool $escape_characters - * Defaults to internal PHP mechanism for escaping characters (a "/") - * Set to FALSE if you are passing JSON encoded strings into cells. - * NOTE: Make sure you also disable it IF reading back from files generated through this + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. + * @param bool $auto_uuid * * @return int|string|null * @throws \Drupal\Core\Entity\EntityStorageException */ - public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', bool $append_header = TRUE, $escape_characters = TRUE, $auto_uuid = TRUE) { + public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', bool $append_header = TRUE, bool $escape_character = TRUE, $auto_uuid = TRUE) { $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); if (!$wrapper) { @@ -1259,7 +1267,12 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo } } if ($append_header) { - $fh->fputcsv($data['headers']); + if ($escape_character) { + $fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: "\\"); + } + else { + $fh->fputcsv($data['headers'], separator: ',', enclosure: '"', escape: ""); + } } foreach ($data['data'] as $row) { @@ -1282,11 +1295,11 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo } } } - if ($escape_characters) { - $fh->fputcsv($row); + if ($escape_character) { + $fh->fputcsv($row, separator: ',', enclosure: '"', escape: "\\"); } else { - $fh->fputcsv($row, ',', '"', ""); + $fh->fputcsv($row, separator: ',', enclosure: '"', escape: ""); } } // PHP Bug! This should happen automatically @@ -1310,32 +1323,32 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo * @param bool $always_include_header * Always return header even with an offset. * - * @param bool $escape_characters - * + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. * @return array|null * Returning array will be in this form: * 'headers' => $rowHeaders_utf8 or [] if $always_include_header == FALSE * 'data' => $table, * 'totalrows' => $maxRow, */ - public function csv_read(File $file, int $offset = 0, int $count = 0, bool $always_include_header = TRUE, bool $escape_characters = TRUE) { + public function csv_read(File $file, int $offset = 0, int $count = 0, bool $always_include_header = TRUE, bool $escape_character = FALSE) { // 1.6.0: wrapper around this function now that we moved it to strawberryfield so webform module does not depend on AMI for CSV reading // This was needed bc if not we would have an undeclared circular dependency/or would have to change all the code (many parts) // there this service was used to call csv_read(). Only thing new is that the logging/caller_module is used for logging now. - return $this->strawberryfieldUtility->csv_read($file, $offset, $count, $always_include_header, $escape_characters, 'ami'); + return $this->strawberryfieldUtility->csv_read($file, $offset, $count, $always_include_header, $escape_character, 'ami'); } - /** * Removes columns from an existing CSV. * * @param \Drupal\file\Entity\File $file * @param array $headerwithdata - * + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. * @return int|mixed|string|null * @throws \Drupal\Core\Entity\EntityStorageException */ - public function csv_clean(File $file, array $headerwithdata = []) { + public function csv_clean(File $file, array $headerwithdata = [], bool $escape_character = FALSE) { $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); if (!$wrapper) { return NULL; @@ -1382,7 +1395,12 @@ public function csv_clean(File $file, array $headerwithdata = []) { unset($data[$key]); } $data = array_values($data); - $spltmp->fputcsv($data); + if ($escape_character) { + $spltmp->fputcsv($data, separator: ',', enclosure: '"', escape: "\\"); + } + else { + $spltmp->fputcsv($data, separator: ',', enclosure: '"', escape: ""); + } $i++; } $size = $spltmp->getSize(); @@ -1399,11 +1417,11 @@ public function csv_clean(File $file, array $headerwithdata = []) { /** * @param \Drupal\file\Entity\File $file * - * @param bool $escape_characters - * + * @param bool $escape_character + * If used, CSV might not end being RFC 4180 compliant. * @return int */ - public function csv_count(File $file, $escape_characters = TRUE) { + public function csv_count(File $file, $escape_character = FALSE) { $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); if (!$wrapper) { return NULL; @@ -1419,11 +1437,11 @@ public function csv_count(File $file, $escape_characters = TRUE) { SplFileObject::DROP_NEW_LINE ); while (!$spl->eof()) { - if (!$escape_characters) { + if (!$escape_character) { $spl->fgetcsv( ',', '"', ""); } else { - $spl->fgetcsv(); + $spl->fgetcsv( ',', '"', "\\"); } $key = $spl->key(); } @@ -1536,15 +1554,17 @@ public function provideDifferentColumnValuesFromCSV(File $file, array $columns): $data = $this->csv_read($file); $column_keys = $data['headers'] ?? []; $alldifferent = []; + $alldifferent_json = []; foreach ($columns as $column) { $column_index = array_search($column, $column_keys); if ($column_index !== FALSE) { + // New for 1.7.0/2.1.0 We process both. Strings and JSON. More expensive + // but also more precise. $alldifferent[$column] = $this->getDifferentValuesfromColumnSplit($data, $column_index); - if (empty($alldifferent[$column])) { - $alldifferent[$column] = $this->getDifferentValuesfromColumnJSON($data, + $alldifferent_json[$column] = $this->getDifferentValuesfromColumnJSON($data, $column_index); - } + $alldifferent[$column] = array_unique(array_merge($alldifferent[$column], $alldifferent_json[$column])); } } return $alldifferent; @@ -2601,10 +2621,12 @@ public function processMetadataDisplay(\stdClass $data, array $additional_contex $data_to_clean['data'][0] = [$context['data'][$source_column]]; $labels = $this->getDifferentValuesfromColumnSplit($data_to_clean, 0); - if (empty($labels)) { - $labels = $this->getDifferentValuesfromColumnJSON($data_to_clean, + // New for 1.7.0/2.1.0 We process both. Strings and JSON. More expensive + // but also more precise. The Preview does the same now + $labels_json= $this->getDifferentValuesfromColumnJSON($data_to_clean, 0); - } + // WE merge both results and make them unique + $labels = array_unique(array_merge($labels, $labels_json)); foreach ($labels as $label) { $lod_for_label = $this->AmiLoDService->getKeyValuePerAmiSet($label, $set_id); if (is_array($lod_for_label) && count($lod_for_label) > 0) { @@ -2868,15 +2890,27 @@ public function getDifferentValuesfromColumnJSON(array $data, int $key, array $v /** - * Checks if a string is valid JSON + * Checks if a string is valid RFC JSON (object or array) + * Skips if its a valid JSON-y-fable native, like a pure string + * or a number * * @param $string * * @return bool */ public function isJson($string) { - json_decode($string); - return json_last_error() === JSON_ERROR_NONE; + try { + $decoded = json_decode($string, TRUE, 512, JSON_THROW_ON_ERROR); + if (is_array($decoded) ) { + return TRUE; + } + else { + return FALSE; + } + } + catch (\Throwable $e) { + return FALSE; + } } /** diff --git a/src/Controller/AmiRowAutocompleteHandler.php b/src/Controller/AmiRowAutocompleteHandler.php index c3066da5..fb08a08b 100644 --- a/src/Controller/AmiRowAutocompleteHandler.php +++ b/src/Controller/AmiRowAutocompleteHandler.php @@ -213,10 +213,12 @@ public static function ajaxPreviewAmiSet($form, FormStateInterface $form_state) $labels = \Drupal::service('ami.utility') ->getDifferentValuesfromColumnSplit($data_to_clean, 0); - if (empty($labels)) { - $labels = \Drupal::service('ami.utility')->getDifferentValuesfromColumnJSON($data_to_clean, + // New for 1.7.0/2.1.0 We process both. Strings and JSON. More expensive + // but also more precise. + $labels_from_json = \Drupal::service('ami.utility')->getDifferentValuesfromColumnJSON($data_to_clean, 0); - } + $labels = array_unique(array_merge($labels, $labels_from_json)); + foreach ($labels as $label) { $lod_for_label = \Drupal::service('ami.lod') ->getKeyValuePerAmiSet($label, $id); @@ -463,7 +465,7 @@ public static function ajaxPreviewAmiSet($form, FormStateInterface $form_state) else { $message = !$file ? 'The AMI set has no CSV File. The AMI set is empty.': 'The AMI set has no data for chosen row. The AMI set is empty.'; if (!empty($message)) { - $preview_error = MetadataDisplayForm::buildAjaxPreviewError($message); + $preview_error = MetadataDisplayForm::buildAjaxPreviewError($message, TRUE); $output['preview_error'] = $preview_error; } $response->addCommand(new OpenOffCanvasDialogCommand(t('Preview'), $output, ['width' => '50%'])); diff --git a/src/Form/amiSetEntityReconcileForm.php b/src/Form/amiSetEntityReconcileForm.php index 074e6f0f..f9172465 100644 --- a/src/Form/amiSetEntityReconcileForm.php +++ b/src/Form/amiSetEntityReconcileForm.php @@ -258,7 +258,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#title' => $this->t('Choose a Column to Preview'), '#options' => array_combine($source_options, $source_options), '#default_value' => $form_state->getValue(['lod_options','select_preview']), - '#description' => $this->t('We will attempt to fetch first cells holding a string of delimited values (by "|@|" or ";"). If no luck, and the selected column cell\'s holds a valid JSON, any simple lists of values (e.g ["pup","dog","canine"], and/or any property where the JSON key name contains one of the following strings: "label, value, name". Any URL/URN or URI will be not taken in account'), + '#description' => $this->t('We will attempt to fetch first cells holding a string of delimited values (by "|@|" or ";"). Additionally, if any selected column cell\'s holds a valid JSON, e.g. any simple lists of values (e.g ["pup","dog","canine"], and/or an object with any property where the JSON key name contains one of the following strings: "label, value, type, name". URL/URN or URI will be not taken in account.'), ]; $form['lod_options']['preview'] = [ '#type' => 'button', diff --git a/src/Plugin/Action/AmiStrawberryfieldCSVexport.php b/src/Plugin/Action/AmiStrawberryfieldCSVexport.php index 0d565ebe..55b8392b 100644 --- a/src/Plugin/Action/AmiStrawberryfieldCSVexport.php +++ b/src/Plugin/Action/AmiStrawberryfieldCSVexport.php @@ -373,7 +373,7 @@ protected function sendToFile($output) { $ami_set = TRUE; } $logger_channel = (string) $this->context['sandbox']['logger_channel'] ?? 'ami'; - $file_id = $this->AmiUtilityService->csv_save($data, 'node_uuid', TRUE, $ami_set, $logger_channel); + $file_id = $this->AmiUtilityService->csv_save($data, 'node_uuid', TRUE, $ami_set, FALSE, $logger_channel); if ($file_id && $this->configuration['create_ami_set'] && $this->context['sandbox']['ado_type_exists']) { $amisetdata = new \stdClass(); $amisetdata->plugin = 'spreadsheet'; diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index bf34e232..88d67297 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -952,6 +952,7 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { // Ignore status for updates if status_keep == TRUE. if ($status && is_string($status) && $status_keep == FALSE) { $node->set('moderation_state', $status); + $nodeValues['moderation_state'] = $status; $status = 0; } /** @var \Drupal\strawberryfield\Field\StrawberryFieldItemList $field */ diff --git a/src/Plugin/QueueWorker/LoDQueueWorker.php b/src/Plugin/QueueWorker/LoDQueueWorker.php index aa7a9ef6..4f92e8a6 100644 --- a/src/Plugin/QueueWorker/LoDQueueWorker.php +++ b/src/Plugin/QueueWorker/LoDQueueWorker.php @@ -202,9 +202,9 @@ public function processItem($data) { } } - $newdata['data'][0][$lod_route_column_name] = json_encode($lod, JSON_PRETTY_PRINT|JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE) ?? ''; + $newdata['data'][0][$lod_route_column_name] = json_encode($lod, JSON_PRETTY_PRINT|JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE|JSON_HEX_QUOT) ?? ''; $newdata['data'][0]['original'] = (string) $data->info['label']; - $newdata['data'][0]['csv_columns'] = json_encode((array)$data->info['csv_columns']) ?? ''; + $newdata['data'][0]['csv_columns'] = json_encode((array)$data->info['csv_columns'], JSON_HEX_QUOT) ?? ''; // Adds a "Checked" column used to mark manually reconciliated elements. $newdata['data'][0]['checked'] = FALSE; // Context data is simpler