From 575323e0a774f9a31fe9c1a16a2c5b78beb73af6 Mon Sep 17 00:00:00 2001 From: Luke Rodgers Date: Wed, 24 Jul 2024 03:43:56 +0100 Subject: [PATCH 1/8] Investigate performance improvements on bulk data handling --- Service/ChangeEncryptionKey.php | 65 ++++++++++++++++++++++++-------- dev/stub_sales_order_payment.php | 31 +++++++++++++++ dev/test.sh | 4 ++ 3 files changed, 84 insertions(+), 16 deletions(-) create mode 100644 dev/stub_sales_order_payment.php diff --git a/Service/ChangeEncryptionKey.php b/Service/ChangeEncryptionKey.php index cc78b1b..1624a57 100644 --- a/Service/ChangeEncryptionKey.php +++ b/Service/ChangeEncryptionKey.php @@ -32,13 +32,14 @@ public function setSkipSavedCreditCards($skipSavedCreditCards) } /** - * @param string $text + * @param $text + * @param int $type * @return void */ - private function writeOutput($text) + private function writeOutput($text, $type = OutputInterface::OUTPUT_NORMAL) { if ($this->output instanceof OutputInterface) { - $this->output->writeln($text); + $this->output->writeln($text, $type); } } @@ -69,22 +70,54 @@ protected function _reEncryptCreditCardNumbers() } $this->writeOutput('_reEncryptCreditCardNumbers - start'); $table = $this->getTable('sales_order_payment'); - $select = $this->getConnection()->select()->from($table, ['entity_id', 'cc_number_enc']); - $attributeValues = $this->getConnection()->fetchPairs($select); - // save new values - foreach ($attributeValues as $valueId => $value) { - // GENE CHANGE START - if (!$value) { - continue; + $batchSize = 10000; // TODO worth making configurable? + + $minId = (int) $this->getConnection()->fetchOne( + $this->getConnection()->select() + ->from($table, ['min(entity_id) AS min_id']) + ); + $maxId = (int) $this->getConnection()->fetchOne( + $this->getConnection()->select() + ->from($table, ['max(entity_id) AS max_id']) + ); + $totalCount = ($maxId - $minId) + 1; // the numbers are inclusive so add 1 + + $numberOfBatches = ceil($totalCount / $batchSize); + $this->writeOutput("_reEncryptCreditCardNumbers - total possible records: $totalCount"); + $this->writeOutput("_reEncryptCreditCardNumbers - batch size: $batchSize"); + $this->writeOutput("_reEncryptCreditCardNumbers - batch count: $numberOfBatches"); + + $updatedCount = 0; + $currentMin = $minId; + for ($i = 0; $i < $numberOfBatches; $i++) { + $currentMax = $currentMin + $batchSize; + $select = $this->getConnection()->select() + ->from($table, ['entity_id', 'cc_number_enc']) + ->where("entity_id >= ?", $currentMin) + ->where("entity_id < ?", $currentMax); + + $this->writeOutput((string)$select, OutputInterface::VERBOSITY_VERBOSE); + + $attributeValues = $this->getConnection()->fetchPairs($select); + foreach ($attributeValues as $valueId => $value) { + if (!$value) { + continue; + } + // TODO can we collect these, and do the updates in batches as the updates are the limiting factor + // https://stackoverflow.com/a/3466 + $this->getConnection()->update( + $table, + ['cc_number_enc' => $this->encryptor->encrypt($this->encryptor->decrypt($value))], + ['entity_id = ?' => (int)$valueId] + ); + $updatedCount++; } - // GENE CHANGE END - $this->getConnection()->update( - $table, - ['cc_number_enc' => $this->encryptor->encrypt($this->encryptor->decrypt($value))], - ['entity_id = ?' => (int)$valueId] - ); + $currentMin = $currentMax; + $this->writeOutput("running total records updated: $updatedCount", OutputInterface::VERBOSITY_VERBOSE); } + + $this->writeOutput("_reEncryptCreditCardNumbers - total records updated: $updatedCount"); $this->writeOutput('_reEncryptCreditCardNumbers - end'); } } diff --git a/dev/stub_sales_order_payment.php b/dev/stub_sales_order_payment.php new file mode 100644 index 0000000..49c551c --- /dev/null +++ b/dev/stub_sales_order_payment.php @@ -0,0 +1,31 @@ +getObjectManager(); + +/** @var \Magento\Framework\App\ResourceConnection $connection */ +$connection = $obj->get(\Magento\Framework\App\ResourceConnection::class); +$connection->getConnection()->query('SET FOREIGN_KEY_CHECKS = 0;'); +$connection->getConnection()->query('delete from sales_order_payment where parent_id=1;'); + +/** @var \Magento\Framework\Encryption\EncryptorInterface $encryptor */ +$encryptor = $obj->get(\Magento\Framework\Encryption\EncryptorInterface::class); +$ccNumberEnc = $encryptor->encrypt('cc_number_enc_abc123'); + +$rowData = "(1, '$ccNumberEnc'),"; + +$insertQueryNull = trim('INSERT INTO sales_order_payment (parent_id, cc_number_enc) VALUES ' . str_repeat($rowData, 10000), ", "); +for ($i = 0; $i < 100; $i++) { + $connection->getConnection()->query($insertQueryNull); +} +$connection->getConnection()->query('SET FOREIGN_KEY_CHECKS = 1;'); + +// Get the total count of records +$countSelect = $connection->getConnection()->select() + ->from('sales_order_payment', ['COUNT(*) AS total_count']); +$totalCount = $connection->getConnection()->fetchOne($countSelect); + +echo "There are $totalCount items in sales_order_payment" . PHP_EOL; +echo "DONE stub_sales_order_payment.php". PHP_EOL; diff --git a/dev/test.sh b/dev/test.sh index a844c16..27bf818 100755 --- a/dev/test.sh +++ b/dev/test.sh @@ -30,6 +30,10 @@ FAKE_RP_TOKEN=$(vendor/bin/n98-magerun2 dev:encrypt 'abc123') vendor/bin/n98-magerun2 db:query "update admin_user set rp_token='$FAKE_RP_TOKEN' where username='$ADMIN'" echo "Generated FAKE_RP_TOKEN=$FAKE_RP_TOKEN and assigned to $ADMIN" +echo "Stubbing in a large volume of data to sales_order_payment" +vendor/bin/n98-magerun2 db:query "delete from sales_order_payment where parent_id=1"; + + echo "";echo ""; echo "Verifying commands need to use --force" From a6c4e96e243d0158763f4fe5a38536ef4b6692fb Mon Sep 17 00:00:00 2001 From: Luke Rodgers Date: Wed, 24 Jul 2024 14:14:34 +0100 Subject: [PATCH 2/8] Bulk insert data --- Service/ChangeEncryptionKey.php | 89 ++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 7 deletions(-) diff --git a/Service/ChangeEncryptionKey.php b/Service/ChangeEncryptionKey.php index 1624a57..cfb6e68 100644 --- a/Service/ChangeEncryptionKey.php +++ b/Service/ChangeEncryptionKey.php @@ -99,20 +99,36 @@ protected function _reEncryptCreditCardNumbers() $this->writeOutput((string)$select, OutputInterface::VERBOSITY_VERBOSE); + /** + * @see https://github.com/magento/inventory/blob/750be5b07053331bc0bf3cb0f4d19366a67694f4/Inventory/Model/ResourceModel/SourceItem/SaveMultiple.php#L165-L188 + */ + $pairsToUpdate = []; $attributeValues = $this->getConnection()->fetchPairs($select); foreach ($attributeValues as $valueId => $value) { if (!$value) { continue; } - // TODO can we collect these, and do the updates in batches as the updates are the limiting factor - // https://stackoverflow.com/a/3466 - $this->getConnection()->update( - $table, - ['cc_number_enc' => $this->encryptor->encrypt($this->encryptor->decrypt($value))], - ['entity_id = ?' => (int)$valueId] - ); + // TODO i think the encryption is the limiting factor here + // TODO i can insert 1 million rows as asdfasdfsdfasdf in ~8 seconds locally but ~2 mins for the full process +// $pairsToUpdate[(int)$valueId] = 'asdfasdfsdfasdf'; + $pairsToUpdate[(int)$valueId] = $this->encryptor->encrypt($this->encryptor->decrypt($value)); $updatedCount++; } + + $columnsSql = $this->buildColumnsSqlPart(['entity_id', 'cc_number_enc']); + $valuesSql = $this->buildValuesSqlPart($pairsToUpdate); + $onDuplicateSql = $this->buildOnDuplicateSqlPart(['cc_number_enc']); + $bind = $this->getSqlBindData($pairsToUpdate); + + // todo worth checking this isnt artifically inflating the auto increment id + $insertSql = sprintf( + 'INSERT INTO `%s` (%s) VALUES %s %s', + $table, + $columnsSql, + $valuesSql, + $onDuplicateSql + ); + $this->getConnection()->query($insertSql, $bind); $currentMin = $currentMax; $this->writeOutput("running total records updated: $updatedCount", OutputInterface::VERBOSITY_VERBOSE); } @@ -120,4 +136,63 @@ protected function _reEncryptCreditCardNumbers() $this->writeOutput("_reEncryptCreditCardNumbers - total records updated: $updatedCount"); $this->writeOutput('_reEncryptCreditCardNumbers - end'); } + + /** + * Build sql query for on duplicate event + * + * @param array $fields + * @return string + */ + private function buildOnDuplicateSqlPart(array $fields): string + { + $connection = $this->getConnection(); + $processedFields = []; + foreach ($fields as $field) { + $processedFields[] = sprintf('%1$s = VALUES(%1$s)', $connection->quoteIdentifier($field)); + } + $sql = 'ON DUPLICATE KEY UPDATE ' . implode(', ', $processedFields); + return $sql; + } + + /** + * Build column sql part + * + * @param array $columns + * @return string + */ + private function buildColumnsSqlPart(array $columns): string + { + $connection = $this->getConnection(); + $processedColumns = array_map([$connection, 'quoteIdentifier'], $columns); + $sql = implode(', ', $processedColumns); + return $sql; + } + + /** + * Build sql query for values + * + * @param array $rows + * @return string + */ + private function buildValuesSqlPart(array $rows): string + { + $sql = rtrim(str_repeat('(?, ?), ', count($rows)), ', '); + return $sql; + } + + /** + * Get Sql bind data + * + * @param array $rows + * @return array + */ + private function getSqlBindData(array $rows): array + { + $bind = []; + foreach ($rows as $id => $encryptedValue) { + $bind[] = $id; + $bind[] = $encryptedValue; + } + return $bind; + } } From 542edf8fc27a77b686ca73188d9a0ca23d3d44f9 Mon Sep 17 00:00:00 2001 From: Luke Rodgers Date: Wed, 24 Jul 2024 14:19:53 +0100 Subject: [PATCH 3/8] run test in -vvv --- dev/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/test.sh b/dev/test.sh index 27bf818..a323244 100755 --- a/dev/test.sh +++ b/dev/test.sh @@ -38,7 +38,7 @@ echo "";echo ""; echo "Verifying commands need to use --force" -php bin/magento gene:encryption-key-manager:generate > test.txt || true; +php bin/magento gene:encryption-key-manager:generate -vvv > test.txt || true; if grep -q 'Run with --force' test.txt; then echo "PASS: generate needs to run with force" else From 19e1792e6f516a4a548431e313758a094aa775b2 Mon Sep 17 00:00:00 2001 From: Luke Rodgers Date: Wed, 24 Jul 2024 15:08:18 +0100 Subject: [PATCH 4/8] Update ChangeEncryptionKey.php --- Service/ChangeEncryptionKey.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Service/ChangeEncryptionKey.php b/Service/ChangeEncryptionKey.php index cfb6e68..245b551 100644 --- a/Service/ChangeEncryptionKey.php +++ b/Service/ChangeEncryptionKey.php @@ -115,6 +115,11 @@ protected function _reEncryptCreditCardNumbers() $updatedCount++; } + $currentMin = $currentMax; + if (empty($pairsToUpdate)) { + continue; + } + $columnsSql = $this->buildColumnsSqlPart(['entity_id', 'cc_number_enc']); $valuesSql = $this->buildValuesSqlPart($pairsToUpdate); $onDuplicateSql = $this->buildOnDuplicateSqlPart(['cc_number_enc']); @@ -129,7 +134,6 @@ protected function _reEncryptCreditCardNumbers() $onDuplicateSql ); $this->getConnection()->query($insertSql, $bind); - $currentMin = $currentMax; $this->writeOutput("running total records updated: $updatedCount", OutputInterface::VERBOSITY_VERBOSE); } From 7d5f4f57cf10abb0f2e716116ae925ef9490db9b Mon Sep 17 00:00:00 2001 From: Luke Rodgers Date: Thu, 25 Jul 2024 07:03:58 +0100 Subject: [PATCH 5/8] Stub in sales order payment data 1 million rows --- dev/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/test.sh b/dev/test.sh index e4d8993..33c2b05 100755 --- a/dev/test.sh +++ b/dev/test.sh @@ -48,7 +48,7 @@ vendor/bin/n98-magerun2 db:query "insert into fake_json_table(text_column) value vendor/bin/n98-magerun2 db:query "select * from fake_json_table"; echo "Stubbing in a large volume of data to sales_order_payment" -vendor/bin/n98-magerun2 db:query "delete from sales_order_payment where parent_id=1"; +php vendor/gene/module-encryption-key-manager/dev/stub_sales_order_payment.php echo "";echo ""; From b71c23b446471cc402acf9a1b8b9f8ba0b4ed76e Mon Sep 17 00:00:00 2001 From: Luke Rodgers Date: Thu, 25 Jul 2024 09:36:43 +0100 Subject: [PATCH 6/8] debug output to circleci --- dev/test.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/test.sh b/dev/test.sh index 33c2b05..60bd5c9 100755 --- a/dev/test.sh +++ b/dev/test.sh @@ -54,12 +54,13 @@ echo "";echo ""; echo "Verifying commands need to use --force" -php bin/magento gene:encryption-key-manager:generate -vvv > test.txt || true; +time php bin/magento gene:encryption-key-manager:generate -vvv > test.txt || true; if grep -q 'Run with --force' test.txt; then echo "PASS: generate needs to run with force" else echo "FAIL: generate needs to run with force" && false fi +cat test.txt php bin/magento gene:encryption-key-manager:invalidate > test.txt || true if grep -q 'Run with --force' test.txt; then From 73b036e741a80ca4f2e32dafe803f00f82bd533b Mon Sep 17 00:00:00 2001 From: Luke Rodgers Date: Thu, 25 Jul 2024 09:41:49 +0100 Subject: [PATCH 7/8] debug output to circleci --- dev/test.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/test.sh b/dev/test.sh index 60bd5c9..ffe3da9 100755 --- a/dev/test.sh +++ b/dev/test.sh @@ -54,13 +54,12 @@ echo "";echo ""; echo "Verifying commands need to use --force" -time php bin/magento gene:encryption-key-manager:generate -vvv > test.txt || true; +php bin/magento gene:encryption-key-manager:generate -vvv > test.txt || true; if grep -q 'Run with --force' test.txt; then echo "PASS: generate needs to run with force" else echo "FAIL: generate needs to run with force" && false fi -cat test.txt php bin/magento gene:encryption-key-manager:invalidate > test.txt || true if grep -q 'Run with --force' test.txt; then @@ -102,7 +101,7 @@ echo "";echo ""; echo "Generating a new encryption key" grep -q "$ENCRYPTED_ENV_VALUE" app/etc/env.php -php bin/magento gene:encryption-key-manager:generate --force > test.txt +time php bin/magento gene:encryption-key-manager:generate --force > test.txt if grep -q "$ENCRYPTED_ENV_VALUE" app/etc/env.php; then echo "FAIL: The old encrypted value in env.php was not updated" && false fi @@ -112,6 +111,7 @@ grep -q '_reEncryptSystemConfigurationValues - end' test.txt grep -q '_reEncryptCreditCardNumbers - start' test.txt grep -q '_reEncryptCreditCardNumbers - end' test.txt echo "PASS" +cat test.txt echo "";echo ""; echo "Generating a new encryption key - skipping _reEncryptCreditCardNumbers" From 12700bba590361c5b02d8e2a28e0c7ba1b94bb48 Mon Sep 17 00:00:00 2001 From: Luke Rodgers Date: Thu, 25 Jul 2024 10:20:08 +0100 Subject: [PATCH 8/8] update test case to 2.5 million orders --- dev/stub_sales_order_payment.php | 2 +- dev/test.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/stub_sales_order_payment.php b/dev/stub_sales_order_payment.php index 49c551c..8ac3584 100644 --- a/dev/stub_sales_order_payment.php +++ b/dev/stub_sales_order_payment.php @@ -17,7 +17,7 @@ $rowData = "(1, '$ccNumberEnc'),"; $insertQueryNull = trim('INSERT INTO sales_order_payment (parent_id, cc_number_enc) VALUES ' . str_repeat($rowData, 10000), ", "); -for ($i = 0; $i < 100; $i++) { +for ($i = 0; $i < 250; $i++) { $connection->getConnection()->query($insertQueryNull); } $connection->getConnection()->query('SET FOREIGN_KEY_CHECKS = 1;'); diff --git a/dev/test.sh b/dev/test.sh index ffe3da9..c108ef5 100755 --- a/dev/test.sh +++ b/dev/test.sh @@ -49,6 +49,7 @@ vendor/bin/n98-magerun2 db:query "select * from fake_json_table"; echo "Stubbing in a large volume of data to sales_order_payment" php vendor/gene/module-encryption-key-manager/dev/stub_sales_order_payment.php +vendor/bin/n98-magerun2 db:query "select cc_number_enc from sales_order_payment where parent_id=1 limit 5"; echo "";echo ""; @@ -112,6 +113,7 @@ grep -q '_reEncryptCreditCardNumbers - start' test.txt grep -q '_reEncryptCreditCardNumbers - end' test.txt echo "PASS" cat test.txt +vendor/bin/n98-magerun2 db:query "select cc_number_enc from sales_order_payment where parent_id=1 limit 5"; echo "";echo ""; echo "Generating a new encryption key - skipping _reEncryptCreditCardNumbers"