diff --git a/cdk/lib/cdk-stack.ts b/cdk/lib/cdk-stack.ts index 1fad4b74..b5a28084 100644 --- a/cdk/lib/cdk-stack.ts +++ b/cdk/lib/cdk-stack.ts @@ -91,6 +91,16 @@ export class S3ECPythonGithub extends cdk.Stack { } ) + // New bucket for static test objects + const S3ECStaticTestObjectsBucket = new Bucket( + this, + "S3ECStaticTestObjectsBucket", + { + bucketName: "s3ec-static-test-objects", + blockPublicAccess: new BlockPublicAccess(AccessConfiguration) + } + ) + // S3 bucket policy const S3ECGithubS3BucketPolicy = new ManagedPolicy( this, @@ -110,6 +120,7 @@ export class S3ECPythonGithub extends cdk.Stack { resources: [ S3ECGithubTestS3Bucket.bucketArn + "/*", // object-level permissions need this extra path S3ECTestServerGithubBucket.bucketArn + "/*", // Add permissions for the new test-server bucket + S3ECStaticTestObjectsBucket.bucketArn + "/*", // Add permissions for static test objects bucket "arn:aws:s3:::aws-net-sdk-*/*" // permission for object inside S3EC .net bucket. For S3EC-NET repo ], }), @@ -125,6 +136,7 @@ export class S3ECPythonGithub extends cdk.Stack { resources: [ S3ECGithubTestS3Bucket.bucketArn, S3ECTestServerGithubBucket.bucketArn, // Add permissions for the new test-server bucket + S3ECStaticTestObjectsBucket.bucketArn, // Add permissions for static test objects bucket "arn:aws:s3:::aws-net-sdk-*", // permission for S3EC .net bucket. 
For S3EC-NET repo ], }), diff --git a/cdk/package-lock.json b/cdk/package-lock.json index 4f44562c..fa174491 100644 --- a/cdk/package-lock.json +++ b/cdk/package-lock.json @@ -8,7 +8,7 @@ "name": "cdk", "version": "0.1.0", "dependencies": { - "aws-cdk-lib": "2.92.0", + "aws-cdk-lib": "^2.240.0", "constructs": "^10.0.0", "source-map-support": "^0.5.21" }, @@ -40,16 +40,9 @@ } }, "node_modules/@aws-cdk/asset-awscli-v1": { - "version": "2.2.247", - "resolved": "https://registry.npmjs.org/@aws-cdk/asset-awscli-v1/-/asset-awscli-v1-2.2.247.tgz", - "integrity": "sha512-PGFzztdu5YozUgoUd8gq5qi1FR3EYMjNrl5JFrAlYh2w1PcTfExEwqDzZy9z6uzogEJKwQJDgyhWe+OcZzQqFg==", - "license": "Apache-2.0" - }, - "node_modules/@aws-cdk/asset-kubectl-v20": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/@aws-cdk/asset-kubectl-v20/-/asset-kubectl-v20-2.1.4.tgz", - "integrity": "sha512-Ps2MkmjYgMyflagqQ4dgTElc7Vwpqj8spw8dQVFiSeaaMPsuDSNsPax3/HjuDuwqsmLdaCZc6umlxYLpL0kYDA==", - "license": "Apache-2.0" + "version": "2.2.263", + "resolved": "https://registry.npmjs.org/@aws-cdk/asset-awscli-v1/-/asset-awscli-v1-2.2.263.tgz", + "integrity": "sha512-X9JvcJhYcb7PHs8R7m4zMablO5C9PGb/hYfLnxds9h/rKJu6l7MiXE/SabCibuehxPnuO/vk+sVVJiUWrccarQ==" }, "node_modules/@aws-cdk/asset-node-proxy-agent-v6": { "version": "2.1.0", @@ -57,6 +50,41 @@ "integrity": "sha512-7bY3J8GCVxLupn/kNmpPc5VJz8grx+4RKfnnJiO1LG+uxkZfANZG3RMHhE+qQxxwkyQ9/MfPtTpf748UhR425A==", "license": "Apache-2.0" }, + "node_modules/@aws-cdk/cloud-assembly-schema": { + "version": "50.4.0", + "resolved": "https://registry.npmjs.org/@aws-cdk/cloud-assembly-schema/-/cloud-assembly-schema-50.4.0.tgz", + "integrity": "sha512-9Cplwc5C+SNe3hMfqZET7gXeM68tiH2ytQytCi+zz31Bn7O3GAgAnC2dYe+HWnZAgVH788ZkkBwnYXkeqx7v4g==", + "bundleDependencies": [ + "jsonschema", + "semver" + ], + "dependencies": { + "jsonschema": "~1.4.1", + "semver": "^7.7.3" + }, + "engines": { + "node": ">= 18.0.0" + } + }, + 
"node_modules/@aws-cdk/cloud-assembly-schema/node_modules/jsonschema": { + "version": "1.4.1", + "inBundle": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/@aws-cdk/cloud-assembly-schema/node_modules/semver": { + "version": "7.7.3", + "inBundle": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/@babel/code-frame": { "version": "7.27.1", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz", @@ -1230,11 +1258,12 @@ } }, "node_modules/aws-cdk-lib": { - "version": "2.92.0", - "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.92.0.tgz", - "integrity": "sha512-J+SUFSnOt9u2GbY5QIABgjGNiw8bL/v0S3zsPhhO1dVwK+G7oE+bhLcAi3iILrw2sIpirNWH9K3W0by9K+cyMw==", + "version": "2.240.0", + "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.240.0.tgz", + "integrity": "sha512-3dXmUnPB5kK0VgrNHOlV3jiQM4Dungukk/CV91nclO2lgNcrGyigauJdzmz9sOmI1gbKJJ2SRAotaXityzZMRw==", "bundleDependencies": [ "@balena/dockerignore", + "@aws-cdk/cloud-assembly-api", "case", "fs-extra", "ignore", @@ -1243,29 +1272,69 @@ "punycode", "semver", "table", - "yaml" + "yaml", + "mime-types" ], - "license": "Apache-2.0", "dependencies": { - "@aws-cdk/asset-awscli-v1": "^2.2.200", - "@aws-cdk/asset-kubectl-v20": "^2.1.2", - "@aws-cdk/asset-node-proxy-agent-v6": "^2.0.1", + "@aws-cdk/asset-awscli-v1": "2.2.263", + "@aws-cdk/asset-node-proxy-agent-v6": "^2.1.0", + "@aws-cdk/cloud-assembly-api": "^2.0.1", + "@aws-cdk/cloud-assembly-schema": "^50.3.0", "@balena/dockerignore": "^1.0.2", "case": "1.6.3", - "fs-extra": "^11.1.1", - "ignore": "^5.2.4", - "jsonschema": "^1.4.1", - "minimatch": "^3.1.2", - "punycode": "^2.3.0", - "semver": "^7.5.4", - "table": "^6.8.1", + "fs-extra": "^11.3.3", + "ignore": "^5.3.2", + "jsonschema": "^1.5.0", + "mime-types": "^2.1.35", + "minimatch": "^10.2.1", + "punycode": "^2.3.1", + "semver": "^7.7.4", + "table": 
"^6.9.0", "yaml": "1.10.2" }, "engines": { - "node": ">= 14.15.0" + "node": ">= 18.0.0" }, "peerDependencies": { - "constructs": "^10.0.0" + "constructs": "^10.5.0" + } + }, + "node_modules/aws-cdk-lib/node_modules/@aws-cdk/cloud-assembly-api": { + "version": "2.0.1", + "bundleDependencies": [ + "jsonschema", + "semver" + ], + "inBundle": true, + "license": "Apache-2.0", + "dependencies": { + "jsonschema": "~1.4.1", + "semver": "^7.7.3" + }, + "engines": { + "node": ">= 18.0.0" + }, + "peerDependencies": { + "@aws-cdk/cloud-assembly-schema": ">=50.3.0" + } + }, + "node_modules/aws-cdk-lib/node_modules/@aws-cdk/cloud-assembly-api/node_modules/jsonschema": { + "version": "1.4.1", + "inBundle": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/aws-cdk-lib/node_modules/@aws-cdk/cloud-assembly-api/node_modules/semver": { + "version": "7.7.3", + "inBundle": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" } }, "node_modules/aws-cdk-lib/node_modules/@balena/dockerignore": { @@ -1274,14 +1343,14 @@ "license": "Apache-2.0" }, "node_modules/aws-cdk-lib/node_modules/ajv": { - "version": "8.12.0", + "version": "8.18.0", "inBundle": true, "license": "MIT", "dependencies": { - "fast-deep-equal": "^3.1.1", + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2", - "uri-js": "^4.2.2" + "require-from-string": "^2.0.2" }, "funding": { "type": "github", @@ -1319,17 +1388,22 @@ } }, "node_modules/aws-cdk-lib/node_modules/balanced-match": { - "version": "1.0.2", + "version": "4.0.4", "inBundle": true, - "license": "MIT" + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } }, "node_modules/aws-cdk-lib/node_modules/brace-expansion": { - "version": "1.1.11", + "version": "5.0.3", "inBundle": true, "license": "MIT", "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" + "balanced-match": "^4.0.2" + }, + 
"engines": { + "node": "18 || 20 || >=22" } }, "node_modules/aws-cdk-lib/node_modules/case": { @@ -1356,11 +1430,6 @@ "inBundle": true, "license": "MIT" }, - "node_modules/aws-cdk-lib/node_modules/concat-map": { - "version": "0.0.1", - "inBundle": true, - "license": "MIT" - }, "node_modules/aws-cdk-lib/node_modules/emoji-regex": { "version": "8.0.0", "inBundle": true, @@ -1371,8 +1440,23 @@ "inBundle": true, "license": "MIT" }, + "node_modules/aws-cdk-lib/node_modules/fast-uri": { + "version": "3.1.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "inBundle": true, + "license": "BSD-3-Clause" + }, "node_modules/aws-cdk-lib/node_modules/fs-extra": { - "version": "11.1.1", + "version": "11.3.3", "inBundle": true, "license": "MIT", "dependencies": { @@ -1390,7 +1474,7 @@ "license": "ISC" }, "node_modules/aws-cdk-lib/node_modules/ignore": { - "version": "5.2.4", + "version": "5.3.2", "inBundle": true, "license": "MIT", "engines": { @@ -1411,7 +1495,7 @@ "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/jsonfile": { - "version": "6.1.0", + "version": "6.2.0", "inBundle": true, "license": "MIT", "dependencies": { @@ -1422,7 +1506,7 @@ } }, "node_modules/aws-cdk-lib/node_modules/jsonschema": { - "version": "1.4.1", + "version": "1.5.0", "inBundle": true, "license": "MIT", "engines": { @@ -1434,30 +1518,41 @@ "inBundle": true, "license": "MIT" }, - "node_modules/aws-cdk-lib/node_modules/lru-cache": { - "version": "6.0.0", + "node_modules/aws-cdk-lib/node_modules/mime-db": { + "version": "1.52.0", "inBundle": true, - "license": "ISC", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/aws-cdk-lib/node_modules/mime-types": { + "version": "2.1.35", + "inBundle": true, + "license": "MIT", "dependencies": { - "yallist": "^4.0.0" + "mime-db": "1.52.0" }, "engines": { - "node": ">=10" + "node": ">= 0.6" } }, 
"node_modules/aws-cdk-lib/node_modules/minimatch": { - "version": "3.1.2", + "version": "10.2.2", "inBundle": true, - "license": "ISC", + "license": "BlueOak-1.0.0", "dependencies": { - "brace-expansion": "^1.1.7" + "brace-expansion": "^5.0.2" }, "engines": { - "node": "*" + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" } }, "node_modules/aws-cdk-lib/node_modules/punycode": { - "version": "2.3.0", + "version": "2.3.1", "inBundle": true, "license": "MIT", "engines": { @@ -1473,12 +1568,9 @@ } }, "node_modules/aws-cdk-lib/node_modules/semver": { - "version": "7.5.4", + "version": "7.7.4", "inBundle": true, "license": "ISC", - "dependencies": { - "lru-cache": "^6.0.0" - }, "bin": { "semver": "bin/semver.js" }, @@ -1527,7 +1619,7 @@ } }, "node_modules/aws-cdk-lib/node_modules/table": { - "version": "6.8.1", + "version": "6.9.0", "inBundle": true, "license": "BSD-3-Clause", "dependencies": { @@ -1542,26 +1634,13 @@ } }, "node_modules/aws-cdk-lib/node_modules/universalify": { - "version": "2.0.0", + "version": "2.0.1", "inBundle": true, "license": "MIT", "engines": { "node": ">= 10.0.0" } }, - "node_modules/aws-cdk-lib/node_modules/uri-js": { - "version": "4.4.1", - "inBundle": true, - "license": "BSD-2-Clause", - "dependencies": { - "punycode": "^2.1.0" - } - }, - "node_modules/aws-cdk-lib/node_modules/yallist": { - "version": "4.0.0", - "inBundle": true, - "license": "ISC" - }, "node_modules/aws-cdk-lib/node_modules/yaml": { "version": "1.10.2", "inBundle": true, @@ -1690,12 +1769,14 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, "license": "MIT" }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", "integrity": 
"sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "dev": true, "license": "MIT", "dependencies": { "balanced-match": "^1.0.0", @@ -1925,13 +2006,13 @@ "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, "license": "MIT" }, "node_modules/constructs": { - "version": "10.4.2", - "resolved": "https://registry.npmjs.org/constructs/-/constructs-10.4.2.tgz", - "integrity": "sha512-wsNxBlAott2qg8Zv87q3eYZYgheb9lchtBfjHzzLHtXbttwSrHPs1NNQbBrmbb1YZvYg2+Vh0Dor76w4mFxJkA==", - "license": "Apache-2.0" + "version": "10.5.1", + "resolved": "https://registry.npmjs.org/constructs/-/constructs-10.5.1.tgz", + "integrity": "sha512-f/TfFXiS3G/yVIXDjOQn9oTlyu9Wo7Fxyjj7lb8r92iO81jR2uST+9MstxZTmDGx/CgIbxCXkFXgupnLTNxQZg==" }, "node_modules/convert-source-map": { "version": "2.0.0", @@ -2038,11 +2119,10 @@ } }, "node_modules/diff": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", - "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.4.tgz", + "integrity": "sha512-X07nttJQkwkfKfvTPG/KSnE2OMdcUCao6+eXF3wmnIQRn2aPAHH3VxDbDOdegkd6JbPsXqShpvEOHfAT+nCNwQ==", "dev": true, - "license": "BSD-3-Clause", "engines": { "node": ">=0.3.1" } @@ -3209,11 +3289,10 @@ "license": "MIT" }, "node_modules/js-yaml": { - "version": "3.14.1", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", - "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "version": "3.14.2", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz", + "integrity": 
"sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==", "dev": true, - "license": "MIT", "dependencies": { "argparse": "^1.0.7", "esprima": "^4.0.0" @@ -3390,10 +3469,10 @@ } }, "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "license": "ISC", + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.3.tgz", + "integrity": "sha512-M2GCs7Vk83NxkUyQV1bkABc4yxgz9kILhHImZiBPAZ9ybuvCb0/H7lEl5XvIg3g+9d4eNotkZA5IWwYl0tibaA==", + "dev": true, "dependencies": { "brace-expansion": "^1.1.7" }, @@ -3783,6 +3862,7 @@ "version": "6.3.1", "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, "license": "ISC", "bin": { "semver": "bin/semver.js" diff --git a/cdk/package.json b/cdk/package.json index f1e769db..7cc118ae 100644 --- a/cdk/package.json +++ b/cdk/package.json @@ -13,14 +13,14 @@ "devDependencies": { "@types/jest": "^29.5.3", "@types/node": "20.4.10", + "aws-cdk": "2.92.0", "jest": "^29.6.2", "ts-jest": "^29.1.1", - "aws-cdk": "2.92.0", "ts-node": "^10.9.1", "typescript": "~5.1.6" }, "dependencies": { - "aws-cdk-lib": "2.92.0", + "aws-cdk-lib": "^2.240.0", "constructs": "^10.0.0", "source-map-support": "^0.5.21" } diff --git a/pyproject.toml b/pyproject.toml index b7489a03..ef2b0121 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ test = [ dev = [ "black>=24.3.0,<27.0.0", "ruff>=0.3.0", + "boto3-stubs~=1.42.49", ] [build-system] @@ -56,3 +57,4 @@ known-first-party = ["s3_encryption"] [tool.ruff.lint.per-file-ignores] "test/**/*.py" = ["D100", "D101", "D102", "D103", "D104", "E501"] +"src/s3_encryption/pipelines.py" = ["E501"] diff --git 
a/src/s3_encryption/__init__.py b/src/s3_encryption/__init__.py index 064096bb..a3558195 100644 --- a/src/s3_encryption/__init__.py +++ b/src/s3_encryption/__init__.py @@ -9,6 +9,7 @@ from botocore.response import StreamingBody from .exceptions import S3EncryptionClientError +from .instruction_file import parse_instruction_file from .materials.crypto_materials_manager import ( AbstractCryptoMaterialsManager, DefaultCryptoMaterialsManager, @@ -25,6 +26,16 @@ class S3EncryptionClientConfig: keyring: AbstractKeyring cmm: AbstractCryptoMaterialsManager = field() + ##= specification/s3-encryption/data-format/metadata-strategy.md#instruction-file + ##= type=implementation + ##% The S3EC SHOULD support providing a custom Instruction File suffix + ##% on GetObject requests, regardless of whether or not re-encryption is supported. + + ##= specification/s3-encryption/data-format/metadata-strategy.md#instruction-file + ##= type=implementation + ##% The default Instruction File behavior uses the same S3 object key + ##% as its associated object suffixed with ".instruction". + instruction_file_suffix: str = field(default=".instruction") @cmm.default def _default_cmm_for_keyring(self): @@ -56,6 +67,11 @@ def on_put_object_before_call(self, params, **kwargs): params: Dictionary of parameters for the PutObject call (after serialization) **kwargs: Additional event arguments """ + if getattr(self._context, "instruction_file_mode", False): + raise S3EncryptionClientError( + "Instruction file mode is exclusively for reading instruction files " + "and not supported in put_object!" 
+ ) # At this point, boto3 has already serialized the Body # Extract the serialized body from the request body = params.get("body") @@ -101,6 +117,11 @@ def on_get_object_after_call(self, parsed, **kwargs): parsed: Dictionary containing the parsed response **kwargs: Additional event arguments (includes 'params' with request parameters) """ + # Check if plaintext mode is enabled via thread-local flag + if getattr(self._context, "instruction_file_mode", False): + self.process_instruction_file(parsed) + return + # Get encryption context from thread-local storage (set by get_object wrapper) encryption_context = getattr(self._context, "encryption_context", None) @@ -114,12 +135,48 @@ def on_get_object_after_call(self, parsed, **kwargs): } # Create a pipeline and decrypt the data - pipeline = GetEncryptedObjectPipeline(self.config.cmm) - decrypted_data = pipeline.decrypt(response, encryption_context) + pipeline = GetEncryptedObjectPipeline( + self.config.cmm, + s3_client=getattr(self._context, "s3_client", None), + ) + decrypted_data = pipeline.decrypt( + response, + encryption_context, + bucket=getattr(self._context, "bucket", None), + key=getattr(self._context, "key", None), + instruction_suffix=self.config.instruction_file_suffix, + ) - # Replace body with decrypted data + # Create a new streaming body with the decrypted data stream = io.BytesIO(decrypted_data) streaming_body = StreamingBody(stream, len(decrypted_data)) + + # Replace body with decrypted data + parsed["Body"] = streaming_body + + def process_instruction_file(self, parsed): + """Process instruction file in plaintext mode. + + Validates the instruction file marker, parses the JSON body, + and updates the response metadata with parsed content. 
+ + Args: + parsed: Dictionary containing the parsed response + """ + instruction_key = getattr(self._context, "key", None) + + # In plaintext mode, parse instruction file and append to metadata + existing_metadata = parsed.get("Metadata", {}) + instruction_data = parsed.get("Body").read() + instruction_metadata = parse_instruction_file(instruction_data, instruction_key) + + # Append parsed instruction file content to existing metadata + existing_metadata.update(instruction_metadata) + parsed["Metadata"] = existing_metadata + + # Clear the body since instruction files shouldn't return body content + stream = io.BytesIO(b"") + streaming_body = StreamingBody(stream, 0) parsed["Body"] = streaming_body @@ -143,6 +200,9 @@ def __attrs_post_init__(self): # Create the plugin object.__setattr__(self, "_plugin", S3EncryptionClientPlugin(self.config)) + # Expose plugin context on wrapped client for instruction file fetching + self.wrapped_s3_client._s3ec_plugin_context = self._plugin._context + # Register event handlers using boto3's event system event_system = self.wrapped_s3_client.meta.events event_system.register("before-call.s3.PutObject", self._plugin.on_put_object_before_call) @@ -207,6 +267,11 @@ def get_object(self, **kwargs): # Store encryption context in thread-local storage for the event handler self._plugin._context.encryption_context = encryption_context + # Store wrapped client in thread-local storage for + # the event handler to fetch instruction files + self._plugin._context.s3_client = self.wrapped_s3_client + self._plugin._context.bucket = kwargs.get("Bucket") + self._plugin._context.key = kwargs.get("Key") try: return self.wrapped_s3_client.get_object(**kwargs) @@ -217,6 +282,9 @@ def get_object(self, **kwargs): # Wrap any unexpected errors during decryption raise S3EncryptionClientError(f"Failed to decrypt object: {str(e)}") from e finally: - # Clean up thread-local storage - if hasattr(self._plugin._context, "encryption_context"): - 
delattr(self._plugin._context, "encryption_context") + # Clean up thread-local storage; + # do not clean up the client as it is not thread local only + attrs = ["encryption_context", "bucket", "key"] + for attr in attrs: + if hasattr(self._plugin._context, attr): + delattr(self._plugin._context, attr) diff --git a/src/s3_encryption/instruction_file.py b/src/s3_encryption/instruction_file.py new file mode 100644 index 00000000..351c4b15 --- /dev/null +++ b/src/s3_encryption/instruction_file.py @@ -0,0 +1,117 @@ +# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Instruction file handling for S3 Encryption Client. + +This module provides utilities for fetching and parsing instruction files +that contain encryption metadata for S3 objects. +""" + +import json +from typing import Any + +from .exceptions import S3EncryptionClientError +from .metadata import VALID_S3EC_METADATA_KEYS + + +def parse_instruction_file(instruction_data: bytes, key: str) -> dict[str, Any]: + """Parse and validate instruction file data. + + This function strictly validates that: + 1. The instruction file body is valid JSON + 2. The JSON contains only S3 Encryption Client metadata keys + + Args: + instruction_data: Raw bytes from instruction file body + key: Instruction file key (for error messages) + + Returns: + dict: Parsed JSON metadata from instruction file + + Raises: + S3EncryptionClientError: If the instruction file is not valid JSON + or contains non-S3EC metadata keys + """ + ##= specification/s3-encryption/data-format/metadata-strategy.md#instruction-file + ##= type=implementation + ##% The content metadata stored in the Instruction File MUST be serialized to a JSON string. 
+ + # Validate JSON format + try: + metadata = json.loads(instruction_data) + except (json.JSONDecodeError, UnicodeDecodeError) as e: + raise S3EncryptionClientError(f"Instruction file is not valid JSON: {key}") from e + + # Validate that it's a dictionary + if not isinstance(metadata, dict): + raise S3EncryptionClientError( + f"Instruction file must contain a JSON object, got {type(metadata).__name__}: {key}" + ) + + # Validate that all keys are S3EC metadata keys + ##= specification/s3-encryption/data-format/metadata-strategy.md#instruction-file + ##= type=implementation + ##% The serialized JSON string MUST be the only contents of the Instruction File. + invalid_keys = set(metadata.keys()) - VALID_S3EC_METADATA_KEYS + if invalid_keys: + raise S3EncryptionClientError( + f"Instruction file contains invalid keys: {invalid_keys} in {key}" + ) + + return metadata + + +def fetch_instruction_file(s3_client, bucket: str, key: str) -> dict[str, Any]: + """Fetch and parse an instruction file from S3. + + This function: + 1. Fetches the instruction file in plaintext mode + 2. 
Returns the parsed metadata from the response Metadata field + + S3EncryptionClientPlugin's event handler (on_get_object_after_call) handles: + - Parsing and validating the instruction file content + - Placing parsed metadata in response["Metadata"] + + Args: + s3_client: Boto3 S3 client to use for fetching + bucket: S3 bucket name + key: S3 object key + Returns: + dict: Parsed JSON metadata from instruction file + + Raises: + S3EncryptionClientError: If the instruction file is not valid JSON, + or contains non-S3EC metadata keys + """ + # Set plaintext mode flag in thread-local context before calling get_object + # This will be checked by the event handler to skip decryption + if hasattr(s3_client, "_s3ec_plugin_context"): + s3_client._s3ec_plugin_context.instruction_file_mode = True + s3_client._s3ec_plugin_context.key = key + else: + raise S3EncryptionClientError( + f"Could not fetch instruction file without " + f"the S3 Encryption Client Plugin installed. Instruction key: {key}" + ) + + try: + response = s3_client.get_object(Bucket=bucket, Key=key) + finally: + # Clear the flags after the call + if hasattr(s3_client, "_s3ec_plugin_context"): + s3_client._s3ec_plugin_context.instruction_file_mode = False + + # In plaintext mode, the event handler places parsed metadata in Metadata field + metadata = response.get("Metadata", {}) + + # Verify metadata is not empty + if not metadata: + raise S3EncryptionClientError(f"Instruction file returned empty metadata: {key}") + + # Verify metadata contains at least one S3EC key + has_s3ec_key = any(key in VALID_S3EC_METADATA_KEYS for key in metadata) + if not has_s3ec_key: + raise S3EncryptionClientError( + f"Instruction file metadata does not contain any S3EC keys: {key}" + ) + + return metadata diff --git a/src/s3_encryption/metadata.py b/src/s3_encryption/metadata.py index f42feadb..5c8bbda3 100644 --- a/src/s3_encryption/metadata.py +++ b/src/s3_encryption/metadata.py @@ -48,6 +48,15 @@ class ObjectMetadata: # Marker for 
instruction files instruction_file: str | None = field(default=None) + # V3 format fields (compressed) + content_cipher_v3: str | None = field(default=None) + encrypted_data_key_v3: str | None = field(default=None) + mat_desc_v3: str | None = field(default=None) + encryption_context_v3: str | None = field(default=None) + encrypted_data_key_algorithm_v3: str | None = field(default=None) + key_commitment_v3: str | None = field(default=None) + message_id_v3: str | None = field(default=None) + # Constants for metadata keys ENCRYPTED_DATA_KEY_V1 = "x-amz-key" ENCRYPTED_DATA_KEY_V2 = "x-amz-key-v2" @@ -58,6 +67,15 @@ class ObjectMetadata: CONTENT_CIPHER_TAG_LENGTH = "x-amz-tag-len" INSTRUCTION_FILE = "x-amz-crypto-instr-file" + # V3 format constants (compressed) + CONTENT_CIPHER_V3 = "x-amz-c" + ENCRYPTED_DATA_KEY_V3 = "x-amz-3" + MAT_DESC_V3 = "x-amz-m" + ENCRYPTION_CONTEXT_V3 = "x-amz-t" + ENCRYPTED_DATA_KEY_ALGORITHM_V3 = "x-amz-w" + KEY_COMMITMENT_V3 = "x-amz-d" + MESSAGE_ID_V3 = "x-amz-i" + @classmethod def from_dict(cls, metadata_dict: dict[str, Any]) -> "ObjectMetadata": """Create an ObjectMetadata instance from a dictionary. 
@@ -84,6 +102,13 @@ def from_dict(cls, metadata_dict: dict[str, Any]) -> "ObjectMetadata": content_cipher=metadata_dict.get(cls.CONTENT_CIPHER), content_cipher_tag_length=metadata_dict.get(cls.CONTENT_CIPHER_TAG_LENGTH), instruction_file=metadata_dict.get(cls.INSTRUCTION_FILE), + content_cipher_v3=metadata_dict.get(cls.CONTENT_CIPHER_V3), + encrypted_data_key_v3=metadata_dict.get(cls.ENCRYPTED_DATA_KEY_V3), + mat_desc_v3=metadata_dict.get(cls.MAT_DESC_V3), + encryption_context_v3=metadata_dict.get(cls.ENCRYPTION_CONTEXT_V3), + encrypted_data_key_algorithm_v3=metadata_dict.get(cls.ENCRYPTED_DATA_KEY_ALGORITHM_V3), + key_commitment_v3=metadata_dict.get(cls.KEY_COMMITMENT_V3), + message_id_v3=metadata_dict.get(cls.MESSAGE_ID_V3), ) def to_dict(self) -> dict[str, str]: @@ -118,4 +143,145 @@ def to_dict(self) -> dict[str, str]: if self.instruction_file is not None: result[self.INSTRUCTION_FILE] = self.instruction_file + if self.content_cipher_v3 is not None: + result[self.CONTENT_CIPHER_V3] = self.content_cipher_v3 + + if self.encrypted_data_key_v3 is not None: + result[self.ENCRYPTED_DATA_KEY_V3] = self.encrypted_data_key_v3 + + if self.mat_desc_v3 is not None: + result[self.MAT_DESC_V3] = self.mat_desc_v3 + + if self.encryption_context_v3 is not None: + result[self.ENCRYPTION_CONTEXT_V3] = self.encryption_context_v3 + + if self.encrypted_data_key_algorithm_v3 is not None: + result[self.ENCRYPTED_DATA_KEY_ALGORITHM_V3] = self.encrypted_data_key_algorithm_v3 + + if self.key_commitment_v3 is not None: + result[self.KEY_COMMITMENT_V3] = self.key_commitment_v3 + + if self.message_id_v3 is not None: + result[self.MESSAGE_ID_V3] = self.message_id_v3 + return result + + def is_v1_format(self) -> bool: + """Check if metadata is in V1 format. 
+ + Returns: + bool: True if metadata contains V1 keys and excludes V2/V3 keys + """ + return ( + self.content_iv is not None + and self.encrypted_data_key_context is not None + and self.encrypted_data_key_v1 is not None + and self.encrypted_data_key_v2 is None + ) + + def is_v2_format(self) -> bool: + """Check if metadata is in V2 format. + + Returns: + bool: True if metadata contains V2 keys and excludes V1/V3 keys + """ + return ( + self.content_cipher is not None + and self.content_iv is not None + and self.encrypted_data_key_algorithm is not None + and self.encrypted_data_key_v2 is not None + and self.encrypted_data_key_v1 is None + ) + + def is_v3_format(self) -> bool: + """Check if metadata is in V3 format. + + Returns: + bool: True if metadata contains V3 keys and excludes V1/V2 keys + """ + return ( + self.content_cipher_v3 is not None + and self.encrypted_data_key_algorithm_v3 is not None + and self.key_commitment_v3 is not None + and self.message_id_v3 is not None + and self.encrypted_data_key_v3 is not None + and self.encrypted_data_key_v2 is None + and self.encrypted_data_key_v1 is None + ) + + def has_exclusive_key_collision(self) -> bool: + """Check if metadata has multiple exclusive version keys. + + Returns: + bool: True if more than one version key (V1, V2, V3) is present + """ + has_v1_key = self.encrypted_data_key_v1 is not None + has_v2_key = self.encrypted_data_key_v2 is not None + has_v3_key = self.encrypted_data_key_v3 is not None + + exclusive_key_count = sum([has_v1_key, has_v2_key, has_v3_key]) + return exclusive_key_count > 1 + + def is_v3_in_object_metadata(self) -> bool: + """Check if V3 content keys are in object metadata (without encrypted data key). + + ##= specification/s3-encryption/data-format/metadata-strategy.md#v3-instruction-files + ##= type=implementation + ##% In the V3 message format, only the content metadata related to + ##% the encrypted data is stored in the Instruction File. 
+ ##% In the V3 message format, the content metadata related to + ##% the encrypted content is stored in the Object Metadata. + + Returns: + bool: True if V3 content keys present but no encrypted data key + """ + return ( + self.content_cipher_v3 is not None + and self.key_commitment_v3 is not None + and self.message_id_v3 is not None + and self.encrypted_data_key_v3 is None + ) + + ##= specification/s3-encryption/data-format/content-metadata.md#determining-s3ec-object-status + ##= type=implementation + ##% If the object matches none of the V1/V2/V3 formats, + ##% the S3EC MUST attempt to get the instruction file. + def should_use_instruction_file(self) -> bool: + """Check if instruction file should be used for decryption. + + Returns: + bool: True if instruction file should be fetched + """ + # V3 with content keys but no encrypted data key -> instruction file + if self.is_v3_in_object_metadata(): + return True + + # No version keys at all -> try instruction file for V1/V2 + has_any_key = ( + self.encrypted_data_key_v1 is not None + or self.encrypted_data_key_v2 is not None + or self.encrypted_data_key_v3 is not None + ) + return not has_any_key + + +# Valid S3 Encryption Client metadata keys +VALID_S3EC_METADATA_KEYS = { + # V1/V2 format keys + "x-amz-key", + "x-amz-key-v2", + "x-amz-wrap-alg", + "x-amz-matdesc", + "x-amz-iv", + "x-amz-cek-alg", + "x-amz-tag-len", + "x-amz-crypto-instr-file", + # V3 format keys (compressed) + "x-amz-c", + "x-amz-3", + "x-amz-m", + "x-amz-t", + "x-amz-w", + "x-amz-d", + "x-amz-i", +} diff --git a/src/s3_encryption/pipelines.py b/src/s3_encryption/pipelines.py index 3a83a359..02a5a9c9 100644 --- a/src/s3_encryption/pipelines.py +++ b/src/s3_encryption/pipelines.py @@ -12,6 +12,8 @@ from attrs import define, field from cryptography.hazmat.primitives.ciphers.aead import AESGCM +from .exceptions import S3EncryptionClientError +from .instruction_file import fetch_instruction_file from .materials.crypto_materials_manager import 
AbstractCryptoMaterialsManager from .materials.encrypted_data_key import EncryptedDataKey from .materials.materials import DecryptionMaterials, EncryptionMaterials @@ -90,13 +92,24 @@ class GetEncryptedObjectPipeline: """ cmm: AbstractCryptoMaterialsManager = field() - - def decrypt(self, response, encryption_context=None): + s3_client: object = field(default=None) + + def decrypt( + self, + response, + encryption_context=None, + bucket=None, + key=None, + instruction_suffix=".instruction", + ): """Decrypt the data after it is retrieved from S3. Args: response (dict): The response from S3 containing the encrypted data and metadata encryption_context (dict, optional): Additional context for decryption + bucket (str, optional): S3 bucket name (required for instruction file) + key (str, optional): S3 object key (required for instruction file) + instruction_suffix(str, optional): suffix for instruction file; defaults to ".instruction". Returns: bytes: The decrypted data @@ -111,44 +124,45 @@ def decrypt(self, response, encryption_context=None): if encryption_context is None: encryption_context = {} - iv_b64 = metadata.content_iv - edk_b64 = metadata.encrypted_data_key_v2 - - # TODO: probably move this to ObjectMetadata - iv_bytes = base64.b64decode(iv_b64) - - # Create a list of encrypted data keys to try - encrypted_data_keys = [] - # Create an instance of EncryptedDataKey - if edk_b64: - edk_bytes = base64.b64decode(edk_b64) - encrypted_data_key = EncryptedDataKey( - key_provider_id=b"S3Keyring", - key_provider_info=metadata.encrypted_data_key_algorithm, - encrypted_data_key=edk_bytes, - ) - encrypted_data_keys.append(encrypted_data_key) - - # Also check for legacy encrypted data key (v1) if available - if metadata.encrypted_data_key_v1: - legacy_edk_bytes = base64.b64decode(metadata.encrypted_data_key_v1) - legacy_encrypted_data_key = EncryptedDataKey( - key_provider_id=b"S3Keyring", - key_provider_info=metadata.encrypted_data_key_algorithm, - 
encrypted_data_key=legacy_edk_bytes, + # Check if we need to fetch instruction file + if metadata.should_use_instruction_file(): + + if self.s3_client is None: + raise S3EncryptionClientError("s3_client required to fetch instruction file") + if bucket is None or key is None: + raise S3EncryptionClientError("Bucket and key required to fetch instruction file") + + instruction_key = key + instruction_suffix + instruction_metadata = fetch_instruction_file(self.s3_client, bucket, instruction_key) + instruction_metadata.update(encryption_metadata) + metadata = ObjectMetadata.from_dict(instruction_metadata) + ##= specification/s3-encryption/data-format/metadata-strategy.md#v1-v2-instruction-files + ##= type=implementation + ##% In the V1/V2 message format, all of the content metadata + ##% MUST be stored in the Instruction File. + if metadata.is_v1_format() or metadata.is_v2_format(): + object_metadata = ObjectMetadata.from_dict(encryption_metadata) + if not ( + object_metadata.content_cipher is None + and object_metadata.content_iv is None + and object_metadata.encrypted_data_key_algorithm is None + ): + raise S3EncryptionClientError( + "Content metadata found in object metadata for V1 or V2 message format " + "BUT Instruction File is being used. This is an illegal combination. " + f"bucket: {bucket}\n key:{key}\n instruction_file:{instruction_key}" + ) + # Determine which format we're dealing with and get decryption materials + if metadata.is_v1_format(): + dec_materials = self._decrypt_v1(metadata, encryption_context) + elif metadata.is_v2_format(): + dec_materials = self._decrypt_v2(metadata, encryption_context) + elif metadata.is_v3_format(): + dec_materials = self._decrypt_v3(metadata, encryption_context) + else: + raise S3EncryptionClientError( + "Unable to determine S3 Encryption Client message format." 
) - encrypted_data_keys.append(legacy_encrypted_data_key) - - # Create a DecryptionMaterials instance - dec_materials = DecryptionMaterials( - iv=iv_bytes, - encrypted_data_keys=encrypted_data_keys, - encryption_context_stored=metadata.encrypted_data_key_context or {}, - encryption_context_from_request=encryption_context or {}, - ) - - # Get decryption materials from the crypto materials manager - dec_materials = self.cmm.decrypt_materials(dec_materials) ##= specification/s3-encryption/decryption.md#cbc-decryption ##= type=TODO @@ -157,6 +171,51 @@ def decrypt(self, response, encryption_context=None): ##% the S3EC MUST throw an error which details that client was ##% not configured to decrypt objects with ALG_AES_256_CBC_IV16_NO_KDF. + # Perform decryption aesgcm = AESGCM(dec_materials.plaintext_data_key) + return aesgcm.decrypt(nonce=dec_materials.iv, data=encrypted_data, associated_data=None) + + def _decrypt_v2(self, metadata, encryption_context) -> DecryptionMaterials: + """Prepare V2 decryption materials.""" + iv_bytes = base64.b64decode(metadata.content_iv) + edk_bytes = base64.b64decode(metadata.encrypted_data_key_v2) + + encrypted_data_key = EncryptedDataKey( + key_provider_id=b"S3Keyring", + key_provider_info=metadata.encrypted_data_key_algorithm, + encrypted_data_key=edk_bytes, + ) + + dec_materials = DecryptionMaterials( + iv=iv_bytes, + encrypted_data_keys=[encrypted_data_key], + encryption_context_stored=metadata.encrypted_data_key_context or {}, + encryption_context_from_request=encryption_context, + ) + + return self.cmm.decrypt_materials(dec_materials) + + def _decrypt_v1(self, metadata, encryption_context) -> DecryptionMaterials: + """Prepare V1 decryption materials.""" + iv_bytes = base64.b64decode(metadata.content_iv) + edk_bytes = base64.b64decode(metadata.encrypted_data_key_v1) + + encrypted_data_key = EncryptedDataKey( + key_provider_id=b"S3Keyring", + key_provider_info=metadata.encrypted_data_key_algorithm, + encrypted_data_key=edk_bytes, + ) 
+ + dec_materials = DecryptionMaterials( + iv=iv_bytes, + encrypted_data_keys=[encrypted_data_key], + encryption_context_stored=metadata.encrypted_data_key_context or {}, + encryption_context_from_request=encryption_context, + ) + + return self.cmm.decrypt_materials(dec_materials) - return aesgcm.decrypt(nonce=iv_bytes, data=encrypted_data, associated_data=None) + def _decrypt_v3(self, metadata, encryption_context) -> DecryptionMaterials: + """Prepare V3 decryption materials.""" + # TODO: Implement V3 decryption + raise NotImplementedError("V3 decryption not yet implemented") diff --git a/test/integration/test_i_s3_encryption_instruction_file.py b/test/integration/test_i_s3_encryption_instruction_file.py new file mode 100644 index 00000000..467ddc7a --- /dev/null +++ b/test/integration/test_i_s3_encryption_instruction_file.py @@ -0,0 +1,151 @@ +# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +import os + +import boto3 +import pytest + +from s3_encryption import S3EncryptionClient, S3EncryptionClientConfig +from s3_encryption.materials.kms_keyring import KmsKeyring + +# Static test objects bucket +bucket = os.environ.get("CI_S3_STATIC_TEST_BUCKET", "s3ec-static-test-objects") +region = os.environ.get("CI_AWS_REGION", "us-west-2") +# KMS key used for static test objects (S3ECTestServerKMSKey) +kms_key_id = os.environ.get( + "CI_KMS_KEY_STATIC_TESTS", + "arn:aws:kms:us-west-2:370957321024:key/a3889cd9-99eb-4138-a93a-aea9d52ec2ef", +) + +# Static test object keys created by Java S3EC V4 +TEST_OBJECTS = { + "v1_instruction_file": "static-v1-instruction-file-from-java-v1", + "v2_instruction_file": "static-v2-instruction-file-from-java-v4", + "v3_instruction_file": "static-v3-instruction-file-from-java-v4", + "negative_v2_instruction_file": "NEGATIVE-static-v2-instruction-file-test-from-java-v4", +} + + +# TODO(cbc): enable once CBC decryption is implemented +@pytest.mark.skip(reason="V1 CBC decryption not yet 
implemented") +def test_decrypt_v1_instruction_file(): + """Test decrypting V1 object with instruction file. + + V1 format uses ALG_AES_256_CBC_IV16_NO_KDF (CBC mode, no key commitment). + Object encrypted by Java S3EC V1 with instruction file enabled. + """ + key = TEST_OBJECTS["v1_instruction_file"] + + kms_client = boto3.client("kms", region_name=region) + keyring = KmsKeyring(kms_client, kms_key_id, enable_legacy_wrapping_algorithms=True) + wrapped_client = boto3.client("s3") + config = S3EncryptionClientConfig(keyring) + s3ec = S3EncryptionClient(wrapped_client, config) + + response = s3ec.get_object(Bucket=bucket, Key=key) + output = response["Body"].read().decode("utf-8") + + assert output == "static-v1-instruction-file-from-java-v1" + print("Success! V1 instruction file decryption completed.") + + +def test_decrypt_v2_instruction_file(): + """Test decrypting V2 object with instruction file. + + V2 format uses ALG_AES_256_GCM_IV12_TAG16_NO_KDF (no key commitment). + Object encrypted by Java S3EC V4 with instruction file enabled. + """ + key = TEST_OBJECTS["v2_instruction_file"] + + kms_client = boto3.client("kms", region_name=region) + keyring = KmsKeyring(kms_client, kms_key_id) + wrapped_client = boto3.client("s3") + config = S3EncryptionClientConfig(keyring) + s3ec = S3EncryptionClient(wrapped_client, config) + + response = s3ec.get_object(Bucket=bucket, Key=key) + output = response["Body"].read().decode("utf-8") + + assert output == "static-v2-instruction-file-from-java-v4" + print("Success! V2 instruction file decryption completed.") + + +# TODO(v3): enable once v3 is implemented +@pytest.mark.skip(reason="V3 decryption not yet implemented") +def test_decrypt_v3_instruction_file(): + """Test decrypting V3 object with instruction file. + + V3 format uses ALG_AES_256_GCM_HKDF_SHA512_COMMIT_KEY (with key commitment). + Object encrypted by Java S3EC V4 with instruction file enabled. 
+ """ + key = TEST_OBJECTS["v3_instruction_file"] + + kms_client = boto3.client("kms", region_name=region) + keyring = KmsKeyring(kms_client, kms_key_id) + wrapped_client = boto3.client("s3") + config = S3EncryptionClientConfig(keyring) + s3ec = S3EncryptionClient(wrapped_client, config) + + response = s3ec.get_object(Bucket=bucket, Key=key) + output = response["Body"].read().decode("utf-8") + + assert output == "static-v3-instruction-file-from-java-v4" + print("Success! V3 instruction file decryption completed.") + + +def test_decrypt_invalid_instruction_file(): + """Test that decrypting with an invalid instruction file raises an error. + + The NEGATIVE test object has an invalid instruction file that should + cause the S3 Encryption Client to raise an exception during decryption. + """ + from s3_encryption.exceptions import S3EncryptionClientError + + key = TEST_OBJECTS["negative_v2_instruction_file"] + + kms_client = boto3.client("kms", region_name=region) + keyring = KmsKeyring(kms_client, kms_key_id) + wrapped_client = boto3.client("s3") + config = S3EncryptionClientConfig(keyring) + s3ec = S3EncryptionClient(wrapped_client, config) + + with pytest.raises(S3EncryptionClientError) as exc_info: + s3ec.get_object(Bucket=bucket, Key=key) + + print(f"Error message: {exc_info.value}") + + +# TODO(v3): enable once v3 is implemented +@pytest.mark.skip(reason="V3 decryption not yet implemented") +def test_decrypt_v3_instruction_file_custom_suffix(): + """Test decrypting V3 object with a custom instruction file suffix.""" + key = TEST_OBJECTS["v3_instruction_file"] + + kms_client = boto3.client("kms", region_name=region) + keyring = KmsKeyring(kms_client, kms_key_id) + wrapped_client = boto3.client("s3") + config = S3EncryptionClientConfig(keyring, instruction_file_suffix=".custom-suffix-instruction") + s3ec = S3EncryptionClient(wrapped_client, config) + + response = s3ec.get_object(Bucket=bucket, Key=key) + output = response["Body"].read().decode("utf-8") + + assert
output == "static-v3-instruction-file-from-java-v4" + print("Success! V3 custom suffix instruction file decryption completed.") + + +def test_decrypt_v2_instruction_file_custom_suffix(): + """Test decrypting V2 object with a custom instruction file suffix.""" + key = TEST_OBJECTS["v2_instruction_file"] + + kms_client = boto3.client("kms", region_name=region) + keyring = KmsKeyring(kms_client, kms_key_id) + wrapped_client = boto3.client("s3") + config = S3EncryptionClientConfig(keyring, instruction_file_suffix=".custom-suffix-instruction") + s3ec = S3EncryptionClient(wrapped_client, config) + + response = s3ec.get_object(Bucket=bucket, Key=key) + output = response["Body"].read().decode("utf-8") + + assert output == "static-v2-instruction-file-from-java-v4" + print("Success! V2 custom suffix instruction file decryption completed.") diff --git a/test/test_metadata.py b/test/test_metadata.py index a061c185..ba783bf5 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -79,3 +79,160 @@ def test_roundtrip(self): # Verify that the result matches the original assert result_dict == original_dict + + def test_from_dict_v3_fields(self): + # Create a metadata dictionary with V3 fields + metadata_dict = { + "x-amz-c": "02", + "x-amz-3": "encrypted-key-v3", + "x-amz-w": "12", + "x-amz-d": "key-commitment", + "x-amz-i": "message-id", + "x-amz-m": "mat-desc", + "x-amz-t": "encryption-context", + } + + metadata = ObjectMetadata.from_dict(metadata_dict) + + assert metadata.content_cipher_v3 == "02" + assert metadata.encrypted_data_key_v3 == "encrypted-key-v3" + assert metadata.encrypted_data_key_algorithm_v3 == "12" + assert metadata.key_commitment_v3 == "key-commitment" + assert metadata.message_id_v3 == "message-id" + assert metadata.mat_desc_v3 == "mat-desc" + assert metadata.encryption_context_v3 == "encryption-context" + + def test_to_dict_v3_fields(self): + # Create an ObjectMetadata instance with V3 fields + metadata = ObjectMetadata( + content_cipher_v3="02", + 
encrypted_data_key_v3="encrypted-key-v3", + encrypted_data_key_algorithm_v3="12", + key_commitment_v3="key-commitment", + message_id_v3="message-id", + mat_desc_v3="mat-desc", + encryption_context_v3="encryption-context", + ) + + metadata_dict = metadata.to_dict() + + assert metadata_dict["x-amz-c"] == "02" + assert metadata_dict["x-amz-3"] == "encrypted-key-v3" + assert metadata_dict["x-amz-w"] == "12" + assert metadata_dict["x-amz-d"] == "key-commitment" + assert metadata_dict["x-amz-i"] == "message-id" + assert metadata_dict["x-amz-m"] == "mat-desc" + assert metadata_dict["x-amz-t"] == "encryption-context" + + def test_is_v1_format(self): + metadata = ObjectMetadata( + content_iv="iv", + encrypted_data_key_context={"key": "value"}, + encrypted_data_key_v1="edk-v1", + ) + assert metadata.is_v1_format() is True + + # V2 key present should return False + metadata_v2 = ObjectMetadata( + content_iv="iv", + encrypted_data_key_context={"key": "value"}, + encrypted_data_key_v1="edk-v1", + encrypted_data_key_v2="edk-v2", + ) + assert metadata_v2.is_v1_format() is False + + def test_is_v2_format(self): + metadata = ObjectMetadata( + content_cipher="AES/GCM/NoPadding", + content_iv="iv", + encrypted_data_key_algorithm="kms+context", + encrypted_data_key_v2="edk-v2", + ) + assert metadata.is_v2_format() is True + + # V1 key present should return False + metadata_v1 = ObjectMetadata( + content_cipher="AES/GCM/NoPadding", + content_iv="iv", + encrypted_data_key_algorithm="kms+context", + encrypted_data_key_v2="edk-v2", + encrypted_data_key_v1="edk-v1", + ) + assert metadata_v1.is_v2_format() is False + + def test_is_v3_format(self): + metadata = ObjectMetadata( + content_cipher_v3="02", + encrypted_data_key_algorithm_v3="12", + key_commitment_v3="commitment", + message_id_v3="msg-id", + encrypted_data_key_v3="edk-v3", + ) + assert metadata.is_v3_format() is True + + # V1 or V2 keys present should return False + metadata_v2 = ObjectMetadata( + content_cipher_v3="02", + 
encrypted_data_key_algorithm_v3="12", + key_commitment_v3="commitment", + message_id_v3="msg-id", + encrypted_data_key_v3="edk-v3", + encrypted_data_key_v2="edk-v2", + ) + assert metadata_v2.is_v3_format() is False + + def test_has_exclusive_key_collision(self): + # No collision - only V2 + metadata_v2 = ObjectMetadata(encrypted_data_key_v2="edk-v2") + assert metadata_v2.has_exclusive_key_collision() is False + + # Collision - V1 and V2 + metadata_collision = ObjectMetadata( + encrypted_data_key_v1="edk-v1", + encrypted_data_key_v2="edk-v2", + ) + assert metadata_collision.has_exclusive_key_collision() is True + + # Collision - all three + metadata_all = ObjectMetadata( + encrypted_data_key_v1="edk-v1", + encrypted_data_key_v2="edk-v2", + encrypted_data_key_v3="edk-v3", + ) + assert metadata_all.has_exclusive_key_collision() is True + + ##= specification/s3-encryption/data-format/content-metadata.md#determining-s3ec-object-status + ##= type=test + ##% If the object matches none of the V1/V2/V3 formats, + ##% the S3EC MUST attempt to get the instruction file. 
+ def test_should_use_instruction_file(self): + # No keys at all -> should use instruction file + metadata_empty = ObjectMetadata() + assert metadata_empty.should_use_instruction_file() is True + + # V3 in object metadata (has content keys but no EDK) -> instruction file + metadata_v3_partial = ObjectMetadata( + content_cipher_v3="02", + encrypted_data_key_algorithm_v3="12", + key_commitment_v3="commitment", + message_id_v3="msg-id", + ) + assert metadata_v3_partial.should_use_instruction_file() is True + + # V1 with EDK -> no instruction file needed + metadata_v1 = ObjectMetadata(encrypted_data_key_v1="edk-v1") + assert metadata_v1.should_use_instruction_file() is False + + # V2 with EDK -> no instruction file needed + metadata_v2 = ObjectMetadata(encrypted_data_key_v2="edk-v2") + assert metadata_v2.should_use_instruction_file() is False + + # V3 with EDK -> no instruction file needed + metadata_v3 = ObjectMetadata( + content_cipher_v3="02", + encrypted_data_key_algorithm_v3="12", + key_commitment_v3="commitment", + message_id_v3="msg-id", + encrypted_data_key_v3="edk-v3", + ) + assert metadata_v3.should_use_instruction_file() is False diff --git a/test/test_pipelines.py b/test/test_pipelines.py new file mode 100644 index 00000000..9f40cd5c --- /dev/null +++ b/test/test_pipelines.py @@ -0,0 +1,241 @@ +# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 +import base64 +import json +import os +from io import BytesIO +from unittest.mock import Mock + +import pytest + +from s3_encryption.materials.crypto_materials_manager import DefaultCryptoMaterialsManager +from s3_encryption.materials.keyring import S3Keyring +from s3_encryption.pipelines import GetEncryptedObjectPipeline + + +class TestGetEncryptedObjectPipelineInstructionFile: + ##= specification/s3-encryption/data-format/metadata-strategy.md#v1-v2-instruction-files + ##= type=test + ##% In the V1/V2 message format, all of the content metadata + ##% MUST be stored in the Instruction File. + def test_decrypt_v1_from_instruction_file(self): + """Test decrypting V1 format with instruction file.""" + object_metadata = {"x-amz-meta-x-amz-unencrypted-content-length": "39"} + + # Instruction file contains all V1 metadata + instruction_file_metadata = { + "x-amz-iv": base64.b64encode(os.urandom(16)).decode("utf-8"), + "x-amz-key-v2": base64.b64encode(b"encrypted-key-data").decode("utf-8"), + "x-amz-wrap-alg": "kms", + "x-amz-matdesc": json.dumps({"kms_cmk_id": "test-key-id"}), + "x-amz-cek-alg": "AES/CBC/PKCS5Padding", + "x-amz-crypto-instr-file": "", + } + + # Create mock S3 client + mock_s3_client = Mock() + # Mock returns parsed metadata (simulating event handler behavior) + mock_s3_client.get_object.return_value = { + "Body": BytesIO(b""), # Body is cleared by event handler + "Metadata": instruction_file_metadata, + } + + # Create mock keyring and CMM + mock_keyring = Mock(spec=S3Keyring) + cmm = DefaultCryptoMaterialsManager(mock_keyring) + + # Create pipeline with mocked S3 client + pipeline = GetEncryptedObjectPipeline(cmm, mock_s3_client) + + # Create mock response + mock_response = { + "Body": BytesIO(b"encrypted-test-data"), + "Metadata": object_metadata, + } + + # Mock the keyring to raise an error so we don't actually decrypt + mock_keyring.on_decrypt.side_effect = Exception( + "Keyring called - instruction file was 
fetched" + ) + + # Should fail when trying to decrypt (proving instruction file was fetched) + with pytest.raises(Exception, match="Keyring called"): + pipeline.decrypt(mock_response, bucket="test-bucket", key="test-key") + + # Verify instruction file was fetched + mock_s3_client.get_object.assert_called_once_with( + Bucket="test-bucket", Key="test-key.instruction" + ) + + ##= specification/s3-encryption/data-format/metadata-strategy.md#instruction-file + ##= type=test + ##% The default Instruction File behavior uses the same S3 object key + ##% as its associated object suffixed with ".instruction". + def test_decrypt_v2_from_instruction_file(self): + """Test decrypting V2 format with instruction file.""" + # V2: Object metadata is empty, all metadata in instruction file + object_metadata = {} + + # Instruction file contains all V2 metadata + instruction_file_metadata = { + "x-amz-iv": base64.b64encode(os.urandom(12)).decode("utf-8"), + "x-amz-key-v2": base64.b64encode(b"encrypted-key-data").decode("utf-8"), + "x-amz-wrap-alg": "kms+context", + "x-amz-matdesc": json.dumps({"kms_cmk_id": "test-key-id"}), + "x-amz-cek-alg": "AES/GCM/NoPadding", + "x-amz-tag-len": "128", + "x-amz-crypto-instr-file": "", + } + + # Create mock S3 client + mock_s3_client = Mock() + # Mock returns parsed metadata (simulating event handler behavior) + mock_s3_client.get_object.return_value = { + "Body": BytesIO(b""), # Body is cleared by event handler + "Metadata": instruction_file_metadata, + } + + # Create mock keyring and CMM + mock_keyring = Mock(spec=S3Keyring) + cmm = DefaultCryptoMaterialsManager(mock_keyring) + + # Create pipeline with mocked S3 client + pipeline = GetEncryptedObjectPipeline(cmm, mock_s3_client) + + # Create mock response + mock_response = { + "Body": BytesIO(b"encrypted-test-data"), + "Metadata": object_metadata, + } + + # Mock the keyring to raise an error so we don't actually decrypt + mock_keyring.on_decrypt.side_effect = Exception( + "Keyring called - 
instruction file was fetched" + ) + + # Should fail when trying to decrypt (proving instruction file was fetched) + with pytest.raises(Exception, match="Keyring called"): + pipeline.decrypt(mock_response, bucket="test-bucket", key="test-key") + + # Verify instruction file was fetched + mock_s3_client.get_object.assert_called_once_with( + Bucket="test-bucket", Key="test-key.instruction" + ) + + ##= specification/s3-encryption/data-format/metadata-strategy.md#v3-instruction-files + ##= type=test + ##% In the V3 message format, only the content metadata related to + ##% the encrypted data is stored in the Instruction File. + def test_decrypt_v3_from_instruction_file(self): + """Test decrypting V3 format with instruction file.""" + # Object metadata contains V3 content keys only + object_metadata = { + "x-amz-c": "115", # Compressed algorithm suite + "x-amz-d": base64.b64encode(b"key-commitment-data").decode("utf-8"), + "x-amz-i": base64.b64encode(b"test-message-id").decode("utf-8"), + } + + # Instruction file contains encrypted data key and wrapping algorithm + instruction_file_metadata = { + "x-amz-3": base64.b64encode(b"encrypted-key-data").decode("utf-8"), + "x-amz-w": "02", # AES/GCM + "x-amz-m": json.dumps({"test-instruction": "material-desc-instruction"}), + "x-amz-crypto-instr-file": "", + } + + # Create mock S3 client + mock_s3_client = Mock() + # Mock returns parsed metadata (simulating event handler behavior) + mock_s3_client.get_object.return_value = { + "Body": BytesIO(b""), # Body is cleared by event handler + "Metadata": instruction_file_metadata, + } + + # Create mock keyring and CMM + mock_keyring = Mock(spec=S3Keyring) + cmm = DefaultCryptoMaterialsManager(mock_keyring) + + # Create pipeline with mocked S3 client + pipeline = GetEncryptedObjectPipeline(cmm, mock_s3_client) + + # Create mock response with encrypted data + iv = os.urandom(12) + encrypted_data = b"encrypted-test-data" + + mock_response = { + "Body": BytesIO(encrypted_data), + "Metadata": 
object_metadata, + } + + # Mock the keyring to return decryption materials + from s3_encryption.materials.materials import DecryptionMaterials + + plaintext_data_key = os.urandom(32) + + mock_dec_materials = DecryptionMaterials( + iv=iv, + encrypted_data_keys=[], + encryption_context_stored={}, + encryption_context_from_request={}, + ) + mock_dec_materials.plaintext_data_key = plaintext_data_key + + mock_keyring.on_decrypt.return_value = mock_dec_materials + + # This should fail with NotImplementedError since V3 decryption isn't implemented yet + with pytest.raises(NotImplementedError, match="V3 decryption not yet implemented"): + pipeline.decrypt(mock_response, bucket="test-bucket", key="test-key") + + # Verify instruction file was fetched + mock_s3_client.get_object.assert_called_once_with( + Bucket="test-bucket", Key="test-key.instruction" + ) + + ##= specification/s3-encryption/data-format/metadata-strategy.md#instruction-file + ##= type=test + ##% The S3EC SHOULD support providing a custom Instruction File suffix + ##% on GetObject requests, regardless of whether or not re-encryption is supported. 
+ def test_decrypt_with_custom_instruction_file_suffix(self): + """Test that a custom instruction file suffix is used when provided.""" + object_metadata = {} + + instruction_file_metadata = { + "x-amz-iv": base64.b64encode(os.urandom(12)).decode("utf-8"), + "x-amz-key-v2": base64.b64encode(b"encrypted-key-data").decode("utf-8"), + "x-amz-wrap-alg": "kms+context", + "x-amz-matdesc": json.dumps({"kms_cmk_id": "test-key-id"}), + "x-amz-cek-alg": "AES/GCM/NoPadding", + "x-amz-tag-len": "128", + "x-amz-crypto-instr-file": "", + } + + mock_s3_client = Mock() + mock_s3_client.get_object.return_value = { + "Body": BytesIO(b""), + "Metadata": instruction_file_metadata, + } + + mock_keyring = Mock(spec=S3Keyring) + cmm = DefaultCryptoMaterialsManager(mock_keyring) + pipeline = GetEncryptedObjectPipeline(cmm, mock_s3_client) + + mock_response = { + "Body": BytesIO(b"encrypted-test-data"), + "Metadata": object_metadata, + } + + mock_keyring.on_decrypt.side_effect = Exception( + "Keyring called - instruction file was fetched" + ) + + with pytest.raises(Exception, match="Keyring called"): + pipeline.decrypt( + mock_response, + bucket="test-bucket", + key="test-key", + instruction_suffix=".custom-suffix", + ) + + mock_s3_client.get_object.assert_called_once_with( + Bucket="test-bucket", Key="test-key.custom-suffix" + ) diff --git a/test/test_s3_encryption_client_plugin.py b/test/test_s3_encryption_client_plugin.py new file mode 100644 index 00000000..bdc48c79 --- /dev/null +++ b/test/test_s3_encryption_client_plugin.py @@ -0,0 +1,141 @@ +# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0
"""Unit tests for S3EncryptionClientPlugin event handlers."""

import io
import json
from unittest.mock import Mock

import pytest
from botocore.response import StreamingBody

from s3_encryption import S3EncryptionClientConfig, S3EncryptionClientPlugin
from s3_encryption.exceptions import S3EncryptionClientError
from s3_encryption.materials.keyring import S3Keyring


class TestS3EncryptionClientPlugin:
    """S3EncryptionClientPlugin event handler behavior.

    Every test drives ``on_get_object_after_call`` on a plugin whose context has
    ``instruction_file_mode`` set to ``True``.

    The ``##=`` / ``##%`` comment lines above some tests quote the specification
    requirement that the test covers; keep them verbatim and directly above the
    test they annotate.
    """

    @staticmethod
    def _plugin_in_instruction_file_mode():
        """Return a plugin with a mocked keyring and instruction file mode enabled.

        Returns:
            An ``S3EncryptionClientPlugin`` whose ``_context`` has
            ``instruction_file_mode = True`` and ``key = "test-key.instruction"``.
        """
        config = S3EncryptionClientConfig(keyring=Mock(spec=S3Keyring))
        plugin = S3EncryptionClientPlugin(config)
        plugin._context.instruction_file_mode = True
        plugin._context.key = "test-key.instruction"
        return plugin

    @staticmethod
    def _instruction_file_response(body):
        """Build the parsed GetObject response passed to the event handler.

        Args:
            body: Raw bytes to expose through the response's ``Body`` stream.

        Returns:
            A dict with a ``StreamingBody`` over ``body`` and S3 metadata holding
            only the instruction file marker key.
        """
        return {
            "Body": StreamingBody(io.BytesIO(body), len(body)),
            # Instruction file marker in the S3 metadata (empty value).
            "Metadata": {"x-amz-crypto-instr-file": ""},
        }

    def test_instruction_file_mode_parses_instruction_file(self):
        """Test that instruction file mode parses the body into the response metadata."""
        plugin = self._plugin_in_instruction_file_mode()

        # The instruction file body is the JSON-serialized content metadata.
        instruction_metadata = {
            "x-amz-iv": "test-iv",
            "x-amz-key-v2": "test-key",
            "x-amz-wrap-alg": "kms+context",
            "x-amz-cek-alg": "AES/GCM/NoPadding",
        }
        parsed = self._instruction_file_response(
            json.dumps(instruction_metadata).encode("utf-8")
        )

        plugin.on_get_object_after_call(parsed)

        # Every key from the instruction file body must now be in the metadata ...
        for name, expected in instruction_metadata.items():
            assert parsed["Metadata"][name] == expected
        # ... alongside the marker that was already there.
        assert parsed["Metadata"]["x-amz-crypto-instr-file"] == ""

        # Reading the body after the handler ran must yield no bytes.
        assert parsed["Body"].read() == b""

    ##= specification/s3-encryption/data-format/metadata-strategy.md#instruction-file
    ##= type=test
    ##% The content metadata stored in the Instruction File MUST be serialized to a JSON string.
    def test_instruction_file_mode_invalid_json_raises_error(self):
        """Test that invalid JSON in instruction file raises error."""
        plugin = self._plugin_in_instruction_file_mode()
        parsed = self._instruction_file_response(b"not valid json")

        with pytest.raises(S3EncryptionClientError, match="Instruction file is not valid JSON"):
            plugin.on_get_object_after_call(parsed)

    def test_instruction_file_mode_non_dict_json_raises_error(self):
        """Test that non-dict JSON in instruction file raises error."""
        plugin = self._plugin_in_instruction_file_mode()

        # Well-formed JSON, but an array rather than an object.
        parsed = self._instruction_file_response(
            json.dumps(["not", "a", "dict"]).encode("utf-8")
        )

        with pytest.raises(
            S3EncryptionClientError, match="Instruction file must contain a JSON object"
        ):
            plugin.on_get_object_after_call(parsed)

    ##= specification/s3-encryption/data-format/metadata-strategy.md#instruction-file
    ##= type=test
    ##% The serialized JSON string MUST be the only contents of the Instruction File.
    def test_instruction_file_mode_invalid_keys_raises_error(self):
        """Test that invalid keys in instruction file raises error."""
        plugin = self._plugin_in_instruction_file_mode()

        # A JSON object that mixes a metadata key with one that should not be there.
        instruction_metadata = {
            "x-amz-iv": "test-iv",
            "invalid-key": "should-not-be-here",
        }
        parsed = self._instruction_file_response(
            json.dumps(instruction_metadata).encode("utf-8")
        )

        with pytest.raises(S3EncryptionClientError, match="Instruction file contains invalid keys"):
            plugin.on_get_object_after_call(parsed)