diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 283a0bb3..f628fa59 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,7 +9,7 @@ before_script: - apt-get -y update - apt-get -y install jq - pip install -r requirements.txt - - export DEPLOYMENT_ENV=$CI_COMMIT_REF_NAME + - export DEPLOYMENT_ENV=integration - export AWS_DEFAULT_REGION=us-east-1 - export SWAGGER_URL="https://dss.$DEPLOYMENT_ENV.data.humancellatlas.org/v1/swagger.json" - mkdir -p ~/.config/hca @@ -25,6 +25,7 @@ dcp_wide_test_SS2: only: - integration - staging + - validate-schema-versions script: - python -m unittest tests.integration.test_end_to_end_dcp.TestSmartSeq2Run.test_smartseq2_run @@ -33,6 +34,7 @@ dcp_wide_test_metadata_update: only: - integration - staging + - validate-schema-versions script: - python -m unittest tests.integration.test_end_to_end_dcp.TestSmartSeq2Run.test_update @@ -41,5 +43,6 @@ dcp_wide_test_optimus: only: - integration - staging + - validate-schema-versions script: - python -m unittest tests.integration.test_end_to_end_dcp.TestOptimusRun.test_optimus_run diff --git a/requirements.txt b/requirements.txt index bcc32575..f4be799f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ awscli hca-ingest cromwell-tools>=1.1.2 termcolor +jsonschema \ No newline at end of file diff --git a/tests/data_store_agent.py b/tests/data_store_agent.py index e4a1a452..e13f75f2 100644 --- a/tests/data_store_agent.py +++ b/tests/data_store_agent.py @@ -56,5 +56,8 @@ def download_file(self, file_uuid, save_as, replica='aws'): else: break + def get_file(self, file_uuid, replica='aws'): + return self.client.get_file(replica=replica, uuid=file_uuid) + def tombstone_bundle(self, bundle_uuid, replica='aws'): self.client.delete_bundle(replica=replica, uuid=bundle_uuid, reason="DCP-wide integration test") diff --git a/tests/dataset_runner.py b/tests/dataset_runner.py index 11b88e6d..155e4016 100644 --- a/tests/dataset_runner.py +++ b/tests/dataset_runner.py @@ -5,6 +5,7 @@ from urllib.parse import urlparse from datetime import datetime +from jsonschema import validate import boto3 from .azul_agent import AzulAgent @@ -98,6 +99,7 @@ def run(self, dataset_fixture, run_name_prefix="test"): else: # == Non-scaling Logic == self.wait_for_primary_bundles() + self.assert_valid_schema_versions_in_provenance() self.wait_for_analysis_workflows() self.wait_for_secondary_bundles() @@ -228,6 +230,20 @@ def wait_for_primary_bundles(self): raise RuntimeError(f'Expected {self.expected_bundle_count} primary bundles, but only ' f'got {primary_bundles_count}') + def assert_valid_schema_versions_in_provenance(self): + primary_bundles = [self.data_store.bundle_manifest(bundle_uuid, "aws") for bundle_uuid in self.submission_envelope.bundles()] + + metadata_files = [] + for bundle in primary_bundles: + metadata_file_manifests = filter(lambda file: "metadata" in file["content-type"], bundle["bundle"]["files"]) + metadata_files.extend([self.data_store.get_file(file["uuid"], "aws") for file in metadata_file_manifests]) + + for metadata_file in metadata_files: + schema_url = metadata_file["describedBy"] + schema = requests.get(schema_url).json() + validate(metadata_file, schema=schema) + + def wait_for_analysis_workflows(self): if not self.analysis_agent: Progress.report("NO CREDENTIALS PROVIDED FOR ANALYSIS AGENT, SKIPPING WORKFLOW(s) CHECK...") diff --git a/tests/integration/test_end_to_end_dcp.py b/tests/integration/test_end_to_end_dcp.py index 3f7a113b..b700dc5e 100755 --- a/tests/integration/test_end_to_end_dcp.py +++ b/tests/integration/test_end_to_end_dcp.py @@ -8,11 +8,11 @@ from ingest.importer.submission import Submission from tests.wait_for import WaitFor -from ..utils import Progress, Timeout -from ..cloudwatch_handler import CloudwatchHandler -from ..data_store_agent import DataStoreAgent -from ..dataset_fixture import DatasetFixture -from ..dataset_runner import DatasetRunner +from tests.utils import Progress, Timeout +from tests.cloudwatch_handler import CloudwatchHandler +from tests.data_store_agent import DataStoreAgent +from tests.dataset_fixture import DatasetFixture +from tests.dataset_runner import DatasetRunner cloudwatch_handler = CloudwatchHandler() DEPLOYMENTS = ('dev', 'staging', 'integration', 'prod')