HumanCellAtlas · rolando-ebi · Sep 11, 2019 · Sep 11, 2019 · Sep 11, 2019 · xbrianh
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -9,7 +9,7 @@ before_script:
   - apt-get -y update
   - apt-get -y install jq
   - pip install -r requirements.txt
-  - export DEPLOYMENT_ENV=$CI_COMMIT_REF_NAME
+  - if [ "$CI_COMMIT_REF_NAME" = "staging" ]; then export DEPLOYMENT_ENV=staging; else export DEPLOYMENT_ENV=integration; fi  # staging keeps testing its own deployment; other branches use integration
   - export AWS_DEFAULT_REGION=us-east-1
   - export SWAGGER_URL="https://dss.$DEPLOYMENT_ENV.data.humancellatlas.org/v1/swagger.json"
   - mkdir -p ~/.config/hca
@@ -25,6 +25,7 @@ dcp_wide_test_SS2:
   only:
     - integration
     - staging
+    - validate-schema-versions
   script:
     - python -m unittest tests.integration.test_end_to_end_dcp.TestSmartSeq2Run.test_smartseq2_run
 
@@ -33,6 +34,7 @@ dcp_wide_test_metadata_update:
   only:
     - integration
     - staging
+    - validate-schema-versions
   script:
     - python -m unittest tests.integration.test_end_to_end_dcp.TestSmartSeq2Run.test_update
 
@@ -41,5 +43,6 @@ dcp_wide_test_optimus:
   only:
     - integration
     - staging
+    - validate-schema-versions
   script:
     - python -m unittest tests.integration.test_end_to_end_dcp.TestOptimusRun.test_optimus_run
diff --git a/requirements.txt b/requirements.txt
@@ -7,3 +7,4 @@ awscli
 hca-ingest
 cromwell-tools>=1.1.2
 termcolor
+jsonschema
diff --git a/tests/data_store_agent.py b/tests/data_store_agent.py
@@ -56,5 +56,8 @@ def download_file(self, file_uuid, save_as, replica='aws'):
                     else:
                         break
 
+    def get_file(self, file_uuid, replica='aws'):  # pass-through to the wrapped client's get_file
+        return self.client.get_file(replica=replica, uuid=file_uuid)  # the client's result is returned as-is
+
     def tombstone_bundle(self, bundle_uuid, replica='aws'):
         self.client.delete_bundle(replica=replica, uuid=bundle_uuid, reason="DCP-wide integration test")
diff --git a/tests/dataset_runner.py b/tests/dataset_runner.py
@@ -5,6 +5,7 @@
 
 from urllib.parse import urlparse
 from datetime import datetime
+from jsonschema import validate
 import boto3
 
 from .azul_agent import AzulAgent
@@ -98,6 +99,7 @@ def run(self, dataset_fixture, run_name_prefix="test"):
             else:
                 # == Non-scaling Logic ==
                 self.wait_for_primary_bundles()
+                self.assert_valid_schema_versions_in_provenance()
                 self.wait_for_analysis_workflows()
                 self.wait_for_secondary_bundles()
 
@@ -228,6 +230,20 @@ def wait_for_primary_bundles(self):
             raise RuntimeError(f'Expected {self.expected_bundle_count} primary bundles, but only '
                                f'got {primary_bundles_count}')
 
+    def assert_valid_schema_versions_in_provenance(self):
+        """Validate every metadata file of the submission's bundles against the schema its describedBy names."""
+        manifests = [self.data_store.bundle_manifest(bundle_uuid, "aws") for bundle_uuid in self.submission_envelope.bundles()]
+        metadata_uuids = [file["uuid"] for manifest in manifests for file in manifest["bundle"]["files"]
+                          if "metadata" in file["content-type"]]
+        schemas = {}  # schema URL -> parsed schema; files may share a schema, so download each one only once
+        for metadata_file in [self.data_store.get_file(file_uuid, "aws") for file_uuid in metadata_uuids]:
+            schema_url = metadata_file["describedBy"]
+            if schema_url not in schemas:
+                response = requests.get(schema_url)
+                response.raise_for_status()  # an HTTP error must fail here, not be parsed as if it were a schema
+                schemas[schema_url] = response.json()
+            validate(metadata_file, schema=schemas[schema_url])  # raises jsonschema's ValidationError on mismatch
+
     def wait_for_analysis_workflows(self):
         if not self.analysis_agent:
             Progress.report("NO CREDENTIALS PROVIDED FOR ANALYSIS AGENT, SKIPPING WORKFLOW(s) CHECK...")

diff --git a/tests/integration/test_end_to_end_dcp.py b/tests/integration/test_end_to_end_dcp.py
@@ -8,11 +8,11 @@
 from ingest.importer.submission import Submission
 
 from tests.wait_for import WaitFor
-from ..utils import Progress, Timeout
-from ..cloudwatch_handler import CloudwatchHandler
-from ..data_store_agent import DataStoreAgent
-from ..dataset_fixture import DatasetFixture
-from ..dataset_runner import DatasetRunner
+from tests.utils import Progress, Timeout
+from tests.cloudwatch_handler import CloudwatchHandler
+from tests.data_store_agent import DataStoreAgent
+from tests.dataset_fixture import DatasetFixture
+from tests.dataset_runner import DatasetRunner
 
 cloudwatch_handler = CloudwatchHandler()
 DEPLOYMENTS = ('dev', 'staging', 'integration', 'prod')