diff --git a/.gitignore b/.gitignore
index 57359aa4c..ce7f21d2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -214,3 +214,4 @@ validation.txt
/index.html
.DS_Store
+*_old.tf
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8fafc87bd..0dc3f7d92 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
## (Unreleased)
ENHANCEMENTS:
+* Add per-workspace `airlock_version` property (1=legacy, 2=consolidated) for backwards-compatible airlock storage migration. Add core-level `enable_legacy_airlock` toggle. Remove `USE_METADATA_STAGE_MANAGEMENT` environment variable. ([#4853](https://github.com/microsoft/AzureTRE/pull/4853), [#4358](https://github.com/microsoft/AzureTRE/issues/4358))
* Specify default_outbound_access_enabled = false setting for all subnets ([#4757](https://github.com/microsoft/AzureTRE/pull/4757))
* Pin all GitHub Actions workflow steps to full commit SHAs to prevent supply chain attacks plus update to latest releases ([#4886](https://github.com/microsoft/AzureTRE/pull/4886))
diff --git a/airlock_processor/BlobCreatedTrigger/__init__.py b/airlock_processor/BlobCreatedTrigger/__init__.py
index f119ad3ed..567d27c1c 100644
--- a/airlock_processor/BlobCreatedTrigger/__init__.py
+++ b/airlock_processor/BlobCreatedTrigger/__init__.py
@@ -11,6 +11,17 @@
from shared_code.blob_operations import get_blob_info_from_topic_and_subject, get_blob_client_from_blob_info
+# Mapping from v2 container metadata stage to (completed_step, new_status)
+V2_STAGE_COMPLETION_MAP = {
+ constants.STAGE_IMPORT_APPROVED: (constants.STAGE_APPROVAL_INPROGRESS, constants.STAGE_APPROVED),
+ constants.STAGE_IMPORT_REJECTED: (constants.STAGE_REJECTION_INPROGRESS, constants.STAGE_REJECTED),
+ constants.STAGE_IMPORT_BLOCKED: (constants.STAGE_BLOCKING_INPROGRESS, constants.STAGE_BLOCKED_BY_SCAN),
+ constants.STAGE_EXPORT_APPROVED: (constants.STAGE_APPROVAL_INPROGRESS, constants.STAGE_APPROVED),
+ constants.STAGE_EXPORT_REJECTED: (constants.STAGE_REJECTION_INPROGRESS, constants.STAGE_REJECTED),
+ constants.STAGE_EXPORT_BLOCKED: (constants.STAGE_BLOCKING_INPROGRESS, constants.STAGE_BLOCKED_BY_SCAN),
+}
+
+
def main(msg: func.ServiceBusMessage,
stepResultEvent: func.Out[func.EventGridOutputEvent],
dataDeletionEvent: func.Out[func.EventGridOutputEvent]):
@@ -23,6 +34,12 @@ def main(msg: func.ServiceBusMessage,
topic = json_body["topic"]
request_id = re.search(r'/blobServices/default/containers/(.*?)/blobs', json_body["subject"]).group(1)
+ # Check if this event is from a v2 consolidated storage account
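+    # (Note: the core prefix "stalairlock" is a substring of the global prefix
+    # "stalairlockg", so either match routes to the same v2 handler.)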
+ if constants.STORAGE_ACCOUNT_NAME_AIRLOCK_CORE in topic or constants.STORAGE_ACCOUNT_NAME_AIRLOCK_WORKSPACE_GLOBAL in topic:
+ _handle_v2_blob_created(json_body, topic, request_id, stepResultEvent, dataDeletionEvent)
+ return
+
+ # Legacy v1 handling below
# message originated from in-progress blob creation
if constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS in topic or constants.STORAGE_ACCOUNT_NAME_EXPORT_INPROGRESS in topic:
try:
@@ -55,6 +72,9 @@ def main(msg: func.ServiceBusMessage,
elif constants.STORAGE_ACCOUNT_NAME_IMPORT_BLOCKED in topic or constants.STORAGE_ACCOUNT_NAME_EXPORT_BLOCKED in topic:
completed_step = constants.STAGE_BLOCKING_INPROGRESS
new_status = constants.STAGE_BLOCKED_BY_SCAN
+ else:
+ logging.warning(f"Unknown storage account in topic: {topic}")
+ return
# reply with a step completed event
stepResultEvent.set(
@@ -88,3 +108,44 @@ def send_delete_event(dataDeletionEvent: func.Out[func.EventGridOutputEvent], js
data_version=constants.DATA_DELETION_EVENT_DATA_VERSION
)
)
+
+
+def _handle_v2_blob_created(json_body, topic, request_id, stepResultEvent, dataDeletionEvent):
+ """Handle BlobCreated events from v2 consolidated storage accounts.
+
+ In v2, cross-account copies (e.g., import approval: core → workspace-global)
+ fire BlobCreated events. Container metadata determines the stage and appropriate
+ step result, matching the v1 pattern where BlobCreatedTrigger signals copy completion.
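+
+    Event shape (standard Event Grid BlobCreated fields; values illustrative):
+        topic:   .../providers/Microsoft.Storage/storageAccounts/<account_name>
+        subject: /blobServices/default/containers/<request_id>/blobs/<blob_name>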
+ """
+ storage_account_name, _, _ = get_blob_info_from_topic_and_subject(
+ topic=json_body["topic"], subject=json_body["subject"])
+
+ from shared_code.blob_operations_metadata import get_container_metadata
+ try:
+ metadata = get_container_metadata(storage_account_name, request_id)
+    except Exception as e:
+        logging.warning(f"Could not read container metadata for request {request_id} on {storage_account_name}, skipping: {e}")
+ return
+
+ stage = metadata.get('stage', '')
+ logging.info(f"V2 BlobCreated for request {request_id}: stage={stage}, account={storage_account_name}")
+
+ if stage in V2_STAGE_COMPLETION_MAP:
+ completed_step, new_status = V2_STAGE_COMPLETION_MAP[stage]
+ logging.info(f"V2 copy completed for request {request_id}: {completed_step} -> {new_status}")
+
+ stepResultEvent.set(
+ func.EventGridOutputEvent(
+ id=str(uuid.uuid4()),
+ data={"completed_step": completed_step, "new_status": new_status, "request_id": request_id},
+ subject=request_id,
+ event_type="Airlock.StepResult",
+ event_time=datetime.datetime.now(datetime.UTC),
+ data_version=constants.STEP_RESULT_EVENT_DATA_VERSION))
+
+ # Send delete event for the source container (same as v1)
+ send_delete_event(dataDeletionEvent, json_body, request_id)
+ else:
+ # Non-terminal stages (e.g., import-external from user upload, export-internal)
+ # are not copy completions — ignore them
+ logging.info(f"V2 BlobCreated for non-terminal stage '{stage}' on request {request_id}, no action needed")
diff --git a/airlock_processor/StatusChangedQueueTrigger/__init__.py b/airlock_processor/StatusChangedQueueTrigger/__init__.py
index db64d72a4..ba5668993 100644
--- a/airlock_processor/StatusChangedQueueTrigger/__init__.py
+++ b/airlock_processor/StatusChangedQueueTrigger/__init__.py
@@ -9,7 +9,7 @@
from exceptions import NoFilesInRequestException, TooManyFilesInRequestException
-from shared_code import blob_operations, constants
+from shared_code import blob_operations, constants, airlock_storage_helper, parsers
from pydantic import BaseModel, parse_obj_as
@@ -19,6 +19,8 @@ class RequestProperties(BaseModel):
previous_status: Optional[str]
type: str
workspace_id: str
+ review_workspace_id: Optional[str] = None
+ airlock_version: int = 1
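+    # Illustrative payload parsed into this model (hypothetical values):
+    # {"request_id": "...", "new_status": "submitted", "previous_status": "draft",
+    #  "type": "import", "workspace_id": "ws12", "review_workspace_id": "rw34",
+    #  "airlock_version": 2}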
class ContainersCopyMetadata:
@@ -31,6 +33,8 @@ def __init__(self, source_account_name: str, dest_account_name: str):
def main(msg: func.ServiceBusMessage, stepResultEvent: func.Out[func.EventGridOutputEvent], dataDeletionEvent: func.Out[func.EventGridOutputEvent]):
+ request_properties = None
+ request_files = None
try:
request_properties = extract_properties(msg)
request_files = get_request_files(request_properties) if request_properties.new_status == constants.STAGE_SUBMITTED else None
@@ -53,13 +57,25 @@ def handle_status_changed(request_properties: RequestProperties, stepResultEvent
logging.info('Processing request with id %s. new status is "%s", type is "%s"', req_id, new_status, request_type)
+ # Check if using metadata-based stage management (v2) or legacy per-stage accounts (v1)
+ use_metadata = request_properties.airlock_version >= 2
+
if new_status == constants.STAGE_DRAFT:
- account_name = get_storage_account(status=constants.STAGE_DRAFT, request_type=request_type, short_workspace_id=ws_id)
- blob_operations.create_container(account_name, req_id)
+ if use_metadata:
+ from shared_code.blob_operations_metadata import create_container_with_metadata
+ account_name = airlock_storage_helper.get_storage_account_name_for_request(request_type, new_status, ws_id, airlock_version=request_properties.airlock_version)
+ stage = airlock_storage_helper.get_stage_from_status(request_type, new_status)
+ create_container_with_metadata(account_name, req_id, stage, workspace_id=ws_id, request_type=request_type)
+ else:
+ account_name = get_storage_account(status=constants.STAGE_DRAFT, request_type=request_type, short_workspace_id=ws_id)
+ blob_operations.create_container(account_name, req_id)
return
if new_status == constants.STAGE_CANCELLED:
- storage_account_name = get_storage_account(previous_status, request_type, ws_id)
+ if use_metadata:
+ storage_account_name = airlock_storage_helper.get_storage_account_name_for_request(request_type, previous_status, ws_id, airlock_version=request_properties.airlock_version)
+ else:
+ storage_account_name = get_storage_account(previous_status, request_type, ws_id)
container_to_delete_url = blob_operations.get_blob_url(account_name=storage_account_name, container_name=req_id)
set_output_event_to_trigger_container_deletion(dataDeletionEvent, request_properties, container_url=container_to_delete_url)
return
@@ -68,11 +84,74 @@ def handle_status_changed(request_properties: RequestProperties, stepResultEvent
set_output_event_to_report_request_files(stepResultEvent, request_properties, request_files)
if (is_require_data_copy(new_status)):
- logging.info('Request with id %s. requires data copy between storage accounts', req_id)
- containers_metadata = get_source_dest_for_copy(new_status=new_status, previous_status=previous_status, request_type=request_type, short_workspace_id=ws_id)
- blob_operations.create_container(containers_metadata.dest_account_name, req_id)
- blob_operations.copy_data(containers_metadata.source_account_name,
- containers_metadata.dest_account_name, req_id)
+ if use_metadata:
+ # Metadata mode: Update container stage instead of copying
+ from shared_code.blob_operations_metadata import update_container_stage, create_container_with_metadata
+
+ # For import submit, use review_workspace_id so data goes to review workspace storage
+ effective_ws_id = ws_id
+ if new_status == constants.STAGE_SUBMITTED and request_type.lower() == constants.IMPORT_TYPE and request_properties.review_workspace_id:
+ effective_ws_id = request_properties.review_workspace_id
+
+ # Get the storage account (might change from core to workspace or vice versa)
+ source_account = airlock_storage_helper.get_storage_account_name_for_request(request_type, previous_status, ws_id, airlock_version=request_properties.airlock_version)
+ dest_account = airlock_storage_helper.get_storage_account_name_for_request(request_type, new_status, effective_ws_id, airlock_version=request_properties.airlock_version)
+ new_stage = airlock_storage_helper.get_stage_from_status(request_type, new_status)
+
+ if source_account == dest_account:
+ # Same storage account - just update metadata
+ logging.info(f'Request {req_id}: Updating container stage to {new_stage} (no copy needed)')
+ update_container_stage(source_account, req_id, new_stage, changed_by='system')
+
+ # In v2, same-account transitions don't fire BlobCreated events.
+ # For SUBMITTED, v1 relies on BlobCreatedTrigger to handle the malware scanning gate
+ # (skip to in_review when scanning is disabled). We handle this inline for v2.
+ if new_status == constants.STAGE_SUBMITTED:
+ try:
+ enable_malware_scanning = parsers.parse_bool(os.environ["ENABLE_MALWARE_SCANNING"])
+ except KeyError:
+ logging.error("environment variable 'ENABLE_MALWARE_SCANNING' does not exist. Cannot continue.")
+ raise
+ if not enable_malware_scanning:
+ logging.info(f'Request {req_id}: Malware scanning disabled, skipping to in_review')
+ stepResultEvent.set(
+ func.EventGridOutputEvent(
+ id=str(uuid.uuid4()),
+ data={"completed_step": constants.STAGE_SUBMITTED, "new_status": constants.STAGE_IN_REVIEW, "request_id": req_id},
+ subject=req_id,
+ event_type="Airlock.StepResult",
+ event_time=datetime.datetime.now(datetime.UTC),
+ data_version=constants.STEP_RESULT_EVENT_DATA_VERSION))
+ else:
+ logging.info(f'Request {req_id}: Malware scanning enabled, waiting for scan result')
+ elif new_status in [constants.STAGE_REJECTION_INPROGRESS, constants.STAGE_BLOCKING_INPROGRESS]:
+ # Terminal transitions: emit StepResult immediately since no BlobCreated event will fire
+ final_status = constants.STAGE_REJECTED if new_status == constants.STAGE_REJECTION_INPROGRESS else constants.STAGE_BLOCKED_BY_SCAN
+ logging.info(f'Request {req_id}: Emitting StepResult for terminal transition {new_status} -> {final_status}')
+ stepResultEvent.set(
+ func.EventGridOutputEvent(
+ id=str(uuid.uuid4()),
+ data={"completed_step": new_status, "new_status": final_status, "request_id": req_id},
+ subject=req_id,
+ event_type="Airlock.StepResult",
+ event_time=datetime.datetime.now(datetime.UTC),
+ data_version=constants.STEP_RESULT_EVENT_DATA_VERSION))
+ else:
+ # Different storage account (e.g., core → workspace on import approval,
+ # workspace → core on export approval) - need to copy.
+ # BlobCreatedTrigger will fire when the copy completes and emit the StepResult,
+ # matching the v1 async pattern for large data transfers.
+ logging.info(f'Request {req_id}: Copying from {source_account} to {dest_account}')
+ create_container_with_metadata(dest_account, req_id, new_stage, workspace_id=effective_ws_id, request_type=request_type)
+ blob_operations.copy_data(source_account, dest_account, req_id)
+ else:
+ # Legacy mode: Copy data between storage accounts
+ logging.info('Request with id %s. requires data copy between storage accounts', req_id)
+ review_ws_id = request_properties.review_workspace_id
+ containers_metadata = get_source_dest_for_copy(new_status=new_status, previous_status=previous_status, request_type=request_type, short_workspace_id=ws_id, review_workspace_id=review_ws_id)
+ blob_operations.create_container(containers_metadata.dest_account_name, req_id)
+ blob_operations.copy_data(containers_metadata.source_account_name,
+ containers_metadata.dest_account_name, req_id)
return
# Other statuses which do not require data copy are dismissed as we don't need to do anything...
@@ -102,7 +181,7 @@ def is_require_data_copy(new_status: str):
return False
-def get_source_dest_for_copy(new_status: str, previous_status: str, request_type: str, short_workspace_id: str) -> ContainersCopyMetadata:
+def get_source_dest_for_copy(new_status: str, previous_status: str, request_type: str, short_workspace_id: str, review_workspace_id: Optional[str] = None) -> ContainersCopyMetadata:
# sanity
if is_require_data_copy(new_status) is False:
raise Exception("Given new status is not supported")
@@ -115,7 +194,7 @@ def get_source_dest_for_copy(new_status: str, previous_status: str, request_type
raise Exception(msg)
source_account_name = get_storage_account(previous_status, request_type, short_workspace_id)
- dest_account_name = get_storage_account_destination_for_copy(new_status, request_type, short_workspace_id)
+ dest_account_name = get_storage_account_destination_for_copy(new_status, request_type, short_workspace_id, review_workspace_id=review_workspace_id)
return ContainersCopyMetadata(source_account_name, dest_account_name)
@@ -151,12 +230,14 @@ def get_storage_account(status: str, request_type: str, short_workspace_id: str)
raise Exception(error_message)
-def get_storage_account_destination_for_copy(new_status: str, request_type: str, short_workspace_id: str) -> str:
+def get_storage_account_destination_for_copy(new_status: str, request_type: str, short_workspace_id: str, review_workspace_id: Optional[str] = None) -> str:
tre_id = _get_tre_id()
if request_type == constants.IMPORT_TYPE:
if new_status == constants.STAGE_SUBMITTED:
- return constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS + tre_id
+            # Import submit: copy to the review workspace's storage when set; otherwise fall back to the TRE-level account (legacy behaviour)
+ dest_id = review_workspace_id if review_workspace_id else tre_id
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS + dest_id
elif new_status == constants.STAGE_APPROVAL_INPROGRESS:
return constants.STORAGE_ACCOUNT_NAME_IMPORT_APPROVED + short_workspace_id
elif new_status == constants.STAGE_REJECTION_INPROGRESS:
@@ -218,7 +299,13 @@ def set_output_event_to_trigger_container_deletion(dataDeletionEvent, request_pr
def get_request_files(request_properties: RequestProperties):
- storage_account_name = get_storage_account(request_properties.previous_status, request_properties.type, request_properties.workspace_id)
+ use_metadata = request_properties.airlock_version >= 2
+ if use_metadata:
+ storage_account_name = airlock_storage_helper.get_storage_account_name_for_request(
+ request_properties.type, request_properties.previous_status, request_properties.workspace_id,
+ airlock_version=request_properties.airlock_version)
+ else:
+ storage_account_name = get_storage_account(request_properties.previous_status, request_properties.type, request_properties.workspace_id)
return blob_operations.get_request_files(account_name=storage_account_name, request_id=request_properties.request_id)
diff --git a/airlock_processor/_version.py b/airlock_processor/_version.py
index 8d8e3b770..07508a608 100644
--- a/airlock_processor/_version.py
+++ b/airlock_processor/_version.py
@@ -1 +1 @@
-__version__ = "0.8.9"
+__version__ = "0.8.13"
diff --git a/airlock_processor/shared_code/airlock_storage_helper.py b/airlock_processor/shared_code/airlock_storage_helper.py
new file mode 100644
index 000000000..151983740
--- /dev/null
+++ b/airlock_processor/shared_code/airlock_storage_helper.py
@@ -0,0 +1,78 @@
+import os
+from shared_code import constants
+
+
+def get_storage_account_name_for_request(request_type: str, status: str, short_workspace_id: str, airlock_version: int = 1) -> str:
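+    """Resolve the storage account name for a request at the given status.
+
+    Examples (matching the unit tests, assuming TRE_ID="tre123"):
+        v2: get_storage_account_name_for_request("import", constants.STAGE_DRAFT, "ws12", airlock_version=2)
+            -> "stalairlocktre123"
+        v1: get_storage_account_name_for_request("export", constants.STAGE_APPROVED, "ws12", airlock_version=1)
+            -> "stalexapptre123"
+    """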
+ tre_id = os.environ.get("TRE_ID", "")
+
+ if airlock_version >= 2:
+ # Global workspace storage - all workspaces use same account
+ if request_type == constants.IMPORT_TYPE:
+ if status in [constants.STAGE_DRAFT, constants.STAGE_SUBMITTED, constants.STAGE_IN_REVIEW,
+ constants.STAGE_REJECTED, constants.STAGE_REJECTION_INPROGRESS,
+ constants.STAGE_BLOCKED_BY_SCAN, constants.STAGE_BLOCKING_INPROGRESS]:
+                # All core-side import stages stay in the consolidated core account
+ return constants.STORAGE_ACCOUNT_NAME_AIRLOCK_CORE + tre_id
+ else: # Approved, approval in progress
+ # Global workspace storage
+ return constants.STORAGE_ACCOUNT_NAME_AIRLOCK_WORKSPACE_GLOBAL + tre_id
+ else: # export
+ if status in [constants.STAGE_APPROVED, constants.STAGE_APPROVAL_INPROGRESS]:
+ # Export approved in core
+ return constants.STORAGE_ACCOUNT_NAME_AIRLOCK_CORE + tre_id
+ else: # Draft, submitted, in-review, rejected, blocked
+ # Global workspace storage
+ return constants.STORAGE_ACCOUNT_NAME_AIRLOCK_WORKSPACE_GLOBAL + tre_id
+ else:
+ # Legacy mode
+ if request_type == constants.IMPORT_TYPE:
+ if status == constants.STAGE_DRAFT:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_EXTERNAL + tre_id
+ elif status in [constants.STAGE_SUBMITTED, constants.STAGE_IN_REVIEW, constants.STAGE_APPROVAL_INPROGRESS,
+ constants.STAGE_REJECTION_INPROGRESS, constants.STAGE_BLOCKING_INPROGRESS]:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS + tre_id
+ elif status == constants.STAGE_APPROVED:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_APPROVED + short_workspace_id
+ elif status == constants.STAGE_REJECTED:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_REJECTED + tre_id
+ elif status == constants.STAGE_BLOCKED_BY_SCAN:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_BLOCKED + tre_id
+ else: # export
+ if status == constants.STAGE_DRAFT:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_INTERNAL + short_workspace_id
+ elif status in [constants.STAGE_SUBMITTED, constants.STAGE_IN_REVIEW, constants.STAGE_APPROVAL_INPROGRESS,
+ constants.STAGE_REJECTION_INPROGRESS, constants.STAGE_BLOCKING_INPROGRESS]:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_INPROGRESS + short_workspace_id
+ elif status == constants.STAGE_APPROVED:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_APPROVED + tre_id
+ elif status == constants.STAGE_REJECTED:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_REJECTED + short_workspace_id
+ elif status == constants.STAGE_BLOCKED_BY_SCAN:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_BLOCKED + short_workspace_id
+
+
+def get_stage_from_status(request_type: str, status: str) -> str:
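+    """Map an airlock request status to its v2 container-metadata stage value.
+
+    Example: get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_SUBMITTED)
+    returns constants.STAGE_IMPORT_IN_PROGRESS ("import-in-progress").
+    """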
+ if request_type == constants.IMPORT_TYPE:
+ if status == constants.STAGE_DRAFT:
+ return constants.STAGE_IMPORT_EXTERNAL
+ elif status in [constants.STAGE_SUBMITTED, constants.STAGE_IN_REVIEW]:
+ return constants.STAGE_IMPORT_IN_PROGRESS
+ elif status in [constants.STAGE_APPROVED, constants.STAGE_APPROVAL_INPROGRESS]:
+ return constants.STAGE_IMPORT_APPROVED
+ elif status in [constants.STAGE_REJECTED, constants.STAGE_REJECTION_INPROGRESS]:
+ return constants.STAGE_IMPORT_REJECTED
+ elif status in [constants.STAGE_BLOCKED_BY_SCAN, constants.STAGE_BLOCKING_INPROGRESS]:
+ return constants.STAGE_IMPORT_BLOCKED
+ else: # export
+ if status == constants.STAGE_DRAFT:
+ return constants.STAGE_EXPORT_INTERNAL
+ elif status in [constants.STAGE_SUBMITTED, constants.STAGE_IN_REVIEW]:
+ return constants.STAGE_EXPORT_IN_PROGRESS
+ elif status in [constants.STAGE_APPROVED, constants.STAGE_APPROVAL_INPROGRESS]:
+ return constants.STAGE_EXPORT_APPROVED
+ elif status in [constants.STAGE_REJECTED, constants.STAGE_REJECTION_INPROGRESS]:
+ return constants.STAGE_EXPORT_REJECTED
+ elif status in [constants.STAGE_BLOCKED_BY_SCAN, constants.STAGE_BLOCKING_INPROGRESS]:
+ return constants.STAGE_EXPORT_BLOCKED
+
+ return "unknown"
diff --git a/airlock_processor/shared_code/blob_operations_metadata.py b/airlock_processor/shared_code/blob_operations_metadata.py
new file mode 100644
index 000000000..de65501a8
--- /dev/null
+++ b/airlock_processor/shared_code/blob_operations_metadata.py
@@ -0,0 +1,186 @@
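+"""Container-metadata helpers for the v2 consolidated airlock storage accounts.
+
+In v2, a request's lifecycle stage is recorded as container metadata rather than
+being implied by which per-stage storage account holds the container.
+"""
+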
+import os
+import logging
+from datetime import datetime, UTC
+from typing import Dict, Optional
+
+from azure.core.exceptions import HttpResponseError, ResourceExistsError, ResourceNotFoundError
+from azure.identity import DefaultAzureCredential
+from azure.storage.blob import BlobServiceClient
+
+
+def get_account_url(account_name: str) -> str:
+ return f"https://{account_name}.blob.{get_storage_endpoint_suffix()}/"
+
+
+def get_storage_endpoint_suffix() -> str:
+ return os.environ.get("STORAGE_ENDPOINT_SUFFIX", "core.windows.net")
+
+
+def get_credential():
+ managed_identity = os.environ.get("MANAGED_IDENTITY_CLIENT_ID")
+ if managed_identity:
+ logging.info("using the Airlock processor's managed identity to get credentials.")
+ return DefaultAzureCredential(managed_identity_client_id=managed_identity,
+ exclude_shared_token_cache_credential=True)
+ return DefaultAzureCredential()
+
+
+def create_container_with_metadata(account_name: str, request_id: str, stage: str,
+                                   workspace_id: Optional[str] = None, request_type: Optional[str] = None,
+                                   created_by: Optional[str] = None) -> None:
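+    """Create the request's container and stamp it with its initial stage metadata.
+
+    Writes stage, stage_history, created_at and last_stage_change, plus
+    workspace_id, request_type and created_by when provided. Idempotent:
+    an existing container is left untouched.
+    """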
+ try:
+ container_name = request_id
+ blob_service_client = BlobServiceClient(
+ account_url=get_account_url(account_name),
+ credential=get_credential()
+ )
+
+ # Prepare initial metadata
+ metadata = {
+ "stage": stage,
+ "stage_history": stage,
+ "created_at": datetime.now(UTC).isoformat(),
+ "last_stage_change": datetime.now(UTC).isoformat(),
+ }
+
+ if workspace_id:
+ metadata["workspace_id"] = workspace_id
+ if request_type:
+ metadata["request_type"] = request_type
+ if created_by:
+ metadata["created_by"] = created_by
+
+ # Create container with metadata
+ container_client = blob_service_client.get_container_client(container_name)
+ container_client.create_container(metadata=metadata)
+
+ logging.info(f'Container created for request id: {request_id} with stage: {stage}')
+
+ except ResourceExistsError:
+ logging.info(f'Did not create a new container. Container already exists for request id: {request_id}.')
+
+
+def update_container_stage(account_name: str, request_id: str, new_stage: str,
+                           changed_by: Optional[str] = None,
+                           additional_metadata: Optional[Dict[str, str]] = None) -> None:
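+    """Advance the container's stage metadata in place (no data movement).
+
+    Appends new_stage to the comma-separated stage_history, refreshes
+    last_stage_change, and merges any additional_metadata (e.g. scan results).
+    Example history after an approved import:
+        "import-external,import-in-progress,import-approved"
+    """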
+ try:
+ container_name = request_id
+ blob_service_client = BlobServiceClient(
+ account_url=get_account_url(account_name),
+ credential=get_credential()
+ )
+ container_client = blob_service_client.get_container_client(container_name)
+
+ # Get current metadata
+ try:
+ properties = container_client.get_container_properties()
+ metadata = properties.metadata.copy()
+ except ResourceNotFoundError:
+ logging.error(f"Container {request_id} not found in account {account_name}")
+ raise
+
+ # Track old stage for logging
+ old_stage = metadata.get('stage', 'unknown')
+
+ # Update stage metadata
+ metadata['stage'] = new_stage
+
+ # Update stage history
+ stage_history = metadata.get('stage_history', old_stage)
+ metadata['stage_history'] = f"{stage_history},{new_stage}"
+
+ # Update timestamp
+ metadata['last_stage_change'] = datetime.now(UTC).isoformat()
+
+ # Track who made the change
+ if changed_by:
+ metadata['last_changed_by'] = changed_by
+
+ # Add any additional metadata (e.g., scan results)
+ if additional_metadata:
+ metadata.update(additional_metadata)
+
+ # Apply the updated metadata
+ container_client.set_container_metadata(metadata)
+
+ logging.info(
+ f"Updated container {request_id} from stage '{old_stage}' to '{new_stage}' in account {account_name}"
+ )
+
+ except HttpResponseError as e:
+ logging.error(f"Failed to update container metadata: {str(e)}")
+ raise
+
+
+def get_container_stage(account_name: str, request_id: str) -> str:
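+    """Return the container's current 'stage' metadata value ('unknown' if unset)."""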
+ container_name = request_id
+ blob_service_client = BlobServiceClient(
+ account_url=get_account_url(account_name),
+ credential=get_credential()
+ )
+ container_client = blob_service_client.get_container_client(container_name)
+
+ try:
+ properties = container_client.get_container_properties()
+ return properties.metadata.get('stage', 'unknown')
+ except ResourceNotFoundError:
+ logging.error(f"Container {request_id} not found in account {account_name}")
+ raise
+
+
+def get_container_metadata(account_name: str, request_id: str) -> Dict[str, str]:
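+    """Return all metadata on the request's container (raises ResourceNotFoundError if missing)."""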
+ container_name = request_id
+ blob_service_client = BlobServiceClient(
+ account_url=get_account_url(account_name),
+ credential=get_credential()
+ )
+ container_client = blob_service_client.get_container_client(container_name)
+
+ try:
+ properties = container_client.get_container_properties()
+ return properties.metadata
+ except ResourceNotFoundError:
+ logging.error(f"Container {request_id} not found in account {account_name}")
+ raise
+
+
+def get_blob_client_from_blob_info(storage_account_name: str, container_name: str, blob_name: str):
+ source_blob_service_client = BlobServiceClient(
+ account_url=get_account_url(storage_account_name),
+ credential=get_credential()
+ )
+ source_container_client = source_blob_service_client.get_container_client(container_name)
+ return source_container_client.get_blob_client(blob_name)
+
+
+def get_request_files(account_name: str, request_id: str) -> list:
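+    """List the blobs in the request's container as [{"name": ..., "size": ...}, ...]."""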
+ files = []
+ blob_service_client = BlobServiceClient(
+ account_url=get_account_url(account_name),
+ credential=get_credential()
+ )
+ container_client = blob_service_client.get_container_client(container=request_id)
+
+ for blob in container_client.list_blobs():
+ files.append({"name": blob.name, "size": blob.size})
+
+ return files
+
+
+def delete_container_by_request_id(account_name: str, request_id: str) -> None:
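+    """Delete the request's container; a missing container is treated as already deleted."""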
+ try:
+ container_name = request_id
+ blob_service_client = BlobServiceClient(
+ account_url=get_account_url(account_name),
+ credential=get_credential()
+ )
+ container_client = blob_service_client.get_container_client(container_name)
+ container_client.delete_container()
+
+ logging.info(f"Deleted container {request_id} from account {account_name}")
+
+ except ResourceNotFoundError:
+ logging.warning(f"Container {request_id} not found in account {account_name}, may have been already deleted")
+ except HttpResponseError as e:
+ logging.error(f"Failed to delete container: {str(e)}")
+ raise
diff --git a/airlock_processor/shared_code/constants.py b/airlock_processor/shared_code/constants.py
index 277312d1c..cc88ce455 100644
--- a/airlock_processor/shared_code/constants.py
+++ b/airlock_processor/shared_code/constants.py
@@ -4,6 +4,25 @@
IMPORT_TYPE = "import"
EXPORT_TYPE = "export"
+
+# Consolidated storage account names (metadata-based approach)
+STORAGE_ACCOUNT_NAME_AIRLOCK_CORE = "stalairlock" # Consolidated core account
+STORAGE_ACCOUNT_NAME_AIRLOCK_WORKSPACE_GLOBAL = "stalairlockg" # Global workspace account for all workspaces
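+# Deployed account names are <prefix> + TRE_ID, e.g. "stalairlocktre123" for TRE_ID "tre123".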
+
+# Stage metadata values for container metadata
+STAGE_IMPORT_EXTERNAL = "import-external"
+STAGE_IMPORT_IN_PROGRESS = "import-in-progress"
+STAGE_IMPORT_APPROVED = "import-approved"
+STAGE_IMPORT_REJECTED = "import-rejected"
+STAGE_IMPORT_BLOCKED = "import-blocked"
+STAGE_EXPORT_INTERNAL = "export-internal"
+STAGE_EXPORT_IN_PROGRESS = "export-in-progress"
+STAGE_EXPORT_APPROVED = "export-approved"
+STAGE_EXPORT_REJECTED = "export-rejected"
+STAGE_EXPORT_BLOCKED = "export-blocked"
+
+# Legacy storage account names (for backwards compatibility)
+# These will be removed after migration is complete
# Import
STORAGE_ACCOUNT_NAME_IMPORT_EXTERNAL = "stalimex"
STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS = "stalimip"
diff --git a/airlock_processor/tests/shared_code/test_airlock_storage_helper.py b/airlock_processor/tests/shared_code/test_airlock_storage_helper.py
new file mode 100644
index 000000000..2f6e91b04
--- /dev/null
+++ b/airlock_processor/tests/shared_code/test_airlock_storage_helper.py
@@ -0,0 +1,333 @@
+import os
+from unittest.mock import patch
+
+from shared_code.airlock_storage_helper import (
+ get_storage_account_name_for_request,
+ get_stage_from_status
+)
+from shared_code import constants
+
+
+class TestGetStageFromStatus:
+
+ def test_import_draft_maps_to_import_external(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_DRAFT)
+ assert stage == constants.STAGE_IMPORT_EXTERNAL
+
+ def test_import_submitted_maps_to_import_in_progress(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_SUBMITTED)
+ assert stage == constants.STAGE_IMPORT_IN_PROGRESS
+
+ def test_import_in_review_maps_to_import_in_progress(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_IN_REVIEW)
+ assert stage == constants.STAGE_IMPORT_IN_PROGRESS
+
+ def test_import_approved_maps_to_import_approved(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_APPROVED)
+ assert stage == constants.STAGE_IMPORT_APPROVED
+
+ def test_import_approval_in_progress_maps_to_import_approved(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_APPROVAL_INPROGRESS)
+ assert stage == constants.STAGE_IMPORT_APPROVED
+
+ def test_import_rejected_maps_to_import_rejected(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_REJECTED)
+ assert stage == constants.STAGE_IMPORT_REJECTED
+
+ def test_import_rejection_in_progress_maps_to_import_rejected(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_REJECTION_INPROGRESS)
+ assert stage == constants.STAGE_IMPORT_REJECTED
+
+ def test_import_blocked_maps_to_import_blocked(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_BLOCKED_BY_SCAN)
+ assert stage == constants.STAGE_IMPORT_BLOCKED
+
+ def test_import_blocking_in_progress_maps_to_import_blocked(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_BLOCKING_INPROGRESS)
+ assert stage == constants.STAGE_IMPORT_BLOCKED
+
+ def test_export_draft_maps_to_export_internal(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_DRAFT)
+ assert stage == constants.STAGE_EXPORT_INTERNAL
+
+ def test_export_submitted_maps_to_export_in_progress(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_SUBMITTED)
+ assert stage == constants.STAGE_EXPORT_IN_PROGRESS
+
+ def test_export_in_review_maps_to_export_in_progress(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_IN_REVIEW)
+ assert stage == constants.STAGE_EXPORT_IN_PROGRESS
+
+ def test_export_approved_maps_to_export_approved(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_APPROVED)
+ assert stage == constants.STAGE_EXPORT_APPROVED
+
+ def test_export_approval_in_progress_maps_to_export_approved(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_APPROVAL_INPROGRESS)
+ assert stage == constants.STAGE_EXPORT_APPROVED
+
+ def test_export_rejected_maps_to_export_rejected(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_REJECTED)
+ assert stage == constants.STAGE_EXPORT_REJECTED
+
+ def test_export_rejection_in_progress_maps_to_export_rejected(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_REJECTION_INPROGRESS)
+ assert stage == constants.STAGE_EXPORT_REJECTED
+
+ def test_export_blocked_maps_to_export_blocked(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_BLOCKED_BY_SCAN)
+ assert stage == constants.STAGE_EXPORT_BLOCKED
+
+ def test_export_blocking_in_progress_maps_to_export_blocked(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_BLOCKING_INPROGRESS)
+ assert stage == constants.STAGE_EXPORT_BLOCKED
+
+ def test_unknown_status_returns_unknown(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, "nonexistent_status")
+ assert stage == "unknown"
+
+
+class TestGetStorageAccountNameForRequestConsolidated:
+
+ @patch.dict(os.environ, {"TRE_ID": "tre123"}, clear=True)
+ class TestImportRequests:
+
+ def test_import_draft_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_DRAFT, "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_submitted_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_SUBMITTED, "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_in_review_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_IN_REVIEW, "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_approved_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_APPROVED, "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_import_approval_in_progress_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_APPROVAL_INPROGRESS, "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_import_rejected_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_REJECTED, "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_rejection_in_progress_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_REJECTION_INPROGRESS, "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_blocked_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_BLOCKED_BY_SCAN, "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_blocking_in_progress_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_BLOCKING_INPROGRESS, "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ @patch.dict(os.environ, {"TRE_ID": "tre123"}, clear=True)
+ class TestExportRequests:
+
+ def test_export_draft_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_DRAFT, "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_export_submitted_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_SUBMITTED, "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_export_approved_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_APPROVED, "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_export_approval_in_progress_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_APPROVAL_INPROGRESS, "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_export_rejected_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_REJECTED, "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_export_blocked_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_BLOCKED_BY_SCAN, "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+
+class TestGetStorageAccountNameForRequestLegacy:
+
+ @patch.dict(os.environ, {"TRE_ID": "tre123"}, clear=True)
+ class TestImportRequestsLegacy:
+
+ def test_import_draft_uses_external_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_DRAFT, "ws12", airlock_version=1
+ )
+ assert account == "stalimextre123"
+
+ def test_import_submitted_uses_inprogress_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_SUBMITTED, "ws12", airlock_version=1
+ )
+ assert account == "stalimiptre123"
+
+ def test_import_approved_uses_workspace_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_APPROVED, "ws12", airlock_version=1
+ )
+ assert account == "stalimappwsws12"
+
+ def test_import_rejected_uses_rejected_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_REJECTED, "ws12", airlock_version=1
+ )
+ assert account == "stalimrejtre123"
+
+ def test_import_blocked_uses_blocked_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, constants.STAGE_BLOCKED_BY_SCAN, "ws12", airlock_version=1
+ )
+ assert account == "stalimblockedtre123"
+
+ @patch.dict(os.environ, {"TRE_ID": "tre123"}, clear=True)
+ class TestExportRequestsLegacy:
+
+ def test_export_draft_uses_internal_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_DRAFT, "ws12", airlock_version=1
+ )
+ assert account == "stalexintwsws12"
+
+ def test_export_submitted_uses_inprogress_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_SUBMITTED, "ws12", airlock_version=1
+ )
+ assert account == "stalexipwsws12"
+
+ def test_export_approved_uses_approved_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_APPROVED, "ws12", airlock_version=1
+ )
+ assert account == "stalexapptre123"
+
+ def test_export_rejected_uses_rejected_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_REJECTED, "ws12", airlock_version=1
+ )
+ assert account == "stalexrejwsws12"
+
+ def test_export_blocked_uses_blocked_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, constants.STAGE_BLOCKED_BY_SCAN, "ws12", airlock_version=1
+ )
+ assert account == "stalexblockedwsws12"
+
+
+class TestABACStageConstants:
+
+ def test_stage_import_external_value(self):
+ assert constants.STAGE_IMPORT_EXTERNAL == "import-external"
+
+ def test_stage_import_in_progress_value(self):
+ assert constants.STAGE_IMPORT_IN_PROGRESS == "import-in-progress"
+
+ def test_stage_import_approved_value(self):
+ assert constants.STAGE_IMPORT_APPROVED == "import-approved"
+
+ def test_stage_import_rejected_value(self):
+ assert constants.STAGE_IMPORT_REJECTED == "import-rejected"
+
+ def test_stage_import_blocked_value(self):
+ assert constants.STAGE_IMPORT_BLOCKED == "import-blocked"
+
+ def test_stage_export_internal_value(self):
+ assert constants.STAGE_EXPORT_INTERNAL == "export-internal"
+
+ def test_stage_export_in_progress_value(self):
+ assert constants.STAGE_EXPORT_IN_PROGRESS == "export-in-progress"
+
+ def test_stage_export_approved_value(self):
+ assert constants.STAGE_EXPORT_APPROVED == "export-approved"
+
+ def test_stage_export_rejected_value(self):
+ assert constants.STAGE_EXPORT_REJECTED == "export-rejected"
+
+ def test_stage_export_blocked_value(self):
+ assert constants.STAGE_EXPORT_BLOCKED == "export-blocked"
+
+
+class TestABACAccessPatterns:
+
+ ABAC_ALLOWED_STAGES = ['import-external', 'import-in-progress', 'export-approved']
+
+ def test_import_draft_is_api_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_DRAFT)
+ assert stage in self.ABAC_ALLOWED_STAGES
+
+ def test_import_submitted_is_api_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_SUBMITTED)
+ assert stage in self.ABAC_ALLOWED_STAGES
+
+ def test_import_in_review_is_api_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_IN_REVIEW)
+ assert stage in self.ABAC_ALLOWED_STAGES
+
+ def test_import_approved_is_not_api_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_APPROVED)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_import_rejected_is_not_api_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_REJECTED)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_import_blocked_is_not_api_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, constants.STAGE_BLOCKED_BY_SCAN)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_export_draft_is_not_api_accessible(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_DRAFT)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_export_submitted_is_not_api_accessible(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_SUBMITTED)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_export_approved_is_api_accessible(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_APPROVED)
+ assert stage in self.ABAC_ALLOWED_STAGES
+
+ def test_export_rejected_is_not_api_accessible(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, constants.STAGE_REJECTED)
+ assert stage not in self.ABAC_ALLOWED_STAGES
diff --git a/airlock_processor/tests/shared_code/test_blob_operations_metadata.py b/airlock_processor/tests/shared_code/test_blob_operations_metadata.py
new file mode 100644
index 000000000..74b504e99
--- /dev/null
+++ b/airlock_processor/tests/shared_code/test_blob_operations_metadata.py
@@ -0,0 +1,463 @@
+import pytest
+from unittest.mock import MagicMock, patch
+
+from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError, HttpResponseError
+
+from shared_code.blob_operations_metadata import (
+ get_account_url,
+ get_storage_endpoint_suffix,
+ create_container_with_metadata,
+ update_container_stage,
+ get_container_stage,
+ get_container_metadata,
+ get_request_files,
+ delete_container_by_request_id
+)
+
+
+class TestGetAccountUrl:
+
+ @patch.dict('os.environ', {"STORAGE_ENDPOINT_SUFFIX": "core.windows.net"}, clear=True)
+ def test_returns_correct_url_format(self):
+ url = get_account_url("mystorageaccount")
+ assert url == "https://mystorageaccount.blob.core.windows.net/"
+
+ @patch.dict('os.environ', {"STORAGE_ENDPOINT_SUFFIX": "core.chinacloudapi.cn"}, clear=True)
+ def test_uses_custom_endpoint_suffix(self):
+ url = get_account_url("mystorageaccount")
+ assert url == "https://mystorageaccount.blob.core.chinacloudapi.cn/"
+
+ @patch.dict('os.environ', {}, clear=True)
+ def test_uses_default_endpoint_when_not_set(self):
+ url = get_account_url("mystorageaccount")
+ assert url == "https://mystorageaccount.blob.core.windows.net/"
+
+
+class TestGetStorageEndpointSuffix:
+
+ @patch.dict('os.environ', {"STORAGE_ENDPOINT_SUFFIX": "core.usgovcloudapi.net"}, clear=True)
+ def test_returns_configured_suffix(self):
+ suffix = get_storage_endpoint_suffix()
+ assert suffix == "core.usgovcloudapi.net"
+
+ @patch.dict('os.environ', {}, clear=True)
+ def test_returns_default_when_not_configured(self):
+ suffix = get_storage_endpoint_suffix()
+ assert suffix == "core.windows.net"
+
+
+class TestCreateContainerWithMetadata:
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_creates_container_with_stage_metadata(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ create_container_with_metadata(
+ account_name="storageaccount",
+ request_id="request-123",
+ stage="import-external"
+ )
+
+ mock_container_client.create_container.assert_called_once()
+ call_args = mock_container_client.create_container.call_args
+ metadata = call_args.kwargs['metadata']
+
+ assert metadata['stage'] == "import-external"
+ assert 'created_at' in metadata
+ assert 'last_stage_change' in metadata
+ assert metadata['stage_history'] == "import-external"
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_creates_container_with_all_optional_metadata(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ create_container_with_metadata(
+ account_name="storageaccount",
+ request_id="request-123",
+ stage="export-internal",
+ workspace_id="ws-456",
+ request_type="export",
+ created_by="user@example.com"
+ )
+
+ call_args = mock_container_client.create_container.call_args
+ metadata = call_args.kwargs['metadata']
+
+ assert metadata['stage'] == "export-internal"
+ assert metadata['workspace_id'] == "ws-456"
+ assert metadata['request_type'] == "export"
+ assert metadata['created_by'] == "user@example.com"
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_handles_container_already_exists(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_container_client.create_container.side_effect = ResourceExistsError("Container already exists")
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ create_container_with_metadata(
+ account_name="storageaccount",
+ request_id="request-123",
+ stage="import-external"
+ )
+
+
+class TestUpdateContainerStage:
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_updates_stage_metadata(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_properties = MagicMock()
+ mock_properties.metadata = {
+ 'stage': 'import-external',
+ 'stage_history': 'import-external',
+ 'created_at': '2024-01-01T00:00:00'
+ }
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ update_container_stage(
+ account_name="storageaccount",
+ request_id="request-123",
+ new_stage="import-in-progress"
+ )
+
+ mock_container_client.set_container_metadata.assert_called_once()
+ call_args = mock_container_client.set_container_metadata.call_args
+ updated_metadata = call_args.args[0]
+
+ assert updated_metadata['stage'] == "import-in-progress"
+ assert "import-in-progress" in updated_metadata['stage_history']
+ assert 'last_stage_change' in updated_metadata
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_appends_to_stage_history(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_properties = MagicMock()
+ mock_properties.metadata = {
+ 'stage': 'import-external',
+ 'stage_history': 'import-external',
+ }
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ update_container_stage(
+ account_name="storageaccount",
+ request_id="request-123",
+ new_stage="import-in-progress"
+ )
+
+ call_args = mock_container_client.set_container_metadata.call_args
+ updated_metadata = call_args.args[0]
+
+ assert updated_metadata['stage_history'] == "import-external,import-in-progress"
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_adds_changed_by_when_provided(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_properties = MagicMock()
+ mock_properties.metadata = {'stage': 'import-external', 'stage_history': 'import-external'}
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ update_container_stage(
+ account_name="storageaccount",
+ request_id="request-123",
+ new_stage="import-in-progress",
+ changed_by="processor"
+ )
+
+ call_args = mock_container_client.set_container_metadata.call_args
+ updated_metadata = call_args.args[0]
+
+ assert updated_metadata['last_changed_by'] == "processor"
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_adds_additional_metadata(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_properties = MagicMock()
+ mock_properties.metadata = {'stage': 'import-in-progress', 'stage_history': 'import-external,import-in-progress'}
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ update_container_stage(
+ account_name="storageaccount",
+ request_id="request-123",
+ new_stage="import-approved",
+ additional_metadata={"scan_result": "clean", "scan_time": "2024-01-01T12:00:00"}
+ )
+
+ call_args = mock_container_client.set_container_metadata.call_args
+ updated_metadata = call_args.args[0]
+
+ assert updated_metadata['scan_result'] == "clean"
+ assert updated_metadata['scan_time'] == "2024-01-01T12:00:00"
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_raises_when_container_not_found(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_container_client.get_container_properties.side_effect = ResourceNotFoundError("Container not found")
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ with pytest.raises(ResourceNotFoundError):
+ update_container_stage(
+ account_name="storageaccount",
+ request_id="nonexistent-request",
+ new_stage="import-in-progress"
+ )
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_raises_on_http_error(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_properties = MagicMock()
+ mock_properties.metadata = {'stage': 'import-external'}
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_container_client.set_container_metadata.side_effect = HttpResponseError("Service Error")
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ with pytest.raises(HttpResponseError):
+ update_container_stage(
+ account_name="storageaccount",
+ request_id="request-123",
+ new_stage="import-in-progress"
+ )
+
+
+class TestGetContainerStage:
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_returns_stage_from_metadata(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_properties = MagicMock()
+ mock_properties.metadata = {'stage': 'import-in-progress'}
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ stage = get_container_stage(
+ account_name="storageaccount",
+ request_id="request-123"
+ )
+
+ assert stage == "import-in-progress"
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_returns_unknown_when_stage_missing(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_properties = MagicMock()
+ mock_properties.metadata = {}
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ stage = get_container_stage(
+ account_name="storageaccount",
+ request_id="request-123"
+ )
+
+ assert stage == "unknown"
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_raises_when_container_not_found(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_container_client.get_container_properties.side_effect = ResourceNotFoundError("Container not found")
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ with pytest.raises(ResourceNotFoundError):
+ get_container_stage(
+ account_name="storageaccount",
+ request_id="nonexistent-request"
+ )
+
+
+class TestGetContainerMetadata:
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_returns_all_metadata(self, mock_get_credential, mock_blob_service_client):
+ expected_metadata = {
+ 'stage': 'import-in-progress',
+ 'workspace_id': 'ws-123',
+ 'request_type': 'import',
+ 'created_at': '2024-01-01T00:00:00',
+ 'stage_history': 'import-external,import-in-progress'
+ }
+
+ mock_container_client = MagicMock()
+ mock_properties = MagicMock()
+ mock_properties.metadata = expected_metadata
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ metadata = get_container_metadata(
+ account_name="storageaccount",
+ request_id="request-123"
+ )
+
+ assert metadata == expected_metadata
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_raises_when_container_not_found(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_container_client.get_container_properties.side_effect = ResourceNotFoundError("Container not found")
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ with pytest.raises(ResourceNotFoundError):
+ get_container_metadata(
+ account_name="storageaccount",
+ request_id="nonexistent-request"
+ )
+
+
+class TestGetRequestFiles:
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_returns_list_of_files(self, mock_get_credential, mock_blob_service_client):
+ mock_blob1 = MagicMock()
+ mock_blob1.name = "data.csv"
+ mock_blob1.size = 1024
+
+ mock_blob2 = MagicMock()
+ mock_blob2.name = "readme.txt"
+ mock_blob2.size = 256
+
+ mock_container_client = MagicMock()
+ mock_container_client.list_blobs.return_value = [mock_blob1, mock_blob2]
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ files = get_request_files(
+ account_name="storageaccount",
+ request_id="request-123"
+ )
+
+ assert len(files) == 2
+ assert files[0] == {"name": "data.csv", "size": 1024}
+ assert files[1] == {"name": "readme.txt", "size": 256}
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_returns_empty_list_when_no_files(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_container_client.list_blobs.return_value = []
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ files = get_request_files(
+ account_name="storageaccount",
+ request_id="request-123"
+ )
+
+ assert files == []
+
+
+class TestDeleteContainerByRequestId:
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_deletes_container(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ delete_container_by_request_id(
+ account_name="storageaccount",
+ request_id="request-123"
+ )
+
+ mock_container_client.delete_container.assert_called_once()
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_handles_container_not_found(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_container_client.delete_container.side_effect = ResourceNotFoundError("Container not found")
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ delete_container_by_request_id(
+ account_name="storageaccount",
+ request_id="nonexistent-request"
+ )
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_raises_on_http_error(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_container_client.delete_container.side_effect = HttpResponseError("Service Error")
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ with pytest.raises(HttpResponseError):
+ delete_container_by_request_id(
+ account_name="storageaccount",
+ request_id="request-123"
+ )
+
+
+class TestStageTransitions:
+
+ ABAC_ALLOWED_STAGES = ['import-external', 'import-in-progress', 'export-approved']
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_import_stage_transition_updates_history(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+
+ current_metadata = {
+ 'stage': 'import-external',
+ 'stage_history': 'import-external'
+ }
+ mock_properties = MagicMock()
+ mock_properties.metadata = current_metadata.copy()
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ update_container_stage(
+ account_name="storageaccount",
+ request_id="request-123",
+ new_stage="import-in-progress"
+ )
+
+ call_args = mock_container_client.set_container_metadata.call_args
+ updated_metadata = call_args.args[0]
+
+ assert updated_metadata['stage'] == "import-in-progress"
+ assert updated_metadata['stage_history'] == "import-external,import-in-progress"
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch("shared_code.blob_operations_metadata.get_credential")
+ def test_scan_result_metadata_added_on_approval(self, mock_get_credential, mock_blob_service_client):
+ mock_container_client = MagicMock()
+ mock_properties = MagicMock()
+ mock_properties.metadata = {
+ 'stage': 'import-in-progress',
+ 'stage_history': 'import-external,import-in-progress'
+ }
+ mock_container_client.get_container_properties.return_value = mock_properties
+ mock_blob_service_client.return_value.get_container_client.return_value = mock_container_client
+
+ update_container_stage(
+ account_name="storageaccount",
+ request_id="request-123",
+ new_stage="import-approved",
+ additional_metadata={
+ "scan_result": "clean",
+ "scan_completed_at": "2024-01-01T12:00:00Z"
+ }
+ )
+
+ call_args = mock_container_client.set_container_metadata.call_args
+ updated_metadata = call_args.args[0]
+
+ assert updated_metadata['stage'] == "import-approved"
+ assert updated_metadata['scan_result'] == "clean"
+ assert "import-approved" not in self.ABAC_ALLOWED_STAGES
diff --git a/airlock_processor/tests/test_blob_created_trigger.py b/airlock_processor/tests/test_blob_created_trigger.py
new file mode 100644
index 000000000..70953221a
--- /dev/null
+++ b/airlock_processor/tests/test_blob_created_trigger.py
@@ -0,0 +1,119 @@
+import json
+from mock import MagicMock, patch
+
+import azure.functions as func
+
+from shared_code import constants
+from BlobCreatedTrigger import main
+
+
+def _make_service_bus_message(topic: str, request_id: str, blob_name: str = "test.txt"):
+ subject = f"/blobServices/default/containers/{request_id}/blobs/{blob_name}"
+ body = json.dumps({"topic": topic, "subject": subject})
+ encoded = body.encode("utf-8")
+ msg = MagicMock(spec=func.ServiceBusMessage)
+ msg.get_body.return_value = encoded
+ return msg
+
+
+def _mock_blob_client():
+ """Create a mock blob client that returns valid metadata for send_delete_event."""
+ mock_client = MagicMock()
+ mock_client.get_blob_properties.return_value = {"metadata": {"copied_from": '["container-prev"]'}}
+ return mock_client
+
+
+class TestV2BlobCreated():
+
+ @patch("BlobCreatedTrigger.get_blob_client_from_blob_info", return_value=_mock_blob_client())
+ @patch("shared_code.blob_operations_metadata.get_container_metadata", return_value={"stage": constants.STAGE_IMPORT_APPROVED, "workspace_id": "ws01"})
+ @patch("BlobCreatedTrigger.get_blob_info_from_topic_and_subject")
+ def test_v2_import_approved_emits_step_result(self, mock_get_blob_info, mock_get_metadata, mock_blob_client):
+ """When a blob lands in workspace-global with stage=import-approved, emit StepResult approved."""
+ topic = "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Storage/storageAccounts/stalairlockgtre123"
+ request_id = "req-001"
+ mock_get_blob_info.return_value = ("stalairlockgtre123", request_id, "test.txt")
+
+ step_result = MagicMock()
+ deletion_event = MagicMock()
+
+ msg = _make_service_bus_message(topic, request_id)
+ main(msg=msg, stepResultEvent=step_result, dataDeletionEvent=deletion_event)
+
+ step_result.set.assert_called_once()
+ event_data = step_result.set.call_args[0][0]
+ assert event_data.get_json()["completed_step"] == constants.STAGE_APPROVAL_INPROGRESS
+ assert event_data.get_json()["new_status"] == constants.STAGE_APPROVED
+
+ @patch("BlobCreatedTrigger.get_blob_client_from_blob_info", return_value=_mock_blob_client())
+ @patch("shared_code.blob_operations_metadata.get_container_metadata", return_value={"stage": constants.STAGE_EXPORT_APPROVED, "workspace_id": "ws01"})
+ @patch("BlobCreatedTrigger.get_blob_info_from_topic_and_subject")
+ def test_v2_export_approved_emits_step_result(self, mock_get_blob_info, mock_get_metadata, mock_blob_client):
+ """When a blob lands in core with stage=export-approved, emit StepResult approved."""
+ topic = "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Storage/storageAccounts/stalairlocktre123"
+ request_id = "req-002"
+ mock_get_blob_info.return_value = ("stalairlocktre123", request_id, "test.txt")
+
+ step_result = MagicMock()
+ deletion_event = MagicMock()
+
+ msg = _make_service_bus_message(topic, request_id)
+ main(msg=msg, stepResultEvent=step_result, dataDeletionEvent=deletion_event)
+
+ step_result.set.assert_called_once()
+ event_data = step_result.set.call_args[0][0]
+ assert event_data.get_json()["completed_step"] == constants.STAGE_APPROVAL_INPROGRESS
+ assert event_data.get_json()["new_status"] == constants.STAGE_APPROVED
+
+ @patch("shared_code.blob_operations_metadata.get_container_metadata", return_value={"stage": constants.STAGE_IMPORT_EXTERNAL, "workspace_id": "ws01"})
+ @patch("BlobCreatedTrigger.get_blob_info_from_topic_and_subject")
+ def test_v2_non_terminal_stage_does_not_emit_step_result(self, mock_get_blob_info, mock_get_metadata):
+ """When a blob is created in a non-terminal stage (e.g., import-external from user upload), skip."""
+ topic = "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Storage/storageAccounts/stalairlocktre123"
+ request_id = "req-003"
+ mock_get_blob_info.return_value = ("stalairlocktre123", request_id, "test.txt")
+
+ step_result = MagicMock()
+ deletion_event = MagicMock()
+
+ msg = _make_service_bus_message(topic, request_id)
+ main(msg=msg, stepResultEvent=step_result, dataDeletionEvent=deletion_event)
+
+ step_result.set.assert_not_called()
+
+ @patch("shared_code.blob_operations_metadata.get_container_metadata", side_effect=Exception("not found"))
+ @patch("BlobCreatedTrigger.get_blob_info_from_topic_and_subject")
+ def test_v2_metadata_read_failure_skips_gracefully(self, mock_get_blob_info, mock_get_metadata):
+ """If container metadata can't be read, log warning and return without error."""
+ topic = "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Storage/storageAccounts/stalairlockgtre123"
+ request_id = "req-004"
+ mock_get_blob_info.return_value = ("stalairlockgtre123", request_id, "test.txt")
+
+ step_result = MagicMock()
+ deletion_event = MagicMock()
+
+ msg = _make_service_bus_message(topic, request_id)
+ main(msg=msg, stepResultEvent=step_result, dataDeletionEvent=deletion_event)
+
+ step_result.set.assert_not_called()
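+
+    @patch("BlobCreatedTrigger.get_blob_client_from_blob_info", return_value=_mock_blob_client())
+    @patch("shared_code.blob_operations_metadata.get_container_metadata", return_value={"stage": constants.STAGE_IMPORT_REJECTED, "workspace_id": "ws01"})
+    @patch("BlobCreatedTrigger.get_blob_info_from_topic_and_subject")
+    def test_v2_import_rejected_emits_step_result(self, mock_get_blob_info, mock_get_metadata, mock_blob_client):
+        """Illustrative sketch mirroring the approved-path tests above; assumes the
+        v2 stage map routes import-rejected to (rejection_in_progress, rejected)."""
+        topic = "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Storage/storageAccounts/stalairlocktre123"
+        request_id = "req-005"
+        mock_get_blob_info.return_value = ("stalairlocktre123", request_id, "test.txt")
+
+        step_result = MagicMock()
+        deletion_event = MagicMock()
+
+        msg = _make_service_bus_message(topic, request_id)
+        main(msg=msg, stepResultEvent=step_result, dataDeletionEvent=deletion_event)
+
+        step_result.set.assert_called_once()
+        event_data = step_result.set.call_args[0][0]
+        assert event_data.get_json()["completed_step"] == constants.STAGE_REJECTION_INPROGRESS
+        assert event_data.get_json()["new_status"] == constants.STAGE_REJECTED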
diff --git a/airlock_processor/tests/test_status_change_queue_trigger.py b/airlock_processor/tests/test_status_change_queue_trigger.py
index 4ce518c09..d3d7c78df 100644
--- a/airlock_processor/tests/test_status_change_queue_trigger.py
+++ b/airlock_processor/tests/test_status_change_queue_trigger.py
@@ -4,7 +4,7 @@
from mock import MagicMock, patch
from pydantic import ValidationError
-from StatusChangedQueueTrigger import get_request_files, main, extract_properties, get_source_dest_for_copy, is_require_data_copy
+from StatusChangedQueueTrigger import get_request_files, main, extract_properties, get_source_dest_for_copy, is_require_data_copy, get_storage_account_destination_for_copy
from azure.functions.servicebus import ServiceBusMessage
from shared_code import constants
@@ -20,6 +20,18 @@ def test_extract_prop_valid_body_return_all_values(self):
assert req_prop.type == "101112"
assert req_prop.workspace_id == "ws1"
+ def test_extract_prop_with_review_workspace_id(self):
+ message_body = "{ \"data\": { \"request_id\":\"123\",\"new_status\":\"456\" ,\"previous_status\":\"789\" , \"type\":\"101112\", \"workspace_id\":\"ws1\", \"review_workspace_id\":\"rw01\" }}"
+ message = _mock_service_bus_message(body=message_body)
+ req_prop = extract_properties(message)
+ assert req_prop.review_workspace_id == "rw01"
+
+ def test_extract_prop_without_review_workspace_id_defaults_to_none(self):
+ message_body = "{ \"data\": { \"request_id\":\"123\",\"new_status\":\"456\" ,\"previous_status\":\"789\" , \"type\":\"101112\", \"workspace_id\":\"ws1\" }}"
+ message = _mock_service_bus_message(body=message_body)
+ req_prop = extract_properties(message)
+ assert req_prop.review_workspace_id is None
+
def test_extract_prop_missing_arg_throws(self):
message_body = "{ \"data\": { \"status\":\"456\" , \"type\":\"789\", \"workspace_id\":\"ws1\" }}"
message = _mock_service_bus_message(body=message_body)
@@ -119,7 +131,107 @@ def test_delete_request_files_should_be_called_on_cancel_stage(self, mock_set_ou
assert mock_set_output_event_to_trigger_container_deletion.called
+class TestImportSubmitUsesReviewWorkspaceId():
+ @patch.dict(os.environ, {"TRE_ID": "tre-id"}, clear=True)
+ def test_import_submit_destination_uses_review_workspace_id(self):
+ dest = get_storage_account_destination_for_copy(
+ new_status=constants.STAGE_SUBMITTED,
+ request_type=constants.IMPORT_TYPE,
+ short_workspace_id="ws01",
+ review_workspace_id="rw01"
+ )
+ assert dest == constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS + "rw01"
+
+ @patch.dict(os.environ, {"TRE_ID": "tre-id"}, clear=True)
+ def test_import_submit_destination_falls_back_to_tre_id_when_no_review_workspace_id(self):
+ dest = get_storage_account_destination_for_copy(
+ new_status=constants.STAGE_SUBMITTED,
+ request_type=constants.IMPORT_TYPE,
+ short_workspace_id="ws01",
+ review_workspace_id=None
+ )
+ assert dest == constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS + "tre-id"
+
+ @patch.dict(os.environ, {"TRE_ID": "tre-id"}, clear=True)
+ def test_export_submit_destination_ignores_review_workspace_id(self):
+ dest = get_storage_account_destination_for_copy(
+ new_status=constants.STAGE_SUBMITTED,
+ request_type=constants.EXPORT_TYPE,
+ short_workspace_id="ws01",
+ review_workspace_id="rw01"
+ )
+ assert dest == constants.STORAGE_ACCOUNT_NAME_EXPORT_INPROGRESS + "ws01"
+
+
+class TestImportApproval():
+ @patch("StatusChangedQueueTrigger.blob_operations.copy_data")
+ @patch("StatusChangedQueueTrigger.blob_operations.create_container")
+ @patch.dict(os.environ, {"TRE_ID": "tre-id"}, clear=True)
+ def test_import_approval_copies_data_in_legacy_mode(self, mock_create_container, mock_copy_data):
+ message_body = "{ \"data\": { \"request_id\":\"123\",\"new_status\":\"approval_in_progress\" ,\"previous_status\":\"in_review\" , \"type\":\"import\", \"workspace_id\":\"ws01\" }}"
+ message = _mock_service_bus_message(body=message_body)
+ main(msg=message, stepResultEvent=MagicMock(), dataDeletionEvent=MagicMock())
+ mock_create_container.assert_called_once()
+ mock_copy_data.assert_called_once()
+
+
def _mock_service_bus_message(body: str):
encoded_body = str.encode(body, "utf-8")
message = ServiceBusMessage(body=encoded_body, message_id="123", user_properties={}, application_properties={})
return message
+
+
+class TestV2MetadataMode():
+
+ @patch("StatusChangedQueueTrigger.blob_operations.copy_data")
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch.dict(os.environ, {"TRE_ID": "tre-id", "ENABLE_MALWARE_SCANNING": "False"}, clear=True)
+ def test_v2_import_approval_copies_data_without_step_result(self, mock_blob_svc, mock_copy_data):
+ """V2 import approval triggers cross-account copy but does NOT emit StepResult directly.
+ BlobCreatedTrigger handles completion signaling asynchronously."""
+ message_body = '{ "data": { "request_id":"123","new_status":"approval_in_progress","previous_status":"in_review","type":"import","workspace_id":"ws01","airlock_version":2 }}'
+ message = _mock_service_bus_message(body=message_body)
+ step_result = MagicMock()
+ main(msg=message, stepResultEvent=step_result, dataDeletionEvent=MagicMock())
+ mock_copy_data.assert_called_once()
+ # StepResult should NOT be emitted — BlobCreatedTrigger handles this
+ step_result.set.assert_not_called()
+
+ @patch("StatusChangedQueueTrigger.blob_operations.copy_data")
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch.dict(os.environ, {"TRE_ID": "tre-id", "ENABLE_MALWARE_SCANNING": "False"}, clear=True)
+ def test_v2_export_approval_copies_data_without_step_result(self, mock_blob_svc, mock_copy_data):
+ """V2 export approval triggers cross-account copy but does NOT emit StepResult directly."""
+ message_body = '{ "data": { "request_id":"123","new_status":"approval_in_progress","previous_status":"in_review","type":"export","workspace_id":"ws01","airlock_version":2 }}'
+ message = _mock_service_bus_message(body=message_body)
+ step_result = MagicMock()
+ main(msg=message, stepResultEvent=step_result, dataDeletionEvent=MagicMock())
+ mock_copy_data.assert_called_once()
+ step_result.set.assert_not_called()
+
+ @patch("StatusChangedQueueTrigger.blob_operations.get_request_files", return_value=[{"name": "test.txt", "size": 100}])
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch.dict(os.environ, {"TRE_ID": "tre-id", "ENABLE_MALWARE_SCANNING": "False"}, clear=True)
+ def test_v2_submit_with_scanning_disabled_emits_in_review(self, mock_blob_svc, mock_get_files):
+ """V2 submit with malware scanning disabled should emit StepResult to skip to in_review."""
+ message_body = '{ "data": { "request_id":"123","new_status":"submitted","previous_status":"draft","type":"import","workspace_id":"ws01","airlock_version":2 }}'
+ message = _mock_service_bus_message(body=message_body)
+ step_result = MagicMock()
+ main(msg=message, stepResultEvent=step_result, dataDeletionEvent=MagicMock())
+ # Should have two calls: one for request files report, one for in_review transition
+ assert step_result.set.call_count == 2
+ # The second call should be the in_review step result
+ second_call_event = step_result.set.call_args_list[1][0][0]
+ assert second_call_event.get_json()["completed_step"] == constants.STAGE_SUBMITTED
+ assert second_call_event.get_json()["new_status"] == constants.STAGE_IN_REVIEW
+
+ @patch("shared_code.blob_operations_metadata.BlobServiceClient")
+ @patch.dict(os.environ, {"TRE_ID": "tre-id", "ENABLE_MALWARE_SCANNING": "True"}, clear=True)
+ def test_v2_submit_with_scanning_enabled_does_not_emit_in_review(self, mock_blob_svc):
+ """V2 submit with malware scanning enabled should NOT emit in_review — Defender handles it."""
+ message_body = '{ "data": { "request_id":"123","new_status":"submitted","previous_status":"draft","type":"import","workspace_id":"ws01","airlock_version":2 }}'
+ message = _mock_service_bus_message(body=message_body)
+ step_result = MagicMock()
+ main(msg=message, stepResultEvent=step_result, dataDeletionEvent=MagicMock())
+ # Only one call: request files report (not in_review)
+ assert step_result.set.call_count == 1
diff --git a/api_app/_version.py b/api_app/_version.py
index 2cb28789f..ae62eb632 100644
--- a/api_app/_version.py
+++ b/api_app/_version.py
@@ -1 +1 @@
-__version__ = "0.25.15"
+__version__ = "0.26.5"
diff --git a/api_app/api/routes/airlock.py b/api_app/api/routes/airlock.py
index 4d92f195b..9e7795a30 100644
--- a/api_app/api/routes/airlock.py
+++ b/api_app/api/routes/airlock.py
@@ -41,7 +41,8 @@ async def create_draft_request(airlock_request_input: AirlockRequestInCreate, us
if workspace.properties.get("enable_airlock") is False:
raise HTTPException(status_code=status_code.HTTP_405_METHOD_NOT_ALLOWED, detail=strings.AIRLOCK_NOT_ENABLED_IN_WORKSPACE)
try:
- airlock_request = airlock_request_repo.create_airlock_request_item(airlock_request_input, workspace.id, user)
+ airlock_version = workspace.properties.get("airlock_version", 1)
+ airlock_request = airlock_request_repo.create_airlock_request_item(airlock_request_input, workspace.id, user, airlock_version=airlock_version)
await save_and_publish_event_airlock_request(airlock_request, airlock_request_repo, user, workspace)
allowed_actions = get_allowed_actions(airlock_request, user, airlock_request_repo)
return AirlockRequestWithAllowedUserActions(airlockRequest=airlock_request, allowedUserActions=allowed_actions)
diff --git a/api_app/api/routes/api.py b/api_app/api/routes/api.py
index c8247c02b..6e4084c5e 100644
--- a/api_app/api/routes/api.py
+++ b/api_app/api/routes/api.py
@@ -63,8 +63,6 @@
@core_swagger_router.get("/openapi.json", include_in_schema=False, name="core_openapi")
async def core_openapi(request: Request):
- global openapi_definitions
-
if openapi_definitions["core"] is None:
openapi_definitions["core"] = get_openapi(
title=f"{config.PROJECT_NAME}",
@@ -122,8 +120,6 @@ def get_scope(workspace) -> str:
@workspace_swagger_router.get("/workspaces/{workspace_id}/openapi.json", include_in_schema=False, name="openapi_definitions")
async def get_openapi_json(workspace_id: str, request: Request, workspace_repo=Depends(get_repository(WorkspaceRepository))):
- global openapi_definitions
-
if openapi_definitions[workspace_id] is None:
openapi_definitions[workspace_id] = get_openapi(
diff --git a/api_app/core/config.py b/api_app/core/config.py
index d2f1cf1fa..2a5c85ad4 100644
--- a/api_app/core/config.py
+++ b/api_app/core/config.py
@@ -70,6 +70,10 @@
AIRLOCK_SAS_TOKEN_EXPIRY_PERIOD_IN_HOURS: int = config("AIRLOCK_SAS_TOKEN_EXPIRY_PERIOD_IN_HOURS", default=1)
ENABLE_AIRLOCK_EMAIL_CHECK: bool = config("ENABLE_AIRLOCK_EMAIL_CHECK", cast=bool, default=False)
+# Optional App Gateway FQDN, passed through from Terraform outputs
+# (not currently used by the API at runtime)
+APP_GATEWAY_FQDN: str = config("APP_GATEWAY_FQDN", default="")
+
API_ROOT_SCOPE: str = f"api://{API_CLIENT_ID}/user_impersonation"
# User Management
diff --git a/api_app/db/repositories/airlock_requests.py b/api_app/db/repositories/airlock_requests.py
index 0990b90ef..f2a75733b 100644
--- a/api_app/db/repositories/airlock_requests.py
+++ b/api_app/db/repositories/airlock_requests.py
@@ -102,7 +102,7 @@ def validate_status_update(self, current_status: AirlockRequestStatus, new_statu
allowed_transitions = valid_transitions.get(current_status, set())
return new_status in allowed_transitions
- def create_airlock_request_item(self, airlock_request_input: AirlockRequestInCreate, workspace_id: str, user) -> AirlockRequest:
+ def create_airlock_request_item(self, airlock_request_input: AirlockRequestInCreate, workspace_id: str, user, airlock_version: int = 1) -> AirlockRequest:
full_airlock_request_id = str(uuid.uuid4())
resource_spec_parameters = {**self.get_airlock_request_spec_params()}
@@ -118,7 +118,8 @@ def create_airlock_request_item(self, airlock_request_input: AirlockRequestInCre
updatedBy=user,
updatedWhen=datetime.now(UTC).timestamp(),
properties=resource_spec_parameters,
- reviews=[]
+ reviews=[],
+ airlock_version=airlock_version
)
return airlock_request
diff --git a/api_app/event_grid/event_sender.py b/api_app/event_grid/event_sender.py
index 1821c6558..0125e1499 100644
--- a/api_app/event_grid/event_sender.py
+++ b/api_app/event_grid/event_sender.py
@@ -6,21 +6,34 @@
from models.domain.events import AirlockNotificationRequestData, AirlockNotificationWorkspaceData, StatusChangedData, AirlockNotificationData
from event_grid.helpers import publish_event
from core import config
-from models.domain.airlock_request import AirlockRequest, AirlockRequestStatus
+from models.domain.airlock_request import AirlockRequest, AirlockRequestStatus, AirlockRequestType
from models.domain.workspace import Workspace
from services.logging import logger
-async def send_status_changed_event(airlock_request: AirlockRequest, previous_status: Optional[AirlockRequestStatus]):
+async def send_status_changed_event(airlock_request: AirlockRequest, previous_status: Optional[AirlockRequestStatus], workspace: Optional[Workspace] = None):
request_id = airlock_request.id
new_status = airlock_request.status.value
previous_status = previous_status.value if previous_status else None
request_type = airlock_request.type.value
short_workspace_id = airlock_request.workspaceId[-4:]
+ # For v2, container metadata must match ABAC conditions which use the full workspace ID.
+ # For v1, storage account names include the short (4-char) workspace ID.
+ workspace_id_for_event = airlock_request.workspaceId if airlock_request.airlock_version >= 2 else short_workspace_id
+
+ review_workspace_id = None
+ if workspace and airlock_request.type == AirlockRequestType.Import:
+ try:
+ full_review_ws_id = workspace.properties["airlock_review_config"]["import"]["import_vm_workspace_id"]
+ review_workspace_id = full_review_ws_id if airlock_request.airlock_version >= 2 else full_review_ws_id[-4:]
+ except (KeyError, TypeError):
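+            # Workspace has no import review VM configured; leave review_workspace_id unset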
+ pass
+
status_changed_event = EventGridEvent(
event_type="statusChanged",
- data=StatusChangedData(request_id=request_id, new_status=new_status, previous_status=previous_status, type=request_type, workspace_id=short_workspace_id).__dict__,
+ data=StatusChangedData(request_id=request_id, new_status=new_status, previous_status=previous_status, type=request_type, workspace_id=workspace_id_for_event, review_workspace_id=review_workspace_id, airlock_version=airlock_request.airlock_version).__dict__,
subject=f"{request_id}/statusChanged",
data_version="2.0"
)
diff --git a/api_app/models/domain/airlock_request.py b/api_app/models/domain/airlock_request.py
index 37fe67f64..b0bb2ae0a 100644
--- a/api_app/models/domain/airlock_request.py
+++ b/api_app/models/domain/airlock_request.py
@@ -99,6 +99,7 @@ class AirlockRequest(AzureTREModel):
reviews: Optional[List[AirlockReview]]
etag: Optional[str] = Field(title="_etag", alias="_etag")
reviewUserResources: Dict[str, AirlockReviewUserResource] = Field({}, title="User resources created for Airlock Reviews")
+ airlock_version: int = Field(1, title="Airlock version", description="1 = legacy per-stage storage, 2 = consolidated metadata-based storage")
# SQL API CosmosDB saves ETag as an escaped string: https://github.com/microsoft/AzureTRE/issues/1931
@validator("etag", pre=True)
diff --git a/api_app/models/domain/events.py b/api_app/models/domain/events.py
index 76d7c557c..59752b80b 100644
--- a/api_app/models/domain/events.py
+++ b/api_app/models/domain/events.py
@@ -40,3 +40,5 @@ class StatusChangedData(AzureTREModel):
previous_status: Optional[str]
type: str
workspace_id: str
+ review_workspace_id: Optional[str] = None
+ airlock_version: int = 1
diff --git a/api_app/resources/constants.py b/api_app/resources/constants.py
index c6f60cec0..7eafa2b77 100644
--- a/api_app/resources/constants.py
+++ b/api_app/resources/constants.py
@@ -4,6 +4,27 @@
IMPORT_TYPE = "import"
EXPORT_TYPE = "export"
+
+# Consolidated storage account names (metadata-based approach)
+STORAGE_ACCOUNT_NAME_AIRLOCK_CORE = "stalairlock{}" # Consolidated core account
+STORAGE_ACCOUNT_NAME_AIRLOCK_WORKSPACE_GLOBAL = "stalairlockg{}" # Global workspace account for all workspaces
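+# e.g. STORAGE_ACCOUNT_NAME_AIRLOCK_CORE.format("mytre") == "stalairlockmytre"
+# and STORAGE_ACCOUNT_NAME_AIRLOCK_WORKSPACE_GLOBAL.format("mytre") == "stalairlockgmytre"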
+
+# Stage values for container metadata
+STAGE_IMPORT_EXTERNAL = "import-external"
+STAGE_IMPORT_IN_PROGRESS = "import-in-progress"
+STAGE_IMPORT_APPROVED = "import-approved"
+STAGE_IMPORT_REJECTED = "import-rejected"
+STAGE_IMPORT_BLOCKED = "import-blocked"
+STAGE_EXPORT_INTERNAL = "export-internal"
+STAGE_EXPORT_IN_PROGRESS = "export-in-progress"
+STAGE_EXPORT_APPROVED = "export-approved"
+STAGE_EXPORT_REJECTED = "export-rejected"
+STAGE_EXPORT_BLOCKED = "export-blocked"
+
+# Legacy storage account names (for backwards compatibility during migration)
+# These will be removed after migration is complete
# Import
STORAGE_ACCOUNT_NAME_IMPORT_EXTERNAL = "stalimex{}"
STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS = "stalimip{}"
diff --git a/api_app/services/airlock.py b/api_app/services/airlock.py
index 54109734c..13f027ae5 100644
--- a/api_app/services/airlock.py
+++ b/api_app/services/airlock.py
@@ -36,37 +36,6 @@
STORAGE_ENDPOINT = config.STORAGE_ENDPOINT_SUFFIX
-def get_account_by_request(airlock_request: AirlockRequest, workspace: Workspace) -> str:
- tre_id = config.TRE_ID
- short_workspace_id = workspace.id[-4:]
- if airlock_request.type == constants.IMPORT_TYPE:
- if airlock_request.status == AirlockRequestStatus.Draft:
- return constants.STORAGE_ACCOUNT_NAME_IMPORT_EXTERNAL.format(tre_id)
- elif airlock_request.status == AirlockRequestStatus.Submitted:
- return constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS.format(tre_id)
- elif airlock_request.status == AirlockRequestStatus.InReview:
- return constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS.format(tre_id)
- elif airlock_request.status == AirlockRequestStatus.Approved:
- return constants.STORAGE_ACCOUNT_NAME_IMPORT_APPROVED.format(short_workspace_id)
- elif airlock_request.status == AirlockRequestStatus.Rejected:
- return constants.STORAGE_ACCOUNT_NAME_IMPORT_REJECTED.format(tre_id)
- elif airlock_request.status == AirlockRequestStatus.Blocked:
- return constants.STORAGE_ACCOUNT_NAME_IMPORT_BLOCKED.format(tre_id)
- else:
- if airlock_request.status == AirlockRequestStatus.Draft:
- return constants.STORAGE_ACCOUNT_NAME_EXPORT_INTERNAL.format(short_workspace_id)
- elif airlock_request.status in AirlockRequestStatus.Submitted:
- return constants.STORAGE_ACCOUNT_NAME_EXPORT_INPROGRESS.format(short_workspace_id)
- elif airlock_request.status == AirlockRequestStatus.InReview:
- return constants.STORAGE_ACCOUNT_NAME_EXPORT_INPROGRESS.format(short_workspace_id)
- elif airlock_request.status == AirlockRequestStatus.Approved:
- return constants.STORAGE_ACCOUNT_NAME_EXPORT_APPROVED.format(tre_id)
- elif airlock_request.status == AirlockRequestStatus.Rejected:
- return constants.STORAGE_ACCOUNT_NAME_EXPORT_REJECTED.format(short_workspace_id)
- elif airlock_request.status == AirlockRequestStatus.Blocked:
- return constants.STORAGE_ACCOUNT_NAME_EXPORT_BLOCKED.format(short_workspace_id)
-
-
def validate_user_allowed_to_access_storage_account(user: User, airlock_request: AirlockRequest):
allowed_roles = []
@@ -103,8 +72,49 @@ def get_required_permission(airlock_request: AirlockRequest) -> ContainerSasPerm
return ContainerSasPermissions(read=True, list=True)
-def get_airlock_request_container_sas_token(account_name: str,
- airlock_request: AirlockRequest):
+def is_publicly_accessible_stage(airlock_request: AirlockRequest) -> bool:
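+    """Return True when the request's files are reachable from outside the TRE (external upload or final export download)."""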
+ if airlock_request.type == constants.IMPORT_TYPE:
+ # Only import Draft (external upload) is publicly accessible via App GW/SAS
+ return airlock_request.status == AirlockRequestStatus.Draft
+ else:
+ # Only export Approved is publicly accessible via App GW/SAS
+ return airlock_request.status == AirlockRequestStatus.Approved
+
+
+def get_account_by_request(airlock_request: AirlockRequest, workspace: Workspace) -> str:
+ """Resolve storage account name for v1 (legacy per-stage) airlock requests."""
+ tre_id = config.TRE_ID
+ short_workspace_id = workspace.id[-4:]
+ if airlock_request.type == constants.IMPORT_TYPE:
+ if airlock_request.status == AirlockRequestStatus.Draft:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_EXTERNAL.format(tre_id)
+ elif airlock_request.status == AirlockRequestStatus.Submitted:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS.format(tre_id)
+ elif airlock_request.status == AirlockRequestStatus.InReview:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS.format(tre_id)
+ elif airlock_request.status == AirlockRequestStatus.Approved:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_APPROVED.format(short_workspace_id)
+ elif airlock_request.status == AirlockRequestStatus.Rejected:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_REJECTED.format(tre_id)
+ elif airlock_request.status == AirlockRequestStatus.Blocked:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_BLOCKED.format(tre_id)
+ else:
+ if airlock_request.status == AirlockRequestStatus.Draft:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_INTERNAL.format(short_workspace_id)
+ elif airlock_request.status == AirlockRequestStatus.Submitted:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_INPROGRESS.format(short_workspace_id)
+ elif airlock_request.status == AirlockRequestStatus.InReview:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_INPROGRESS.format(short_workspace_id)
+ elif airlock_request.status == AirlockRequestStatus.Approved:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_APPROVED.format(tre_id)
+ elif airlock_request.status == AirlockRequestStatus.Rejected:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_REJECTED.format(short_workspace_id)
+ elif airlock_request.status == AirlockRequestStatus.Blocked:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_BLOCKED.format(short_workspace_id)
+
+
+def get_airlock_request_container_sas_token(airlock_request: AirlockRequest, account_name: str):
blob_service_client = BlobServiceClient(account_url=get_account_url(account_name),
credential=credentials.get_credential())
@@ -125,6 +134,7 @@ def get_airlock_request_container_sas_token(account_name: str,
start=start,
expiry=expiry)
+    # Return the container URL with the SAS token appended
return "https://{}.blob.{}/{}?{}" \
.format(account_name, STORAGE_ENDPOINT, airlock_request.id, token)
@@ -168,8 +178,25 @@ async def review_airlock_request(airlock_review_input: AirlockReviewInCreate, ai
def get_airlock_container_link(airlock_request: AirlockRequest, user, workspace):
validate_user_allowed_to_access_storage_account(user, airlock_request)
validate_request_status(airlock_request)
- account_name: str = get_account_by_request(airlock_request, workspace)
- return get_airlock_request_container_sas_token(account_name, airlock_request)
+
+ if airlock_request.airlock_version >= 2:
+ # v2: Resolve correct storage account (core or workspace-global) based on stage
+ # Network rules enforce public vs private access — SAS is always generated
+ from services.airlock_storage_helper import get_storage_account_name_for_request
+ tre_id = config.TRE_ID
+ short_workspace_id = workspace.id[-4:]
+ account_name = get_storage_account_name_for_request(
+ request_type=airlock_request.type.value,
+ status=airlock_request.status,
+ tre_id=tre_id,
+ short_workspace_id=short_workspace_id,
+ airlock_version=airlock_request.airlock_version
+ )
+ else:
+ # v1: Resolve per-stage storage account
+ account_name = get_account_by_request(airlock_request, workspace)
+
+ return get_airlock_request_container_sas_token(airlock_request, account_name)
async def create_review_vm(airlock_request: AirlockRequest, user: User, workspace: Workspace, user_resource_repo: UserResourceRepository, workspace_service_repo: WorkspaceServiceRepository,
@@ -288,7 +315,7 @@ async def save_and_publish_event_airlock_request(airlock_request: AirlockRequest
try:
logger.debug(f"Sending status changed event for airlock request item: {airlock_request.id}")
- await send_status_changed_event(airlock_request=airlock_request, previous_status=None)
+ await send_status_changed_event(airlock_request=airlock_request, previous_status=None, workspace=workspace)
await send_airlock_notification_event(airlock_request, workspace, role_assignment_details)
except Exception:
await airlock_request_repo.delete_item(airlock_request.id)
@@ -330,7 +357,7 @@ async def update_and_publish_event_airlock_request(
try:
logger.debug(f"Sending status changed event for airlock request item: {airlock_request.id}")
- await send_status_changed_event(airlock_request=updated_airlock_request, previous_status=airlock_request.status)
+ await send_status_changed_event(airlock_request=updated_airlock_request, previous_status=airlock_request.status, workspace=workspace)
access_service = get_access_service()
role_assignment_details = access_service.get_workspace_user_emails_by_role_assignment(workspace)
await send_airlock_notification_event(updated_airlock_request, workspace, role_assignment_details)
diff --git a/api_app/services/airlock_storage_helper.py b/api_app/services/airlock_storage_helper.py
new file mode 100644
index 000000000..ef770f540
--- /dev/null
+++ b/api_app/services/airlock_storage_helper.py
@@ -0,0 +1,94 @@
+from resources import constants
+from models.domain.airlock_request import AirlockRequestStatus
+
+
+def get_storage_account_name_for_request(
+ request_type: str,
+ status: AirlockRequestStatus,
+ tre_id: str,
+ short_workspace_id: str,
+ airlock_version: int = 1
+) -> str:
+    if airlock_version >= 2:
+        # v2 consolidated mode: a core account plus one workspace-global account shared by all workspaces
+ if request_type == constants.IMPORT_TYPE:
+ if status in [AirlockRequestStatus.Draft, AirlockRequestStatus.Submitted, AirlockRequestStatus.InReview]:
+ # Core import stages
+ return constants.STORAGE_ACCOUNT_NAME_AIRLOCK_CORE.format(tre_id)
+ elif status in [AirlockRequestStatus.Approved, AirlockRequestStatus.ApprovalInProgress]:
+ # Global workspace storage
+ return constants.STORAGE_ACCOUNT_NAME_AIRLOCK_WORKSPACE_GLOBAL.format(tre_id)
+ elif status in [AirlockRequestStatus.Rejected, AirlockRequestStatus.RejectionInProgress,
+ AirlockRequestStatus.Blocked, AirlockRequestStatus.BlockingInProgress]:
+ # These are in core storage
+ return constants.STORAGE_ACCOUNT_NAME_AIRLOCK_CORE.format(tre_id)
+ else: # export
+ if status in [AirlockRequestStatus.Approved, AirlockRequestStatus.ApprovalInProgress]:
+ # Export approved in core
+ return constants.STORAGE_ACCOUNT_NAME_AIRLOCK_CORE.format(tre_id)
+ else: # Draft, Submitted, InReview, Rejected, Blocked, etc.
+ # Global workspace storage
+ return constants.STORAGE_ACCOUNT_NAME_AIRLOCK_WORKSPACE_GLOBAL.format(tre_id)
+ else:
+ # Legacy mode - return original separate account names
+ if request_type == constants.IMPORT_TYPE:
+ if status == AirlockRequestStatus.Draft:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_EXTERNAL.format(tre_id)
+ elif status in [AirlockRequestStatus.Submitted, AirlockRequestStatus.InReview]:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS.format(tre_id)
+ elif status in [AirlockRequestStatus.Approved, AirlockRequestStatus.ApprovalInProgress]:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_APPROVED.format(short_workspace_id)
+ elif status in [AirlockRequestStatus.Rejected, AirlockRequestStatus.RejectionInProgress]:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_REJECTED.format(tre_id)
+ elif status in [AirlockRequestStatus.Blocked, AirlockRequestStatus.BlockingInProgress]:
+ return constants.STORAGE_ACCOUNT_NAME_IMPORT_BLOCKED.format(tre_id)
+ else: # export
+ if status == AirlockRequestStatus.Draft:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_INTERNAL.format(short_workspace_id)
+ elif status in [AirlockRequestStatus.Submitted, AirlockRequestStatus.InReview]:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_INPROGRESS.format(short_workspace_id)
+ elif status in [AirlockRequestStatus.Approved, AirlockRequestStatus.ApprovalInProgress]:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_APPROVED.format(tre_id)
+ elif status in [AirlockRequestStatus.Rejected, AirlockRequestStatus.RejectionInProgress]:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_REJECTED.format(short_workspace_id)
+ elif status in [AirlockRequestStatus.Blocked, AirlockRequestStatus.BlockingInProgress]:
+ return constants.STORAGE_ACCOUNT_NAME_EXPORT_BLOCKED.format(short_workspace_id)
+
+
+def get_stage_from_status(request_type: str, status: AirlockRequestStatus) -> str:
+ if request_type == constants.IMPORT_TYPE:
+ if status == AirlockRequestStatus.Draft:
+ return constants.STAGE_IMPORT_EXTERNAL
+ elif status in [AirlockRequestStatus.Submitted, AirlockRequestStatus.InReview]:
+ return constants.STAGE_IMPORT_IN_PROGRESS
+ elif status in [AirlockRequestStatus.Approved, AirlockRequestStatus.ApprovalInProgress]:
+ return constants.STAGE_IMPORT_APPROVED
+ elif status in [AirlockRequestStatus.Rejected, AirlockRequestStatus.RejectionInProgress]:
+ return constants.STAGE_IMPORT_REJECTED
+ elif status in [AirlockRequestStatus.Blocked, AirlockRequestStatus.BlockingInProgress]:
+ return constants.STAGE_IMPORT_BLOCKED
+ else: # export
+ if status == AirlockRequestStatus.Draft:
+ return constants.STAGE_EXPORT_INTERNAL
+ elif status in [AirlockRequestStatus.Submitted, AirlockRequestStatus.InReview]:
+ return constants.STAGE_EXPORT_IN_PROGRESS
+ elif status in [AirlockRequestStatus.Approved, AirlockRequestStatus.ApprovalInProgress]:
+ return constants.STAGE_EXPORT_APPROVED
+ elif status in [AirlockRequestStatus.Rejected, AirlockRequestStatus.RejectionInProgress]:
+ return constants.STAGE_EXPORT_REJECTED
+ elif status in [AirlockRequestStatus.Blocked, AirlockRequestStatus.BlockingInProgress]:
+ return constants.STAGE_EXPORT_BLOCKED
+
+    # Fallback for statuses with no storage stage (e.g. Cancelled, Failed)
+ return "unknown"
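+
+
+# Illustrative usage (a sketch based on the accompanying tests, not an API contract):
+#
+#   get_storage_account_name_for_request(
+#       constants.IMPORT_TYPE, AirlockRequestStatus.Approved,
+#       tre_id="mytre", short_workspace_id="ab12", airlock_version=2)
+#   returns "stalairlockgmytre" (the workspace-global account)
+#
+#   get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.InReview)
+#   returns "export-in-progress"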
diff --git a/api_app/tests_ma/test_services/test_airlock.py b/api_app/tests_ma/test_services/test_airlock.py
index 31cb6a006..1e3a968fe 100644
--- a/api_app/tests_ma/test_services/test_airlock.py
+++ b/api_app/tests_ma/test_services/test_airlock.py
@@ -4,7 +4,7 @@
import time
from resources import strings
from services.airlock import validate_user_allowed_to_access_storage_account, get_required_permission, \
- validate_request_status, cancel_request, delete_review_user_resource, check_email_exists, revoke_request
+ validate_request_status, cancel_request, delete_review_user_resource, check_email_exists, revoke_request, is_publicly_accessible_stage
from models.domain.airlock_request import AirlockRequest, AirlockRequestStatus, AirlockRequestType, AirlockReview, AirlockReviewDecision, AirlockActions, AirlockReviewUserResource
from tests_ma.test_api.conftest import create_workspace_owner_user, create_workspace_researcher_user, get_required_roles
from mock import AsyncMock, patch, MagicMock
@@ -24,6 +24,7 @@
AIRLOCK_REVIEW_ID = "96d909c5-e913-4c05-ae53-668a702ba2e5"
USER_RESOURCE_ID = "cce59042-1dee-42dc-9388-6db846feeb3b"
WORKSPACE_SERVICE_ID = "30f2fefa-e7bb-4e5b-93aa-e50bb037502a"
+REVIEW_WORKSPACE_ID = "def111e4-93eb-4afc-c7fa-0b8964fa864f"
CURRENT_TIME = time.time()
ALL_ROLES = AzureADAuthorization.WORKSPACE_ROLES_DICT.keys()
@@ -48,6 +49,26 @@ def sample_workspace():
resourcePath="test")
+def sample_workspace_with_review_config():
+ return Workspace(
+ id=WORKSPACE_ID,
+ templateName='template name',
+ templateVersion='1.0',
+ etag='',
+ properties={
+ "client_id": "12345",
+ "display_name": "my research workspace",
+ "description": "for science!",
+ "airlock_review_config": {
+ "import": {
+ "import_vm_workspace_id": REVIEW_WORKSPACE_ID,
+ "import_vm_workspace_service_id": WORKSPACE_SERVICE_ID,
+ "import_vm_user_resource_template_name": "test-template"
+ }
+ }},
+ resourcePath="test")
+
+
def sample_airlock_request(status=AirlockRequestStatus.Draft):
airlock_request = AirlockRequest(
id=AIRLOCK_REQUEST_ID,
@@ -82,10 +103,10 @@ def sample_airlock_user_resource_object():
)
-def sample_status_changed_event(new_status="draft", previous_status=None):
+def sample_status_changed_event(new_status="draft", previous_status=None, review_workspace_id=None):
status_changed_event = EventGridEvent(
event_type="statusChanged",
- data=StatusChangedData(request_id=AIRLOCK_REQUEST_ID, new_status=new_status, previous_status=previous_status, type=AirlockRequestType.Import, workspace_id=WORKSPACE_ID[-4:]).__dict__,
+ data=StatusChangedData(request_id=AIRLOCK_REQUEST_ID, new_status=new_status, previous_status=previous_status, type=AirlockRequestType.Import, workspace_id=WORKSPACE_ID[-4:], review_workspace_id=review_workspace_id).__dict__,
subject=f"{AIRLOCK_REQUEST_ID}/statusChanged",
data_version="2.0"
)
@@ -240,6 +261,48 @@ def test_get_required_permission_return_read_and_write_permissions_for_draft_req
assert permissions.read is True
+def test_is_publicly_accessible_stage_import_draft_is_public():
+ airlock_request = sample_airlock_request(AirlockRequestStatus.Draft)
+ assert is_publicly_accessible_stage(airlock_request) is True
+
+
+@pytest.mark.parametrize('airlock_status',
+ [AirlockRequestStatus.Submitted,
+ AirlockRequestStatus.InReview,
+ AirlockRequestStatus.ApprovalInProgress,
+ AirlockRequestStatus.Approved,
+ AirlockRequestStatus.RejectionInProgress,
+ AirlockRequestStatus.Rejected,
+ AirlockRequestStatus.Cancelled,
+ AirlockRequestStatus.BlockingInProgress,
+ AirlockRequestStatus.Blocked])
+def test_is_publicly_accessible_stage_import_non_draft_is_not_public(airlock_status):
+ airlock_request = sample_airlock_request(airlock_status)
+ assert is_publicly_accessible_stage(airlock_request) is False
+
+
+def test_is_publicly_accessible_stage_export_approved_is_public():
+ airlock_request = sample_airlock_request(AirlockRequestStatus.Approved)
+ airlock_request.type = AirlockRequestType.Export
+ assert is_publicly_accessible_stage(airlock_request) is True
+
+
+@pytest.mark.parametrize('airlock_status',
+ [AirlockRequestStatus.Draft,
+ AirlockRequestStatus.Submitted,
+ AirlockRequestStatus.InReview,
+ AirlockRequestStatus.ApprovalInProgress,
+ AirlockRequestStatus.RejectionInProgress,
+ AirlockRequestStatus.Rejected,
+ AirlockRequestStatus.Cancelled,
+ AirlockRequestStatus.BlockingInProgress,
+ AirlockRequestStatus.Blocked])
+def test_is_publicly_accessible_stage_export_non_approved_is_not_public(airlock_status):
+ airlock_request = sample_airlock_request(airlock_status)
+ airlock_request.type = AirlockRequestType.Export
+ assert is_publicly_accessible_stage(airlock_request) is False
+
+
@pytest.mark.asyncio
@patch("event_grid.helpers.EventGridPublisherClient", return_value=AsyncMock())
@patch("services.aad_authentication.AzureADAuthorization.get_workspace_user_emails_by_role_assignment", return_value={"WorkspaceResearcher": ["researcher@outlook.com"], "WorkspaceOwner": ["owner@outlook.com"], "AirlockManager": ["manager@outlook.com"]})
@@ -401,6 +464,30 @@ async def test_update_and_publish_event_airlock_request_updates_item(_, event_gr
assert actual_airlock_notification_event.data == airlock_notification_event_mock.data
+@pytest.mark.asyncio
+@patch("event_grid.helpers.EventGridPublisherClient", return_value=AsyncMock())
+@patch("services.aad_authentication.AzureADAuthorization.get_workspace_user_emails_by_role_assignment", return_value={"WorkspaceResearcher": ["researcher@outlook.com"], "WorkspaceOwner": ["owner@outlook.com"], "AirlockManager": ["manager@outlook.com"]})
+async def test_update_and_publish_event_includes_review_workspace_id_for_import(_, event_grid_publisher_client_mock,
+ airlock_request_repo_mock):
+ airlock_request_mock = sample_airlock_request()
+ updated_airlock_request_mock = sample_airlock_request(status=AirlockRequestStatus.Submitted)
+ status_changed_event_mock = sample_status_changed_event(new_status="submitted", previous_status="draft", review_workspace_id=REVIEW_WORKSPACE_ID[-4:])
+ airlock_request_repo_mock.update_airlock_request = AsyncMock(return_value=updated_airlock_request_mock)
+ event_grid_sender_client_mock = event_grid_publisher_client_mock.return_value
+ event_grid_sender_client_mock.send = AsyncMock()
+
+ await update_and_publish_event_airlock_request(
+ airlock_request=airlock_request_mock,
+ airlock_request_repo=airlock_request_repo_mock,
+ updated_by=create_test_user(),
+ new_status=AirlockRequestStatus.Submitted,
+ workspace=sample_workspace_with_review_config())
+
+ actual_status_changed_event = event_grid_sender_client_mock.send.await_args_list[0].args[0][0]
+ assert actual_status_changed_event.data == status_changed_event_mock.data
+ assert actual_status_changed_event.data["review_workspace_id"] == REVIEW_WORKSPACE_ID[-4:]
+
+
@pytest.mark.asyncio
@patch("services.airlock.send_status_changed_event")
@patch("services.airlock.send_airlock_notification_event")
@@ -586,3 +673,44 @@
resource_history_repo=AsyncMock(),
user=create_test_user())
disable_user_resource.assert_called_once()
+
+
+@patch("services.airlock.validate_request_status")
+@patch("services.airlock.validate_user_allowed_to_access_storage_account")
+@patch("services.airlock.get_airlock_request_container_sas_token", return_value="https://stalairlockgtest.blob.core.windows.net/container?sas")
+def test_get_airlock_container_link_v2_resolves_correct_account_for_approved_import(mock_sas, mock_validate_user, mock_validate_status):
+ from services.airlock import get_airlock_container_link
+
+ # v2 Import Approved should resolve to workspace-global storage account
+ request = sample_airlock_request(status=AirlockRequestStatus.Approved)
+ request.type = AirlockRequestType.Import
+ request.airlock_version = 2
+
+ workspace = sample_workspace()
+ result = get_airlock_container_link(request, None, workspace)
+
+ assert result == "https://stalairlockgtest.blob.core.windows.net/container?sas"
+ # Should have called SAS generation with the workspace-global account
+ mock_sas.assert_called_once()
+ account_name = mock_sas.call_args[0][1] # second positional arg
+ assert account_name.startswith("stalairlockg")
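+
+
+@patch("services.airlock.validate_request_status")
+@patch("services.airlock.validate_user_allowed_to_access_storage_account")
+@patch("services.airlock.get_airlock_request_container_sas_token", return_value="https://stalairlocktest.blob.core.windows.net/container?sas")
+def test_get_airlock_container_link_v2_resolves_core_account_for_approved_export(mock_sas, mock_validate_user, mock_validate_status):
+    # Companion sketch to the import test above: v2 export Approved should
+    # resolve to the consolidated core account rather than workspace-global.
+    from services.airlock import get_airlock_container_link
+
+    request = sample_airlock_request(status=AirlockRequestStatus.Approved)
+    request.type = AirlockRequestType.Export
+    request.airlock_version = 2
+
+    workspace = sample_workspace()
+    get_airlock_container_link(request, None, workspace)
+
+    account_name = mock_sas.call_args[0][1]
+    assert account_name.startswith("stalairlock")
+    assert not account_name.startswith("stalairlockg")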
diff --git a/api_app/tests_ma/test_services/test_airlock_storage_helper.py b/api_app/tests_ma/test_services/test_airlock_storage_helper.py
new file mode 100644
index 000000000..5d7d5a00a
--- /dev/null
+++ b/api_app/tests_ma/test_services/test_airlock_storage_helper.py
@@ -0,0 +1,343 @@
+from models.domain.airlock_request import AirlockRequestStatus
+from services.airlock_storage_helper import (
+ get_storage_account_name_for_request,
+ get_stage_from_status
+)
+from resources import constants
+
+
+class TestGetStageFromStatus:
+
+ def test_import_draft_maps_to_import_external_stage(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Draft)
+ assert stage == constants.STAGE_IMPORT_EXTERNAL
+ assert stage == "import-external"
+
+ def test_import_submitted_maps_to_import_in_progress_stage(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Submitted)
+ assert stage == constants.STAGE_IMPORT_IN_PROGRESS
+ assert stage == "import-in-progress"
+
+ def test_import_in_review_maps_to_import_in_progress_stage(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.InReview)
+ assert stage == constants.STAGE_IMPORT_IN_PROGRESS
+ assert stage == "import-in-progress"
+
+ def test_import_approved_maps_to_import_approved_stage(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Approved)
+ assert stage == constants.STAGE_IMPORT_APPROVED
+ assert stage == "import-approved"
+
+ def test_import_approval_in_progress_maps_to_import_approved_stage(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.ApprovalInProgress)
+ assert stage == constants.STAGE_IMPORT_APPROVED
+
+ def test_import_rejected_maps_to_import_rejected_stage(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Rejected)
+ assert stage == constants.STAGE_IMPORT_REJECTED
+ assert stage == "import-rejected"
+
+ def test_import_rejection_in_progress_maps_to_import_rejected_stage(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.RejectionInProgress)
+ assert stage == constants.STAGE_IMPORT_REJECTED
+
+ def test_import_blocked_maps_to_import_blocked_stage(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Blocked)
+ assert stage == constants.STAGE_IMPORT_BLOCKED
+ assert stage == "import-blocked"
+
+ def test_import_blocking_in_progress_maps_to_import_blocked_stage(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.BlockingInProgress)
+ assert stage == constants.STAGE_IMPORT_BLOCKED
+
+ def test_export_approved_maps_to_export_approved_stage(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.Approved)
+ assert stage == constants.STAGE_EXPORT_APPROVED
+ assert stage == "export-approved"
+
+ def test_export_approval_in_progress_maps_to_export_approved_stage(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.ApprovalInProgress)
+ assert stage == constants.STAGE_EXPORT_APPROVED
+ assert stage == "export-approved"
+
+ def test_export_draft_maps_to_export_internal_stage(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.Draft)
+ assert stage == constants.STAGE_EXPORT_INTERNAL
+ assert stage == "export-internal"
+
+ def test_export_submitted_maps_to_export_in_progress_stage(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.Submitted)
+ assert stage == constants.STAGE_EXPORT_IN_PROGRESS
+ assert stage == "export-in-progress"
+
+ def test_export_in_review_maps_to_export_in_progress_stage(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.InReview)
+ assert stage == constants.STAGE_EXPORT_IN_PROGRESS
+
+ def test_export_rejected_maps_to_export_rejected_stage(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.Rejected)
+ assert stage == constants.STAGE_EXPORT_REJECTED
+ assert stage == "export-rejected"
+
+ def test_export_rejection_in_progress_maps_to_export_rejected_stage(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.RejectionInProgress)
+ assert stage == constants.STAGE_EXPORT_REJECTED
+
+ def test_export_blocked_maps_to_export_blocked_stage(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.Blocked)
+ assert stage == constants.STAGE_EXPORT_BLOCKED
+ assert stage == "export-blocked"
+
+ def test_export_blocking_in_progress_maps_to_export_blocked_stage(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.BlockingInProgress)
+ assert stage == constants.STAGE_EXPORT_BLOCKED
+
+ def test_unknown_status_returns_unknown(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Failed)
+ assert stage == "unknown"
+
+
+class TestGetStorageAccountNameForRequestConsolidatedMode:
+
+ class TestImportRequestsConsolidated:
+
+ def test_import_draft_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Draft, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_submitted_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Submitted, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_in_review_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.InReview, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_approved_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Approved, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_import_approval_in_progress_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.ApprovalInProgress, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_import_rejected_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Rejected, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_import_blocked_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Blocked, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ class TestExportRequestsConsolidated:
+
+ def test_export_draft_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Draft, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_export_submitted_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Submitted, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_export_in_review_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.InReview, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_export_approved_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Approved, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_export_approval_in_progress_uses_core_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.ApprovalInProgress, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlocktre123"
+
+ def test_export_rejected_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Rejected, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+ def test_export_blocked_uses_workspace_global_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Blocked, "tre123", "ws12", airlock_version=2
+ )
+ assert account == "stalairlockgtre123"
+
+
+class TestGetStorageAccountNameForRequestLegacyMode:
+
+ class TestImportRequestsLegacy:
+
+ def test_import_draft_uses_external_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Draft, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalimextre123"
+
+ def test_import_submitted_uses_inprogress_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Submitted, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalimiptre123"
+
+ def test_import_in_review_uses_inprogress_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.InReview, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalimiptre123"
+
+ def test_import_approved_uses_workspace_approved_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Approved, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalimappwsws12"
+
+ def test_import_rejected_uses_rejected_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Rejected, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalimrejtre123"
+
+ def test_import_blocked_uses_blocked_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.IMPORT_TYPE, AirlockRequestStatus.Blocked, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalimblockedtre123"
+
+ class TestExportRequestsLegacy:
+
+ def test_export_draft_uses_workspace_internal_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Draft, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalexintwsws12"
+
+ def test_export_submitted_uses_workspace_inprogress_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Submitted, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalexipwsws12"
+
+ def test_export_approved_uses_core_approved_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Approved, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalexapptre123"
+
+ def test_export_rejected_uses_workspace_rejected_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Rejected, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalexrejwsws12"
+
+ def test_export_blocked_uses_workspace_blocked_storage(self):
+ account = get_storage_account_name_for_request(
+ constants.EXPORT_TYPE, AirlockRequestStatus.Blocked, "tre123", "ws12", airlock_version=1
+ )
+ assert account == "stalexblockedwsws12"
+
+
+class TestABACStageConstants:
+
+ def test_import_external_stage_constant_value(self):
+ assert constants.STAGE_IMPORT_EXTERNAL == "import-external"
+
+ def test_import_in_progress_stage_constant_value(self):
+ assert constants.STAGE_IMPORT_IN_PROGRESS == "import-in-progress"
+
+ def test_export_approved_stage_constant_value(self):
+ assert constants.STAGE_EXPORT_APPROVED == "export-approved"
+
+ def test_import_approved_stage_constant_value(self):
+ assert constants.STAGE_IMPORT_APPROVED == "import-approved"
+
+ def test_import_rejected_stage_constant_value(self):
+ assert constants.STAGE_IMPORT_REJECTED == "import-rejected"
+
+ def test_import_blocked_stage_constant_value(self):
+ assert constants.STAGE_IMPORT_BLOCKED == "import-blocked"
+
+ def test_export_internal_stage_constant_value(self):
+ assert constants.STAGE_EXPORT_INTERNAL == "export-internal"
+
+ def test_export_in_progress_stage_constant_value(self):
+ assert constants.STAGE_EXPORT_IN_PROGRESS == "export-in-progress"
+
+ def test_export_rejected_stage_constant_value(self):
+ assert constants.STAGE_EXPORT_REJECTED == "export-rejected"
+
+ def test_export_blocked_stage_constant_value(self):
+ assert constants.STAGE_EXPORT_BLOCKED == "export-blocked"
+
+
+class TestABACAccessibleStages:
+
+ ABAC_ALLOWED_STAGES = ['import-external', 'import-in-progress', 'export-approved']
+
+ def test_import_draft_is_abac_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Draft)
+ assert stage in self.ABAC_ALLOWED_STAGES
+
+ def test_import_submitted_is_abac_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Submitted)
+ assert stage in self.ABAC_ALLOWED_STAGES
+
+ def test_import_in_review_is_abac_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.InReview)
+ assert stage in self.ABAC_ALLOWED_STAGES
+
+ def test_import_approved_is_not_abac_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Approved)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_import_rejected_is_not_abac_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Rejected)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_import_blocked_is_not_abac_accessible(self):
+ stage = get_stage_from_status(constants.IMPORT_TYPE, AirlockRequestStatus.Blocked)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_export_draft_is_not_abac_accessible(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.Draft)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_export_submitted_is_not_abac_accessible(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.Submitted)
+ assert stage not in self.ABAC_ALLOWED_STAGES
+
+ def test_export_approved_is_abac_accessible(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.Approved)
+ assert stage in self.ABAC_ALLOWED_STAGES
+
+ def test_export_approval_in_progress_is_abac_accessible(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.ApprovalInProgress)
+ assert stage in self.ABAC_ALLOWED_STAGES
+
+ def test_export_rejected_is_not_abac_accessible(self):
+ stage = get_stage_from_status(constants.EXPORT_TYPE, AirlockRequestStatus.Rejected)
+ assert stage not in self.ABAC_ALLOWED_STAGES
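The consolidated-mode tests above pin down the v2 routing rule: import requests live in the core account until approval copies them to the workspace-global account, while export requests travel the opposite way. A minimal sketch of that rule, using only names and statuses taken from the test expectations (the real get_storage_account_name_for_request lives in the API codebase, also handles airlock_version=1, and takes a workspace short ID that does not appear in the v2 account names):

    # Illustrative sketch only; mirrors the assertions above, not the API source.
    from enum import Enum


    class AirlockRequestStatus(str, Enum):
        Draft = "draft"
        Submitted = "submitted"
        InReview = "in_review"
        ApprovalInProgress = "approval_in_progress"
        Approved = "approved"
        Rejected = "rejected"
        Blocked = "blocked"


    IMPORT_TYPE = "import"
    EXPORT_TYPE = "export"


    def v2_storage_account_name(request_type: str, status: AirlockRequestStatus, tre_id: str) -> str:
        # tre_id is assumed already lowercased with hyphens stripped, as in locals.tf
        core = f"stalairlock{tre_id}"
        workspace_global = f"stalairlockg{tre_id}"
        if request_type == IMPORT_TYPE:
            # Imports stay core-side until approval copies them to the workspace-global
            # account. ApprovalInProgress for imports is not covered by the visible
            # tests; core is assumed here.
            return workspace_global if status == AirlockRequestStatus.Approved else core
        # Exports stay workspace-side until approval copies them to the core account
        if status in (AirlockRequestStatus.Approved, AirlockRequestStatus.ApprovalInProgress):
            return core
        return workspace_global


    assert v2_storage_account_name(IMPORT_TYPE, AirlockRequestStatus.Rejected, "tre123") == "stalairlocktre123"
    assert v2_storage_account_name(EXPORT_TYPE, AirlockRequestStatus.Draft, "tre123") == "stalairlockgtre123"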
diff --git a/config.sample.yaml b/config.sample.yaml
index 95fedf283..fbdf33ca3 100644
--- a/config.sample.yaml
+++ b/config.sample.yaml
@@ -16,7 +16,6 @@ management:
# encryption_kv_name: __CHANGE_ME__
# Azure Resource Manager credentials used for CI/CD pipelines
arm_subscription_id: __CHANGE_ME__
-
# If you want to override the currently signed in credentials
# You would do this if running commands like `make terraform-install DIR=./templates/workspaces/base`
# arm_tenant_id: __CHANGE_ME__
@@ -38,6 +37,8 @@ tre:
resource_processor_vmss_sku: Standard_B2s
enable_swagger: true
enable_airlock_malware_scanning: true
+ # Set to true to deploy v1 legacy per-stage airlock storage accounts (required for workspaces using airlock_version=1)
+ enable_legacy_airlock: true
# Set to true if want to ensure users have an email address before airlock request is created
# Used if rely on email notifications for governance purposes
@@ -58,7 +59,6 @@ tre:
# Set to true if TreAdmins should be able to assign and de-assign users to workspaces via the UI
user_management_enabled: false
-
# Uncomment to enable DNS Security policy on the system, and add any known DNS names that you need to allow
# DNS queries on, in addition to those in the core list in core/terraform/allowed-dns.json
# Note, these need to be fully qualified, i.e. they end in a dot(.)
@@ -101,6 +101,7 @@ ui_config:
ui_site_name: "Azure TRE"
# Footer text shown in the bottom left hand corner of the TRE portal
ui_footer_text: "Azure Trusted Research Environment"
+
#developer_settings:
# Locks will not be added to stateful resources so they can be easily removed
# stateful_resources_locked: false
diff --git a/config_schema.json b/config_schema.json
index abfaf9721..245e2ad51 100644
--- a/config_schema.json
+++ b/config_schema.json
@@ -85,6 +85,10 @@
"description": "Require email check for airlock.",
"type": "boolean"
},
+ "enable_legacy_airlock": {
+ "description": "Deploy v1 legacy per-stage airlock storage accounts in core. Required for workspaces using airlock_version=1.",
+ "type": "boolean"
+ },
"core_address_space": {
"description": "TRE core address spaces.",
"type": "string"
diff --git a/core/terraform/airlock/data.tf b/core/terraform/airlock/data.tf
index dbec1db64..0ce749e3b 100644
--- a/core/terraform/airlock/data.tf
+++ b/core/terraform/airlock/data.tf
@@ -7,5 +7,5 @@ data "azurerm_monitor_diagnostic_categories" "eventgrid_custom_topics" {
}
data "azurerm_monitor_diagnostic_categories" "eventgrid_system_topics" {
- resource_id = azurerm_eventgrid_system_topic.export_approved_blob_created.id
+ resource_id = azurerm_eventgrid_system_topic.airlock_blob_created.id
}
diff --git a/core/terraform/airlock/eventgrid_topics.tf b/core/terraform/airlock/eventgrid_topics.tf
index 4041b5624..62cbd6b87 100644
--- a/core/terraform/airlock/eventgrid_topics.tf
+++ b/core/terraform/airlock/eventgrid_topics.tf
@@ -191,136 +191,6 @@ resource "azurerm_role_assignment" "servicebus_sender_scan_result" {
}
# System topic
-resource "azurerm_eventgrid_system_topic" "import_inprogress_blob_created" {
- name = local.import_inprogress_sys_topic_name
- location = var.location
- resource_group_name = var.resource_group_name
- source_resource_id = azurerm_storage_account.sa_import_in_progress.id
- topic_type = "Microsoft.Storage.StorageAccounts"
-
- identity {
- type = "SystemAssigned"
- }
-
- tags = merge(var.tre_core_tags, {
- Publishers = "airlock;import-in-progress-sa"
- })
-
- depends_on = [
- azurerm_storage_account.sa_import_in_progress
- ]
-
- lifecycle { ignore_changes = [tags] }
-}
-
-resource "azurerm_role_assignment" "servicebus_sender_import_inprogress_blob_created" {
- scope = var.airlock_servicebus.id
- role_definition_name = "Azure Service Bus Data Sender"
- principal_id = azurerm_eventgrid_system_topic.import_inprogress_blob_created.identity[0].principal_id
-
- depends_on = [
- azurerm_eventgrid_system_topic.import_inprogress_blob_created
- ]
-}
-
-
-resource "azurerm_eventgrid_system_topic" "import_rejected_blob_created" {
- name = local.import_rejected_sys_topic_name
- location = var.location
- resource_group_name = var.resource_group_name
- source_resource_id = azurerm_storage_account.sa_import_rejected.id
- topic_type = "Microsoft.Storage.StorageAccounts"
-
- identity {
- type = "SystemAssigned"
- }
-
- tags = merge(var.tre_core_tags, {
- Publishers = "airlock;import-rejected-sa"
- })
-
- depends_on = [
- azurerm_storage_account.sa_import_rejected,
- ]
-
- lifecycle { ignore_changes = [tags] }
-}
-
-resource "azurerm_role_assignment" "servicebus_sender_import_rejected_blob_created" {
- scope = var.airlock_servicebus.id
- role_definition_name = "Azure Service Bus Data Sender"
- principal_id = azurerm_eventgrid_system_topic.import_rejected_blob_created.identity[0].principal_id
-
- depends_on = [
- azurerm_eventgrid_system_topic.import_rejected_blob_created
- ]
-}
-
-resource "azurerm_eventgrid_system_topic" "import_blocked_blob_created" {
- name = local.import_blocked_sys_topic_name
- location = var.location
- resource_group_name = var.resource_group_name
- source_resource_id = azurerm_storage_account.sa_import_blocked.id
- topic_type = "Microsoft.Storage.StorageAccounts"
-
- identity {
- type = "SystemAssigned"
- }
-
- tags = merge(var.tre_core_tags, {
- Publishers = "airlock;import-blocked-sa"
- })
-
- depends_on = [
- azurerm_storage_account.sa_import_blocked,
- ]
-
- lifecycle { ignore_changes = [tags] }
-}
-
-resource "azurerm_role_assignment" "servicebus_sender_import_blocked_blob_created" {
- scope = var.airlock_servicebus.id
- role_definition_name = "Azure Service Bus Data Sender"
- principal_id = azurerm_eventgrid_system_topic.import_blocked_blob_created.identity[0].principal_id
-
- depends_on = [
- azurerm_eventgrid_system_topic.import_blocked_blob_created
- ]
-}
-
-
-resource "azurerm_eventgrid_system_topic" "export_approved_blob_created" {
- name = local.export_approved_sys_topic_name
- location = var.location
- resource_group_name = var.resource_group_name
- source_resource_id = azurerm_storage_account.sa_export_approved.id
- topic_type = "Microsoft.Storage.StorageAccounts"
-
- identity {
- type = "SystemAssigned"
- }
-
- tags = merge(var.tre_core_tags, {
- Publishers = "airlock;export-approved-sa"
- })
-
- depends_on = [
- azurerm_storage_account.sa_export_approved,
- ]
-
- lifecycle { ignore_changes = [tags] }
-}
-
-resource "azurerm_role_assignment" "servicebus_sender_export_approved_blob_created" {
- scope = var.airlock_servicebus.id
- role_definition_name = "Azure Service Bus Data Sender"
- principal_id = azurerm_eventgrid_system_topic.export_approved_blob_created.identity[0].principal_id
-
- depends_on = [
- azurerm_eventgrid_system_topic.export_approved_blob_created
- ]
-}
-
# Custom topic (for airlock notifications)
resource "azurerm_eventgrid_topic" "airlock_notification" {
name = local.notification_topic_name
@@ -442,25 +312,12 @@ resource "azurerm_eventgrid_event_subscription" "scan_result" {
]
}
-resource "azurerm_eventgrid_event_subscription" "import_inprogress_blob_created" {
- name = local.import_inprogress_eventgrid_subscription_name
- scope = azurerm_storage_account.sa_import_in_progress.id
-
- service_bus_topic_endpoint_id = azurerm_servicebus_topic.blob_created.id
-
- delivery_identity {
- type = "SystemAssigned"
- }
-
- depends_on = [
- azurerm_eventgrid_system_topic.import_inprogress_blob_created,
- azurerm_role_assignment.servicebus_sender_import_inprogress_blob_created
- ]
-}
-
-resource "azurerm_eventgrid_event_subscription" "import_rejected_blob_created" {
- name = local.import_rejected_eventgrid_subscription_name
- scope = azurerm_storage_account.sa_import_rejected.id
+# Unified EventGrid Event Subscription for ALL Core Blob Created Events
+# This single subscription handles ALL 5 core stages: import-external, import-in-progress,
+# import-rejected, import-blocked, export-approved
+resource "azurerm_eventgrid_event_subscription" "airlock_blob_created" {
+ name = "airlock-blob-created-${var.tre_id}"
+ scope = azurerm_storage_account.sa_airlock_core.id
service_bus_topic_endpoint_id = azurerm_servicebus_topic.blob_created.id
@@ -468,18 +325,22 @@ resource "azurerm_eventgrid_event_subscription" "import_rejected_blob_created" {
type = "SystemAssigned"
}
- # Todo add Dead_letter
+ # Include all blob created events - airlock processor will check container metadata for routing
+ included_event_types = ["Microsoft.Storage.BlobCreated"]
depends_on = [
- azurerm_eventgrid_system_topic.import_rejected_blob_created,
- azurerm_role_assignment.servicebus_sender_import_rejected_blob_created
+ azurerm_eventgrid_system_topic.airlock_blob_created,
+ azurerm_role_assignment.servicebus_sender_airlock_blob_created
]
}
-
-resource "azurerm_eventgrid_event_subscription" "import_blocked_blob_created" {
- name = local.import_blocked_eventgrid_subscription_name
- scope = azurerm_storage_account.sa_import_blocked.id
+# EventGrid Event Subscription for workspace-global storage account (v2)
+# Routes BlobCreated events to the same service bus topic as core.
+# BlobCreatedTrigger reads container metadata to determine the stage and emit StepResult
+# when cross-account copies complete (e.g., import approval: core → workspace-global).
+resource "azurerm_eventgrid_event_subscription" "airlock_workspace_global_blob_created" {
+ name = "airlock-blob-created-global-${var.tre_id}"
+ scope = azurerm_storage_account.sa_airlock_workspace_global.id
service_bus_topic_endpoint_id = azurerm_servicebus_topic.blob_created.id
@@ -487,27 +348,11 @@ resource "azurerm_eventgrid_event_subscription" "import_blocked_blob_created" {
type = "SystemAssigned"
}
- # Todo add Dead_letter
+ included_event_types = ["Microsoft.Storage.BlobCreated"]
depends_on = [
- azurerm_eventgrid_system_topic.import_blocked_blob_created,
- azurerm_role_assignment.servicebus_sender_import_blocked_blob_created
- ]
-}
-
-resource "azurerm_eventgrid_event_subscription" "export_approved_blob_created" {
- name = local.export_approved_eventgrid_subscription_name
- scope = azurerm_storage_account.sa_export_approved.id
-
- service_bus_topic_endpoint_id = azurerm_servicebus_topic.blob_created.id
-
- delivery_identity {
- type = "SystemAssigned"
- }
-
- depends_on = [
- azurerm_eventgrid_system_topic.export_approved_blob_created,
- azurerm_role_assignment.servicebus_sender_export_approved_blob_created
+ azurerm_eventgrid_system_topic.airlock_workspace_global_blob_created,
+ azurerm_role_assignment.servicebus_sender_airlock_workspace_global_blob_created
]
}
@@ -537,12 +382,18 @@ resource "azurerm_monitor_diagnostic_setting" "eventgrid_custom_topics" {
}
resource "azurerm_monitor_diagnostic_setting" "eventgrid_system_topics" {
- for_each = {
- (azurerm_eventgrid_system_topic.import_inprogress_blob_created.name) = azurerm_eventgrid_system_topic.import_inprogress_blob_created.id,
- (azurerm_eventgrid_system_topic.import_rejected_blob_created.name) = azurerm_eventgrid_system_topic.import_rejected_blob_created.id,
- (azurerm_eventgrid_system_topic.import_blocked_blob_created.name) = azurerm_eventgrid_system_topic.import_blocked_blob_created.id,
- (azurerm_eventgrid_system_topic.export_approved_blob_created.name) = azurerm_eventgrid_system_topic.export_approved_blob_created.id,
- }
+ for_each = merge(
+ {
+ (azurerm_eventgrid_system_topic.airlock_blob_created.name) = azurerm_eventgrid_system_topic.airlock_blob_created.id,
+ (azurerm_eventgrid_system_topic.airlock_workspace_global_blob_created.name) = azurerm_eventgrid_system_topic.airlock_workspace_global_blob_created.id,
+ },
+ var.enable_legacy_airlock ? {
+ (azurerm_eventgrid_system_topic.import_inprogress_blob_created[0].name) = azurerm_eventgrid_system_topic.import_inprogress_blob_created[0].id,
+ (azurerm_eventgrid_system_topic.import_rejected_blob_created[0].name) = azurerm_eventgrid_system_topic.import_rejected_blob_created[0].id,
+ (azurerm_eventgrid_system_topic.import_blocked_blob_created[0].name) = azurerm_eventgrid_system_topic.import_blocked_blob_created[0].id,
+ (azurerm_eventgrid_system_topic.export_approved_blob_created[0].name) = azurerm_eventgrid_system_topic.export_approved_blob_created[0].id,
+ } : {}
+ )
name = "${each.key}-diagnostics"
target_resource_id = each.value
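Both consolidated subscriptions deliver every BlobCreated event to the same Service Bus topic, so the processor must recover the stage from container metadata rather than from the storage account name. A minimal sketch of that lookup, assuming the processor identity has blob data access and that containers carry a 'stage' metadata key, as the ABAC conditions later in this diff rely on:

    from typing import Optional

    from azure.identity import DefaultAzureCredential
    from azure.storage.blob import BlobServiceClient


    def resolve_stage(account_url: str, container_name: str) -> Optional[str]:
        client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential())
        props = client.get_container_client(container_name).get_container_properties()
        # e.g. "import-in-progress" or "export-approved"; None for containers without a stage
        return (props.metadata or {}).get("stage")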
diff --git a/core/terraform/airlock/eventgrid_topics_v1.tf b/core/terraform/airlock/eventgrid_topics_v1.tf
new file mode 100644
index 000000000..795527afc
--- /dev/null
+++ b/core/terraform/airlock/eventgrid_topics_v1.tf
@@ -0,0 +1,209 @@
+# Legacy (v1) EventGrid system topics and subscriptions for per-stage storage accounts
+# These are only deployed when enable_legacy_airlock = true
+
+resource "azurerm_eventgrid_system_topic" "import_inprogress_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_inprogress_sys_topic_name
+ location = var.location
+ resource_group_name = var.resource_group_name
+ source_arm_resource_id = azurerm_storage_account.sa_import_in_progress[0].id
+ topic_type = "Microsoft.Storage.StorageAccounts"
+
+ identity {
+ type = "SystemAssigned"
+ }
+
+ tags = merge(var.tre_core_tags, {
+ Publishers = "airlock;import-in-progress-sa"
+ })
+
+ depends_on = [
+ azurerm_storage_account.sa_import_in_progress
+ ]
+
+ lifecycle { ignore_changes = [tags] }
+}
+
+resource "azurerm_role_assignment" "servicebus_sender_import_inprogress_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ scope = var.airlock_servicebus.id
+ role_definition_name = "Azure Service Bus Data Sender"
+ principal_id = azurerm_eventgrid_system_topic.import_inprogress_blob_created[0].identity[0].principal_id
+
+ depends_on = [
+ azurerm_eventgrid_system_topic.import_inprogress_blob_created
+ ]
+}
+
+
+resource "azurerm_eventgrid_system_topic" "import_rejected_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_rejected_sys_topic_name
+ location = var.location
+ resource_group_name = var.resource_group_name
+ source_arm_resource_id = azurerm_storage_account.sa_import_rejected[0].id
+ topic_type = "Microsoft.Storage.StorageAccounts"
+
+ identity {
+ type = "SystemAssigned"
+ }
+
+ tags = merge(var.tre_core_tags, {
+ Publishers = "airlock;import-rejected-sa"
+ })
+
+ depends_on = [
+ azurerm_storage_account.sa_import_rejected,
+ ]
+
+ lifecycle { ignore_changes = [tags] }
+}
+
+resource "azurerm_role_assignment" "servicebus_sender_import_rejected_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ scope = var.airlock_servicebus.id
+ role_definition_name = "Azure Service Bus Data Sender"
+ principal_id = azurerm_eventgrid_system_topic.import_rejected_blob_created[0].identity[0].principal_id
+
+ depends_on = [
+ azurerm_eventgrid_system_topic.import_rejected_blob_created
+ ]
+}
+
+resource "azurerm_eventgrid_system_topic" "import_blocked_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_blocked_sys_topic_name
+ location = var.location
+ resource_group_name = var.resource_group_name
+ source_arm_resource_id = azurerm_storage_account.sa_import_blocked[0].id
+ topic_type = "Microsoft.Storage.StorageAccounts"
+
+ identity {
+ type = "SystemAssigned"
+ }
+
+ tags = merge(var.tre_core_tags, {
+ Publishers = "airlock;import-blocked-sa"
+ })
+
+ depends_on = [
+ azurerm_storage_account.sa_import_blocked,
+ ]
+
+ lifecycle { ignore_changes = [tags] }
+}
+
+resource "azurerm_role_assignment" "servicebus_sender_import_blocked_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ scope = var.airlock_servicebus.id
+ role_definition_name = "Azure Service Bus Data Sender"
+ principal_id = azurerm_eventgrid_system_topic.import_blocked_blob_created[0].identity[0].principal_id
+
+ depends_on = [
+ azurerm_eventgrid_system_topic.import_blocked_blob_created
+ ]
+}
+
+
+resource "azurerm_eventgrid_system_topic" "export_approved_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.export_approved_sys_topic_name
+ location = var.location
+ resource_group_name = var.resource_group_name
+ source_arm_resource_id = azurerm_storage_account.sa_export_approved[0].id
+ topic_type = "Microsoft.Storage.StorageAccounts"
+
+ identity {
+ type = "SystemAssigned"
+ }
+
+ tags = merge(var.tre_core_tags, {
+ Publishers = "airlock;export-approved-sa"
+ })
+
+ depends_on = [
+ azurerm_storage_account.sa_export_approved,
+ ]
+
+ lifecycle { ignore_changes = [tags] }
+}
+
+resource "azurerm_role_assignment" "servicebus_sender_export_approved_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ scope = var.airlock_servicebus.id
+ role_definition_name = "Azure Service Bus Data Sender"
+ principal_id = azurerm_eventgrid_system_topic.export_approved_blob_created[0].identity[0].principal_id
+
+ depends_on = [
+ azurerm_eventgrid_system_topic.export_approved_blob_created
+ ]
+}
+
+# Legacy EventGrid subscriptions for per-stage storage accounts
+resource "azurerm_eventgrid_event_subscription" "import_inprogress_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_inprogress_eventgrid_subscription_name
+ scope = azurerm_storage_account.sa_import_in_progress[0].id
+
+ service_bus_topic_endpoint_id = azurerm_servicebus_topic.blob_created.id
+
+ delivery_identity {
+ type = "SystemAssigned"
+ }
+
+ depends_on = [
+ azurerm_eventgrid_system_topic.import_inprogress_blob_created,
+ azurerm_role_assignment.servicebus_sender_import_inprogress_blob_created
+ ]
+}
+
+resource "azurerm_eventgrid_event_subscription" "import_rejected_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_rejected_eventgrid_subscription_name
+ scope = azurerm_storage_account.sa_import_rejected[0].id
+
+ service_bus_topic_endpoint_id = azurerm_servicebus_topic.blob_created.id
+
+ delivery_identity {
+ type = "SystemAssigned"
+ }
+
+ depends_on = [
+ azurerm_eventgrid_system_topic.import_rejected_blob_created,
+ azurerm_role_assignment.servicebus_sender_import_rejected_blob_created
+ ]
+}
+
+resource "azurerm_eventgrid_event_subscription" "import_blocked_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_blocked_eventgrid_subscription_name
+ scope = azurerm_storage_account.sa_import_blocked[0].id
+
+ service_bus_topic_endpoint_id = azurerm_servicebus_topic.blob_created.id
+
+ delivery_identity {
+ type = "SystemAssigned"
+ }
+
+ depends_on = [
+ azurerm_eventgrid_system_topic.import_blocked_blob_created,
+ azurerm_role_assignment.servicebus_sender_import_blocked_blob_created
+ ]
+}
+
+resource "azurerm_eventgrid_event_subscription" "export_approved_blob_created" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.export_approved_eventgrid_subscription_name
+ scope = azurerm_storage_account.sa_export_approved[0].id
+
+ service_bus_topic_endpoint_id = azurerm_servicebus_topic.blob_created.id
+
+ delivery_identity {
+ type = "SystemAssigned"
+ }
+
+ depends_on = [
+ azurerm_eventgrid_system_topic.export_approved_blob_created,
+ azurerm_role_assignment.servicebus_sender_export_approved_blob_created
+ ]
+}
diff --git a/core/terraform/airlock/identity.tf b/core/terraform/airlock/identity.tf
index b4e272c14..0cdb55345 100644
--- a/core/terraform/airlock/identity.tf
+++ b/core/terraform/airlock/identity.tf
@@ -49,21 +49,6 @@ resource "azurerm_role_assignment" "eventgrid_data_sender_data_deletion" {
principal_id = azurerm_user_assigned_identity.airlock_id.principal_id
}
-resource "azurerm_role_assignment" "airlock_blob_data_contributor" {
- count = length(local.airlock_sa_blob_data_contributor)
- scope = local.airlock_sa_blob_data_contributor[count.index]
- role_definition_name = "Storage Blob Data Contributor"
- principal_id = azurerm_user_assigned_identity.airlock_id.principal_id
-}
-
-# This might be considered redundent since we give Virtual Machine Contributor
-# at the subscription level, but best to be explicit.
-resource "azurerm_role_assignment" "api_sa_data_contributor" {
- count = length(local.api_sa_data_contributor)
- scope = local.api_sa_data_contributor[count.index]
- role_definition_name = "Storage Blob Data Contributor"
- principal_id = var.api_principal_id
-}
# Permissions needed for the Function Host to work correctly.
resource "azurerm_role_assignment" "function_host_storage" {
diff --git a/core/terraform/airlock/locals.tf b/core/terraform/airlock/locals.tf
index 838ddf091..642f4bf40 100644
--- a/core/terraform/airlock/locals.tf
+++ b/core/terraform/airlock/locals.tf
@@ -1,26 +1,18 @@
locals {
version = replace(replace(replace(data.local_file.airlock_processor_version.content, "__version__ = \"", ""), "\"", ""), "\n", "")
- # STorage AirLock EXternal
- import_external_storage_name = lower(replace("stalimex${var.tre_id}", "-", ""))
- # STorage AirLock IMport InProgress
- import_in_progress_storage_name = lower(replace("stalimip${var.tre_id}", "-", ""))
- # STorage AirLock IMport REJected
- import_rejected_storage_name = lower(replace("stalimrej${var.tre_id}", "-", ""))
- # STorage AirLock IMport BLOCKED
- import_blocked_storage_name = lower(replace("stalimblocked${var.tre_id}", "-", ""))
- # STorage AirLock EXPort APProved
- export_approved_storage_name = lower(replace("stalexapp${var.tre_id}", "-", ""))
+ # Consolidated core airlock storage account
+ # STorage AirLock consolidated
+ airlock_core_storage_name = lower(replace("stalairlock${var.tre_id}", "-", ""))
+
+ # Global Workspace Airlock Storage Account - shared by all workspaces
+ # STorage AirLock Global - all workspace stages for all workspaces
+ airlock_workspace_global_storage_name = lower(replace("stalairlockg${var.tre_id}", "-", ""))
# Due to the following issue and Azure not liking delete and immediate recreate under the same name,
# we had to change the resource names. https://github.com/hashicorp/terraform-provider-azurerm/issues/17389
topic_name_suffix = "v2-${var.tre_id}"
- import_inprogress_sys_topic_name = "evgt-airlock-import-in-progress-${local.topic_name_suffix}"
- import_rejected_sys_topic_name = "evgt-airlock-import-rejected-${local.topic_name_suffix}"
- import_blocked_sys_topic_name = "evgt-airlock-import-blocked-${local.topic_name_suffix}"
- export_approved_sys_topic_name = "evgt-airlock-export-approved-${local.topic_name_suffix}"
-
step_result_topic_name = "evgt-airlock-step-result-${local.topic_name_suffix}"
status_changed_topic_name = "evgt-airlock-status-changed-${local.topic_name_suffix}"
notification_topic_name = "evgt-airlock-notification-${local.topic_name_suffix}"
@@ -35,31 +27,46 @@ locals {
blob_created_al_processor_subscription_name = "airlock-blob-created-airlock-processor"
- step_result_eventgrid_subscription_name = "evgs-airlock-update-status"
- status_changed_eventgrid_subscription_name = "evgs-airlock-status-changed"
- data_deletion_eventgrid_subscription_name = "evgs-airlock-data-deletion"
- scan_result_eventgrid_subscription_name = "evgs-airlock-scan-result"
+ step_result_eventgrid_subscription_name = "evgs-airlock-update-status"
+ status_changed_eventgrid_subscription_name = "evgs-airlock-status-changed"
+ data_deletion_eventgrid_subscription_name = "evgs-airlock-data-deletion"
+ scan_result_eventgrid_subscription_name = "evgs-airlock-scan-result"
+
+ # Legacy (v1) per-stage storage account names - only used when enable_legacy_airlock = true
+ import_external_storage_name = lower(replace("stalimex${var.tre_id}", "-", ""))
+ import_in_progress_storage_name = lower(replace("stalimip${var.tre_id}", "-", ""))
+ import_rejected_storage_name = lower(replace("stalimrej${var.tre_id}", "-", ""))
+ import_blocked_storage_name = lower(replace("stalimblocked${var.tre_id}", "-", ""))
+ export_approved_storage_name = lower(replace("stalexapp${var.tre_id}", "-", ""))
+
+ # Legacy (v1) eventgrid topic/subscription names
+ import_inprogress_sys_topic_name = "evgt-airlock-import-in-progress-${local.topic_name_suffix}"
+ import_rejected_sys_topic_name = "evgt-airlock-import-rejected-${local.topic_name_suffix}"
+ import_blocked_sys_topic_name = "evgt-airlock-import-blocked-${local.topic_name_suffix}"
+ export_approved_sys_topic_name = "evgt-airlock-export-approved-${local.topic_name_suffix}"
+
import_inprogress_eventgrid_subscription_name = "evgs-airlock-import-in-progress-blob-created"
import_rejected_eventgrid_subscription_name = "evgs-airlock-import-rejected-blob-created"
import_blocked_eventgrid_subscription_name = "evgs-airlock-import-blocked-blob-created"
export_approved_eventgrid_subscription_name = "evgs-airlock-export-approved-blob-created"
- airlock_function_app_name = "func-airlock-processor-${var.tre_id}"
- airlock_function_sa_name = lower(replace("stairlockp${var.tre_id}", "-", ""))
+ # Legacy (v1) role assignment lists
+ airlock_sa_blob_data_contributor = var.enable_legacy_airlock ? [
+ azurerm_storage_account.sa_import_external[0].id,
+ azurerm_storage_account.sa_import_in_progress[0].id,
+ azurerm_storage_account.sa_import_rejected[0].id,
+ azurerm_storage_account.sa_export_approved[0].id,
+ azurerm_storage_account.sa_import_blocked[0].id
+ ] : []
- airlock_sa_blob_data_contributor = [
- azurerm_storage_account.sa_import_external.id,
- azurerm_storage_account.sa_import_in_progress.id,
- azurerm_storage_account.sa_import_rejected.id,
- azurerm_storage_account.sa_export_approved.id,
- azurerm_storage_account.sa_import_blocked.id
- ]
+ api_sa_data_contributor = var.enable_legacy_airlock ? [
+ azurerm_storage_account.sa_import_external[0].id,
+ azurerm_storage_account.sa_import_in_progress[0].id,
+ azurerm_storage_account.sa_export_approved[0].id
+ ] : []
- api_sa_data_contributor = [
- azurerm_storage_account.sa_import_external.id,
- azurerm_storage_account.sa_import_in_progress.id,
- azurerm_storage_account.sa_export_approved.id
- ]
+ airlock_function_app_name = "func-airlock-processor-${var.tre_id}"
+ airlock_function_sa_name = lower(replace("stairlockp${var.tre_id}", "-", ""))
servicebus_connection = "SERVICEBUS_CONNECTION"
step_result_eventgrid_connection = "EVENT_GRID_STEP_RESULT_CONNECTION"
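The lower(replace(...)) pattern above exists because Azure storage account names must be 3-24 lowercase alphanumeric characters, so any hyphens in the TRE ID are stripped. The same derivation in Python, matching the account names asserted in the tests earlier:

    def airlock_storage_names(tre_id: str) -> tuple:
        # Core and workspace-global consolidated account names (illustrative helper)
        normalised = tre_id.replace("-", "").lower()
        return (f"stalairlock{normalised}", f"stalairlockg{normalised}")


    assert airlock_storage_names("tre123") == ("stalairlocktre123", "stalairlockgtre123")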
diff --git a/core/terraform/airlock/outputs.tf b/core/terraform/airlock/outputs.tf
index 5a71e7503..2dfeeaf8f 100644
--- a/core/terraform/airlock/outputs.tf
+++ b/core/terraform/airlock/outputs.tf
@@ -21,3 +21,7 @@ output "event_grid_airlock_notification_topic_resource_id" {
output "airlock_malware_scan_result_topic_name" {
value = local.scan_result_topic_name
}
+
+output "airlock_core_storage_fqdn" {
+ value = azurerm_storage_account.sa_airlock_core.primary_blob_host
+}
diff --git a/core/terraform/airlock/storage_accounts.tf b/core/terraform/airlock/storage_accounts.tf
index 13b8071ab..df837c268 100644
--- a/core/terraform/airlock/storage_accounts.tf
+++ b/core/terraform/airlock/storage_accounts.tf
@@ -1,8 +1,7 @@
-# 'External' storage account - drop location for import
-resource "azurerm_storage_account" "sa_import_external" {
- name = local.import_external_storage_name
+resource "azurerm_storage_account" "sa_airlock_core" {
+ name = local.airlock_core_storage_name
location = var.location
resource_group_name = var.resource_group_name
account_tier = "Standard"
@@ -12,10 +11,10 @@ resource "azurerm_storage_account" "sa_import_external" {
cross_tenant_replication_enabled = false
shared_access_key_enabled = false
local_user_enabled = false
- # Don't allow anonymous access (unrelated to the 'public' networking rules)
- allow_nested_items_to_be_public = false
+ allow_nested_items_to_be_public = false
+ public_network_access_enabled = true
- # Important! we rely on the fact that the blob craeted events are issued when the creation of the blobs are done.
+ # Important! we rely on the fact that the blob created events are issued when the creation of the blobs are done.
# This is true ONLY when Hierarchical Namespace is DISABLED
is_hns_enabled = false
@@ -38,15 +37,56 @@ resource "azurerm_storage_account" "sa_import_external" {
}
}
+ # Core storage is publicly accessible for user-facing stages (import-external, export-approved)
+ # matching the original sa_import_external / sa_export_approved security model.
+ # Security is enforced by:
+ # - ABAC conditions on role assignments (API restricted to import-external + export-approved stages)
+ # - User delegation SAS tokens (inherit ABAC restrictions of the signing identity)
+ # - SAS tokens are only generated for publicly-accessible stages
+ # Internal stages (in-progress, rejected, blocked) are protected by ABAC even though
+ # the storage account allows public network access.
+ network_rules {
+ default_action = "Allow"
+ bypass = ["AzureServices"]
+ }
+
tags = merge(var.tre_core_tags, {
- description = "airlock;import;external"
+ description = "airlock;core;consolidated"
+ SecurityControl = "Ignore"
})
lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
}
-resource "azurerm_private_endpoint" "stg_import_external_pe" {
- name = "pe-stg-import-external-blob-${var.tre_id}"
+# Enable Airlock Malware Scanning on Consolidated Core Storage Account
+resource "azapi_resource_action" "enable_defender_for_storage_core" {
+ count = var.enable_malware_scanning ? 1 : 0
+ type = "Microsoft.Security/defenderForStorageSettings@2022-12-01-preview"
+ resource_id = "${azurerm_storage_account.sa_airlock_core.id}/providers/Microsoft.Security/defenderForStorageSettings/current"
+ method = "PUT"
+
+ body = {
+ properties = {
+ isEnabled = true
+ malwareScanning = {
+ onUpload = {
+ isEnabled = true
+ capGBPerMonth = 5000
+ },
+ scanResultsEventGridTopicResourceId = azurerm_eventgrid_topic.scan_result[0].id
+ }
+ sensitiveDataDiscovery = {
+ isEnabled = false
+ }
+ overrideSubscriptionLevelSettings = true
+ }
+ }
+}
+
+# Private Endpoint #1: From Airlock Storage Subnet (Processor Access)
+# For airlock processor to access all stages
+resource "azurerm_private_endpoint" "stg_airlock_core_pe_processor" {
+ name = "pe-stg-airlock-processor-${var.tre_id}"
location = var.location
resource_group_name = var.resource_group_name
subnet_id = var.airlock_storage_subnet_id
@@ -55,89 +95,84 @@ resource "azurerm_private_endpoint" "stg_import_external_pe" {
lifecycle { ignore_changes = [tags] }
private_dns_zone_group {
- name = "pdzg-stg-import-external-blob-${var.tre_id}"
+ name = "pdzg-stg-airlock-processor-${var.tre_id}"
private_dns_zone_ids = [var.blob_core_dns_zone_id]
}
private_service_connection {
- name = "psc-stg-import-external-blob-${var.tre_id}"
- private_connection_resource_id = azurerm_storage_account.sa_import_external.id
+ name = "psc-stg-airlock-processor-${var.tre_id}"
+ private_connection_resource_id = azurerm_storage_account.sa_airlock_core.id
is_manual_connection = false
subresource_names = ["Blob"]
}
}
-# 'Approved' export
-resource "azurerm_storage_account" "sa_export_approved" {
- name = local.export_approved_storage_name
- location = var.location
- resource_group_name = var.resource_group_name
- account_tier = "Standard"
- account_replication_type = "LRS"
- table_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
- queue_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
- cross_tenant_replication_enabled = false
- shared_access_key_enabled = false
- local_user_enabled = false
-
- # Don't allow anonymous access (unrelated to the 'public' networking rules)
- allow_nested_items_to_be_public = false
-
- # Important! we rely on the fact that the blob craeted events are issued when the creation of the blobs are done.
- # This is true ONLY when Hierarchical Namespace is DISABLED
- is_hns_enabled = false
+resource "azurerm_eventgrid_system_topic" "airlock_blob_created" {
+ name = "evgt-airlock-blob-created-${var.tre_id}"
+ location = var.location
+ resource_group_name = var.resource_group_name
+ source_arm_resource_id = azurerm_storage_account.sa_airlock_core.id
+ topic_type = "Microsoft.Storage.StorageAccounts"
+ tags = var.tre_core_tags
- # changing this value is destructive, hence attribute is in lifecycle.ignore_changes block below
- infrastructure_encryption_enabled = true
-
- dynamic "identity" {
- for_each = var.enable_cmk_encryption ? [1] : []
- content {
- type = "UserAssigned"
- identity_ids = [var.encryption_identity_id]
- }
+ identity {
+ type = "SystemAssigned"
}
- dynamic "customer_managed_key" {
- for_each = var.enable_cmk_encryption ? [1] : []
- content {
- key_vault_key_id = var.encryption_key_versionless_id
- user_assigned_identity_id = var.encryption_identity_id
- }
- }
+ lifecycle { ignore_changes = [tags] }
+}
- tags = merge(var.tre_core_tags, {
- description = "airlock;export;approved"
- })
+resource "azurerm_role_assignment" "servicebus_sender_airlock_blob_created" {
+ scope = var.airlock_servicebus.id
+ role_definition_name = "Azure Service Bus Data Sender"
+ principal_id = azurerm_eventgrid_system_topic.airlock_blob_created.identity[0].principal_id
- lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
+ depends_on = [
+ azurerm_eventgrid_system_topic.airlock_blob_created
+ ]
}
-resource "azurerm_private_endpoint" "stg_export_approved_pe" {
- name = "pe-stg-export-approved-blob-${var.tre_id}"
- location = var.location
- resource_group_name = var.resource_group_name
- subnet_id = var.airlock_storage_subnet_id
- tags = var.tre_core_tags
- lifecycle { ignore_changes = [tags] }
+# Role Assignments for Consolidated Core Storage Account
- private_dns_zone_group {
- name = "pdzg-stg-export-approved-blob-${var.tre_id}"
- private_dns_zone_ids = [var.blob_core_dns_zone_id]
- }
+# Airlock Processor Identity - needs access to all containers (no restrictions)
+resource "azurerm_role_assignment" "airlock_core_blob_data_contributor" {
+ scope = azurerm_storage_account.sa_airlock_core.id
+ role_definition_name = "Storage Blob Data Contributor"
+ principal_id = azurerm_user_assigned_identity.airlock_id.principal_id
+}
- private_service_connection {
- name = "psc-stg-export-approved-blob-${var.tre_id}"
- private_connection_resource_id = azurerm_storage_account.sa_export_approved.id
- is_manual_connection = false
- subresource_names = ["Blob"]
- }
+# API Identity - access restricted via ABAC conditions to specific stages
+# The API connects through the processor private endpoint and can reach the import-external and export-approved stages
+resource "azurerm_role_assignment" "api_core_blob_data_contributor" {
+ scope = azurerm_storage_account.sa_airlock_core.id
+ role_definition_name = "Storage Blob Data Contributor"
+ principal_id = var.api_principal_id
+
+ # ABAC condition: Restrict blob operations to specific stages only
+ # Logic: Allow if (action is NOT a blob operation) OR (action is blob operation AND stage matches)
+ # This allows container operations (list, etc.) while restricting blob read/write/delete to allowed stages
+ condition_version = "2.0"
+ condition = <<-EOT
+ (
+ (
+ !(ActionMatches{'Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read'})
+ AND !(ActionMatches{'Microsoft.Storage/storageAccounts/blobServices/containers/blobs/write'})
+ AND !(ActionMatches{'Microsoft.Storage/storageAccounts/blobServices/containers/blobs/add/action'})
+ AND !(ActionMatches{'Microsoft.Storage/storageAccounts/blobServices/containers/blobs/delete'})
+ )
+ OR
+ @Resource[Microsoft.Storage/storageAccounts/blobServices/containers/metadata:stage]
+ StringEquals 'import-external'
+ OR
+ @Resource[Microsoft.Storage/storageAccounts/blobServices/containers/metadata:stage]
+ StringEquals 'export-approved'
+ )
+ EOT
}
-# 'In-Progress' storage account
-resource "azurerm_storage_account" "sa_import_in_progress" {
- name = local.import_in_progress_storage_name
+resource "azurerm_storage_account" "sa_airlock_workspace_global" {
+ name = local.airlock_workspace_global_storage_name
location = var.location
resource_group_name = var.resource_group_name
account_tier = "Standard"
@@ -156,6 +191,11 @@ resource "azurerm_storage_account" "sa_import_in_progress" {
# changing this value is destructive, hence attribute is in lifecycle.ignore_changes block below
infrastructure_encryption_enabled = true
+ network_rules {
+ default_action = var.enable_local_debugging ? "Allow" : "Deny"
+ bypass = ["AzureServices"]
+ }
+
dynamic "identity" {
for_each = var.enable_cmk_encryption ? [1] : []
content {
@@ -173,22 +213,17 @@ resource "azurerm_storage_account" "sa_import_in_progress" {
}
tags = merge(var.tre_core_tags, {
- description = "airlock;import;in-progress"
+ description = "airlock;workspace;global"
})
- network_rules {
- default_action = var.enable_local_debugging ? "Allow" : "Deny"
- bypass = ["AzureServices"]
- }
-
lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
}
-# Enable Airlock Malware Scanning on Core TRE
-resource "azapi_resource_action" "enable_defender_for_storage" {
+
+resource "azapi_resource_action" "enable_defender_for_storage_workspace_global" {
count = var.enable_malware_scanning ? 1 : 0
type = "Microsoft.Security/defenderForStorageSettings@2022-12-01-preview"
- resource_id = "${azurerm_storage_account.sa_import_in_progress.id}/providers/Microsoft.Security/defenderForStorageSettings/current"
+ resource_id = "${azurerm_storage_account.sa_airlock_workspace_global.id}/providers/Microsoft.Security/defenderForStorageSettings/current"
method = "PUT"
body = {
@@ -209,170 +244,67 @@ resource "azapi_resource_action" "enable_defender_for_storage" {
}
}
-resource "azurerm_private_endpoint" "stg_import_inprogress_pe" {
- name = "pe-stg-import-inprogress-blob-${var.tre_id}"
- location = var.location
- resource_group_name = var.resource_group_name
- subnet_id = var.airlock_storage_subnet_id
- tags = var.tre_core_tags
- lifecycle { ignore_changes = [tags] }
+resource "azurerm_eventgrid_system_topic" "airlock_workspace_global_blob_created" {
+ name = "evgt-airlock-blob-created-global-${var.tre_id}"
+ location = var.location
+ resource_group_name = var.resource_group_name
+ source_arm_resource_id = azurerm_storage_account.sa_airlock_workspace_global.id
+ topic_type = "Microsoft.Storage.StorageAccounts"
+ tags = var.tre_core_tags
- private_dns_zone_group {
- name = "pdzg-stg-import-inprogress-blob-${var.tre_id}"
- private_dns_zone_ids = [var.blob_core_dns_zone_id]
+ identity {
+ type = "SystemAssigned"
}
- private_service_connection {
- name = "psc-stg-import-inprogress-blob-${var.tre_id}"
- private_connection_resource_id = azurerm_storage_account.sa_import_in_progress.id
- is_manual_connection = false
- subresource_names = ["Blob"]
- }
+ lifecycle { ignore_changes = [tags] }
}
+# Role Assignment for Global Workspace EventGrid System Topic
+resource "azurerm_role_assignment" "servicebus_sender_airlock_workspace_global_blob_created" {
+ scope = var.airlock_servicebus.id
+ role_definition_name = "Azure Service Bus Data Sender"
+ principal_id = azurerm_eventgrid_system_topic.airlock_workspace_global_blob_created.identity[0].principal_id
-# 'Rejected' storage account
-resource "azurerm_storage_account" "sa_import_rejected" {
- name = local.import_rejected_storage_name
- location = var.location
- resource_group_name = var.resource_group_name
- account_tier = "Standard"
- account_replication_type = "LRS"
- table_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
- queue_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
- allow_nested_items_to_be_public = false
- cross_tenant_replication_enabled = false
- shared_access_key_enabled = false
- local_user_enabled = false
-
- # Important! we rely on the fact that the blob craeted events are issued when the creation of the blobs are done.
- # This is true ONLY when Hierarchical Namespace is DISABLED
- is_hns_enabled = false
-
- # changing this value is destructive, hence attribute is in lifecycle.ignore_changes block below
- infrastructure_encryption_enabled = true
-
- dynamic "identity" {
- for_each = var.enable_cmk_encryption ? [1] : []
- content {
- type = "UserAssigned"
- identity_ids = [var.encryption_identity_id]
- }
- }
-
- dynamic "customer_managed_key" {
- for_each = var.enable_cmk_encryption ? [1] : []
- content {
- key_vault_key_id = var.encryption_key_versionless_id
- user_assigned_identity_id = var.encryption_identity_id
- }
- }
-
- tags = merge(var.tre_core_tags, {
- description = "airlock;import;rejected"
- })
-
- network_rules {
- default_action = var.enable_local_debugging ? "Allow" : "Deny"
- bypass = ["AzureServices"]
- }
-
- lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
+ depends_on = [
+ azurerm_eventgrid_system_topic.airlock_workspace_global_blob_created
+ ]
}
-resource "azurerm_private_endpoint" "stg_import_rejected_pe" {
- name = "pe-stg-import-rejected-blob-${var.tre_id}"
+# Private Endpoint for workspace global storage (processor access via private endpoint, not service endpoint)
+resource "azurerm_private_endpoint" "stg_airlock_workspace_global_pe_processor" {
+ name = "pe-stg-airlock-ws-global-${var.tre_id}"
location = var.location
resource_group_name = var.resource_group_name
subnet_id = var.airlock_storage_subnet_id
-
- private_dns_zone_group {
- name = "pdzg-stg-import-rejected-blob-${var.tre_id}"
- private_dns_zone_ids = [var.blob_core_dns_zone_id]
- }
-
- private_service_connection {
- name = "psc-stg-import-rejected-blob-${var.tre_id}"
- private_connection_resource_id = azurerm_storage_account.sa_import_rejected.id
- is_manual_connection = false
- subresource_names = ["Blob"]
- }
-
- tags = var.tre_core_tags
+ tags = var.tre_core_tags
lifecycle { ignore_changes = [tags] }
-}
-
-# 'Blocked' storage account
-resource "azurerm_storage_account" "sa_import_blocked" {
- name = local.import_blocked_storage_name
- location = var.location
- resource_group_name = var.resource_group_name
- account_tier = "Standard"
- account_replication_type = "LRS"
- table_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
- queue_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
- allow_nested_items_to_be_public = false
- cross_tenant_replication_enabled = false
- shared_access_key_enabled = false
- local_user_enabled = false
-
- # Important! we rely on the fact that the blob craeted events are issued when the creation of the blobs are done.
- # This is true ONLY when Hierarchical Namespace is DISABLED
- is_hns_enabled = false
-
- # changing this value is destructive, hence attribute is in lifecycle.ignore_changes block below
- infrastructure_encryption_enabled = true
-
- dynamic "identity" {
- for_each = var.enable_cmk_encryption ? [1] : []
- content {
- type = "UserAssigned"
- identity_ids = [var.encryption_identity_id]
- }
- }
-
- dynamic "customer_managed_key" {
- for_each = var.enable_cmk_encryption ? [1] : []
- content {
- key_vault_key_id = var.encryption_key_versionless_id
- user_assigned_identity_id = var.encryption_identity_id
- }
- }
-
- tags = merge(var.tre_core_tags, {
- description = "airlock;import;blocked"
- })
-
- network_rules {
- default_action = var.enable_local_debugging ? "Allow" : "Deny"
- bypass = ["AzureServices"]
- }
-
- lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
-}
-
-resource "azurerm_private_endpoint" "stg_import_blocked_pe" {
- name = "pe-stg-import-blocked-blob-${var.tre_id}"
- location = var.location
- resource_group_name = var.resource_group_name
- subnet_id = var.airlock_storage_subnet_id
private_dns_zone_group {
- name = "pdzg-stg-import-blocked-blob-${var.tre_id}"
+ name = "pdzg-stg-airlock-ws-global-${var.tre_id}"
private_dns_zone_ids = [var.blob_core_dns_zone_id]
}
private_service_connection {
- name = "psc-stg-import-blocked-blob-${var.tre_id}"
- private_connection_resource_id = azurerm_storage_account.sa_import_blocked.id
+ name = "psc-stg-airlock-ws-global-${var.tre_id}"
+ private_connection_resource_id = azurerm_storage_account.sa_airlock_workspace_global.id
is_manual_connection = false
subresource_names = ["Blob"]
}
+}
- tags = var.tre_core_tags
-
- lifecycle { ignore_changes = [tags] }
+# Airlock Processor Identity - needs access to all workspace containers (no restrictions)
+resource "azurerm_role_assignment" "airlock_workspace_global_blob_data_contributor" {
+ scope = azurerm_storage_account.sa_airlock_workspace_global.id
+ role_definition_name = "Storage Blob Data Contributor"
+ principal_id = azurerm_user_assigned_identity.airlock_id.principal_id
}
+# API Identity - needs Storage Blob Delegator to generate user delegation SAS tokens.
+# Blob-level access is controlled by ABAC-conditioned role assignments in workspace terraform.
+resource "azurerm_role_assignment" "api_workspace_global_blob_delegator" {
+ scope = azurerm_storage_account.sa_airlock_workspace_global.id
+ role_definition_name = "Storage Blob Delegator"
+ principal_id = var.api_principal_id
+}
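The Storage Blob Delegator assignment above lets the API request a user delegation key; access through the resulting SAS is still constrained by the ABAC-conditioned role assignments on the signing identity, which is the property the comments on the core account rely on. A sketch of minting such a SAS (illustrative names; assumes DefaultAzureCredential resolves to the API identity):

    from datetime import datetime, timedelta, timezone

    from azure.identity import DefaultAzureCredential
    from azure.storage.blob import (
        BlobServiceClient,
        ContainerSasPermissions,
        generate_container_sas,
    )


    def container_sas(account_name: str, container_name: str) -> str:
        url = f"https://{account_name}.blob.core.windows.net"
        client = BlobServiceClient(account_url=url, credential=DefaultAzureCredential())
        start = datetime.now(timezone.utc)
        expiry = start + timedelta(hours=1)
        key = client.get_user_delegation_key(key_start_time=start, key_expiry_time=expiry)
        # Permissions here are an upper bound; effective access is intersected with
        # the ABAC conditions on the signing identity's role assignments.
        return generate_container_sas(
            account_name=account_name,
            container_name=container_name,
            user_delegation_key=key,
            permission=ContainerSasPermissions(read=True, write=True, list=True),
            expiry=expiry,
        )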
diff --git a/core/terraform/airlock/storage_accounts_v1.tf b/core/terraform/airlock/storage_accounts_v1.tf
new file mode 100644
index 000000000..395ca5745
--- /dev/null
+++ b/core/terraform/airlock/storage_accounts_v1.tf
@@ -0,0 +1,380 @@
+# Legacy (v1) per-stage storage accounts for airlock
+# These are only deployed when enable_legacy_airlock = true
+# Required for workspaces using airlock_version = 1
+
+# 'External' storage account - drop location for import
+resource "azurerm_storage_account" "sa_import_external" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_external_storage_name
+ location = var.location
+ resource_group_name = var.resource_group_name
+ account_tier = "Standard"
+ account_replication_type = "LRS"
+ table_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ queue_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ cross_tenant_replication_enabled = false
+ shared_access_key_enabled = false
+ local_user_enabled = false
+ allow_nested_items_to_be_public = false
+
+ is_hns_enabled = false
+ infrastructure_encryption_enabled = true
+
+ dynamic "identity" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ type = "UserAssigned"
+ identity_ids = [var.encryption_identity_id]
+ }
+ }
+
+ dynamic "customer_managed_key" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ key_vault_key_id = var.encryption_key_versionless_id
+ user_assigned_identity_id = var.encryption_identity_id
+ }
+ }
+
+ tags = merge(var.tre_core_tags, {
+ description = "airlock;import;external"
+ })
+
+ lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
+}
+
+resource "azurerm_private_endpoint" "stg_import_external_pe" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = "pe-stg-import-external-blob-${var.tre_id}"
+ location = var.location
+ resource_group_name = var.resource_group_name
+ subnet_id = var.airlock_storage_subnet_id
+ tags = var.tre_core_tags
+
+ lifecycle { ignore_changes = [tags] }
+
+ private_dns_zone_group {
+ name = "pdzg-stg-import-external-blob-${var.tre_id}"
+ private_dns_zone_ids = [var.blob_core_dns_zone_id]
+ }
+
+ private_service_connection {
+ name = "psc-stg-import-external-blob-${var.tre_id}"
+ private_connection_resource_id = azurerm_storage_account.sa_import_external[0].id
+ is_manual_connection = false
+ subresource_names = ["Blob"]
+ }
+}
+
+# 'Approved' export
+resource "azurerm_storage_account" "sa_export_approved" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.export_approved_storage_name
+ location = var.location
+ resource_group_name = var.resource_group_name
+ account_tier = "Standard"
+ account_replication_type = "LRS"
+ table_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ queue_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ cross_tenant_replication_enabled = false
+ shared_access_key_enabled = false
+ local_user_enabled = false
+ allow_nested_items_to_be_public = false
+
+ is_hns_enabled = false
+ infrastructure_encryption_enabled = true
+
+ dynamic "identity" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ type = "UserAssigned"
+ identity_ids = [var.encryption_identity_id]
+ }
+ }
+
+ dynamic "customer_managed_key" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ key_vault_key_id = var.encryption_key_versionless_id
+ user_assigned_identity_id = var.encryption_identity_id
+ }
+ }
+
+ tags = merge(var.tre_core_tags, {
+ description = "airlock;export;approved"
+ })
+
+ lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
+}
+
+resource "azurerm_private_endpoint" "stg_export_approved_pe" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = "pe-stg-export-approved-blob-${var.tre_id}"
+ location = var.location
+ resource_group_name = var.resource_group_name
+ subnet_id = var.airlock_storage_subnet_id
+ tags = var.tre_core_tags
+
+ lifecycle { ignore_changes = [tags] }
+
+ private_dns_zone_group {
+ name = "pdzg-stg-export-approved-blob-${var.tre_id}"
+ private_dns_zone_ids = [var.blob_core_dns_zone_id]
+ }
+
+ private_service_connection {
+ name = "psc-stg-export-approved-blob-${var.tre_id}"
+ private_connection_resource_id = azurerm_storage_account.sa_export_approved[0].id
+ is_manual_connection = false
+ subresource_names = ["Blob"]
+ }
+}
+
+# 'In-Progress' storage account
+resource "azurerm_storage_account" "sa_import_in_progress" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_in_progress_storage_name
+ location = var.location
+ resource_group_name = var.resource_group_name
+ account_tier = "Standard"
+ account_replication_type = "LRS"
+ table_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ queue_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ allow_nested_items_to_be_public = false
+ cross_tenant_replication_enabled = false
+ shared_access_key_enabled = false
+ local_user_enabled = false
+
+ is_hns_enabled = false
+ infrastructure_encryption_enabled = true
+
+ dynamic "identity" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ type = "UserAssigned"
+ identity_ids = [var.encryption_identity_id]
+ }
+ }
+
+ dynamic "customer_managed_key" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ key_vault_key_id = var.encryption_key_versionless_id
+ user_assigned_identity_id = var.encryption_identity_id
+ }
+ }
+
+ tags = merge(var.tre_core_tags, {
+ description = "airlock;import;in-progress"
+ })
+
+ network_rules {
+ default_action = var.enable_local_debugging ? "Allow" : "Deny"
+ bypass = ["AzureServices"]
+ }
+
+ lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
+}
+
+# Enable Airlock Malware Scanning on legacy in-progress storage
+resource "azapi_resource_action" "enable_defender_for_storage" {
+ count = var.enable_legacy_airlock && var.enable_malware_scanning ? 1 : 0
+ type = "Microsoft.Security/defenderForStorageSettings@2022-12-01-preview"
+ resource_id = "${azurerm_storage_account.sa_import_in_progress[0].id}/providers/Microsoft.Security/defenderForStorageSettings/current"
+ method = "PUT"
+
+ body = {
+ properties = {
+ isEnabled = true
+ malwareScanning = {
+ onUpload = {
+ isEnabled = true
+ capGBPerMonth = 5000
+ },
+ scanResultsEventGridTopicResourceId = azurerm_eventgrid_topic.scan_result[0].id
+ }
+ sensitiveDataDiscovery = {
+ isEnabled = false
+ }
+ overrideSubscriptionLevelSettings = true
+ }
+ }
+}
+
+resource "azurerm_private_endpoint" "stg_import_inprogress_pe" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = "pe-stg-import-inprogress-blob-${var.tre_id}"
+ location = var.location
+ resource_group_name = var.resource_group_name
+ subnet_id = var.airlock_storage_subnet_id
+ tags = var.tre_core_tags
+
+ lifecycle { ignore_changes = [tags] }
+
+ private_dns_zone_group {
+ name = "pdzg-stg-import-inprogress-blob-${var.tre_id}"
+ private_dns_zone_ids = [var.blob_core_dns_zone_id]
+ }
+
+ private_service_connection {
+ name = "psc-stg-import-inprogress-blob-${var.tre_id}"
+ private_connection_resource_id = azurerm_storage_account.sa_import_in_progress[0].id
+ is_manual_connection = false
+ subresource_names = ["Blob"]
+ }
+}
+
+# 'Rejected' storage account
+resource "azurerm_storage_account" "sa_import_rejected" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_rejected_storage_name
+ location = var.location
+ resource_group_name = var.resource_group_name
+ account_tier = "Standard"
+ account_replication_type = "LRS"
+ table_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ queue_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ allow_nested_items_to_be_public = false
+ cross_tenant_replication_enabled = false
+ shared_access_key_enabled = false
+ local_user_enabled = false
+
+ is_hns_enabled = false
+ infrastructure_encryption_enabled = true
+
+ dynamic "identity" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ type = "UserAssigned"
+ identity_ids = [var.encryption_identity_id]
+ }
+ }
+
+ dynamic "customer_managed_key" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ key_vault_key_id = var.encryption_key_versionless_id
+ user_assigned_identity_id = var.encryption_identity_id
+ }
+ }
+
+ tags = merge(var.tre_core_tags, {
+ description = "airlock;import;rejected"
+ })
+
+ network_rules {
+ default_action = var.enable_local_debugging ? "Allow" : "Deny"
+ bypass = ["AzureServices"]
+ }
+
+ lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
+}
+
+resource "azurerm_private_endpoint" "stg_import_rejected_pe" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = "pe-stg-import-rejected-blob-${var.tre_id}"
+ location = var.location
+ resource_group_name = var.resource_group_name
+ subnet_id = var.airlock_storage_subnet_id
+
+ private_dns_zone_group {
+ name = "pdzg-stg-import-rejected-blob-${var.tre_id}"
+ private_dns_zone_ids = [var.blob_core_dns_zone_id]
+ }
+
+ private_service_connection {
+ name = "psc-stg-import-rejected-blob-${var.tre_id}"
+ private_connection_resource_id = azurerm_storage_account.sa_import_rejected[0].id
+ is_manual_connection = false
+ subresource_names = ["Blob"]
+ }
+
+ tags = var.tre_core_tags
+
+ lifecycle { ignore_changes = [tags] }
+}
+
+# 'Blocked' storage account
+resource "azurerm_storage_account" "sa_import_blocked" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = local.import_blocked_storage_name
+ location = var.location
+ resource_group_name = var.resource_group_name
+ account_tier = "Standard"
+ account_replication_type = "LRS"
+ table_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ queue_encryption_key_type = var.enable_cmk_encryption ? "Account" : "Service"
+ allow_nested_items_to_be_public = false
+ cross_tenant_replication_enabled = false
+ shared_access_key_enabled = false
+ local_user_enabled = false
+
+ is_hns_enabled = false
+ infrastructure_encryption_enabled = true
+
+ dynamic "identity" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ type = "UserAssigned"
+ identity_ids = [var.encryption_identity_id]
+ }
+ }
+
+ dynamic "customer_managed_key" {
+ for_each = var.enable_cmk_encryption ? [1] : []
+ content {
+ key_vault_key_id = var.encryption_key_versionless_id
+ user_assigned_identity_id = var.encryption_identity_id
+ }
+ }
+
+ tags = merge(var.tre_core_tags, {
+ description = "airlock;import;blocked"
+ })
+
+ network_rules {
+ default_action = var.enable_local_debugging ? "Allow" : "Deny"
+ bypass = ["AzureServices"]
+ }
+
+ lifecycle { ignore_changes = [infrastructure_encryption_enabled, tags] }
+}
+
+resource "azurerm_private_endpoint" "stg_import_blocked_pe" {
+ count = var.enable_legacy_airlock ? 1 : 0
+ name = "pe-stg-import-blocked-blob-${var.tre_id}"
+ location = var.location
+ resource_group_name = var.resource_group_name
+ subnet_id = var.airlock_storage_subnet_id
+
+ private_dns_zone_group {
+ name = "pdzg-stg-import-blocked-blob-${var.tre_id}"
+ private_dns_zone_ids = [var.blob_core_dns_zone_id]
+ }
+
+ private_service_connection {
+ name = "psc-stg-import-blocked-blob-${var.tre_id}"
+ private_connection_resource_id = azurerm_storage_account.sa_import_blocked[0].id
+ is_manual_connection = false
+ subresource_names = ["Blob"]
+ }
+
+ tags = var.tre_core_tags
+
+ lifecycle { ignore_changes = [tags] }
+}
+
+# Legacy role assignments for v1 per-stage storage accounts
+resource "azurerm_role_assignment" "airlock_blob_data_contributor" {
+ count = var.enable_legacy_airlock ? length(local.airlock_sa_blob_data_contributor) : 0
+ scope = local.airlock_sa_blob_data_contributor[count.index]
+ role_definition_name = "Storage Blob Data Contributor"
+ principal_id = azurerm_user_assigned_identity.airlock_id.principal_id
+}
+
+resource "azurerm_role_assignment" "api_sa_data_contributor" {
+ count = var.enable_legacy_airlock ? length(local.api_sa_data_contributor) : 0
+ scope = local.api_sa_data_contributor[count.index]
+ role_definition_name = "Storage Blob Data Contributor"
+ principal_id = var.api_principal_id
+}
diff --git a/core/terraform/airlock/variables.tf b/core/terraform/airlock/variables.tf
index 69888118d..16cbbb505 100644
--- a/core/terraform/airlock/variables.tf
+++ b/core/terraform/airlock/variables.tf
@@ -107,3 +107,9 @@ variable "encryption_key_versionless_id" {
type = string
description = "Versionless ID of the encryption key in the key vault"
}
+
+variable "enable_legacy_airlock" {
+ type = bool
+ default = true
+ description = "Deploy v1 legacy per-stage airlock storage accounts. Required for workspaces using airlock_version=1."
+}
diff --git a/core/terraform/api-webapp.tf b/core/terraform/api-webapp.tf
index 47afeb83c..18b346ac4 100644
--- a/core/terraform/api-webapp.tf
+++ b/core/terraform/api-webapp.tf
@@ -67,6 +67,8 @@ resource "azurerm_linux_web_app" "api" {
OTEL_RESOURCE_ATTRIBUTES = "service.name=api,service.version=${local.version}"
OTEL_EXPERIMENTAL_RESOURCE_DETECTORS = "azure_app_service"
USER_MANAGEMENT_ENABLED = var.user_management_enabled
+ # App Gateway FQDN, used by the API to build airlock storage links
+ # that are routed through the gateway
+ APP_GATEWAY_FQDN = module.appgateway.app_gateway_fqdn
}
identity {
diff --git a/core/terraform/appgateway/appgateway.tf b/core/terraform/appgateway/appgateway.tf
index 5afcfbb8a..75ac842d6 100644
--- a/core/terraform/appgateway/appgateway.tf
+++ b/core/terraform/appgateway/appgateway.tf
@@ -7,7 +7,7 @@ resource "azurerm_public_ip" "appgwpip" {
domain_name_label = var.tre_id
tags = local.tre_core_tags
- lifecycle { ignore_changes = [tags, zones] }
+ lifecycle { ignore_changes = [tags, zones, ip_tags] }
}
resource "azurerm_user_assigned_identity" "agw_id" {
@@ -90,6 +90,16 @@ resource "azurerm_application_gateway" "agw" {
fqdns = [var.api_fqdn]
}
+ # Backend pool with the airlock core storage account.
+ # Only core storage needs public App Gateway access for:
+ # - import-external: user uploads
+ # - import-in-progress: airlock manager review
+ # - export-approved: user downloads
+ backend_address_pool {
+ name = local.airlock_core_backend_pool_name
+ fqdns = [var.airlock_core_storage_fqdn]
+ }
+
# Backend settings for api.
# Using custom probe to test specific health endpoint
backend_http_settings {
@@ -113,6 +123,18 @@ resource "azurerm_application_gateway" "agw" {
pick_host_name_from_backend_address = true
}
+ # Backend settings for airlock core storage.
+ # Pass through query string for SAS token authentication
+ backend_http_settings {
+ name = local.airlock_core_http_setting_name
+ cookie_based_affinity = "Disabled"
+ port = 443
+ protocol = "Https"
+ request_timeout = 300
+ pick_host_name_from_backend_address = true
+ probe_name = local.airlock_core_probe_name
+ }
+
# Custom health probe for API.
probe {
name = local.api_probe_name
@@ -135,6 +157,24 @@ resource "azurerm_application_gateway" "agw" {
}
}
+ # Health probe for airlock core storage.
+ # Uses the blob service endpoint to check storage health
+ probe {
+ name = local.airlock_core_probe_name
+ pick_host_name_from_backend_http_settings = true
+ interval = 30
+ protocol = "Https"
+ path = "/"
+ timeout = 30
+ unhealthy_threshold = 3
+
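+ # Accept any non-5xx response: the blob endpoint returns 4xx to
+ # unauthenticated probe requests, which still proves it is reachable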
+ match {
+ status_code = [
+ "200-499"
+ ]
+ }
+ }
+
# Public HTTPS listener
http_listener {
name = local.secure_listener_name
@@ -208,6 +248,38 @@ resource "azurerm_application_gateway" "agw" {
rewrite_rule_set_name = "security-headers-rewrite-rule"
}
+ # Route airlock core storage traffic
+ # Path: /airlock-storage/{container}/{blob} → /{container}/{blob}
+ path_rule {
+ name = "airlock-storage"
+ paths = ["/airlock-storage/*"]
+ backend_address_pool_name = local.airlock_core_backend_pool_name
+ backend_http_settings_name = local.airlock_core_http_setting_name
+ rewrite_rule_set_name = "airlock-storage-rewrite"
+ }
+
+ }
+
+ # Rewrite rule set for airlock storage - strips /airlock-storage prefix
+ rewrite_rule_set {
+ name = "airlock-storage-rewrite"
+
+ rewrite_rule {
+ name = "strip-airlock-storage-prefix"
+ rule_sequence = 100
+
+ url {
+ path = "{var_uri_path_1}"
+ query_string = "{var_query_string}"
+ }
+
+ condition {
+ variable = "var_uri_path"
+ pattern = "/airlock-storage/(.*)"
+ ignore_case = true
+ negate = false
+ }
+ }
}
# Redirect any HTTP traffic to HTTPS unless its the ACME challenge path used for LetsEncrypt validation.
diff --git a/core/terraform/appgateway/locals.tf b/core/terraform/appgateway/locals.tf
index 4962ad86f..c8adafab8 100644
--- a/core/terraform/appgateway/locals.tf
+++ b/core/terraform/appgateway/locals.tf
@@ -6,6 +6,12 @@ locals {
app_path_map_name = "upm-application"
redirect_path_map_name = "upm-redirect"
+ # Airlock core storage backend (only core storage needs public App Gateway access)
+ # Workspace storage is accessed internally via private endpoints
+ airlock_core_backend_pool_name = "beap-airlock-core"
+ airlock_core_http_setting_name = "be-htst-airlock-core"
+ airlock_core_probe_name = "hp-airlock-core"
+
insecure_frontend_port_name = "feport-insecure"
secure_frontend_port_name = "feport-secure"
diff --git a/core/terraform/appgateway/variables.tf b/core/terraform/appgateway/variables.tf
index 77c223ec2..688f184a9 100644
--- a/core/terraform/appgateway/variables.tf
+++ b/core/terraform/appgateway/variables.tf
@@ -41,3 +41,11 @@ variable "encryption_key_versionless_id" {
variable "deployer_principal_id" {
type = string
}
+
+# Airlock core storage backend configuration
+# Only core storage needs public App Gateway access for import uploads and export downloads
+# Workspace storage is accessed internally via private endpoints from within workspaces
+variable "airlock_core_storage_fqdn" {
+ type = string
+ description = "FQDN of the consolidated core airlock storage account for App Gateway backend"
+}
diff --git a/core/terraform/main.tf b/core/terraform/main.tf
index b2f9a6f22..409bea790 100644
--- a/core/terraform/main.tf
+++ b/core/terraform/main.tf
@@ -130,15 +130,20 @@ module "appgateway" {
app_gateway_sku = var.app_gateway_sku
deployer_principal_id = data.azurerm_client_config.current.object_id
+ # Airlock core storage backend configuration for public access via App Gateway
+ # Only core storage needs public access (import uploads, in-progress review, export downloads)
+ # Workspace storage is accessed internally via private endpoints from within workspaces
+ airlock_core_storage_fqdn = module.airlock_resources.airlock_core_storage_fqdn
+
enable_cmk_encryption = var.enable_cmk_encryption
encryption_key_versionless_id = var.enable_cmk_encryption ? azurerm_key_vault_key.tre_encryption[0].versionless_id : null
encryption_identity_id = var.enable_cmk_encryption ? azurerm_user_assigned_identity.encryption[0].id : null
depends_on = [
module.network,
+ module.airlock_resources,
azurerm_key_vault.kv,
azurerm_role_assignment.keyvault_deployer_role,
- azurerm_private_endpoint.api_private_endpoint,
azurerm_key_vault_key.tre_encryption[0]
]
}
@@ -159,6 +164,7 @@ module "airlock_resources" {
airlock_servicebus_fqdn = azurerm_servicebus_namespace.sb.endpoint
applicationinsights_connection_string = module.azure_monitor.app_insights_connection_string
enable_malware_scanning = var.enable_airlock_malware_scanning
+ enable_legacy_airlock = var.enable_legacy_airlock
arm_environment = var.arm_environment
tre_core_tags = local.tre_core_tags
log_analytics_workspace_id = module.azure_monitor.log_analytics_workspace_id
diff --git a/core/terraform/variables.tf b/core/terraform/variables.tf
index e81375174..36588b2e2 100644
--- a/core/terraform/variables.tf
+++ b/core/terraform/variables.tf
@@ -174,6 +174,12 @@ variable "enable_airlock_malware_scanning" {
description = "If False, Airlock requests will skip the malware scanning stage"
}
+variable "enable_legacy_airlock" {
+ type = bool
+ default = true
+ description = "Deploy v1 legacy per-stage airlock storage accounts in core. Required for workspaces using airlock_version=1."
+}
+
variable "enable_airlock_email_check" {
type = bool
default = false
diff --git a/core/version.txt b/core/version.txt
index 54ea27795..1317d7554 100644
--- a/core/version.txt
+++ b/core/version.txt
@@ -1 +1 @@
-__version__ = "0.16.16"
+__version__ = "0.18.0"
diff --git a/docs/azure-tre-overview/airlock-legacy.md b/docs/azure-tre-overview/airlock-legacy.md
new file mode 100644
index 000000000..1201839b6
--- /dev/null
+++ b/docs/azure-tre-overview/airlock-legacy.md
@@ -0,0 +1,119 @@
+# Legacy Airlock Architecture
+
+!!! warning "Legacy Architecture"
+ This page documents the legacy airlock architecture that uses per-stage storage accounts. New deployments should use the current [consolidated architecture](airlock.md). This architecture is maintained for backwards compatibility with existing workspaces.
+
+## Overview
+
+The legacy airlock architecture uses **separate storage accounts for each stage** of the airlock process. Data is physically copied between storage accounts as the request progresses through stages. This results in 5 core storage accounts plus 5 more per workspace: at least 10 for a single-workspace TRE, growing by 5 with every additional workspace.
+
+To use the legacy architecture, set `airlock_version: 1` (the default) in your workspace properties and ensure `enable_legacy_airlock: true` is set in your `config.yaml`.
+
+## Storage Accounts
+
+### Core (TRE-level)
+
+| Storage Account | Name Pattern | Description | Network Access |
+| --- | --- | --- | --- |
+| `stalimex` | `stalimex{tre_id}` | Import external — initial upload location | Public (SAS token) |
+| `stalimip` | `stalimip{tre_id}` | Import in-progress — during review | TRE Core VNet |
+| `stalimrej` | `stalimrej{tre_id}` | Import rejected | TRE Core VNet |
+| `stalimblocked` | `stalimblocked{tre_id}` | Import blocked by scan | TRE Core VNet |
+| `stalexapp` | `stalexapp{tre_id}` | Export approved — final export location | Public (SAS token) |
+
+### Workspace-level
+
+| Storage Account | Name Pattern | Description | Network Access |
+| --- | --- | --- | --- |
+| `stalimappws` | `stalimappws{short_ws_id}` | Import approved — final import location | Workspace VNet |
+| `stalexintws` | `stalexintws{short_ws_id}` | Export internal — initial export upload | Workspace VNet |
+| `stalexipws` | `stalexipws{short_ws_id}` | Export in-progress — during review | Workspace VNet |
+| `stalexrejws` | `stalexrejws{short_ws_id}` | Export rejected | Workspace VNet |
+| `stalexblockedws` | `stalexblockedws{short_ws_id}` | Export blocked by scan | Workspace VNet |
+
+> Each workspace gets its own set of 5 storage accounts, leading to significant resource proliferation as the number of workspaces grows.
+
+## Data Flow
+
+In the legacy architecture, data is **copied between storage accounts** at each stage transition. A typical import request involves up to 3 copies:
+
+1. External → In-progress (on submit)
+2. In-progress → Blocked (if scan fails) OR stay in In-progress (if clean)
+3. In-progress → Approved (on approval) OR In-progress → Rejected (on rejection)
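+
+Each of these transitions is a physical, server-side blob copy between two storage accounts. A minimal sketch of one such hop, assuming the `azure-storage-blob` SDK (the account and container names are placeholders, not the processor's actual code):
+
+```python
+from azure.identity import DefaultAzureCredential
+from azure.storage.blob import BlobServiceClient
+
+SRC = "https://stalimexmytre.blob.core.windows.net"  # placeholder: import external
+DST = "https://stalimipmytre.blob.core.windows.net"  # placeholder: import in-progress
+REQUEST_ID = "<request-id>"  # each request gets a container named after its ID
+
+cred = DefaultAzureCredential()
+dst = BlobServiceClient(DST, credential=cred)
+dst.create_container(REQUEST_ID)
+
+# Server-side copy of the submitted file between the two accounts
+# (a real copy would authorise the source URL, e.g. with a SAS token)
+dst.get_blob_client(REQUEST_ID, "data.zip").start_copy_from_url(
+    f"{SRC}/{REQUEST_ID}/data.zip")
+```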
+
+```mermaid
+graph LR
+ subgraph TRE["TRE Core"]
+ A["stalimex\nimport external"]-->|"Copy on submit"| B
+ B["stalimip\nimport in-progress"]-->|"Copy if blocked"| D["stalimblocked\nimport blocked"]
+ B-->|"No issues"| review{"Manual\nApproval"}
+ review-->|"Copy on reject"| C["stalimrej\nimport rejected"]
+ end
+ subgraph Workspace["TRE Workspace"]
+ review-->|"Copy on approve"| E["stalimappws\nimport approved"]
+ end
+ subgraph External["External"]
+ data("Data to import")-->A
+ end
+```
+> Legacy import data flow — data is copied at each stage transition.
+
+```mermaid
+graph LR
+ subgraph Workspace["TRE Workspace"]
+ data("Data to export")-->A
+ A["stalexintws\nexport internal"]-->|"Copy on submit"| B
+ B["stalexipws\nexport in-progress"]-->|"Copy if blocked"| D["stalexblockedws\nexport blocked"]
+ B-->|"No issues"| review{"Manual\nApproval"}
+ review-->|"Copy on reject"| C["stalexrejws\nexport rejected"]
+ end
+ subgraph External["External"]
+ review-->|"Copy on approve"| E["stalexapp\nexport approved"]
+ end
+```
+> Legacy export data flow — data is copied at each stage transition.
+
+## Network Architecture
+
+In the legacy architecture, each storage account has its own network configuration:
+
+- **External accounts** (`stalimex`, `stalexapp`): Not bound to any VNet, accessible via SAS token through the internet.
+- **Core internal accounts** (`stalimip`, `stalimrej`, `stalimblocked`): Bound to the TRE Core VNet.
+- **Workspace accounts** (`stalimappws`, `stalexintws`, `stalexipws`, `stalexrejws`, `stalexblockedws`): Bound to the workspace VNet.
+
+Each storage account has its own private endpoints, EventGrid system topics, and role assignments.
+
+[](../assets/airlock-networking.png)
+
+## Airlock Flow
+
+The following diagram shows the legacy airlock flow with data copies between storage accounts:
+
+[](../assets/airlock-swimlanes.png)
+
+## Comparison with Current Architecture
+
+| Aspect | Current (Consolidated) | Legacy (Per-Stage) |
+| --- | --- | --- |
+| **Storage accounts** | 2 total | 10+ (5 core + 5 per workspace) |
+| **Stage tracking** | Container metadata | Separate storage accounts |
+| **Data copies per request** | 1 (on approval only) | Up to 3 |
+| **Workspace isolation** | ABAC + shared PE | Dedicated storage per workspace |
+| **Private endpoints** | 2 core + 1 per workspace | 5 core + 5 per workspace |
+| **EventGrid topics** | 2 system topics | 10+ system topics |
+| **Infrastructure cost** | Lower | Higher (more resources) |
+| **Stage transition speed** | Near-instant (metadata) | Minutes (data copy) |
+| **Scalability** | All workspaces share storage | Linear growth per workspace |
+
+## Upgrading to Current Architecture
+
+To upgrade a workspace from the legacy architecture:
+
+1. Ensure core is deployed with the current codebase (`enable_legacy_airlock: true` to keep legacy infrastructure alongside the new accounts).
+2. Update the workspace `airlock_version` property to `2` (an API sketch follows this list).
+3. Redeploy the workspace: this switches it from the legacy airlock Terraform module to the consolidated module.
+4. New airlock requests will use the consolidated storage accounts. In-flight requests on the legacy path will continue to completion on the legacy accounts (the version is stamped on each request at creation time).
+5. Once all workspaces are migrated and no legacy requests are in-flight, set `enable_legacy_airlock: false` in `config.yaml` and redeploy core to remove the legacy storage accounts.
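+
+Step 2 can be performed through the TRE API. A hedged sketch with Python's `requests`; the base URL, workspace ID and token are placeholders, and the exact headers your deployment requires may differ:
+
+```python
+import requests
+
+API = "https://mytre.example.com"  # placeholder TRE API base URL
+WORKSPACE_ID = "<workspace-id>"
+TOKEN = "<bearer-token-with-workspace-owner-role>"
+
+# Switch the workspace to the consolidated (v2) airlock storage
+resp = requests.patch(
+    f"{API}/api/workspaces/{WORKSPACE_ID}",
+    headers={"Authorization": f"Bearer {TOKEN}"},
+    json={"properties": {"airlock_version": 2}},
+)
+resp.raise_for_status()
+```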
+
+!!! note
+ In-flight airlock requests are safe during upgrade. Each request has `airlock_version` stamped at creation time, so upgrading a workspace does not affect requests that are already in progress.
diff --git a/docs/azure-tre-overview/airlock.md b/docs/azure-tre-overview/airlock.md
index 92b71ac63..6094d53a0 100644
--- a/docs/azure-tre-overview/airlock.md
+++ b/docs/azure-tre-overview/airlock.md
@@ -2,49 +2,123 @@
In a Trusted Research Environment (TRE) the workspaces represent a security boundary that enables researchers to access data, execute analysis, apply algorithms and collect reports. The airlock capability is the only mechanism that allows users to `import` or `export` data, tools or other file based artefacts in a secure fashion with a human approval.
This constitutes the mechanism focused on preventing data exfiltration and securing TRE and its workspaces from inappropriate data, while allowing researchers to work on their projects and execute their tasks.
-The airlock feature brings several actions: ingress/egress Mechanism; Data movement; Security gates; Approval mechanism and Notifications. As part of TRE's Safe settings all activity must be tracked for auditing purposes.
+The airlock feature brings several actions: ingress/egress mechanism, data movement, security gates, approval mechanism and notifications. As part of TRE's Safe Settings all activity must be tracked for auditing purposes.
The Airlock feature aims to address these goals:
* Prevent unauthorised data import or export.
-
* Provide a process to allow approved data to be imported through the security boundary of a TRE Workspace.
+* Track requests and decisions, supporting cycles of revision, approval or rejection.
+* Automatically scan data being imported for security issues.
+* Require manual review by the Airlock Manager for data being exported or imported.
+* Notify the requesting researcher of progress and required actions.
+* Audit all steps within the airlock process.
-* TRE provides functionality to track requests and decisions, supporting cycles of revision, approval or rejection.
+Typically in a TRE, the Airlock feature would be used to allow a researcher to export the outputs of a research project such as summary results. With the airlock, data to be exported must go through a human review, typically undertaken by a data governance team.
-* Data being imported with an airlock import process can be automatically scanned for security issues.
+The Airlock feature creates events on every meaningful step of the process, enabling organisations to extend the notification mechanism.
-* Data being exported or imported must be manually reviewed by the Airlock Manager.
+## Storage Architecture
-* Notify the requesting researcher of the process progress and/or required actions.
+The airlock uses a consolidated storage architecture with **2 storage accounts** and metadata-based stage management. Each airlock request gets a dedicated container (named with the request ID), and the request's stage is tracked via container metadata rather than by copying data between storage accounts.
-* All steps within the airlock process are audited.
+```mermaid
+graph TB
+ subgraph External["External"]
+ researcher["fa:fa-user Researcher"]
+ reviewer["fa:fa-user-shield Airlock Manager"]
+ end
+
+ appgw["fa:fa-shield-alt App Gateway"]
+
+ subgraph Core["TRE Core"]
+ direction TB
+ subgraph CoreStorage["Core: stalairlock"]
+ ie{{"stage: import-external"}}
+ eapp{{"stage: export-approved"}}
+ iip{{"stage: import-in-progress"}}
+ irej{{"stage: import-rejected"}}
+ iblk{{"stage: import-blocked"}}
+ end
+ processor["fa:fa-cog Airlock Processor"]
+ end
+
+ subgraph WSStorage["Workspace: stalairlockg"]
+ iappr{{"stage: import-approved"}}
+ eint{{"stage: export-internal"}}
+ eip{{"stage: export-in-progress"}}
+ erej{{"stage: export-rejected"}}
+ eblk{{"stage: export-blocked"}}
+ end
+
+ subgraph Workspace["TRE Workspace"]
+ vm["fa:fa-desktop Researcher VM"]
+ end
+
+ researcher -- "SAS token" --> appgw
+ reviewer -- "SAS token" --> appgw
+ appgw -- "Public stages only" --> CoreStorage
+ processor -. "All stages" .-> CoreStorage
+ processor -. "All stages" .-> WSStorage
+ vm -- "Private Endpoint" --> WSStorage
+
+ style Core fill:#1a3d6d,stroke:#0d2240,color:#fff
+ style CoreStorage fill:#2c5f9e,stroke:#1a3d6d,color:#fff
+ style WSStorage fill:#8b5c00,stroke:#5c3d00,color:#fff
+ style External fill:#444,stroke:#333,color:#fff
+ style Workspace fill:#1a5c1a,stroke:#0d330d,color:#fff
+ style appgw fill:#0078d4,stroke:#005a9e,color:#fff
+ style processor fill:#cc7000,stroke:#995300,color:#fff
+ style vm fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style ie fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style eapp fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style iip fill:#4a6fa5,stroke:#2c5f9e,color:#fff
+ style irej fill:#b85450,stroke:#8b3e3b,color:#fff
+ style iblk fill:#b85450,stroke:#8b3e3b,color:#fff
+ style iappr fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style eint fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style eip fill:#8b7800,stroke:#5c5000,color:#fff
+ style erej fill:#b85450,stroke:#8b3e3b,color:#fff
+ style eblk fill:#b85450,stroke:#8b3e3b,color:#fff
+ style researcher fill:#0078d4,stroke:#005a9e,color:#fff
+ style reviewer fill:#0078d4,stroke:#005a9e,color:#fff
+```
+> Airlock architecture overview. Hexagon shapes represent container metadata stages. Green = user-accessible, yellow = processing, red = terminal.
-Typically in a TRE, the Airlock feature would be used to allow a researcher to export the outputs of a research project such as summary results. With the airlock, data to be exported must go through a human review, typically undertaken by a data governance team.
+**Storage Accounts:**
-The Airlock feature will create events on every meaningful step of the process. This will enable increased flexibility by allowing an organization to extend the notification mechanism.
+| Storage Account | Name Pattern | Purpose |
+| --- | --- | --- |
+| **Core Storage** | `stalairlock{tre_id}` | All core-managed stages: import external, in-progress, rejected, blocked; export approved |
+| **Global Workspace Storage** | `stalairlockg{tre_id}` | All workspace-managed stages: import approved; export internal, in-progress, rejected, blocked |
+
+**Key design principles:**
+
+- **Metadata over movement** — Most stage transitions simply update container metadata, providing near-instant transitions. Data is only physically copied when crossing the core/workspace boundary (once per request).
+- **ABAC security** — Azure Attribute-Based Access Control conditions restrict which stages each identity can access on the storage account, enforced at the Azure RBAC layer.
+- **Shared infrastructure** — All workspaces share the same workspace storage account, with network isolation via per-workspace private endpoints and ABAC conditions filtering by `workspace_id`.
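+
+To make the first principle concrete, here is a minimal sketch of a metadata-only stage transition using the `azure-storage-blob` SDK. The account and container names are placeholders, and this is an illustration rather than the processor's actual code:
+
+```python
+from azure.identity import DefaultAzureCredential
+from azure.storage.blob import BlobServiceClient
+
+client = BlobServiceClient(
+    "https://stalairlockmytre.blob.core.windows.net",  # placeholder account
+    credential=DefaultAzureCredential(),
+)
+container = client.get_container_client("<request-id>")
+
+# Read the current stage from container metadata
+metadata = container.get_container_properties().metadata
+assert metadata.get("stage") == "import-external"
+
+# Advancing the request is a single metadata write - no data moves
+metadata["stage"] = "import-in-progress"
+container.set_container_metadata(metadata)
+```
+
+In the legacy architecture the same transition required copying every blob to a different storage account.
+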
## Ingress/Egress Mechanism
The Airlock allows a TRE user to start the `import` or `export` process to a given workspace. A number of milestones must be reached in order to complete a successful import or export. These milestones are defined using the following states:
-1. **Draft**: An Airlock request has been created but has not yet started. The TRE User/Researcher has now access to a storage location and they must identify the data to be processed. At this point the airlock import/export processes allow a single file to be processed. However a compressed file may be used (zip).
+1. **Draft**: An Airlock request has been created but has not yet started. The TRE User/Researcher has access to a storage container and must upload the data to be processed. At this point the airlock import/export processes allow a single file to be processed; however, a compressed (zip) file may be used.
2. **Submitted**: The request was submitted by the researcher (not yet processed).
3. **In-Review**: The request is ready to be reviewed. This state can be reached directly from Submitted state or after going through a successful security scan (found clean).
4. **Approval In-progress**: The Airlock request has been approved, however data movement is still ongoing.
-5. **Approved**: The Airlock request has been approved. At this state, data has been securely verified and manually reviewed. The data is now in its final location. For an import process the data is now available in the TRE workspace, it can be accessed by the requestor from within the workspace.
+5. **Approved**: The Airlock request has been approved. Data has been securely verified and manually reviewed. The data is now in its final location. For an import process the data is available in the TRE workspace and can be accessed by the requestor from within the workspace.
6. **Rejection In-progress**: The Airlock request has been rejected, however data movement is still ongoing.
-7. **Rejected**: The Airlock request has been rejected. The data in the process was rejected manually by the Airlock Manager.
-8. **Cancelled**: The Airlock request was manually cancelled by the requestor TRE user, a Workspace owner or a TRE administrator. The cancelation is only allowed when the request is not actively changing (i.e. **Draft** or **In-Review** state).
+7. **Rejected**: The Airlock request has been rejected. The data was rejected manually by the Airlock Manager.
+8. **Cancelled**: The Airlock request was manually cancelled by the requestor, a Workspace Owner, or a TRE administrator. Cancellation is only allowed when the request is not actively changing (i.e. **Draft** or **In-Review** state).
9. **Blocking In-progress**: The Airlock request has been blocked, however data movement is still ongoing.
10. **Blocked By Scan**: The Airlock request has been blocked. The security analysis found issues in the submitted data and consequently quarantined the data.
```mermaid
-graph TD
- A[Researcher wants to export data from TRE Workspace] -->|Request created| B[Request in state Draft]
+graph TD
+ A[Researcher wants to export data from TRE Workspace] -->|Request created| B[Request in state Draft]
B-->|Researcher gets link to storage container and uploads data| B
B-->|Request submitted| C[Submitted]
- C--> D{Security issues found?}
+ C--> D{Security issues found?}
D-->|Yes| E[Blocking In-progress]
D-->|No| G[In-Review]
E:::temporary--> F((Blocked By Scan))
@@ -58,148 +132,458 @@ graph TD
H-->|Request Canceled| X
classDef temporary stroke-dasharray: 5 5
```
-> Airlock state flow diagram for an Airlock export request
+> Airlock state flow diagram for an export request. Import follows the same flow.
+
+When an airlock process is created the initial state is **Draft** and the airlock processor creates a storage container with the appropriate stage metadata. The user receives a link to this container (URL + SAS token) that they can use to upload data.
+
+For import, the container is created in core storage (`stalairlock`) with metadata `stage=import-external`. For export, the container is created in global workspace storage (`stalairlockg`) with metadata `stage=export-internal`, accessible only from within the workspace via private endpoint.
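+
+The upload link can be pictured as a user delegation SAS over the request's container. A minimal sketch with the `azure-storage-blob` SDK follows; the account name, container name and permissions are illustrative, and the real API routes the resulting URL through the App Gateway:
+
+```python
+from datetime import datetime, timedelta, timezone
+
+from azure.identity import DefaultAzureCredential
+from azure.storage.blob import (BlobServiceClient, ContainerSasPermissions,
+                                generate_container_sas)
+
+ACCOUNT = "stalairlockmytre"  # placeholder core storage account
+client = BlobServiceClient(f"https://{ACCOUNT}.blob.core.windows.net",
+                           credential=DefaultAzureCredential())
+
+# User delegation SAS: signed with an AAD key, so it inherits the signing
+# identity's ABAC restrictions (see Access Control below)
+now = datetime.now(timezone.utc)
+key = client.get_user_delegation_key(now, now + timedelta(hours=1))
+sas = generate_container_sas(
+    account_name=ACCOUNT,
+    container_name="<request-id>",
+    user_delegation_key=key,
+    permission=ContainerSasPermissions(read=True, write=True, list=True),
+    expiry=now + timedelta(hours=1),
+)
+upload_url = f"https://{ACCOUNT}.blob.core.windows.net/<request-id>?{sas}"
+```
+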
-When an airlock process is created the initial state is **Draft** and the required infrastructure will get created providing a single container to isolate the data in the request. Once completed, the user will be able to get a link for this container inside the storage account (URL + SAS token) that they can use to upload the desired data to be processed (import or export).
+The user uploads a file using any tool of their preference: [Azure Storage Explorer](https://azure.microsoft.com/en-us/features/storage-explorer/) or [AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10).
-This storage location is external for import (`stalimex`) or internal for export (`stalexint`), however only accessible to the requestor (ex: a TRE user/researcher).
-The user will be able to upload a file to the provided storage location, using any tool of their preference: [Azure Storage Explorer](https://azure.microsoft.com/en-us/features/storage-explorer/) or [AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10) which is a command line tool.
+The user submits the request (TRE API call), which updates the container metadata to the next stage. The airlock request is now in state **Submitted**.
-The user Submits the request (TRE API call) starting the data movement (to the `stalimip` - import in-progress or `stalexip` - export in-progress). The airlock request is now in state **Submitted**.
-If enabled, the Malware Scanning is started. The scan is done using Microsoft Defender for Storage, which is described in detail in the [Microsoft Defender for Storage documentation](https://learn.microsoft.com/en-us/azure/defender-for-cloud/defender-for-storage-introduction).
-In the case that security flaws are found, the request state becomes **Blocking In-progress** while the data is moved to blocked storage (either import blocked `stalimblocked` or export blocked `stalexblocked`). In this case, the request is finalized with the state **Blocked By Scan**.
-If the Security Scanning does not identify any security flaws, the request state becomes **In-Review**. Simultaneously, a notification is sent to the Airlock Manager user. The user needs to ask for the container URL using the TRE API (SAS token + URL with READ permission).
+If enabled, malware scanning is started using Microsoft Defender for Storage
+(see [Microsoft Defender for Storage documentation](https://learn.microsoft.com/en-us/azure/defender-for-cloud/defender-for-storage-introduction)).
+If security flaws are found, the container metadata is updated to blocked status and the request is finalised with state **Blocked By Scan**.
+If no issues are found, the metadata is updated to in-review status and the request state becomes **In-Review**.
+A notification is sent to the Airlock Manager.
> The Security Scanning can be disabled, changing the request state from **Submitted** straight to **In-Review**.
-The Airlock Manager will manually review the data using the tools of their choice available in the TRE workspace. Once review is completed, the Airlock Manager will have to *Approve* or *Reject* the airlock proces, though a TRE API call.
-At this point, the request will change state to either **Approval In-progress** or **Rejection In-progress**, while the data movement occurs moving afterwards to **Approved** or **Rejected** accordingly. The data will now be in the final storage destination: `stalexapp` - export approved or `stalimapp` - import approved.
-With this state change, a notification will be triggered to the requestor including the location of the processed data in the form of an URL + SAS token.
+The Airlock Manager manually reviews the data using tools available in the TRE workspace. Once the review is completed, the Airlock Manager approves or rejects the request through a TRE API call. On approval, data is copied to the final destination; on rejection, only metadata is updated.
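+
+A sketch of that review call (the decision payload shape is an assumption; consult the API schema of your deployment):
+
+```python
+import requests
+
+API = "https://mytre.example.com"  # placeholder TRE API base URL
+WS, REQ = "<workspace-id>", "<airlock-request-id>"
+
+resp = requests.post(
+    f"{API}/api/workspaces/{WS}/requests/{REQ}/review",
+    headers={"Authorization": "Bearer <airlock-manager-token>"},
+    # Field names below are illustrative, not verbatim schema
+    json={"approval": True, "decisionExplanation": "Reviewed; safe to release."},
+)
+resp.raise_for_status()
+```
+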
-## Data movement
+## Data Movement
-For any airlock process, there is data movement either **into** a TRE workspace (in import process) or **from** a TRE workspace (in export process). Being a TRE Workspace boundary, there are networking configurations designed to achieve this goal. The data movement will guarantee that the data is automatically verified for security flaws and manually reviewed, before placing data inside the TRE Workspace.
-Also, the process guarantees that data is not tampered with throughout the process.
+For any airlock process, there is data movement either **into** a TRE workspace (import) or **from** a TRE workspace (export). The data movement guarantees that data is automatically verified for security flaws and manually reviewed before being placed inside or taken outside the TRE Workspace.
-In an import process, data will transition from more public locations (yet confined to the requestor) to TRE workspace storage, after guaranteeing security automatically and by manual review.
+**Metadata-based stage management** means most transitions are near-instantaneous metadata updates. Data is only physically copied when it crosses the core/workspace boundary:
-In an export process, data will transition from internal locations (available to the requestor) to public locations in the TRE, after going through a manual review.
+- **Import approved**: Core storage → Workspace storage (1 async copy per import)
+- **Export approved**: Workspace storage → Core storage (1 async copy per export)
-Considering that the Airlock requests may require large data movements, the operations can have longer durations, hence becoming the operations asynchronous. This is why states like **Approval In-progress**, **Rejection In-progress** or **Blocking In-progress** will be set while there are data movement operations.
+All other transitions — draft→submitted, submitted→in-review, in-review→rejected/blocked — update metadata only with no data movement.
-> The data movement mechanism is data-driven, allowing an organization to extend how request data transitions between
+Cross-account copies are **asynchronous**: the processor initiates the copy and returns. When the blob appears at the destination, a BlobCreated event fires and the BlobCreatedTrigger reads container metadata to emit the appropriate StepResult. This matches the original airlock design where "in-progress" states represent ongoing data movement operations, supporting large data transfers gracefully.
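+
+A sketch of the approval-time boundary copy, assuming the `azure-storage-blob` SDK (account URLs and auth are placeholders; the real processor authorises the source URL, e.g. with a SAS token):
+
+```python
+from azure.identity import DefaultAzureCredential
+from azure.storage.blob import BlobServiceClient
+
+CORE = "https://stalairlockmytre.blob.core.windows.net"        # placeholder
+WS_GLOBAL = "https://stalairlockgmytre.blob.core.windows.net"  # placeholder
+REQ = "<request-id>"
+
+cred = DefaultAzureCredential()
+src = BlobServiceClient(CORE, credential=cred).get_container_client(REQ)
+dst = BlobServiceClient(WS_GLOBAL, credential=cred)
+
+for blob in src.list_blobs():
+    # start_copy_from_url returns immediately; Azure Storage copies in the
+    # background and raises a BlobCreated event at the destination, which
+    # the BlobCreatedTrigger turns into the final StepResult.
+    dst.get_blob_client(REQ, blob.name).start_copy_from_url(
+        f"{CORE}/{REQ}/{blob.name}")  # a real copy would append a source SAS
+```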
+
+### Import Data Flow
+
+```mermaid
+graph LR
+ subgraph External["External"]
+ data("fa:fa-file Data to import")
+ end
+
+ subgraph CoreStorage["Core: stalairlock"]
+ A{{"stage: import-external"}}
+ B{{"stage: import-in-progress"}}
+ D{{"stage: import-blocked"}}
+ C{{"stage: import-rejected"}}
+ end
+
+ subgraph WorkspaceStorage["Workspace: stalairlockg"]
+ E{{"stage: import-approved"}}
+ end
+
+ data -- "Upload via SAS" --> A
+ A -. "Submitted - metadata only" .-> B
+ B -. "Threat found - metadata only" .-> D
+ B -. "Clean scan - metadata only" .-> review{"Review"}
+ review -. "Rejected - metadata only" .-> C
+ review == "Approved - DATA COPY" ==> E
+
+ style External fill:#444,stroke:#333,color:#fff
+ style CoreStorage fill:#2c5f9e,stroke:#1a3d6d,color:#fff
+ style WorkspaceStorage fill:#8b5c00,stroke:#5c3d00,color:#fff
+ style data fill:#0078d4,stroke:#005a9e,color:#fff
+ style A fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style B fill:#4a6fa5,stroke:#2c5f9e,color:#fff
+ style C fill:#b85450,stroke:#8b3e3b,color:#fff
+ style D fill:#b85450,stroke:#8b3e3b,color:#fff
+ style E fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style review fill:#6b5900,stroke:#4a3d00,color:#fff
+```
+> Import data flow. Dashed lines = metadata-only transitions. Thick line = the only data copy (on approval). Hexagons = container metadata stages.
+
+### Export Data Flow
+
+```mermaid
+graph LR
+ subgraph Workspace["TRE Workspace"]
+ data("fa:fa-file Data to export")
+ end
+
+ subgraph WorkspaceStorage["Workspace: stalairlockg"]
+ A{{"stage: export-internal"}}
+ B{{"stage: export-in-progress"}}
+ D{{"stage: export-blocked"}}
+ C{{"stage: export-rejected"}}
+ end
+
+ subgraph CoreStorage["Core: stalairlock"]
+ E{{"stage: export-approved"}}
+ end
+
+ data -- "Upload via PE" --> A
+ A -. "Submitted - metadata only" .-> B
+ B -. "Threat found - metadata only" .-> D
+ B -. "Clean scan - metadata only" .-> review{"Review"}
+ review -. "Rejected - metadata only" .-> C
+ review == "Approved - DATA COPY" ==> E
+
+ style Workspace fill:#1a5c1a,stroke:#0d330d,color:#fff
+ style WorkspaceStorage fill:#8b5c00,stroke:#5c3d00,color:#fff
+ style CoreStorage fill:#2c5f9e,stroke:#1a3d6d,color:#fff
+ style data fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style A fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style B fill:#4a6fa5,stroke:#2c5f9e,color:#fff
+ style C fill:#b85450,stroke:#8b3e3b,color:#fff
+ style D fill:#b85450,stroke:#8b3e3b,color:#fff
+ style E fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style review fill:#6b5900,stroke:#4a3d00,color:#fff
+```
+> Export data flow. Dashed lines = metadata-only transitions. Thick line = the only data copy (on approval). Hexagons = container metadata stages.
## Security Scan
-The identified data in a airlock proces, will be submited to a security scan. If the security scan identifies issues the data is quarantined and a report is added to the process metadata. Both the requestor and Workspace Owner are notified. For a successful security scan, the data will remain in state **In-progress**, and accessible to the Workspace Owner.
+Data in an airlock process is submitted to a security scan. If the scan identifies issues, the container metadata is updated to blocked status and a report is added to the process metadata. Both the requestor and Workspace Owner are notified. For a successful security scan, data remains accessible to the Workspace Owner for review.
+
+> * The security scan is optional, behind a feature flag enabled by a script.
+> * The outcome of the security scan is either the in-progress metadata status (scan clean) or the blocked metadata status (threats found).
+> * An airlock process guarantees that the content being imported/exported is secure.
+
+## Access Control
+
+The airlock uses Azure Attribute-Based Access Control (ABAC) to restrict access at the storage account level. This ensures that identities can only access containers matching specific stage metadata values.
+
+```mermaid
+graph LR
+ api["fa:fa-key TRE API"]
+ proc["fa:fa-cog Airlock Processor"]
+ wspe["fa:fa-lock Workspace PE"]
+
+ subgraph CoreStorage["Core: stalairlock"]
+ cs_ie{{"stage: import-external"}}
+ cs_eapp{{"stage: export-approved"}}
+ cs_iip{{"stage: import-in-progress"}}
+ cs_irej{{"stage: import-rejected"}}
+ cs_iblk{{"stage: import-blocked"}}
+ end
+
+ subgraph WorkspaceStorage["Workspace: stalairlockg"]
+ ws_iapp{{"stage: import-approved"}}
+ ws_eint{{"stage: export-internal"}}
+ ws_eip{{"stage: export-in-progress"}}
+ ws_erej{{"stage: export-rejected"}}
+ ws_eblk{{"stage: export-blocked"}}
+ end
+
+ api -- "ABAC: import-external OR export-approved" --> CoreStorage
+ proc == "Unrestricted access" ==> CoreStorage
+ proc == "Unrestricted access" ==> WorkspaceStorage
+ wspe -- "ABAC: workspace_id + stage" --> WorkspaceStorage
+
+ style api fill:#b85450,stroke:#8b3e3b,color:#fff
+ style proc fill:#cc7000,stroke:#995300,color:#fff
+ style wspe fill:#6a3d9a,stroke:#4a2b6d,color:#fff
+ style CoreStorage fill:#2c5f9e,stroke:#1a3d6d,color:#fff
+ style WorkspaceStorage fill:#8b5c00,stroke:#5c3d00,color:#fff
+ style cs_ie fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style cs_eapp fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style cs_iip fill:#4a6fa5,stroke:#2c5f9e,color:#fff
+ style cs_irej fill:#8b3e3b,stroke:#6b2e2b,color:#fff
+ style cs_iblk fill:#8b3e3b,stroke:#6b2e2b,color:#fff
+ style ws_iapp fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style ws_eint fill:#2d8a2d,stroke:#1a5c1a,color:#fff
+ style ws_eip fill:#4a6fa5,stroke:#2c5f9e,color:#fff
+ style ws_erej fill:#8b3e3b,stroke:#6b2e2b,color:#fff
+ style ws_eblk fill:#8b3e3b,stroke:#6b2e2b,color:#fff
+```
+> ABAC access control. The API can only access public stages (green). The Processor has full access. Workspace PEs are scoped by workspace_id.
-> * The Security scan will be optional, behind a feature flag enabled by a script
-> * The outcome of the security scan will be either the in-progress (`stalexip`) storage or blocked (`stalexblocked`)
-> * An airlock process will guarantee that the content being imported/exported is secure. It is envisioned that a set of **security gates** are identified to be executed successfully for a process to be approved.
+**Identity access summary:**
-## Approval mechanism
+| Identity | Core Storage | Workspace Storage | ABAC Condition |
+| --- | --- | --- | --- |
+| TRE API | `Storage Blob Data Contributor` | — | Only `import-external` and `export-approved` stages |
+| Airlock Processor | `Storage Blob Data Contributor` | `Storage Blob Data Contributor` | None (unrestricted) |
+| Workspace PE | — | `Storage Blob Data Contributor` | `workspace_id` must match + stage restrictions |
-The approval mechanism, is bundled with any airlock process, providing a specific way to `approve` or `reject` the data. This mechanism will allow the Airlock Managers to explicitly approve/reject the process, after having access to the data. The Airlock Manager users will be able to execute a manual review on the data using the tools available to them in a review TRE Workspace.
-Once this manual review is executed, Airlock Managers can proactively approve or reject the airlock request.
+**Network access:**
-The only goal of the Approval mechanism is to provide a cycle of revision, approval or rejection while tracking the decision.
+- Core storage allows public access for import-external and export-approved stages via SAS tokens (through the App Gateway).
+- Global workspace storage uses `Deny` as the default network action. Access is only possible via per-workspace private endpoints from within the workspace VNet.
+- The airlock processor has a private endpoint on the airlock storage subnet for internal processing on both accounts.
+- User Delegation SAS tokens inherit the ABAC restrictions of the signing identity, so even a valid SAS token cannot access stages outside the identity's ABAC scope.
-This mechanism will provide access to the data in the airlock process, and will be able to use a VM in TRE workspace. The data review will be the Airlock Manager responsibility
+### Container Metadata Stages
-> * It is envisioned that this mechanism to be more flexible and extensible.
-> * The `Airlock Manager` is a role defined at the workspace instance level and assigned to identities. Initially, the `Owner` role will be used.
+Each container has a `stage` metadata key that tracks the current stage of the airlock request:
+
+**Core Storage (`stalairlock`):**
+
+| Stage | Description | Access |
+| --- | --- | --- |
+| `import-external` | Initial upload location for imports | Public via SAS |
+| `import-in-progress` | After submission, during review | Processor only |
+| `import-rejected` | Import rejected by reviewer | Processor only |
+| `import-blocked` | Import blocked by security scan | Processor only |
+| `export-approved` | Final location for approved exports | Public via SAS |
+
+**Global Workspace Storage (`stalairlockg`):**
+
+| Stage | Description | Access |
+| --- | --- | --- |
+| `import-approved` | Final location for approved imports | Workspace PE |
+| `export-internal` | Initial upload location for exports | Workspace PE |
+| `export-in-progress` | After submission, during review | Processor only |
+| `export-rejected` | Export rejected by reviewer | Processor only |
+| `export-blocked` | Export blocked by security scan | Processor only |
+
+## Approval Mechanism
+
+The approval mechanism is bundled with any airlock process, providing a specific way to `approve` or `reject` the data. Airlock Managers can explicitly approve/reject the process after reviewing the data using tools available in a review TRE Workspace.
+
+The only goal of the approval mechanism is to provide a cycle of revision, approval or rejection while tracking the decision.
+
+> * It is envisioned that this mechanism will be more flexible and extensible.
+> * The `Airlock Manager` is a role defined at the workspace instance level and assigned to identities.
## Notifications
-Throughout the airlock process, the notification mechanism will notify the relevant people of the process. Both the requestor (TRE User/Researcher) and the Workspace Owner will be notified by email of the relevant process events.
+Throughout the airlock process, the notification mechanism notifies the relevant people. Both the requestor (TRE User/Researcher) and the Workspace Owner are notified by email of relevant process events.
Whenever the airlock process changes to a state of **Draft**, **Submitted**, **Approved**, **Rejected**, **Approval In-progress**, **Rejection In-progress**, **Blocked By Scan** or **Cancelled**, the process requestor gets notified.
-When the state changes to `In-progress` the Workspace Owner (Airlock Manager) gets notified.
+When the state changes to **In-Review**, the Workspace Owner (Airlock Manager) gets notified.
+
+> * The notification mechanism is data-driven, allowing an organisation to extend the notifications behaviour. The mechanism is exemplified with a Logic App determining the notifications logic.
+> * Notifications are sent to TRE users who are Microsoft Entra ID users (guest or member) with an email address defined; users without an email address will not receive notifications.
-> * The Notification mechanism is also data-driven, allowing an organization to extend the notifications behavior. The mechanism is exemplified with a Logic App determining the notifications logic.
-> * Notifications will work with All TRE users being Microsoft Entra ID users (guests or not), with email defined – if not, notifications will not be sent.
+## API Endpoints
-## Architecture
+The TRE API exposes the following airlock endpoints:
-The Airlock feature is supported by infrastructure at the TRE and workspace level, containing a set of storage accounts. Each Airlock request will provision and use unique storage containers with the request id in its name.
+| Method | Endpoint | Description |
+| --- | --- | --- |
+| `POST` | `/api/workspaces/{workspace_id}/requests` | Create an Airlock request (in **Draft**) |
+| `GET` | `/api/workspaces/{workspace_id}/requests/{airlock_request_id}/link` | Get the URL and SAS token to access an Airlock request |
+| `POST` | `/api/workspaces/{workspace_id}/requests/{airlock_request_id}/submit` | Submit an Airlock request |
+| `POST` | `/api/workspaces/{workspace_id}/requests/{airlock_request_id}/review` | Review an Airlock request |
+| `POST` | `/api/workspaces/{workspace_id}/requests/{airlock_request_id}/cancel` | Cancel an Airlock request |
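+
+As a rough usage sketch, a researcher-side import flow might look as follows. The base URL, token, and payload/response field names are illustrative assumptions, not verbatim API schema:
+
+```python
+import requests
+
+API = "https://mytre.example.com"  # placeholder TRE API base URL
+WS = "<workspace-id>"
+HDRS = {"Authorization": "Bearer <researcher-token>"}
+
+# 1. Create a draft import request (payload fields are illustrative)
+r = requests.post(f"{API}/api/workspaces/{WS}/requests", headers=HDRS,
+                  json={"type": "import", "title": "Reference dataset"})
+r.raise_for_status()
+request_id = r.json()["airlockRequest"]["id"]  # response shape is an assumption
+
+# 2. Get the container URL + SAS token, then upload the file out-of-band
+link = requests.get(f"{API}/api/workspaces/{WS}/requests/{request_id}/link",
+                    headers=HDRS).json()  # contains the container URL + SAS
+
+# 3. Submit the request for scanning and review
+requests.post(f"{API}/api/workspaces/{WS}/requests/{request_id}/submit",
+              headers=HDRS).raise_for_status()
+```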
+
+## Airlock Processor
+
+The **Airlock Processor** is a set of Azure Functions that handle the events created throughout the airlock process:
+
+- **StatusChangedQueueTrigger** — Consumes status change events from the Service Bus queue and orchestrates container creation, metadata updates, and cross-account data copies. For same-account transitions (most stages), it updates container metadata directly. For cross-account transitions (approval), it initiates an async server-side copy and returns — the copy completion is handled by the BlobCreatedTrigger.
+- **BlobCreatedTrigger** — Fires when a blob appears in a storage account (via EventGrid → Service Bus). For cross-account copies, this signals that the copy has completed and emits a StepResult event to advance the request to its final state (e.g., approved, rejected, blocked).
+- **ScanResultTrigger** — Consumes malware scan results from Microsoft Defender for Storage. If threats are found, emits a StepResult to block the request. If clean, emits a StepResult to advance to in-review.
+- **DataDeletionTrigger** — Cleans up source containers after data has been copied to the destination.
+
+This event-driven design ensures that long-running data copies (which may take minutes for large files) are handled asynchronously, matching the original airlock architecture's use of "in-progress" states to represent ongoing operations.
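+
+The division of labour can be pictured with a deliberately simplified, hypothetical dispatch; the real functions live under `airlock_processor/` and differ in detail, and the helpers here are stubs:
+
+```python
+# Hypothetical import-side mapping from new status to target stage metadata
+SAME_ACCOUNT_STAGES = {
+    "submitted": "import-in-progress",
+    "rejection_in_progress": "import-rejected",
+    "blocking_in_progress": "import-blocked",
+}
+
+def set_container_stage(request_id: str, stage: str) -> None:
+    """Metadata-only transition (see the sketch under Storage Architecture)."""
+    ...
+
+def start_cross_account_copy(request_id: str) -> None:
+    """Async server-side copy (see the sketch under Data Movement)."""
+    ...
+
+def handle_status_changed(request_id: str, new_status: str) -> None:
+    if new_status == "approval_in_progress":
+        # Crossing the core/workspace boundary: kick off the async copy and
+        # return; a later BlobCreated event completes the request.
+        start_cross_account_copy(request_id)
+    elif new_status in SAME_ACCOUNT_STAGES:
+        # Same-account transition: one metadata write, near-instant
+        set_container_stage(request_id, SAME_ACCOUNT_STAGES[new_status])
+```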
+
+## Airlock Flow
+
+The following sequence diagram details the airlock feature and its event-driven behaviour:
```mermaid
-graph LR
- subgraph TRE Workspace
- E[(stalimappimport approved)]
- end
- subgraph TRE
- A[(stalimeximport external)]-->|Request Submitted| B
- B[(stalimipimport in-progress)]-->|Security issues found| D[(stalimblockedimport blocked)]
- B-->|No security issues found| review{ManualApproval}
- review-->|Rejected| C[(stalimrejimport rejected)]
- review-->|Approved| E
- end
- subgraph External
- data(Data to import)-->A
- end
+sequenceDiagram
+ participant R as Researcher
+ participant API as TRE API
+ participant CS as Core Storage<br/>(stalairlock)
+ participant WS as Workspace Storage<br/>(stalairlockg)
+ participant AP as Airlock Processor
+ participant EG as Event Grid
+ participant SB as Service Bus
+ participant DB as Cosmos DB
+
+ Note over R,DB: Creating a Draft Request (Import Example)
+ R->>API: POST /requests (type=import)
+ API->>DB: Save request (status: draft)
+ API->>EG: StatusChangedEvent(draft)
+ EG->>SB: Queue status change
+ SB->>AP: Consume event
+ AP->>CS: Create container with metadata stage=import-external
+ API-->>R: OK + request details
+
+ Note over R,DB: Getting Upload Link
+ R->>API: GET /requests/{id}/link
+ API->>CS: Generate User Delegation SAS (ABAC: import-external)
+ API-->>R: SAS URL for container
+
+ Note over R,DB: Uploading File
+ R->>CS: Upload file via SAS token
+
+ Note over R,DB: Submitting Request
+ R->>API: POST /requests/{id}/submit
+ API->>DB: Update status → submitted
+ API->>EG: StatusChangedEvent(submitted)
+ EG->>SB: Queue status change
+ SB->>AP: StatusChangedQueueTrigger
+ AP->>CS: Update metadata → import-in-progress
+
+ Note over R,DB: Security Scan
+ alt Malware Scanning Enabled
+ CS-->>EG: Defender scan result
+ EG->>SB: Queue scan result
+ SB->>AP: ScanResultTrigger
+ alt Threat Found
+ AP->>EG: StepResult(blocking_in_progress)
+ Note over AP,CS: StatusChangedQueueTrigger updates metadata → import-blocked
+ AP->>EG: StepResult(blocked)
+ else No Threat
+ AP->>EG: StepResult(in-review)
+ end
+ else Malware Scanning Disabled
+ AP->>EG: StepResult(submitted → in-review)
+ end
+ AP->>DB: Update status → in-review
+ AP->>EG: NotificationEvent (to reviewer)
+
+ Note over R,DB: Approval (Async Copy)
+ R->>API: POST /requests/{id}/review (approve)
+ API->>DB: Update status → approval_in_progress
+ API->>EG: StatusChangedEvent(approval_in_progress)
+ EG->>SB: Queue status change
+ SB->>AP: StatusChangedQueueTrigger
+ AP->>WS: Create container with metadata stage=import-approved
+ AP->>WS: Start async copy from Core → Workspace storage
+ Note over AP,WS: Copy runs asynchronously in Azure Storage
+ WS-->>EG: BlobCreated event (copy complete)
+ EG->>SB: Queue blob created
+ SB->>AP: BlobCreatedTrigger reads container metadata
+ AP->>EG: StepResult(approved)
+ AP->>DB: Update status → approved
+ AP->>EG: NotificationEvent (to researcher)
```
-> Data movement in an Airlock import request
-```mermaid
-graph LR
- subgraph TRE workspace
- data(Data to export)-->A
- A[(stalexintexport internal)]-->|Request Submitted| B
- B[(stalexipexport in-progress)]-->|Security issues found| D[(stalexblockedexport blocked)]
- B-->|No security issues found| review{ManualApproval}
- review-->|Rejected| C[(stalexrejexport rejected)]
- end
- subgraph External
- review-->|Approved| E[(stalexappexport approved)]
- end
+## Legacy Airlock
+
+For details on the legacy airlock architecture (per-stage storage accounts) and migration guidance, see [Legacy Airlock Architecture](airlock-legacy.md).
+
+## Configuration
+
+### Core Settings (`config.yaml`)
+
+The following settings in `config.yaml` control the airlock infrastructure at the TRE core level:
+
+```yaml
+# config.yaml
+tre_id: mytre
+
+# Set to false to remove legacy per-stage storage accounts.
+# Default: true (keeps legacy accounts for backward compatibility)
+enable_legacy_airlock: false
+```
+
+| Setting | Type | Default | Description |
+| --- | --- | --- | --- |
+| `enable_legacy_airlock` | bool | `true` | When `true`, deploys legacy per-stage storage accounts alongside the consolidated accounts for backward compatibility. When `false`, only the consolidated accounts (`stalairlock`, `stalairlockg`) are deployed. See [Legacy Airlock Architecture](airlock-legacy.md) for details. |
+
+The consolidated storage accounts (`stalairlock{tre_id}` and `stalairlockg{tre_id}`) are **always** provisioned regardless of this setting.
+
+### Workspace Settings
+
+The airlock is enabled per workspace via the following properties:
+
+| Property | Type | Default | Values | Description |
+| --- | --- | --- | --- | --- |
+| `enable_airlock` | bool | `false` | `true` / `false` | Enables or disables the airlock feature for the workspace |
+| `airlock_version` | int | `2` | `1` or `2` | `2` = Consolidated metadata-based storage (recommended), `1` = Legacy per-stage storage accounts |
+
+The `airlock_version` property only appears when `enable_airlock` is set to `true`.
+
+**Enabling airlock via the API:**
+
+```http
+PATCH /api/workspaces/{workspace_id}
+{
+ "properties": {
+ "enable_airlock": true,
+ "airlock_version": 2
+ }
+}
```
-> Data movement in an Airlock export request
+**Enabling airlock via the UI:**
-TRE:
+When creating or updating a workspace, the airlock version is available as a dropdown under the airlock configuration section.
-* `stalimex` - storage (st) airlock (al) import (im) external (ex)
-* `stalimip` - storage (st) airlock (al) import (im) in-progress (ip)
-* `stalimrej` - storage (st) airlock (al) import (im) rejected (rej)
-* `stalimblocked` - storage (st) airlock (al) import (im) blocked
-* `stalexapp` - storage (st) airlock (al) export (ex) approved (app)
+### What Happens at Each Level
-Workspace:
+```text
+config.yaml Workspace Properties
+┌─────────────────────────┐ ┌─────────────────────────────┐
+│ enable_legacy_airlock: │ │ enable_airlock: true │
+│ false → v2 infra only│ │ airlock_version: 2 → v2 TF │
+└─────────────────────────┘ └─────────────────────────────┘
+ Core Terraform Workspace Terraform
+```
-* `stalimapp` - workspace storage (st) airlock (al) import (im) approved (app)
-* `stalexint` - workspace storage (st) airlock (al) export (ex) internal (int)
-* `stalexip` - workspace storage (st) airlock (al) export (ex) in-progress (ip)
-* `stalexrej` - workspace storage (st) airlock (al) export (ex) rejected (rej)
-* `stalexblocked` - workspace storage (st) airlock (al) export (ex) blocked
+- **Core level** (`enable_legacy_airlock`): Controls whether the legacy per-stage storage accounts are also deployed (for backward compatibility only)
+- **Workspace level** (`airlock_version`): Controls which workspace Terraform module runs: `airlock_v2/` (consolidated storage with ABAC) when set to `2`, or the legacy `airlock/` module when set to `1` (see the snippet below)
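+
+The gating is visible in the base workspace's `workspace.tf` (abridged from this change):
+
+```terraform
+module "airlock" {
+  count  = var.enable_airlock && var.airlock_version == 1 ? 1 : 0
+  source = "./airlock"
+  # ...
+}
+
+module "airlock_v2" {
+  count  = var.enable_airlock && var.airlock_version >= 2 ? 1 : 0
+  source = "./airlock_v2"
+  # ...
+}
+```
+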
-> * The external storage accounts (`stalimex`, `stalexapp`), are not bound to any vnet and are accessible (with SAS token) via the internet
-> * The internal storage account (`stalexint`) is bound to the workspace vnet, so ONLY TRE Users/Researchers on that workspace can access it
-> * The (export) in-progress storage account (`stalexip`) is bound to the workspace vnet
-> * The (export) blocked storage account (`stalexblocked`) is bound to the workspace vnet
-> * The (export) rejected storage account (`stalexrej`) is bound to the workspace vnet
-> * The (import) in-progress storage account (`stalimip`) is bound to the TRE CORE vnet
-> * The (import) blocked storage account (`stalimblocked`) is bound to the TRE CORE vnet
-> * The (import) rejected storage account (`stalimrej`) is bound to the TRE CORE vnet
-> * The (import) approved storage account (`stalimapp`) is bound to the workspace vnet
+## Cross-Workspace Isolation
-[](../assets/airlock-networking.png)
+A common question: if all workspaces share the same storage account (`stalairlockg{tre_id}`), what prevents Workspace A from accessing Workspace B's data?
-In the TRE Core, the TRE API will provide the airlock API endpoints allowing to advance the process. The TRE API will expose the following methods:
+The answer is **three layers of isolation**:
-| Method | Endpoint | Description |
-| --- | --- | --- |
-| `POST` | `/api/workspaces/{workspace_id}/requests` | Create an Airlock request (in **Draft**) |
-| `POST` | `/api/workspaces/{workspace_id}/requests/{airlock_request_id}/link` | Get the url and token to access an Airlock Request |
-| `POST` | `/api/workspaces/{workspace_id}/requests/{airlock_request_id}/submit` | Submits an Airlock request |
-| `POST` | `/api/workspaces/{workspace_id}/requests/{airlock_request_id}/review` | Reviews an Airlock request |
-| `POST` | `/api/workspaces/{workspace_id}/requests/{airlock_request_id}/cancel` | Cancels an Airlock request |
+### 1. ABAC Conditions (Azure Attribute-Based Access Control)
+
+Each workspace deployment creates a role assignment on the global workspace storage account with an ABAC condition that requires **all three** of the following to be true for blob operations:
+- The request must come through **that workspace's specific private endpoint**
+- The container's `workspace_id` metadata must match **that workspace's ID**
+- The container's `stage` metadata must be one of the allowed stages (`import-approved`, `export-internal`, `export-in-progress`)
+
+```text
+ABAC condition (per workspace):
+ @Environment[Microsoft.Network/privateEndpoints]
+ == '/subscriptions/.../pe-sa-airlock-ws-global-{workspace_short_id}'
+ AND
+ @Resource[...containers/metadata:workspace_id]
+ == '{workspace_id}'
+ AND
+ @Resource[...containers/metadata:stage]
+ IN ('import-approved', 'export-internal', 'export-in-progress')
+```
-Also in the airlock feature there is the **Airlock Processor** which handles the events that are created throughout the process, signalling state changes from blobs created, status changed or security scans finalized.
+This means that even if Workspace A somehow obtained a SAS token referencing Workspace B's container, the ABAC condition would deny the operation because the private endpoint wouldn't match. The full condition is defined in `templates/workspaces/base/terraform/airlock_v2/storage_accounts.tf`.
-## Airlock flow
+### 2. Network Isolation (Private Endpoints)
-The following sequence diagram detailing the Airlock feature and its event driven behaviour:
+Each workspace creates its own private endpoint to the global workspace storage account, connected to the workspace's VNet. The ABAC condition references this specific private endpoint ID, so requests from a different workspace's PE are rejected.
-[](../assets/airlock-swimlanes.png)
+### 3. Container Metadata
+
+The airlock processor stamps every container with `workspace_id` metadata at creation time. This metadata is immutable in practice (only the processor identity can modify it, and researcher identities have no direct access to the storage account).
+
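+A minimal sketch of what that stamping could look like with the `azure-storage-blob` SDK (the helper and its parameters are illustrative, not the processor's actual code):
+
+```python
+from azure.identity import DefaultAzureCredential
+from azure.storage.blob import BlobServiceClient
+
+
+def create_request_container(account_url: str, request_id: str,
+                             workspace_id: str, stage: str) -> None:
+    """Create the per-request container and stamp the metadata that ABAC evaluates."""
+    service = BlobServiceClient(account_url=account_url,
+                                credential=DefaultAzureCredential())
+    # The container is named after the airlock request id; the metadata
+    # values are what the ABAC conditions match against.
+    service.create_container(
+        name=request_id,
+        metadata={"workspace_id": workspace_id, "stage": stage},
+    )
+```
+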
+```mermaid
+graph TB
+ subgraph WS_A["Workspace A"]
+ pe_a["PE: pe-sa-airlock-ws-global-ab12"]
+ end
+
+ subgraph WS_B["Workspace B"]
+ pe_b["PE: pe-sa-airlock-ws-global-cd34"]
+ end
+
+  subgraph GlobalStorage["Global workspace storage: stalairlockg"]
+ c1("req-001
workspace_id: ws-ab12
stage: import-approved")
+ c2("req-002
workspace_id: ws-cd34
stage: export-internal")
+ end
+
+ pe_a -- "ABAC: ws-ab12 + PE match" --> c1
+ pe_a -. "DENIED by ABAC" .-> c2
+ pe_b -. "DENIED by ABAC" .-> c1
+ pe_b -- "ABAC: ws-cd34 + PE match" --> c2
+
+ style WS_A fill:#2c5f9e,stroke:#1a3d6d,color:#fff
+ style WS_B fill:#8b5c00,stroke:#5c3d00,color:#fff
+ style GlobalStorage fill:#444,stroke:#333,color:#fff
+ style pe_a fill:#4a6fa5,stroke:#2c5f9e,color:#fff
+ style pe_b fill:#cc7000,stroke:#995300,color:#fff
+ style c1 fill:#4a6fa5,stroke:#2c5f9e,color:#fff
+ style c2 fill:#cc7000,stroke:#995300,color:#fff
+```
+> Cross-workspace isolation. Each workspace can only access containers matching its own `workspace_id`, through its own private endpoint. ABAC enforces all of these conditions at the Azure RBAC layer.
diff --git a/e2e_tests/conftest.py b/e2e_tests/conftest.py
index 39589e169..30927ea9b 100644
--- a/e2e_tests/conftest.py
+++ b/e2e_tests/conftest.py
@@ -104,11 +104,12 @@ async def clean_up_test_workspace_service(pre_created_workspace_service_id: str,
# Session scope isn't in effect with python-xdist: https://github.com/microsoft/AzureTRE/issues/2868
@pytest.fixture(scope="session")
-async def setup_test_workspace(verify) -> Tuple[str, str, str]:
+async def setup_test_workspace(verify) -> Tuple[str, str]:
pre_created_workspace_id = config.TEST_WORKSPACE_ID
- # Set up - uses a pre created app reg as has appropriate roles assigned
+    # Set up - uses a pre-created app reg as it has appropriate roles assigned, or falls back to Automatic
+ auth_type = "Manual" if config.TEST_WORKSPACE_APP_ID else "Automatic"
workspace_path, workspace_id = await create_or_get_test_workspace(
- auth_type="Manual", verify=verify, pre_created_workspace_id=pre_created_workspace_id, client_id=config.TEST_WORKSPACE_APP_ID, client_secret=config.TEST_WORKSPACE_APP_SECRET)
+ auth_type=auth_type, verify=verify, pre_created_workspace_id=pre_created_workspace_id, client_id=config.TEST_WORKSPACE_APP_ID, client_secret=config.TEST_WORKSPACE_APP_SECRET)
yield workspace_path, workspace_id
diff --git a/e2e_tests/pytest.ini b/e2e_tests/pytest.ini
index 3e3cf490e..6d283c96a 100644
--- a/e2e_tests/pytest.ini
+++ b/e2e_tests/pytest.ini
@@ -7,6 +7,7 @@ markers =
performance: marks tests for performance evaluation
timeout: used to set test timeout with pytest-timeout
airlock: only airlock related
+ airlock_consolidated: consolidated airlock storage tests
workspace_services
asyncio_mode = auto
diff --git a/e2e_tests/resources/workspace.py b/e2e_tests/resources/workspace.py
index 2518ba9a0..151284efe 100644
--- a/e2e_tests/resources/workspace.py
+++ b/e2e_tests/resources/workspace.py
@@ -29,7 +29,7 @@ async def get_identifier_uri(client, workspace_id: str, auth_headers) -> str:
raise Exception("Scope Id not found in workspace properties.")
# Cope with the fact that scope id can have api:// at the front.
- return f"api://{workspace['properties']['scope_id'].replace('api://','')}"
+ return f"api://{workspace['properties']['scope_id'].replace('api://', '')}"
async def get_workspace_auth_details(admin_token, workspace_id, verify) -> Tuple[str, str]:
diff --git a/e2e_tests/test_airlock.py b/e2e_tests/test_airlock.py
index 051a5c9d8..85203589a 100644
--- a/e2e_tests/test_airlock.py
+++ b/e2e_tests/test_airlock.py
@@ -1,11 +1,7 @@
-import os
import pytest
import asyncio
import logging
-from azure.core.exceptions import ResourceNotFoundError
-from azure.storage.blob import ContainerClient
-
from airlock.request import post_request, get_request, upload_blob_using_sas, wait_for_status
from resources.resource import get_resource, post_resource
from resources.workspace import get_workspace_auth_details
@@ -156,70 +152,4 @@ async def test_airlock_review_vm_flow(setup_test_workspace, setup_test_airlock_i
LOGGER.info("Review VM has started deletion successfully")
# EXPORT FLOW
- # We can't test teh export flow as we can't fully create an export request without special networking setup
-
-
-@pytest.mark.airlock
-@pytest.mark.extended
-@pytest.mark.timeout(35 * 60)
-async def test_airlock_flow(setup_test_workspace, verify) -> None:
- # 1. Get the workspace set up
- workspace_path, workspace_id = setup_test_workspace
- workspace_owner_token = await get_workspace_owner_token(workspace_id, verify)
-
- # 2. create and submit airlock request
- request_id, container_url = await submit_airlock_import_request(workspace_path, workspace_owner_token, verify)
-
- # 3. approve request
- LOGGER.info("Approving airlock request")
- payload = {
- "approval": "True",
- "decisionExplanation": "the reason why this request was approved/rejected"
- }
- request_result = await post_request(payload, f'/api{workspace_path}/requests/{request_id}/review', workspace_owner_token, verify, 200)
- assert request_result["airlockRequest"]["reviews"][0]["decisionExplanation"] == "the reason why this request was approved/rejected"
-
- await wait_for_status(airlock_strings.APPROVED_STATUS, workspace_owner_token, workspace_path, request_id, verify)
-
- # 4. check the file has been deleted from the source
- # NOTE: We should really be checking that the file is deleted from in progress location too,
- # but doing that will require setting up network access to in-progress storage account
- try:
- container_client = ContainerClient.from_container_url(container_url=container_url)
- # We expect the container to eventually be deleted too, but sometimes this async operation takes some time.
- # Checking that at least there are no blobs within the container
- for _ in container_client.list_blobs():
- container_url_without_sas = container_url.split("?")[0]
- assert False, f"The source blob in container {container_url_without_sas} should be deleted"
- except ResourceNotFoundError:
- # Expecting this exception
- pass
-
- # 5. get a link to the blob in the approved location.
- # For a full E2E we should try to download it, but can't without special networking setup.
- # So at the very least we check that we get the link for it.
- request_result = await get_request(f'/api{workspace_path}/requests/{request_id}/link', workspace_owner_token, verify, 200)
- container_url = request_result["containerUrl"]
-
- # 6. create airlock export request
- LOGGER.info("Creating airlock export request")
- justification = "another business justification"
- payload = {
- "type": airlock_strings.EXPORT,
- "businessJustification": justification
- }
-
- request_result = await post_request(payload, f'/api{workspace_path}/requests', workspace_owner_token, verify, 201)
-
- assert request_result["airlockRequest"]["type"] == airlock_strings.EXPORT
- assert request_result["airlockRequest"]["businessJustification"] == justification
- assert request_result["airlockRequest"]["status"] == airlock_strings.DRAFT_STATUS
-
- request_id = request_result["airlockRequest"]["id"]
-
- # 7. get container link
- LOGGER.info("Getting airlock request container URL")
- request_result = await get_request(f'/api{workspace_path}/requests/{request_id}/link', workspace_owner_token, verify, 200)
- container_url = request_result["containerUrl"]
- # we can't test any more the export flow since we don't have the network
- # access to upload the file from within the workspace.
+ # We can't test the export flow as we can't fully create an export request without special networking setup
diff --git a/e2e_tests/test_airlock_consolidated.py b/e2e_tests/test_airlock_consolidated.py
new file mode 100644
index 000000000..287ec0449
--- /dev/null
+++ b/e2e_tests/test_airlock_consolidated.py
@@ -0,0 +1,193 @@
+"""
+E2E tests for v2 consolidated airlock storage.
+
+These tests verify the full airlock lifecycle using consolidated storage
+(metadata-based stage management with ABAC workspace_id filtering).
+The workspace defaults to airlock_version=2.
+
+Tests that can run from a CI runner outside the workspace VNet:
+- Import: draft -> upload (core storage, public) -> submit -> in_review -> approve/reject
+- Export: draft creation and storage account verification (can't upload - workspace storage is private)
+"""
+import re
+import pytest
+import asyncio
+import logging
+
+from airlock.request import post_request, get_request, upload_blob_using_sas, wait_for_status
+from airlock import strings as airlock_strings
+from e2e_tests.conftest import get_workspace_owner_token
+
+
+pytestmark = pytest.mark.asyncio(loop_scope="session")
+LOGGER = logging.getLogger(__name__)
+BLOB_FILE_PATH = "./test_airlock_sample.txt"
+
+
+async def create_and_submit_import(workspace_path, workspace_owner_token, verify):
+ """Helper: create import draft, upload a file, submit, wait for in_review."""
+ payload = {
+ "type": airlock_strings.IMPORT,
+ "businessJustification": "E2E test import"
+ }
+ result = await post_request(payload, f'/api{workspace_path}/requests', workspace_owner_token, verify, 201)
+ request_id = result["airlockRequest"]["id"]
+ assert result["airlockRequest"]["status"] == airlock_strings.DRAFT_STATUS
+
+ # Get container URL - should be core storage (stalairlock, not stalairlockg)
+ link_result = await get_request(
+ f'/api{workspace_path}/requests/{request_id}/link',
+ workspace_owner_token, verify, 200
+ )
+ container_url = link_result["containerUrl"]
+ assert "stalairlock" in container_url and "stalairlockg" not in container_url, \
+ f"Import draft should use core storage, got: {container_url}"
+
+ # Upload blob (core storage allows public access for import-external)
+ blob_uploaded = False
+ for attempt in range(5):
+ try:
+ await asyncio.sleep(5)
+ upload_response = await upload_blob_using_sas(BLOB_FILE_PATH, container_url)
+ if "etag" in upload_response:
+ blob_uploaded = True
+ break
+ except Exception:
+ LOGGER.info(f"Upload attempt {attempt + 1} failed, retrying...")
+ await asyncio.sleep(10)
+ assert blob_uploaded, "Failed to upload blob after retries"
+
+ # Submit
+ result = await post_request(None, f'/api{workspace_path}/requests/{request_id}/submit', workspace_owner_token, verify, 200)
+ assert result["airlockRequest"]["status"] == airlock_strings.SUBMITTED_STATUS
+
+ await wait_for_status(airlock_strings.IN_REVIEW_STATUS, workspace_owner_token, workspace_path, request_id, verify)
+
+ return request_id, container_url
+
+
+@pytest.mark.timeout(35 * 60)
+@pytest.mark.airlock
+async def test_v2_import_approve_flow(setup_test_workspace, verify):
+ """Full v2 import lifecycle: draft -> upload -> submit -> in_review -> approve -> approved."""
+ workspace_path, workspace_id = setup_test_workspace
+ workspace_owner_token = await get_workspace_owner_token(workspace_id, verify)
+
+ request_id, container_url = await create_and_submit_import(workspace_path, workspace_owner_token, verify)
+ LOGGER.info(f"Import request {request_id} is in_review")
+
+ # Approve
+ payload = {
+ "approval": "True",
+ "decisionExplanation": "Approved for E2E test"
+ }
+ result = await post_request(payload, f'/api{workspace_path}/requests/{request_id}/review', workspace_owner_token, verify, 200)
+ assert result["airlockRequest"]["reviews"][0]["decisionExplanation"] == "Approved for E2E test"
+
+ await wait_for_status(airlock_strings.APPROVED_STATUS, workspace_owner_token, workspace_path, request_id, verify)
+ LOGGER.info(f"Import request {request_id} approved")
+
+ # Verify the container name is consistent (same request_id container throughout)
+ def extract_container_name(url):
+ m = re.match(r'https://[^/]+/([^?]+)', url)
+ return m.group(1) if m else None
+
+ assert extract_container_name(container_url) == request_id, \
+ f"Container name should be request_id {request_id}"
+
+
+@pytest.mark.timeout(35 * 60)
+@pytest.mark.airlock
+async def test_v2_import_reject_flow(setup_test_workspace, verify):
+ """V2 import rejection: draft -> upload -> submit -> in_review -> reject -> rejected."""
+ workspace_path, workspace_id = setup_test_workspace
+ workspace_owner_token = await get_workspace_owner_token(workspace_id, verify)
+
+ request_id, _ = await create_and_submit_import(workspace_path, workspace_owner_token, verify)
+ LOGGER.info(f"Import request {request_id} is in_review, rejecting")
+
+ # Reject
+ payload = {
+ "approval": "False",
+ "decisionExplanation": "Rejected for E2E test"
+ }
+ result = await post_request(payload, f'/api{workspace_path}/requests/{request_id}/review', workspace_owner_token, verify, 200)
+ assert result["airlockRequest"]["reviews"][0]["decisionExplanation"] == "Rejected for E2E test"
+
+ await wait_for_status(airlock_strings.REJECTED_STATUS, workspace_owner_token, workspace_path, request_id, verify)
+ LOGGER.info(f"Import request {request_id} rejected")
+
+
+@pytest.mark.timeout(10 * 60)
+@pytest.mark.airlock
+async def test_v2_import_cancel(setup_test_workspace, verify):
+ """V2 import cancellation from draft state."""
+ workspace_path, workspace_id = setup_test_workspace
+ workspace_owner_token = await get_workspace_owner_token(workspace_id, verify)
+
+ # Create draft
+ payload = {
+ "type": airlock_strings.IMPORT,
+ "businessJustification": "E2E cancel test"
+ }
+ result = await post_request(payload, f'/api{workspace_path}/requests', workspace_owner_token, verify, 201)
+ request_id = result["airlockRequest"]["id"]
+ assert result["airlockRequest"]["status"] == airlock_strings.DRAFT_STATUS
+
+ # Wait for container to be created
+ await asyncio.sleep(10)
+
+ # Cancel
+ result = await post_request(None, f'/api{workspace_path}/requests/{request_id}/cancel', workspace_owner_token, verify, 200)
+ assert result["airlockRequest"]["status"] == airlock_strings.CANCELLED_STATUS
+ LOGGER.info(f"Import request {request_id} cancelled from draft")
+
+
+@pytest.mark.timeout(10 * 60)
+@pytest.mark.airlock
+async def test_v2_export_uses_workspace_storage(setup_test_workspace, verify):
+ """V2 export draft should use global workspace storage (stalairlockg)."""
+ workspace_path, workspace_id = setup_test_workspace
+ workspace_owner_token = await get_workspace_owner_token(workspace_id, verify)
+
+ payload = {
+ "type": airlock_strings.EXPORT,
+ "businessJustification": "E2E export storage test"
+ }
+ result = await post_request(payload, f'/api{workspace_path}/requests', workspace_owner_token, verify, 201)
+ request_id = result["airlockRequest"]["id"]
+
+ link_result = await get_request(
+ f'/api{workspace_path}/requests/{request_id}/link',
+ workspace_owner_token, verify, 200
+ )
+ container_url = link_result["containerUrl"]
+
+ assert "stalairlockg" in container_url, \
+ f"Export draft should use global workspace storage (stalairlockg), got: {container_url}"
+ LOGGER.info(f"Export request uses correct storage: {container_url}")
+
+
+@pytest.mark.timeout(10 * 60)
+@pytest.mark.airlock
+async def test_v2_import_uses_core_storage(setup_test_workspace, verify):
+ """V2 import draft should use core storage (stalairlock, not stalairlockg)."""
+ workspace_path, workspace_id = setup_test_workspace
+ workspace_owner_token = await get_workspace_owner_token(workspace_id, verify)
+
+ payload = {
+ "type": airlock_strings.IMPORT,
+ "businessJustification": "E2E import storage test"
+ }
+ result = await post_request(payload, f'/api{workspace_path}/requests', workspace_owner_token, verify, 201)
+ request_id = result["airlockRequest"]["id"]
+
+ link_result = await get_request(
+ f'/api{workspace_path}/requests/{request_id}/link',
+ workspace_owner_token, verify, 200
+ )
+ container_url = link_result["containerUrl"]
+
+ assert "stalairlock" in container_url and "stalairlockg" not in container_url, \
+ f"Import draft should use core storage (stalairlock), got: {container_url}"
+ LOGGER.info(f"Import request uses correct storage: {container_url}")
diff --git a/mkdocs.yml b/mkdocs.yml
index f99e74c13..a9a867919 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -35,7 +35,7 @@ markdown_extensions:
custom_fences:
- name: mermaid
class: mermaid
- format: !!python/name:pymdownx.superfences.fence_code_format
+ format: !!python/name:pymdownx.superfences.fence_code_format ""
- meta
- admonition
- pymdownx.highlight
@@ -44,25 +44,29 @@ markdown_extensions:
- pymdownx.tabbed
- pymdownx.tasklist
- pymdownx.emoji:
- emoji_index: !!python/name:material.extensions.emoji.twemoji
- emoji_generator: !!python/name:material.extensions.emoji.to_svg
+ emoji_index: !!python/name:material.extensions.emoji.twemoji ""
+ emoji_generator: !!python/name:material.extensions.emoji.to_svg ""
- attr_list
nav:
- - Overview: # Pages to explain what the Azure TRE
+ - Overview:
+ # Pages to explain what the Azure TRE
- Introducing the AzureTRE: index.md # Introduction to the concept of a TRE, and the AzureTRE
- User Roles: azure-tre-overview/user-roles.md # The users and roles within an Azure TRE
- - Architecture: # Pages to help understand the components, infra, and networking
+ - Architecture:
+ # Pages to help understand the components, infra, and networking
- System Architecture: azure-tre-overview/architecture.md
- Network Architecture: azure-tre-overview/networking.md
- Azure Resources: azure-tre-overview/tre-resources-breakdown.md
- Airlock: azure-tre-overview/airlock.md
+ - Airlock (Legacy): azure-tre-overview/airlock-legacy.md
- Cost Reporting: azure-tre-overview/cost-reporting.md
- Terms and Definitions: using-tre/terms-definitions.md
- Case Studies: azure-tre-overview/case-studies.md
# - Compliance: azure-tre-overview/compliance-info.md
- - QuickStart: # Setup steps for anyone performing an initial deployment of the AzureTRE for eval purposes
+ - QuickStart:
+ # Setup steps for anyone performing an initial deployment of the AzureTRE for eval purposes
- tre-admins/setup-instructions/index.md
- 1. Prerequisites: tre-admins/setup-instructions/prerequisites.md
- 2. Deployment Repository: tre-admins/setup-instructions/deployment-repo.md
@@ -79,7 +83,8 @@ nav:
- 7. Install Base Workspace: tre-admins/setup-instructions/ui-install-base-workspace.md
- 8. Install Workspace Service and User Resource: tre-admins/setup-instructions/ui-install-ws-and-ur.md
- - Using the Azure TRE: # Documentation for users of the TRE
+ - Using the Azure TRE:
+ # Documentation for users of the TRE
- Introduction: using-tre/index.md
- Custom Templates: using-tre/templates/index.md
- Using AzureTRE for Research:
@@ -88,7 +93,8 @@ nav:
- Importing/exporting data with Airlock: using-tre/tre-for-research/importing-exporting-data-airlock.md
- Reviewing Airlock Requests: using-tre/tre-for-research/review-airlock-request.md
- - Templates and Services: # Docs to highlight and illustrate workspaces, workspace services etc
+ - Templates and Services:
+ # Docs to highlight and illustrate workspaces, workspace services etc
- Workspaces:
- Base: tre-templates/workspaces/base.md
- Unrestricted: tre-templates/workspaces/unrestricted.md
@@ -114,8 +120,10 @@ nav:
- Import Review VM: tre-templates/user-resources/import-reviewvm.md
- Export Review VM: tre-templates/user-resources/export-reviewvm.md
- - Technical Guide: # All Technical Documentation (Admin, Development)
- - Administration: # Docs related to the deployment and operation of AzureTRE infrastructure
+ - Technical Guide:
+ # All Technical Documentation (Admin, Development)
+ - Administration:
+ # Docs related to the deployment and operation of AzureTRE infrastructure
- Starting and Stopping Azure TRE Services: tre-admins/start-stop.md
- Environment Variables: tre-admins/environment-variables.md
- Tear-down: tre-admins/tear-down.md
@@ -142,9 +150,11 @@ nav:
- Firewall Force Tunneling: tre-admins/configure-firewall-force-tunneling.md
- DNS Security Policy: tre-admins/dns-security-policy.md
- - Development: # Docs related to the developing code for the AzureTRE
+ - Development:
+ # Docs related to the developing code for the AzureTRE
- Local Development: using-tre/local-development/local-development.md
- - Contributing to AzureTRE: # Docs aimed at OSS developers, committing code to the AzureTRE repo
+ - Contributing to AzureTRE:
+ # Docs aimed at OSS developers, committing code to the AzureTRE repo
- Introduction: tre-developers/index.md
- API:
- API Overview: tre-developers/api.md
@@ -158,7 +168,8 @@ nav:
- GitHub Actions: tre-admins/setup-instructions/workflows.md
- GitHub PR Bot Commands: tre-developers/github-pr-bot-commands.md
- - Developing Workspace Templates: # Docs aimed at developers creating workspace templates
+ - Developing Workspace Templates:
+ # Docs aimed at developers creating workspace templates
- Authoring Workspace Templates: tre-workspace-authors/authoring-workspace-templates.md
- Firewall Rules: tre-workspace-authors/firewall-rules.md
- Pipeline Templates:
@@ -167,7 +178,8 @@ nav:
- AzureTRE CLI: tre-developers/CLI.md
- - Troubleshooting FAQ: # General Troubleshooting Section for Development
+ - Troubleshooting FAQ:
+ # General Troubleshooting Section for Development
- troubleshooting-faq/index.md
- Enabling DEBUG logs: troubleshooting-faq/debug-logs.md
- API logs using deployment center: troubleshooting-faq/api-logs-deployment-center.md
diff --git a/templates/workspaces/airlock-import-review/porter.yaml b/templates/workspaces/airlock-import-review/porter.yaml
index 25fd654f5..4cc894b0f 100644
--- a/templates/workspaces/airlock-import-review/porter.yaml
+++ b/templates/workspaces/airlock-import-review/porter.yaml
@@ -1,7 +1,7 @@
---
schemaVersion: 1.0.0
name: tre-workspace-airlock-import-review
-version: 0.14.9
+version: 1.5.0
description: "A workspace to do Airlock Data Import Reviews for Azure TRE"
dockerfile: Dockerfile.tmpl
registry: azuretre
diff --git a/templates/workspaces/airlock-import-review/terraform/import_review_resources.terraform b/templates/workspaces/airlock-import-review/terraform/import_review_resources.terraform
index 3fbfb6e2b..6a9c1fde9 100644
--- a/templates/workspaces/airlock-import-review/terraform/import_review_resources.terraform
+++ b/templates/workspaces/airlock-import-review/terraform/import_review_resources.terraform
@@ -2,9 +2,8 @@
# The Dockerfile includes a RUN command to change the extension from .terraform to .tf after the files from the base workspace are copied to this directory.
locals {
- core_resource_group_name = "rg-${var.tre_id}"
- # STorage AirLock IMport InProgress
- import_in_progress_storage_name = lower(replace("stalimip${var.tre_id}", "-", ""))
+ core_resource_group_name = "rg-${var.tre_id}"
+ airlock_core_storage_name = lower(replace("stalairlock${var.tre_id}", "-", ""))
}
module "terraform_azurerm_environment_configuration" {
@@ -12,14 +11,14 @@ module "terraform_azurerm_environment_configuration" {
arm_environment = var.arm_environment
}
-data "azurerm_storage_account" "sa_import_inprogress" {
+data "azurerm_storage_account" "sa_airlock_core" {
provider = azurerm.core
- name = local.import_in_progress_storage_name
+ name = local.airlock_core_storage_name
resource_group_name = local.core_resource_group_name
}
-resource "azurerm_private_endpoint" "sa_import_inprogress_pe" {
- name = "stg-ip-import-blob-${local.workspace_resource_name_suffix}"
+resource "azurerm_private_endpoint" "sa_airlock_core_pe" {
+ name = "pe-airlock-import-review-${local.workspace_resource_name_suffix}"
location = var.location
resource_group_name = azurerm_resource_group.ws.name
subnet_id = module.network.services_subnet_id
@@ -27,8 +26,8 @@ resource "azurerm_private_endpoint" "sa_import_inprogress_pe" {
lifecycle { ignore_changes = [tags] }
private_service_connection {
- name = "psc-stg-ip-import-blob-${local.workspace_resource_name_suffix}"
- private_connection_resource_id = data.azurerm_storage_account.sa_import_inprogress.id
+ name = "psc-airlock-import-review-${local.workspace_resource_name_suffix}"
+ private_connection_resource_id = data.azurerm_storage_account.sa_airlock_core.id
is_manual_connection = false
subresource_names = ["Blob"]
}
@@ -36,33 +35,69 @@ resource "azurerm_private_endpoint" "sa_import_inprogress_pe" {
tags = local.tre_workspace_tags
}
-resource "azurerm_private_dns_zone" "stg_import_inprogress_blob" {
- name = "${data.azurerm_storage_account.sa_import_inprogress.name}.${module.terraform_azurerm_environment_configuration.private_links["privatelink.blob.core.windows.net"]}"
+resource "azurerm_private_dns_zone" "stg_airlock_core_blob" {
+ name = "${data.azurerm_storage_account.sa_airlock_core.name}.${module.terraform_azurerm_environment_configuration.private_links["privatelink.blob.core.windows.net"]}"
resource_group_name = azurerm_resource_group.ws.name
tags = local.tre_workspace_tags
- depends_on = [azurerm_private_endpoint.sa_import_inprogress_pe]
+ depends_on = [azurerm_private_endpoint.sa_airlock_core_pe]
}
-resource "azurerm_private_dns_a_record" "stg_import_inprogress_blob" {
+resource "azurerm_private_dns_a_record" "stg_airlock_core_blob" {
name = "@" # Root record
- zone_name = azurerm_private_dns_zone.stg_import_inprogress_blob.name
+ zone_name = azurerm_private_dns_zone.stg_airlock_core_blob.name
resource_group_name = azurerm_resource_group.ws.name
ttl = 300
- records = [azurerm_private_endpoint.sa_import_inprogress_pe.private_service_connection[0].private_ip_address]
+ records = [azurerm_private_endpoint.sa_airlock_core_pe.private_service_connection[0].private_ip_address]
tags = local.tre_workspace_tags
}
-resource "azurerm_private_dns_zone_virtual_network_link" "stg_import_inprogress_blob" {
- name = "vnl-stg-ip-import-blob-${local.workspace_resource_name_suffix}"
+resource "azurerm_private_dns_zone_virtual_network_link" "stg_airlock_core_blob" {
+ name = "vnl-airlock-import-review-${local.workspace_resource_name_suffix}"
resource_group_name = azurerm_resource_group.ws.name
- private_dns_zone_name = azurerm_private_dns_zone.stg_import_inprogress_blob.name
+ private_dns_zone_name = azurerm_private_dns_zone.stg_airlock_core_blob.name
virtual_network_id = module.network.vnet_id
tags = local.tre_workspace_tags
- depends_on = [azurerm_private_dns_a_record.stg_import_inprogress_blob]
+ depends_on = [azurerm_private_dns_a_record.stg_airlock_core_blob]
}
+
+# Per-workspace managed identity for accessing import-in-progress blobs
+# Each workspace needs its own identity so that role assignments don't conflict
+resource "azurerm_user_assigned_identity" "import_review_id" {
+ name = "id-airlock-import-review-${local.workspace_resource_name_suffix}"
+ location = var.location
+ resource_group_name = azurerm_resource_group.ws.name
+
+ tags = local.tre_workspace_tags
+
+ lifecycle { ignore_changes = [tags] }
+}
+
+resource "azurerm_role_assignment" "review_workspace_import_access" {
+ scope = data.azurerm_storage_account.sa_airlock_core.id
+ role_definition_name = "Storage Blob Data Reader"
+ principal_id = azurerm_user_assigned_identity.import_review_id.principal_id
+
+ condition_version = "2.0"
+ condition = <<-EOT
+ (
+ !(ActionMatches{'Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read'})
+ OR
+ (
+ @Environment[Microsoft.Network/privateEndpoints] StringEqualsIgnoreCase
+ '${azurerm_private_endpoint.sa_airlock_core_pe.id}'
+ AND
+ @Resource[Microsoft.Storage/storageAccounts/blobServices/containers/metadata:stage]
+ StringEquals 'import-in-progress'
+ )
+ )
+ EOT
+
+ depends_on = [azurerm_private_endpoint.sa_airlock_core_pe]
+}
+
diff --git a/templates/workspaces/base/porter.yaml b/templates/workspaces/base/porter.yaml
index 368df5402..b0fe16ba0 100644
--- a/templates/workspaces/base/porter.yaml
+++ b/templates/workspaces/base/porter.yaml
@@ -1,7 +1,7 @@
---
schemaVersion: 1.0.0
name: tre-workspace-base
-version: 2.8.3
+version: 3.9.0
description: "A base Azure TRE workspace"
dockerfile: Dockerfile.tmpl
registry: azuretre
@@ -71,24 +71,24 @@ parameters:
- name: create_aad_groups
type: boolean
default: true
- description: "Whether this bundle should create AAD groups for the workspace app roles (required for User Management)"
+ description: "Whether this bundle should create AAD groups for the workspace app
+ roles (required for User Management)"
- name: core_api_client_id
type: string
description: "The client id of the core API"
- name: workspace_owner_object_id
type: string
- description: "The object id of the user that will be granted WorkspaceOwner after it is created."
+ description: "The object id of the user that will be granted WorkspaceOwner
+ after it is created."
- name: client_id
type: string
default: ""
- description:
- "The client id of the workspace in the identity provider. This value is typically provided to you
- when you create the ws application"
+ description: "The client id of the workspace in the identity provider. This
+ value is typically provided to you when you create the ws application"
- name: client_secret
type: string
- description:
- "The client secret of the workspace in the identity provider. This value is typically provided to you
- when you create the ws application"
+ description: "The client secret of the workspace in the identity provider. This
+ value is typically provided to you when you create the ws application"
default: ""
- name: ui_client_id
type: string
@@ -109,7 +109,8 @@ parameters:
- name: app_role_id_workspace_researcher
type: string
default: ""
- description: "The id of the application role WorkspaceResearcher in the identity provider"
+ description: "The id of the application role WorkspaceResearcher in the identity
+ provider"
- name: app_role_id_workspace_airlock_manager
type: string
default: ""
@@ -125,6 +126,11 @@ parameters:
- name: enable_airlock
type: boolean
default: true
+ - name: airlock_version
+ type: integer
+ default: 2
+ description: "Airlock storage version: 1 = legacy per-stage storage accounts, 2
+ = consolidated metadata-based storage"
- name: arm_environment
type: string
- name: enable_cmk_encryption
@@ -136,7 +142,8 @@ parameters:
- name: storage_account_redundancy
type: string
default: "GRS"
- description: "The redundancy option for the storage account in the workspace: GRS (Geo-Redundant Storage) or ZRS (Zone-Redundant Storage)."
+ description: "The redundancy option for the storage account in the workspace:
+ GRS (Geo-Redundant Storage) or ZRS (Zone-Redundant Storage)."
- name: enable_backup
type: boolean
default: true
@@ -148,7 +155,8 @@ parameters:
- name: auto_grant_workspace_consent
type: boolean
default: true
- description: "Setting this to `true` will prevent the need for users to manually grant consent to new workspaces"
+ description: "Setting this to `true` will prevent the need for users to manually
+ grant consent to new workspaces"
- name: enable_airlock_malware_scanning
type: boolean
default: false
@@ -261,6 +269,7 @@ install:
aad_redirect_uris_b64: ${ bundle.parameters.aad_redirect_uris }
app_service_plan_sku: ${ bundle.parameters.app_service_plan_sku }
enable_airlock: ${ bundle.parameters.enable_airlock }
+ airlock_version: ${ bundle.parameters.airlock_version }
arm_environment: ${ bundle.parameters.arm_environment }
enable_cmk_encryption: ${ bundle.parameters.enable_cmk_encryption }
key_store_id: ${ bundle.parameters.key_store_id }
@@ -321,6 +330,7 @@ upgrade:
aad_redirect_uris_b64: ${ bundle.parameters.aad_redirect_uris }
app_service_plan_sku: ${ bundle.parameters.app_service_plan_sku }
enable_airlock: ${ bundle.parameters.enable_airlock }
+ airlock_version: ${ bundle.parameters.airlock_version }
arm_environment: ${ bundle.parameters.arm_environment }
enable_cmk_encryption: ${ bundle.parameters.enable_cmk_encryption }
key_store_id: ${ bundle.parameters.key_store_id }
@@ -404,6 +414,7 @@ uninstall:
aad_redirect_uris_b64: ${ bundle.parameters.aad_redirect_uris }
app_service_plan_sku: ${ bundle.parameters.app_service_plan_sku }
enable_airlock: ${ bundle.parameters.enable_airlock }
+ airlock_version: ${ bundle.parameters.airlock_version }
arm_environment: ${ bundle.parameters.arm_environment }
enable_cmk_encryption: ${ bundle.parameters.enable_cmk_encryption }
key_store_id: ${ bundle.parameters.key_store_id }
diff --git a/templates/workspaces/base/template_schema.json b/templates/workspaces/base/template_schema.json
index c69024b8e..8ec376622 100644
--- a/templates/workspaces/base/template_schema.json
+++ b/templates/workspaces/base/template_schema.json
@@ -111,6 +111,17 @@
},
"then": {
"properties": {
+ "airlock_version": {
+ "type": "integer",
+ "title": "Airlock Version",
+ "description": "Airlock storage version: 1 = legacy per-stage storage accounts, 2 = consolidated metadata-based storage",
+ "default": 2,
+ "enum": [
+ 1,
+ 2
+ ],
+ "updateable": true
+ },
"configure_review_vms": {
"type": "boolean",
"title": "Configure Review VMs",
@@ -357,4 +368,4 @@
"*"
]
}
-}
\ No newline at end of file
+}
diff --git a/templates/workspaces/base/terraform/airlock_v2/data.tf b/templates/workspaces/base/terraform/airlock_v2/data.tf
new file mode 100644
index 000000000..cb426f7a9
--- /dev/null
+++ b/templates/workspaces/base/terraform/airlock_v2/data.tf
@@ -0,0 +1,11 @@
+data "azurerm_user_assigned_identity" "api_id" {
+ provider = azurerm.core
+ name = "id-api-${var.tre_id}"
+ resource_group_name = "rg-${var.tre_id}"
+}
+
+data "azurerm_private_dns_zone" "blobcore" {
+ provider = azurerm.core
+ name = module.terraform_azurerm_environment_configuration.private_links["privatelink.blob.core.windows.net"]
+ resource_group_name = local.core_resource_group_name
+}
diff --git a/templates/workspaces/base/terraform/airlock_v2/locals.tf b/templates/workspaces/base/terraform/airlock_v2/locals.tf
new file mode 100644
index 000000000..de03e8b90
--- /dev/null
+++ b/templates/workspaces/base/terraform/airlock_v2/locals.tf
@@ -0,0 +1,6 @@
+locals {
+ core_resource_group_name = "rg-${var.tre_id}"
+
+ # Global workspace airlock storage account name (in core) - shared by all workspaces
+ airlock_workspace_global_storage_name = lower(replace("stalairlockg${var.tre_id}", "-", ""))
+}
diff --git a/templates/workspaces/base/terraform/airlock_v2/providers.tf b/templates/workspaces/base/terraform/airlock_v2/providers.tf
new file mode 100644
index 000000000..aa395ac8d
--- /dev/null
+++ b/templates/workspaces/base/terraform/airlock_v2/providers.tf
@@ -0,0 +1,19 @@
+terraform {
+ # In modules we should only specify the min version
+ required_providers {
+ azurerm = {
+ source = "hashicorp/azurerm"
+ version = ">= 4.27.0"
+ configuration_aliases = [
+ azurerm,
+ azurerm.core
+ ]
+ }
+ }
+}
+
+
+module "terraform_azurerm_environment_configuration" {
+ source = "git::https://github.com/microsoft/terraform-azurerm-environment-configuration.git?ref=0.2.0"
+ arm_environment = var.arm_environment
+}
diff --git a/templates/workspaces/base/terraform/airlock_v2/storage_accounts.tf b/templates/workspaces/base/terraform/airlock_v2/storage_accounts.tf
new file mode 100644
index 000000000..2c1deb820
--- /dev/null
+++ b/templates/workspaces/base/terraform/airlock_v2/storage_accounts.tf
@@ -0,0 +1,74 @@
+# Global Workspace Storage with workspace_id ABAC
+# This file replaces the v1 module's storage_accounts.tf: it references the global workspace
+# storage account created in core infrastructure instead of creating a per-workspace account
+
+# Data source to reference the global workspace storage account
+data "azurerm_storage_account" "sa_airlock_workspace_global" {
+ name = local.airlock_workspace_global_storage_name
+ resource_group_name = local.core_resource_group_name
+}
+
+# Private Endpoint for this workspace to access the global storage account
+# Each workspace needs its own PE for network isolation
+# ABAC will restrict this PE to only access containers with matching workspace_id
+resource "azurerm_private_endpoint" "airlock_workspace_pe" {
+ name = "pe-sa-airlock-ws-global-${var.short_workspace_id}"
+ location = var.location
+ resource_group_name = var.ws_resource_group_name
+ subnet_id = var.services_subnet_id
+ tags = var.tre_workspace_tags
+
+ lifecycle { ignore_changes = [tags] }
+
+ private_dns_zone_group {
+ name = "private-dns-zone-group-sa-airlock-ws-global"
+ private_dns_zone_ids = [data.azurerm_private_dns_zone.blobcore.id]
+ }
+
+ private_service_connection {
+ name = "psc-sa-airlock-ws-global-${var.short_workspace_id}"
+ private_connection_resource_id = data.azurerm_storage_account.sa_airlock_workspace_global.id
+ is_manual_connection = false
+ subresource_names = ["Blob"]
+ }
+}
+
+resource "azurerm_role_assignment" "api_workspace_global_blob_data_contributor" {
+ # Use a deterministic name per workspace to avoid conflicts when multiple
+ # workspaces assign the same role on the same global storage account.
+ name = uuidv5("url", "${data.azurerm_storage_account.sa_airlock_workspace_global.id}-${var.workspace_id}-blob-data-contributor")
+ scope = data.azurerm_storage_account.sa_airlock_workspace_global.id
+ role_definition_name = "Storage Blob Data Contributor"
+ principal_id = data.azurerm_user_assigned_identity.api_id.principal_id
+
+ condition_version = "2.0"
+ condition = <<-EOT
+ (
+ (
+ !(ActionMatches{'Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read'})
+ AND !(ActionMatches{'Microsoft.Storage/storageAccounts/blobServices/containers/blobs/write'})
+ AND !(ActionMatches{'Microsoft.Storage/storageAccounts/blobServices/containers/blobs/add/action'})
+ AND !(ActionMatches{'Microsoft.Storage/storageAccounts/blobServices/containers/blobs/delete'})
+ )
+ OR
+ (
+ @Environment[Microsoft.Network/privateEndpoints] StringEqualsIgnoreCase
+ '${azurerm_private_endpoint.airlock_workspace_pe.id}'
+ AND
+ @Resource[Microsoft.Storage/storageAccounts/blobServices/containers/metadata:workspace_id]
+ StringEquals '${var.workspace_id}'
+ AND
+ (
+ @Resource[Microsoft.Storage/storageAccounts/blobServices/containers/metadata:stage]
+ StringEquals 'import-approved'
+ OR
+ @Resource[Microsoft.Storage/storageAccounts/blobServices/containers/metadata:stage]
+ StringEquals 'export-internal'
+ OR
+ @Resource[Microsoft.Storage/storageAccounts/blobServices/containers/metadata:stage]
+ StringEquals 'export-in-progress'
+ )
+ )
+ )
+ EOT
+}
diff --git a/templates/workspaces/base/terraform/airlock_v2/variables.tf b/templates/workspaces/base/terraform/airlock_v2/variables.tf
new file mode 100644
index 000000000..b4af38033
--- /dev/null
+++ b/templates/workspaces/base/terraform/airlock_v2/variables.tf
@@ -0,0 +1,25 @@
+variable "location" {
+ type = string
+}
+variable "tre_id" {
+ type = string
+}
+variable "ws_resource_group_name" {
+ type = string
+}
+variable "services_subnet_id" {
+ type = string
+}
+variable "short_workspace_id" {
+ type = string
+}
+variable "tre_workspace_tags" {
+ type = map(string)
+}
+variable "arm_environment" {
+ type = string
+}
+variable "workspace_id" {
+ type = string
+ description = "The workspace ID used for ABAC conditions on global workspace storage"
+}
diff --git a/templates/workspaces/base/terraform/variables.tf b/templates/workspaces/base/terraform/variables.tf
index b475c0135..3d482bdc4 100644
--- a/templates/workspaces/base/terraform/variables.tf
+++ b/templates/workspaces/base/terraform/variables.tf
@@ -69,6 +69,12 @@ variable "enable_airlock" {
description = "Controls the deployment of Airlock resources in the workspace."
}
+variable "airlock_version" {
+ type = number
+ default = 2
+ description = "Airlock storage version: 1 = legacy per-stage storage accounts, 2 = consolidated metadata-based storage."
+}
+
variable "aad_redirect_uris_b64" {
type = string # B64 encoded list of objects like [{"name": "my uri 1", "value": "https://..."}, {}]
default = "W10=" #b64 for []
@@ -172,14 +178,16 @@ variable "enable_dns_policy" {
default = false
}
+# tflint-ignore: terraform_unused_declarations
variable "enable_airlock_malware_scanning" {
type = bool
default = false
- description = "Enable Airlock malware scanning for the workspace"
+ description = "Enable Airlock malware scanning for the workspace. Passed by porter bundle but no longer used in workspace terraform after airlock consolidation."
}
+# tflint-ignore: terraform_unused_declarations
variable "airlock_malware_scan_result_topic_name" {
type = string
- description = "The name of the topic to publish scan results to"
+ description = "The name of the topic to publish scan results to. Passed by porter bundle but no longer used in workspace terraform after airlock consolidation."
default = null
}
diff --git a/templates/workspaces/base/terraform/workspace.tf b/templates/workspaces/base/terraform/workspace.tf
index 8008c545b..43867b01e 100644
--- a/templates/workspaces/base/terraform/workspace.tf
+++ b/templates/workspaces/base/terraform/workspace.tf
@@ -53,7 +53,7 @@ module "aad" {
}
module "airlock" {
- count = var.enable_airlock ? 1 : 0
+ count = var.enable_airlock && var.airlock_version == 1 ? 1 : 0
source = "./airlock"
location = var.location
tre_id = var.tre_id
@@ -80,6 +80,28 @@ module "airlock" {
]
}
+module "airlock_v2" {
+ count = var.enable_airlock && var.airlock_version >= 2 ? 1 : 0
+ source = "./airlock_v2"
+ location = var.location
+ tre_id = var.tre_id
+ tre_workspace_tags = local.tre_workspace_tags
+ ws_resource_group_name = azurerm_resource_group.ws.name
+ services_subnet_id = module.network.services_subnet_id
+ short_workspace_id = local.short_workspace_id
+ workspace_id = var.tre_resource_id
+ arm_environment = var.arm_environment
+
+ providers = {
+ azurerm = azurerm
+ azurerm.core = azurerm.core
+ }
+
+ depends_on = [
+ module.network,
+ ]
+}
+
module "azure_monitor" {
source = "./azure-monitor"
@@ -102,7 +124,8 @@ module "azure_monitor" {
depends_on = [
module.network,
- module.airlock
+ module.airlock,
+ module.airlock_v2
]
}
diff --git a/ui/app/package.json b/ui/app/package.json
index 8f6aa8f14..c35ded1eb 100644
--- a/ui/app/package.json
+++ b/ui/app/package.json
@@ -1,6 +1,6 @@
{
"name": "tre-ui",
- "version": "0.8.26",
+ "version": "0.8.27",
"private": true,
"type": "module",
"dependencies": {