diff --git a/src/buildcompiler/stages/__init__.py b/src/buildcompiler/stages/__init__.py
index d354b34..bacbb7d 100644
--- a/src/buildcompiler/stages/__init__.py
+++ b/src/buildcompiler/stages/__init__.py
@@ -1,6 +1,7 @@
 """Stage exports."""
 
 from .assembly_lvl1 import AssemblyLvl1Stage
+from .assembly_lvl2 import AssemblyLvl2Stage
 from .domestication import DomesticationStage
 
-__all__ = ["AssemblyLvl1Stage", "DomesticationStage"]
+__all__ = ["AssemblyLvl1Stage", "AssemblyLvl2Stage", "DomesticationStage"]
diff --git a/src/buildcompiler/stages/assembly_lvl2.py b/src/buildcompiler/stages/assembly_lvl2.py
new file mode 100644
index 0000000..31d6eb6
--- /dev/null
+++ b/src/buildcompiler/stages/assembly_lvl2.py
@@ -0,0 +1,314 @@
+"""Thin lvl2 assembly stage orchestration."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+import sbol2
+
+from buildcompiler.adapters.pudu import assembly_route_to_pudu_json
+from buildcompiler.api import BuildOptions
+from buildcompiler.domain import (
+    BuildRequest,
+    BuildStage,
+    MissingBuildInput,
+    StageResult,
+    StageStatus,
+)
+from buildcompiler.inventory import CompatibilitySelector, Inventory
+from buildcompiler.inventory.compatibility import Lvl2Route
+from buildcompiler.sbol import AssemblyJob, AssemblyService
+
+
+class AssemblyLvl2Stage:
+    """Select a lvl2 route and hand it to the assembly service.
+
+    The stage resolves engineered-region identities for a ModuleDefinition,
+    asks the selector for a route, reports missing inventory items as
+    blockers, and otherwise runs the assembly service and indexes the
+    generated products.
+    """
+
+    # Cap on how many rejected routes are serialized into protocol artifacts.
+    _MAX_REJECTED_ROUTES_REPORTED = 3
+
+    def __init__(
+        self,
+        *,
+        inventory: Inventory,
+        selector: CompatibilitySelector | None = None,
+        assembly_service: AssemblyService | None = None,
+        options: BuildOptions | None = None,
+    ) -> None:
+        """Store collaborators, building defaults for any that are omitted."""
+        self.inventory = inventory
+        self.options = options or BuildOptions()
+        self.selector = selector or CompatibilitySelector(
+            inventory, options=self.options
+        )
+        self.assembly_service = assembly_service or AssemblyService()
+
+    def run(
+        self,
+        request: BuildRequest,
+        *,
+        source_document: sbol2.Document,
+        target_document: sbol2.Document,
+    ) -> StageResult:
+        """Run the lvl2 assembly stage for one build request.
+
+        Returns a FAILED result when the source identity is not a
+        ModuleDefinition or yields no region identities, a BLOCKED result
+        when no route is selected or required inputs are missing, and a
+        SUCCESS result carrying the assembly products otherwise.
+        """
+        constraints = request.constraints or {}
+        module_definition = source_document.find(request.source_identity)
+        if not isinstance(module_definition, sbol2.ModuleDefinition):
+            return StageResult(
+                id=f"{request.id}:{BuildStage.ASSEMBLY_LVL2.value}",
+                stage=BuildStage.ASSEMBLY_LVL2,
+                status=StageStatus.FAILED,
+                request_ids=[request.id],
+                logs=[
+                    f"Source identity is not a ModuleDefinition: {request.source_identity}"
+                ],
+            )
+
+        region_identities = self._extract_region_identities(
+            module_definition, constraints
+        )
+        if not region_identities:
+            return StageResult(
+                id=f"{request.id}:{BuildStage.ASSEMBLY_LVL2.value}",
+                stage=BuildStage.ASSEMBLY_LVL2,
+                status=StageStatus.FAILED,
+                request_ids=[request.id],
+                logs=["No engineered-region identities found for lvl2 assembly."],
+            )
+
+        route_selection = self.selector.select_lvl2_route(
+            request_id=request.id,
+            region_identities=region_identities,
+            constraints=constraints,
+        )
+        warning_logs: list[str] = []
+        # When a region_order constraint yields no route, retry without it; if
+        # that succeeds, proceed with the relaxed route and log a warning.
+        if route_selection.selected is None and constraints.get("region_order"):
+            relaxed_constraints = {
+                key: value for key, value in constraints.items() if key != "region_order"
+            }
+            relaxed_selection = self.selector.select_lvl2_route(
+                request_id=request.id,
+                region_identities=region_identities,
+                constraints=relaxed_constraints,
+            )
+            if relaxed_selection.selected is not None:
+                route_selection = relaxed_selection
+                warning_logs.append(
+                    "Unable to satisfy region_order constraint; proceeding with an arbitrary compatible order."
+                )
+
+        route = route_selection.selected
+        artifacts = self._route_artifacts(route, route_selection.rejected)
+        if route is None:
+            return StageResult(
+                id=f"{request.id}:{BuildStage.ASSEMBLY_LVL2.value}",
+                stage=BuildStage.ASSEMBLY_LVL2,
+                status=StageStatus.BLOCKED,
+                request_ids=[request.id],
+                protocol_artifacts=artifacts,
+                logs=[
+                    "No lvl2 route selected by CompatibilitySelector. Provide explicit region_order "
+                    "or enable large-order search for large designs.",
+                    *warning_logs,
+                ],
+            )
+
+        # Collect every blocker before returning so the caller sees them all.
+        missing_inputs: list[MissingBuildInput] = []
+        for missing_identity in route.missing_region_identities:
+            missing_inputs.append(
+                MissingBuildInput(
+                    source_stage=BuildStage.ASSEMBLY_LVL2,
+                    source_design_identity=request.source_identity,
+                    missing_identity=missing_identity,
+                    missing_display_id=missing_identity.rsplit("/", 1)[-1],
+                    missing_kind="engineered_region",
+                    required_stage=BuildStage.ASSEMBLY_LVL1,
+                    reason="No compatible lvl1 engineered-region plasmid found in inventory.",
+                )
+            )
+
+        if route.backbone is None:
+            missing_inputs.append(
+                MissingBuildInput(
+                    source_stage=BuildStage.ASSEMBLY_LVL2,
+                    source_design_identity=request.source_identity,
+                    missing_identity="backbone",
+                    missing_display_id=None,
+                    missing_kind="backbone",
+                    required_stage="fatal",
+                    reason="No compatible lvl2 backbone found in inventory.",
+                )
+            )
+
+        restriction_enzyme_name = self.options.reagents.default_restriction_enzyme
+        restriction_enzyme = self.inventory.find_restriction_enzyme(
+            restriction_enzyme_name
+        )
+        if restriction_enzyme is None:
+            missing_inputs.append(
+                MissingBuildInput(
+                    source_stage=BuildStage.ASSEMBLY_LVL2,
+                    source_design_identity=request.source_identity,
+                    missing_identity=restriction_enzyme_name,
+                    missing_display_id=restriction_enzyme_name,
+                    missing_kind="restriction_enzyme",
+                    required_stage="fatal",
+                    reason="Required restriction enzyme is missing from inventory.",
+                )
+            )
+
+        ligase_name = self.options.reagents.default_ligase
+        ligase = self.inventory.find_ligase(ligase_name)
+        if ligase is None:
+            missing_inputs.append(
+                MissingBuildInput(
+                    source_stage=BuildStage.ASSEMBLY_LVL2,
+                    source_design_identity=request.source_identity,
+                    missing_identity=ligase_name,
+                    missing_display_id=ligase_name,
+                    missing_kind="ligase",
+                    required_stage="fatal",
+                    reason="Required ligase is missing from inventory.",
+                )
+            )
+
+        if missing_inputs:
+            return StageResult(
+                id=f"{request.id}:{BuildStage.ASSEMBLY_LVL2.value}",
+                stage=BuildStage.ASSEMBLY_LVL2,
+                status=StageStatus.BLOCKED,
+                request_ids=[request.id],
+                missing_inputs=missing_inputs,
+                protocol_artifacts=artifacts,
+                logs=[
+                    *warning_logs,
+                    f"Blocked lvl2 assembly for {request.id}; missing {len(missing_inputs)} required input(s).",
+                ],
+            )
+
+        product_identity = (
+            constraints.get("product_identity") or request.source_identity
+        )
+        product_display_id = (
+            constraints.get("product_display_id")
+            or request.source_display_id
+            or product_identity.rsplit("/", 1)[-1]
+        )
+        assembly_result = self.assembly_service.run(
+            AssemblyJob(
+                stage=BuildStage.ASSEMBLY_LVL2,
+                product_identity=product_identity,
+                product_display_id=product_display_id,
+                part_plasmids=list(route.selected_lvl1_plasmids),
+                backbone=route.backbone,
+                restriction_enzyme=restriction_enzyme,
+                ligase=ligase,
+                source_document=source_document,
+                target_document=target_document,
+            )
+        )
+
+        # Record the source design in each product's insert identities, then
+        # index the product in the inventory.
+        for product in assembly_result.products:
+            insert_identities = list(product.metadata.get("insert_identities", []))
+            if request.source_identity not in insert_identities:
+                insert_identities.append(request.source_identity)
+            product.metadata["insert_identities"] = insert_identities
+            product.metadata.setdefault("source_stage", BuildStage.ASSEMBLY_LVL2.value)
+            self.inventory.add_generated_product(product)
+
+        json_intermediate = assembly_route_to_pudu_json(
+            product_identity=product_identity,
+            part_plasmids=route.selected_lvl1_plasmids,
+            backbone=route.backbone,
+            restriction_enzyme=restriction_enzyme,
+        )
+
+        return StageResult(
+            id=f"{request.id}:{BuildStage.ASSEMBLY_LVL2.value}",
+            stage=BuildStage.ASSEMBLY_LVL2,
+            status=StageStatus.SUCCESS,
+            request_ids=[request.id],
+            products=assembly_result.products,
+            sbol_document=assembly_result.stage_document,
+            json_intermediate=json_intermediate,
+            protocol_artifacts=artifacts,
+            logs=[
+                *warning_logs,
+                f"Selected lvl2 route with {len(route.selected_lvl1_plasmids)} lvl1 plasmid(s).",
+                *assembly_result.logs,
+            ],
+        )
+
+    def _extract_region_identities(
+        self, module_definition: sbol2.ModuleDefinition, constraints: Mapping[str, Any]
+    ) -> list[str]:
+        """Return region identities from constraints or the module definition.
+
+        An explicit "engineered_region_identities" or "region_identities"
+        constraint wins; otherwise the definitions of the module's functional
+        components are used. A bare string is treated as a single identity.
+        """
+        for key in ("engineered_region_identities", "region_identities"):
+            values = constraints.get(key)
+            if not values:
+                continue
+            if isinstance(values, str):
+                # list() on a string would split the identity into characters.
+                return [values]
+            return list(values)
+
+        identities: list[str] = []
+        for functional_component in module_definition.functionalComponents:
+            definition = functional_component.definition
+            if definition:
+                identities.append(definition)
+        return identities
+
+    def _route_artifacts(
+        self, selected: Lvl2Route | None, rejected: tuple[Any, ...]
+    ) -> dict[str, Any]:
+        """Summarize the selected route and the first few rejected routes."""
+        reported = rejected[: self._MAX_REJECTED_ROUTES_REPORTED]
+        return {
+            "selected_route": self._route_to_dict(selected),
+            "rejected_routes": [self._route_to_dict(route) for route in reported],
+        }
+
+    def _route_to_dict(self, route: Lvl2Route | None) -> dict[str, Any] | None:
+        """Flatten a route into a dict for protocol artifacts; None stays None."""
+        if route is None:
+            return None
+        return {
+            "region_order": list(route.region_order),
+            "selected_lvl1_plasmids": [
+                p.identity for p in route.selected_lvl1_plasmids
+            ],
+            "missing_region_identities": list(route.missing_region_identities),
+            "score": {
+                "missing_required_products": route.score.missing_required_products,
+                "missing_domestications": route.score.missing_domestications,
+                "missing_lvl1_plasmids": route.score.missing_lvl1_plasmids,
+                "generated_or_planned_materials": route.score.generated_or_planned_materials,
+                "lower_material_state_penalty": route.score.lower_material_state_penalty,
+                "constraint_violations": route.score.constraint_violations,
+                "total_assemblies": route.score.total_assemblies,
+                "identity_tiebreak": list(route.score.identity_tiebreak),
+            },
+        }
diff --git a/tests/unit/stages/test_assembly_lvl2.py b/tests/unit/stages/test_assembly_lvl2.py
new file mode 100644
index 0000000..17466c7
--- /dev/null
+++ b/tests/unit/stages/test_assembly_lvl2.py
@@ -0,0 +1,219 @@
+import sbol2
+
+from buildcompiler.api import BuildOptions
+from buildcompiler.domain import (
+    BuildRequest,
+    BuildStage,
+    DesignKind,
+    IndexedBackbone,
+    IndexedPlasmid,
+    IndexedReagent,
+    MaterialState,
+    StageStatus,
+)
+from buildcompiler.inventory import Inventory
+from buildcompiler.sbol import AssemblySbolResult
+from buildcompiler.stages import AssemblyLvl2Stage
+
+
+class _FakeAssemblyService:
+    def __init__(self, products):
+        self.products = products
+
+    def run(self, job):
+        return AssemblySbolResult(
+            products=self.products,
+            stage_document=job.target_document,
+            activity_identity="https://example.org/activity/lvl2",
+            logs=["fake-lvl2-assembly-service-ran"],
+        )
+
+
+def _module_doc():
+    doc = sbol2.Document()
+    module = sbol2.ModuleDefinition("design_mod")
+    region1 = sbol2.ComponentDefinition("region1", sbol2.BIOPAX_DNA)
+    region2 = sbol2.ComponentDefinition("region2", sbol2.BIOPAX_DNA)
+    doc.addComponentDefinition(region1)
+    doc.addComponentDefinition(region2)
+    fc1 = module.functionalComponents.create("fc1")
+    fc1.definition = region1.identity
+    fc2 = module.functionalComponents.create("fc2")
+    fc2.definition = region2.identity
+    doc.addModuleDefinition(module)
+    return doc, module, [region1.identity, region2.identity]
+
+
+def _request(source_identity, constraints=None):
+    return BuildRequest(
+        id="req-lvl2",
+        stage=BuildStage.ASSEMBLY_LVL2,
+        source_identity=source_identity,
+        source_display_id="design_mod",
+        source_kind=DesignKind.MODULE_DEFINITION,
+        constraints=constraints or {},
+    )
+
+
+def _inventory(
+    region_identities=None,
+    *,
+    include_regions=True,
+    include_backbone=True,
+    include_reagents=True,
+):
+    plasmids = []
+    if include_regions and region_identities:
+        plasmids = [
+            IndexedPlasmid(
+                identity="https://example.org/plasmids/lvl1-r1",
+                metadata={"insert_identities": [region_identities[0]]},
+                state=MaterialState.ASSEMBLED,
+            ),
+            IndexedPlasmid(
+                identity="https://example.org/plasmids/lvl1-r2",
+                metadata={"insert_identities": [region_identities[1]]},
+                state=MaterialState.ASSEMBLED,
+            ),
+        ]
+    backbones = []
+    if include_backbone:
+        backbones = [
+            IndexedBackbone(
+                identity="https://example.org/backbones/lvl2",
+                metadata={"stage": BuildStage.ASSEMBLY_LVL2.value},
+            )
+        ]
+    reagents = []
+    if include_reagents:
+        reagents = [
+            IndexedReagent(
+                identity="https://example.org/reagents/bsaI",
+                name="BsaI",
+                reagent_type="restriction_enzyme",
+            ),
+            IndexedReagent(
+                identity="https://example.org/reagents/ligase",
+                name="T4_DNA_ligase",
+                reagent_type="ligase",
+            ),
+        ]
+    return Inventory(plasmids=plasmids, backbones=backbones, reagents=reagents)
+
+
+def test_assembly_lvl2_success_routes_and_indexes_generated_product():
+    doc, module, regions = _module_doc()
+    inv = _inventory(regions)
+    generated = IndexedPlasmid(
+        identity="https://example.org/plasmids/lvl2-product",
+        state=MaterialState.GENERATED,
+    )
+    stage = AssemblyLvl2Stage(
+        inventory=inv, assembly_service=_FakeAssemblyService([generated])
+    )
+
+    result = stage.run(
+        _request(module.identity), source_document=doc, target_document=sbol2.Document()
+    )
+
+    assert result.status == StageStatus.SUCCESS
+    assert result.products and result.products[0].identity == generated.identity
+    assert (
+        result.json_intermediate
+        and result.json_intermediate["Product"] == module.identity
+    )
+    assert result.protocol_artifacts["selected_route"] is not None
+    assert inv.find_lvl1_region_plasmids(module.identity)
+
+
+def test_assembly_lvl2_no_regions_failed():
+    doc = sbol2.Document()
+    module = sbol2.ModuleDefinition("empty_mod")
+    doc.addModuleDefinition(module)
+    stage = AssemblyLvl2Stage(inventory=_inventory([], include_regions=False))
+
+    result = stage.run(
+        _request(module.identity), source_document=doc, target_document=sbol2.Document()
+    )
+
+    assert result.status == StageStatus.FAILED
+
+
+def test_assembly_lvl2_missing_engineered_regions_promote_to_lvl1_blockers():
+    doc, module, regions = _module_doc()
+    stage = AssemblyLvl2Stage(inventory=_inventory(regions, include_regions=False))
+
+    result = stage.run(
+        _request(module.identity), source_document=doc, target_document=sbol2.Document()
+    )
+
+    assert result.status == StageStatus.BLOCKED
+    engineered = [
+        m for m in result.missing_inputs if m.missing_kind == "engineered_region"
+    ]
+    assert engineered
+    assert all(m.required_stage == BuildStage.ASSEMBLY_LVL1 for m in engineered)
+
+
+def test_assembly_lvl2_large_order_requires_opt_in_without_explicit_order():
+    options = BuildOptions()
+    options.planning.lvl2_search.max_exhaustive_region_count = 4
+    options.planning.lvl2_search.allow_large_order_search = False
+    stage = AssemblyLvl2Stage(
+        inventory=_inventory([], include_regions=False), options=options
+    )
+    constraints = {"region_identities": [f"https://example.org/r{i}" for i in range(5)]}
+
+    doc, module, _ = _module_doc()
+    result = stage.run(
+        _request(module.identity, constraints=constraints),
+        source_document=doc,
+        target_document=sbol2.Document(),
+    )
+
+    assert result.status == StageStatus.BLOCKED
+    assert result.protocol_artifacts["selected_route"] is None
+
+
+def test_assembly_lvl2_region_order_constraint_is_hard():
+    doc, module, regions = _module_doc()
+    inv = _inventory(regions)
+    stage = AssemblyLvl2Stage(inventory=inv, assembly_service=_FakeAssemblyService([]))
+
+    order = [regions[1], regions[0]]
+    result = stage.run(
+        _request(module.identity, constraints={"region_order": order}),
+        source_document=doc,
+        target_document=sbol2.Document(),
+    )
+
+    assert result.status == StageStatus.SUCCESS
+    assert result.protocol_artifacts["selected_route"]["region_order"] == order
+
+
+def test_assembly_lvl2_incomplete_region_order_falls_back_with_warning():
+    doc, module, regions = _module_doc()
+    inv = _inventory(regions)
+    stage = AssemblyLvl2Stage(inventory=inv, assembly_service=_FakeAssemblyService([]))
+
+    incomplete_order = [regions[0]]
+    result = stage.run(
+        _request(module.identity, constraints={"region_order": incomplete_order}),
+        source_document=doc,
+        target_document=sbol2.Document(),
+    )
+
+    assert result.status == StageStatus.SUCCESS
+    assert result.protocol_artifacts["selected_route"] is not None
+    assert any("Unable to satisfy region_order constraint" in log for log in result.logs)
+
+
+def test_assembly_lvl2_single_string_region_constraint_is_not_split():
+    _, module, _ = _module_doc()
+    stage = AssemblyLvl2Stage(inventory=_inventory([], include_regions=False))
+
+    identities = stage._extract_region_identities(
+        module, {"region_identities": "https://example.org/r0"}
+    )
+
+    assert identities == ["https://example.org/r0"]