diff --git a/src/buildcompiler/planning/__init__.py b/src/buildcompiler/planning/__init__.py
index 5dd1576..483aae4 100644
--- a/src/buildcompiler/planning/__init__.py
+++ b/src/buildcompiler/planning/__init__.py
@@ -1,6 +1,14 @@
 """Planning package exports."""
 
+from .domestication import DomesticationPlan, DomesticationPlanner, SequenceEditProposal
 from .full_build_planner import FullBuildPlanner
 from .models import BuildPlan, UnsupportedPlanningRecord
 
-__all__ = ["BuildPlan", "UnsupportedPlanningRecord", "FullBuildPlanner"]
+__all__ = [
+    "BuildPlan",
+    "UnsupportedPlanningRecord",
+    "FullBuildPlanner",
+    "DomesticationPlan",
+    "DomesticationPlanner",
+    "SequenceEditProposal",
+]
diff --git a/src/buildcompiler/planning/domestication.py b/src/buildcompiler/planning/domestication.py
new file mode 100644
index 0000000..00aea97
--- /dev/null
+++ b/src/buildcompiler/planning/domestication.py
@@ -0,0 +1,122 @@
+"""Deterministic domestication planning helpers."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+import sbol2
+
+from buildcompiler.domain import BuildStage, BuildWarning
+from buildcompiler.planning.validation import classify_part_role
+
+
+@dataclass
+class SequenceEditProposal:
+    """One proposed edit that removes an internal restriction site.
+
+    A proposal is a suggestion only: the planner never rewrites the source
+    sequence, and ``approved`` keeps its ``False`` default until a caller
+    changes it.
+    """
+
+    source_identity: str  # identity of the part the site was found in
+    enzyme_name: str  # enzyme whose recognition site matched
+    site_sequence: str  # recognition sequence that matched
+    position: int  # 0-based offset of the match in the upper-cased sequence
+    original_sequence: str  # bases currently present at ``position``
+    proposed_sequence: str  # suggested replacement for ``original_sequence``
+    reason: str  # human-readable justification for the edit
+    approved: bool = False
+
+
+@dataclass
+class DomesticationPlan:
+    """Planner output describing how a single part should be domesticated."""
+
+    part_identity: str
+    part_display_id: str | None
+    part_role: str
+    backbone_identity: str | None = None
+    restriction_enzyme_name: str = "BsaI"
+    ligase_name: str = "T4_DNA_ligase"
+    sequence_edit_proposals: list[SequenceEditProposal] = field(default_factory=list)
+    warnings: list[BuildWarning] = field(default_factory=list)
+
+
+class DomesticationPlanner:
+    """Pure planner for domestication requirements for a single part."""
+
+    # BsaI recognition site followed by its reverse complement, so a site on
+    # either strand is reported.
+    _BSAI_SITES = ("GGTCTC", "GAGACC")
+
+    def plan(self, part_component: sbol2.ComponentDefinition) -> DomesticationPlan:
+        """Build the domestication plan for ``part_component``.
+
+        Args:
+            part_component: Part to plan for; it must belong to a document
+                that also holds its sequence.
+
+        Returns:
+            A plan whose ``sequence_edit_proposals`` holds one entry per
+            internal BsaI site, ordered by position.
+
+        Raises:
+            ValueError: If the part role is unsupported or no usable sequence
+                can be resolved.
+        """
+        part_role = classify_part_role(part_component)
+        if part_role is None:
+            raise ValueError(
+                f"Unsupported domestication role for part {part_component.identity}; expected promoter/rbs/cds/terminator"
+            )
+
+        sequence = self._resolve_sequence(part_component)
+        return DomesticationPlan(
+            part_identity=part_component.identity,
+            part_display_id=part_component.displayId,
+            part_role=part_role,
+            sequence_edit_proposals=self._propose_site_edits(part_component.identity, sequence),
+        )
+
+    def _propose_site_edits(self, source_identity: str, sequence: str) -> list[SequenceEditProposal]:
+        """Return one proposal per BsaI site in ``sequence``, ordered by position."""
+        proposals: list[SequenceEditProposal] = []
+        for site in self._BSAI_SITES:
+            index = sequence.find(site)
+            while index >= 0:
+                proposals.append(
+                    SequenceEditProposal(
+                        source_identity=source_identity,
+                        enzyme_name="BsaI",
+                        site_sequence=site,
+                        position=index,
+                        original_sequence=site,
+                        # Swapping the last base is enough to break the site.
+                        proposed_sequence=f"{site[:-1]}A",
+                        reason="Internal BsaI recognition site detected; human-reviewed edit required.",
+                    )
+                )
+                # Step one base, not len(site), so no occurrence is skipped.
+                index = sequence.find(site, index + 1)
+        # The scan runs once per strand pattern; sort so reviewers see the
+        # sites in sequence order instead of grouped by pattern.
+        proposals.sort(key=lambda proposal: proposal.position)
+        return proposals
+
+    def _resolve_sequence(self, part_component: sbol2.ComponentDefinition) -> str:
+        """Return the first non-empty sequence of the part, upper-cased.
+
+        Raises:
+            ValueError: If the part has no document or none of its sequence
+                references resolves to non-empty string elements.
+        """
+        document = part_component.doc
+        # Test against None explicitly: a document's truthiness is not a
+        # reliable "is attached" signal.
+        if document is not None:
+            for sequence_ref in part_component.sequences:
+                elements = getattr(document.find(sequence_ref), "elements", None)
+                if isinstance(elements, str) and elements:
+                    return elements.upper()
+        raise ValueError(f"Part {part_component.identity} is missing a usable DNA sequence")
diff --git a/src/buildcompiler/sbol/__init__.py b/src/buildcompiler/sbol/__init__.py
index d9e6f2c..7539882 100644
--- a/src/buildcompiler/sbol/__init__.py
+++ b/src/buildcompiler/sbol/__init__.py
@@ -1,12 +1,16 @@
 """SBOL package exports for clean architecture contracts."""
 
 from .assembly import AssemblyJob, AssemblySbolResult, AssemblyService
+from .domestication import DomesticationJob, DomesticationSbolResult, DomesticationService
 from .resolver import PullPolicy, SbolResolver
 
 __all__ = [
     "AssemblyJob",
     "AssemblySbolResult",
     "AssemblyService",
+    "DomesticationJob",
+    "DomesticationSbolResult",
+    "DomesticationService",
     "PullPolicy",
     "SbolResolver",
 ]
diff --git a/src/buildcompiler/sbol/domestication.py b/src/buildcompiler/sbol/domestication.py
new file mode 100644
index 0000000..2e1014f
--- /dev/null
+++ b/src/buildcompiler/sbol/domestication.py
@@ -0,0 +1,112 @@
+"""SBOL domestication service."""
+
+from __future__ import annotations
+
+import copy
+from dataclasses import dataclass, field
+from typing import Any
+
+import sbol2
+
+from buildcompiler.domain import IndexedBackbone, IndexedPlasmid, IndexedReagent, MaterialState
+
+
+@dataclass
+class DomesticationJob:
+    """Everything the service needs to emit one domesticated product."""
+
+    part_identity: str
+    part_display_id: str | None
+    part_component: Any  # must be an sbol2.ComponentDefinition; checked in run()
+    backbone: IndexedBackbone
+    restriction_enzyme: IndexedReagent
+    ligase: IndexedReagent
+    source_document: Any
+    target_document: Any  # receives the generated SBOL objects
+    sequence_edit_proposals: list[Any] = field(default_factory=list)
+
+
+@dataclass
+class DomesticationSbolResult:
+    """Generated product plus the document and artifacts produced for it."""
+
+    product: IndexedPlasmid
+    stage_document: Any
+    artifacts: dict[str, Any] = field(default_factory=dict)
+    logs: list[str] = field(default_factory=list)
+
+
+class DomesticationService:
+    """Creates the SBOL objects that represent a domesticated (lvl0) part."""
+
+    def run(self, job: DomesticationJob) -> DomesticationSbolResult:
+        """Add the domesticated product to ``job.target_document``.
+
+        A ``ComponentDefinition`` and an ``Implementation`` that points at it
+        are added to the target document, and the provenance is recorded in
+        the product metadata.
+
+        Args:
+            job: Part, backbone, reagents and documents for this run.
+
+        Returns:
+            The generated plasmid, the target document and a log line.
+
+        Raises:
+            ValueError: If ``job.part_component`` is not an
+                ``sbol2.ComponentDefinition``.
+        """
+        component = self._ensure_component(job.part_component)
+        # Last path segment of the identity stands in for a missing displayId.
+        fallback_label = component.displayId or component.identity.rsplit("/", 1)[-1]
+        product_identity = f"{component.identity}/domesticated"
+        product_display_id = f"{job.part_display_id or fallback_label}_lvl0"
+
+        # NOTE(review): the product copies the roles only; no sequence is
+        # attached and the recorded edit proposals are not applied here.
+        product_component = sbol2.ComponentDefinition(product_identity)
+        product_component.displayId = product_display_id
+        product_component.name = f"Domesticated {fallback_label}"
+        product_component.roles = list(component.roles)
+        job.target_document.addComponentDefinition(product_component)
+
+        product_implementation = sbol2.Implementation(f"{product_identity}_implementation")
+        product_implementation.built = product_component.identity
+        job.target_document.addImplementation(product_implementation)
+
+        metadata = {
+            "source_stage": "domestication",
+            "source_part_identity": job.part_identity,
+            "insert_identities": [job.part_identity],
+            "implementation_identity": product_implementation.identity,
+            "backbone_identity": job.backbone.identity,
+            "restriction_enzyme": {
+                "identity": job.restriction_enzyme.identity,
+                "name": job.restriction_enzyme.name,
+            },
+            "ligase": {"identity": job.ligase.identity, "name": job.ligase.name},
+            "sequence_edit_proposals": [dict(vars(proposal)) for proposal in job.sequence_edit_proposals],
+        }
+        product = IndexedPlasmid(
+            identity=product_component.identity,
+            display_id=product_display_id,
+            name=product_component.name,
+            state=MaterialState.GENERATED,
+            roles=list(component.roles),
+            metadata=metadata,
+            sbol_component=product_component,
+        )
+        return DomesticationSbolResult(
+            product=product,
+            stage_document=job.target_document,
+            # Deep copy so later edits to the product metadata cannot leak
+            # into the reported artifacts (or the other way round).
+            artifacts={"domestication": copy.deepcopy(metadata)},
+            logs=[f"Generated domesticated lvl0 product {product_identity}."],
+        )
+
+    def _ensure_component(self, component: Any) -> sbol2.ComponentDefinition:
+        """Return ``component`` unchanged, or raise if it is not a ComponentDefinition."""
+        if not isinstance(component, sbol2.ComponentDefinition):
+            raise ValueError("DomesticationJob.part_component must be an sbol2.ComponentDefinition")
+        return component
diff --git a/src/buildcompiler/stages/__init__.py b/src/buildcompiler/stages/__init__.py
index b024f42..d354b34 100644
--- a/src/buildcompiler/stages/__init__.py
+++ b/src/buildcompiler/stages/__init__.py
@@ -1,5 +1,6 @@
 """Stage exports."""
 
 from .assembly_lvl1 import AssemblyLvl1Stage
+from .domestication import DomesticationStage
 
-__all__ = ["AssemblyLvl1Stage"]
+__all__ = ["AssemblyLvl1Stage", "DomesticationStage"]
diff --git a/src/buildcompiler/stages/domestication.py b/src/buildcompiler/stages/domestication.py
new file mode 100644
index 0000000..62e9618
--- /dev/null
+++ b/src/buildcompiler/stages/domestication.py
@@ -0,0 +1,239 @@
+"""Domestication stage orchestration."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import sbol2
+
+from buildcompiler.api import BuildOptions, ProtocolMode
+from buildcompiler.domain import (
+    ApprovalStatus,
+    BuildRequest,
+    BuildStage,
+    BuildWarning,
+    MissingBuildInput,
+    RequiredApproval,
+    StageResult,
+    StageStatus,
+)
+from buildcompiler.inventory import Inventory
+from buildcompiler.planning.domestication import DomesticationPlan, DomesticationPlanner
+from buildcompiler.sbol.domestication import DomesticationJob, DomesticationService
+
+
+class DomesticationStage:
+    """Runs planning, input checks, approval gating and SBOL generation for one part."""
+
+    def __init__(
+        self,
+        *,
+        inventory: Inventory,
+        domestication_planner: DomesticationPlanner | None = None,
+        domestication_service: DomesticationService | None = None,
+        options: BuildOptions | None = None,
+    ) -> None:
+        """Store the collaborators, creating a default for each one omitted."""
+        self.inventory = inventory
+        # Compare with None instead of relying on truthiness, so a supplied
+        # object is never swapped for a default just because it is falsy.
+        self.domestication_planner = (
+            DomesticationPlanner() if domestication_planner is None else domestication_planner
+        )
+        self.domestication_service = (
+            DomesticationService() if domestication_service is None else domestication_service
+        )
+        self.options = BuildOptions() if options is None else options
+
+    def run(self, request: BuildRequest, *, source_document: sbol2.Document, target_document: sbol2.Document) -> StageResult:
+        """Domesticate the part named by ``request.source_identity``.
+
+        Returns:
+            ``FAILED`` when the part is not found or cannot be planned,
+            ``BLOCKED`` when inventory inputs or approvals are missing, and
+            ``SUCCESS`` with the generated product otherwise.
+        """
+        stage_id = f"{request.id}:{BuildStage.DOMESTICATION.value}"
+        part_component = self._find_part_component(source_document, request.source_identity)
+        if part_component is None:
+            return StageResult(
+                id=stage_id,
+                stage=BuildStage.DOMESTICATION,
+                status=StageStatus.FAILED,
+                request_ids=[request.id],
+                logs=[f"Failed domestication: source part {request.source_identity} not found."],
+            )
+        try:
+            plan = self.domestication_planner.plan(part_component)
+        except ValueError as exc:
+            return StageResult(
+                id=stage_id,
+                stage=BuildStage.DOMESTICATION,
+                status=StageStatus.FAILED,
+                request_ids=[request.id],
+                warnings=[
+                    BuildWarning(
+                        code="domestication.invalid_input",
+                        message=str(exc),
+                        stage=BuildStage.DOMESTICATION,
+                        source_identity=request.source_identity,
+                    )
+                ],
+                logs=[str(exc)],
+            )
+
+        reagents = self.options.reagents
+        missing_inputs: list[MissingBuildInput] = []
+        backbone = self.inventory.find_backbone(stage=BuildStage.DOMESTICATION)
+        if backbone is None:
+            missing_inputs.append(
+                MissingBuildInput(
+                    BuildStage.DOMESTICATION,
+                    request.source_identity,
+                    "backbone",
+                    None,
+                    "backbone",
+                    "fatal",
+                    "No domestication backbone found in inventory.",
+                )
+            )
+
+        # NOTE(review): the planner scans for BsaI sites whichever enzyme is
+        # configured here - confirm the two are meant to stay in sync.
+        restriction = self.inventory.find_restriction_enzyme(reagents.default_restriction_enzyme)
+        if restriction is None:
+            missing_inputs.append(
+                MissingBuildInput(
+                    BuildStage.DOMESTICATION,
+                    request.source_identity,
+                    reagents.default_restriction_enzyme,
+                    reagents.default_restriction_enzyme,
+                    "restriction_enzyme",
+                    "fatal",
+                    "Required domestication restriction enzyme missing from inventory.",
+                )
+            )
+
+        ligase = self.inventory.find_ligase(reagents.default_ligase)
+        if ligase is None:
+            missing_inputs.append(
+                MissingBuildInput(
+                    BuildStage.DOMESTICATION,
+                    request.source_identity,
+                    reagents.default_ligase,
+                    reagents.default_ligase,
+                    "ligase",
+                    "fatal",
+                    "Required domestication ligase missing from inventory.",
+                )
+            )
+
+        if missing_inputs:
+            return StageResult(
+                id=stage_id,
+                stage=BuildStage.DOMESTICATION,
+                status=StageStatus.BLOCKED,
+                request_ids=[request.id],
+                missing_inputs=missing_inputs,
+                logs=["Domestication blocked on missing backbone/reagents."],
+                protocol_artifacts={"sequence_edit_proposals": self._serialize_proposals(plan)},
+            )
+
+        approvals = self._required_approvals(request, plan)
+        if approvals:
+            return StageResult(
+                id=stage_id,
+                stage=BuildStage.DOMESTICATION,
+                status=StageStatus.BLOCKED,
+                request_ids=[request.id],
+                required_approvals=approvals,
+                protocol_artifacts={"sequence_edit_proposals": self._serialize_proposals(plan)},
+                logs=["Domestication blocked pending sequence-edit approval."],
+            )
+
+        result = self.domestication_service.run(
+            DomesticationJob(
+                part_identity=request.source_identity,
+                part_display_id=request.source_display_id,
+                part_component=part_component,
+                backbone=backbone,
+                restriction_enzyme=restriction,
+                ligase=ligase,
+                source_document=source_document,
+                target_document=target_document,
+                sequence_edit_proposals=plan.sequence_edit_proposals,
+            )
+        )
+        self.inventory.add_generated_product(result.product)
+        return StageResult(
+            id=stage_id,
+            stage=BuildStage.DOMESTICATION,
+            status=StageStatus.SUCCESS,
+            request_ids=[request.id],
+            products=[result.product],
+            sbol_document=result.stage_document,
+            protocol_artifacts={"sequence_edit_proposals": self._serialize_proposals(plan), **result.artifacts},
+            logs=result.logs,
+        )
+
+    @staticmethod
+    def _serialize_proposals(plan: DomesticationPlan) -> list[dict[str, Any]]:
+        """Return a fresh list of plain-dict copies of the plan's proposals."""
+        return [dict(vars(proposal)) for proposal in plan.sequence_edit_proposals]
+
+    @staticmethod
+    def _find_part_component(
+        source_document: sbol2.Document, source_identity: str
+    ) -> sbol2.ComponentDefinition | None:
+        """Look the part up by exact URI first, then by looser identity matches."""
+        found = source_document.find(source_identity)
+        if isinstance(found, sbol2.ComponentDefinition):
+            return found
+        for candidate in source_document.componentDefinitions:
+            # Either URI may be unset on a component; treat that as "no match"
+            # instead of failing on ``None.endswith``.
+            identity = candidate.identity or ""
+            persistent_identity = candidate.persistentIdentity or ""
+            if (
+                source_identity in (candidate.identity, candidate.persistentIdentity, candidate.displayId)
+                # Only version "1" is recognised as a version suffix.
+                or identity.endswith(f"/{source_identity}/1")
+                or persistent_identity.endswith(f"/{source_identity}")
+            ):
+                return candidate
+        return None
+
+    def _required_approvals(self, request: BuildRequest, plan: DomesticationPlan) -> list[RequiredApproval]:
+        """Return the approval still needed before the proposed edits may proceed.
+
+        Nothing is required when the plan proposes no edits. Otherwise approval
+        is required when sequence edits are disabled in the options, or when a
+        protocol mode is active and neither the process nor this approval id
+        has been approved.
+        """
+        if not plan.sequence_edit_proposals:
+            return []
+        approval_id = f"domestication-edit:{request.source_identity}"
+        granted = self.options.approvals
+        explicitly_approved = (
+            "domestication_sequence_edit" in granted.approved_processes
+            or approval_id in granted.approved_approval_ids
+        )
+        allow_edits = self.options.domestication.allow_sequence_domestication_edits
+        protocol_mode_active = self.options.protocol.mode != ProtocolMode.NONE
+        # NOTE(review): while edits are disabled an explicit approval does not
+        # unblock the stage - confirm that this is intended.
+        if allow_edits and (explicitly_approved or not protocol_mode_active):
+            return []
+        return [
+            RequiredApproval(
+                status=ApprovalStatus.REQUIRED,
+                process="domestication_sequence_edit",
+                reason="Sequence edits were proposed and require explicit human approval.",
+                metadata={
+                    "approval_id": approval_id,
+                    "part_identity": request.source_identity,
+                    "proposals": self._serialize_proposals(plan),
+                },
+            )
+        ]
diff --git a/tests/unit/planning/test_domestication_planner.py b/tests/unit/planning/test_domestication_planner.py
new file mode 100644
index 0000000..031c1dd
--- /dev/null
+++ b/tests/unit/planning/test_domestication_planner.py
@@ -0,0 +1,61 @@
+import sbol2
+import pytest
+
+from buildcompiler.constants import PART_ROLES
+from buildcompiler.planning import DomesticationPlanner
+
+
+def _part(identity: str, role: str, sequence: str | None = None) -> sbol2.ComponentDefinition:
+    """Build a part (optionally with a sequence) inside a fresh document."""
+    doc = sbol2.Document()
+    part = sbol2.ComponentDefinition(identity)
+    part.roles = role
+    doc.addComponentDefinition(part)
+    if sequence is not None:
+        seq = sbol2.Sequence(f"{identity}_seq")
+        seq.elements = sequence
+        seq.encoding = sbol2.SBOL_ENCODING_IUPAC
+        doc.addSequence(seq)
+        part.sequences = seq.identity
+    return part
+
+
+def test_supported_role_produces_plan() -> None:
+    planner = DomesticationPlanner()
+    part = _part("https://example.org/p", sorted(PART_ROLES)[0], "ATGCGT")
+    plan = planner.plan(part)
+    assert plan.part_identity == part.identity
+    assert plan.part_role in {"promoter", "rbs", "cds", "terminator"}
+
+
+def test_unsupported_role_fails_structurally() -> None:
+    planner = DomesticationPlanner()
+    part = _part("https://example.org/x", "https://example.org/unsupported", "ATGC")
+    with pytest.raises(ValueError, match="Unsupported domestication role"):
+        planner.plan(part)
+
+
+def test_missing_sequence_fails() -> None:
+    planner = DomesticationPlanner()
+    part = _part("https://example.org/p2", sorted(PART_ROLES)[0])
+    with pytest.raises(ValueError, match="missing a usable DNA sequence"):
+        planner.plan(part)
+
+
+def test_bsai_sites_create_edit_proposals_without_mutating_sequence() -> None:
+    planner = DomesticationPlanner()
+    original = "AAAGGTCTCTTT"
+    part = _part("https://example.org/p3", sorted(PART_ROLES)[0], original)
+    plan = planner.plan(part)
+    assert len(plan.sequence_edit_proposals) == 1
+    assert plan.sequence_edit_proposals[0].site_sequence == "GGTCTC"
+    seq = part.doc.find(part.sequences[0])
+    assert seq.elements == original
+
+
+def test_sites_on_both_strands_are_reported_in_position_order() -> None:
+    planner = DomesticationPlanner()
+    part = _part("https://example.org/p4", sorted(PART_ROLES)[0], "GAGACCAAAGGTCTC")
+    plan = planner.plan(part)
+    found = [(item.position, item.site_sequence) for item in plan.sequence_edit_proposals]
+    assert found == [(0, "GAGACC"), (9, "GGTCTC")]
diff --git a/tests/unit/sbol/test_domestication_service.py b/tests/unit/sbol/test_domestication_service.py
new file mode 100644
index 0000000..0ddfeae
--- /dev/null
+++ b/tests/unit/sbol/test_domestication_service.py
@@ -0,0 +1,43 @@
+import sbol2
+
+from buildcompiler.domain import IndexedBackbone, IndexedReagent, MaterialState
+from buildcompiler.planning import SequenceEditProposal
+from buildcompiler.sbol import DomesticationJob, DomesticationService
+
+
+def test_domestication_service_returns_generated_plasmid_with_provenance() -> None:
+    source = sbol2.Document()
+    target = sbol2.Document()
+    part = sbol2.ComponentDefinition("https://example.org/part")
+    part.roles = ["https://example.org/role1", "https://example.org/role2"]
+    source.addComponentDefinition(part)
+
+    service = DomesticationService()
+    result = service.run(
+        DomesticationJob(
+            part_identity=part.identity,
+            part_display_id="part",
+            part_component=part,
+            backbone=IndexedBackbone("https://example.org/bb", metadata={"stage": "domestication"}),
+            restriction_enzyme=IndexedReagent("https://example.org/bsai", name="BsaI", reagent_type="restriction_enzyme"),
+            ligase=IndexedReagent("https://example.org/lig", name="T4_DNA_ligase", reagent_type="ligase"),
+            source_document=source,
+            target_document=target,
+            sequence_edit_proposals=[
+                SequenceEditProposal(part.identity, "BsaI", "GGTCTC", 5, "GGTCTC", "GGTCTA", "reason")
+            ],
+        )
+    )
+
+    assert result.product.state == MaterialState.GENERATED
+    assert result.product.metadata["source_part_identity"] == part.identity
+    assert result.product.metadata["insert_identities"] == [part.identity]
+    assert result.product.roles == list(part.roles)
+    implementation_identity = result.product.metadata["implementation_identity"]
+    implementation = target.find(implementation_identity)
+    assert isinstance(implementation, sbol2.Implementation)
+    assert implementation.built == result.product.identity
+    assert result.logs
+    # The reported artifacts must be a copy, not an alias, of the product metadata.
+    assert result.artifacts["domestication"] == result.product.metadata
+    assert result.artifacts["domestication"] is not result.product.metadata
diff --git a/tests/unit/stages/test_domestication_stage.py b/tests/unit/stages/test_domestication_stage.py
new file mode 100644
index 0000000..8b7b993
--- /dev/null
+++ b/tests/unit/stages/test_domestication_stage.py
@@ -0,0 +1,80 @@
+import sbol2
+
+from buildcompiler.api import BuildOptions, ProtocolMode
+from buildcompiler.domain import BuildRequest, BuildStage, DesignKind, IndexedBackbone, IndexedReagent, StageStatus
+from buildcompiler.inventory import Inventory
+from buildcompiler.stages import DomesticationStage
+
+
+def _request(identity: str = "part") -> BuildRequest:
+    """Build the domestication request shared by the stage tests."""
+    return BuildRequest("req-1", BuildStage.DOMESTICATION, identity, "part", DesignKind.COMPONENT_DEFINITION)
+
+
+def _source_doc(identity: str = "part", seq: str = "ATGGGTCTCAA") -> sbol2.Document:
+    """Build a document holding one part and its sequence."""
+    doc = sbol2.Document()
+    part = sbol2.ComponentDefinition(identity)
+    part.roles = "http://identifiers.org/so/SO:0000167"
+    seq_obj = sbol2.Sequence(f"{identity}_seq")
+    seq_obj.elements = seq
+    seq_obj.encoding = sbol2.SBOL_ENCODING_IUPAC
+    doc.addSequence(seq_obj)
+    part.sequences = seq_obj.identity
+    doc.addComponentDefinition(part)
+    return doc
+
+
+def _inventory(with_backbone=True, with_enzyme=True, with_ligase=True) -> Inventory:
+    """Build an inventory, leaving out whichever inputs are switched off."""
+    backbones = [IndexedBackbone("bb", metadata={"stage": BuildStage.DOMESTICATION.value})] if with_backbone else []
+    reagents = []
+    if with_enzyme:
+        reagents.append(IndexedReagent("e1", name="BsaI", reagent_type="restriction_enzyme"))
+    if with_ligase:
+        reagents.append(IndexedReagent("l1", name="T4_DNA_ligase", reagent_type="ligase"))
+    return Inventory(backbones=backbones, reagents=reagents)
+
+
+def test_blocked_when_backbone_missing() -> None:
+    stage = DomesticationStage(inventory=_inventory(with_backbone=False))
+    result = stage.run(_request(), source_document=_source_doc(), target_document=sbol2.Document())
+    assert result.status == StageStatus.BLOCKED
+    assert result.missing_inputs[0].missing_kind == "backbone"
+
+
+def test_blocked_when_reagents_missing() -> None:
+    stage = DomesticationStage(inventory=_inventory(with_enzyme=False, with_ligase=False))
+    result = stage.run(_request(), source_document=_source_doc(), target_document=sbol2.Document())
+    kinds = {item.missing_kind for item in result.missing_inputs}
+    assert result.status == StageStatus.BLOCKED
+    assert "restriction_enzyme" in kinds
+    assert "ligase" in kinds
+
+
+def test_default_options_block_for_sequence_edit_approval() -> None:
+    stage = DomesticationStage(inventory=_inventory())
+    result = stage.run(_request(), source_document=_source_doc(), target_document=sbol2.Document())
+    assert result.status == StageStatus.BLOCKED
+    assert result.required_approvals
+
+
+def test_protocol_mode_requires_explicit_process_or_approval_id() -> None:
+    options = BuildOptions()
+    options.domestication.allow_sequence_domestication_edits = True
+    options.protocol.mode = ProtocolMode.MANUAL
+    stage = DomesticationStage(inventory=_inventory(), options=options)
+    blocked = stage.run(_request(), source_document=_source_doc(), target_document=sbol2.Document())
+    assert blocked.status == StageStatus.BLOCKED
+
+    options.approvals.approved_processes.add("domestication_sequence_edit")
+    stage2 = DomesticationStage(inventory=_inventory(), options=options)
+    ok = stage2.run(_request(), source_document=_source_doc(), target_document=sbol2.Document())
+    assert ok.status == StageStatus.SUCCESS
+    assert ok.products[0].metadata["insert_identities"] == ["part"]
+
+
+def test_failed_when_source_part_is_not_in_the_document() -> None:
+    stage = DomesticationStage(inventory=_inventory())
+    result = stage.run(_request("absent"), source_document=_source_doc(), target_document=sbol2.Document())
+    assert result.status == StageStatus.FAILED