diff --git a/notebooks/full_build_quickstart.ipynb b/notebooks/full_build_quickstart.ipynb new file mode 100644 index 0000000..efbc33b --- /dev/null +++ b/notebooks/full_build_quickstart.ipynb @@ -0,0 +1,193 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# BuildCompiler `full_build(...)` Quickstart\n", + "\n", + "This notebook shows how to run the high-level `BuildCompiler.full_build(...)` workflow end-to-end.\n", + "\n", + "The orchestrated stages are:\n", + "1. domestication (only when missing parts are detected)\n", + "2. assembly level 1\n", + "3. transformation\n", + "4. plating\n", + "\n", + "`assembly_lvl2` is intentionally skipped for now and recorded as skipped in the output manifest." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1) Imports and setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import sbol2\n", + "\n", + "from buildcompiler.buildcompiler import BuildCompiler" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2) Load an SBOL design document\n", + "\n", + "Replace the path below with your own SBOL file containing one or more abstract designs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "doc = sbol2.Document()\n", + "# Example: doc.read(\"path/to/your/designs.xml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3) Construct the compiler\n", + "\n", + "Provide one or more SynBioHub collections that contain available parts/backbones/implementations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "compiler = BuildCompiler(\n", + " collections=[\n", + " # \"https://synbiohub.org/public/example_collection\",\n", + " ],\n", + " sbh_registry=\"https://synbiohub.org\",\n", + " auth_token=\"\",\n", + " sbol_doc=doc,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4) Full build with concrete designs (list of `ComponentDefinition`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example lookup by displayId (adjust to your document)\n", + "# design_1 = doc.getComponentDefinition(\"design_1\")\n", + "# design_2 = doc.getComponentDefinition(\"design_2\")\n", + "# designs = [design_1, design_2]\n", + "\n", + "# result = compiler.full_build(\n", + "# designs=designs,\n", + "# results_dir=Path(\"results/full_build\"),\n", + "# chassis_name=\"E_coli_DH5alpha\",\n", + "# protocol_type=\"manual\",\n", + "# plating_params={\n", + "# \"incubation_temperature_c\": 37,\n", + "# \"incubation_time_h\": 16,\n", + "# },\n", + "# product_name_prefix=\"build\",\n", + "# overwrite=True,\n", + "# )\n", + "\n", + "# print(result[\"status\"])\n", + "# print(result[\"manifest_path\"])\n", + "# print(result[\"zip_path\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5) Full build with a `CombinatorialDerivation`\n", + "\n", + "Use this when your input is combinatorial. `full_build(...)` expands the derivation into concrete variants automatically." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# derivation = doc.combinatorialderivations[0]\n", + "\n", + "# combo_result = compiler.full_build(\n", + "# designs=derivation,\n", + "# results_dir=Path(\"results/full_build_combinatorial\"),\n", + "# chassis_name=\"E_coli_DH5alpha\",\n", + "# protocol_type=\"automated\",\n", + "# plating_params={\n", + "# \"replicates\": 1,\n", + "# \"number_dilutions\": 1,\n", + "# \"volume_colony\": 6,\n", + "# \"thermocycler_starting_well\": 0,\n", + "# },\n", + "# product_name_prefix=\"combo_build\",\n", + "# overwrite=True,\n", + "# )\n", + "\n", + "# print(combo_result[\"status\"])\n", + "# print(combo_result[\"manifest_path\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6) Return shape and generated files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Top-level keys in returned result:\n", + "# stage, status, results_dir, zip_path, manifest_path, sbol_path,\n", + "# inputs, domestication, assembly_lvl1, transformation, plating, skipped, errors\n", + "\n", + "# Files written under `results_dir`:\n", + "# - full_build_manifest.json\n", + "# - full_build_results.zip\n", + "# - sbol/full_build.xml\n", + "# - plating/transformation artifacts under domestication/* and assembly_lvl1/*" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 7e4005f..c1e646a 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -1,13 +1,23 @@ import sbol2 +import json import random +import re +import shutil import warnings import urllib.parse +import csv from pathlib import Path from typing 
import Any, Dict, List from buildcompiler.plasmid import Plasmid -from buildcompiler.sbol2build import Assembly, dna_componentdefinition_with_sequence +from buildcompiler.sbol2build import ( + Assembly, + Transformation as SBOL2Transformation, + dna_componentdefinition_with_sequence, +) from .abstract_translator import ( + enumerate_design_variants, + extract_combinatorial_design_parts, get_or_pull, get_compatible_plasmids, ) @@ -409,76 +419,32 @@ def transformation( chassis_module, chassis_impl = self._get_or_create_chassis( transformation_doc, chassis_name ) + normalized_plasmids = [] + for product in normalized_products: + indexed = self._get_indexed_plasmid(self.indexed_plasmids, product["plasmid"]) + if indexed is None: + indexed = type( + "TransformationPlasmid", + (), + { + "plasmid_definition": product["plasmid"], + "plasmid_implementations": [], + "name": product["plasmid"].displayId, + }, + )() + normalized_plasmids.append(indexed) + + sbol_outputs = SBOL2Transformation( + plasmids=normalized_plasmids, + chassis_name=chassis_name, + source_document=transformation_doc, + ).chemical_transformation() - sbol_outputs = [] robot_steps = [] logs = [] for index, product in enumerate(normalized_products, start=1): plasmid = product["plasmid"] - plasmid_impl = self._get_or_create_plasmid_implementation( - transformation_doc, plasmid - ) - transform_id = f"transform_{plasmid.displayId}_{index}" - - transformation_activity = sbol2.Activity(transform_id) - transformation_activity.name = f"Transform {chassis_name} with {plasmid.displayId}" - transformation_activity.types = "http://sbols.org/v2#build" - - chassis_usage = sbol2.Usage( - uri=f"{transform_id}_chassis_usage", - entity=chassis_impl.identity, - role="http://sbols.org/v2#build", - ) - plasmid_usage = sbol2.Usage( - uri=f"{transform_id}_plasmid_usage", - entity=plasmid_impl.identity, - role="http://sbols.org/v2#build", - ) - transformation_activity.usages = [chassis_usage, plasmid_usage] - - transformed_strain 
= sbol2.ModuleDefinition( - f"{chassis_name}_with_{plasmid.displayId}" - ) - transformed_strain.roles = [ORGANISM_STRAIN] - transformed_strain.name = f"{chassis_name} transformed with {plasmid.displayId}" - - chassis_module_ref = sbol2.Module( - uri=f"{transformed_strain.displayId}_chassis_module" - ) - chassis_module_ref.definition = chassis_module.identity - plasmid_fc = sbol2.FunctionalComponent( - uri=f"{transformed_strain.displayId}_plasmid_fc" - ) - plasmid_fc.definition = plasmid.identity - - transformed_strain.modules = [chassis_module_ref] - transformed_strain.functionalComponents = [plasmid_fc] - - transformed_impl = sbol2.Implementation( - f"{transformed_strain.displayId}_impl" - ) - transformed_impl.built = transformed_strain.identity - transformed_impl.wasGeneratedBy = transformation_activity.identity - - for obj in ( - transformation_activity, - chassis_usage, - plasmid_usage, - transformed_strain, - chassis_module_ref, - plasmid_fc, - transformed_impl, - ): - self._add_if_absent(transformation_doc, obj) - - sbol_outputs.append( - { - "transformation_activity": transformation_activity.identity, - "transformed_strain_module": transformed_strain.identity, - "transformed_strain_implementation": transformed_impl.identity, - } - ) robot_steps.append( { "step": index, @@ -524,14 +490,19 @@ def plating( plating_doc: sbol2.Document | None = None, overwrite: bool = False, ) -> Dict[str, Any]: - """Generate a plated 96-well output and protocol artifacts.""" + """Generate plating layout artifacts and protocol metadata. + + This implementation is file/metadata oriented and does not create new + SBOL objects for plating. 
+ """ if protocol_type not in {"manual", "automated"}: raise ValueError("protocol_type must be one of: 'manual', 'automated'.") - if plating_doc is None: - plating_doc = self.sbol_doc advanced_params = advanced_params or {} + doc_ref = plating_doc or self.sbol_doc - normalized = normalize_plating_input(transformation_results, doc=plating_doc) + normalized = normalize_plating_input( + transformation_results, doc=doc_ref + ) if len(normalized) > 96: raise ValueError("plating supports up to 96 transformed strains.") @@ -540,69 +511,14 @@ def plating( results_path.mkdir(parents=True, exist_ok=True) plate_id = plate_name or "solid_96_well_plate" - plate_impl = sbol2.Implementation(plate_id) - plate_md = plating_doc.find("solid_96_well_plate_md") or sbol2.ModuleDefinition( - "solid_96_well_plate_md" - ) - plate_md.name = "Solid 96-well plate" - self._add_if_absent(plating_doc, plate_md) - plate_impl.built = plate_md.identity - self._add_if_absent(plating_doc, plate_impl) - - # Optional SBOLInventory integration with fallback behavior. 
- try: - from sbol_inventory import ( # type: ignore - make_solid_96_well_plate, - make_plated_strain, - place_in_plate, - ) - - inventory_enabled = True - inventory_plate = make_solid_96_well_plate( - uri=plate_impl.identity, plate_md_uri=plate_md.identity - ) - except Exception: - inventory_enabled = False - inventory_plate = None - - activity_id = f"plating_{protocol_type}_{plate_id}" - plating_activity = sbol2.Activity(activity_id) - plating_activity.name = f"Plating activity for {plate_id}" - plating_activity.types = "http://sbols.org/v2#build" - self._add_if_absent(plating_doc, plating_activity) - - agent_id = ( - "manual_plating_agent" - if protocol_type == "manual" - else "opentrons_plating_agent" - ) - agent = plating_doc.find(agent_id) or sbol2.Agent(agent_id) - agent.name = "Manual plating agent" if protocol_type == "manual" else "Opentrons plating agent" - self._add_if_absent(plating_doc, agent) - - plan_id = f"{plate_id}_{protocol_type}_plating_plan" - plan = plating_doc.find(plan_id) or sbol2.Plan(plan_id) - plan.name = f"{protocol_type.title()} plating plan for {plate_id}" - self._add_if_absent(plating_doc, plan) - - association = sbol2.Association( - uri=f"{activity_id}_association", - agent=agent.identity, - role="http://sbols.org/v2#build", - ) - association.plan = plan.identity - plating_activity.associations = [association] - self._add_if_absent(plating_doc, association) - plate_rows = [] plate_map = {} bacterium_locations = {} - plated_impls = [] for idx, entry in enumerate(normalized): well = wells[idx] source_impl_uri = entry.get("source_impl_uri") - source_impl = plating_doc.find(source_impl_uri) if source_impl_uri else None + source_impl = doc_ref.find(source_impl_uri) if source_impl_uri else None strain_module_uri = entry.get("strain_module_uri") if strain_module_uri is None and source_impl is not None: strain_module_uri = getattr(source_impl, "built", None) @@ -612,65 +528,31 @@ def plating( slug = parsed.path.split("/")[-1] if 
parsed.path else display_source slug = slug.replace("#", "_").replace(":", "_") - plated_module_id = f"{slug}_plated_{well}_md" - plated_module = plating_doc.find(plated_module_id) or sbol2.ModuleDefinition( - plated_module_id - ) - plated_module.roles = [ORGANISM_STRAIN] - plated_module.name = f"Plated strain {slug} at {well}" - if strain_module_uri: - plated_module.wasDerivedFrom = strain_module_uri - self._add_if_absent(plating_doc, plated_module) - plated_impl_id = f"{slug}_plated_{well}_impl" - plated_impl = plating_doc.find(plated_impl_id) or sbol2.Implementation( - plated_impl_id - ) - plated_impl.built = plated_module.identity - plated_impl.wasGeneratedBy = plating_activity.identity - if source_impl_uri: - plated_impl.wasDerivedFrom = source_impl_uri - self._add_if_absent(plating_doc, plated_impl) - plated_impls.append(plated_impl.identity) - - usage = sbol2.Usage( - uri=f"{activity_id}_usage_{idx+1}", - entity=source_impl_uri or plated_module.identity, - role=PLATING_ACTIVITY_ROLE, - ) - self._add_if_absent(plating_doc, usage) - current_usages = list(plating_activity.usages) - current_usages.append(usage) - plating_activity.usages = current_usages - - if inventory_enabled: - try: - inventory_plated = make_plated_strain( - uri=plated_impl.identity, - strain_md_uri=strain_module_uri or plated_module.identity, - design_uri=source_impl_uri, - ) - place_in_plate(inventory_plate, inventory_plated, well) - except Exception: - inventory_enabled = False - - plate_map[well] = plated_impl.identity - display_name = plated_module.displayId + plate_map[well] = plated_impl_id + display_name = plated_impl_id bacterium_locations[well] = display_name plate_rows.append( { "well": well, "source_transformed_strain_implementation": source_impl_uri, "strain_module": strain_module_uri, - "plated_strain_implementation": plated_impl.identity, + "plated_strain_implementation": plated_impl_id, "strain_display_name": display_name, } ) + plate_layout_csv = results_path / 
"plate_layout_dataframe.csv" + with plate_layout_csv.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=list(plate_rows[0].keys()) if plate_rows else ["well"]) + writer.writeheader() + for row in plate_rows: + writer.writerow(row) + plate_map_json_path = write_plate_map_json( results_path / "plate_map.json", { - "plate_implementation": plate_impl.identity, + "plate_implementation": plate_id, "protocol_type": protocol_type, "well_map": plate_rows, }, @@ -685,18 +567,23 @@ def plating( protocol_artifacts: Dict[str, Any] = { "plate_map_json": str(plate_map_json_path), "plate_map_csv": str(plate_map_csv_path), + "plate_layout_dataframe_csv": str(plate_layout_csv), "logs": logs, + "pudu": { + "runner_script": "https://github.com/MyersResearchGroup/PUDU/blob/main/scripts/run_sbol2plating_with_params.py", + "mode": protocol_type, + "advanced_params": advanced_params, + }, } if protocol_type == "manual": md_path = write_manual_plating_protocol( results_path / "manual_plating_protocol.md", - plate_id=plate_impl.displayId, + plate_id=plate_id, plate_rows=plate_rows, advanced_params=advanced_params, ) protocol_artifacts["manual_protocol_markdown"] = str(md_path) - plan.description = f"Manual protocol file: {md_path}" else: script_path = write_plating_protocol_script( results_path / "plating_ot2.py", @@ -704,7 +591,6 @@ def plating( advanced_params=advanced_params, ) protocol_artifacts["ot2_script"] = str(script_path) - plan.description = f"Automated protocol script: {script_path}" try: sim_zip = run_opentrons_script_to_zip( script_path, @@ -719,15 +605,12 @@ def plating( "stage": "plating", "protocol_type": protocol_type, "plate": { - "plate_implementation": plate_impl.identity, + "plate_implementation": plate_id, "plate_map": plate_map, }, - "sbol_artifacts": { - "plating_activity": plating_activity.identity, - "agent": agent.identity, - "plan": plan.identity, - "plate_implementation": plate_impl.identity, - 
"plated_strain_implementations": plated_impls, + "metadata": { + "plate_rows": plate_rows, + "layout_dataframe_columns": list(plate_rows[0].keys()) if plate_rows else [], }, "json_intermediate": { "plating_data": {"bacterium_locations": bacterium_locations}, @@ -1012,6 +895,388 @@ def _normalize_transformation_inputs( ) return normalized + def _safe_display_id(self, value: str) -> str: + safe_value = re.sub(r"[^A-Za-z0-9_]+", "_", value or "") + return safe_value.strip("_") or "unnamed_design" + + def _serialize_sbol_identity(self, obj_or_uri) -> str: + return getattr(obj_or_uri, "identity", str(obj_or_uri)) + + def _write_json(self, path: Path, payload: dict) -> Path: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + return path + + def _status_from_manifest(self, manifest: dict) -> str: + if manifest.get("errors"): + return "completed_with_errors" + if manifest["assembly_lvl1"].get("successful"): + return "completed" + if ( + manifest["assembly_lvl1"].get("failed") + or manifest["domestication"].get("errors") + ): + return "completed_with_errors" + return "failed" + + def _find_missing_parts_for_lvl1( + self, + design: sbol2.ComponentDefinition, + backbone: Plasmid = None, + ) -> list[dict]: + parts = self._extract_design_parts(design) + plasmid_dict = self._construct_plasmid_dict(parts, antibiotic_resistance=AMP) + + missing_parts = [] + for part in parts: + candidates = plasmid_dict.get(part.displayId, []) + if not candidates: + missing_parts.append( + { + "part": part, + "reason": "no implemented plasmid", + } + ) + continue + + try: + if backbone is None: + selected_backbone, _ = self._get_backbone( + plasmid_dict, antibiotic_resistance=KAN + ) + if selected_backbone is None: + missing_parts.append( + { + "part": part, + "reason": "no compatible backbone", + } + ) + else: + compatible = get_compatible_plasmids(plasmid_dict, backbone) + if not compatible: 
+ missing_parts.append( + { + "part": part, + "reason": "no compatible plasmid", + } + ) + except Exception: + missing_parts.append( + { + "part": part, + "reason": "no compatible plasmid", + } + ) + + return missing_parts + + def _index_domestication_products( + self, + products: list[sbol2.ComponentDefinition], + ) -> None: + for product_definition in products: + if self._get_indexed_plasmid(self.indexed_plasmids, product_definition): + continue + try: + indexed = Plasmid(product_definition, None, [], [], self.sbol_doc) + except Exception: + indexed = type( + "IndexedDomesticationPlasmid", + (), + { + "plasmid_definition": product_definition, + "name": product_definition.displayId, + "fusion_sites": [], + "antibiotic_resistance": None, + }, + )() + self.indexed_plasmids.append(indexed) + + def _zip_full_build_results( + self, + source_dir: Path, + zip_path: Path, + overwrite: bool = False, + ) -> Path: + if zip_path.exists(): + if not overwrite: + raise FileExistsError( + f"Zip path already exists and overwrite=False: {zip_path}" + ) + zip_path.unlink() + zip_path.parent.mkdir(parents=True, exist_ok=True) + zip_base = zip_path.with_suffix("") + archive = shutil.make_archive( + str(zip_base), "zip", root_dir=str(source_dir), base_dir="." 
+ ) + return Path(archive) + + def _expand_combinatorial_derivation( + self, + derivation: sbol2.CombinatorialDerivation, + product_name_prefix: str = None, + ) -> list[sbol2.ComponentDefinition]: + master_template = get_or_pull(self.sbol_doc, self.sbh, derivation.masterTemplate) + component_variants = extract_combinatorial_design_parts( + master_template, self.sbol_doc, self.sbol_doc + ) + variant_definitions = enumerate_design_variants(component_variants) + + prefix = product_name_prefix or master_template.displayId + created_variants = [] + + ordered_components = list(master_template.getInSequentialOrder()) + for index, variant_parts in enumerate(variant_definitions, start=1): + variant_display_id = f"{self._safe_display_id(prefix)}_variant_{index:03d}" + variant_design = self.sbol_doc.find(variant_display_id) or sbol2.ComponentDefinition( + variant_display_id + ) + variant_design.types = list(master_template.types) + variant_design.roles = list(master_template.roles) + variant_design.wasDerivedFrom = derivation.identity + self._add_if_absent(self.sbol_doc, variant_design) + + if len(variant_design.components) == 0: + for comp_index, component in enumerate(ordered_components): + part_def = variant_parts[comp_index] + variant_component = variant_design.components.create( + f"{variant_display_id}_component_{comp_index+1:03d}" + ) + variant_component.definition = part_def.identity + try: + variant_component.access = component.access + except Exception: + pass + try: + variant_component.direction = component.direction + except Exception: + pass + created_variants.append(variant_design) + + return created_variants + + def _normalize_full_build_designs(self, designs) -> list[sbol2.ComponentDefinition]: + if isinstance(designs, sbol2.ComponentDefinition): + return [designs] + if isinstance(designs, sbol2.CombinatorialDerivation): + return self._expand_combinatorial_derivation(designs) + if isinstance(designs, list): + if all(isinstance(design, 
sbol2.ComponentDefinition) for design in designs): + return designs + raise TypeError("designs list must contain only ComponentDefinition objects.") + raise TypeError( + "designs must be a ComponentDefinition, list[ComponentDefinition], or CombinatorialDerivation." + ) + + def full_build( + self, + designs, + results_dir, + chassis_name: str = "E_coli_DH5alpha", + protocol_type: str = "manual", + transformation_params: dict | None = None, + plating_params: dict | None = None, + product_name_prefix: str | None = None, + overwrite: bool = False, + ) -> dict: + transformation_params = transformation_params or {} + plating_params = plating_params or {} + results_path = Path(results_dir) + results_path.mkdir(parents=True, exist_ok=True) + + input_type = type(designs).__name__ + normalized_designs = self._normalize_full_build_designs(designs) + manifest = { + "stage": "full_build", + "inputs": { + "input_type": input_type, + "design_count": len(normalized_designs), + "chassis_name": chassis_name, + "protocol_type": protocol_type, + "product_name_prefix": product_name_prefix, + }, + "domestication": { + "missing_parts": [], + "products": [], + "transformation": {}, + "plating": {}, + "errors": [], + }, + "assembly_lvl1": {"successful": [], "failed": []}, + "transformation": {"assembly_products": {}}, + "plating": {"assembly_products": {}}, + "skipped": [ + { + "stage": "assembly_lvl2", + "status": "skipped", + "reason": "assembly_lvl2 is not implemented yet", + } + ], + "errors": [], + } + + per_design_missing = {} + unique_missing = {} + for design in normalized_designs: + missing_items = self._find_missing_parts_for_lvl1(design) + serialized_missing = [] + for item in missing_items: + part = item["part"] + entry = { + "part_identity": self._serialize_sbol_identity(part), + "part_display_id": part.displayId, + "reason": item["reason"], + } + serialized_missing.append(entry) + unique_missing.setdefault(part.identity, {"part": part, "reason": item["reason"]}) + 
per_design_missing[design.identity] = serialized_missing + + manifest["domestication"]["missing_parts"] = list(unique_missing.values()) + if unique_missing: + manifest["domestication"]["missing_parts"] = [ + { + "part_identity": self._serialize_sbol_identity(item["part"]), + "part_display_id": item["part"].displayId, + "reason": item["reason"], + } + for item in unique_missing.values() + ] + unique_missing_parts = [item["part"] for item in unique_missing.values()] + try: + domesticated_products = self.domestication(unique_missing_parts) + self._index_domestication_products(domesticated_products) + manifest["domestication"]["products"] = [ + self._serialize_sbol_identity(product) + for product in domesticated_products + ] + try: + domestication_transformation = self.transformation( + domesticated_products, + chassis_name=chassis_name, + transformation_doc=self.sbol_doc, + **transformation_params, + ) + manifest["domestication"]["transformation"] = domestication_transformation + except Exception as exc: + manifest["domestication"]["errors"].append( + f"Domestication transformation failed: {exc}" + ) + domestication_transformation = None + + if domestication_transformation: + try: + domestication_plating = self.plating( + transformation_results=domestication_transformation, + results_dir=results_path / "domestication" / "plating", + protocol_type=protocol_type, + advanced_params=plating_params, + plating_doc=self.sbol_doc, + overwrite=overwrite, + ) + manifest["domestication"]["plating"] = domestication_plating + except Exception as exc: + manifest["domestication"]["errors"].append( + f"Domestication plating failed: {exc}" + ) + except Exception as exc: + manifest["domestication"]["errors"].append(f"Domestication failed: {exc}") + manifest["errors"].append(f"Domestication failed: {exc}") + + for index, design in enumerate(normalized_designs, start=1): + design_slug = self._safe_display_id(design.displayId or f"design_{index:03d}") + stable_product_name = ( + 
f"{self._safe_display_id(product_name_prefix)}_{design_slug}_{index:03d}" + if product_name_prefix + else f"{design_slug}_{index:03d}" + ) + try: + assembly_products = self.assembly_lvl1( + abstract_design=design, + product_name=stable_product_name, + ) + product_ids = [ + self._serialize_sbol_identity( + product.plasmid_definition if isinstance(product, Plasmid) else product + ) + for product in assembly_products + ] + + assembly_transformation = self.transformation( + assembly_products, + chassis_name=chassis_name, + transformation_doc=self.sbol_doc, + **transformation_params, + ) + assembly_plating = self.plating( + transformation_results=assembly_transformation, + results_dir=results_path / "assembly_lvl1" / design_slug / "plating", + protocol_type=protocol_type, + advanced_params=plating_params, + plating_doc=self.sbol_doc, + overwrite=overwrite, + ) + + manifest["assembly_lvl1"]["successful"].append( + { + "design_identity": design.identity, + "design_display_id": design.displayId, + "assembly_product_identities": product_ids, + } + ) + manifest["transformation"]["assembly_products"][design_slug] = assembly_transformation + manifest["plating"]["assembly_products"][design_slug] = assembly_plating + except Exception as exc: + failure_entry = { + "design_identity": design.identity, + "design_display_id": design.displayId, + "error": str(exc), + "missing_parts": per_design_missing.get(design.identity, []), + } + manifest["assembly_lvl1"]["failed"].append(failure_entry) + manifest["errors"].append( + f"Assembly failed for {design.displayId}: {exc}" + ) + + sbol_path = results_path / "sbol" / "full_build.xml" + try: + sbol_path.parent.mkdir(parents=True, exist_ok=True) + self.sbol_doc.write(str(sbol_path)) + except Exception as exc: + manifest["errors"].append(f"SBOL write failed: {exc}") + + manifest_path = self._write_json(results_path / "full_build_manifest.json", manifest) + zip_path = results_path / "full_build_results.zip" + try: + zip_result = 
self._zip_full_build_results( + source_dir=results_path, + zip_path=zip_path, + overwrite=overwrite, + ) + except Exception as exc: + manifest["errors"].append(f"Result packaging failed: {exc}") + self._write_json(manifest_path, manifest) + zip_result = zip_path + + status = self._status_from_manifest(manifest) + result = { + "stage": "full_build", + "status": status, + "results_dir": str(results_path), + "zip_path": str(zip_result), + "manifest_path": str(manifest_path), + "sbol_path": str(sbol_path), + "inputs": manifest["inputs"], + "domestication": manifest["domestication"], + "assembly_lvl1": manifest["assembly_lvl1"], + "transformation": manifest["transformation"], + "plating": manifest["plating"], + "skipped": manifest["skipped"], + "errors": manifest["errors"], + } + self._write_json(manifest_path, manifest) + return result + def _get_or_create_chassis( self, doc: sbol2.Document, chassis_name: str ) -> tuple[sbol2.ModuleDefinition, sbol2.Implementation]: diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 4298c18..92aa36d 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -142,6 +142,125 @@ def initialize_assembly_activity(self): return activity +class Transformation: + """Create SBOL transformation records from plasmid/chassis inputs. + + Mirrors the orchestration pattern used by :class:`Assembly`, but for + chemical transformation into a chassis strain. 
+ """ + + def __init__( + self, + plasmids: List[Plasmid], + chassis_name: str, + source_document: sbol2.Document, + ): + self.plasmids = plasmids + self.chassis_name = chassis_name + self.source_document = source_document + + def _add_if_absent(self, obj): + if self.source_document.find(obj.identity) is None: + self.source_document.add(obj) + + def _get_or_create_chassis(self) -> tuple[sbol2.ModuleDefinition, sbol2.Implementation]: + chassis_module = self.source_document.find(self.chassis_name) or sbol2.ModuleDefinition( + self.chassis_name + ) + chassis_module.name = self.chassis_name + self._add_if_absent(chassis_module) + + chassis_impl_id = f"{self.chassis_name}_impl" + chassis_impl = self.source_document.find(chassis_impl_id) or sbol2.Implementation( + chassis_impl_id + ) + chassis_impl.built = chassis_module.identity + self._add_if_absent(chassis_impl) + return chassis_module, chassis_impl + + def chemical_transformation(self) -> list[dict]: + chassis_module, chassis_impl = self._get_or_create_chassis() + outputs = [] + + for index, plasmid in enumerate(self.plasmids, start=1): + plasmid_def = plasmid.plasmid_definition + plasmid_impl = ( + plasmid.plasmid_implementations[0] + if plasmid.plasmid_implementations + else None + ) + if plasmid_impl is None: + plasmid_impl_id = f"{plasmid_def.displayId}_impl" + plasmid_impl = self.source_document.find(plasmid_impl_id) or sbol2.Implementation( + plasmid_impl_id + ) + plasmid_impl.built = plasmid_def.identity + self._add_if_absent(plasmid_impl) + + transform_id = f"transform_{plasmid_def.displayId}_{index}" + transformation_activity = sbol2.Activity(transform_id) + transformation_activity.name = ( + f"Transform {self.chassis_name} with {plasmid_def.displayId}" + ) + transformation_activity.types = "http://sbols.org/v2#build" + + chassis_usage = sbol2.Usage( + uri=f"{transform_id}_chassis_usage", + entity=chassis_impl.identity, + role="http://sbols.org/v2#build", + ) + plasmid_usage = sbol2.Usage( + 
uri=f"{transform_id}_plasmid_usage", + entity=plasmid_impl.identity, + role="http://sbols.org/v2#build", + ) + transformation_activity.usages = [chassis_usage, plasmid_usage] + + transformed_strain = sbol2.ModuleDefinition( + f"{self.chassis_name}_with_{plasmid_def.displayId}" + ) + transformed_strain.name = ( + f"{self.chassis_name} transformed with {plasmid_def.displayId}" + ) + chassis_module_ref = sbol2.Module( + uri=f"{transformed_strain.displayId}_chassis_module" + ) + chassis_module_ref.definition = chassis_module.identity + plasmid_fc = sbol2.FunctionalComponent( + uri=f"{transformed_strain.displayId}_plasmid_fc" + ) + plasmid_fc.definition = plasmid_def.identity + transformed_strain.modules = [chassis_module_ref] + transformed_strain.functionalComponents = [plasmid_fc] + + transformed_impl = sbol2.Implementation( + f"{transformed_strain.displayId}_impl" + ) + transformed_impl.built = transformed_strain.identity + transformed_impl.wasGeneratedBy = transformation_activity.identity + + for obj in ( + transformation_activity, + chassis_usage, + plasmid_usage, + transformed_strain, + chassis_module_ref, + plasmid_fc, + transformed_impl, + ): + self._add_if_absent(obj) + + outputs.append( + { + "transformation_activity": transformation_activity.identity, + "transformed_strain_module": transformed_strain.identity, + "transformed_strain_implementation": transformed_impl.identity, + } + ) + + return outputs + + def rebase_restriction_enzyme(name: str, **kwargs) -> sbol2.ComponentDefinition: """Creates an ComponentDefinition Restriction Enzyme Component from rebase. 
diff --git a/tests/test_full_build.py b/tests/test_full_build.py
new file mode 100644
index 0000000..eb3a532
--- /dev/null
+++ b/tests/test_full_build.py
@@ -0,0 +1,236 @@
+import os
+import sys
+import tempfile
+import unittest
+import zipfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import sbol2
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src")))
+
+from buildcompiler.buildcompiler import BuildCompiler
+from buildcompiler.constants import ENGINEERED_PLASMID
+
+
+class TestFullBuild(unittest.TestCase):
+    """Tests for ``BuildCompiler.full_build`` and the private helpers it relies on."""
+
+    def setUp(self):
+        """Create an empty SBOL document and a compiler bound to it."""
+        self.doc = sbol2.Document()
+        compiler_kwargs = {
+            "collections": [],
+            "sbh_registry": "https://synbiohub.org",
+            "auth_token": "",
+            "sbol_doc": self.doc,
+        }
+        self.compiler = BuildCompiler(**compiler_kwargs)
+
+    def _make_part(self, display_id: str) -> sbol2.ComponentDefinition:
+        """Create a ComponentDefinition, add it to the document and return it."""
+        new_part = sbol2.ComponentDefinition(display_id)
+        self.doc.add(new_part)
+        return new_part
+
+    def _get_by_display_id(self, display_id: str):
+        """Return the first ComponentDefinition in the document with this displayId, or None."""
+        matches = (
+            candidate
+            for candidate in self.doc.componentDefinitions
+            if candidate.displayId == display_id
+        )
+        return next(matches, None)
+
+    def _make_design(self, display_id: str, part_ids: list[str]) -> sbol2.ComponentDefinition:
+        """Build a design whose sub-components reference ``part_ids`` in order.
+
+        Parts not yet in the document are created first; every adjacent pair of
+        sub-components is linked by a "precedes" sequence constraint.
+        """
+        design = sbol2.ComponentDefinition(display_id)
+        self.doc.add(design)
+
+        sub_components = []
+        for index, part_id in enumerate(part_ids, start=1):
+            part = self._get_by_display_id(part_id)
+            if not part:
+                part = self._make_part(part_id)
+            sub_component = design.components.create(f"{display_id}_c{index}")
+            sub_component.definition = part.identity
+            sub_components.append(sub_component)
+
+        adjacent_pairs = zip(sub_components, sub_components[1:])
+        for index, (upstream, downstream) in enumerate(adjacent_pairs):
+            constraint = design.sequenceConstraints.create(f"{display_id}_sc{index+1}")
+            constraint.subject = upstream.identity
+            constraint.object = downstream.identity
+            constraint.restriction = sbol2.SBOL_RESTRICTION_PRECEDES
+        return design
+
+    def _make_plasmid(self, display_id: str) -> sbol2.ComponentDefinition:
+        """Create a ComponentDefinition with the ENGINEERED_PLASMID role and add it to the document."""
+        engineered = sbol2.ComponentDefinition(display_id)
+        engineered.roles = [ENGINEERED_PLASMID]
+        self.doc.add(engineered)
+        return engineered
+
+    def test_normalize_full_build_designs_input_shapes(self):
+        # A single design, a list of designs and a CombinatorialDerivation must
+        # all come back as a list of designs.
+        first = self._make_design("design_a", ["part_a"])
+        second = self._make_design("design_b", ["part_b"])
+
+        self.assertEqual(self.compiler._normalize_full_build_designs(first), [first])
+        self.assertEqual(
+            self.compiler._normalize_full_build_designs([first, second]), [first, second]
+        )
+
+        derivation = sbol2.CombinatorialDerivation("combo")
+        self.doc.add(derivation)
+        expand_patch = patch.object(
+            self.compiler, "_expand_combinatorial_derivation", return_value=[first]
+        )
+        with expand_patch as mock_expand:
+            normalized = self.compiler._normalize_full_build_designs(derivation)
+            self.assertEqual(normalized, [first])
+            mock_expand.assert_called_once_with(derivation)
+
+    def test_expand_combinatorial_derivation_creates_deterministic_variants(self):
+        # Variants must be named "<prefix>_variant_NNN" in enumeration order and
+        # be retrievable from the document afterwards.
+        p1 = self._make_part("p1")
+        p2 = self._make_part("p2")
+        p3 = self._make_part("p3")
+
+        template = self._make_design("master", ["p1", "p2"])
+        derivation = sbol2.CombinatorialDerivation("combo_2")
+        derivation.masterTemplate = template.identity
+
+        pull_patch = patch("buildcompiler.buildcompiler.get_or_pull", return_value=template)
+        parts_patch = patch(
+            "buildcompiler.buildcompiler.extract_combinatorial_design_parts",
+            return_value={"a": [p1, p2], "b": [p3]},
+        )
+        variants_patch = patch(
+            "buildcompiler.buildcompiler.enumerate_design_variants",
+            return_value=[[p1, p3], [p2, p3]],
+        )
+        with pull_patch, parts_patch, variants_patch:
+            variants = self.compiler._expand_combinatorial_derivation(
+                derivation, product_name_prefix="combo"
+            )
+
+            variant_ids = [variant.displayId for variant in variants]
+            self.assertEqual(variant_ids, ["combo_variant_001", "combo_variant_002"])
+            self.assertIsNotNone(self._get_by_display_id("combo_variant_001"))
+            self.assertIsNotNone(self._get_by_display_id("combo_variant_002"))
+
+    def test_find_missing_parts_reports_missing_and_present(self):
+        # Only the part whose plasmid list is empty should be reported as missing.
+        design = self._make_design("design_missing", ["part_x", "part_y"])
+        part_x = self._get_by_display_id("part_x")
+        part_y = self._get_by_display_id("part_y")
+
+        parts_patch = patch.object(
+            self.compiler, "_extract_design_parts", return_value=[part_x, part_y]
+        )
+        plasmids_patch = patch.object(
+            self.compiler,
+            "_construct_plasmid_dict",
+            return_value={"part_x": [object()], "part_y": []},
+        )
+        backbone_patch = patch.object(
+            self.compiler, "_get_backbone", return_value=(object(), [object()])
+        )
+        with parts_patch, plasmids_patch, backbone_patch:
+            missing = self.compiler._find_missing_parts_for_lvl1(design)
+
+            self.assertEqual(len(missing), 1)
+            only_missing = missing[0]
+            self.assertEqual(only_missing["part"].displayId, "part_y")
+            self.assertEqual(only_missing["reason"], "no implemented plasmid")
+
+    def test_full_build_orchestration_and_stage_skip(self):
+        # Both designs report the same missing part; the first assembly succeeds
+        # and the second raises, so one success and one failure are expected
+        # and assembly_lvl2 must be listed as skipped.
+        design_a = self._make_design("dA", ["pa"])
+        design_b = self._make_design("dB", ["pb"])
+        missing_part = self._make_part("missing_part")
+        domesticated = self._make_plasmid("domesticated_missing_part")
+        assembled = self._make_plasmid("assembled_dA")
+
+        normalize_patch = patch.object(
+            self.compiler, "_normalize_full_build_designs", return_value=[design_a, design_b]
+        )
+        missing_patch = patch.object(
+            self.compiler,
+            "_find_missing_parts_for_lvl1",
+            side_effect=[
+                [{"part": missing_part, "reason": "no implemented plasmid"}],
+                [{"part": missing_part, "reason": "no implemented plasmid"}],
+            ],
+        )
+        dom_patch = patch.object(self.compiler, "domestication", return_value=[domesticated])
+        tx_patch = patch.object(
+            self.compiler, "transformation", return_value={"stage": "transformation", "sbol_artifacts": []}
+        )
+        plating_patch = patch.object(self.compiler, "plating", return_value={"stage": "plating"})
+        asm_patch = patch.object(
+            self.compiler,
+            "assembly_lvl1",
+            side_effect=[[assembled], RuntimeError("assembly fail")],
+        )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with normalize_patch, missing_patch, dom_patch as mock_dom:
+                with tx_patch as mock_tx, plating_patch as mock_plating, asm_patch as mock_asm:
+                    result = self.compiler.full_build(
+                        designs=[design_a, design_b], results_dir=tmpdir, overwrite=True
+                    )
+
+                    mock_dom.assert_called_once()
+                    self.assertEqual(mock_asm.call_count, 2)
+                    self.assertGreaterEqual(mock_tx.call_count, 2)
+                    self.assertGreaterEqual(mock_plating.call_count, 2)
+                    self.assertEqual(result["skipped"][0]["stage"], "assembly_lvl2")
+                    lvl1 = result["assembly_lvl1"]
+                    self.assertEqual(len(lvl1["successful"]), 1)
+                    self.assertEqual(len(lvl1["failed"]), 1)
+
+    def test_full_build_writes_manifest_and_zip_and_return_shape(self):
+        # With every stage mocked out, full_build must still write the manifest
+        # and the zip archive and return all expected top-level keys.
+        design = self._make_design("d_main", ["p_main"])
+        assembled = self._make_plasmid("assembled_main")
+
+        missing_patch = patch.object(self.compiler, "_find_missing_parts_for_lvl1", return_value=[])
+        asm_patch = patch.object(self.compiler, "assembly_lvl1", return_value=[assembled])
+        tx_patch = patch.object(
+            self.compiler, "transformation", return_value={"stage": "transformation", "sbol_artifacts": []}
+        )
+        plating_patch = patch.object(self.compiler, "plating", return_value={"stage": "plating"})
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with missing_patch, asm_patch, tx_patch, plating_patch:
+                result = self.compiler.full_build(
+                    designs=[design], results_dir=Path(tmpdir) / "run", overwrite=True
+                )
+
+                manifest_path = Path(result["manifest_path"])
+                zip_path = Path(result["zip_path"])
+
+                self.assertTrue(manifest_path.exists())
+                self.assertTrue(zip_path.exists())
+                for key in ("domestication", "assembly_lvl1", "transformation", "plating", "skipped"):
+                    self.assertIn(key, result)
+
+                with zipfile.ZipFile(zip_path, "r") as archive:
+                    names = archive.namelist()
+                self.assertIn("full_build_manifest.json", names)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_plating.py b/tests/test_plating.py
index e0c1684..9989538 100644
--- a/tests/test_plating.py
+++ b/tests/test_plating.py
@@ -100,11 +100,10 @@ def test_plating_manual_outputs_and_provenance(self):
         )
         self.assertTrue(Path(result["protocol_artifacts"]["plate_map_json"]).exists())
         self.assertTrue(Path(result["protocol_artifacts"]["plate_map_csv"]).exists())
-
-        activity = self.doc.find(result["sbol_artifacts"]["plating_activity"])
-        self.assertIsNotNone(activity)
-        self.assertEqual(len(activity.usages), 2)
-        self.assertTrue(len(activity.associations) >= 1)
+        self.assertTrue(
+            Path(result["protocol_artifacts"]["plate_layout_dataframe_csv"]).exists()
+        )
+        self.assertIn("pudu",
result["protocol_artifacts"]) @patch("buildcompiler.robotutils.subprocess.run") def test_plating_automated_script_and_sim_zip(self, mock_run):