From 869b543df5af04d19fac8f3eaf7c5a8d56e521df Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Thu, 7 May 2026 13:19:52 -0600 Subject: [PATCH 1/7] Add SynBioHub tutorial notebook and tighten digest simulation --- README.md | 21 ++++ ...ler_synbiohub_authenticated_tutorial.ipynb | 106 ++++++++++++++++++ src/buildcompiler/api/compiler.py | 60 +++++++++- src/buildcompiler/execution/executor.py | 3 +- src/buildcompiler/sbol/__init__.py | 2 + src/buildcompiler/sbol/repository.py | 56 +++++++++ src/buildcompiler/sbol2build.py | 62 +++++----- tests/unit/api/test_compiler_api.py | 48 +++++++- tests/unit/sbol/test_repository.py | 85 ++++++++++++++ 9 files changed, 411 insertions(+), 32 deletions(-) create mode 100644 notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb create mode 100644 src/buildcompiler/sbol/repository.py create mode 100644 tests/unit/sbol/test_repository.py diff --git a/README.md b/README.md index addf87a..5e7c353 100644 --- a/README.md +++ b/README.md @@ -163,6 +163,27 @@ plan = compiler.plan(abstract_designs) result = compiler.execute(plan) ``` + +Authenticated and anonymous repository access are both supported: + +```python +compiler = BuildCompiler.from_synbiohub( + collections=["https://synbiohub.org/public/igem/igem_collection/1"], + repository_url="https://synbiohub.org", + auth_token="", +) + +compiler = BuildCompiler.from_synbiohub( + repository_url="https://synbiohub.org", + email="user@example.org", + password="", +) + +compiler = BuildCompiler.from_synbiohub( + repository_url="https://synbiohub.org", +) +``` + A convenience wrapper may exist: ```python diff --git a/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb b/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb new file mode 100644 index 0000000..83fe8a6 --- /dev/null +++ b/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb @@ -0,0 +1,106 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# BuildCompiler SynBioHub Authenticated Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial shows how to authenticate to SynBioHub with email/password, load inventory collections into BuildCompiler's internal SBOL document, and prepare an assembly workflow from an abstract design URI." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from getpass import getpass\n", + "import sbol2\n", + "\n", + "from buildcompiler.api import BuildCompiler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "repository_url = 'https://synbiohub.org'\n", + "email = input('SynBioHub user/email: ').strip()\n", + "password = getpass('SynBioHub password: ')\n", + "\n", + "abstract_design_uri = 'https://synbiohub.org/user/Gon/abstract_design/standard_GFP/1'\n", + "collections = [\n", + " 'https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1',\n", + " 'https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1',\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "doc = sbol2.Document()\n", + "compiler = BuildCompiler.from_synbiohub(\n", + " collections=collections,\n", + " repository_url=repository_url,\n", + " email=email,\n", + " password=password,\n", + " sbol_doc=doc,\n", + ")\n", + "\n", + "print(f'Loaded objects: {len(doc.componentDefinitions)} component definitions, {len(doc.implementations)} implementations')\n", + "print('Repository client:', type(compiler.repository_client).__name__)\n", + "print('In-memory auth token available:', compiler.repository_client.auth_token is not None)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What BuildCompiler does with this setup\n", + "\n", + "- Pulls each collection URI using authenticated `sbol2.PartShop` into `sbol_doc`.\n", + "- Reuses the same authenticated pull client for identity-based resolver misses in planning/execution.\n", + "- Uses inventory indexing to find compatible plasmids containing abstract-design parts and compatible backbones/reagents." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Assembly simulation behavior\n", + "\n", + "Legacy Golden Gate simulation now enforces strict digest validation:\n", + "\n", + "- Restriction digest must yield exactly **2 fragments**.\n", + "- For part plasmids, the smaller fragment is selected as the insert.\n", + "- For the backbone plasmid, the larger fragment is selected as the backbone.\n", + "- If digest count is unexpected, simulation fails with a clear error message naming the reactant.\n", + "- Successful assembly encodes reagent usage and links generated product implementations with `wasGeneratedBy` to one assembly activity per assembled design product." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/src/buildcompiler/api/compiler.py b/src/buildcompiler/api/compiler.py index 2a82db0..58cb23b 100644 --- a/src/buildcompiler/api/compiler.py +++ b/src/buildcompiler/api/compiler.py @@ -6,6 +6,7 @@ from typing import Any from buildcompiler.planning import FullBuildPlanner +from buildcompiler.sbol import PartShopRepositoryClient from .options import BuildOptions @@ -17,6 +18,7 @@ class BuildCompiler: planner: Any = None executor: Any = None adapters: Any = None + repository_client: PartShopRepositoryClient | None = None options: BuildOptions = field(default_factory=BuildOptions) @classmethod @@ -25,16 +27,52 @@ def from_synbiohub( *, collections: list[str] | None = None, sbh_registry: str | None = None, + repository_url: str | None = None, auth_token: str | None = None, + email: str | None = None, + password: str | None = None, sbol_doc: Any = None, options: BuildOptions | None = None, **kwargs: Any, ) -> "BuildCompiler": - if collections: - raise NotImplementedError( - "Automatic SynBioHub collection loading/indexing is not implemented yet. Inject inventory dependencies directly for now." + resolved_repository_url = repository_url or sbh_registry + if auth_token and (email or password): + raise ValueError( + "Specify either auth_token or email/password credentials, not both." ) - return cls(sbol_document=sbol_doc, options=options or BuildOptions(), **kwargs) + if (email and not password) or (password and not email): + raise ValueError("Both email and password are required for login.") + + needs_repository = bool(collections) or bool(auth_token) or bool(email) or bool(password) + if needs_repository and not resolved_repository_url: + raise ValueError("repository_url (or sbh_registry) is required for repository access.") + + document = sbol_doc + if document is None: + import sbol2 + + document = sbol2.Document() + + repository_client = None + if resolved_repository_url: + repository_client = PartShopRepositoryClient( + repository_url=resolved_repository_url, + document=document, + auth_token=auth_token, + email=email, + password=password, + ) + + if collections and repository_client is not None: + for identity in collections: + repository_client.pull_identity(identity) + + return cls( + sbol_document=document, + repository_client=repository_client, + options=options or BuildOptions(), + **kwargs, + ) def plan(self, abstract_designs: Any, options: BuildOptions | None = None) -> Any: effective_options = options or self.options @@ -60,6 +98,11 @@ def execute(self, plan: Any, options: BuildOptions | None = None) -> Any: sbol_document=self.sbol_document, options=effective_options, adapters=self.adapters, + pull_client=( + self.repository_client.pull_identity + if self.repository_client is not None + else None + ), ) return executor.execute(plan, options=effective_options) @@ -82,6 +125,9 @@ def full_build( collections: list[str] | None = None, sbh_registry: str | None = None, auth_token: str | None = None, + email: str | None = None, + password: str | None = None, + repository_url: str | None = None, sbol_doc: Any = None, **kwargs: Any, ) -> Any: @@ -90,12 +136,18 @@ def full_build( collections is not None or sbh_registry is not None or auth_token is not None + or email is not None + or password is not None + or repository_url is not None or sbol_doc is not None ): compiler = BuildCompiler.from_synbiohub( collections=collections, sbh_registry=sbh_registry, + repository_url=repository_url, auth_token=auth_token, + email=email, + password=password, sbol_doc=sbol_doc, options=compiler_options, inventory=inventory, diff --git a/src/buildcompiler/execution/executor.py b/src/buildcompiler/execution/executor.py index 4a1a064..02ea5eb 100644 --- a/src/buildcompiler/execution/executor.py +++ b/src/buildcompiler/execution/executor.py @@ -63,9 +63,10 @@ def from_dependencies( adapters: Any = None, graph: Any = None, logger: Any = None, + pull_client: Any = None, **stage_overrides: Any, ) -> "FullBuildExecutor": - resolver = SbolResolver(sbol_document) + resolver = SbolResolver(sbol_document, pull_client=pull_client) return cls( context=BuildContext( sbol=resolver, diff --git a/src/buildcompiler/sbol/__init__.py b/src/buildcompiler/sbol/__init__.py index 7539882..de1c9c9 100644 --- a/src/buildcompiler/sbol/__init__.py +++ b/src/buildcompiler/sbol/__init__.py @@ -2,6 +2,7 @@ from .assembly import AssemblyJob, AssemblySbolResult, AssemblyService from .domestication import DomesticationJob, DomesticationSbolResult, DomesticationService +from .repository import PartShopRepositoryClient from .resolver import PullPolicy, SbolResolver __all__ = [ @@ -11,6 +12,7 @@ "DomesticationJob", "DomesticationSbolResult", "DomesticationService", + "PartShopRepositoryClient", "PullPolicy", "SbolResolver", ] diff --git a/src/buildcompiler/sbol/repository.py b/src/buildcompiler/sbol/repository.py new file mode 100644 index 0000000..d734f56 --- /dev/null +++ b/src/buildcompiler/sbol/repository.py @@ -0,0 +1,56 @@ +"""SynBioHub repository client adapters.""" + +from __future__ import annotations + +from typing import Any + +import sbol2 + + +class PartShopRepositoryClient: + """Thin adapter around ``sbol2.PartShop`` for identity-based pulls.""" + + def __init__( + self, + repository_url: str, + document: sbol2.Document, + auth_token: str | None = None, + email: str | None = None, + password: str | None = None, + part_shop: Any | None = None, + ) -> None: + if not repository_url: + raise ValueError("repository_url is required") + if auth_token and (email or password): + raise ValueError( + "Specify either auth_token or email/password credentials, not both." + ) + if (email and not password) or (password and not email): + raise ValueError("Both email and password are required for login.") + + self.repository_url = repository_url + self.document = document + self._auth_token: str | None = None + self.part_shop = part_shop or sbol2.PartShop(repository_url) + + if auth_token is not None: + self.part_shop.key = auth_token + self._auth_token = auth_token + elif email and password: + self.part_shop.login(email, password) + self._auth_token = self.part_shop.getKey() + + @property + def auth_token(self) -> str | None: + return self._auth_token + + def pull_identity(self, identity: str) -> object | None: + self.part_shop.pull(identity, self.document, recursive=True) + return self.document.find(identity) + + def __repr__(self) -> str: + token_state = "set" if self._auth_token else "unset" + return ( + "PartShopRepositoryClient(" + f"repository_url={self.repository_url!r}, auth_token={token_state})" + ) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 92aa36d..49151f1 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -26,6 +26,33 @@ sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, False) +def _select_expected_digest_fragment( + *, + digested_reactant, + reactant_component_definition: sbol2.ComponentDefinition, + expected_role: str, +): + """Validate digest count and select expected fragment. + + Golden Gate simulation expects exactly two fragments after digest: + smaller insert + larger backbone. + """ + fragment_count = len(digested_reactant) + if fragment_count != 2: + raise ValueError( + "Golden Gate simulation failed for " + f"{reactant_component_definition.displayId}: expected exactly 2 digestion " + f"fragments and found {fragment_count}. Check sequence/restriction sites." + ) + + smaller, larger = sorted(digested_reactant, key=len) + if expected_role == "insert": + return smaller + if expected_role == "backbone": + return larger + raise ValueError(f"Unknown expected digest role: {expected_role}") + + class Assembly: """Creates an Assembly Plan. @@ -522,18 +549,11 @@ def part_digestion( ds_reactant = Dseqrecord(reactant_seq, circular=circular) digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) - if len(digested_reactant) < 2 or len(digested_reactant) > 3: - raise ValueError( - f"Not supported number of products. Found{len(digested_reactant)}" - ) - elif circular and len(digested_reactant) == 2: - part_extract, _ = sorted(digested_reactant, key=len) - elif linear and len(digested_reactant) == 3: - _, part_extract, _ = digested_reactant - else: - raise ValueError( - f"Reactant {reactant_component_definition.displayId} has no valid topology type, with {len(digested_reactant)} digested products, types: {reactant_component_definition.types}, and roles: {reactant_component_definition.roles}" - ) + part_extract = _select_expected_digest_fragment( + digested_reactant=digested_reactant, + reactant_component_definition=reactant_component_definition, + expected_role="insert", + ) # Compute the length of single strand sticky ends or fusion sites product_5_prime_ss_strand, product_5_prime_ss_end = ( @@ -745,19 +765,11 @@ def backbone_digestion( ds_reactant = Dseqrecord(reactant_seq, circular=circular) digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) - if len(digested_reactant) < 2 or len(digested_reactant) > 3: - raise ValueError( - f"Not supported number of products. Found: {len(digested_reactant)}" - ) - # TODO select them based on content rather than size. - elif circular and len(digested_reactant) == 2: - _, backbone = sorted(digested_reactant, key=len) - elif linear and len(digested_reactant) == 3: - prefix, part_extract, suffix = digested_reactant - else: - raise ValueError( - f"Reactant {reactant_component_definition.displayId} has no valid topology type, with {len(digested_reactant)} digested products, types: {reactant_component_definition.types}, and roles: {reactant_component_definition.roles}" - ) + backbone = _select_expected_digest_fragment( + digested_reactant=digested_reactant, + reactant_component_definition=reactant_component_definition, + expected_role="backbone", + ) # Compute the length of single strand sticky ends or fusion sites product_5_prime_ss_strand, product_5_prime_ss_end = backbone.seq.five_prime_end() diff --git a/tests/unit/api/test_compiler_api.py b/tests/unit/api/test_compiler_api.py index 648d61e..37476dd 100644 --- a/tests/unit/api/test_compiler_api.py +++ b/tests/unit/api/test_compiler_api.py @@ -1,10 +1,28 @@ import sys import pytest +import sbol2 from buildcompiler.api import BuildCompiler, BuildOptions, full_build +class FakePartShop: + def __init__(self, *_args, **_kwargs): + self.key = None + self.pull_calls = [] + + def pull(self, identity, document, recursive=True): + self.pull_calls.append((identity, recursive)) + if document.find(identity) is None: + document.add(sbol2.ComponentDefinition(identity)) + + def login(self, *_args, **_kwargs): + return None + + def getKey(self): + return "fake-key" + + class FakePlanner: def __init__(self): self.calls = [] @@ -75,11 +93,37 @@ def test_from_synbiohub_placeholder_without_collection_loading(): assert isinstance(compiler, BuildCompiler) -def test_from_synbiohub_raises_when_collection_loading_is_requested(): - with pytest.raises(NotImplementedError, match="collection loading/indexing"): +def test_from_synbiohub_requires_repository_url_for_collections(): + with pytest.raises(ValueError, match="repository_url"): BuildCompiler.from_synbiohub(collections=["https://example.org/collection"]) +def test_from_synbiohub_rejects_mixed_auth_modes(): + with pytest.raises(ValueError, match="auth_token"): + BuildCompiler.from_synbiohub( + repository_url="https://example.org", + auth_token="token", + password="secret", + email="user@example.org", + ) + + +def test_from_synbiohub_pulls_collections_with_authenticated_client(monkeypatch): + monkeypatch.setattr("buildcompiler.sbol.repository.sbol2.PartShop", FakePartShop) + doc = sbol2.Document() + collection = "https://example.org/collection" + + compiler = BuildCompiler.from_synbiohub( + collections=[collection], + repository_url="https://example.org", + auth_token="token", + sbol_doc=doc, + ) + + assert compiler.repository_client is not None + assert compiler.repository_client.part_shop.pull_calls == [(collection, True)] + + def test_execute_raises_clear_error_without_dependencies(): compiler = BuildCompiler() compiler.plan([object()]) diff --git a/tests/unit/sbol/test_repository.py b/tests/unit/sbol/test_repository.py new file mode 100644 index 0000000..c8ee950 --- /dev/null +++ b/tests/unit/sbol/test_repository.py @@ -0,0 +1,85 @@ +import sbol2 + +from buildcompiler.sbol.repository import PartShopRepositoryClient + + +class FakePartShop: + def __init__(self): + self.key = None + self.login_calls = [] + self.pull_calls = [] + self._session_key = "session-key" + + def login(self, email, password): + self.login_calls.append((email, password)) + + def getKey(self): + return self._session_key + + def pull(self, identity, document, recursive=True): + self.pull_calls.append((identity, recursive)) + + +def test_repository_client_anonymous_pull(): + doc = sbol2.Document() + sbol2.setHomespace("https://example.org") + component = sbol2.ComponentDefinition("component") + doc.add(component) + part_shop = FakePartShop() + client = PartShopRepositoryClient( + repository_url="https://example.org", + document=doc, + part_shop=part_shop, + ) + + identity = component.identity + obj = client.pull_identity(identity) + + assert obj is not None + assert part_shop.pull_calls == [(identity, True)] + assert part_shop.login_calls == [] + + +def test_repository_client_uses_auth_token_without_login(): + doc = sbol2.Document() + part_shop = FakePartShop() + client = PartShopRepositoryClient( + repository_url="https://example.org", + document=doc, + auth_token="token-123", + part_shop=part_shop, + ) + + assert part_shop.key == "token-123" + assert client.auth_token == "token-123" + assert part_shop.login_calls == [] + + +def test_repository_client_logs_in_and_reuses_session_key(): + doc = sbol2.Document() + part_shop = FakePartShop() + client = PartShopRepositoryClient( + repository_url="https://example.org", + document=doc, + email="user@example.org", + password="secret", + part_shop=part_shop, + ) + + assert part_shop.login_calls == [("user@example.org", "secret")] + assert client.auth_token == "session-key" + + +def test_repository_client_repr_redacts_secrets(): + doc = sbol2.Document() + part_shop = FakePartShop() + client = PartShopRepositoryClient( + repository_url="https://example.org", + document=doc, + auth_token="token-123", + part_shop=part_shop, + ) + + rendered = repr(client) + assert "token-123" not in rendered + assert "secret" not in rendered From 7a3f767873614dfec016418e8b8bb73889fe6def Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Thu, 7 May 2026 14:08:13 -0600 Subject: [PATCH 2/7] Enforce single Golden Gate product and expand tutorial workflow --- ...ler_synbiohub_authenticated_tutorial.ipynb | 52 +++++++++++++++++++ src/buildcompiler/sbol/assembly.py | 10 ++++ tests/unit/sbol/test_assembly_service.py | 49 +++++++++++++++++ 3 files changed, 111 insertions(+) diff --git a/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb b/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb index 83fe8a6..06a74b8 100644 --- a/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb +++ b/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb @@ -88,6 +88,58 @@ "- If digest count is unexpected, simulation fails with a clear error message naming the reactant.\n", "- Successful assembly encodes reagent usage and links generated product implementations with `wasGeneratedBy` to one assembly activity per assembled design product." ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Pull the abstract design into the same in-memory document\n", + "if doc.find(abstract_design_uri) is None:\n", + " compiler.repository_client.pull_identity(abstract_design_uri)\n", + "\n", + "abstract_design = doc.find(abstract_design_uri)\n", + "print('Abstract design found:', abstract_design is not None, type(abstract_design).__name__)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run full build using the abstract design object\n", + "# Note: this assumes your pulled inventory contains compatible lvl1 part plasmids, backbone, and reagents.\n", + "result = compiler.full_build([abstract_design])\n", + "print('Build status:', result.status)\n", + "print('Stage results:', [(s.stage.value, s.status.value) for s in result.stage_results])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check whether lvl1 assembly was selected\n", + "lvl1 = [s for s in result.stage_results if s.stage.value == 'assembly_lvl1']\n", + "print('Lvl1 stage entries:', len(lvl1))\n", + "if lvl1:\n", + " print('Lvl1 status:', lvl1[0].status.value)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Access the best-practice SBOL build document\n", + "build_doc = result.build_document\n", + "print('Build document objects:', len(build_doc.componentDefinitions), 'CDs,', len(build_doc.implementations), 'implementations,', len(build_doc.activities), 'activities')\n", + "# build_doc.write('build_result.xml')\n" + ] } ], "metadata": { diff --git a/src/buildcompiler/sbol/assembly.py b/src/buildcompiler/sbol/assembly.py index fa877d9..d24c915 100644 --- a/src/buildcompiler/sbol/assembly.py +++ b/src/buildcompiler/sbol/assembly.py @@ -87,6 +87,15 @@ def run(self, job: AssemblyJob) -> AssemblySbolResult: legacy_products, final_doc = legacy_assembly.run( include_extracted_parts=job.include_extracted_parts ) + if len(legacy_products) != 1: + raise ValueError( + "Golden Gate ligation must produce exactly one assembled product; " + f"found {len(legacy_products)} for {job.product_identity}." + ) + if not getattr(legacy_products[0].plasmid_implementations[0], "wasGeneratedBy", None): + legacy_products[0].plasmid_implementations[ + 0 + ].wasGeneratedBy = legacy_assembly.assembly_activity.identity products = [ self._indexed_product_from_legacy_product(plasmid, job) @@ -95,6 +104,7 @@ def run(self, job: AssemblyJob) -> AssemblySbolResult: logs = [ f"Assembled {len(products)} product(s) at stage {job.stage.value}.", f"Assembly activity: {legacy_assembly.assembly_activity.identity}", + "Golden Gate simulation completed with 1 ligation product.", ] return AssemblySbolResult( diff --git a/tests/unit/sbol/test_assembly_service.py b/tests/unit/sbol/test_assembly_service.py index 2ef8475..c934e5b 100644 --- a/tests/unit/sbol/test_assembly_service.py +++ b/tests/unit/sbol/test_assembly_service.py @@ -79,3 +79,52 @@ def test_assembly_service_raises_clear_error_for_missing_component(): target_document=sbol2.Document(), ) ) + + +def test_assembly_service_requires_single_ligation_product(monkeypatch): + source = sbol2.Document() + component = sbol2.ComponentDefinition("assembled_product") + source.add(component) + impl = sbol2.Implementation("assembled_product_impl") + impl.built = component.identity + source.add(impl) + + class FakeLegacyAssembly: + def __init__(self, **kwargs): + self.assembly_activity = sbol2.Activity("fake_assembly") + + def run(self, include_extracted_parts=False): + product = type( + "LegacyProduct", + (), + {"plasmid_definition": component, "plasmid_implementations": [impl]}, + )() + return [product, product], source + + monkeypatch.setattr("buildcompiler.sbol.assembly.Assembly", FakeLegacyAssembly) + + service = AssemblyService() + with pytest.raises(ValueError, match="exactly one assembled product"): + service.run( + AssemblyJob( + stage=BuildStage.ASSEMBLY_LVL1, + product_identity="https://example.org/products/p001", + product_display_id="p001", + part_plasmids=[ + IndexedPlasmid( + identity=component.identity, + sbol_component=component, + metadata={"implementation_identity": impl.identity}, + ) + ], + backbone=IndexedBackbone( + identity=component.identity, + sbol_component=component, + metadata={"implementation_identity": impl.identity}, + ), + restriction_enzyme=IndexedReagent(identity=impl.identity), + ligase=IndexedReagent(identity=impl.identity), + source_document=source, + target_document=sbol2.Document(), + ) + ) From d079d20fdac04aaa67daafcb0f78e95fed200d01 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Thu, 7 May 2026 14:22:13 -0600 Subject: [PATCH 3/7] Auto-build inventory from SynBioHub collections in from_synbiohub --- src/buildcompiler/api/compiler.py | 97 +++++++++++++++++++++++++++++ tests/unit/api/test_compiler_api.py | 12 ++++ 2 files changed, 109 insertions(+) diff --git a/src/buildcompiler/api/compiler.py b/src/buildcompiler/api/compiler.py index 58cb23b..ffa1709 100644 --- a/src/buildcompiler/api/compiler.py +++ b/src/buildcompiler/api/compiler.py @@ -6,6 +6,8 @@ from typing import Any from buildcompiler.planning import FullBuildPlanner +from buildcompiler.domain import IndexedBackbone, IndexedPlasmid, IndexedReagent +from buildcompiler.inventory import Inventory from buildcompiler.sbol import PartShopRepositoryClient from .options import BuildOptions @@ -67,7 +69,17 @@ def from_synbiohub( for identity in collections: repository_client.pull_identity(identity) + inventory = kwargs.pop("inventory", None) + if inventory is None and collections and resolved_repository_url: + inventory = _inventory_from_collections( + collections=collections, + repository_url=resolved_repository_url, + auth_token=repository_client.auth_token if repository_client else None, + sbol_doc=document, + ) + return cls( + inventory=inventory, sbol_document=document, repository_client=repository_client, options=options or BuildOptions(), @@ -113,6 +125,91 @@ def full_build( return self.execute(plan, options=options) +def _inventory_from_collections( + *, + collections: list[str], + repository_url: str, + auth_token: str | None, + sbol_doc: Any, +) -> Inventory: + from buildcompiler.buildcompiler import BuildCompiler as LegacyBuildCompiler + + legacy = LegacyBuildCompiler( + collections=collections, + sbh_registry=repository_url, + auth_token=auth_token or "", + sbol_doc=sbol_doc, + ) + + plasmids = [] + for entry in legacy.indexed_plasmids: + plasmids.append( + IndexedPlasmid( + identity=entry.plasmid_definition.identity, + display_id=entry.plasmid_definition.displayId, + name=entry.name, + metadata={ + "fusion_sites": list(entry.fusion_sites or []), + "antibiotic": entry.antibiotic_resistance, + "insert_identities": [ + c.definition for c in entry.plasmid_definition.components + ], + "implementation_identity": ( + entry.plasmid_implementations[0].identity + if entry.plasmid_implementations + else None + ), + }, + sbol_component=entry.plasmid_definition, + ) + ) + + backbones = [] + for entry in legacy.indexed_backbones: + backbones.append( + IndexedBackbone( + identity=entry.plasmid_definition.identity, + display_id=entry.plasmid_definition.displayId, + name=entry.name, + metadata={ + "fusion_sites": list(entry.fusion_sites or []), + "antibiotic": entry.antibiotic_resistance, + "implementation_identity": ( + entry.plasmid_implementations[0].identity + if entry.plasmid_implementations + else None + ), + }, + sbol_component=entry.plasmid_definition, + ) + ) + + reagents = [] + for impl in legacy.restriction_enzyme_implementations: + built = sbol_doc.find(impl.built) + reagents.append( + IndexedReagent( + identity=impl.identity, + display_id=impl.displayId, + name=getattr(built, "displayId", None), + reagent_type="restriction_enzyme", + metadata={"implementation_identity": impl.identity}, + ) + ) + for impl in legacy.ligase_implementations: + built = sbol_doc.find(impl.built) + reagents.append( + IndexedReagent( + identity=impl.identity, + display_id=impl.displayId, + name=getattr(built, "displayId", None), + reagent_type="ligase", + metadata={"implementation_identity": impl.identity}, + ) + ) + return Inventory(plasmids=plasmids, backbones=backbones, reagents=reagents) + + def full_build( abstract_designs: Any, *, diff --git a/tests/unit/api/test_compiler_api.py b/tests/unit/api/test_compiler_api.py index 37476dd..f7e129f 100644 --- a/tests/unit/api/test_compiler_api.py +++ b/tests/unit/api/test_compiler_api.py @@ -23,6 +23,14 @@ def getKey(self): return "fake-key" +class FakeLegacyBuildCompiler: + def __init__(self, collections, sbh_registry, auth_token, sbol_doc): + self.indexed_plasmids = [] + self.indexed_backbones = [] + self.restriction_enzyme_implementations = [] + self.ligase_implementations = [] + + class FakePlanner: def __init__(self): self.calls = [] @@ -110,6 +118,9 @@ def test_from_synbiohub_rejects_mixed_auth_modes(): def test_from_synbiohub_pulls_collections_with_authenticated_client(monkeypatch): monkeypatch.setattr("buildcompiler.sbol.repository.sbol2.PartShop", FakePartShop) + monkeypatch.setattr( + "buildcompiler.buildcompiler.BuildCompiler", FakeLegacyBuildCompiler + ) doc = sbol2.Document() collection = "https://example.org/collection" @@ -122,6 +133,7 @@ def test_from_synbiohub_pulls_collections_with_authenticated_client(monkeypatch) assert compiler.repository_client is not None assert compiler.repository_client.part_shop.pull_calls == [(collection, True)] + assert compiler.inventory is not None def test_execute_raises_clear_error_without_dependencies(): From 7626002fb6442b291fff2f5f01b87c8c2ccaabe4 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Thu, 7 May 2026 14:53:20 -0600 Subject: [PATCH 4/7] Fix lvl1 planning constraints and add notebook debug report cell --- ...iler_synbiohub_authenticated_tutorial.ipynb | 18 ++++++++++++++++++ src/buildcompiler/planning/classifier.py | 10 +++++++++- tests/unit/planning/test_classifier.py | 6 ++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb b/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb index 06a74b8..646ea6e 100644 --- a/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb +++ b/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb @@ -140,6 +140,24 @@ "print('Build document objects:', len(build_doc.componentDefinitions), 'CDs,', len(build_doc.implementations), 'implementations,', len(build_doc.activities), 'activities')\n", "# build_doc.write('build_result.xml')\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Enable detailed reporting and inspect full diagnostic report when builds fail\n", + "from buildcompiler.api import BuildOptions\n", + "\n", + "debug_options = BuildOptions()\n", + "debug_options.reporting.include_detailed_report = True\n", + "debug_result = compiler.full_build([abstract_design], options=debug_options)\n", + "print('Debug status:', debug_result.status)\n", + "print('Missing inputs:', [m.missing_identity for m in debug_result.missing_inputs])\n", + "print('Warnings:', [w.message if hasattr(w, 'message') else str(w) for w in debug_result.warnings])\n", + "debug_result.report\n" + ] } ], "metadata": { diff --git a/src/buildcompiler/planning/classifier.py b/src/buildcompiler/planning/classifier.py index 5d666ae..e94f7b3 100644 --- a/src/buildcompiler/planning/classifier.py +++ b/src/buildcompiler/planning/classifier.py @@ -9,7 +9,7 @@ from buildcompiler.domain import BuildRequest, BuildStage, DesignKind from buildcompiler.planning.models import UnsupportedPlanningRecord -from buildcompiler.planning.validation import classify_part_role +from buildcompiler.planning.validation import classify_part_role, ordered_lvl1_parts RECOMMENDED_LVL1_PARTS = ("promoter", "rbs", "cds", "terminator") @@ -85,6 +85,14 @@ def classify_non_combinatorial( design.identity, design.displayId, DesignKind.COMPONENT_DEFINITION, + constraints={ + "ordered_part_identities": ( + ordered_lvl1_parts(design)[0] + or [component.definition for component in design.components] + ), + "product_identity": design.identity, + "product_display_id": design.displayId, + }, ) if count <= 1 and classify_part_role(design) is not None: return BuildRequest( diff --git a/tests/unit/planning/test_classifier.py b/tests/unit/planning/test_classifier.py index 961e915..f42af4d 100644 --- a/tests/unit/planning/test_classifier.py +++ b/tests/unit/planning/test_classifier.py @@ -26,6 +26,12 @@ def test_classifier_maps_module_and_components(): er.components.create("c4").definition = t.identity out2 = classify_non_combinatorial(er) assert out2.stage == BuildStage.ASSEMBLY_LVL1 + assert out2.constraints["ordered_part_identities"] == [ + p.identity, + r.identity, + c.identity, + t.identity, + ] def test_classifier_warns_for_invalid_lvl1_part_mix(): From 08894d7d82f305902d8fc66b4257cc76c4b2a36c Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Thu, 7 May 2026 15:24:46 -0600 Subject: [PATCH 5/7] Infer lvl1 backbone from part fusion sites and antibiotic profile --- src/buildcompiler/inventory/selector.py | 44 +++++++++++++++++++++++-- tests/unit/inventory/test_selector.py | 23 ++++++++++++- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/buildcompiler/inventory/selector.py b/src/buildcompiler/inventory/selector.py index 1b712e9..84f7dbc 100644 --- a/src/buildcompiler/inventory/selector.py +++ b/src/buildcompiler/inventory/selector.py @@ -74,11 +74,32 @@ def select_lvl1_route(self, *, request_id: str, part_identities: Sequence[str], else: selected.append(choice) + inferred_fusion_sites = None + inferred_backbone_antibiotic = active_constraints.get("backbone_antibiotic") + if selected and "fusion_sites" not in active_constraints: + inferred_fusion_sites = self._infer_backbone_fusion_sites(selected) + if inferred_backbone_antibiotic is None: + inferred_backbone_antibiotic = self._infer_backbone_antibiotic(selected) + backbone = self.inventory.find_backbone( - fusion_sites=tuple(active_constraints["fusion_sites"]) if "fusion_sites" in active_constraints else None, - antibiotic=active_constraints.get("antibiotic"), + fusion_sites=( + tuple(active_constraints["fusion_sites"]) + if "fusion_sites" in active_constraints + else inferred_fusion_sites + ), + antibiotic=inferred_backbone_antibiotic, stage=BuildStage.ASSEMBLY_LVL1, ) + if backbone is None: + backbone = self.inventory.find_backbone( + fusion_sites=( + tuple(active_constraints["fusion_sites"]) + if "fusion_sites" in active_constraints + else inferred_fusion_sites + ), + antibiotic=inferred_backbone_antibiotic, + stage=None, + ) score = RouteScore( missing_required_products=len(missing), missing_domestications=len(missing), @@ -91,6 +112,25 @@ def select_lvl1_route(self, *, request_id: str, part_identities: Sequence[str], route = Lvl1Route(request_id, tuple(part_identities), tuple(selected), tuple(missing), backbone, score) return RouteSelection(selected=route, rejected=()) + def _infer_backbone_fusion_sites(self, selected: list[Any]) -> tuple[str, ...] | None: + if len(selected) < 2: + return None + parsed = [] + for plasmid in selected: + sites = tuple((plasmid.metadata or {}).get("fusion_sites", ())) + if len(sites) != 2: + return None + parsed.append(sites) + first = parsed[0][0] + last = parsed[-1][1] + return (first, last) + + def _infer_backbone_antibiotic(self, selected: list[Any]) -> str | None: + antibiotics = {p.metadata.get("antibiotic") for p in selected if p.metadata.get("antibiotic")} + if antibiotics == {"Ampicillin"}: + return "Kanamycin" + return None + def select_lvl2_route(self, *, request_id: str, region_identities: Sequence[str], constraints: Mapping[str, Any] | None = None) -> RouteSelection: active_constraints = constraints or {} max_regions = self.options.planning.lvl2_search.max_exhaustive_region_count diff --git a/tests/unit/inventory/test_selector.py b/tests/unit/inventory/test_selector.py index fe4c03c..dc47a86 100644 --- a/tests/unit/inventory/test_selector.py +++ b/tests/unit/inventory/test_selector.py @@ -1,5 +1,5 @@ from buildcompiler.api import BuildOptions -from buildcompiler.domain import IndexedPlasmid, MaterialState +from buildcompiler.domain import IndexedBackbone, IndexedPlasmid, MaterialState from buildcompiler.inventory import CompatibilitySelector, Inventory @@ -71,3 +71,24 @@ def test_lvl2_constrained_order_must_match_requested_regions(): assert out.selected is None assert out.rejected assert out.rejected[0].missing_region_identities == ("https://e/r0", "https://e/r1") + + +def test_lvl1_infers_backbone_fusion_sites_and_kan_backbone_for_amp_parts(): + inv = Inventory( + plasmids=[ + IndexedPlasmid(identity="pAB", metadata={"insert_identities": ["A"], "fusion_sites": ["A", "B"], "antibiotic": "Ampicillin"}), + IndexedPlasmid(identity="pBC", metadata={"insert_identities": ["B"], "fusion_sites": ["B", "C"], "antibiotic": "Ampicillin"}), + IndexedPlasmid(identity="pCD", metadata={"insert_identities": ["C"], "fusion_sites": ["C", "D"], "antibiotic": "Ampicillin"}), + IndexedPlasmid(identity="pDE", metadata={"insert_identities": ["D"], "fusion_sites": ["D", "E"], "antibiotic": "Ampicillin"}), + ], + backbones=[ + IndexedBackbone( + identity="bbAEkan", + metadata={"fusion_sites": ["A", "E"], "antibiotic": "Kanamycin"}, + ) + ], + ) + sel = CompatibilitySelector(inv) + route = sel.select_lvl1_route(request_id="r1", part_identities=["A", "B", "C", "D"]).selected + assert route.backbone is not None + assert route.backbone.identity == "bbAEkan" From 7d00a4b235277ede727f0d5b62eaab712d0dab1f Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Thu, 7 May 2026 16:40:16 -0600 Subject: [PATCH 6/7] Avoid duplicate SBOL adds during assembly product creation --- src/buildcompiler/sbol2build.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 49151f1..fe72a44 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -132,7 +132,7 @@ def run( self.ligase, ) - self.final_document.add(self.assembly_activity) + add_object_to_doc(self.assembly_activity, self.final_document) composite_plasmid_objs = [ Plasmid( @@ -1169,12 +1169,9 @@ def ligation( composite_implementation.built = composite_component_definition.identity composite_implementation.wasGeneratedBy = assembly_activity.identity - source_document.add_list( - [composite_component_definition, composite_seq, composite_implementation] - ) - final_document.add_list( - [composite_component_definition, composite_seq, composite_implementation] - ) + for obj in (composite_component_definition, composite_seq, composite_implementation): + add_object_to_doc(obj, source_document) + add_object_to_doc(obj, final_document) product_impl_list.append(composite_implementation) composite_number += 1 From 422d0d51da6eacc70789aaaac3a44ff3a2c10c01 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Thu, 7 May 2026 21:30:50 -0600 Subject: [PATCH 7/7] Add robust assembly links and optional validation workflow --- ...ler_synbiohub_authenticated_tutorial.ipynb | 34 ++++++++++ src/buildcompiler/sbol/assembly.py | 66 +++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb b/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb index 646ea6e..5d2493a 100644 --- a/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb +++ b/notebooks/buildcompiler_synbiohub_authenticated_tutorial.ipynb @@ -158,6 +158,40 @@ "print('Warnings:', [w.message if hasattr(w, 'message') else str(w) for w in debug_result.warnings])\n", "debug_result.report\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Validate full build document\n", + "build_doc.validate()\n", + "print('Full build document validation completed.')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Build a focused assembly-only document and validate\n", + "assembly_doc = sbol2.Document()\n", + "for activity in build_doc.activities:\n", + " if 'assembly' in activity.displayId.lower():\n", + " activity.copy(assembly_doc)\n", + "\n", + "for impl in build_doc.implementations:\n", + " if getattr(impl, 'wasGeneratedBy', None):\n", + " impl.copy(assembly_doc)\n", + " built_obj = build_doc.find(impl.built)\n", + " if built_obj is not None:\n", + " built_obj.copy(assembly_doc)\n", + "\n", + "assembly_doc.validate()\n", + "print('Assembly-only document validation completed.')\n" + ] } ], "metadata": { diff --git a/src/buildcompiler/sbol/assembly.py b/src/buildcompiler/sbol/assembly.py index d24c915..df7d33d 100644 --- a/src/buildcompiler/sbol/assembly.py +++ b/src/buildcompiler/sbol/assembly.py @@ -101,11 +101,20 @@ def run(self, job: AssemblyJob) -> AssemblySbolResult: self._indexed_product_from_legacy_product(plasmid, job) for plasmid in legacy_products ] + self._ensure_minimal_assembly_links( + job=job, + final_doc=final_doc, + activity_identity=legacy_assembly.assembly_activity.identity, + product_identity=legacy_products[0].plasmid_implementations[0].identity, + ) + validation_warning = self._maybe_validate_document(final_doc) logs = [ f"Assembled {len(products)} product(s) at stage {job.stage.value}.", f"Assembly activity: {legacy_assembly.assembly_activity.identity}", "Golden Gate simulation completed with 1 ligation product.", ] + if validation_warning: + logs.append(validation_warning) return AssemblySbolResult( products=products, @@ -114,6 +123,63 @@ def run(self, job: AssemblyJob) -> AssemblySbolResult: logs=logs, ) + def _ensure_minimal_assembly_links( + self, + *, + job: AssemblyJob, + final_doc: sbol2.Document, + activity_identity: str, + product_identity: str, + ) -> None: + activity = final_doc.find(activity_identity) + if not isinstance(activity, sbol2.Activity): + activity = sbol2.Activity(activity_identity) + final_doc.add(activity) + + usage_entities = {usage.entity for usage in activity.usages} + + def _add_usage(entity_identity: str, usage_id: str) -> None: + if entity_identity in usage_entities: + return + usage = sbol2.Usage(uri=usage_id, entity=entity_identity, role=sbol2.SBO_REACTANT) + activity.usages.add(usage) + usage_entities.add(entity_identity) + + for idx, plasmid in enumerate(job.part_plasmids): + impl = self._implementation_from_plasmid_record(plasmid, job.source_document) + _add_usage(impl.identity, f"part_{idx}") + + backbone_impl = self._implementation_from_plasmid_record( + IndexedPlasmid( + identity=job.backbone.identity, + display_id=job.backbone.display_id, + name=job.backbone.name, + metadata=job.backbone.metadata, + sbol_component=job.backbone.sbol_component, + ), + job.source_document, + ) + _add_usage(backbone_impl.identity, "backbone") + + restriction_impl = self._implementation_from_record(job.restriction_enzyme, job.source_document) + ligase_impl = self._implementation_from_record(job.ligase, job.source_document) + _add_usage(restriction_impl.identity, "restriction_enzyme") + _add_usage(ligase_impl.identity, "ligase") + + product_impl = final_doc.find(product_identity) + if isinstance(product_impl, sbol2.Implementation): + product_impl.wasGeneratedBy = activity.identity + + def _maybe_validate_document(self, document: sbol2.Document) -> str | None: + try: + document.validate() + return None + except Exception: + return ( + "Document validation skipped due to unavailable SBOL validator service; " + "run build_doc.validate() in a network-enabled environment." + ) + def _record_to_legacy_plasmid( self, record: IndexedPlasmid,