diff --git a/examples/notebook_70_oci_tools.py b/examples/notebook_70_oci_tools.py
index 76c3163..a59c4f8 100644
--- a/examples/notebook_70_oci_tools.py
+++ b/examples/notebook_70_oci_tools.py
@@ -74,11 +74,26 @@ from typing import Any
 
 
 
-def _env(name: str, default: str | None = None) -> str:
-    val = os.environ.get(name, default)
+def _env(name: str, default: str | None = None, *, fallbacks: tuple[str, ...] = ()) -> str:
+    """Read env var ``name``; fall back to any of ``fallbacks`` if unset.
+
+    Supports the ``OCI_USE_*`` aliases documented in this notebook AND
+    the standard ``OCI_PROFILE`` / ``OCI_REGION`` / ``OCI_COMPARTMENT``
+    envelope, so users with stock OCI environment variables don't have
+    to re-export anything just to run this notebook.
+    """
+    val = os.environ.get(name)
     if not val:
+        for fb in fallbacks:
+            val = os.environ.get(fb)
+            if val:
+                break
+    if not val:
+        val = default
+    if not val:
+        tried = [name, *fallbacks]
         sys.stderr.write(
-            f"missing env var {name} — see the prerequisites in the notebook docstring\n"
+            f"missing env var (tried {tried}) — see the prerequisites in the notebook docstring\n"
         )
         sys.exit(2)
     return val
@@ -139,9 +154,9 @@ async def part2_execute() -> None:
     """Call real OCI services directly through use_oci."""
     from locus.tools import use_oci
 
-    profile = _env("OCI_USE_PROFILE")
-    region = _env("OCI_USE_REGION")
-    tenancy = _env("OCI_USE_TENANCY")
+    profile = _env("OCI_USE_PROFILE", fallbacks=("OCI_PROFILE",))
+    region = _env("OCI_USE_REGION", fallbacks=("OCI_REGION", "OCI_GENAI_REGION"))
+    tenancy = _env("OCI_USE_TENANCY", fallbacks=("OCI_COMPARTMENT", "OCI_TENANCY"))
 
     print(f"=== use_oci — direct dispatch (profile={profile}, region={region}) ===\n")
 
@@ -234,11 +249,11 @@ async def part3_agent() -> None:
     from locus.models import get_model
     from locus.tools import describe_oci, use_oci
 
-    use_profile = _env("OCI_USE_PROFILE")
-    use_region = _env("OCI_USE_REGION")
-    tenancy = _env("OCI_USE_TENANCY")
-    genai_profile = _env("OCI_GENAI_PROFILE")
-    genai_region = _env("OCI_GENAI_REGION", "us-chicago-1")
+    use_profile = _env("OCI_USE_PROFILE", fallbacks=("OCI_PROFILE",))
+    use_region = _env("OCI_USE_REGION", fallbacks=("OCI_REGION", "OCI_GENAI_REGION"))
+    tenancy = _env("OCI_USE_TENANCY", fallbacks=("OCI_COMPARTMENT", "OCI_TENANCY"))
+    genai_profile = _env("OCI_GENAI_PROFILE", fallbacks=("OCI_PROFILE",))
+    genai_region = _env("OCI_GENAI_REGION", "us-chicago-1", fallbacks=("OCI_REGION",))
 
     print(
         f"=== Agent loop (model via {genai_profile}@{genai_region}, "
diff --git a/src/locus/models/providers/oci/client.py b/src/locus/models/providers/oci/client.py
index e8545a1..5bb2612 100644
--- a/src/locus/models/providers/oci/client.py
+++ b/src/locus/models/providers/oci/client.py
@@ -68,6 +68,25 @@ class OCIClientConfig(BaseModel):
     auth_type: OCIAuthType = Field(default=OCIAuthType.API_KEY, description="Auth type")
     compartment_id: str | None = Field(default=None, description="OCI compartment OCID")
     service_endpoint: str | None = Field(default=None, description="Full service endpoint URL")
+    # HTTP timeouts in seconds for the underlying OCI Python SDK
+    # ``GenerativeAiInferenceClient``. The SDK defaults to ``(10, 60)``
+    # (connect, read); 60s read is not enough for reasoning models
+    # (gpt-5.5, o-series, etc.) doing long-form summarization in
+    # orchestrator/swarm flows, where the first response token can take
+    # 90-180 seconds to arrive. Bump the read timeout to 300s by
+    # default; callers needing tighter latency contracts can override.
+    connect_timeout: float = Field(
+        default=10.0,
+        description="HTTP connect timeout in seconds.",
+    )
+    read_timeout: float = Field(
+        default=300.0,
+        description=(
+            "HTTP read timeout in seconds. Default 300s accommodates "
+            "reasoning-model summarization (gpt-5.5, o-series) which "
+            "can sit on the wire for 90-180s before the first token."
+        ),
+    )
 
     model_config = {"extra": "allow"}
 
@@ -161,6 +180,7 @@ def _create_client(self) -> GenerativeAiInferenceClient:
         return GenerativeAiInferenceClient(
             config=self.oci_config,
             service_endpoint=self.config.service_endpoint,
+            timeout=(self.config.connect_timeout, self.config.read_timeout),
         )
 
     def _create_security_token_client(self) -> GenerativeAiInferenceClient:
@@ -207,6 +227,7 @@ def _create_security_token_client(self) -> GenerativeAiInferenceClient:
             config=oci_cfg,
             signer=signer,
             service_endpoint=self.config.service_endpoint,
+            timeout=(self.config.connect_timeout, self.config.read_timeout),
         )
 
     def _create_instance_principal_client(self) -> GenerativeAiInferenceClient:
@@ -223,6 +244,7 @@ def _create_instance_principal_client(self) -> GenerativeAiInferenceClient:
             config={},
             signer=signer,
             service_endpoint=self.config.service_endpoint,
+            timeout=(self.config.connect_timeout, self.config.read_timeout),
         )
 
     def _create_resource_principal_client(self) -> GenerativeAiInferenceClient:
@@ -239,6 +261,7 @@ def _create_resource_principal_client(self) -> GenerativeAiInferenceClient:
             config={},
             signer=signer,
             service_endpoint=self.config.service_endpoint,
+            timeout=(self.config.connect_timeout, self.config.read_timeout),
         )
 
     def get_serving_mode(self, model_id: str) -> Any:
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index ec4e903..799ce2b 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -213,13 +213,22 @@ def _build_model():
         if endpoint and compartment:
             from locus.models.providers.oci import OCIModel
 
+            # max_tokens=8192 is a ceiling, not a target. Reasoning
+            # models (gpt-5.5, o-series, etc.) burn 200-2000+ tokens
+            # of hidden chain-of-thought before producing any visible
+            # output; at 512 they typically return empty content with
+            # finish_reason='length', which surfaces in orchestrator /
+            # swarm tests as ``summary=''``. 8192 leaves room for both
+            # the reasoning trace and a normal-length response without
+            # being wasteful on short-answer tests (the model stops
+            # naturally when done).
             return OCIModel(
                 model_id=model_id,
                 profile_name=os.getenv("OCI_PROFILE", "DEFAULT"),
                 auth_type=os.getenv("OCI_AUTH_TYPE", "api_key"),
                 service_endpoint=endpoint,
                 compartment_id=compartment,
-                max_tokens=512,
+                max_tokens=8192,
             )
 
     # OpenAI fallback
@@ -227,7 +236,7 @@ def _build_model():
         from locus.models.native.openai import OpenAIModel
 
         model_id = os.getenv("OPENAI_MODEL_ID", "gpt-4o-mini")
-        return OpenAIModel(model=model_id, max_tokens=512)
+        return OpenAIModel(model=model_id, max_tokens=8192)
 
     # Anthropic fallback — cheapest path for non-OCI iteration.
     if anthropic_available():
diff --git a/tests/integration/test_notebooks_all_live.py b/tests/integration/test_notebooks_all_live.py
index ce5b35c..bc4d254 100644
--- a/tests/integration/test_notebooks_all_live.py
+++ b/tests/integration/test_notebooks_all_live.py
@@ -56,9 +56,13 @@ def _has_oci_config() -> bool:
 # the default and override per-notebook below.
 _DEFAULT_TIMEOUT = 360
 _NOTEBOOK_TIMEOUT_OVERRIDES: dict[str, int] = {
-    # notebook_40_emergent_routing: 5 dispatches × 2-3 LLM calls each
-    # through a reasoning model — empirical wall time ~7-9 min.
-    "notebook_40_emergent_routing.py": 900,
+    # notebook_34_emergent_routing: 5 dispatches × 2-3 LLM calls each
+    # through a reasoning model — empirical wall time ~7-9 min. The
+    # filename used to be ``notebook_40_emergent_routing.py``; this
+    # override key was stale after the catalogue renumbering and let
+    # the test fall through to the ``_DEFAULT_TIMEOUT`` (360s), which
+    # isn't enough — the subprocess was getting SIGKILL'd at 6 min.
+    "notebook_34_emergent_routing.py": 900,
 }
 
 
diff --git a/tests/integration/test_notebooks_subset.py b/tests/integration/test_notebooks_subset.py
index 8d8dc0f..9bfd4ae 100644
--- a/tests/integration/test_notebooks_subset.py
+++ b/tests/integration/test_notebooks_subset.py
@@ -786,77 +786,83 @@ def test_sse_response_headers(self):
 class TestNotebookExecution:
     """Tests that run actual notebooks (with mock model)."""
 
+    # Test method names mirror the current ``examples/notebook_NN_*.py``
+    # numbering. The notebooks have been renumbered a few times during
+    # development; the previous test methods pointed at filenames that
+    # no longer exist on disk and were failing with FileNotFoundError.
+    # Keep these in sync with the actual ``examples/`` layout.
+
     @pytest.mark.asyncio
-    async def test_notebook_36_runs(self):
-        """Test that notebook 35 runs without error."""
+    async def test_notebook_13_runs(self):
+        """Smoke: notebook 13 (SSE streaming) executes cleanly."""
         import subprocess
         import sys
 
         result = subprocess.run(
-            [sys.executable, "examples/notebook_41_structured_output.py"],
+            [sys.executable, "examples/notebook_13_sse_streaming.py"],
             capture_output=True,
             text=True,
             timeout=60,
             check=False,
         )
-        assert result.returncode == 0, f"Notebook 36 failed: {result.stderr}"
+        assert result.returncode == 0, f"Notebook 13 failed: {result.stderr}"
 
     @pytest.mark.asyncio
-    async def test_notebook_37_runs(self):
-        """Test that notebook 36 runs without error."""
+    async def test_notebook_35_runs(self):
+        """Smoke: notebook 35 (structured output) executes cleanly."""
         import subprocess
         import sys
 
         result = subprocess.run(
-            [sys.executable, "examples/notebook_42_reasoning_patterns.py"],
+            [sys.executable, "examples/notebook_35_structured_output.py"],
             capture_output=True,
             text=True,
             timeout=60,
             check=False,
         )
-        assert result.returncode == 0, f"Notebook 37 failed: {result.stderr}"
+        assert result.returncode == 0, f"Notebook 35 failed: {result.stderr}"
 
     @pytest.mark.asyncio
-    async def test_notebook_43_runs(self):
-        """Test that notebook 42 runs without error."""
+    async def test_notebook_36_runs(self):
+        """Smoke: notebook 36 (reasoning patterns) executes cleanly."""
         import subprocess
         import sys
 
         result = subprocess.run(
-            [sys.executable, "examples/notebook_48_playbooks.py"],
+            [sys.executable, "examples/notebook_36_reasoning_patterns.py"],
             capture_output=True,
             text=True,
             timeout=60,
             check=False,
         )
-        assert result.returncode == 0, f"Notebook 43 failed: {result.stderr}"
+        assert result.returncode == 0, f"Notebook 36 failed: {result.stderr}"
 
     @pytest.mark.asyncio
-    async def test_notebook_49_runs(self):
-        """Test that notebook 48 runs without error."""
+    async def test_notebook_46_runs(self):
+        """Smoke: notebook 46 (playbooks) executes cleanly."""
         import subprocess
         import sys
 
         result = subprocess.run(
-            [sys.executable, "examples/notebook_54_checkpoint_backends.py"],
+            [sys.executable, "examples/notebook_46_playbooks.py"],
             capture_output=True,
             text=True,
             timeout=60,
             check=False,
         )
-        assert result.returncode == 0, f"Notebook 49 failed: {result.stderr}"
+        assert result.returncode == 0, f"Notebook 46 failed: {result.stderr}"
 
     @pytest.mark.asyncio
-    async def test_notebook_14_runs(self):
-        """Test that notebook 13 runs without error."""
+    async def test_notebook_52_runs(self):
+        """Smoke: notebook 52 (checkpoint backends) executes cleanly."""
         import subprocess
         import sys
 
         result = subprocess.run(
-            [sys.executable, "examples/notebook_19_sse_streaming.py"],
+            [sys.executable, "examples/notebook_52_checkpoint_backends.py"],
             capture_output=True,
             text=True,
             timeout=60,
             check=False,
         )
-        assert result.returncode == 0, f"Notebook 14 failed: {result.stderr}"
+        assert result.returncode == 0, f"Notebook 52 failed: {result.stderr}"
diff --git a/tests/integration/test_workbench_categories.py b/tests/integration/test_workbench_categories.py
index ac2cb30..9220aab 100644
--- a/tests/integration/test_workbench_categories.py
+++ b/tests/integration/test_workbench_categories.py
@@ -64,7 +64,17 @@ def test_endpoint_returns_curated_categories(self, client: TestClient) -> None:
         # Must include the cardinal sections — these power the user-
         # facing learning path. Drift here = the README / nav docs are
         # describing categories that no longer exist.
-        for required in ("fundamentals", "graphs", "multi-agent", "router", "observability"):
+        #
+        # ``router-observability`` is a single combined category — the
+        # cognitive router and the EventBus observability surface ship
+        # together as one learning track, and the workbench reflects that
+        # in its NOTEBOOK_CATEGORIES list.
+        for required in (
+            "fundamentals",
+            "graphs",
+            "multi-agent",
+            "router-observability",
+        ):
             assert required in ids, f"missing notebook category: {required}"
         for c in cats:
             assert c["name"], f"category {c['id']} has empty name"
@@ -78,16 +88,20 @@ def test_every_notebook_has_known_category(self, client: TestClient) -> None:
                 f"notebook {t['id']} has unknown category {t.get('category')!r}"
             )
 
-    def test_observability_category_contains_new_sse_notebooks(self, client: TestClient) -> None:
-        """Notebooks 52-55 (the SSE retrofit suite) must live under
-        ``observability`` so the sidebar surfaces them as a group."""
-        obs_numbers = sorted(
+    def test_router_observability_groups_router_plus_eventbus(self, client: TestClient) -> None:
+        """The combined ``router-observability`` track must surface the
+        cognitive router (notebook 58) and the EventBus / observability
+        notebooks (59, 60, 61) as a single sidebar group. Drift here
+        means the curated learning path lost a notebook to ``misc``."""
+        track_numbers = sorted(
             t["number"]
             for t in client.get("/api/notebooks").json()
-            if t.get("category") == "observability"
+            if t.get("category") == "router-observability"
         )
-        for n in (52, 53, 54, 55):
-            assert n in obs_numbers, f"notebook {n} missing from 'observability'"
+        for n in (58, 59, 60, 61):
+            assert n in track_numbers, (
+                f"notebook {n} missing from 'router-observability' (got {track_numbers})"
+            )
 
     def test_notebooks_sorted_by_category_then_order(self, client: TestClient) -> None:
         """The catalogue is pre-sorted by (category position,
diff --git a/tests/unit/test_oci_client.py b/tests/unit/test_oci_client.py
index b72ff21..2aba7d4 100644
--- a/tests/unit/test_oci_client.py
+++ b/tests/unit/test_oci_client.py
@@ -293,6 +293,7 @@ def test_instance_principal_client_creation(self, mock_signer_class, mock_client
             config={},
             signer=mock_signer,
             service_endpoint="https://test.endpoint.com",
+            timeout=(10.0, 300.0),
         )
         assert result == mock_client
 