diff --git a/.github/workflows/harness-integration.yml b/.github/workflows/harness-integration.yml index 075ee5cf3..ab20929a8 100644 --- a/.github/workflows/harness-integration.yml +++ b/.github/workflows/harness-integration.yml @@ -7,8 +7,7 @@ on: paths: - "src/agentex/lib/core/harness/**" - "src/agentex/lib/adk/_modules/**" - - "tests/lib/core/harness/test_harness_pydantic_ai_*.py" - - "tests/lib/core/harness/test_harness_langgraph_*.py" + - "tests/lib/core/harness/test_harness_*.py" - ".github/workflows/harness-integration.yml" jobs: @@ -34,14 +33,15 @@ jobs: run: ./scripts/test tests/lib/core/harness/ -v # Offline harness integration tests (sync / async / temporal channels) for each - # migrated harness. These use fake streams / TestModel + fake streaming/tracing - # and require no live infrastructure. Future harness migration PRs (6-8) add - # their harness to the matrix below and their test paths to the triggers above. + # harness. These use fake streams / TestModel + fake streaming/tracing and + # require no live infrastructure. All five harnesses are now covered; the + # trigger above uses a `test_harness_*.py` glob so new suites are picked up + # automatically. 
live-matrix: runs-on: ubuntu-latest strategy: matrix: - harness: [pydantic_ai, langgraph] + harness: [pydantic_ai, langgraph, openai, claude_code, codex] channel: [sync, async, temporal] fail-fast: false name: ${{ matrix.harness }}-${{ matrix.channel }} diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 9a40fa434..17d037516 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,4 +1,4 @@ { - ".": "0.15.0", - "adk": "0.14.0" + ".": "0.16.0", + "adk": "0.15.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index a9b0590c8..9dbedc3c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,46 @@ ## Unreleased +### ⚠ BREAKING CHANGES + +* **harness:** removed the deprecated bespoke LangGraph tracing handler `create_langgraph_tracing_handler` (and its `AgentexLangGraphTracingHandler` class) from the public `agentex.lib.adk` surface. Span tracing is now derived from the canonical `StreamTaskMessage*` stream by `UnifiedEmitter` — wrap your run in the harness `*Turn` and drive `UnifiedEmitter.yield_turn` / `auto_send_turn`. The `agentex init` templates were migrated accordingly. +* **harness:** removed the deprecated bespoke Pydantic-AI tracing handler `create_pydantic_ai_tracing_handler` (and its `AgentexPydanticAITracingHandler` class) from the public `agentex.lib.adk` surface. Span tracing is now derived from the canonical `StreamTaskMessage*` stream by `UnifiedEmitter` — wrap your run in `PydanticAITurn` and drive `UnifiedEmitter.yield_turn` / `auto_send_turn`. The `agentex init` templates were migrated accordingly. +* **harness:** each harness now exposes exactly `_<harness>_sync.py` + `_<harness>_turn.py` under `agentex.lib.adk._modules`. The OpenAI harness `OpenAITurn` and `convert_openai_to_agentex_events` moved to `agentex.lib.adk._modules._openai_turn` / `_openai_sync`; back-compat shims remain at `agentex.lib.adk.providers._modules.{openai_turn,sync_provider}` for one release.
Public facade names (`stream_pydantic_ai_events`, `stream_langgraph_events`, `emit_langgraph_messages`, etc.) are unchanged. + ### Features * **tracing:** emit OTel metrics for async span queue depth, batch drain, and SGP export success/failure (HTTP status labels). Disable SDK-side recording with ``AGENTEX_TRACING_METRICS=0``. +## 0.16.0 (2026-06-24) + +Full Changelog: [agentex-client-v0.15.0...agentex-client-v0.16.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-client-v0.15.0...agentex-client-v0.16.0) + +### ⚠ BREAKING CHANGES + +* **harness:** consolidate the Pydantic-AI harness + remove tracing handler ([#431](https://github.com/scaleapi/scale-agentex-python/issues/431)) +* **harness:** consolidate the LangGraph harness + remove tracing handler ([#430](https://github.com/scaleapi/scale-agentex-python/issues/430)) + +### Features + +* **cli:** add claude-code init templates (sync / async / temporal) ([#435](https://github.com/scaleapi/scale-agentex-python/issues/435)) ([fd9bc4a](https://github.com/scaleapi/scale-agentex-python/commit/fd9bc4a81417b9d75ad692b779293720f8435d37)) +* **cli:** add codex init templates (sync / async / temporal) ([#436](https://github.com/scaleapi/scale-agentex-python/issues/436)) ([0fadfd7](https://github.com/scaleapi/scale-agentex-python/commit/0fadfd7a113536d49a99894a3b80ed0915a0e0fb)) +* **cli:** add default-openai-agents init template (async base) ([#434](https://github.com/scaleapi/scale-agentex-python/issues/434)) ([624e9c8](https://github.com/scaleapi/scale-agentex-python/commit/624e9c8f3b4c4288a7037bc83651970cfb02e6b0)) +* **openai-agents:** single-emit + input-bearing tool spans + run_turn ([#445](https://github.com/scaleapi/scale-agentex-python/issues/445)) ([53ab8ef](https://github.com/scaleapi/scale-agentex-python/commit/53ab8efaaf65590e71abe07149582ea59814921b)) +* **openai-temporal:** render hosted/server-side tool calls in TemporalStreamingModel 
([#442](https://github.com/scaleapi/scale-agentex-python/issues/442)) ([5dce9f0](https://github.com/scaleapi/scale-agentex-python/commit/5dce9f097723d3436a0e40277139e7cce68580ef)) + + +### Bug Fixes + +* **cli:** harden init templates per Greptile feedback (suite-wide) ([#444](https://github.com/scaleapi/scale-agentex-python/issues/444)) ([2d85eb0](https://github.com/scaleapi/scale-agentex-python/commit/2d85eb0952f2298e6c412ab44b9c59255431cb84)) +* **harness:** harden Claude Code + OpenAI taps and span tracing ([#446](https://github.com/scaleapi/scale-agentex-python/issues/446)) ([5b4359d](https://github.com/scaleapi/scale-agentex-python/commit/5b4359dcf28f390f780215ed954fa52e8cb4dd7c)) + + +### Refactors + +* **harness:** consolidate the LangGraph harness + remove tracing handler ([#430](https://github.com/scaleapi/scale-agentex-python/issues/430)) ([a3fb5ad](https://github.com/scaleapi/scale-agentex-python/commit/a3fb5ad51f6392a48cbb8324f15c9619f10244b6)) +* **harness:** consolidate the Pydantic-AI harness + remove tracing handler ([#431](https://github.com/scaleapi/scale-agentex-python/issues/431)) ([48c3da8](https://github.com/scaleapi/scale-agentex-python/commit/48c3da8777ae20a9ca6d544238dccd64d6c62c2b)) +* **harness:** move OpenAI harness into adk/_modules + facade export ([#432](https://github.com/scaleapi/scale-agentex-python/issues/432)) ([58bdb16](https://github.com/scaleapi/scale-agentex-python/commit/58bdb16b4b18db22188a29d5d1b31759f9d0dd4e)) + ## 0.15.0 (2026-06-23) Full Changelog: [agentex-client-v0.14.0...agentex-client-v0.15.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-client-v0.14.0...agentex-client-v0.15.0) diff --git a/adk/CHANGELOG.md b/adk/CHANGELOG.md index ac7404e6b..e308ad36e 100644 --- a/adk/CHANGELOG.md +++ b/adk/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## 0.15.0 (2026-06-24) + +Full Changelog: 
[agentex-sdk-v0.14.0...agentex-sdk-v0.15.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-sdk-v0.14.0...agentex-sdk-v0.15.0) + +### ⚠ BREAKING CHANGES + +* **harness:** consolidate the LangGraph harness + remove tracing handler ([#430](https://github.com/scaleapi/scale-agentex-python/issues/430)) + +### Bug Fixes + +* **harness:** harden Claude Code + OpenAI taps and span tracing ([#446](https://github.com/scaleapi/scale-agentex-python/issues/446)) ([5b4359d](https://github.com/scaleapi/scale-agentex-python/commit/5b4359dcf28f390f780215ed954fa52e8cb4dd7c)) + + +### Refactors + +* **harness:** consolidate the LangGraph harness + remove tracing handler ([#430](https://github.com/scaleapi/scale-agentex-python/issues/430)) ([a3fb5ad](https://github.com/scaleapi/scale-agentex-python/commit/a3fb5ad51f6392a48cbb8324f15c9619f10244b6)) + ## 0.14.0 (2026-06-23) Full Changelog: [agentex-sdk-v0.13.2...agentex-sdk-v0.14.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-sdk-v0.13.2...agentex-sdk-v0.14.0) diff --git a/adk/docs/harness.md b/adk/docs/harness.md index 6a9d8947a..62094d469 100644 --- a/adk/docs/harness.md +++ b/adk/docs/harness.md @@ -39,14 +39,17 @@ Every harness tap produces a sequence of these. Everything downstream (delivery, ## Per-harness taps: `convert_<harness>_to_agentex_events` -A tap is an async generator that translates the harness's native event stream into `StreamTaskMessage*` events. The currently shipped taps are: +A tap is an async generator that translates the harness's native event stream into `StreamTaskMessage*` events.
The shipped taps are: | Harness | Tap function | Exported from | |---|---|---| | pydantic-ai | `convert_pydantic_ai_to_agentex_events` | `agentex.lib.adk` | | LangGraph | `convert_langgraph_to_agentex_events` | `agentex.lib.adk` | +| claude-code | `convert_claude_code_to_agentex_events` | `agentex.lib.adk` | +| codex | `convert_codex_to_agentex_events` | `agentex.lib.adk` | +| OpenAI Agents | `convert_openai_to_agentex_events` | `agentex.lib.adk` | -Taps for claude-code and codex will be added in subsequent PRs (AGX1-420, AGX1-421) and exported from `agentex.lib.adk` in the same way. +Each harness also provides a `HarnessTurn` wrapper that pairs its tap's event stream with usage extraction: `PydanticAITurn`, `LangGraphTurn`, `ClaudeCodeTurn`, `CodexTurn`, and `OpenAITurn`. --- @@ -157,11 +160,13 @@ Spans are derived from the canonical stream by `SpanDeriver` (pure, no `adk` dep ## Usage examples by channel -### Sync ACP (pydantic-ai tap) +### Sync ACP (`yield_turn`) + +Build the harness's `HarnessTurn` wrapper and iterate `emitter.yield_turn(turn)` — the emitter forwards each event to the caller and traces spans as a side effect: ```python import agentex.lib.adk as adk -from agentex.lib.adk import UnifiedEmitter, convert_pydantic_ai_to_agentex_events +from agentex.lib.adk import UnifiedEmitter, ClaudeCodeTurn @acp.on_message_send async def handle(params): @@ -172,13 +177,12 @@ async def handle(params): trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, ) - tap = convert_pydantic_ai_to_agentex_events(pydantic_stream) - # wrap tap in a HarnessTurn then yield_turn, or yield directly: - async for event in tap: + turn = ClaudeCodeTurn(claude_code_stream) # any HarnessTurn + async for event in emitter.yield_turn(turn): yield event ``` -For the pre-unified sync path the tap is still yielded directly; `UnifiedEmitter.yield_turn` is the forward-looking integration point when a `HarnessTurn` wrapper is available.
+Every harness follows the same shape — swap `ClaudeCodeTurn` for `PydanticAITurn`, `LangGraphTurn`, `CodexTurn`, or `OpenAITurn` and feed it that harness's native stream. ### Async Temporal (auto-send) @@ -194,3 +198,9 @@ result = await emitter.auto_send_turn(turn, created_at=workflow.now()) # result.final_text — last text segment # result.usage — TurnUsage (tokens, cost, ...) ``` + +--- + +## Migration + +- [Migrating to `agentex-client` 0.16.0 / `agentex-sdk` 0.15.0](./migration-0.16.0.md) — removed LangGraph/Pydantic-AI tracing handlers (tracing is now derived from the canonical stream), private `_modules` path moves, the OpenAI harness facade relocation, and the new `run_turn` Temporal entry point. diff --git a/adk/docs/migration-0.16.0.md b/adk/docs/migration-0.16.0.md new file mode 100644 index 000000000..b76da55ba --- /dev/null +++ b/adk/docs/migration-0.16.0.md @@ -0,0 +1,272 @@ +# Migration Guide — `agentex-client` 0.16.0 / `agentex-sdk` 0.15.0 + +This release consolidates the LangGraph, Pydantic-AI, and OpenAI Agents harnesses +onto the **unified harness surface** (`UnifiedEmitter` + `SpanDeriver`), introduces +`run_turn` as the single Temporal entry point for OpenAI Agents, renders +hosted/server-side tool calls in the Temporal streaming model, and ships new CLI +init templates. + +Most consumers only need to act on **section 1** (removed tracing handlers). +Sections 2–3 only matter if you import private modules. Section 4 lists the new, +opt-in capabilities. Section 5 documents the defect fixes shipped on top of the +release. Section 6 covers the recommended (not yet required) move off the legacy Temporal `claude_agents` plugin. + +--- + +## 1.
Tracing handlers removed (LangGraph + Pydantic-AI) — **action required** + +The bespoke tracing callback handlers are **gone** from the public +`agentex.lib.adk` surface: + +| Removed | | +|---|---| +| `agentex.lib.adk.create_langgraph_tracing_handler` | + class `AgentexLangGraphTracingHandler` | +| `agentex.lib.adk.create_pydantic_ai_tracing_handler` | + class `AgentexPydanticAITracingHandler` | + +Span tracing is now **derived automatically** from the canonical +`StreamTaskMessage*` stream by `UnifiedEmitter`. You no longer construct or pass a +callback handler — you wrap the run in the harness `*Turn` and drive delivery +through the emitter, and spans fall out of the stream. + +### LangGraph + +**Before** + +```python +from agentex.lib import adk + +handler = adk.create_langgraph_tracing_handler( + trace_id=trace_id, + parent_span_id=parent_span_id, +) +result = await graph.ainvoke(state, config={"callbacks": [handler]}) +``` + +**After** + +```python +from agentex.lib.adk import stream_langgraph_events # facade name unchanged + +# Streaming delivery + tracing are handled for you; no callbacks wiring. +async for event in stream_langgraph_events(graph, state, ...): + ... +``` + +or, when you own the emitter directly: + +```python +from agentex.lib.adk import LangGraphTurn +from agentex.lib.core.harness import UnifiedEmitter + +emitter = UnifiedEmitter(...) +await emitter.auto_send_turn(LangGraphTurn(...)) # or: emitter.yield_turn(...) +``` + +### Pydantic-AI + +**Before** + +```python +handler = adk.create_pydantic_ai_tracing_handler(trace_id=..., parent_span_id=...) +``` + +**After** + +```python +from agentex.lib.adk import PydanticAITurn, stream_pydantic_ai_events +from agentex.lib.core.harness import UnifiedEmitter + +# Wrap in PydanticAITurn and drive UnifiedEmitter.yield_turn / auto_send_turn. +await UnifiedEmitter(...).auto_send_turn(PydanticAITurn(...)) +``` + +The `agentex init` templates were migrated to this pattern. 
If you scaffolded +from an older template, regenerate (or diff against a fresh template) for the +canonical shape. + +--- + +## 2. Private `_modules` import paths changed — **only if you import privates** + +Each harness now exposes exactly `_<harness>_sync.py` + `_<harness>_turn.py` under +`agentex.lib.adk._modules`. Several private modules were deleted and their +functions relocated. If you imported the **public facade names** from +`agentex.lib.adk`, **nothing changes**. Repoint only if you reached into the +private modules directly: + +| Old (deleted) private import | New location | Public facade (unchanged) | +|---|---|---| +| `_modules._langgraph_async.stream_langgraph_events` | `_modules._langgraph_turn` | `adk.stream_langgraph_events` | +| `_modules._langgraph_messages.emit_langgraph_messages` | `_modules._langgraph_sync` | `adk.emit_langgraph_messages` | +| `_modules._langgraph_tracing.*` | **removed** (see §1) | — | +| `_modules._pydantic_ai_async.stream_pydantic_ai_events` | `_modules._pydantic_ai_turn` | `adk.stream_pydantic_ai_events` | +| `_modules._pydantic_ai_tracing.*` | **removed** (see §1) | — | + +✅ These facade names are unchanged and keep working: +`stream_langgraph_events`, `emit_langgraph_messages`, +`convert_langgraph_to_agentex_events`, `LangGraphTurn`, +`stream_pydantic_ai_events`, `convert_pydantic_ai_to_agentex_events`, +`PydanticAITurn`. + +--- + +## 3.
OpenAI harness moved into `adk/_modules` + facade export + +The OpenAI Agents harness now lives alongside the others: + +- `OpenAITurn`, `openai_usage_to_turn_usage` → `agentex.lib.adk._modules._openai_turn` +- `convert_openai_to_agentex_events` → `agentex.lib.adk._modules._openai_sync` + +New **public** facade exports (prefer these): + +```python +from agentex.lib.adk import ( + OpenAITurn, + convert_openai_to_agentex_events, + openai_usage_to_turn_usage, +) +``` + +Back-compat shims remain at +`agentex.lib.adk.providers._modules.{openai_turn,sync_provider}` **for one +release** — migrate to the facade names before the next minor. + +--- + +## 4. New capabilities (opt-in, no migration required) + +- **`run_turn` — unified Temporal entry point for OpenAI Agents.** + + ```python + from agentex.lib.core.temporal.plugins.openai_agents import run_turn, OpenAIAgentsTurnResult + + result = await run_turn( + agent, input, + task_id=task_id, + trace_id=trace_id, + parent_span_id=parent_span_id, + ) + result.final_output # raw SDK final_output + result.usage # normalized TurnUsage for the turn span + ``` + + It emits each tool call exactly once (the streaming model is the sole + tool-**request** emitter; hooks emit tool **responses**), traces per-tool spans, + normalizes token usage, and drains orphaned tool spans in a `finally` block if + the run terminates mid-tool. Existing `TemporalStreamingHooks` callers keep + working — `run_turn` is additive. If you pass your own `hooks` subclass, also + set `emit_tool_requests=False` and forward `trace_id` / `parent_span_id` + yourself (they are only auto-applied to the default hooks). + +- **Hosted / server-side tool rendering** in `TemporalStreamingModel`: + web_search, file_search, code_interpreter, image_generation, server-side mcp, + computer, and local_shell calls now surface as ToolRequest/ToolResponse pairs. 
+ +- **New CLI init templates:** `default` / `sync` / `temporal` flavors of + `claude-code` and `codex`, plus `default-openai-agents`. + +--- + +## 5. Defect fixes shipped with this migration + +These fixes harden the newly-added sync OpenAI converter +(`convert_openai_to_agentex_events` / `OpenAITurn`) and the Temporal hosted-tool +path. No API change — behavior only. + +1. **Malformed tool arguments no longer abort the turn.** The converter now + parses raw tool-call arguments through a defensive helper + (`_safe_parse_arguments`): a non-decodable string is preserved under `raw` + and a non-dict JSON value under `value`, instead of raising `JSONDecodeError` + and killing the run before later output is delivered. This matches the + Temporal streaming model's existing fallback. + +2. **Reasoning messages are closed.** Completed reasoning content/summary items + now emit a matching `StreamTaskMessageDone`. Previously the `Done` was + skipped, so `UnifiedEmitter.auto_send` never released the context and the + reasoning span could be marked incomplete (reasoning-model output appeared to + hang). + +3. **Text no longer collides with reasoning.** Every new text `item_id` now + reserves a fresh message index (matching the increment-then-use convention of + the reasoning/tool paths). Previously the first text item reused the current + index, so on reasoning-model streams the final answer could overwrite the + reasoning message, duplicate a `Start`, or route deltas into the wrong context. + +4. **Hosted-tool response shape aligned.** Hosted/server-side tool responses in + `TemporalStreamingModel` now emit `content` as a plain string, matching the + function-tool response path (`on_tool_end`) so hosted and function tools + render identically within the same flow. + +5. 
**Reasoning text now appears in derived spans.** `SpanDeriver` opened reasoning + spans with empty input and closed them with `output=None`, so reasoning/thinking + text never reached the trace (spans showed blank — read as "0 reasoning traces"). + It now accumulates the `ReasoningContentDelta` / `ReasoningSummaryDelta` text (and + any text seeded on the Start content) and records it as the span output. Affects + every harness that streams reasoning, including the Claude Code tap. + +6. **Claude Code: no more duplicate text messages.** The `stream-json` converter + deduped streamed-vs-materialized blocks by numeric block index and reset that + state after every materialized `assistant` envelope. A single streamed message + that materializes as several envelopes (thinking, then text) lost the dedup + marker between envelopes and re-emitted the text. Dedup is now **content-based** + (match the streamed block's text, consume once), which a numeric index cannot do + reliably. + +> Action: if you adopted `OpenAITurn` for **reasoning models** (o1/o3/gpt-5) on +> the sync path before these fixes, upgrade — fixes 2 and 3 are required for +> correct reasoning rendering. Claude Code agents on the unified harness tap should +> upgrade for fixes 5 and 6. + +--- + +## 6. Legacy Temporal `claude_agents` plugin → unified harness tap + +`agentex.lib.core.temporal.plugins.claude_agents` (`run_claude_agent_activity`, +`create_streaming_hooks`, `TemporalStreamingHooks`, `ClaudeMessageHandler`) is the +**original** Claude Code integration: it drives the Python `claude-agent-sdk` +directly and hand-rolls its own streaming + tracing. It is **superseded** by the +unified harness tap and slated for removal in a future release. It still works +today, so this migration is **recommended, not yet required** — but new Claude Code +agents should use the tap, and existing ones should plan to move. 
+ +Why migrate: the tap routes Claude Code through the same canonical +`StreamTaskMessage*` stream as every other harness, so it gets central span +derivation (tool **and** reasoning spans), the single delivery path +(`UnifiedEmitter`), and fixes like fixes 5 and 6 in §5 for free. The legacy plugin does +not derive reasoning spans at all and duplicates the streaming/tracing logic. + +**Before — legacy plugin activity:** + +```python +from agentex.lib.core.temporal.plugins.claude_agents import run_claude_agent_activity + +# In the workflow: +result = await workflow.execute_activity( + run_claude_agent_activity, + args=[prompt, workspace_path, allowed_tools, ...], + start_to_close_timeout=..., +) +``` + +**After — unified harness tap.** Run the CLI yourself (`claude -p --output-format +stream-json --include-partial-messages`), wrap its stdout in `ClaudeCodeTurn`, and +deliver through `UnifiedEmitter`: + +```python +from agentex.lib.adk import ClaudeCodeTurn, UnifiedEmitter + +# `stdout_lines` is an async iterator of the CLI's stdout lines (raw JSON strings +# or pre-parsed dicts) — e.g. read from sandbox.exec() / a subprocess. +turn = ClaudeCodeTurn(stdout_lines) + +emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id) +result = await emitter.auto_send_turn(turn, created_at=workflow.now()) +# result.final_text — last text segment +# result.usage — TurnUsage (tokens, cost, num_reasoning_blocks, ...) +``` + +The golden agent is the reference implementation +(`teams/sgp/agents/golden_agent/project/harness/`): it spawns the CLI in a sandbox, +yields stdout lines into `ClaudeCodeTurn`, and drives `auto_send_turn`. Known +remaining consumers to migrate: the `090_claude_agents_sdk_mvp` tutorial and the +`eval_dashboard_agent`.
diff --git a/adk/pyproject.toml b/adk/pyproject.toml index 1d8c00a40..c1f5b7443 100644 --- a/adk/pyproject.toml +++ b/adk/pyproject.toml @@ -4,7 +4,7 @@ # (agentex/{__init__.py, _*.py, types/, resources/}) ships from the slim # sibling package `agentex-client` which is pinned as a runtime dep. name = "agentex-sdk" -version = "0.14.0" +version = "0.15.0" description = "Agent Development Kit (ADK) overlay for the Agentex API — FastACP server, Temporal workflows, LLM provider integrations, observability" license = "Apache-2.0" authors = [ diff --git a/pyproject.toml b/pyproject.toml index 7ee0cf56b..61c42aa99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ # overlay (formerly `src/agentex/lib/*`) now lives in `adk/` and ships # as the sibling `agentex-sdk` package — see `adk/pyproject.toml`. name = "agentex-client" -version = "0.15.0" +version = "0.16.0" description = "The official Python REST client for the Agentex API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/agentex/_version.py b/src/agentex/_version.py index c567e168b..e30c3695a 100644 --- a/src/agentex/_version.py +++ b/src/agentex/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
__title__ = "agentex" -__version__ = "0.15.0" # x-release-please-version +__version__ = "0.16.0" # x-release-please-version diff --git a/src/agentex/lib/adk/__init__.py b/src/agentex/lib/adk/__init__.py index e618a20d3..4d79be9dd 100644 --- a/src/agentex/lib/adk/__init__.py +++ b/src/agentex/lib/adk/__init__.py @@ -6,15 +6,15 @@ from agentex.lib.adk._modules.agents import AgentsModule from agentex.lib.adk._modules.agent_task_tracker import AgentTaskTrackerModule from agentex.lib.adk._modules.checkpointer import create_checkpointer -from agentex.lib.adk._modules._langgraph_tracing import create_langgraph_tracing_handler -from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events -from agentex.lib.adk._modules._langgraph_messages import emit_langgraph_messages -from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn -from agentex.lib.adk._modules._pydantic_ai_async import stream_pydantic_ai_events +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn, stream_langgraph_events +from agentex.lib.adk._modules._langgraph_sync import ( + emit_langgraph_messages, + convert_langgraph_to_agentex_events, +) +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn, stream_pydantic_ai_events from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events -from agentex.lib.adk._modules._pydantic_ai_tracing import create_pydantic_ai_tracing_handler -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn +from agentex.lib.adk._modules._openai_sync import convert_openai_to_agentex_events +from agentex.lib.adk._modules._openai_turn import OpenAITurn, openai_usage_to_turn_usage from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events from agentex.lib.adk._modules._claude_code_turn import ( ClaudeCodeTurn, @@ -68,7 +68,6 @@ "agent_task_tracker", # Checkpointing 
/ LangGraph "create_checkpointer", - "create_langgraph_tracing_handler", "stream_langgraph_events", "emit_langgraph_messages", "convert_langgraph_to_agentex_events", @@ -76,8 +75,11 @@ # Pydantic AI "stream_pydantic_ai_events", "convert_pydantic_ai_to_agentex_events", - "create_pydantic_ai_tracing_handler", "PydanticAITurn", + # OpenAI Agents + "convert_openai_to_agentex_events", + "OpenAITurn", + "openai_usage_to_turn_usage", # Claude Code "convert_claude_code_to_agentex_events", "ClaudeCodeTurn", diff --git a/src/agentex/lib/adk/_modules/_claude_code_sync.py b/src/agentex/lib/adk/_modules/_claude_code_sync.py index 4e25503cf..93a639118 100644 --- a/src/agentex/lib/adk/_modules/_claude_code_sync.py +++ b/src/agentex/lib/adk/_modules/_claude_code_sync.py @@ -98,18 +98,15 @@ async def convert_claude_code_to_agentex_events( _text_open = False _text_buf = "" _text_index: int | None = None - # Track which assistant-message block indices were already streamed via - # stream_event triples. Those blocks must not be re-emitted when the full - # assistant message arrives. Reset at each message boundary (see below) so a - # later turn's block indices don't collide with an earlier turn's. - _streamed_block_indexes: set[int] = set() - # Once-guard so a thinking block's pending index is claimed on its first - # thinking_delta only. Reset per turn alongside _streamed_block_indexes. - _saw_thinking_stream = False - # For deferred ReasoningStarted: if a content_block_start(thinking) arrives - # but no thinking_delta ever follows, the final assistant block's thinking - # field fills the reasoning content instead. - _pending_thinking_block_index: int | None = None + # Full text of each block already delivered via stream_event deltas, so the + # materialised assistant envelope does not re-emit it. Matched by CONTENT, + # not block index: a single streamed message can arrive as several assistant + # envelopes (e.g. 
a thinking block, then the text block), and the per-block + # numeric index does not survive that split while the text does. Each match + # is consumed (one entry removed) so a genuinely repeated later block — a new + # turn that happens to emit identical text — is still delivered. + _streamed_texts: list[str] = [] + _streamed_thinkings: list[str] = [] async for raw in lines: if not raw: @@ -138,43 +135,56 @@ async def convert_claude_code_to_agentex_events( if not isinstance(blocks, list): blocks = [blocks] - for idx, block in enumerate(blocks): + for block in blocks: if not isinstance(block, dict): continue block_type = block.get("type", "") if block_type == "text": - # Skip only the specific blocks already delivered via - # stream_event deltas (per-block, not a turn-wide latch). - if idx in _streamed_block_indexes: - continue text = block.get("text", "") - if text: - msg_index = next_index - next_index += 1 - yield StreamTaskMessageStart( - type="start", - index=msg_index, - content=TextContent( - type="text", - author="agent", - content="", - ), - ) - yield StreamTaskMessageDelta( - type="delta", - index=msg_index, - delta=TextDelta(type="text", text_delta=text), - ) - yield StreamTaskMessageDone(type="done", index=msg_index) + if not text: + continue + # Skip blocks already delivered via stream_event deltas. Two + # cases: (1) the streamed block already finished — its full + # text is recorded in _streamed_texts; (2) the materialised + # envelope arrives INTERLEAVED, mid-stream, before the streamed + # block's content_block_stop records its buffer — the still-open + # block's partial buffer is a prefix of this full text. 
+ if text in _streamed_texts: + _streamed_texts.remove(text) + continue + if _text_open and _text_buf and text.startswith(_text_buf): + continue + msg_index = next_index + next_index += 1 + yield StreamTaskMessageStart( + type="start", + index=msg_index, + content=TextContent( + type="text", + author="agent", + content="", + ), + ) + yield StreamTaskMessageDelta( + type="delta", + index=msg_index, + delta=TextDelta(type="text", text_delta=text), + ) + yield StreamTaskMessageDone(type="done", index=msg_index) elif block_type == "thinking": - # Skip only the specific blocks already delivered via - # stream_event deltas (per-block, not a turn-wide latch). - if idx in _streamed_block_indexes: - continue thinking_text = block.get("thinking", "") if thinking_text: + # Skip blocks already delivered via stream_event deltas. + # Same two cases as text above: finished streamed block + # (recorded), or an interleaved materialised envelope whose + # text the still-open streamed buffer is a prefix of. + if thinking_text in _streamed_thinkings: + _streamed_thinkings.remove(thinking_text) + continue + if _thinking_open and _thinking_buf and thinking_text.startswith(_thinking_buf): + continue summary = _extract_summary(thinking_text) msg_index = next_index next_index += 1 @@ -243,20 +253,12 @@ async def convert_claude_code_to_agentex_events( ), ) - # End of a materialised message: reset per-turn streaming dedup state - # so the next turn's stream_event indices start clean. Without this, - # a block index streamed in an earlier turn would linger in the set - # and silently drop a later turn's non-streamed block at that index. 
- _streamed_block_indexes = set() - _saw_thinking_stream = False - # ----------------------------------------------------------------------- # stream_event — incremental streaming deltas # ----------------------------------------------------------------------- elif evt_type == "stream_event": se = evt.get("event") or {} se_type = se.get("type", "") - block_index = se.get("index") if se_type == "content_block_start": block = se.get("content_block") or {} @@ -265,11 +267,6 @@ async def convert_claude_code_to_agentex_events( if btype == "thinking": _thinking_open = True _thinking_buf = "" - # Defer marking the block as streamed until we actually - # receive a thinking_delta. Some configurations emit a - # thinking block_start but no deltas — in that case we want - # the final assistant-message handler to fill the text. - _pending_thinking_block_index = block_index if isinstance(block_index, int) else None msg_index = next_index next_index += 1 _thinking_index = msg_index @@ -288,8 +285,6 @@ async def convert_claude_code_to_agentex_events( elif btype == "text": _text_open = True _text_buf = "" - if isinstance(block_index, int): - _streamed_block_indexes.add(block_index) msg_index = next_index next_index += 1 _text_index = msg_index @@ -310,12 +305,6 @@ async def convert_claude_code_to_agentex_events( if dtype == "thinking_delta": chunk = delta.get("thinking", "") if chunk and _thinking_open: - if not _saw_thinking_stream: - _saw_thinking_stream = True - # Now mark the block as claimed so the assistant - # message handler won't re-emit it. 
- if _pending_thinking_block_index is not None: - _streamed_block_indexes.add(_pending_thinking_block_index) _thinking_buf += chunk if _thinking_index is not None: yield StreamTaskMessageDelta( @@ -342,18 +331,21 @@ async def convert_claude_code_to_agentex_events( elif se_type == "content_block_stop": if _thinking_open: _thinking_open = False + # Record the streamed thinking so the materialised assistant + # envelope doesn't re-emit it. Skip empties: a block_start with + # no deltas leaves the assistant envelope free to fill the text. + if _thinking_buf: + _streamed_thinkings.append(_thinking_buf) _thinking_buf = "" - _pending_thinking_block_index = None - # Reset the once-guard per thinking block: a turn can stream a - # second thinking block, and without this the guard stays True, - # the second block's index is never claimed, and the final - # assistant envelope re-emits it (duplicate Start/Delta/Done). - _saw_thinking_stream = False if _thinking_index is not None: yield StreamTaskMessageDone(type="done", index=_thinking_index) _thinking_index = None elif _text_open: _text_open = False + # Record the streamed text for content-based dedup against the + # materialised assistant envelope (see _streamed_texts). + if _text_buf: + _streamed_texts.append(_text_buf) _text_buf = "" if _text_index is not None: yield StreamTaskMessageDone(type="done", index=_text_index) diff --git a/src/agentex/lib/adk/_modules/_langgraph_async.py b/src/agentex/lib/adk/_modules/_langgraph_async.py deleted file mode 100644 index 02ef059eb..000000000 --- a/src/agentex/lib/adk/_modules/_langgraph_async.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Async LangGraph streaming helper for Agentex. - -Converts LangGraph graph.astream() events into Agentex streaming updates -and pushes them to Redis via adk.streaming contexts. For use with async -ACP agents that stream via Redis rather than HTTP yields. 
- -Unified surface ---------------- -This module is now implemented on top of ``LangGraphTurn`` and -``UnifiedEmitter.auto_send_turn``, the same surface used by every other -harness adapter (pydantic-ai, openai-agents, etc.). The public signature -and return type are preserved identically. - -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events -(from "updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` -handles Full events correctly; no coalescing wrapper is needed. -""" - -from agentex.lib.utils.temporal import workflow_now_if_in_workflow - - -async def stream_langgraph_events(stream, task_id: str) -> str: - """Stream LangGraph events to Agentex via Redis. - - Processes the stream from graph.astream() called with - stream_mode=["messages", "updates"] and pushes text, reasoning, - tool request, and tool response messages through Redis streaming - contexts. - - Supports both regular models (chunk.content is a str) and reasoning - models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks - in the Responses API responses/v1 format). - - Reimplemented on ``UnifiedEmitter.auto_send_turn(LangGraphTurn(...))`` for - cross-harness consistency. Behavior is identical to the previous bespoke - implementation (verified by characterization tests in test_langgraph_async.py). - - AGX1-377 note: LangGraph emits tool requests as ``Full`` events (from "updates"), - NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events - correctly; no coalescing wrapper is needed. - - AGX1-378 note: ``created_at`` is set from ``workflow.now()`` when called inside a - Temporal workflow, matching the pattern used by the openai/litellm providers. - Outside a workflow (plain async activities, sync agents) it is ``None`` and the - server's wall clock is used. - - Args: - stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"]) - task_id: The Agentex task ID to stream messages to. 
- - Returns: - The accumulated final text output from the agent. - """ - from agentex.lib.core.harness.emitter import UnifiedEmitter - from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - - # AGX1-377 note: LangGraph emits tool requests as Full events (from "updates"), - # NOT Start+Delta+Done like pydantic-ai. auto_send handles Full events correctly; - # no coalescing wrapper is needed. - # AGX1-378: stamp messages with workflow.now() inside Temporal for deterministic - # created_at ordering; falls back to None (server wall clock) outside a workflow. - turn = LangGraphTurn(stream, model=None) - emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None) - result = await emitter.auto_send_turn(turn, created_at=workflow_now_if_in_workflow()) - return result.final_text diff --git a/src/agentex/lib/adk/_modules/_langgraph_messages.py b/src/agentex/lib/adk/_modules/_langgraph_messages.py deleted file mode 100644 index c8856755b..000000000 --- a/src/agentex/lib/adk/_modules/_langgraph_messages.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Emit finished LangGraph messages as Agentex task messages. - -This is the non-streaming counterpart to ``stream_langgraph_events``. Use it -when you run a LangGraph graph with ``ainvoke`` (for example a Temporal-backed -agent using the LangGraph plugin, where streaming deltas aren't available) and -want to surface the resulting messages to the Agentex UI after the fact. - -It maps LangGraph/LangChain message objects to Agentex content types: - -- ``AIMessage`` tool calls → ``ToolRequestContent`` (one per call) -- ``AIMessage`` text content → ``TextContent`` -- ``ToolMessage`` → ``ToolResponseContent`` - -Pass only the messages produced this turn (e.g. ``messages[already_emitted:]``) -so each message is surfaced exactly once across a multi-turn conversation. 
-""" - -from __future__ import annotations - -from typing import Any - - -async def emit_langgraph_messages(messages: list[Any], task_id: str) -> str: - """Create Agentex messages for a list of LangGraph messages. - - Args: - messages: LangGraph/LangChain message objects to surface — typically - the new messages a turn produced. - task_id: The Agentex task to create messages on. - - Returns: - The last assistant text emitted (useful as a span/turn output), or "". - """ - # Lazy imports so langchain isn't required at module load time. - from langchain_core.messages import AIMessage, ToolMessage - - from agentex.lib import adk - from agentex.types.text_content import TextContent - from agentex.types.tool_request_content import ToolRequestContent - from agentex.types.tool_response_content import ToolResponseContent - - final_text = "" - for message in messages: - if isinstance(message, AIMessage): - for tool_call in message.tool_calls or []: - await adk.messages.create( - task_id=task_id, - content=ToolRequestContent( - author="agent", - tool_call_id=tool_call["id"], - name=tool_call["name"], - arguments=tool_call["args"], - ), - ) - # ``content`` may be a plain string (OpenAI) or a list of content - # blocks (Anthropic/Claude via LangChain, e.g. - # ``[{"type": "text", "text": "..."}]``). Extract and join the text - # so the response is visible regardless of the underlying model. 
- if isinstance(message.content, str): - text = message.content - else: - text = "".join( - block.get("text", "") if isinstance(block, dict) else str(block) - for block in message.content - if not isinstance(block, dict) or block.get("type") == "text" - ) - if text: - final_text = text - await adk.messages.create( - task_id=task_id, - content=TextContent(author="agent", content=text, format="markdown"), - ) - elif isinstance(message, ToolMessage): - await adk.messages.create( - task_id=task_id, - content=ToolResponseContent( - author="agent", - tool_call_id=message.tool_call_id, - name=message.name or "unknown", - content=message.content - if isinstance(message.content, str) - else str(message.content), - ), - ) - return final_text diff --git a/src/agentex/lib/adk/_modules/_langgraph_sync.py b/src/agentex/lib/adk/_modules/_langgraph_sync.py index 48231a87d..9d7b73847 100644 --- a/src/agentex/lib/adk/_modules/_langgraph_sync.py +++ b/src/agentex/lib/adk/_modules/_langgraph_sync.py @@ -48,8 +48,8 @@ async def convert_langgraph_to_agentex_events( Supports both regular models (chunk.content is a str) and reasoning models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks). - AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` (from - "updates" events), NOT Start+Delta+Done like pydantic-ai. No coalesce_tool_requests + LangGraph emits tool requests as ``StreamTaskMessageFull`` (from "updates" + events), NOT Start+Delta+Done like pydantic-ai. No coalesce_tool_requests option is needed for LangGraph. Args: @@ -271,3 +271,82 @@ async def convert_langgraph_to_agentex_events( yield StreamTaskMessageDone(type="done", index=message_index) if reasoning_streaming: yield StreamTaskMessageDone(type="done", index=message_index) + + +async def emit_langgraph_messages(messages: list[Any], task_id: str) -> str: + """Create Agentex messages for a list of LangGraph messages. + + This is the non-streaming counterpart to ``stream_langgraph_events``. 
Use it + when you run a LangGraph graph with ``ainvoke`` (for example a Temporal-backed + agent using the LangGraph plugin, where streaming deltas aren't available) and + want to surface the resulting messages to the Agentex UI after the fact. + + It maps LangGraph/LangChain message objects to Agentex content types: + + - ``AIMessage`` tool calls -> ``ToolRequestContent`` (one per call) + - ``AIMessage`` text content -> ``TextContent`` + - ``ToolMessage`` -> ``ToolResponseContent`` + + Pass only the messages produced this turn (e.g. ``messages[already_emitted:]``) + so each message is surfaced exactly once across a multi-turn conversation. + + Args: + messages: LangGraph/LangChain message objects to surface — typically + the new messages a turn produced. + task_id: The Agentex task to create messages on. + + Returns: + The last assistant text emitted (useful as a span/turn output), or "". + """ + # Lazy imports so langchain isn't required at module load time. + from langchain_core.messages import AIMessage, ToolMessage + + from agentex.lib import adk + from agentex.types.text_content import TextContent + from agentex.types.tool_request_content import ToolRequestContent + from agentex.types.tool_response_content import ToolResponseContent + + final_text = "" + for message in messages: + if isinstance(message, AIMessage): + for tool_call in message.tool_calls or []: + await adk.messages.create( + task_id=task_id, + content=ToolRequestContent( + author="agent", + tool_call_id=tool_call["id"], + name=tool_call["name"], + arguments=tool_call["args"], + ), + ) + # ``content`` may be a plain string (OpenAI) or a list of content + # blocks (Anthropic/Claude via LangChain, e.g. + # ``[{"type": "text", "text": "..."}]``). Extract and join the text + # so the response is visible regardless of the underlying model. 
+ if isinstance(message.content, str): + text = message.content + else: + text = "".join( + block.get("text", "") if isinstance(block, dict) else str(block) + for block in message.content + if not isinstance(block, dict) or block.get("type") == "text" + ) + if text: + final_text = text + await adk.messages.create( + task_id=task_id, + content=TextContent(author="agent", content=text, format="markdown"), + ) + elif isinstance(message, ToolMessage): + await adk.messages.create( + task_id=task_id, + content=ToolResponseContent( + author="agent", + tool_call_id=message.tool_call_id, + name=message.name or "unknown", + content=message.content + if isinstance(message.content, str) + else str(message.content), + ), + ) + return final_text diff --git a/src/agentex/lib/adk/_modules/_langgraph_tracing.py b/src/agentex/lib/adk/_modules/_langgraph_tracing.py deleted file mode 100644 index 2162201e1..000000000 --- a/src/agentex/lib/adk/_modules/_langgraph_tracing.py +++ /dev/null @@ -1,273 +0,0 @@ -"""LangChain callback handler that creates Agentex spans for LLM calls and tool executions. - -.. deprecated:: - ``AgentexLangGraphTracingHandler`` and ``create_langgraph_tracing_handler`` are - superseded by the unified harness surface (``LangGraphTurn`` + - ``UnifiedEmitter``), which derives spans automatically from the canonical - event stream without requiring a LangChain callback handler. - - They remain importable and functional for backward compatibility, but new - agents should use the unified path instead. -""" -# ruff: noqa: ARG002 -# Callback methods must accept all arguments defined by LangChain's AsyncCallbackHandler interface. 
- -from __future__ import annotations - -from uuid import UUID -from typing import Any, override - -from langchain_core.outputs import LLMResult -from langchain_core.messages import BaseMessage -from langchain_core.callbacks import AsyncCallbackHandler - -from agentex.types.span import Span -from agentex.lib.utils.logging import make_logger -from agentex.lib.adk._modules.tracing import TracingModule - -logger = make_logger(__name__) - - -class AgentexLangGraphTracingHandler(AsyncCallbackHandler): - """Async LangChain callback handler that records Agentex tracing spans. - - Creates child spans under a parent span for each LLM call and tool execution. - Designed to be passed via ``config={"callbacks": [handler]}`` to LangGraph's - ``graph.astream()`` or ``graph.ainvoke()``. - - Span hierarchy produced:: - - (e.g. "message" turn-level span) - ├── llm: (LLM call) - ├── tool: (tool execution) - └── llm: (LLM call) - - .. deprecated:: - Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. The unified - harness derives equivalent spans from the canonical event stream, - removing the need for a LangChain callback handler entirely. - """ - - def __init__( - self, - trace_id: str, - parent_span_id: str | None = None, - tracing: TracingModule | None = None, - ) -> None: - super().__init__() - self._trace_id = trace_id - self._parent_span_id = parent_span_id - # Lazily initialise TracingModule so the httpx client is created - # inside the *running* event-loop (not at import/construction time). 
- self._tracing_eager = tracing - self._tracing_lazy: TracingModule | None = None - # Map run_id → Span for in-flight spans - self._spans: dict[UUID, Span] = {} - - @property - def _tracing(self) -> TracingModule: - if self._tracing_eager is not None: - return self._tracing_eager - if self._tracing_lazy is None: - self._tracing_lazy = TracingModule() - return self._tracing_lazy - - # ------------------------------------------------------------------ - # LLM lifecycle - # ------------------------------------------------------------------ - - @override - async def on_chat_model_start( - self, - serialized: dict[str, Any], - messages: list[list[BaseMessage]], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - **kwargs: Any, - ) -> None: - model_name = (metadata or {}).get("ls_model_name", "") or _extract_model_name(serialized) - span = await self._tracing.start_span( - trace_id=self._trace_id, - name=f"llm:{model_name}" if model_name else "llm", - input=_serialize_messages(messages), - parent_id=self._parent_span_id, - data={"__span_type__": "COMPLETION"}, - ) - if span: - self._spans[run_id] = span - - @override - async def on_llm_end( - self, - response: LLMResult, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = _serialize_llm_result(response) - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - @override - async def on_llm_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = {"error": str(error)} - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - # ------------------------------------------------------------------ - # Tool lifecycle - # 
------------------------------------------------------------------ - - @override - async def on_tool_start( - self, - serialized: dict[str, Any], - input_str: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - inputs: dict[str, Any] | None = None, - **kwargs: Any, - ) -> None: - tool_name = serialized.get("name", "") or serialized.get("id", [""])[-1] - span = await self._tracing.start_span( - trace_id=self._trace_id, - name=f"tool:{tool_name}" if tool_name else "tool", - input={"input": input_str}, - parent_id=self._parent_span_id, - data={"__span_type__": "CUSTOM"}, - ) - if span: - self._spans[run_id] = span - - @override - async def on_tool_end( - self, - output: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = {"output": output} - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - @override - async def on_tool_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = {"error": str(error)} - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - -# ------------------------------------------------------------------ -# Helpers -# ------------------------------------------------------------------ - - -def _extract_model_name(serialized: dict[str, Any]) -> str: - """Best-effort model name extraction from the serialized callback dict.""" - kwargs = serialized.get("kwargs", {}) - return kwargs.get("model_name", "") or kwargs.get("model", "") - - -def _serialize_messages(messages: list[list[BaseMessage]]) -> dict[str, Any]: - """Serialize LangChain messages into a JSON-safe dict for the span input.""" - result: list[dict[str, Any]] = [] - for batch in messages: - for msg in 
batch: - entry: dict[str, Any] = {"type": msg.type, "content": msg.content} - tool_calls = getattr(msg, "tool_calls", None) - if tool_calls: - entry["tool_calls"] = tool_calls - result.append(entry) - return {"messages": result} - - -def _serialize_llm_result(response: LLMResult) -> dict[str, Any]: - """Serialize an LLMResult into a JSON-safe dict for the span output.""" - output: dict[str, Any] = {} - if response.generations: - last_gen = response.generations[-1] - if last_gen: - gen = last_gen[-1] - msg = getattr(gen, "message", None) - - # For reasoning models, content is a list of typed blocks. - # Extract text from the blocks instead of relying on gen.text. - if msg and isinstance(msg.content, list): - text_parts: list[str] = [] - for block in msg.content: - if isinstance(block, dict): - if block.get("type") == "text": - text_parts.append(block.get("text", "")) - output["content"] = "".join(text_parts) if text_parts else gen.text - else: - output["content"] = gen.text - - if msg and hasattr(msg, "tool_calls") and msg.tool_calls: - output["tool_calls"] = [{"name": tc["name"], "args": tc["args"]} for tc in msg.tool_calls] - return output - - -def create_langgraph_tracing_handler( - trace_id: str, - parent_span_id: str | None = None, -) -> AgentexLangGraphTracingHandler: - """Create a LangChain callback handler that records Agentex tracing spans. - - Pass the returned handler to LangGraph via ``config={"callbacks": [handler]}``. - - Args: - trace_id: The trace ID (typically the task/thread ID). - parent_span_id: Optional parent span ID to nest LLM/tool spans under. - - Returns: - An ``AgentexLangGraphTracingHandler`` instance ready to use as a LangChain callback. - - .. deprecated:: - Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. 
The unified harness - derives equivalent spans from the canonical event stream automatically, with - no LangChain callback required:: - - from agentex.lib.core.harness.emitter import UnifiedEmitter - from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - - turn = LangGraphTurn(stream) - emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=span_id) - result = await emitter.auto_send_turn(turn) - - This function remains available for backward compatibility. - """ - return AgentexLangGraphTracingHandler( - trace_id=trace_id, - parent_span_id=parent_span_id, - ) diff --git a/src/agentex/lib/adk/_modules/_langgraph_turn.py b/src/agentex/lib/adk/_modules/_langgraph_turn.py index da8ff0e7c..a6e290e1b 100644 --- a/src/agentex/lib/adk/_modules/_langgraph_turn.py +++ b/src/agentex/lib/adk/_modules/_langgraph_turn.py @@ -4,9 +4,9 @@ ``langgraph_usage_to_turn_usage`` helper that maps LangGraph's ``AIMessage.usage_metadata`` onto the framework-agnostic ``TurnUsage`` model. -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events -(from "updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` -handles Full events correctly; no coalescing wrapper is needed. +LangGraph emits tool requests as ``StreamTaskMessageFull`` events (from +"updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles +Full events correctly; no coalescing wrapper is needed. 
""" from __future__ import annotations @@ -14,6 +14,7 @@ from typing import Any, AsyncIterator from collections.abc import AsyncGenerator +from agentex.lib.utils.temporal import workflow_now_if_in_workflow from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events @@ -111,9 +112,9 @@ class LangGraphTurn: # Async / temporal result = await emitter.auto_send_turn(turn) - AGX1-377 note: LangGraph tool requests are ``StreamTaskMessageFull`` (from - "updates"), NOT Start+Delta+Done like pydantic-ai. No ``coalesce_tool_requests`` - option is needed. + LangGraph tool requests are ``StreamTaskMessageFull`` (from "updates"), NOT + Start+Delta+Done like pydantic-ai. No ``coalesce_tool_requests`` option is + needed. Usage data is captured lazily via the ``on_final_ai_message`` callback and is only valid after ``events`` has been fully consumed. Multi-step turns @@ -150,3 +151,50 @@ def usage(self) -> TurnUsage: did not report usage. """ return self._usage + + +async def stream_langgraph_events(stream, task_id: str) -> str: + """Stream LangGraph events to Agentex via Redis. + + Converts LangGraph ``graph.astream()`` events into Agentex streaming + updates and pushes them to Redis via ``adk.streaming`` contexts. For use + with async ACP agents that stream via Redis rather than HTTP yields. + + Processes the stream from graph.astream() called with + stream_mode=["messages", "updates"] and pushes text, reasoning, + tool request, and tool response messages through Redis streaming + contexts. + + Supports both regular models (chunk.content is a str) and reasoning + models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks + in the Responses API responses/v1 format). + + Implemented on ``UnifiedEmitter.auto_send_turn(LangGraphTurn(...))`` for + cross-harness consistency, the same surface used by every other harness + adapter (pydantic-ai, openai-agents, etc.). 
The public signature and + return type are preserved identically. + + LangGraph emits tool requests as ``Full`` events (from "updates"), NOT + Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events + correctly; no coalescing wrapper is needed. + + ``created_at`` is set from ``workflow.now()`` when called inside a + Temporal workflow, matching the pattern used by the openai/litellm providers. + Outside a workflow (plain async activities, sync agents) it is ``None`` and the + server's wall clock is used. + + Args: + stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"]) + task_id: The Agentex task ID to stream messages to. + + Returns: + The accumulated final text output from the agent. + """ + from agentex.lib.core.harness.emitter import UnifiedEmitter + + # Stamp messages with workflow.now() inside Temporal for deterministic + # created_at ordering; falls back to None (server wall clock) outside a workflow. + turn = LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None) + result = await emitter.auto_send_turn(turn, created_at=workflow_now_if_in_workflow()) + return result.final_text diff --git a/src/agentex/lib/adk/_modules/_openai_sync.py b/src/agentex/lib/adk/_modules/_openai_sync.py new file mode 100644 index 000000000..ac404bef1 --- /dev/null +++ b/src/agentex/lib/adk/_modules/_openai_sync.py @@ -0,0 +1,376 @@ +"""Sync OpenAI Agents SDK streaming tap for Agentex. + +Converts an OpenAI Agents SDK streamed run (``Runner.run_streamed(...)`` +``stream_events()``) into Agentex ``StreamTaskMessage*`` events, including +reasoning content and reasoning summary deltas for reasoning models (o1/o3/gpt-5). + +This is the lower-level primitive used by ``OpenAITurn`` (in +``_openai_turn.py``). New OpenAI Agents integrations should prefer wrapping a +``Runner.run_streamed`` result in ``OpenAITurn`` and driving delivery + tracing +through ``UnifiedEmitter``. 
+""" + +from __future__ import annotations + +import json +from typing import Any + +from openai.types.responses import ( + ResponseTextDeltaEvent, + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseOutputItemDoneEvent, + ResponseOutputItemAddedEvent, + ResponseCodeInterpreterToolCall, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryTextDeltaEvent, +) +from openai.types.responses.response_reasoning_text_done_event import ResponseReasoningTextDoneEvent +from openai.types.responses.response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent +from openai.types.responses.response_reasoning_summary_text_done_event import ResponseReasoningSummaryTextDoneEvent + +from agentex.types.reasoning_content import ReasoningContent +from agentex.types.task_message_delta import TextDelta +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.task_message_content import TextContent +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.types.reasoning_content_delta import ReasoningContentDelta +from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta + + +def _safe_parse_arguments(arguments: Any) -> dict[str, Any]: + """Coerce a tool call's ``arguments`` into a dict, tolerating bad JSON. + + ``ToolRequestContent.arguments`` is typed ``Dict[str, object]``, so the + result is ALWAYS a dict — a non-dict payload must not abort the turn. + Mirroring the Temporal streaming model: malformed/truncated strings are + preserved under ``raw``, and any other non-dict value (a list, scalar, or + SDK object) is serialized if possible, otherwise wrapped under ``value``. 
+ """ + if not arguments: + return {} + if isinstance(arguments, dict): + return arguments + if isinstance(arguments, str): + try: + parsed = json.loads(arguments) + except (json.JSONDecodeError, ValueError): + return {"raw": arguments} + return parsed if isinstance(parsed, dict) else {"value": parsed} + # Non-string, non-dict (e.g. a provider tool passing a list / scalar / SDK + # object). Prefer the object's own dict form; fall back to wrapping it. + dumped = arguments.model_dump() if hasattr(arguments, "model_dump") else None + if isinstance(dumped, dict): + return dumped + return {"value": arguments} + + +def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, Any]]: + """ + Extract call_id, tool_name, and tool_arguments from a tool call item. + Args: + tool_call_item: The tool call item to process + Returns: + A tuple of (call_id, tool_name, tool_arguments) + """ + # Generic handling for different tool call types + # Try 'call_id' first, then 'id', then generate placeholder + if hasattr(tool_call_item, "call_id"): + call_id = tool_call_item.call_id + elif hasattr(tool_call_item, "id"): + call_id = tool_call_item.id + else: + call_id = f"unknown_call_{id(tool_call_item)}" + + if isinstance(tool_call_item, ResponseFunctionWebSearch): + tool_name = "web_search" + tool_arguments = {"action": tool_call_item.action.model_dump(), "status": tool_call_item.status} + elif isinstance(tool_call_item, ResponseCodeInterpreterToolCall): + tool_name = "code_interpreter" + tool_arguments = {"code": tool_call_item.code, "status": tool_call_item.status} + elif isinstance(tool_call_item, ResponseFunctionToolCall): + # Handle standard function tool calls + tool_name = tool_call_item.name + tool_arguments = _safe_parse_arguments(tool_call_item.arguments) + else: + # Generic handling for any tool call type + tool_name = getattr(tool_call_item, "name", type(tool_call_item).__name__) + if hasattr(tool_call_item, "arguments"): + tool_arguments = 
_safe_parse_arguments(tool_call_item.arguments) + else: + tool_arguments = tool_call_item.model_dump() + + return call_id, tool_name, tool_arguments + + +def _extract_tool_response_info(tool_map: dict[str, Any], tool_output_item: Any) -> tuple[str, str, str]: + """ + Extract call_id, tool_name, and content from a tool output item. + Args: + tool_map: Dictionary mapping call_ids to tool names + tool_output_item: The tool output item to process + Returns: + A tuple of (call_id, tool_name, content) + """ + + # Handle different formats of tool_output_item + if isinstance(tool_output_item, dict): + call_id = tool_output_item.get("call_id", tool_output_item.get("id", f"unknown_call_{id(tool_output_item)}")) + content = tool_output_item.get("output", str(tool_output_item)) + else: + # Try to get call_id from attributes + if hasattr(tool_output_item, "call_id"): + call_id = tool_output_item.call_id + elif hasattr(tool_output_item, "id"): + call_id = tool_output_item.id + else: + call_id = f"unknown_call_{id(tool_output_item)}" + + # Get content + if hasattr(tool_output_item, "output"): + content = tool_output_item.output + else: + content = str(tool_output_item) + + # Get tool name from map + tool_name = tool_map.get(call_id, "unknown_tool") + + return call_id, tool_name, content + + +async def convert_openai_to_agentex_events(stream_response): + """Convert OpenAI streaming events to AgentEx TaskMessageUpdate events with reasoning support. 
+ + This is an enhanced version of the base converter that includes support for: + - Reasoning content deltas (for o1 models) + - Reasoning summary deltas (for o1 models) + + Args: + stream_response: An async iterator of OpenAI streaming events + Yields: + TaskMessageUpdate: AgentEx streaming events (StreamTaskMessageDelta, StreamTaskMessageFull, or StreamTaskMessageDone) + """ + + tool_map = {} + event_count = 0 + message_index = 0 # Track message index for proper sequencing + item_id_to_index = {} # Map item_id to message index + item_id_to_type = {} # Map item_id to content type (text, reasoning_content, reasoning_summary) + + async for event in stream_response: + event_count += 1 + + # Check for raw response events which contain the actual OpenAI streaming events + if hasattr(event, "type") and event.type == "raw_response_event": + if hasattr(event, "data"): + raw_event = event.data + + # Check for ResponseOutputItemAddedEvent which signals a new message starting + if isinstance(raw_event, ResponseOutputItemAddedEvent): + # Don't increment here - we'll increment when we see the actual text delta + # This is just a signal that a new message is starting + pass + + # Handle item completion - send done event to close the message + elif isinstance(raw_event, ResponseOutputItemDoneEvent): + item_id = raw_event.item.id + if item_id in item_id_to_index: + # Close every streamed message — text AND reasoning — with a + # matching Done. UnifiedEmitter.auto_send only releases a + # context on StreamTaskMessageDone; skipping it for reasoning + # left those messages hanging and their spans incomplete. The + # accumulator rebuilds ReasoningContent from the deltas, so the + # Done carries no payload. 
+ yield StreamTaskMessageDone( + type="done", + index=item_id_to_index[item_id], + ) + + # Skip reasoning summary part added events - we handle them on delta + elif isinstance(raw_event, ResponseReasoningSummaryPartAddedEvent): + pass + + # Handle reasoning summary text delta events + elif isinstance(raw_event, ResponseReasoningSummaryTextDeltaEvent): + item_id = raw_event.item_id + summary_index = raw_event.summary_index + + # If this is a new item_id we haven't seen, create a new message + if item_id and item_id not in item_id_to_index: + message_index += 1 + item_id_to_index[item_id] = message_index + item_id_to_type[item_id] = "reasoning_summary" + + # Send a start event for this new reasoning summary message. + # The start content must be ReasoningContent (not TextContent) + # so consumers that branch on the start event's content type + # render a reasoning/thinking indicator; the final persisted + # content is rebuilt from the reasoning deltas regardless. + yield StreamTaskMessageStart( + type="start", + index=item_id_to_index[item_id], + content=ReasoningContent( + type="reasoning", + author="agent", + summary=[], + content=[], + style="active", + ), + ) + + # Use the index for this item_id + current_index = item_id_to_index.get(item_id, message_index) + + # Yield reasoning summary delta + yield StreamTaskMessageDelta( + type="delta", + index=current_index, + delta=ReasoningSummaryDelta( + type="reasoning_summary", + summary_index=summary_index, + summary_delta=raw_event.delta, + ), + ) + + # Handle reasoning summary text done events + elif isinstance(raw_event, ResponseReasoningSummaryTextDoneEvent): + # We do NOT close the streaming context here + # as there can be multiple reasoning summaries. 
+ # The context will be closed when the entire + # output item is done (ResponseOutputItemDoneEvent) + pass + + # Handle reasoning content text delta events + elif isinstance(raw_event, ResponseReasoningTextDeltaEvent): + item_id = raw_event.item_id + content_index = raw_event.content_index + + # If this is a new item_id we haven't seen, create a new message + if item_id and item_id not in item_id_to_index: + message_index += 1 + item_id_to_index[item_id] = message_index + item_id_to_type[item_id] = "reasoning_content" + + # Send a start event for this new reasoning content message. + # The start content must be ReasoningContent (not TextContent) + # so consumers that branch on the start event's content type + # render a reasoning/thinking indicator; the final persisted + # content is rebuilt from the reasoning deltas regardless. + yield StreamTaskMessageStart( + type="start", + index=item_id_to_index[item_id], + content=ReasoningContent( + type="reasoning", + author="agent", + summary=[], + content=[], + style="active", + ), + ) + + # Use the index for this item_id + current_index = item_id_to_index.get(item_id, message_index) + + # Yield reasoning content delta + yield StreamTaskMessageDelta( + type="delta", + index=current_index, + delta=ReasoningContentDelta( + type="reasoning_content", + content_index=content_index, + content_delta=raw_event.delta, + ), + ) + + # Handle reasoning content text done events + elif isinstance(raw_event, ResponseReasoningTextDoneEvent): + # We do NOT close the streaming context here + # as there can be multiple reasoning content texts. + # The context will be closed when the entire + # output item is done (ResponseOutputItemDoneEvent) + pass + + # Check if this is a text delta event from OpenAI + elif isinstance(raw_event, ResponseTextDeltaEvent): + # Check if this event has an item_id + item_id = getattr(raw_event, "item_id", None) + + # If this is a new item_id we haven't seen, it's a new message. 
+ # Reserve a fresh index for every text item_id (matching the + # increment-then-use convention of the reasoning/tool paths). + # Reusing the current index let a final answer collide with the + # preceding reasoning message on reasoning-model streams. + if item_id and item_id not in item_id_to_index: + message_index += 1 + item_id_to_index[item_id] = message_index + item_id_to_type[item_id] = "text" + + # Send a start event with empty content for this new text message + yield StreamTaskMessageStart( + type="start", + index=item_id_to_index[item_id], + content=TextContent( + type="text", + author="agent", + content="", # Start with empty content, deltas will fill it + ), + ) + + # Use the index for this item_id + current_index = item_id_to_index.get(item_id, message_index) + + delta_message = StreamTaskMessageDelta( + type="delta", + index=current_index, + delta=TextDelta( + type="text", + text_delta=raw_event.delta, + ), + ) + yield delta_message + + elif hasattr(event, "type") and event.type == "run_item_stream_event": + # Skip reasoning_item events - they're handled via raw_response_event above + if hasattr(event, "item") and event.item.type == "reasoning_item": + continue + + # Check for tool_call_item type (this is when a tool is being called) + elif hasattr(event, "item") and event.item.type == "tool_call_item": + # Extract tool call information using the helper method + call_id, tool_name, tool_arguments = _extract_tool_call_info(event.item.raw_item) + tool_map[call_id] = tool_name + tool_request_content = ToolRequestContent( + tool_call_id=call_id, + name=tool_name, + arguments=tool_arguments, + author="agent", + ) + message_index += 1 # Increment for new message + yield StreamTaskMessageFull( + index=message_index, + type="full", + content=tool_request_content, + ) + + # Check for tool_call_output_item type (this is when a tool returns output) + elif hasattr(event, "item") and event.item.type == "tool_call_output_item": + # Extract tool response 
information using the helper method + call_id, tool_name, content = _extract_tool_response_info(tool_map, event.item.raw_item) + tool_response_content = ToolResponseContent( + tool_call_id=call_id, + name=tool_name, + content=content, + author="agent", + ) + message_index += 1 # Increment for new message + yield StreamTaskMessageFull( + type="full", + index=message_index, + content=tool_response_content, + ) diff --git a/src/agentex/lib/adk/_modules/_openai_turn.py b/src/agentex/lib/adk/_modules/_openai_turn.py new file mode 100644 index 000000000..cfb1ce22d --- /dev/null +++ b/src/agentex/lib/adk/_modules/_openai_turn.py @@ -0,0 +1,134 @@ +"""OpenAITurn: adapt an OpenAI Agents SDK streamed run onto the harness surface. + +A ``HarnessTurn`` exposes a single canonical ``StreamTaskMessage*`` stream plus +normalized usage. ``OpenAITurn`` wraps a ``RunResultStreaming`` (from +``Runner.run_streamed``), converts its native OpenAI events into the canonical +stream via ``convert_openai_to_agentex_events``, and after exhaustion reads the +run's ``raw_responses`` to aggregate usage into a provider-independent +``TurnUsage``. + +Delivery (yield vs auto-send) and tracing are owned by ``UnifiedEmitter``; this +module is purely the provider->canonical adapter. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, AsyncIterator + +from agents.usage import Usage + +from agentex.lib.utils.logging import make_logger +from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage +from agentex.lib.adk._modules._openai_sync import ( + convert_openai_to_agentex_events, +) + +if TYPE_CHECKING: + from agents import ModelResponse, RunResultStreaming + +logger = make_logger(__name__) + + +def openai_usage_to_turn_usage(usage: Usage | None, model: str | None) -> TurnUsage: + """Map an ``agents.Usage`` to a harness-independent ``TurnUsage``. 
+ + All field access is defensive (``getattr(..., None)``): different model + backends populate different subsets of the usage object, and real zeros are + valid values (e.g. 0 output tokens on a pure cache hit), so we never coerce + a present-but-zero value into ``None``. + """ + if usage is None: + return TurnUsage(model=model) + + input_details = getattr(usage, "input_tokens_details", None) + output_details = getattr(usage, "output_tokens_details", None) + + return TurnUsage( + model=model, + num_llm_calls=getattr(usage, "requests", None) or 0, + input_tokens=getattr(usage, "input_tokens", None), + cached_input_tokens=getattr(input_details, "cached_tokens", None), + output_tokens=getattr(usage, "output_tokens", None), + reasoning_tokens=getattr(output_details, "reasoning_tokens", None), + total_tokens=getattr(usage, "total_tokens", None), + ) + + +def _aggregate_usage(raw_responses: list[ModelResponse]) -> Usage | None: + """Sum the per-response ``Usage`` across a run's ``ModelResponse`` list. + + Returns ``None`` when no response carries usage so the caller can emit a + usage object with only the model name set. ``Usage.add`` accumulates + requests/tokens (including cached/reasoning detail fields). + """ + total: Usage | None = None + for response in raw_responses: + resp_usage = getattr(response, "usage", None) + if resp_usage is None: + continue + if total is None: + total = Usage() + total.add(resp_usage) + return total + + +class OpenAITurn: + """A single OpenAI Agents SDK turn adapted to the ``HarnessTurn`` protocol. + + Construct with exactly one of: + - ``result``: a ``RunResultStreaming`` from ``Runner.run_streamed``. Its + ``stream_events()`` is converted to the canonical stream, and after the + stream is exhausted ``raw_responses`` is read to compute usage. + - ``stream``: a pre-built async iterator of canonical ``StreamTaskMessage`` + events (bypasses ``convert_openai_to_agentex_events``). 
Useful for tests + and for callers that have already produced canonical events. Usage stays + at ``TurnUsage(model=...)`` because there is no run to read usage from. + + ``coalesce_tool_requests`` is accepted for API parity with other provider + turns but is a no-op for OpenAI: the OpenAI converter already emits a single + ``Full(ToolRequestContent)`` per tool call rather than streamed argument + deltas, so there is nothing to coalesce. + """ + + def __init__( + self, + result: RunResultStreaming | None = None, + model: str | None = None, + stream: AsyncIterator[StreamTaskMessage] | None = None, + coalesce_tool_requests: bool = False, # noqa: ARG002 - API parity, no-op for OpenAI + ) -> None: + if result is None and stream is None: + raise ValueError("OpenAITurn requires either `result` or `stream`") + self._result = result + self._model = model + self._stream = stream + self._usage: TurnUsage = TurnUsage(model=model) + + @property + def events(self) -> AsyncIterator[StreamTaskMessage]: + return self._iter_events() + + async def _iter_events(self) -> AsyncIterator[StreamTaskMessage]: + if self._stream is not None: + async for event in self._stream: + yield event + return + + result = self._result + assert result is not None # guaranteed by __init__ + async for event in convert_openai_to_agentex_events(result.stream_events()): + yield event + + # Stream is exhausted: the run has finished and raw_responses is now + # populated, so usage can be aggregated and normalized. + try: + raw_responses: list[Any] = list(getattr(result, "raw_responses", None) or []) + aggregated = _aggregate_usage(raw_responses) + self._usage = openai_usage_to_turn_usage(aggregated, self._model) + except Exception as exc: # pragma: no cover - defensive: never break delivery on usage + logger.warning(f"Failed to aggregate OpenAI usage: {exc}") + self._usage = TurnUsage(model=self._model) + + def usage(self) -> TurnUsage: + """Normalized turn usage. 
Valid only after ``events`` is exhausted.""" + return self._usage diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py b/src/agentex/lib/adk/_modules/_pydantic_ai_async.py deleted file mode 100644 index 85abfb845..000000000 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Async Pydantic AI streaming helper for Agentex. - -Consumes a Pydantic AI ``agent.run_stream_events(...)`` async iterator and -pushes Agentex streaming updates to Redis via the ``adk.streaming`` -contexts. For use with async ACP agents that stream via Redis rather than -HTTP yields. - -Text and thinking tokens stream as deltas inside coalesced streaming -contexts. Tool requests and tool results are posted as open+close pairs -on a streaming context (the unified surface persists ``initial_content`` -when a context is closed without deltas). This matches the ``auto_send`` -convention used by all other async/Temporal harnesses. - -Tracing is opt-in via a ``tracing_handler`` parameter — see -``create_pydantic_ai_tracing_handler`` in -``agentex.lib.adk._modules._pydantic_ai_tracing``. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - -async def stream_pydantic_ai_events( - stream, - task_id: str, - tracing_handler: "AgentexPydanticAITracingHandler | None" = None, -) -> str: - """Stream Pydantic AI events to Agentex via Redis. - - Args: - stream: Async iterator yielded by ``agent.run_stream_events(...)``. - task_id: The Agentex task ID to stream messages to. - tracing_handler: Optional handler from - ``create_pydantic_ai_tracing_handler(...)``. When provided, each - tool call in the run is also recorded as an Agentex child span - beneath the handler's configured ``parent_span_id``. Streaming - behavior is unchanged when omitted. 
- - Returns: - The accumulated text content of the **last** text part in the run. - Multi-step runs (where the model emits text, then a tool call, then - more text) return only the final text segment, matching the - ``stream_langgraph_events`` convention. - """ - from agentex.lib.core.harness.emitter import UnifiedEmitter - from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - - turn = PydanticAITurn( - stream, - model=None, - tracing_handler=tracing_handler, - ) - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=None, - parent_span_id=None, - ) - result = await emitter.auto_send_turn(turn) - return result.final_text diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py index e4ac31e7e..0f9aaeb55 100644 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py +++ b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py @@ -41,14 +41,9 @@ async def handle_message_send(params): import json import inspect -from typing import TYPE_CHECKING, Any, Callable, AsyncIterator +from typing import Any, Callable, AsyncIterator from pydantic_ai.run import AgentRunResultEvent - -if TYPE_CHECKING: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) from pydantic_ai.messages import ( TextPart, PartEndEvent, @@ -124,7 +119,6 @@ def _tool_return_content(result: ToolReturnPart | Any) -> Any: async def convert_pydantic_ai_to_agentex_events( stream_response: AsyncIterator[Any], - tracing_handler: "AgentexPydanticAITracingHandler | None" = None, on_result: Callable[[AgentRunResultEvent], Any] | None = None, ) -> AsyncIterator[StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone]: """Convert a Pydantic AI agent event stream into Agentex stream events. 
@@ -148,11 +142,6 @@ async def convert_pydantic_ai_to_agentex_events( stream_response: The async iterator yielded by Pydantic AI's ``agent.run_stream_events(...)`` context manager (or a stream of ``AgentStreamEvent`` items received in an ``event_stream_handler``). - tracing_handler: Optional handler from - ``create_pydantic_ai_tracing_handler(...)``. When provided, each - tool call in the run is also recorded as an Agentex child span - beneath the handler's configured ``parent_span_id``. Streaming - behavior is unchanged when omitted. on_result: Optional callback invoked with the terminal ``AgentRunResultEvent`` when the run completes. Both sync and async callables are accepted. No ``StreamTaskMessage*`` events are @@ -306,26 +295,6 @@ async def convert_pydantic_ai_to_agentex_events( if message_index is None: continue yield StreamTaskMessageDone(type="done", index=message_index) - # Tool-call parts end with the model's full args known. Open a - # tracing child span for the tool execution now; close it when - # FunctionToolResultEvent arrives below. 
- if tracing_handler is not None and isinstance(event.part, ToolCallPart) and event.part.tool_call_id: - args: dict[str, Any] | str | None - raw_args = event.part.args - if isinstance(raw_args, dict): - args = dict(raw_args) - elif isinstance(raw_args, str): - try: - args = json.loads(raw_args) if raw_args else {} - except json.JSONDecodeError: - args = {"_raw": raw_args} - else: - args = {} - await tracing_handler.on_tool_start( - tool_call_id=event.part.tool_call_id, - tool_name=event.part.tool_name, - arguments=args, - ) elif isinstance(event, FunctionToolResultEvent): result = event.part @@ -345,11 +314,6 @@ async def convert_pydantic_ai_to_agentex_events( content=content_payload, ), ) - if tracing_handler is not None and tool_call_id: - await tracing_handler.on_tool_end( - tool_call_id=tool_call_id, - result=content_payload, - ) elif isinstance(event, (FunctionToolCallEvent, FinalResultEvent, AgentRunResultEvent)): # Already covered by PartStart/PartDelta/PartEnd events above, or diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py b/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py deleted file mode 100644 index e199d0a8c..000000000 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py +++ /dev/null @@ -1,221 +0,0 @@ -"""Tracing handler that records Agentex spans for tool calls in a pydantic-ai agent run. - -.. deprecated:: - ``AgentexPydanticAITracingHandler`` and ``create_pydantic_ai_tracing_handler`` - are superseded by the unified harness surface (``UnifiedEmitter`` in - ``agentex.lib.core.harness``). The unified surface derives tool and - reasoning spans directly from the canonical ``StreamTaskMessage*`` stream, - so no separate handler is required. Both symbols remain fully importable - and functional; they will be removed in a future release. 
New code should - construct a ``UnifiedEmitter`` with a ``trace_id`` instead: - - from agentex.lib.core.harness import UnifiedEmitter - from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - - emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id) - turn = PydanticAITurn(agent.run_stream_events(prompt), model="openai:gpt-4o") - async for event in emitter.yield_turn(turn): - yield event - -# NOTE: A runtime ``warnings.warn(..., DeprecationWarning)`` is intentionally -# omitted here. The repo's pyproject ``filterwarnings = ["error"]`` would turn -# it into a test/caller failure, and the async helper (``stream_pydantic_ai_events``) -# still threads this handler through for existing callers that lack a ``trace_id`` -# on the async path. The runtime warning and caller migration are deferred until -# ``trace_id`` threading lands on the async helper in a future API-versioning change. - -Mirrors the LangGraph tracing handler pattern: the caller creates a handler -bound to a ``trace_id`` and a ``parent_span_id``, then hands it to -``stream_pydantic_ai_events(..., tracing_handler=handler)``. The streamer -calls ``on_tool_start`` / ``on_tool_end`` as it observes the corresponding -events in the agent stream, and the handler records one Agentex child span -per tool call. - -Why a handler-on-the-streamer rather than an OpenTelemetry bridge: -pydantic-ai exposes its stream of ``AgentStreamEvent`` directly, and that -stream already contains every signal we need to record tool spans. Going -through an OTel processor would require setting up an OTel ``TracerProvider`` -plus a bridge processor — that's a much larger investment, and orthogonal -to the streaming path we already own. This handler hooks into the same -event stream the UI-streaming helper consumes, so a single pass over the -events produces both: live deltas on Redis and child spans on the AgentEx -tracing pipeline. 
- -Why span IDs are derived from ``tool_call_id`` instead of held in a dict: -pydantic-ai's ``TemporalAgent`` splits the agent run across one or more -Temporal activities. The ``event_stream_handler`` is invoked once per -activity, with a fresh handler instance each time. So ``on_tool_start`` -(emitted inside the model activity that issued the tool call) and -``on_tool_end`` (emitted inside the next model activity, after the tool -runs) land in different handler instances — an in-memory dict can't pair -them. Deriving the span ID deterministically from ``(trace_id, -tool_call_id)`` makes the open/close pairing stateless: ``on_tool_end`` -re-derives the same ID and PATCHes the existing span directly. - -Span hierarchy produced:: - - (e.g. "Turn N", created by the caller) - ├── tool: (one child span per tool call) - └── tool: -""" - -from __future__ import annotations - -import uuid -from typing import Any -from datetime import UTC, datetime - -from agentex import AsyncAgentex -from agentex.lib.utils.logging import make_logger -from agentex.lib.adk._modules.tracing import TracingModule -from agentex.lib.adk.utils._modules.client import create_async_agentex_client - -logger = make_logger(__name__) - - -# Stable namespace for deriving tool-call span IDs. The exact UUID value is -# arbitrary; it just needs to be a constant so the same (trace_id, tool_call_id) -# always maps to the same span ID across handler invocations. -_TOOL_SPAN_NAMESPACE = uuid.UUID("8c2f9a2b-3e4d-4b5a-9c1f-0a1b2c3d4e5f") - - -def _tool_span_id(trace_id: str, tool_call_id: str) -> str: - """Deterministic span ID for a given tool call within a trace.""" - return str(uuid.uuid5(_TOOL_SPAN_NAMESPACE, f"{trace_id}:{tool_call_id}")) - - -class AgentexPydanticAITracingHandler: - """Records Agentex tracing spans for tool calls observed in a pydantic-ai event stream. - - .. 
deprecated:: - Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which - derives tool and reasoning spans from the canonical ``StreamTaskMessage*`` - stream automatically when ``trace_id`` is provided. This class remains - fully functional but will be removed in a future release. New code should - use ``UnifiedEmitter`` with a trace context instead of constructing this - handler directly. - - Pass an instance to ``stream_pydantic_ai_events(..., tracing_handler=...)`` - or call ``on_tool_start`` / ``on_tool_end`` yourself if you're consuming - the event stream by hand. - """ - - def __init__( - self, - trace_id: str, - parent_span_id: str | None = None, - task_id: str | None = None, - tracing: TracingModule | None = None, - client: AsyncAgentex | None = None, - ) -> None: - self._trace_id = trace_id - self._parent_span_id = parent_span_id - # task_id on the span record (separate from trace_id) is what the - # AgentEx UI's per-task spans dropdown filters by. If you want your - # tool spans visible in that dropdown, set this to the task ID. - self._task_id = task_id - # ``_tracing`` is retained for callers / tests that want to inject a - # mocked TracingModule, even though the on_tool_* methods now go - # direct to the AgentEx client (see module docstring for why). - self._tracing_eager = tracing - self._tracing_lazy: TracingModule | None = None - # Defer client construction until first use so httpx binds to the - # running event loop (matches the TracingModule pattern). 
- self._client_eager = client - self._client_lazy: AsyncAgentex | None = None - - @property - def _tracing(self) -> TracingModule: - if self._tracing_eager is not None: - return self._tracing_eager - if self._tracing_lazy is None: - self._tracing_lazy = TracingModule() - return self._tracing_lazy - - @property - def _client(self) -> AsyncAgentex: - if self._client_eager is not None: - return self._client_eager - if self._client_lazy is None: - self._client_lazy = create_async_agentex_client() - return self._client_lazy - - async def on_tool_start( - self, - tool_call_id: str, - tool_name: str, - arguments: dict[str, Any] | str | None, - ) -> None: - """Open a child span for a tool call. - - Uses a deterministic span ID derived from ``tool_call_id`` so that - ``on_tool_end`` — which may run inside a different handler instance - when pydantic-ai splits the run across Temporal activities — can - close the same span without needing in-memory state. - """ - span_id = _tool_span_id(self._trace_id, tool_call_id) - await self._client.spans.create( - id=span_id, - trace_id=self._trace_id, - task_id=self._task_id, - parent_id=self._parent_span_id, - name=f"tool:{tool_name}" if tool_name else "tool", - start_time=datetime.now(UTC), - input={"arguments": arguments}, - data={"__span_type__": "CUSTOM"}, - ) - - async def on_tool_end(self, tool_call_id: str, result: Any) -> None: - """Close a child span by PATCHing its end_time and output. - - Re-derives the deterministic span ID from ``tool_call_id`` and updates - the existing span record directly. No in-memory span lookup, so this - works even when ``on_tool_start`` ran inside a different handler - instance (e.g. across pydantic-ai TemporalAgent activity boundaries). 
- """ - span_id = _tool_span_id(self._trace_id, tool_call_id) - await self._client.spans.update( - span_id, - end_time=datetime.now(UTC), - output={"result": result}, - ) - - async def on_tool_error(self, tool_call_id: str, error: BaseException | str) -> None: - """Close a child span with an error payload as output.""" - span_id = _tool_span_id(self._trace_id, tool_call_id) - await self._client.spans.update( - span_id, - end_time=datetime.now(UTC), - output={"error": str(error)}, - ) - - -def create_pydantic_ai_tracing_handler( - trace_id: str, - parent_span_id: str | None = None, - task_id: str | None = None, -) -> AgentexPydanticAITracingHandler: - """Create a tracing handler that records Agentex spans for pydantic-ai tool calls. - - .. deprecated:: - Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which - derives tool and reasoning spans from the canonical ``StreamTaskMessage*`` - stream automatically when ``trace_id`` is provided. This function remains - fully functional but will be removed in a future release. New code should - construct a ``UnifiedEmitter`` with a trace context instead. - - Args: - trace_id: The trace ID. Typically the Agentex task ID. - parent_span_id: Optional parent span ID to nest tool spans under. If - omitted, the tool spans become trace-root spans. - task_id: Optional task ID stamped onto each span. Required for the - AgentEx UI's per-task spans dropdown to display the spans. - - Returns: - A handler suitable for passing to ``stream_pydantic_ai_events(..., tracing_handler=...)``. 
- """ - return AgentexPydanticAITracingHandler( - trace_id=trace_id, - parent_span_id=parent_span_id, - task_id=task_id, - ) diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py index b06172e7f..4e9340d7a 100644 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py +++ b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, AsyncIterator +from typing import Any, AsyncIterator from pydantic_ai.run import AgentRunResultEvent @@ -28,9 +28,6 @@ ) from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events -if TYPE_CHECKING: - from agentex.lib.adk._modules._pydantic_ai_tracing import AgentexPydanticAITracingHandler - StreamTaskMessage = StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone @@ -83,19 +80,17 @@ class PydanticAITurn: ``events`` is identical to the bare ``convert_pydantic_ai_to_agentex_events`` output (tool calls stream as ``Start + ToolRequestDelta + Done``, preserving argument-token streaming on the sync/yield channel). The foundation - ``auto_send`` delivers the streamed tool-request shape natively (AGX1-377), - so no coalescing is needed on either channel. + ``auto_send`` delivers the streamed tool-request shape natively, so no + coalescing is needed on either channel. 
""" def __init__( self, stream: AsyncIterator[Any], model: str | None = None, - tracing_handler: "AgentexPydanticAITracingHandler | None" = None, ) -> None: self._stream = stream self._model = model - self._tracing_handler = tracing_handler self._usage = TurnUsage(model=model) @property @@ -119,7 +114,6 @@ def _capture(result_event: AgentRunResultEvent) -> None: raw_stream = convert_pydantic_ai_to_agentex_events( self._stream, - tracing_handler=self._tracing_handler, on_result=_capture, ) async for ev in raw_stream: @@ -132,3 +126,48 @@ def usage(self) -> TurnUsage: Before exhaustion the model field is set but token fields are None. """ return self._usage + + +async def stream_pydantic_ai_events( + stream, + task_id: str, +) -> str: + """Stream Pydantic AI events to Agentex via Redis. + + Consumes a Pydantic AI ``agent.run_stream_events(...)`` async iterator and + pushes Agentex streaming updates to Redis via the ``adk.streaming`` + contexts. For use with async ACP agents that stream via Redis rather than + HTTP yields. + + Text and thinking tokens stream as deltas inside coalesced streaming + contexts. Tool requests and tool results are posted as open+close pairs + on a streaming context (the unified surface persists ``initial_content`` + when a context is closed without deltas). This matches the ``auto_send`` + convention used by all other async/Temporal harnesses. + + Tracing is derived automatically from the event stream by the emitter when + a ``trace_id`` is provided to the ``UnifiedEmitter``. + + Args: + stream: Async iterator yielded by ``agent.run_stream_events(...)``. + task_id: The Agentex task ID to stream messages to. + + Returns: + The accumulated text content of the **last** text part in the run. + Multi-step runs (where the model emits text, then a tool call, then + more text) return only the final text segment, matching the + ``stream_langgraph_events`` convention. 
+ """ + from agentex.lib.core.harness.emitter import UnifiedEmitter + + turn = PydanticAITurn( + stream, + model=None, + ) + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=None, + parent_span_id=None, + ) + result = await emitter.auto_send_turn(turn) + return result.final_text diff --git a/src/agentex/lib/adk/providers/_modules/openai_turn.py b/src/agentex/lib/adk/providers/_modules/openai_turn.py index 17a6518ee..320642dfc 100644 --- a/src/agentex/lib/adk/providers/_modules/openai_turn.py +++ b/src/agentex/lib/adk/providers/_modules/openai_turn.py @@ -1,134 +1,12 @@ -"""OpenAITurn: adapt an OpenAI Agents SDK streamed run onto the harness surface. +"""Back-compat shim: ``OpenAITurn`` and ``openai_usage_to_turn_usage`` now live +in ``agentex.lib.adk._modules._openai_turn``. -A ``HarnessTurn`` exposes a single canonical ``StreamTaskMessage*`` stream plus -normalized usage. ``OpenAITurn`` wraps a ``RunResultStreaming`` (from -``Runner.run_streamed``), converts its native OpenAI events into the canonical -stream via ``convert_openai_to_agentex_events``, and after exhaustion reads the -run's ``raw_responses`` to aggregate usage into a provider-independent -``TurnUsage``. - -Delivery (yield vs auto-send) and tracing are owned by ``UnifiedEmitter``; this -module is purely the provider->canonical adapter. +Existing importers of +``agentex.lib.adk.providers._modules.openai_turn.{OpenAITurn,openai_usage_to_turn_usage}`` +keep working. 
""" -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, AsyncIterator - -from agents.usage import Usage - -from agentex.lib.utils.logging import make_logger -from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage -from agentex.lib.adk.providers._modules.sync_provider import ( - convert_openai_to_agentex_events, +from agentex.lib.adk._modules._openai_turn import ( # noqa: F401 + OpenAITurn, + openai_usage_to_turn_usage, ) - -if TYPE_CHECKING: - from agents import ModelResponse, RunResultStreaming - -logger = make_logger(__name__) - - -def openai_usage_to_turn_usage(usage: Usage | None, model: str | None) -> TurnUsage: - """Map an ``agents.Usage`` to a harness-independent ``TurnUsage``. - - All field access is defensive (``getattr(..., None)``): different model - backends populate different subsets of the usage object, and real zeros are - valid values (e.g. 0 output tokens on a pure cache hit), so we never coerce - a present-but-zero value into ``None``. - """ - if usage is None: - return TurnUsage(model=model) - - input_details = getattr(usage, "input_tokens_details", None) - output_details = getattr(usage, "output_tokens_details", None) - - return TurnUsage( - model=model, - num_llm_calls=getattr(usage, "requests", None) or 0, - input_tokens=getattr(usage, "input_tokens", None), - cached_input_tokens=getattr(input_details, "cached_tokens", None), - output_tokens=getattr(usage, "output_tokens", None), - reasoning_tokens=getattr(output_details, "reasoning_tokens", None), - total_tokens=getattr(usage, "total_tokens", None), - ) - - -def _aggregate_usage(raw_responses: list[ModelResponse]) -> Usage | None: - """Sum the per-response ``Usage`` across a run's ``ModelResponse`` list. - - Returns ``None`` when no response carries usage so the caller can emit a - usage object with only the model name set. ``Usage.add`` accumulates - requests/tokens (including cached/reasoning detail fields). 
- """ - total: Usage | None = None - for response in raw_responses: - resp_usage = getattr(response, "usage", None) - if resp_usage is None: - continue - if total is None: - total = Usage() - total.add(resp_usage) - return total - - -class OpenAITurn: - """A single OpenAI Agents SDK turn adapted to the ``HarnessTurn`` protocol. - - Construct with exactly one of: - - ``result``: a ``RunResultStreaming`` from ``Runner.run_streamed``. Its - ``stream_events()`` is converted to the canonical stream, and after the - stream is exhausted ``raw_responses`` is read to compute usage. - - ``stream``: a pre-built async iterator of canonical ``StreamTaskMessage`` - events (bypasses ``convert_openai_to_agentex_events``). Useful for tests - and for callers that have already produced canonical events. Usage stays - at ``TurnUsage(model=...)`` because there is no run to read usage from. - - ``coalesce_tool_requests`` is accepted for API parity with other provider - turns but is a no-op for OpenAI: the OpenAI converter already emits a single - ``Full(ToolRequestContent)`` per tool call rather than streamed argument - deltas, so there is nothing to coalesce. 
- """ - - def __init__( - self, - result: RunResultStreaming | None = None, - model: str | None = None, - stream: AsyncIterator[StreamTaskMessage] | None = None, - coalesce_tool_requests: bool = False, # noqa: ARG002 - API parity, no-op for OpenAI - ) -> None: - if result is None and stream is None: - raise ValueError("OpenAITurn requires either `result` or `stream`") - self._result = result - self._model = model - self._stream = stream - self._usage: TurnUsage = TurnUsage(model=model) - - @property - def events(self) -> AsyncIterator[StreamTaskMessage]: - return self._iter_events() - - async def _iter_events(self) -> AsyncIterator[StreamTaskMessage]: - if self._stream is not None: - async for event in self._stream: - yield event - return - - result = self._result - assert result is not None # guaranteed by __init__ - async for event in convert_openai_to_agentex_events(result.stream_events()): - yield event - - # Stream is exhausted: the run has finished and raw_responses is now - # populated, so usage can be aggregated and normalized. - try: - raw_responses: list[Any] = list(getattr(result, "raw_responses", None) or []) - aggregated = _aggregate_usage(raw_responses) - self._usage = openai_usage_to_turn_usage(aggregated, self._model) - except Exception as exc: # pragma: no cover - defensive: never break delivery on usage - logger.warning(f"Failed to aggregate OpenAI usage: {exc}") - self._usage = TurnUsage(model=self._model) - - def usage(self) -> TurnUsage: - """Normalized turn usage. 
Valid only after ``events`` is exhausted.""" - return self._usage diff --git a/src/agentex/lib/adk/providers/_modules/sync_provider.py b/src/agentex/lib/adk/providers/_modules/sync_provider.py index d1d5e1c09..86696a2b5 100644 --- a/src/agentex/lib/adk/providers/_modules/sync_provider.py +++ b/src/agentex/lib/adk/providers/_modules/sync_provider.py @@ -14,37 +14,11 @@ TResponseInputItem, AgentOutputSchemaBase, ) -from openai.types.responses import ( - ResponseTextDeltaEvent, - ResponseFunctionToolCall, - ResponseFunctionWebSearch, - ResponseOutputItemDoneEvent, - ResponseOutputItemAddedEvent, - ResponseCodeInterpreterToolCall, - ResponseReasoningSummaryPartAddedEvent, - ResponseReasoningSummaryTextDeltaEvent, -) from agents.models.openai_provider import OpenAIProvider -from openai.types.responses.response_reasoning_text_done_event import ResponseReasoningTextDoneEvent -from openai.types.responses.response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent -from openai.types.responses.response_reasoning_summary_text_done_event import ResponseReasoningSummaryTextDoneEvent from agentex import AsyncAgentex from agentex.lib.utils.logging import make_logger from agentex.lib.core.tracing.tracer import AsyncTracer -from agentex.types.reasoning_content import ReasoningContent -from agentex.types.task_message_delta import TextDelta -from agentex.types.task_message_update import ( - StreamTaskMessageDone, - StreamTaskMessageFull, - StreamTaskMessageDelta, - StreamTaskMessageStart, -) -from agentex.types.task_message_content import TextContent -from agentex.types.tool_request_content import ToolRequestContent -from agentex.types.tool_response_content import ToolResponseContent -from agentex.types.reasoning_content_delta import ReasoningContentDelta -from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta logger = make_logger(__name__) @@ -95,10 +69,10 @@ class SyncStreamingModel(Model): .. 
deprecated:: Prefer the unified harness surface for new OpenAI Agents integrations: wrap a ``Runner.run_streamed`` result in - ``agentex.lib.adk.providers._modules.openai_turn.OpenAITurn`` and drive + ``agentex.lib.adk._modules._openai_turn.OpenAITurn`` and drive delivery + tracing through ``UnifiedEmitter`` (see the - ``060_harness_openai`` / ``130_harness_openai`` / ``140_harness_openai`` - tutorials). This per-model tracing wrapper predates the harness and is + ``050_openai_agents`` / ``120_openai_agents`` tutorials). This + per-model tracing wrapper predates the harness and is retained only for backwards compatibility; it will be removed in a future release. No runtime warning is emitted. """ @@ -406,329 +380,8 @@ def get_model(self, model_name: Optional[str] = None) -> Model: return wrapped_model -def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, Any]]: - """ - Extract call_id, tool_name, and tool_arguments from a tool call item. - Args: - tool_call_item: The tool call item to process - Returns: - A tuple of (call_id, tool_name, tool_arguments) - """ - # Generic handling for different tool call types - # Try 'call_id' first, then 'id', then generate placeholder - if hasattr(tool_call_item, "call_id"): - call_id = tool_call_item.call_id - elif hasattr(tool_call_item, "id"): - call_id = tool_call_item.id - else: - call_id = f"unknown_call_{id(tool_call_item)}" - - if isinstance(tool_call_item, ResponseFunctionWebSearch): - tool_name = "web_search" - tool_arguments = {"action": tool_call_item.action.model_dump(), "status": tool_call_item.status} - elif isinstance(tool_call_item, ResponseCodeInterpreterToolCall): - tool_name = "code_interpreter" - tool_arguments = {"code": tool_call_item.code, "status": tool_call_item.status} - elif isinstance(tool_call_item, ResponseFunctionToolCall): - # Handle standard function tool calls - tool_name = tool_call_item.name - # Handle the arguments field which might be a string or None - if 
tool_call_item.arguments: - if isinstance(tool_call_item.arguments, str): - import json - - tool_arguments = json.loads(tool_call_item.arguments) if tool_call_item.arguments else {} - else: - tool_arguments = tool_call_item.arguments - else: - tool_arguments = {} - else: - # Generic handling for any tool call type - tool_name = getattr(tool_call_item, "name", type(tool_call_item).__name__) - # Handle the arguments field which might be a string or None - if hasattr(tool_call_item, "arguments"): - arguments = tool_call_item.arguments - if isinstance(arguments, str): - import json - - tool_arguments = json.loads(arguments) if arguments else {} - elif arguments is None: - tool_arguments = {} - else: - tool_arguments = arguments - else: - tool_arguments = tool_call_item.model_dump() - - return call_id, tool_name, tool_arguments - - -def _extract_tool_response_info(tool_map: dict[str, Any], tool_output_item: Any) -> tuple[str, str, str]: - """ - Extract call_id, tool_name, and content from a tool output item. 
- Args: - tool_map: Dictionary mapping call_ids to tool names - tool_output_item: The tool output item to process - Returns: - A tuple of (call_id, tool_name, content) - """ - - # Handle different formats of tool_output_item - if isinstance(tool_output_item, dict): - call_id = tool_output_item.get("call_id", tool_output_item.get("id", f"unknown_call_{id(tool_output_item)}")) - content = tool_output_item.get("output", str(tool_output_item)) - else: - # Try to get call_id from attributes - if hasattr(tool_output_item, "call_id"): - call_id = tool_output_item.call_id - elif hasattr(tool_output_item, "id"): - call_id = tool_output_item.id - else: - call_id = f"unknown_call_{id(tool_output_item)}" - - # Get content - if hasattr(tool_output_item, "output"): - content = tool_output_item.output - else: - content = str(tool_output_item) - - # Get tool name from map - tool_name = tool_map.get(call_id, "unknown_tool") - - return call_id, tool_name, content - - -async def convert_openai_to_agentex_events(stream_response): - """Convert OpenAI streaming events to AgentEx TaskMessageUpdate events with reasoning support. 
- - This is an enhanced version of the base converter that includes support for: - - Reasoning content deltas (for o1 models) - - Reasoning summary deltas (for o1 models) - - Args: - stream_response: An async iterator of OpenAI streaming events - Yields: - TaskMessageUpdate: AgentEx streaming events (StreamTaskMessageDelta, StreamTaskMessageFull, or StreamTaskMessageDone) - """ - - tool_map = {} - event_count = 0 - message_index = 0 # Track message index for proper sequencing - seen_tool_output = False # Track if we've seen tool output to know when final text starts - item_id_to_index = {} # Map item_id to message index - item_id_to_type = {} # Map item_id to content type (text, reasoning_content, reasoning_summary) - - async for event in stream_response: - event_count += 1 - - # Check for raw response events which contain the actual OpenAI streaming events - if hasattr(event, "type") and event.type == "raw_response_event": - if hasattr(event, "data"): - raw_event = event.data - - # Check for ResponseOutputItemAddedEvent which signals a new message starting - if isinstance(raw_event, ResponseOutputItemAddedEvent): - # Don't increment here - we'll increment when we see the actual text delta - # This is just a signal that a new message is starting - pass - - # Handle item completion - send done event to close the message - elif isinstance(raw_event, ResponseOutputItemDoneEvent): - item_id = raw_event.item.id - if item_id in item_id_to_index: - # Get the message type to decide whether to send done event - message_type = item_id_to_type.get(item_id, "text") - - # Don't send done events for reasoning content/summary - # They just end with their last delta - if message_type not in ("reasoning_content", "reasoning_summary"): - yield StreamTaskMessageDone( - type="done", - index=item_id_to_index[item_id], - ) - - # Skip reasoning summary part added events - we handle them on delta - elif isinstance(raw_event, ResponseReasoningSummaryPartAddedEvent): - pass - - # Handle 
reasoning summary text delta events - elif isinstance(raw_event, ResponseReasoningSummaryTextDeltaEvent): - item_id = raw_event.item_id - summary_index = raw_event.summary_index - - # If this is a new item_id we haven't seen, create a new message - if item_id and item_id not in item_id_to_index: - message_index += 1 - item_id_to_index[item_id] = message_index - item_id_to_type[item_id] = "reasoning_summary" - - # Send a start event for this new reasoning summary message. - # The start content must be ReasoningContent (not TextContent) - # so consumers that branch on the start event's content type - # render a reasoning/thinking indicator; the final persisted - # content is rebuilt from the reasoning deltas regardless. - yield StreamTaskMessageStart( - type="start", - index=item_id_to_index[item_id], - content=ReasoningContent( - type="reasoning", - author="agent", - summary=[], - content=[], - style="active", - ), - ) - - # Use the index for this item_id - current_index = item_id_to_index.get(item_id, message_index) - - # Yield reasoning summary delta - yield StreamTaskMessageDelta( - type="delta", - index=current_index, - delta=ReasoningSummaryDelta( - type="reasoning_summary", - summary_index=summary_index, - summary_delta=raw_event.delta, - ), - ) - - # Handle reasoning summary text done events - elif isinstance(raw_event, ResponseReasoningSummaryTextDoneEvent): - # We do NOT close the streaming context here - # as there can be multiple reasoning summaries. 
- # The context will be closed when the entire - # output item is done (ResponseOutputItemDoneEvent) - pass - - # Handle reasoning content text delta events - elif isinstance(raw_event, ResponseReasoningTextDeltaEvent): - item_id = raw_event.item_id - content_index = raw_event.content_index - - # If this is a new item_id we haven't seen, create a new message - if item_id and item_id not in item_id_to_index: - message_index += 1 - item_id_to_index[item_id] = message_index - item_id_to_type[item_id] = "reasoning_content" - - # Send a start event for this new reasoning content message. - # The start content must be ReasoningContent (not TextContent) - # so consumers that branch on the start event's content type - # render a reasoning/thinking indicator; the final persisted - # content is rebuilt from the reasoning deltas regardless. - yield StreamTaskMessageStart( - type="start", - index=item_id_to_index[item_id], - content=ReasoningContent( - type="reasoning", - author="agent", - summary=[], - content=[], - style="active", - ), - ) - - # Use the index for this item_id - current_index = item_id_to_index.get(item_id, message_index) - - # Yield reasoning content delta - yield StreamTaskMessageDelta( - type="delta", - index=current_index, - delta=ReasoningContentDelta( - type="reasoning_content", - content_index=content_index, - content_delta=raw_event.delta, - ), - ) - - # Handle reasoning content text done events - elif isinstance(raw_event, ResponseReasoningTextDoneEvent): - # We do NOT close the streaming context here - # as there can be multiple reasoning content texts. 
- # The context will be closed when the entire - # output item is done (ResponseOutputItemDoneEvent) - pass - - # Check if this is a text delta event from OpenAI - elif isinstance(raw_event, ResponseTextDeltaEvent): - # Check if this event has an item_id - item_id = getattr(raw_event, "item_id", None) - - # If this is a new item_id we haven't seen, it's a new message - if item_id and item_id not in item_id_to_index: - # Check if this is truly a NEW text message after tools - # We need to differentiate between the first text and the final text after tools - if seen_tool_output: - # This is the final text message after tool execution - message_index += 1 - item_id_to_index[item_id] = message_index - else: - item_id_to_index[item_id] = message_index - - item_id_to_type[item_id] = "text" - - # Send a start event with empty content for this new text message - yield StreamTaskMessageStart( - type="start", - index=item_id_to_index[item_id], - content=TextContent( - type="text", - author="agent", - content="", # Start with empty content, deltas will fill it - ), - ) - - # Use the index for this item_id - current_index = item_id_to_index.get(item_id, message_index) - - delta_message = StreamTaskMessageDelta( - type="delta", - index=current_index, - delta=TextDelta( - type="text", - text_delta=raw_event.delta, - ), - ) - yield delta_message - - elif hasattr(event, "type") and event.type == "run_item_stream_event": - # Skip reasoning_item events - they're handled via raw_response_event above - if hasattr(event, "item") and event.item.type == "reasoning_item": - continue - - # Check for tool_call_item type (this is when a tool is being called) - elif hasattr(event, "item") and event.item.type == "tool_call_item": - # Extract tool call information using the helper method - call_id, tool_name, tool_arguments = _extract_tool_call_info(event.item.raw_item) - tool_map[call_id] = tool_name - tool_request_content = ToolRequestContent( - tool_call_id=call_id, - name=tool_name, - 
arguments=tool_arguments, - author="agent", - ) - message_index += 1 # Increment for new message - yield StreamTaskMessageFull( - index=message_index, - type="full", - content=tool_request_content, - ) - - # Check for tool_call_output_item type (this is when a tool returns output) - elif hasattr(event, "item") and event.item.type == "tool_call_output_item": - # Extract tool response information using the helper method - call_id, tool_name, content = _extract_tool_response_info(tool_map, event.item.raw_item) - tool_response_content = ToolResponseContent( - tool_call_id=call_id, - name=tool_name, - content=content, - author="agent", - ) - message_index += 1 # Increment for new message - seen_tool_output = True # Mark that we've seen tool output so next text gets new index - yield StreamTaskMessageFull( - type="full", - index=message_index, - content=tool_response_content, - ) +# The OpenAI streaming tap ``convert_openai_to_agentex_events`` now lives in +# ``agentex.lib.adk._modules._openai_sync``; re-exported here for back-compat. 
+from agentex.lib.adk._modules._openai_sync import ( # noqa: E402 + convert_openai_to_agentex_events as convert_openai_to_agentex_events, +) diff --git a/src/agentex/lib/cli/commands/init.py b/src/agentex/lib/cli/commands/init.py index 307a5d0e8..9849e9bbc 100644 --- a/src/agentex/lib/cli/commands/init.py +++ b/src/agentex/lib/cli/commands/init.py @@ -26,14 +26,21 @@ class TemplateType(str, Enum): TEMPORAL_OPENAI_AGENTS = "temporal-openai-agents" TEMPORAL_PYDANTIC_AI = "temporal-pydantic-ai" TEMPORAL_LANGGRAPH = "temporal-langgraph" + TEMPORAL_CLAUDE_CODE = "temporal-claude-code" + TEMPORAL_CODEX = "temporal-codex" DEFAULT = "default" DEFAULT_LANGGRAPH = "default-langgraph" DEFAULT_PYDANTIC_AI = "default-pydantic-ai" + DEFAULT_OPENAI_AGENTS = "default-openai-agents" + DEFAULT_CLAUDE_CODE = "default-claude-code" + DEFAULT_CODEX = "default-codex" SYNC = "sync" SYNC_OPENAI_AGENTS = "sync-openai-agents" SYNC_OPENAI_AGENTS_LOCAL_SANDBOX = "sync-openai-agents-local-sandbox" SYNC_LANGGRAPH = "sync-langgraph" SYNC_PYDANTIC_AI = "sync-pydantic-ai" + SYNC_CLAUDE_CODE = "sync-claude-code" + SYNC_CODEX = "sync-codex" def render_template( @@ -66,14 +73,21 @@ def create_project_structure( TemplateType.TEMPORAL_OPENAI_AGENTS: ["acp.py", "workflow.py", "run_worker.py", "activities.py"], TemplateType.TEMPORAL_PYDANTIC_AI: ["acp.py", "workflow.py", "run_worker.py", "agent.py", "tools.py"], TemplateType.TEMPORAL_LANGGRAPH: ["acp.py", "workflow.py", "run_worker.py", "graph.py", "tools.py"], + TemplateType.TEMPORAL_CLAUDE_CODE: ["acp.py", "workflow.py", "run_worker.py", "activities.py"], + TemplateType.TEMPORAL_CODEX: ["acp.py", "workflow.py", "run_worker.py", "activities.py"], TemplateType.DEFAULT: ["acp.py"], TemplateType.DEFAULT_LANGGRAPH: ["acp.py", "graph.py", "tools.py"], TemplateType.DEFAULT_PYDANTIC_AI: ["acp.py", "agent.py", "tools.py"], + TemplateType.DEFAULT_OPENAI_AGENTS: ["acp.py"], + TemplateType.DEFAULT_CLAUDE_CODE: ["acp.py"], + TemplateType.DEFAULT_CODEX: ["acp.py"], 
TemplateType.SYNC: ["acp.py"], TemplateType.SYNC_OPENAI_AGENTS: ["acp.py"], TemplateType.SYNC_OPENAI_AGENTS_LOCAL_SANDBOX: ["acp.py", "agent.py", "tools.py"], TemplateType.SYNC_LANGGRAPH: ["acp.py", "graph.py", "tools.py"], TemplateType.SYNC_PYDANTIC_AI: ["acp.py", "agent.py", "tools.py"], + TemplateType.SYNC_CLAUDE_CODE: ["acp.py"], + TemplateType.SYNC_CODEX: ["acp.py"], }[template_type] # Create project/code files @@ -184,8 +198,11 @@ def validate_agent_name(text: str) -> bool | str: "Which Async template would you like to use?", choices=[ {"name": "Basic Async ACP", "value": TemplateType.DEFAULT}, + {"name": "Async ACP + OpenAI Agents SDK", "value": TemplateType.DEFAULT_OPENAI_AGENTS}, {"name": "Async ACP + LangGraph", "value": TemplateType.DEFAULT_LANGGRAPH}, {"name": "Async ACP + Pydantic AI", "value": TemplateType.DEFAULT_PYDANTIC_AI}, + {"name": "Async ACP + Claude Code", "value": TemplateType.DEFAULT_CLAUDE_CODE}, + {"name": "Async ACP + Codex", "value": TemplateType.DEFAULT_CODEX}, ], ).ask() if not template_type: @@ -198,6 +215,8 @@ def validate_agent_name(text: str) -> bool | str: {"name": "Temporal + OpenAI Agents SDK (Recommended)", "value": TemplateType.TEMPORAL_OPENAI_AGENTS}, {"name": "Temporal + Pydantic AI", "value": TemplateType.TEMPORAL_PYDANTIC_AI}, {"name": "Temporal + LangGraph", "value": TemplateType.TEMPORAL_LANGGRAPH}, + {"name": "Temporal + Claude Code", "value": TemplateType.TEMPORAL_CLAUDE_CODE}, + {"name": "Temporal + Codex", "value": TemplateType.TEMPORAL_CODEX}, ], ).ask() if not template_type: @@ -211,6 +230,8 @@ def validate_agent_name(text: str) -> bool | str: {"name": "Sync ACP + OpenAI Agents SDK + Local Sandbox", "value": TemplateType.SYNC_OPENAI_AGENTS_LOCAL_SANDBOX}, {"name": "Sync ACP + LangGraph", "value": TemplateType.SYNC_LANGGRAPH}, {"name": "Sync ACP + Pydantic AI", "value": TemplateType.SYNC_PYDANTIC_AI}, + {"name": "Sync ACP + Claude Code", "value": TemplateType.SYNC_CLAUDE_CODE}, + {"name": "Sync ACP + Codex", 
"value": TemplateType.SYNC_CODEX}, ], ).ask() if not template_type: diff --git a/src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 b/src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 b/src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 new file mode 100644 index 000000000..5aff34a60 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for the Claude Code CLI (the `claude` subprocess this agent spawns) +ANTHROPIC_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 new file mode 100644 index 000000000..93d0f82d1 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 @@ -0,0 +1,51 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + 
libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +# Install the Claude Code CLI: the agent shells out to `claude` on every turn, +# so the binary must be present in the runtime image. +RUN npm install -g @anthropic-ai/claude-code + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 new file mode 100644 index 000000000..d714d96f9 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 @@ -0,0 +1,46 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install the Claude Code CLI: the agent shells out to `claude` on every turn, +# so the binary must be present in the runtime image. 
+RUN npm install -g @anthropic-ai/claude-code + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-claude-code/README.md.j2 b/src/agentex/lib/cli/templates/default-claude-code/README.md.j2 new file mode 100644 index 000000000..ab05398e3 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/README.md.j2 @@ -0,0 +1,64 @@ +# {{ agent_name }} - AgentEx Async Claude Code Agent + +This template builds an **asynchronous** (non-Temporal) agent that drives the +**Claude Code CLI** through the unified harness surface on AgentEx: +- Spawns `claude -p --output-format stream-json --verbose` as a local subprocess +- Wraps the CLI's stdout stream in a `ClaudeCodeTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` + (the async Redis push path), so the UI receives output in real time +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `claude` CLI installed and on your `PATH` +- An `ANTHROPIC_API_KEY` (or equivalent credential) in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, and event handlers +├── Dockerfile +├── 
manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Async ACP with the harness +The async ACP model streams events over Redis instead of an HTTP response. The +`@acp.on_task_event_send` handler spawns the Claude Code CLI and pushes the +harness events to the task stream. + +### The unified harness surface +`ClaudeCodeTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Customize the subprocess +Edit `_spawn_claude` in `project/acp.py` to change the CLI flags, working +directory, or how the prompt is delivered. + +### 2. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 3. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 b/src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. 
For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " 
print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 b/src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 new file mode 100644 index 000000000..f802776f0 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 @@ -0,0 +1,57 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it configures settings that apply ONLY to a specific environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately.
However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal: + enabled: false + + diff --git a/src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 new file mode 100644 index 000000000..ee08bc91b --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 @@ -0,0 +1,123 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4.
The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description | tojson }} + + # Temporal workflow configuration + # Set enabled: true to use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common 
credentials include: + credentials: + # The Claude Code CLI authenticates with ANTHROPIC_API_KEY (LITELLM_API_KEY + # is not read by the `claude` subprocess this agent spawns). + - env_var_name: ANTHROPIC_API_KEY + secret_name: anthropic-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on. ANTHROPIC_API_KEY is supplied via the credential + # mapping above (deploy) or your local .env (load_dotenv). Do NOT set it to an + # empty string here — that would shadow the real key at runtime. + env: {} + # ANTHROPIC_API_KEY: "" # uncomment only to hardcode for local runs + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 new file mode 100644 index 000000000..42512c601 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 @@ -0,0 +1,167 @@ +"""ACP handler for {{ agent_name }} — an async Claude Code agent. 
async def _spawn_claude(prompt: str) -> AsyncIterator[str]:
    """Run the Claude Code CLI locally and yield its stdout as stripped lines.

    Spawns ``claude -p --output-format stream-json --verbose``, writes
    ``prompt`` to its stdin, and yields every non-empty stdout line.

    Injectable seam: tests can monkeypatch this with a fake async iterator of
    pre-recorded lines so no real CLI invocation is needed offline.

    Args:
        prompt: The user message, sent to the CLI on stdin as UTF-8.

    Yields:
        Each non-empty line of the CLI's stdout, whitespace-stripped.

    Raises:
        RuntimeError: If the CLI exits with a non-zero status. The message
            carries the last (up to 20) non-empty stderr lines.
    """
    import codecs  # function-scope import keeps this block self-contained

    proc = await asyncio.create_subprocess_exec(
        "claude",
        "-p",
        "--output-format",
        "stream-json",
        "--verbose",
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    assert proc.stdin is not None
    assert proc.stdout is not None
    assert proc.stderr is not None
    stdin, stdout, stderr = proc.stdin, proc.stdout, proc.stderr

    async def _iter_lines(stream: asyncio.StreamReader) -> AsyncIterator[str]:
        # Read fixed-size chunks instead of ``async for line in stream``:
        # line iteration goes through ``readline()``, which raises ValueError
        # once a single line exceeds the StreamReader limit (64 KiB by
        # default). The incremental decoder keeps a multibyte character that
        # straddles two chunks intact.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        pending = ""
        while chunk := await stream.read(4096):
            pending += decoder.decode(chunk)
            *complete, pending = pending.split("\n")
            for line in complete:
                yield line
        pending += decoder.decode(b"", final=True)
        if pending:
            yield pending

    # Drain stderr concurrently. With --verbose the CLI can write enough to
    # stderr to fill the OS pipe buffer; if only stdout were read, the CLI
    # would block on its stderr write while we block reading stdout. Only a
    # bounded tail is kept, to give a non-zero exit some context.
    stderr_tail: deque[str] = deque(maxlen=20)

    async def _drain_stderr() -> None:
        async for raw in _iter_lines(stderr):
            text = raw.rstrip()
            if text:
                stderr_tail.append(text)

    stderr_task = asyncio.create_task(_drain_stderr())

    try:
        try:
            stdin.write(prompt.encode())
            await stdin.drain()
        except (BrokenPipeError, ConnectionResetError):
            # The CLI exited before consuming the prompt (bad auth, bad
            # flags, ...). Fall through so the exit status and stderr tail
            # are reported below instead of a bare pipe error.
            pass
        finally:
            stdin.close()

        async for raw_line in _iter_lines(stdout):
            line = raw_line.strip()
            if line:
                yield line

        await proc.wait()
        if proc.returncode:
            # Give the drain task a bounded moment to reach stderr EOF so the
            # tail is complete; do not wait forever in case a grandchild
            # still holds the pipe open.
            await asyncio.wait({stderr_task}, timeout=1.0)
            tail = "\n".join(stderr_tail)
            raise RuntimeError(
                f"claude CLI exited with status {proc.returncode}:\n{tail}"
            )
    finally:
        # Release the drain task and the subprocess even if the consumer
        # abandons the generator early (task cancellation / client
        # disconnect). ``gather(..., return_exceptions=True)`` absorbs both
        # the cancellation and any error stored in the drain task, so the
        # process is always reaped afterwards.
        stderr_task.cancel()
        try:
            await asyncio.gather(stderr_task, return_exceptions=True)
        finally:
            if proc.returncode is None:
                try:
                    proc.terminate()
                except ProcessLookupError:
                    pass
                try:
                    await asyncio.wait_for(proc.wait(), timeout=5.0)
                except asyncio.TimeoutError:
                    # The CLI ignored SIGTERM; force it down so cleanup
                    # cannot hang.
                    try:
                        proc.kill()
                    except ProcessLookupError:
                        pass
                    await proc.wait()


@acp.on_task_create
async def handle_task_create(params: CreateTaskParams):
    """Log task creation; this agent keeps no per-task state."""
    logger.info("Task created: %s", params.task.id)


@acp.on_task_event_send
async def handle_task_event_send(params: SendEventParams):
    """Handle a user message: spawn Claude Code locally and push events to the task stream."""
    task_id = params.task.id
    content = params.event.content
    if not isinstance(content, TextContent):
        logger.warning("Ignoring non-text event content (type=%s)", getattr(content, "type", "?"))
        return
    prompt = content.content
    logger.info("Processing message for task %s", task_id)

    # Echo the user's message into the task history before the turn starts.
    await adk.messages.create(task_id=task_id, content=params.event.content)

    async with adk.tracing.span(
        trace_id=task_id,
        task_id=task_id,
        name="message",
        input={"message": prompt},
        data={"__span_type__": "AGENT_WORKFLOW"},
    ) as turn_span:
        emitter = UnifiedEmitter(
            task_id=task_id,
            trace_id=task_id,
            parent_span_id=turn_span.id if turn_span else None,
        )
        turn = ClaudeCodeTurn(_spawn_claude(prompt))
        result = await emitter.auto_send_turn(turn)
        if turn_span:
            turn_span.output = {"final_text": result.final_text}


@acp.on_task_cancel
async def handle_task_canceled(params: CancelTaskParams):
    """Log task cancellation."""
    logger.info("Task canceled: %s", params.task.id)
+dependencies = [ + "agentex-sdk", + "scale-gp", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 b/src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 new file mode 100644 index 000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 b/src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/default-codex/.env.example.j2 b/src/agentex/lib/cli/templates/default-codex/.env.example.j2 new file mode 100644 index 000000000..5d621a83e --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key used by the codex CLI (`codex exec` reads OPENAI_API_KEY directly) +OPENAI_API_KEY= + +# LLM base URL (optional - override to use a different 
provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 new file mode 100644 index 000000000..02860b9b9 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 @@ -0,0 +1,51 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +# Install the codex CLI: the agent shells out to `codex` on every turn, so the +# binary must be present in the runtime image. +RUN npm install -g @openai/codex + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 b/src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 new file mode 100644 index 000000000..1a8eb1484 --- /dev/null +++ 
b/src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 @@ -0,0 +1,46 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install the codex CLI: the agent shells out to `codex` on every turn, so the +# binary must be present in the runtime image. +RUN npm install -g @openai/codex + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-codex/README.md.j2 b/src/agentex/lib/cli/templates/default-codex/README.md.j2 new file mode 100644 index 000000000..b82f1c5f2 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/README.md.j2 @@ -0,0 +1,72 @@ +# {{ agent_name }} - AgentEx Async Codex Agent + +This template builds an **asynchronous** (non-Temporal) agent that drives the +**Codex CLI** through the unified harness surface on AgentEx: +- Spawns `codex exec --json` as a local subprocess +- Wraps the CLI's stdout stream in a `CodexTurn` +- Delivers canonical `StreamTaskMessage*` events via 
`UnifiedEmitter.auto_send_turn` + (the async Redis push path), so the UI receives output in real time +- Persists the codex session/thread ID via `adk.state` for multi-turn memory +- Tracing integration with SGP / AgentEx + +## Prerequisites + +- The `codex` CLI installed and on your `PATH` (`npm install -g @openai/codex`) +- An `OPENAI_API_KEY` in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, state, and event handlers +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Async ACP with the harness +The async ACP model streams events over Redis instead of an HTTP response. The +`@acp.on_task_event_send` handler spawns the Codex CLI and pushes the harness +events to the task stream. + +### Multi-turn memory +The codex session/thread ID is persisted via `adk.state`, so each new turn +resumes the same codex session with `codex exec resume THREAD_ID`. + +### The unified harness surface +`CodexTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Choose a model +Set `CODEX_MODEL` (defaults to `o4-mini`) to control which model codex uses. + +### 2. Customize the subprocess +Edit `_spawn_codex` in `project/acp.py` to change the CLI flags or how the +prompt is delivered. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4.
Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 b/src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When 
processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 b/src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 new file mode 100644 index 000000000..f802776f0 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 @@ -0,0 +1,57 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. 
+# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal: + enabled: false + + diff --git a/src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 new file mode 100644 index 000000000..3c894318f --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 @@ -0,0 +1,123 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image.
+# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and 
discovery + description: {{ description | tojson }} + + # Temporal workflow configuration + # Set enabled: true to use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + # The codex CLI (`codex exec`) reads OPENAI_API_KEY directly; it does not + # use a LiteLLM key. + - env_var_name: OPENAI_API_KEY + secret_name: openai-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on. OPENAI_API_KEY is supplied via the credential + # mapping above (deploy) or your local .env. Do NOT set it to an empty string + # here — that would shadow the real key at runtime. + env: {} + # OPENAI_BASE_URL: "" + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 new file mode 100644 index 000000000..f676ef137 --- /dev/null +++ 
b/src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 @@ -0,0 +1,271 @@ +"""Async (base) ACP handler for {{ agent_name }} — a Codex CLI harness agent. + +Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` + +``UnifiedEmitter`` for an async (Redis-streaming) ACP agent without Temporal. + +The handler: +1. Spawns ``codex exec --json`` as a LOCAL asyncio subprocess (no sandbox). + This is correct for local development; production isolation is a separate + concern. +2. Wraps the stdout line stream in a ``CodexTurn``. +3. Delivers every canonical ``StreamTaskMessage*`` event to Redis via + ``UnifiedEmitter.auto_send_turn``, so the UI receives tokens in real time. +4. Multi-turn memory is persisted via ``adk.state``. + +Live runs require: +- ``codex`` CLI on PATH (``npm install -g @openai/codex``) +- ``OPENAI_API_KEY`` set in the environment +""" + +from __future__ import annotations + +import os +import time +import codecs +import asyncio +from collections.abc import AsyncIterator + +from dotenv import load_dotenv + +load_dotenv() + +import agentex.lib.adk as adk +from agentex.lib.adk import CodexTurn +from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.types.text_content import TextContent +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.types.fastacp import AsyncACPConfig +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.lib.utils.model_utils import BaseModel +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +logger = make_logger(__name__) + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +acp = FastACP.create( + 
import weakref  # stdlib; needed by ``_task_locks`` below

# Model passed to ``codex exec --model``; override with the CODEX_MODEL env var.
MODEL = os.environ.get("CODEX_MODEL", "o4-mini")

# Serialize turns per task. Two ``task/event/send`` calls for the same task can
# otherwise both read the old ``codex_thread_id`` (or ``None``), run independent
# codex turns, and race to overwrite the stored thread id — forking the session.
# A per-task lock keeps turns sequential without blocking other tasks.
#
# The registry holds its locks weakly: a lock stays registered while any turn
# holds a reference to it (running or waiting) and is dropped once the last
# turn finishes, so the registry does not grow by one entry per task for the
# lifetime of the server.
_task_locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()


def _task_lock(task_id: str) -> asyncio.Lock:
    """Return the lock that serializes turns for ``task_id``.

    Concurrent callers for the same task receive the same lock object as long
    as at least one of them still references it; otherwise a fresh lock is
    created and registered.

    Args:
        task_id: Identifier of the task whose turns must run sequentially.

    Returns:
        The shared ``asyncio.Lock`` for that task. The caller must keep the
        returned reference for as long as it uses the lock.
    """
    lock = _task_locks.get(task_id)
    if lock is None:
        lock = asyncio.Lock()
        _task_locks[task_id] = lock
    return lock


class ConversationState(BaseModel):
    """Per-task conversation state persisted via ``adk.state``.

    We store the codex session/thread ID so subsequent turns can resume the
    same codex session via ``codex exec resume THREAD_ID``.
    """

    # Codex session/thread to resume; ``None`` until one has been stored.
    codex_thread_id: str | None = None
    # Count of turns handled for this task; used to label tracing spans.
    turn_number: int = 0


async def _spawn_codex(
    model: str,
    thread_id: str | None = None,
) -> asyncio.subprocess.Process:
    """Spawn ``codex exec --json`` locally and return the live process.

    Injection seam: tests replace this function with a fake that returns a
    mock process whose stdout yields pre-recorded event lines.

    When ``thread_id`` is provided the subcommand becomes
    ``codex exec ... resume THREAD_ID -`` so codex continues the prior
    conversation thread.

    The caller writes the prompt to stdin after the process starts, then
    closes stdin so codex knows input is complete.

    Args:
        model: Model name passed to ``--model``.
        thread_id: Codex session/thread to resume, or ``None`` to start a
            fresh session.

    Returns:
        The running subprocess, with stdin and stdout connected to pipes and
        stderr discarded.
    """
    base_flags = [
        "--json",
        "--skip-git-repo-check",
        "--dangerously-bypass-approvals-and-sandbox",
        "--model",
        model,
    ]

    # The trailing "-" is passed as the prompt argument; the caller supplies
    # the prompt on stdin.
    if thread_id:
        cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"]
    else:
        cmd = ["codex", "exec", *base_flags, "-"]

    return await asyncio.create_subprocess_exec(
        *cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        # Discard stderr: codex --json writes events to stdout; its stderr is
        # progress/debug noise. Capturing it with PIPE but never reading it
        # would deadlock once codex fills the OS pipe buffer (~64 KB).
        stderr=asyncio.subprocess.DEVNULL,
        env={**os.environ},
    )


async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]:
    """Yield newline-delimited JSON lines from the process stdout.

    Uses an incremental UTF-8 decoder so a multibyte character split across two
    4 KB reads is decoded correctly instead of being corrupted at the boundary.

    Args:
        process: A subprocess whose stdout is a pipe.

    Yields:
        Each non-empty stdout line, whitespace-stripped. A final line without
        a trailing newline is yielded as well.
    """
    assert process.stdout is not None
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    buffer = ""
    while True:
        chunk = await process.stdout.read(4096)
        if not chunk:
            break
        buffer += decoder.decode(chunk)
        # Split once per chunk: every element but the last is a complete
        # line, and the last is the unterminated remainder carried forward.
        *complete, buffer = buffer.split("\n")
        for raw_line in complete:
            line = raw_line.strip()
            if line:
                yield line
    # Flush bytes the decoder was still holding back at EOF.
    buffer += decoder.decode(b"", final=True)
    if buffer.strip():
        yield buffer.strip()


@acp.on_task_create
async def handle_task_create(params: CreateTaskParams):
    """Initialize per-task state on task creation."""
    logger.info("Task created: %s", params.task.id)
    await adk.state.create(
        task_id=params.task.id,
        agent_id=params.agent.id,
        state=ConversationState(),
    )
TextContent): + logger.warning( + "Ignoring non-text event content (type=%s) for task %s", + getattr(content, "type", "?"), + task_id, + ) + return + user_message = content.content + + logger.info("Processing message for task %s", task_id) + + # Serialize the whole turn (echo + the read-modify-write of + # ``codex_thread_id``) so two concurrent turns on the same task cannot fork + # the codex session or interleave their echoed messages. + lock = _task_lock(task_id) + await lock.acquire() + try: + # Echo inside the lock so this turn's message stays ordered with it. + await adk.messages.create(task_id=task_id, content=content) + + task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id) + if task_state is None: + state = ConversationState() + task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state) + else: + state = ConversationState.model_validate(task_state.state) + + state.turn_number += 1 + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name=f"Turn {state.turn_number}", + input={"message": user_message}, + data={"__span_type__": "AGENT_WORKFLOW"}, + ) as turn_span: + start_ms = int(time.monotonic() * 1000) + + process = await _spawn_codex(MODEL, thread_id=state.codex_thread_id) + + assert process.stdin is not None + process.stdin.write(user_message.encode("utf-8")) + await process.stdin.drain() + process.stdin.close() + + turn = CodexTurn( + events=_process_stdout(process), + model=MODEL, + ) + + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + + # Guarantee the subprocess is reaped even if auto_send_turn raises + # (e.g. a Redis error); otherwise codex stays blocked writing to a full + # stdout pipe buffer and the OS process leaks until the server restarts. 
+ try: + result = await emitter.auto_send_turn(turn) + finally: + if process.returncode is None: + process.kill() + await process.wait() + + # Record the real wall-clock duration AFTER streaming completes; setting + # it before the stream ran would capture only subprocess spawn overhead. + turn.duration_ms = int(time.monotonic() * 1000) - start_ms + + usage = turn.usage() + + # Persist the codex session id (public accessor; valid post-stream) so the + # next turn resumes the same session. + if turn.session_id: + state.codex_thread_id = turn.session_id + + await adk.state.update( + state_id=task_state.id, + task_id=task_id, + agent_id=agent_id, + state=state, + ) + + if turn_span: + turn_span.output = { + "final_text": result.final_text, + "model": usage.model, + } + finally: + lock.release() + # Evict the lock once released and idle (unlocked, no waiters) so + # ``_task_locks`` stays bounded even if the turn raised. There is no + # await between ``_task_lock()`` and acquiring it, so an unlocked, + # waiter-free lock has no in-flight user. 
+ if not lock.locked() and not getattr(lock, "_waiters", None): + _task_locks.pop(task_id, None) + + +@acp.on_task_cancel +async def handle_task_canceled(params: CancelTaskParams): + logger.info("Task canceled: %s", params.task.id) diff --git a/src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 b/src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 new file mode 100644 index 000000000..e499b1dc1 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 @@ -0,0 +1,33 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 b/src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 new file mode 100644 index 000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/default-langgraph/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-langgraph/Dockerfile-uv.j2 index 582434ac9..dd3035f7b 100644 --- a/src/agentex/lib/cli/templates/default-langgraph/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/default-langgraph/Dockerfile-uv.j2 @@ -27,18 +27,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ 
project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" ENV PYTHONPATH=/app diff --git a/src/agentex/lib/cli/templates/default-langgraph/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-langgraph/manifest.yaml.j2 index 2d94ba41c..e6c15cf33 100644 --- a/src/agentex/lib/cli/templates/default-langgraph/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/default-langgraph/manifest.yaml.j2 @@ -65,7 +65,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # Set enabled: true to use Temporal workflows for long-running tasks diff --git a/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 index 750a271ad..da5d37905 100644 --- a/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 @@ -22,6 +22,7 @@ from agentex.protocol.acp import SendEventParams, CancelTaskParams, CreateTaskPa from agentex.lib.types.fastacp import AsyncACPConfig from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent from agentex.lib.adk import LangGraphTurn from project.graph import create_graph @@ -55,7 +56,11 @@ async def 
handle_task_event_send(params: SendEventParams): """Handle incoming events, streaming tokens and tool calls via Redis.""" graph = await get_graph() task_id = params.task.id - user_message = params.event.content.content + content = params.event.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text event content (type=%s)", getattr(content, "type", "?")) + return + user_message = content.content logger.info(f"Processing message for thread {task_id}") diff --git a/src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 b/src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 b/src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 new file mode 100644 index 
000000000..dd3035f7b --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 new file mode 100644 index 000000000..056d60b96 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 @@ -0,0 +1,43 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + 
&& rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 b/src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 new file mode 100644 index 000000000..9611e83bd --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 @@ -0,0 +1,69 @@ +# {{ agent_name }} - AgentEx Async OpenAI Agents SDK Agent + +This template builds an **asynchronous** (non-Temporal) agent built on the +**OpenAI Agents SDK**, delivered through the unified harness surface on AgentEx: +- Defines an OpenAI Agents SDK `Agent` (with an example weather tool) inline in + `acp.py` +- Wraps the SDK run in an `OpenAITurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` + (the async Redis push path), so the UI receives output in real time +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- An `OPENAI_API_KEY` in your environment (or a `LITELLM_API_KEY`, which is + copied to `OPENAI_API_KEY` for LiteLLM-proxy compatibility) + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, agent + tool definitions, event handlers +├── 
Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Async ACP with the harness +The async ACP model streams events over Redis instead of an HTTP response. The +`@acp.on_task_event_send` handler runs the OpenAI Agents SDK and pushes the +harness events to the task stream. + +### The unified harness surface +`OpenAITurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes the SDK's streamed run into canonical AgentEx events; the emitter +traces and delivers them. + +## Development + +### 1. Add Your Own Tools +Define new `@function_tool` functions in `project/acp.py` and add them to the +agent's `tools=[...]` list in `create_agent()`. + +### 2. Customize the Agent +Edit `MODEL_NAME` and `INSTRUCTIONS` in `project/acp.py` to change the model or +system prompt. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. 
Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 b/src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 new file mode 100644 index 000000000..b0691b1b1 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# # (Optional) Create a new task. If you don't create a new task, each message will be sent to a new task. 
The server will create the task for you.\n", + "\n", + "# import uuid\n", + "\n", + "# TASK_ID = str(uuid.uuid4())[:8]\n", + "\n", + "# rpc_response = client.agents.rpc_by_name(\n", + "# agent_name=AGENT_NAME,\n", + "# method=\"task/create\",\n", + "# params={\n", + "# \"name\": f\"{TASK_ID}-task\",\n", + "# \"params\": {}\n", + "# }\n", + "# )\n", + "\n", + "# task = rpc_response.result\n", + "# print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Test non streaming response\n", + "from agentex.types import TextContent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_message(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": False\n", + " }\n", + ")\n", + "\n", + "if not rpc_response or not rpc_response.result:\n", + " raise ValueError(\"No result in response\")\n", + "\n", + "# Extract and print just the text content from the response\n", + "for task_message in rpc_response.result:\n", + " content = task_message.content\n", + " if isinstance(content, TextContent):\n", + " text = content.content\n", + " print(text)\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "79688331", + "metadata": {}, + "outputs": [], + "source": [ + "# Test streaming response\n", + "from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageFull\n", + "from agentex.types.text_delta import TextDelta\n", + "\n", + "\n", + "# The result object of message/send will be a TaskMessageUpdate which is a union of the following types:\n", + "# - StreamTaskMessageStart: \n", + "# - An indicator that a streaming message was started, doesn't contain any useful content\n", + "# - StreamTaskMessageDelta: \n", + "# - A delta of a streaming message, contains the text delta to aggregate\n", + "# - StreamTaskMessageDone: \n", + "# - An indicator that a streaming message was done, doesn't contain any useful content\n", + "# - StreamTaskMessageFull: \n", + "# - A non-streaming message, there is nothing to aggregate, since this contains the full message, not deltas\n", + "\n", + "# When processing StreamTaskMessageDelta, if you are expecting more than TextDeltas, such as DataDelta, ToolRequestDelta, or ToolResponseDelta, you can process them as well\n", + "# When processing StreamTaskMessageFull, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "for agent_rpc_response_chunk in client.agents.send_message_stream(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": True\n", + " }\n", + "):\n", + " # We know that the result of the message/send when stream is set to True will be a TaskMessageUpdate\n", + " task_message_update = agent_rpc_response_chunk.result\n", + " # Print only the text deltas as they arrive or any full messages\n", + " if isinstance(task_message_update, StreamTaskMessageDelta):\n", + " delta = task_message_update.delta\n", + " if isinstance(delta, TextDelta):\n", + "
print(delta.text_delta, end=\"\", flush=True)\n", + " else:\n", + " print(f\"Found non-text {type(task_message_update)} object in streaming message.\")\n", + " elif isinstance(task_message_update, StreamTaskMessageFull):\n", + " content = task_message_update.content\n", + " if isinstance(content, TextContent):\n", + " print(content.content)\n", + " else:\n", + " print(f\"Found non-text {type(task_message_update)} object in full message.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e7e042", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 b/src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 new file mode 100644 index 000000000..73924abdd --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - If omitted, the namespace will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 new file mode 100644 index 000000000..b633518be --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 @@ -0,0 +1,115 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1.
All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: async + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description | tojson }} + + # Temporal workflow configuration + # Set 
enabled: true to use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: [] # Update with your credentials + # - env_var_name: LITELLM_API_KEY + # secret_name: litellm-api-key + # secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: {} # Update with your environment variables + # LITELLM_API_KEY: "" + # OPENAI_BASE_URL: "" + # OPENAI_ORG_ID: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 new file mode 100644 index 000000000..66ee31243 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 @@ -0,0 +1,171 @@ +"""ACP handler for {{ agent_name }} — an async OpenAI Agents SDK agent. + +Uses the async ACP model with Redis streaming instead of HTTP yields. The +OpenAI Agents SDK run is wrapped in an ``OpenAITurn`` and pushed to the task +stream via ``UnifiedEmitter.auto_send_turn`` — the async delivery path of the +unified harness surface. 
``auto_send_turn`` returns a ``TurnResult`` carrying +the accumulated final text and normalized usage. + +The agent and its tools are defined inline below so this template stays a +single, self-contained ``acp.py``. +""" + +from __future__ import annotations + +import os +from typing import List +from datetime import datetime + +from dotenv import load_dotenv + +load_dotenv() + +from agents import Agent, Runner, function_tool, set_tracing_disabled + +from agentex.lib import adk +from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.types.fastacp import AsyncACPConfig +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent +from agentex.lib.utils.model_utils import BaseModel +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.lib.adk import OpenAITurn +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +# Disable the openai-agents SDK's native tracer so it doesn't ship traces to +# api.openai.com using OPENAI_API_KEY (which may be a LiteLLM proxy key). +# SGP tracing below still runs via the Agentex tracing manager. +set_tracing_disabled(True) + +logger = make_logger(__name__) + +# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client compatibility. 
+_litellm_key = os.environ.get("LITELLM_API_KEY")
+if _litellm_key and not os.environ.get("OPENAI_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = _litellm_key
+
+_sgp_api_key = os.environ.get("SGP_API_KEY", "")
+_sgp_account_id = os.environ.get("SGP_ACCOUNT_ID", "")
+if _sgp_api_key and _sgp_account_id:
+    add_tracing_processor_config(
+        SGPTracingProcessorConfig(
+            sgp_api_key=_sgp_api_key,
+            sgp_account_id=_sgp_account_id,
+            sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+        )
+    )
+
+acp = FastACP.create(
+    acp_type="async",
+    config=AsyncACPConfig(type="base"),
+)
+
+MODEL_NAME = "gpt-4o"
+INSTRUCTIONS = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use the weather tool when the user asks about the weather
+- Always report the real tool output back to the user
+"""
+
+
+@function_tool
+def get_weather(city: str) -> str:
+    """Get the current weather for a city."""
+    return f"The weather in {city} is sunny and 72°F"
+
+
+def create_agent() -> Agent:
+    """Build and return the OpenAI Agents SDK agent with the weather tool."""
+    return Agent(
+        name="{{ agent_name }}",
+        model=MODEL_NAME,
+        instructions=INSTRUCTIONS.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+        tools=[get_weather],
+    )
+
+
+def get_agent() -> Agent:
+    """Build a fresh agent per request so the timestamp in the instructions stays current."""
+    return create_agent()
+
+
+class StateModel(BaseModel):
+    """Per-task conversation state persisted between turns."""
+
+    input_list: List[dict]
+    turn_number: int
+
+
+@acp.on_task_create
+async def handle_task_create(params: CreateTaskParams):
+    logger.info("Task created: %s", params.task.id)
+
+
+@acp.on_task_event_send
+async def handle_task_event_send(params: SendEventParams):
+    """Handle each user message: run the agent and auto-send its turn."""
+    agent = get_agent()
+    task_id = params.task.id
+    agent_id = params.agent.id
+    content = params.event.content
+    if not isinstance(content, TextContent):
+        logger.warning("Ignoring non-text event content (type=%s)", getattr(content, "type", "?"))
+        return
+    user_message = content.content
+
+    logger.info("Processing message for task %s", task_id)
+
+    # Echo the user's message into the task history.
+    await adk.messages.create(task_id=task_id, content=params.event.content)
+
+    # Load (or create) the persisted conversation history for this task so the
+    # agent can see prior turns, then append the new user message.
+    task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id)
+    if task_state is None:
+        state = StateModel(input_list=[], turn_number=0)
+        task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state)
+    else:
+        state = StateModel.model_validate(task_state.state)
+
+    state.turn_number += 1
+    state.input_list.append({"role": "user", "content": user_message})
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        result = Runner.run_streamed(starting_agent=agent, input=state.input_list)
+        turn = OpenAITurn(result=result, model=MODEL_NAME)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        turn_result = await emitter.auto_send_turn(turn)
+
+        # Persist the full conversation history (user + assistant + tool calls)
+        # so the next turn resumes with complete context.
+        state.input_list = result.to_input_list()
+        await adk.state.update(
+            state_id=task_state.id,
+            task_id=task_id,
+            agent_id=agent_id,
+            state=state,
+        )
+
+        if turn_span:
+            turn_span.output = {"final_output": turn_result.final_text}
+
+
+@acp.on_task_cancel
+async def handle_task_canceled(params: CancelTaskParams):
+    logger.info("Task canceled: %s", params.task.id)
diff --git a/src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2 b/src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2
new file mode 100644
index 000000000..4b9c7ed71
--- /dev/null
+++ b/src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2
@@ -0,0 +1,34 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "{{ project_name }}"
+version = "0.1.0"
+description = {{ description | tojson }}
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "openai-agents",
+    "python-dotenv>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2 b/src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2
new file mode 100644
index 000000000..14779c089
--- /dev/null
+++ b/src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2
@@ -0,0 +1,11 @@
+# Install agentex-sdk from local path
+agentex-sdk
+
+# Scale GenAI Platform Python SDK
+scale-gp
+
+# OpenAI Agents SDK
+openai-agents
+
+# Loads .env files for local development
+python-dotenv>=1.0,<2
diff --git a/src/agentex/lib/cli/templates/default-pydantic-ai/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-pydantic-ai/Dockerfile-uv.j2
index 582434ac9..dd3035f7b 100644
---
a/src/agentex/lib/cli/templates/default-pydantic-ai/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/default-pydantic-ai/Dockerfile-uv.j2 @@ -27,18 +27,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" ENV PYTHONPATH=/app diff --git a/src/agentex/lib/cli/templates/default-pydantic-ai/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-pydantic-ai/manifest.yaml.j2 index 2d94ba41c..e6c15cf33 100644 --- a/src/agentex/lib/cli/templates/default-pydantic-ai/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/default-pydantic-ai/manifest.yaml.j2 @@ -65,7 +65,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # Set enabled: true to use Temporal workflows for long-running tasks diff --git a/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 index 11d3ab476..245f9ec38 100644 --- a/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 @@ -29,6 +29,7 @@ from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.fastacp import AsyncACPConfig from 
agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent from agentex.lib.utils.model_utils import BaseModel from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.lib.adk import PydanticAITurn @@ -97,7 +98,11 @@ async def handle_task_event_send(params: SendEventParams): agent = get_agent() task_id = params.task.id agent_id = params.agent.id - user_message = params.event.content.content + content = params.event.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text event content (type=%s)", getattr(content, "type", "?")) + return + user_message = content.content logger.info(f"Processing message for task {task_id}") diff --git a/src/agentex/lib/cli/templates/default/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default/Dockerfile-uv.j2 index 582434ac9..dd3035f7b 100644 --- a/src/agentex/lib/cli/templates/default/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/default/Dockerfile-uv.j2 @@ -27,18 +27,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" ENV PYTHONPATH=/app diff --git a/src/agentex/lib/cli/templates/default/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default/manifest.yaml.j2 index 61c9064ed..c78ce1f44 100644 --- 
a/src/agentex/lib/cli/templates/default/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/default/manifest.yaml.j2 @@ -65,7 +65,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # Set enabled: true to use Temporal workflows for long-running tasks diff --git a/src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 b/src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 b/src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 new file mode 100644 index 000000000..5aff34a60 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for the Claude Code CLI (the `claude` subprocess this agent spawns) +ANTHROPIC_API_KEY= + +# Anthropic API base URL (optional - override to route the Claude Code CLI through a gateway or proxy) +# ANTHROPIC_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 new file mode 100644 index 000000000..93d0f82d1 --- /dev/null +++
b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 @@ -0,0 +1,51 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +# Install the Claude Code CLI: the agent shells out to `claude` on every turn, +# so the binary must be present in the runtime image. +RUN npm install -g @anthropic-ai/claude-code + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 new file mode 100644 index 000000000..6cdc70799 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + 
postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install the Claude Code CLI: the agent shells out to `claude` on every turn, +# so the binary must be present in the runtime image. +RUN npm install -g @anthropic-ai/claude-code + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 b/src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 new file mode 100644 index 000000000..7e38eddec --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 @@ -0,0 +1,64 @@ +# {{ agent_name }} - AgentEx Sync Claude Code Agent + +This template builds a **synchronous** agent that drives the **Claude Code CLI** +through the unified harness surface on AgentEx: +- Spawns `claude -p --output-format stream-json --verbose` as a local subprocess +- Wraps the CLI's stdout stream in a `ClaudeCodeTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.yield_turn` + (the sync HTTP yield path) +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `claude` CLI installed and on your `PATH` +- An `ANTHROPIC_API_KEY` (or equivalent credential) in your environment + +## Running the Agent + +```bash 
+agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, and message handler +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Sync ACP with the harness +The sync ACP model uses HTTP request/response. The `@acp.on_message_send` +handler spawns the Claude Code CLI and yields the harness events back to the +client as they arrive. + +### The unified harness surface +`ClaudeCodeTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Customize the subprocess +Edit `_spawn_claude` in `project/acp.py` to change the CLI flags, working +directory, or how the prompt is delivered. + +### 2. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 3. 
Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 b/src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 new file mode 100644 index 000000000..b0691b1b1 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# # (Optional) Create a new task. If you don't create a new task, each message will be sent to a new task. 
The server will create the task for you.\n", + "\n", + "# import uuid\n", + "\n", + "# TASK_ID = str(uuid.uuid4())[:8]\n", + "\n", + "# rpc_response = client.agents.rpc_by_name(\n", + "# agent_name=AGENT_NAME,\n", + "# method=\"task/create\",\n", + "# params={\n", + "# \"name\": f\"{TASK_ID}-task\",\n", + "# \"params\": {}\n", + "# }\n", + "# )\n", + "\n", + "# task = rpc_response.result\n", + "# print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Test non streaming response\n", + "from agentex.types import TextContent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_message(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": False\n", + " }\n", + ")\n", + "\n", + "if not rpc_response or not rpc_response.result:\n", + " raise ValueError(\"No result in response\")\n", + "\n", + "# Extract and print just the text content from the response\n", + "for task_message in rpc_response.result:\n", + " content = task_message.content\n", + " if isinstance(content, TextContent):\n", + " text = content.content\n", + " print(text)\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "79688331", + "metadata": {}, + "outputs": [], + "source": [ + "# Test streaming response\n", + "from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageFull\n", + "from agentex.types.text_delta import TextDelta\n", + "\n", + "\n", + "# The result object of message/send will be a TaskMessageUpdate which is a union of the following types:\n", + "# - StreamTaskMessageStart: \n", + "# - An indicator that a streaming message was started, doesn't contain any useful content\n", + "# - StreamTaskMessageDelta: \n", + "# - A delta of a streaming message, contains the text delta to aggregate\n", + "# - StreamTaskMessageDone: \n", + "# - An indicator that a streaming message was done, doesn't contain any useful content\n", + "# - StreamTaskMessageFull: \n", + "# - A non-streaming message, there is nothing to aggregate, since this contains the full message, not deltas\n", + "\n", + "# When processing StreamTaskMessageDelta, if you are expecting more than TextDeltas, such as DataDelta, ToolRequestDelta, or ToolResponseDelta, you can process them as well\n", + "# When processing StreamTaskMessageFull, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "for agent_rpc_response_chunk in client.agents.send_message_stream(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": True\n", + " }\n", + "):\n", + " # We know that the result of the message/send when stream is set to True will be a TaskMessageUpdate\n", + " task_message_update = agent_rpc_response_chunk.result\n", + " # Print only the text deltas as they arrive or any full messages\n", + " if isinstance(task_message_update, StreamTaskMessageDelta):\n", + " delta = task_message_update.delta\n", + " if isinstance(delta, TextDelta):\n", + "
print(delta.text_delta, end=\"\", flush=True)\n", + " else:\n", + " print(f\"Found non-text {type(task_message_update)} object in streaming message.\")\n", + " elif isinstance(task_message_update, StreamTaskMessageFull):\n", + " content = task_message_update.content\n", + " if isinstance(content, TextContent):\n", + " print(content.content)\n", + " else:\n", + " print(f\"Found non-text {type(task_message_update)} object in full message.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e7e042", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 b/src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 new file mode 100644 index 000000000..73924abdd --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - If omitted, the namespace is derived separately. Set this to override the derived +# # namespace and deploy within a namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 new file mode 100644 index 000000000..4432d1a33 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 @@ -0,0 +1,120 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1.
All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: sync + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description | tojson }} + + # Temporal workflow configuration + # Set 
enabled: true to use Temporal workflows for long-running tasks
+  temporal:
+    enabled: false
+
+  # Optional: Credentials mapping
+  # Maps Kubernetes secrets to environment variables
+  # Common credentials include:
+  credentials:
+    # The Claude Code CLI authenticates with ANTHROPIC_API_KEY (LITELLM_API_KEY
+    # is not read by the `claude` subprocess this agent spawns).
+    - env_var_name: ANTHROPIC_API_KEY
+      secret_name: anthropic-api-key
+      secret_key: api-key
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+
+  # Optional: Set Environment variables for running your agent locally as well
+  # as for deployment later on. ANTHROPIC_API_KEY is supplied via the credential
+  # mapping above (deploy) or your local .env (load_dotenv). Do NOT set it to an
+  # empty string here — that would shadow the real key at runtime.
+  env: {}
+  #   ANTHROPIC_API_KEY: ""  # uncomment only to hardcode for local runs
+
+
+# Deployment Configuration
+# -----------------------
+# Configuration for deploying your agent to Kubernetes clusters
+deployment:
+  # Container image configuration
+  image:
+    repository: ""  # Update with your container registry
+    tag: "latest"  # Default tag, should be versioned in production
+
+  imagePullSecrets: []  # Update with your image pull secret names
+    # - name: my-registry-secret
+
+  # Global deployment settings that apply to all clusters
+  # These can be overridden in cluster-specific environments (environments.yaml)
+  global:
+    # Default replica count
+    replicaCount: 1
+
+    # Default resource requirements
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
\ No newline at end of file
diff --git a/src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2
new file mode 100644
index 000000000..33a89a51e
--- /dev/null
+++ b/src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2
@@ -0,0 +1,177 @@
+"""ACP handler for {{ agent_name }} — a sync Claude Code agent.
+
+Spawns ``claude -p --output-format stream-json --verbose`` as a LOCAL
+asyncio subprocess (no Scale sandbox — that is a production concern). Stdout
+lines are fed into ``ClaudeCodeTurn``, which wraps
+``convert_claude_code_to_agentex_events``. Events are delivered via
+``UnifiedEmitter.yield_turn``, the sync HTTP yield path.
+
+Live runs require the ``claude`` CLI to be installed and an
+ANTHROPIC_API_KEY (or equivalent credential) to be in the environment.
+"""
+
+from __future__ import annotations
+
+import os
+import asyncio
+from typing import AsyncIterator, AsyncGenerator
+from collections import deque
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.types.task_message_update import TaskMessageUpdate
+from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(acp_type="sync")
+
+
+async def _spawn_claude(prompt: str, *, stream_limit: int = 16 * 1024 * 1024) -> AsyncIterator[str]:
+    """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines.
+
+    This is a seam: tests can replace it with a fake async iterator of
+    pre-recorded lines so no real CLI invocation is needed offline.
+
+    Args:
+        prompt: Text piped to the CLI on stdin.
+        stream_limit: Per-line buffer limit (bytes) for the stdout/stderr
+            readers. stream-json puts one whole JSON message on each line, and
+            asyncio's 64 KiB default raises ``ValueError`` on a longer line.
+
+    Raises:
+        RuntimeError: If the CLI exits with a non-zero status.
+    """
+    proc = await asyncio.create_subprocess_exec(
+        "claude",
+        "-p",
+        "--output-format",
+        "stream-json",
+        "--verbose",
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+        limit=stream_limit,
+    )
+    assert proc.stdout is not None
+    assert proc.stdin is not None
+
+    # Drain stderr concurrently. With --verbose, Claude Code can write enough to
+    # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks
+    # on its stderr write while we block reading stdout — a deadlock. A
+    # background task keeps stderr flowing so stdout never stalls. We keep a
+    # bounded tail so a non-zero exit can be surfaced with context instead of
+    # silently completing the turn.
+    stderr_tail: deque[str] = deque(maxlen=20)
+
+    async def _drain_stderr() -> None:
+        assert proc.stderr is not None
+        async for raw in proc.stderr:
+            text = raw.decode("utf-8", errors="replace").rstrip()
+            if text:
+                stderr_tail.append(text)
+
+    stderr_task = asyncio.create_task(_drain_stderr())
+
+    try:
+        # Send the prompt only after the stderr drain is running and this
+        # try/finally is armed, so a failed write cannot leak the process. If
+        # the CLI died before reading stdin (bad auth, bad flags) the pipe is
+        # already closed; ignore that and let the exit-status check below
+        # report the real failure together with the stderr tail.
+        try:
+            proc.stdin.write(prompt.encode())
+            await proc.stdin.drain()
+        except (BrokenPipeError, ConnectionResetError):
+            pass
+        finally:
+            proc.stdin.close()
+
+        buffer = ""
+        async for chunk in proc.stdout:
+            buffer += chunk.decode("utf-8", errors="replace")
+            while "\n" in buffer:
+                line, buffer = buffer.split("\n", 1)
+                line = line.strip()
+                if line:
+                    yield line
+
+        if buffer.strip():
+            yield buffer.strip()
+
+        await proc.wait()
+        if proc.returncode:
+            # The CLI failed (bad auth, bad flags, crash). Raise so the
+            # turn surfaces as failed instead of completing with no output.
+            # Give the drain task a bounded chance to reach EOF first so the
+            # tail holds the CLI's final stderr lines.
+            await asyncio.wait({stderr_task}, timeout=1.0)
+            tail = "\n".join(stderr_tail)
+            raise RuntimeError(
+                f"claude CLI exited with status {proc.returncode}:\n{tail}"
+            )
+    finally:
+        # Release the subprocess and stderr drain task even if the consumer
+        # abandons the generator early (task cancellation / client disconnect):
+        # cancel the drain task and terminate+reap the process if it is still
+        # running, so neither is leaked.
+        stderr_task.cancel()
+        try:
+            await stderr_task
+        except asyncio.CancelledError:
+            pass
+        if proc.returncode is None:
+            try:
+                proc.terminate()
+            except ProcessLookupError:
+                pass
+            await proc.wait()
+
+
+@acp.on_message_send
+async def handle_message_send(
+    params: SendMessageParams,
+) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
+    """Handle an incoming message: run Claude Code locally and stream events."""
+    task_id = params.task.id
+    content = params.content
+    if not isinstance(content, TextContent):
+        logger.warning("Ignoring non-text message content (type=%s)", getattr(content, "type", "?"))
+        return
+    prompt = content.content
+    logger.info("Processing message for task %s", task_id)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": prompt},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        turn = ClaudeCodeTurn(_spawn_claude(prompt))
+        async for event in emitter.yield_turn(turn):
+            yield event
diff --git a/src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2 b/src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2
new file mode 100644
index 000000000..e499b1dc1
--- /dev/null
+++ b/src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2
@@ -0,0 +1,33 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "{{ project_name }}"
+version = "0.1.0"
+description = {{ description | tojson }}
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "python-dotenv>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length
= 88 diff --git a/src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 b/src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 new file mode 100644 index 000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 b/src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/sync-codex/.env.example.j2 b/src/agentex/lib/cli/templates/sync-codex/.env.example.j2 new file mode 100644 index 000000000..5d621a83e --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key used by the codex CLI (`codex exec` reads OPENAI_API_KEY directly) +OPENAI_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 new file mode 100644 index 000000000..02860b9b9 --- /dev/null +++ 
b/src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 @@ -0,0 +1,51 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +# Install the codex CLI: the agent shells out to `codex` on every turn, so the +# binary must be present in the runtime image. +RUN npm install -g @openai/codex + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-codex/Dockerfile.j2 b/src/agentex/lib/cli/templates/sync-codex/Dockerfile.j2 new file mode 100644 index 000000000..afa4470d9 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/Dockerfile.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev 
\ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install the codex CLI: the agent shells out to `codex` on every turn, so the +# binary must be present in the runtime image. +RUN npm install -g @openai/codex + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-codex/README.md.j2 b/src/agentex/lib/cli/templates/sync-codex/README.md.j2 new file mode 100644 index 000000000..4ca1aeccf --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/README.md.j2 @@ -0,0 +1,67 @@ +# {{ agent_name }} - AgentEx Sync Codex Agent + +This template builds a **synchronous** agent that drives the **Codex CLI** +through the unified harness surface on AgentEx: +- Spawns `codex exec --json` as a local subprocess +- Wraps the CLI's stdout stream in a `CodexTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.yield_turn` + (the sync HTTP yield path) +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `codex` CLI installed and on your `PATH` (`npm install -g @openai/codex`) +- An `OPENAI_API_KEY` in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py 
+│ └── acp.py # ACP server, subprocess spawn, and message handler +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Sync ACP with the harness +The sync ACP model uses HTTP request/response. The `@acp.on_message_send` +handler spawns the Codex CLI and yields the harness events back to the client +as they arrive. + +### The unified harness surface +`CodexTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Choose a model +Set `CODEX_MODEL` (defaults to `o4-mini`) to control which model codex uses. + +### 2. Customize the subprocess +Edit `_spawn_codex` in `project/acp.py` to change the CLI flags or how the +prompt is delivered. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 b/src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 new file mode 100644 index 000000000..b0691b1b1 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# # (Optional) Create a new task. 
If you don't create a new task, each message will be sent to a new task. The server will create the task for you.\n", + "\n", + "# import uuid\n", + "\n", + "# TASK_ID = str(uuid.uuid4())[:8]\n", + "\n", + "# rpc_response = client.agents.rpc_by_name(\n", + "# agent_name=AGENT_NAME,\n", + "# method=\"task/create\",\n", + "# params={\n", + "# \"name\": f\"{TASK_ID}-task\",\n", + "# \"params\": {}\n", + "# }\n", + "# )\n", + "\n", + "# task = rpc_response.result\n", + "# print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Test non streaming response\n", + "from agentex.types import TextContent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_message(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": False\n", + " }\n", + ")\n", + "\n", + "if not rpc_response or not rpc_response.result:\n", + " raise ValueError(\"No result in response\")\n", + "\n", + "# Extract and print just the text content from the response\n", + "for task_message in rpc_response.result:\n", + " content = task_message.content\n", + " if isinstance(content, TextContent):\n", + " text = 
content.content\n", + " print(text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79688331", + "metadata": {}, + "outputs": [], + "source": [ + "# Test streaming response\n", + "from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageFull\n", + "from agentex.types.text_delta import TextDelta\n", + "\n", + "\n", + "# The result object of message/send will be a TaskMessageUpdate which is a union of the following types:\n", + "# - StreamTaskMessageStart: \n", + "# - An indicator that a streaming message was started, doesn't contain any useful content\n", + "# - StreamTaskMessageDelta: \n", + "# - A delta of a streaming message, contains the text delta to aggregate\n", + "# - StreamTaskMessageDone: \n", + "# - An indicator that a streaming message was done, doesn't contain any useful content\n", + "# - StreamTaskMessageFull: \n", + "# - A non-streaming message, there is nothing to aggregate, since this contains the full message, not deltas\n", + "\n", + "# When processing StreamTaskMessageDelta, if you are expecting more than TextDeltas, such as DataDelta, ToolRequestDelta, or ToolResponseDelta, you can process them as well\n", + "# When processing StreamTaskMessageFull, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "for agent_rpc_response_chunk in client.agents.send_message_stream(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": True\n", + " }\n", + "):\n", + " # We know that the result of the message/send when stream is set to True will be a TaskMessageUpdate\n", + " task_message_update = agent_rpc_response_chunk.result\n", + " # Print only the text deltas as they arrive or any full messages\n", + " if isinstance(task_message_update, StreamTaskMessageDelta):\n", + " delta = 
task_message_update.delta\n", + " if isinstance(delta, TextDelta):\n", + " print(delta.text_delta, end=\"\", flush=True)\n", + " else:\n", + " print(f\"Found non-text {type(task_message_update)} object in streaming message.\")\n", + " elif isinstance(task_message_update, StreamTaskMessageFull):\n", + " content = task_message_update.content\n", + " if isinstance(content, TextContent):\n", + " print(content.content)\n", + " else:\n", + " print(f\"Found non-text {type(task_message_update)} object in full message.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e7e042", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 b/src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 new file mode 100644 index 000000000..73924abdd --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 new file mode 100644 index 000000000..4e3cc0c3a --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 @@ -0,0 +1,120 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. 
The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: sync + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description | tojson }} + + # Temporal workflow configuration + # Set enabled: true to use Temporal workflows for long-running tasks + temporal: + 
enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + # The codex CLI (`codex exec`) reads OPENAI_API_KEY directly; it does not + # use a LiteLLM key. + - env_var_name: OPENAI_API_KEY + secret_name: openai-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on. OPENAI_API_KEY is supplied via the credential + # mapping above (deploy) or your local .env. Do NOT set it to an empty string + # here — that would shadow the real key at runtime. + env: {} + # OPENAI_BASE_URL: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 new file mode 100644 index 000000000..0bc5d66a7 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 @@ -0,0 +1,185 @@ +"""Sync ACP handler for {{ agent_name }} — a Codex CLI harness agent. + +Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` + +``UnifiedEmitter`` for a sync (HTTP-yield) ACP agent. + +The handler: +1. 
async def _spawn_codex(model: str) -> asyncio.subprocess.Process:
    """Spawn ``codex exec --json`` locally and return the live process.

    Injection seam: tests replace this function with a fake that returns a
    mock process whose stdout yields pre-recorded event lines.

    The flags:
      --json                  machine-readable newline-delimited events
      --skip-git-repo-check   safe to run outside a git repo
      --dangerously-bypass-approvals-and-sandbox
                              skip interactive approval prompts in a
                              non-interactive (server) context
      --model                 which OpenAI model to use

    The caller writes the prompt to stdin after the process starts, then
    closes stdin so codex knows input is complete.

    Args:
        model: Model name passed to ``--model``.

    Returns:
        The started process, with stdin and stdout connected to pipes.
    """
    cmd = [
        "codex",
        "exec",
        "--json",
        "--skip-git-repo-check",
        "--dangerously-bypass-approvals-and-sandbox",
        "--model",
        model,
        "-",  # read prompt from stdin
    ]
    return await asyncio.create_subprocess_exec(
        *cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        # Discard stderr: codex --json writes events to stdout; its stderr is
        # progress/debug noise. Capturing it with PIPE but never reading it
        # would deadlock once codex fills the OS pipe buffer (~64 KB).
        stderr=asyncio.subprocess.DEVNULL,
        env={**os.environ},
    )


async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]:
    """Yield newline-delimited JSON lines from the process stdout.

    Uses an incremental UTF-8 decoder so a multibyte character split across two
    4 KB reads is decoded correctly instead of being corrupted at the boundary.

    Args:
        process: A process whose ``stdout`` is a readable pipe.

    Yields:
        Each non-blank line with surrounding whitespace stripped, including a
        final line that is not newline-terminated.
    """
    assert process.stdout is not None
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    pending = ""
    while chunk := await process.stdout.read(4096):
        pending += decoder.decode(chunk)
        # Everything before the last "\n" is complete; the remainder (possibly
        # empty) is carried over until more data arrives.
        *complete, pending = pending.split("\n")
        for raw in complete:
            line = raw.strip()
            if line:
                yield line
    # Flush bytes the decoder was still holding for an unfinished character.
    pending += decoder.decode(b"", final=True)
    tail = pending.strip()
    if tail:
        yield tail


@acp.on_message_send
async def handle_message_send(
    params: SendMessageParams,
) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
    """Handle each message by running ``codex exec`` locally and streaming events.

    Args:
        params: The message-send request; ``params.task.id`` is used as both
            the task id and the trace id, and ``params.content`` carries the
            user message.

    Yields:
        Every event produced by ``UnifiedEmitter.yield_turn`` for the turn.
        Non-text content is logged and ignored, so the generator finishes
        without yielding anything.
    """
    task_id = params.task.id
    content = params.content
    if not isinstance(content, TextContent):
        logger.warning("Ignoring non-text message content (type=%s)", getattr(content, "type", "?"))
        return
    user_message = content.content
    logger.info("Processing message for task %s", task_id)

    start_ms = int(time.monotonic() * 1000)

    async with adk.tracing.span(
        trace_id=task_id,
        task_id=task_id,
        name="message",
        input={"message": user_message},
        data={"__span_type__": "AGENT_WORKFLOW"},
    ) as turn_span:
        process = await _spawn_codex(MODEL)

        # Guarantee the subprocess is reaped even if the generator is abandoned
        # (client disconnect / GC), yield_turn raises, or delivering the prompt
        # fails (e.g. BrokenPipeError when codex exits immediately); otherwise
        # codex stays blocked writing to a full stdout pipe buffer and the
        # process leaks. The try starts right after the spawn so the stdin
        # write is covered too.
        try:
            # Write prompt to stdin then close it so codex knows input is done.
            assert process.stdin is not None
            process.stdin.write(user_message.encode("utf-8"))
            await process.stdin.drain()
            process.stdin.close()

            turn = CodexTurn(
                events=_process_stdout(process),
                model=MODEL,
            )

            emitter = UnifiedEmitter(
                task_id=task_id,
                trace_id=task_id,
                parent_span_id=turn_span.id if turn_span else None,
            )

            async for event in emitter.yield_turn(turn):
                yield event
        finally:
            if process.returncode is None:
                try:
                    process.kill()
                except ProcessLookupError:
                    # The process exited between the returncode check and kill().
                    pass
            await process.wait()

        # Record the real wall-clock duration AFTER streaming completes; setting
        # it before the stream ran would capture only subprocess spawn overhead.
        turn.duration_ms = int(time.monotonic() * 1000) - start_ms

        if turn_span:
            usage = turn.usage()
            turn_span.output = {
                "model": usage.model,
                "input_tokens": usage.input_tokens,
                "output_tokens": usage.output_tokens,
            }
a/src/agentex/lib/cli/templates/sync-langgraph/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/sync-langgraph/Dockerfile-uv.j2 @@ -27,18 +27,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" ENV PYTHONPATH=/app diff --git a/src/agentex/lib/cli/templates/sync-langgraph/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-langgraph/manifest.yaml.j2 index 7bf2cb355..33f2d7b67 100644 --- a/src/agentex/lib/cli/templates/sync-langgraph/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/sync-langgraph/manifest.yaml.j2 @@ -64,7 +64,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # Set enabled: true to use Temporal workflows for long-running tasks diff --git a/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2 index c6814b9c4..32d261093 100644 --- a/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2 @@ -14,6 +14,7 @@ from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.protocol.acp import SendMessageParams from agentex.lib.types.tracing import 
SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent from agentex.lib.adk import LangGraphTurn from agentex.types.task_message_content import TaskMessageContent from agentex.types.task_message_delta import TextDelta @@ -63,7 +64,11 @@ async def handle_message_send( graph = await get_graph() thread_id = params.task.id - user_message = params.content.content + content = params.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text message content (type=%s)", getattr(content, "type", "?")) + return + user_message = content.content logger.info(f"Processing message for thread {thread_id}") diff --git a/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/Dockerfile-uv.j2 index 582434ac9..dd3035f7b 100644 --- a/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/Dockerfile-uv.j2 @@ -27,18 +27,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" ENV PYTHONPATH=/app diff --git a/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/README.md.j2 
b/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/README.md.j2 index 9416f2477..c49f0f56f 100644 --- a/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/README.md.j2 +++ b/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/README.md.j2 @@ -262,7 +262,10 @@ Add sophisticated response generation: @acp.on_message_send async def handle_message_send(params: SendMessageParams): # Analyze input - user_message = params.content.content + content = params.content + if not isinstance(content, TextContent): + return TextContent(author="agent", content="Sorry, I can only handle text messages right now.") + user_message = content.content # Generate response response = await generate_intelligent_response(user_message) diff --git a/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/manifest.yaml.j2 index bc2910f2a..6377d01cd 100644 --- a/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/manifest.yaml.j2 @@ -64,7 +64,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # Set enabled: true to use Temporal workflows for long-running tasks diff --git a/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/project/acp.py.j2 index e394e14c2..14af98351 100644 --- a/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/sync-openai-agents-local-sandbox/project/acp.py.j2 @@ -63,7 +63,11 @@ async def handle_message_send( ) -> TaskMessageContent: """Handle incoming messages by running the local-sandbox agent.""" task_id = params.task.id - user_message = 
params.content.content + content = params.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text message content (type=%s)", getattr(content, "type", "?")) + return TextContent(author="agent", content="Sorry, I can only handle text messages right now.") + user_message = content.content logger.info(f"Processing message for task {task_id}") async with adk.tracing.span( diff --git a/src/agentex/lib/cli/templates/sync-openai-agents/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync-openai-agents/Dockerfile-uv.j2 index 582434ac9..dd3035f7b 100644 --- a/src/agentex/lib/cli/templates/sync-openai-agents/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/sync-openai-agents/Dockerfile-uv.j2 @@ -27,18 +27,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" ENV PYTHONPATH=/app diff --git a/src/agentex/lib/cli/templates/sync-openai-agents/README.md.j2 b/src/agentex/lib/cli/templates/sync-openai-agents/README.md.j2 index a8ad10799..7711969cd 100644 --- a/src/agentex/lib/cli/templates/sync-openai-agents/README.md.j2 +++ b/src/agentex/lib/cli/templates/sync-openai-agents/README.md.j2 @@ -251,7 +251,10 @@ Add sophisticated response generation: @acp.on_message_send async def handle_message_send(params: SendMessageParams): # Analyze input - user_message = 
params.content.content + content = params.content + if not isinstance(content, TextContent): + return TextContent(author="agent", content="Sorry, I can only handle text messages right now.") + user_message = content.content # Generate response response = await generate_intelligent_response(user_message) diff --git a/src/agentex/lib/cli/templates/sync-openai-agents/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-openai-agents/manifest.yaml.j2 index 965769233..875fcc5e0 100644 --- a/src/agentex/lib/cli/templates/sync-openai-agents/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/sync-openai-agents/manifest.yaml.j2 @@ -64,7 +64,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # Set enabled: true to use Temporal workflows for long-running tasks diff --git a/src/agentex/lib/cli/templates/sync-openai-agents/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-openai-agents/project/acp.py.j2 index 4e2517838..41029f2ce 100644 --- a/src/agentex/lib/cli/templates/sync-openai-agents/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/sync-openai-agents/project/acp.py.j2 @@ -98,7 +98,12 @@ async def handle_message_send( ) return - user_prompt = params.content.content + content = params.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text message content (type=%s)", getattr(content, "type", "?")) + return + + user_prompt = content.content # Retrieve the task state. Each event is handled as a new turn, so we need to get the state for the current turn. 
task_state = await adk.state.get_by_task_and_agent(task_id=params.task.id, agent_id=params.agent.id) diff --git a/src/agentex/lib/cli/templates/sync-pydantic-ai/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync-pydantic-ai/Dockerfile-uv.j2 index 582434ac9..dd3035f7b 100644 --- a/src/agentex/lib/cli/templates/sync-pydantic-ai/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/sync-pydantic-ai/Dockerfile-uv.j2 @@ -27,18 +27,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" ENV PYTHONPATH=/app diff --git a/src/agentex/lib/cli/templates/sync-pydantic-ai/README.md.j2 b/src/agentex/lib/cli/templates/sync-pydantic-ai/README.md.j2 index a8ad10799..7711969cd 100644 --- a/src/agentex/lib/cli/templates/sync-pydantic-ai/README.md.j2 +++ b/src/agentex/lib/cli/templates/sync-pydantic-ai/README.md.j2 @@ -251,7 +251,10 @@ Add sophisticated response generation: @acp.on_message_send async def handle_message_send(params: SendMessageParams): # Analyze input - user_message = params.content.content + content = params.content + if not isinstance(content, TextContent): + return TextContent(author="agent", content="Sorry, I can only handle text messages right now.") + user_message = content.content # Generate response response = await generate_intelligent_response(user_message) diff --git 
a/src/agentex/lib/cli/templates/sync-pydantic-ai/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-pydantic-ai/manifest.yaml.j2 index 965769233..875fcc5e0 100644 --- a/src/agentex/lib/cli/templates/sync-pydantic-ai/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/sync-pydantic-ai/manifest.yaml.j2 @@ -64,7 +64,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # Set enabled: true to use Temporal workflows for long-running tasks diff --git a/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 index 061ae0e08..1a3c6f0a9 100644 --- a/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 @@ -22,6 +22,7 @@ from agentex.protocol.acp import SendMessageParams from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.types.task_message_update import TaskMessageUpdate from agentex.types.task_message_content import TaskMessageContent @@ -67,7 +68,12 @@ async def handle_message_send( agent = get_agent() task_id = params.task.id - user_message = params.content.content + content = params.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text message content (type=%s)", getattr(content, "type", "?")) + return + + user_message = content.content logger.info(f"Processing message for task {task_id}") # Open a per-message turn span. 
Tool calls below nest underneath this diff --git a/src/agentex/lib/cli/templates/sync/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync/Dockerfile-uv.j2 index 582434ac9..dd3035f7b 100644 --- a/src/agentex/lib/cli/templates/sync/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/sync/Dockerfile-uv.j2 @@ -27,18 +27,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" ENV PYTHONPATH=/app diff --git a/src/agentex/lib/cli/templates/sync/README.md.j2 b/src/agentex/lib/cli/templates/sync/README.md.j2 index a8ad10799..7711969cd 100644 --- a/src/agentex/lib/cli/templates/sync/README.md.j2 +++ b/src/agentex/lib/cli/templates/sync/README.md.j2 @@ -251,7 +251,10 @@ Add sophisticated response generation: @acp.on_message_send async def handle_message_send(params: SendMessageParams): # Analyze input - user_message = params.content.content + content = params.content + if not isinstance(content, TextContent): + return TextContent(author="agent", content="Sorry, I can only handle text messages right now.") + user_message = content.content # Generate response response = await generate_intelligent_response(user_message) diff --git a/src/agentex/lib/cli/templates/sync/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync/manifest.yaml.j2 index 965769233..875fcc5e0 100644 --- 
a/src/agentex/lib/cli/templates/sync/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/sync/manifest.yaml.j2 @@ -64,7 +64,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # Set enabled: true to use Temporal workflows for long-running tasks diff --git a/src/agentex/lib/cli/templates/sync/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync/project/acp.py.j2 index ce5069a4c..d7d6f51d2 100644 --- a/src/agentex/lib/cli/templates/sync/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/sync/project/acp.py.j2 @@ -20,7 +20,13 @@ async def handle_message_send( params: SendMessageParams ) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: """Default message handler with streaming support""" + content = params.content + if not isinstance(content, TextContent): + return TextContent( + author="agent", + content="Sorry, I can only handle text messages right now.", + ) return TextContent( author="agent", - content=f"Hello! I've received your message. Here's a generic response, but in future tutorials we'll see how you can get me to intelligently respond to your message. This is what I heard you say: {params.content.content}", + content=f"Hello! I've received your message. Here's a generic response, but in future tutorials we'll see how you can get me to intelligently respond to your message. 
This is what I heard you say: {content.content}", ) \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 new file mode 100644 index 000000000..5aff34a60 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for the Claude Code CLI (the `claude` subprocess this agent spawns) +ANTHROPIC_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile-uv.j2 new file mode 100644 index 000000000..f8746c573 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile-uv.j2 @@ -0,0 +1,61 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ 
+ build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +# Install the Claude Code CLI: the activity shells out to `claude` on every +# turn, so the binary must be present in the runtime image. +RUN npm install -g @anthropic-ai/claude-code + +# Install tctl (Temporal CLI) +RUN ARCH="$(uname -m)" && \ + case "$ARCH" in x86_64) TCTL_ARCH=amd64 ;; aarch64|arm64) TCTL_ARCH=arm64 ;; *) TCTL_ARCH=amd64 ;; esac && \ + curl -L "https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_${TCTL_ARCH}.tar.gz" -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile.j2 new file mode 100644 index 000000000..225863607 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile.j2 @@ -0,0 +1,54 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY 
--from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install the Claude Code CLI: the activity shells out to `claude` on every +# turn, so the binary must be present in the runtime image. +RUN npm install -g @anthropic-ai/claude-code + +# Install tctl (Temporal CLI) +RUN ARCH="$(uname -m)" && \ + case "$ARCH" in x86_64) TCTL_ARCH=amd64 ;; aarch64|arm64) TCTL_ARCH=arm64 ;; *) TCTL_ARCH=amd64 ;; esac && \ + curl -L "https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_${TCTL_ARCH}.tar.gz" -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/README.md.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/README.md.j2 new file mode 100644 index 000000000..35ac019b5 --- /dev/null +++ 
b/src/agentex/lib/cli/templates/temporal-claude-code/README.md.j2 @@ -0,0 +1,73 @@ +# {{ agent_name }} — AgentEx Temporal + Claude Code + +This template builds a **Temporal-durable** agent that drives the **Claude Code +CLI** through the unified harness surface on AgentEx: +- A Temporal workflow holds conversation state (the Claude Code `session_id`) + durably across worker crashes +- Each turn delegates to the `run_claude_code_turn` activity, which spawns the + CLI (subprocess I/O is not permitted on the workflow event loop) +- The activity wraps the CLI's stdout stream in a `ClaudeCodeTurn` and delivers + canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `claude` CLI installed and on your `PATH` +- An `ANTHROPIC_API_KEY` (or equivalent credential) in your environment +- A running Temporal service (provided automatically by the local dev stack) + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +This starts both the ACP HTTP server and the Temporal worker. + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ ├── acp.py # Thin ACP server; FastACP auto-wires to the workflow +│ ├── workflow.py # Temporal workflow (durable conversation state) +│ ├── activities.py # run_claude_code_turn activity (CLI subprocess) +│ └── run_worker.py # Temporal worker entrypoint +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Subprocess must run in an activity +Temporal runs workflow + signal-handler bodies on a deterministic sandbox event +loop that does not implement `subprocess_exec`. The workflow therefore delegates +each turn to the `run_claude_code_turn` activity, which also gains Temporal's +retry + timeout guarantees. 
+ +### The unified harness surface +`ClaudeCodeTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Customize the subprocess +Edit `_spawn_claude` in `project/activities.py` to change the CLI flags, working +directory, or how the prompt is delivered. + +### 2. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 3. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. 
For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " 
print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 new file mode 100644 index 000000000..a3df5e228 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 @@ -0,0 +1,64 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived from separately. 
However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + # This is used to override the global helm values.yaml file in the agentex-agent helm charts + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal-worker: + enabled: true + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 new file mode 100644 index 000000000..9aa2b2b2f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 @@ -0,0 +1,142 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4.
The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + # Path to temporal worker file + # Examples: + # project/run_worker.py (standard) + # workers/temporal.py (custom structure) + # ../shared/worker.py (shared across projects) + worker: project/run_worker.py + + +# Agent Configuration +# ----------------- +agent: + # Type of agent - either sync or async + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description 
| tojson }} + + # Temporal workflow configuration + # This enables your agent to run as a Temporal workflow for long-running tasks + temporal: + enabled: true + workflows: + # Name of the workflow class + # Must match the @workflow.defn name in your workflow.py + - name: {{ workflow_name }} + + # Queue name for task distribution + # Used by Temporal to route tasks to your agent + # Convention: <agent_name>_task_queue + queue_name: {{ queue_name }} + + # Optional: Health check port for temporal worker + # Defaults to 80 if not specified + # health_check_port: 80 + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + # The Claude Code CLI spawned in project/activities.py authenticates with + # ANTHROPIC_API_KEY; without it every turn fails with a CLI auth error. + - env_var_name: ANTHROPIC_API_KEY + secret_name: anthropic-api-key + secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on. ANTHROPIC_API_KEY is supplied via the credential + # mapping above (deploy) or your local .env (load_dotenv). Do NOT set it to an + # empty string here — that would shadow the real key at runtime.
+ env: {} + # ANTHROPIC_API_KEY: "" # uncomment only to hardcode for local runs + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret name + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 new file mode 100644 index 000000000..0515efeeb --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 @@ -0,0 +1,31 @@ +"""ACP server for {{ agent_name }} — a Temporal Claude Code agent. + +This file is intentionally thin. When ``acp_type="async"`` is combined +with ``TemporalACPConfig``, FastACP auto-wires: + + HTTP task/create -> @workflow.run on the workflow class + HTTP task/event/send -> @workflow.signal(SignalName.RECEIVE_EVENT) + HTTP task/cancel -> workflow cancellation via the Temporal client + +The actual agent code lives in ``project/workflow.py`` and is executed by +the Temporal worker (``project/run_worker.py``), not by this HTTP process. 
+""" + +from __future__ import annotations + +import os + +from dotenv import load_dotenv + +load_dotenv() + +from agentex.lib.types.fastacp import TemporalACPConfig +from agentex.lib.sdk.fastacp.fastacp import FastACP + +acp = FastACP.create( + acp_type="async", + config=TemporalACPConfig( + type="temporal", + temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), + ), +) diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 new file mode 100644 index 000000000..94055c7df --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 @@ -0,0 +1,155 @@ +"""Temporal activity for {{ agent_name }} — Claude Code harness. + +Subprocess spawning (and any other I/O) must run inside a Temporal *activity*, +not in workflow code. Temporal runs workflow + signal-handler bodies on a +deterministic sandbox event loop that does not implement ``subprocess_exec`` +(or threads / sockets), so spawning the CLI directly in the signal handler +raises ``NotImplementedError``. This activity runs the Claude Code CLI, drives +the ``ClaudeCodeTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async +Redis push path), and returns the turn result to the workflow. + +The ``_spawn_claude`` async generator is an injectable seam: offline tests +can provide a fake that yields pre-recorded stdout lines so no real CLI runs. 
+""" + +from __future__ import annotations + +import asyncio +from typing import Any, AsyncIterator +from datetime import datetime +from collections import deque + +from temporalio import activity + +from agentex.lib.adk import ClaudeCodeTurn +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.utils.logging import make_logger +from agentex.lib.utils.model_utils import BaseModel + +logger = make_logger(__name__) + +RUN_CLAUDE_CODE_TURN_ACTIVITY = "run_claude_code_turn" + + +class RunClaudeCodeTurnParams(BaseModel): + """Arguments for one Claude Code turn run inside an activity.""" + + task_id: str + prompt: str + trace_id: str | None = None + parent_span_id: str | None = None + session_id: str | None = None + created_at: datetime | None = None + + +class RunClaudeCodeTurnResult(BaseModel): + """Result returned from the activity to the workflow.""" + + final_text: str + session_id: str | None = None + + +async def _spawn_claude(prompt: str, session_id: str | None = None) -> AsyncIterator[str]: + """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines. + + Pass ``session_id`` to resume a previous Claude Code session (multi-turn + memory via ``-r <session_id>``). + + Injectable seam: tests can monkeypatch this with a fake async iterator so no + real CLI invocation is needed offline. + """ + cmd = [ + "claude", + "-p", + "--output-format", + "stream-json", + "--verbose", + ] + if session_id: + cmd.extend(["-r", session_id]) + + proc = await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + assert proc.stdout is not None + assert proc.stdin is not None + + proc.stdin.write(prompt.encode()) + await proc.stdin.drain() + proc.stdin.close() + + # Drain stderr concurrently.
With --verbose, Claude Code can write enough to + # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks + # on its stderr write while we block reading stdout — a deadlock. A + # background task keeps stderr flowing so stdout never stalls. We keep a + # bounded tail so a non-zero exit can be surfaced with context instead of + # silently completing the turn. + stderr_tail: deque[str] = deque(maxlen=20) + + async def _drain_stderr() -> None: + assert proc.stderr is not None + async for raw in proc.stderr: + text = raw.decode("utf-8", errors="replace").rstrip() + if text: + stderr_tail.append(text) + + stderr_task = asyncio.create_task(_drain_stderr()) + + try: + buffer = "" + async for chunk in proc.stdout: + buffer += chunk.decode("utf-8", errors="replace") + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + + if buffer.strip(): + yield buffer.strip() + + await proc.wait() + if proc.returncode: + # The CLI failed (missing binary/auth, bad command). Raise so the + # activity (and turn) surfaces as failed instead of completing with + # no output. Temporal will apply the activity's retry policy. + tail = "\n".join(stderr_tail) + raise RuntimeError( + f"claude CLI exited with status {proc.returncode}:\n{tail}" + ) + finally: + # Release the subprocess and stderr drain task even if the consumer + # abandons the generator early (task cancellation / client disconnect): + # cancel the drain task and terminate+reap the process if it is still + # running, so neither is leaked. + stderr_task.cancel() + try: + await stderr_task + except asyncio.CancelledError: + pass + if proc.returncode is None: + try: + proc.terminate() + except ProcessLookupError: + pass + await proc.wait() + + +@activity.defn(name=RUN_CLAUDE_CODE_TURN_ACTIVITY) +async def run_claude_code_turn(params: RunClaudeCodeTurnParams) -> dict[str, Any]: + """Run one Claude Code turn end-to-end and stream events to the task. 
@activity.defn(name=RUN_CLAUDE_CODE_TURN_ACTIVITY)
async def run_claude_code_turn(params: RunClaudeCodeTurnParams) -> dict[str, Any]:
    """Execute a single Claude Code turn and push its events to the task.

    This is an activity, so it runs on a real asyncio loop where spawning the
    CLI subprocess is allowed.

    Args:
        params: Task id, prompt, tracing ids, optional session id to resume,
            and the ``created_at`` timestamp forwarded to the emitter.

    Returns:
        The ``RunClaudeCodeTurnResult`` dumped to a plain dict: the turn's
        ``final_text`` plus the ``session_id`` read from the turn.
    """
    emitter = UnifiedEmitter(
        task_id=params.task_id,
        trace_id=params.trace_id,
        parent_span_id=params.parent_span_id,
    )

    # The generator is lazy: the CLI is only spawned once the turn is consumed.
    cli_lines = _spawn_claude(params.prompt, session_id=params.session_id)
    turn = ClaudeCodeTurn(cli_lines)

    outcome = await emitter.auto_send_turn(turn, created_at=params.created_at)

    payload = RunClaudeCodeTurnResult(
        final_text=outcome.final_text,
        session_id=turn.session_id,
    )
    return payload.model_dump()
async def main():
    """Start the Temporal worker for this agent and run it until it stops.

    Reads the task queue name from ``WORKFLOW_TASK_QUEUE`` and registers the
    ``run_claude_code_turn`` activity ahead of the built-in Agentex activities.

    Raises:
        ValueError: ``WORKFLOW_TASK_QUEUE`` is not set.
    """
    setup_debug_if_enabled()

    queue = environment_variables.WORKFLOW_TASK_QUEUE
    if queue is None:
        raise ValueError("WORKFLOW_TASK_QUEUE is not set")

    agent_worker = AgentexWorker(task_queue=queue)

    # The Claude Code activity goes first, followed by every built-in activity.
    registered = [run_claude_code_turn]
    registered.extend(get_all_activities())

    await agent_worker.run(
        activities=registered,
        workflow={{ workflow_class }},
    )
Temporal runs workflow + signal-handler bodies on a +deterministic sandbox event loop that does not implement ``subprocess_exec`` +(spawning the CLI there raises ``NotImplementedError``). The activity also gets +Temporal's retry + timeout guarantees. +""" + +from __future__ import annotations + +import os +import json +import asyncio +from datetime import timedelta + +from temporalio import workflow + +from agentex.lib import adk +from agentex.lib.types.acp import SendEventParams, CreateTaskParams +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.types.workflow import SignalName +from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +with workflow.unsafe.imports_passed_through(): + from project.activities import RunClaudeCodeTurnParams, run_claude_code_turn + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +environment_variables = EnvironmentVariables.refresh() + +if environment_variables.WORKFLOW_NAME is None: + raise ValueError("Environment variable WORKFLOW_NAME is not set") +if environment_variables.AGENT_NAME is None: + raise ValueError("Environment variable AGENT_NAME is not set") + +logger = make_logger(__name__) + + +@workflow.defn(name=environment_variables.WORKFLOW_NAME) +class {{ workflow_class }}(BaseWorkflow): + """Temporal workflow that runs Claude Code locally for each user message. + + Persists the Claude Code session_id across turns so the CLI can resume + the conversation (``-r ``). 
Temporal's durable state ensures + the session_id survives worker crashes. + """ + + def __init__(self): + super().__init__(display_name=environment_variables.AGENT_NAME) + self._complete_task = False + self._turn_number = 0 + # Claude Code session_id for multi-turn resume. + self._session_id: str | None = None + # Serialize turns: signal handlers can interleave at await points, so two + # quick messages could both read the same stale _session_id and run + # independent Claude Code sessions. The lock keeps turns sequential and + # preserves conversation continuity. + self._turn_lock = asyncio.Lock() + + @workflow.signal(name=SignalName.RECEIVE_EVENT) + async def on_task_event_send(self, params: SendEventParams) -> None: + """Handle a user message: spawn Claude Code and push events to the task stream.""" + async with self._turn_lock: + task_id = params.task.id + content = params.event.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text event content (type=%s)", getattr(content, "type", "?")) + return + self._turn_number += 1 + prompt = content.content + logger.info("Turn %d for task %s", self._turn_number, task_id) + + await adk.messages.create(task_id=task_id, content=params.event.content) + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name=f"Turn {self._turn_number}", + input={"message": prompt}, + ) as span: + # Delegate the subprocess turn to an activity: subprocess I/O is not + # permitted on the Temporal workflow event loop. The activity streams + # events to the task and returns the final text + session_id. + # workflow.now() gives a deterministic timestamp under replay. 
+ result = await workflow.execute_activity( + run_claude_code_turn, + RunClaudeCodeTurnParams( + task_id=task_id, + prompt=prompt, + trace_id=task_id, + parent_span_id=span.id if span else None, + session_id=self._session_id, + created_at=workflow.now(), + ), + # Agentic Claude Code runs (multiple tool calls, large codegen) + # can take a while; tune this to your workload. + start_to_close_timeout=timedelta(minutes=30), + ) + + # Capture session_id to enable Claude Code resume on the next turn. + sid = result.get("session_id") + if sid: + self._session_id = sid + + if span: + span.output = {"final_text": result.get("final_text")} + + @workflow.run + async def on_task_create(self, params: CreateTaskParams) -> str: + logger.info("Task created: %s", params.task.id) + + await adk.messages.create( + task_id=params.task.id, + content=TextContent( + author="agent", + content=( + f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n" + "Send me a message and I'll run it through Claude Code locally." 
+ ), + ), + ) + + await workflow.wait_condition(lambda: self._complete_task, timeout=None) + return "Task completed" + + @workflow.signal + async def complete_task_signal(self) -> None: + logger.info("Received complete_task signal") + self._complete_task = True diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 new file mode 100644 index 000000000..2c6ec9c2f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 @@ -0,0 +1,37 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "temporalio>=1.18.2", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-asyncio", + "httpx", + "black", + "isort", + "flake8", + "debugpy>=1.8.15", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 new file mode 100644 index 000000000..a060d2331 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 @@ -0,0 +1,11 @@ +# Agentex SDK +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Temporal workflow engine +temporalio>=1.18.2 + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 b/src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python 
+__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 b/src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 new file mode 100644 index 000000000..5d621a83e --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key used by the codex CLI (`codex exec` reads OPENAI_API_KEY directly) +OPENAI_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 new file mode 100644 index 000000000..7e31387fa --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 @@ -0,0 +1,61 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +# Install the codex CLI: the activity shells out to `codex` on every turn, so +# the binary must be present in the runtime image. 
+RUN npm install -g @openai/codex + +# Install tctl (Temporal CLI) +RUN ARCH="$(uname -m)" && \ + case "$ARCH" in x86_64) TCTL_ARCH=amd64 ;; aarch64|arm64) TCTL_ARCH=arm64 ;; *) TCTL_ARCH=amd64 ;; esac && \ + curl -L "https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_${TCTL_ARCH}.tar.gz" -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 new file mode 100644 index 000000000..0ae4e2079 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 @@ -0,0 +1,54 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && 
rm -rf /var/lib/apt/lists/* + +# Install the codex CLI: the activity shells out to `codex` on every turn, so +# the binary must be present in the runtime image. +RUN npm install -g @openai/codex + +# Install tctl (Temporal CLI) +RUN ARCH="$(uname -m)" && \ + case "$ARCH" in x86_64) TCTL_ARCH=amd64 ;; aarch64|arm64) TCTL_ARCH=arm64 ;; *) TCTL_ARCH=amd64 ;; esac && \ + curl -L "https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_${TCTL_ARCH}.tar.gz" -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-codex/README.md.j2 b/src/agentex/lib/cli/templates/temporal-codex/README.md.j2 new file mode 100644 index 000000000..794109ff3 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/README.md.j2 @@ -0,0 +1,80 @@ +# {{ agent_name }} — AgentEx Temporal + Codex + +This template builds a **Temporal-durable** agent that drives the **Codex CLI** +through the unified harness surface on AgentEx: +- A Temporal workflow holds conversation state (the codex thread ID) durably + across worker crashes — no external state store needed +- Each turn 
delegates to the `run_codex_turn` activity, which spawns the CLI + (subprocess I/O is not permitted on the workflow event loop) +- The activity wraps the CLI's stdout stream in a `CodexTurn` and delivers + canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `codex` CLI installed and on your `PATH` (`npm install -g @openai/codex`) +- An `OPENAI_API_KEY` in your environment +- A running Temporal service (provided automatically by the local dev stack) + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +This starts both the ACP HTTP server and the Temporal worker. + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ ├── acp.py # Thin ACP server; FastACP auto-wires to the workflow +│ ├── workflow.py # Temporal workflow (durable conversation state) +│ ├── activities.py # run_codex_turn activity (CLI subprocess) +│ └── run_worker.py # Temporal worker entrypoint +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Subprocess must run in an activity +Temporal runs workflow + signal-handler bodies on a deterministic sandbox event +loop that does not implement `subprocess_exec`. The workflow therefore delegates +each turn to the `run_codex_turn` activity, which also gains Temporal's retry + +timeout guarantees. + +### Durable multi-turn memory +The codex thread ID is kept on the workflow instance; Temporal's durable replay +reconstructs it after a crash, so the next turn resumes the same codex session. + +### The unified harness surface +`CodexTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. 
Choose a model +Set `CODEX_MODEL` (defaults to `o4-mini`) to control which model codex uses. + +### 2. Customize the subprocess +Edit `_spawn_codex` in `project/activities.py` to change the CLI flags or how +the prompt is delivered. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 b/src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. 
For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " 
print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 b/src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 new file mode 100644 index 000000000..a3df5e228 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 @@ -0,0 +1,64 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived from separately. 
However, this can be used to override the derived +# # namespace and deploy it with in the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + # This is used to override the global helm values.yaml file in the agentex-agent helm charts + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal-worker: + enabled: true + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 new file mode 100644 index 000000000..067567059 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 @@ -0,0 +1,142 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. 
The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + # Path to temporal worker file + # Examples: + # project/run_worker.py (standard) + # workers/temporal.py (custom structure) + # ../shared/worker.py (shared across projects) + worker: project/run_worker.py + + +# Agent Configuration +# ----------------- +agent: + # Type of agent - either sync or async + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description 
| tojson }} + + # Temporal workflow configuration + # This enables your agent to run as a Temporal workflow for long-running tasks + temporal: + enabled: true + workflows: + # Name of the workflow class + # Must match the @workflow.defn name in your workflow.py + - name: {{ workflow_name }} + + # Queue name for task distribution + # Used by Temporal to route tasks to your agent + # Convention: <agent_name>_task_queue + queue_name: {{ queue_name }} + + # Optional: Health check port for temporal worker + # Defaults to 80 if not specified + # health_check_port: 80 + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + # The codex CLI spawned in project/activities.py reads OPENAI_API_KEY + # directly; without it every turn fails with an auth error. + - env_var_name: OPENAI_API_KEY + secret_name: openai-api-key + secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: {} + # LITELLM_API_KEY: "" + # OPENAI_BASE_URL: "" + # OPENAI_ORG_ID: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret name + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git
a/src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 new file mode 100644 index 000000000..7ef5744f0 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 @@ -0,0 +1,32 @@ +"""ACP server for {{ agent_name }} — a Temporal Codex harness agent. + +This file is intentionally thin. When ``acp_type="async"`` is combined with +``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: + + HTTP task/create -> @workflow.run on the workflow class + HTTP task/event/send -> @workflow.signal(SignalName.RECEIVE_EVENT) + HTTP task/cancel -> workflow cancellation via the Temporal client + +so we don't define any handlers here. The actual agent code lives in +``project/workflow.py`` and is executed by the Temporal worker +(``project/run_worker.py``), not by this HTTP process. +""" + +from __future__ import annotations + +import os + +from dotenv import load_dotenv + +load_dotenv() + +from agentex.lib.types.fastacp import TemporalACPConfig +from agentex.lib.sdk.fastacp.fastacp import FastACP + +acp = FastACP.create( + acp_type="async", + config=TemporalACPConfig( + type="temporal", + temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), + ), +) diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 new file mode 100644 index 000000000..0111794d9 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 @@ -0,0 +1,151 @@ +"""Temporal activity for {{ agent_name }} — Codex harness. + +Subprocess spawning (and any other I/O) must run inside a Temporal *activity*, +not in workflow code. Temporal runs workflow + signal-handler bodies on a +deterministic sandbox event loop that does not implement ``subprocess_exec`` +(or threads / sockets), so spawning ``codex exec`` directly in the signal +handler raises ``NotImplementedError``. 
This activity runs codex, drives the +``CodexTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async Redis push +path), and returns the turn result to the workflow. + +The ``_spawn_codex`` / ``_process_stdout`` seams are injectable: offline tests +can replace them with fakes that yield pre-recorded event lines so no real CLI +runs. +""" + +from __future__ import annotations + +import os +import codecs +import asyncio +from typing import Any +from datetime import datetime +from collections.abc import AsyncIterator + +from temporalio import activity + +from agentex.lib.adk import CodexTurn +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.utils.logging import make_logger +from agentex.lib.utils.model_utils import BaseModel + +logger = make_logger(__name__) + +RUN_CODEX_TURN_ACTIVITY = "run_codex_turn" + + +class RunCodexTurnParams(BaseModel): + """Arguments for one codex turn run inside an activity.""" + + task_id: str + prompt: str + model: str + trace_id: str | None = None + parent_span_id: str | None = None + thread_id: str | None = None + created_at: datetime | None = None + + +class RunCodexTurnResult(BaseModel): + """Result returned from the activity to the workflow.""" + + final_text: str + session_id: str | None = None + model: str | None = None + + +async def _spawn_codex( + model: str, + thread_id: str | None = None, +) -> asyncio.subprocess.Process: + """Spawn ``codex exec --json`` locally and return the live process. + + Injection seam: tests replace this function with a fake that returns a + mock process whose stdout yields pre-recorded event lines. + + The caller writes the prompt to stdin after the process starts, then + closes stdin so codex knows input is complete. 
+ """ + base_flags = [ + "--json", + "--skip-git-repo-check", + "--dangerously-bypass-approvals-and-sandbox", + "--model", + model, + ] + + if thread_id: + cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"] + else: + cmd = ["codex", "exec", *base_flags, "-"] + + return await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + # Discard stderr: codex --json writes events to stdout; its stderr is + # progress/debug noise. Capturing it with PIPE but never reading it + # would deadlock once codex fills the OS pipe buffer (~64 KB). + stderr=asyncio.subprocess.DEVNULL, + env={**os.environ}, + ) + + +async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]: + """Yield newline-delimited JSON lines from the process stdout. + + Uses an incremental UTF-8 decoder so a multibyte character split across two + 4 KB reads is decoded correctly instead of being corrupted at the boundary. + """ + assert process.stdout is not None + decoder = codecs.getincrementaldecoder("utf-8")(errors="replace") + buffer = "" + while True: + chunk = await process.stdout.read(4096) + if not chunk: + break + buffer += decoder.decode(chunk) + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + buffer += decoder.decode(b"", final=True) + if buffer.strip(): + yield buffer.strip() + + +@activity.defn(name=RUN_CODEX_TURN_ACTIVITY) +async def run_codex_turn(params: RunCodexTurnParams) -> dict[str, Any]: + """Run one codex turn end-to-end and stream events to the task. + + Runs in an activity (real asyncio loop) so subprocess I/O is permitted. 
+ """ + process = await _spawn_codex(params.model, thread_id=params.thread_id) + + assert process.stdin is not None + process.stdin.write(params.prompt.encode("utf-8")) + await process.stdin.drain() + process.stdin.close() + + turn = CodexTurn(events=_process_stdout(process), model=params.model) + emitter = UnifiedEmitter( + task_id=params.task_id, + trace_id=params.trace_id, + parent_span_id=params.parent_span_id, + ) + # Guarantee the subprocess is reaped even if auto_send_turn raises; + # otherwise codex stays blocked writing to a full stdout pipe buffer and the + # OS process leaks until the worker restarts. + try: + result = await emitter.auto_send_turn(turn, created_at=params.created_at) + finally: + if process.returncode is None: + process.kill() + await process.wait() + + return RunCodexTurnResult( + final_text=result.final_text, + session_id=turn.session_id, + model=turn.usage().model, + ).model_dump() diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 new file mode 100644 index 000000000..d86519977 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 @@ -0,0 +1,41 @@ +"""Temporal worker for {{ agent_name }} — Codex harness. + +Run as a separate long-lived process alongside the ACP HTTP server. The +worker polls Temporal for workflow + activity tasks and executes them. + +The codex CLI subprocess runs in the ``run_codex_turn`` activity (registered +below alongside the built-in Agentex activities), because subprocess I/O is not +permitted on the Temporal workflow event loop. 
+""" + +import asyncio + +from project.workflow import {{ workflow_class }} +from project.activities import run_codex_turn +from agentex.lib.utils.debug import setup_debug_if_enabled +from agentex.lib.utils.logging import make_logger +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.activities import get_all_activities +from agentex.lib.core.temporal.workers.worker import AgentexWorker + +environment_variables = EnvironmentVariables.refresh() +logger = make_logger(__name__) + + +async def main(): + setup_debug_if_enabled() + + task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE + if task_queue_name is None: + raise ValueError("WORKFLOW_TASK_QUEUE is not set") + + worker = AgentexWorker(task_queue=task_queue_name) + + await worker.run( + activities=[run_codex_turn, *get_all_activities()], + workflow={{ workflow_class }}, + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 new file mode 100644 index 000000000..1004ebfb8 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 @@ -0,0 +1,157 @@ +"""Temporal workflow for {{ agent_name }} — Codex harness. + +Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` + +``UnifiedEmitter`` for a Temporal-durable ACP agent. + +KEY CONCEPTS DEMONSTRATED: +- Running ``codex exec --json`` in the ``run_codex_turn`` activity. Subprocess + I/O is not permitted on the Temporal workflow event loop (the deterministic + sandbox loop does not implement ``subprocess_exec``), so the signal handler + delegates the turn to an activity, which also gets Temporal's retry + timeout + guarantees. +- Wrapping the stdout line stream in a ``CodexTurn`` (inside the activity). 
+- Delivering events via ``UnifiedEmitter.auto_send_turn``, which pushes + ``StreamTaskMessage*`` events to Redis so the UI sees tokens in real time. +- Passing ``created_at=workflow.now()`` for deterministic timestamps under + Temporal replay (required for Temporal-safe delivery). +- Persisting the codex thread ID on the workflow instance itself — Temporal's + workflow state is durable, so no external ``adk.state`` round-trip is needed. +""" + +from __future__ import annotations + +import os +import asyncio +from datetime import timedelta + +from temporalio import workflow + +from agentex.lib import adk +from agentex.lib.types.acp import SendEventParams, CreateTaskParams +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.types.workflow import SignalName +from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +with workflow.unsafe.imports_passed_through(): + from project.activities import RunCodexTurnParams, run_codex_turn + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +environment_variables = EnvironmentVariables.refresh() + +if environment_variables.WORKFLOW_NAME is None: + raise ValueError("Environment variable WORKFLOW_NAME is not set") +if environment_variables.AGENT_NAME is None: + raise ValueError("Environment variable AGENT_NAME is not set") + +logger = make_logger(__name__) + +MODEL = os.environ.get("CODEX_MODEL", "o4-mini") + + +@workflow.defn(name=environment_variables.WORKFLOW_NAME) +class {{ workflow_class }}(BaseWorkflow): + """Long-running 
Temporal workflow that runs codex exec for each turn. + + Conversation state (codex thread ID + turn counter) is kept on the + workflow instance. Temporal's durable replay reconstructs this state if + the worker crashes, so no external ``adk.state`` round-trip is needed. + """ + + def __init__(self): + super().__init__(display_name=environment_variables.AGENT_NAME) + self._complete_task = False + self._turn_number = 0 + self._codex_thread_id: str | None = None + # Serialize turns: signal handlers can interleave at await points, so two + # quick messages could both read the same stale _codex_thread_id and fork + # the codex session. The lock keeps turns sequential and preserves + # conversation continuity. + self._turn_lock = asyncio.Lock() + + @workflow.signal(name=SignalName.RECEIVE_EVENT) + async def on_task_event_send(self, params: SendEventParams) -> None: + """Handle a new user message: run the codex turn in the ``run_codex_turn`` activity.""" + logger.info("Received task event: %s", params.task.id) + async with self._turn_lock: + content = params.event.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text event content (type=%s)", getattr(content, "type", "?")) + return + + self._turn_number += 1 + + await adk.messages.create(task_id=params.task.id, content=params.event.content) + + user_message = content.content + + async with adk.tracing.span( + trace_id=params.task.id, + task_id=params.task.id, + name=f"Turn {self._turn_number}", + input={"message": user_message}, + ) as span: + # Delegate the subprocess turn to an activity: subprocess I/O is not + # permitted on the Temporal workflow event loop. The activity streams + # events to the task and returns the final text + codex thread id. + # workflow.now() gives a deterministic timestamp under replay.
+ result = await workflow.execute_activity( + run_codex_turn, + RunCodexTurnParams( + task_id=params.task.id, + prompt=user_message, + model=MODEL, + trace_id=params.task.id, + parent_span_id=span.id if span else None, + thread_id=self._codex_thread_id, + created_at=workflow.now(), + ), + start_to_close_timeout=timedelta(minutes=5), + ) + + # Persist the codex thread id so the next turn resumes the session. + session_id = result.get("session_id") + if session_id: + self._codex_thread_id = session_id + + if span: + span.output = { + "final_text": result.get("final_text"), + "model": result.get("model"), + } + + @workflow.run + async def on_task_create(self, params: CreateTaskParams) -> str: + """Workflow entry point — keep the conversation alive for incoming signals.""" + logger.info("Task created: %s", params.task.id) + + await adk.messages.create( + task_id=params.task.id, + content=TextContent( + author="agent", + content=( + "Task initialized.\n" + "Send me a message and I'll run codex (local subprocess) " + "to answer, streaming events via the unified harness surface." 
+ ), + ), + ) + + await workflow.wait_condition(lambda: self._complete_task, timeout=None) + return "Task completed" + + @workflow.signal + async def complete_task_signal(self) -> None: + """Graceful workflow shutdown signal.""" + logger.info("Received complete_task signal") + self._complete_task = True diff --git a/src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 b/src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 new file mode 100644 index 000000000..2c6ec9c2f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 @@ -0,0 +1,37 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "temporalio>=1.18.2", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-asyncio", + "httpx", + "black", + "isort", + "flake8", + "debugpy>=1.8.15", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 b/src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 new file mode 100644 index 000000000..a060d2331 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 @@ -0,0 +1,11 @@ +# Agentex SDK +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Temporal workflow engine +temporalio>=1.18.2 + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/temporal-langgraph/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal-langgraph/Dockerfile-uv.j2 index 2a3f1108b..6746869df 100644 --- a/src/agentex/lib/cli/templates/temporal-langgraph/Dockerfile-uv.j2 +++ 
b/src/agentex/lib/cli/templates/temporal-langgraph/Dockerfile-uv.j2 @@ -33,18 +33,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" diff --git a/src/agentex/lib/cli/templates/temporal-langgraph/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal-langgraph/manifest.yaml.j2 index 18cffd54a..b9216929f 100644 --- a/src/agentex/lib/cli/templates/temporal-langgraph/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/temporal-langgraph/manifest.yaml.j2 @@ -73,7 +73,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: "{{ description }}" + description: {{ description | tojson }} # Temporal workflow configuration # This enables your agent to run as a Temporal workflow for long-running tasks diff --git a/src/agentex/lib/cli/templates/temporal-langgraph/project/workflow.py.j2 b/src/agentex/lib/cli/templates/temporal-langgraph/project/workflow.py.j2 index d1621fb8c..14bafabc1 100644 --- a/src/agentex/lib/cli/templates/temporal-langgraph/project/workflow.py.j2 +++ b/src/agentex/lib/cli/templates/temporal-langgraph/project/workflow.py.j2 @@ -93,8 +93,12 @@ class {{ workflow_class }}(BaseWorkflow): async def on_task_event_send(self, params: SendEventParams) -> None: """Handle a new user message: echo it, then run the agent graph 
durably.""" logger.info(f"Received task event for task {params.task.id}") + content = params.event.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text event content (type=%s)", getattr(content, "type", "?")) + return self._turn_number += 1 - user_text = params.event.content.content + user_text = content.content # Echo the user's message so it shows up as a chat bubble. await adk.messages.create(task_id=params.task.id, content=params.event.content) diff --git a/src/agentex/lib/cli/templates/temporal-openai-agents/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal-openai-agents/Dockerfile-uv.j2 index 625592d31..0d9801016 100644 --- a/src/agentex/lib/cli/templates/temporal-openai-agents/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/temporal-openai-agents/Dockerfile-uv.j2 @@ -33,18 +33,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" diff --git a/src/agentex/lib/cli/templates/temporal-openai-agents/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal-openai-agents/manifest.yaml.j2 index ee5e473d2..b9216929f 100644 --- a/src/agentex/lib/cli/templates/temporal-openai-agents/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/temporal-openai-agents/manifest.yaml.j2 @@ -73,7 +73,7 @@ agent: # Description of what your agent does # 
Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # This enables your agent to run as a Temporal workflow for long-running tasks diff --git a/src/agentex/lib/cli/templates/temporal-openai-agents/project/workflow.py.j2 b/src/agentex/lib/cli/templates/temporal-openai-agents/project/workflow.py.j2 index 2b81bb335..af8b7a299 100644 --- a/src/agentex/lib/cli/templates/temporal-openai-agents/project/workflow.py.j2 +++ b/src/agentex/lib/cli/templates/temporal-openai-agents/project/workflow.py.j2 @@ -100,6 +100,11 @@ class {{ workflow_class }}(BaseWorkflow): async def on_task_event_send(self, params: SendEventParams) -> None: logger.info(f"Received task message instruction: {params}") + content = params.event.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text event content (type=%s)", getattr(content, "type", "?")) + return + # Increment turn number for tracing self._state.turn_number += 1 @@ -108,7 +113,7 @@ class {{ workflow_class }}(BaseWorkflow): self._parent_span_id = params.task.id # Add the user message to conversation history - self._state.input_list.append({"role": "user", "content": params.event.content.content}) + self._state.input_list.append({"role": "user", "content": content.content}) # Echo back the client's message to show it in the UI await adk.messages.create(task_id=params.task.id, content=params.event.content) diff --git a/src/agentex/lib/cli/templates/temporal-pydantic-ai/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal-pydantic-ai/Dockerfile-uv.j2 index 625592d31..0d9801016 100644 --- a/src/agentex/lib/cli/templates/temporal-pydantic-ai/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/temporal-pydantic-ai/Dockerfile-uv.j2 @@ -33,18 +33,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root 
}}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" diff --git a/src/agentex/lib/cli/templates/temporal-pydantic-ai/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal-pydantic-ai/manifest.yaml.j2 index ee5e473d2..b9216929f 100644 --- a/src/agentex/lib/cli/templates/temporal-pydantic-ai/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/temporal-pydantic-ai/manifest.yaml.j2 @@ -73,7 +73,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # This enables your agent to run as a Temporal workflow for long-running tasks diff --git a/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/workflow.py.j2 b/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/workflow.py.j2 index 66a91d7a8..6dcca3002 100644 --- a/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/workflow.py.j2 +++ b/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/workflow.py.j2 @@ -85,6 +85,13 @@ class {{ workflow_class }}(BaseWorkflow): async def on_task_event_send(self, params: SendEventParams) -> None: """Handle a new user message: echo it, then run the agent durably.""" logger.info(f"Received task event: {params.task.id}") + + content = params.event.content + if not isinstance(content, TextContent): + logger.warning("Ignoring non-text event content (type=%s)", getattr(content, "type", "?")) 
+ return + user_message = content.content + self._turn_number += 1 # Echo the user's message so it shows up in the UI as a chat bubble. @@ -94,7 +101,7 @@ class {{ workflow_class }}(BaseWorkflow): trace_id=params.task.id, task_id=params.task.id, name=f"Turn {self._turn_number}", - input={"message": params.event.content.content}, + input={"message": user_message}, ) as span: # temporal_agent.run() is the magic line. Internally it schedules # a model activity (LLM HTTP call) and, for each tool the model @@ -107,7 +114,7 @@ class {{ workflow_class }}(BaseWorkflow): # without it the agent would respond to each user message as if # it had never seen the conversation before. result = await temporal_agent.run( - params.event.content.content, + user_message, message_history=self._message_history, deps=TaskDeps( task_id=params.task.id, diff --git a/src/agentex/lib/cli/templates/temporal/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal/Dockerfile-uv.j2 index 625592d31..0d9801016 100644 --- a/src/agentex/lib/cli/templates/temporal/Dockerfile-uv.j2 +++ b/src/agentex/lib/cli/templates/temporal/Dockerfile-uv.j2 @@ -33,18 +33,18 @@ ENV UV_HTTP_TIMEOUT=1000 WORKDIR /app/{{ project_path_from_build_root }} # Copy dependency files for layer caching -COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ +COPY {{ project_path_from_build_root }}/pyproject.toml ./ # Install dependencies (without project itself, for layer caching) RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --no-dev + uv sync --no-install-project --no-dev # Copy the project code COPY {{ project_path_from_build_root }}/project ./project # Install the project RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev + uv sync --no-dev ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" diff --git a/src/agentex/lib/cli/templates/temporal/manifest.yaml.j2 
b/src/agentex/lib/cli/templates/temporal/manifest.yaml.j2 index ee5e473d2..b9216929f 100644 --- a/src/agentex/lib/cli/templates/temporal/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/temporal/manifest.yaml.j2 @@ -73,7 +73,7 @@ agent: # Description of what your agent does # Helps with documentation and discovery - description: {{ description }} + description: {{ description | tojson }} # Temporal workflow configuration # This enables your agent to run as a Temporal workflow for long-running tasks diff --git a/src/agentex/lib/core/harness/auto_send.py b/src/agentex/lib/core/harness/auto_send.py index 2ecd6b583..b645a4aae 100644 --- a/src/agentex/lib/core/harness/auto_send.py +++ b/src/agentex/lib/core/harness/auto_send.py @@ -52,11 +52,11 @@ async def auto_send( final_text_parts so that multi-step turns return the LAST text segment. Full(TextContent) also overwrites final_text_parts (same semantics). - AGX1-378: created_at is forwarded to every streaming_task_message_context - call so callers can back-date message timestamps. + created_at is forwarded to every streaming_task_message_context call so + callers can back-date message timestamps. Mirrors the open/close/stream_update pattern from - src/agentex/lib/adk/_modules/_langgraph_async.py: + src/agentex/lib/adk/_modules/_langgraph_turn.py: - context opened via streaming_task_message_context(...).__aenter__() - context closed via ctx.close() (not __aexit__) - deltas pushed as StreamTaskMessageDelta with parent_task_message set @@ -110,8 +110,8 @@ async def _close_all() -> None: ctx = ctx_map.get(event.index) if ctx is not None and event.delta is not None: # Reconstruct the delta with parent_task_message set from - # the context's task_message (mirrors _langgraph_async.py - # lines 72-78 and 117-127). + # the context's task_message (mirrors the legacy + # _langgraph_async streaming helper, now in _langgraph_turn.py). 
delta_with_parent = StreamTaskMessageDelta( parent_task_message=ctx.task_message, delta=event.delta, diff --git a/src/agentex/lib/core/harness/span_derivation.py b/src/agentex/lib/core/harness/span_derivation.py index cecb24bcc..c0ed6ee90 100644 --- a/src/agentex/lib/core/harness/span_derivation.py +++ b/src/agentex/lib/core/harness/span_derivation.py @@ -19,6 +19,8 @@ ) from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent +from agentex.types.reasoning_content_delta import ReasoningContentDelta +from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta @dataclass @@ -51,6 +53,9 @@ class SpanDeriver: def __init__(self) -> None: self._tool_by_index: dict[int, _ToolReqMeta] = {} self._reasoning_index_open: set[int] = set() + # accumulated reasoning text per open reasoning index, recorded as the + # span output on close (deltas carry the chain-of-thought / summary text). + self._reasoning_text: dict[int, str] = {} # insertion-ordered set of open tool_call_ids (dict keys preserve order) self._open_tool_ids: dict[str, None] = {} @@ -72,8 +77,10 @@ def flush(self) -> list[SpanSignal]: signals.append(CloseSpan(key=tcid, output=None, is_complete=False)) self._open_tool_ids.clear() for idx in sorted(self._reasoning_index_open): - signals.append(CloseSpan(key=f"reasoning:{idx}", output=None, is_complete=False)) + text = self._reasoning_text.pop(idx, "") + signals.append(CloseSpan(key=f"reasoning:{idx}", output=text or None, is_complete=False)) self._reasoning_index_open.clear() + self._reasoning_text.clear() return signals def _on_start(self, event: StreamTaskMessageStart) -> list[SpanSignal]: @@ -90,6 +97,11 @@ def _on_start(self, event: StreamTaskMessageStart) -> list[SpanSignal]: return [] if content.type == "reasoning": self._reasoning_index_open.add(idx) + # Seed from any text already on the Start content — non-streaming + # harnesses may carry the full reasoning up front; 
deltas append. + summary = getattr(content, "summary", None) or [] + body = getattr(content, "content", None) or [] + self._reasoning_text[idx] = "".join([*summary, *body]) return [OpenSpan(key=f"reasoning:{idx}", kind="reasoning", name="reasoning", input={})] return [] @@ -102,6 +114,12 @@ def _on_delta(self, event: StreamTaskMessageDelta) -> list[SpanSignal]: meta = self._tool_by_index.get(idx) if meta is not None and delta.arguments_delta: meta.args_buf += delta.arguments_delta + elif isinstance(delta, ReasoningContentDelta): + if idx in self._reasoning_index_open and delta.content_delta: + self._reasoning_text[idx] = self._reasoning_text.get(idx, "") + delta.content_delta + elif isinstance(delta, ReasoningSummaryDelta): + if idx in self._reasoning_index_open and delta.summary_delta: + self._reasoning_text[idx] = self._reasoning_text.get(idx, "") + delta.summary_delta return [] def _on_full(self, event: StreamTaskMessageFull) -> list[SpanSignal]: @@ -150,5 +168,6 @@ def _on_done(self, event: StreamTaskMessageDone) -> list[SpanSignal]: return [OpenSpan(key=meta.tool_call_id, kind="tool", name=meta.name, input=args)] if idx in self._reasoning_index_open: self._reasoning_index_open.discard(idx) - return [CloseSpan(key=f"reasoning:{idx}", output=None, is_complete=True)] + text = self._reasoning_text.pop(idx, "") + return [CloseSpan(key=f"reasoning:{idx}", output=text or None, is_complete=True)] return [] diff --git a/src/agentex/lib/core/harness/tracer.py b/src/agentex/lib/core/harness/tracer.py index 4ca4d628b..bf37bad30 100644 --- a/src/agentex/lib/core/harness/tracer.py +++ b/src/agentex/lib/core/harness/tracer.py @@ -16,6 +16,21 @@ logger = logging.getLogger(__name__) +def _as_span_payload(value: Any, *, key: str) -> Any: + """Coerce a span input/output payload into a dict. + + The SGP spans API requires ``input`` and ``output`` to be objects: a scalar + or string is rejected with a 422 and the span is dropped by the async + processor. 
The SpanDeriver legitimately produces non-dict payloads — the + reasoning span's output is the chain-of-thought string, and some harnesses' + tool results are plain strings — so wrap anything that isn't already a dict + (``None`` passes through unchanged so an absent payload stays absent). + """ + if value is None or isinstance(value, dict): + return value + return {key: value} + + class SpanTracer: """Opens/closes adk.tracing child spans in response to span signals. @@ -24,7 +39,7 @@ class SpanTracer: The real TracingModule.end_span does NOT accept an output kwarg — output is recorded by mutating span.output before calling end_span, matching the pattern - used throughout the codebase (see _langgraph_tracing.py on_tool_end etc.). + used throughout the codebase. Span-lifecycle contract: the `_open` dict (span key -> span object) is scoped to a single turn. Pairing is by `key`: @@ -60,7 +75,7 @@ async def handle(self, signal: SpanSignal) -> None: span = await self._tracing.start_span( trace_id=self.trace_id, name=signal.name, - input=signal.input, + input=_as_span_payload(signal.input, key="input"), parent_id=self.parent_span_id, task_id=self.task_id, ) @@ -73,7 +88,7 @@ async def handle(self, signal: SpanSignal) -> None: # The real TracingModule.end_span signature is: # end_span(trace_id, span, start_to_close_timeout, heartbeat_timeout, retry_policy) # It does not accept an output= kwarg. - span.output = signal.output + span.output = _as_span_payload(signal.output, key="output") # Tool failure status (ToolResponseContent.is_error) is recorded # on span.data when the harness reports one; Span has no dedicated # error field. None means no status was reported, so leave data alone. 
diff --git a/src/agentex/lib/core/services/adk/providers/openai.py b/src/agentex/lib/core/services/adk/providers/openai.py index 1ae29589d..a2513ea01 100644 --- a/src/agentex/lib/core/services/adk/providers/openai.py +++ b/src/agentex/lib/core/services/adk/providers/openai.py @@ -742,11 +742,10 @@ async def run_agent_streamed_auto_send( ) as span: heartbeat_if_in_workflow("run agent streamed auto send") - # AGX1-378 restored: created_at is now threaded through - # UnifiedEmitter.auto_send_turn -> auto_send -> every - # streaming_task_message_context call, so the first agent message of - # the turn is stamped with the workflow-supplied timestamp (e.g. - # workflow.now()) just as the original inline loop did. + # created_at is threaded through UnifiedEmitter.auto_send_turn -> + # auto_send -> every streaming_task_message_context call, so the + # first agent message of the turn is stamped with the + # workflow-supplied timestamp (e.g. workflow.now()). # The dispenser is still used below for guardrail-rejection messages, # which open their own streaming contexts directly. _take_created_at = _make_created_at_dispenser(created_at) diff --git a/src/agentex/lib/core/temporal/plugins/claude_agents/__init__.py b/src/agentex/lib/core/temporal/plugins/claude_agents/__init__.py index fd40545ec..1e9ee694a 100644 --- a/src/agentex/lib/core/temporal/plugins/claude_agents/__init__.py +++ b/src/agentex/lib/core/temporal/plugins/claude_agents/__init__.py @@ -1,5 +1,16 @@ """Claude Agents SDK integration with Temporal. +.. deprecated:: + This is the original Claude Code integration: it drives the Python + ``claude-agent-sdk`` directly and hand-rolls its own streaming + tracing + (and does not derive reasoning spans). 
It is superseded by the unified + harness tap (``agentex.lib.adk.ClaudeCodeTurn`` over the ``claude -p + --output-format stream-json`` CLI stdout, delivered via ``UnifiedEmitter``), + which routes Claude Code through the same canonical ``StreamTaskMessage*`` + stream as every other harness. It still works, but new agents should use the + tap and existing ones should plan to migrate; see + ``adk/docs/migration-0.16.0.md`` for the before/after. + This plugin provides integration between Claude Agents SDK and AgentEx's Temporal-based orchestration platform. diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/__init__.py b/src/agentex/lib/core/temporal/plugins/openai_agents/__init__.py index def67c9af..7d81b37d0 100644 --- a/src/agentex/lib/core/temporal/plugins/openai_agents/__init__.py +++ b/src/agentex/lib/core/temporal/plugins/openai_agents/__init__.py @@ -51,6 +51,10 @@ - No forked plugin required - uses standard OpenAIAgentsPlugin """ +from agentex.lib.core.temporal.plugins.openai_agents.run import ( + OpenAIAgentsTurnResult, + run_turn, +) from agentex.lib.core.temporal.plugins.openai_agents.hooks.hooks import ( TemporalStreamingHooks, ) @@ -81,4 +85,6 @@ "streaming_parent_span_id", "TemporalStreamingHooks", "stream_lifecycle_content", -] \ No newline at end of file + "run_turn", + "OpenAIAgentsTurnResult", +] diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/hooks/hooks.py b/src/agentex/lib/core/temporal/plugins/openai_agents/hooks/hooks.py index 758b0db27..30d358cc9 100644 --- a/src/agentex/lib/core/temporal/plugins/openai_agents/hooks/hooks.py +++ b/src/agentex/lib/core/temporal/plugins/openai_agents/hooks/hooks.py @@ -1,9 +1,30 @@ """Temporal streaming hooks for OpenAI Agents SDK lifecycle events. This module provides a convenience class for streaming agent lifecycle events -to the AgentEx UI via Temporal activities. 
+to the AgentEx UI via Temporal activities, and (optionally) tracing tool calls +to SGP with both inputs and outputs. + +Two responsibilities, independently switchable: + +1. UI message emission, split into tool requests / tool responses / handoffs + (each default True). Leave all on for the non-streaming model provider, which + does not emit them itself. When pairing with ``TemporalStreamingModelProvider`` + set ``emit_tool_requests=False`` — that model already streams the tool REQUEST + from the model output, so emitting it here double-posts. But keep + ``emit_tool_responses=True``: the streaming model does NOT emit a function + tool's response, so ``on_tool_end`` is its only source (disabling it makes the + tool-call "done" events vanish). ``run_turn`` wires this correctly for you. + +2. SGP tracing (enabled when ``trace_id`` is provided): opens a span named after + the tool on tool start with the tool ARGUMENTS as its input and closes it on + tool end with the result as its output, parented to ``parent_span_id``. Token + usage metrics are always emitted via ``LLMMetricsHooks`` regardless of these + flags. """ +from __future__ import annotations + +import json import logging from typing import Any, override from datetime import timedelta @@ -19,6 +40,23 @@ logger = logging.getLogger(__name__) +# Best-effort tracing budget — a tracing outage must never break a tool call. +_TRACE_TIMEOUT = timedelta(seconds=5) +# Cap tool-result span output so a large payload can't bloat the trace. +_MAX_SPAN_OUTPUT_CHARS = 2000 + + +def _get_adk() -> Any: + """Lazily import the adk facade for workflow-safe tracing. + + Kept lazy (not a module-level import) so this core hooks module does not pull + the full adk surface — and its optional deps — at import time. Only invoked + when a tool span is actually created (i.e. when ``trace_id`` is set). 
+ """ + from agentex.lib import adk + + return adk + class TemporalStreamingHooks(LLMMetricsHooks): """Convenience hooks class for streaming OpenAI Agent lifecycle events to the AgentEx UI. @@ -27,10 +65,14 @@ class TemporalStreamingHooks(LLMMetricsHooks): AgentEx UI via Temporal activities. It subclasses the OpenAI Agents SDK's RunHooks to intercept lifecycle events and forward them for real-time UI updates. - Lifecycle events streamed: - - Tool requests (on_tool_start): Streams when a tool is about to be invoked - - Tool responses (on_tool_end): Streams the tool's execution result - - Agent handoffs (on_handoff): Streams when control transfers between agents + Lifecycle events streamed (each gated by its own flag, all default True): + - Tool requests (on_tool_start, ``emit_tool_requests``): when a tool is invoked + - Tool responses (on_tool_end, ``emit_tool_responses``): the tool's result + - Agent handoffs (on_handoff, ``emit_handoffs``): when control transfers + + Tracing (when ``trace_id`` is provided): + - One SGP span per tool call, named after the tool, with the tool + arguments as the span input and the tool result as the span output. Usage: Basic usage - streams all lifecycle events:: @@ -40,6 +82,18 @@ class TemporalStreamingHooks(LLMMetricsHooks): hooks = TemporalStreamingHooks(task_id="abc123") result = await Runner.run(agent, input, hooks=hooks) + Paired with the streaming model provider (it already streams the tool + REQUEST, so suppress that here — but keep responses, which the model does + not emit). 
Prefer ``run_turn`` which wires this for you:: + + hooks = TemporalStreamingHooks( + task_id="abc123", + emit_tool_requests=False, + emit_tool_responses=True, + trace_id=trace_id, + parent_span_id=parent_span_id, + ) + Advanced - subclass for custom behavior:: class MyCustomHooks(TemporalStreamingHooks): @@ -62,22 +116,82 @@ async def on_agent_start(self, context, agent): Attributes: task_id: The AgentEx task ID for routing streamed events timeout: Timeout for streaming activity calls (default: 10 seconds) + emit_tool_requests: Whether to stream the ToolRequestContent on tool start + emit_tool_responses: Whether to stream the ToolResponseContent on tool end + emit_handoffs: Whether to stream the handoff text message + trace_id: When set, tool calls are traced to SGP (input + output) + parent_span_id: Parent span for the per-tool spans """ def __init__( self, task_id: str, timeout: timedelta = timedelta(seconds=10), + *, + emit_tool_requests: bool = True, + emit_tool_responses: bool = True, + emit_handoffs: bool = True, + trace_id: str | None = None, + parent_span_id: str | None = None, ): """Initialize the streaming hooks. + Request and response emission are independently switchable because the + ``TemporalStreamingModelProvider`` emits a function tool's REQUEST from + the model output but NOT its response — the function result only ever + surfaces here via ``on_tool_end``. So when pairing with that provider, + set ``emit_tool_requests=False`` (the model already posted the request) + but keep ``emit_tool_responses=True`` (otherwise the tool-call "done" + events disappear). ``run_turn`` wires this correctly for you. + Args: task_id: AgentEx task ID for routing streamed events to the correct UI session timeout: Timeout for streaming activity invocations (default: 10 seconds) + emit_tool_requests: When True (default) stream a ToolRequestContent on + tool start. Set False when a streaming model provider already + emits the request, to avoid double-posting it. 
+ emit_tool_responses: When True (default) stream a ToolResponseContent + on tool end. Keep True with the streaming model provider — it does + NOT emit function-tool responses, so this is their only source. + emit_handoffs: When True (default) stream a handoff text message. + trace_id: When provided, open an SGP span per tool call (named after + the tool) with the arguments as input and the result as output. When None, + no tool spans are created (token-usage metrics still emit). + parent_span_id: Parent span id the per-tool spans attach to. """ super().__init__() self.task_id = task_id self.timeout = timeout + self.emit_tool_requests = emit_tool_requests + self.emit_tool_responses = emit_tool_responses + self.emit_handoffs = emit_handoffs + self.trace_id = trace_id + self.parent_span_id = parent_span_id + # tool_call_id -> open SGP span, so on_tool_end closes the right one. + self._tool_spans: dict[str, Any] = {} + + @staticmethod + def _tool_call_id(context: RunContextWrapper, tool: Tool) -> str: + tool_context = context if isinstance(context, ToolContext) else None + return getattr(tool_context, "tool_call_id", None) or f"call_{id(tool)}" + + @staticmethod + def _parse_tool_arguments(context: RunContextWrapper) -> dict[str, Any]: + """Parse the JSON ``tool_arguments`` off a ToolContext into a dict. + + Returns an empty dict for a non-ToolContext or unparseable arguments — + a tool call must never fail because its args could not be displayed. 
+ """ + tool_context = context if isinstance(context, ToolContext) else None + raw = getattr(tool_context, "tool_arguments", None) + if not raw: + return {} + try: + parsed = json.loads(raw) + except (json.JSONDecodeError, TypeError): + logger.warning(f"Failed to parse tool arguments: {raw!r}") + return {} + return parsed if isinstance(parsed, dict) else {"value": parsed} @override async def on_agent_start(self, context: RunContextWrapper, agent: Agent) -> None: # noqa: ARG002 @@ -102,100 +216,101 @@ async def on_agent_end(self, context: RunContextWrapper, agent: Agent, output: A agent: The agent that completed output: The agent's output """ - logger.debug(f"[TemporalStreamingHooks] Agent '{agent.name}' completed execution with output type: {type(output).__name__}") + logger.debug( + f"[TemporalStreamingHooks] Agent '{agent.name}' completed execution with output type: {type(output).__name__}" + ) @override async def on_tool_start(self, context: RunContextWrapper, agent: Agent, tool: Tool) -> None: # noqa: ARG002 - """Stream tool request when a tool starts execution. + """Stream the tool request (optional) and open a traced span (optional). - Extracts the tool_call_id and tool_arguments from the context and streams a - ToolRequestContent message to the UI showing that the tool is about to execute. + Streams a ToolRequestContent message when ``emit_tool_requests`` is True, + and opens an SGP span named after the tool (input = arguments) when + ``trace_id`` is set. Both read the same parsed arguments. 
Args: - context: The run context wrapper (will be a ToolContext with tool_call_id and tool_arguments) + context: The run context wrapper (a ToolContext with tool_call_id and tool_arguments) agent: The agent executing the tool tool: The tool being executed """ - import json - - tool_context = context if isinstance(context, ToolContext) else None - tool_call_id = tool_context.tool_call_id if tool_context else f"call_{id(tool)}" - - # Extract tool arguments from context - tool_arguments = {} - if tool_context and hasattr(tool_context, 'tool_arguments'): - try: - # tool_arguments is a JSON string, parse it - tool_arguments = json.loads(tool_context.tool_arguments) - except (json.JSONDecodeError, TypeError): - # If parsing fails, log and use empty dict - logger.warning(f"Failed to parse tool arguments: {tool_context.tool_arguments}") - tool_arguments = {} - - await workflow.execute_activity( - stream_lifecycle_content, - args=[ - self.task_id, - ToolRequestContent( - author="agent", - tool_call_id=tool_call_id, - name=tool.name, - arguments=tool_arguments, - ).model_dump(), - ], - start_to_close_timeout=self.timeout, - ) + tool_call_id = self._tool_call_id(context, tool) + tool_arguments = self._parse_tool_arguments(context) + + if self.emit_tool_requests: + await workflow.execute_activity( + stream_lifecycle_content, + args=[ + self.task_id, + ToolRequestContent( + author="agent", + tool_call_id=tool_call_id, + name=tool.name, + arguments=tool_arguments, + ).model_dump(), + ], + start_to_close_timeout=self.timeout, + ) + + await self._maybe_start_tool_span(tool_call_id, tool.name, tool_arguments) @override async def on_tool_end( - self, context: RunContextWrapper, agent: Agent, tool: Tool, result: str # noqa: ARG002 + self, + context: RunContextWrapper, + agent: Agent, # noqa: ARG002 + tool: Tool, + result: str, ) -> None: - """Stream tool response when a tool completes execution. + """Stream the tool response (optional) and close the traced span (optional). 
- Extracts the tool_call_id and streams a ToolResponseContent message to the UI - showing the tool's execution result. + Streams a ToolResponseContent message when ``emit_tool_responses`` is True, + and closes the matching tool span (output = result) when one was + opened in on_tool_start. Args: - context: The run context wrapper (will be a ToolContext with tool_call_id) + context: The run context wrapper (a ToolContext with tool_call_id) agent: The agent that executed the tool tool: The tool that was executed result: The tool's execution result """ - tool_context = context if isinstance(context, ToolContext) else None - tool_call_id = ( - getattr(tool_context, "tool_call_id", f"call_{id(tool)}") - if tool_context - else f"call_{id(tool)}" - ) - - await workflow.execute_activity( - stream_lifecycle_content, - args=[ - self.task_id, - ToolResponseContent( - author="agent", - tool_call_id=tool_call_id, - name=tool.name, - content=result, - ).model_dump(), - ], - start_to_close_timeout=self.timeout, - ) + tool_call_id = self._tool_call_id(context, tool) + + if self.emit_tool_responses: + await workflow.execute_activity( + stream_lifecycle_content, + args=[ + self.task_id, + ToolResponseContent( + author="agent", + tool_call_id=tool_call_id, + name=tool.name, + content=result, + ).model_dump(), + ], + start_to_close_timeout=self.timeout, + ) + + await self._maybe_end_tool_span(tool_call_id, result) @override async def on_handoff( - self, context: RunContextWrapper, from_agent: Agent, to_agent: Agent # noqa: ARG002 + self, + context: RunContextWrapper, + from_agent: Agent, + to_agent: Agent, # noqa: ARG002 ) -> None: """Stream handoff message when control transfers between agents. Sends a text message to the UI indicating that one agent is handing off - to another agent. + to another agent. No-op when ``emit_handoffs`` is False. 
Args: context: The run context wrapper from_agent: The agent transferring control to_agent: The agent receiving control """ + if not self.emit_handoffs: + return await workflow.execute_activity( stream_lifecycle_content, args=[ @@ -208,3 +323,73 @@ async def on_handoff( ], start_to_close_timeout=self.timeout, ) + + async def _maybe_start_tool_span(self, tool_call_id: str, tool_name: str, arguments: dict[str, Any]) -> None: + """Open a span named after the tool with the arguments as input. + + The span name is the bare ``tool_name`` (no prefix) to match the shared + unified-harness span reducer (``core/harness/span_derivation.py``), so + OpenAI Temporal traces look the same as every other harness. + + Best-effort: tracing must never break a tool call, so any failure is + logged and swallowed. No-op when ``trace_id`` is not set. + """ + if not self.trace_id: + return + try: + span = await _get_adk().tracing.start_span( + trace_id=self.trace_id, + parent_id=self.parent_span_id, + name=tool_name, + input={"arguments": arguments}, + start_to_close_timeout=_TRACE_TIMEOUT, + ) + if span is not None: + self._tool_spans[tool_call_id] = span + except Exception as e: # noqa: BLE001 - tracing is best-effort + logger.warning(f"[tracing] tool start_span failed (non-fatal): {e}") + + async def _maybe_end_tool_span(self, tool_call_id: str, result: Any) -> None: + """Close the span opened for ``tool_call_id`` with the result as output.""" + span = self._tool_spans.pop(tool_call_id, None) + if span is None or not self.trace_id: + return + try: + span.output = {"result": str(result)[:_MAX_SPAN_OUTPUT_CHARS]} + await _get_adk().tracing.end_span( + trace_id=self.trace_id, + span=span, + start_to_close_timeout=_TRACE_TIMEOUT, + ) + except Exception as e: # noqa: BLE001 - tracing is best-effort + logger.warning(f"[tracing] tool end_span failed (non-fatal): {e}") + + async def close_open_tool_spans(self) -> None: + """Close any tool spans still open because ``on_tool_end`` never fired. 
+ + ``on_tool_start`` opens a span that ``on_tool_end`` is expected to close. + If the runner terminates mid-tool (max-turns exceeded, cancellation, an + unexpected SDK exception), the matching ``on_tool_end`` never runs and the + span would otherwise stay open forever — orphaned in the tracing backend. + Call this from a ``finally`` around ``Runner.run`` to drain the leftovers. + + Best-effort, like the rest of tracing: each span is closed with an + ``incomplete`` marker and any failure is logged and swallowed. + """ + if not self._tool_spans: + return + orphaned = list(self._tool_spans.items()) + self._tool_spans.clear() + for tool_call_id, span in orphaned: + logger.warning( + f"[tracing] tool span for {tool_call_id} left open (on_tool_end never fired); closing as incomplete" + ) + try: + span.output = {"result": None, "status": "incomplete"} + await _get_adk().tracing.end_span( + trace_id=self.trace_id, + span=span, + start_to_close_timeout=_TRACE_TIMEOUT, + ) + except Exception as e: # noqa: BLE001 - tracing is best-effort + logger.warning(f"[tracing] orphan tool end_span failed (non-fatal): {e}") diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py b/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py index 75dc0f053..7c8690f21 100644 --- a/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py +++ b/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py @@ -66,7 +66,7 @@ from agentex.lib.core.tracing.tracer import AsyncTracer from agentex.types.task_message_delta import TextDelta, ToolRequestDelta, ReasoningContentDelta, ReasoningSummaryDelta from agentex.types.task_message_update import StreamTaskMessageFull, StreamTaskMessageDelta -from agentex.types.task_message_content import TextContent, ReasoningContent, ToolRequestContent +from agentex.types.task_message_content import TextContent, ReasoningContent, 
ToolRequestContent, ToolResponseContent from agentex.lib.adk.utils._modules.client import create_async_agentex_client from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import ( streaming_task_id, @@ -123,6 +123,103 @@ def _serialize_item(item: Any) -> dict[str, Any]: return item_dict +# Responses-API output items for server-side / hosted tools. These execute inside +# the Responses API, so they never become function_call items AND the SDK's +# RunHooks (on_tool_start/on_tool_end) never fire for them. The streaming loop +# must surface them explicitly, as a tool request + response pair, when the item +# completes (by then it carries the full query/result). +_HOSTED_TOOL_TYPES = frozenset( + { + "web_search_call", + "file_search_call", + "code_interpreter_call", + "image_generation_call", + "mcp_call", + "computer_call", + "local_shell_call", + } +) + +# Cap on the rendered hosted-tool result string (UI / trace readability). +_HOSTED_TOOL_RESULT_CAP = 2000 + + +def _coerce_args(raw: Any) -> dict[str, Any]: + """Best-effort coerce a hosted-tool's arguments to a dict for the UI.""" + if raw is None: + return {} + if isinstance(raw, dict): + return raw + if isinstance(raw, str): + try: + parsed = json.loads(raw) + return parsed if isinstance(parsed, dict) else {"value": parsed} + except (json.JSONDecodeError, ValueError): + return {"raw": raw} + serialized = _serialize_item(raw) + return serialized if isinstance(serialized, dict) else {"value": str(raw)} + + +def _hosted_tool_request(item: Any) -> tuple[str, str, dict[str, Any]]: + """Extract (call_id, display_name, arguments) from a hosted-tool item.""" + itype = getattr(item, "type", "") or "" + call_id = ( + getattr(item, "id", "") + or getattr(item, "call_id", "") + or f"hosted_{uuid.uuid4().hex[:8]}" + ) + name = itype[:-5] if itype.endswith("_call") else itype # web_search_call -> web_search + args: dict[str, Any] = {} + if itype == "web_search_call": + action = getattr(item, 
"action", None) + if action is not None: + args = _coerce_args(action) + elif itype == "file_search_call": + args = {"queries": list(getattr(item, "queries", []) or [])} + elif itype == "code_interpreter_call": + args = {"code": getattr(item, "code", "") or ""} + elif itype in ("computer_call", "local_shell_call"): + # Both carry an `action` object: a ComputerAction (click/scroll/type/...) + # or a LocalShellCallAction (command/env/cwd). Surface it as the args so + # the trace shows what the tool actually did, not just its status. + action = getattr(item, "action", None) + if action is not None: + args = _coerce_args(action) + elif itype == "mcp_call": + mcp_name = getattr(item, "name", None) or "mcp" + server = getattr(item, "server_label", None) + name = f"{server}.{mcp_name}" if server else mcp_name + args = _coerce_args(getattr(item, "arguments", None)) + return call_id, name, args + + +def _hosted_tool_result(item: Any) -> str: + """Extract a short result string from a completed hosted-tool item.""" + itype = getattr(item, "type", "") or "" + if itype == "mcp_call": + err = getattr(item, "error", None) + if err: + return f"error: {err}" + out = getattr(item, "output", None) + if out: + return str(out) + elif itype == "code_interpreter_call": + outputs = getattr(item, "outputs", None) + if outputs: + return json.dumps([_serialize_item(o) for o in outputs])[:_HOSTED_TOOL_RESULT_CAP] + elif itype == "file_search_call": + results = getattr(item, "results", None) + if results: + return json.dumps([_serialize_item(r) for r in results])[:_HOSTED_TOOL_RESULT_CAP] + elif itype == "image_generation_call": + # `result` is base64 image data; surface a compact reference instead of + # dumping the (large) payload into the trace. 
+ result = getattr(item, "result", None) + if result: + return f"" + return str(getattr(item, "status", "completed") or "completed") + + class TemporalStreamingModel(Model): """Custom model implementation with streaming support.""" @@ -481,6 +578,31 @@ def _convert_tool_choice(self, tool_choice: Any) -> Any: # Pass through as-is for other types return tool_choice + async def _post_tool_message(self, task_id: str, content: Any) -> None: + """Post a one-shot tool request/response message (no deltas). + + Used for hosted/server-side tool calls (web_search, file_search, + code_interpreter, image generation, server-side mcp, ...) that execute + inside the Responses API and so never produce function_call items or fire + RunHooks. Each completed hosted tool is surfaced as a ToolRequestContent + + ToolResponseContent pair. Posting full (no deltas) means the coalescing + path that the streamed reasoning/text contexts use does not apply here. + """ + try: + async with adk.streaming.streaming_task_message_context( + task_id=task_id, + initial_content=content, + ) as ctx: + await ctx.stream_update( + StreamTaskMessageFull( + parent_task_message=ctx.task_message, + content=content, + type="full", + ) + ) + except Exception as e: # noqa: BLE001 - UI surfacing must never break a turn + logger.warning(f"[TemporalStreamingModel] failed to post hosted-tool message: {e}") + @override async def get_response( self, @@ -942,6 +1064,36 @@ async def get_response( finally: call_data['context'] = None + elif item and getattr(item, 'type', None) in _HOSTED_TOOL_TYPES: + # Hosted / server-side tool call (web_search, file_search, + # code_interpreter, image generation, server-side mcp, ...). + # These run inside the Responses API: no function_call item + # and no RunHooks fire, so surface the completed call as a + # tool request + response pair (it carries the full + # query/result by the time it's done). 
+ call_id, name, args = _hosted_tool_request(item) + await self._post_tool_message( + task_id, + ToolRequestContent( + author="agent", + tool_call_id=call_id, + name=name, + arguments=args, + ), + ) + await self._post_tool_message( + task_id, + ToolResponseContent( + author="agent", + tool_call_id=call_id, + name=name, + # Plain string, matching the function-tool response + # path (hooks.on_tool_end) so hosted and function + # tools render identically in the same flow. + content=_hosted_tool_result(item)[:_HOSTED_TOOL_RESULT_CAP], + ), + ) + elif isinstance(event, ResponseReasoningSummaryPartAddedEvent): # New reasoning part/summary started - reset accumulator part = getattr(event, 'part', None) diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/run.py b/src/agentex/lib/core/temporal/plugins/openai_agents/run.py new file mode 100644 index 000000000..0fb21bfe4 --- /dev/null +++ b/src/agentex/lib/core/temporal/plugins/openai_agents/run.py @@ -0,0 +1,161 @@ +"""``run_turn`` — the unified entry point for the OpenAI Agents Temporal harness. + +This is the ``Runner.run`` analogue of the CLI harness's +``UnifiedEmitter.auto_send_turn``: it owns the repeatable per-turn concerns so +agents don't hand-roll them. + +What it does: + +1. Runs the agent via ``Runner.run`` with hooks that emit each tool call exactly + ONCE. The ``TemporalStreamingModelProvider`` already streams the tool-call + request from the model output, so the default hooks are wired with + ``emit_tool_requests=False`` to avoid the double-post; they still emit tool + responses, trace tool calls (input + output) and emit token-usage metrics. +2. Normalizes token usage off the run result into a harness-independent + ``TurnUsage`` so callers can attach it to the turn span / task metadata, + matching what the CLI harness reports. + +What it deliberately does NOT do: sandboxing. Sandbox provisioning is a +composable concern carried on ``RunConfig`` (the SDK's ``SandboxRunConfig``) and +is passed straight through.
Agent-specific lifecycle UI (e.g. surfacing sandbox +provisioning as a tool card) belongs in a caller-supplied ``hooks`` subclass, +not here. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any +from dataclasses import dataclass + +from agents import Runner + +from agentex.lib.utils.logging import make_logger +from agentex.lib.core.harness.types import TurnUsage +from agentex.lib.core.temporal.plugins.openai_agents.hooks.hooks import TemporalStreamingHooks + +if TYPE_CHECKING: + from agents import RunHooks, RunConfig + from agents.result import RunResult + +logger = make_logger(__name__) + +# Mirror the OpenAI Agents SDK default; callers can override per turn. +_DEFAULT_MAX_TURNS = 10 + + +@dataclass +class OpenAIAgentsTurnResult: + """The raw SDK run result plus normalized agentex usage. + + The raw ``result`` is kept so callers retain ``final_output``, + ``to_input_list()`` and any provider extras (e.g. sandbox resume state); + ``usage`` is the harness-independent token/cost summary for the turn span. + """ + + result: "RunResult" + usage: TurnUsage + + @property + def final_output(self) -> Any: + return self.result.final_output + + +def _extract_turn_usage(result: "RunResult", *, model: str | None = None) -> TurnUsage: + """Map the SDK's aggregated ``context_wrapper.usage`` onto ``TurnUsage``. + + Tolerant of a missing/partial Usage shape (non-OpenAI providers routed via + litellm may omit the nested token details) — absent fields stay None. 
+ """ + usage = getattr(getattr(result, "context_wrapper", None), "usage", None) + if usage is None: + return TurnUsage(model=model) + + input_details = getattr(usage, "input_tokens_details", None) + output_details = getattr(usage, "output_tokens_details", None) + return TurnUsage( + model=model, + input_tokens=getattr(usage, "input_tokens", None), + output_tokens=getattr(usage, "output_tokens", None), + total_tokens=getattr(usage, "total_tokens", None), + cached_input_tokens=getattr(input_details, "cached_tokens", None), + reasoning_tokens=getattr(output_details, "reasoning_tokens", None), + num_llm_calls=getattr(usage, "requests", None), + ) + + +async def run_turn( + starting_agent: Any, + input: Any, + *, + task_id: str, + trace_id: str | None = None, + parent_span_id: str | None = None, + run_config: "RunConfig | None" = None, + hooks: "RunHooks | None" = None, + model: str | None = None, + max_turns: int = _DEFAULT_MAX_TURNS, +) -> OpenAIAgentsTurnResult: + """Run one agent turn and return the result plus normalized usage. + + Args: + starting_agent: The agent to run. + input: The input list / string passed to ``Runner.run``. + task_id: AgentEx task id for streaming. + trace_id: When set, tool calls are traced to SGP (input + output). Only + applied when ``hooks`` is omitted (it flows into the default + ``TemporalStreamingHooks``). Ignored when you pass your own ``hooks`` + — see ``hooks`` below. + parent_span_id: Parent span for the per-tool spans (typically the turn + span). Same caveat as ``trace_id``: only applied to the default hooks. + run_config: Forwarded to ``Runner.run`` verbatim (carries the model + provider and any ``SandboxRunConfig``). Left untouched here. + hooks: Optional hooks override. 
When omitted, a default + ``TemporalStreamingHooks(emit_tool_requests=False, ...)`` is used so + the streaming model is the sole tool-REQUEST emitter while the hooks + still emit tool RESPONSES (the model does not), and ``trace_id`` / + ``parent_span_id`` are forwarded into it. When you pass your own + subclass (also with ``emit_tool_requests=False``) to add agent-specific + lifecycle behavior such as a sandbox-ready card, ``trace_id`` and + ``parent_span_id`` are NOT applied for you — pass them to your + subclass's constructor yourself if you want tool spans traced. + model: Model name recorded on the returned usage; derived from the agent + when not supplied. + max_turns: Forwarded to ``Runner.run``. + + Returns: + OpenAIAgentsTurnResult with the raw run result and normalized usage. + """ + if hooks is None: + hooks = TemporalStreamingHooks( + task_id=task_id, + # The streaming model already posts the tool REQUEST, so suppress it + # here (no double-post) — but keep responses, which the model does not + # emit for function tools (on_tool_end is their only source). + emit_tool_requests=False, + emit_tool_responses=True, + trace_id=trace_id, + parent_span_id=parent_span_id, + ) + + run_kwargs: dict[str, Any] = {"hooks": hooks, "max_turns": max_turns} + if run_config is not None: + run_kwargs["run_config"] = run_config + + try: + result = await Runner.run(starting_agent, input, **run_kwargs) + finally: + # If the runner terminated mid-tool (max-turns, cancellation, SDK error), + # on_tool_end never fired for the in-flight call, leaving its span open. + # Drain any leftovers so they don't orphan in the tracing backend. 
+ if isinstance(hooks, TemporalStreamingHooks): + await hooks.close_open_tool_spans() + + resolved_model = model + if resolved_model is None: + agent_model = getattr(starting_agent, "model", None) + resolved_model = str(agent_model) if agent_model else None + + return OpenAIAgentsTurnResult( + result=result, + usage=_extract_turn_usage(result, model=resolved_model), + ) diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_hosted_tools.py b/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_hosted_tools.py new file mode 100644 index 000000000..066d6f2ed --- /dev/null +++ b/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_hosted_tools.py @@ -0,0 +1,135 @@ +"""Unit tests for hosted/server-side tool rendering helpers. + +These cover the pure extraction helpers used by TemporalStreamingModel to surface +Responses-API hosted tools (web_search, file_search, code_interpreter, mcp, ...) +as ToolRequest/ToolResponse pairs. They never become function_call items, so the +streaming loop must render them explicitly. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace + +from openai.types.responses.response_output_item import ( + LocalShellCall, + ImageGenerationCall, + LocalShellCallAction, +) +from openai.types.responses.response_computer_tool_call import ActionClick, ResponseComputerToolCall +from openai.types.responses.response_function_web_search import ActionSearch, ResponseFunctionWebSearch + +from agentex.lib.core.temporal.plugins.openai_agents.models.temporal_streaming_model import ( + _HOSTED_TOOL_TYPES, + _coerce_args, + _hosted_tool_result, + _hosted_tool_request, +) + + +def test_hosted_tool_types_membership(): + for t in ("web_search_call", "file_search_call", "code_interpreter_call", + "image_generation_call", "mcp_call", "computer_call", "local_shell_call"): + assert t in _HOSTED_TOOL_TYPES + assert "function_call" not in _HOSTED_TOOL_TYPES + + +def test_coerce_args_variants(): + assert _coerce_args(None) == {} + assert _coerce_args({"a": 1}) == {"a": 1} + assert _coerce_args('{"a": 1}') == {"a": 1} + assert _coerce_args("[1, 2]") == {"value": [1, 2]} + assert _coerce_args("not json") == {"raw": "not json"} + + +def test_hosted_tool_request_web_search(): + # Use the real Responses-API type to prove `action` is a genuine SDK field + # (it is on ResponseFunctionWebSearch), not a hand-crafted stand-in. 
+ item = ResponseFunctionWebSearch( + id="ws_1", + status="completed", + type="web_search_call", + action=ActionSearch(type="search", query="agentex"), + ) + call_id, name, args = _hosted_tool_request(item) + assert call_id == "ws_1" + assert name == "web_search" # "_call" stripped + assert args["query"] == "agentex" + assert args["type"] == "search" + + +def test_hosted_tool_request_computer_call(): + item = ResponseComputerToolCall( + id="cc_1", + call_id="ccall_1", + type="computer_call", + status="completed", + pending_safety_checks=[], + action=ActionClick(type="click", button="left", x=10, y=20), + ) + call_id, name, args = _hosted_tool_request(item) + assert call_id == "cc_1" + assert name == "computer" + assert args["type"] == "click" + assert args["button"] == "left" + assert args["x"] == 10 and args["y"] == 20 + + +def test_hosted_tool_request_local_shell_call(): + item = LocalShellCall( + id="ls_1", + call_id="lscall_1", + type="local_shell_call", + status="completed", + action=LocalShellCallAction(type="exec", command=["ls", "-la"], env={}), + ) + call_id, name, args = _hosted_tool_request(item) + assert call_id == "ls_1" + assert name == "local_shell" + assert args["command"] == ["ls", "-la"] + + +def test_hosted_tool_request_mcp_uses_server_label(): + item = SimpleNamespace(type="mcp_call", id="m_1", name="search", + server_label="linear", arguments='{"q": "x"}') + call_id, name, args = _hosted_tool_request(item) + assert call_id == "m_1" + assert name == "linear.search" + assert args == {"q": "x"} + + +def test_hosted_tool_request_file_search_queries(): + item = SimpleNamespace(type="file_search_call", id="fs_1", + queries=["q1", "q2"]) + _, name, args = _hosted_tool_request(item) + assert name == "file_search" + assert args == {"queries": ["q1", "q2"]} + + +def test_hosted_tool_request_falls_back_to_generated_id(): + item = SimpleNamespace(type="code_interpreter_call", code="print(1)") + call_id, name, args = _hosted_tool_request(item) + assert 
call_id.startswith("hosted_") + assert name == "code_interpreter" + assert args == {"code": "print(1)"} + + +def test_hosted_tool_result_mcp_error_and_output(): + err_item = SimpleNamespace(type="mcp_call", error="boom") + assert "boom" in _hosted_tool_result(err_item) + ok_item = SimpleNamespace(type="mcp_call", error=None, output="done") + assert _hosted_tool_result(ok_item) == "done" + + +def test_hosted_tool_result_image_generation(): + item = ImageGenerationCall( + id="ig_1", + type="image_generation_call", + status="completed", + result="QUJD", # 4 chars of (fake) base64 + ) + assert _hosted_tool_result(item) == "" + + +def test_hosted_tool_result_falls_back_to_status(): + item = SimpleNamespace(type="web_search_call", status="completed") + assert _hosted_tool_result(item) == "completed" diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_run_turn_and_hooks.py b/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_run_turn_and_hooks.py new file mode 100644 index 000000000..244182ac5 --- /dev/null +++ b/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_run_turn_and_hooks.py @@ -0,0 +1,247 @@ +"""Tests for the unified OpenAI-Agents turn surface. + +Covers: +- ``TemporalStreamingHooks`` message-emission gating (``emit_messages``), so the + streaming model can be the sole tool-message emitter (no double-post). +- ``TemporalStreamingHooks`` input-bearing tool spans (input = arguments, + output = result) when a ``trace_id`` is provided. +- ``run_turn`` usage extraction and default-hooks wiring. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest +from agents.tool_context import ToolContext + +from agentex.lib.core.temporal.plugins.openai_agents import run as run_mod +from agentex.lib.core.temporal.plugins.openai_agents.hooks import hooks as hooks_mod + +TemporalStreamingHooks = hooks_mod.TemporalStreamingHooks + + +def _tool_context(args: str = '{"query": "hi"}') -> ToolContext: + return ToolContext( + context=None, + tool_name="search", + tool_call_id="call_abc", + tool_arguments=args, + ) + + +def _tool() -> MagicMock: + tool = MagicMock() + tool.name = "search" + return tool + + +# --------------------------------------------------------------------------- # +# Argument parsing +# --------------------------------------------------------------------------- # + + +def test_parse_tool_arguments_valid_dict(): + assert TemporalStreamingHooks._parse_tool_arguments(_tool_context('{"a": 1}')) == {"a": 1} + + +def test_parse_tool_arguments_garbage_is_empty(): + assert TemporalStreamingHooks._parse_tool_arguments(_tool_context("not json")) == {} + + +def test_parse_tool_arguments_non_tool_context_is_empty(): + assert TemporalStreamingHooks._parse_tool_arguments(SimpleNamespace()) == {} + + +# --------------------------------------------------------------------------- # +# Message emission gating (the double-post fix + the response-survival guard) +# --------------------------------------------------------------------------- # + + +@pytest.mark.asyncio +async def test_defaults_stream_tool_request(monkeypatch): + exec_activity = AsyncMock() + monkeypatch.setattr(hooks_mod.workflow, "execute_activity", exec_activity) + + hooks = TemporalStreamingHooks(task_id="t1") # all emit flags default True + await hooks.on_tool_start(_tool_context(), MagicMock(), _tool()) + + exec_activity.assert_awaited_once() + # args=[task_id, ToolRequestContent.model_dump()] + _, kwargs = 
exec_activity.call_args + payload = kwargs["args"][1] + assert payload["name"] == "search" + assert payload["arguments"] == {"query": "hi"} + + +@pytest.mark.asyncio +async def test_requests_off_skips_request_but_keeps_response(monkeypatch): + """The streaming-model pairing: suppress the duplicate REQUEST, but the + RESPONSE must still emit (the model never emits function-tool responses).""" + exec_activity = AsyncMock() + monkeypatch.setattr(hooks_mod.workflow, "execute_activity", exec_activity) + + hooks = TemporalStreamingHooks(task_id="t1", emit_tool_requests=False, emit_tool_responses=True) + await hooks.on_tool_start(_tool_context(), MagicMock(), _tool()) + exec_activity.assert_not_awaited() # request suppressed + + await hooks.on_tool_end(_tool_context(), MagicMock(), _tool(), "the result") + exec_activity.assert_awaited_once() # response still emitted + _, kwargs = exec_activity.call_args + payload = kwargs["args"][1] + assert payload["name"] == "search" + assert payload["content"] == "the result" + + +@pytest.mark.asyncio +async def test_responses_off_skips_response(monkeypatch): + exec_activity = AsyncMock() + monkeypatch.setattr(hooks_mod.workflow, "execute_activity", exec_activity) + + hooks = TemporalStreamingHooks(task_id="t1", emit_tool_responses=False) + await hooks.on_tool_end(_tool_context(), MagicMock(), _tool(), "result") + + exec_activity.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_emit_handoffs_false_skips_handoff(monkeypatch): + exec_activity = AsyncMock() + monkeypatch.setattr(hooks_mod.workflow, "execute_activity", exec_activity) + + hooks = TemporalStreamingHooks(task_id="t1", emit_handoffs=False) + await hooks.on_handoff(MagicMock(), MagicMock(name="from"), MagicMock(name="to")) + + exec_activity.assert_not_awaited() + + +# --------------------------------------------------------------------------- # +# Input-bearing tool spans (the "traces have outputs but no inputs" fix) +# 
--------------------------------------------------------------------------- # + + +@pytest.mark.asyncio +async def test_tool_span_carries_input_and_output(monkeypatch): + monkeypatch.setattr(hooks_mod.workflow, "execute_activity", AsyncMock()) + span = SimpleNamespace(output=None) + start_span = AsyncMock(return_value=span) + end_span = AsyncMock() + fake_adk = SimpleNamespace(tracing=SimpleNamespace(start_span=start_span, end_span=end_span)) + monkeypatch.setattr(hooks_mod, "_get_adk", lambda: fake_adk) + + hooks = TemporalStreamingHooks( + task_id="t1", emit_tool_requests=False, trace_id="trace-1", parent_span_id="parent-1" + ) + await hooks.on_tool_start(_tool_context(), MagicMock(), _tool()) + + start_span.assert_awaited_once() + _, kwargs = start_span.call_args + assert kwargs["name"] == "search" + assert kwargs["parent_id"] == "parent-1" + assert kwargs["input"] == {"arguments": {"query": "hi"}} + + await hooks.on_tool_end(_tool_context(), MagicMock(), _tool(), "the answer") + end_span.assert_awaited_once() + assert span.output == {"result": "the answer"} + + +@pytest.mark.asyncio +async def test_no_trace_id_means_no_span(monkeypatch): + monkeypatch.setattr(hooks_mod.workflow, "execute_activity", AsyncMock()) + start_span = AsyncMock() + fake_adk = SimpleNamespace(tracing=SimpleNamespace(start_span=start_span)) + monkeypatch.setattr(hooks_mod, "_get_adk", lambda: fake_adk) + + hooks = TemporalStreamingHooks(task_id="t1", emit_tool_requests=False, trace_id=None) + await hooks.on_tool_start(_tool_context(), MagicMock(), _tool()) + + start_span.assert_not_awaited() + + +# --------------------------------------------------------------------------- # +# Usage extraction +# --------------------------------------------------------------------------- # + + +def _result_with_usage() -> SimpleNamespace: + usage = SimpleNamespace( + requests=3, + input_tokens=100, + output_tokens=40, + total_tokens=140, + input_tokens_details=SimpleNamespace(cached_tokens=20), + 
output_tokens_details=SimpleNamespace(reasoning_tokens=10), + ) + return SimpleNamespace(context_wrapper=SimpleNamespace(usage=usage), final_output="done") + + +def test_extract_turn_usage_maps_fields(): + usage = run_mod._extract_turn_usage(_result_with_usage(), model="openai/gpt-5.5") + assert usage.model == "openai/gpt-5.5" + assert usage.input_tokens == 100 + assert usage.output_tokens == 40 + assert usage.total_tokens == 140 + assert usage.cached_input_tokens == 20 + assert usage.reasoning_tokens == 10 + assert usage.num_llm_calls == 3 + + +def test_extract_turn_usage_missing_usage_is_tolerant(): + usage = run_mod._extract_turn_usage(SimpleNamespace(), model="m") + assert usage.model == "m" + assert usage.input_tokens is None + assert usage.num_llm_calls is None + + +# --------------------------------------------------------------------------- # +# run_turn +# --------------------------------------------------------------------------- # + + +@pytest.mark.asyncio +async def test_run_turn_returns_usage_and_passes_through_result(monkeypatch): + fake_result = _result_with_usage() + runner_run = AsyncMock(return_value=fake_result) + monkeypatch.setattr(run_mod.Runner, "run", runner_run) + + agent = SimpleNamespace(model="openai/gpt-5.5") + out = await run_mod.run_turn( + agent, + [{"role": "user", "content": "hi"}], + task_id="t1", + trace_id="trace-1", + parent_span_id="parent-1", + ) + + assert isinstance(out, run_mod.OpenAIAgentsTurnResult) + assert out.final_output == "done" + assert out.usage.total_tokens == 140 + assert out.usage.model == "openai/gpt-5.5" + + # Default hooks must be wired so the streaming model is the sole tool-REQUEST + # emitter, while the hooks still emit tool RESPONSES (the model does not). 
+ runner_run.assert_awaited_once() + _, kwargs = runner_run.call_args + hooks = kwargs["hooks"] + assert hooks.emit_tool_requests is False + assert hooks.emit_tool_responses is True + assert hooks.trace_id == "trace-1" + assert hooks.parent_span_id == "parent-1" + + +@pytest.mark.asyncio +async def test_run_turn_respects_supplied_hooks(monkeypatch): + runner_run = AsyncMock(return_value=_result_with_usage()) + monkeypatch.setattr(run_mod.Runner, "run", runner_run) + + custom_hooks = TemporalStreamingHooks(task_id="t1", emit_tool_requests=False) + await run_mod.run_turn( + SimpleNamespace(model="m"), + "hi", + task_id="t1", + hooks=custom_hooks, + ) + + _, kwargs = runner_run.call_args + assert kwargs["hooks"] is custom_hooks diff --git a/tests/lib/adk/providers/test_openai_activities.py b/tests/lib/adk/providers/test_openai_activities.py index 2f89308a9..964b24545 100644 --- a/tests/lib/adk/providers/test_openai_activities.py +++ b/tests/lib/adk/providers/test_openai_activities.py @@ -653,7 +653,7 @@ def _assert_tools_conversion(self, starting_agent, tools_case, _original_tools): @patch("agents.Runner.run_streamed") async def test_run_agent_streamed_auto_send_forwards_created_at(self, mock_runner_run_streamed): - """created_at is forwarded to every streaming context opened by auto_send_turn (AGX1-378).""" + """created_at is forwarded to every streaming context opened by auto_send_turn.""" from datetime import datetime, timezone from agentex.lib.core.temporal.activities.adk.providers.openai_activities import ( diff --git a/tests/lib/adk/providers/test_openai_turn.py b/tests/lib/adk/providers/test_openai_turn.py index 47a9ba9fe..d5ad2b5c8 100644 --- a/tests/lib/adk/providers/test_openai_turn.py +++ b/tests/lib/adk/providers/test_openai_turn.py @@ -25,7 +25,7 @@ def _import_target(): - from agentex.lib.adk.providers._modules.openai_turn import ( + from agentex.lib.adk._modules._openai_turn import ( OpenAITurn, _aggregate_usage, openai_usage_to_turn_usage, @@ -219,7 
+219,7 @@ def stream_events(self): # monkeypatch that converter below so this can yield canonical events. return _canonical_stream(canonical) - import agentex.lib.adk.providers._modules.openai_turn as mod + import agentex.lib.adk._modules._openai_turn as mod async def _passthrough(stream): async for e in stream: diff --git a/tests/lib/adk/test_claude_code_sync.py b/tests/lib/adk/test_claude_code_sync.py index 6dd36d973..5a78acaf7 100644 --- a/tests/lib/adk/test_claude_code_sync.py +++ b/tests/lib/adk/test_claude_code_sync.py @@ -140,6 +140,84 @@ async def test_streamed_text_not_re_emitted_by_assistant_block(self): text_starts = [e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, TextContent)] assert len(text_starts) == 1, "Text block must not be emitted twice" + async def test_streamed_message_split_across_assistant_envelopes_not_duplicated(self): + """Regression: one streamed message (thinking + text) can materialise as + SEPARATE assistant envelopes. Content-based dedup must skip both streamed + blocks even though the text arrives in its own later envelope — an earlier + index-based scheme re-emitted the text (duplicate).""" + envelopes = [ + # Streamed: thinking at block index 0, then text at block index 1. 
+ { + "type": "stream_event", + "event": {"type": "content_block_start", "index": 0, "content_block": {"type": "thinking"}}, + }, + { + "type": "stream_event", + "event": { + "type": "content_block_delta", + "index": 0, + "delta": {"type": "thinking_delta", "thinking": "ponder"}, + }, + }, + {"type": "stream_event", "event": {"type": "content_block_stop", "index": 0}}, + { + "type": "stream_event", + "event": {"type": "content_block_start", "index": 1, "content_block": {"type": "text"}}, + }, + { + "type": "stream_event", + "event": {"type": "content_block_delta", "index": 1, "delta": {"type": "text_delta", "text": "answer"}}, + }, + {"type": "stream_event", "event": {"type": "content_block_stop", "index": 1}}, + # Materialised as two separate assistant envelopes (thinking alone at + # idx 0, then text alone at idx 0) — the shape that caused duplicates. + {"type": "assistant", "message": {"content": [{"type": "thinking", "thinking": "ponder"}]}}, + {"type": "assistant", "message": {"content": [{"type": "text", "text": "answer"}]}}, + ] + out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes))) + text_starts = [e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, TextContent)] + reasoning_starts = [ + e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ReasoningContent) + ] + assert len(text_starts) == 1, "Streamed text must not be re-emitted by its own materialised envelope" + assert len(reasoning_starts) == 1, "Streamed thinking must not be re-emitted either" + + async def test_interleaved_materialized_block_not_duplicated(self): + """Regression: the materialised `assistant` envelope can arrive MID-stream + (before the streamed block's content_block_stop). 
Content-recorded dedup + hasn't fired yet, so the still-open block's partial buffer is prefix-matched + against the materialised full text to suppress the duplicate.""" + envelopes = [ + { + "type": "stream_event", + "event": {"type": "content_block_start", "index": 0, "content_block": {"type": "thinking"}}, + }, + { + "type": "stream_event", + "event": { + "type": "content_block_delta", + "index": 0, + "delta": {"type": "thinking_delta", "thinking": "I"}, + }, + }, + # Materialised envelope interleaved before content_block_stop. + {"type": "assistant", "message": {"content": [{"type": "thinking", "thinking": "I need to load tools."}]}}, + { + "type": "stream_event", + "event": { + "type": "content_block_delta", + "index": 0, + "delta": {"type": "thinking_delta", "thinking": " need to load tools."}, + }, + }, + {"type": "stream_event", "event": {"type": "content_block_stop", "index": 0}}, + ] + out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes))) + reasoning_starts = [ + e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ReasoningContent) + ] + assert len(reasoning_starts) == 1, "Interleaved materialised reasoning must not duplicate the streamed block" + async def test_later_turn_non_streamed_text_not_dropped(self): """A non-streamed text block in a later turn must not be dropped because an earlier turn streamed a block at the same index.""" diff --git a/tests/lib/adk/test_langgraph_async.py b/tests/lib/adk/test_langgraph_async.py index 682bd43bc..ebe215a15 100644 --- a/tests/lib/adk/test_langgraph_async.py +++ b/tests/lib/adk/test_langgraph_async.py @@ -26,7 +26,7 @@ from agentex.types.text_content import TextContent from agentex.types.task_message_delta import TextDelta from agentex.types.task_message_update import StreamTaskMessageDelta -from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events +from agentex.lib.adk._modules._langgraph_turn import stream_langgraph_events TASK_ID = 
"task-test" diff --git a/tests/lib/adk/test_langgraph_sync.py b/tests/lib/adk/test_langgraph_sync.py index 248d18f68..9e8c6e4f0 100644 --- a/tests/lib/adk/test_langgraph_sync.py +++ b/tests/lib/adk/test_langgraph_sync.py @@ -1,10 +1,12 @@ -"""Tests for the sync LangGraph -> Agentex stream event converter. +"""Tests for the sync LangGraph -> Agentex path. Covers: -- Basic text, tool call, and tool response emission -- on_final_ai_message callback for usage capture -- create_langgraph_tracing_handler symbol is importable and functional - (runtime DeprecationWarning removed; deprecation is docstring-only) +- The bare converter ``convert_langgraph_to_agentex_events``: + * Basic text, tool call, and tool response emission + * on_final_ai_message callback for usage capture +- The unified sync (HTTP ACP) path ``UnifiedEmitter.yield_turn(LangGraphTurn(...))``: + * Passthrough: yield_turn events equal LangGraphTurn(stream).events + * Span derivation from Full tool events with a fake tracer NOTE: langchain_core imports must be deferred to test-function scope because conftest.py stubs out ``langchain_core.messages`` with MagicMock for ADK @@ -15,15 +17,20 @@ import sys from typing import Any, AsyncIterator +from datetime import datetime, timezone +from dataclasses import field, dataclass import pytest +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.task_message_update import ( StreamTaskMessageFull, ) from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn # --------------------------------------------------------------------------- # Helpers @@ -227,21 +234,160 @@ def _cb(msg): assert yield_order.index("event") < yield_order.index("callback") -class 
TestLangGraphTracingHandlerBackwardCompat: - def test_create_langgraph_tracing_handler_no_runtime_warning(self): - """Deprecated symbol remains importable and emits no runtime DeprecationWarning. +# --------------------------------------------------------------------------- +# Unified sync path: LangGraphTurn + UnifiedEmitter.yield_turn +# +# Verifies the sync (HTTP ACP) delivery surface: +# 1. Passthrough: events from emitter.yield_turn(LangGraphTurn(stream)) equal +# LangGraphTurn(stream).events collected directly. +# 2. Span derivation: with trace_id + fake tracer, tool spans are derived from +# the event stream. +# --------------------------------------------------------------------------- + + +@dataclass +class _FakeTracingBackend: + spans_started: list[dict[str, Any]] = field(default_factory=list) + spans_ended: list[str] = field(default_factory=list) + + async def start_span(self, **kw) -> Any: + from agentex.types.span import Span + + sp = Span( + id=f"span-{len(self.spans_started) + 1}", + trace_id=kw.get("trace_id", "trace1"), + name=kw.get("name", ""), + start_time=datetime.now(tz=timezone.utc), + ) + self.spans_started.append(kw) + return sp + + async def end_span(self, *, trace_id: str, span: Any) -> None: + self.spans_ended.append(span.id if span else "") + + +class TestUnifiedSyncPathPassthrough: + async def test_yield_turn_events_equal_direct_events(self): + """Events from emitter.yield_turn(LangGraphTurn(stream)) must equal + LangGraphTurn(stream).events collected directly — the emitter must not + add, drop, or reorder events in yield mode.""" + from langchain_core.messages import AIMessage, AIMessageChunk + + chunk = AIMessageChunk(content="Hello!") + ai_msg = AIMessage(content="Hello!") - The runtime warnings.warn was removed (docstring-only deprecation) to - align with PR 4/6 and avoid breaking callers under warnings-as-errors. 
- Using ``warnings.simplefilter("error", DeprecationWarning)`` verifies - that calling the function is safe under -W error conditions. + events_raw = [ + ("messages", (chunk, {})), + ("updates", {"agent": {"messages": [ai_msg]}}), + ] + + direct = [e async for e in LangGraphTurn(_make_stream(events_raw)).events] + + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + via_emitter = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + + assert len(direct) == len(via_emitter), "yield_turn must not add or drop events relative to direct iteration" + for a, b in zip(direct, via_emitter, strict=True): + assert type(a) == type(b), f"Event type mismatch: {type(a).__name__} vs {type(b).__name__}" + + async def test_yield_turn_passes_all_event_types(self): + """Start, Delta, Done, Full — each type is preserved.""" + from langchain_core.messages import AIMessage, AIMessageChunk + + chunk = AIMessageChunk(content="hi") + tc = {"id": "c1", "name": "t", "args": {}} + ai_msg = AIMessage(content="hi", tool_calls=[tc]) + + events_raw = [ + ("messages", (chunk, {})), + ("updates", {"agent": {"messages": [ai_msg]}}), + ] + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + types = {type(e).__name__ for e in out} + # text chunk emits Start + Delta + assert "StreamTaskMessageStart" in types + assert "StreamTaskMessageDelta" in types + # tool call emits Full + assert "StreamTaskMessageFull" in types + + async def test_empty_stream_yields_no_events(self): + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream([])))] + assert out == [] + + +class TestUnifiedSyncPathSpanDerivation: + @pytest.fixture + def fake_tracer(self): + backend = _FakeTracingBackend() + tracer = SpanTracer( + trace_id="trace1", + parent_span_id=None, + task_id="t", 
+ tracing=backend, # type: ignore[arg-type] + ) + return tracer, backend + + async def test_tool_span_derived_from_full_events(self, fake_tracer): + """SpanDeriver handles Full tool events for LangGraph. + + Full(ToolRequestContent) opens a tool span keyed by tool_call_id; + Full(ToolResponseContent) closes it, aligning LangGraph's Full-event + path with the Start+Done harnesses (pydantic-ai, openai-agents). """ - import warnings + from langchain_core.messages import AIMessage, ToolMessage - from agentex.lib.adk._modules._langgraph_tracing import create_langgraph_tracing_handler + tracer, backend = fake_tracer + tc = {"id": "c1", "name": "get_weather", "args": {"city": "Paris"}} + ai_msg = AIMessage(content="", tool_calls=[tc]) + tool_msg = ToolMessage(content="Sunny", tool_call_id="c1", name="get_weather") + + events_raw = [ + ("updates", {"agent": {"messages": [ai_msg]}}), + ("updates", {"tools": {"messages": [tool_msg]}}), + ] - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("error", DeprecationWarning) - create_langgraph_tracing_handler(trace_id="t1", parent_span_id="p1") + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) + _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + + assert len(backend.spans_started) == 1, "Full(ToolRequestContent) opens one tool span" + started = backend.spans_started[0] + assert started["name"] == "get_weather" + assert started["input"] == {"city": "Paris"} + + async def test_no_spans_when_no_tool_calls(self, fake_tracer): + """yield_turn with tracer but no tool calls emits no spans.""" + from langchain_core.messages import AIMessage, AIMessageChunk + + tracer, backend = fake_tracer + chunk = AIMessageChunk(content="Hello!") + ai_msg = AIMessage(content="Hello!") + + events_raw = [ + ("messages", (chunk, {})), + ("updates", {"agent": {"messages": [ai_msg]}}), + ] + + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", 
parent_span_id=None, tracer=tracer) + _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + + assert backend.spans_started == [], "No tool spans when there are no tool calls" + + async def test_tracer_none_means_no_spans(self): + """With tracer=False, no spans should be emitted.""" + from langchain_core.messages import AIMessage, ToolMessage + + tc = {"id": "c1", "name": "t", "args": {}} + ai_msg = AIMessage(content="", tool_calls=[tc]) + tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t") + + events_raw = [ + ("updates", {"agent": {"messages": [ai_msg]}}), + ("updates", {"tools": {"messages": [tool_msg]}}), + ] - assert w == [], "create_langgraph_tracing_handler must NOT emit a runtime DeprecationWarning" + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=False) + _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + # No assertion on spans since tracer=False means emitter.tracer is None + assert emitter.tracer is None diff --git a/tests/lib/adk/test_langgraph_sync_unified.py b/tests/lib/adk/test_langgraph_sync_unified.py deleted file mode 100644 index cfd522828..000000000 --- a/tests/lib/adk/test_langgraph_sync_unified.py +++ /dev/null @@ -1,214 +0,0 @@ -"""Unified sync path tests for LangGraphTurn + UnifiedEmitter. - -Verifies: -1. Passthrough: events from emitter.yield_turn(LangGraphTurn(stream)) equal - LangGraphTurn(stream).events collected directly. -2. Span derivation: with trace_id + fake tracer, tool spans are derived from - the event stream. - -NOTE: langchain_core imports are deferred to test scope because conftest.py -stubs ``langchain_core.messages`` with MagicMock. 
-""" - -from __future__ import annotations - -import sys -from typing import Any -from datetime import datetime, timezone -from dataclasses import field, dataclass - -import pytest - -from agentex.lib.core.harness.tracer import SpanTracer -from agentex.lib.core.harness.emitter import UnifiedEmitter -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - -# --------------------------------------------------------------------------- -# Remove conftest stubs so real langchain_core types are used -# --------------------------------------------------------------------------- - - -@pytest.fixture(autouse=True) -def _real_langchain_core(): - stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")] - saved = {k: sys.modules.pop(k) for k in stub_keys} - import importlib - - importlib.import_module("langchain_core.messages") - yield - sys.modules.update(saved) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _make_stream(events: list[tuple[str, Any]]): - async def _gen(): - for e in events: - yield e - - return _gen() - - -# --------------------------------------------------------------------------- -# Fake SpanTracer -# --------------------------------------------------------------------------- - - -@dataclass -class _FakeTracingBackend: - spans_started: list[dict[str, Any]] = field(default_factory=list) - spans_ended: list[str] = field(default_factory=list) - - async def start_span(self, **kw) -> Any: - from agentex.types.span import Span - - sp = Span( - id=f"span-{len(self.spans_started) + 1}", - trace_id=kw.get("trace_id", "trace1"), - name=kw.get("name", ""), - start_time=datetime.now(tz=timezone.utc), - ) - self.spans_started.append(kw) - return sp - - async def end_span(self, *, trace_id: str, span: Any) -> None: - self.spans_ended.append(span.id if span else "") - - -# 
--------------------------------------------------------------------------- -# Tests -# --------------------------------------------------------------------------- - - -class TestPassthrough: - async def test_yield_turn_events_equal_direct_events(self): - """Events from emitter.yield_turn(LangGraphTurn(stream)) must equal - LangGraphTurn(stream).events collected directly — the emitter must not - add, drop, or reorder events in yield mode.""" - from langchain_core.messages import AIMessage, AIMessageChunk - - chunk = AIMessageChunk(content="Hello!") - ai_msg = AIMessage(content="Hello!") - - # Build two identical streams - events_raw = [ - ("messages", (chunk, {})), - ("updates", {"agent": {"messages": [ai_msg]}}), - ] - - # Direct collection - direct = [e async for e in LangGraphTurn(_make_stream(events_raw)).events] - - # Via emitter.yield_turn - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - via_emitter = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - - assert len(direct) == len(via_emitter), "yield_turn must not add or drop events relative to direct iteration" - for a, b in zip(direct, via_emitter, strict=True): - assert type(a) == type(b), f"Event type mismatch: {type(a).__name__} vs {type(b).__name__}" - - async def test_yield_turn_passes_all_event_types(self): - """Start, Delta, Done, Full — each type is preserved.""" - from langchain_core.messages import AIMessage, AIMessageChunk - - chunk = AIMessageChunk(content="hi") - tc = {"id": "c1", "name": "t", "args": {}} - ai_msg = AIMessage(content="hi", tool_calls=[tc]) - - events_raw = [ - ("messages", (chunk, {})), - ("updates", {"agent": {"messages": [ai_msg]}}), - ] - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - types = {type(e).__name__ for e in out} - # text chunk emits Start + Delta - assert "StreamTaskMessageStart" in types - 
assert "StreamTaskMessageDelta" in types - # tool call emits Full - assert "StreamTaskMessageFull" in types - - async def test_empty_stream_yields_no_events(self): - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream([])))] - assert out == [] - - -class TestSpanDerivation: - @pytest.fixture - def fake_tracer(self): - backend = _FakeTracingBackend() - tracer = SpanTracer( - trace_id="trace1", - parent_span_id=None, - task_id="t", - tracing=backend, # type: ignore[arg-type] - ) - return tracer, backend - - async def test_tool_span_derived_from_full_events(self, fake_tracer): - """AGX1-377: SpanDeriver now handles Full tool events for LangGraph. - - Full(ToolRequestContent) opens a tool span keyed by tool_call_id; - Full(ToolResponseContent) closes it. This bridges the previous gap where - LangGraph's Full-event path produced no spans, aligning it with - Start+Done harnesses (pydantic-ai, openai-agents). 
- """ - from langchain_core.messages import AIMessage, ToolMessage - - tracer, backend = fake_tracer - tc = {"id": "c1", "name": "get_weather", "args": {"city": "Paris"}} - ai_msg = AIMessage(content="", tool_calls=[tc]) - tool_msg = ToolMessage(content="Sunny", tool_call_id="c1", name="get_weather") - - events_raw = [ - ("updates", {"agent": {"messages": [ai_msg]}}), - ("updates", {"tools": {"messages": [tool_msg]}}), - ] - - emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) - _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - - assert len(backend.spans_started) == 1, "Full(ToolRequestContent) opens one tool span" - started = backend.spans_started[0] - assert started["name"] == "get_weather" - assert started["input"] == {"city": "Paris"} - - async def test_no_spans_when_no_tool_calls(self, fake_tracer): - """yield_turn with tracer but no tool calls emits no spans.""" - from langchain_core.messages import AIMessage, AIMessageChunk - - tracer, backend = fake_tracer - chunk = AIMessageChunk(content="Hello!") - ai_msg = AIMessage(content="Hello!") - - events_raw = [ - ("messages", (chunk, {})), - ("updates", {"agent": {"messages": [ai_msg]}}), - ] - - emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) - _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - - assert backend.spans_started == [], "No tool spans when there are no tool calls" - - async def test_tracer_none_means_no_spans(self): - """With tracer=False, no spans should be emitted.""" - from langchain_core.messages import AIMessage, ToolMessage - - tc = {"id": "c1", "name": "t", "args": {}} - ai_msg = AIMessage(content="", tool_calls=[tc]) - tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t") - - events_raw = [ - ("updates", {"agent": {"messages": [ai_msg]}}), - ("updates", {"tools": {"messages": [tool_msg]}}), - ] - - emitter = 
UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=False) - _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - # No assertion on spans since tracer=False means emitter.tracer is None - assert emitter.tracer is None diff --git a/tests/lib/adk/test_openai_sync.py b/tests/lib/adk/test_openai_sync.py new file mode 100644 index 000000000..de2a61db8 --- /dev/null +++ b/tests/lib/adk/test_openai_sync.py @@ -0,0 +1,189 @@ +"""Tests for ``convert_openai_to_agentex_events`` and its helpers. + +Focused on three previously-broken behaviors on the sync OpenAI converter: + +- ``_safe_parse_arguments`` never raises on malformed/non-dict JSON (a bad + tool-args string must not abort the whole turn). +- Every streamed item — text AND reasoning — is closed with a matching + ``StreamTaskMessageDone`` (reasoning messages used to hang open). +- Each new text ``item_id`` gets a fresh index, so a final answer cannot + collide with the preceding reasoning message on reasoning-model streams. 
+""" + +import types as _types + +import pytest +from openai.types.responses import ResponseTextDeltaEvent, ResponseOutputItemDoneEvent +from openai.types.responses.response_output_message import ResponseOutputMessage +from openai.types.responses.response_reasoning_item import ResponseReasoningItem +from openai.types.responses.response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent + +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.lib.adk._modules._openai_sync import ( + _safe_parse_arguments, + convert_openai_to_agentex_events, +) + +# --------------------------------------------------------------------------- +# _safe_parse_arguments +# --------------------------------------------------------------------------- + + +def test_safe_parse_arguments_valid_dict_json(): + assert _safe_parse_arguments('{"a": 1}') == {"a": 1} + + +def test_safe_parse_arguments_empty_and_none(): + assert _safe_parse_arguments("") == {} + assert _safe_parse_arguments(None) == {} + + +def test_safe_parse_arguments_passthrough_dict(): + d = {"already": "dict"} + assert _safe_parse_arguments(d) is d + + +def test_safe_parse_arguments_malformed_preserved_not_raised(): + # A truncated / malformed payload must be preserved, never raise — raising + # here would abort the whole turn before later output is delivered. + assert _safe_parse_arguments('{"a": ') == {"raw": '{"a": '} + + +def test_safe_parse_arguments_non_dict_json_wrapped(): + # Valid JSON that isn't an object is wrapped so the result stays a dict. + assert _safe_parse_arguments("[1, 2]") == {"value": [1, 2]} + assert _safe_parse_arguments("42") == {"value": 42} + + +def test_safe_parse_arguments_non_string_non_dict_always_returns_dict(): + # A provider tool may pass arguments as a list / scalar / SDK object rather + # than a JSON string. 
The result must still be a dict so ToolRequestContent + # (arguments: Dict[str, object]) accepts it instead of raising. + assert _safe_parse_arguments([1, 2]) == {"value": [1, 2]} + assert _safe_parse_arguments(7) == {"value": 7} + + class _Args: + def model_dump(self): + return {"q": "hi"} + + assert _safe_parse_arguments(_Args()) == {"q": "hi"} + + # An SDK object whose model_dump is not a dict still degrades to a dict. + class _BadDump: + def model_dump(self): + return ["not", "a", "dict"] + + bad = _BadDump() + assert _safe_parse_arguments(bad) == {"value": bad} + + +# --------------------------------------------------------------------------- +# convert_openai_to_agentex_events — reasoning + text sequencing +# --------------------------------------------------------------------------- + + +def _raw(data): + return _types.SimpleNamespace(type="raw_response_event", data=data) + + +async def _stream(events): + for e in events: + yield e + + +async def _collect(events): + return [e async for e in convert_openai_to_agentex_events(_stream(events))] + + +@pytest.mark.asyncio +async def test_reasoning_item_emits_done(): + """A completed reasoning item must yield a matching Done (it used to be skipped).""" + events = [ + _raw( + ResponseReasoningTextDeltaEvent( + type="response.reasoning_text.delta", + item_id="r1", + content_index=0, + delta="thinking", + output_index=0, + sequence_number=1, + ) + ), + _raw( + ResponseOutputItemDoneEvent( + type="response.output_item.done", + item=ResponseReasoningItem(id="r1", type="reasoning", summary=[]), + output_index=0, + sequence_number=2, + ) + ), + ] + out = await _collect(events) + + starts = [e for e in out if isinstance(e, StreamTaskMessageStart)] + dones = [e for e in out if isinstance(e, StreamTaskMessageDone)] + assert len(starts) == 1 + # The reasoning message is now closed instead of hanging open. 
+ assert [d.index for d in dones] == [starts[0].index] + + +@pytest.mark.asyncio +async def test_reasoning_then_text_use_distinct_indices(): + """Final answer text must not reuse the reasoning message's index.""" + events = [ + _raw( + ResponseReasoningTextDeltaEvent( + type="response.reasoning_text.delta", + item_id="r1", + content_index=0, + delta="thinking", + output_index=0, + sequence_number=1, + ) + ), + _raw( + ResponseOutputItemDoneEvent( + type="response.output_item.done", + item=ResponseReasoningItem(id="r1", type="reasoning", summary=[]), + output_index=0, + sequence_number=2, + ) + ), + _raw( + ResponseTextDeltaEvent( + type="response.output_text.delta", + item_id="t1", + content_index=0, + delta="answer", + output_index=1, + sequence_number=3, + logprobs=[], + ) + ), + _raw( + ResponseOutputItemDoneEvent( + type="response.output_item.done", + item=ResponseOutputMessage(id="t1", type="message", role="assistant", status="completed", content=[]), + output_index=1, + sequence_number=4, + ) + ), + ] + out = await _collect(events) + + starts = [e for e in out if isinstance(e, StreamTaskMessageStart)] + assert len(starts) == 2 + reasoning_index, text_index = starts[0].index, starts[1].index + assert reasoning_index != text_index + + # Text deltas route to the text index, not the reasoning index. + text_deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta) and e.delta.type == "text"] + assert text_deltas and all(d.index == text_index for d in text_deltas) + + # Both messages are closed on their own index. 
+ done_indices = sorted(e.index for e in out if isinstance(e, StreamTaskMessageDone)) + assert done_indices == sorted({reasoning_index, text_index}) diff --git a/tests/lib/adk/test_pydantic_ai_async.py b/tests/lib/adk/test_pydantic_ai_async.py index 49cb6054c..4ab468152 100644 --- a/tests/lib/adk/test_pydantic_ai_async.py +++ b/tests/lib/adk/test_pydantic_ai_async.py @@ -36,7 +36,7 @@ from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent from agentex.types.reasoning_content_delta import ReasoningContentDelta -from agentex.lib.adk._modules._pydantic_ai_async import stream_pydantic_ai_events +from agentex.lib.adk._modules._pydantic_ai_turn import stream_pydantic_ai_events TASK_ID = "task_test" @@ -262,8 +262,8 @@ async def test_tool_call_opens_streaming_context_with_identity( ) -> None: """Tool requests are delivered as a streaming context (Start+Delta+Done). - AGX1-377 fix: auto_send now delivers streamed tool-request messages - natively (Start+ToolRequestDelta+Done). The streaming context is opened + auto_send delivers streamed tool-request messages natively + (Start+ToolRequestDelta+Done). The streaming context is opened at the Start event with the initial ToolRequestContent (tool_call_id + name + empty arguments), argument tokens are streamed as deltas, and the context is closed on Done. 
@@ -304,7 +304,7 @@ async def test_tool_call_opens_streaming_context_with_identity( assert content.tool_call_id == "c1" assert content.name == "get_weather" assert content.author == "agent" - # AGX1-377 streamed shape: initial_content has empty args (args come via delta) + # Streamed shape: initial_content has empty args (args come via delta) assert content.arguments == {} # The arg delta is delivered as a stream_update assert len(ctx.updates) == 1 @@ -657,292 +657,6 @@ async def test_part_delta_without_matching_start_is_ignored( assert final == "" -class TestTracingHandler: - """Tracing handler hooks fire alongside streaming for each tool call.""" - - @dataclass - class _RecordingHandler: - starts: list[dict[str, Any]] = field(default_factory=list) - ends: list[dict[str, Any]] = field(default_factory=list) - - async def on_tool_start(self, tool_call_id: str, tool_name: str, arguments: Any) -> None: - self.starts.append({"tool_call_id": tool_call_id, "tool_name": tool_name, "arguments": arguments}) - - async def on_tool_end(self, tool_call_id: str, result: Any) -> None: - self.ends.append({"tool_call_id": tool_call_id, "result": result}) - - async def test_handler_records_start_and_end_for_each_tool_call( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - streaming, messages = fake_adk - handler = self._RecordingHandler() - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - ] - await stream_pydantic_ai_events( - _aiter(events), - TASK_ID, - tracing_handler=handler, # type: ignore[arg-type] - ) - - # AGX1-373: tool messages arrive via streaming_task_message_context. 
- # Tracing is still additive — both messages are delivered AND hooks fire. - assert messages.created == [] - assert len(streaming.contexts) == 2 - assert isinstance(streaming.contexts[0].initial_content, ToolRequestContent) - assert isinstance(streaming.contexts[1].initial_content, ToolResponseContent) - # And both lifecycle hooks fired exactly once with the right payload. - assert handler.starts == [ - { - "tool_call_id": "c1", - "tool_name": "get_weather", - "arguments": {"city": "Paris"}, - } - ] - assert handler.ends == [{"tool_call_id": "c1", "result": "Sunny"}] - - async def test_handler_not_called_when_no_tool_calls_in_stream( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - handler = self._RecordingHandler() - events = [ - PartStartEvent(index=0, part=TextPart(content="")), - PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Hello")), - PartEndEvent(index=0, part=TextPart(content="Hello")), - ] - await stream_pydantic_ai_events( - _aiter(events), - TASK_ID, - tracing_handler=handler, # type: ignore[arg-type] - ) - assert handler.starts == [] - assert handler.ends == [] - - async def test_handler_records_each_tool_in_multi_tool_run( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - """A turn with two tool calls must produce two start/end pairs in order.""" - handler = self._RecordingHandler() - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args="{}", tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="lookup_city", args=None, tool_call_id="c2"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="lookup_city", args="{}", tool_call_id="c2"), - ), - FunctionToolResultEvent( - 
part=ToolReturnPart(tool_name="lookup_city", content="Paris, FR", tool_call_id="c2"), - ), - ] - await stream_pydantic_ai_events( - _aiter(events), - TASK_ID, - tracing_handler=handler, # type: ignore[arg-type] - ) - - assert [s["tool_call_id"] for s in handler.starts] == ["c1", "c2"] - assert [e["tool_call_id"] for e in handler.ends] == ["c1", "c2"] - assert handler.starts[0]["tool_name"] == "get_weather" - assert handler.starts[1]["tool_name"] == "lookup_city" - - async def test_omitting_handler_is_a_no_op_for_existing_behavior( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - """Regression: passing no tracing handler preserves streaming behavior. - - AGX1-373: tool messages arrive via streaming_task_message_context - regardless of whether tracing_handler is passed. - """ - streaming, messages = fake_adk - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args="{}", tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - ] - await stream_pydantic_ai_events(_aiter(events), TASK_ID) - # AGX1-373: tool messages via streaming_task_message_context. - assert messages.created == [] - assert len(streaming.contexts) == 2 - content_types = [type(ctx.initial_content).__name__ for ctx in streaming.contexts] - assert content_types == ["ToolRequestContent", "ToolResponseContent"] - - -class TestPydanticAITracingHandlerDeterministicIds: - """Regression coverage for ``AgentexPydanticAITracingHandler``. - - pydantic-ai's ``TemporalAgent`` splits a single agent run across several - Temporal activities. The event_stream_handler is invoked once per - activity, with a fresh handler instance each time. 
So ``on_tool_start`` - (during the model activity that issued the tool call) and ``on_tool_end`` - (during the next model activity, after the tool ran) end up in DIFFERENT - handler instances — an in-memory dict can't pair them. - - The fix is deterministic span IDs derived from ``(trace_id, tool_call_id)``. - These tests lock that in. - """ - - class _RecordingClient: - """Stand-in for ``AsyncAgentex`` capturing spans.create / spans.update calls.""" - - def __init__(self) -> None: - self.creates: list[dict[str, Any]] = [] - self.updates: list[tuple[str, dict[str, Any]]] = [] - self.spans = self # so .spans.create / .spans.update resolve back here - - async def create(self, **kwargs: Any) -> Any: - self.creates.append(kwargs) - return None - - async def update(self, span_id: str, **kwargs: Any) -> Any: - self.updates.append((span_id, kwargs)) - return None - - async def test_same_tool_call_id_yields_same_span_id_across_handler_instances( - self, - ) -> None: - """The whole point of the design: two handler instances with the same - trace_id and tool_call_id resolve to the same span ID — otherwise - ``on_tool_end`` patches a different (non-existent) record and the span - in the DB never gets ``end_time`` / ``output``.""" - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client_a = self._RecordingClient() - client_b = self._RecordingClient() - - # Two independent handler instances — simulates the cross-activity - # invocation pattern in TemporalAgent. 
- handler_a = AgentexPydanticAITracingHandler( - trace_id="trace-1", - parent_span_id="parent-1", - task_id="task-1", - client=client_a, # type: ignore[arg-type] - ) - handler_b = AgentexPydanticAITracingHandler( - trace_id="trace-1", - parent_span_id="parent-1", - task_id="task-1", - client=client_b, # type: ignore[arg-type] - ) - - await handler_a.on_tool_start(tool_call_id="call_abc", tool_name="get_weather", arguments={"city": "Paris"}) - await handler_b.on_tool_end(tool_call_id="call_abc", result="Sunny, 72F") - - assert len(client_a.creates) == 1 - assert len(client_b.updates) == 1 - - created_span_id = client_a.creates[0]["id"] - updated_span_id = client_b.updates[0][0] - assert created_span_id == updated_span_id, ( - "on_tool_start and on_tool_end must address the same span across handler " - "instances; mismatch means tool spans will be left open and the AgentEx UI " - "will hide their trace." - ) - - async def test_different_tool_call_ids_yield_different_span_ids(self) -> None: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler = AgentexPydanticAITracingHandler( - trace_id="trace-1", - client=client, # type: ignore[arg-type] - ) - - await handler.on_tool_start("call_a", "get_weather", {"city": "Paris"}) - await handler.on_tool_start("call_b", "get_weather", {"city": "Tokyo"}) - - ids = {c["id"] for c in client.creates} - assert len(ids) == 2, "Distinct tool_call_ids must map to distinct span IDs" - - async def test_same_tool_call_id_in_different_traces_yields_different_span_ids( - self, - ) -> None: - """Span IDs are namespaced by trace_id so two unrelated runs with the - same provider-issued tool_call_id don't collide.""" - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler_t1 = AgentexPydanticAITracingHandler(trace_id="trace-1", client=client) # type: 
ignore[arg-type] - handler_t2 = AgentexPydanticAITracingHandler(trace_id="trace-2", client=client) # type: ignore[arg-type] - - await handler_t1.on_tool_start("call_abc", "t", None) - await handler_t2.on_tool_start("call_abc", "t", None) - - ids = {c["id"] for c in client.creates} - assert len(ids) == 2 - - async def test_on_tool_end_patches_only_end_time_and_output(self) -> None: - """Don't overwrite start_time, name, parent_id, etc. on close — only patch - the fields we have new values for. Sending start_time again could clobber - what was set at create time.""" - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler = AgentexPydanticAITracingHandler(trace_id="trace-1", client=client) # type: ignore[arg-type] - - await handler.on_tool_end("call_abc", "Sunny") - - assert len(client.updates) == 1 - _, patch_kwargs = client.updates[0] - assert set(patch_kwargs.keys()) == {"end_time", "output"}, ( - f"Unexpected fields in tool span PATCH: {set(patch_kwargs.keys())}" - ) - assert patch_kwargs["output"] == {"result": "Sunny"} - - async def test_on_tool_error_patches_error_output(self) -> None: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler = AgentexPydanticAITracingHandler(trace_id="trace-1", client=client) # type: ignore[arg-type] - - await handler.on_tool_error("call_abc", RuntimeError("boom")) - - assert len(client.updates) == 1 - _, patch_kwargs = client.updates[0] - assert "error" in patch_kwargs["output"] - assert "boom" in patch_kwargs["output"]["error"] - - class TestCleanupOnException: async def test_open_contexts_are_closed_on_iterator_failure( self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] diff --git a/tests/lib/adk/test_pydantic_ai_sync.py b/tests/lib/adk/test_pydantic_ai_sync.py index 080bc5be8..ac9986f2b 100644 --- 
a/tests/lib/adk/test_pydantic_ai_sync.py +++ b/tests/lib/adk/test_pydantic_ai_sync.py @@ -1,4 +1,12 @@ -"""Tests for the Pydantic AI -> Agentex stream event converter.""" +"""Tests for the sync Pydantic AI -> Agentex path. + +Covers: +- The bare converter ``convert_pydantic_ai_to_agentex_events`` (text/thinking/ + tool-call streaming and arg-delta handling). +- The unified sync (HTTP ACP) path ``UnifiedEmitter.yield_turn(PydanticAITurn(...))``: + * Passthrough: yield_turn events equal PydanticAITurn(stream).events + * Span derivation (tool + reasoning) with a fake tracing backend +""" from __future__ import annotations @@ -25,6 +33,7 @@ FunctionToolResultEvent, ) +from agentex.lib.core.harness import UnifiedEmitter from agentex.types.reasoning_content import ReasoningContent from agentex.types.task_message_delta import TextDelta from agentex.types.tool_request_delta import ToolRequestDelta @@ -42,6 +51,9 @@ _args_delta_to_str, convert_pydantic_ai_to_agentex_events, ) +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn + +from ..core.harness._fakes import FakeTracing async def _aiter(events: list[Any]) -> AsyncIterator[Any]: @@ -290,90 +302,6 @@ async def test_tool_retry_prompt_surfaces_as_response(self): assert out[0].content.content == "bad arguments" -class TestTracingHandlerSync: - """The sync converter has the same opt-in tracing-handler contract as the - async streamer: pass a handler and the converter calls ``on_tool_start`` / - ``on_tool_end`` for each tool call. 
Streaming yields are unchanged when - omitted.""" - - class _RecordingHandler: - def __init__(self) -> None: - self.starts: list[dict[str, Any]] = [] - self.ends: list[dict[str, Any]] = [] - - async def on_tool_start(self, tool_call_id: str, tool_name: str, arguments: Any) -> None: - self.starts.append({"tool_call_id": tool_call_id, "tool_name": tool_name, "arguments": arguments}) - - async def on_tool_end(self, tool_call_id: str, result: Any) -> None: - self.ends.append({"tool_call_id": tool_call_id, "result": result}) - - async def test_handler_records_start_and_end_for_a_tool_call(self): - handler = self._RecordingHandler() - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - ] - out = await _collect( - convert_pydantic_ai_to_agentex_events(_aiter(events), tracing_handler=handler) # type: ignore[arg-type] - ) - - # Streaming output is unchanged. 
- assert any(isinstance(e, StreamTaskMessageStart) for e in out) - assert any(isinstance(e, StreamTaskMessageFull) for e in out) - - assert handler.starts == [ - { - "tool_call_id": "c1", - "tool_name": "get_weather", - "arguments": {"city": "Paris"}, - } - ] - assert handler.ends == [{"tool_call_id": "c1", "result": "Sunny"}] - - async def test_handler_not_called_when_no_tool_calls(self): - handler = self._RecordingHandler() - events = [ - PartStartEvent(index=0, part=TextPart(content="")), - PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hi")), - PartEndEvent(index=0, part=TextPart(content="hi")), - ] - await _collect( - convert_pydantic_ai_to_agentex_events(_aiter(events), tracing_handler=handler) # type: ignore[arg-type] - ) - assert handler.starts == [] - assert handler.ends == [] - - async def test_omitting_handler_preserves_pre_tracing_behavior(self): - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="t", args=None, tool_call_id="c"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="t", args="{}", tool_call_id="c"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="t", content="ok", tool_call_id="c"), - ), - ] - out = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events))) - # Same emit shape as before: Start, Done, Full - types = [type(e).__name__ for e in out] - assert "StreamTaskMessageStart" in types - assert "StreamTaskMessageDone" in types - assert "StreamTaskMessageFull" in types - - class TestMultiStepRun: async def test_text_then_tool_then_text_assigns_distinct_indices(self): """A multi-step run: model emits text + tool call → tool runs → model emits more text. 
@@ -555,3 +483,157 @@ async def on_result_async(event: AgentRunResultEvent) -> None: assert len(awaited) == 1 assert awaited[0].result.output == "async_output" + + +# --------------------------------------------------------------------------- +# Unified sync path: PydanticAITurn + UnifiedEmitter.yield_turn +# +# Exercises the path documented in _pydantic_ai_sync.py under +# "Recommended: unified surface": +# - events forwarded by yield_turn equal PydanticAITurn(stream).events (passthrough) +# - with a trace context + fake tracing backend, tool / reasoning spans are derived +# --------------------------------------------------------------------------- + + +class TestUnifiedSyncPathPassthrough: + """The events forwarded by yield_turn are identical to PydanticAITurn.events.""" + + async def test_text_stream_passthrough(self): + raw_events = [ + PartStartEvent(index=0, part=TextPart(content="")), + PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hello")), + PartEndEvent(index=0, part=TextPart(content="hello")), + ] + + turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + direct = await _collect(turn_a.events) + + turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + via_emitter = await _collect(emitter.yield_turn(turn_b)) + + assert len(via_emitter) == len(direct) + for a, b in zip(via_emitter, direct): + assert type(a) is type(b) + assert a.model_dump() == b.model_dump() + + async def test_tool_call_stream_passthrough(self): + raw_events = [ + PartStartEvent(index=0, part=ToolCallPart(tool_name="Bash", args=None, tool_call_id="c1")), + PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"cmd":"ls"}')), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c1"), + ), + ] + + turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + direct = await _collect(turn_a.events) + + turn_b = 
PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + via_emitter = await _collect(emitter.yield_turn(turn_b)) + + assert len(via_emitter) == len(direct) + for a, b in zip(via_emitter, direct): + assert type(a) is type(b) + assert a.model_dump() == b.model_dump() + + +class TestUnifiedSyncPathSpanDerivation: + """With trace context + fake tracing, spans are derived from the stream.""" + + async def test_tool_span_opened_and_closed(self): + """A tool call produces start_span + end_span on the fake tracing backend.""" + tool_events = [ + PartStartEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="call_1"), + ), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="call_1"), + ), + FunctionToolResultEvent( + part=ToolReturnPart(tool_name="Bash", content="files", tool_call_id="call_1"), + ), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) + + events = await _collect(emitter.yield_turn(turn)) + + assert len(events) >= 2, "at least Start(tool) + Done + Full(response)" + assert len(fake.started) == 1, "one tool span opened" + assert len(fake.ended) == 1, "one tool span closed" + span_name, parent_id, span_input = fake.started[0] + assert span_name == "Bash" + assert parent_id == "p" + closed_name, closed_output = fake.ended[0] + assert closed_name == "Bash" + + async def test_reasoning_span_opened_and_closed(self): + """A thinking/reasoning block produces start_span + end_span.""" + reasoning_events = [ + PartStartEvent(index=0, part=ThinkingPart(content="")), + PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="let me think")), + PartEndEvent(index=0, part=ThinkingPart(content="let me think")), + ] + + fake = FakeTracing() + turn = 
PydanticAITurn(_aiter(reasoning_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) + + await _collect(emitter.yield_turn(turn)) + + assert len(fake.started) == 1, "one reasoning span opened" + assert len(fake.ended) == 1, "one reasoning span closed" + span_name, parent_id, _ = fake.started[0] + assert span_name == "reasoning" + assert parent_id == "p" + + async def test_no_trace_id_means_no_spans(self): + """When trace_id is None, no spans are derived even with a fake tracing backend.""" + raw_events = [ + PartStartEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c2"), + ), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c2"), + ), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, tracing=fake) + + await _collect(emitter.yield_turn(turn)) + + assert fake.started == [], "no spans when trace_id is absent" + assert fake.ended == [] + + async def test_tracer_false_suppresses_spans_even_with_trace_id(self): + """tracer=False disables span derivation regardless of trace_id.""" + raw_events = [ + PartStartEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c3"), + ), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c3"), + ), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracer=False, tracing=fake) + + await _collect(emitter.yield_turn(turn)) + + assert fake.started == [] + assert fake.ended == [] diff --git a/tests/lib/adk/test_pydantic_ai_sync_unified.py b/tests/lib/adk/test_pydantic_ai_sync_unified.py deleted file mode 100644 index f920418de..000000000 --- 
a/tests/lib/adk/test_pydantic_ai_sync_unified.py +++ /dev/null @@ -1,209 +0,0 @@ -"""Tests for the unified sync (HTTP ACP) path: PydanticAITurn + UnifiedEmitter. - -Exercises the path documented in _pydantic_ai_sync.py under "Recommended: unified surface": -- events forwarded by yield_turn equal PydanticAITurn(stream).events (passthrough) -- with a trace context + fake tracing backend, tool spans are derived (start_span / end_span called) -- with a trace context + fake tracing backend, reasoning spans are derived -""" - -from __future__ import annotations - -from typing import Any, AsyncIterator - -from pydantic_ai.run import AgentRunResult, AgentRunResultEvent -from pydantic_ai.usage import RunUsage -from pydantic_ai.messages import ( - TextPart, - PartEndEvent, - ThinkingPart, - ToolCallPart, - TextPartDelta, - PartDeltaEvent, - PartStartEvent, - ThinkingPartDelta, - ToolCallPartDelta, -) - -from agentex.lib.core.harness import UnifiedEmitter -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - - -async def _aiter(events: list[Any]) -> AsyncIterator[Any]: - for e in events: - yield e - - -async def _collect(stream: AsyncIterator[Any]) -> list[Any]: - return [e async for e in stream] - - -class _FakeSpan: - def __init__(self, name: str): - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, str | None, Any]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None): - self.started.append((name, parent_id, input)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id, span): - self.ended.append((span.name, span.output)) - - -def _make_result_event(usage: RunUsage | None = None) -> AgentRunResultEvent: - result = AgentRunResult(output="done", _output_tool_name=None) - if usage is not None: - result._state.usage = usage - return AgentRunResultEvent(result=result) - - 
-class TestUnifiedSyncPathPassthrough: - """The events forwarded by yield_turn are identical to PydanticAITurn.events.""" - - async def test_text_stream_passthrough(self): - raw_events = [ - PartStartEvent(index=0, part=TextPart(content="")), - PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hello")), - PartEndEvent(index=0, part=TextPart(content="hello")), - ] - - turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - direct = await _collect(turn_a.events) - - turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - via_emitter = await _collect(emitter.yield_turn(turn_b)) - - assert len(via_emitter) == len(direct) - for a, b in zip(via_emitter, direct): - assert type(a) is type(b) - assert a.model_dump() == b.model_dump() - - async def test_tool_call_stream_passthrough(self): - raw_events = [ - PartStartEvent(index=0, part=ToolCallPart(tool_name="Bash", args=None, tool_call_id="c1")), - PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"cmd":"ls"}')), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c1"), - ), - ] - - turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - direct = await _collect(turn_a.events) - - turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - via_emitter = await _collect(emitter.yield_turn(turn_b)) - - assert len(via_emitter) == len(direct) - for a, b in zip(via_emitter, direct): - assert type(a) is type(b) - assert a.model_dump() == b.model_dump() - - -class TestUnifiedSyncPathSpanDerivation: - """With trace context + fake tracing, spans are derived from the stream.""" - - async def test_tool_span_opened_and_closed(self): - """A tool call produces start_span + end_span on the fake tracing backend.""" - from pydantic_ai.messages import ToolReturnPart, 
FunctionToolResultEvent - - tool_events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="call_1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="call_1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="Bash", content="files", tool_call_id="call_1"), - ), - ] - - fake = _FakeTracing() - turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) - - events = await _collect(emitter.yield_turn(turn)) - - assert len(events) >= 2, "at least Start(tool) + Done + Full(response)" - assert len(fake.started) == 1, "one tool span opened" - assert len(fake.ended) == 1, "one tool span closed" - span_name, parent_id, span_input = fake.started[0] - assert span_name == "Bash" - assert parent_id == "p" - closed_name, closed_output = fake.ended[0] - assert closed_name == "Bash" - - async def test_reasoning_span_opened_and_closed(self): - """A thinking/reasoning block produces start_span + end_span.""" - reasoning_events = [ - PartStartEvent(index=0, part=ThinkingPart(content="")), - PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="let me think")), - PartEndEvent(index=0, part=ThinkingPart(content="let me think")), - ] - - fake = _FakeTracing() - turn = PydanticAITurn(_aiter(reasoning_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) - - await _collect(emitter.yield_turn(turn)) - - assert len(fake.started) == 1, "one reasoning span opened" - assert len(fake.ended) == 1, "one reasoning span closed" - span_name, parent_id, _ = fake.started[0] - assert span_name == "reasoning" - assert parent_id == "p" - - async def test_no_trace_id_means_no_spans(self): - """When trace_id is None, no spans are derived even with a fake tracing backend.""" - raw_events = [ - PartStartEvent( - 
index=0, - part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c2"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c2"), - ), - ] - - fake = _FakeTracing() - turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, tracing=fake) - - await _collect(emitter.yield_turn(turn)) - - assert fake.started == [], "no spans when trace_id is absent" - assert fake.ended == [] - - async def test_tracer_false_suppresses_spans_even_with_trace_id(self): - """tracer=False disables span derivation regardless of trace_id.""" - raw_events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c3"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c3"), - ), - ] - - fake = _FakeTracing() - turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracer=False, tracing=fake) - - await _collect(emitter.yield_turn(turn)) - - assert fake.started == [] - assert fake.ended == [] diff --git a/tests/lib/adk/test_pydantic_ai_turn.py b/tests/lib/adk/test_pydantic_ai_turn.py index 46bf247a3..c57251db6 100644 --- a/tests/lib/adk/test_pydantic_ai_turn.py +++ b/tests/lib/adk/test_pydantic_ai_turn.py @@ -233,8 +233,8 @@ async def test_no_usage_event_leaves_default_usage(self): class TestToolRequestStreaming: """PydanticAITurn.events equals the bare converter output unconditionally. - The foundation auto_send delivers Start+ToolRequestDelta+Done natively - (AGX1-377), so no coalescing is needed on either channel. + The foundation auto_send delivers Start+ToolRequestDelta+Done natively, so + no coalescing is needed on either channel. 
""" async def test_events_match_bare_converter_for_streamed_tool_call(self): diff --git a/tests/lib/core/harness/test_auto_send.py b/tests/lib/core/harness/test_auto_send.py index 764dae8b3..8133a488c 100644 --- a/tests/lib/core/harness/test_auto_send.py +++ b/tests/lib/core/harness/test_auto_send.py @@ -218,7 +218,8 @@ async def test_auto_send_derives_tool_spans_via_tracer(): assert result.final_text == "" assert fake_tracing.started_names == ["Bash"] - assert fake_tracing.ended_outputs == ["ok"] + # String tool output is wrapped in a dict (SGP spans require an object). + assert fake_tracing.ended_outputs == [{"output": "ok"}] # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_claude_code_async.py b/tests/lib/core/harness/test_harness_claude_code_async.py new file mode 100644 index 000000000..c622d25c1 --- /dev/null +++ b/tests/lib/core/harness/test_harness_claude_code_async.py @@ -0,0 +1,248 @@ +"""Integration test: async (Redis-streaming) channel with a claude-code turn. + +Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + ClaudeCodeTurn) +with hand-built claude-code ``stream-json`` envelopes and a fake streaming +backend so the test runs fully offline (no claude-code CLI subprocess, no +Redis, no Agentex server). + +Native envelope shapes are copied verbatim from the claude-code turn test and +conformance fixtures (assistant tool_use -> Start(ToolRequestContent)+Done; +user tool_result -> Full(ToolResponseContent); assistant text -> +Start(TextContent)+Delta+Done; result envelope -> usage). + +What is tested +-------------- +- auto_send pushes the correct message contexts: tool_request + tool_response + + text (in that order). +- TurnResult.final_text equals the final assistant text. +- TurnResult.usage reflects the claude-code ``result`` envelope (input/output + tokens, cost, num_llm_calls from num_turns). 
+- With a SpanTracer + fake tracing, a tool span is derived on the async path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual Redis streaming. +- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle. +- A real claude-code CLI subprocess / live model behaviour. + +See also: test_harness_claude_code_sync.py and test_harness_claude_code_temporal.py. +""" + +from __future__ import annotations + +from typing import Any, AsyncIterator + +import pytest + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.types import TurnResult +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Native claude-code envelope fixtures +# --------------------------------------------------------------------------- + + +def _tool_then_text_envelopes() -> list[dict[str, Any]]: + return [ + { + "type": "assistant", + "message": { + "content": [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"path": "/workspace/README.md"}, + } + ] + }, + }, + { + "type": "user", + "message": { + "content": [ + { + "type": "tool_result", + "tool_use_id": "call_read", + "content": "# My Project — temperature 72F", + } + ] + }, + }, + { + "type": "assistant", + "message": {"content": [{"type": "text", "text": "The project file says 72F."}]}, + }, + { + "type": "result", + "usage": {"input_tokens": 200, "output_tokens": 80}, + "cost_usd": 0.015, + "num_turns": 2, + }, + ] + + +async def _aiter(envelopes: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in envelopes: + yield e + 
+ +# --------------------------------------------------------------------------- +# Fake streaming backend +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None: + self.sink = sink + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + self.sink.append(("open", self.ctype, self.task_message.content)) + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + self.sink.append(("close", self.ctype)) + + async def stream_update(self, update: Any) -> Any: + self.sink.append(("delta", self.ctype, update)) + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.sink: list[Any] = [] + self.messages_opened: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + return _FakeCtx(self.sink, ctype, initial_content) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_auto_send_turn( + envelopes: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> tuple[TurnResult, _FakeStreaming]: + fake_streaming = _FakeStreaming() + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = ClaudeCodeTurn(_aiter(envelopes)) + emitter = UnifiedEmitter( + 
task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestAsyncAutoSendMessageOrder: + async def test_tool_request_pushed_before_tool_response(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + + async def test_text_pushed_last(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert types[-1] == "text", f"Expected last type=text, got {types}" + + +class TestAsyncAutoSendContent: + async def test_tool_request_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)] + assert len(tool_reqs) == 1 + assert tool_reqs[0].name == "Read" + + async def test_tool_response_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)] + assert len(tool_resps) == 1 + assert "72F" in str(tool_resps[0].content) + + async def test_tool_call_ids_match(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in 
fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id == "call_read" + + +class TestAsyncAutoSendFinalTextAndUsage: + async def test_final_text_matches_last_text(self) -> None: + result, _ = await _run_auto_send_turn(_tool_then_text_envelopes()) + assert result.final_text == "The project file says 72F." + + async def test_usage_from_result_envelope(self) -> None: + """TurnResult.usage reflects the claude-code result envelope.""" + result, _ = await _run_auto_send_turn(_tool_then_text_envelopes()) + assert result.usage is not None + assert result.usage.input_tokens == 200 + assert result.usage.output_tokens == 80 + assert result.usage.total_tokens == 280 + assert result.usage.cost_usd == pytest.approx(0.015) + assert result.usage.num_llm_calls == 2 + + async def test_context_lifecycle_open_then_close(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + opens = [e for e in fake_streaming.sink if e[0] == "open"] + closes = [e for e in fake_streaming.sink if e[0] == "close"] + assert len(opens) == len(closes) + assert len(opens) == len(fake_streaming.messages_opened) + + +class TestAsyncAutoSendSpanDerivation: + async def test_tool_span_derived_on_async_path(self) -> None: + fake_tracing = FakeTracing() + await _run_auto_send_turn( + _tool_then_text_envelopes(), + trace_id="trace1", + parent_span_id="parent", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert fake_tracing.started[0][0] == "Read" + assert len(fake_tracing.ended) == 1 + assert "72F" in str(fake_tracing.ended[0][1]) diff --git a/tests/lib/core/harness/test_harness_claude_code_sync.py b/tests/lib/core/harness/test_harness_claude_code_sync.py new file mode 100644 index 000000000..b53485499 --- /dev/null +++ b/tests/lib/core/harness/test_harness_claude_code_sync.py @@ -0,0 +1,303 @@ +"""Integration test: sync (HTTP-yield) channel with a claude-code turn. 
+ +Exercises the unified harness surface (UnifiedEmitter.yield_turn + ClaudeCodeTurn) +with hand-built claude-code ``stream-json`` envelopes so the test runs fully +offline (no claude-code CLI subprocess, no API keys, no Agentex server). + +Native stream shapes +--------------------- +``ClaudeCodeTurn`` consumes an async iterator of raw claude-code stream-json +envelopes (str | dict). The envelope shapes used here are copied verbatim from +the claude-code turn test (tests/lib/adk/test_claude_code_turn.py) and the +claude-code conformance fixtures +(tests/lib/core/harness/conformance/test_claude_code_conformance.py): + + assistant text block -> Start(TextContent) + Delta + Done + assistant tool_use -> Start(ToolRequestContent) + Done + user tool_result -> Full(ToolResponseContent) + assistant thinking -> Start(ReasoningContent) + Delta + Done + +What is tested +-------------- +- The sync handler forwards StreamTaskMessage* events in canonical order: + tool_request (Start+Done) -> tool_response (Full) -> text. +- The tool_response carries the tool_result content, keyed by tool_use_id. +- With a trace_id + fake tracing, the SpanDeriver opens a tool span on + Done(tool_request) and closes it on the matching Full(tool_response), and + opens/closes a reasoning span for a thinking block. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual HTTP streaming over the ACP sync endpoint. +- A real claude-code CLI subprocess / live model behaviour. +- The full FastACP request/response lifecycle. + +See also: test_harness_claude_code_async.py and test_harness_claude_code_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator, override + +from agentex.lib.core.harness.types import OpenSpan, CloseSpan +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Native claude-code envelope fixtures (copied from the turn + conformance tests) +# --------------------------------------------------------------------------- + + +def _tool_then_text_envelopes() -> list[dict[str, Any]]: + """tool_use -> tool_result -> final text, then a result envelope with usage.""" + return [ + { + "type": "assistant", + "message": { + "content": [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"path": "/workspace/README.md"}, + } + ] + }, + }, + { + "type": "user", + "message": { + "content": [ + { + "type": "tool_result", + "tool_use_id": "call_read", + "content": "# My Project — temperature 72F", + } + ] + }, + }, + { + "type": "assistant", + "message": {"content": [{"type": "text", "text": "The project file says 72F."}]}, + }, + { + "type": "result", + "usage": {"input_tokens": 100, "output_tokens": 50}, + "cost_usd": 0.01, + "num_turns": 2, + }, + ] + + +def _thinking_envelopes() -> list[dict[str, Any]]: + return [ + { + "type": "assistant", + "message": { + "content": [ + {"type": "thinking", "thinking": "Let me think.\nStep 1: check the facts."}, + {"type": "text", "text": "Here is my answer."}, + ] + }, + }, + {"type": "result", "usage": {"input_tokens": 10, "output_tokens": 5}}, + ] + + 
+async def _aiter(envelopes: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in envelopes: + yield e + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_yield_turn( + envelopes: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> list[Any]: + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = ClaudeCodeTurn(_aiter(envelopes)) + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + ) + return [ev async for ev in emitter.yield_turn(turn)] + + +# --------------------------------------------------------------------------- +# Tests: event order and content +# --------------------------------------------------------------------------- + + +class TestSyncYieldEventOrder: + async def test_tool_request_precedes_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + content_types = [ + getattr(getattr(ev, "content", None), "type", None) + for ev in events + if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull)) + ] + assert "tool_request" in content_types + assert "tool_response" in content_types + assert content_types.index("tool_request") < content_types.index("tool_response") + + async def test_text_appears_after_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + tool_resp_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageFull) + and getattr(getattr(ev, "content", None), "type", None) == "tool_response" + ) + text_start_pos = next( + i + for i, ev 
in enumerate(events) + if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text" + ) + assert tool_resp_pos < text_start_pos + + async def test_tool_response_carries_result_keyed_by_tool_use_id(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + full_responses = [ + ev.content + for ev in events + if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent) + ] + assert len(full_responses) == 1 + tool_response = full_responses[0] + assert isinstance(tool_response, ToolResponseContent) + assert tool_response.tool_call_id == "call_read" + assert "72F" in str(tool_response.content) + + async def test_tool_request_is_read(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + tool_reqs = [ + ev.content + for ev in events + if isinstance(getattr(ev, "content", None), ToolRequestContent) + ] + assert any(isinstance(c, ToolRequestContent) and c.name == "Read" for c in tool_reqs) + + async def test_every_start_has_matching_done(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + starts = {ev.index for ev in events if isinstance(ev, StreamTaskMessageStart)} + dones = {ev.index for ev in events if isinstance(ev, StreamTaskMessageDone)} + assert starts == dones, f"Unmatched Start/Done indices: starts={starts} dones={dones}" + + +# --------------------------------------------------------------------------- +# Tests: span derivation on the yield path +# --------------------------------------------------------------------------- + + +class TestSyncYieldSpanDerivation: + async def test_tool_span_opened_and_closed(self) -> None: + """Done(tool_request) opens a tool span; Full(tool_response) closes it.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_envelopes(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) 
== 1 + assert len(fake_tracing.ended) == 1 + name, parent_id, _ = fake_tracing.started[0] + assert name == "Read" + assert parent_id == "parent-span" + + async def test_tool_span_output_is_tool_result(self) -> None: + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_envelopes(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + name, output = fake_tracing.ended[0] + assert name == "Read" + assert "72F" in str(output) + + async def test_reasoning_span_for_thinking_block(self) -> None: + """A thinking block opens and closes a reasoning span.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _thinking_envelopes(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert fake_tracing.started_names == ["reasoning"] + assert len(fake_tracing.ended) == 1 + + async def test_no_trace_id_means_no_spans(self) -> None: + fake_tracing = FakeTracing() + turn = ClaudeCodeTurn(_aiter(_tool_then_text_envelopes())) + emitter = UnifiedEmitter(task_id="task1", trace_id=None, parent_span_id=None, tracing=fake_tracing) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_tracer_false_suppresses_spans(self) -> None: + fake_tracing = FakeTracing() + turn = ClaudeCodeTurn(_aiter(_tool_then_text_envelopes())) + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent-span", + tracer=False, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_span_signal_types(self) -> None: + received_signals: list[Any] = [] + + class _RecordingTracer(SpanTracer): + @override + async def handle(self, signal: Any) -> None: + received_signals.append(signal) + await super().handle(signal) + + fake_tracing = FakeTracing() + tracer = _RecordingTracer( + trace_id="trace1", + 
parent_span_id="parent", + task_id="task1", + tracing=fake_tracing, + ) + turn = ClaudeCodeTurn(_aiter(_tool_then_text_envelopes())) + emitter = UnifiedEmitter(task_id="task1", trace_id="trace1", parent_span_id="parent", tracer=tracer) + [_ async for _ in emitter.yield_turn(turn)] + + tool_signals = [s for s in received_signals if getattr(s, "name", None) == "Read"] + assert len(tool_signals) >= 1 + assert isinstance(received_signals[0], OpenSpan) + assert any(isinstance(s, CloseSpan) for s in received_signals) diff --git a/tests/lib/core/harness/test_harness_claude_code_temporal.py b/tests/lib/core/harness/test_harness_claude_code_temporal.py new file mode 100644 index 000000000..b643f0d20 --- /dev/null +++ b/tests/lib/core/harness/test_harness_claude_code_temporal.py @@ -0,0 +1,183 @@ +"""Integration test: Temporal channel with a claude-code turn, offline. + +The claude-code tap is a pure library adapter (no Temporal-specific helper such +as langgraph's ``stream_langgraph_events``). In a Temporal deployment the +claude-code CLI runs inside a Temporal activity and the resulting canonical +stream is delivered via the SAME ``UnifiedEmitter.auto_send_turn`` path used by +the non-temporal async channel. The only temporal-specific concern at the +harness boundary is that the activity stamps messages with a deterministic +``created_at`` (e.g. ``workflow.now()``) for replay determinism. + +This suite therefore exercises the auto_send path inside an activity-style call +plus the temporal-only contract: ``created_at`` is threaded through to every +streaming context. The native claude-code envelope shapes are copied verbatim +from the claude-code turn test / conformance fixtures. + +What is tested +-------------- +- The canonical message sequence (tool_request -> tool_response -> text) is + delivered via auto_send_turn, exactly as inside a Temporal activity. 
+- ``created_at`` passed to ``auto_send_turn`` is forwarded to every + ``streaming_task_message_context`` call (deterministic timestamping). +- Final text + usage from the result envelope are returned. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Temporal scheduling / durability / replay behaviour. +- Redis streaming (requires a running Redis instance). +- A real claude-code CLI subprocess / live model behaviour. + +See also: test_harness_claude_code_sync.py and test_harness_claude_code_async.py. +""" + +from __future__ import annotations + +from typing import Any, AsyncIterator +from datetime import datetime, timezone + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn + + +def _tool_then_text_envelopes() -> list[dict[str, Any]]: + return [ + { + "type": "assistant", + "message": { + "content": [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"path": "/workspace/README.md"}, + } + ] + }, + }, + { + "type": "user", + "message": { + "content": [ + {"type": "tool_result", "tool_use_id": "call_read", "content": "# My Project — 72F"} + ] + }, + }, + { + "type": "assistant", + "message": {"content": [{"type": "text", "text": "The project file says 72F."}]}, + }, + {"type": "result", "usage": {"input_tokens": 50, "output_tokens": 20}, "num_turns": 2}, + ] + + +async def _aiter(envelopes: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in envelopes: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend that records created_at +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def 
__init__(self, ctype: str, initial_content: Any) -> None: + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + pass + + async def stream_update(self, update: Any) -> Any: + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.messages_opened: list[Any] = [] + self.created_ats: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + self.created_ats.append(created_at) + return _FakeCtx(ctype, initial_content) + + +async def _run_activity( + envelopes: list[dict[str, Any]], created_at: datetime | None +) -> tuple[Any, _FakeStreaming]: + fake_streaming = _FakeStreaming() + turn = ClaudeCodeTurn(_aiter(envelopes)) + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn, created_at=created_at) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTemporalActivityDelivery: + async def test_canonical_sequence_delivered(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=None) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + assert types[-1] == "text" + + async def 
test_tool_round_trip_keyed_correctly(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=None) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id == "call_read" + + async def test_final_text_and_usage(self) -> None: + result, _ = await _run_activity(_tool_then_text_envelopes(), created_at=None) + assert result.final_text == "The project file says 72F." + assert result.usage.input_tokens == 50 + assert result.usage.num_llm_calls == 2 + + +class TestTemporalCreatedAtThreading: + async def test_created_at_threaded_to_all_contexts(self) -> None: + fixed = datetime(2026, 6, 22, 12, 0, 0, tzinfo=timezone.utc) + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=fixed) + assert len(fake_streaming.created_ats) == len(fake_streaming.messages_opened) + assert all(ts == fixed for ts in fake_streaming.created_ats), ( + f"Expected every context stamped with {fixed}, got {fake_streaming.created_ats}" + ) + + async def test_default_created_at_is_none(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=None) + assert all(ts is None for ts in fake_streaming.created_ats) + + async def test_created_at_deterministic_across_runs(self) -> None: + fixed = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + _, first = await _run_activity(_tool_then_text_envelopes(), created_at=fixed) + _, second = await _run_activity(_tool_then_text_envelopes(), created_at=fixed) + assert first.created_ats == second.created_ats diff --git a/tests/lib/core/harness/test_harness_codex_async.py b/tests/lib/core/harness/test_harness_codex_async.py new file mode 100644 index 000000000..c31ebfa49 --- /dev/null +++ b/tests/lib/core/harness/test_harness_codex_async.py @@ -0,0 +1,228 @@ 
+"""Integration test: async (Redis-streaming) channel with a codex turn. + +Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + CodexTurn) +with hand-built codex ``exec --json`` event dicts and a fake streaming backend +so the test runs fully offline (no codex CLI subprocess, no Redis, no Agentex +server). + +Native event shapes are copied verbatim from the codex turn test / conformance +fixtures (command_execution -> tool round-trip; agent_message -> text; +turn.completed -> usage). + +What is tested +-------------- +- auto_send pushes the correct message contexts: tool_request + tool_response + + text (in that order). +- TurnResult.final_text equals the final agent_message text. +- TurnResult.usage reflects the codex ``turn.completed`` usage (input/output/ + total tokens) plus the locally-counted num_tool_calls. +- With a SpanTracer + fake tracing, a tool span is derived on the async path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual Redis streaming. +- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle. +- A real codex CLI subprocess / live model behaviour. + +See also: test_harness_codex_sync.py and test_harness_codex_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.types import TurnResult +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._codex_turn import CodexTurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Native codex event fixtures +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[dict[str, Any]]: + return [ + {"type": "thread.started", "thread_id": "thread-abc"}, + { + "type": "item.started", + "item": {"id": "tool1", "type": "command_execution", "command": "cat weather.txt"}, + }, + { + "type": "item.completed", + "item": { + "id": "tool1", + "type": "command_execution", + "command": "cat weather.txt", + "aggregated_output": "sunny and 72F", + "exit_code": 0, + }, + }, + {"type": "item.started", "item": {"id": "msg1", "type": "agent_message", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "msg1", "type": "agent_message", "text": "The weather is sunny and 72F."}, + }, + { + "type": "turn.completed", + "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28}, + }, + ] + + +async def _aiter(events: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None: + self.sink = sink + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", 
task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + self.sink.append(("open", self.ctype, self.task_message.content)) + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + self.sink.append(("close", self.ctype)) + + async def stream_update(self, update: Any) -> Any: + self.sink.append(("delta", self.ctype, update)) + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.sink: list[Any] = [] + self.messages_opened: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + return _FakeCtx(self.sink, ctype, initial_content) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_auto_send_turn( + events: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> tuple[TurnResult, _FakeStreaming]: + fake_streaming = _FakeStreaming() + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = CodexTurn(_aiter(events), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# 
--------------------------------------------------------------------------- + + +class TestAsyncAutoSendMessageOrder: + async def test_tool_request_pushed_before_tool_response(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + + async def test_text_pushed_last(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert types[-1] == "text", f"Expected last type=text, got {types}" + + +class TestAsyncAutoSendContent: + async def test_tool_response_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)] + assert len(tool_resps) == 1 + assert "72F" in str(tool_resps[0].content) + + async def test_tool_call_ids_match(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id + + +class TestAsyncAutoSendFinalTextAndUsage: + async def test_final_text_matches_last_text(self) -> None: + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.final_text == "The weather is sunny and 72F." 
+ + async def test_usage_from_turn_completed(self) -> None: + """TurnResult.usage reflects the codex turn.completed usage + tool count.""" + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.usage is not None + assert result.usage.input_tokens == 20 + assert result.usage.output_tokens == 8 + assert result.usage.total_tokens == 28 + assert result.usage.model == "o4-mini" + assert result.usage.num_tool_calls == 1 + assert result.usage.num_llm_calls == 1 + + async def test_context_lifecycle_open_then_close(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + opens = [e for e in fake_streaming.sink if e[0] == "open"] + closes = [e for e in fake_streaming.sink if e[0] == "close"] + assert len(opens) == len(closes) + assert len(opens) == len(fake_streaming.messages_opened) + + +class TestAsyncAutoSendSpanDerivation: + async def test_tool_span_derived_on_async_path(self) -> None: + fake_tracing = FakeTracing() + await _run_auto_send_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert len(fake_tracing.ended) == 1 + assert "72F" in str(fake_tracing.ended[0][1]) diff --git a/tests/lib/core/harness/test_harness_codex_sync.py b/tests/lib/core/harness/test_harness_codex_sync.py new file mode 100644 index 000000000..6129716ee --- /dev/null +++ b/tests/lib/core/harness/test_harness_codex_sync.py @@ -0,0 +1,276 @@ +"""Integration test: sync (HTTP-yield) channel with a codex turn. + +Exercises the unified harness surface (UnifiedEmitter.yield_turn + CodexTurn) +with hand-built codex ``exec --json`` event dicts so the test runs fully offline +(no codex CLI subprocess, no API keys, no Agentex server). + +Native stream shapes +--------------------- +``CodexTurn`` consumes an async iterator of raw codex events (str | dict). 
The +event shapes used here are copied verbatim from the codex turn test +(tests/lib/adk/test_codex_turn.py) and the codex conformance fixtures +(tests/lib/core/harness/conformance/test_codex_conformance.py): + + command_execution item -> Start(ToolRequestContent) + Done + Full(ToolResponseContent) + agent_message item -> Start(TextContent) + ... + Full/Done + reasoning item -> Start(ReasoningContent) + deltas + Done + turn.completed -> usage + +Reasoning note +-------------- +The codex converter emits reasoning as Start(ReasoningContent) + deltas + Done. +The SpanDeriver opens a reasoning span on Start and closes it normally when the +Done event is observed (is_complete=True). + +What is tested +-------------- +- The sync handler forwards StreamTaskMessage* events in canonical order: + tool_request (Start+Done) -> tool_response (Full) -> text. +- The tool_response carries the command output, keyed by item id. +- With a trace_id + fake tracing, a tool span is opened on Done(tool_request) + and closed on the matching Full(tool_response), and a reasoning span is + opened and closed normally for a reasoning item. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual HTTP streaming over the ACP sync endpoint. +- A real codex CLI subprocess / live model behaviour. +- The full FastACP request/response lifecycle. + +See also: test_harness_codex_async.py and test_harness_codex_temporal.py.
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator, override + +from agentex.lib.core.harness.types import OpenSpan, CloseSpan +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageFull, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._codex_turn import CodexTurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Native codex event fixtures (copied from the turn + conformance tests) +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[dict[str, Any]]: + """A command_execution tool round-trip followed by a final text reply.""" + return [ + {"type": "thread.started", "thread_id": "thread-abc"}, + {"type": "turn.started"}, + { + "type": "item.started", + "item": {"id": "tool1", "type": "command_execution", "command": "cat weather.txt"}, + }, + { + "type": "item.completed", + "item": { + "id": "tool1", + "type": "command_execution", + "command": "cat weather.txt", + "aggregated_output": "sunny and 72F", + "exit_code": 0, + }, + }, + {"type": "item.started", "item": {"id": "msg1", "type": "agent_message", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "msg1", "type": "agent_message", "text": "The weather is sunny and 72F."}, + }, + { + "type": "turn.completed", + "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28}, + }, + ] + + +def _reasoning_events() -> list[dict[str, Any]]: + return [ + {"type": "thread.started", "thread_id": "thread-reason"}, + {"type": "item.started", "item": {"id": "r1", "type": "reasoning", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "r1", "type": 
"reasoning", "text": "Step 1: analyze\nStep 2: solve"}, + }, + {"type": "item.started", "item": {"id": "msg2", "type": "agent_message", "text": ""}}, + {"type": "item.completed", "item": {"id": "msg2", "type": "agent_message", "text": "42"}}, + {"type": "turn.completed", "usage": {"input_tokens": 30, "output_tokens": 20, "total_tokens": 50}}, + ] + + +async def _aiter(events: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_yield_turn( + events: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> list[Any]: + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = CodexTurn(_aiter(events), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + ) + return [ev async for ev in emitter.yield_turn(turn)] + + +# --------------------------------------------------------------------------- +# Tests: event order and content +# --------------------------------------------------------------------------- + + +class TestSyncYieldEventOrder: + async def test_tool_request_precedes_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + content_types = [ + getattr(getattr(ev, "content", None), "type", None) + for ev in events + if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull)) + ] + assert "tool_request" in content_types + assert "tool_response" in content_types + assert content_types.index("tool_request") < content_types.index("tool_response") + + 
async def test_text_appears_after_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + tool_resp_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageFull) + and getattr(getattr(ev, "content", None), "type", None) == "tool_response" + ) + text_start_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text" + ) + assert tool_resp_pos < text_start_pos + + async def test_tool_response_carries_command_output(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + full_responses = [ + ev.content + for ev in events + if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent) + ] + assert len(full_responses) == 1 + tool_response = full_responses[0] + assert isinstance(tool_response, ToolResponseContent) + assert "72F" in str(tool_response.content) + + async def test_tool_request_present(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + tool_reqs = [ + ev.content for ev in events if isinstance(getattr(ev, "content", None), ToolRequestContent) + ] + assert len(tool_reqs) == 1 + + +# --------------------------------------------------------------------------- +# Tests: span derivation on the yield path +# --------------------------------------------------------------------------- + + +class TestSyncYieldSpanDerivation: + async def test_tool_span_opened_and_closed(self) -> None: + """Done(tool_request) opens a tool span; Full(tool_response) closes it.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert len(fake_tracing.ended) == 1 + _name, parent_id, _input = fake_tracing.started[0] + assert parent_id == "parent-span" + + async def 
test_tool_span_output_is_command_output(self) -> None: + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + _name, output = fake_tracing.ended[0] + assert "72F" in str(output) + + async def test_reasoning_span_opened_then_done_closed(self) -> None: + """A codex reasoning item emits Start+Delta+Done: the reasoning span + opens and is closed normally when the Done event is observed.""" + received_signals: list[Any] = [] + + class _RecordingTracer(SpanTracer): + @override + async def handle(self, signal: Any) -> None: + received_signals.append(signal) + await super().handle(signal) + + fake_tracing = FakeTracing() + tracer = _RecordingTracer( + trace_id="trace1", + parent_span_id="parent-span", + task_id="task1", + tracing=fake_tracing, + ) + turn = CodexTurn(_aiter(_reasoning_events()), model="o4-mini") + emitter = UnifiedEmitter(task_id="task1", trace_id="trace1", parent_span_id="parent-span", tracer=tracer) + [_ async for _ in emitter.yield_turn(turn)] + + opens = [s for s in received_signals if isinstance(s, OpenSpan) and s.kind == "reasoning"] + closes = [s for s in received_signals if isinstance(s, CloseSpan) and str(s.key).startswith("reasoning:")] + assert len(opens) == 1, "Reasoning Start must open exactly one reasoning span" + assert len(closes) == 1, "Reasoning span must close exactly once" + assert closes[0].is_complete is True, "Done event closes the reasoning span as complete" + + async def test_no_trace_id_means_no_spans(self) -> None: + fake_tracing = FakeTracing() + turn = CodexTurn(_aiter(_tool_then_text_events()), model="o4-mini") + emitter = UnifiedEmitter(task_id="task1", trace_id=None, parent_span_id=None, tracing=fake_tracing) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_tracer_false_suppresses_spans(self) -> None: + fake_tracing = 
FakeTracing() + turn = CodexTurn(_aiter(_tool_then_text_events()), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent-span", + tracer=False, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] diff --git a/tests/lib/core/harness/test_harness_codex_temporal.py b/tests/lib/core/harness/test_harness_codex_temporal.py new file mode 100644 index 000000000..0af0b862b --- /dev/null +++ b/tests/lib/core/harness/test_harness_codex_temporal.py @@ -0,0 +1,180 @@ +"""Integration test: Temporal channel with a codex turn, offline. + +The codex tap is a pure library adapter (subprocess/sandbox provisioning lives +in the golden agent; there is no codex-specific temporal helper like langgraph's +``stream_langgraph_events``). In a Temporal deployment the codex CLI runs inside +a Temporal activity and the resulting canonical stream is delivered via the SAME +``UnifiedEmitter.auto_send_turn`` path used by the non-temporal async channel. +The only temporal-specific concern at the harness boundary is that the activity +stamps messages with a deterministic ``created_at`` (e.g. ``workflow.now()``) +for replay determinism. + +This suite exercises the auto_send path inside an activity-style call plus the +temporal-only contract: ``created_at`` is threaded through to every streaming +context. The native codex event shapes are copied verbatim from the codex turn +test / conformance fixtures. + +What is tested +-------------- +- The canonical message sequence (tool_request -> tool_response -> text) is + delivered via auto_send_turn, exactly as inside a Temporal activity. +- ``created_at`` passed to ``auto_send_turn`` is forwarded to every + ``streaming_task_message_context`` call (deterministic timestamping). +- Final text + usage from turn.completed are returned. 
+ +What is NOT covered without live infrastructure +----------------------------------------------- +- Temporal scheduling / durability / replay behaviour. +- Redis streaming (requires a running Redis instance). +- A real codex CLI subprocess / live model behaviour. + +See also: test_harness_codex_sync.py and test_harness_codex_async.py. +""" + +from __future__ import annotations + +from typing import Any, AsyncIterator +from datetime import datetime, timezone + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._codex_turn import CodexTurn + + +def _tool_then_text_events() -> list[dict[str, Any]]: + return [ + {"type": "thread.started", "thread_id": "thread-abc"}, + { + "type": "item.started", + "item": {"id": "tool1", "type": "command_execution", "command": "cat weather.txt"}, + }, + { + "type": "item.completed", + "item": { + "id": "tool1", + "type": "command_execution", + "command": "cat weather.txt", + "aggregated_output": "sunny and 72F", + "exit_code": 0, + }, + }, + {"type": "item.started", "item": {"id": "msg1", "type": "agent_message", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "msg1", "type": "agent_message", "text": "The weather is sunny and 72F."}, + }, + { + "type": "turn.completed", + "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28}, + }, + ] + + +async def _aiter(events: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend that records created_at +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, ctype: str, initial_content: Any) -> None: + self.ctype = ctype + 
self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + pass + + async def stream_update(self, update: Any) -> Any: + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.messages_opened: list[Any] = [] + self.created_ats: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + self.created_ats.append(created_at) + return _FakeCtx(ctype, initial_content) + + +async def _run_activity(events: list[dict[str, Any]], created_at: datetime | None) -> tuple[Any, _FakeStreaming]: + fake_streaming = _FakeStreaming() + turn = CodexTurn(_aiter(events), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn, created_at=created_at) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTemporalActivityDelivery: + async def test_canonical_sequence_delivered(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=None) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + assert types[-1] == "text" + + async def test_tool_round_trip_keyed_correctly(self) -> None: + _, fake_streaming = await 
_run_activity(_tool_then_text_events(), created_at=None) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id + + async def test_final_text_and_usage(self) -> None: + result, _ = await _run_activity(_tool_then_text_events(), created_at=None) + assert result.final_text == "The weather is sunny and 72F." + assert result.usage.total_tokens == 28 + assert result.usage.num_tool_calls == 1 + + +class TestTemporalCreatedAtThreading: + async def test_created_at_threaded_to_all_contexts(self) -> None: + fixed = datetime(2026, 6, 22, 12, 0, 0, tzinfo=timezone.utc) + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert len(fake_streaming.created_ats) == len(fake_streaming.messages_opened) + assert all(ts == fixed for ts in fake_streaming.created_ats), ( + f"Expected every context stamped with {fixed}, got {fake_streaming.created_ats}" + ) + + async def test_default_created_at_is_none(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=None) + assert all(ts is None for ts in fake_streaming.created_ats) + + async def test_created_at_deterministic_across_runs(self) -> None: + fixed = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + _, first = await _run_activity(_tool_then_text_events(), created_at=fixed) + _, second = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert first.created_ats == second.created_ats diff --git a/tests/lib/core/harness/test_harness_langgraph_async.py b/tests/lib/core/harness/test_harness_langgraph_async.py index 39bf5bc66..09e92102b 100644 --- a/tests/lib/core/harness/test_harness_langgraph_async.py +++ b/tests/lib/core/harness/test_harness_langgraph_async.py @@ -13,10 +13,10 @@ -------------- - The async handler pushes the correct sequence of 
messages to the fake streaming backend: Full(ToolRequest) + Full(ToolResponse) + text Start/Delta/Done. -- final_text accumulates all text (not just last segment — AGX1-377 unified behavior). +- final_text accumulates all text (not just last segment — unified behavior). - Tool messages go through streaming_task_message_context (not messages.create). -- With a SpanTracer, no tool spans are produced (AGX1-377: Full events are not - handled by SpanDeriver today). +- With a SpanTracer, Full tool events produce tool spans (request opens, response + closes), aligning LangGraph tracing with the Start+Done harnesses. What is NOT covered without live infrastructure ----------------------------------------------- @@ -46,6 +46,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Remove conftest stubs so real langchain_core types are used # --------------------------------------------------------------------------- @@ -102,30 +104,6 @@ def streaming_task_message_context(self, task_id: str, initial_content: Any, **k return ctx -# --------------------------------------------------------------------------- -# Fake tracing backend -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, Any]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span(self, *, trace_id: str, name: str, **kw: Any) -> _FakeSpan: - self.started.append((name, kw.get("parent_id"))) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # 
--------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -142,9 +120,9 @@ async def _gen(): async def _run_auto_send_turn( stream_events: list[tuple[str, Any]], trace_id: str | None = None, -) -> tuple[TurnResult, _FakeStreaming, _FakeTracing | None]: +) -> tuple[TurnResult, _FakeStreaming, FakeTracing | None]: fake_streaming = _FakeStreaming() - fake_tracing = _FakeTracing() if trace_id else None + fake_tracing = FakeTracing() if trace_id else None tracer: SpanTracer | bool = False if trace_id and fake_tracing is not None: @@ -275,7 +253,7 @@ async def test_turn_usage_populated_after_events_consumed(self): assert usage.total_tokens == 15 async def test_tracer_produces_tool_spans_for_full_events(self): - """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes). + """SpanDeriver handles Full tool events (request opens, response closes). Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it. This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents). 
diff --git a/tests/lib/core/harness/test_harness_langgraph_sync.py b/tests/lib/core/harness/test_harness_langgraph_sync.py index 9f67dd2b6..67d213b6a 100644 --- a/tests/lib/core/harness/test_harness_langgraph_sync.py +++ b/tests/lib/core/harness/test_harness_langgraph_sync.py @@ -46,6 +46,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Remove conftest stubs so real langchain_core types are used # --------------------------------------------------------------------------- @@ -62,32 +64,6 @@ def _real_langchain_core(): sys.modules.update(saved) -# --------------------------------------------------------------------------- -# Fake tracing backend -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, Any]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span( - self, *, trace_id: str, name: str, input: Any = None, parent_id: Any = None, **kw: Any - ) -> _FakeSpan: - self.started.append((name, parent_id)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -103,8 +79,8 @@ async def _gen(): async def _run_yield_turn( stream_events: list[tuple[str, Any]], trace_id: str | None = None -) -> tuple[list[Any], _FakeTracing | None]: - fake_tracing = _FakeTracing() if trace_id else None +) -> tuple[list[Any], FakeTracing | None]: + fake_tracing = FakeTracing() if trace_id else None 
tracer: SpanTracer | bool | None = None if trace_id and fake_tracing is not None: tracer = SpanTracer(trace_id=trace_id, parent_span_id=None, task_id="task1", tracing=fake_tracing) @@ -191,7 +167,7 @@ async def test_empty_stream_yields_nothing(self): assert out == [] async def test_tracer_produces_tool_spans_for_full_events(self): - """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes). + """SpanDeriver handles Full tool events (request opens, response closes). Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it. This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents). diff --git a/tests/lib/core/harness/test_harness_langgraph_temporal.py b/tests/lib/core/harness/test_harness_langgraph_temporal.py index 1a094a33c..219e92229 100644 --- a/tests/lib/core/harness/test_harness_langgraph_temporal.py +++ b/tests/lib/core/harness/test_harness_langgraph_temporal.py @@ -1,7 +1,7 @@ """Integration test: Temporal channel with a LangGraph agent. -The Temporal LangGraph agent pattern uses ``emit_langgraph_messages`` (from -``_langgraph_messages.py``) inside a Temporal activity. That module is not +The Temporal LangGraph agent pattern uses ``emit_langgraph_messages`` (now in +``_langgraph_sync.py``) inside a Temporal activity. That helper is not yet unified onto the harness surface (it has its own Redis-streaming code). 
This test file verifies the LangGraph Temporal agent's streaming behavior using @@ -43,8 +43,7 @@ from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn -from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn, stream_langgraph_events # --------------------------------------------------------------------------- # Remove conftest stubs so real langchain_core types are used diff --git a/tests/lib/core/harness/test_harness_openai_async.py b/tests/lib/core/harness/test_harness_openai_async.py new file mode 100644 index 000000000..1329b94b9 --- /dev/null +++ b/tests/lib/core/harness/test_harness_openai_async.py @@ -0,0 +1,305 @@ +"""Integration test: async (Redis-streaming) channel with an OpenAI-agents turn. + +Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + OpenAITurn) +with hand-built canonical StreamTaskMessage* streams and a fake streaming +backend so the test runs fully offline (no API keys, no Redis, no Agentex +server). + +The canonical event shapes are copied from the OpenAI converter contract +(see tests/lib/core/harness/conformance/test_openai_conformance.py): tool calls +are Full(ToolRequestContent) + Full(ToolResponseContent); text is +Start+Delta+Done. + +What is tested +-------------- +- auto_send pushes the correct message contexts to the fake streaming backend: + tool_request + tool_response + text (in that order). +- TurnResult.final_text equals the accumulated text deltas. +- TurnResult carries a TurnUsage; via the OpenAITurn result/converter path the + aggregated token usage (input/output/total + num_llm_calls) is surfaced in + TurnResult.usage. +- With a SpanTracer + fake tracing, a tool span is derived on the async path. 
+ +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual Redis streaming. +- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle. +- A real Runner.run_streamed execution / live OpenAI model behaviour. + +See also: test_harness_openai_sync.py and test_harness_openai_temporal.py. +""" + +from __future__ import annotations + +from typing import Any + +import pytest +from agents.usage import Usage + +from agentex.types.text_delta import TextDelta +from agentex.types.task_message import TaskMessage +from agentex.types.text_content import TextContent +from agentex.lib.core.harness.types import TurnResult, StreamTaskMessage +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._openai_turn import OpenAITurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Canonical event fixtures (copied from the OpenAI converter contract) +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[StreamTaskMessage]: + return [ + StreamTaskMessageFull( + type="full", + index=0, + content=ToolRequestContent( + type="tool_request", + author="agent", + tool_call_id="call_1", + name="get_weather", + arguments={"city": "Paris"}, + ), + ), + StreamTaskMessageFull( + type="full", + index=1, + content=ToolResponseContent( + type="tool_response", + author="agent", + tool_call_id="call_1", + name="get_weather", + content="The weather in Paris is sunny and 72F", + ), + ), + StreamTaskMessageStart( + type="start", + 
index=2, + content=TextContent(type="text", author="agent", content=""), + ), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="Sunny ")), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="and 72F.")), + StreamTaskMessageDone(type="done", index=2), + ] + + +async def _canonical_stream(events: list[StreamTaskMessage]): + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend (replaces adk.streaming; no Redis required) +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None: + self.sink = sink + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + self.sink.append(("open", self.ctype, self.task_message.content)) + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + self.sink.append(("close", self.ctype)) + + async def stream_update(self, update: Any) -> Any: + self.sink.append(("delta", self.ctype, update)) + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.sink: list[Any] = [] + self.messages_opened: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + return _FakeCtx(self.sink, ctype, initial_content) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_auto_send_turn( + events: list[StreamTaskMessage], + 
trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> tuple[TurnResult, _FakeStreaming]: + fake_streaming = _FakeStreaming() + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests: message order and content +# --------------------------------------------------------------------------- + + +class TestAsyncAutoSendMessageOrder: + async def test_tool_request_pushed_before_tool_response(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in message_types + assert message_types.index("tool_request") < message_types.index("tool_response") + + async def test_text_pushed_last(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert message_types[-1] == "text", f"Expected last message type=text, got {message_types}" + + async def test_exactly_three_messages(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + assert len(fake_streaming.messages_opened) == 3, ( + f"Expected 3 messages, got {[getattr(m, 'type', None) for m in fake_streaming.messages_opened]}" + ) + + +class TestAsyncAutoSendContentVerification: + async def 
test_tool_request_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)] + assert len(tool_reqs) == 1 + assert tool_reqs[0].name == "get_weather" + + async def test_tool_response_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)] + assert len(tool_resps) == 1 + assert "72F" in str(tool_resps[0].content) + assert tool_resps[0].name == "get_weather" + + async def test_tool_call_ids_match(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id + + +class TestAsyncAutoSendFinalTextAndUsage: + async def test_final_text_matches_deltas(self) -> None: + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.final_text == "Sunny and 72F." 
+ + async def test_turn_result_has_usage(self) -> None: + """An injected canonical stream has no run to read usage from, so usage + carries only the model name (input_tokens stays None).""" + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.usage is not None + assert result.usage.model == "gpt-4o" + + async def test_context_lifecycle_open_then_close(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + opens = [e for e in fake_streaming.sink if e[0] == "open"] + closes = [e for e in fake_streaming.sink if e[0] == "close"] + assert len(opens) == len(closes) == 3 + + async def test_usage_populated_from_result_path(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Via the OpenAITurn result/converter path, aggregated token usage is + surfaced on TurnResult.usage after the stream is consumed. + + Mirrors the OpenAI turn test: a fake RunResultStreaming exposes + raw_responses with a Usage, and the converter is monkeypatched to a + passthrough so the canonical text stream is delivered while usage is read + from raw_responses. 
+ """ + import agentex.lib.adk._modules._openai_turn as turn_mod + + canonical: list[StreamTaskMessage] = [ + StreamTaskMessageStart( + type="start", index=0, content=TextContent(type="text", author="agent", content="") + ), + StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="hi")), + StreamTaskMessageDone(type="done", index=0), + ] + + class _FakeResult: + def __init__(self) -> None: + self.raw_responses = [ + type("R", (), {"usage": Usage(requests=2, input_tokens=8, output_tokens=4, total_tokens=12)})() + ] + + def stream_events(self): # type: ignore[no-untyped-def] + return _canonical_stream(canonical) + + async def _passthrough(stream): # type: ignore[no-untyped-def] + async for e in stream: + yield e + + monkeypatch.setattr(turn_mod, "convert_openai_to_agentex_events", _passthrough) + + turn = OpenAITurn(result=_FakeResult(), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=_FakeStreaming(), + ) + result = await emitter.auto_send_turn(turn) + + assert result.final_text == "hi" + assert result.usage.model == "gpt-4o" + assert result.usage.num_llm_calls == 2 + assert result.usage.input_tokens == 8 + assert result.usage.output_tokens == 4 + assert result.usage.total_tokens == 12 + + +class TestAsyncAutoSendSpanDerivation: + async def test_tool_span_derived_on_async_path(self) -> None: + fake_tracing = FakeTracing() + await _run_auto_send_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert fake_tracing.started[0][0] == "get_weather" + assert len(fake_tracing.ended) == 1 diff --git a/tests/lib/core/harness/test_harness_openai_sync.py b/tests/lib/core/harness/test_harness_openai_sync.py new file mode 100644 index 000000000..34a9b72c6 --- /dev/null +++ b/tests/lib/core/harness/test_harness_openai_sync.py @@ -0,0 +1,323 @@ +"""Integration 
test: sync (HTTP-yield) channel with an OpenAI-agents turn. + +Exercises the unified harness surface (UnifiedEmitter.yield_turn + OpenAITurn) +with hand-built canonical StreamTaskMessage* streams so the test runs fully +offline (no API keys, no live OpenAI Agents run, no Agentex server). + +Why an injected canonical stream +-------------------------------- +OpenAI's native ``RunResultStreaming`` events are heavy SDK objects; the +``OpenAITurn`` accepts a pre-built canonical ``stream=`` of StreamTaskMessage* +events that bypasses ``convert_openai_to_agentex_events``. The shapes used here +are copied verbatim from the OpenAI converter contract exercised by +``tests/lib/core/harness/conformance/test_openai_conformance.py`` (tool calls +are Full(ToolRequestContent) + Full(ToolResponseContent); reasoning is +Start(ReasoningContent) + Delta + Done). This keeps the canonical stream +faithful to what the live converter produces while staying offline. + +What is tested +-------------- +- The sync handler forwards StreamTaskMessage* events verbatim in canonical + order: tool_request (Full) -> tool_response (Full) -> text (Start+Delta+Done). +- Final accumulated text equals the seeded text deltas. +- With a trace_id + fake tracing, a tool span is opened (OpenSpan) on + Full(ToolRequestContent) and closed (CloseSpan) on the matching + Full(ToolResponseContent), and a reasoning span is opened/closed for a + reasoning segment — proving the SpanDeriver is wired on the yield path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual HTTP streaming over the ACP sync endpoint. +- A real ``Runner.run_streamed`` execution / live OpenAI model behaviour. +- ``convert_openai_to_agentex_events`` over real SDK events (covered by the + OpenAI turn + conformance suites). + +See also: test_harness_openai_async.py and test_harness_openai_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, override + +from agentex.types.text_delta import TextDelta +from agentex.types.text_content import TextContent +from agentex.lib.core.harness.types import OpenSpan, CloseSpan, StreamTaskMessage +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.types.reasoning_content import ReasoningContent +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._openai_turn import OpenAITurn +from agentex.types.reasoning_content_delta import ReasoningContentDelta + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Canonical event fixtures (copied from the OpenAI converter contract) +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[StreamTaskMessage]: + """A tool round-trip followed by a final text reply. + + Mirrors the OpenAI converter's tool path: a Full(ToolRequestContent) for the + call and a Full(ToolResponseContent) for the result (matched by tool_call_id), + then a streamed text answer. 
+ """ + return [ + StreamTaskMessageFull( + type="full", + index=0, + content=ToolRequestContent( + type="tool_request", + author="agent", + tool_call_id="call_1", + name="get_weather", + arguments={"city": "Paris"}, + ), + ), + StreamTaskMessageFull( + type="full", + index=1, + content=ToolResponseContent( + type="tool_response", + author="agent", + tool_call_id="call_1", + name="get_weather", + content="The weather in Paris is sunny and 72F", + ), + ), + StreamTaskMessageStart( + type="start", + index=2, + content=TextContent(type="text", author="agent", content=""), + ), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="Sunny ")), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="and 72F.")), + StreamTaskMessageDone(type="done", index=2), + ] + + +def _reasoning_events() -> list[StreamTaskMessage]: + """A reasoning segment: Start(ReasoningContent) + Delta + Done.""" + return [ + StreamTaskMessageStart( + type="start", + index=0, + content=ReasoningContent(type="reasoning", author="agent", summary=["Thinking..."]), + ), + StreamTaskMessageDelta( + type="delta", + index=0, + delta=ReasoningContentDelta(type="reasoning_content", content_index=0, content_delta="step 1"), + ), + StreamTaskMessageDone(type="done", index=0), + ] + + +async def _canonical_stream(events: list[StreamTaskMessage]): + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_yield_turn( + events: list[StreamTaskMessage], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> list[Any]: + """Drive the sync (yield) path and collect all yielded events.""" + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + 
parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + ) + return [ev async for ev in emitter.yield_turn(turn)] + + +# --------------------------------------------------------------------------- +# Tests: event order and content +# --------------------------------------------------------------------------- + + +class TestSyncYieldEventOrder: + async def test_tool_request_precedes_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + content_types = [ + getattr(getattr(ev, "content", None), "type", None) + for ev in events + if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull)) + ] + assert "tool_request" in content_types + assert "tool_response" in content_types + assert content_types.index("tool_request") < content_types.index("tool_response") + + async def test_text_appears_after_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + tool_resp_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageFull) + and getattr(getattr(ev, "content", None), "type", None) == "tool_response" + ) + text_start_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text" + ) + assert tool_resp_pos < text_start_pos + + async def test_tool_response_carries_weather_result(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + full_responses = [ + ev + for ev in events + if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent) + ] + assert len(full_responses) == 1 + tool_response = full_responses[0].content + assert isinstance(tool_response, 
ToolResponseContent) + assert "72F" in str(tool_response.content) + assert tool_response.name == "get_weather" + + async def test_accumulated_text_matches_deltas(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + accumulated = "".join( + ev.delta.text_delta + for ev in events + if isinstance(ev, StreamTaskMessageDelta) and isinstance(ev.delta, TextDelta) and ev.delta.text_delta + ) + assert accumulated == "Sunny and 72F." + + async def test_every_start_has_matching_done(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + starts = {ev.index for ev in events if isinstance(ev, StreamTaskMessageStart)} + dones = {ev.index for ev in events if isinstance(ev, StreamTaskMessageDone)} + assert starts == dones, f"Unmatched Start/Done indices: starts={starts} dones={dones}" + + +# --------------------------------------------------------------------------- +# Tests: span derivation on the yield path +# --------------------------------------------------------------------------- + + +class TestSyncYieldSpanDerivation: + async def test_tool_span_opened_and_closed(self) -> None: + """Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + + assert len(fake_tracing.started) == 1, "Expected exactly one tool span opened" + assert len(fake_tracing.ended) == 1, "Expected exactly one tool span closed" + name, parent_id, _ = fake_tracing.started[0] + assert name == "get_weather" + assert parent_id == "parent-span" + + async def test_tool_span_output_is_tool_result(self) -> None: + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + name, output = fake_tracing.ended[0] + assert name == "get_weather" + assert "72F" in str(output) 
+ + async def test_reasoning_span_opened_and_closed(self) -> None: + """A reasoning segment opens and closes a reasoning span.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _reasoning_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert fake_tracing.started_names == ["reasoning"] + assert len(fake_tracing.ended) == 1 + + async def test_no_trace_id_means_no_spans(self) -> None: + fake_tracing = FakeTracing() + turn = OpenAITurn(stream=_canonical_stream(_tool_then_text_events()), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_tracer_false_suppresses_spans(self) -> None: + fake_tracing = FakeTracing() + turn = OpenAITurn(stream=_canonical_stream(_tool_then_text_events()), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent-span", + tracer=False, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_span_signal_types(self) -> None: + """The signals received by the tracer are OpenSpan then CloseSpan.""" + received_signals: list[Any] = [] + + class _RecordingTracer(SpanTracer): + @override + async def handle(self, signal: Any) -> None: + received_signals.append(signal) + await super().handle(signal) + + fake_tracing = FakeTracing() + tracer = _RecordingTracer( + trace_id="trace1", + parent_span_id="parent", + task_id="task1", + tracing=fake_tracing, + ) + turn = OpenAITurn(stream=_canonical_stream(_tool_then_text_events()), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent", + tracer=tracer, + ) + [_ async for _ in emitter.yield_turn(turn)] + + assert 
len(received_signals) == 2 + assert isinstance(received_signals[0], OpenSpan) + assert isinstance(received_signals[1], CloseSpan) + assert received_signals[0].name == "get_weather" diff --git a/tests/lib/core/harness/test_harness_openai_temporal.py b/tests/lib/core/harness/test_harness_openai_temporal.py new file mode 100644 index 000000000..61cda37ef --- /dev/null +++ b/tests/lib/core/harness/test_harness_openai_temporal.py @@ -0,0 +1,195 @@ +"""Integration test: Temporal channel with an OpenAI-agents turn, offline. + +In a Temporal OpenAI deployment (see +examples/tutorials/10_async/10_temporal/120_openai_agents), the OpenAI Agents +SDK run executes inside a Temporal activity. Each turn's canonical stream is +delivered to Redis via the SAME ``UnifiedEmitter.auto_send_turn`` path used by +the non-temporal async channel — the only temporal-specific concern at the +harness boundary is that the activity stamps messages with a deterministic +``created_at`` (e.g. ``workflow.now()``) so replay is deterministic. + +There is no dedicated ``stream_openai_events`` temporal helper (unlike +langgraph's ``stream_langgraph_events``); the temporal OpenAI agent builds an +``OpenAITurn`` and calls ``auto_send_turn`` directly inside the activity. This +suite therefore exercises the auto_send path plus the temporal-only contract: +``created_at`` is threaded through to every streaming context. + +What is tested +-------------- +- The canonical message sequence (tool_request -> tool_response -> text) is + delivered via auto_send_turn, exactly as inside a Temporal activity. +- ``created_at`` passed to ``auto_send_turn`` is forwarded to every + ``streaming_task_message_context`` call (deterministic timestamping). +- Final text is returned from the turn. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Temporal scheduling (workflow.signal -> activity dispatch). +- Temporal durability / replay behaviour. 
+- Redis streaming (requires a running Redis instance). +- A real Runner.run_streamed execution / live OpenAI model behaviour. + +See also: test_harness_openai_sync.py and test_harness_openai_async.py. +""" + +from __future__ import annotations + +from typing import Any +from datetime import datetime, timezone + +from agentex.types.text_delta import TextDelta +from agentex.types.task_message import TaskMessage +from agentex.types.text_content import TextContent +from agentex.lib.core.harness.types import StreamTaskMessage +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._openai_turn import OpenAITurn + + +def _tool_then_text_events() -> list[StreamTaskMessage]: + return [ + StreamTaskMessageFull( + type="full", + index=0, + content=ToolRequestContent( + type="tool_request", + author="agent", + tool_call_id="call_1", + name="get_weather", + arguments={"city": "Paris"}, + ), + ), + StreamTaskMessageFull( + type="full", + index=1, + content=ToolResponseContent( + type="tool_response", + author="agent", + tool_call_id="call_1", + name="get_weather", + content="The weather in Paris is sunny and 72F", + ), + ), + StreamTaskMessageStart( + type="start", + index=2, + content=TextContent(type="text", author="agent", content=""), + ), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="Sunny ")), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="and 72F.")), + StreamTaskMessageDone(type="done", index=2), + ] + + +async def _canonical_stream(events: list[StreamTaskMessage]): + for e in events: + yield e + + +# 
--------------------------------------------------------------------------- +# Fake streaming backend that records the created_at it receives +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, ctype: str, initial_content: Any) -> None: + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + pass + + async def stream_update(self, update: Any) -> Any: + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.messages_opened: list[Any] = [] + self.created_ats: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + self.created_ats.append(created_at) + return _FakeCtx(ctype, initial_content) + + +async def _run_activity(events: list[StreamTaskMessage], created_at: datetime | None) -> tuple[Any, _FakeStreaming]: + """Mirror the temporal activity body: build an OpenAITurn and auto_send it.""" + fake_streaming = _FakeStreaming() + turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn, created_at=created_at) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTemporalActivityMessageOrder: + async def test_canonical_sequence_delivered(self) -> None: + _, fake_streaming = 
await _run_activity(_tool_then_text_events(), created_at=None) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + assert types[-1] == "text" + + async def test_final_text_returned(self) -> None: + result, _ = await _run_activity(_tool_then_text_events(), created_at=None) + assert result.final_text == "Sunny and 72F." + + +class TestTemporalCreatedAtThreading: + """created_at is forwarded to every streaming context (deterministic replay).""" + + async def test_created_at_threaded_to_all_contexts(self) -> None: + fixed = datetime(2026, 6, 22, 12, 0, 0, tzinfo=timezone.utc) + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert len(fake_streaming.created_ats) == 3 + assert all(ts == fixed for ts in fake_streaming.created_ats), ( + f"Expected every context stamped with {fixed}, got {fake_streaming.created_ats}" + ) + + async def test_default_created_at_is_none(self) -> None: + """When the activity does not stamp a timestamp, contexts see None.""" + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=None) + assert all(ts is None for ts in fake_streaming.created_ats) + + async def test_created_at_is_deterministic_across_runs(self) -> None: + """Two runs with the same created_at stamp identical timestamps — the + determinism the Temporal channel relies on for replay.""" + fixed = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + _, first = await _run_activity(_tool_then_text_events(), created_at=fixed) + _, second = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert first.created_ats == second.created_ats + assert all(ts == fixed for ts in first.created_ats) diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_async.py b/tests/lib/core/harness/test_harness_pydantic_ai_async.py index 8bda7d020..4b6b86415 100644 --- 
a/tests/lib/core/harness/test_harness_pydantic_ai_async.py +++ b/tests/lib/core/harness/test_harness_pydantic_ai_async.py @@ -12,7 +12,7 @@ The async path uses the bare PydanticAITurn (no coalescing): the foundation auto_send delivers streamed tool-request Start+ToolRequestDelta+Done messages -natively (AGX1-377 fix), so no coalescing wrapper is needed. +natively, so no coalescing wrapper is needed. What is tested -------------- @@ -51,6 +51,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Minimal agent under test # --------------------------------------------------------------------------- @@ -120,39 +122,6 @@ def streaming_task_message_context( return _FakeCtx(self.sink, ctype, initial_content) -# --------------------------------------------------------------------------- -# Fake tracing backend -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, str | None]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span( - self, - *, - trace_id: str, - name: str, - input: Any = None, - parent_id: Any = None, - data: Any = None, - task_id: Any = None, - ) -> _FakeSpan: - self.started.append((name, parent_id)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -163,7 +132,7 @@ async def _run_auto_send_turn( user_msg: str = "What is the weather in Paris?", trace_id: str | 
None = None, parent_span_id: str | None = None, - fake_tracing: _FakeTracing | None = None, + fake_tracing: FakeTracing | None = None, ) -> tuple[TurnResult, _FakeStreaming]: """Drive the async (auto_send) path and return the TurnResult + fake streaming state.""" fake_streaming = _FakeStreaming() @@ -304,9 +273,9 @@ async def test_context_lifecycle_open_then_close(self) -> None: class TestAsyncAutoSendSpanDerivation: """Span derivation on the async path now works for streamed tool requests. - The foundation auto_send delivers Start+ToolRequestDelta+Done natively - (AGX1-377 fix). The SpanDeriver opens a tool span on Done(tool_request), - so the async path now derives spans just like the sync path. + The foundation auto_send delivers Start+ToolRequestDelta+Done natively. + The SpanDeriver opens a tool span on Done(tool_request), so the async path + derives spans just like the sync path. """ async def test_tool_span_derived_on_async_path(self) -> None: @@ -314,7 +283,7 @@ async def test_tool_span_derived_on_async_path(self) -> None: on the async/auto_send path when auto_send delivers the streamed Start+ToolRequestDelta+Done sequence.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = SpanTracer( trace_id="trace1", parent_span_id="parent", diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py index 1557d0dd1..04beea81d 100644 --- a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py +++ b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py @@ -49,6 +49,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Minimal agent under test # --------------------------------------------------------------------------- @@ -74,39 +76,6 @@ def 
get_weather(city: str) -> str: return agent -# --------------------------------------------------------------------------- -# Fake tracing backend (no network calls) -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, str | None]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span( - self, - *, - trace_id: str, - name: str, - input: Any = None, - parent_id: Any = None, - data: Any = None, - task_id: Any = None, - ) -> _FakeSpan: - self.started.append((name, parent_id)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -117,7 +86,7 @@ async def _run_yield_turn( user_msg: str = "What is the weather in Paris?", trace_id: str | None = None, parent_span_id: str | None = None, - fake_tracing: _FakeTracing | None = None, + fake_tracing: FakeTracing | None = None, ) -> list[Any]: """Drive the sync (yield) path and collect all yielded events.""" tracer: SpanTracer | bool | None = None @@ -245,7 +214,7 @@ class TestSyncYieldSpanDerivation: async def test_tool_span_opened_and_closed(self) -> None: """One tool span is opened and closed per tool call.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = SpanTracer( trace_id="trace1", parent_span_id="parent-span", @@ -266,14 +235,14 @@ async def test_tool_span_opened_and_closed(self) -> None: assert len(fake_tracing.started) == 1, "Expected exactly one tool span opened" assert len(fake_tracing.ended) == 1, "Expected exactly one tool span closed" - span_name, parent_id = 
fake_tracing.started[0] + span_name, parent_id, _ = fake_tracing.started[0] assert span_name == "get_weather" assert parent_id == "parent-span" async def test_tool_span_output_is_tool_result(self) -> None: """The closed tool span's output equals the tool's return value.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = SpanTracer( trace_id="trace1", parent_span_id="parent-span", @@ -299,7 +268,7 @@ async def test_tool_span_output_is_tool_result(self) -> None: async def test_no_trace_id_means_no_spans(self) -> None: """With trace_id=None, no spans are derived (emitter disables tracing).""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() async with agent.run_stream_events("What is the weather in Paris?") as stream: turn = PydanticAITurn(stream, model="test") @@ -317,7 +286,7 @@ async def test_no_trace_id_means_no_spans(self) -> None: async def test_tracer_false_suppresses_spans(self) -> None: """tracer=False disables span derivation regardless of trace_id.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() async with agent.run_stream_events("What is the weather in Paris?") as stream: turn = PydanticAITurn(stream, model="test") @@ -345,7 +314,7 @@ async def handle(self, signal: Any) -> None: received_signals.append(signal) await super().handle(signal) - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = _RecordingTracer( trace_id="trace1", parent_span_id="parent", diff --git a/tests/lib/core/harness/test_span_derivation.py b/tests/lib/core/harness/test_span_derivation.py index 51e2ede2c..6376dc0c6 100644 --- a/tests/lib/core/harness/test_span_derivation.py +++ b/tests/lib/core/harness/test_span_derivation.py @@ -10,6 +10,8 @@ ) from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent +from agentex.types.reasoning_content_delta import ReasoningContentDelta 
+from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta from agentex.lib.core.harness.span_derivation import SpanDeriver @@ -98,9 +100,86 @@ def test_reasoning_opens_on_start_closes_on_done(): ] sigs = _signals(d, events) assert sigs[0] == OpenSpan(key="reasoning:0", kind="reasoning", name="reasoning", input={}) + # No deltas -> nothing to record, so output stays None (not an empty string). assert sigs[1] == CloseSpan(key="reasoning:0", output=None, is_complete=True) +def test_reasoning_content_deltas_recorded_as_output(): + """The chain-of-thought streamed via ReasoningContentDelta lands on the + reasoning span's output (previously dropped, leaving the span blank).""" + d = SpanDeriver() + events = [ + StreamTaskMessageStart( + type="start", index=0, content=ReasoningContent(type="reasoning", author="agent", summary=[], content=[]) + ), + StreamTaskMessageDelta( + type="delta", + index=0, + delta=ReasoningContentDelta(type="reasoning_content", content_index=0, content_delta="Let me "), + ), + StreamTaskMessageDelta( + type="delta", + index=0, + delta=ReasoningContentDelta(type="reasoning_content", content_index=0, content_delta="think."), + ), + StreamTaskMessageDone(type="done", index=0), + ] + sigs = _signals(d, events) + assert sigs[0] == OpenSpan(key="reasoning:0", kind="reasoning", name="reasoning", input={}) + assert sigs[1] == CloseSpan(key="reasoning:0", output="Let me think.", is_complete=True) + + +def test_reasoning_summary_deltas_recorded_as_output(): + """Reasoning-model summary deltas (o-series) also land on the span output.""" + d = SpanDeriver() + events = [ + StreamTaskMessageStart( + type="start", index=0, content=ReasoningContent(type="reasoning", author="agent", summary=[], content=[]) + ), + StreamTaskMessageDelta( + type="delta", + index=0, + delta=ReasoningSummaryDelta(type="reasoning_summary", summary_index=0, summary_delta="Summary text"), + ), + StreamTaskMessageDone(type="done", index=0), + ] + sigs = _signals(d, events) 
+ assert sigs[1] == CloseSpan(key="reasoning:0", output="Summary text", is_complete=True) + + +def test_reasoning_text_seeded_from_start_content(): + """A non-streaming harness that carries the full thinking on the Start + content still records it as output even with no deltas.""" + d = SpanDeriver() + events = [ + StreamTaskMessageStart( + type="start", + index=0, + content=ReasoningContent(type="reasoning", author="agent", summary=[], content=["full thought"]), + ), + StreamTaskMessageDone(type="done", index=0), + ] + sigs = _signals(d, events) + assert sigs[1] == CloseSpan(key="reasoning:0", output="full thought", is_complete=True) + + +def test_reasoning_unclosed_flushes_with_text(): + """An unclosed reasoning span flushes incomplete but still carries its text.""" + d = SpanDeriver() + events = [ + StreamTaskMessageStart( + type="start", index=0, content=ReasoningContent(type="reasoning", author="agent", summary=[], content=[]) + ), + StreamTaskMessageDelta( + type="delta", + index=0, + delta=ReasoningContentDelta(type="reasoning_content", content_index=0, content_delta="partial"), + ), + ] + sigs = _signals(d, events) + assert sigs[-1] == CloseSpan(key="reasoning:0", output="partial", is_complete=False) + + def test_parallel_tools_pair_by_tool_call_id(): d = SpanDeriver() events = [ diff --git a/tests/lib/core/harness/test_tracer.py b/tests/lib/core/harness/test_tracer.py index b3d9002c4..9bd17b90c 100644 --- a/tests/lib/core/harness/test_tracer.py +++ b/tests/lib/core/harness/test_tracer.py @@ -15,7 +15,32 @@ async def test_open_then_close_starts_and_ends_span(): await tracer.handle(OpenSpan(key="call_1", kind="tool", name="Bash", input={"cmd": "ls"})) await tracer.handle(CloseSpan(key="call_1", output="files", is_complete=True)) assert fake.started == [("Bash", "p1", {"cmd": "ls"})] - assert fake.ended == [("Bash", "files")] + # A plain-string output is wrapped in a dict (SGP spans require an object). 
+ assert fake.ended == [("Bash", {"output": "files"})] + + +@pytest.mark.asyncio +async def test_non_dict_payloads_are_wrapped_in_a_dict(): + """SGP spans reject scalar input/output with a 422; the tracer wraps any + non-dict payload so reasoning spans (string output) are not dropped.""" + fake = FakeTracing() + tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) + await tracer.handle(OpenSpan(key="reasoning:0", kind="reasoning", name="reasoning", input={})) + await tracer.handle(CloseSpan(key="reasoning:0", output="chain of thought", is_complete=True)) + # Empty-dict input stays a dict; string output is wrapped. + assert fake.started == [("reasoning", "p1", {})] + assert fake.ended == [("reasoning", {"output": "chain of thought"})] + + +@pytest.mark.asyncio +async def test_dict_and_none_payloads_pass_through_unchanged(): + fake = FakeTracing() + tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) + await tracer.handle(OpenSpan(key="c", kind="tool", name="T", input={"a": 1})) + await tracer.handle(CloseSpan(key="c", output={"result": "x"}, is_complete=True)) + await tracer.handle(OpenSpan(key="d", kind="tool", name="U", input={})) + await tracer.handle(CloseSpan(key="d", output=None, is_complete=False)) + assert fake.ended == [("T", {"result": "x"}), ("U", None)] @pytest.mark.asyncio diff --git a/tests/lib/core/harness/test_yield_delivery.py b/tests/lib/core/harness/test_yield_delivery.py index ef3861a16..21c93a95c 100644 --- a/tests/lib/core/harness/test_yield_delivery.py +++ b/tests/lib/core/harness/test_yield_delivery.py @@ -42,7 +42,8 @@ async def test_yield_passes_events_through_and_traces(): out = [e async for e in yield_events(_gen(events), tracer=tracer)] assert out == events # passthrough unchanged assert fake.started_names == ["Bash"] # span derived + opened - assert fake.ended_outputs == ["ok"] # span closed with response + # String tool output is wrapped in a dict (SGP spans require an object). 
+ assert fake.ended_outputs == [{"output": "ok"}] # span closed with response @pytest.mark.asyncio