From e71c20a38363132e61bf300b6a2724259b6f8a36 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 10:14:02 -0500 Subject: [PATCH 01/23] py(deps) Bump fastmcp 3.2.4 -> 3.4.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Unlock fastmcp 3.3/3.4 server features adopted in follow-up commits: ToolError(log_level=...) for expected-failure log demotion (fastmcp#4036) and ToolResult(is_error=True) error results (fastmcp#4217). what: - pyproject: fastmcp floor >=3.2.4 -> >=3.4.0 (lock already at 3.4.0) - CHANGES: Dependencies entry covering passive wins of the bump (template resource title preservation, no exc_info on expected tool failures, MCP-compliant OTEL span attributes) note: floor stays at 3.4.0 for now — 3.4.1 (starlette CVE-2026-48710 floor) is inside the 3-day uv dependency cooldown until 2026-06-08; a follow-up will raise the floor once it clears. --- CHANGES | 4 ++++ pyproject.toml | 2 +- uv.lock | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index e4e1dbe..b00df9b 100644 --- a/CHANGES +++ b/CHANGES @@ -6,6 +6,10 @@ _Notes on upcoming releases will be added here_ +### Dependencies + +**Minimum `fastmcp>=3.4.0`** (was `>=3.2.4`). Picks up the 3.3.x/3.4.0 releases. Visible effects of the bump alone: resource titles on the `tmux://` hierarchy are now preserved when a template materializes a resource (fastmcp#4061), expected tool failures are no longer logged with a traceback by fastmcp's server layer (fastmcp#4029), and OTEL spans follow the MCP semantic conventions — `gen_ai.tool.name` / `gen_ai.prompt.name` instead of the deprecated `rpc.*` attributes (fastmcp#3889, fastmcp#3890). + ## libtmux-mcp 0.1.0a10 (2026-05-24) ### What's new diff --git a/pyproject.toml b/pyproject.toml index c5b35d0..c54436a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ include = [ dependencies = [ "libtmux>=0.58.0,<1.0", - "fastmcp>=3.2.4,<4.0.0", + "fastmcp>=3.4.0,<4.0.0", ] [project.urls] diff --git a/uv.lock b/uv.lock index e947901..854e5cf 100644 --- a/uv.lock +++ b/uv.lock @@ -1240,7 +1240,7 @@ testing = [ [package.metadata] requires-dist = [ - { name = "fastmcp", specifier = ">=3.2.4,<4.0.0" }, + { name = "fastmcp", specifier = ">=3.4.0,<4.0.0" }, { name = "libtmux", specifier = ">=0.58.0,<1.0" }, ] From 74b98b2f09e8edc57f7e2e764a6ac179d8ef5397 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 10:24:18 -0500 Subject: [PATCH 02/23] mcp(feat[errors]): Log expected tool failures at WARNING via ExpectedToolError why: Every agent-correctable failure (unknown pane id, invalid arguments, tier denial) logged at ERROR with a traceback, drowning operator logs in noise that isn't actionable. fastmcp 3.3 added ToolError(log_level=...) support (fastmcp#4036). what: - Add ExpectedToolError(ToolError) defaulting log_level=WARNING in _utils.py - _map_exception_to_tool_error: expected libtmux failure branches return ExpectedToolError; TmuxCommandNotFound (broken operator environment) and the unexpected catch-all stay at ERROR - Sweep all direct validation/state raises in tool modules and SafetyMiddleware tier denial to ExpectedToolError - Doctests: traceback class names follow the raise sites - tests: parametrized level partition for the mapper; in-memory Client + caplog proof that fastmcp's server layer honors the WARNING level (fastmcp logger needs propagate=True under caplog) --- CHANGES | 6 ++ src/libtmux_mcp/_utils.py | 54 +++++++++-- src/libtmux_mcp/middleware.py | 10 +- src/libtmux_mcp/tools/buffer_tools.py | 27 +++--- src/libtmux_mcp/tools/hook_tools.py | 6 +- src/libtmux_mcp/tools/option_tools.py | 6 +- .../tools/pane_tools/capture_since.py | 15 ++- src/libtmux_mcp/tools/pane_tools/io.py | 7 +- src/libtmux_mcp/tools/pane_tools/layout.py | 7 +- src/libtmux_mcp/tools/pane_tools/lifecycle.py | 9 +- src/libtmux_mcp/tools/pane_tools/pipe.py | 5 +- src/libtmux_mcp/tools/pane_tools/search.py | 5 +- src/libtmux_mcp/tools/pane_tools/state.py | 6 +- src/libtmux_mcp/tools/pane_tools/wait.py | 12 +-- src/libtmux_mcp/tools/server_tools.py | 3 +- src/libtmux_mcp/tools/session_tools.py | 12 +-- src/libtmux_mcp/tools/wait_for_tools.py | 17 ++-- src/libtmux_mcp/tools/window_tools.py | 6 +- tests/test_utils.py | 91 +++++++++++++++++++ 19 files changed, 218 insertions(+), 86 deletions(-) diff --git a/CHANGES b/CHANGES index b00df9b..a07f016 100644 --- a/CHANGES +++ b/CHANGES @@ -10,6 +10,12 @@ _Notes on upcoming releases will be added here_ **Minimum `fastmcp>=3.4.0`** (was `>=3.2.4`). Picks up the 3.3.x/3.4.0 releases. Visible effects of the bump alone: resource titles on the `tmux://` hierarchy are now preserved when a template materializes a resource (fastmcp#4061), expected tool failures are no longer logged with a traceback by fastmcp's server layer (fastmcp#4029), and OTEL spans follow the MCP semantic conventions — `gen_ai.tool.name` / `gen_ai.prompt.name` instead of the deprecated `rpc.*` attributes (fastmcp#3889, fastmcp#3890). +### What's new + +**Expected tool failures log at WARNING, not ERROR** + +Agent-correctable failures — unknown pane/window/session ids, invalid arguments, safety-tier denials, transient tmux errors — now log at WARNING instead of ERROR in the server's log stream. Two cases stay at ERROR so operators see them loudly: a missing tmux binary and unexpected exceptions that may indicate a bug in this server. Built on fastmcp 3.3's `ToolError(log_level=...)` support. + ## libtmux-mcp 0.1.0a10 (2026-05-24) ### What's new diff --git a/src/libtmux_mcp/_utils.py b/src/libtmux_mcp/_utils.py index b7fe0ff..1b15823 100644 --- a/src/libtmux_mcp/_utils.py +++ b/src/libtmux_mcp/_utils.py @@ -30,6 +30,38 @@ logger = logging.getLogger(__name__) +class ExpectedToolError(ToolError): + """``ToolError`` for expected, agent-correctable failures. + + Defaults the error's ``log_level`` to ``WARNING`` (honored by + fastmcp >= 3.3 when logging tool/resource failures) so routine + validation errors, missing objects, and tier denials do not surface + as ERROR records. Unexpected failures keep stock :class:`ToolError` + and its ERROR default — those are the ones operators must see. + + Examples + -------- + >>> import logging + >>> ExpectedToolError("Pane not found: %5").log_level == logging.WARNING + True + + An explicit level still wins: + + >>> err = ExpectedToolError("noisy", log_level=logging.INFO) + >>> err.log_level == logging.INFO + True + + Catch sites that handle ``ToolError`` keep working — this is a + plain subclass: + + >>> isinstance(ExpectedToolError("x"), ToolError) + True + """ + + def __init__(self, *args: object, log_level: int = logging.WARNING) -> None: + super().__init__(*args, log_level=log_level) + + @dataclasses.dataclass(frozen=True) class CallerIdentity: """Identity of the tmux pane hosting this MCP server process. @@ -725,10 +757,10 @@ def _coerce_dict_arg( decoded = json.loads(value) except (json.JSONDecodeError, ValueError) as e: msg = f"Invalid {name} JSON: {e}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e if not isinstance(decoded, dict): msg = f"{name} must be a JSON object, got {type(decoded).__name__}" - raise ToolError(msg) from None + raise ExpectedToolError(msg) from None return decoded return value @@ -775,7 +807,7 @@ def _apply_filters( f"Invalid filter operator '{op}' in '{key}'. " f"Valid operators: {', '.join(valid_ops)}" ) - raise ToolError(msg) + raise ExpectedToolError(msg) filtered = items.filter(**filters) return [serializer(item) for item in filtered] @@ -922,20 +954,26 @@ def _map_exception_to_tool_error(fn_name: str, e: BaseException) -> ToolError: Shared between the sync and async ``handle_tool_errors*`` decorators so the two paths stay byte-for-byte identical in what agents see. + + Expected, agent-correctable failures map to + :class:`ExpectedToolError` (logged at WARNING). Two cases stay at + ERROR: a missing tmux binary (operator-environment fault that must + be loud) and the unexpected catch-all (potential bug in this + server). """ if isinstance(e, exc.TmuxCommandNotFound): msg = "tmux binary not found. Ensure tmux is installed and in PATH." return ToolError(msg) if isinstance(e, exc.TmuxSessionExists): - return ToolError(str(e)) + return ExpectedToolError(str(e)) if isinstance(e, exc.BadSessionName): - return ToolError(str(e)) + return ExpectedToolError(str(e)) if isinstance(e, exc.TmuxObjectDoesNotExist): - return ToolError(f"Object not found: {e}") + return ExpectedToolError(f"Object not found: {e}") if isinstance(e, exc.PaneNotFound): - return ToolError(f"Pane not found: {e}") + return ExpectedToolError(f"Pane not found: {e}") if isinstance(e, exc.LibTmuxException): - return ToolError(f"tmux error: {e}") + return ExpectedToolError(f"tmux error: {e}") logger.exception("unexpected error in MCP tool %s", fn_name) return ToolError(f"Unexpected error: {type(e).__name__}: {e}") diff --git a/src/libtmux_mcp/middleware.py b/src/libtmux_mcp/middleware.py index b3e0dc2..bd1ce6e 100644 --- a/src/libtmux_mcp/middleware.py +++ b/src/libtmux_mcp/middleware.py @@ -24,7 +24,6 @@ import time import typing as t -from fastmcp.exceptions import ToolError from fastmcp.server.middleware import Middleware, MiddlewareContext from fastmcp.server.middleware.error_handling import RetryMiddleware from fastmcp.server.middleware.response_limiting import ResponseLimitingMiddleware @@ -32,7 +31,12 @@ from libtmux import exc as libtmux_exc from mcp.types import TextContent -from libtmux_mcp._utils import TAG_DESTRUCTIVE, TAG_MUTATING, TAG_READONLY +from libtmux_mcp._utils import ( + TAG_DESTRUCTIVE, + TAG_MUTATING, + TAG_READONLY, + ExpectedToolError, +) _TIER_LEVELS: dict[str, int] = { TAG_READONLY: 0, @@ -90,7 +94,7 @@ async def on_call_tool( f"current safety level. Set LIBTMUX_SAFETY=destructive " f"to enable destructive tools." ) - raise ToolError(msg) + raise ExpectedToolError(msg) return await call_next(context) diff --git a/src/libtmux_mcp/tools/buffer_tools.py b/src/libtmux_mcp/tools/buffer_tools.py index e702c7f..c965e37 100644 --- a/src/libtmux_mcp/tools/buffer_tools.py +++ b/src/libtmux_mcp/tools/buffer_tools.py @@ -31,14 +31,13 @@ import typing as t import uuid -from fastmcp.exceptions import ToolError - from libtmux_mcp._utils import ( ANNOTATIONS_MUTATING, ANNOTATIONS_RO, ANNOTATIONS_SHELL, TAG_MUTATING, TAG_READONLY, + ExpectedToolError, _get_server, _resolve_pane, _tmux_argv, @@ -93,17 +92,17 @@ def _validate_logical_name(name: str) -> str: >>> _validate_logical_name("has space") Traceback (most recent call last): ... - fastmcp.exceptions.ToolError: Invalid logical buffer name: 'has space' + libtmux_mcp._utils.ExpectedToolError: Invalid logical buffer name: 'has space' >>> _validate_logical_name("with/slash") Traceback (most recent call last): ... - fastmcp.exceptions.ToolError: Invalid logical buffer name: 'with/slash' + libtmux_mcp._utils.ExpectedToolError: Invalid logical buffer name: 'with/slash' """ if name == "": return "buf" if not _LOGICAL_NAME_RE.fullmatch(name): msg = f"Invalid logical buffer name: {name!r}" - raise ToolError(msg) + raise ExpectedToolError(msg) return name @@ -122,15 +121,15 @@ def _validate_buffer_name(name: str) -> str: >>> _validate_buffer_name("clipboard") Traceback (most recent call last): ... - fastmcp.exceptions.ToolError: Invalid buffer name: 'clipboard' + libtmux_mcp._utils.ExpectedToolError: Invalid buffer name: 'clipboard' >>> _validate_buffer_name("libtmux_mcp_shortuuid_buf") Traceback (most recent call last): ... - fastmcp.exceptions.ToolError: Invalid buffer name: 'libtmux_mcp_shortuuid_buf' + libtmux_mcp._utils.ExpectedToolError: Invalid buffer name: 'libtmux_mcp_...' """ if not _BUFFER_NAME_RE.fullmatch(name): msg = f"Invalid buffer name: {name!r}" - raise ToolError(msg) + raise ExpectedToolError(msg) return name @@ -216,11 +215,11 @@ def load_buffer( subprocess.run(argv, check=True, capture_output=True, timeout=5.0) except subprocess.TimeoutExpired as e: msg = f"load-buffer timeout after 5s for {buffer_name!r}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e except subprocess.CalledProcessError as e: stderr = e.stderr.decode(errors="replace").strip() if e.stderr else "" msg = f"load-buffer failed: {stderr or e}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e finally: if tmppath is not None: pathlib.Path(tmppath).unlink(missing_ok=True) @@ -317,11 +316,11 @@ def show_buffer( ) except subprocess.TimeoutExpired as e: msg = f"show-buffer timeout after 5s for {cname!r}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e except subprocess.CalledProcessError as e: stderr = e.stderr.decode(errors="replace").strip() if e.stderr else "" msg = f"show-buffer failed for {cname!r}: {stderr or e}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e raw = completed.stdout.decode(errors="replace") # Preserve a possible trailing newline so round-tripping through # load_buffer/show_buffer stays byte-identical when truncation @@ -363,11 +362,11 @@ def delete_buffer( subprocess.run(argv, check=True, capture_output=True, timeout=5.0) except subprocess.TimeoutExpired as e: msg = f"delete-buffer timeout after 5s for {cname!r}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e except subprocess.CalledProcessError as e: stderr = e.stderr.decode(errors="replace").strip() if e.stderr else "" msg = f"delete-buffer failed for {cname!r}: {stderr or e}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e return f"Buffer {cname!r} deleted" diff --git a/src/libtmux_mcp/tools/hook_tools.py b/src/libtmux_mcp/tools/hook_tools.py index 3b982cf..a42f4d6 100644 --- a/src/libtmux_mcp/tools/hook_tools.py +++ b/src/libtmux_mcp/tools/hook_tools.py @@ -30,13 +30,13 @@ import typing as t -from fastmcp.exceptions import ToolError from libtmux import exc as libtmux_exc from libtmux.constants import OptionScope from libtmux_mcp._utils import ( ANNOTATIONS_RO, TAG_READONLY, + ExpectedToolError, _get_server, _resolve_pane, _resolve_session, @@ -86,11 +86,11 @@ def _resolve_hook_target( if scope is not None and opt_scope is None: valid = ", ".join(sorted(_SCOPE_MAP)) msg = f"Invalid scope: {scope!r}. Valid: {valid}" - raise ToolError(msg) + raise ExpectedToolError(msg) if target is not None and opt_scope is None: msg = "scope is required when target is specified" - raise ToolError(msg) + raise ExpectedToolError(msg) if target is not None and opt_scope is not None: # Let the resolved object carry its own scope — passing scope diff --git a/src/libtmux_mcp/tools/option_tools.py b/src/libtmux_mcp/tools/option_tools.py index b4970ab..e992fd8 100644 --- a/src/libtmux_mcp/tools/option_tools.py +++ b/src/libtmux_mcp/tools/option_tools.py @@ -4,7 +4,6 @@ import typing as t -from fastmcp.exceptions import ToolError from libtmux.constants import OptionScope from libtmux_mcp._utils import ( @@ -12,6 +11,7 @@ ANNOTATIONS_RO, TAG_MUTATING, TAG_READONLY, + ExpectedToolError, _get_server, _resolve_pane, _resolve_session, @@ -44,11 +44,11 @@ def _resolve_option_target( if scope is not None and opt_scope is None: valid = ", ".join(sorted(_SCOPE_MAP)) msg = f"Invalid scope: {scope!r}. Valid: {valid}" - raise ToolError(msg) + raise ExpectedToolError(msg) if target is not None and opt_scope is None: msg = "scope is required when target is specified" - raise ToolError(msg) + raise ExpectedToolError(msg) if target is not None and opt_scope is not None: if opt_scope == OptionScope.Session: diff --git a/src/libtmux_mcp/tools/pane_tools/capture_since.py b/src/libtmux_mcp/tools/pane_tools/capture_since.py index 44ab2f6..6a1c20b 100644 --- a/src/libtmux_mcp/tools/pane_tools/capture_since.py +++ b/src/libtmux_mcp/tools/pane_tools/capture_since.py @@ -11,9 +11,8 @@ import typing as t from dataclasses import dataclass -from fastmcp.exceptions import ToolError - from libtmux_mcp._utils import ( + ExpectedToolError, _get_server, _resolve_pane, handle_tool_errors_async, @@ -106,7 +105,7 @@ def _raise_if_dead_without_baseline(pane: Pane, state: _PaneState) -> None: """Raise a tool error for a dead pane before a cursor exists.""" if state.pane_dead: msg = f"pane {pane.pane_id} died during pane read" - raise ToolError(msg) + raise ExpectedToolError(msg) def _read_stable_visible( @@ -306,7 +305,7 @@ def _build_cursor(pane_id: str, state: _PaneState, cursor_rows: list[str]) -> st def _raise_invalid_cursor(reason: str) -> t.NoReturn: """Raise a consistently worded invalid-cursor error.""" msg = f"invalid capture_since cursor: {reason}" - raise ToolError(msg) + raise ExpectedToolError(msg) def _cursor_str(payload: t.Mapping[str, t.Any], key: str) -> str: @@ -340,7 +339,7 @@ def _decode_cursor(cursor: str) -> _CaptureCursor: except (binascii.Error, json.JSONDecodeError, UnicodeDecodeError) as err: reason = "could not decode payload" msg = f"invalid capture_since cursor: {reason}" - raise ToolError(msg) from err + raise ExpectedToolError(msg) from err if not isinstance(payload, dict): reason = "payload is not an object" @@ -375,10 +374,10 @@ def _validate_limits(max_lines: int | None, max_bytes: int | None) -> None: """Validate caller-supplied truncation limits.""" if max_lines is not None and max_lines <= 0: msg = f"max_lines must be positive or None (received {max_lines})" - raise ToolError(msg) + raise ExpectedToolError(msg) if max_bytes is not None and max_bytes <= 0: msg = f"max_bytes must be positive or None (received {max_bytes})" - raise ToolError(msg) + raise ExpectedToolError(msg) def _encoded_size(lines: list[str]) -> int: @@ -507,7 +506,7 @@ async def capture_since( f"cursor pane {decoded.pane_id} does not match requested pane " f"{pane.pane_id}" ) - raise ToolError(msg) + raise ExpectedToolError(msg) start_time = time.monotonic() if decoded is None: diff --git a/src/libtmux_mcp/tools/pane_tools/io.py b/src/libtmux_mcp/tools/pane_tools/io.py index de209b2..65eccd6 100644 --- a/src/libtmux_mcp/tools/pane_tools/io.py +++ b/src/libtmux_mcp/tools/pane_tools/io.py @@ -8,9 +8,8 @@ import tempfile import uuid -from fastmcp.exceptions import ToolError - from libtmux_mcp._utils import ( + ExpectedToolError, _get_server, _resolve_pane, _tmux_argv, @@ -336,11 +335,11 @@ def paste_text( subprocess.run(load_args, check=True, capture_output=True, timeout=5.0) except subprocess.TimeoutExpired as e: msg = f"load-buffer timeout after 5s for {buffer_name!r}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e except subprocess.CalledProcessError as e: stderr = e.stderr.decode(errors="replace").strip() if e.stderr else "" msg = f"load-buffer failed: {stderr or e}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e # Paste from the named buffer. ``delete_after=True`` (``-d``) # deletes only that named buffer, leaving any unnamed user diff --git a/src/libtmux_mcp/tools/pane_tools/layout.py b/src/libtmux_mcp/tools/pane_tools/layout.py index be4c74b..2f814ab 100644 --- a/src/libtmux_mcp/tools/pane_tools/layout.py +++ b/src/libtmux_mcp/tools/pane_tools/layout.py @@ -4,9 +4,8 @@ import typing as t -from fastmcp.exceptions import ToolError - from libtmux_mcp._utils import ( + ExpectedToolError, _get_server, _resolve_pane, _resolve_window, @@ -59,7 +58,7 @@ def resize_pane( """ if zoom is not None and (height is not None or width is not None): msg = "Cannot combine zoom with height/width" - raise ToolError(msg) + raise ExpectedToolError(msg) server = _get_server(socket_name=socket_name) pane = _resolve_pane( @@ -126,7 +125,7 @@ def select_pane( """ if pane_id is None and direction is None: msg = "Provide either pane_id or direction." - raise ToolError(msg) + raise ExpectedToolError(msg) server = _get_server(socket_name=socket_name) diff --git a/src/libtmux_mcp/tools/pane_tools/lifecycle.py b/src/libtmux_mcp/tools/pane_tools/lifecycle.py index 4eb2e0a..2123300 100644 --- a/src/libtmux_mcp/tools/pane_tools/lifecycle.py +++ b/src/libtmux_mcp/tools/pane_tools/lifecycle.py @@ -4,9 +4,8 @@ import typing as t -from fastmcp.exceptions import ToolError - from libtmux_mcp._utils import ( + ExpectedToolError, _caller_is_on_server, _get_caller_identity, _get_server, @@ -56,7 +55,7 @@ def kill_pane( "Refusing to kill the pane running this MCP server. " "Use a manual tmux command if intended." ) - raise ToolError(msg) + raise ExpectedToolError(msg) pane = _resolve_pane(server, pane_id=pane_id) pid = pane.pane_id @@ -149,7 +148,7 @@ def respawn_pane( "Refusing to respawn the pane running this MCP server. " "Use a manual tmux command if intended." ) - raise ToolError(msg) + raise ExpectedToolError(msg) pane.respawn( kill=kill, start_directory=start_directory, @@ -316,7 +315,7 @@ def find_pane_by_position( f"{window.window_id}. This is unusual — built-in layouts " "always have a pane at every corner." ) - raise ToolError(msg) + raise ExpectedToolError(msg) # When more than one pane qualifies (e.g. a single-pane window # touches all four edges, or an unusual layout), prefer the pane diff --git a/src/libtmux_mcp/tools/pane_tools/pipe.py b/src/libtmux_mcp/tools/pane_tools/pipe.py index d443ac7..a09d28f 100644 --- a/src/libtmux_mcp/tools/pane_tools/pipe.py +++ b/src/libtmux_mcp/tools/pane_tools/pipe.py @@ -4,9 +4,8 @@ import shlex -from fastmcp.exceptions import ToolError - from libtmux_mcp._utils import ( + ExpectedToolError, _get_server, _resolve_pane, handle_tool_errors, @@ -80,7 +79,7 @@ def pipe_pane( if not output_path.strip(): msg = "output_path must be a non-empty path, or None to stop piping." - raise ToolError(msg) + raise ExpectedToolError(msg) redirect = ">>" if append else ">" pane.pipe(f"cat {redirect} {shlex.quote(output_path)}") diff --git a/src/libtmux_mcp/tools/pane_tools/search.py b/src/libtmux_mcp/tools/pane_tools/search.py index cfc9512..cab6f12 100644 --- a/src/libtmux_mcp/tools/pane_tools/search.py +++ b/src/libtmux_mcp/tools/pane_tools/search.py @@ -4,9 +4,8 @@ import re -from fastmcp.exceptions import ToolError - from libtmux_mcp._utils import ( + ExpectedToolError, _coerce_bool, _coerce_int, _compute_is_caller, @@ -150,7 +149,7 @@ def search_panes( compiled = re.compile(search_pattern, flags) except re.error as e: msg = f"Invalid regex pattern: {e}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e server = _get_server(socket_name=socket_name) diff --git a/src/libtmux_mcp/tools/pane_tools/state.py b/src/libtmux_mcp/tools/pane_tools/state.py index ea3ced5..c69f990 100644 --- a/src/libtmux_mcp/tools/pane_tools/state.py +++ b/src/libtmux_mcp/tools/pane_tools/state.py @@ -4,7 +4,7 @@ import typing as t -from fastmcp.exceptions import ToolError +from libtmux_mcp._utils import ExpectedToolError if t.TYPE_CHECKING: from libtmux.pane import Pane @@ -63,14 +63,14 @@ def _raise_if_pane_lifecycle_changed( """Raise ``ToolError`` when a cursor or wait baseline is invalid.""" if state.pane_dead: msg = f"pane {pane.pane_id} died; cursor/baseline anchor is no longer valid" - raise ToolError(msg) + raise ExpectedToolError(msg) if state.pane_pid != baseline_pid: msg = ( f"pane {pane.pane_id} was respawned " f"(pid {baseline_pid} -> {state.pane_pid}); " "cursor/baseline anchor is no longer valid" ) - raise ToolError(msg) + raise ExpectedToolError(msg) def _read_history_limit(pane: Pane) -> int: diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 1ffa7b1..b0812b1 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -10,9 +10,9 @@ import anyio from fastmcp import Context -from fastmcp.exceptions import ToolError from libtmux_mcp._utils import ( + ExpectedToolError, _get_server, _resolve_pane, handle_tool_errors_async, @@ -269,13 +269,13 @@ async def wait_for_text( """ if not pattern: msg = "pattern must be a non-empty string" - raise ToolError(msg) + raise ExpectedToolError(msg) if interval < 0.01: msg = f"interval must be at least 0.01 s (received {interval})" - raise ToolError(msg) + raise ExpectedToolError(msg) if timeout <= 0: msg = f"timeout must be positive (received {timeout})" - raise ToolError(msg) + raise ExpectedToolError(msg) search_pattern = pattern if regex else re.escape(pattern) flags = 0 if match_case else re.IGNORECASE @@ -284,7 +284,7 @@ async def wait_for_text( except re.error as e: msg = f"Invalid regex pattern: {e}" await _maybe_log(ctx, level="warning", message=msg) - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e server = _get_server(socket_name=socket_name) pane = _resolve_pane( @@ -381,7 +381,7 @@ async def wait_for_text( "re-arm wait_for_text or use wait_for_channel for " "deterministic synchronization" ) - raise ToolError(msg) + raise ExpectedToolError(msg) # The shrink guard above catches clear-history and the # entry-at-cap rollover edge. It does NOT catch # grid_collect_history trim during continuous output, where diff --git a/src/libtmux_mcp/tools/server_tools.py b/src/libtmux_mcp/tools/server_tools.py index 7f16043..55a5e77 100644 --- a/src/libtmux_mcp/tools/server_tools.py +++ b/src/libtmux_mcp/tools/server_tools.py @@ -18,6 +18,7 @@ TAG_DESTRUCTIVE, TAG_MUTATING, TAG_READONLY, + ExpectedToolError, _apply_filters, _caller_is_on_server, _coerce_dict_arg, @@ -150,7 +151,7 @@ def kill_server(socket_name: str | None = None) -> str: "Refusing to kill the tmux server while this MCP server is running " "inside it. Use a manual tmux command if intended." ) - raise ToolError(msg) + raise ExpectedToolError(msg) server.kill() _invalidate_server(socket_name=socket_name) diff --git a/src/libtmux_mcp/tools/session_tools.py b/src/libtmux_mcp/tools/session_tools.py index ca841ab..f2391ab 100644 --- a/src/libtmux_mcp/tools/session_tools.py +++ b/src/libtmux_mcp/tools/session_tools.py @@ -4,7 +4,6 @@ import typing as t -from fastmcp.exceptions import ToolError from libtmux.constants import WindowDirection from libtmux_mcp._utils import ( @@ -16,6 +15,7 @@ TAG_DESTRUCTIVE, TAG_MUTATING, TAG_READONLY, + ExpectedToolError, _apply_filters, _caller_is_on_server, _get_caller_identity, @@ -161,7 +161,7 @@ def create_window( if resolved is None: valid = ", ".join(sorted(direction_map)) msg = f"Invalid direction: {direction!r}. Valid: {valid}" - raise ToolError(msg) + raise ExpectedToolError(msg) kwargs["direction"] = resolved window = session.new_window(**kwargs) return _serialize_window(window) @@ -232,7 +232,7 @@ def kill_session( "Refusing to kill without an explicit target. " "Provide session_name or session_id." ) - raise ToolError(msg) + raise ExpectedToolError(msg) server = _get_server(socket_name=socket_name) session = _resolve_session(server, session_name=session_name, session_id=session_id) @@ -245,7 +245,7 @@ def kill_session( "Refusing to kill the session containing this MCP server's pane. " "Use a manual tmux command if intended." ) - raise ToolError(msg) + raise ExpectedToolError(msg) name = session.session_name or session.session_id session.kill() @@ -288,7 +288,7 @@ def select_window( """ if window_id is None and window_index is None and direction is None: msg = "Provide window_id, window_index, or direction." - raise ToolError(msg) + raise ExpectedToolError(msg) server = _get_server(socket_name=socket_name) @@ -319,7 +319,7 @@ def select_window( fn = _NAV.get(direction) if fn is None: msg = f"Invalid direction: {direction!r}. Valid: next, previous, last" - raise ToolError(msg) + raise ExpectedToolError(msg) active_window = fn() return _serialize_window(active_window) diff --git a/src/libtmux_mcp/tools/wait_for_tools.py b/src/libtmux_mcp/tools/wait_for_tools.py index cd7d051..14c33b8 100644 --- a/src/libtmux_mcp/tools/wait_for_tools.py +++ b/src/libtmux_mcp/tools/wait_for_tools.py @@ -35,11 +35,10 @@ import subprocess import typing as t -from fastmcp.exceptions import ToolError - from libtmux_mcp._utils import ( ANNOTATIONS_MUTATING, TAG_MUTATING, + ExpectedToolError, _get_server, _tmux_argv, handle_tool_errors_async, @@ -91,15 +90,15 @@ def _validate_channel_name(name: str) -> str: >>> _validate_channel_name("has space") Traceback (most recent call last): ... - fastmcp.exceptions.ToolError: Invalid channel name: 'has space' + libtmux_mcp._utils.ExpectedToolError: Invalid channel name: 'has space' >>> _validate_channel_name("") Traceback (most recent call last): ... - fastmcp.exceptions.ToolError: Invalid channel name: '' + libtmux_mcp._utils.ExpectedToolError: Invalid channel name: '' """ if not _CHANNEL_NAME_RE.fullmatch(name): msg = f"Invalid channel name: {name!r}" - raise ToolError(msg) + raise ExpectedToolError(msg) return name @@ -164,11 +163,11 @@ async def wait_for_channel( ) except subprocess.TimeoutExpired as e: msg = f"wait-for timeout: channel {cname!r} was not signalled within {timeout}s" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e except subprocess.CalledProcessError as e: stderr = e.stderr.decode(errors="replace").strip() if e.stderr else "" msg = f"wait-for failed for channel {cname!r}: {stderr or e}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e return f"Channel {cname!r} was signalled" @@ -210,11 +209,11 @@ async def signal_channel( f"signal-channel timeout after {_SIGNAL_TIMEOUT_SECONDS}s: " f"channel {cname!r}" ) - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e except subprocess.CalledProcessError as e: stderr = e.stderr.decode(errors="replace").strip() if e.stderr else "" msg = f"signal-channel failed for channel {cname!r}: {stderr or e}" - raise ToolError(msg) from e + raise ExpectedToolError(msg) from e return f"Channel {cname!r} signalled" diff --git a/src/libtmux_mcp/tools/window_tools.py b/src/libtmux_mcp/tools/window_tools.py index e4cec2c..7dcd690 100644 --- a/src/libtmux_mcp/tools/window_tools.py +++ b/src/libtmux_mcp/tools/window_tools.py @@ -4,7 +4,6 @@ import typing as t -from fastmcp.exceptions import ToolError from libtmux.constants import PaneDirection from libtmux_mcp._utils import ( @@ -16,6 +15,7 @@ TAG_DESTRUCTIVE, TAG_MUTATING, TAG_READONLY, + ExpectedToolError, _apply_filters, _caller_is_on_server, _get_caller_identity, @@ -205,7 +205,7 @@ def split_window( if pane_dir is None: valid = ", ".join(sorted(_DIRECTION_MAP)) msg = f"Invalid direction: {direction!r}. Valid: {valid}" - raise ToolError(msg) + raise ExpectedToolError(msg) if pane_id is not None: pane = _resolve_pane(server, pane_id=pane_id) @@ -312,7 +312,7 @@ def kill_window( "Refusing to kill the window containing this MCP server's pane. " "Use a manual tmux command if intended." ) - raise ToolError(msg) + raise ExpectedToolError(msg) wid = window.window_id window.kill() diff --git a/tests/test_utils.py b/tests/test_utils.py index 7c9db4e..027a6a3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -862,3 +862,94 @@ async def _raiser() -> None: with pytest.raises(ToolError, match=r"Unexpected error: RuntimeError: boom"): asyncio.run(_raiser()) + + +# --------------------------------------------------------------------------- +# ExpectedToolError log-level tests +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "raised", + [ + exc.TmuxSessionExists("session foo already exists"), + exc.BadSessionName("bad name"), + exc.TmuxObjectDoesNotExist("@99"), + exc.PaneNotFound("%99"), + exc.LibTmuxException("server gone"), + ], + ids=lambda e: type(e).__name__, +) +def test_map_exception_expected_failures_log_at_warning( + raised: Exception, +) -> None: + """Agent-correctable libtmux failures map to WARNING-level errors.""" + import logging + + from libtmux_mcp._utils import ExpectedToolError, _map_exception_to_tool_error + + mapped = _map_exception_to_tool_error("some_tool", raised) + assert isinstance(mapped, ExpectedToolError) + assert mapped.log_level == logging.WARNING + + +@pytest.mark.parametrize( + "raised", + [ + exc.TmuxCommandNotFound("tmux missing"), + RuntimeError("boom"), + ], + ids=lambda e: type(e).__name__, +) +def test_map_exception_operator_faults_stay_at_error(raised: Exception) -> None: + """Environment faults and unexpected bugs keep the ERROR default.""" + import logging + + from libtmux_mcp._utils import ExpectedToolError, _map_exception_to_tool_error + + mapped = _map_exception_to_tool_error("some_tool", raised) + assert not isinstance(mapped, ExpectedToolError) + assert mapped.log_level == logging.ERROR + + +def test_expected_tool_error_logs_warning_through_server( + caplog: pytest.LogCaptureFixture, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Fastmcp's server-layer error log honors ``ExpectedToolError.log_level``. + + Uses a minimal FastMCP instance (no middleware stack) so the + assertion isolates fastmcp's own ``Error calling tool`` record — + the project middleware's log behavior is covered in + ``test_middleware.py``. + + fastmcp sets ``propagate = False`` on its logger (it installs a + rich handler instead), which hides records from caplog's + root-logger handler — re-enable propagation for the test. + """ + import asyncio + import logging + + from fastmcp import Client, FastMCP + + from libtmux_mcp._utils import ExpectedToolError + + monkeypatch.setattr(logging.getLogger("fastmcp"), "propagate", True) + + probe = FastMCP(name="probe") + + @probe.tool + def fail_expected() -> str: + msg = "Pane not found: %99" + raise ExpectedToolError(msg) + + async def _call() -> None: + async with Client(probe) as client: + await client.call_tool("fail_expected", raise_on_error=False) + + with caplog.at_level(logging.DEBUG): + asyncio.run(_call()) + + records = [r for r in caplog.records if "Error calling tool" in r.message] + assert records, "expected fastmcp to log the tool failure" + assert all(r.levelno == logging.WARNING for r in records) From 0f2793f067c0e2fe2278a55819dd3768985cfa65 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 10:37:21 -0500 Subject: [PATCH 03/23] mcp(feat[middleware]): Return rich ToolResult errors via ToolErrorResultMiddleware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: fastmcp's stock ErrorHandlingMiddleware(transform_errors=True) funnels every tool failure through a -32603 catch-all, so agents received 'Internal error: Pane not found: %5' — the transform mangled every expected failure message on the wire. fastmcp 3.4 added ToolResult(is_error=True) (fastmcp#4217), letting the server shape error results directly. what: - ToolErrorResultMiddleware(ErrorHandlingMiddleware) intercepts tool-call exceptions and returns ToolResult(is_error=True) carrying the message exactly as raised plus structured meta: error_type (originating exception class via __cause__), expected (agent-correctable vs operator fault/bug), suggestion (recovery hint when present) - _log_error override honors FastMCPError.log_level — the stock implementation hardcoded logger.error, re-shouting failures the ExpectedToolError demotion had moved to WARNING - Inherited on_message keeps the MCP -32002 resource-not-found transform for non-tool messages - Ordering invariant documented in the server.py stack comment: conversion must stay outside Audit/Retry/Safety, which depend on exception semantics (audit catches to record outcome=error, retry matches LibTmuxException via __cause__) - ExpectedToolError grows an optional suggestion kwarg; the not-found mapper branches point agents at the discovery tools (list_sessions / list_windows / list_panes) - docs(topics/logging): document the fastmcp.errors channel and the WARNING-expected / ERROR-operator-fault level contract - tests: probe-server coverage for clean messages, meta payload, suggestion plumbing, and per-level logging; production stack-order assertions updated to the subclass --- CHANGES | 4 + docs/topics/logging.md | 17 ++++ src/libtmux_mcp/_utils.py | 31 +++++- src/libtmux_mcp/middleware.py | 129 +++++++++++++++++++++++- src/libtmux_mcp/server.py | 17 ++-- tests/test_middleware.py | 178 ++++++++++++++++++++++++++++++++-- tests/test_utils.py | 32 ++++++ 7 files changed, 389 insertions(+), 19 deletions(-) diff --git a/CHANGES b/CHANGES index a07f016..b0c840f 100644 --- a/CHANGES +++ b/CHANGES @@ -16,6 +16,10 @@ _Notes on upcoming releases will be added here_ Agent-correctable failures — unknown pane/window/session ids, invalid arguments, safety-tier denials, transient tmux errors — now log at WARNING instead of ERROR in the server's log stream. Two cases stay at ERROR so operators see them loudly: a missing tmux binary and unexpected exceptions that may indicate a bug in this server. Built on fastmcp 3.3's `ToolError(log_level=...)` support. +**Tool errors arrive clean, with structured detail and recovery hints** + +Failed tool calls now return their error message exactly as raised — previously every failure passed through fastmcp's stock error transform, which prefixed it with `Internal error:` (so agents saw `Internal error: Pane not found: %5`). Error results also carry a structured `_meta` payload: `error_type` names the originating exception class (`PaneNotFound`, not the wrapper), `expected` distinguishes agent-correctable failures from potential server bugs, and not-found errors include a `suggestion` pointing at the discovery tools (`list_sessions` / `list_windows` / `list_panes`) that resolve stale or guessed ids. Built on fastmcp 3.4's `ToolResult(is_error=True)` support. + ## libtmux-mcp 0.1.0a10 (2026-05-24) ### What's new diff --git a/docs/topics/logging.md b/docs/topics/logging.md index 8ad256a..3bcf742 100644 --- a/docs/topics/logging.md +++ b/docs/topics/logging.md @@ -23,6 +23,23 @@ are: {exc}`libtmux.exc.LibTmuxException`. - ``libtmux_mcp.server`` / ``libtmux_mcp.tools.*`` / etc. — ad-hoc warnings and debug messages from the codebase. +- ``fastmcp.errors`` — one record per failed tool call, emitted by + {class}`~libtmux_mcp.middleware.ToolErrorResultMiddleware`. + +## Error levels + +Tool failures are logged at a level matching who needs to act: + +- **WARNING** — expected, agent-correctable failures: unknown + pane/window/session ids, invalid arguments, safety-tier denials, + transient tmux errors. The calling agent receives the message and + can correct course; operators don't need to. +- **ERROR** — operator faults and potential bugs: a missing ``tmux`` + binary, or any unexpected exception escaping a tool. + +A WARNING-heavy, ERROR-quiet log stream is therefore healthy — it +means agents are probing and recovering. ERROR records are the ones +worth alerting on. ## Level control diff --git a/src/libtmux_mcp/_utils.py b/src/libtmux_mcp/_utils.py index 1b15823..7a9fb7f 100644 --- a/src/libtmux_mcp/_utils.py +++ b/src/libtmux_mcp/_utils.py @@ -56,10 +56,27 @@ class ExpectedToolError(ToolError): >>> isinstance(ExpectedToolError("x"), ToolError) True + + An optional ``suggestion`` carries an agent-facing recovery hint; + :class:`libtmux_mcp.middleware.ToolErrorResultMiddleware` surfaces + it in the error result's text and ``meta``: + + >>> err = ExpectedToolError("Pane not found: %5", + ... suggestion="Call list_panes to discover valid pane ids.") + >>> err.suggestion + 'Call list_panes to discover valid pane ids.' + >>> ExpectedToolError("no hint").suggestion is None + True """ - def __init__(self, *args: object, log_level: int = logging.WARNING) -> None: + def __init__( + self, + *args: object, + log_level: int = logging.WARNING, + suggestion: str | None = None, + ) -> None: super().__init__(*args, log_level=log_level) + self.suggestion = suggestion @dataclasses.dataclass(frozen=True) @@ -969,9 +986,17 @@ def _map_exception_to_tool_error(fn_name: str, e: BaseException) -> ToolError: if isinstance(e, exc.BadSessionName): return ExpectedToolError(str(e)) if isinstance(e, exc.TmuxObjectDoesNotExist): - return ExpectedToolError(f"Object not found: {e}") + return ExpectedToolError( + f"Object not found: {e}", + suggestion=( + "Call list_sessions / list_windows / list_panes to discover valid ids." + ), + ) if isinstance(e, exc.PaneNotFound): - return ExpectedToolError(f"Pane not found: {e}") + return ExpectedToolError( + f"Pane not found: {e}", + suggestion="Call list_panes to discover valid pane ids.", + ) if isinstance(e, exc.LibTmuxException): return ExpectedToolError(f"tmux error: {e}") logger.exception("unexpected error in MCP tool %s", fn_name) diff --git a/src/libtmux_mcp/middleware.py b/src/libtmux_mcp/middleware.py index bd1ce6e..4d663a2 100644 --- a/src/libtmux_mcp/middleware.py +++ b/src/libtmux_mcp/middleware.py @@ -1,6 +1,6 @@ """Middleware for libtmux MCP server. -Provides three pieces of infrastructure: +Provides the project's middleware infrastructure: * :class:`SafetyMiddleware` gates tools by safety tier based on the ``LIBTMUX_SAFETY`` environment variable. Tools tagged above the @@ -9,6 +9,14 @@ invocation (name, duration, outcome, client/request ids, and a summary of arguments with payload-bearing fields redacted to a length + SHA-256 prefix). +* :class:`ReadonlyRetryMiddleware` retries transient libtmux failures, + but only for readonly tools — re-running a mutating tool would + silently double side effects. +* :class:`ToolErrorResultMiddleware` converts tool-call failures into + ``ToolResult(is_error=True)`` results that carry the clean error + message plus a structured ``meta`` payload, instead of fastmcp's + stock ``-32603`` catch-all that prefixed every expected failure + with ``"Internal error: "``. * :class:`TailPreservingResponseLimitingMiddleware` is a backstop cap for oversized tool output. Unlike FastMCP's stock ``ResponseLimitingMiddleware`` it preserves the **tail** of the @@ -25,7 +33,10 @@ import typing as t from fastmcp.server.middleware import Middleware, MiddlewareContext -from fastmcp.server.middleware.error_handling import RetryMiddleware +from fastmcp.server.middleware.error_handling import ( + ErrorHandlingMiddleware, + RetryMiddleware, +) from fastmcp.server.middleware.response_limiting import ResponseLimitingMiddleware from fastmcp.tools.base import ToolResult from libtmux import exc as libtmux_exc @@ -98,6 +109,120 @@ async def on_call_tool( return await call_next(context) +# --------------------------------------------------------------------------- +# Tool-error result conversion +# --------------------------------------------------------------------------- + + +def _error_tool_result(error: Exception) -> ToolResult: + """Build a rich ``ToolResult(is_error=True)`` from a tool failure. + + The text block carries the error message exactly as raised — no + transform-layer prefix — with the recovery ``suggestion`` appended + when the error provides one. ``meta`` mirrors the details in + machine-readable form: + + * ``error_type`` — class name of the originating exception + (``__cause__`` when the raise site chained one, so agents see + ``PaneNotFound`` rather than the ``ToolError`` wrapper). + * ``expected`` — True for agent-correctable failures + (:class:`~libtmux_mcp._utils.ExpectedToolError`), False for + operator faults and potential server bugs. + * ``suggestion`` — recovery hint, only when present. + + ``structured_content`` is deliberately left unset: tools declare + output schemas for their success payloads, and clients validate + ``structuredContent`` against them — an error-shaped payload there + would fail validation on strict clients. + """ + cause = error.__cause__ + origin = cause if cause is not None else error + meta: dict[str, t.Any] = { + "error_type": type(origin).__name__, + "expected": isinstance(error, ExpectedToolError), + } + text = str(error) + suggestion = getattr(error, "suggestion", None) + if suggestion: + meta["suggestion"] = suggestion + text = f"{text}\n{suggestion}" + return ToolResult( + content=[TextContent(type="text", text=text)], + meta=meta, + is_error=True, + ) + + +class ToolErrorResultMiddleware(ErrorHandlingMiddleware): + """Convert tool-call failures into rich ``ToolResult`` errors. + + Replaces the stock :class:`ErrorHandlingMiddleware` behavior for + ``tools/call`` only. The stock ``transform_errors=True`` path + funnels every non-MCP exception through a ``-32603`` catch-all, so + agents received ``"Internal error: Pane not found: %5"`` — the + transform mangled every expected failure message. This subclass + intercepts tool-call exceptions first (``on_call_tool`` is the + innermost hook of a middleware's chain) and returns + :func:`_error_tool_result` instead; non-tool messages fall through + to the inherited ``on_message``, preserving the MCP ``-32002`` + resource-not-found transform this middleware was originally + adopted for. + + Logging honors ``FastMCPError.log_level`` (fastmcp >= 3.3): the + expected failures demoted to WARNING by + :class:`~libtmux_mcp._utils.ExpectedToolError` no longer get + re-shouted at ERROR by the stock ``_log_error``. + + Ordering invariant: must sit **outside** ``AuditMiddleware``, + ``ReadonlyRetryMiddleware``, and ``SafetyMiddleware``. All three + depend on exception semantics — audit detects failures by catching, + retry matches ``LibTmuxException`` via ``__cause__`` — so + converting the exception to a result any deeper in the stack would + silently disable both. + """ + + def _log_error(self, error: Exception, context: MiddlewareContext) -> None: + """Log at the error's own ``log_level`` instead of a flat ERROR. + + Mirrors the stock implementation (error-count tracking, + optional traceback, error callback) but routes the record + through ``logger.log`` with ``FastMCPError.log_level`` — + defaulting to ERROR for exceptions that don't carry one. + """ + level = getattr(error, "log_level", logging.ERROR) + + error_type = type(error).__name__ + method = context.method or "unknown" + + error_key = f"{error_type}:{method}" + self.error_counts[error_key] = self.error_counts.get(error_key, 0) + 1 + + base_message = f"Error in {method}: {error_type}: {error!s}" + + if self.include_traceback: + self.logger.log(level, base_message, exc_info=True) + else: + self.logger.log(level, base_message) + + if self.error_callback: + try: + self.error_callback(error, context) + except Exception: + self.logger.exception("Error in error callback") + + async def on_call_tool( + self, + context: MiddlewareContext, + call_next: t.Any, + ) -> t.Any: + """Convert tool-call exceptions into ``is_error`` results.""" + try: + return await call_next(context) + except Exception as error: + self._log_error(error, context) + return _error_tool_result(error) + + # --------------------------------------------------------------------------- # Audit middleware # --------------------------------------------------------------------------- diff --git a/src/libtmux_mcp/server.py b/src/libtmux_mcp/server.py index 1ff4afc..3041e18 100644 --- a/src/libtmux_mcp/server.py +++ b/src/libtmux_mcp/server.py @@ -12,7 +12,6 @@ import typing as t from fastmcp import FastMCP -from fastmcp.server.middleware.error_handling import ErrorHandlingMiddleware from fastmcp.server.middleware.timing import TimingMiddleware if t.TYPE_CHECKING: @@ -32,6 +31,7 @@ ReadonlyRetryMiddleware, SafetyMiddleware, TailPreservingResponseLimitingMiddleware, + ToolErrorResultMiddleware, ) from libtmux_mcp.tools.buffer_tools import _MCP_BUFFER_PREFIX @@ -276,11 +276,16 @@ def _gc_mcp_buffers(cache: t.Mapping[_ServerCacheKey, Server]) -> None: # 1. TimingMiddleware — neutral observer; start clock as early # as possible so timing captures middleware cost too. # 2. TailPreservingResponseLimitingMiddleware — bound the - # response *before* ErrorHandlingMiddleware can transform + # response *before* ToolErrorResultMiddleware can convert # exceptions; keeps the size cap independent of error path. - # 3. ErrorHandlingMiddleware — transforms resource errors to - # MCP code -32002; sits inside so it wraps the audit + safety - # pair. + # 3. ToolErrorResultMiddleware — converts tool-call failures to + # rich ToolResult(is_error=True) results and transforms + # resource errors to MCP code -32002. Must stay OUTSIDE the + # audit + retry + safety trio: all three depend on exception + # semantics (audit catches to record outcome=error, retry + # matches LibTmuxException via __cause__), so converting the + # exception to a result any deeper would silently disable + # both. # 4. AuditMiddleware — outside SafetyMiddleware so tier-denial # events (which raise ToolError before call_next inside # Safety) are still logged with outcome=error. Without this @@ -299,7 +304,7 @@ def _gc_mcp_buffers(cache: t.Mapping[_ServerCacheKey, Server]) -> None: max_size=DEFAULT_RESPONSE_LIMIT_BYTES, tools=_RESPONSE_LIMITED_TOOLS, ), - ErrorHandlingMiddleware(transform_errors=True), + ToolErrorResultMiddleware(transform_errors=True), AuditMiddleware(), ReadonlyRetryMiddleware(), SafetyMiddleware(max_tier=_safety_level), diff --git a/tests/test_middleware.py b/tests/test_middleware.py index d5ebe36..a26e84d 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -366,7 +366,6 @@ def test_server_middleware_stack_order() -> None: calls are audited once each (Audit wraps the retry loop) and tier-denied tools never reach retry (Safety stops them first). """ - from fastmcp.server.middleware.error_handling import ErrorHandlingMiddleware from fastmcp.server.middleware.timing import TimingMiddleware from libtmux_mcp.middleware import ( @@ -374,6 +373,7 @@ def test_server_middleware_stack_order() -> None: ReadonlyRetryMiddleware, SafetyMiddleware, TailPreservingResponseLimitingMiddleware, + ToolErrorResultMiddleware, ) from libtmux_mcp.server import mcp @@ -384,7 +384,7 @@ def test_server_middleware_stack_order() -> None: assert types[:6] == [ TimingMiddleware, TailPreservingResponseLimitingMiddleware, - ErrorHandlingMiddleware, + ToolErrorResultMiddleware, AuditMiddleware, ReadonlyRetryMiddleware, SafetyMiddleware, @@ -392,12 +392,14 @@ def test_server_middleware_stack_order() -> None: def test_error_handling_middleware_transforms_errors() -> None: - """ErrorHandlingMiddleware is configured with transform_errors=True. - - Regression guard: without ``transform_errors=True`` the middleware - would still log but not map resource errors to MCP error code - ``-32002``, which is the protocol-correctness point of adopting - this middleware in the first place. + """ToolErrorResultMiddleware is configured with transform_errors=True. + + Regression guard: without ``transform_errors=True`` the inherited + ``on_message`` would still log but not map resource errors to MCP + error code ``-32002``, which is the protocol-correctness point of + keeping the ErrorHandlingMiddleware base for non-tool messages + (tool-call errors are converted to ``is_error`` results before + they reach ``on_message``). """ from fastmcp.server.middleware.error_handling import ErrorHandlingMiddleware @@ -647,3 +649,163 @@ def test_readonly_retry_logger_uses_project_namespace() -> None: """ middleware = ReadonlyRetryMiddleware() assert middleware._retry.logger.name == "libtmux_mcp.retry" + + +# --------------------------------------------------------------------------- +# ToolErrorResultMiddleware tests +# --------------------------------------------------------------------------- + + +def _error_probe_server() -> t.Any: + """Build a minimal FastMCP instance wired like the production stack. + + Only ``ToolErrorResultMiddleware`` is installed — the assertions + target error-result conversion, not the audit/retry/safety trio. + Tools mirror the three production raise shapes: an expected + failure with a chained cause (decorator-mapped libtmux error), an + expected failure with a recovery suggestion, and an unexpected + crash. + """ + from fastmcp import FastMCP + from libtmux import exc as libtmux_exc + + from libtmux_mcp._utils import ExpectedToolError, _map_exception_to_tool_error + from libtmux_mcp.middleware import ToolErrorResultMiddleware + + probe = FastMCP( + name="error-probe", + middleware=[ToolErrorResultMiddleware(transform_errors=True)], + ) + + @probe.tool + def fail_expected_chained() -> str: + # Mirror the ``handle_tool_errors`` decorator's mapping shape: + # ``raise from ``. + cause = libtmux_exc.PaneNotFound("%99") + mapped = _map_exception_to_tool_error("fail_expected_chained", cause) + raise mapped from cause + + @probe.tool + def fail_expected_with_suggestion() -> str: + msg = "Pane not found: %99" + raise ExpectedToolError( + msg, + suggestion="Call list_panes to discover valid pane ids.", + ) + + @probe.tool + def fail_unexpected() -> str: + msg = "boom" + raise RuntimeError(msg) + + return probe + + +def test_tool_error_result_expected_failure_is_clean() -> None: + """Expected failures surface verbatim — no transform-layer prefix. + + Regression guard for the stock ``ErrorHandlingMiddleware`` + behavior this middleware replaces: its ``-32603`` catch-all + rewrote every tool failure to ``"Internal error: "``, + mangling the agent-facing text. + """ + from fastmcp import Client + + probe = _error_probe_server() + + async def _call() -> t.Any: + async with Client(probe) as client: + return await client.call_tool("fail_expected_chained", raise_on_error=False) + + result = asyncio.run(_call()) + + assert result.is_error is True + text = result.content[0].text + assert text.startswith("Pane not found:") + assert "Internal error" not in text + + +def test_tool_error_result_meta_carries_error_details() -> None: + """``meta`` reports the originating exception class and expectedness. + + ``error_type`` names the ``__cause__`` when the raise site chained + one — agents see ``PaneNotFound``, not the ``ToolError`` wrapper. + """ + from fastmcp import Client + + probe = _error_probe_server() + + async def _call(name: str) -> t.Any: + async with Client(probe) as client: + return await client.call_tool(name, raise_on_error=False) + + chained = asyncio.run(_call("fail_expected_chained")) + assert chained.is_error is True + meta = chained.meta or {} + assert meta["error_type"] == "PaneNotFound" + assert meta["expected"] is True + + crashed = asyncio.run(_call("fail_unexpected")) + assert crashed.is_error is True + crash_meta = crashed.meta or {} + assert crash_meta["error_type"] == "RuntimeError" + assert crash_meta["expected"] is False + + +def test_tool_error_result_appends_suggestion() -> None: + """A ``suggestion`` lands in both the text block and ``meta``.""" + from fastmcp import Client + + probe = _error_probe_server() + + async def _call() -> t.Any: + async with Client(probe) as client: + return await client.call_tool( + "fail_expected_with_suggestion", raise_on_error=False + ) + + result = asyncio.run(_call()) + + assert result.is_error is True + text = result.content[0].text + assert text == ("Pane not found: %99\nCall list_panes to discover valid pane ids.") + meta = getattr(result, "meta", None) or {} + assert meta["suggestion"] == "Call list_panes to discover valid pane ids." + + +def test_tool_error_result_logs_at_error_log_level( + caplog: pytest.LogCaptureFixture, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """``_log_error`` honors ``log_level``: WARNING expected, ERROR not. + + The stock ``ErrorHandlingMiddleware._log_error`` hardcodes + ``logger.error`` — without the override, every failure demoted to + WARNING by ``ExpectedToolError`` would be re-shouted at ERROR on + the ``fastmcp.errors`` channel. + """ + from fastmcp import Client + + # fastmcp's logger doesn't propagate to root (rich handler); + # re-enable propagation so caplog sees the records. + monkeypatch.setattr(logging.getLogger("fastmcp"), "propagate", True) + + probe = _error_probe_server() + + async def _call(name: str) -> None: + async with Client(probe) as client: + await client.call_tool(name, raise_on_error=False) + + with caplog.at_level(logging.DEBUG, logger="fastmcp.errors"): + asyncio.run(_call("fail_expected_chained")) + asyncio.run(_call("fail_unexpected")) + + records = { + r.message: r.levelno + for r in caplog.records + if r.name == "fastmcp.errors" and "Error in tools/call" in r.message + } + expected_records = [v for k, v in records.items() if "Pane not found" in k] + unexpected_records = [v for k, v in records.items() if "boom" in k] + assert expected_records == [logging.WARNING] + assert unexpected_records == [logging.ERROR] diff --git a/tests/test_utils.py b/tests/test_utils.py index 027a6a3..3b25f69 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -953,3 +953,35 @@ async def _call() -> None: records = [r for r in caplog.records if "Error calling tool" in r.message] assert records, "expected fastmcp to log the tool failure" assert all(r.levelno == logging.WARNING for r in records) + + +@pytest.mark.parametrize( + ("raised", "expected_suggestion_fragment"), + [ + (exc.TmuxObjectDoesNotExist("@99"), "list_sessions / list_windows"), + (exc.PaneNotFound("%99"), "list_panes"), + (exc.TmuxSessionExists("dup"), None), + (exc.BadSessionName("bad:name"), None), + (exc.LibTmuxException("transient"), None), + ], + ids=lambda v: type(v).__name__ if isinstance(v, Exception) else str(v), +) +def test_map_exception_suggestion_policy( + raised: Exception, + expected_suggestion_fragment: str | None, +) -> None: + """Only the not-found branches carry agent-facing recovery hints. + + Discovery tools are the canonical fix for stale/guessed ids — the + most common agent mistake. The other expected branches stay + hint-free until real transcripts show agents flailing on them. + """ + from libtmux_mcp._utils import _map_exception_to_tool_error + + mapped = _map_exception_to_tool_error("some_tool", raised) + suggestion = getattr(mapped, "suggestion", None) + if expected_suggestion_fragment is None: + assert suggestion is None + else: + assert suggestion is not None + assert expected_suggestion_fragment in suggestion From c4d8d8ea97550d546eb6ebee3ef5613571991306 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 10:49:42 -0500 Subject: [PATCH 04/23] docs(CHANGES) fastmcp 3.4 error-handling release notes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Tighten the unreleased entries to product level now that the work has a pull request to link. what: - Trim upstream fastmcp PR numbers, OTEL attribute names, and fastmcp API mechanics from the Dependencies and What's new entries — that depth lives in the linked PR - Add the PR reference to both What's new entries - Lead What's new with the clean-error-results headline; point discovery-tool mentions through tooliconl roles --- CHANGES | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGES b/CHANGES index b0c840f..b0d3d0a 100644 --- a/CHANGES +++ b/CHANGES @@ -8,17 +8,17 @@ _Notes on upcoming releases will be added here_ ### Dependencies -**Minimum `fastmcp>=3.4.0`** (was `>=3.2.4`). Picks up the 3.3.x/3.4.0 releases. Visible effects of the bump alone: resource titles on the `tmux://` hierarchy are now preserved when a template materializes a resource (fastmcp#4061), expected tool failures are no longer logged with a traceback by fastmcp's server layer (fastmcp#4029), and OTEL spans follow the MCP semantic conventions — `gen_ai.tool.name` / `gen_ai.prompt.name` instead of the deprecated `rpc.*` attributes (fastmcp#3889, fastmcp#3890). +**Minimum `fastmcp>=3.4.0`** (was `>=3.2.4`). Enables the error-result and log-level improvements below; the bump alone also restores resource titles on the `tmux://` hierarchy and brings MCP-compliant telemetry span attributes. ### What's new -**Expected tool failures log at WARNING, not ERROR** +**Tool errors arrive clean, with structured detail and recovery hints** -Agent-correctable failures — unknown pane/window/session ids, invalid arguments, safety-tier denials, transient tmux errors — now log at WARNING instead of ERROR in the server's log stream. Two cases stay at ERROR so operators see them loudly: a missing tmux binary and unexpected exceptions that may indicate a bug in this server. Built on fastmcp 3.3's `ToolError(log_level=...)` support. +Failed tool calls now return their message exactly as raised — previously every failure was prefixed with `Internal error:`. Error results carry a structured `_meta` payload (`error_type`, `expected`, `suggestion`), and not-found errors point at {tooliconl}`list-sessions` / {tooliconl}`list-windows` / {tooliconl}`list-panes` to resolve stale or guessed ids. (#64) -**Tool errors arrive clean, with structured detail and recovery hints** +**Expected tool failures log at WARNING, not ERROR** -Failed tool calls now return their error message exactly as raised — previously every failure passed through fastmcp's stock error transform, which prefixed it with `Internal error:` (so agents saw `Internal error: Pane not found: %5`). Error results also carry a structured `_meta` payload: `error_type` names the originating exception class (`PaneNotFound`, not the wrapper), `expected` distinguishes agent-correctable failures from potential server bugs, and not-found errors include a `suggestion` pointing at the discovery tools (`list_sessions` / `list_windows` / `list_panes`) that resolve stale or guessed ids. Built on fastmcp 3.4's `ToolResult(is_error=True)` support. +Agent-correctable failures — unknown ids, invalid arguments, safety-tier denials, transient tmux errors — now log at WARNING in the server's log stream. A missing tmux binary and unexpected exceptions stay at ERROR, so ERROR records are always worth attention. (#64) ## libtmux-mcp 0.1.0a10 (2026-05-24) From 127b3aa3986b54d9e11bb8eb9d93b770fb217e9b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 11:57:27 -0500 Subject: [PATCH 05/23] mcp(fix[middleware]): Lazy %-formatting in ToolErrorResultMiddleware._log_error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: AGENTS.md logging standard prescribes lazy %-formatting over f-strings in log calls — deferred interpolation when the level is filtered, and aggregator message-template grouping. The new _log_error pre-formatted its message with an f-string; flagged in the PR code review. what: - Pass the template and args to logger.log directly; the stock include_traceback if/else collapses into exc_info= - tests: convert the log-level test to the NamedTuple + test_id parametrize pattern and assert via record.getMessage(), which both interpolates lazily-formatted records and would fail on a literal %s leaking into the message --- src/libtmux_mcp/middleware.py | 17 ++++++---- tests/test_middleware.py | 61 ++++++++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/src/libtmux_mcp/middleware.py b/src/libtmux_mcp/middleware.py index 4d663a2..7ee9342 100644 --- a/src/libtmux_mcp/middleware.py +++ b/src/libtmux_mcp/middleware.py @@ -197,12 +197,17 @@ def _log_error(self, error: Exception, context: MiddlewareContext) -> None: error_key = f"{error_type}:{method}" self.error_counts[error_key] = self.error_counts.get(error_key, 0) + 1 - base_message = f"Error in {method}: {error_type}: {error!s}" - - if self.include_traceback: - self.logger.log(level, base_message, exc_info=True) - else: - self.logger.log(level, base_message) + # Lazy %-formatting (project logging standard) — also collapses + # the stock implementation's include_traceback branch, since + # ``exc_info`` accepts a bool. + self.logger.log( + level, + "Error in %s: %s: %s", + method, + error_type, + error, + exc_info=self.include_traceback, + ) if self.error_callback: try: diff --git a/tests/test_middleware.py b/tests/test_middleware.py index a26e84d..e9bb5bc 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -773,7 +773,41 @@ async def _call() -> t.Any: assert meta["suggestion"] == "Call list_panes to discover valid pane ids." +class ErrorLogLevelFixture(t.NamedTuple): + """Test fixture for ToolErrorResultMiddleware._log_error levels.""" + + test_id: str + tool_name: str + expected_level: int + message_fragment: str + + +ERROR_LOG_LEVEL_FIXTURES: list[ErrorLogLevelFixture] = [ + ErrorLogLevelFixture( + test_id="expected_failure_logs_warning", + tool_name="fail_expected_chained", + expected_level=logging.WARNING, + message_fragment="Pane not found", + ), + ErrorLogLevelFixture( + test_id="unexpected_failure_logs_error", + tool_name="fail_unexpected", + expected_level=logging.ERROR, + message_fragment="boom", + ), +] + + +@pytest.mark.parametrize( + ErrorLogLevelFixture._fields, + ERROR_LOG_LEVEL_FIXTURES, + ids=[f.test_id for f in ERROR_LOG_LEVEL_FIXTURES], +) def test_tool_error_result_logs_at_error_log_level( + test_id: str, + tool_name: str, + expected_level: int, + message_fragment: str, caplog: pytest.LogCaptureFixture, monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -783,6 +817,11 @@ def test_tool_error_result_logs_at_error_log_level( ``logger.error`` — without the override, every failure demoted to WARNING by ``ExpectedToolError`` would be re-shouted at ERROR on the ``fastmcp.errors`` channel. + + Assertions go through ``record.getMessage()`` so the lazy + %-formatting args are interpolated regardless of whether a handler + formatted the record — and a literal ``%s`` leaking into the + message would fail the fragment match. """ from fastmcp import Client @@ -792,20 +831,18 @@ def test_tool_error_result_logs_at_error_log_level( probe = _error_probe_server() - async def _call(name: str) -> None: + async def _call() -> None: async with Client(probe) as client: - await client.call_tool(name, raise_on_error=False) + await client.call_tool(tool_name, raise_on_error=False) with caplog.at_level(logging.DEBUG, logger="fastmcp.errors"): - asyncio.run(_call("fail_expected_chained")) - asyncio.run(_call("fail_unexpected")) + asyncio.run(_call()) - records = { - r.message: r.levelno + levels = [ + r.levelno for r in caplog.records - if r.name == "fastmcp.errors" and "Error in tools/call" in r.message - } - expected_records = [v for k, v in records.items() if "Pane not found" in k] - unexpected_records = [v for k, v in records.items() if "boom" in k] - assert expected_records == [logging.WARNING] - assert unexpected_records == [logging.ERROR] + if r.name == "fastmcp.errors" + and "Error in tools/call" in r.getMessage() + and message_fragment in r.getMessage() + ] + assert levels == [expected_level] From 5bcddf61a60cdeca1c43016bbaaf66cebae43db3 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 12:01:24 -0500 Subject: [PATCH 06/23] docs(middleware): Order module docstring bullets by definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The module docstring listed ToolErrorResultMiddleware after ReadonlyRetryMiddleware, but the class is defined directly after SafetyMiddleware — a reader scanning the file by the docstring's order lands in the wrong place. Flagged in the PR code review. what: - Reorder the bullet list to match file definition order and say so in the lead-in (Safety, ToolErrorResult, Audit, ReadonlyRetry, TailPreserving) --- src/libtmux_mcp/middleware.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libtmux_mcp/middleware.py b/src/libtmux_mcp/middleware.py index 7ee9342..439e66c 100644 --- a/src/libtmux_mcp/middleware.py +++ b/src/libtmux_mcp/middleware.py @@ -1,10 +1,15 @@ """Middleware for libtmux MCP server. -Provides the project's middleware infrastructure: +Provides the project's middleware infrastructure, in definition order: * :class:`SafetyMiddleware` gates tools by safety tier based on the ``LIBTMUX_SAFETY`` environment variable. Tools tagged above the configured tier are hidden from listing and blocked from execution. +* :class:`ToolErrorResultMiddleware` converts tool-call failures into + ``ToolResult(is_error=True)`` results that carry the clean error + message plus a structured ``meta`` payload, instead of fastmcp's + stock ``-32603`` catch-all that prefixed every expected failure + with ``"Internal error: "``. * :class:`AuditMiddleware` emits a structured log record for each tool invocation (name, duration, outcome, client/request ids, and a summary of arguments with payload-bearing fields redacted to a @@ -12,11 +17,6 @@ * :class:`ReadonlyRetryMiddleware` retries transient libtmux failures, but only for readonly tools — re-running a mutating tool would silently double side effects. -* :class:`ToolErrorResultMiddleware` converts tool-call failures into - ``ToolResult(is_error=True)`` results that carry the clean error - message plus a structured ``meta`` payload, instead of fastmcp's - stock ``-32603`` catch-all that prefixed every expected failure - with ``"Internal error: "``. * :class:`TailPreservingResponseLimitingMiddleware` is a backstop cap for oversized tool output. Unlike FastMCP's stock ``ResponseLimitingMiddleware`` it preserves the **tail** of the From f667ac4133823bfdc862b863c1f99c410aa6cadf Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 12:04:43 -0500 Subject: [PATCH 07/23] docs(_utils): Document ExpectedToolError mapping in error decorators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The handle_tool_errors docstrings still said failures re-raise as plain ToolError; the mapper now classifies expected failures as ExpectedToolError at WARNING — the docstrings were silent on the level partition and on why the re-raise chains from e. Flagged in the PR code review. what: - Document the ExpectedToolError(WARNING) / ToolError(ERROR) partition on both decorators - State the single-level __cause__ contract at the raise site: ReadonlyRetryMiddleware inspects exactly one cause hop, so re-wrapping the mapped error would silently disable readonly retries --- src/libtmux_mcp/_utils.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/libtmux_mcp/_utils.py b/src/libtmux_mcp/_utils.py index 7a9fb7f..a2324f1 100644 --- a/src/libtmux_mcp/_utils.py +++ b/src/libtmux_mcp/_utils.py @@ -1008,8 +1008,19 @@ def handle_tool_errors( ) -> t.Callable[P, R]: """Decorate synchronous MCP tool functions with standardized error handling. - Catches libtmux exceptions and re-raises as ``ToolError`` so that - MCP responses have ``isError=True`` with a descriptive message. + Catches libtmux exceptions and re-raises them through + :func:`_map_exception_to_tool_error` so MCP responses have + ``isError=True`` with a descriptive message — expected, + agent-correctable failures as :class:`ExpectedToolError` (logged + at WARNING), the unexpected catch-all as stock ``ToolError`` + (logged at ERROR). + + The re-raise chains the original exception via ``from e``. Keep it + single-level: :class:`~libtmux_mcp.middleware.ReadonlyRetryMiddleware` + matches :exc:`libtmux.exc.LibTmuxException` by inspecting exactly + one ``__cause__`` hop, so wrapping the mapped error again would + silently disable readonly retries. + Use :func:`handle_tool_errors_async` for ``async def`` tools — this wrapper only supports plain sync callables. """ @@ -1036,8 +1047,12 @@ def handle_tool_errors_async( ``report_progress``/``elicit``/``read_resource`` methods are coroutines that only run inside ``async def`` tools. - Maps the same libtmux exception set to the same ``ToolError`` - messages as the sync decorator by delegating to a shared helper. + Maps the same libtmux exception set to the same messages and + error classes as the sync decorator (expected failures as + :class:`ExpectedToolError` at WARNING, the unexpected catch-all as + stock ``ToolError`` at ERROR) by delegating to a shared helper, + and chains the original exception via the same single-level + ``from e`` that readonly retries depend on. """ @functools.wraps(fn) From 20813284a2a40a0fde694be9d88aafaecd06120c Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 12:23:01 -0500 Subject: [PATCH 08/23] docs(topics/architecture): Document the full middleware stack and two-layer errors why: The request-flow diagram showed only SafetyMiddleware and the error-handling section described a single decorator-to-ToolError step; the branch's error redesign (classification in the decorator, conversion to is_error results in ToolErrorResultMiddleware) made both descriptions materially incomplete. what: - Request flow: list all six middleware outermost-first with a one-phrase role each; point at the server.py stack comment for the ordering rationale - Error handling: describe the classification/conversion split, the from-e cause chain retries depend on, and the exceptions-stay- exceptions invariant through the audit/retry/safety trio; link topics/logging for the level policy - Source layout: middleware.py one-liner covers all middleware, not just the safety tier --- docs/topics/architecture.md | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/docs/topics/architecture.md b/docs/topics/architecture.md index 05d536d..19ef212 100644 --- a/docs/topics/architecture.md +++ b/docs/topics/architecture.md @@ -13,7 +13,7 @@ src/libtmux_mcp/ server.py # FastMCP instance and configuration _utils.py # Server caching, resolvers, serializers, error handling models.py # Pydantic output models - middleware.py # Safety tier middleware + middleware.py # Safety, audit, retry, and error-result middleware tools/ server_tools.py # list_sessions, create_session, kill_server, get_server_info session_tools.py # list_windows, create_window, rename_session, kill_session @@ -27,14 +27,22 @@ src/libtmux_mcp/ ## Request flow +Middleware wraps tool calls outermost-first (full ordering rationale in +the `server.py` stack comment): + ``` MCP Client (Claude, Cursor, etc.) → stdio transport → FastMCP server (server.py) - → SafetyMiddleware (middleware.py) - → Tool function (tools/*.py) - → libtmux Server/Session/Window/Pane - → tmux binary (via subprocess) + → TimingMiddleware (wall-time observer) + → TailPreservingResponseLimitingMiddleware (response size backstop) + → ToolErrorResultMiddleware (exceptions → is_error results) + → AuditMiddleware (one log record per call) + → ReadonlyRetryMiddleware (retries readonly tools only) + → SafetyMiddleware (tier gate, fail-closed) + → Tool function (tools/*.py) + → libtmux Server/Session/Window/Pane + → tmux binary (via subprocess) ``` ## Key design decisions @@ -60,7 +68,12 @@ Tools use resolver functions (`_resolve_session`, `_resolve_window`, `_resolve_p ### Error handling -The `@handle_tool_errors` decorator wraps all tool functions, catching libtmux exceptions and converting them to `ToolError` with descriptive messages. +Two layers split the work: + +1. **Classification** — the `@handle_tool_errors` decorator wraps all tool functions, mapping libtmux exceptions to `ExpectedToolError` (agent-correctable: unknown ids, invalid arguments, transient tmux errors; logged at WARNING) or stock `ToolError` (operator faults and unexpected bugs; logged at ERROR). The raise chains the original exception via `from e`, which is what lets `ReadonlyRetryMiddleware` match transient `LibTmuxException` causes. +2. **Conversion** — `ToolErrorResultMiddleware` catches the exception at the outer edge of the stack and returns `ToolResult(is_error=True)` carrying the message exactly as raised, plus a `_meta` payload (`error_type`, `expected`, optional `suggestion` pointing at discovery tools). + +Errors must stay exceptions through the audit/retry/safety trio — audit detects failures by catching, retry matches via `__cause__` — so conversion happens only in the outermost error layer. Level policy lives in {doc}`/topics/logging`. ## References From 9f831e7770bb86651f1169475425ddfb6c9ff364 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 12:25:16 -0500 Subject: [PATCH 09/23] docs(src): Name ExpectedToolError in Raises sections at converted raise sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Seven docstrings still named ToolError where every raise site now raises ExpectedToolError — true only by subclassing, and the rendered API reference loses the expected/unexpected distinction the error redesign introduced. what: - Raises sections: _coerce_dict_arg and _apply_filters (_utils), _validate_channel_name and wait_for_channel (wait_for_tools), find_pane_by_position (pane_tools/lifecycle) - Prose: _raise_if_pane_lifecycle_changed one-liner (pane_tools/state), wait_for_content_change baseline-loss note (pane_tools/wait) --- src/libtmux_mcp/_utils.py | 4 ++-- src/libtmux_mcp/tools/pane_tools/lifecycle.py | 2 +- src/libtmux_mcp/tools/pane_tools/state.py | 2 +- src/libtmux_mcp/tools/pane_tools/wait.py | 2 +- src/libtmux_mcp/tools/wait_for_tools.py | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libtmux_mcp/_utils.py b/src/libtmux_mcp/_utils.py index a2324f1..7ed441b 100644 --- a/src/libtmux_mcp/_utils.py +++ b/src/libtmux_mcp/_utils.py @@ -763,7 +763,7 @@ def _coerce_dict_arg( Raises ------ - ToolError + ExpectedToolError If ``value`` is a string that is not valid JSON, or decodes to a JSON value that is not an object. """ @@ -807,7 +807,7 @@ def _apply_filters( Raises ------ - ToolError + ExpectedToolError If a filter key uses an invalid lookup operator. """ coerced = _coerce_dict_arg("filters", filters) diff --git a/src/libtmux_mcp/tools/pane_tools/lifecycle.py b/src/libtmux_mcp/tools/pane_tools/lifecycle.py index 2123300..a522b26 100644 --- a/src/libtmux_mcp/tools/pane_tools/lifecycle.py +++ b/src/libtmux_mcp/tools/pane_tools/lifecycle.py @@ -288,7 +288,7 @@ def find_pane_by_position( Raises ------ - ToolError + ExpectedToolError If no pane satisfies both edge predicates for that corner — in practice only possible for layouts tmux itself produced via custom layout strings; the built-in layouts always have a pane diff --git a/src/libtmux_mcp/tools/pane_tools/state.py b/src/libtmux_mcp/tools/pane_tools/state.py index c69f990..5ce5086 100644 --- a/src/libtmux_mcp/tools/pane_tools/state.py +++ b/src/libtmux_mcp/tools/pane_tools/state.py @@ -60,7 +60,7 @@ def _read_pane_state(pane: Pane) -> _PaneState: def _raise_if_pane_lifecycle_changed( pane: Pane, state: _PaneState, baseline_pid: str ) -> None: - """Raise ``ToolError`` when a cursor or wait baseline is invalid.""" + """Raise ``ExpectedToolError`` when a cursor or wait baseline is invalid.""" if state.pane_dead: msg = f"pane {pane.pane_id} died; cursor/baseline anchor is no longer valid" raise ExpectedToolError(msg) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index b0812b1..76589de 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -495,7 +495,7 @@ async def wait_for_content_change( what the output will be — it waits for "something happened" rather than a specific pattern. - Raises ``ToolError`` when pane respawn or pane death invalidates the + Raises ``ExpectedToolError`` when pane respawn or pane death invalidates the baseline captured at entry. For correctness-sensitive flows prefer ``wait_for_channel`` composed with ``tmux wait-for -S``. diff --git a/src/libtmux_mcp/tools/wait_for_tools.py b/src/libtmux_mcp/tools/wait_for_tools.py index 14c33b8..b486d27 100644 --- a/src/libtmux_mcp/tools/wait_for_tools.py +++ b/src/libtmux_mcp/tools/wait_for_tools.py @@ -75,7 +75,7 @@ def _validate_channel_name(name: str) -> str: Raises ------ - ToolError + ExpectedToolError When ``name`` is empty, too long, or contains disallowed characters. @@ -145,7 +145,7 @@ async def wait_for_channel( Raises ------ - ToolError + ExpectedToolError On timeout, invalid channel name, or tmux error. """ server = _get_server(socket_name=socket_name) From b93234f0354be919b28e021ac50f84200b3ffa0a Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 13:13:10 -0500 Subject: [PATCH 10/23] docs(middleware,server): Count all three dependents in the ordering invariant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The ordering-invariant comment opened with 'All three depend on exception semantics' but explained only audit and retry and concluded 'silently disable both' — an internally inconsistent count in the one comment future maintainers rely on to not reorder the stack. Flagged in the PR code review. what: - Name safety's dependency explicitly (tier denials must propagate as exceptions for audit to record them) and close with 'break all three' at both sites: the ToolErrorResultMiddleware docstring and the server.py stack comment --- src/libtmux_mcp/middleware.py | 7 ++++--- src/libtmux_mcp/server.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/libtmux_mcp/middleware.py b/src/libtmux_mcp/middleware.py index 439e66c..3ee134d 100644 --- a/src/libtmux_mcp/middleware.py +++ b/src/libtmux_mcp/middleware.py @@ -176,9 +176,10 @@ class ToolErrorResultMiddleware(ErrorHandlingMiddleware): Ordering invariant: must sit **outside** ``AuditMiddleware``, ``ReadonlyRetryMiddleware``, and ``SafetyMiddleware``. All three depend on exception semantics — audit detects failures by catching, - retry matches ``LibTmuxException`` via ``__cause__`` — so - converting the exception to a result any deeper in the stack would - silently disable both. + retry matches ``LibTmuxException`` via ``__cause__``, and safety's + tier denials must propagate as exceptions for audit to record them + — so converting the exception to a result any deeper in the stack + would silently break all three. """ def _log_error(self, error: Exception, context: MiddlewareContext) -> None: diff --git a/src/libtmux_mcp/server.py b/src/libtmux_mcp/server.py index 3041e18..2567561 100644 --- a/src/libtmux_mcp/server.py +++ b/src/libtmux_mcp/server.py @@ -283,9 +283,10 @@ def _gc_mcp_buffers(cache: t.Mapping[_ServerCacheKey, Server]) -> None: # resource errors to MCP code -32002. Must stay OUTSIDE the # audit + retry + safety trio: all three depend on exception # semantics (audit catches to record outcome=error, retry - # matches LibTmuxException via __cause__), so converting the - # exception to a result any deeper would silently disable - # both. + # matches LibTmuxException via __cause__, and safety's tier + # denials must propagate as exceptions for audit to record + # them), so converting the exception to a result any deeper + # would silently break all three. # 4. AuditMiddleware — outside SafetyMiddleware so tier-denial # events (which raise ToolError before call_next inside # Safety) are still logged with outcome=error. Without this From 5034fd2ef23eb65de5119492370e489add6fa9e5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 13:14:27 -0500 Subject: [PATCH 11/23] docs(topics/architecture): Fix ToolErrorResultMiddleware stack-position claim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The error-handling section said the middleware catches the exception 'at the outer edge of the stack', but Timing and the response limiter sit outside it — imprecise in the one page whose purpose is stack-position accuracy. Flagged in the PR code review. what: - Describe the catch point as 'once it has cleared the audit/retry/safety trio', matching the closing sentence's 'outermost error layer' framing --- docs/topics/architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/topics/architecture.md b/docs/topics/architecture.md index 19ef212..753d30f 100644 --- a/docs/topics/architecture.md +++ b/docs/topics/architecture.md @@ -71,7 +71,7 @@ Tools use resolver functions (`_resolve_session`, `_resolve_window`, `_resolve_p Two layers split the work: 1. **Classification** — the `@handle_tool_errors` decorator wraps all tool functions, mapping libtmux exceptions to `ExpectedToolError` (agent-correctable: unknown ids, invalid arguments, transient tmux errors; logged at WARNING) or stock `ToolError` (operator faults and unexpected bugs; logged at ERROR). The raise chains the original exception via `from e`, which is what lets `ReadonlyRetryMiddleware` match transient `LibTmuxException` causes. -2. **Conversion** — `ToolErrorResultMiddleware` catches the exception at the outer edge of the stack and returns `ToolResult(is_error=True)` carrying the message exactly as raised, plus a `_meta` payload (`error_type`, `expected`, optional `suggestion` pointing at discovery tools). +2. **Conversion** — `ToolErrorResultMiddleware` catches the exception once it has cleared the audit/retry/safety trio and returns `ToolResult(is_error=True)` carrying the message exactly as raised, plus a `_meta` payload (`error_type`, `expected`, optional `suggestion` pointing at discovery tools). Errors must stay exceptions through the audit/retry/safety trio — audit detects failures by catching, retry matches via `__cause__` — so conversion happens only in the outermost error layer. Level policy lives in {doc}`/topics/logging`. From 79d6425912cc540c415be23d8ad1f265ea17f482 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 13:15:50 -0500 Subject: [PATCH 12/23] docs(_utils): Add Parameters section to ExpectedToolError docstring why: The class documents its two keyword parameters only via prose and doctests; AGENTS.md mandates NumPy docstring style, and the codebase's parameterised classes (SafetyMiddleware, AuditMiddleware) all carry a Parameters section. Flagged in the PR code review. what: - Document *args, log_level, and suggestion in a NumPy Parameters block between the extended description and Examples --- src/libtmux_mcp/_utils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/libtmux_mcp/_utils.py b/src/libtmux_mcp/_utils.py index 7ed441b..8c1bfea 100644 --- a/src/libtmux_mcp/_utils.py +++ b/src/libtmux_mcp/_utils.py @@ -39,6 +39,20 @@ class ExpectedToolError(ToolError): as ERROR records. Unexpected failures keep stock :class:`ToolError` and its ERROR default — those are the ones operators must see. + Parameters + ---------- + *args : object + Positional arguments forwarded to :class:`ToolError` + (typically the error message). + log_level : int + Level fastmcp's server layer logs this failure at. Defaults + to ``logging.WARNING``. + suggestion : str, optional + Agent-facing recovery hint. + :class:`~libtmux_mcp.middleware.ToolErrorResultMiddleware` + appends it to the error result's text and mirrors it into the + result's ``meta``. + Examples -------- >>> import logging From 99c1ac9262c089304000b6293ee820a83c599cda Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 13:25:04 -0500 Subject: [PATCH 13/23] mcp(fix[middleware]): Classify argument-schema validation failures as expected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: fastmcp validates tool arguments against the input schema before tool code runs, raising a bare pydantic.ValidationError that carries no log_level — the handle_tool_errors decorators never get a chance to classify it. The middleware logged these at ERROR and marked them expected=false, contradicting the documented policy that invalid arguments are agent-correctable WARNINGs (fastmcp's own server layer already logs the same failure as a WARNING). Reported in PR review. what: - _is_schema_validation_error: recognize pydantic.ValidationError on the error or its __cause__; output-shape failures cannot be confused with this case — fastmcp's tool layer converts those to error results before the middleware sees them (verified) - _log_error: exceptions without log_level classify via the helper — WARNING for argument validation, ERROR otherwise - _error_tool_result: expected=True for schema validation failures - tests: takes_int probe tool; ErrorLogLevelFixture grows an arguments field with a schema_validation_logs_warning case; meta classification test asserting expected=True + ValidationError --- src/libtmux_mcp/middleware.py | 46 +++++++++++++++++++++++++++----- tests/test_middleware.py | 50 +++++++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 9 deletions(-) diff --git a/src/libtmux_mcp/middleware.py b/src/libtmux_mcp/middleware.py index 3ee134d..2abbc0a 100644 --- a/src/libtmux_mcp/middleware.py +++ b/src/libtmux_mcp/middleware.py @@ -41,6 +41,7 @@ from fastmcp.tools.base import ToolResult from libtmux import exc as libtmux_exc from mcp.types import TextContent +from pydantic import ValidationError as PydanticValidationError from libtmux_mcp._utils import ( TAG_DESTRUCTIVE, @@ -114,6 +115,25 @@ async def on_call_tool( # --------------------------------------------------------------------------- +def _is_schema_validation_error(error: BaseException) -> bool: + """Return True for fastmcp argument-schema validation failures. + + fastmcp validates tool arguments against the input schema *before* + tool code runs, raising a bare :exc:`pydantic.ValidationError` — + too early for the ``handle_tool_errors`` decorators to classify. + Bad arguments are agent-correctable (fix the call and retry), so + they get the same expected/WARNING treatment as + :class:`~libtmux_mcp._utils.ExpectedToolError`. + + Output validation cannot be mistaken for this case: fastmcp's tool + layer converts output-shape failures into error results itself, so + they never reach the middleware as exceptions. + """ + return isinstance(error, PydanticValidationError) or isinstance( + error.__cause__, PydanticValidationError + ) + + def _error_tool_result(error: Exception) -> ToolResult: """Build a rich ``ToolResult(is_error=True)`` from a tool failure. @@ -126,8 +146,9 @@ def _error_tool_result(error: Exception) -> ToolResult: (``__cause__`` when the raise site chained one, so agents see ``PaneNotFound`` rather than the ``ToolError`` wrapper). * ``expected`` — True for agent-correctable failures - (:class:`~libtmux_mcp._utils.ExpectedToolError`), False for - operator faults and potential server bugs. + (:class:`~libtmux_mcp._utils.ExpectedToolError` and + argument-schema validation errors), False for operator faults + and potential server bugs. * ``suggestion`` — recovery hint, only when present. ``structured_content`` is deliberately left unset: tools declare @@ -139,7 +160,8 @@ def _error_tool_result(error: Exception) -> ToolResult: origin = cause if cause is not None else error meta: dict[str, t.Any] = { "error_type": type(origin).__name__, - "expected": isinstance(error, ExpectedToolError), + "expected": isinstance(error, ExpectedToolError) + or _is_schema_validation_error(error), } text = str(error) suggestion = getattr(error, "suggestion", None) @@ -171,7 +193,10 @@ class ToolErrorResultMiddleware(ErrorHandlingMiddleware): Logging honors ``FastMCPError.log_level`` (fastmcp >= 3.3): the expected failures demoted to WARNING by :class:`~libtmux_mcp._utils.ExpectedToolError` no longer get - re-shouted at ERROR by the stock ``_log_error``. + re-shouted at ERROR by the stock ``_log_error``. Argument-schema + validation failures — raised by fastmcp before tool code can + classify them — are treated as expected too (see + :func:`_is_schema_validation_error`). Ordering invariant: must sit **outside** ``AuditMiddleware``, ``ReadonlyRetryMiddleware``, and ``SafetyMiddleware``. All three @@ -187,10 +212,17 @@ def _log_error(self, error: Exception, context: MiddlewareContext) -> None: Mirrors the stock implementation (error-count tracking, optional traceback, error callback) but routes the record - through ``logger.log`` with ``FastMCPError.log_level`` — - defaulting to ERROR for exceptions that don't carry one. + through ``logger.log`` with ``FastMCPError.log_level``. + Exceptions that don't carry one default to ERROR — except + argument-schema validation failures, which are + agent-correctable and log at WARNING like every other invalid + argument (see :func:`_is_schema_validation_error`). """ - level = getattr(error, "log_level", logging.ERROR) + level: int | None = getattr(error, "log_level", None) + if level is None: + level = ( + logging.WARNING if _is_schema_validation_error(error) else logging.ERROR + ) error_type = type(error).__name__ method = context.method or "unknown" diff --git a/tests/test_middleware.py b/tests/test_middleware.py index e9bb5bc..af20967 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -698,6 +698,12 @@ def fail_unexpected() -> str: msg = "boom" raise RuntimeError(msg) + @probe.tool + def takes_int(count: int) -> str: + # Never reached when the argument fails schema validation — + # fastmcp raises pydantic.ValidationError before tool code runs. + return str(count) + return probe @@ -778,6 +784,7 @@ class ErrorLogLevelFixture(t.NamedTuple): test_id: str tool_name: str + arguments: dict[str, t.Any] | None expected_level: int message_fragment: str @@ -786,15 +793,24 @@ class ErrorLogLevelFixture(t.NamedTuple): ErrorLogLevelFixture( test_id="expected_failure_logs_warning", tool_name="fail_expected_chained", + arguments=None, expected_level=logging.WARNING, message_fragment="Pane not found", ), ErrorLogLevelFixture( test_id="unexpected_failure_logs_error", tool_name="fail_unexpected", + arguments=None, expected_level=logging.ERROR, message_fragment="boom", ), + ErrorLogLevelFixture( + test_id="schema_validation_logs_warning", + tool_name="takes_int", + arguments={"count": "not-an-int"}, + expected_level=logging.WARNING, + message_fragment="validation error", + ), ] @@ -806,6 +822,7 @@ class ErrorLogLevelFixture(t.NamedTuple): def test_tool_error_result_logs_at_error_log_level( test_id: str, tool_name: str, + arguments: dict[str, t.Any] | None, expected_level: int, message_fragment: str, caplog: pytest.LogCaptureFixture, @@ -816,7 +833,9 @@ def test_tool_error_result_logs_at_error_log_level( The stock ``ErrorHandlingMiddleware._log_error`` hardcodes ``logger.error`` — without the override, every failure demoted to WARNING by ``ExpectedToolError`` would be re-shouted at ERROR on - the ``fastmcp.errors`` channel. + the ``fastmcp.errors`` channel. Argument-schema validation + failures carry no ``log_level`` at all; the middleware classifies + them as agent-correctable WARNINGs. Assertions go through ``record.getMessage()`` so the lazy %-formatting args are interpolated regardless of whether a handler @@ -833,7 +852,7 @@ def test_tool_error_result_logs_at_error_log_level( async def _call() -> None: async with Client(probe) as client: - await client.call_tool(tool_name, raise_on_error=False) + await client.call_tool(tool_name, arguments, raise_on_error=False) with caplog.at_level(logging.DEBUG, logger="fastmcp.errors"): asyncio.run(_call()) @@ -846,3 +865,30 @@ async def _call() -> None: and message_fragment in r.getMessage() ] assert levels == [expected_level] + + +def test_schema_validation_failure_marked_expected_in_meta() -> None: + """Argument-schema failures report ``expected=True`` in result meta. + + fastmcp validates arguments before tool code runs, so the + ``handle_tool_errors`` decorators never get the chance to classify + these as ``ExpectedToolError`` — the middleware must recognize the + bare ``pydantic.ValidationError`` itself. Bad arguments are + agent-correctable, same as a bad pane id. + """ + from fastmcp import Client + + probe = _error_probe_server() + + async def _call() -> t.Any: + async with Client(probe) as client: + return await client.call_tool( + "takes_int", {"count": "not-an-int"}, raise_on_error=False + ) + + result = asyncio.run(_call()) + + assert result.is_error is True + meta = result.meta or {} + assert meta["expected"] is True + assert meta["error_type"] == "ValidationError" From e893f32e2b676397c50ebbec74ed2c75e8d127a9 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 13:30:10 -0500 Subject: [PATCH 14/23] mcp(fix[middleware]): Preserve is_error through response-limit truncation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The limiter sits outside ToolErrorResultMiddleware, so oversized error results (e.g. a validation message echoing a huge argument on a response-limited tool) hit the truncation path, which rebuilds the result without the error flag. The truncated error became an apparent success, and the MCP SDK client rejected it against the tool's output schema — the agent received a transport-level RuntimeError instead of the tool error. Reported in PR review; reproduced against the reference client. what: - TailPreservingResponseLimitingMiddleware.on_call_tool captures the inner result and restores is_error=True when the base class truncation replaced an error result; meta already survives via the base class, and tail-preservation keeps the suggestion line at the end of the text - Truncated successes never gain the flag - tests: LimiterErrorFixture parametrize (oversized error keeps flag + suggestion tail, small error untouched, oversized success stays non-error); the failing probe returns a Pydantic model so the MCP SDK client's output-schema validation guards the regression note: truncated SUCCESS results from schema'd tools still trip the same client-side validation — a pre-existing gap in fastmcp's stock limiter (its meta workaround only bypasses server-side validation), out of scope here --- src/libtmux_mcp/middleware.py | 34 ++++++++ tests/test_middleware.py | 144 ++++++++++++++++++++++++++++++++++ 2 files changed, 178 insertions(+) diff --git a/src/libtmux_mcp/middleware.py b/src/libtmux_mcp/middleware.py index 2abbc0a..1da714c 100644 --- a/src/libtmux_mcp/middleware.py +++ b/src/libtmux_mcp/middleware.py @@ -526,8 +526,42 @@ class TailPreservingResponseLimitingMiddleware(ResponseLimitingMiddleware): caps at the tool layer fire first under normal operation; this middleware catches pathological output from future tools that forget to declare their own bounds. + + Error results keep their ``is_error`` flag through truncation. + The stock truncation path rebuilds the result without it, which + turns an oversized error (e.g. a validation message echoing a + huge argument) into an apparent success — MCP clients then + validate the truncated text against the tool's output schema and + fail with a transport-level error instead of delivering the tool + error. ``meta`` (``error_type`` / ``expected`` / ``suggestion``) + already survives via the base class, and tail-preservation keeps + the suggestion line, which sits at the end of the text. """ + async def on_call_tool( + self, + context: MiddlewareContext, + call_next: t.Any, + ) -> t.Any: + """Apply the size cap without dropping ``is_error``.""" + inner: t.Any = None + + async def _capture(ctx: t.Any) -> t.Any: + nonlocal inner + inner = await call_next(ctx) + return inner + + result = await super().on_call_tool(context, _capture) + if result is not inner and isinstance(inner, ToolResult) and inner.is_error: + # The base class truncated and rebuilt the result; restore + # the error flag it dropped. + return ToolResult( + content=result.content, + meta=result.meta, + is_error=True, + ) + return result + def _truncate_to_result( self, text: str, diff --git a/tests/test_middleware.py b/tests/test_middleware.py index af20967..3083d29 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -7,6 +7,7 @@ import logging import typing as t +import pydantic import pytest from fastmcp.server.middleware import MiddlewareContext from mcp.types import CallToolRequestParams @@ -892,3 +893,146 @@ async def _call() -> t.Any: meta = result.meta or {} assert meta["expected"] is True assert meta["error_type"] == "ValidationError" + + +class LimiterErrorFixture(t.NamedTuple): + """Test fixture for error-result preservation through the limiter.""" + + test_id: str + tool_name: str + arguments: dict[str, t.Any] | None + expect_is_error: bool + expect_header: bool + text_must_contain: str | None + + +LIMITER_ERROR_FIXTURES: list[LimiterErrorFixture] = [ + LimiterErrorFixture( + test_id="oversized_error_keeps_flag", + tool_name="limited_fail", + arguments={"payload": "x" * 5000}, + expect_is_error=True, + expect_header=True, + # Tail-preservation keeps the suggestion line at the end. + text_must_contain="Call list_panes to discover valid pane ids.", + ), + LimiterErrorFixture( + test_id="small_error_untouched", + tool_name="limited_fail", + arguments={"payload": "x"}, + expect_is_error=True, + expect_header=False, + text_must_contain="Invalid name: 'x'", + ), + LimiterErrorFixture( + test_id="oversized_success_not_marked_error", + tool_name="limited_ok", + arguments=None, + expect_is_error=False, + expect_header=True, + text_must_contain=None, + ), +] + + +class _LimiterOut(pydantic.BaseModel): + """Output model giving the ``limited_fail`` probe an output schema. + + Module-level (not test-local) because ``from __future__ import + annotations`` stringifies the return annotation and pydantic + resolves it against module globals when fastmcp builds the schema. + """ + + value: str + + +def _limiter_probe_server() -> t.Any: + """Build a FastMCP instance with the limiter wrapping error conversion. + + Mirrors the production ordering (limiter outside + ``ToolErrorResultMiddleware``) with a small ``max_size`` so the + truncation path fires. ``limited_fail`` returns a Pydantic model + so the MCP SDK client's output-schema validation is in play. + """ + from fastmcp import FastMCP + + from libtmux_mcp._utils import ExpectedToolError + from libtmux_mcp.middleware import ( + TailPreservingResponseLimitingMiddleware, + ToolErrorResultMiddleware, + ) + + probe = FastMCP( + name="limiter-probe", + middleware=[ + TailPreservingResponseLimitingMiddleware( + max_size=300, + tools=["limited_fail", "limited_ok"], + ), + ToolErrorResultMiddleware(transform_errors=True), + ], + ) + + @probe.tool + def limited_fail(payload: str) -> _LimiterOut: + msg = f"Invalid name: {payload!r}" + raise ExpectedToolError( + msg, + suggestion="Call list_panes to discover valid pane ids.", + ) + + # output_schema=None: fastmcp wraps even plain-str returns in a + # result schema, and the stock truncation path drops structured + # content — the MCP SDK client then rejects ANY truncated success + # from a schema'd tool. That pre-existing upstream gap is not what + # this probe tests; disable the schema so the success case + # exercises only the is_error handling. + @probe.tool(output_schema=None) + def limited_ok() -> str: + return "y" * 5000 + + return probe + + +@pytest.mark.parametrize( + LimiterErrorFixture._fields, + LIMITER_ERROR_FIXTURES, + ids=[f.test_id for f in LIMITER_ERROR_FIXTURES], +) +def test_response_limiter_preserves_error_results( + test_id: str, + tool_name: str, + arguments: dict[str, t.Any] | None, + expect_is_error: bool, + expect_header: bool, + text_must_contain: str | None, +) -> None: + """Truncation keeps ``is_error``; successes never gain it. + + Regression guard: the stock truncation path rebuilds the result + without the error flag, so an oversized error from a tool with an + output schema became an apparent success — and the MCP SDK client + raised ``RuntimeError: ... has an output schema but did not return + structured content`` instead of delivering the tool error. The + ``limited_fail`` probe returns a Pydantic model precisely so this + test exercises that client-side validation path. + """ + from fastmcp import Client + + probe = _limiter_probe_server() + + async def _call() -> t.Any: + async with Client(probe) as client: + return await client.call_tool(tool_name, arguments, raise_on_error=False) + + result = asyncio.run(_call()) + + assert result.is_error is expect_is_error + text = result.content[0].text + assert ("[... truncated" in text) is expect_header + if text_must_contain is not None: + assert text_must_contain in text + if expect_is_error: + meta = result.meta or {} + assert meta["expected"] is True + assert meta["error_type"] == "ExpectedToolError" From 8cc3d36296b5abca0e957d377193f4b6710a2f08 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 13:32:45 -0500 Subject: [PATCH 15/23] py(types[middleware]): Satisfy CallNext protocol in limiter capture callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The previous commit's verification chain piped mypy through tail, masking a real arg-type error — the capture callback didn't satisfy fastmcp's CallNext protocol, which matches the parameter name ('context'), not just the callable shape. what: - Annotate _capture with MiddlewareContext[CallToolRequestParams] -> ToolResult and name the parameter 'context' to structurally match the protocol; cast the captured Any back to ToolResult --- src/libtmux_mcp/middleware.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/libtmux_mcp/middleware.py b/src/libtmux_mcp/middleware.py index 1da714c..8f9e834 100644 --- a/src/libtmux_mcp/middleware.py +++ b/src/libtmux_mcp/middleware.py @@ -40,7 +40,7 @@ from fastmcp.server.middleware.response_limiting import ResponseLimitingMiddleware from fastmcp.tools.base import ToolResult from libtmux import exc as libtmux_exc -from mcp.types import TextContent +from mcp.types import CallToolRequestParams, TextContent from pydantic import ValidationError as PydanticValidationError from libtmux_mcp._utils import ( @@ -546,10 +546,14 @@ async def on_call_tool( """Apply the size cap without dropping ``is_error``.""" inner: t.Any = None - async def _capture(ctx: t.Any) -> t.Any: + async def _capture( + context: MiddlewareContext[CallToolRequestParams], + ) -> ToolResult: + # ``context`` (not ``ctx``): fastmcp's CallNext protocol + # matches the parameter name, not just the shape. nonlocal inner - inner = await call_next(ctx) - return inner + inner = await call_next(context) + return t.cast("ToolResult", inner) result = await super().on_call_tool(context, _capture) if result is not inner and isinstance(inner, ToolResult) and inner.is_error: From a15ad34f5af3920f1d8fa03dae3286e9af1aec5e Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 13:51:57 -0500 Subject: [PATCH 16/23] docs(fix[architecture]): Correct middleware error-flow contract why: The branch now preserves error results through the outer response limiter, so stale ordering prose made the documented middleware contract misleading. what: - Describe response limiting as return-path truncation after error conversion - Split tool exception classification from FastMCP schema validation handling - Document that truncated oversized errors preserve is_error and _meta --- docs/topics/architecture.md | 9 +++++---- src/libtmux_mcp/server.py | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/topics/architecture.md b/docs/topics/architecture.md index 753d30f..bf929c8 100644 --- a/docs/topics/architecture.md +++ b/docs/topics/architecture.md @@ -68,12 +68,13 @@ Tools use resolver functions (`_resolve_session`, `_resolve_window`, `_resolve_p ### Error handling -Two layers split the work: +Three boundaries split the work: -1. **Classification** — the `@handle_tool_errors` decorator wraps all tool functions, mapping libtmux exceptions to `ExpectedToolError` (agent-correctable: unknown ids, invalid arguments, transient tmux errors; logged at WARNING) or stock `ToolError` (operator faults and unexpected bugs; logged at ERROR). The raise chains the original exception via `from e`, which is what lets `ReadonlyRetryMiddleware` match transient `LibTmuxException` causes. -2. **Conversion** — `ToolErrorResultMiddleware` catches the exception once it has cleared the audit/retry/safety trio and returns `ToolResult(is_error=True)` carrying the message exactly as raised, plus a `_meta` payload (`error_type`, `expected`, optional `suggestion` pointing at discovery tools). +1. **Tool classification** — the {func}`~libtmux_mcp._utils.handle_tool_errors` decorator wraps tool functions, mapping libtmux exceptions to {class}`~libtmux_mcp._utils.ExpectedToolError` (agent-correctable: unknown ids, invalid arguments, transient tmux errors; logged at WARNING) or stock `ToolError` (operator faults and unexpected bugs; logged at ERROR). The raise chains the original exception via `from e`, which is what lets {class}`~libtmux_mcp.middleware.ReadonlyRetryMiddleware` match transient `LibTmuxException` causes. +2. **Schema classification** — FastMCP validates tool arguments before tool code runs, so Pydantic validation failures never reach the decorator. {class}`~libtmux_mcp.middleware.ToolErrorResultMiddleware` classifies those schema-validation errors as expected, agent-correctable WARNINGs before converting them. +3. **Conversion** — {class}`~libtmux_mcp.middleware.ToolErrorResultMiddleware` catches the exception once it has cleared the audit/retry/safety trio and returns `ToolResult(is_error=True)` carrying the message exactly as raised, plus a `_meta` payload (`error_type`, `expected`, optional `suggestion` pointing at discovery tools). -Errors must stay exceptions through the audit/retry/safety trio — audit detects failures by catching, retry matches via `__cause__` — so conversion happens only in the outermost error layer. Level policy lives in {doc}`/topics/logging`. +Errors must stay exceptions through the audit/retry/safety trio — audit detects failures by catching, retry matches via `__cause__` — so conversion happens only in the outermost error layer. The response limiter sits outside conversion and may truncate large success or error results on the return path; its truncation path preserves `is_error` and `_meta` so oversized expected failures stay tool errors. Level policy lives in {doc}`/topics/logging`. ## References diff --git a/src/libtmux_mcp/server.py b/src/libtmux_mcp/server.py index 2567561..5ce702b 100644 --- a/src/libtmux_mcp/server.py +++ b/src/libtmux_mcp/server.py @@ -275,9 +275,10 @@ def _gc_mcp_buffers(cache: t.Mapping[_ServerCacheKey, Server]) -> None: # Middleware runs outermost-first. Order rationale: # 1. TimingMiddleware — neutral observer; start clock as early # as possible so timing captures middleware cost too. - # 2. TailPreservingResponseLimitingMiddleware — bound the - # response *before* ToolErrorResultMiddleware can convert - # exceptions; keeps the size cap independent of error path. + # 2. TailPreservingResponseLimitingMiddleware — bounds the final + # tool result on the way back out. Tool errors may already be + # ToolResult(is_error=True) here, so truncation preserves that + # flag instead of turning expected failures into schema errors. # 3. ToolErrorResultMiddleware — converts tool-call failures to # rich ToolResult(is_error=True) results and transforms # resource errors to MCP code -32002. Must stay OUTSIDE the From f8e3c0b4c5bf226c2aa5a910da00f99f9b150512 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 13:58:16 -0500 Subject: [PATCH 17/23] test(fix[pane_tools]): Synchronize snapshot truncation setup why: The full suite can capture snapshot output before a burst of queued shell commands finishes, making the truncation assertions depend on tmux timing. what: - Reuse the wait-for helper for snapshot output setup - Wait for shell command completion before calling snapshot_pane --- tests/test_pane_tools.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 793d6a3..2b925ed 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -2906,12 +2906,10 @@ def test_snapshot_pane_truncates_content(mcp_server: Server, mcp_pane: Pane) -> ``content_truncated`` and ``content_truncated_lines``. ``content`` itself is the kept tail with no marker. """ - for i in range(20): - mcp_pane.send_keys(f"echo snap_line_{i}", enter=True) - retry_until( - lambda: "snap_line_19" in "\n".join(mcp_pane.capture_pane()), - 2, - raises=True, + _signal_after_shell_payload( + mcp_server, + mcp_pane, + "; ".join(f"echo snap_line_{i}" for i in range(20)), ) result = snapshot_pane( @@ -2930,12 +2928,10 @@ def test_snapshot_pane_max_lines_none_keeps_full_content( mcp_server: Server, mcp_pane: Pane ) -> None: """``max_lines=None`` returns the full content with no truncation flag.""" - for i in range(20): - mcp_pane.send_keys(f"echo snapnone_{i}", enter=True) - retry_until( - lambda: "snapnone_19" in "\n".join(mcp_pane.capture_pane()), - 2, - raises=True, + _signal_after_shell_payload( + mcp_server, + mcp_pane, + "; ".join(f"echo snapnone_{i}" for i in range(20)), ) result = snapshot_pane( From 4b58f971e427604a56256ae704aa94a8b3ef5136 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 14:00:30 -0500 Subject: [PATCH 18/23] docs(fix[errors]): Use expected tool error terminology why: The branch distinguishes agent-correctable failures from operator faults, so stale ToolError wording obscured the intended client contract. what: - Name ExpectedToolError in internal docstrings and middleware comments - Use expected tool error language in client-facing tool docs --- docs/tools/buffer/delete-buffer.md | 2 +- docs/tools/hook/show-hook.md | 6 +++--- docs/tools/pane/wait-for-channel.md | 4 ++-- docs/topics/safety.md | 2 +- src/libtmux_mcp/server.py | 2 +- src/libtmux_mcp/tools/pane_tools/capture_since.py | 2 +- src/libtmux_mcp/tools/pane_tools/wait.py | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/tools/buffer/delete-buffer.md b/docs/tools/buffer/delete-buffer.md index 5c105e3..cca7b6f 100644 --- a/docs/tools/buffer/delete-buffer.md +++ b/docs/tools/buffer/delete-buffer.md @@ -10,7 +10,7 @@ across MCP restarts. **Side effects:** Removes the named buffer from the tmux server. Subsequent {tooliconl}`paste-buffer` calls against the deleted name -return `ToolError`. +return an expected tool error. ```{fastmcp-tool-input} buffer_tools.delete_buffer ``` diff --git a/docs/tools/hook/show-hook.md b/docs/tools/hook/show-hook.md index 161609b..4c97021 100644 --- a/docs/tools/hook/show-hook.md +++ b/docs/tools/hook/show-hook.md @@ -4,9 +4,9 @@ ``` **Use when** you know which hook you want to inspect by name. Returns -empty when the hook is unset; raises `ToolError` for unknown hook -names (typos, wrong scope) so input mistakes don't masquerade as -"nothing configured". +empty when the hook is unset; raises an expected tool error for +unknown hook names (typos, wrong scope) so input mistakes don't +masquerade as "nothing configured". **Side effects:** None. Readonly. diff --git a/docs/tools/pane/wait-for-channel.md b/docs/tools/pane/wait-for-channel.md index 1fbb7c9..8edd942 100644 --- a/docs/tools/pane/wait-for-channel.md +++ b/docs/tools/pane/wait-for-channel.md @@ -29,8 +29,8 @@ to poll for an output marker instead; state-polling is structurally safer than edge-triggered signalling for fragile commands. **Side effects:** Blocks the call up to `timeout` seconds (default 30). -Mandatory subprocess timeout — a crashed signaller raises `ToolError` -rather than blocking indefinitely. +Mandatory subprocess timeout — a crashed signaller raises an expected +tool error rather than blocking indefinitely. ```{fastmcp-tool-input} wait_for_tools.wait_for_channel ``` diff --git a/docs/topics/safety.md b/docs/topics/safety.md index 565fb5c..b1711d9 100644 --- a/docs/topics/safety.md +++ b/docs/topics/safety.md @@ -102,7 +102,7 @@ Unlike other `mutating` tools, the registration carries `destructiveHint=True` a Mitigations: -- `pane_id` is required (no fallback to "first pane in session/window"). Agents that pass only `session_name` get a `ToolError` instead of an unintended kill — resolve via {tool}`list-panes` first. +- `pane_id` is required (no fallback to "first pane in session/window"). Agents that pass only `session_name` get an expected tool error instead of an unintended kill — resolve via {tool}`list-panes` first. - Any `shell` argument is briefly visible in the OS process table and tmux's `pane_current_command` metadata before the spawned shell takes over; the audit log redacts `shell` payloads (see below), but do not pass credentials directly even with redaction. - The optional `environment` argument (`dict[str, str]`) maps to one tmux `-e KEY=VALUE` flag per item. The audit log redacts each *value* via a `{len, sha256_prefix}` digest while keeping the *keys* visible — env var names like `DATABASE_URL` are usually operator-debug-useful, but their values are the secret. The same OS-process-table caveat as `shell` applies: `respawn-pane -e DB_PASSWORD=...` may briefly appear in `ps` output before the spawned process inherits the env. - The same self-pane guard that protects the destructive kill commands also refuses to respawn the pane running the MCP server. diff --git a/src/libtmux_mcp/server.py b/src/libtmux_mcp/server.py index 5ce702b..9b8bc42 100644 --- a/src/libtmux_mcp/server.py +++ b/src/libtmux_mcp/server.py @@ -289,7 +289,7 @@ def _gc_mcp_buffers(cache: t.Mapping[_ServerCacheKey, Server]) -> None: # them), so converting the exception to a result any deeper # would silently break all three. # 4. AuditMiddleware — outside SafetyMiddleware so tier-denial - # events (which raise ToolError before call_next inside + # events (which raise ExpectedToolError before call_next inside # Safety) are still logged with outcome=error. Without this # ordering, denied access attempts would silently bypass the # audit log — a security-observability gap. diff --git a/src/libtmux_mcp/tools/pane_tools/capture_since.py b/src/libtmux_mcp/tools/pane_tools/capture_since.py index 6a1c20b..0d08af5 100644 --- a/src/libtmux_mcp/tools/pane_tools/capture_since.py +++ b/src/libtmux_mcp/tools/pane_tools/capture_since.py @@ -449,7 +449,7 @@ async def capture_since( If tmux history was cleared or trimmed before the cursor anchor, the tool returns the current visible pane with ``lines_missed=True`` and a fresh cursor. Malformed cursors, cursors for a different - pane, pane death, and pane respawn fail with ``ToolError`` so + pane, pane death, and pane respawn fail with ``ExpectedToolError`` so agents do not accidentally observe the wrong process. Parameters diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 76589de..0397a69 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -211,7 +211,7 @@ async def wait_for_text( Notes ----- **Scrollback rollover detection is partial.** The tool raises - ``ToolError`` when ``hsize`` shrinks below the entry value — which + ``ExpectedToolError`` when ``hsize`` shrinks below the entry value — which catches ``clear-history`` and any rollover where the dip is observable between polls. It does **not** reliably detect ``grid_collect_history`` trim that fires during continuous output: From 4fd74643c91ac0b56c745f1f8f7641b3cc9ca968 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 14:03:00 -0500 Subject: [PATCH 19/23] mcp(feat[middleware]): Suggest dropping unrecognized arguments in validation errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Gemini CLI's scheduler leaks its wait_for_previous sequencing flag into MCP tool arguments when batching calls in one turn (near-constant in gemini -p runs). Strict schemas reject it and Gemini self-recovers from the generic pydantic message, but the recovery depends on the model inferring what to do. MemPalace fixed the same leak by stripping the key (MemPalace/mempalace#322) and later whitelisting arguments (#647) — silent dropping would mask argument typos on mutating/destructive tools, so the rejection stays; the error just explains itself now. what: - _unexpected_kwargs: names flagged unexpected_keyword_argument in a pydantic validation failure (error or its __cause__) - _client_label: client 'name version' from the MCP initialize handshake (fastmcp_context.session.client_params.clientInfo); None-safe at every hop, wording-only — never gates behavior - _error_tool_result(error, context=None): synthesizes a suggestion for unexpected-argument rejections — name the argument(s) to drop or fix, plus a client-scheduling-flag note for wait_for_previous that names the connected client when the handshake exposed it - docs(topics/gotchas): the leak, batching-vs-mode findings, the self-recovery flow, and why rejection beats stripping here - tests: parametrized suggestion coverage (client-labeled flag leak, default handshake, plain typo, missing-arg silence) plus a direct no-handshake unit case via pydantic.validate_call --- docs/topics/gotchas.md | 14 ++++ src/libtmux_mcp/middleware.py | 90 +++++++++++++++++++++- tests/test_middleware.py | 136 ++++++++++++++++++++++++++++++++++ 3 files changed, 236 insertions(+), 4 deletions(-) diff --git a/docs/topics/gotchas.md b/docs/topics/gotchas.md index 02095c5..9a5c4e8 100644 --- a/docs/topics/gotchas.md +++ b/docs/topics/gotchas.md @@ -71,3 +71,17 @@ The `suppress_history` parameter on `send_keys` prepends a space before the comm `clear_pane` runs two tmux commands in sequence: `send-keys -R` (reset terminal) then `clear-history` (clear scrollback). There is a brief gap between them where partial content may be visible. For most use cases this is not a problem. If you need guaranteed clean state, add a small delay before the next `capture_pane`. + +## Gemini CLI injects `wait_for_previous` into tool arguments + +When Gemini CLI batches several tool calls in one turn, its scheduler merges the internal sequencing flag of the later calls into the MCP tool's arguments: + +```json +{"tool": "get_pane_info", "arguments": {"wait_for_previous": true, "pane_id": "%0"}} +``` + +This is stock Gemini CLI behavior (no extensions involved). Batching — and therefore the leak — is near-constant in non-interactive `gemini -p` runs, where the harness front-loads its topic tool and the first MCP calls into one parallel turn; interactive sessions schedule more sequentially and rarely trigger it. + +Tool schemas are strict (`additionalProperties: false`), so the call is rejected with a validation error — classified as expected (WARNING log, `expected: true` in the result's `_meta`) and carrying a suggestion that names the rejected argument and identifies `wait_for_previous` as a client scheduling flag to retry without. Gemini's model reads it, drops the flag, and retries successfully on its own. + +The visible symptom is harmless noise: `Error executing tool mcp_tmux_: ... reported an error` lines in Gemini's output for calls that then succeed on retry, and matching WARNING records in the server log. Other MCP servers crash outright on this injection ([MemPalace/mempalace#816](https://github.com/MemPalace/mempalace/issues/816)) and patched it by stripping the key or whitelisting arguments against the schema. This server deliberately keeps the rejection: silently dropping unknown arguments would also swallow genuine argument typos from every client — on a server with mutating and destructive tools, a mis-named flag (`enter` on `send_keys`, say) must fail loudly, not run with defaults. diff --git a/src/libtmux_mcp/middleware.py b/src/libtmux_mcp/middleware.py index 8f9e834..dfc751e 100644 --- a/src/libtmux_mcp/middleware.py +++ b/src/libtmux_mcp/middleware.py @@ -134,7 +134,63 @@ def _is_schema_validation_error(error: BaseException) -> bool: ) -def _error_tool_result(error: Exception) -> ToolResult: +#: Scheduling flag some MCP clients (notably Gemini CLI when batching +#: several tool calls in one turn) merge into the tool's arguments. +#: Recognized only to *word the rejection helpfully* — the argument is +#: still rejected, never silently stripped, so genuine argument typos +#: from other clients stay loud. Contrast MemPalace/mempalace#322, +#: which strips the key, and #647, which whitelists arguments against +#: the schema — silent dropping would let a mis-named flag on a +#: mutating tool (e.g. ``enter`` on send_keys) run with defaults. +_CLIENT_SCHEDULING_FLAG = "wait_for_previous" + + +def _unexpected_kwargs(error: BaseException) -> list[str]: + """Argument names rejected as unexpected by schema validation. + + Reads pydantic's structured ``errors()`` from ``error`` or its + ``__cause__`` (same posture as :func:`_is_schema_validation_error`) + and returns the names flagged ``unexpected_keyword_argument``. + Empty list for every other failure shape. + """ + err = error if isinstance(error, PydanticValidationError) else error.__cause__ + if not isinstance(err, PydanticValidationError): + return [] + return [ + str(item["loc"][-1]) + for item in err.errors() + if item.get("type") == "unexpected_keyword_argument" and item.get("loc") + ] + + +def _client_label(context: MiddlewareContext | None) -> str | None: + """``"name version"`` of the connected client, when the handshake exposed it. + + Walks ``fastmcp_context.session.client_params.clientInfo`` — the + MCP ``initialize`` handshake's client identity. Every hop can be + absent (unit-test contexts, background tasks, clients that omit + ``clientInfo``), so any failure resolves to ``None``. Used only to + word error suggestions; never gates behavior. + """ + if context is None: + return None + try: + fastmcp_ctx = context.fastmcp_context + if fastmcp_ctx is None: + return None + params = fastmcp_ctx.session.client_params + if params is None: + return None + info = params.clientInfo + return f"{info.name} {info.version}".strip() + except (AttributeError, RuntimeError): + return None + + +def _error_tool_result( + error: Exception, + context: MiddlewareContext | None = None, +) -> ToolResult: """Build a rich ``ToolResult(is_error=True)`` from a tool failure. The text block carries the error message exactly as raised — no @@ -149,7 +205,12 @@ def _error_tool_result(error: Exception) -> ToolResult: (:class:`~libtmux_mcp._utils.ExpectedToolError` and argument-schema validation errors), False for operator faults and potential server bugs. - * ``suggestion`` — recovery hint, only when present. + * ``suggestion`` — recovery hint. Carried by the error when the + raise site provided one; synthesized for schema-validation + failures that rejected unexpected arguments, so the agent knows + to drop or fix exactly those names (with a client-flag note for + :data:`_CLIENT_SCHEDULING_FLAG` leaks, naming the client via + ``context`` when the handshake exposed it). ``structured_content`` is deliberately left unset: tools declare output schemas for their success payloads, and clients validate @@ -165,6 +226,24 @@ def _error_tool_result(error: Exception) -> ToolResult: } text = str(error) suggestion = getattr(error, "suggestion", None) + if suggestion is None: + unknown = _unexpected_kwargs(error) + if unknown: + suggestion = ( + f"Remove or correct the unrecognized argument(s): {', '.join(unknown)}." + ) + if _CLIENT_SCHEDULING_FLAG in unknown: + client = _client_label(context) + who = ( + f"your client ({client})" + if client + else "some clients (e.g. Gemini CLI)" + ) + suggestion += ( + f" {_CLIENT_SCHEDULING_FLAG} is a scheduling flag " + f"{who} can leak into batched tool calls; retry " + f"the call without it." + ) if suggestion: meta["suggestion"] = suggestion text = f"{text}\n{suggestion}" @@ -196,7 +275,10 @@ class ToolErrorResultMiddleware(ErrorHandlingMiddleware): re-shouted at ERROR by the stock ``_log_error``. Argument-schema validation failures — raised by fastmcp before tool code can classify them — are treated as expected too (see - :func:`_is_schema_validation_error`). + :func:`_is_schema_validation_error`), and when the rejected + arguments include unexpected names the error result carries a + synthesized suggestion telling the agent which names to drop or + fix (see :func:`_error_tool_result`). Ordering invariant: must sit **outside** ``AuditMiddleware``, ``ReadonlyRetryMiddleware``, and ``SafetyMiddleware``. All three @@ -258,7 +340,7 @@ async def on_call_tool( return await call_next(context) except Exception as error: self._log_error(error, context) - return _error_tool_result(error) + return _error_tool_result(error, context) # --------------------------------------------------------------------------- diff --git a/tests/test_middleware.py b/tests/test_middleware.py index 3083d29..9c379f4 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -1036,3 +1036,139 @@ async def _call() -> t.Any: meta = result.meta or {} assert meta["expected"] is True assert meta["error_type"] == "ExpectedToolError" + + +class UnknownArgSuggestionFixture(t.NamedTuple): + """Test fixture for synthesized unexpected-argument suggestions.""" + + test_id: str + arguments: dict[str, t.Any] + client_name: str | None + expect_suggestion: bool + contains: tuple[str, ...] + not_contains: tuple[str, ...] + + +UNKNOWN_ARG_SUGGESTION_FIXTURES: list[UnknownArgSuggestionFixture] = [ + UnknownArgSuggestionFixture( + test_id="scheduling_flag_names_client", + arguments={"count": 1, "wait_for_previous": True}, + client_name="gemini-test", + expect_suggestion=True, + contains=( + "Remove or correct the unrecognized argument(s): wait_for_previous.", + "your client (gemini-test 9.9)", + "retry the call without it", + ), + not_contains=("some clients",), + ), + UnknownArgSuggestionFixture( + test_id="scheduling_flag_default_client", + arguments={"count": 1, "wait_for_previous": True}, + client_name=None, + expect_suggestion=True, + # The in-memory fastmcp Client always sends clientInfo, so the + # handshake-derived label is used; the no-handshake generic + # wording is covered by the unit test below. + contains=("wait_for_previous", "your client ("), + not_contains=("gemini-test",), + ), + UnknownArgSuggestionFixture( + test_id="typo_lists_names_without_client_note", + arguments={"count": 1, "bogus_flag": True}, + client_name=None, + expect_suggestion=True, + contains=("Remove or correct the unrecognized argument(s): bogus_flag.",), + not_contains=("scheduling flag", "Gemini"), + ), + UnknownArgSuggestionFixture( + test_id="missing_required_arg_no_suggestion", + arguments={}, + client_name=None, + expect_suggestion=False, + contains=(), + not_contains=(), + ), +] + + +@pytest.mark.parametrize( + UnknownArgSuggestionFixture._fields, + UNKNOWN_ARG_SUGGESTION_FIXTURES, + ids=[f.test_id for f in UNKNOWN_ARG_SUGGESTION_FIXTURES], +) +def test_unexpected_argument_suggestion( + test_id: str, + arguments: dict[str, t.Any], + client_name: str | None, + expect_suggestion: bool, + contains: tuple[str, ...], + not_contains: tuple[str, ...], +) -> None: + """Schema rejections of unexpected arguments carry a recovery hint. + + The rejection itself is unchanged — the argument is never silently + stripped (contrast MemPalace/mempalace#322's pop-the-key approach) — + but the suggestion names exactly which argument(s) to drop or fix, + and flags ``wait_for_previous`` as a client scheduling leak, naming + the client from the MCP initialize handshake when available. + """ + import mcp.types + from fastmcp import Client + + probe = _error_probe_server() + + client_kwargs: dict[str, t.Any] = {} + if client_name is not None: + client_kwargs["client_info"] = mcp.types.Implementation( + name=client_name, version="9.9" + ) + + async def _call() -> t.Any: + async with Client(probe, **client_kwargs) as client: + return await client.call_tool("takes_int", arguments, raise_on_error=False) + + result = asyncio.run(_call()) + + assert result.is_error is True + meta = result.meta or {} + assert meta["expected"] is True + if not expect_suggestion: + assert "suggestion" not in meta + return + suggestion = meta["suggestion"] + text = result.content[0].text + assert suggestion in text # appended to the agent-visible message + for fragment in contains: + assert fragment in suggestion + for fragment in not_contains: + assert fragment not in suggestion + + +def test_unexpected_argument_suggestion_without_handshake() -> None: + """No client handshake -> the scheduling-flag note uses generic wording. + + Real connections always carry ``clientInfo`` (required by the MCP + initialize handshake), so the generic branch is reachable only + where no context exists — exercised here by calling + ``_error_tool_result`` directly with a manufactured validation + error, the same shape fastmcp raises for tool arguments. + """ + import pydantic + + from libtmux_mcp.middleware import _error_tool_result + + @pydantic.validate_call + def _probe_fn(count: int) -> int: + return count # pragma: no cover - validation rejects the call + + try: + _probe_fn(count=1, wait_for_previous=True) # type: ignore[call-arg] + except pydantic.ValidationError as exc: + result = _error_tool_result(exc, None) + else: # pragma: no cover - defends the test's own premise + pytest.fail("expected a ValidationError") + + meta = result.meta or {} + assert meta["expected"] is True + assert "some clients (e.g. Gemini CLI)" in meta["suggestion"] From 96f8f61aa5f757983de973b7596d89d3df861807 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 14:14:27 -0500 Subject: [PATCH 20/23] docs(errors): Link expected-tool-error mentions to the autodoc entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Prose said 'raises an expected tool error' in plain words while the API reference renders ExpectedToolError with a stable autodoc anchor — the docs convention is to cite autodoc'd APIs via roles, never plain backticks, and readers get a one-click path to the exception's full contract (log level, suggestion field). what: - Convert seven prose mentions to {exc}`~libtmux_mcp._utils.ExpectedToolError`: wait-for-channel (crashed signaller), show-hook (unknown hook names), delete-buffer (paste after delete), safety (kill-tool pane_id requirement), wait-for-content-change (pane lifecycle), capture-since (cursor/pane mismatch and pane lifecycle) - architecture: switch the existing {class} role for ExpectedToolError to {exc}, matching the LibTmuxException precedent in topics/logging - Bare fastmcp ToolError literals stay as code text: fastmcp publishes no objects.inv, so there is no link target --- docs/tools/buffer/delete-buffer.md | 2 +- docs/tools/hook/show-hook.md | 3 ++- docs/tools/pane/capture-since.md | 9 +++++---- docs/tools/pane/wait-for-channel.md | 5 +++-- docs/tools/pane/wait-for-content-change.md | 2 +- docs/topics/architecture.md | 2 +- docs/topics/safety.md | 2 +- 7 files changed, 14 insertions(+), 11 deletions(-) diff --git a/docs/tools/buffer/delete-buffer.md b/docs/tools/buffer/delete-buffer.md index cca7b6f..d9cc899 100644 --- a/docs/tools/buffer/delete-buffer.md +++ b/docs/tools/buffer/delete-buffer.md @@ -10,7 +10,7 @@ across MCP restarts. **Side effects:** Removes the named buffer from the tmux server. Subsequent {tooliconl}`paste-buffer` calls against the deleted name -return an expected tool error. +return an {exc}`~libtmux_mcp._utils.ExpectedToolError`. ```{fastmcp-tool-input} buffer_tools.delete_buffer ``` diff --git a/docs/tools/hook/show-hook.md b/docs/tools/hook/show-hook.md index 4c97021..33fa3af 100644 --- a/docs/tools/hook/show-hook.md +++ b/docs/tools/hook/show-hook.md @@ -4,7 +4,8 @@ ``` **Use when** you know which hook you want to inspect by name. Returns -empty when the hook is unset; raises an expected tool error for +empty when the hook is unset; raises an +{exc}`~libtmux_mcp._utils.ExpectedToolError` for unknown hook names (typos, wrong scope) so input mistakes don't masquerade as "nothing configured". diff --git a/docs/tools/pane/capture-since.md b/docs/tools/pane/capture-since.md index 27e17ac..018b43a 100644 --- a/docs/tools/pane/capture-since.md +++ b/docs/tools/pane/capture-since.md @@ -60,8 +60,9 @@ Read only content since that cursor: ``` The cursor carries the original pane id, so the follow-up call does not need -`pane_id`. If you pass both, they must match; a cursor for another pane raises a -tool error instead of silently reading the wrong process. +`pane_id`. If you pass both, they must match; a cursor for another pane raises +an {exc}`~libtmux_mcp._utils.ExpectedToolError` instead of silently reading the +wrong process. If nothing new was written after the cursor, `lines` is empty and the response still includes a fresh cursor for the same pane. If the cursor row scrolled into @@ -73,8 +74,8 @@ needed to compute an exact delta. In that case, `lines` is a conservative current visible capture and the response includes a fresh cursor. Pane lifecycle is part of the cursor contract. If the pane dies or is respawned, -the call raises a tool error instead of reading from a different process that -reused the same pane id. +the call raises an {exc}`~libtmux_mcp._utils.ExpectedToolError` instead of +reading from a different process that reused the same pane id. `truncated`, `truncated_lines`, and `truncated_bytes` are structured metadata. No truncation marker is injected into `lines`, so clients can display terminal diff --git a/docs/tools/pane/wait-for-channel.md b/docs/tools/pane/wait-for-channel.md index 8edd942..b0574d8 100644 --- a/docs/tools/pane/wait-for-channel.md +++ b/docs/tools/pane/wait-for-channel.md @@ -29,8 +29,9 @@ to poll for an output marker instead; state-polling is structurally safer than edge-triggered signalling for fragile commands. **Side effects:** Blocks the call up to `timeout` seconds (default 30). -Mandatory subprocess timeout — a crashed signaller raises an expected -tool error rather than blocking indefinitely. +Mandatory subprocess timeout — a crashed signaller raises an +{exc}`~libtmux_mcp._utils.ExpectedToolError` rather than blocking +indefinitely. ```{fastmcp-tool-input} wait_for_tools.wait_for_channel ``` diff --git a/docs/tools/pane/wait-for-content-change.md b/docs/tools/pane/wait-for-content-change.md index 3f71065..aa2fa58 100644 --- a/docs/tools/pane/wait-for-content-change.md +++ b/docs/tools/pane/wait-for-content-change.md @@ -12,7 +12,7 @@ specific pattern. precise and avoids false positives from unrelated output. **Side effects:** None. Readonly. Blocks until content changes or timeout. -Raises a tool error if the pane dies or is respawned while waiting, because the +Raises an {exc}`~libtmux_mcp._utils.ExpectedToolError` if the pane dies or is respawned while waiting, because the entry content baseline no longer describes the same pane process. **Example:** diff --git a/docs/topics/architecture.md b/docs/topics/architecture.md index bf929c8..dd1d721 100644 --- a/docs/topics/architecture.md +++ b/docs/topics/architecture.md @@ -70,7 +70,7 @@ Tools use resolver functions (`_resolve_session`, `_resolve_window`, `_resolve_p Three boundaries split the work: -1. **Tool classification** — the {func}`~libtmux_mcp._utils.handle_tool_errors` decorator wraps tool functions, mapping libtmux exceptions to {class}`~libtmux_mcp._utils.ExpectedToolError` (agent-correctable: unknown ids, invalid arguments, transient tmux errors; logged at WARNING) or stock `ToolError` (operator faults and unexpected bugs; logged at ERROR). The raise chains the original exception via `from e`, which is what lets {class}`~libtmux_mcp.middleware.ReadonlyRetryMiddleware` match transient `LibTmuxException` causes. +1. **Tool classification** — the {func}`~libtmux_mcp._utils.handle_tool_errors` decorator wraps tool functions, mapping libtmux exceptions to {exc}`~libtmux_mcp._utils.ExpectedToolError` (agent-correctable: unknown ids, invalid arguments, transient tmux errors; logged at WARNING) or stock `ToolError` (operator faults and unexpected bugs; logged at ERROR). The raise chains the original exception via `from e`, which is what lets {class}`~libtmux_mcp.middleware.ReadonlyRetryMiddleware` match transient `LibTmuxException` causes. 2. **Schema classification** — FastMCP validates tool arguments before tool code runs, so Pydantic validation failures never reach the decorator. {class}`~libtmux_mcp.middleware.ToolErrorResultMiddleware` classifies those schema-validation errors as expected, agent-correctable WARNINGs before converting them. 3. **Conversion** — {class}`~libtmux_mcp.middleware.ToolErrorResultMiddleware` catches the exception once it has cleared the audit/retry/safety trio and returns `ToolResult(is_error=True)` carrying the message exactly as raised, plus a `_meta` payload (`error_type`, `expected`, optional `suggestion` pointing at discovery tools). diff --git a/docs/topics/safety.md b/docs/topics/safety.md index b1711d9..df00370 100644 --- a/docs/topics/safety.md +++ b/docs/topics/safety.md @@ -102,7 +102,7 @@ Unlike other `mutating` tools, the registration carries `destructiveHint=True` a Mitigations: -- `pane_id` is required (no fallback to "first pane in session/window"). Agents that pass only `session_name` get an expected tool error instead of an unintended kill — resolve via {tool}`list-panes` first. +- `pane_id` is required (no fallback to "first pane in session/window"). Agents that pass only `session_name` get an {exc}`~libtmux_mcp._utils.ExpectedToolError` instead of an unintended kill — resolve via {tool}`list-panes` first. - Any `shell` argument is briefly visible in the OS process table and tmux's `pane_current_command` metadata before the spawned shell takes over; the audit log redacts `shell` payloads (see below), but do not pass credentials directly even with redaction. - The optional `environment` argument (`dict[str, str]`) maps to one tmux `-e KEY=VALUE` flag per item. The audit log redacts each *value* via a `{len, sha256_prefix}` digest while keeping the *keys* visible — env var names like `DATABASE_URL` are usually operator-debug-useful, but their values are the secret. The same OS-process-table caveat as `shell` applies: `respawn-pane -e DB_PASSWORD=...` may briefly appear in `ps` output before the spawned process inherits the env. - The same self-pane guard that protects the destructive kill commands also refuses to respawn the pane running the MCP server. From 957f05a86ed30cbb1abbe6518e31d5427202442a Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 14:24:42 -0500 Subject: [PATCH 21/23] docs(CHANGES) Unknown-argument hint release note MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The unrecognized-argument suggestion (with client detection) is user-visible product surface the unreleased notes didn't cover — the behavior a Gemini CLI user upgrading will actually notice. what: - What's new entry covering the do-this-next hint for rejected unknown arguments and the wait_for_previous client-flag note --- CHANGES | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES b/CHANGES index b0d3d0a..9f46d93 100644 --- a/CHANGES +++ b/CHANGES @@ -16,6 +16,10 @@ _Notes on upcoming releases will be added here_ Failed tool calls now return their message exactly as raised — previously every failure was prefixed with `Internal error:`. Error results carry a structured `_meta` payload (`error_type`, `expected`, `suggestion`), and not-found errors point at {tooliconl}`list-sessions` / {tooliconl}`list-windows` / {tooliconl}`list-panes` to resolve stale or guessed ids. (#64) +**Unknown tool arguments get a do-this-next hint** + +When a call includes arguments a tool doesn't accept, the error result now names exactly which argument(s) to remove or fix. When the stray key is `wait_for_previous` — a scheduling flag Gemini CLI can leak into batched tool calls — the hint says so and names the connected client, so agents recover in one read instead of re-deriving the problem from a validation traceback. (#64) + **Expected tool failures log at WARNING, not ERROR** Agent-correctable failures — unknown ids, invalid arguments, safety-tier denials, transient tmux errors — now log at WARNING in the server's log stream. A missing tmux binary and unexpected exceptions stay at ERROR, so ERROR records are always worth attention. (#64) From 4e1a0b29345566f59d15eb07220e3e863e90fc26 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 14:36:36 -0500 Subject: [PATCH 22/23] docs(errors): Refresh error-result recovery prose why: The branch now synthesizes recovery suggestions for schema-validation failures, including unknown arguments and client scheduling flags, so the architecture page's discovery-only wording was too narrow. The Gemini gotcha page also carried brittle current-state claims about client behavior and external MCP server fixes. what: - Broaden error-result suggestion prose to cover recovery hints generally - Keep ExpectedToolError references linked with {exc} - Soften stale-prone Gemini and external-server claims --- docs/topics/architecture.md | 2 +- docs/topics/gotchas.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/topics/architecture.md b/docs/topics/architecture.md index dd1d721..fa1d5c6 100644 --- a/docs/topics/architecture.md +++ b/docs/topics/architecture.md @@ -72,7 +72,7 @@ Three boundaries split the work: 1. **Tool classification** — the {func}`~libtmux_mcp._utils.handle_tool_errors` decorator wraps tool functions, mapping libtmux exceptions to {exc}`~libtmux_mcp._utils.ExpectedToolError` (agent-correctable: unknown ids, invalid arguments, transient tmux errors; logged at WARNING) or stock `ToolError` (operator faults and unexpected bugs; logged at ERROR). The raise chains the original exception via `from e`, which is what lets {class}`~libtmux_mcp.middleware.ReadonlyRetryMiddleware` match transient `LibTmuxException` causes. 2. **Schema classification** — FastMCP validates tool arguments before tool code runs, so Pydantic validation failures never reach the decorator. {class}`~libtmux_mcp.middleware.ToolErrorResultMiddleware` classifies those schema-validation errors as expected, agent-correctable WARNINGs before converting them. -3. **Conversion** — {class}`~libtmux_mcp.middleware.ToolErrorResultMiddleware` catches the exception once it has cleared the audit/retry/safety trio and returns `ToolResult(is_error=True)` carrying the message exactly as raised, plus a `_meta` payload (`error_type`, `expected`, optional `suggestion` pointing at discovery tools). +3. **Conversion** — {class}`~libtmux_mcp.middleware.ToolErrorResultMiddleware` catches the exception once it has cleared the audit/retry/safety trio and returns `ToolResult(is_error=True)` carrying the message exactly as raised, plus a `_meta` payload (`error_type`, `expected`, and an optional agent-facing `suggestion` for recovery hints such as discovery tools or rejected-argument fixes). Errors must stay exceptions through the audit/retry/safety trio — audit detects failures by catching, retry matches via `__cause__` — so conversion happens only in the outermost error layer. The response limiter sits outside conversion and may truncate large success or error results on the return path; its truncation path preserves `is_error` and `_meta` so oversized expected failures stay tool errors. Level policy lives in {doc}`/topics/logging`. diff --git a/docs/topics/gotchas.md b/docs/topics/gotchas.md index 9a5c4e8..99158f5 100644 --- a/docs/topics/gotchas.md +++ b/docs/topics/gotchas.md @@ -80,8 +80,8 @@ When Gemini CLI batches several tool calls in one turn, its scheduler merges the {"tool": "get_pane_info", "arguments": {"wait_for_previous": true, "pane_id": "%0"}} ``` -This is stock Gemini CLI behavior (no extensions involved). Batching — and therefore the leak — is near-constant in non-interactive `gemini -p` runs, where the harness front-loads its topic tool and the first MCP calls into one parallel turn; interactive sessions schedule more sequentially and rarely trigger it. +This has been observed with stock Gemini CLI behavior (no extensions involved). In local non-interactive `gemini -p` harness runs, batching can front-load the topic tool and the first MCP calls into one parallel turn, which is where the leaked flag usually appears; interactive sessions tend to schedule more sequentially and trigger it less often. Tool schemas are strict (`additionalProperties: false`), so the call is rejected with a validation error — classified as expected (WARNING log, `expected: true` in the result's `_meta`) and carrying a suggestion that names the rejected argument and identifies `wait_for_previous` as a client scheduling flag to retry without. Gemini's model reads it, drops the flag, and retries successfully on its own. -The visible symptom is harmless noise: `Error executing tool mcp_tmux_: ... reported an error` lines in Gemini's output for calls that then succeed on retry, and matching WARNING records in the server log. Other MCP servers crash outright on this injection ([MemPalace/mempalace#816](https://github.com/MemPalace/mempalace/issues/816)) and patched it by stripping the key or whitelisting arguments against the schema. This server deliberately keeps the rejection: silently dropping unknown arguments would also swallow genuine argument typos from every client — on a server with mutating and destructive tools, a mis-named flag (`enter` on `send_keys`, say) must fail loudly, not run with defaults. +The visible symptom is harmless noise: `Error executing tool mcp_tmux_: ... reported an error` lines in Gemini's output for calls that then succeed on retry, and matching WARNING records in the server log. Similar reports in other MCP servers have handled this injected key by stripping it or whitelisting arguments against the schema ([MemPalace/mempalace#816](https://github.com/MemPalace/mempalace/issues/816)). This server deliberately keeps the rejection: silently dropping unknown arguments would also swallow genuine argument typos from every client — on a server with mutating and destructive tools, a mis-named flag (`enter` on `send_keys`, say) must fail loudly, not run with defaults. From 5e3520d49fb6c97fb2e8ce090a5cf495dcff8231 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 6 Jun 2026 14:40:29 -0500 Subject: [PATCH 23/23] test(docs[middleware]): Refresh expected-error terminology why: Middleware tests still documented safety denials and decorated libtmux failures with the older ToolError-only mental model, while production now uses ExpectedToolError for agent-correctable failures. what: - Update stack-order and audit-denial docstrings for ExpectedToolError - Make the audit-denial simulation match current SafetyMiddleware behavior - Clarify retry/error-probe comments around mapped expected tool errors --- tests/test_middleware.py | 43 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/tests/test_middleware.py b/tests/test_middleware.py index 9c379f4..9384c38 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -356,7 +356,7 @@ def test_server_middleware_stack_order() -> None: The ordering is load-bearing (see server.py comment): TimingMiddleware must be outermost so it observes total wall time; AuditMiddleware must sit *outside* SafetyMiddleware so - tier-denial events (which raise ``ToolError`` before + tier-denial events (which raise ``ExpectedToolError`` before ``call_next``) are still recorded — without this ordering, forbidden-access attempts silently bypass the audit log. A refactor that swaps Audit and Safety would degrade @@ -420,12 +420,12 @@ def test_audit_records_safety_denial( Composes Audit and Safety in the production order (Audit outside Safety) by manually nesting their ``on_call_tool`` handlers: the inner ``call_next`` from Audit dispatches to Safety, which raises - ``ToolError`` for an over-tier tool. Audit should record that as - ``outcome=error error_type=ToolError`` rather than skipping the - record. Without this ordering, denied access attempts would - silently bypass forensic logging. + ``ExpectedToolError`` for an over-tier tool. Audit should record + that as ``outcome=error error_type=ExpectedToolError`` rather + than skipping the record. Without this ordering, denied access + attempts would silently bypass forensic logging. """ - from fastmcp.exceptions import ToolError + from libtmux_mcp._utils import ExpectedToolError audit = AuditMiddleware() ctx = _fake_context(name="kill_server", arguments={}) @@ -434,25 +434,25 @@ def test_audit_records_safety_denial( # context.fastmcp_context.fastmcp.get_tool(...). With # fastmcp_context=None the safety check short-circuits, so we # simulate the denial more directly: ``call_next`` is a coroutine - # that raises the same ``ToolError`` SafetyMiddleware would when - # blocking an over-tier call. The test's invariant is that the - # AuditMiddleware sitting *outside* Safety still records the - # attempt with outcome=error. + # that raises the same ``ExpectedToolError`` SafetyMiddleware + # would when blocking an over-tier call. The test's invariant is + # that the AuditMiddleware sitting *outside* Safety still records + # the attempt with outcome=error. msg = "Tool 'kill_server' is not available at the current safety level." async def _safety_denial(_ctx: t.Any) -> None: - raise ToolError(msg) + raise ExpectedToolError(msg) with ( caplog.at_level(logging.INFO, logger="libtmux_mcp.audit"), - pytest.raises(ToolError, match="not available"), + pytest.raises(ExpectedToolError, match="not available"), ): asyncio.run(audit.on_call_tool(ctx, _safety_denial)) rendered = "\n".join(rec.getMessage() for rec in caplog.records) assert "tool=kill_server" in rendered assert "outcome=error" in rendered - assert "error_type=ToolError" in rendered + assert "error_type=ExpectedToolError" in rendered # --------------------------------------------------------------------------- @@ -515,7 +515,7 @@ def test_readonly_retry_recovers_from_libtmux_exception() -> None: Models the production scenario the middleware exists to fix: a transient socket error from libtmux on the first call, then a successful call after the cache evicts the dead Server. Without - the retry the agent would see a ``ToolError`` on the first + the retry the agent would see an expected tool error on the first ``list_sessions``-style call. """ from libtmux import exc as libtmux_exc @@ -585,11 +585,11 @@ def test_readonly_retry_recovers_on_decorated_tool( Regression guard for the fastmcp <3.2.4 production no-op where ``RetryMiddleware._should_retry`` did not walk ``__cause__``. Every libtmux-mcp tool is wrapped by ``handle_tool_errors`` / - ``handle_tool_errors_async`` (``_utils.py:842-850``), which - converts ``LibTmuxException`` to ``ToolError(...) from - LibTmuxException``. At the middleware layer the exception type - is ``ToolError``, not ``LibTmuxException`` — so the retry - decision must walk ``__cause__`` to see the real failure type. + ``handle_tool_errors_async``, which converts ``LibTmuxException`` + to ``ExpectedToolError(...) from LibTmuxException``. At the + middleware layer the exception type is ``ExpectedToolError``, not + ``LibTmuxException`` — so the retry decision must walk + ``__cause__`` to see the real failure type. The unit tests above use ``_FlakyCallNext`` which raises ``LibTmuxException`` directly, bypassing the decorator. They @@ -681,7 +681,7 @@ def _error_probe_server() -> t.Any: @probe.tool def fail_expected_chained() -> str: # Mirror the ``handle_tool_errors`` decorator's mapping shape: - # ``raise from ``. + # ``raise from ``. cause = libtmux_exc.PaneNotFound("%99") mapped = _map_exception_to_tool_error("fail_expected_chained", cause) raise mapped from cause @@ -736,7 +736,8 @@ def test_tool_error_result_meta_carries_error_details() -> None: """``meta`` reports the originating exception class and expectedness. ``error_type`` names the ``__cause__`` when the raise site chained - one — agents see ``PaneNotFound``, not the ``ToolError`` wrapper. + one — agents see ``PaneNotFound``, not the mapped + ``ExpectedToolError`` wrapper. """ from fastmcp import Client