From e9b75e4c7ca109d95ab150084859c665794638d1 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Thu, 25 Jun 2026 17:07:08 -0700 Subject: [PATCH 1/2] fix(session): gate tool media by model input support --- packages/opencode/src/session/message-v2.ts | 33 +++++- .../opencode/test/session/message-v2.test.ts | 100 +++++++++++++++++- 2 files changed, 128 insertions(+), 5 deletions(-) diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index 1590e0890..2622116f7 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -54,6 +54,22 @@ function truncateToolOutput(text: string, maxChars?: number) { return `${text.slice(0, maxChars)}\n[Tool output truncated for compaction: omitted ${omitted} chars]` } +function mediaInputSupported(model: Provider.Model, mime: string) { + if (mime.startsWith("image/")) return model.capabilities.input.image + if (mime.startsWith("audio/")) return model.capabilities.input.audio + if (mime.startsWith("video/")) return model.capabilities.input.video + if (mime === "application/pdf") return model.capabilities.input.pdf + return true +} + +function mediaOmittedNotice(tool: string, mime: string) { + if (tool === "browser_execute" && mime.startsWith("image/")) { + return "Screenshot was taken, but this model does not support image input." + } + if (mime.startsWith("image/")) return "Image omitted because this model does not support image input." + return "Media omitted because this model does not support this input type." +} + export const Event = { Updated: SessionV1.Event.MessageUpdated, Removed: SessionV1.Event.MessageRemoved, @@ -305,23 +321,32 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* ( ? "[Old tool result content cleared]" : truncateToolOutput(part.state.output, options?.toolOutputMaxChars) const attachments = part.state.time.compacted || options?.stripMedia ? [] : (part.state.attachments ?? []) + const omittedMediaNotices = Array.from( + new Set( + attachments + .filter((a) => isMedia(a.mime) && !mediaInputSupported(model, a.mime)) + .map((a) => mediaOmittedNotice(part.tool, a.mime)), + ), + ) + const modelAttachments = attachments.filter((a) => !isMedia(a.mime) || mediaInputSupported(model, a.mime)) + const modelOutputText = [outputText, ...omittedMediaNotices].filter(Boolean).join("\n") // For providers that don't support media in tool results, extract media files // (images, PDFs) to be sent as a separate user message - const mediaAttachments = attachments.filter((a) => isMedia(a.mime)) + const mediaAttachments = modelAttachments.filter((a) => isMedia(a.mime)) const extractedMedia = mediaAttachments.filter((a) => !supportsMediaInToolResult(a)) if (extractedMedia.length > 0) { media.push(...extractedMedia) } - const finalAttachments = attachments.filter((a) => !isMedia(a.mime) || supportsMediaInToolResult(a)) + const finalAttachments = modelAttachments.filter((a) => !isMedia(a.mime) || supportsMediaInToolResult(a)) const output = finalAttachments.length > 0 ? { - text: outputText, + text: modelOutputText, attachments: finalAttachments, } - : outputText + : modelOutputText assistantMessage.parts.push({ type: ("tool-" + part.tool) as `tool-${string}`, diff --git a/packages/opencode/test/session/message-v2.test.ts b/packages/opencode/test/session/message-v2.test.ts index 1de84c9dd..7f4ab45b5 100644 --- a/packages/opencode/test/session/message-v2.test.ts +++ b/packages/opencode/test/session/message-v2.test.ts @@ -60,6 +60,16 @@ const model: Provider.Model = { headers: {}, release_date: "2026-01-01", } +const visionModel: Provider.Model = { + ...model, + capabilities: { + ...model.capabilities, + input: { + ...model.capabilities.input, + image: true, + }, + }, +} function userInfo(id: string): SessionV1.User { return { @@ -371,7 +381,7 @@ describe("session.message-v2.toModelMessage", () => { }, ] - expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([ + expect(await MessageV2.toModelMessages(input, visionModel)).toStrictEqual([ { role: "user", content: [{ type: "text", text: "run tool" }], @@ -411,6 +421,94 @@ describe("session.message-v2.toModelMessage", () => { ]) }) + test("replaces browser screenshots with text for visionless anthropic models", async () => { + const anthropicModel: Provider.Model = { + ...model, + id: ModelV2.ID.make("mimo-v2.5-pro"), + providerID: ProviderV2.ID.make("xiaomi-mimo"), + api: { + id: "mimo-v2.5-pro", + url: "https://api.xiaomimimo.com/anthropic/v1", + npm: "@ai-sdk/anthropic", + }, + } + const userID = "m-user-mimo" + const assistantID = "m-assistant-mimo" + const input: SessionV1.WithParts[] = [ + { + info: userInfo(userID), + parts: [ + { + ...basePart(userID, "u1-mimo"), + type: "text", + text: "run tool", + }, + ] as SessionV1.Part[], + }, + { + info: assistantInfo(assistantID, userID, undefined, { providerID: "xiaomi-mimo", modelID: "mimo-v2.5-pro" }), + parts: [ + { + ...basePart(assistantID, "a1-mimo"), + type: "tool", + callID: "call-mimo-1", + tool: "browser_execute", + state: { + status: "completed", + input: { code: "await session.Page.captureScreenshot()" }, + output: "(1 screenshot attached)", + title: "browser_execute", + metadata: {}, + time: { start: 0, end: 1 }, + attachments: [ + { + ...basePart(assistantID, "file-mimo-1"), + type: "file", + mime: "image/png", + url: "data:image/png;base64,Zm9v", + }, + ], + }, + }, + ] as SessionV1.Part[], + }, + ] + + const result = await MessageV2.toModelMessages(input, anthropicModel) + expect(result).toStrictEqual([ + { + role: "user", + content: [{ type: "text", text: "run tool" }], + }, + { + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call-mimo-1", + toolName: "browser_execute", + input: { code: "await session.Page.captureScreenshot()" }, + providerExecuted: undefined, + }, + ], + }, + { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call-mimo-1", + toolName: "browser_execute", + output: { + type: "text", + value: "(1 screenshot attached)\nScreenshot was taken, but this model does not support image input.", + }, + }, + ], + }, + ]) + }) + test("preserves jpeg tool-result media for anthropic models", async () => { const anthropicModel: Provider.Model = { ...model, From ee1f9737a2cfc4bd08eebbfa12c7874acf882e68 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Thu, 25 Jun 2026 17:31:23 -0700 Subject: [PATCH 2/2] test(session): remove screenshot media regression test --- .../opencode/test/session/message-v2.test.ts | 88 ------------------- 1 file changed, 88 deletions(-) diff --git a/packages/opencode/test/session/message-v2.test.ts b/packages/opencode/test/session/message-v2.test.ts index 7f4ab45b5..ed021a7f9 100644 --- a/packages/opencode/test/session/message-v2.test.ts +++ b/packages/opencode/test/session/message-v2.test.ts @@ -421,94 +421,6 @@ describe("session.message-v2.toModelMessage", () => { ]) }) - test("replaces browser screenshots with text for visionless anthropic models", async () => { - const anthropicModel: Provider.Model = { - ...model, - id: ModelV2.ID.make("mimo-v2.5-pro"), - providerID: ProviderV2.ID.make("xiaomi-mimo"), - api: { - id: "mimo-v2.5-pro", - url: "https://api.xiaomimimo.com/anthropic/v1", - npm: "@ai-sdk/anthropic", - }, - } - const userID = "m-user-mimo" - const assistantID = "m-assistant-mimo" - const input: SessionV1.WithParts[] = [ - { - info: userInfo(userID), - parts: [ - { - ...basePart(userID, "u1-mimo"), - type: "text", - text: "run tool", - }, - ] as SessionV1.Part[], - }, - { - info: assistantInfo(assistantID, userID, undefined, { providerID: "xiaomi-mimo", modelID: "mimo-v2.5-pro" }), - parts: [ - { - ...basePart(assistantID, "a1-mimo"), - type: "tool", - callID: "call-mimo-1", - tool: "browser_execute", - state: { - status: "completed", - input: { code: "await session.Page.captureScreenshot()" }, - output: "(1 screenshot attached)", - title: "browser_execute", - metadata: {}, - time: { start: 0, end: 1 }, - attachments: [ - { - ...basePart(assistantID, "file-mimo-1"), - type: "file", - mime: "image/png", - url: "data:image/png;base64,Zm9v", - }, - ], - }, - }, - ] as SessionV1.Part[], - }, - ] - - const result = await MessageV2.toModelMessages(input, anthropicModel) - expect(result).toStrictEqual([ - { - role: "user", - content: [{ type: "text", text: "run tool" }], - }, - { - role: "assistant", - content: [ - { - type: "tool-call", - toolCallId: "call-mimo-1", - toolName: "browser_execute", - input: { code: "await session.Page.captureScreenshot()" }, - providerExecuted: undefined, - }, - ], - }, - { - role: "tool", - content: [ - { - type: "tool-result", - toolCallId: "call-mimo-1", - toolName: "browser_execute", - output: { - type: "text", - value: "(1 screenshot attached)\nScreenshot was taken, but this model does not support image input.", - }, - }, - ], - }, - ]) - }) - test("preserves jpeg tool-result media for anthropic models", async () => { const anthropicModel: Provider.Model = { ...model,