From e9b75e4c7ca109d95ab150084859c665794638d1 Mon Sep 17 00:00:00 2001
From: Alezander9 <alexander.j.yue@gmail.com>
Date: Thu, 25 Jun 2026 17:07:08 -0700
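Subject: [PATCH 1/2] fix(session): gate tool media by model input support

When building model messages from tool results, drop media attachments
whose MIME type the target model does not accept as input. Support is
read from model.capabilities.input (image, audio, video, pdf); only
attachments that isMedia() classifies as media are gated.

For every dropped attachment a short notice is appended to the tool
output text, one per line and de-duplicated, so the model is told that
media was omitted. Image attachments produced by the browser_execute
tool get a screenshot-specific notice.

Attachments that remain are routed as before: kept in the tool result
when supportsMediaInToolResult() allows it, otherwise extracted to be
sent as a separate user message.

Tests: an existing toModelMessages expectation now runs against a model
with image input enabled, and a new test covers a browser_execute
screenshot on a model without image input.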

---
 packages/opencode/src/session/message-v2.ts   |  33 +++++-
 .../opencode/test/session/message-v2.test.ts  | 100 +++++++++++++++++-
 2 files changed, 128 insertions(+), 5 deletions(-)

diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index 1590e0890..2622116f7 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -54,6 +54,22 @@ function truncateToolOutput(text: string, maxChars?: number) {
   return `${text.slice(0, maxChars)}\n[Tool output truncated for compaction: omitted ${omitted} chars]`
 }
 
+function mediaInputSupported(model: Provider.Model, mime: string) {
+  if (mime.startsWith("image/")) return model.capabilities.input.image
+  if (mime.startsWith("audio/")) return model.capabilities.input.audio
+  if (mime.startsWith("video/")) return model.capabilities.input.video
+  if (mime === "application/pdf") return model.capabilities.input.pdf
+  return true
+}
+
+function mediaOmittedNotice(tool: string, mime: string) {
+  if (tool === "browser_execute" && mime.startsWith("image/")) {
+    return "Screenshot was taken, but this model does not support image input."
+  }
+  if (mime.startsWith("image/")) return "Image omitted because this model does not support image input."
+  return "Media omitted because this model does not support this input type."
+}
+
 export const Event = {
   Updated: SessionV1.Event.MessageUpdated,
   Removed: SessionV1.Event.MessageRemoved,
@@ -305,23 +321,32 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
               ? "[Old tool result content cleared]"
               : truncateToolOutput(part.state.output, options?.toolOutputMaxChars)
             const attachments = part.state.time.compacted || options?.stripMedia ? [] : (part.state.attachments ?? [])
+            const omittedMediaNotices = Array.from(
+              new Set(
+                attachments
+                  .filter((a) => isMedia(a.mime) && !mediaInputSupported(model, a.mime))
+                  .map((a) => mediaOmittedNotice(part.tool, a.mime)),
+              ),
+            )
+            const modelAttachments = attachments.filter((a) => !isMedia(a.mime) || mediaInputSupported(model, a.mime))
+            const modelOutputText = [outputText, ...omittedMediaNotices].filter(Boolean).join("\n")
 
             // For providers that don't support media in tool results, extract media files
             // (images, PDFs) to be sent as a separate user message
-            const mediaAttachments = attachments.filter((a) => isMedia(a.mime))
+            const mediaAttachments = modelAttachments.filter((a) => isMedia(a.mime))
             const extractedMedia = mediaAttachments.filter((a) => !supportsMediaInToolResult(a))
             if (extractedMedia.length > 0) {
               media.push(...extractedMedia)
             }
-            const finalAttachments = attachments.filter((a) => !isMedia(a.mime) || supportsMediaInToolResult(a))
+            const finalAttachments = modelAttachments.filter((a) => !isMedia(a.mime) || supportsMediaInToolResult(a))
 
             const output =
               finalAttachments.length > 0
                 ? {
-                    text: outputText,
+                    text: modelOutputText,
                     attachments: finalAttachments,
                   }
-                : outputText
+                : modelOutputText
 
             assistantMessage.parts.push({
               type: ("tool-" + part.tool) as `tool-${string}`,
diff --git a/packages/opencode/test/session/message-v2.test.ts b/packages/opencode/test/session/message-v2.test.ts
index 1de84c9dd..7f4ab45b5 100644
--- a/packages/opencode/test/session/message-v2.test.ts
+++ b/packages/opencode/test/session/message-v2.test.ts
@@ -60,6 +60,16 @@ const model: Provider.Model = {
   headers: {},
   release_date: "2026-01-01",
 }
+const visionModel: Provider.Model = {
+  ...model,
+  capabilities: {
+    ...model.capabilities,
+    input: {
+      ...model.capabilities.input,
+      image: true,
+    },
+  },
+}
 
 function userInfo(id: string): SessionV1.User {
   return {
@@ -371,7 +381,7 @@ describe("session.message-v2.toModelMessage", () => {
       },
     ]
 
-    expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([
+    expect(await MessageV2.toModelMessages(input, visionModel)).toStrictEqual([
       {
         role: "user",
         content: [{ type: "text", text: "run tool" }],
@@ -411,6 +421,94 @@ describe("session.message-v2.toModelMessage", () => {
     ])
   })
 
+  test("replaces browser screenshots with text for visionless anthropic models", async () => {
+    const anthropicModel: Provider.Model = {
+      ...model,
+      id: ModelV2.ID.make("mimo-v2.5-pro"),
+      providerID: ProviderV2.ID.make("xiaomi-mimo"),
+      api: {
+        id: "mimo-v2.5-pro",
+        url: "https://api.xiaomimimo.com/anthropic/v1",
+        npm: "@ai-sdk/anthropic",
+      },
+    }
+    const userID = "m-user-mimo"
+    const assistantID = "m-assistant-mimo"
+    const input: SessionV1.WithParts[] = [
+      {
+        info: userInfo(userID),
+        parts: [
+          {
+            ...basePart(userID, "u1-mimo"),
+            type: "text",
+            text: "run tool",
+          },
+        ] as SessionV1.Part[],
+      },
+      {
+        info: assistantInfo(assistantID, userID, undefined, { providerID: "xiaomi-mimo", modelID: "mimo-v2.5-pro" }),
+        parts: [
+          {
+            ...basePart(assistantID, "a1-mimo"),
+            type: "tool",
+            callID: "call-mimo-1",
+            tool: "browser_execute",
+            state: {
+              status: "completed",
+              input: { code: "await session.Page.captureScreenshot()" },
+              output: "(1 screenshot attached)",
+              title: "browser_execute",
+              metadata: {},
+              time: { start: 0, end: 1 },
+              attachments: [
+                {
+                  ...basePart(assistantID, "file-mimo-1"),
+                  type: "file",
+                  mime: "image/png",
+                  url: "data:image/png;base64,Zm9v",
+                },
+              ],
+            },
+          },
+        ] as SessionV1.Part[],
+      },
+    ]
+
+    const result = await MessageV2.toModelMessages(input, anthropicModel)
+    expect(result).toStrictEqual([
+      {
+        role: "user",
+        content: [{ type: "text", text: "run tool" }],
+      },
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "tool-call",
+            toolCallId: "call-mimo-1",
+            toolName: "browser_execute",
+            input: { code: "await session.Page.captureScreenshot()" },
+            providerExecuted: undefined,
+          },
+        ],
+      },
+      {
+        role: "tool",
+        content: [
+          {
+            type: "tool-result",
+            toolCallId: "call-mimo-1",
+            toolName: "browser_execute",
+            output: {
+              type: "text",
+              value: "(1 screenshot attached)\nScreenshot was taken, but this model does not support image input.",
+            },
+          },
+        ],
+      },
+    ])
+  })
+
   test("preserves jpeg tool-result media for anthropic models", async () => {
     const anthropicModel: Provider.Model = {
       ...model,

From ee1f9737a2cfc4bd08eebbfa12c7874acf882e68 Mon Sep 17 00:00:00 2001
From: Alezander9 <alexander.j.yue@gmail.com>
Date: Thu, 25 Jun 2026 17:31:23 -0700
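Subject: [PATCH 2/2] test(session): remove screenshot media regression test

Remove the "replaces browser screenshots with text for visionless
anthropic models" test that the previous patch in this series added.
Only the test file is touched; no non-test code changes.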

---
 .../opencode/test/session/message-v2.test.ts  | 88 -------------------
 1 file changed, 88 deletions(-)

diff --git a/packages/opencode/test/session/message-v2.test.ts b/packages/opencode/test/session/message-v2.test.ts
index 7f4ab45b5..ed021a7f9 100644
--- a/packages/opencode/test/session/message-v2.test.ts
+++ b/packages/opencode/test/session/message-v2.test.ts
@@ -421,94 +421,6 @@ describe("session.message-v2.toModelMessage", () => {
     ])
   })
 
-  test("replaces browser screenshots with text for visionless anthropic models", async () => {
-    const anthropicModel: Provider.Model = {
-      ...model,
-      id: ModelV2.ID.make("mimo-v2.5-pro"),
-      providerID: ProviderV2.ID.make("xiaomi-mimo"),
-      api: {
-        id: "mimo-v2.5-pro",
-        url: "https://api.xiaomimimo.com/anthropic/v1",
-        npm: "@ai-sdk/anthropic",
-      },
-    }
-    const userID = "m-user-mimo"
-    const assistantID = "m-assistant-mimo"
-    const input: SessionV1.WithParts[] = [
-      {
-        info: userInfo(userID),
-        parts: [
-          {
-            ...basePart(userID, "u1-mimo"),
-            type: "text",
-            text: "run tool",
-          },
-        ] as SessionV1.Part[],
-      },
-      {
-        info: assistantInfo(assistantID, userID, undefined, { providerID: "xiaomi-mimo", modelID: "mimo-v2.5-pro" }),
-        parts: [
-          {
-            ...basePart(assistantID, "a1-mimo"),
-            type: "tool",
-            callID: "call-mimo-1",
-            tool: "browser_execute",
-            state: {
-              status: "completed",
-              input: { code: "await session.Page.captureScreenshot()" },
-              output: "(1 screenshot attached)",
-              title: "browser_execute",
-              metadata: {},
-              time: { start: 0, end: 1 },
-              attachments: [
-                {
-                  ...basePart(assistantID, "file-mimo-1"),
-                  type: "file",
-                  mime: "image/png",
-                  url: "data:image/png;base64,Zm9v",
-                },
-              ],
-            },
-          },
-        ] as SessionV1.Part[],
-      },
-    ]
-
-    const result = await MessageV2.toModelMessages(input, anthropicModel)
-    expect(result).toStrictEqual([
-      {
-        role: "user",
-        content: [{ type: "text", text: "run tool" }],
-      },
-      {
-        role: "assistant",
-        content: [
-          {
-            type: "tool-call",
-            toolCallId: "call-mimo-1",
-            toolName: "browser_execute",
-            input: { code: "await session.Page.captureScreenshot()" },
-            providerExecuted: undefined,
-          },
-        ],
-      },
-      {
-        role: "tool",
-        content: [
-          {
-            type: "tool-result",
-            toolCallId: "call-mimo-1",
-            toolName: "browser_execute",
-            output: {
-              type: "text",
-              value: "(1 screenshot attached)\nScreenshot was taken, but this model does not support image input.",
-            },
-          },
-        ],
-      },
-    ])
-  })
-
   test("preserves jpeg tool-result media for anthropic models", async () => {
     const anthropicModel: Provider.Model = {
       ...model,