From 8540cdba92d8581c2eabd6507021663360726ca3 Mon Sep 17 00:00:00 2001 From: whoisthey Date: Thu, 25 Jun 2026 20:58:07 -0700 Subject: [PATCH 01/19] feat(web): add language model inputModalities capability plumbing Add an optional `inputModalities` declaration to language model config and expose a resolved capability set to the client. - Schema: add optional `inputModalities` (`text` | `image` | `pdf`) to every provider definition in `schemas/v3/languageModel.json` and regenerate the schema types/snippets. - Add a fail-closed `resolveModelInputModalities` resolver that defaults to text-only when a model does not declare its input modalities. - Expose the resolved `inputModalities` on the client-safe `LanguageModelInfo` (populated via `getConfiguredLanguageModelsInfo` and the MCP ask path). This is groundwork for chat file attachments. It adds no attachment UI and no live provider capability probing yet. Co-authored-by: Cursor --- docs/snippets/schemas/v3/index.schema.mdx | 264 ++++++++++++++++++ .../schemas/v3/languageModel.schema.mdx | 264 ++++++++++++++++++ packages/schemas/src/v3/index.schema.ts | 264 ++++++++++++++++++ packages/schemas/src/v3/index.type.ts | 48 ++++ .../schemas/src/v3/languageModel.schema.ts | 264 ++++++++++++++++++ packages/schemas/src/v3/languageModel.type.ts | 48 ++++ .../web/src/ee/features/mcp/askCodebase.ts | 2 + .../src/features/chat/modelCapabilities.ts | 13 + packages/web/src/features/chat/types.ts | 4 + .../web/src/features/chat/utils.server.ts | 2 + schemas/v3/languageModel.json | 134 ++++++++- 11 files changed, 1306 insertions(+), 1 deletion(-) create mode 100644 packages/web/src/features/chat/modelCapabilities.ts diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx index 864359251..e0b00c540 100644 --- a/docs/snippets/schemas/v3/index.schema.mdx +++ b/docs/snippets/schemas/v3/index.schema.mdx @@ -1860,6 +1860,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1998,6 +2009,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2133,6 +2155,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2240,6 +2273,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2361,6 +2405,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2484,6 +2539,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2623,6 +2689,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2730,6 +2807,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2863,6 +2951,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3027,6 +3126,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3135,6 +3245,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3246,6 +3367,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3426,6 +3558,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3564,6 +3707,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3699,6 +3853,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3806,6 +3971,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3927,6 +4103,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4050,6 +4237,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4189,6 +4387,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4296,6 +4505,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4429,6 +4649,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4593,6 +4824,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4701,6 +4943,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4812,6 +5065,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ diff --git a/docs/snippets/schemas/v3/languageModel.schema.mdx b/docs/snippets/schemas/v3/languageModel.schema.mdx index 90aee08af..7c7874207 100644 --- a/docs/snippets/schemas/v3/languageModel.schema.mdx +++ b/docs/snippets/schemas/v3/languageModel.schema.mdx @@ -174,6 +174,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -312,6 +323,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -447,6 +469,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -554,6 +587,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -675,6 +719,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -798,6 +853,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -937,6 +1003,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1044,6 +1121,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1177,6 +1265,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1341,6 +1440,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1449,6 +1559,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1560,6 +1681,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1740,6 +1872,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1878,6 +2021,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2013,6 +2167,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2120,6 +2285,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2241,6 +2417,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2364,6 +2551,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2503,6 +2701,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2610,6 +2819,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2743,6 +2963,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2907,6 +3138,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3015,6 +3257,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3126,6 +3379,17 @@ } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ diff --git a/packages/schemas/src/v3/index.schema.ts b/packages/schemas/src/v3/index.schema.ts index 8c1d64b52..257c8ae7d 100644 --- a/packages/schemas/src/v3/index.schema.ts +++ b/packages/schemas/src/v3/index.schema.ts @@ -1859,6 +1859,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1997,6 +2008,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2132,6 +2154,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2239,6 +2272,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2360,6 +2404,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2483,6 +2538,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2622,6 +2688,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2729,6 +2806,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2862,6 +2950,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3026,6 +3125,17 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3134,6 +3244,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3245,6 +3366,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3425,6 +3557,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3563,6 +3706,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3698,6 +3852,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3805,6 +3970,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3926,6 +4102,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4049,6 +4236,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4188,6 +4386,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4295,6 +4504,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4428,6 +4648,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4592,6 +4823,17 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4700,6 +4942,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -4811,6 +5064,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ diff --git a/packages/schemas/src/v3/index.type.ts b/packages/schemas/src/v3/index.type.ts index 7fa7f5a17..85dbaac43 100644 --- a/packages/schemas/src/v3/index.type.ts +++ b/packages/schemas/src/v3/index.type.ts @@ -762,6 +762,10 @@ export interface AmazonBedrockLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } /** * Optional headers to use with the model. @@ -842,6 +846,10 @@ export interface AnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface AzureLanguageModel { /** @@ -897,6 +905,10 @@ export interface AzureLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface DeepSeekLanguageModel { /** @@ -936,6 +948,10 @@ export interface DeepSeekLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -983,6 +999,10 @@ export interface GoogleGenerativeAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -1030,6 +1050,10 @@ export interface GoogleVertexAnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleVertexLanguageModel { /** @@ -1085,6 +1109,10 @@ export interface GoogleVertexLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface MistralLanguageModel { /** @@ -1124,6 +1152,10 @@ export interface MistralLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface OpenAILanguageModel { /** @@ -1171,6 +1203,10 @@ export interface OpenAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface OpenAICompatibleLanguageModel { /** @@ -1215,6 +1251,10 @@ export interface OpenAICompatibleLanguageModel { * Optional temperature setting to use with the model. */ temperature?: number; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } /** * Optional query parameters to include in the request url. @@ -1279,6 +1319,10 @@ export interface OpenRouterLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface XaiLanguageModel { /** @@ -1318,6 +1362,10 @@ export interface XaiLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GitHubAppConfig { /** diff --git a/packages/schemas/src/v3/languageModel.schema.ts b/packages/schemas/src/v3/languageModel.schema.ts index ab418ce79..85c2bf8a8 100644 --- a/packages/schemas/src/v3/languageModel.schema.ts +++ b/packages/schemas/src/v3/languageModel.schema.ts @@ -173,6 +173,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -311,6 +322,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -446,6 +468,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -553,6 +586,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -674,6 +718,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -797,6 +852,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -936,6 +1002,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1043,6 +1120,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1176,6 +1264,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1340,6 +1439,17 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1448,6 +1558,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1559,6 +1680,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1739,6 +1871,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -1877,6 +2020,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2012,6 +2166,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2119,6 +2284,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2240,6 +2416,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2363,6 +2550,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2502,6 +2700,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2609,6 +2818,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2742,6 +2962,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -2906,6 +3137,17 @@ const schema = { "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3014,6 +3256,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -3125,6 +3378,17 @@ const schema = { } }, "additionalProperties": false + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ diff --git a/packages/schemas/src/v3/languageModel.type.ts b/packages/schemas/src/v3/languageModel.type.ts index 5c3b25668..df4569ee8 100644 --- a/packages/schemas/src/v3/languageModel.type.ts +++ b/packages/schemas/src/v3/languageModel.type.ts @@ -88,6 +88,10 @@ export interface AmazonBedrockLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } /** * Optional headers to use with the model. @@ -168,6 +172,10 @@ export interface AnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface AzureLanguageModel { /** @@ -223,6 +231,10 @@ export interface AzureLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface DeepSeekLanguageModel { /** @@ -262,6 +274,10 @@ export interface DeepSeekLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -309,6 +325,10 @@ export interface GoogleGenerativeAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -356,6 +376,10 @@ export interface GoogleVertexAnthropicLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface GoogleVertexLanguageModel { /** @@ -411,6 +435,10 @@ export interface GoogleVertexLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface MistralLanguageModel { /** @@ -450,6 +478,10 @@ export interface MistralLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface OpenAILanguageModel { /** @@ -497,6 +529,10 @@ export interface OpenAILanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface OpenAICompatibleLanguageModel { /** @@ -541,6 +577,10 @@ export interface OpenAICompatibleLanguageModel { * Optional temperature setting to use with the model. */ temperature?: number; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } /** * Optional query parameters to include in the request url. @@ -605,6 +645,10 @@ export interface OpenRouterLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } export interface XaiLanguageModel { /** @@ -644,4 +688,8 @@ export interface XaiLanguageModel { */ temperature?: number; headers?: LanguageModelHeaders; + /** + * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "pdf")[]; } diff --git a/packages/web/src/ee/features/mcp/askCodebase.ts b/packages/web/src/ee/features/mcp/askCodebase.ts index 4b7cfb7b0..8b2432fb5 100644 --- a/packages/web/src/ee/features/mcp/askCodebase.ts +++ b/packages/web/src/ee/features/mcp/askCodebase.ts @@ -4,6 +4,7 @@ import { generateChatNameFromMessage } from "@/ee/features/chat/llm.server"; import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server"; import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types"; import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils"; +import { resolveModelInputModalities } from "@/features/chat/modelCapabilities"; import { ErrorCode } from "@/lib/errorCodes"; import { ServiceError, ServiceErrorException } from "@/lib/serviceError"; import { withOptionalAuth } from "@/middleware/withAuth"; @@ -243,6 +244,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise { + const declared = config.inputModalities; + if (declared && declared.length > 0) { + return declared; + } + return ['text']; +} diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 38a737a09..615fe2b1c 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -208,10 +208,13 @@ type _AssertAllProviders = LanguageModelProvider extends typeof languageModelPro const _assertAllProviders: _AssertAllProviders = true; void _assertAllProviders; +export type InputModality = 'text' | 'image' | 'pdf'; + export const languageModelInfoSchema = z.object({ provider: z.enum(languageModelProviders).describe("The model provider (e.g., 'anthropic', 'openai')"), model: z.string().describe("The model ID"), displayName: z.string().optional().describe("Optional display name for the model"), + inputModalities: z.array(z.enum(['text', 'image', 'pdf'])).default(['text']).describe("The input modalities the model can accept. Defaults to text-only."), }); /** @@ -221,6 +224,7 @@ export type LanguageModelInfo = { provider: LanguageModelProvider, model: LanguageModel['model'], displayName?: LanguageModel['displayName'], + inputModalities: InputModality[], } // Additional request body data that we send along to the chat API. diff --git a/packages/web/src/features/chat/utils.server.ts b/packages/web/src/features/chat/utils.server.ts index ffc3483a4..7ec47b677 100644 --- a/packages/web/src/features/chat/utils.server.ts +++ b/packages/web/src/features/chat/utils.server.ts @@ -7,6 +7,7 @@ import { env, loadConfig } from '@sourcebot/shared'; import fs from 'fs'; import path from 'path'; import { LanguageModelInfo, SBChatMessage } from './types'; +import { resolveModelInputModalities } from './modelCapabilities'; import { hasEntitlement } from '@/lib/entitlements'; import { ServiceError } from '@/lib/serviceError'; import { ErrorCode } from '@/lib/errorCodes'; @@ -131,5 +132,6 @@ export const getConfiguredLanguageModelsInfo = async () => { provider: model.provider, model: model.model, displayName: model.displayName, + inputModalities: resolveModelInputModalities(model), })); }; diff --git a/schemas/v3/languageModel.json b/schemas/v3/languageModel.json index 3f1d13d52..0fb96217a 100644 --- a/schemas/v3/languageModel.json +++ b/schemas/v3/languageModel.json @@ -50,6 +50,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -93,6 +104,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -160,6 +182,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -199,6 +232,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -252,6 +296,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -307,6 +362,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -378,6 +444,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -417,6 +494,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -482,6 +570,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -537,6 +636,17 @@ "temperature": { "type": "number", "description": "Optional temperature setting to use with the model." + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -577,6 +687,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -620,6 +741,17 @@ }, "headers": { "$ref": "./shared.json#/definitions/LanguageModelHeaders" + }, + "inputModalities": { + "type": "array", + "items": { + "enum": [ + "text", + "image", + "pdf" + ] + }, + "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." } }, "required": [ @@ -667,4 +799,4 @@ "$ref": "#/definitions/XaiLanguageModel" } ] -} \ No newline at end of file +} From a473b49cd8de430e00183305a563f34dba39c113 Mon Sep 17 00:00:00 2001 From: whoisthey Date: Thu, 25 Jun 2026 20:58:48 -0700 Subject: [PATCH 02/19] docs: add CHANGELOG entry for language model inputModalities Co-authored-by: Cursor --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 689718d36..5163f833a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) +- Added optional `inputModalities` configuration for language models, exposing model input-modality capabilities (defaults to text-only). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) From 4b57d279bea951a86e806b46e555a30eebe615dc Mon Sep 17 00:00:00 2001 From: whoisthey Date: Fri, 26 Jun 2026 10:08:53 -0700 Subject: [PATCH 03/19] refactor(schemas): split document types out of inputModalities inputModalities now only enumerates true perceptual channels (text | image | audio | video). Document/container formats like PDF move to a separate fail-closed `supportedDocumentTypes` field, since PDF is not a model modality but a format providers decompose into text/image internally. Co-authored-by: Cursor --- CHANGELOG.md | 2 +- docs/snippets/schemas/v3/index.schema.mdx | 288 ++++++++++++++++-- .../schemas/v3/languageModel.schema.mdx | 288 ++++++++++++++++-- packages/schemas/src/v3/index.schema.ts | 288 ++++++++++++++++-- packages/schemas/src/v3/index.type.ts | 96 ++++-- .../schemas/src/v3/languageModel.schema.ts | 288 ++++++++++++++++-- packages/schemas/src/v3/languageModel.type.ts | 96 ++++-- .../web/src/ee/features/mcp/askCodebase.ts | 3 +- .../src/features/chat/modelCapabilities.ts | 13 +- packages/web/src/features/chat/types.ts | 7 +- .../web/src/features/chat/utils.server.ts | 3 +- schemas/v3/languageModel.json | 144 ++++++++- 12 files changed, 1354 insertions(+), 162 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5163f833a..caa90e9b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353) -- Added optional `inputModalities` configuration for language models, exposing model input-modality capabilities (defaults to text-only). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) +- Added optional `inputModalities` and `supportedDocumentTypes` configuration for language models, exposing model input-modality and document capabilities (defaults to text-only, no documents). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx index e0b00c540..5b099d724 100644 --- a/docs/snippets/schemas/v3/index.schema.mdx +++ b/docs/snippets/schemas/v3/index.schema.mdx @@ -1867,10 +1867,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2016,10 +2026,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2162,10 +2182,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2280,10 +2310,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2412,10 +2452,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2546,10 +2596,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2696,10 +2756,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2814,10 +2884,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2958,10 +3038,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3133,10 +3223,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3252,10 +3352,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3374,10 +3484,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3565,10 +3685,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3714,10 +3844,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3860,10 +4000,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3978,10 +4128,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4110,10 +4270,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4244,10 +4414,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4394,10 +4574,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4512,10 +4702,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4656,10 +4856,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4831,10 +5041,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4950,10 +5170,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -5072,10 +5302,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ diff --git a/docs/snippets/schemas/v3/languageModel.schema.mdx b/docs/snippets/schemas/v3/languageModel.schema.mdx index 7c7874207..7b1e774cf 100644 --- a/docs/snippets/schemas/v3/languageModel.schema.mdx +++ b/docs/snippets/schemas/v3/languageModel.schema.mdx @@ -181,10 +181,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -330,10 +340,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -476,10 +496,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -594,10 +624,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -726,10 +766,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -860,10 +910,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1010,10 +1070,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1128,10 +1198,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1272,10 +1352,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1447,10 +1537,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1566,10 +1666,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1688,10 +1798,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1879,10 +1999,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2028,10 +2158,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2174,10 +2314,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2292,10 +2442,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2424,10 +2584,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2558,10 +2728,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2708,10 +2888,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2826,10 +3016,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2970,10 +3170,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3145,10 +3355,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3264,10 +3484,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3386,10 +3616,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ diff --git a/packages/schemas/src/v3/index.schema.ts b/packages/schemas/src/v3/index.schema.ts index 257c8ae7d..7d051544c 100644 --- a/packages/schemas/src/v3/index.schema.ts +++ b/packages/schemas/src/v3/index.schema.ts @@ -1866,10 +1866,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2015,10 +2025,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2161,10 +2181,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2279,10 +2309,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2411,10 +2451,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2545,10 +2595,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2695,10 +2755,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2813,10 +2883,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2957,10 +3037,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3132,10 +3222,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3251,10 +3351,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3373,10 +3483,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3564,10 +3684,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3713,10 +3843,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3859,10 +3999,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3977,10 +4127,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4109,10 +4269,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4243,10 +4413,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4393,10 +4573,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4511,10 +4701,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4655,10 +4855,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4830,10 +5040,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -4949,10 +5169,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -5071,10 +5301,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ diff --git a/packages/schemas/src/v3/index.type.ts b/packages/schemas/src/v3/index.type.ts index 85dbaac43..14c8c14e2 100644 --- a/packages/schemas/src/v3/index.type.ts +++ b/packages/schemas/src/v3/index.type.ts @@ -763,9 +763,13 @@ export interface AmazonBedrockLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } /** * Optional headers to use with the model. @@ -847,9 +851,13 @@ export interface AnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface AzureLanguageModel { /** @@ -906,9 +914,13 @@ export interface AzureLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface DeepSeekLanguageModel { /** @@ -949,9 +961,13 @@ export interface DeepSeekLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -1000,9 +1016,13 @@ export interface GoogleGenerativeAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -1051,9 +1071,13 @@ export interface GoogleVertexAnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexLanguageModel { /** @@ -1110,9 +1134,13 @@ export interface GoogleVertexLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } export interface MistralLanguageModel { /** @@ -1153,9 +1181,13 @@ export interface MistralLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface OpenAILanguageModel { /** @@ -1204,9 +1236,13 @@ export interface OpenAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface OpenAICompatibleLanguageModel { /** @@ -1252,9 +1288,13 @@ export interface OpenAICompatibleLanguageModel { */ temperature?: number; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } /** * Optional query parameters to include in the request url. @@ -1320,9 +1360,13 @@ export interface OpenRouterLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface XaiLanguageModel { /** @@ -1363,9 +1407,13 @@ export interface XaiLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GitHubAppConfig { /** diff --git a/packages/schemas/src/v3/languageModel.schema.ts b/packages/schemas/src/v3/languageModel.schema.ts index 85c2bf8a8..9c9ae7b2d 100644 --- a/packages/schemas/src/v3/languageModel.schema.ts +++ b/packages/schemas/src/v3/languageModel.schema.ts @@ -180,10 +180,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -329,10 +339,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -475,10 +495,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -593,10 +623,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -725,10 +765,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -859,10 +909,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1009,10 +1069,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1127,10 +1197,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1271,10 +1351,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1446,10 +1536,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1565,10 +1665,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1687,10 +1797,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -1878,10 +1998,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2027,10 +2157,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2173,10 +2313,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2291,10 +2441,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2423,10 +2583,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2557,10 +2727,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2707,10 +2887,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2825,10 +3015,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -2969,10 +3169,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3144,10 +3354,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3263,10 +3483,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -3385,10 +3615,20 @@ const schema = { "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ diff --git a/packages/schemas/src/v3/languageModel.type.ts b/packages/schemas/src/v3/languageModel.type.ts index df4569ee8..3297689b7 100644 --- a/packages/schemas/src/v3/languageModel.type.ts +++ b/packages/schemas/src/v3/languageModel.type.ts @@ -89,9 +89,13 @@ export interface AmazonBedrockLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } /** * Optional headers to use with the model. @@ -173,9 +177,13 @@ export interface AnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface AzureLanguageModel { /** @@ -232,9 +240,13 @@ export interface AzureLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface DeepSeekLanguageModel { /** @@ -275,9 +287,13 @@ export interface DeepSeekLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } export interface GoogleGenerativeAILanguageModel { /** @@ -326,9 +342,13 @@ export interface GoogleGenerativeAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexAnthropicLanguageModel { /** @@ -377,9 +397,13 @@ export interface GoogleVertexAnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface GoogleVertexLanguageModel { /** @@ -436,9 +460,13 @@ export interface GoogleVertexLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } export interface MistralLanguageModel { /** @@ -479,9 +507,13 @@ export interface MistralLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface OpenAILanguageModel { /** @@ -530,9 +562,13 @@ export interface OpenAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface OpenAICompatibleLanguageModel { /** @@ -578,9 +614,13 @@ export interface OpenAICompatibleLanguageModel { */ temperature?: number; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + */ + supportedDocumentTypes?: "pdf"[]; } /** * Optional query parameters to include in the request url. @@ -646,9 +686,13 @@ export interface OpenRouterLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } export interface XaiLanguageModel { /** @@ -689,7 +733,11 @@ export interface XaiLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + */ + inputModalities?: ("text" | "image" | "audio" | "video")[]; + /** + * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). */ - inputModalities?: ("text" | "image" | "pdf")[]; + supportedDocumentTypes?: "pdf"[]; } diff --git a/packages/web/src/ee/features/mcp/askCodebase.ts b/packages/web/src/ee/features/mcp/askCodebase.ts index 8b2432fb5..7f779ffc8 100644 --- a/packages/web/src/ee/features/mcp/askCodebase.ts +++ b/packages/web/src/ee/features/mcp/askCodebase.ts @@ -4,7 +4,7 @@ import { generateChatNameFromMessage } from "@/ee/features/chat/llm.server"; import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server"; import { LanguageModelInfo, SBChatMessage, SearchScope } from "@/features/chat/types"; import { convertLLMOutputToPortableMarkdown, getAnswerPartFromAssistantMessage, getLanguageModelKey } from "@/features/chat/utils"; -import { resolveModelInputModalities } from "@/features/chat/modelCapabilities"; +import { resolveModelInputModalities, resolveModelSupportedDocumentTypes } from "@/features/chat/modelCapabilities"; import { ErrorCode } from "@/lib/errorCodes"; import { ServiceError, ServiceErrorException } from "@/lib/serviceError"; import { withOptionalAuth } from "@/middleware/withAuth"; @@ -245,6 +245,7 @@ export const askCodebase = (params: AskCodebaseParams): Promise { + const declared = config.supportedDocumentTypes; + if (declared && declared.length > 0) { + return declared; + } + return []; +} diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 615fe2b1c..e1daf0bdb 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -208,13 +208,15 @@ type _AssertAllProviders = LanguageModelProvider extends typeof languageModelPro const _assertAllProviders: _AssertAllProviders = true; void _assertAllProviders; -export type InputModality = 'text' | 'image' | 'pdf'; +export type InputModality = 'text' | 'image' | 'audio' | 'video'; +export type DocumentType = 'pdf'; export const languageModelInfoSchema = z.object({ provider: z.enum(languageModelProviders).describe("The model provider (e.g., 'anthropic', 'openai')"), model: z.string().describe("The model ID"), displayName: z.string().optional().describe("Optional display name for the model"), - inputModalities: z.array(z.enum(['text', 'image', 'pdf'])).default(['text']).describe("The input modalities the model can accept. Defaults to text-only."), + inputModalities: z.array(z.enum(['text', 'image', 'audio', 'video'])).default(['text']).describe("The input modalities the model can accept. Defaults to text-only."), + supportedDocumentTypes: z.array(z.enum(['pdf'])).default([]).describe("The document/file container formats the model can ingest natively. Defaults to none."), }); /** @@ -225,6 +227,7 @@ export type LanguageModelInfo = { model: LanguageModel['model'], displayName?: LanguageModel['displayName'], inputModalities: InputModality[], + supportedDocumentTypes: DocumentType[], } // Additional request body data that we send along to the chat API. diff --git a/packages/web/src/features/chat/utils.server.ts b/packages/web/src/features/chat/utils.server.ts index 7ec47b677..0b04226d8 100644 --- a/packages/web/src/features/chat/utils.server.ts +++ b/packages/web/src/features/chat/utils.server.ts @@ -7,7 +7,7 @@ import { env, loadConfig } from '@sourcebot/shared'; import fs from 'fs'; import path from 'path'; import { LanguageModelInfo, SBChatMessage } from './types'; -import { resolveModelInputModalities } from './modelCapabilities'; +import { resolveModelInputModalities, resolveModelSupportedDocumentTypes } from './modelCapabilities'; import { hasEntitlement } from '@/lib/entitlements'; import { ServiceError } from '@/lib/serviceError'; import { ErrorCode } from '@/lib/errorCodes'; @@ -133,5 +133,6 @@ export const getConfiguredLanguageModelsInfo = async () => { model: model.model, displayName: model.displayName, inputModalities: resolveModelInputModalities(model), + supportedDocumentTypes: resolveModelSupportedDocumentTypes(model), })); }; diff --git a/schemas/v3/languageModel.json b/schemas/v3/languageModel.json index 0fb96217a..e49707484 100644 --- a/schemas/v3/languageModel.json +++ b/schemas/v3/languageModel.json @@ -57,10 +57,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -111,10 +121,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -189,10 +209,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -239,10 +269,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -303,10 +343,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -369,10 +419,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -451,10 +511,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -501,10 +571,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -577,10 +657,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -643,10 +733,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -694,10 +794,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ @@ -748,10 +858,20 @@ "enum": [ "text", "image", + "audio", + "video" + ] + }, + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + }, + "supportedDocumentTypes": { + "type": "array", + "items": { + "enum": [ "pdf" ] }, - "description": "Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." } }, "required": [ From 0baabcba43e86432a1a69846d90a47c83b62499f Mon Sep 17 00:00:00 2001 From: whoisthey Date: Fri, 26 Jun 2026 10:17:14 -0700 Subject: [PATCH 04/19] docs(schemas): clarify what counts as a document type Tighten the inputModalities / supportedDocumentTypes descriptions to remove the implication that omitting supportedDocumentTypes blocks all non-text attachments. Clarify the taxonomy: single-medium files (images, audio, video) and plain-text files (.txt, .md) are governed by inputModalities; supportedDocumentTypes only gates rich compound container formats like PDF. Co-authored-by: Cursor --- docs/snippets/schemas/v3/index.schema.mdx | 96 +++++++++---------- .../schemas/v3/languageModel.schema.mdx | 96 +++++++++---------- packages/schemas/src/v3/index.schema.ts | 96 +++++++++---------- packages/schemas/src/v3/index.type.ts | 48 +++++----- .../schemas/src/v3/languageModel.schema.ts | 96 +++++++++---------- packages/schemas/src/v3/languageModel.type.ts | 48 +++++----- packages/web/src/features/chat/types.ts | 4 +- schemas/v3/languageModel.json | 48 +++++----- 8 files changed, 266 insertions(+), 266 deletions(-) diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx index 5b099d724..356da2009 100644 --- a/docs/snippets/schemas/v3/index.schema.mdx +++ b/docs/snippets/schemas/v3/index.schema.mdx @@ -1871,7 +1871,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1880,7 +1880,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2030,7 +2030,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2039,7 +2039,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2186,7 +2186,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2195,7 +2195,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2314,7 +2314,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2323,7 +2323,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2456,7 +2456,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2465,7 +2465,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2600,7 +2600,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2609,7 +2609,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2760,7 +2760,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2769,7 +2769,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2888,7 +2888,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2897,7 +2897,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3042,7 +3042,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3051,7 +3051,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3227,7 +3227,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3236,7 +3236,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3356,7 +3356,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3365,7 +3365,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3488,7 +3488,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3497,7 +3497,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3689,7 +3689,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3698,7 +3698,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3848,7 +3848,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3857,7 +3857,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4004,7 +4004,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4013,7 +4013,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4132,7 +4132,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4141,7 +4141,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4274,7 +4274,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4283,7 +4283,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4418,7 +4418,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4427,7 +4427,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4578,7 +4578,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4587,7 +4587,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4706,7 +4706,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4715,7 +4715,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4860,7 +4860,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4869,7 +4869,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5045,7 +5045,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5054,7 +5054,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5174,7 +5174,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5183,7 +5183,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5306,7 +5306,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5315,7 +5315,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/docs/snippets/schemas/v3/languageModel.schema.mdx b/docs/snippets/schemas/v3/languageModel.schema.mdx index 7b1e774cf..5af4b3d96 100644 --- a/docs/snippets/schemas/v3/languageModel.schema.mdx +++ b/docs/snippets/schemas/v3/languageModel.schema.mdx @@ -185,7 +185,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -194,7 +194,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -344,7 +344,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -353,7 +353,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -500,7 +500,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -509,7 +509,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -628,7 +628,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -637,7 +637,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -770,7 +770,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -779,7 +779,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -914,7 +914,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -923,7 +923,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1074,7 +1074,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1083,7 +1083,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1202,7 +1202,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1211,7 +1211,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1356,7 +1356,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1365,7 +1365,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1541,7 +1541,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1550,7 +1550,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1670,7 +1670,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1679,7 +1679,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1802,7 +1802,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1811,7 +1811,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2003,7 +2003,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2012,7 +2012,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2162,7 +2162,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2171,7 +2171,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2318,7 +2318,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2327,7 +2327,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2446,7 +2446,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2455,7 +2455,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2588,7 +2588,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2597,7 +2597,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2732,7 +2732,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2741,7 +2741,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2892,7 +2892,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2901,7 +2901,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3020,7 +3020,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3029,7 +3029,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3174,7 +3174,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3183,7 +3183,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3359,7 +3359,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3368,7 +3368,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3488,7 +3488,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3497,7 +3497,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3620,7 +3620,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3629,7 +3629,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/packages/schemas/src/v3/index.schema.ts b/packages/schemas/src/v3/index.schema.ts index 7d051544c..123fd4a8b 100644 --- a/packages/schemas/src/v3/index.schema.ts +++ b/packages/schemas/src/v3/index.schema.ts @@ -1870,7 +1870,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1879,7 +1879,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2029,7 +2029,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2038,7 +2038,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2185,7 +2185,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2194,7 +2194,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2313,7 +2313,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2322,7 +2322,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2455,7 +2455,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2464,7 +2464,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2599,7 +2599,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2608,7 +2608,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2759,7 +2759,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2768,7 +2768,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2887,7 +2887,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2896,7 +2896,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3041,7 +3041,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3050,7 +3050,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3226,7 +3226,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3235,7 +3235,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3355,7 +3355,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3364,7 +3364,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3487,7 +3487,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3496,7 +3496,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3688,7 +3688,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3697,7 +3697,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3847,7 +3847,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3856,7 +3856,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4003,7 +4003,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4012,7 +4012,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4131,7 +4131,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4140,7 +4140,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4273,7 +4273,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4282,7 +4282,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4417,7 +4417,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4426,7 +4426,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4577,7 +4577,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4586,7 +4586,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4705,7 +4705,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4714,7 +4714,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -4859,7 +4859,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -4868,7 +4868,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5044,7 +5044,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5053,7 +5053,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5173,7 +5173,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5182,7 +5182,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -5305,7 +5305,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -5314,7 +5314,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/packages/schemas/src/v3/index.type.ts b/packages/schemas/src/v3/index.type.ts index 14c8c14e2..d6f555e8d 100644 --- a/packages/schemas/src/v3/index.type.ts +++ b/packages/schemas/src/v3/index.type.ts @@ -763,11 +763,11 @@ export interface AmazonBedrockLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -851,11 +851,11 @@ export interface AnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -914,11 +914,11 @@ export interface AzureLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -961,11 +961,11 @@ export interface DeepSeekLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1016,11 +1016,11 @@ export interface GoogleGenerativeAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1071,11 +1071,11 @@ export interface GoogleVertexAnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1134,11 +1134,11 @@ export interface GoogleVertexLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1181,11 +1181,11 @@ export interface MistralLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1236,11 +1236,11 @@ export interface OpenAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1288,11 +1288,11 @@ export interface OpenAICompatibleLanguageModel { */ temperature?: number; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1360,11 +1360,11 @@ export interface OpenRouterLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -1407,11 +1407,11 @@ export interface XaiLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } diff --git a/packages/schemas/src/v3/languageModel.schema.ts b/packages/schemas/src/v3/languageModel.schema.ts index 9c9ae7b2d..61cc0adf3 100644 --- a/packages/schemas/src/v3/languageModel.schema.ts +++ b/packages/schemas/src/v3/languageModel.schema.ts @@ -184,7 +184,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -193,7 +193,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -343,7 +343,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -352,7 +352,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -499,7 +499,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -508,7 +508,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -627,7 +627,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -636,7 +636,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -769,7 +769,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -778,7 +778,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -913,7 +913,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -922,7 +922,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1073,7 +1073,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1082,7 +1082,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1201,7 +1201,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1210,7 +1210,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1355,7 +1355,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1364,7 +1364,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1540,7 +1540,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1549,7 +1549,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1669,7 +1669,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1678,7 +1678,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -1801,7 +1801,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -1810,7 +1810,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2002,7 +2002,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2011,7 +2011,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2161,7 +2161,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2170,7 +2170,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2317,7 +2317,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2326,7 +2326,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2445,7 +2445,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2454,7 +2454,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2587,7 +2587,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2596,7 +2596,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2731,7 +2731,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2740,7 +2740,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -2891,7 +2891,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -2900,7 +2900,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3019,7 +3019,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3028,7 +3028,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3173,7 +3173,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3182,7 +3182,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3358,7 +3358,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3367,7 +3367,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3487,7 +3487,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3496,7 +3496,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -3619,7 +3619,7 @@ const schema = { "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -3628,7 +3628,7 @@ const schema = { "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ diff --git a/packages/schemas/src/v3/languageModel.type.ts b/packages/schemas/src/v3/languageModel.type.ts index 3297689b7..90a53b423 100644 --- a/packages/schemas/src/v3/languageModel.type.ts +++ b/packages/schemas/src/v3/languageModel.type.ts @@ -89,11 +89,11 @@ export interface AmazonBedrockLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -177,11 +177,11 @@ export interface AnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -240,11 +240,11 @@ export interface AzureLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -287,11 +287,11 @@ export interface DeepSeekLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -342,11 +342,11 @@ export interface GoogleGenerativeAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -397,11 +397,11 @@ export interface GoogleVertexAnthropicLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -460,11 +460,11 @@ export interface GoogleVertexLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -507,11 +507,11 @@ export interface MistralLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -562,11 +562,11 @@ export interface OpenAILanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -614,11 +614,11 @@ export interface OpenAICompatibleLanguageModel { */ temperature?: number; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -686,11 +686,11 @@ export interface OpenRouterLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } @@ -733,11 +733,11 @@ export interface XaiLanguageModel { temperature?: number; headers?: LanguageModelHeaders; /** - * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed). + * Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed). */ inputModalities?: ("text" | "image" | "audio" | "video")[]; /** - * Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed). + * Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`. */ supportedDocumentTypes?: "pdf"[]; } diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index e1daf0bdb..3547c5d0a 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -215,8 +215,8 @@ export const languageModelInfoSchema = z.object({ provider: z.enum(languageModelProviders).describe("The model provider (e.g., 'anthropic', 'openai')"), model: z.string().describe("The model ID"), displayName: z.string().optional().describe("Optional display name for the model"), - inputModalities: z.array(z.enum(['text', 'image', 'audio', 'video'])).default(['text']).describe("The input modalities the model can accept. Defaults to text-only."), - supportedDocumentTypes: z.array(z.enum(['pdf'])).default([]).describe("The document/file container formats the model can ingest natively. Defaults to none."), + inputModalities: z.array(z.enum(['text', 'image', 'audio', 'video'])).default(['text']).describe("The input modalities the model can accept (images, audio, video, text). Single-medium attachments are gated by these. Defaults to text-only."), + supportedDocumentTypes: z.array(z.enum(['pdf'])).default([]).describe("Rich compound document formats (e.g. PDF) the model can ingest natively, distinct from single-medium attachments gated by inputModalities. Defaults to none."), }); /** diff --git a/schemas/v3/languageModel.json b/schemas/v3/languageModel.json index e49707484..a952554b9 100644 --- a/schemas/v3/languageModel.json +++ b/schemas/v3/languageModel.json @@ -61,7 +61,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -70,7 +70,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -125,7 +125,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -134,7 +134,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -213,7 +213,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -222,7 +222,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -273,7 +273,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -282,7 +282,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -347,7 +347,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -356,7 +356,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -423,7 +423,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -432,7 +432,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -515,7 +515,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -524,7 +524,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -575,7 +575,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -584,7 +584,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -661,7 +661,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -670,7 +670,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -737,7 +737,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -746,7 +746,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -798,7 +798,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -807,7 +807,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ @@ -862,7 +862,7 @@ "video" ] }, - "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed)." + "description": "Optional list of input modalities (perceptual channels the model ingests natively) this model can accept. Governs single-medium attachments by their content: images require `image`, audio requires `audio`, video requires `video`, and plain-text files (.txt, .md, source code) count as `text`. Rich container documents like PDF are gated separately via `supportedDocumentTypes`. When omitted, the model is treated as text-only (fail-closed)." }, "supportedDocumentTypes": { "type": "array", @@ -871,7 +871,7 @@ "pdf" ] }, - "description": "Optional list of document/file container formats (e.g. PDF) the model can ingest natively, distinct from raw input modalities. Used to gate document attachments. When omitted, no document types are supported (fail-closed)." + "description": "Optional list of rich document formats (e.g. PDF) the model can ingest natively. A document here means a compound container format that bundles text with embedded images and layout, which the provider parses server-side — NOT plain-text files (.txt, .md, which count as `text`) and NOT single-medium files (images/audio/video, which are governed by `inputModalities`). When omitted, no document formats are accepted (fail-closed); this does not restrict `text` or the modalities declared in `inputModalities`." } }, "required": [ From 5e4045b0ef25d95ac740961d0368b5250e820daf Mon Sep 17 00:00:00 2001 From: whoisthey Date: Fri, 26 Jun 2026 10:25:57 -0700 Subject: [PATCH 05/19] fix(web): widen getLanguageModelKey param to keyable subset LanguageModelInfo now has required inputModalities/supportedDocumentTypes, so a raw LanguageModel config (where those are optional) is no longer assignable to it. getLanguageModelKey only reads provider/model/displayName, so type its parameter as that Pick subset, letting both LanguageModel and LanguageModelInfo be keyed. Fixes the docker build type check. Co-authored-by: Cursor --- packages/web/src/features/chat/utils.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/web/src/features/chat/utils.ts b/packages/web/src/features/chat/utils.ts index c7f409ac7..b103ada7c 100644 --- a/packages/web/src/features/chat/utils.ts +++ b/packages/web/src/features/chat/utils.ts @@ -422,9 +422,11 @@ export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isTurn } /** - * Generates a unique key given a LanguageModelInfo object. + * Generates a unique key for a language model. Accepts any object carrying the + * identifying fields, so both the full `LanguageModel` config and the + * client-safe `LanguageModelInfo` can be keyed with it. */ -export const getLanguageModelKey = (model: LanguageModelInfo) => { +export const getLanguageModelKey = (model: Pick) => { return `${model.provider}-${model.model}-${model.displayName}`; } From 507d7586cb2f10f8ae166629cb36af19872d4e6d Mon Sep 17 00:00:00 2001 From: whoisthey Date: Fri, 26 Jun 2026 10:45:05 -0700 Subject: [PATCH 06/19] chore(schemas,web): keep schema dist fresh and resolve types from source Two dev-experience fixes for the stale-build-output footgun: - schemas watch now runs `yarn build` (generate + tsc) instead of generate-only, so editing a schema JSON during `yarn dev` refreshes dist (both the .d.ts types and the runtime index.schema.js used by ajv), not just the generated source. - web tsconfig maps @sourcebot/schemas/v3|v2/* to the package source, so type-checking and the IDE read committed source directly instead of stale built .d.ts. Web only imports .type files (erased at compile), so there is no bundling/runtime impact. Co-authored-by: Cursor --- packages/schemas/package.json | 2 +- packages/web/tsconfig.json | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/schemas/package.json b/packages/schemas/package.json index 13fe2cb7a..3719a6da5 100644 --- a/packages/schemas/package.json +++ b/packages/schemas/package.json @@ -5,7 +5,7 @@ "scripts": { "build": "yarn generate && tsc", "generate": "tsx tools/generate.ts", - "watch": "nodemon --watch ../../schemas -e json -x 'yarn generate'", + "watch": "nodemon --watch ../../schemas -e json -x 'yarn build'", "postinstall": "yarn build" }, "devDependencies": { diff --git a/packages/web/tsconfig.json b/packages/web/tsconfig.json index f18162100..3f0e7534b 100644 --- a/packages/web/tsconfig.json +++ b/packages/web/tsconfig.json @@ -27,6 +27,12 @@ ], "@/public/*": [ "./public/*" + ], + "@sourcebot/schemas/v3/*": [ + "../schemas/src/v3/*" + ], + "@sourcebot/schemas/v2/*": [ + "../schemas/src/v2/*" ] }, "target": "ES2017" From a1aeb372d527ae42ffa1089f297244147044fd17 Mon Sep 17 00:00:00 2001 From: whoisthey Date: Fri, 26 Jun 2026 16:18:10 -0700 Subject: [PATCH 07/19] First pass file attachments, picker and drag and drop, with preview cards, wired into user message via xml-like tags similar to system context --- packages/web/package.json | 1 + .../[owner]/[repo]/components/landingPage.tsx | 19 +- .../src/app/(app)/chat/chatLandingPage.tsx | 5 +- .../chat/components/chatLandingDropzone.tsx | 44 +++++ .../chat/components/landingPageChatBox.tsx | 9 +- packages/web/src/ee/features/chat/agent.ts | 13 +- .../chat/components/chatThread/chatThread.tsx | 17 +- .../chatThread/chatThreadListItem.tsx | 44 +++-- .../components/chatThread/detailsCard.tsx | 1 + .../chatThread/messageAttachments.tsx | 47 +++++ .../web/src/features/chat/attachmentUtils.ts | 164 ++++++++++++++++++ .../components/chatBox/attachmentButton.tsx | 54 ++++++ .../components/chatBox/attachmentTray.tsx | 61 +++++++ .../chatBox/attachmentViewerDialog.tsx | 29 ++++ .../chat/components/chatBox/chatBox.tsx | 83 +++++++-- .../components/chatBox/chatPaneDropzone.tsx | 96 ++++++++++ .../features/chat/components/chatBox/index.ts | 3 +- packages/web/src/features/chat/constants.ts | 29 ++++ packages/web/src/features/chat/types.ts | 20 +++ .../features/chat/useCreateNewChatThread.ts | 6 +- packages/web/src/features/chat/utils.ts | 43 ++++- yarn.lock | 32 +++- 22 files changed, 767 insertions(+), 53 deletions(-) create mode 100644 packages/web/src/app/(app)/chat/components/chatLandingDropzone.tsx create mode 100644 packages/web/src/ee/features/chat/components/chatThread/messageAttachments.tsx create mode 100644 packages/web/src/features/chat/attachmentUtils.ts create mode 100644 packages/web/src/features/chat/components/chatBox/attachmentButton.tsx create mode 100644 packages/web/src/features/chat/components/chatBox/attachmentTray.tsx create mode 100644 packages/web/src/features/chat/components/chatBox/attachmentViewerDialog.tsx create mode 100644 packages/web/src/features/chat/components/chatBox/chatPaneDropzone.tsx diff --git a/packages/web/package.json b/packages/web/package.json index 82543adbd..928802411 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -175,6 +175,7 @@ "react-day-picker": "^9.14.0", "react-device-detect": "^2.2.3", "react-dom": "19.2.4", + "react-dropzone": "^15.0.0", "react-hook-form": "^7.53.0", "react-hotkeys-hook": "^4.5.1", "react-icons": "^5.6.0", diff --git a/packages/web/src/app/(app)/askgh/[owner]/[repo]/components/landingPage.tsx b/packages/web/src/app/(app)/askgh/[owner]/[repo]/components/landingPage.tsx index f18ea5c74..afc7af4c3 100644 --- a/packages/web/src/app/(app)/askgh/[owner]/[repo]/components/landingPage.tsx +++ b/packages/web/src/app/(app)/askgh/[owner]/[repo]/components/landingPage.tsx @@ -3,14 +3,15 @@ import Image from 'next/image'; import { SearchModeSelector } from "@/app/(app)/components/searchModeSelector"; import { Separator } from "@/components/ui/separator"; -import { ChatBox } from "@/features/chat/components/chatBox"; +import { ChatBox, ChatBoxHandle } from "@/features/chat/components/chatBox"; import { ChatBoxToolbar } from "@/features/chat/components/chatBox/chatBoxToolbar"; +import { ChatPaneDropzone } from "@/features/chat/components/chatBox/chatPaneDropzone"; import { NotConfiguredErrorBanner } from "@/features/chat/components/notConfiguredErrorBanner"; import { LanguageModelInfo, RepoSearchScope } from "@/features/chat/types"; import { useCreateNewChatThread } from "@/features/chat/useCreateNewChatThread"; import { DISABLED_MCP_SERVER_IDS_LOCAL_STORAGE_KEY } from "@/features/chat/constants"; import { getRepoImageSrc } from '@/lib/utils'; -import { useMemo, useState } from "react"; +import { useMemo, useRef, useState } from "react"; import { useLocalStorage } from "usehooks-ts"; interface LandingPageProps { @@ -33,6 +34,7 @@ export const LandingPage = ({ const { createNewChatThread, isLoading } = useCreateNewChatThread(); const [isContextSelectorOpen, setIsContextSelectorOpen] = useState(false); const [disabledMcpServerIds, setDisabledMcpServerIds] = useLocalStorage(DISABLED_MCP_SERVER_IDS_LOCAL_STORAGE_KEY, [], { initializeWithValue: false }); + const chatBoxRef = useRef(null); const isChatBoxDisabled = languageModels.length === 0; const selectedSearchScopes = useMemo(() => [ @@ -67,11 +69,16 @@ export const LandingPage = ({ {/* ChatBox */} -
+ chatBoxRef.current?.addFiles(files)} + disabled={isChatBoxDisabled} + >
{ - createNewChatThread(children, selectedSearchScopes, disabledMcpServerIds); + ref={chatBoxRef} + onSubmit={(children, _editor, attachments) => { + createNewChatThread(children, selectedSearchScopes, disabledMcpServerIds, attachments); }} className="min-h-[50px]" isRedirecting={isLoading} @@ -107,7 +114,7 @@ export const LandingPage = ({ {isChatBoxDisabled && ( )} -
+
) diff --git a/packages/web/src/app/(app)/chat/chatLandingPage.tsx b/packages/web/src/app/(app)/chat/chatLandingPage.tsx index 5bd84a3d0..f81e27247 100644 --- a/packages/web/src/app/(app)/chat/chatLandingPage.tsx +++ b/packages/web/src/app/(app)/chat/chatLandingPage.tsx @@ -5,6 +5,7 @@ import { CustomSlateEditor } from "@/features/chat/customSlateEditor"; import { ServiceErrorException } from "@/lib/serviceError"; import { isServiceError, measure } from "@/lib/utils"; import { LandingPageChatBox } from "./components/landingPageChatBox"; +import { ChatLandingDropzone } from "./components/chatLandingDropzone"; import { RepositoryCarousel } from "../components/repositoryCarousel"; import { Separator } from "@/components/ui/separator"; import { DemoCards } from "./components/demoCards"; @@ -56,7 +57,7 @@ export async function ChatLandingPage() { })() : undefined; return ( -
+
)}
-
+
) } diff --git a/packages/web/src/app/(app)/chat/components/chatLandingDropzone.tsx b/packages/web/src/app/(app)/chat/components/chatLandingDropzone.tsx new file mode 100644 index 000000000..821b3a5ba --- /dev/null +++ b/packages/web/src/app/(app)/chat/components/chatLandingDropzone.tsx @@ -0,0 +1,44 @@ +'use client'; + +import { ChatBoxHandle } from "@/features/chat/components/chatBox"; +import { ChatPaneDropzone } from "@/features/chat/components/chatBox/chatPaneDropzone"; +import { createContext, ReactNode, useCallback, useContext, useRef } from "react"; + +type RegisterChatBoxHandle = (handle: ChatBoxHandle | null) => void; + +const LandingChatBoxContext = createContext(null); + +// Lets the (nested) landing chat box register its imperative handle so the +// pane-level drop zone can forward dropped files into it. Returns a no-op when +// rendered outside the provider. +export const useRegisterLandingChatBox = (): RegisterChatBoxHandle => { + return useContext(LandingChatBoxContext) ?? (() => { }); +} + +interface ChatLandingDropzoneProps { + disabled?: boolean; + children: ReactNode; +} + +// Wraps the entire unstarted-chat landing pane in a drag-and-drop target. +// The chat box lives deeper in the tree (and behind a server/client boundary), +// so it registers its handle via context rather than a direct ref. +export const ChatLandingDropzone = ({ disabled, children }: ChatLandingDropzoneProps) => { + const handleRef = useRef(null); + + const register = useCallback((handle) => { + handleRef.current = handle; + }, []); + + return ( + + handleRef.current?.addFiles(files)} + disabled={disabled} + > + {children} + + + ) +} diff --git a/packages/web/src/app/(app)/chat/components/landingPageChatBox.tsx b/packages/web/src/app/(app)/chat/components/landingPageChatBox.tsx index ed749450f..61f78bf4d 100644 --- a/packages/web/src/app/(app)/chat/components/landingPageChatBox.tsx +++ b/packages/web/src/app/(app)/chat/components/landingPageChatBox.tsx @@ -7,6 +7,7 @@ import { LanguageModelInfo, SearchScope } from "@/features/chat/types"; import { useCreateNewChatThread } from "@/features/chat/useCreateNewChatThread"; import { RepositoryQuery, SearchContextQuery } from "@/lib/types"; import { useState } from "react"; +import { useRegisterLandingChatBox } from "./chatLandingDropzone"; import { useLocalStorage } from "usehooks-ts"; import { DISABLED_MCP_SERVER_IDS_LOCAL_STORAGE_KEY, SELECTED_SEARCH_SCOPES_LOCAL_STORAGE_KEY } from "@/features/chat/constants"; import { SearchModeSelector } from "../../components/searchModeSelector"; @@ -31,14 +32,16 @@ export const LandingPageChatBox = ({ const [selectedSearchScopes, setSelectedSearchScopes] = useLocalStorage(SELECTED_SEARCH_SCOPES_LOCAL_STORAGE_KEY, [], { initializeWithValue: false }); const [disabledMcpServerIds, setDisabledMcpServerIds] = useLocalStorage(DISABLED_MCP_SERVER_IDS_LOCAL_STORAGE_KEY, [], { initializeWithValue: false }); const [isContextSelectorOpen, setIsContextSelectorOpen] = useState(false); + const registerChatBox = useRegisterLandingChatBox(); const isChatBoxDisabled = languageModels.length === 0; return (
{ - createNewChatThread(children, selectedSearchScopes, disabledMcpServerIds); + ref={registerChatBox} + onSubmit={(children, _editor, attachments) => { + createNewChatThread(children, selectedSearchScopes, disabledMcpServerIds, attachments); }} className="min-h-[50px]" isRedirecting={isLoading} @@ -74,6 +77,6 @@ export const LandingPageChatBox = ({ {isChatBoxDisabled && ( )} -
+
) } diff --git a/packages/web/src/ee/features/chat/agent.ts b/packages/web/src/ee/features/chat/agent.ts index 3a300a08b..abbafc9a9 100644 --- a/packages/web/src/ee/features/chat/agent.ts +++ b/packages/web/src/ee/features/chat/agent.ts @@ -22,7 +22,8 @@ import { randomUUID } from "crypto"; import _dedent from "dedent"; import { ANSWER_TAG, FILE_REFERENCE_PREFIX } from "@/features/chat/constants"; import { Source } from "@/features/chat/types"; -import { addLineNumbers, fileReferenceToString, getAnswerPartFromAssistantMessage, getTurnProgressState, getUserMessageText } from "@/features/chat/utils"; +import { addLineNumbers, fileReferenceToString, formatAttachmentsForPrompt, getAnswerPartFromAssistantMessage, getTurnProgressState, getUserMessageAttachments, getUserMessageText } from "@/features/chat/utils"; +import { ATTACHMENT_MAX_TEXT_BYTES } from "@/features/chat/constants"; import { createTools } from "./tools"; import { getConnectedMcpClients } from "@/ee/features/chat/mcp/mcpClientFactory"; import { getMcpTools, McpToolsResult } from "@/ee/features/chat/mcp/mcpToolSets"; @@ -105,9 +106,17 @@ export const createMessageStream = async ({ let messageHistory: ModelMessage[] = messages.map((message, index): ModelMessage | undefined => { if (message.role === 'user') { + // Fold any inline-text attachments into this turn's content (not + // the system prompt) so they stay bound to the turn they were + // attached to and are re-emitted per turn from the persisted parts. + const text = getUserMessageText(message); + const attachmentsBlock = formatAttachmentsForPrompt( + getUserMessageAttachments(message), + ATTACHMENT_MAX_TEXT_BYTES, + ); return { role: 'user', - content: getUserMessageText(message), + content: attachmentsBlock ? `${text}\n\n${attachmentsBlock}` : text, }; } diff --git a/packages/web/src/ee/features/chat/components/chatThread/chatThread.tsx b/packages/web/src/ee/features/chat/components/chatThread/chatThread.tsx index 87faf79f8..a7dfa82af 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/chatThread.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/chatThread.tsx @@ -4,7 +4,7 @@ import { useToast } from '@/components/hooks/use-toast'; import { Button } from '@/components/ui/button'; import { Separator } from '@/components/ui/separator'; import { CustomSlateEditor } from '@/features/chat/customSlateEditor'; -import { AdditionalChatRequestParams, CustomEditor, LanguageModelInfo, SBChatMessage, SearchScope, Source } from '@/features/chat/types'; +import { AdditionalChatRequestParams, AttachmentData, CustomEditor, LanguageModelInfo, SBChatMessage, SearchScope, Source } from '@/features/chat/types'; import { createUIMessage, getAllMentionElements, getTurnProgressState, getUserMessageText, resetEditor, slateContentToString } from '@/features/chat/utils'; import { useChat } from '@ai-sdk/react'; import { CreateUIMessage, DefaultChatTransport, lastAssistantMessageIsCompleteWithApprovalResponses } from 'ai'; @@ -15,8 +15,9 @@ import { useStickToBottom } from 'use-stick-to-bottom'; import { Descendant } from 'slate'; import { useMessagePairs } from '../../useMessagePairs'; import { useSelectedLanguageModel } from '@/features/chat/useSelectedLanguageModel'; -import { ChatBox } from '@/features/chat/components/chatBox'; +import { ChatBox, ChatBoxHandle } from '@/features/chat/components/chatBox'; import { ChatBoxToolbar } from '@/features/chat/components/chatBox/chatBoxToolbar'; +import { ChatPaneDropzone } from '@/features/chat/components/chatBox/chatPaneDropzone'; import { ChatThreadListItem } from './chatThreadListItem'; import { ErrorBanner } from './errorBanner'; import { McpFailedServersBanner } from './mcpFailedServersBanner'; @@ -72,6 +73,7 @@ export const ChatThread = ({ }: ChatThreadProps) => { const [isErrorBannerVisible, setIsErrorBannerVisible] = useState(false); const hasSubmittedInputMessage = useRef(false); + const chatBoxRef = useRef(null); const { scrollRef, contentRef, scrollToBottom, isAtBottom } = useStickToBottom({ initial: false }); const { toast } = useToast(); const router = useRouter(); @@ -347,11 +349,11 @@ export const ChatThread = ({ } }, [error]); - const onSubmit = useCallback(async (children: Descendant[], editor: CustomEditor) => { + const onSubmit = useCallback(async (children: Descendant[], editor: CustomEditor, attachments: AttachmentData[]) => { const text = slateContentToString(children); const mentions = getAllMentionElements(children); - const message = createUIMessage(text, mentions.map(({ data }) => data), selectedSearchScopes, disabledMcpServerIds); + const message = createUIMessage(text, mentions.map(({ data }) => data), selectedSearchScopes, disabledMcpServerIds, attachments); sendMessage(message); scrollToBottom(); @@ -381,6 +383,11 @@ export const ChatThread = ({ return ( + chatBoxRef.current?.addFiles(files)} + disabled={!isOwner || languageModels.length === 0} + > {error && ( )}
+ ); diff --git a/packages/web/src/ee/features/chat/components/chatThread/chatThreadListItem.tsx b/packages/web/src/ee/features/chat/components/chatThread/chatThreadListItem.tsx index 6b79de0e6..b296dde53 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/chatThreadListItem.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/chatThreadListItem.tsx @@ -8,8 +8,9 @@ import { CSSProperties, forwardRef, memo, useCallback, useEffect, useMemo, useRe import scrollIntoView from 'scroll-into-view-if-needed'; import { Reference, referenceSchema, SBChatMessage, Source } from "@/features/chat/types"; import { useExtractReferences } from '../../useExtractReferences'; -import { getAnswerPartFromAssistantMessage, getLastStepParts, getUserMessageText, groupMessageIntoSteps, isSBChatToolPart, repairReferences, tryResolveFileReference } from '@/features/chat/utils'; +import { getAnswerPartFromAssistantMessage, getLastStepParts, getUserMessageAttachments, getUserMessageText, groupMessageIntoSteps, isSBChatToolPart, repairReferences, tryResolveFileReference } from '@/features/chat/utils'; import { AnswerCard } from './answerCard'; +import { MessageAttachments } from './messageAttachments'; import { DetailsCard } from './detailsCard'; import { ApprovalRequestedToolPart, ToolApprovalBanner } from './toolApprovalBanner'; import { MarkdownRenderer, REFERENCE_PAYLOAD_ATTRIBUTE } from './markdownRenderer'; @@ -52,6 +53,10 @@ const ChatThreadListItemComponent = forwardRef { + return getUserMessageAttachments(userMessage); + }, [userMessage]); + // Take the assistant message and repair any references that are not properly formatted. // This applies to parts that are text (i.e., text & reasoning). const assistantMessage = useMemo(() => { @@ -370,27 +375,30 @@ const ChatThreadListItemComponent = forwardRef -
- {isTurnInProgress ? ( - - ) : ( - +
+ {userAttachments.length > 0 && ( + )} - + +
+ {isTurnInProgress ? ( + + ) : ( + + )} + +
{isThinking && ( -
- -
- - - -
+
+ + +
)} diff --git a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx index 7b6c7867f..c705b6be5 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/detailsCard.tsx @@ -537,6 +537,7 @@ export const StepPartRenderer = ({ part, toolTokenUsageMap }: { part: SBChatMess case 'data-source': case 'data-mcp-server': case 'data-mcp-failed-server': + case 'data-attachment': case 'file': case 'source-document': case 'source-url': diff --git a/packages/web/src/ee/features/chat/components/chatThread/messageAttachments.tsx b/packages/web/src/ee/features/chat/components/chatThread/messageAttachments.tsx new file mode 100644 index 000000000..7d2b5040e --- /dev/null +++ b/packages/web/src/ee/features/chat/components/chatThread/messageAttachments.tsx @@ -0,0 +1,47 @@ +'use client'; + +import { VscodeFileIcon } from "@/app/components/vscodeFileIcon"; +import { AttachmentViewerDialog } from "@/features/chat/components/chatBox/attachmentViewerDialog"; +import { AttachmentData } from "@/features/chat/types"; +import { cn } from "@/lib/utils"; +import { useState } from "react"; + +interface MessageAttachmentsProps { + attachments: AttachmentData[]; + className?: string; +} + +export const MessageAttachments = ({ attachments, className }: MessageAttachmentsProps) => { + const [activeAttachment, setActiveAttachment] = useState(null); + + if (attachments.length === 0) { + return null; + } + + return ( + <> +
+ {attachments.map((attachment, index) => ( + + ))} +
+ !open && setActiveAttachment(null)} + filename={activeAttachment?.filename} + text={activeAttachment?.kind === 'text' ? activeAttachment.text : undefined} + /> + + ) +} diff --git a/packages/web/src/features/chat/attachmentUtils.ts b/packages/web/src/features/chat/attachmentUtils.ts new file mode 100644 index 000000000..75828cb32 --- /dev/null +++ b/packages/web/src/features/chat/attachmentUtils.ts @@ -0,0 +1,164 @@ +'use client'; + +import { + ATTACHMENT_ALLOWED_TEXT_EXTENSIONS, + ATTACHMENT_ALLOWED_TEXT_MIME_TYPES, + ATTACHMENT_MAX_COUNT, + ATTACHMENT_MAX_FILENAME_LENGTH, + ATTACHMENT_MAX_TEXT_BYTES, +} from "./constants"; +import { AttachmentData, TextAttachment } from "./types"; + +// Normalizes an untrusted filename: keeps only the basename, drops control +// characters (which could break the prompt's `` tag +// or the UI), collapses whitespace, and caps the length while preserving the +// extension. Long/abusive names are truncated rather than rejected. +export const sanitizeFilename = (name: string): string => { + const basename = name.split(/[\\/]/).pop() ?? name; + const cleaned = Array.from(basename) + .filter((char) => { + const code = char.charCodeAt(0); + return code >= 32 && code !== 127; + }) + .join('') + .replace(/\s+/g, ' ') + .trim() || 'attachment'; + + if (cleaned.length <= ATTACHMENT_MAX_FILENAME_LENGTH) { + return cleaned; + } + + const dotIndex = cleaned.lastIndexOf('.'); + const extension = dotIndex > 0 ? cleaned.slice(dotIndex) : ''; + const stem = dotIndex > 0 ? cleaned.slice(0, dotIndex) : cleaned; + const keep = Math.max(1, ATTACHMENT_MAX_FILENAME_LENGTH - extension.length - 1); + return `${stem.slice(0, keep)}…${extension}`; +} + +// A text attachment selected in the chat box but not yet submitted. The `id` +// is a client-only key for list rendering and removal; it is stripped before +// the attachment becomes part of a message. +export type PendingAttachment = TextAttachment & { id: string }; + +// Builds the comma-separated `accept` attribute for a native `` +// so the OS picker only surfaces supported text file types. +export const getAttachmentAcceptAttribute = (): string => { + return [ + 'text/*', + ...ATTACHMENT_ALLOWED_TEXT_MIME_TYPES, + ...ATTACHMENT_ALLOWED_TEXT_EXTENSIONS.map((extension) => `.${extension}`), + ].join(','); +} + +// Builds the `accept` map for react-dropzone (and the native file picker) so +// the OS dialog and drag overlay only surface supported text file types. The +// extension list is attached to `text/plain` so code files that report an empty +// or unusual MIME type are still selectable by extension. +export const getAttachmentDropzoneAccept = (): Record => { + const accept: Record = { + 'text/*': [], + 'text/plain': ATTACHMENT_ALLOWED_TEXT_EXTENSIONS.map((extension) => `.${extension}`), + }; + for (const mimeType of ATTACHMENT_ALLOWED_TEXT_MIME_TYPES) { + accept[mimeType] = []; + } + return accept; +} + +export const toAttachmentData = (attachment: PendingAttachment): AttachmentData => { + return { + kind: attachment.kind, + filename: attachment.filename, + mediaType: attachment.mediaType, + sizeBytes: attachment.sizeBytes, + text: attachment.text, + }; +} + +const getExtension = (filename: string): string => { + const parts = filename.toLowerCase().split('.'); + return parts.length > 1 ? (parts[parts.length - 1] ?? '') : ''; +} + +export const isAllowedTextFile = (file: File): boolean => { + if (file.type.startsWith('text/')) { + return true; + } + if (ATTACHMENT_ALLOWED_TEXT_MIME_TYPES.includes(file.type)) { + return true; + } + + const extension = getExtension(file.name); + if (ATTACHMENT_ALLOWED_TEXT_EXTENSIONS.includes(extension)) { + return true; + } + + // Files with no extension (e.g. "Dockerfile") report an empty extension; + // fall back to matching the whole lowercased filename. + const nameLower = file.name.toLowerCase(); + if (ATTACHMENT_ALLOWED_TEXT_EXTENSIONS.includes(nameLower)) { + return true; + } + + return false; +} + +const readAsText = (file: File): Promise => { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onerror = () => reject(reader.error ?? new Error('Failed to read file')); + reader.onload = () => resolve(typeof reader.result === 'string' ? reader.result : ''); + reader.readAsText(file); + }); +} + +export type ReadFilesResult = { + attachments: PendingAttachment[]; + errors: string[]; +}; + +// Reads and validates a set of files into pending text attachments, enforcing +// the per-message count, per-file size, and allowed-type caps. Rejected files +// produce a human-readable error message instead of throwing. +export const readFilesAsAttachments = async ( + files: File[], + existingCount: number, +): Promise => { + const attachments: PendingAttachment[] = []; + const errors: string[] = []; + let count = existingCount; + + for (const file of files) { + if (count >= ATTACHMENT_MAX_COUNT) { + errors.push(`You can attach at most ${ATTACHMENT_MAX_COUNT} files per message.`); + break; + } + + if (!isAllowedTextFile(file)) { + errors.push(`${file.name}: unsupported file type (text files only).`); + continue; + } + + if (file.size > ATTACHMENT_MAX_TEXT_BYTES) { + errors.push(`${file.name}: exceeds the ${Math.round(ATTACHMENT_MAX_TEXT_BYTES / 1024)}KB limit.`); + continue; + } + + try { + const text = await readAsText(file); + attachments.push({ + id: crypto.randomUUID(), + kind: 'text', + filename: sanitizeFilename(file.name), + mediaType: file.type || 'text/plain', + sizeBytes: file.size, + text, + }); + count++; + } catch { + errors.push(`${file.name}: failed to read file.`); + } + } + + return { attachments, errors }; +} diff --git a/packages/web/src/features/chat/components/chatBox/attachmentButton.tsx b/packages/web/src/features/chat/components/chatBox/attachmentButton.tsx new file mode 100644 index 000000000..fef235c06 --- /dev/null +++ b/packages/web/src/features/chat/components/chatBox/attachmentButton.tsx @@ -0,0 +1,54 @@ +'use client'; + +import { Button } from "@/components/ui/button"; +import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; +import { getAttachmentAcceptAttribute } from "@/features/chat/attachmentUtils"; +import { Paperclip } from "lucide-react"; +import { useRef } from "react"; + +interface AttachmentButtonProps { + onAddFiles: (files: File[]) => void; + disabled?: boolean; +} + +export const AttachmentButton = ({ onAddFiles, disabled }: AttachmentButtonProps) => { + const inputRef = useRef(null); + + return ( + <> + { + const files = e.target.files ? Array.from(e.target.files) : []; + if (files.length > 0) { + onAddFiles(files); + } + // Reset so selecting the same file again re-triggers onChange. + e.target.value = ''; + }} + /> + + + + + + Attach text files + + + + ) +} diff --git a/packages/web/src/features/chat/components/chatBox/attachmentTray.tsx b/packages/web/src/features/chat/components/chatBox/attachmentTray.tsx new file mode 100644 index 000000000..090320e92 --- /dev/null +++ b/packages/web/src/features/chat/components/chatBox/attachmentTray.tsx @@ -0,0 +1,61 @@ +'use client'; + +import { VscodeFileIcon } from "@/app/components/vscodeFileIcon"; +import { cn } from "@/lib/utils"; +import { X } from "lucide-react"; +import { useState } from "react"; +import { PendingAttachment } from "../../attachmentUtils"; +import { AttachmentViewerDialog } from "./attachmentViewerDialog"; + +interface AttachmentTrayProps { + attachments: PendingAttachment[]; + onRemove: (id: string) => void; + className?: string; +} + +export const AttachmentTray = ({ attachments, onRemove, className }: AttachmentTrayProps) => { + const [activeAttachment, setActiveAttachment] = useState(null); + + if (attachments.length === 0) { + return null; + } + + return ( + <> +
+ {attachments.map((attachment) => ( +
+ + +
+ ))} +
+ !open && setActiveAttachment(null)} + filename={activeAttachment?.filename} + text={activeAttachment?.text} + /> + + ) +} diff --git a/packages/web/src/features/chat/components/chatBox/attachmentViewerDialog.tsx b/packages/web/src/features/chat/components/chatBox/attachmentViewerDialog.tsx new file mode 100644 index 000000000..588a142df --- /dev/null +++ b/packages/web/src/features/chat/components/chatBox/attachmentViewerDialog.tsx @@ -0,0 +1,29 @@ +'use client'; + +import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog"; + +interface AttachmentViewerDialogProps { + filename?: string; + text?: string; + open: boolean; + onOpenChange: (open: boolean) => void; +} + +// Shared viewer for inspecting an inline-text attachment's contents. Used for +// both staged (not-yet-sent) and sent attachments. +export const AttachmentViewerDialog = ({ filename, text, open, onOpenChange }: AttachmentViewerDialogProps) => { + return ( + + + + + {filename} + + +
+                    {text}
+                
+
+
+ ) +} diff --git a/packages/web/src/features/chat/components/chatBox/chatBox.tsx b/packages/web/src/features/chat/components/chatBox/chatBox.tsx index e405e8266..e8a624cb8 100644 --- a/packages/web/src/features/chat/components/chatBox/chatBox.tsx +++ b/packages/web/src/features/chat/components/chatBox/chatBox.tsx @@ -3,13 +3,16 @@ import { VscodeFileIcon } from "@/app/components/vscodeFileIcon"; import { Button } from "@/components/ui/button"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; -import { CustomEditor, MentionElement, RenderElementPropsFor, SearchScope } from "@/features/chat/types"; +import { AttachmentData, CustomEditor, MentionElement, RenderElementPropsFor, SearchScope } from "@/features/chat/types"; import { insertMention, slateContentToString } from "@/features/chat/utils"; +import { PendingAttachment, readFilesAsAttachments, toAttachmentData } from "@/features/chat/attachmentUtils"; +import { AttachmentButton } from "./attachmentButton"; +import { AttachmentTray } from "./attachmentTray"; import { cn } from "@/lib/utils"; import { useIsMac } from "@/hooks/useIsMac"; import { computePosition, flip, offset, shift, VirtualElement } from "@floating-ui/react"; import { ArrowUp, Loader2, StopCircleIcon } from "lucide-react"; -import { Fragment, KeyboardEvent, memo, useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { forwardRef, Fragment, KeyboardEvent, memo, Ref, useCallback, useEffect, useImperativeHandle, useMemo, useRef, useState } from "react"; import { useHotkeys } from "react-hotkeys-hook"; import { Descendant, insertText } from "slate"; import { Editable, ReactEditor, RenderElementProps, RenderLeafProps, useFocused, useSelected, useSlate } from "slate-react"; @@ -28,8 +31,12 @@ import useCaptureEvent from "@/hooks/useCaptureEvent"; import { useHasEntitlement } from "@/features/entitlements/useHasEntitlement"; import { UpsellDialog } from "@/features/billing/upsellDialog"; +export interface ChatBoxHandle { + addFiles: (files: File[]) => void; +} + interface ChatBoxProps { - onSubmit: (children: Descendant[], editor: CustomEditor) => void; + onSubmit: (children: Descendant[], editor: CustomEditor, attachments: AttachmentData[]) => void; onStop?: () => void; preferredSuggestionsBoxPlacement?: "top-start" | "bottom-start"; className?: string; @@ -56,7 +63,7 @@ const ChatBoxComponent = ({ isAuthenticated, selectedSearchScopes, searchContexts, -}: ChatBoxProps) => { +}: ChatBoxProps, ref: Ref) => { const suggestionsBoxRef = useRef(null); const [index, setIndex] = useState(0); const editor = useSlate(); @@ -85,8 +92,41 @@ const ChatBoxComponent = ({ const isAskEnabled = useHasEntitlement('ask'); const [isLoginDialogOpen, setIsLoginDialogOpen] = useState(false); const [isUpsellDialogOpen, setIsUpsellDialogOpen] = useState(false); + const [attachments, setAttachments] = useState([]); const pathname = usePathname(); + const onAddFiles = useCallback(async (files: File[]) => { + if (files.length === 0) { + return; + } + + const { attachments: added, errors } = await readFilesAsAttachments(files, attachments.length); + if (added.length > 0) { + setAttachments((prev) => [...prev, ...added]); + } + if (errors.length > 0) { + toast({ + description: `⚠️ ${errors.join(' ')}`, + variant: "destructive", + }); + } + + // Return focus to the prompt input so the user can keep typing. + ReactEditor.focus(editor); + }, [attachments.length, toast, editor]); + + const removeAttachment = useCallback((id: string) => { + setAttachments((prev) => prev.filter((attachment) => attachment.id !== id)); + }, []); + + // Allow an ancestor pane-level drop zone to forward dropped files into this + // chat box (which owns attachment state). See `ChatPaneDropzone`. + useImperativeHandle(ref, () => ({ + addFiles: (files: File[]) => { + void onAddFiles(files); + }, + }), [onAddFiles]); + // Reset the index when the suggestion mode changes. useEffect(() => { setIndex(0); @@ -123,7 +163,7 @@ const ChatBoxComponent = ({ isSubmitDisabled: false, isSubmitDisabledReason: undefined, } => { - if (slateContentToString(editor.children).trim().length === 0) { + if (slateContentToString(editor.children).trim().length === 0 && attachments.length === 0) { return { isSubmitDisabled: true, isSubmitDisabledReason: "empty", @@ -157,7 +197,7 @@ const ChatBoxComponent = ({ isSubmitDisabledReason: undefined, } - }, [editor.children, isRedirecting, isTurnInProgress, selectedLanguageModel]) + }, [editor.children, isRedirecting, isTurnInProgress, selectedLanguageModel, attachments.length]) const { requiresLogin, @@ -202,7 +242,8 @@ const ChatBoxComponent = ({ return; } - _onSubmit(editor.children, editor); + _onSubmit(editor.children, editor, attachments.map(toAttachmentData)); + setAttachments([]); }, [ isSubmitDisabled, requiresLogin, @@ -212,7 +253,8 @@ const ChatBoxComponent = ({ isSubmitDisabledReason, toast, pathname, - captureEvent + captureEvent, + attachments ]); useEffect(() => { @@ -235,7 +277,8 @@ const ChatBoxComponent = ({ } sessionStorage.removeItem(PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY); - _onSubmit(children, editor); + // Attachments are not persisted across the login/upgrade redirect. + _onSubmit(children, editor, []); } catch (error) { console.error('Failed to restore pending chat submission:', error); sessionStorage.removeItem(PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY); @@ -364,6 +407,13 @@ const ChatBoxComponent = ({
+ {attachments.length > 0 && ( + + )} { + const files = event.clipboardData?.files ? Array.from(event.clipboardData.files) : []; + if (files.length > 0) { + event.preventDefault(); + void onAddFiles(files); + } + }} /> -
+
+ {isRedirecting ? ( - + {onRemove && ( + + )}
))}
diff --git a/packages/web/src/features/chat/components/chatBox/chatBox.tsx b/packages/web/src/features/chat/components/chatBox/chatBox.tsx index 38ee39ee1..1e6bac099 100644 --- a/packages/web/src/features/chat/components/chatBox/chatBox.tsx +++ b/packages/web/src/features/chat/components/chatBox/chatBox.tsx @@ -5,7 +5,7 @@ import { Button } from "@/components/ui/button"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { AttachmentData, CustomEditor, MentionElement, RenderElementPropsFor, SearchScope } from "@/features/chat/types"; import { insertMention, slateContentToString } from "@/features/chat/utils"; -import { createPastedTextAttachment, PendingAttachment, readFilesAsAttachments, shouldAutoConvertPaste, toAttachmentData } from "@/features/chat/attachmentUtils"; +import { createPastedTextAttachment, getSubmittedTextBytes, PendingAttachment, readFilesAsAttachments, shouldAutoConvertPaste, toAttachmentData } from "@/features/chat/attachmentUtils"; import { AttachmentButton } from "./attachmentButton"; import { AttachmentTray } from "./attachmentTray"; import { cn } from "@/lib/utils"; @@ -14,7 +14,7 @@ import { computePosition, flip, offset, shift, VirtualElement } from "@floating- import { ArrowUp, Loader2, StopCircleIcon } from "lucide-react"; import { forwardRef, Fragment, KeyboardEvent, memo, Ref, useCallback, useEffect, useImperativeHandle, useMemo, useRef, useState } from "react"; import { useHotkeys } from "react-hotkeys-hook"; -import { Descendant, Editor, insertText, Transforms } from "slate"; +import { Descendant, insertText } from "slate"; import { Editable, ReactEditor, RenderElementProps, RenderLeafProps, useFocused, useSelected, useSlate } from "slate-react"; import { useSelectedLanguageModel } from "../../useSelectedLanguageModel"; import { SuggestionBox } from "./suggestionsBox"; @@ -26,7 +26,7 @@ import { SearchContextQuery } from "@/lib/types"; import isEqual from "fast-deep-equal/react"; import { LoginDialog } from "./loginDialog"; import { usePathname } from "next/navigation"; -import { PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY } from "@/features/chat/constants"; +import { ATTACHMENT_MAX_TURN_TEXT_BYTES, PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY } from "@/features/chat/constants"; import useCaptureEvent from "@/hooks/useCaptureEvent"; import { useHasEntitlement } from "@/features/entitlements/useHasEntitlement"; import { UpsellDialog } from "@/features/billing/upsellDialog"; @@ -102,61 +102,60 @@ const ChatBoxComponent = ({ // this so the large-paste auto-conversion is skipped for that one paste. const rawPasteRequestedRef = useRef(false); - // Inserts text at the current selection, falling back to the end of the - // document if the editor has no selection (e.g. focus was lost after a - // toast action). - const insertTextInline = useCallback((text: string) => { - ReactEditor.focus(editor); - if (!editor.selection) { - Transforms.select(editor, Editor.end(editor, [])); + // Warning shown when prompt text + `nextAttachments` would exceed the per-turn + // budget, so an over-budget add surfaces immediately instead of just disabling submit. + const getOverBudgetWarning = useCallback((nextAttachments: PendingAttachment[]): string | null => { + const totalBytes = getSubmittedTextBytes(slateContentToString(editor.children), nextAttachments); + if (totalBytes <= ATTACHMENT_MAX_TURN_TEXT_BYTES) { + return null; } - insertText(editor, text); + return `Attachments exceed the ${Math.round(ATTACHMENT_MAX_TURN_TEXT_BYTES / 1024)}KB per-message limit. Remove a file or shorten your message to send.`; }, [editor]); const onAddPastedText = useCallback((text: string) => { - const result = createPastedTextAttachment(text, attachments); - if (!result.ok) { + const attachment = createPastedTextAttachment(text, attachments); + setAttachments((prev) => [...prev, attachment]); + + const overBudgetWarning = getOverBudgetWarning([...attachments, attachment]); + if (overBudgetWarning) { toast({ - description: `⚠️ ${result.error}`, + description: `⚠️ ${overBudgetWarning}`, variant: "destructive", }); - // Don't lose the user's paste: fall back to inserting it inline. - insertTextInline(text); - return; + } else { + toast({ + title: "Large paste added as an attachment", + duration: 5 * 1000, + className: "w-fit ml-auto", + description: `Use ${isMac ? "⌘+⇧+V" : "Ctrl+Shift+V"} to paste inline instead`, + }); } - const { attachment } = result; - setAttachments((prev) => [...prev, attachment]); - - toast({ - title: "Large paste added as an attachment", - duration: 5 * 1000, - className: "w-fit ml-auto", - description: `Use ${isMac ? "⌘+⇧+V" : "Ctrl+Shift+V"} to paste inline instead`, - }); - ReactEditor.focus(editor); - }, [attachments, editor, toast, isMac, insertTextInline]); + }, [attachments, editor, toast, isMac, getOverBudgetWarning]); const onAddFiles = useCallback(async (files: File[]) => { if (files.length === 0) { return; } - const { attachments: added, errors } = await readFilesAsAttachments(files, attachments.length); + const { attachments: added, errors } = await readFilesAsAttachments(files); if (added.length > 0) { setAttachments((prev) => [...prev, ...added]); } - if (errors.length > 0) { + + const overBudgetWarning = added.length > 0 ? getOverBudgetWarning([...attachments, ...added]) : null; + const messages = [...errors, ...(overBudgetWarning ? [overBudgetWarning] : [])]; + if (messages.length > 0) { toast({ - description: `⚠️ ${errors.join(' ')}`, + description: `⚠️ ${messages.join(' ')}`, variant: "destructive", }); } // Return focus to the prompt input so the user can keep typing. ReactEditor.focus(editor); - }, [attachments.length, toast, editor]); + }, [attachments, toast, editor, getOverBudgetWarning]); const removeAttachment = useCallback((id: string) => { setAttachments((prev) => prev.filter((attachment) => attachment.id !== id)); @@ -201,18 +200,27 @@ const ChatBoxComponent = ({ const { isSubmitDisabled, isSubmitDisabledReason } = useMemo((): { isSubmitDisabled: true, - isSubmitDisabledReason: "empty" | "redirecting" | "generating" | "no-language-model-selected" + isSubmitDisabledReason: "empty" | "too-large" | "redirecting" | "generating" | "no-language-model-selected" } | { isSubmitDisabled: false, isSubmitDisabledReason: undefined, } => { - if (slateContentToString(editor.children).trim().length === 0 && attachments.length === 0) { + const text = slateContentToString(editor.children); + if (text.trim().length === 0 && attachments.length === 0) { return { isSubmitDisabled: true, isSubmitDisabledReason: "empty", } } + // Single per-turn bound on the submitted text (prompt + attachments). + if (getSubmittedTextBytes(text, attachments) > ATTACHMENT_MAX_TURN_TEXT_BYTES) { + return { + isSubmitDisabled: true, + isSubmitDisabledReason: "too-large", + } + } + if (isRedirecting) { return { isSubmitDisabled: true, @@ -240,7 +248,7 @@ const ChatBoxComponent = ({ isSubmitDisabledReason: undefined, } - }, [editor.children, isRedirecting, isTurnInProgress, selectedLanguageModel, attachments.length]) + }, [editor.children, isRedirecting, isTurnInProgress, selectedLanguageModel, attachments]) const { requiresLogin, @@ -261,6 +269,11 @@ const ChatBoxComponent = ({ description: "⚠️ You must select a language model", variant: "destructive", }); + } else if (isSubmitDisabledReason === "too-large") { + toast({ + description: `⚠️ Message and attachments exceed the ${Math.round(ATTACHMENT_MAX_TURN_TEXT_BYTES / 1024)}KB per-message limit. Remove a file or shorten the text.`, + variant: "destructive", + }); } return; @@ -464,7 +477,7 @@ const ChatBoxComponent = ({ {(isRedirecting ? submittedAttachments : attachments).length > 0 && ( )} diff --git a/packages/web/src/features/chat/constants.ts b/packages/web/src/features/chat/constants.ts index 95b89e26e..1306bb7a5 100644 --- a/packages/web/src/features/chat/constants.ts +++ b/packages/web/src/features/chat/constants.ts @@ -13,12 +13,10 @@ export const PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY = 'pendingChatSubmissio export const DISABLED_MCP_SERVER_IDS_LOCAL_STORAGE_KEY = 'disabledMcpServerIds'; export const MCP_OAUTH_DRAFT_SESSION_STORAGE_KEY = 'mcpOAuthDraft'; -// Text attachment limits. Text is inlined into the message (and, for new -// threads, into the sessionStorage stash), so caps are kept conservative to -// bound `messages` JSON growth and stay well under the sessionStorage limit. -export const ATTACHMENT_MAX_TEXT_BYTES = 256 * 1024; // 256KB per file -export const ATTACHMENT_MAX_COUNT = 5; // per message -export const ATTACHMENT_MAX_FILENAME_LENGTH = 200; // characters +// Single upper bound on the total attachment text submitted per turn (text is +// inlined and re-emitted every turn). ~256KB ≈ 65-85K tokens: enough for a few +// files or a large log while leaving room for retrieval, history, and output. +export const ATTACHMENT_MAX_TURN_TEXT_BYTES = 256 * 1024; // 256KB per turn // A plain-text paste at or above either of these thresholds is automatically // converted into a text attachment instead of being inserted inline diff --git a/packages/web/src/features/chat/utils.ts b/packages/web/src/features/chat/utils.ts index 3f6742c19..15a4907ff 100644 --- a/packages/web/src/features/chat/utils.ts +++ b/packages/web/src/features/chat/utils.ts @@ -2,7 +2,7 @@ import { BrowseHighlightRange, getBrowsePath } from "@/app/(app)/browse/hooks/ut import { CreateUIMessage, isToolUIPart, TextUIPart, UIMessagePart } from "ai"; import type { ChatStatus, DynamicToolUIPart, ToolUIPart } from "ai"; import { Descendant, Editor, Point, Range, Transforms } from "slate"; -import { ANSWER_TAG, ATTACHMENT_MAX_FILENAME_LENGTH, FILE_REFERENCE_PREFIX, FILE_REFERENCE_REGEX } from "./constants"; +import { ANSWER_TAG, FILE_REFERENCE_PREFIX, FILE_REFERENCE_REGEX } from "./constants"; import { AttachmentData, CustomEditor, @@ -417,27 +417,29 @@ export const getUserMessageAttachments = (message: Pick) .map((part) => part.data); } -// Formats a user message's attachments into a delimited block suitable for -// inlining into that turn's content. Returns an empty string when there are no -// (text) attachments. `maxBytesPerAttachment` defensively truncates each -// attachment's text (defense-in-depth against an oversized client payload). -export const formatAttachmentsForPrompt = (attachments: AttachmentData[], maxBytesPerAttachment?: number): string => { +// Neutralizes ``/`` sequences in a body so it can't +// close its own wrapper early. Unrelated markup (e.g. `
`) is left intact. +const escapeAttachmentBody = (text: string): string => { + return text.replace(/<(\/attachments?>)/gi, '<$1'); +} + +// Formats a user message's text attachments into a delimited block to inline +// into the turn's content. Returns '' when there are none. Size is bounded at +// submit, so nothing is truncated here. +export const formatAttachmentsForPrompt = (attachments: AttachmentData[]): string => { const textAttachments = attachments.filter((attachment) => attachment.kind === 'text'); if (textAttachments.length === 0) { return ''; } const blocks = textAttachments.map((attachment) => { - const text = maxBytesPerAttachment !== undefined - ? attachment.text.slice(0, maxBytesPerAttachment) - : attachment.text; - // Defense-in-depth: keep the filename on a single line, escape quotes, - // and cap its length so a crafted client can't break the tag or bloat - // the prompt (the client also sanitizes via sanitizeFilename). + const text = escapeAttachmentBody(attachment.text); + // Keep the filename on a single line and escape quotes so the body + // can't break out of the tag (the client also sanitizes via + // sanitizeFilename). const filename = attachment.filename .replace(/\s+/g, ' ') - .replace(/"/g, '"') - .slice(0, ATTACHMENT_MAX_FILENAME_LENGTH); + .replace(/"/g, '"'); return `\n${text}\n`; }); From cb8181df5c0fbb16e429100d1a64a74e472634c2 Mon Sep 17 00:00:00 2001 From: whoisthey Date: Sat, 27 Jun 2026 15:43:55 -0700 Subject: [PATCH 19/19] pass attachments through the login/upgrade redirect --- .../features/chat/components/chatBox/chatBox.tsx | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/packages/web/src/features/chat/components/chatBox/chatBox.tsx b/packages/web/src/features/chat/components/chatBox/chatBox.tsx index 1e6bac099..1bd33235f 100644 --- a/packages/web/src/features/chat/components/chatBox/chatBox.tsx +++ b/packages/web/src/features/chat/components/chatBox/chatBox.tsx @@ -282,7 +282,7 @@ const ChatBoxComponent = ({ if (requiresLogin) { sessionStorage.setItem( PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY, - JSON.stringify({ pathname, children: editor.children }), + JSON.stringify({ pathname, children: editor.children, attachments: attachments.map(toAttachmentData) }), ); captureEvent('wa_askgh_login_wall_prompted', {}); setIsLoginDialogOpen(true); @@ -292,7 +292,7 @@ const ChatBoxComponent = ({ if (requiresUpgrade) { sessionStorage.setItem( PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY, - JSON.stringify({ pathname, children: editor.children }), + JSON.stringify({ pathname, children: editor.children, attachments: attachments.map(toAttachmentData) }), ); setIsUpsellDialogOpen(true); return; @@ -328,14 +328,17 @@ const ChatBoxComponent = ({ } try { - const { pathname: storedPathname, children } = JSON.parse(stored) as { pathname: string; children: Descendant[] }; + const { pathname: storedPathname, children, attachments: storedAttachments = [] } = JSON.parse(stored) as { + pathname: string; + children: Descendant[]; + attachments?: AttachmentData[]; + }; if (storedPathname !== pathname) { return; } sessionStorage.removeItem(PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY); - // Attachments are not persisted across the login/upgrade redirect. - _onSubmit(children, editor, []); + _onSubmit(children, editor, storedAttachments); } catch (error) { console.error('Failed to restore pending chat submission:', error); sessionStorage.removeItem(PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY);