Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions pkg/attachment/attachment.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Package attachment provides MIME-aware routing for document attachments.
//
// It defines how a chat.Document should be sent to a model: either dropped
// (unsupported), wrapped in a plain-text envelope (StrategyTXT), or encoded
// as inline base64 data (StrategyB64).
package attachment

import (
"fmt"
"strings"
"unicode"

"github.com/docker/docker-agent/pkg/attachment/modelcaps"
"github.com/docker/docker-agent/pkg/chat"
)

// Strategy describes how an attachment should be handled before sending to the
// provider. Decide returns one of the Strategy constants below for a document.
type Strategy int

const (
// StrategyDrop means the attachment should be skipped. Decide returns it,
// together with a human-readable reason, when the model does not support
// the document's MIME type or when the document has no inline content.
// Being the first iota value, it is also the zero value of Strategy.
// NOTE(review): callers are expected to log the reason when dropping; that
// happens outside this file — confirm.
StrategyDrop Strategy = iota

// StrategyTXT means the attachment should be wrapped in a TXTEnvelope and
// sent as plain text. Decide selects it when Source.InlineText is non-empty
// and Source.InlineData is empty; the MIME type only has to be supported by
// the model, Decide does not itself require a text/* type.
StrategyTXT

// StrategyB64 means the attachment content (Source.InlineData) should be
// base64-encoded and sent inline. Decide selects it whenever InlineData is
// non-empty, even if InlineText is also set.
// NOTE(review): the encoding and the provider-specific image/document block
// are produced by callers, not by this package — confirm.
StrategyB64
)

// Decide picks the routing Strategy for doc given the capabilities mc of the
// current model. The second result is a human-readable explanation that is
// only non-empty when the strategy is StrategyDrop.
//
// The checks run in this order and the first match wins:
//  1. mc does not support doc.MimeType → (StrategyDrop, reason naming the type).
//  2. doc.Source.InlineData is non-empty → (StrategyB64, "").
//  3. doc.Source.InlineText is non-empty → (StrategyTXT, "").
//  4. nothing inline at all → (StrategyDrop, "no inline content").
//
// Binary data therefore takes precedence over text when both are present.
func Decide(doc chat.Document, mc modelcaps.ModelCapabilities) (Strategy, string) {
	switch {
	case !mc.Supports(doc.MimeType):
		return StrategyDrop, fmt.Sprintf("model does not support MIME type %q", doc.MimeType)
	case len(doc.Source.InlineData) != 0:
		return StrategyB64, ""
	case doc.Source.InlineText != "":
		return StrategyTXT, ""
	default:
		return StrategyDrop, "no inline content"
	}
}

// TXTEnvelope wraps body in an XML-like element whose tag is "document-"
// followed by a slug of name and mimeType. Because the tag is derived from
// both values, ordinary content is unlikely to contain the matching closing
// tag by accident. The body is inserted verbatim: it is not escaped.
//
// The opening tag, the body and the closing tag are each separated by a
// single newline. For a document named "report.md" with MIME type
// "text/markdown" the result is:
//
//	<document-report-md-text-markdown>
//	…body…
//	</document-report-md-text-markdown>
func TXTEnvelope(name, mimeType, body string) string {
	tag := "document-" + slugify(name+"-"+mimeType)
	return "<" + tag + ">\n" + body + "\n</" + tag + ">"
}

// slugify lowercases s and reduces it to runs of letters and digits joined by
// single hyphens. Every maximal run of other runes acts as one separator, and
// separators at the start or end of s produce no hyphen at all. Letters and
// digits are classified with the unicode package, so non-ASCII letters are
// kept. When s contains no letter or digit, the fallback "doc" is returned.
func slugify(s string) string {
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	// FieldsFunc drops empty fields, which both collapses consecutive
	// separators and trims leading/trailing ones.
	words := strings.FieldsFunc(strings.ToLower(s), isSeparator)
	if len(words) == 0 {
		return "doc"
	}
	return strings.Join(words, "-")
}
183 changes: 183 additions & 0 deletions pkg/attachment/decide_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
package attachment_test

import (
"strings"
"testing"

"github.com/docker/docker-agent/pkg/attachment"
"github.com/docker/docker-agent/pkg/attachment/modelcaps"
"github.com/docker/docker-agent/pkg/chat"
)

// visionCaps returns the ModelCapabilities built by modelcaps.CapsWith(true, true).
// NOTE(review): judging by the names of the sibling helpers, the two flags are
// presumably (image support, PDF support) — confirm against modelcaps.CapsWith.
func visionCaps() modelcaps.ModelCapabilities {
return modelcaps.CapsWith(true, true)
}

// textOnlyCaps returns the ModelCapabilities built by
// modelcaps.CapsWith(false, false), i.e. with both capability flags disabled.
// NOTE(review): the flags are presumably (image support, PDF support) — confirm.
func textOnlyCaps() modelcaps.ModelCapabilities {
return modelcaps.CapsWith(false, false)
}

// imageNoPDFCaps returns the ModelCapabilities built by
// modelcaps.CapsWith(true, false).
// NOTE(review): going by the helper's name, the first flag presumably enables
// image support and the second PDF support — confirm against modelcaps.CapsWith.
func imageNoPDFCaps() modelcaps.ModelCapabilities {
return modelcaps.CapsWith(true, false)
}

// TestDecide runs attachment.Decide over a table of documents and model
// capabilities and checks both results: the chosen strategy and the reason.
//
// The reason is always verified. When wantReasonHas is set the reason must
// contain it; when it is empty the reason must be empty too, matching the
// documented ("", non-drop) contract of Decide.
func TestDecide(t *testing.T) {
	tests := []struct {
		name          string
		doc           chat.Document
		caps          modelcaps.ModelCapabilities
		wantStrategy  attachment.Strategy
		wantReasonHas string // non-empty: reason must contain this substring; empty: reason must be empty
	}{
		{
			name: "b64 image supported",
			doc: chat.Document{
				Name:     "photo.jpg",
				MimeType: "image/jpeg",
				Source:   chat.DocumentSource{InlineData: []byte{0xFF, 0xD8}},
			},
			caps:         visionCaps(),
			wantStrategy: attachment.StrategyB64,
		},
		{
			name: "txt text plain",
			doc: chat.Document{
				Name:     "notes.txt",
				MimeType: "text/plain",
				Source:   chat.DocumentSource{InlineText: "hello world"},
			},
			caps:         textOnlyCaps(),
			wantStrategy: attachment.StrategyTXT,
		},
		{
			name: "drop image when model has no vision",
			doc: chat.Document{
				Name:     "photo.jpg",
				MimeType: "image/jpeg",
				Source:   chat.DocumentSource{InlineData: []byte{0xFF, 0xD8}},
			},
			caps:          textOnlyCaps(),
			wantStrategy:  attachment.StrategyDrop,
			wantReasonHas: "does not support MIME type",
		},
		{
			name: "drop pdf when model has no pdf support",
			doc: chat.Document{
				Name:     "doc.pdf",
				MimeType: "application/pdf",
				Source:   chat.DocumentSource{InlineData: []byte{0x25, 0x50, 0x44, 0x46}},
			},
			caps:          imageNoPDFCaps(),
			wantStrategy:  attachment.StrategyDrop,
			wantReasonHas: "does not support MIME type",
		},
		{
			name: "drop no inline content",
			doc: chat.Document{
				Name:     "empty.md",
				MimeType: "text/markdown",
				Source:   chat.DocumentSource{},
			},
			caps:          textOnlyCaps(),
			wantStrategy:  attachment.StrategyDrop,
			wantReasonHas: "no inline content",
		},
		{
			name: "b64 pdf when pdf supported",
			doc: chat.Document{
				Name:     "spec.pdf",
				MimeType: "application/pdf",
				Source:   chat.DocumentSource{InlineData: []byte{0x25, 0x50, 0x44, 0x46}},
			},
			caps:         visionCaps(),
			wantStrategy: attachment.StrategyB64,
		},
		{
			name: "drop office doc (DOCX is binary, not supported without models.dev office modality)",
			doc: chat.Document{
				Name:     "report.docx",
				MimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
				Source:   chat.DocumentSource{InlineData: []byte{0x50, 0x4B}}, // ZIP magic bytes
			},
			caps:          visionCaps(), // even full caps can't send DOCX — no modality
			wantStrategy:  attachment.StrategyDrop,
			wantReasonHas: "does not support MIME type",
		},
		{
			name: "b64 wins over txt when both inline sources present",
			doc: chat.Document{
				Name:     "data.txt",
				MimeType: "text/plain",
				Source:   chat.DocumentSource{InlineData: []byte("hello"), InlineText: "hello"},
			},
			caps:         textOnlyCaps(),
			wantStrategy: attachment.StrategyB64,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			gotStrategy, gotReason := attachment.Decide(tc.doc, tc.caps)
			if gotStrategy != tc.wantStrategy {
				t.Errorf("strategy: got %d, want %d", gotStrategy, tc.wantStrategy)
			}
			if tc.wantReasonHas == "" {
				// No reason expected: a stray explanation alongside a
				// non-drop strategy would mislead callers that log it.
				if gotReason != "" {
					t.Errorf("reason: got %q, want empty", gotReason)
				}
				return
			}
			if !strings.Contains(gotReason, tc.wantReasonHas) {
				t.Errorf("reason %q does not contain %q", gotReason, tc.wantReasonHas)
			}
		})
	}
}

// TestTXTEnvelope is a smoke test for attachment.TXTEnvelope: the result must
// open with a "<document-" tag, carry the body, and contain a "</document-"
// closing tag. Every failed check is reported; none aborts the test.
func TestTXTEnvelope(t *testing.T) {
	envelope := attachment.TXTEnvelope("readme.md", "text/markdown", "# Hello")

	checks := []struct {
		ok     bool
		format string
	}{
		// Tag must start with "document-" followed by a slug of name+mimeType.
		{strings.HasPrefix(envelope, "<document-"), "TXTEnvelope: expected tag to start with <document-, got %q"},
		// Body must be present.
		{strings.Contains(envelope, "# Hello"), "TXTEnvelope: body not found in %q"},
		// A closing tag must be present.
		{strings.Contains(envelope, "</document-"), "TXTEnvelope: expected closing tag, got %q"},
	}
	for _, check := range checks {
		if !check.ok {
			t.Errorf(check.format, envelope)
		}
	}
}

// TestTXTEnvelope_UniqueTag checks that envelopes built for different
// name+MIME pairs differ, and that each envelope ends with a closing tag that
// repeats its opening tag verbatim while still containing the body.
func TestTXTEnvelope_UniqueTag(t *testing.T) {
	// Both envelopes share the same body, so any difference between them can
	// only come from the tag derived from name and MIME type.
	reportEnv := attachment.TXTEnvelope("report.md", "text/markdown", "body")
	notesEnv := attachment.TXTEnvelope("notes.txt", "text/plain", "body")
	if reportEnv == notesEnv {
		t.Error("TXTEnvelope produced identical tags for different name+MIME combinations")
	}

	type sample struct {
		name, mime, body string
	}
	samples := []sample{
		{name: "report.md", mime: "text/markdown", body: "hello"},
		{name: "my file.txt", mime: "text/plain", body: "world"},
		{name: "data", mime: "text/csv", body: "a,b,c"},
	}
	for _, s := range samples {
		envelope := attachment.TXTEnvelope(s.name, s.mime, s.body)

		// The opening tag name is everything between the leading '<' and the
		// first '>', e.g. "document-report-md-text-markdown".
		end := strings.IndexByte(envelope, '>')
		if end < 0 {
			t.Fatalf("no closing > in envelope: %q", envelope)
		}
		wantClose := "</" + envelope[1:end] + ">"

		trimmed := strings.TrimSpace(envelope)
		if !strings.HasSuffix(trimmed, wantClose) {
			t.Errorf("envelope missing matching close tag %q in %q", wantClose, envelope)
		}
		if !strings.Contains(envelope, s.body) {
			t.Errorf("body %q not found in envelope %q", s.body, envelope)
		}
	}
}
Loading
Loading