diff --git a/pkg/chat/attach.go b/pkg/chat/attach.go
new file mode 100644
index 000000000..b0f98bd4c
--- /dev/null
+++ b/pkg/chat/attach.go
@@ -0,0 +1,268 @@
+package chat
+
+// Package-level attach-time processing pipeline.
+//
+// The attach-time pipeline runs once when a user adds a file or image to a
+// message. It produces a fully-resolved [Document] whose Source.InlineData or
+// Source.InlineText is populated. Provider-level convertDocument functions then
+// consume the Document at inference time — they never perform I/O or resizing.
+//
+//	producer (app.go / runner.go)
+//	  └─ ProcessAttachment(ctx, part) → Document
+//	       └─ session.UserMessage(…, MessagePart{Type: MessagePartTypeDocument, Document: &doc})
+//	            └─ per-provider convertDocument(ctx, doc, modelID) → wire format
+
+import (
+	"context"
+	"encoding/base64"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+const (
+	// MaxInlineBinarySize is the maximum byte size of a binary file (PDF, image,
+	// etc.) that will be read into memory for inline attachment.
+	MaxInlineBinarySize = 20 * 1024 * 1024 // 20 MB
+)
+
+// ProcessAttachment converts a raw [MessagePart] into a [Document] with fully
+// resolved Source.InlineData or Source.InlineText. It is called once when a
+// message is assembled — never at inference time.
+//
+// Supported input types:
+//
+//   - [MessagePartTypeFile]: reads the file from the local filesystem, detects
+//     its MIME type, and either inlines text content (text/* files) or reads
+//     binary bytes (images are transcoded+resized via [ResizeImage]; PDFs and
+//     other supported types are read verbatim).
+//
+//   - [MessagePartTypeImageURL]: handles data: URIs (decoded inline). Remote
+//     http(s):// URLs are not supported; callers should download the file
+//     locally first and pass it as a [MessagePartTypeFile] instead.
+//
+//   - [MessagePartTypeDocument]: if Source.InlineData or Source.InlineText is
+//     already set, the document is returned as-is after applying image
+//     transcoding to any image/* InlineData. A Document with no inline content
+//     is an error.
+func ProcessAttachment(_ context.Context, part MessagePart) (Document, error) {
+	doc, _, err := ProcessAttachmentWithMetadata(part)
+	return doc, err
+}
+
+// ProcessAttachmentWithMetadata is like [ProcessAttachment] but also returns
+// the [ImageResizeResult] when the attachment was an image that went through
+// [ResizeImage]. The metadata is nil for non-image attachments.
+//
+// Callers that need to emit a dimension note (for model coordinate-mapping)
+// should use this variant and call [FormatDimensionNote] on the returned
+// metadata.
+func ProcessAttachmentWithMetadata(part MessagePart) (Document, *ImageResizeResult, error) {
+	switch part.Type {
+	case MessagePartTypeFile:
+		return processFilePart(part)
+	case MessagePartTypeImageURL:
+		return processImageURLPart(part)
+	case MessagePartTypeDocument:
+		return processDocumentPart(part)
+	default:
+		return Document{}, nil, fmt.Errorf("ProcessAttachment: unsupported part type %q", part.Type)
+	}
+}
+
+// processFilePart handles MessagePartTypeFile: reads from disk, detects MIME,
+// routes to text-inline or binary-inline as appropriate.
+func processFilePart(part MessagePart) (Document, *ImageResizeResult, error) {
+	if part.File == nil {
+		return Document{}, nil, errors.New("ProcessAttachment: file part has nil File field")
+	}
+	absPath := part.File.Path
+	name := filepath.Base(absPath)
+
+	fi, err := os.Stat(absPath)
+	if err != nil {
+		return Document{}, nil, fmt.Errorf("ProcessAttachment: cannot stat %q: %w", absPath, err)
+	}
+	if !fi.Mode().IsRegular() {
+		return Document{}, nil, fmt.Errorf("ProcessAttachment: %q is not a regular file", absPath)
+	}
+
+	mimeType := DetectMimeType(absPath)
+
+	// Route by MIME type. Note: MIME-type check must precede IsTextFile because
+	// some binary formats (e.g. PDF) may pass the text heuristic when the file
+	// content happens to be printable ASCII.
+	switch {
+	case IsImageMimeType(mimeType):
+		if fi.Size() > MaxInlineBinarySize {
+			return Document{}, nil, fmt.Errorf("ProcessAttachment: image file %q too large to inline (%d bytes, max %d)", absPath, fi.Size(), MaxInlineBinarySize)
+		}
+		data, err := os.ReadFile(absPath)
+		if err != nil {
+			return Document{}, nil, fmt.Errorf("ProcessAttachment: read image %q: %w", absPath, err)
+		}
+		return transcodeImageWithMeta(name, data, mimeType)
+
+	case mimeType == "application/pdf" || (IsSupportedMimeType(mimeType) && !IsTextFile(absPath)):
+		// PDF and other supported binary types — read verbatim.
+		// The !IsTextFile guard ensures that binary formats whose extension
+		// is unknown but content is ASCII-printable are not incorrectly inlined.
+		if fi.Size() > MaxInlineBinarySize {
+			return Document{}, nil, fmt.Errorf("ProcessAttachment: binary file %q too large to inline (%d bytes, max %d)", absPath, fi.Size(), MaxInlineBinarySize)
+		}
+		data, err := os.ReadFile(absPath)
+		if err != nil {
+			return Document{}, nil, fmt.Errorf("ProcessAttachment: read binary file %q: %w", absPath, err)
+		}
+		return Document{
+			Name:     name,
+			MimeType: mimeType,
+			Size:     int64(len(data)),
+			Source:   DocumentSource{InlineData: data},
+		}, nil, nil
+
+	case IsTextFile(absPath):
+		if fi.Size() > MaxInlineFileSize {
+			return Document{}, nil, fmt.Errorf("ProcessAttachment: text file %q too large to inline (%d bytes, max %d)", absPath, fi.Size(), MaxInlineFileSize)
+		}
+		content, err := ReadFileForInline(absPath)
+		if err != nil {
+			return Document{}, nil, fmt.Errorf("ProcessAttachment: read text file %q: %w", absPath, err)
+		}
+		return Document{
+			Name:     name,
+			MimeType: mimeType,
+			Size:     fi.Size(),
+			Source:   DocumentSource{InlineText: content},
+		}, nil, nil
+
+	default:
+		// Unknown binary — read verbatim and let modelcaps gate it at inference time.
+		if fi.Size() > MaxInlineBinarySize {
+			return Document{}, nil, fmt.Errorf("ProcessAttachment: file %q too large to inline (%d bytes, max %d)", absPath, fi.Size(), MaxInlineBinarySize)
+		}
+		data, err := os.ReadFile(absPath)
+		if err != nil {
+			return Document{}, nil, fmt.Errorf("ProcessAttachment: read file %q: %w", absPath, err)
+		}
+		return Document{
+			Name:     name,
+			MimeType: mimeType,
+			Size:     int64(len(data)),
+			Source:   DocumentSource{InlineData: data},
+		}, nil, nil
+	}
+}
+
+// processImageURLPart handles MessagePartTypeImageURL.
+// Only data: URIs are supported; remote http(s):// URLs are rejected.
+// Callers with a remote URL should download the file locally first and
+// pass it as a MessagePartTypeFile instead.
+func processImageURLPart(part MessagePart) (Document, *ImageResizeResult, error) {
+	if part.ImageURL == nil {
+		return Document{}, nil, errors.New("ProcessAttachment: image-url part has nil ImageURL field")
+	}
+	rawURL := part.ImageURL.URL
+
+	switch {
+	case strings.HasPrefix(rawURL, "data:"):
+		mimeType, data, err := parseDataURI(rawURL)
+		if err != nil {
+			return Document{}, nil, fmt.Errorf("ProcessAttachment: parse data URI: %w", err)
+		}
+		// When content detection returns an image type, prefer it over the
+		// declared MIME. (Only image types are trusted from the sniffer.)
+		if detected := DetectMimeTypeByContent(data); IsImageMimeType(detected) {
+			mimeType = detected
+		}
+		return transcodeImageWithMeta("image", data, mimeType)
+
+	case strings.HasPrefix(rawURL, "http://") || strings.HasPrefix(rawURL, "https://"):
+		return Document{}, nil, errors.New("attachment: remote URLs are not supported; download the file locally first")
+
+	default:
+		return Document{}, nil, fmt.Errorf("attachment: unsupported image URL scheme: %q", rawURL)
+	}
+}
+
+// processDocumentPart handles MessagePartTypeDocument.
+// Images with InlineData are transcoded; other already-resolved documents pass through.
+func processDocumentPart(part MessagePart) (Document, *ImageResizeResult, error) {
+	if part.Document == nil {
+		return Document{}, nil, errors.New("ProcessAttachment: document part has nil Document field")
+	}
+	doc := *part.Document
+
+	if len(doc.Source.InlineData) > 0 {
+		if IsImageMimeType(doc.MimeType) {
+			return transcodeImageWithMeta(doc.Name, doc.Source.InlineData, doc.MimeType)
+		}
+		return doc, nil, nil
+	}
+
+	if doc.Source.InlineText != "" {
+		return doc, nil, nil
+	}
+
+	return Document{}, nil, fmt.Errorf("ProcessAttachment: document %q has no inline content (InlineData and InlineText are both empty)", doc.Name)
+}
+
+// transcodeImageWithMeta runs bytes through ResizeImage to normalise the image
+// to JPEG or PNG within provider limits, then wraps the result in a Document.
+// Returns the [ImageResizeResult] so callers can emit dimension notes.
+func transcodeImageWithMeta(name string, data []byte, mimeType string) (Document, *ImageResizeResult, error) {
+	result, err := ResizeImage(data, mimeType)
+	if err != nil {
+		return Document{}, nil, fmt.Errorf("ProcessAttachment: transcode image %q: %w", name, err)
+	}
+	return Document{
+		Name:     name,
+		MimeType: result.MimeType,
+		Size:     int64(len(result.Data)),
+		Source:   DocumentSource{InlineData: result.Data},
+	}, result, nil
+}
+
+// parseDataURI parses a base64 data URI of the form
+// "data:{mediatype}[;param=value];base64,{payload}" (RFC 2397).
+//
+// It returns the media type (lower-cased, parameters stripped, defaulting to
+// "application/octet-stream" when omitted) and the decoded payload bytes. The
+// media type and the ";base64" marker are matched case-insensitively, so
+// "data:IMAGE/PNG;BASE64,…" is accepted and reported as "image/png". URIs
+// without the ";base64" marker (percent-encoded payloads) are rejected.
+func parseDataURI(uri string) (mimeType string, data []byte, err error) {
+	rest, ok := strings.CutPrefix(uri, "data:")
+	if !ok {
+		return "", nil, errors.New("not a data URI")
+	}
+
+	header, payload, ok := strings.Cut(rest, ",")
+	if !ok {
+		return "", nil, errors.New("data URI missing comma separator")
+	}
+
+	// Header is "{mediatype}[;charset=…];base64", or a bare "{mediatype}" for
+	// percent-encoded payloads (unsupported here). Fold case first: both the
+	// media type and the base64 marker are case-insensitive.
+	header = strings.ToLower(header)
+	if !strings.HasSuffix(header, ";base64") {
+		return "", nil, errors.New("data URI is not base64-encoded (only base64 data URIs are supported)")
+	}
+	mimeType = strings.TrimSuffix(header, ";base64")
+
+	// Drop any parameters (e.g. "image/png;charset=utf-8" → "image/png").
+	mimeType, _, _ = strings.Cut(mimeType, ";")
+	mimeType = strings.TrimSpace(mimeType)
+	if mimeType == "" {
+		mimeType = "application/octet-stream"
+	}
+
+	data, err = base64.StdEncoding.DecodeString(payload)
+	if err != nil {
+		return "", nil, fmt.Errorf("base64 decode: %w", err)
+	}
+	return mimeType, data, nil
+}
diff --git a/pkg/chat/attach_test.go b/pkg/chat/attach_test.go
new file mode 100644
index 000000000..f2a484b95
--- /dev/null
+++ b/pkg/chat/attach_test.go
@@ -0,0 +1,374 @@
+package chat_test
+
+import (
+	"bytes"
+	"encoding/base64"
+	"image"
+	"image/color"
+	"image/jpeg"
+	"image/png"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/chat"
+)
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Helpers
+// ──────────────────────────────────────────────────────────────────────────────
+
+func encodeJPEGBytes(w, h int) []byte {
+	img := image.NewRGBA(image.Rect(0, 0, w, h))
+	for y := range h {
+		for x := range w {
+			img.Set(x, y, color.RGBA{R: 200, G: 100, B: 50, A: 255})
+		}
+	}
+	var buf bytes.Buffer
+	if err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: 80}); err != nil {
+		panic(err)
+	}
+	return buf.Bytes()
+}
+
+func encodePNGBytes(w, h int, alpha bool) []byte {
+	if alpha {
+		img := image.NewNRGBA(image.Rect(0, 0, w, h))
+		for y := range h {
+			for x := range w {
+				img.Set(x, y, color.NRGBA{R: 0, G: 128, B: 255, A: 128})
+			}
+		}
+		var buf bytes.Buffer
+		if err := png.Encode(&buf, img); err != nil {
+			panic(err)
+		}
+		return buf.Bytes()
+	}
+	img := image.NewRGBA(image.Rect(0, 0, w, h))
+	for y := range h {
+		for x := range w {
+			img.Set(x, y, color.RGBA{R: 0, G: 128, B: 255, A: 255})
+		}
+	}
+	var buf bytes.Buffer
+	if err := png.Encode(&buf, img); err != nil {
+		panic(err)
+	}
+	return buf.Bytes()
+}
+
+func writeTempFile(t *testing.T, ext string, data []byte) string {
+	t.Helper()
+	f, err := os.CreateTemp(t.TempDir(), "attach-*"+ext)
+	require.NoError(t, err)
+	_, err = f.Write(data)
+	require.NoError(t, err)
+	require.NoError(t, f.Close())
+	return f.Name()
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// ProcessAttachment — MessagePartTypeFile
+// ──────────────────────────────────────────────────────────────────────────────
+
+func TestProcessAttachment_JPEG_Passthrough(t *testing.T) {
+	data := encodeJPEGBytes(100, 100)
+	path := writeTempFile(t, ".jpg", data)
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: &chat.MessageFile{Path: path, MimeType: "image/jpeg"},
+	})
+	require.NoError(t, err)
+	assert.Equal(t, "image/jpeg", doc.MimeType)
+	assert.NotEmpty(t, doc.Source.InlineData)
+	assert.Empty(t, doc.Source.InlineText)
+	assert.Equal(t, filepath.Base(path), doc.Name)
+}
+
+func TestProcessAttachment_PNG_Passthrough(t *testing.T) {
+	data := encodePNGBytes(100, 100, false)
+	path := writeTempFile(t, ".png", data)
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: &chat.MessageFile{Path: path, MimeType: "image/png"},
+	})
+	require.NoError(t, err)
+	assert.Equal(t, "image/png", doc.MimeType)
+	assert.NotEmpty(t, doc.Source.InlineData)
+}
+
+func TestProcessAttachment_PNG_WithAlpha_StaysPNG(t *testing.T) {
+	data := encodePNGBytes(100, 100, true)
+	path := writeTempFile(t, ".png", data)
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: &chat.MessageFile{Path: path, MimeType: "image/png"},
+	})
+	require.NoError(t, err)
+	assert.True(t, doc.MimeType == "image/png" || doc.MimeType == "image/jpeg",
+		"expected png or jpeg, got %q", doc.MimeType)
+	assert.NotEmpty(t, doc.Source.InlineData)
+}
+
+func TestProcessAttachment_ImageTooLarge_Resized(t *testing.T) {
+	bigData := encodeJPEGBytes(chat.MaxImageDimension+200, chat.MaxImageDimension+200)
+	path := writeTempFile(t, ".jpg", bigData)
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: &chat.MessageFile{Path: path, MimeType: "image/jpeg"},
+	})
+	require.NoError(t, err)
+	assert.NotEmpty(t, doc.Source.InlineData)
+
+	img, _, decErr := image.Decode(bytes.NewReader(doc.Source.InlineData))
+	require.NoError(t, decErr)
+	b := img.Bounds()
+	assert.LessOrEqual(t, b.Dx(), chat.MaxImageDimension)
+	assert.LessOrEqual(t, b.Dy(), chat.MaxImageDimension)
+}
+
+func TestProcessAttachment_PDF_Passthrough(t *testing.T) {
+	pdfBytes := []byte("%PDF-1.4 fake pdf content for testing")
+	path := writeTempFile(t, ".pdf", pdfBytes)
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: &chat.MessageFile{Path: path, MimeType: "application/pdf"},
+	})
+	require.NoError(t, err)
+	assert.Equal(t, "application/pdf", doc.MimeType)
+	assert.Equal(t, pdfBytes, doc.Source.InlineData)
+	assert.Empty(t, doc.Source.InlineText)
+}
+
+func TestProcessAttachment_BinaryFileTooLarge_Error(t *testing.T) {
+	// Sparse file: Stat.Size > MaxInlineBinarySize without allocating memory.
+	path := writeTempFile(t, ".pdf", nil)
+	f, err := os.OpenFile(path, os.O_WRONLY, 0o600)
+	require.NoError(t, err)
+	require.NoError(t, f.Truncate(chat.MaxInlineBinarySize+1))
+	require.NoError(t, f.Close())
+
+	_, err = chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: &chat.MessageFile{Path: path},
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "too large")
+}
+
+func TestProcessAttachment_TextFile_InlineText(t *testing.T) {
+	content := "Hello, this is a text file.\nLine 2."
+	path := writeTempFile(t, ".txt", []byte(content))
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: &chat.MessageFile{Path: path, MimeType: "text/plain"},
+	})
+	require.NoError(t, err)
+	assert.Empty(t, doc.Source.InlineData)
+	assert.Contains(t, doc.Source.InlineText, content)
+}
+
+func TestProcessAttachment_MarkdownFile_InlineText(t *testing.T) {
+	content := "# Title\n\nBody paragraph."
+	path := writeTempFile(t, ".md", []byte(content))
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: &chat.MessageFile{Path: path},
+	})
+	require.NoError(t, err)
+	assert.Empty(t, doc.Source.InlineData)
+	assert.Contains(t, doc.Source.InlineText, content)
+}
+
+func TestProcessAttachment_MissingFile_Error(t *testing.T) {
+	_, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: &chat.MessageFile{Path: "/nonexistent/path/file.jpg"},
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "cannot stat")
+}
+
+func TestProcessAttachment_NilFile_Error(t *testing.T) {
+	_, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeFile,
+		File: nil,
+	})
+	require.Error(t, err)
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// ProcessAttachment — MessagePartTypeImageURL
+// ──────────────────────────────────────────────────────────────────────────────
+
+func TestProcessAttachment_DataURI_JPEG(t *testing.T) {
+	jpegData := encodeJPEGBytes(50, 50)
+	dataURI := "data:image/jpeg;base64," + base64.StdEncoding.EncodeToString(jpegData)
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type:     chat.MessagePartTypeImageURL,
+		ImageURL: &chat.MessageImageURL{URL: dataURI},
+	})
+	require.NoError(t, err)
+	assert.NotEmpty(t, doc.Source.InlineData)
+	assert.True(t, doc.MimeType == "image/jpeg" || doc.MimeType == "image/png")
+}
+
+func TestProcessAttachment_DataURI_PNG(t *testing.T) {
+	pngData := encodePNGBytes(50, 50, false)
+	dataURI := "data:image/png;base64," + base64.StdEncoding.EncodeToString(pngData)
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type:     chat.MessagePartTypeImageURL,
+		ImageURL: &chat.MessageImageURL{URL: dataURI},
+	})
+	require.NoError(t, err)
+	assert.NotEmpty(t, doc.Source.InlineData)
+}
+
+func TestProcessAttachment_DataURI_CaseInsensitiveHeader(t *testing.T) {
+	// The media type and the base64 marker are case-insensitive (RFC 2397).
+	pngData := encodePNGBytes(50, 50, false)
+	dataURI := "data:IMAGE/PNG;BASE64," + base64.StdEncoding.EncodeToString(pngData)
+
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type:     chat.MessagePartTypeImageURL,
+		ImageURL: &chat.MessageImageURL{URL: dataURI},
+	})
+	require.NoError(t, err)
+	assert.NotEmpty(t, doc.Source.InlineData)
+}
+
+func TestProcessAttachment_DataURI_NonBase64_Error(t *testing.T) {
+	_, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type:     chat.MessagePartTypeImageURL,
+		ImageURL: &chat.MessageImageURL{URL: "data:text/plain,hello"},
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "not base64")
+}
+
+func TestProcessAttachment_RemoteURL_Error(t *testing.T) {
+	// Remote http(s):// URLs are not supported; callers must download locally.
+	for _, scheme := range []string{"http://", "https://"} {
+		_, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+			Type:     chat.MessagePartTypeImageURL,
+			ImageURL: &chat.MessageImageURL{URL: scheme + "example.com/photo.jpg"},
+		})
+		require.Error(t, err, "expected error for scheme %s", scheme)
+		assert.Contains(t, err.Error(), "remote URLs are not supported")
+	}
+}
+
+func TestProcessAttachment_UnsupportedScheme_Error(t *testing.T) {
+	_, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type:     chat.MessagePartTypeImageURL,
+		ImageURL: &chat.MessageImageURL{URL: "ftp://example.com/image.jpg"},
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "unsupported image URL scheme")
+}
+
+func TestProcessAttachment_NilImageURL_Error(t *testing.T) {
+	_, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type:     chat.MessagePartTypeImageURL,
+		ImageURL: nil,
+	})
+	require.Error(t, err)
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// ProcessAttachment — MessagePartTypeDocument
+// ──────────────────────────────────────────────────────────────────────────────
+
+func TestProcessAttachment_Document_WithInlineData_Passthrough(t *testing.T) {
+	pdfBytes := []byte("%PDF-1.4 test")
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeDocument,
+		Document: &chat.Document{
+			Name:     "spec.pdf",
+			MimeType: "application/pdf",
+			Source:   chat.DocumentSource{InlineData: pdfBytes},
+		},
+	})
+	require.NoError(t, err)
+	assert.Equal(t, pdfBytes, doc.Source.InlineData)
+	assert.Equal(t, "application/pdf", doc.MimeType)
+}
+
+func TestProcessAttachment_Document_WithInlineText_Passthrough(t *testing.T) {
+	text := "# Markdown content"
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeDocument,
+		Document: &chat.Document{
+			Name:     "readme.md",
+			MimeType: "text/markdown",
+			Source:   chat.DocumentSource{InlineText: text},
+		},
+	})
+	require.NoError(t, err)
+	assert.Equal(t, text, doc.Source.InlineText)
+	assert.Empty(t, doc.Source.InlineData)
+}
+
+func TestProcessAttachment_Document_ImageInlineData_Transcoded(t *testing.T) {
+	jpegData := encodeJPEGBytes(40, 40)
+	doc, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeDocument,
+		Document: &chat.Document{
+			Name:     "photo.jpg",
+			MimeType: "image/jpeg",
+			Source:   chat.DocumentSource{InlineData: jpegData},
+		},
+	})
+	require.NoError(t, err)
+	assert.NotEmpty(t, doc.Source.InlineData)
+	assert.True(t, doc.MimeType == "image/jpeg" || doc.MimeType == "image/png")
+}
+
+func TestProcessAttachment_Document_NoContent_Error(t *testing.T) {
+	_, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeDocument,
+		Document: &chat.Document{
+			Name:     "empty.md",
+			MimeType: "text/markdown",
+			Source:   chat.DocumentSource{},
+		},
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "no inline content")
+}
+
+func TestProcessAttachment_Document_NilDocument_Error(t *testing.T) {
+	_, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type:     chat.MessagePartTypeDocument,
+		Document: nil,
+	})
+	require.Error(t, err)
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// ProcessAttachment — unsupported type
+// ──────────────────────────────────────────────────────────────────────────────
+
+func TestProcessAttachment_UnsupportedType_Error(t *testing.T) {
+	_, err := chat.ProcessAttachment(t.Context(), chat.MessagePart{
+		Type: chat.MessagePartTypeText,
+		Text: "hello",
+	})
+	require.Error(t, err)
+	assert.Contains(t, strings.ToLower(err.Error()), "unsupported")
+}