Skip to content

Commit 0472016

Browse files
fix: remove URL fetching from ProcessAttachment – local files and data: URIs only
Per rumpl's review: remove all http(s):// fetching from the attach-time pipeline. The library should not download things from the internet. Changes: - Remove fetchRemoteImage function entirely - Remove safeHTTPClient, newSafeHTTPClient, privateIPNets, isPrivateIP, checkSafeHostCtx, SetFetchHTTPClientForTest (whole SSRF mitigation is now unnecessary) - Remove time, io, mime, net, net/http imports - processImageURLPart: data: URIs still decoded and transcoded as before; http(s):// URLs now return errors.New(...remote URLs are not supported...) - ProcessAttachment / ProcessAttachmentWithMetadata: context parameter kept on the public API (for forward compat) but marked _ since no I/O needs it - Remove network tests: TestProcessAttachment_HTTPS_Image_Via_HTTPTestServer, TestProcessAttachment_HTTP_Non200_Error, TestProcessAttachment_SSRF_* - Add TestProcessAttachment_RemoteURL_Error covering both http:// and https:// - MaxInlineBinarySize guard retained (local files, still valid) Assisted-By: docker-agent
1 parent 247c919 commit 0472016

2 files changed

Lines changed: 37 additions & 278 deletions

File tree

pkg/chat/attach.go

Lines changed: 24 additions & 202 deletions
Original file line numberDiff line numberDiff line change
@@ -17,128 +17,17 @@ import (
1717
"encoding/base64"
1818
"errors"
1919
"fmt"
20-
"io"
21-
"mime"
22-
"net"
23-
"net/http"
2420
"os"
2521
"path/filepath"
2622
"strings"
27-
"time"
2823
)
2924

3025
const (
31-
// attachHTTPTimeout is the maximum time allowed to fetch a remote image URL.
32-
attachHTTPTimeout = 10 * time.Second
33-
34-
// attachMaxRemoteBytes is the maximum number of bytes read from a remote URL.
35-
attachMaxRemoteBytes = 20 * 1024 * 1024 // 20 MB
36-
37-
// MaxInlineBinarySize is the maximum byte size of a binary file (PDF, etc.)
38-
// that will be read into memory for inline attachment. Matches the remote
39-
// fetch cap so local and remote paths behave consistently.
26+
// MaxInlineBinarySize is the maximum byte size of a binary file (PDF, image,
27+
// etc.) that will be read into memory for inline attachment.
4028
MaxInlineBinarySize = 20 * 1024 * 1024 // 20 MB
4129
)
4230

43-
// privateIPNets lists address ranges that must not be dialled by the
44-
// attach-time URL fetcher. Blocking these prevents SSRF attacks against
45-
// cloud metadata services, internal APIs, and loopback services.
46-
var privateIPNets = func() []*net.IPNet {
47-
blocks := []string{
48-
// Loopback
49-
"127.0.0.0/8",
50-
"::1/128",
51-
// Link-local — covers AWS/GCP/Azure metadata endpoints (169.254.169.254)
52-
"169.254.0.0/16",
53-
"fe80::/10",
54-
// RFC-1918 private ranges
55-
"10.0.0.0/8",
56-
"172.16.0.0/12",
57-
"192.168.0.0/16",
58-
// IPv6 unique-local
59-
"fc00::/7",
60-
}
61-
nets := make([]*net.IPNet, 0, len(blocks))
62-
for _, b := range blocks {
63-
_, ipNet, err := net.ParseCIDR(b)
64-
if err != nil {
65-
panic("attachment: invalid built-in CIDR " + b + ": " + err.Error())
66-
}
67-
nets = append(nets, ipNet)
68-
}
69-
return nets
70-
}()
71-
72-
// isPrivateIP reports whether ip falls in any of the blocked address ranges.
73-
func isPrivateIP(ip net.IP) bool {
74-
for _, block := range privateIPNets {
75-
if block.Contains(ip) {
76-
return true
77-
}
78-
}
79-
return false
80-
}
81-
82-
// checkSafeHostCtx resolves host to IP addresses and returns an error if any
83-
// resolved address is in a private/reserved range. This is called both at
84-
// dial time (where a context is available) and on redirect destinations.
85-
func checkSafeHostCtx(ctx context.Context, host string) error {
86-
addrs, err := net.DefaultResolver.LookupHost(ctx, host)
87-
if err != nil {
88-
return fmt.Errorf("attachment: cannot resolve host %q: %w", host, err)
89-
}
90-
for _, addr := range addrs {
91-
ip := net.ParseIP(addr)
92-
if ip == nil {
93-
continue
94-
}
95-
if isPrivateIP(ip) {
96-
return fmt.Errorf("attachment: URL resolves to private/reserved address %s (SSRF protection)", addr)
97-
}
98-
}
99-
return nil
100-
}
101-
102-
// safeHTTPClient is a shared HTTP client used by fetchRemoteImage.
103-
// It enforces SSRF protection by refusing connections to private and
104-
// reserved IP ranges at both dial time and on each redirect hop.
105-
//
106-
// Tests may override this variable to inject a custom client that bypasses
107-
// SSRF filtering (e.g. to reach httptest.Server on loopback). Only do this
108-
// in test code guarded by a t.Cleanup restore.
109-
var safeHTTPClient = newSafeHTTPClient()
110-
111-
func newSafeHTTPClient() *http.Client {
112-
return &http.Client{
113-
Timeout: attachHTTPTimeout,
114-
Transport: &http.Transport{
115-
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
116-
host, _, err := net.SplitHostPort(addr)
117-
if err != nil {
118-
return nil, fmt.Errorf("attachment: malformed dial address %q: %w", addr, err)
119-
}
120-
if err := checkSafeHostCtx(ctx, host); err != nil {
121-
return nil, err
122-
}
123-
return (&net.Dialer{}).DialContext(ctx, network, addr)
124-
},
125-
},
126-
CheckRedirect: func(req *http.Request, _ []*http.Request) error {
127-
return checkSafeHostCtx(req.Context(), req.URL.Hostname())
128-
},
129-
}
130-
}
131-
132-
// SetFetchHTTPClientForTest replaces the HTTP client used by fetchRemoteImage
133-
// and returns a restore function. Only for use in tests.
134-
//
135-
// defer chat.SetFetchHTTPClientForTest(t, myClient)()
136-
func SetFetchHTTPClientForTest(client *http.Client) (restore func()) {
137-
prev := safeHTTPClient
138-
safeHTTPClient = client
139-
return func() { safeHTTPClient = prev }
140-
}
141-
14231
// ProcessAttachment converts a raw [MessagePart] into a [Document] with fully
14332
// resolved Source.InlineData or Source.InlineText. It is called once when a
14433
// message is assembled — never at inference time.
@@ -150,19 +39,16 @@ func SetFetchHTTPClientForTest(client *http.Client) (restore func()) {
15039
// binary bytes (images are transcoded+resized via [ResizeImage]; PDFs and
15140
// other supported types are read verbatim).
15241
//
153-
// - [MessagePartTypeImageURL]: handles data: URIs (decoded inline) and
154-
// http(s):// URLs (fetched with a 10-second timeout). The image bytes are
155-
// then passed through [ResizeImage] to normalise to JPEG or PNG.
42+
// - [MessagePartTypeImageURL]: handles data: URIs (decoded inline). Remote
43+
// http(s):// URLs are not supported; callers should download the file
44+
// locally first and pass it as a [MessagePartTypeFile] instead.
15645
//
15746
// - [MessagePartTypeDocument]: if Source.InlineData or Source.InlineText is
15847
// already set, the document is returned as-is after applying image
15948
// transcoding to any image/* InlineData. A Document with no inline content
16049
// is an error.
161-
//
162-
// The context is forwarded to any network operations; filesystem and image
163-
// decoding operations are not yet context-aware.
164-
func ProcessAttachment(ctx context.Context, part MessagePart) (Document, error) {
165-
doc, _, err := ProcessAttachmentWithMetadata(ctx, part)
50+
func ProcessAttachment(_ context.Context, part MessagePart) (Document, error) {
51+
doc, _, err := ProcessAttachmentWithMetadata(part)
16652
return doc, err
16753
}
16854

@@ -173,12 +59,12 @@ func ProcessAttachment(ctx context.Context, part MessagePart) (Document, error)
17359
// Callers that need to emit a dimension note (for model coordinate-mapping)
17460
// should use this variant and call [FormatDimensionNote] on the returned
17561
// metadata.
176-
func ProcessAttachmentWithMetadata(ctx context.Context, part MessagePart) (Document, *ImageResizeResult, error) {
62+
func ProcessAttachmentWithMetadata(part MessagePart) (Document, *ImageResizeResult, error) {
17763
switch part.Type {
17864
case MessagePartTypeFile:
17965
return processFilePart(part)
18066
case MessagePartTypeImageURL:
181-
return processImageURLPart(ctx, part)
67+
return processImageURLPart(part)
18268
case MessagePartTypeDocument:
18369
return processDocumentPart(part)
18470
default:
@@ -271,48 +157,34 @@ func processFilePart(part MessagePart) (Document, *ImageResizeResult, error) {
271157
}
272158

273159
// processImageURLPart handles MessagePartTypeImageURL.
274-
// Supports:
275-
// - data: URIs (data:<mime>;base64,<payload>)
276-
// - http:// and https:// URLs (fetched with attachHTTPTimeout)
277-
func processImageURLPart(ctx context.Context, part MessagePart) (Document, *ImageResizeResult, error) {
160+
// Only data: URIs are supported; remote http(s):// URLs are rejected.
161+
// Callers with a remote URL should download the file locally first and
162+
// pass it as a MessagePartTypeFile instead.
163+
func processImageURLPart(part MessagePart) (Document, *ImageResizeResult, error) {
278164
if part.ImageURL == nil {
279165
return Document{}, nil, errors.New("ProcessAttachment: image-url part has nil ImageURL field")
280166
}
281167
rawURL := part.ImageURL.URL
282168

283-
var (
284-
data []byte
285-
mimeType string
286-
name string
287-
err error
288-
)
289-
290169
switch {
291170
case strings.HasPrefix(rawURL, "data:"):
292-
mimeType, data, err = parseDataURI(rawURL)
171+
mimeType, data, err := parseDataURI(rawURL)
293172
if err != nil {
294173
return Document{}, nil, fmt.Errorf("ProcessAttachment: parse data URI: %w", err)
295174
}
296-
name = "image"
175+
// When content detection returns an image type, prefer it over the
176+
// declared MIME. (Only image types are trusted from the sniffer.)
177+
if detected := DetectMimeTypeByContent(data); IsImageMimeType(detected) {
178+
mimeType = detected
179+
}
180+
return transcodeImageWithMeta("image", data, mimeType)
297181

298182
case strings.HasPrefix(rawURL, "http://") || strings.HasPrefix(rawURL, "https://"):
299-
data, mimeType, name, err = fetchRemoteImage(ctx, rawURL)
300-
if err != nil {
301-
return Document{}, nil, fmt.Errorf("ProcessAttachment: fetch remote image %q: %w", rawURL, err)
302-
}
183+
return Document{}, nil, errors.New("attachment: remote URLs are not supported; download the file locally first")
303184

304185
default:
305-
return Document{}, nil, fmt.Errorf("ProcessAttachment: unsupported image URL scheme (must be data: or http(s)://): %q", rawURL)
186+
return Document{}, nil, fmt.Errorf("attachment: unsupported image URL scheme: %q", rawURL)
306187
}
307-
308-
// When content detection returns an image type, prefer it over whatever
309-
// the URI or Content-Type header said. (Only image types are trusted from
310-
// the sniffer to avoid promoting an unknown binary to a valid MIME.)
311-
if detected := DetectMimeTypeByContent(data); IsImageMimeType(detected) {
312-
mimeType = detected
313-
}
314-
315-
return transcodeImageWithMeta(name, data, mimeType)
316188
}
317189

318190
// processDocumentPart handles MessagePartTypeDocument.
@@ -325,15 +197,12 @@ func processDocumentPart(part MessagePart) (Document, *ImageResizeResult, error)
325197

326198
if len(doc.Source.InlineData) > 0 {
327199
if IsImageMimeType(doc.MimeType) {
328-
// Apply transcoding/resizing to normalise the image.
329200
return transcodeImageWithMeta(doc.Name, doc.Source.InlineData, doc.MimeType)
330201
}
331-
// Non-image binary — pass through unchanged.
332202
return doc, nil, nil
333203
}
334204

335205
if doc.Source.InlineText != "" {
336-
// Already resolved text — pass through unchanged.
337206
return doc, nil, nil
338207
}
339208

@@ -348,13 +217,12 @@ func transcodeImageWithMeta(name string, data []byte, mimeType string) (Document
348217
if err != nil {
349218
return Document{}, nil, fmt.Errorf("ProcessAttachment: transcode image %q: %w", name, err)
350219
}
351-
doc := Document{
220+
return Document{
352221
Name: name,
353222
MimeType: result.MimeType,
354223
Size: int64(len(result.Data)),
355224
Source: DocumentSource{InlineData: result.Data},
356-
}
357-
return doc, result, nil
225+
}, result, nil
358226
}
359227

360228
// parseDataURI parses a data URI of the form "data:<mime>;base64,<payload>".
@@ -391,49 +259,3 @@ func parseDataURI(uri string) (mimeType string, data []byte, err error) {
391259
}
392260
return mimeType, data, nil
393261
}
394-
395-
// fetchRemoteImage fetches an image from an http(s) URL.
396-
// The safeHTTPClient enforces a timeout and blocks connections to
397-
// private/reserved IP ranges (SSRF protection).
398-
// Returns the image bytes, detected MIME type, and a filename derived from the URL.
399-
func fetchRemoteImage(ctx context.Context, rawURL string) (data []byte, mimeType, name string, err error) {
400-
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, http.NoBody)
401-
if err != nil {
402-
return nil, "", "", fmt.Errorf("create request: %w", err)
403-
}
404-
405-
resp, err := safeHTTPClient.Do(req)
406-
if err != nil {
407-
return nil, "", "", fmt.Errorf("HTTP GET: %w", err)
408-
}
409-
defer resp.Body.Close()
410-
411-
if resp.StatusCode != http.StatusOK {
412-
return nil, "", "", fmt.Errorf("HTTP %d for %q", resp.StatusCode, rawURL)
413-
}
414-
415-
data, err = io.ReadAll(io.LimitReader(resp.Body, attachMaxRemoteBytes+1))
416-
if err != nil {
417-
return nil, "", "", fmt.Errorf("read response body: %w", err)
418-
}
419-
if int64(len(data)) > attachMaxRemoteBytes {
420-
return nil, "", "", fmt.Errorf("remote image too large (max %d bytes)", attachMaxRemoteBytes)
421-
}
422-
423-
// Determine MIME type: prefer Content-Type header, fall back to content sniff.
424-
mimeType = resp.Header.Get("Content-Type")
425-
if ct, _, parseErr := mime.ParseMediaType(mimeType); parseErr == nil {
426-
mimeType = ct
427-
}
428-
if mimeType == "" || mimeType == "application/octet-stream" {
429-
mimeType = DetectMimeTypeByContent(data)
430-
}
431-
432-
// Derive a filename from the URL path.
433-
name = filepath.Base(req.URL.Path)
434-
if name == "" || name == "." || name == "/" {
435-
name = "image"
436-
}
437-
438-
return data, mimeType, name, nil
439-
}

0 commit comments

Comments
 (0)