@@ -17,128 +17,17 @@ import (
1717 "encoding/base64"
1818 "errors"
1919 "fmt"
20- "io"
21- "mime"
22- "net"
23- "net/http"
2420 "os"
2521 "path/filepath"
2622 "strings"
27- "time"
2823)
2924
3025const (
31- // attachHTTPTimeout is the maximum time allowed to fetch a remote image URL.
32- attachHTTPTimeout = 10 * time .Second
33-
34- // attachMaxRemoteBytes is the maximum number of bytes read from a remote URL.
35- attachMaxRemoteBytes = 20 * 1024 * 1024 // 20 MB
36-
37- // MaxInlineBinarySize is the maximum byte size of a binary file (PDF, etc.)
38- // that will be read into memory for inline attachment. Matches the remote
39- // fetch cap so local and remote paths behave consistently.
26+ // MaxInlineBinarySize is the maximum byte size of a binary file (PDF, image,
27+ // etc.) that will be read into memory for inline attachment.
4028 MaxInlineBinarySize = 20 * 1024 * 1024 // 20 MB
4129)
4230
43- // privateIPNets lists address ranges that must not be dialled by the
44- // attach-time URL fetcher. Blocking these prevents SSRF attacks against
45- // cloud metadata services, internal APIs, and loopback services.
46- var privateIPNets = func () []* net.IPNet {
47- blocks := []string {
48- // Loopback
49- "127.0.0.0/8" ,
50- "::1/128" ,
51- // Link-local — covers AWS/GCP/Azure metadata endpoints (169.254.169.254)
52- "169.254.0.0/16" ,
53- "fe80::/10" ,
54- // RFC-1918 private ranges
55- "10.0.0.0/8" ,
56- "172.16.0.0/12" ,
57- "192.168.0.0/16" ,
58- // IPv6 unique-local
59- "fc00::/7" ,
60- }
61- nets := make ([]* net.IPNet , 0 , len (blocks ))
62- for _ , b := range blocks {
63- _ , ipNet , err := net .ParseCIDR (b )
64- if err != nil {
65- panic ("attachment: invalid built-in CIDR " + b + ": " + err .Error ())
66- }
67- nets = append (nets , ipNet )
68- }
69- return nets
70- }()
71-
72- // isPrivateIP reports whether ip falls in any of the blocked address ranges.
73- func isPrivateIP (ip net.IP ) bool {
74- for _ , block := range privateIPNets {
75- if block .Contains (ip ) {
76- return true
77- }
78- }
79- return false
80- }
81-
82- // checkSafeHostCtx resolves host to IP addresses and returns an error if any
83- // resolved address is in a private/reserved range. This is called both at
84- // dial time (where a context is available) and on redirect destinations.
85- func checkSafeHostCtx (ctx context.Context , host string ) error {
86- addrs , err := net .DefaultResolver .LookupHost (ctx , host )
87- if err != nil {
88- return fmt .Errorf ("attachment: cannot resolve host %q: %w" , host , err )
89- }
90- for _ , addr := range addrs {
91- ip := net .ParseIP (addr )
92- if ip == nil {
93- continue
94- }
95- if isPrivateIP (ip ) {
96- return fmt .Errorf ("attachment: URL resolves to private/reserved address %s (SSRF protection)" , addr )
97- }
98- }
99- return nil
100- }
101-
102- // safeHTTPClient is a shared HTTP client used by fetchRemoteImage.
103- // It enforces SSRF protection by refusing connections to private and
104- // reserved IP ranges at both dial time and on each redirect hop.
105- //
106- // Tests may override this variable to inject a custom client that bypasses
107- // SSRF filtering (e.g. to reach httptest.Server on loopback). Only do this
108- // in test code guarded by a t.Cleanup restore.
109- var safeHTTPClient = newSafeHTTPClient ()
110-
111- func newSafeHTTPClient () * http.Client {
112- return & http.Client {
113- Timeout : attachHTTPTimeout ,
114- Transport : & http.Transport {
115- DialContext : func (ctx context.Context , network , addr string ) (net.Conn , error ) {
116- host , _ , err := net .SplitHostPort (addr )
117- if err != nil {
118- return nil , fmt .Errorf ("attachment: malformed dial address %q: %w" , addr , err )
119- }
120- if err := checkSafeHostCtx (ctx , host ); err != nil {
121- return nil , err
122- }
123- return (& net.Dialer {}).DialContext (ctx , network , addr )
124- },
125- },
126- CheckRedirect : func (req * http.Request , _ []* http.Request ) error {
127- return checkSafeHostCtx (req .Context (), req .URL .Hostname ())
128- },
129- }
130- }
131-
132- // SetFetchHTTPClientForTest replaces the HTTP client used by fetchRemoteImage
133- // and returns a restore function. Only for use in tests.
134- //
135- // defer chat.SetFetchHTTPClientForTest(t, myClient)()
136- func SetFetchHTTPClientForTest (client * http.Client ) (restore func ()) {
137- prev := safeHTTPClient
138- safeHTTPClient = client
139- return func () { safeHTTPClient = prev }
140- }
141-
14231// ProcessAttachment converts a raw [MessagePart] into a [Document] with fully
14332// resolved Source.InlineData or Source.InlineText. It is called once when a
14433// message is assembled — never at inference time.
@@ -150,19 +39,16 @@ func SetFetchHTTPClientForTest(client *http.Client) (restore func()) {
15039// binary bytes (images are transcoded+resized via [ResizeImage]; PDFs and
15140// other supported types are read verbatim).
15241//
153- // - [MessagePartTypeImageURL]: handles data: URIs (decoded inline) and
154- // http(s):// URLs (fetched with a 10-second timeout). The image bytes are
155- // then passed through [ResizeImage] to normalise to JPEG or PNG .
42+ // - [MessagePartTypeImageURL]: handles data: URIs (decoded inline). Remote
43+ // http(s):// URLs are not supported; callers should download the file
44+ // locally first and pass it as a [MessagePartTypeFile] instead .
15645//
15746// - [MessagePartTypeDocument]: if Source.InlineData or Source.InlineText is
15847// already set, the document is returned as-is after applying image
15948// transcoding to any image/* InlineData. A Document with no inline content
16049// is an error.
161- //
162- // The context is forwarded to any network operations; filesystem and image
163- // decoding operations are not yet context-aware.
164- func ProcessAttachment (ctx context.Context , part MessagePart ) (Document , error ) {
165- doc , _ , err := ProcessAttachmentWithMetadata (ctx , part )
50+ func ProcessAttachment (_ context.Context , part MessagePart ) (Document , error ) {
51+ doc , _ , err := ProcessAttachmentWithMetadata (part )
16652 return doc , err
16753}
16854
@@ -173,12 +59,12 @@ func ProcessAttachment(ctx context.Context, part MessagePart) (Document, error)
17359// Callers that need to emit a dimension note (for model coordinate-mapping)
17460// should use this variant and call [FormatDimensionNote] on the returned
17561// metadata.
176- func ProcessAttachmentWithMetadata (ctx context. Context , part MessagePart ) (Document , * ImageResizeResult , error ) {
62+ func ProcessAttachmentWithMetadata (part MessagePart ) (Document , * ImageResizeResult , error ) {
17763 switch part .Type {
17864 case MessagePartTypeFile :
17965 return processFilePart (part )
18066 case MessagePartTypeImageURL :
181- return processImageURLPart (ctx , part )
67+ return processImageURLPart (part )
18268 case MessagePartTypeDocument :
18369 return processDocumentPart (part )
18470 default :
@@ -271,48 +157,34 @@ func processFilePart(part MessagePart) (Document, *ImageResizeResult, error) {
271157}
272158
273159// processImageURLPart handles MessagePartTypeImageURL.
274- // Supports:
275- // - data: URIs (data:<mime>;base64,<payload>)
276- // - http:// and https:// URLs (fetched with attachHTTPTimeout)
277- func processImageURLPart (ctx context. Context , part MessagePart ) (Document , * ImageResizeResult , error ) {
160+ // Only data: URIs are supported; remote http(s):// URLs are rejected.
161+ // Callers with a remote URL should download the file locally first and
162+ // pass it as a MessagePartTypeFile instead.
163+ func processImageURLPart (part MessagePart ) (Document , * ImageResizeResult , error ) {
278164 if part .ImageURL == nil {
279165 return Document {}, nil , errors .New ("ProcessAttachment: image-url part has nil ImageURL field" )
280166 }
281167 rawURL := part .ImageURL .URL
282168
283- var (
284- data []byte
285- mimeType string
286- name string
287- err error
288- )
289-
290169 switch {
291170 case strings .HasPrefix (rawURL , "data:" ):
292- mimeType , data , err = parseDataURI (rawURL )
171+ mimeType , data , err : = parseDataURI (rawURL )
293172 if err != nil {
294173 return Document {}, nil , fmt .Errorf ("ProcessAttachment: parse data URI: %w" , err )
295174 }
296- name = "image"
175+ // When content detection returns an image type, prefer it over the
176+ // declared MIME. (Only image types are trusted from the sniffer.)
177+ if detected := DetectMimeTypeByContent (data ); IsImageMimeType (detected ) {
178+ mimeType = detected
179+ }
180+ return transcodeImageWithMeta ("image" , data , mimeType )
297181
298182 case strings .HasPrefix (rawURL , "http://" ) || strings .HasPrefix (rawURL , "https://" ):
299- data , mimeType , name , err = fetchRemoteImage (ctx , rawURL )
300- if err != nil {
301- return Document {}, nil , fmt .Errorf ("ProcessAttachment: fetch remote image %q: %w" , rawURL , err )
302- }
183+ return Document {}, nil , errors .New ("attachment: remote URLs are not supported; download the file locally first" )
303184
304185 default :
305- return Document {}, nil , fmt .Errorf ("ProcessAttachment : unsupported image URL scheme (must be data: or http(s)://) : %q" , rawURL )
186+ return Document {}, nil , fmt .Errorf ("attachment : unsupported image URL scheme: %q" , rawURL )
306187 }
307-
308- // When content detection returns an image type, prefer it over whatever
309- // the URI or Content-Type header said. (Only image types are trusted from
310- // the sniffer to avoid promoting an unknown binary to a valid MIME.)
311- if detected := DetectMimeTypeByContent (data ); IsImageMimeType (detected ) {
312- mimeType = detected
313- }
314-
315- return transcodeImageWithMeta (name , data , mimeType )
316188}
317189
318190// processDocumentPart handles MessagePartTypeDocument.
@@ -325,15 +197,12 @@ func processDocumentPart(part MessagePart) (Document, *ImageResizeResult, error)
325197
326198 if len (doc .Source .InlineData ) > 0 {
327199 if IsImageMimeType (doc .MimeType ) {
328- // Apply transcoding/resizing to normalise the image.
329200 return transcodeImageWithMeta (doc .Name , doc .Source .InlineData , doc .MimeType )
330201 }
331- // Non-image binary — pass through unchanged.
332202 return doc , nil , nil
333203 }
334204
335205 if doc .Source .InlineText != "" {
336- // Already resolved text — pass through unchanged.
337206 return doc , nil , nil
338207 }
339208
@@ -348,13 +217,12 @@ func transcodeImageWithMeta(name string, data []byte, mimeType string) (Document
348217 if err != nil {
349218 return Document {}, nil , fmt .Errorf ("ProcessAttachment: transcode image %q: %w" , name , err )
350219 }
351- doc := Document {
220+ return Document {
352221 Name : name ,
353222 MimeType : result .MimeType ,
354223 Size : int64 (len (result .Data )),
355224 Source : DocumentSource {InlineData : result .Data },
356- }
357- return doc , result , nil
225+ }, result , nil
358226}
359227
360228// parseDataURI parses a data URI of the form "data:<mime>;base64,<payload>".
@@ -391,49 +259,3 @@ func parseDataURI(uri string) (mimeType string, data []byte, err error) {
391259 }
392260 return mimeType , data , nil
393261}
394-
395- // fetchRemoteImage fetches an image from an http(s) URL.
396- // The safeHTTPClient enforces a timeout and blocks connections to
397- // private/reserved IP ranges (SSRF protection).
398- // Returns the image bytes, detected MIME type, and a filename derived from the URL.
399- func fetchRemoteImage (ctx context.Context , rawURL string ) (data []byte , mimeType , name string , err error ) {
400- req , err := http .NewRequestWithContext (ctx , http .MethodGet , rawURL , http .NoBody )
401- if err != nil {
402- return nil , "" , "" , fmt .Errorf ("create request: %w" , err )
403- }
404-
405- resp , err := safeHTTPClient .Do (req )
406- if err != nil {
407- return nil , "" , "" , fmt .Errorf ("HTTP GET: %w" , err )
408- }
409- defer resp .Body .Close ()
410-
411- if resp .StatusCode != http .StatusOK {
412- return nil , "" , "" , fmt .Errorf ("HTTP %d for %q" , resp .StatusCode , rawURL )
413- }
414-
415- data , err = io .ReadAll (io .LimitReader (resp .Body , attachMaxRemoteBytes + 1 ))
416- if err != nil {
417- return nil , "" , "" , fmt .Errorf ("read response body: %w" , err )
418- }
419- if int64 (len (data )) > attachMaxRemoteBytes {
420- return nil , "" , "" , fmt .Errorf ("remote image too large (max %d bytes)" , attachMaxRemoteBytes )
421- }
422-
423- // Determine MIME type: prefer Content-Type header, fall back to content sniff.
424- mimeType = resp .Header .Get ("Content-Type" )
425- if ct , _ , parseErr := mime .ParseMediaType (mimeType ); parseErr == nil {
426- mimeType = ct
427- }
428- if mimeType == "" || mimeType == "application/octet-stream" {
429- mimeType = DetectMimeTypeByContent (data )
430- }
431-
432- // Derive a filename from the URL path.
433- name = filepath .Base (req .URL .Path )
434- if name == "" || name == "." || name == "/" {
435- name = "image"
436- }
437-
438- return data , mimeType , name , nil
439- }
0 commit comments