diff --git a/docs/line.md b/docs/line.md index a7fe4f8d..2618cb0d 100644 --- a/docs/line.md +++ b/docs/line.md @@ -83,6 +83,8 @@ In the LINE Developers Console → **Messaging API** tab → scan the QR code wi - **1:1 chat** — send a message to the bot, get an AI agent response - **Group chat** — add the bot to a group, it responds to all messages +- **Images** — send image messages to the bot (automatically compressed and resized) +- **Audio** — send audio messages (e.g. voice notes). They are automatically transcribed (if STT is enabled in Core) and passed to the agent as text. - **Webhook signature validation** — HMAC-SHA256 via `LINE_CHANNEL_SECRET` ### Not Supported (LINE API limitations) diff --git a/docs/telegram.md b/docs/telegram.md index d7dd9ae0..70a4f19a 100644 --- a/docs/telegram.md +++ b/docs/telegram.md @@ -168,6 +168,12 @@ explain VPC peering ← ignored in groups DMs and replies within forum topics always trigger the agent (no @mention needed). +### File Attachments + +- **Images** — send photos (compressed/resized automatically). +- **Documents** — send text-based files (e.g. `.txt`, `.csv`, `.rs`, `.py`) up to 512KB. They are passed directly to the agent as text. +- **Audio/Voice** — send voice notes or audio files. They are automatically transcribed (if STT is enabled in Core) and passed to the agent as text. + ### Emoji reactions The bot shows status reactions on your message as the agent works: diff --git a/gateway/Cargo.lock b/gateway/Cargo.lock index b0fa728b..8b5ba6be 100644 --- a/gateway/Cargo.lock +++ b/gateway/Cargo.lock @@ -1112,7 +1112,7 @@ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "openab-gateway" -version = "0.1.0" +version = "0.4.0" dependencies = [ "aes", "anyhow", diff --git a/gateway/src/adapters/feishu.rs b/gateway/src/adapters/feishu.rs index 922fae34..fa3b8406 100644 --- a/gateway/src/adapters/feishu.rs +++ b/gateway/src/adapters/feishu.rs @@ -1,3 +1,4 @@ +use crate::media::{resize_and_compress, FILE_MAX_DOWNLOAD, IMAGE_MAX_DOWNLOAD}; use crate::schema::*; use axum::extract::State; use prost::Message as ProstMessage; @@ -1268,38 +1269,6 @@ pub enum MediaRef { File { message_id: String, file_key: String, file_name: String }, } -const IMAGE_MAX_DIMENSION_PX: u32 = 1200; -const IMAGE_JPEG_QUALITY: u8 = 75; -const IMAGE_MAX_DOWNLOAD: u64 = 10 * 1024 * 1024; // 10 MB -const FILE_MAX_DOWNLOAD: u64 = 512 * 1024; // 512 KB - -/// Resize image so longest side <= 1200px, then encode as JPEG. -/// GIFs are passed through unchanged to preserve animation. -fn resize_and_compress(raw: &[u8]) -> Result<(Vec, String), image::ImageError> { - use image::ImageReader; - use std::io::Cursor; - - let reader = ImageReader::new(Cursor::new(raw)).with_guessed_format()?; - let format = reader.format(); - if format == Some(image::ImageFormat::Gif) { - return Ok((raw.to_vec(), "image/gif".to_string())); - } - let img = reader.decode()?; - let (w, h) = (img.width(), img.height()); - let img = if w > IMAGE_MAX_DIMENSION_PX || h > IMAGE_MAX_DIMENSION_PX { - let max_side = std::cmp::max(w, h); - let ratio = f64::from(IMAGE_MAX_DIMENSION_PX) / f64::from(max_side); - let new_w = (f64::from(w) * ratio) as u32; - let new_h = (f64::from(h) * ratio) as u32; - img.resize(new_w, new_h, image::imageops::FilterType::Lanczos3) - } else { - img - }; - let mut buf = Cursor::new(Vec::new()); - let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut buf, IMAGE_JPEG_QUALITY); - img.write_with_encoder(encoder)?; - Ok((buf.into_inner(), "image/jpeg".to_string())) -} /// Download a Feishu image by message_id + image_key → resize/compress → base64 Attachment. pub async fn download_feishu_image( diff --git a/gateway/src/adapters/googlechat.rs b/gateway/src/adapters/googlechat.rs index 68759e02..13a97993 100644 --- a/gateway/src/adapters/googlechat.rs +++ b/gateway/src/adapters/googlechat.rs @@ -1371,6 +1371,7 @@ mod tests { content: Content { content_type: "text".into(), text: "hello".into(), + attachments: Vec::new(), }, command: None, request_id: Some("req_123".into()), @@ -1413,6 +1414,7 @@ mod tests { content: Content { content_type: "text".into(), text: "hello".into(), + attachments: Vec::new(), }, command: None, request_id: Some("req_fail".into()), @@ -1459,6 +1461,7 @@ mod tests { content: Content { content_type: "text".into(), text: "".into(), + attachments: Vec::new(), }, command: None, request_id: Some("req_empty".into()), @@ -1502,6 +1505,7 @@ mod tests { content: Content { content_type: "text".into(), text: long_text, + attachments: Vec::new(), }, command: None, request_id: Some("req_multi_fail".into()), @@ -1535,6 +1539,7 @@ mod tests { content: Content { content_type: "text".into(), text: "hello".into(), + attachments: Vec::new(), }, command: None, request_id: Some("req_notoken".into()), @@ -1579,6 +1584,7 @@ mod tests { content: Content { content_type: "text".into(), text: "updated text".into(), + attachments: Vec::new(), }, command: Some("edit_message".into()), request_id: None, @@ -1620,6 +1626,7 @@ mod tests { content: Content { content_type: "text".into(), text: long_text, + attachments: Vec::new(), }, command: None, request_id: Some("req_multi".into()), @@ -1676,6 +1683,7 @@ mod tests { content: Content { content_type: "text".into(), text: long_text, + attachments: Vec::new(), }, command: None, request_id: Some("req_partial".into()), diff --git a/gateway/src/adapters/line.rs b/gateway/src/adapters/line.rs index 3bfc36d0..f4268c3f 100644 --- a/gateway/src/adapters/line.rs +++ b/gateway/src/adapters/line.rs @@ -1,3 +1,4 @@ +use crate::media::{resize_and_compress, AUDIO_MAX_DOWNLOAD, IMAGE_MAX_DOWNLOAD}; use crate::schema::*; use axum::extract::State; use serde::Deserialize; @@ -90,45 +91,55 @@ pub async fn webhook( let Some(ref msg) = event.message else { continue; }; - if msg.message_type != "text" { + let is_text = msg.message_type == "text"; + let is_image = msg.message_type == "image"; + let is_audio = msg.message_type == "audio"; + + if !is_text && !is_image && !is_audio { continue; } - let Some(ref text) = msg.text else { - continue; - }; - if text.trim().is_empty() { + + let text = msg.text.clone().unwrap_or_default(); + if is_text && text.trim().is_empty() { continue; } + let mut attachments = Vec::new(); + if is_image || is_audio { + if let Some(ref access_token) = state.line_access_token { + let client = &state.client; + let att_type = if is_image { "image" } else { "audio" }; + if let Some(att) = download_line_media(client, access_token, &msg.id, att_type).await { + attachments.push(att); + } + } else { + warn!("LINE media received but LINE_CHANNEL_ACCESS_TOKEN not set"); + } + } + let source = event.source.as_ref(); let (channel_id, channel_type) = match source { - Some(s) if s.source_type == "group" => { - match s.group_id.as_deref() { - Some(id) if !id.is_empty() => (id.to_string(), "group".to_string()), - _ => { - warn!("LINE group event missing groupId, skipping"); - continue; - } + Some(s) if s.source_type == "group" => match s.group_id.as_deref() { + Some(id) if !id.is_empty() => (id.to_string(), "group".to_string()), + _ => { + warn!("LINE group event missing groupId, skipping"); + continue; } - } - Some(s) if s.source_type == "room" => { - match s.room_id.as_deref() { - Some(id) if !id.is_empty() => (id.to_string(), "room".to_string()), - _ => { - warn!("LINE room event missing roomId, skipping"); - continue; - } + }, + Some(s) if s.source_type == "room" => match s.room_id.as_deref() { + Some(id) if !id.is_empty() => (id.to_string(), "room".to_string()), + _ => { + warn!("LINE room event missing roomId, skipping"); + continue; } - } - Some(s) => { - match s.user_id.as_deref() { - Some(id) if !id.is_empty() => (id.to_string(), "user".to_string()), - _ => { - warn!("LINE user event missing userId, skipping"); - continue; - } + }, + Some(s) => match s.user_id.as_deref() { + Some(id) if !id.is_empty() => (id.to_string(), "user".to_string()), + _ => { + warn!("LINE user event missing userId, skipping"); + continue; } - } + }, None => { warn!("LINE event missing source, skipping"); continue; @@ -138,7 +149,7 @@ pub async fn webhook( .and_then(|s| s.user_id.as_deref()) .unwrap_or("unknown"); - let gateway_event = GatewayEvent::new( + let mut gateway_event = GatewayEvent::new( "line", ChannelInfo { id: channel_id.clone(), @@ -151,10 +162,11 @@ pub async fn webhook( display_name: user_id.into(), is_bot: false, }, - text, + &text, &msg.id, vec![], ); + gateway_event.content.attachments = attachments; // Cache the reply token for hybrid Reply/Push dispatch if let Some(ref reply_token) = event.reply_token { @@ -266,3 +278,81 @@ pub async fn dispatch_line_reply( used_reply } + +/// Download media content from LINE Messaging API. +async fn download_line_media( + client: &reqwest::Client, + access_token: &str, + message_id: &str, + attachment_type: &str, +) -> Option { + let url = format!( + "https://api-data.line.me/v2/bot/message/{}/content", + message_id + ); + let resp = client + .get(url) + .bearer_auth(access_token) + .send() + .await + .ok()?; + + if !resp.status().is_success() { + error!(status = %resp.status(), "LINE media download failed"); + return None; + } + + let max_size = if attachment_type == "image" { + IMAGE_MAX_DOWNLOAD + } else { + AUDIO_MAX_DOWNLOAD + }; + + if let Some(cl) = resp.headers().get(reqwest::header::CONTENT_LENGTH) { + if let Ok(size) = cl.to_str().unwrap_or("0").parse::() { + if size > max_size { + warn!(message_id, size, "LINE {} Content-Length exceeds limit", attachment_type); + return None; + } + } + } + + let content_type = resp + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|h| h.to_str().ok()) + .unwrap_or(if attachment_type == "image" { "image/jpeg" } else { "audio/mp4" }) + .to_string(); + + let bytes = resp.bytes().await.ok()?; + if bytes.len() as u64 > max_size { + warn!(message_id, size = bytes.len(), "LINE {} exceeds limit", attachment_type); + return None; + } + + let (data_bytes, mime, filename) = if attachment_type == "image" { + match resize_and_compress(&bytes) { + Ok((c, _m)) => (c, content_type, format!("{}.jpg", message_id)), + Err(e) => { + error!(err = %e, "LINE image processing failed"); + return None; + } + } + } else { + // For audio, we don't process, just send as is. + // LINE audio is usually m4a. + (bytes.to_vec(), content_type, format!("{}.m4a", message_id)) + }; + + use base64::Engine; + let b64_data = base64::engine::general_purpose::STANDARD.encode(&data_bytes); + info!(message_id, size = data_bytes.len(), "LINE {} download successful", attachment_type); + + Some(Attachment { + attachment_type: attachment_type.into(), + filename, + mime_type: mime, + data: b64_data, + size: data_bytes.len() as u64, + }) +} diff --git a/gateway/src/adapters/telegram.rs b/gateway/src/adapters/telegram.rs index 6ae01624..19daf12a 100644 --- a/gateway/src/adapters/telegram.rs +++ b/gateway/src/adapters/telegram.rs @@ -1,3 +1,4 @@ +use crate::media::{resize_and_compress, AUDIO_MAX_DOWNLOAD, FILE_MAX_DOWNLOAD, IMAGE_MAX_DOWNLOAD}; use crate::schema::*; use axum::extract::State; use axum::Json; @@ -25,8 +26,41 @@ struct TelegramMessage { chat: TelegramChat, from: Option, text: Option, + caption: Option, #[serde(default)] entities: Vec, + #[serde(default)] + photo: Vec, + document: Option, + voice: Option, + audio: Option, +} + +#[derive(Debug, Deserialize)] +struct TelegramPhoto { + file_id: String, + width: u32, + height: u32, +} + +#[derive(Debug, Deserialize)] +struct TelegramDocument { + file_id: String, + file_name: Option, + mime_type: Option, +} + +#[derive(Debug, Deserialize)] +struct TelegramVoice { + file_id: String, + mime_type: Option, +} + +#[derive(Debug, Deserialize)] +struct TelegramAudio { + file_id: String, + file_name: Option, + mime_type: Option, } #[derive(Debug, Deserialize)] @@ -75,13 +109,48 @@ pub async fn webhook( let Some(msg) = update.message else { return axum::http::StatusCode::OK; }; - let Some(text) = msg.text.as_deref() else { - return axum::http::StatusCode::OK; - }; - if text.trim().is_empty() { + let is_photo = !msg.photo.is_empty(); + let is_document = msg.document.is_some(); + let is_voice = msg.voice.is_some(); + let is_audio = msg.audio.is_some(); + let text = msg.text.as_deref().or(msg.caption.as_deref()).unwrap_or(""); + + if text.trim().is_empty() && !is_photo && !is_document && !is_voice && !is_audio { return axum::http::StatusCode::OK; } + let mut attachments = Vec::new(); + if is_photo || is_document || is_voice || is_audio { + if let Some(ref token) = state.telegram_bot_token { + let client = &state.client; + if is_photo { + // Take the largest photo + if let Some(largest) = msg.photo.iter().max_by_key(|p| p.width * p.height) { + if let Some(att) = + download_telegram_media(client, token, &largest.file_id, "image").await + { + attachments.push(att); + } + } + } else if let Some(doc) = msg.document { + let file_name = doc.file_name.unwrap_or_else(|| "unknown.txt".to_string()); + if let Some(att) = + download_telegram_document(client, token, &doc.file_id, &file_name).await + { + attachments.push(att); + } + } else if let Some(voice) = msg.voice { + if let Some(att) = download_telegram_media(client, token, &voice.file_id, "audio").await { + attachments.push(att); + } + } else if let Some(audio) = msg.audio { + if let Some(att) = download_telegram_media(client, token, &audio.file_id, "audio").await { + attachments.push(att); + } + } + } + } + let from = msg.from.as_ref(); let sender_name = from .and_then(|u| u.username.as_deref()) @@ -107,7 +176,7 @@ pub async fn webhook( }) .collect(); - let event = GatewayEvent::new( + let mut event = GatewayEvent::new( "telegram", ChannelInfo { id: msg.chat.id.to_string(), @@ -124,6 +193,7 @@ pub async fn webhook( &msg.message_id.to_string(), mentions, ); + event.content.attachments = attachments; let json = serde_json::to_string(&event).unwrap(); info!(chat_id = %msg.chat.id, sender = %sender_name, "telegram → gateway"); @@ -262,3 +332,164 @@ pub async fn handle_reply( .await .map_err(|e| error!("telegram send error: {e}")); } + +/// Download photo from Telegram via getFile + download URL. +async fn download_telegram_media( + client: &reqwest::Client, + bot_token: &str, + file_id: &str, + attachment_type: &str, +) -> Option { + // 1. Get file path + let get_file_url = format!("{TELEGRAM_API_BASE}/bot{}/getFile", bot_token); + let resp = client + .get(get_file_url) + .query(&[("file_id", file_id)]) + .send() + .await + .ok()?; + + let body: serde_json::Value = resp.json().await.ok()?; + let file_path = body["result"]["file_path"].as_str()?; + + // 2. Download file + let download_url = format!("{TELEGRAM_API_BASE}/file/bot{}/{}", bot_token, file_path); + let resp = client.get(download_url).send().await.ok()?; + if !resp.status().is_success() { + return None; + } + + let max_size = if attachment_type == "image" { + IMAGE_MAX_DOWNLOAD + } else { + AUDIO_MAX_DOWNLOAD + }; + + if let Some(cl) = resp.headers().get(reqwest::header::CONTENT_LENGTH) { + if let Ok(size) = cl.to_str().unwrap_or("0").parse::() { + if size > max_size { + warn!(file_id, size, "Telegram {} Content-Length exceeds limit", attachment_type); + return None; + } + } + } + + let content_type = resp + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|h| h.to_str().ok()) + .unwrap_or(if attachment_type == "image" { "image/jpeg" } else { "audio/ogg" }) + .to_string(); + + let bytes = resp.bytes().await.ok()?; + if bytes.len() as u64 > max_size { + warn!(file_id, size = bytes.len(), "Telegram {} exceeds limit", attachment_type); + return None; + } + + let (data_bytes, mime, filename) = if attachment_type == "image" { + match resize_and_compress(&bytes) { + Ok((c, _m)) => (c, content_type, format!("{}.jpg", file_id)), + Err(e) => { + error!(err = %e, "Telegram image processing failed"); + return None; + } + } + } else { + // For audio/voice, we don't process. + let ext = if content_type.contains("mpeg") || content_type.contains("mp3") { + "mp3" + } else if content_type.contains("m4a") { + "m4a" + } else { + "ogg" + }; + (bytes.to_vec(), content_type, format!("{}.{}", file_id, ext)) + }; + + use base64::Engine; + let b64_data = base64::engine::general_purpose::STANDARD.encode(&data_bytes); + info!(file_id, size = data_bytes.len(), "Telegram {} download successful", attachment_type); + + Some(Attachment { + attachment_type: attachment_type.into(), + filename, + mime_type: mime, + data: b64_data, + size: data_bytes.len() as u64, + }) +} + +/// Download document from Telegram via getFile + download URL (text files only). +async fn download_telegram_document( + client: &reqwest::Client, + bot_token: &str, + file_id: &str, + file_name: &str, +) -> Option { + // Only download text-like files + let ext = file_name.rsplit('.').next().unwrap_or("").to_lowercase(); + const TEXT_EXTS: &[&str] = &[ + "txt", "csv", "log", "md", "json", "jsonl", "yaml", "yml", "toml", "xml", "rs", "py", "js", + "ts", "jsx", "tsx", "go", "java", "c", "cpp", "h", "hpp", "rb", "sh", "bash", "sql", + "html", "css", "ini", "cfg", "conf", "env", + ]; + if !TEXT_EXTS.contains(&ext.as_str()) { + tracing::debug!(file_name, "skipping non-text file attachment"); + return None; + } + + // 1. Get file path + let get_file_url = format!("{TELEGRAM_API_BASE}/bot{}/getFile", bot_token); + let resp = client + .get(get_file_url) + .query(&[("file_id", file_id)]) + .send() + .await + .ok()?; + + let body: serde_json::Value = resp.json().await.ok()?; + let file_path = body["result"]["file_path"].as_str()?; + + // 2. Download file + let download_url = format!("{TELEGRAM_API_BASE}/file/bot{}/{}", bot_token, file_path); + let resp = client.get(download_url).send().await.ok()?; + if !resp.status().is_success() { + return None; + } + + if let Some(cl) = resp.headers().get(reqwest::header::CONTENT_LENGTH) { + if let Ok(size) = cl.to_str().unwrap_or("0").parse::() { + if size > FILE_MAX_DOWNLOAD { + warn!( + file_id, + size, "Telegram document Content-Length exceeds limit" + ); + return None; + } + } + } + + let bytes = resp.bytes().await.ok()?; + if bytes.len() as u64 > FILE_MAX_DOWNLOAD { + warn!( + file_id, + size = bytes.len(), + "Telegram document exceeds limit" + ); + return None; + } + + let text = String::from_utf8_lossy(&bytes); + use base64::Engine; + let data = base64::engine::general_purpose::STANDARD.encode(text.as_bytes()); + info!(file_id, file_name, size = bytes.len(), "Telegram document download successful"); + + Some(Attachment { + attachment_type: "text_file".into(), + filename: file_name.to_string(), + mime_type: "text/plain".into(), + data, + size: bytes.len() as u64, + }) +} diff --git a/gateway/src/main.rs b/gateway/src/main.rs index 3df4ab1a..1e253977 100644 --- a/gateway/src/main.rs +++ b/gateway/src/main.rs @@ -1,4 +1,5 @@ mod adapters; +mod media; mod schema; use anyhow::Result; @@ -59,8 +60,11 @@ pub struct AppState { /// the first client to `remove()` a token wins the free Reply API call; /// other clients for the same event naturally fall back to Push API. pub reply_token_cache: ReplyTokenCache, + /// Shared HTTP client for media downloads and API calls + pub client: reqwest::Client, } + // --- WebSocket handler (OAB connects here) --- async fn ws_handler( @@ -105,7 +109,7 @@ async fn handle_oab_connection(state: Arc, socket: axum::extract::ws:: let reaction_state: Arc>>> = Arc::new(Mutex::new(HashMap::new())); let recv_task = tokio::spawn(async move { - let client = reqwest::Client::new(); + let client = &state_for_recv.client; while let Some(Ok(msg)) = ws_rx.next().await { if let Message::Text(text) = msg { match serde_json::from_str::(&*text) { @@ -159,7 +163,12 @@ async fn handle_oab_connection(state: Arc, socket: axum::extract::ws:: } "feishu" => { if let Some(ref feishu) = state_for_recv.feishu { - adapters::feishu::handle_reply(&reply, feishu, &state_for_recv.event_tx).await; + adapters::feishu::handle_reply( + &reply, + feishu, + &state_for_recv.event_tx, + ) + .await; } else { warn!("reply for feishu but adapter not configured"); } @@ -306,10 +315,16 @@ async fn main() -> Result<()> { warn!("GOOGLE_CHAT_ACCESS_TOKEN / GOOGLE_CHAT_SA_KEY_JSON not set — replies will be logged but not sent"); } if jwt_verifier.is_none() { - warn!("GOOGLE_CHAT_AUDIENCE not set — webhook requests are NOT authenticated (insecure)"); + warn!( + "GOOGLE_CHAT_AUDIENCE not set — webhook requests are NOT authenticated (insecure)" + ); } - Some(adapters::googlechat::GoogleChatAdapter::new(token_cache, access_token, jwt_verifier)) + Some(adapters::googlechat::GoogleChatAdapter::new( + token_cache, + access_token, + jwt_verifier, + )) } else { None }; @@ -323,6 +338,10 @@ async fn main() -> Result<()> { warn!("no adapters configured — set TELEGRAM_BOT_TOKEN, LINE_CHANNEL_ACCESS_TOKEN, TEAMS_APP_ID + TEAMS_APP_SECRET, FEISHU_APP_ID + FEISHU_APP_SECRET, and/or GOOGLE_CHAT_ENABLED=true"); } + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(30)) + .build()?; + let state = Arc::new(AppState { telegram_bot_token, telegram_secret_token, @@ -335,6 +354,7 @@ async fn main() -> Result<()> { ws_token, event_tx, reply_token_cache, + client, }); // Background task: sweep expired reply tokens every REPLY_TOKEN_TTL_SECS @@ -390,7 +410,13 @@ async fn main() -> Result<()> { let (feishu_shutdown_tx, feishu_shutdown_rx) = tokio::sync::watch::channel(false); if feishu_ws_mode { if let Some(ref feishu) = state.feishu { - match adapters::feishu::start_websocket(feishu, state.event_tx.clone(), feishu_shutdown_rx).await { + match adapters::feishu::start_websocket( + feishu, + state.event_tx.clone(), + feishu_shutdown_rx, + ) + .await + { Ok(_handle) => info!("feishu websocket task spawned"), Err(e) => tracing::error!(err = %e, "feishu websocket startup failed"), } diff --git a/gateway/src/media.rs b/gateway/src/media.rs new file mode 100644 index 00000000..af3cd345 --- /dev/null +++ b/gateway/src/media.rs @@ -0,0 +1,33 @@ +use image::ImageReader; +use std::io::Cursor; + +pub const IMAGE_MAX_DIMENSION_PX: u32 = 1200; +pub const IMAGE_JPEG_QUALITY: u8 = 75; +pub const IMAGE_MAX_DOWNLOAD: u64 = 10 * 1024 * 1024; // 10 MB +pub const FILE_MAX_DOWNLOAD: u64 = 512 * 1024; // 512 KB +pub const AUDIO_MAX_DOWNLOAD: u64 = 20 * 1024 * 1024; // 20 MB + +/// Resize image so longest side <= 1200px, then encode as JPEG. +/// GIFs are passed through unchanged to preserve animation. +pub fn resize_and_compress(raw: &[u8]) -> Result<(Vec, String), image::ImageError> { + let reader = ImageReader::new(Cursor::new(raw)).with_guessed_format()?; + let format = reader.format(); + if format == Some(image::ImageFormat::Gif) { + return Ok((raw.to_vec(), "image/gif".to_string())); + } + let img = reader.decode()?; + let (w, h) = (img.width(), img.height()); + let img = if w > IMAGE_MAX_DIMENSION_PX || h > IMAGE_MAX_DIMENSION_PX { + let max_side = std::cmp::max(w, h); + let ratio = f64::from(IMAGE_MAX_DIMENSION_PX) / f64::from(max_side); + let new_w = (f64::from(w) * ratio) as u32; + let new_h = (f64::from(h) * ratio) as u32; + img.resize(new_w, new_h, image::imageops::FilterType::Lanczos3) + } else { + img + }; + let mut buf = Cursor::new(Vec::new()); + let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut buf, IMAGE_JPEG_QUALITY); + img.write_with_encoder(encoder)?; + Ok((buf.into_inner(), "image/jpeg".to_string())) +} diff --git a/src/gateway.rs b/src/gateway.rs index 8aed6aab..5cc50d22 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -487,6 +487,7 @@ pub struct GatewayParams { pub allow_all_users: bool, pub allowed_users: Vec, pub streaming: bool, + pub stt: crate::config::SttConfig, } pub async fn run_gateway_adapter( @@ -505,6 +506,7 @@ pub async fn run_gateway_adapter( let allow_all_users = params.allow_all_users; let allowed_users = params.allowed_users; let streaming = params.streaming; + let stt = params.stt; let connect_url = match ¶ms.token { Some(token) => { @@ -671,6 +673,23 @@ pub async fn run_gateway_adapter( }); } } + "audio" if stt.enabled => { + use base64::Engine; + if let Ok(bytes) = base64::engine::general_purpose::STANDARD.decode(&att.data) { + let client = reqwest::Client::new(); + if let Some(text) = crate::stt::transcribe( + &client, + &stt, + bytes, + att.filename.clone(), + &att.mime_type + ).await { + extra_blocks.push(ContentBlock::Text { + text: format!("[Audio: {}]", text), + }); + } + } + } _ => {} } } diff --git a/src/main.rs b/src/main.rs index 04a0937f..b512cbe0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -269,6 +269,7 @@ async fn main() -> anyhow::Result<()> { allow_all_users: config::resolve_allow_all(gw_cfg.allow_all_users, &gw_cfg.allowed_users), allowed_users: gw_cfg.allowed_users, streaming: gw_cfg.streaming, + stt: cfg.stt.clone(), }; let gw_router = router.clone(); Some(tokio::spawn(async move {