diff --git a/src/agent/channel.rs b/src/agent/channel.rs
index c30430585..fb0a3bda8 100644
--- a/src/agent/channel.rs
+++ b/src/agent/channel.rs
@@ -2451,26 +2451,125 @@ async fn download_attachments(
     parts
 }
 
+/// Download raw bytes from an attachment URL, including auth if present.
+///
+/// When `auth_header` is set (Slack), delegates to
+/// `download_attachment_bytes_with_auth`, which follows redirects manually so
+/// the `Authorization` header isn't silently stripped on cross-origin
+/// redirects. For public URLs (Discord/Telegram), issues a plain GET on `http`.
+///
+/// # Errors
+///
+/// Returns the transport error text, or `HTTP <status>` when the response
+/// status is not a success.
+async fn download_attachment_bytes(
+    http: &reqwest::Client,
+    attachment: &crate::Attachment,
+) -> std::result::Result<Vec<u8>, String> {
+    if attachment.auth_header.is_some() {
+        return download_attachment_bytes_with_auth(attachment).await;
+    }
+
+    let response = http
+        .get(&attachment.url)
+        .send()
+        .await
+        .map_err(|e| e.to_string())?;
+    let status = response.status();
+    if !status.is_success() {
+        return Err(format!("HTTP {status}"));
+    }
+    response
+        .bytes()
+        .await
+        .map(|b| b.to_vec())
+        .map_err(|e| e.to_string())
+}
+
+/// Slack-specific download: manually follows redirects to preserve the auth header.
+///
+/// Builds a dedicated client with automatic redirects disabled and a 60-second
+/// timeout, then issues up to `MAX_REQUESTS` requests, re-sending the
+/// `Authorization` header on every hop.
+///
+/// # Errors
+///
+/// Fails when `auth_header` is missing, the URL or a `Location` header cannot be
+/// parsed, a redirect would downgrade from HTTPS, the request limit is hit, a
+/// request fails, or the final status is not a success.
+async fn download_attachment_bytes_with_auth(
+    attachment: &crate::Attachment,
+) -> std::result::Result<Vec<u8>, String> {
+    // Upper bound on requests issued: the initial GET plus followed redirects.
+    const MAX_REQUESTS: usize = 5;
+
+    // Fail loudly rather than sending an empty `Authorization` header.
+    let Some(auth) = attachment.auth_header.as_deref() else {
+        return Err("attachment has no auth header".into());
+    };
+
+    let client = reqwest::Client::builder()
+        .redirect(reqwest::redirect::Policy::none())
+        .timeout(std::time::Duration::from_secs(60))
+        .build()
+        .map_err(|e| format!("failed to build HTTP client: {e}"))?;
+
+    let mut url = reqwest::Url::parse(&attachment.url)
+        .map_err(|e| format!("invalid attachment URL: {e}"))?;
+
+    for _ in 0..MAX_REQUESTS {
+        let response = client
+            .get(url.clone())
+            .header(reqwest::header::AUTHORIZATION, auth)
+            .send()
+            .await
+            .map_err(|e| e.to_string())?;
+        let status = response.status();
+
+        if status.is_redirection() {
+            let Some(location) = response.headers().get(reqwest::header::LOCATION) else {
+                return Err(format!("redirect without Location header ({status})"));
+            };
+            let location = location
+                .to_str()
+                .map_err(|e| format!("invalid Location header: {e}"))?;
+            // `Location` may be relative; resolve it against the URL just requested.
+            let next = url
+                .join(location)
+                .map_err(|e| format!("invalid redirect target: {e}"))?;
+            // Never replay the credential over plaintext after starting on HTTPS.
+            if url.scheme() == "https" && next.scheme() != "https" {
+                return Err("refusing redirect from HTTPS to an insecure URL".into());
+            }
+            // NOTE(review): the token is still forwarded to whichever host the
+            // redirect names; consider restricting hops to trusted hosts.
+            url = next;
+            continue;
+        }
+
+        if !status.is_success() {
+            return Err(format!("HTTP {status}"));
+        }
+
+        return response
+            .bytes()
+            .await
+            .map(|b| b.to_vec())
+            .map_err(|e| e.to_string());
+    }
+
+    Err("too many redirects".into())
+}
+
 /// Download an image attachment and encode it as base64 for the LLM.
async fn download_image_attachment( http: &reqwest::Client, attachment: &crate::Attachment, ) -> UserContent { - let response = match http.get(&attachment.url).send().await { - Ok(r) => r, - Err(error) => { - tracing::warn!(%error, filename = %attachment.filename, "failed to download image"); - return UserContent::text(format!( - "[Failed to download image: {}]", - attachment.filename - )); - } - }; - - let bytes = match response.bytes().await { + let bytes = match download_attachment_bytes(http, attachment).await { Ok(b) => b, Err(error) => { - tracing::warn!(%error, filename = %attachment.filename, "failed to read image bytes"); + tracing::warn!(%error, filename = %attachment.filename, "failed to download image"); return UserContent::text(format!( "[Failed to download image: {}]", attachment.filename @@ -2498,21 +2559,10 @@ async fn transcribe_audio_attachment( http: &reqwest::Client, attachment: &crate::Attachment, ) -> UserContent { - let response = match http.get(&attachment.url).send().await { - Ok(r) => r, - Err(error) => { - tracing::warn!(%error, filename = %attachment.filename, "failed to download audio"); - return UserContent::text(format!( - "[Failed to download audio: {}]", - attachment.filename - )); - } - }; - - let bytes = match response.bytes().await { + let bytes = match download_attachment_bytes(http, attachment).await { Ok(b) => b, Err(error) => { - tracing::warn!(%error, filename = %attachment.filename, "failed to read audio bytes"); + tracing::warn!(%error, filename = %attachment.filename, "failed to download audio"); return UserContent::text(format!( "[Failed to download audio: {}]", attachment.filename @@ -2594,7 +2644,9 @@ async fn transcribe_audio_attachment( "temperature": 0 }); - let response = match http + let response = match deps + .llm_manager + .http_client() .post(&endpoint) .header("authorization", format!("Bearer {}", provider.api_key)) .header("content-type", "application/json") @@ -2722,8 +2774,8 @@ async fn 
download_text_attachment( http: &reqwest::Client, attachment: &crate::Attachment, ) -> UserContent { - let response = match http.get(&attachment.url).send().await { - Ok(r) => r, + let bytes = match download_attachment_bytes(http, attachment).await { + Ok(b) => b, Err(error) => { tracing::warn!(%error, filename = %attachment.filename, "failed to download text file"); return UserContent::text(format!( @@ -2733,13 +2785,7 @@ async fn download_text_attachment( } }; - let content = match response.text().await { - Ok(c) => c, - Err(error) => { - tracing::warn!(%error, filename = %attachment.filename, "failed to read text file"); - return UserContent::text(format!("[Failed to read file: {}]", attachment.filename)); - } - }; + let content = String::from_utf8_lossy(&bytes).into_owned(); // Truncate very large files to avoid blowing up context let truncated = if content.len() > 50_000 { diff --git a/src/lib.rs b/src/lib.rs index 0e28ff0a3..609b01bbe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -311,6 +311,9 @@ pub struct Attachment { pub mime_type: String, pub url: String, pub size_bytes: Option, + /// Optional auth header value for private URLs (e.g. Slack's `url_private`). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub auth_header: Option, } /// Outbound response to messaging platforms. 
diff --git a/src/messaging/discord.rs b/src/messaging/discord.rs index ab433dd3a..5857c3eba 100644 --- a/src/messaging/discord.rs +++ b/src/messaging/discord.rs @@ -787,6 +787,7 @@ fn extract_content(message: &Message) -> MessageContent { mime_type: attachment.content_type.clone().unwrap_or_default(), url: attachment.url.clone(), size_bytes: Some(attachment.size as u64), + auth_header: None, }) .collect(); diff --git a/src/messaging/slack.rs b/src/messaging/slack.rs index 2aa1e3cbf..e41b2995a 100644 --- a/src/messaging/slack.rs +++ b/src/messaging/slack.rs @@ -150,8 +150,14 @@ async fn handle_message_event( client: Arc, states: SlackClientEventsUserState, ) -> UserCallbackResult<()> { - // Skip message edits / deletes / bot_message subtypes - if msg_event.subtype.is_some() { + // Skip message edits / deletes / bot_message subtypes, but allow file-related + // subtypes so user-uploaded images and documents are processed. + if let Some(ref subtype) = msg_event.subtype + && !matches!( + subtype, + SlackMessageEventType::FileShare | SlackMessageEventType::FileShared + ) + { return Ok(()); } @@ -231,7 +237,7 @@ async fn handle_message_event( format!("slack:{}:{}", team_id_str, channel_id) }; - let content = extract_message_content(&msg_event.content); + let content = extract_message_content(&msg_event.content, &adapter_state.bot_token); let (metadata, formatted_author) = build_metadata_and_author( &team_id_str, @@ -1259,7 +1265,10 @@ fn markdown_content(text: impl Into) -> SlackMessageContent { } /// Extract `MessageContent` from an optional `SlackMessageContent`. 
-fn extract_message_content(content: &Option) -> MessageContent { +fn extract_message_content( + content: &Option, + bot_token: &str, +) -> MessageContent { let Some(msg_content) = content else { return MessageContent::Text(String::new()); }; @@ -1274,6 +1283,7 @@ fn extract_message_content(content: &Option) -> MessageCont mime_type: f.mimetype.as_ref().map(|m| m.0.clone()).unwrap_or_default(), url: url.to_string(), size_bytes: None, + auth_header: Some(format!("Bearer {}", bot_token)), }) }) .collect(); diff --git a/src/messaging/telegram.rs b/src/messaging/telegram.rs index eab7994c2..f477f0b7c 100644 --- a/src/messaging/telegram.rs +++ b/src/messaging/telegram.rs @@ -665,6 +665,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec { mime_type: "image/jpeg".into(), url: largest.file.id.to_string(), size_bytes: Some(largest.file.size as u64), + auth_header: None, }); } } @@ -683,6 +684,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec { .unwrap_or_else(|| "application/octet-stream".into()), url: doc.document.file.id.to_string(), size_bytes: Some(doc.document.file.size as u64), + auth_header: None, }); } MediaKind::Video(video) => { @@ -700,6 +702,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec { .unwrap_or_else(|| "video/mp4".into()), url: video.video.file.id.to_string(), size_bytes: Some(video.video.file.size as u64), + auth_header: None, }); } MediaKind::Voice(voice) => { @@ -713,6 +716,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec { .unwrap_or_else(|| "audio/ogg".into()), url: voice.voice.file.id.to_string(), size_bytes: Some(voice.voice.file.size as u64), + auth_header: None, }); } MediaKind::Audio(audio) => { @@ -730,6 +734,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec { .unwrap_or_else(|| "audio/mpeg".into()), url: audio.audio.file.id.to_string(), size_bytes: Some(audio.audio.file.size as u64), + auth_header: None, }); } _ => {}