Skip to content
118 changes: 82 additions & 36 deletions src/agent/channel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2451,26 +2451,87 @@ async fn download_attachments(
parts
}

/// Download raw bytes from an attachment URL, including auth if present.
///
/// When `auth_header` is set (Slack), uses a no-redirect client and manually
/// follows redirects so the `Authorization` header isn't silently stripped on
/// cross-origin redirects. For public URLs (Discord/Telegram), uses a plain GET.
async fn download_attachment_bytes(
http: &reqwest::Client,
attachment: &crate::Attachment,
) -> std::result::Result<Vec<u8>, String> {
if attachment.auth_header.is_some() {
download_attachment_bytes_with_auth(attachment).await
} else {
let response = http
.get(&attachment.url)
.send()
.await
.map_err(|e| e.to_string())?;
if !response.status().is_success() {
return Err(format!("HTTP {}", response.status()));
}
response
.bytes()
.await
.map(|b| b.to_vec())
.map_err(|e| e.to_string())
}
}

/// Slack-specific download: manually follows redirects to preserve the auth header.
async fn download_attachment_bytes_with_auth(
attachment: &crate::Attachment,
) -> std::result::Result<Vec<u8>, String> {
let client = reqwest::Client::builder()
.redirect(reqwest::redirect::Policy::none())
.timeout(std::time::Duration::from_secs(60))
.build()
.map_err(|e| format!("failed to build HTTP client: {e}"))?;

let auth = attachment.auth_header.as_deref().unwrap_or_default();
let mut url = attachment.url.clone();

for _ in 0..5 {
let response = client
.get(&url)
.header(reqwest::header::AUTHORIZATION, auth)
.send()
.await
.map_err(|e| e.to_string())?;
let status = response.status();

if status.is_redirection() {
if let Some(location) = response.headers().get(reqwest::header::LOCATION) {
url = location.to_str().unwrap_or_default().to_string();
continue;
}
return Err(format!("redirect without Location header ({})", status));
}

if !status.is_success() {
return Err(format!("HTTP {}", status));
}

return response
.bytes()
.await
.map(|b| b.to_vec())
.map_err(|e| e.to_string());
}

Err("too many redirects".into())
}
Comment on lines +2454 to +2524
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🌐 Web query:

Slack url_private redirect behavior cross-origin domains Authorization header

💡 Result:

Slack file.url_private / url_private_download URLs must be fetched with an OAuth token in an Authorization: Bearer … header (token needs files:read).[1]

The gotcha is that hitting url_private often returns an HTTP 3xx redirect to a different host (commonly another Slack-controlled domain such as files.slack.com). Many HTTP stacks intentionally strip Authorization on cross-host redirects to prevent credential leaks—so the redirected request becomes unauthenticated and the download fails (often showing up as 404/redirect loops).

  • Python requests: it strips Authorization when the redirect hostname changes (should_strip_auth).[2]
  • curl/libcurl: by default, custom Authorization: headers are only sent to the original host; you must explicitly allow sending them to redirected hosts (e.g., --location-trusted / CURLOPT_UNRESTRICTED_AUTH).[3]

Practical fixes

  1. Don’t auto-follow redirects. Fetch url_private with redirects disabled, read the Location, then make a second request to that URL adding the Authorization header again only if the redirect target is a Slack domain you trust.
  2. Or, if you control the client behavior, enable “unrestricted auth on redirect” (e.g., curl --location-trusted)—again only if you’ve validated the redirect chain is to Slack-controlled hosts.[3]
  3. If you’re doing this from a browser (cross-origin), prefer a server-side fetch/proxy, since browser redirect/auth-header behavior is constrained for security/privacy reasons.[4]

Sources: Slack file auth requirement[1]; requests redirect auth-stripping logic[2]; curl redirect/auth behavior and --location-trusted guidance[3]; Fetch/redirect security discussion[4].

[1] (docs.slack.dev)
[2] (requests.readthedocs.io)
[3] (curl.se)
[4] (github.com)

Citations:


🌐 Web query:

HTTP redirect Location relative URL RFC 7231 3986

💡 Result:

Location in HTTP redirects may be a relative reference.

  • RFC 7231 defines the header as: Location = URI-reference (i.e., a URI reference, not necessarily an absolute URI). If the value is a relative reference, the client computes the final target by resolving it against the effective request URI, using RFC 3986’s rules. [1]
  • RFC 3986 defines what a relative reference is and states the target URI is obtained by applying the reference resolution algorithm (Section 5, especially 5.2 “Relative Resolution”). [2]

So: a redirect like Location: /new-path is valid, and the client resolves it relative to the request URL per RFC 3986. [1][2]

Sources: [1] RFC 7231 (HTTP/1.1 Semantics), Section 7.1.2 Location; [2] RFC 3986 (URI Generic Syntax), Sections 4.2 and 5.2.


🏁 Script executed:

# Check if there are any other uses of redirect handling in the codebase
rg "redirect|Location" --type rs -i -A 2 -B 2 | head -100

Repository: spacedriveapp/spacebot

Length of output: 92


🏁 Script executed:

# Check reqwest documentation/examples for redirect handling best practices
rg "reqwest.*redirect|Policy::none" --type rs -A 3 -B 1 | head -80

Repository: spacedriveapp/spacebot

Length of output: 92


🏁 Script executed:

rg "download_attachment_bytes_with_auth" -A 40 src/agent/channel.rs

Repository: spacedriveapp/spacebot

Length of output: 1963


🏁 Script executed:

# Search for any host validation or auth-stripping logic in the file
rg "host|trust|domain|auth.*redirect" src/agent/channel.rs -i -B 2 -A 2

Repository: spacedriveapp/spacebot

Length of output: 382


Fix critical Authorization header leak on Slack redirect chains and validate Location header parsing.

The code forwards the Authorization header to every redirect target without host validation. Slack's url_private URLs redirect to different hosts (e.g., files.slack.com); standard HTTP clients intentionally strip Authorization on cross-host redirects to prevent credential leaks. The current implementation bypasses this protection and sends credentials to untrusted hosts.

Additionally, location.to_str().unwrap_or_default() silently discards UTF-8 decoding errors, and relative Location header values (valid per RFC 7231/3986) are not resolved against the request URL.

Required fixes:

  1. Parse the base URL using reqwest::Url::parse() to enable proper relative URL resolution via .join()
  2. Validate the redirect target host against the original host before forwarding the Authorization header
  3. Use .map_err() on header parsing instead of unwrap_or_default()
  4. Add logging for redirect attempts to improve visibility

Failing to validate the redirect host will leak OAuth tokens to attacker-controlled domains if a Slack URL is compromised or during a network attack.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/agent/channel.rs` around lines 2454 - 2524,
download_attachment_bytes_with_auth currently forwards the Authorization header
to every redirect and silently drops Location header errors; fix it by parsing
the original attachment.url into a reqwest::Url (use Url::parse) and on each
redirect resolve the Location via url.join(location_str).map_err(...) instead of
unwrap_or_default, then compare the redirect target's host (new_url.host_str())
to the original_url.host_str() and only include the Authorization header on
requests where the host matches; if the host differs, send the GET without AUTH.
Also add logging calls around each redirect attempt (include status and target
URL) and keep the existing redirect limit behavior in
download_attachment_bytes_with_auth; update download_attachment_bytes to still
call the auth-specific path only when attachment.auth_header.is_some().


/// Download an image attachment and encode it as base64 for the LLM.
async fn download_image_attachment(
http: &reqwest::Client,
attachment: &crate::Attachment,
) -> UserContent {
let response = match http.get(&attachment.url).send().await {
Ok(r) => r,
Err(error) => {
tracing::warn!(%error, filename = %attachment.filename, "failed to download image");
return UserContent::text(format!(
"[Failed to download image: {}]",
attachment.filename
));
}
};

let bytes = match response.bytes().await {
let bytes = match download_attachment_bytes(http, attachment).await {
Ok(b) => b,
Err(error) => {
tracing::warn!(%error, filename = %attachment.filename, "failed to read image bytes");
tracing::warn!(%error, filename = %attachment.filename, "failed to download image");
return UserContent::text(format!(
"[Failed to download image: {}]",
attachment.filename
Expand Down Expand Up @@ -2498,21 +2559,10 @@ async fn transcribe_audio_attachment(
http: &reqwest::Client,
attachment: &crate::Attachment,
) -> UserContent {
let response = match http.get(&attachment.url).send().await {
Ok(r) => r,
Err(error) => {
tracing::warn!(%error, filename = %attachment.filename, "failed to download audio");
return UserContent::text(format!(
"[Failed to download audio: {}]",
attachment.filename
));
}
};

let bytes = match response.bytes().await {
let bytes = match download_attachment_bytes(http, attachment).await {
Ok(b) => b,
Err(error) => {
tracing::warn!(%error, filename = %attachment.filename, "failed to read audio bytes");
tracing::warn!(%error, filename = %attachment.filename, "failed to download audio");
return UserContent::text(format!(
"[Failed to download audio: {}]",
attachment.filename
Expand Down Expand Up @@ -2594,7 +2644,9 @@ async fn transcribe_audio_attachment(
"temperature": 0
});

let response = match http
let response = match deps
.llm_manager
.http_client()
.post(&endpoint)
.header("authorization", format!("Bearer {}", provider.api_key))
.header("content-type", "application/json")
Expand Down Expand Up @@ -2722,8 +2774,8 @@ async fn download_text_attachment(
http: &reqwest::Client,
attachment: &crate::Attachment,
) -> UserContent {
let response = match http.get(&attachment.url).send().await {
Ok(r) => r,
let bytes = match download_attachment_bytes(http, attachment).await {
Ok(b) => b,
Err(error) => {
tracing::warn!(%error, filename = %attachment.filename, "failed to download text file");
return UserContent::text(format!(
Expand All @@ -2733,13 +2785,7 @@ async fn download_text_attachment(
}
};

let content = match response.text().await {
Ok(c) => c,
Err(error) => {
tracing::warn!(%error, filename = %attachment.filename, "failed to read text file");
return UserContent::text(format!("[Failed to read file: {}]", attachment.filename));
}
};
let content = String::from_utf8_lossy(&bytes).into_owned();

// Truncate very large files to avoid blowing up context
let truncated = if content.len() > 50_000 {
Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,9 @@ pub struct Attachment {
pub mime_type: String,
pub url: String,
pub size_bytes: Option<u64>,
/// Optional auth header value for private URLs (e.g. Slack's `url_private`).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub auth_header: Option<String>,
Comment on lines +314 to +316
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Find uses of auth_header and related serialization patterns
rg -n "auth_header" -g'*.rs' -A 3 -B 3

Repository: spacedriveapp/spacebot

Length of output: 7152


🏁 Script executed:

#!/bin/bash
# Find Attachment struct definition and related serialization
rg -n "struct Attachment|impl.*Attachment|#\[derive.*Serialize" -g'*.rs' -A 5

Repository: spacedriveapp/spacebot

Length of output: 50378


🏁 Script executed:

#!/bin/bash
# Check for SpacebotHook and on_tool_result implementation
rg -n "SpacebotHook|on_tool_result|on_completion_response" -g'*.rs' -A 10

Repository: spacedriveapp/spacebot

Length of output: 13524


🏁 Script executed:

#!/bin/bash
# Search for actual persistence/logging of messages or attachments
rg -n "save_conversation|persist_message|log_.*message|to_string|to_json|format!" -g'*.rs' | head -50

Repository: spacedriveapp/spacebot

Length of output: 4086


🏁 Script executed:

#!/bin/bash
# Read the full on_tool_result implementation in SpacebotHook
sed -n '220,260p' src/hooks/spacebot.rs

Repository: spacedriveapp/spacebot

Length of output: 1749


🏁 Script executed:

#!/bin/bash
# Search for where InboundMessage or Attachment are actually persisted or logged
rg -n "InboundMessage|Attachment" -g'*.rs' | rg -i "save|persist|log|serialize|json|store|write|db" | head -30

Repository: spacedriveapp/spacebot

Length of output: 48


🏁 Script executed:

#!/bin/bash
# Check if conversation history includes attachments and whether it's saved to DB
rg -n "conversation.*save|history.*persist|TimelineItem|save_conversation" -g'*.rs' -A 3 | head -50

Repository: spacedriveapp/spacebot

Length of output: 3210


🏁 Script executed:

#!/bin/bash
# Find the scan_for_leaks implementation to see what it detects
rg -n "fn scan_for_leaks|scan_for_leaks" -g'*.rs' -A 20 | head -80

Repository: spacedriveapp/spacebot

Length of output: 4462


🏁 Script executed:

#!/bin/bash
# Check if InboundMessage is ever saved to conversation history or DB
rg -n "InboundMessage" -g'*.rs' | rg -v "use |pub struct|pub enum|fn.*InboundMessage|=>|Message::Text|Message::Media" | head -20

Repository: spacedriveapp/spacebot

Length of output: 1634


🏁 Script executed:

#!/bin/bash
# Search for what gets saved in conversation history - look at Message enum
rg -n "TimelineItem::Message" -g'*.rs' -B 5 -A 5 | head -60

Repository: spacedriveapp/spacebot

Length of output: 1867


🏁 Script executed:

#!/bin/bash
# Check if Attachment is ever returned in API responses or serialized in outbound messages
rg -n "Attachment|auth_header" -g'*.rs' | rg -E "OutboundResponse|api|response|serialize" | head -20

Repository: spacedriveapp/spacebot

Length of output: 462


🏁 Script executed:

#!/bin/bash
# Check what happens with attachments in tool results - can they leak through tool outputs?
rg -n "attachment|media|Attachment" -g'*.rs' | rg -E "to_string|format|json|serde" | head -20

Repository: spacedriveapp/spacebot

Length of output: 462


🏁 Script executed:

#!/bin/bash
# Verify if download_attachment_bytes_with_auth actually uses the Bearer token and whether the result is exposed
sed -n '2475,2530p' src/agent/channel.rs

Repository: spacedriveapp/spacebot

Length of output: 1802


🏁 Script executed:

#!/bin/bash
# Check if Attachment is ever serialized in API responses or output
rg -n "Attachment" -g'*.rs' -A 3 -B 1 | grep -E "(to_json|serde_json|Serialize|api|response)" | head -20

Repository: spacedriveapp/spacebot

Length of output: 128


🏁 Script executed:

#!/bin/bash
# Check if there's any path where download_attachment_bytes result could include the auth header or leak
rg -n "download_attachment_bytes|download_image_attachment" -g'*.rs' -A 10 | head -50

Repository: spacedriveapp/spacebot

Length of output: 3219


🏁 Script executed:

#!/bin/bash
# Check if MessageContent::Media is ever logged or exposed in logs/APIs
rg -n "MessageContent::Media" -g'*.rs' -B 2 -A 5

Repository: spacedriveapp/spacebot

Length of output: 3965


🏁 Script executed:

#!/bin/bash
# Final check: is InboundMessage ever logged or serialized in error/debug paths?
rg -n "tracing::|log::|println|debug|InboundMessage|Attachment" -g'*.rs' | grep -E "tracing::(debug|trace|warn|error).*InboundMessage|format.*InboundMessage" | head -10

Repository: spacedriveapp/spacebot

Length of output: 168


🏁 Script executed:

#!/bin/bash
# Check if there's any serialization of InboundMessage to JSON for logs or APIs
rg -n "serde_json::to_|json!" -g'*.rs' -B 2 -A 2 | grep -E "(InboundMessage|Attachment|message)" | head -15

Repository: spacedriveapp/spacebot

Length of output: 1495


Add #[serde(skip)] to auth_header to prevent accidental credential leakage.

auth_header contains bearer tokens (e.g., Slack's url_private auth), and Attachment derives Serialize/Deserialize. While currently auth_header is not persisted in conversation history, the serializable field with embedded secrets is a risk vector. A single refactor or error path could leak it (e.g., debug logging, serialized error responses, or future feature additions). Leak detection already scans tool results in SpacebotHook.on_tool_result(), but inbound attachment headers bypass this check. Apply #[serde(skip)] to auth_header as a zero-cost defensive measure, or migrate to an internal-only struct that excludes this field from serialization.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/lib.rs` around lines 314 - 316, The Attachment struct's auth_header field
currently uses #[serde(default, skip_serializing_if = "Option::is_none")] but
must never be serialized; update the field-level serde attributes on auth_header
to #[serde(skip)] (remove the existing default/skip_serializing_if), so
Attachment::auth_header (Option<String>) is excluded from all serde
Serialize/Deserialize paths and cannot leak in logs or persisted data; ensure no
other derived serialization relies on that attribute and run tests to confirm
compile/serde behavior.

}

/// Outbound response to messaging platforms.
Expand Down
1 change: 1 addition & 0 deletions src/messaging/discord.rs
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,7 @@ fn extract_content(message: &Message) -> MessageContent {
mime_type: attachment.content_type.clone().unwrap_or_default(),
url: attachment.url.clone(),
size_bytes: Some(attachment.size as u64),
auth_header: None,
})
.collect();

Expand Down
18 changes: 14 additions & 4 deletions src/messaging/slack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,14 @@ async fn handle_message_event(
client: Arc<SlackHyperClient>,
states: SlackClientEventsUserState,
) -> UserCallbackResult<()> {
// Skip message edits / deletes / bot_message subtypes
if msg_event.subtype.is_some() {
// Skip message edits / deletes / bot_message subtypes, but allow file-related
// subtypes so user-uploaded images and documents are processed.
if let Some(ref subtype) = msg_event.subtype
&& !matches!(
subtype,
SlackMessageEventType::FileShare | SlackMessageEventType::FileShared
)
{
return Ok(());
}

Expand Down Expand Up @@ -231,7 +237,7 @@ async fn handle_message_event(
format!("slack:{}:{}", team_id_str, channel_id)
};

let content = extract_message_content(&msg_event.content);
let content = extract_message_content(&msg_event.content, &adapter_state.bot_token);

let (metadata, formatted_author) = build_metadata_and_author(
&team_id_str,
Expand Down Expand Up @@ -1259,7 +1265,10 @@ fn markdown_content(text: impl Into<String>) -> SlackMessageContent {
}

/// Extract `MessageContent` from an optional `SlackMessageContent`.
fn extract_message_content(content: &Option<SlackMessageContent>) -> MessageContent {
fn extract_message_content(
content: &Option<SlackMessageContent>,
bot_token: &str,
) -> MessageContent {
let Some(msg_content) = content else {
return MessageContent::Text(String::new());
};
Expand All @@ -1274,6 +1283,7 @@ fn extract_message_content(content: &Option<SlackMessageContent>) -> MessageCont
mime_type: f.mimetype.as_ref().map(|m| m.0.clone()).unwrap_or_default(),
url: url.to_string(),
size_bytes: None,
auth_header: Some(format!("Bearer {}", bot_token)),
})
})
.collect();
Expand Down
5 changes: 5 additions & 0 deletions src/messaging/telegram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec<Attachment> {
mime_type: "image/jpeg".into(),
url: largest.file.id.to_string(),
size_bytes: Some(largest.file.size as u64),
auth_header: None,
});
}
}
Expand All @@ -683,6 +684,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec<Attachment> {
.unwrap_or_else(|| "application/octet-stream".into()),
url: doc.document.file.id.to_string(),
size_bytes: Some(doc.document.file.size as u64),
auth_header: None,
});
}
MediaKind::Video(video) => {
Expand All @@ -700,6 +702,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec<Attachment> {
.unwrap_or_else(|| "video/mp4".into()),
url: video.video.file.id.to_string(),
size_bytes: Some(video.video.file.size as u64),
auth_header: None,
});
}
MediaKind::Voice(voice) => {
Expand All @@ -713,6 +716,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec<Attachment> {
.unwrap_or_else(|| "audio/ogg".into()),
url: voice.voice.file.id.to_string(),
size_bytes: Some(voice.voice.file.size as u64),
auth_header: None,
});
}
MediaKind::Audio(audio) => {
Expand All @@ -730,6 +734,7 @@ fn extract_attachments(message: &teloxide::types::Message) -> Vec<Attachment> {
.unwrap_or_else(|| "audio/mpeg".into()),
url: audio.audio.file.id.to_string(),
size_bytes: Some(audio.audio.file.size as u64),
auth_header: None,
});
}
_ => {}
Expand Down