From 389876f174ac048c6b2ceed9aa3af91a82aa8b1f Mon Sep 17 00:00:00 2001 From: Daniel Cadenas Date: Tue, 15 Apr 2025 12:08:43 +0200 Subject: [PATCH] S3 storage backend integration --- Cargo.lock | 52 ----- Cargo.toml | 23 +- Dockerfile | 10 +- Dockerfile.arm64 | 7 +- config.yaml | 21 +- docker-compose.yml | 6 +- scripts/blossom_media_upload.sh | 164 ++++++++++++++ src/bin/main.rs | 8 +- src/bin/storage-manage.rs | 368 -------------------------------- src/lib.rs | 1 - src/settings.rs | 27 ++- src/spaces.rs | 65 ++++-- 12 files changed, 259 insertions(+), 493 deletions(-) create mode 100644 scripts/blossom_media_upload.sh delete mode 100644 src/bin/storage-manage.rs diff --git a/Cargo.lock b/Cargo.lock index 1455e94..cc3d1c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1314,19 +1314,6 @@ dependencies = [ "yaml-rust2", ] -[[package]] -name = "console" -version = "0.15.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" -dependencies = [ - "encode_unicode", - "libc", - "once_cell", - "unicode-width", - "windows-sys 0.59.0", -] - [[package]] name = "const-oid" version = "0.9.6" @@ -1743,12 +1730,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "encode_unicode" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" - [[package]] name = "encoding_rs" version = "0.8.35" @@ -2985,19 +2966,6 @@ dependencies = [ "serde", ] -[[package]] -name = "indicatif" -version = "0.17.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" -dependencies = [ - "console", - "number_prefix", - "portable-atomic", - "unicode-width", - "web-time", -] - [[package]] name = "infer" version = "0.19.0" @@ -3644,12 +3612,6 @@ dependencies = [ "syn", ] -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - [[package]] name = "object" version = "0.36.7" @@ -3966,12 +3928,6 @@ dependencies = [ "universal-hash", ] -[[package]] -name = "portable-atomic" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" - [[package]] name = "powerfmt" version = "0.2.0" @@ -4616,7 +4572,6 @@ dependencies = [ "http-body 1.0.1", "http-range-header", "image", - "indicatif", "infer", "libc", "log", @@ -4635,7 +4590,6 @@ dependencies = [ "tokio", "tokio-util", "uuid", - "walkdir", ] [[package]] @@ -6065,12 +6019,6 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" -[[package]] -name = "unicode-width" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" - [[package]] name = "unicode-xid" version = "0.2.6" diff --git a/Cargo.toml b/Cargo.toml index e1bb721..67a8faf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,16 +7,11 @@ edition = "2021" name = "route96" path = "src/bin/main.rs" -[[bin]] -name = "storage-manage" -path = "src/bin/storage-manage.rs" -required-features = ["storage-manage"] - [lib] name = "route96" [features] -default = ["media-compression", "sqlx-postgres", 
"s3-storage", "ranges"] +default = ["media-compression", "sqlx-postgres"] media-compression = [ "dep:ffmpeg-the-third", "dep:infer", @@ -28,14 +23,10 @@ media-compression = [ "dep:candle-transformers", ] labels = ["dep:candle-core", "dep:candle-nn", "dep:candle-transformers"] -blossom = [] torrent-v2 = [] react-ui = [] -storage-manage = ["dep:walkdir", "dep:indicatif"] -dev = ["blossom", "ranges"] +dev = [] void-cat-redirects = [] -s3-storage = ["dep:aws-config", "dep:aws-sdk-s3", "dep:http-body-0-4"] -ranges = ["dep:http-range-header"] [dependencies] log = "0.4.27" @@ -67,17 +58,15 @@ candle-core = { git = "https://github.com/huggingface/candle.git", tag = "0.8.4" candle-nn = { git = "https://github.com/huggingface/candle.git", tag = "0.8.4", optional = true } candle-transformers = { git = "https://github.com/huggingface/candle.git", tag = "0.8.4", optional = true } sqlx-postgres = { version = "0.8.3", optional = true, features = ["chrono", "uuid"] } -http-range-header = { version = "0.4.2", optional = true } -walkdir = { version = "2.5.0", optional = true } -indicatif = { version = "0.17.11", optional = true } +http-range-header = { version = "0.4.2" } async-trait = "0.1.80" futures = "0.3" # AWS SDK Dependencies for S3/Spaces -aws-config = { version = "1.1.8", optional = true } -aws-sdk-s3 = { version = "1.19.0", optional = true } -http-body-0-4 = { version = "1.0.0", package = "http-body", optional = true } +aws-config = { version = "1.1.8" } +aws-sdk-s3 = { version = "1.19.0" } +http-body-0-4 = { version = "1.0.0", package = "http-body" } serde_json = "1.0.117" image = { version = "0.25.1", optional = true } diff --git a/Dockerfile b/Dockerfile index 194daa8..b0dbd2a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,9 +34,15 @@ RUN git clone --single-branch --branch release/7.1 https://github.com/ffmpeg/FFm RUN rm Cargo.lock # RUN cargo tree -i half | cat RUN mkdir -p ~/.cargo && \ - echo '[net]' > ~/.cargo/config.toml && \ + echo '[build]' > ~/.cargo/config.toml && \ + echo 'rustflags = ["-C", "target-cpu=native"]' >> ~/.cargo/config.toml && \ + echo '[net]' >> ~/.cargo/config.toml && \ echo 'git-fetch-with-cli = true' >> ~/.cargo/config.toml -RUN cargo install --path . --root /app/build --features "blossom,ranges" + +# Perform the installation, enabling only necessary features +# Remove s3-storage and ranges from features +# Ensure default features (like media-compression) are included +RUN cargo install --path . 
--root /app/build
 
 FROM node:bookworm AS ui_builder
 WORKDIR /app/src
diff --git a/Dockerfile.arm64 b/Dockerfile.arm64
index 87e2de9..ccf0519 100644
--- a/Dockerfile.arm64
+++ b/Dockerfile.arm64
@@ -57,9 +57,8 @@ RUN wget https://ffmpeg.org/releases/ffmpeg-7.1.tar.xz && \
 COPY src src
 COPY migrations migrations
 
-# Clean potential stale artifacts and Build with hardcoded features using cargo build
-RUN cargo clean && \
-    cargo build --release --no-default-features --features "blossom,ranges"
+# Build with default features; S3 support and range handling are now unconditional
+RUN cargo build --release
 
 FROM --platform=linux/arm64 node:bookworm AS ui_builder
 WORKDIR /app/src
@@ -81,7 +80,7 @@ LABEL org.opencontainers.image.licenses="MIT"
 LABEL org.opencontainers.image.authors="Kieran"
 WORKDIR /app
 RUN apt update && \
-    apt install -y libx264-164 libwebp7 libvpx7 ca-certificates gosu && \
+    apt install -y libx264-164 libwebp7 libvpx7 ca-certificates gosu libxcb1 libxcb-shm0 && \
     rm -rf /var/lib/apt/lists/*
 
 RUN groupadd -r appgroup && useradd --no-log-init -r -g appgroup appuser
diff --git a/config.yaml b/config.yaml
index ed91b2b..e3dfd7e 100644
--- a/config.yaml
+++ b/config.yaml
@@ -4,7 +4,7 @@ listen: "0.0.0.0:8000"
 # Specifies which storage backend to use ("FileSystem" or "S3")
 # Defaults to FileSystem if not specified.
 # storage_type: FileSystem
-# storage_type: S3 # Requires the 's3-storage' feature to be enabled during build
+storage_type: S3
 
 # Database connection string (PostgreSQL)
 database: "postgres://postgres:root@db:5432/route96"
@@ -12,19 +12,20 @@ database: "postgres://postgres:root@db:5432/route96"
 # --- Storage Backend Settings ---
 
 # Settings for the FileSystem backend (used if storage_type is FileSystem or omitted)
-filesystem:
+#filesystem:
   # Directory to store uploads
-  storage_dir: "/app/data"
+  #storage_dir: "/app/data"
 
 # Settings for the S3 backend (used if storage_type is S3)
-# Requires the 's3-storage' feature flag during compilation.
-# s3:
-#   region: "us-east-1" # Optional, defaults to provider default
-#   endpoint_url: "https://nyc3.digitaloceanspaces.com" # Optional, use for non-AWS S3-compatible storage like DigitalOcean Spaces
-#   bucket_name: "your-bucket-name"
-#   access_key_id: "YOUR_ACCESS_KEY"
-#   secret_access_key: "YOUR_SECRET_KEY"
-#   force_path_style: false # Optional, default is false. Set to true for some S3-compatible services like MinIO
+# (S3 support is always compiled in; no feature flag is required.)
+s3:
+  region: "us-east-1" # Optional, defaults to provider default
+  endpoint_url: "https://nyc3.digitaloceanspaces.com" # Optional, use for non-AWS S3-compatible storage like DigitalOcean Spaces
+  bucket_name: "blossom-test"
+  access_key_id: "DO801BJEC2FP8YC8B7XD"
+  # Secret key is now set via environment variable APP__S3__SECRET_ACCESS_KEY
+  secret_access_key: ""
+  force_path_style: false # Optional, default is false. Set to true for some S3-compatible services like MinIO
 
-# Maximum support filesize for uploading
+# Maximum supported filesize for uploading
 max_upload_bytes: 5242880 # 5 MiB default
diff --git a/docker-compose.yml b/docker-compose.yml
index 1695d28..d682528 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -26,15 +26,13 @@ services:
     build:
       context: . 
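+      # (Comment added for context: the FEATURES build arg that used to be
+      # passed here was removed; the arm64 Dockerfile now builds with its
+      # default feature set.)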
      dockerfile: Dockerfile.arm64
-      args:
-        # Only including essential features:
-        # - ranges: For better file serving (partial downloads/streaming)
-        FEATURES: "ranges"
     environment:
       # Config values overridden by environment variables - keeping only essential/sensitive ones
       APP__DATABASE: postgres://postgres:root@db:5432/route96?sslmode=require
       APP__FILESYSTEM__STORAGE_DIR: /app/data
       APP__PUBLIC_URL: http://localhost:8000
+      # S3 credentials
+      APP__S3__SECRET_ACCESS_KEY: ${BLOSSOM_DEV_SECRET}
       # Nested values with double underscores
       APP__NIP29_RELAY__URL: ws://host.docker.internal:8080
       # Test key (pubkey: 385c3a6ec0b9d57a4330dbd6284989be5bd00e41c535f9ca39b6ae7c521b81cd) - replace if needed
diff --git a/scripts/blossom_media_upload.sh b/scripts/blossom_media_upload.sh
new file mode 100644
index 0000000..170ba66
--- /dev/null
+++ b/scripts/blossom_media_upload.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# Blossom Media Upload Script (for Optimization/Processing)
+# This script uploads a media file to a Blossom server's /media endpoint
+# using a Blossom authorization event (kind 24242). The server is expected
+# to process/optimize this file.
+
+# Function to display command and wait for user input
+pause_and_run() {
+    local command="$1"
+    echo "===================================================="
+    echo "COMMAND TO RUN:"
+    echo "$command"
+    echo "===================================================="
+    read -p "Press any key to continue..." -n1 -s
+    echo ""
+    eval "$command"
+}
+
+# Check for required arguments
+if [ "$#" -lt 4 ]; then
+    echo "Usage: $0 <file_path> <server_url> <group_id> <secret_key>"
+    echo "Example: $0 video.mp4 http://example.com your-group-id nsec1..."
+    exit 1
+fi
+
+# Assign arguments to variables
+FILE_PATH="$1"
+SERVER_URL="$2"
+GROUP_ID="$3"
+SECRET_KEY="$4"
+
+# Ensure SERVER_URL does not end with a trailing slash
+SERVER_URL=$(echo "$SERVER_URL" | sed 's#/$##')
+
+# Check if file exists
+if [ ! -f "$FILE_PATH" ]; then
+    echo "Error: File '$FILE_PATH' not found"
+    exit 1
+fi
+
+# Check if nak is installed
+if ! command -v nak &> /dev/null; then
+    echo "Error: 'nak' command not found. Please install it first." 
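+    # ('nak' is the "nostr army knife" CLI from https://github.com/fiatjaf/nak,
+    # a Go tool; the script uses it below to sign the kind 24242 auth event.)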
+ echo "You can install it with: cargo install nak" + exit 1 +fi + +# Get file information +FILE_HASH=$(sha256sum "$FILE_PATH" | cut -d ' ' -f 1) +FILE_SIZE=$(stat -f%z "$FILE_PATH" 2>/dev/null || stat -c%s "$FILE_PATH") +FILE_TYPE=$(file --mime-type -b "$FILE_PATH") + +# Current time and expiration (30 seconds from now for more reliability) +NOW=$(date +%s) +EXPIRATION=$((NOW + 30)) + +# Generate the authentication event for the /media endpoint +# Note: t=media is used here instead of t=upload +BASE64_AUTH_EVENT=$(nak event \ + --content='Upload media for processing' \ + --kind 24242 \ + -t t='media' \ + -t expiration="$EXPIRATION" \ + -t x="$FILE_HASH" \ + -t h="$GROUP_ID" \ + --sec "$SECRET_KEY" | base64) + +# Upload the file to the /media endpoint + +# Create temporary files for response body and headers +TEMP_RESPONSE_FILE=$(mktemp) +TEMP_HEADERS_FILE=$(mktemp) + +# If FILE_TYPE is text/plain for a markdown file, correct it (though unlikely for media endpoint) +if [[ "$FILE_PATH" == *.md ]] && [[ "$FILE_TYPE" == "text/plain" ]]; then + FILE_TYPE="text/markdown" +fi + +# Perform the upload and capture HTTP status code, saving headers and body +CURL_COMMAND="curl -s \\ + -D \"$TEMP_HEADERS_FILE\" \\ + -o \"$TEMP_RESPONSE_FILE\" \\ + \"${SERVER_URL}/media\" \\ + -X PUT \\ + -H \"Content-Type: $FILE_TYPE\" \\ + -H \"X-Content-Type: $FILE_TYPE\" \\ + -H \"X-SHA-256: $FILE_HASH\" \\ + -H \"X-Content-Length: $FILE_SIZE\" \\ + -H \"Authorization: Nostr $BASE64_AUTH_EVENT\" \\ + --data-binary @\"$FILE_PATH\"" + +pause_and_run "$CURL_COMMAND" + +# Get the HTTP code in a macOS-compatible way +HTTP_CODE=$(head -1 "$TEMP_HEADERS_FILE" | cut -d' ' -f2) + +# Read the response body from the temp file +RESPONSE=$(cat "$TEMP_RESPONSE_FILE") + +# Extract reason header if present +REASON="" +if grep -q "X-Reason:" "$TEMP_HEADERS_FILE"; then + REASON=$(grep "X-Reason:" "$TEMP_HEADERS_FILE" | sed 's/X-Reason: //' | tr -d '\r') +elif grep -q "x-reason:" "$TEMP_HEADERS_FILE"; then + REASON=$(grep "x-reason:" "$TEMP_HEADERS_FILE" | sed 's/x-reason: //' | tr -d '\r') +fi + +# Check for error status codes +# Note: The specific "already exists" checks from upload might not apply +# if /media always returns a new blob descriptor. Kept for robustness. +if [ -z "$HTTP_CODE" ] || [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then + echo "Error: Server returned HTTP $HTTP_CODE" + echo "Server response:" + echo "$RESPONSE" | jq . 2>/dev/null || echo "$RESPONSE" + + # Try to extract error message if it exists + if command -v jq &> /dev/null; then + ERROR_MSG=$(echo "$RESPONSE" | jq -r '.message' 2>/dev/null) + if [ "$ERROR_MSG" != "null" ] && [ "$ERROR_MSG" != "" ]; then + echo "Error message: $ERROR_MSG" + fi + fi + + # Display reason if we haven't already + if [ -n "$REASON" ]; then + echo "Reason: $REASON" + fi + + # Display full headers for debugging + echo "Full response headers:" + cat "$TEMP_HEADERS_FILE" + + # Clean up temp files + rm -f "$TEMP_RESPONSE_FILE" "$TEMP_HEADERS_FILE" + exit 1 +fi + +echo "Media upload successful (HTTP $HTTP_CODE)." +echo "Response:" +echo "$RESPONSE" | jq . 
2>/dev/null || echo "$RESPONSE" +echo "---" +echo "Full response headers:" +cat "$TEMP_HEADERS_FILE" +echo "---" + +# Extract and display the URL if the response is JSON +if command -v jq &> /dev/null; then + URL=$(echo "$RESPONSE" | jq -r '.url' 2>/dev/null) + SHA256=$(echo "$RESPONSE" | jq -r '.sha256' 2>/dev/null) + if [ "$URL" != "null" ] && [ "$URL" != "" ]; then + echo "Processed Media URL: $URL" + fi + if [ "$SHA256" != "null" ] && [ "$SHA256" != "" ]; then + echo "Processed Media SHA256: $SHA256" + if [ "$SHA256" != "$FILE_HASH" ]; then + echo "NOTE: Processed SHA256 ($SHA256) differs from original ($FILE_HASH), as expected." + fi + fi +fi + +# Clean up temp files +rm -f "$TEMP_RESPONSE_FILE" "$TEMP_HEADERS_FILE" + +echo "Done." +exit 0 \ No newline at end of file diff --git a/src/bin/main.rs b/src/bin/main.rs index 40fe339..7ef7b60 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -18,8 +18,7 @@ use route96::filesystem::FileStore; use route96::nip29::init_nip29_client; use route96::routes; use route96::routes::{get_blob_route, head_blob, health_check, root}; -use route96::settings::{Settings, StorageBackendType}; -#[cfg(feature = "s3-storage")] +use route96::settings::{Settings, StorageType}; use route96::spaces::SpacesStore; use route96::storage::StorageBackend; use std::env; @@ -71,7 +70,7 @@ async fn main() -> Result<(), Error> { info!("Using temporary directory: {:?}", temp_dir); let storage: Arc = match settings.storage_type { - StorageBackendType::FileSystem => { + StorageType::FileSystem => { info!("Using FileSystem storage backend."); let fs_settings = settings.filesystem.clone().context( "FileSystem storage type selected, but [filesystem] settings are missing", @@ -80,8 +79,7 @@ async fn main() -> Result<(), Error> { let fs = FileStore::new(&fs_settings).expect("Failed to initialize FileStore"); Arc::new(fs) } - #[cfg(feature = "s3-storage")] - StorageBackendType::S3 => { + StorageType::S3 => { info!("Using S3 storage backend."); let s3_settings = settings .s3 diff --git a/src/bin/storage-manage.rs b/src/bin/storage-manage.rs deleted file mode 100644 index 921eda4..0000000 --- a/src/bin/storage-manage.rs +++ /dev/null @@ -1,368 +0,0 @@ -use anyhow::{anyhow, Context, Error, Result}; -use clap::{Parser, Subcommand}; -use config::Config; -use indicatif::{ProgressBar, ProgressStyle}; -use log::{error, info, warn}; -use route96::db::{Database, FileUpload}; -use route96::filesystem::FileStore; -use route96::processing::probe_file; -use route96::settings::{Settings, StorageBackendType}; -#[cfg(feature = "s3-storage")] -use route96::spaces::SpacesStore; -use route96::storage::{StorageBackend, StorageResult}; -use sha2::{Digest, Sha256}; -use std::future::Future; -use std::path::{Path, PathBuf}; -use std::pin::Pin; -use std::sync::Arc; -use std::time::SystemTime; -use tokio::fs::File; -use tokio::io::{AsyncRead, AsyncReadExt}; -use tokio::sync::Semaphore; - -#[derive(Parser, Debug)] -#[command(version, about)] -struct Args { - #[arg(long)] - pub config: Option, - - #[clap(subcommand)] - pub command: Commands, -} - -#[derive(Debug, Subcommand)] -enum Commands { - /// Verify file hash matches filename / path and optionally delete mismatches. - VerifyIntegrity { - #[arg(long)] - delete: Option, - }, - - /// Import files from an external directory into the storage directory. - /// Does NOT index files into the database; use index-storage for that. 
-    ImportFiles {
-        #[arg(long)]
-        from: PathBuf,
-        #[arg(long, default_missing_value = "true", num_args = 0..=1)]
-        probe_media: Option<bool>,
-    },
-
-    /// Scan storage directory and add files missing from the database index.
-    IndexStorage {
-        /// Print files that would be indexed without actually modifying the database.
-        #[arg(long, default_missing_value = "true", num_args = 0..=1)]
-        dry_run: Option<bool>,
-    },
-}
-
-#[tokio::main]
-async fn main() -> Result<(), Error> {
-    if std::env::var("RUST_LOG").is_err() {
-        std::env::set_var("RUST_LOG", "info");
-    }
-    pretty_env_logger::init();
-
-    let args: Args = Args::parse();
-
-    let builder = Config::builder()
-        .add_source(config::File::with_name(if let Some(ref c) = args.config {
-            c.as_str()
-        } else {
-            "config.yaml"
-        }))
-        .add_source(config::Environment::with_prefix("APP"))
-        .build()?;
-
-    let settings: Settings = builder
-        .try_deserialize()
-        .context("Failed to deserialize settings")?;
-
-    let storage: Arc<dyn StorageBackend> = match settings.storage_type {
-        StorageBackendType::FileSystem => {
-            info!("Using FileSystem storage backend.");
-            let fs_settings = settings.filesystem.clone().context(
-                "FileSystem storage type selected, but [filesystem] settings are missing",
-            )?;
-            let temp_dir = PathBuf::from(&settings.temp_dir);
-            tokio::fs::create_dir_all(&temp_dir)
-                .await
-                .context(format!("Failed to create temp directory: {:?}", temp_dir))?;
-
-            let fs = FileStore::new(&fs_settings, temp_dir);
-            Arc::new(fs)
-        }
-        #[cfg(feature = "s3-storage")]
-        StorageBackendType::S3 => {
-            info!("Using S3 storage backend.");
-            let s3_settings = settings
-                .s3
-                .clone()
-                .context("S3 storage type selected, but [s3] settings are missing")?;
-            let temp_dir = PathBuf::from(&settings.temp_dir);
-            tokio::fs::create_dir_all(&temp_dir)
-                .await
-                .context(format!("Failed to create temp directory: {:?}", temp_dir))?;
-
-            let spaces = SpacesStore::new(&s3_settings, temp_dir)
-                .await
-                .context("Failed to initialize S3 storage client")?;
-            Arc::new(spaces)
-        }
-    };
-
-    match args.command {
-        Commands::VerifyIntegrity { delete } => {
-            if settings.storage_type != StorageBackendType::FileSystem {
-                return Err(anyhow!(
-                    "'VerifyIntegrity' command only supports the FileSystem storage backend." 
-                ));
-            }
-
-            let fs_settings = settings
-                .filesystem
-                .context("[filesystem] settings missing")?;
-            let storage_dir = PathBuf::from(&fs_settings.storage_dir);
-            info!("Checking files in: {}", storage_dir.display());
-
-            iter_files(&storage_dir, 4, |entry, p| {
-                let p = p.clone();
-                Box::pin(async move {
-                    let id = if let Some(i) = id_from_path(&entry) {
-                        i
-                    } else {
-                        p.set_message(format!("Skipping invalid path: {}", &entry.display()));
-                        return Ok(());
-                    };
-
-                    let calculated_hash = match hash_local_file(&entry).await {
-                        Ok(h) => h,
-                        Err(e) => {
-                            p.set_message(format!("Error hashing {}: {}", entry.display(), e));
-                            return Ok(());
-                        }
-                    };
-
-                    if calculated_hash != id {
-                        if delete.unwrap_or(false) {
-                            p.set_message(format!("Deleting corrupt file: {}", &entry.display()));
-                            if let Err(e) = tokio::fs::remove_file(&entry).await {
-                                p.set_message(format!(
-                                    "Failed to delete {}: {}",
-                                    entry.display(),
-                                    e
-                                ));
-                            }
-                        } else {
-                            p.set_message(format!("File is corrupted: {}", &entry.display()));
-                        }
-                    } else {
-                        p.set_message(format!("Verified OK: {}", &entry.display()));
-                    }
-                    Ok(())
-                })
-            })
-            .await?;
-        }
-        Commands::ImportFiles { from, probe_media } => {
-            let db = Database::new(&settings.database).await?;
-            db.migrate().await?;
-            info!("Importing from directory: {}", from.display());
-
-            iter_files(&from, 4, |entry, p| {
-                let storage = storage.clone();
-                let p = p.clone();
-                Box::pin(async move {
-                    let mime = infer::get_from_path(&entry)?
-                        .map(|m| m.mime_type())
-                        .unwrap_or("application/octet-stream");
-
-                    if probe_media.unwrap_or(true)
-                        && (mime.starts_with("image/") || mime.starts_with("video/"))
-                        && probe_file(&entry).is_err()
-                    {
-                        p.set_message(format!("Skipping invalid media file: {}", &entry.display()));
-                        return Ok(());
-                    }
-
-                    let file = File::open(&entry)
-                        .await
-                        .context("Failed to open file for import")?;
-                    let boxed_stream: Box<dyn AsyncRead + Send + Unpin> = Box::pin(file);
-
-                    let dst = storage
-                        .put(boxed_stream, mime)
-                        .await
-                        .context("Storage put failed")?;
-
-                    match dst {
-                        StorageResult::AlreadyExists(hash) => {
-                            p.set_message(format!(
-                                "Duplicate file (hash: {}): {}",
-                                hex::encode(&hash),
-                                &entry.display()
-                            ));
-                        }
-                        StorageResult::NewFile { id, .. } => {
-                            p.set_message(format!(
-                                "Imported (hash: {}): {}",
-                                hex::encode(&id),
-                                &entry.display()
-                            ));
-                        }
-                    }
-                    Ok(())
-                })
-            })
-            .await?;
-        }
-        Commands::IndexStorage { dry_run } => {
-            if settings.storage_type != StorageBackendType::FileSystem {
-                return Err(anyhow!(
-                    "'IndexStorage' command only supports the FileSystem storage backend."
-                ));
-            }
-            let fs_settings = settings
-                .filesystem
-                .context("[filesystem] settings missing")?;
-            let storage_dir = PathBuf::from(&fs_settings.storage_dir);
-
-            let db = Database::new(&settings.database).await?;
-            db.migrate().await?;
-            info!(
-                "Indexing DB from storage directory: {}",
-                storage_dir.display()
-            );
-
-            iter_files(&storage_dir, 4, |entry, p| {
-                let db = db.clone();
-                let p = p.clone();
-                Box::pin(async move {
-                    let id = if let Some(i) = id_from_path(&entry) {
-                        i
-                    } else {
-                        p.set_message(format!("Skipping invalid path: {}", &entry.display()));
-                        return Ok(());
-                    };
-
-                    let u = db.get_file(&id).await.context("db get_file")?;
-                    if u.is_none() {
-                        if !dry_run.unwrap_or(false) {
-                            p.set_message(format!("Indexing file: {}", &entry.display()));
-
-                            let mime = infer::get_from_path(&entry)
-                                .context("infer mime type")? 
-                                .map(|m| m.mime_type())
-                                .unwrap_or("application/octet-stream")
-                                .to_string();
-                            let meta = entry.metadata().context("get file metadata")?;
-
-                            let upload_entry = FileUpload {
-                                id,
-                                size: meta.len() as i64,
-                                mime_type: mime,
-                                created: meta.created().unwrap_or(SystemTime::now()).into(),
-                                width: None,
-                                height: None,
-                                blur_hash: None,
-                                alt: None,
-                                duration: None,
-                                bitrate: None,
-                                h_tag: None,
-                                #[cfg(feature = "labels")]
-                                labels: vec![],
-                            };
-
-                            db.add_file(&upload_entry, None)
-                                .await
-                                .context("db add_file")?;
-                        } else {
-                            p.set_message(format!(
-                                "[DRY-RUN] Would index file: {}",
-                                &entry.display()
-                            ));
-                        }
-                    } else {
-                        p.set_message(format!("Already indexed: {}", &entry.display()));
-                    }
-                    Ok(())
-                })
-            })
-            .await?;
-        }
-    }
-    Ok(())
-}
-
-async fn hash_local_file(p: &Path) -> Result<Vec<u8>, Error> {
-    let mut file = File::open(p)
-        .await
-        .context(format!("Failed to open file for hashing: {:?}", p))?;
-    let mut hasher = Sha256::new();
-    let mut buf = [0; 8192];
-
-    loop {
-        let n = file
-            .read(&mut buf)
-            .await
-            .context(format!("Failed to read file for hashing: {:?}", p))?;
-        if n == 0 {
-            break;
-        }
-        hasher.update(&buf[..n]);
-    }
-
-    let res = hasher.finalize();
-    Ok(res.to_vec())
-}
-
-async fn iter_files<F>(dir: &PathBuf, max_concurrent: usize, func: F) -> Result<()>
-where
-    F: Fn(PathBuf, Arc<ProgressBar>) -> Pin<Box<dyn Future<Output = Result<()>> + Send>>
-        + Send
-        + Sync
-        + 'static,
-{
-    let semaphore = Arc::new(Semaphore::new(max_concurrent));
-    let mut entries = tokio::fs::read_dir(dir).await?;
-    let mut tasks = vec![];
-    let pb = Arc::new(ProgressBar::new_spinner());
-    pb.set_style(
-        ProgressStyle::with_template("{spinner:.green} [{elapsed_precise}] {wide_msg}")
-            .unwrap()
-            .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ "),
-    );
-
-    while let Some(entry) = entries.next_entry().await? {
-        let path = entry.path();
-        if path.is_file() {
-            let permit = semaphore.clone().acquire_owned().await.unwrap();
-            let func = Arc::new(func);
-            let pb_clone = pb.clone();
-            tasks.push(tokio::spawn(async move {
-                let result = func(path, pb_clone).await;
-                drop(permit);
-                result
-            }));
-        }
-    }
-
-    let mut results = vec![];
-    for task in tasks {
-        results.push(task.await);
-    }
-
-    for result in results {
-        match result {
-            Ok(Ok(_)) => {}
-            Ok(Err(e)) => return Err(e),
-            Err(e) => return Err(anyhow!(e)),
-        }
-    }
-
-    pb.finish_with_message("Done.");
-    Ok(())
-}
-
-fn id_from_path(path: &Path) -> Option<Vec<u8>> {
-    hex::decode(path.file_name()?.to_str()?).ok()
-}
diff --git a/src/lib.rs b/src/lib.rs
index 0c4f79a..3b3f4bc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,7 +13,6 @@ pub mod nip29;
 pub mod processing;
 pub mod routes;
 pub mod settings;
-#[cfg(feature = "s3-storage")]
 pub mod spaces;
 pub mod storage;
 // pub mod types; // Removed
diff --git a/src/settings.rs b/src/settings.rs
index 849f91f..52483a4 100644
--- a/src/settings.rs
+++ b/src/settings.rs
@@ -1,16 +1,14 @@
 use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
 
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-#[serde(rename_all = "PascalCase")]
-pub enum StorageBackendType {
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum StorageType {
     FileSystem,
-    #[cfg(feature = "s3-storage")]
     S3,
 }
 
-fn default_storage_type() -> StorageBackendType {
-    StorageBackendType::FileSystem
+fn default_storage_type() -> StorageType {
+    StorageType::FileSystem
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -26,14 +24,13 @@ pub struct Settings {
 
     /// Specifies which storage backend to use ("FileSystem" or "S3")
     #[serde(default = "default_storage_type")]
-    pub 
storage_type: StorageBackendType,
+    pub storage_type: StorageType,
 
     /// Filesystem backend specific settings
     #[serde(default)]
     pub filesystem: Option,
 
-    /// S3 backend specific settings (only available if 's3-storage' feature is enabled)
-    #[cfg(feature = "s3-storage")]
+    /// S3 backend specific settings
     #[serde(default)]
     pub s3: Option<S3StorageSettings>,
 
@@ -81,7 +78,6 @@ pub struct Nip29RelayConfig {
     pub cache_expiration: Option,
 }
 
-#[cfg(feature = "s3-storage")]
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct S3StorageSettings {
     pub region: Option<String>,
@@ -92,3 +88,16 @@
     #[serde(default)]
     pub force_path_style: bool,
 }
+
+impl Default for S3StorageSettings {
+    fn default() -> Self {
+        Self {
+            region: None,
+            endpoint_url: None,
+            bucket_name: String::new(),
+            access_key_id: String::new(),
+            secret_access_key: String::new(),
+            force_path_style: false,
+        }
+    }
+}
diff --git a/src/spaces.rs b/src/spaces.rs
index 841c39e..828365b 100644
--- a/src/spaces.rs
+++ b/src/spaces.rs
@@ -1,5 +1,3 @@
-#![cfg(feature = "s3-storage")]
-
 use crate::settings::S3StorageSettings;
 use crate::storage::{BlobMetadata, HttpRange, StorageBackend, StorageResult};
 use anyhow::{anyhow, Context, Result};
@@ -156,30 +154,55 @@
                 key,
                 self.bucket_name
             );
-            // Note: Temp file is deleted automatically when _cleanup goes out of scope here
             return Ok(StorageResult::AlreadyExists(hash));
         }
-        Err(e) => {
-            // Check if the error is NotFound using ProvideErrorMetadata trait
-            if let Some(aws_err) = e.as_service_error() {
-                if aws_err.is_not_found() {
-                    log::debug!("Object {} not found, proceeding with upload", key);
-                    // Continue to upload logic below
-                } else {
-                    // Other AWS error, clean up temp file and return error
-                    // Note: Temp file is deleted automatically when _cleanup goes out of scope here
-                    return Err(anyhow::Error::new(e).context(format!(
-                        "Failed to check S3 object existence for key {}",
-                        key
-                    )));
-                }
+        Err(aws_sdk_s3::error::SdkError::ServiceError(service_err)) => {
+            // Extract the raw HTTP response from the ServiceError variant
+            let raw_response = service_err.raw();
+            let http_status = raw_response.status().as_u16();
+            // Get the underlying HeadObjectError
+            let inner_error = service_err.into_err();
+
+            // Check the specific error type OR the HTTP status code
+            if inner_error.is_not_found() {
+                log::debug!(
+                    "Object {} not found (is_not_found() == true, HTTP {}), proceeding with upload",
+                    key,
+                    http_status
+                );
+                // Fallthrough to upload logic
+            } else if http_status == 404 {
+                log::debug!(
+                    "Object {} not found (HTTP status 404), proceeding with upload",
+                    key
+                );
+                // Fallthrough to upload logic
             } else {
-                // Not an AWS service error (e.g., network error), clean up and return
-                // Note: Temp file is deleted automatically when _cleanup goes out of scope here
-                return Err(anyhow::Error::new(e)
-                    .context(format!("Error during S3 head_object call for key {}", key)));
+                // It's a different service error (not NotFound or 404)
+                log::error!(
+                    "S3 head_object failed. SDK Error: {:?}, HTTP Status: {} for key {}",
+                    inner_error,
+                    http_status,
+                    key
+                );
+                // Note: Temp file cleanup happens when _cleanup guard drops
+                return Err(anyhow::Error::new(inner_error).context(format!(
+                    "Failed to check S3 object existence (HTTP {}), key {}",
+                    http_status, key
+                )));
             }
         }
+        Err(other_err) => {
+            // Handle other SDK errors (timeout, connection, build errors, etc.) 
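+            // (For reference: with aws-sdk-s3, the non-service variants that
+            // land here include SdkError::TimeoutError, SdkError::DispatchFailure,
+            // SdkError::ConstructionFailure and SdkError::ResponseError.)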
+ log::error!( + "Non-service error during S3 head_object call for key {}: {}", + key, + other_err + ); + // Note: Temp file cleanup happens when _cleanup guard drops + return Err(anyhow::Error::new(other_err) + .context(format!("Error during S3 head_object call for key {}", key))); + } } // 4. If not exists, upload temp file using PutObject