Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Changed
- Stream cache writes to disk via `std::io::copy` instead of buffering the full payload in memory
- `download_async()` now preserves raw bytes, matching `download()`
- Default blocking HTTP clients are reused across reads and content-length probes
- S3 status failures now use structured errors instead of string parsing
- S3 readers now stream data through a bounded channel instead of materializing the full object in memory

### Added
- Added `bzip2_decompress` benchmark coverage
- Added a benchmark helper script for comparing gzip backend feature flags and bz2 decompression

## v0.20.1 -- 2025-12-18

### Changed
Expand Down
7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ s3 = ["rust-s3"]
gz = ["gz-zlib-rs"]
# internal feature to enable gzip support
any_gz = []
gz-miniz = ["any_gz", "flate2/miniz_oxide"]
gz-miniz = ["any_gz", "flate2/rust_backend"]
gz-zlib-rs = ["any_gz", "flate2/zlib-rs"]
gz-zlib-ng = ["any_gz", "flate2/zlib-ng"]
gz-zlib-cloudflare = ["any_gz", "flate2/cloudflare_zlib"]
Expand Down Expand Up @@ -128,6 +128,11 @@ name = "gzip_decompress"
harness = false
required-features = ["any_gz"]

[[bench]]
name = "bzip2_decompress"
harness = false
required-features = ["bz"]

# This list only includes examples which require additional features to run. There are more in the examples directory.
[[example]]
name = "s3_operations"
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
"local_data.csv.gz"
).await?;

// download_async preserves the remote bytes.

Ok(())
}
```
Expand Down Expand Up @@ -295,6 +297,7 @@ match oneio::get_reader("file.txt") {
Ok(reader) => { /* use reader */ },
Err(OneIoError::Io(e)) => { /* filesystem error */ },
Err(OneIoError::Network(e)) => { /* network error */ },
Err(OneIoError::Status { service, code }) => { /* remote status error */ },
Err(OneIoError::NotSupported(msg)) => { /* feature not compiled */ },
}
```
Expand Down
53 changes: 53 additions & 0 deletions benches/bzip2_decompress.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
mod common;

use std::hint::black_box;
use std::io::{Read, Write};

use bzip2::read::BzDecoder;
use bzip2::write::BzEncoder;
use bzip2::Compression;
use criterion::{criterion_group, criterion_main, Criterion, Throughput};

/// Compress the shared text corpus with bzip2 and persist it as a bench fixture.
///
/// Returns the compressed bytes (for the in-memory decoder case), the
/// uncompressed corpus length (used for throughput accounting), and the
/// on-disk fixture path (for the `oneio::get_reader` case).
fn build_bzip2_fixture() -> (Vec<u8>, usize, String) {
    let corpus = common::build_text_corpus();
    let uncompressed_len = corpus.len();

    let mut encoder = BzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(&corpus).unwrap();
    let compressed = encoder.finish().unwrap();

    let fixture_path = common::write_fixture("bzip2.txt.bz2", &compressed)
        .to_string_lossy()
        .into_owned();

    (compressed, uncompressed_len, fixture_path)
}

/// Benchmark bzip2 decompression two ways: decoding the in-memory compressed
/// buffer directly with `BzDecoder`, and reading the on-disk fixture through
/// `oneio::get_reader` (which adds format detection and file I/O).
fn bench_bzip2_decompress(c: &mut Criterion) {
    let (compressed, decompressed_len, fixture_path) = build_bzip2_fixture();

    let mut group = c.benchmark_group("bzip2_decompress");
    // Throughput is reported against the *decompressed* size.
    group.throughput(Throughput::Bytes(decompressed_len as u64));

    group.bench_function("raw_decoder", |b| {
        b.iter(|| {
            let mut decoded = Vec::with_capacity(decompressed_len);
            BzDecoder::new(compressed.as_slice())
                .read_to_end(&mut decoded)
                .unwrap();
            black_box(decoded.len())
        })
    });

    group.bench_function("oneio_get_reader", |b| {
        b.iter(|| {
            let mut decoded = Vec::with_capacity(decompressed_len);
            oneio::get_reader(&fixture_path)
                .unwrap()
                .read_to_end(&mut decoded)
                .unwrap();
            black_box(decoded.len())
        })
    });

    group.finish();
}

// Register the benchmark with criterion's own entry point
// (this target sets `harness = false` in Cargo.toml).
criterion_group!(benches, bench_bzip2_decompress);
criterion_main!(benches);
38 changes: 38 additions & 0 deletions benches/common/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use std::fs;
use std::io::Write;
use std::path::PathBuf;

/// Uncompressed fixture size: 16 MiB of synthetic, text-like data.
const TARGET_CORPUS_SIZE: usize = 16 * 1024 * 1024;

/// Generate a deterministic corpus of exactly `TARGET_CORPUS_SIZE` bytes.
///
/// Each line is a synthetic BGP-flavored CSV record derived from a running
/// sequence number, so the output is reproducible across runs and compresses
/// like realistic routing text rather than random bytes.
pub fn build_text_corpus() -> Vec<u8> {
    let mut corpus = Vec::with_capacity(TARGET_CORPUS_SIZE);

    for seq in 0_u64.. {
        if corpus.len() >= TARGET_CORPUS_SIZE {
            break;
        }

        // Derive every field from `seq` so the corpus is fully deterministic.
        let origin_asn = (seq % 64512) + 100;
        let peer_asn = ((seq * 7) % 64512) + 100;
        let next_hop_octet = (seq % 254) + 1;
        let med = seq % 1000;
        let local_pref = 100 + (seq % 200);
        let community_high = 64512 + (seq % 64);
        let community_low = 100 + (seq % 4096);

        writeln!(
            &mut corpus,
            "{seq},AS{origin_asn:05},AS{peer_asn:05},peer=route-views.eqix,next-hop=192.0.2.{next_hop_octet},med={med},local-pref={local_pref},community={community_high}:{community_low}"
        )
        .unwrap();
    }

    // The final line may overshoot the target; trim to the exact size.
    corpus.truncate(TARGET_CORPUS_SIZE);
    corpus
}

/// Persist `bytes` as `target/bench-fixtures/<name>`, creating the fixture
/// directory if needed, and return the full path to the written file.
pub fn write_fixture(name: &str, bytes: &[u8]) -> PathBuf {
    let dir = PathBuf::from("target").join("bench-fixtures");
    fs::create_dir_all(&dir).unwrap();

    let path = dir.join(name);
    fs::write(&path, bytes).unwrap();
    path
}
85 changes: 55 additions & 30 deletions benches/gzip_decompress.rs
Original file line number Diff line number Diff line change
@@ -1,42 +1,67 @@
use std::fs::File;
mod common;

use std::hint::black_box;
use std::io::Read;
use std::io::{Read, Write};

use criterion::{criterion_group, criterion_main, BatchSize, Criterion, Throughput};
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;

#[cfg(feature = "gz-miniz")]
const GZIP_BACKEND: &str = "miniz_oxide";
#[cfg(all(not(feature = "gz-miniz"), feature = "gz-zlib-rs"))]
const GZIP_BACKEND: &str = "zlib-rs";
#[cfg(all(
not(feature = "gz-miniz"),
not(feature = "gz-zlib-rs"),
feature = "gz-zlib-ng"
))]
const GZIP_BACKEND: &str = "zlib-ng";
#[cfg(all(
not(feature = "gz-miniz"),
not(feature = "gz-zlib-rs"),
not(feature = "gz-zlib-ng"),
feature = "gz-zlib-cloudflare"
))]
const GZIP_BACKEND: &str = "cloudflare-zlib";

fn build_gzip_fixture() -> (Vec<u8>, usize, String) {
let corpus = common::build_text_corpus();
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
encoder.write_all(&corpus).unwrap();
let compressed = encoder.finish().unwrap();
let fixture = common::write_fixture(&format!("gzip-{GZIP_BACKEND}.txt.gz"), &compressed);

// Benchmark gzip decompression using flate2 with the selected backend.
// To run with default (miniz_oxide) backend:
// cargo bench --bench gzip_decompress --no-default-features --features gz-miniz
// To run with zlib-rs backend:
// cargo bench --bench gzip_decompress --no-default-features --features gz-zlib-rs
// To compare, run both commands and compare Criterion reports.

fn load_gz_bytes() -> Vec<u8> {
let mut f = File::open("tests/test_data.txt.gz").expect("missing tests/test_data.txt.gz");
let mut buf = Vec::new();
f.read_to_end(&mut buf).unwrap();
buf
(
compressed,
corpus.len(),
fixture.to_string_lossy().into_owned(),
)
}

fn bench_gzip_decompress(c: &mut Criterion) {
let input = load_gz_bytes();
let (input, output_len, fixture_path) = build_gzip_fixture();

let mut group = c.benchmark_group("gzip_decompress");
group.throughput(Throughput::Bytes(input.len() as u64));

group.bench_function("flate2_gz_decode", |b| {
b.iter_batched(
|| input.clone(),
|bytes| {
let reader = GzDecoder::new(bytes.as_slice());
let mut out = Vec::with_capacity(128 * 1024);
let mut r = reader;
r.read_to_end(&mut out).unwrap();
black_box(out)
},
BatchSize::SmallInput,
)
group.throughput(Throughput::Bytes(output_len as u64));

group.bench_function(format!("raw_decoder/{GZIP_BACKEND}"), |b| {
b.iter(|| {
let mut reader = GzDecoder::new(input.as_slice());
let mut out = Vec::with_capacity(output_len);
reader.read_to_end(&mut out).unwrap();
black_box(out.len())
})
});

group.bench_function(format!("oneio_get_reader/{GZIP_BACKEND}"), |b| {
b.iter(|| {
let mut reader = oneio::get_reader(&fixture_path).unwrap();
let mut out = Vec::with_capacity(output_len);
reader.read_to_end(&mut out).unwrap();
black_box(out.len())
})
});

group.finish();
Expand Down
17 changes: 4 additions & 13 deletions examples/s3_operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,18 @@ fn main() {
info!("error if file does not exist");
let res = s3_stats("oneio-test", "test/README___NON_EXISTS.md");
assert!(res.is_err());
assert_eq!(
false,
s3_exists("oneio-test", "test/README___NON_EXISTS.md").unwrap()
);
assert_eq!(true, s3_exists("oneio-test", "test/README.md").unwrap());
assert!(!s3_exists("oneio-test", "test/README___NON_EXISTS.md").unwrap());
assert!(s3_exists("oneio-test", "test/README.md").unwrap());

info!("copy S3 file to a different location");
let res = s3_copy("oneio-test", "test/README.md", "test/README-temporary.md");
assert!(res.is_ok());
assert_eq!(
true,
s3_exists("oneio-test", "test/README-temporary.md").unwrap()
);
assert!(s3_exists("oneio-test", "test/README-temporary.md").unwrap());

info!("delete temporary copied S3 file");
let res = s3_delete("oneio-test", "test/README-temporary.md");
assert!(res.is_ok());
assert_eq!(
false,
s3_exists("oneio-test", "test/README-temporary.md").unwrap()
);
assert!(!s3_exists("oneio-test", "test/README-temporary.md").unwrap());

info!("list S3 files");
let res = s3_list("oneio-test", "test/", Some("/".to_string()), false).unwrap();
Expand Down
18 changes: 18 additions & 0 deletions scripts/bench_decompression_backends.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Run the decompression benchmarks once per gzip backend feature flag, plus
# the bzip2 benchmark, so Criterion reports can be compared across backends.
set -euo pipefail

# Arguments passed through to Criterion after the `--` separator.
criterion_args=(-- --warm-up-time 1 --measurement-time 5 --sample-size 20)

# run_case <label> <cargo bench args...>
# Print a banner for the case, then run it with the shared Criterion settings.
run_case() {
    local label="$1"
    shift
    printf '\n== %s ==\n' "${label}"
    cargo bench "$@" "${criterion_args[@]}"
}

run_case "gzip miniz_oxide" --bench gzip_decompress --no-default-features --features gz-miniz
run_case "gzip zlib-rs" --bench gzip_decompress --no-default-features --features gz-zlib-rs
run_case "gzip zlib-ng" --bench gzip_decompress --no-default-features --features gz-zlib-ng
run_case "gzip cloudflare-zlib" --bench gzip_decompress --no-default-features --features gz-zlib-cloudflare
run_case "bzip2" --bench bzip2_decompress --no-default-features --features bz
4 changes: 4 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ pub enum OneIoError {
#[error("{0}")]
Network(Box<dyn std::error::Error + Send + Sync>),

/// Structured status errors from remote services
#[error("{service} status error: {code}")]
Status { service: &'static str, code: u16 },

/// Feature not supported/compiled
#[error("Not supported: {0}")]
NotSupported(String),
Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
"local_data.csv.gz"
).await?;

// download_async preserves the remote bytes.

Ok(())
}
```
Expand Down Expand Up @@ -296,6 +298,7 @@ match oneio::get_reader("file.txt") {
Ok(reader) => { /* use reader */ },
Err(OneIoError::Io(e)) => { /* filesystem error */ },
Err(OneIoError::Network(e)) => { /* network error */ },
Err(OneIoError::Status { service, code }) => { /* remote status error */ },
Err(OneIoError::NotSupported(msg)) => { /* feature not compiled */ },
}
```
Expand Down
Loading