From 87d9cdc10662d2d944349d642700e998ce217b8c Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 14:56:14 +1200
Subject: [PATCH 01/27] update
---
CHANGELOG.md | 7 --
Cargo.toml | 40 +++----
README-zh_CN.md | 51 ---------
README.md | 102 +++++++++++------
benches/decode.rs | 114 +++++++++++++++++++
benches/foo.rs | 1 -
docs/design.md | 154 +++++++++++++++++++++++++
examples/decode.rs | 74 ++++++++++++
examples/foo.rs | 1 -
src/backend.rs | 137 ++++++++++++++++++++++
src/decoder.rs | 278 +++++++++++++++++++++++++++++++++++++++++++++
src/error.rs | 37 ++++++
src/ffi.rs | 70 ++++++++++++
src/lib.rs | 26 ++++-
tests/decode.rs | 68 +++++++++++
tests/foo.rs | 1 -
tests/hw_smoke.rs | 64 +++++++++++
17 files changed, 1106 insertions(+), 119 deletions(-)
delete mode 100644 CHANGELOG.md
delete mode 100644 README-zh_CN.md
create mode 100644 benches/decode.rs
delete mode 100644 benches/foo.rs
create mode 100644 docs/design.md
create mode 100644 examples/decode.rs
delete mode 100644 examples/foo.rs
create mode 100644 src/backend.rs
create mode 100644 src/decoder.rs
create mode 100644 src/error.rs
create mode 100644 src/ffi.rs
create mode 100644 tests/decode.rs
delete mode 100644 tests/foo.rs
create mode 100644 tests/hw_smoke.rs
diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index bd7a668..0000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# UNRELEASED
-
-# 0.1.2 (January 6th, 2022)
-
-FEATURES
-
-
diff --git a/Cargo.toml b/Cargo.toml
index ff7fe91..8e4ea79 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,35 +1,37 @@
[package]
-name = "template-rs"
+name = "hwdecode"
version = "0.0.0"
edition = "2021"
-repository = "https://github.com/al8n/template-rs"
-homepage = "https://github.com/al8n/template-rs"
-documentation = "https://docs.rs/template-rs"
-description = "A template for creating Rust open-source repo on GitHub"
+rust-version = "1.95"
+description = "Cross-platform hardware-accelerated video decoder built on top of ffmpeg-next, with auto-probe and software fallback."
+repository = "https://github.com/findit-ai/hwdecode"
+homepage = "https://github.com/findit-ai/hwdecode"
+documentation = "https://docs.rs/hwdecode"
license = "MIT OR Apache-2.0"
-rust-version = "1.73"
-
-[[bench]]
-path = "benches/foo.rs"
-name = "foo"
-harness = false
-
-[features]
-default = ["std"]
-alloc = []
-std = []
[dependencies]
+ffmpeg-next = { version = "8.1", default-features = false, features = ["codec", "format"] }
+thiserror = "2"
+tracing = "0.1"
+libc = "0.2"
[dev-dependencies]
criterion = "0.8"
-tempfile = "3"
+
+[[example]]
+name = "decode"
+path = "examples/decode.rs"
+
+[[bench]]
+name = "decode"
+path = "benches/decode.rs"
+harness = false
[profile.bench]
opt-level = 3
debug = false
codegen-units = 1
-lto = 'thin'
+lto = "thin"
incremental = false
debug-assertions = false
overflow-checks = false
@@ -41,8 +43,6 @@ rustdoc-args = ["--cfg", "docsrs"]
[lints.rust]
rust_2018_idioms = "warn"
-single_use_lifetimes = "warn"
unexpected_cfgs = { level = "warn", check-cfg = [
- 'cfg(all_tests)',
'cfg(tarpaulin)',
] }
diff --git a/README-zh_CN.md b/README-zh_CN.md
deleted file mode 100644
index 7a07f4d..0000000
--- a/README-zh_CN.md
+++ /dev/null
@@ -1,51 +0,0 @@
-
-
template-rs
-
-
-
-开源Rust代码库GitHub模版
-
-[

][Github-url]
-

-[

][CI-url]
-[

][codecov-url]
-
-[

][doc-url]
-[

][crates-url]
-[

][crates-url]
-

-
-[English][en-url] | 简体中文
-
-
-
-## Installation
-
-```toml
-[dependencies]
-template_rs = "0.1"
-```
-
-## Features
-
-- [x] 更快的创建GitHub开源Rust代码库
-
-#### License
-
-`Template-rs` is under the terms of both the MIT license and the
-Apache License (Version 2.0).
-
-See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT) for details.
-
-Copyright (c) 2021 Al Liu.
-
-[Github-url]: https://github.com/al8n/template-rs/
-[CI-url]: https://github.com/al8n/template/actions/workflows/template.yml
-[doc-url]: https://docs.rs/template-rs
-[crates-url]: https://crates.io/crates/template-rs
-[codecov-url]: https://app.codecov.io/gh/al8n/template-rs/
-[license-url]: https://opensource.org/licenses/Apache-2.0
-[rustc-url]: https://github.com/rust-lang/rust/blob/master/RELEASES.md
-[license-apache-url]: https://opensource.org/licenses/Apache-2.0
-[license-mit-url]: https://opensource.org/licenses/MIT
-[en-url]: https://github.com/al8n/template-rs/tree/main/README.md
diff --git a/README.md b/README.md
index 1af27e2..bcfb058 100644
--- a/README.md
+++ b/README.md
@@ -1,46 +1,84 @@
-
-
template-rs
-
-
+# hwdecode
-A template for creating Rust open-source GitHub repo.
+Cross-platform hardware-accelerated video decoder for Rust, built on top of
+[`ffmpeg-next`](https://crates.io/crates/ffmpeg-next).
-[

][Github-url]
-

-[

][CI-url]
-[

][codecov-url]
+`VideoDecoder` mirrors the `send_packet` / `receive_frame` interface of
+`ffmpeg::decoder::Video` and silently picks the best hardware backend for the
+host platform, falling back to software if none open. Output frames are
+CPU-side — for HW backends they are downloaded with `av_hwframe_transfer_data`
+(NV12 for 8-bit, P010 for 10-bit). Pixel-format conversion is intentionally
+out of scope.
-[

][doc-url]
-[

][crates-url]
-[

][crates-url]
-

+## Backends
-English | [简体中文][zh-cn-url]
+| Target | Probe order |
+| ------------------- | --------------------------------- |
+| macOS / iOS / tvOS / visionOS | VideoToolbox → Software |
+| Linux | VAAPI → CUDA → Software |
+| Windows | D3D11VA → CUDA → Software |
+| other | Software |
-
+## Usage
-## Installation
+```rust
+use ffmpeg_next as ffmpeg;
+use ffmpeg::{format, frame, media};
+use hwdecode::VideoDecoder;
-```toml
-[dependencies]
-template_rs = "0.1"
+ffmpeg::init()?;
+
+let mut input = format::input(path)?;
+let stream = input.streams().best(media::Type::Video).unwrap();
+let stream_index = stream.index();
+
+let mut decoder = VideoDecoder::open(stream.parameters())?;
+println!("backend = {:?}", decoder.backend());
+
+let mut frame = frame::Video::empty();
+for (s, packet) in input.packets() {
+ if s.index() != stream_index { continue; }
+ decoder.send_packet(&packet)?;
+ while decoder.receive_frame(&mut frame).is_ok() {
+ // frame.format() is NV12 / P010 (HW path) or codec-native (SW path)
+ // ... do something with frame ...
+ }
+}
+decoder.send_eof()?;
+while decoder.receive_frame(&mut frame).is_ok() {
+ // ... drain ...
+}
```
-## Features
-- [x] Create a Rust open-source repo fast
+To force a specific backend (no probe, no fallback):
+
+```rust
+use hwdecode::{Backend, VideoDecoder};
+let decoder = VideoDecoder::open_with(parameters, Backend::Software)?;
+```
+
+## Running tests and benches
+
+The integration test and benchmark expect a real video file. Set
+`HWDECODE_SAMPLE_VIDEO` to enable them:
+
+```sh
+HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test
+HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored
+HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench
+```
-#### License
+Without the env var the integration test skips with a notice; unit tests run
+unconditionally.
-`template-rs` is under the terms of both the MIT license and the
-Apache License (Version 2.0).
+## Build requirements
-See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT) for details.
+- A system FFmpeg ≥ 4.x linkable via `pkg-config`. Verify with
+ `ffmpeg -hwaccels` that your build has the backends you expect compiled in
+ (e.g. `videotoolbox` on macOS, `vaapi` / `cuda` on Linux,
+ `d3d11va` / `cuda` on Windows).
+- Rust ≥ 1.95.
-Copyright (c) 2021 Al Liu.
+## License
-[Github-url]: https://github.com/al8n/template-rs/
-[CI-url]: https://github.com/al8n/template-rs/actions/workflows/ci.yml
-[doc-url]: https://docs.rs/template-rs
-[crates-url]: https://crates.io/crates/template-rs
-[codecov-url]: https://app.codecov.io/gh/al8n/template-rs/
-[zh-cn-url]: https://github.com/al8n/template-rs/tree/main/README-zh_CN.md
+MIT or Apache-2.0, at your option.
diff --git a/benches/decode.rs b/benches/decode.rs
new file mode 100644
index 0000000..be7281d
--- /dev/null
+++ b/benches/decode.rs
@@ -0,0 +1,114 @@
+//! Benchmark comparing software-only decode against the auto-probed
+//! hardware backend on the same input file.
+//!
+//! Set `HWDECODE_SAMPLE_VIDEO` to a video file path. The hardware bench is
+//! skipped (with a notice) when the auto-probe falls back to software.
+//!
+//! ```sh
+//! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench
+//! ```
+
+use std::{path::PathBuf, time::Duration};
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use ffmpeg::{format, frame, media};
+use ffmpeg_next as ffmpeg;
+use hwdecode::{Backend, VideoDecoder};
+
+const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
+
+fn sample_path() -> Option<PathBuf> {
+  std::env::var_os(SAMPLE_ENV).map(PathBuf::from)
+}
+
+/// Decode every video frame in `path`, returning the count. The input is re-opened on
+/// each call. `Backend::Software` forces software; any other value runs the auto-probe.
+fn decode_all(path: &PathBuf, backend: Backend) -> Result<usize, hwdecode::Error> {
+  let mut input = format::input(path).map_err(hwdecode::Error::Ffmpeg)?;
+  let stream = input
+    .streams()
+    .best(media::Type::Video)
+    .ok_or(hwdecode::Error::Ffmpeg(ffmpeg::Error::StreamNotFound))?;
+  let stream_index = stream.index();
+
+  let mut decoder = match backend {
+    Backend::Software => VideoDecoder::open_with(stream.parameters(), Backend::Software)?,
+    _ => VideoDecoder::open(stream.parameters())?,
+  };
+
+  let mut frame = frame::Video::empty();
+  let mut count = 0_usize;
+
+  let mut drain = |decoder: &mut VideoDecoder, count: &mut usize| -> Result<(), hwdecode::Error> {
+    loop {
+      match decoder.receive_frame(&mut frame) {
+        Ok(()) => *count += 1,
+        Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+          if errno == ffmpeg::error::EAGAIN =>
+        {
+          return Ok(());
+        }
+        Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Eof)) => return Ok(()),
+        Err(e) => return Err(e),
+      }
+    }
+  };
+
+  for (s, packet) in input.packets() {
+    if s.index() != stream_index {
+      continue;
+    }
+    decoder.send_packet(&packet)?;
+    drain(&mut decoder, &mut count)?;
+  }
+  decoder.send_eof()?;
+  drain(&mut decoder, &mut count)?;
+  Ok(count)
+}
+
+/// Criterion entry point: benches a full software decode pass and, when the
+/// auto-probe finds one, a full pass through the hardware backend.
+fn bench_decode(c: &mut Criterion) {
+  ffmpeg::init().expect("ffmpeg init");
+
+  let Some(path) = sample_path() else {
+    eprintln!("skipping benches: set {SAMPLE_ENV} to a video file path");
+    return;
+  };
+
+  // Open a throwaway decoder so the log shows which backend auto-probe picks.
+  let probed_backend = {
+    let input = format::input(&path).expect("open input");
+    let stream = input
+      .streams()
+      .best(media::Type::Video)
+      .expect("video stream");
+    let probe = VideoDecoder::open(stream.parameters()).expect("auto-probe");
+    probe.backend()
+  };
+  eprintln!("auto-probe selected backend: {probed_backend:?}");
+
+  let mut group = c.benchmark_group("decode");
+  group.measurement_time(Duration::from_secs(15));
+  group.sample_size(20);
+
+  group.bench_function("software", |bencher| {
+    bencher.iter(|| decode_all(&path, Backend::Software).expect("software decode"))
+  });
+
+  if probed_backend == Backend::Software {
+    eprintln!("skipping hardware bench: auto-probe fell back to Software");
+  } else {
+    group.bench_function("hardware", |bencher| {
+      bencher.iter(|| {
+        let frames = decode_all(&path, probed_backend).expect("hardware decode");
+        std::hint::black_box(frames);
+      })
+    });
+  }
+
+  group.finish();
+}
+
+criterion_group!(benches, bench_decode);
+criterion_main!(benches);
diff --git a/benches/foo.rs b/benches/foo.rs
deleted file mode 100644
index f328e4d..0000000
--- a/benches/foo.rs
+++ /dev/null
@@ -1 +0,0 @@
-fn main() {}
diff --git a/docs/design.md b/docs/design.md
new file mode 100644
index 0000000..056bb4f
--- /dev/null
+++ b/docs/design.md
@@ -0,0 +1,154 @@
+# hwdecode — design
+
+Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next` 8.1.
+
+## Goals
+
+- Drop-in replacement for `ffmpeg::decoder::Video` at the call site (`send_packet` / `receive_frame` / `send_eof` / `flush`).
+- Auto-probe the platform's hardware backends and silently fall back to software if none open. Caller never has to think about hwaccel availability.
+- Hand back native-format CPU frames (NV12/P010 from the HW path, codec-native from the SW path). Pixel-format conversion is the caller's responsibility (e.g. via `colconv`).
+- Cross-platform: macOS / iOS / iPadOS / tvOS, Linux (Intel/AMD/NVIDIA), Windows (any GPU + CUDA on NVIDIA).
+
+## Non-goals
+
+- Audio hardware decoding. Out of scope; software AAC/Opus/etc. is fast enough that the complexity isn't justified.
+- Demuxing. Callers open files/streams themselves (e.g. via `findit-demuxer`) and feed packets in.
+- Pixel-format conversion. Done downstream (`colconv`).
+- Encoding.
+
+## Public API
+
+```rust
+pub struct VideoDecoder { /* private */ }
+
+impl VideoDecoder {
+ /// Auto-probe HW backends in platform order; fall back to software.
+ /// On success, `backend()` reports the one that won.
+    pub fn open(parameters: ffmpeg::codec::Parameters) -> Result<Self, Error>;
+
+ /// Force a specific backend. No probe, no fallback.
+    pub fn open_with(parameters: ffmpeg::codec::Parameters, backend: Backend) -> Result<Self, Error>;
+
+ pub fn backend(&self) -> Backend;
+ pub fn width(&self) -> u32;
+ pub fn height(&self) -> u32;
+ pub fn format(&self) -> ffmpeg::format::Pixel;
+ pub fn time_base(&self) -> ffmpeg::Rational;
+    pub fn frame_rate(&self) -> Option<ffmpeg::Rational>;
+
+ pub fn send_packet(&mut self, packet: &ffmpeg::Packet) -> Result<(), Error>;
+ pub fn send_eof(&mut self) -> Result<(), Error>;
+
+ /// Receive a CPU-side frame. For HW backends, internally calls
+ /// `av_hwframe_transfer_data` and copies PTS/timing onto the result;
+ /// output format is NV12 (8-bit) or P010 (10-bit). For SW, the frame
+ /// is in the codec's native format.
+ pub fn receive_frame(&mut self, frame: &mut ffmpeg::frame::Video) -> Result<(), Error>;
+
+ pub fn flush(&mut self);
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Backend {
+ Software,
+ VideoToolbox, // macOS, iOS, iPadOS, tvOS
+ Vaapi, // Linux (Intel/AMD)
+ Cuda, // Linux/Windows (NVIDIA)
+ D3d11va, // Windows
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+ #[error("ffmpeg error: {0}")]
+ Ffmpeg(#[from] ffmpeg::Error),
+ #[error("no decoder for codec id {0:?}")]
+ NoCodec(ffmpeg::codec::Id),
+ #[error("hardware device init failed for {backend:?}: {source}")]
+ HwDeviceInitFailed { backend: Backend, source: ffmpeg::Error },
+ #[error("all backends failed; attempts: {attempts:?}")]
+    AllBackendsFailed { attempts: Vec<(Backend, Box<Error>)> },
+}
+```
+
+## Behavior
+
+### Probe order
+
+| Target | Order tried |
+| ------------------- | -------------------------------------------- |
+| macOS, iOS, tvOS, visionOS | `[VideoToolbox, Software]` |
+| Linux | `[Vaapi, Cuda, Software]` |
+| Windows | `[D3d11va, Cuda, Software]` |
+| Other | `[Software]` |
+
+A HW backend is a candidate only if **(a)** its `AVHWDeviceType` device can be created via `av_hwdevice_ctx_create`, and **(b)** the codec advertises support via `avcodec_get_hw_config` matching that device type. The first candidate that fully opens wins. Each failure logs `tracing::warn!` with the backend and the underlying error and the loop tries the next.
+
+### Device selection
+
+Always device 0 / system default (`av_hwdevice_ctx_create(.., NULL, ..)`). No env var, no config knob in v1. Add later if the multi-GPU use case appears.
+
+### `get_format` callback
+
+A static `extern "C"` callback. The decoder context's `opaque` field points to a small heap-allocated `CallbackState { wanted: AVPixelFormat }`. The callback walks the offered `pix_fmts` list, returns `wanted` if present, else `AV_PIX_FMT_NONE` (which makes FFmpeg give up on format negotiation and fail the decode instead of silently switching to software). This is the standard pattern from `doc/examples/hw_decode.c`.
+
+### Frame transfer
+
+`receive_frame`:
+
+1. On a hardware backend, reads from the codec into an internal `hw_frame: ffmpeg::frame::Video` (allocated once, reused).
+2. Calls `av_hwframe_transfer_data(out, hw_frame, 0)` into the caller's `&mut frame`, then `av_frame_copy_props` to carry over `pts`, timing and side data (the transfer itself does not copy them).
+3. On the software backend there is no intermediate frame: the codec writes straight into the caller's slot.
+
+### Threading
+
+`VideoDecoder: Send + !Sync`. Each instance owns its own `AVCodecContext` and `AVBufferRef*`. Multiple decoders can run on different threads; a single decoder is not concurrent.
+
+### Drop
+
+`Drop` first drops the wrapped `ffmpeg::decoder::Video` (freeing the codec context, which may still invoke `get_format` while it is alive), then frees the boxed `CallbackState`, and finally calls `av_buffer_unref(&mut self.hw_device_ref)` if non-null.
+
+## Internals
+
+```text
+src/
+├── lib.rs // re-exports + crate-level docs
+├── error.rs // Error enum
+├── backend.rs // Backend enum, probe order, AVHWDeviceType <-> Backend mapping
+├── decoder.rs // VideoDecoder, open/open_with, send/receive
+└── ffi.rs // get_format callback, av_hwdevice_ctx_create / transfer wrappers,
+ // avcodec_get_hw_config probe
+```
+
+No other modules. Keep the surface small.
+
+## Build & dependencies
+
+- `ffmpeg-next = { version = "8.1", default-features = false, features = ["codec", "format"] }`
+- `thiserror = "2"`
+- `tracing = "0.1"`
+- `libc = "0.2"`
+
+No platform-specific Cargo features. `cfg!(target_os = ...)` selects which `AVHWDeviceType` constants we even attempt — the FFI symbols are linked unconditionally via `ffmpeg-sys-next`.
+
+System FFmpeg ≥ 4.x. Verified against the user's macOS Homebrew build (FFmpeg 8.1, VideoToolbox enabled).
+
+## Testing
+
+1. **Unit tests** (`src/backend.rs`, `src/error.rs`) — pure-Rust: probe-order construction per platform, `Backend` ↔ `AVHWDeviceType` mapping, error formatting.
+2. **Integration** (`tests/decode.rs`) — opens a sample H.264 file via `ffmpeg::format::input`, decodes 30 frames through `VideoDecoder::open` (auto-probe), asserts frame count and dimensions. Sample path comes from env var `HWDECODE_SAMPLE_VIDEO`; test is skipped with a clear `eprintln!` if unset.
+3. **HW smoke** (`tests/hw_smoke.rs`, `#[ignore]`) — same decode, but additionally asserts `decoder.backend() != Backend::Software`. CI runs this on platform-matched runners.
+
+Sample-file env var keeps the repo binary-free. Documented in `README.md`.
+
+## Benchmark
+
+`benches/decode.rs` (criterion) — one `bench_decode` entry point that registers two benchmarks in the `decode` group:
+
+- `software` — `VideoDecoder::open_with(.., Backend::Software)`, decodes all frames of the sample; each iteration measures one full decode pass.
+- `hardware` — `VideoDecoder::open(..)` (auto-probe). Skipped with a notice if the probe falls back to `Backend::Software` (no HW available).
+
+Both use the same `HWDECODE_SAMPLE_VIDEO` file. Bench prints which backend the HW run actually used, so results are interpretable across machines.
+
+## Examples
+
+`examples/decode.rs` — opens a path from `argv[1]` with `ffmpeg::format::input`, finds the best video stream, feeds packets through `VideoDecoder`, prints `(pts, width, height, format, backend)` for each frame.
diff --git a/examples/decode.rs b/examples/decode.rs
new file mode 100644
index 0000000..fa28582
--- /dev/null
+++ b/examples/decode.rs
@@ -0,0 +1,74 @@
+//! Decode every video frame in `argv[1]`, printing one line per frame.
+//!
+//! ```sh
+//! cargo run --release --example decode -- /path/to/video.mp4
+//! ```
+
+use ffmpeg::{format, frame, media};
+use ffmpeg_next as ffmpeg;
+use hwdecode::VideoDecoder;
+
+/// Decodes the file named by `argv[1]` through the auto-probed [`VideoDecoder`],
+/// printing one line per decoded frame.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+  let path = std::env::args().nth(1).ok_or("usage: decode <path>")?;
+
+  ffmpeg::init()?;
+
+  let mut input = format::input(&path)?;
+  let stream = input
+    .streams()
+    .best(media::Type::Video)
+    .ok_or("no video stream")?;
+  let stream_index = stream.index();
+
+  let mut decoder = VideoDecoder::open(stream.parameters())?;
+  println!(
+    "backend={:?} {}x{} codec_pix_fmt_initial={:?}",
+    decoder.backend(),
+    decoder.width(),
+    decoder.height(),
+    decoder.format(),
+  );
+
+  let mut frame = frame::Video::empty();
+  let mut count: u64 = 0;
+
+  let drain = |decoder: &mut VideoDecoder, frame: &mut frame::Video, count: &mut u64| loop {
+    match decoder.receive_frame(frame) {
+      Ok(()) => {
+        *count += 1;
+        println!(
+          "frame#{count} pts={:?} {}x{} fmt={:?}",
+          frame.pts(),
+          frame.width(),
+          frame.height(),
+          frame.format(),
+        );
+      }
+      Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+        if errno == ffmpeg::error::EAGAIN =>
+      {
+        break
+      }
+      Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Eof)) => break,
+      Err(e) => {
+        eprintln!("decode error: {e}");
+        break;
+      }
+    }
+  };
+
+  for (s, packet) in input.packets() {
+    if s.index() != stream_index {
+      continue;
+    }
+    decoder.send_packet(&packet)?;
+    drain(&mut decoder, &mut frame, &mut count);
+  }
+  decoder.send_eof()?;
+  drain(&mut decoder, &mut frame, &mut count);
+
+  println!("decoded {count} frames");
+  Ok(())
+}
diff --git a/examples/foo.rs b/examples/foo.rs
deleted file mode 100644
index f328e4d..0000000
--- a/examples/foo.rs
+++ /dev/null
@@ -1 +0,0 @@
-fn main() {}
diff --git a/src/backend.rs b/src/backend.rs
new file mode 100644
index 0000000..cfcd48b
--- /dev/null
+++ b/src/backend.rs
@@ -0,0 +1,137 @@
+use ffmpeg_next::{ffi::AVHWDeviceType, format::Pixel};
+
+/// Decode path a [`crate::VideoDecoder`] runs on, whether probed or forced.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum Backend {
+  /// CPU-only decoding through libavcodec; no hardware device involved.
+  Software,
+  /// VideoToolbox on Apple platforms (macOS, iOS, iPadOS, tvOS).
+  VideoToolbox,
+  /// VAAPI, the Linux Video Acceleration API (Intel / AMD GPUs).
+  Vaapi,
+  /// NVDEC driven through CUDA, for NVIDIA hardware on Linux / Windows.
+  Cuda,
+  /// Direct3D 11 Video Acceleration from Microsoft (Windows).
+  D3d11va,
+}
+
+impl Backend {
+  /// `AVHWDeviceType` corresponding to this backend, or `None` for
+  /// [`Backend::Software`].
+  pub(crate) fn av_hwdevice_type(self) -> Option<AVHWDeviceType> {
+    match self {
+      Self::Software => None,
+      Self::VideoToolbox => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX),
+      Self::Vaapi => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_VAAPI),
+      Self::Cuda => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA),
+      Self::D3d11va => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_D3D11VA),
+    }
+  }
+
+  /// Hardware pixel format the codec is expected to produce when this
+  /// backend is in use. Used to inspect the result of `get_format`.
+  /// `None` for [`Backend::Software`].
+  #[allow(dead_code)] // surfaced for tests / future use
+  pub(crate) fn hw_pixel_format(self) -> Option<Pixel> {
+    match self {
+      Self::Software => None,
+      Self::VideoToolbox => Some(Pixel::VIDEOTOOLBOX),
+      Self::Vaapi => Some(Pixel::VAAPI),
+      Self::Cuda => Some(Pixel::CUDA),
+      Self::D3d11va => Some(Pixel::D3D11),
+    }
+  }
+}
+
+/// Backends `VideoDecoder::open` tries, in order, on the current target.
+///
+/// The list is selected per `target_os` at compile time. It is never empty
+/// and its last entry is always [`Backend::Software`], so the auto-probe
+/// always has a software candidate to end on.
+pub(crate) fn probe_order() -> &'static [Backend] {
+  // Apple platforms: VideoToolbox, then software.
+  #[cfg(any(
+    target_os = "macos",
+    target_os = "ios",
+    target_os = "tvos",
+    target_os = "visionos",
+  ))]
+  const ORDER: &[Backend] = &[Backend::VideoToolbox, Backend::Software];
+  // Linux: VAAPI, then CUDA, then software.
+  #[cfg(target_os = "linux")]
+  const ORDER: &[Backend] = &[Backend::Vaapi, Backend::Cuda, Backend::Software];
+  // Windows: D3D11VA, then CUDA, then software.
+  #[cfg(target_os = "windows")]
+  const ORDER: &[Backend] = &[Backend::D3d11va, Backend::Cuda, Backend::Software];
+  // Every other target: software only.
+  #[cfg(not(any(
+    target_os = "macos",
+    target_os = "ios",
+    target_os = "tvos",
+    target_os = "visionos",
+    target_os = "linux",
+    target_os = "windows",
+  )))]
+  const ORDER: &[Backend] = &[Backend::Software];
+
+  // The cfg predicates above are mutually exclusive, so one `ORDER` exists.
+  ORDER
+}
+
+#[cfg(test)]
+mod tests {
+  use super::*;
+
+  #[test]
+  fn probe_order_ends_in_software() {
+    let order = probe_order();
+    assert!(!order.is_empty());
+    assert_eq!(*order.last().unwrap(), Backend::Software);
+  }
+
+  #[test]
+  fn software_has_no_av_hwdevice_type() {
+    assert!(Backend::Software.av_hwdevice_type().is_none());
+    assert!(Backend::Software.hw_pixel_format().is_none());
+  }
+
+  #[test]
+  fn hw_backends_have_av_hwdevice_type() {
+    for b in [
+      Backend::VideoToolbox,
+      Backend::Vaapi,
+      Backend::Cuda,
+      Backend::D3d11va,
+    ] {
+      assert!(
+        b.av_hwdevice_type().is_some(),
+        "{b:?} missing hwdevice type"
+      );
+      assert!(b.hw_pixel_format().is_some(), "{b:?} missing hw pix fmt");
+    }
+  }
+
+  #[cfg(any(target_os = "macos", target_os = "ios", target_os = "tvos", target_os = "visionos"))]
+  #[test]
+  fn apple_probe_order() {
+    assert_eq!(probe_order(), &[Backend::VideoToolbox, Backend::Software]);
+  }
+
+  #[cfg(target_os = "linux")]
+  #[test]
+  fn linux_probe_order() {
+    assert_eq!(
+      probe_order(),
+      &[Backend::Vaapi, Backend::Cuda, Backend::Software]
+    );
+  }
+
+  #[cfg(target_os = "windows")]
+  #[test]
+  fn windows_probe_order() {
+    assert_eq!(
+      probe_order(),
+      &[Backend::D3d11va, Backend::Cuda, Backend::Software]
+    );
+  }
+}
diff --git a/src/decoder.rs b/src/decoder.rs
new file mode 100644
index 0000000..7a3a357
--- /dev/null
+++ b/src/decoder.rs
@@ -0,0 +1,278 @@
+use std::{mem::ManuallyDrop, ptr};
+
+use ffmpeg_next::{
+ codec::{self, Context},
+ ffi::{
+ av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_hwdevice_ctx_create,
+ av_hwframe_transfer_data,
+ },
+ format::Pixel,
+ frame, Codec, Packet, Rational,
+};
+
+use crate::{
+ backend::{self, Backend},
+ error::{Error, Result},
+ ffi::{find_hw_pix_fmt, get_hw_format, CallbackState},
+};
+
+/// Hardware-accelerated video decoder with software fallback.
+///
+/// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface.
+/// Frames returned by [`Self::receive_frame`] are always CPU-side; for hardware
+/// backends they are downloaded with `av_hwframe_transfer_data` (NV12 / P010).
+pub struct VideoDecoder {
+  /// Wrapped FFmpeg decoder. `ManuallyDrop` so we can sequence its drop
+  /// before freeing the callback state in our [`Drop`] impl.
+  inner: ManuallyDrop<ffmpeg_next::decoder::Video>,
+  backend: Backend,
+  /// Owned reference produced by `av_hwdevice_ctx_create`. Null for software.
+  hw_device_ref: *mut ffmpeg_next::ffi::AVBufferRef,
+  /// Owned `Box<CallbackState>` raw pointer; `AVCodecContext::opaque` aliases
+  /// it. Null for software.
+  callback_state: *mut CallbackState,
+  /// Reusable frame buffer used for hw-side decoding before transfer.
+  /// Unused on the software path (`receive_frame` writes the caller's frame
+  /// directly).
+  hw_frame: frame::Video,
+}
+
+// SAFETY: All raw pointers are exclusively owned by the struct and never
+// shared. `ffmpeg::decoder::Video` itself is Send (its `Context` is `unsafe
+// impl Send`). The decoder is not safe for concurrent use, hence not `Sync`.
+unsafe impl Send for VideoDecoder {}
+
+impl VideoDecoder {
+  /// Auto-probe hardware backends in the platform's default order, falling
+  /// back to software. The chosen backend is reported by [`Self::backend`].
+  pub fn open(parameters: codec::Parameters) -> Result<Self> {
+    let codec_id = parameters.id();
+    let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?;
+
+    let mut attempts = Vec::new();
+    for &backend in backend::probe_order() {
+      match Self::try_open(parameters.clone(), codec, backend) {
+        Ok(decoder) => {
+          tracing::info!(?backend, "hwdecode: opened video decoder");
+          return Ok(decoder);
+        }
+        Err(e) => {
+          tracing::warn!(?backend, error = %e, "hwdecode: backend probe failed");
+          attempts.push((backend, Box::new(e)));
+        }
+      }
+    }
+    Err(Error::AllBackendsFailed { attempts })
+  }
+
+  /// Open the decoder with a specific backend. No probe, no fallback.
+  /// Returns an error if `backend` is not supported by the codec or fails to
+  /// initialise.
+  pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result<Self> {
+    let codec_id = parameters.id();
+    let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?;
+    Self::try_open(parameters, codec, backend)
+  }
+
+  /// The backend that opened this decoder.
+  pub fn backend(&self) -> Backend {
+    self.backend
+  }
+
+  /// Decoder width in pixels.
+  pub fn width(&self) -> u32 {
+    self.inner.width()
+  }
+
+  /// Decoder height in pixels.
+  pub fn height(&self) -> u32 {
+    self.inner.height()
+  }
+
+  /// Current pixel format of the codec context. For HW backends this is the
+  /// hardware pixel format (e.g. `Pixel::VIDEOTOOLBOX`) once the first frame
+  /// has been negotiated; the format of frames returned from
+  /// [`Self::receive_frame`] is the *transferred* format (NV12 / P010) and
+  /// must be read from the frame itself.
+  pub fn format(&self) -> Pixel {
+    self.inner.format()
+  }
+
+  /// Codec context time base.
+  pub fn time_base(&self) -> Rational {
+    self.inner.time_base()
+  }
+
+  /// Frame rate from the codec context, if known.
+  pub fn frame_rate(&self) -> Option<Rational> {
+    self.inner.frame_rate()
+  }
+
+  /// Submit a packet to the decoder.
+  pub fn send_packet(&mut self, packet: &Packet) -> Result<()> {
+    self.inner.send_packet(packet).map_err(Error::Ffmpeg)
+  }
+
+  /// Signal end-of-stream to the decoder; remaining frames can be drained
+  /// with [`Self::receive_frame`].
+  pub fn send_eof(&mut self) -> Result<()> {
+    self.inner.send_eof().map_err(Error::Ffmpeg)
+  }
+
+  /// Receive a CPU-side decoded frame.
+  ///
+  /// For hardware backends the frame is transferred from GPU memory via
+  /// `av_hwframe_transfer_data` and frame metadata (pts, time_base, side
+  /// data, ...) is copied with `av_frame_copy_props`. For the software
+  /// backend this is a direct passthrough.
+  ///
+  /// Returns the same errors as `ffmpeg::decoder::Video::receive_frame`,
+  /// e.g. `Error::Other { errno: EAGAIN }` when no frame is ready.
+  pub fn receive_frame(&mut self, frame: &mut frame::Video) -> Result<()> {
+    if self.backend == Backend::Software {
+      return self.inner.receive_frame(frame).map_err(Error::Ffmpeg);
+    }
+
+    // HW path: receive into our reusable hw_frame, then transfer.
+    self.inner.receive_frame(&mut self.hw_frame)?;
+
+    // SAFETY: both pointers come from live `frame::Video` values borrowed here.
+    unsafe {
+      // Reset the caller's frame first. A reused frame would otherwise be
+      // written in place (even if its buffers are shared with another
+      // reference) and would pile up side data on every `copy_props`.
+      ffmpeg_next::ffi::av_frame_unref(frame.as_mut_ptr());
+      let ret = av_hwframe_transfer_data(frame.as_mut_ptr(), self.hw_frame.as_ptr(), 0);
+      if ret < 0 {
+        return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
+      }
+      // transfer_data does not carry timing or side data; copy_props does.
+      let ret = av_frame_copy_props(frame.as_mut_ptr(), self.hw_frame.as_ptr());
+      if ret < 0 {
+        return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
+      }
+    }
+    Ok(())
+  }
+
+  /// Flush internal buffers (e.g. after a seek).
+  pub fn flush(&mut self) {
+    self.inner.flush();
+  }
+
+  /// Inner open: tries one backend exactly, no probing.
+  fn try_open(parameters: codec::Parameters, codec: Codec, backend: Backend) -> Result<Self> {
+    let mut ctx = Context::from_parameters(parameters)?;
+
+    let (hw_device_ref, callback_state) = match backend.av_hwdevice_type() {
+      None => (ptr::null_mut(), ptr::null_mut()),
+      Some(av_type) => {
+        // Verify the codec advertises this hwaccel.
+        let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type)
+          .ok_or(Error::BackendUnsupportedByCodec(backend))?;
+
+        // Create the device context.
+        let mut hw_device_ref = ptr::null_mut();
+        // SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill.
+        let ret = unsafe {
+          av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0)
+        };
+        if ret < 0 {
+          return Err(Error::HwDeviceInitFailed {
+            backend,
+            source: ffmpeg_next::Error::from(ret),
+          });
+        }
+
+        // Wire up the codec context: a fresh ref for FFmpeg, a heap
+        // pointer for the get_format callback to read.
+        let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt }));
+        // SAFETY: ctx is a freshly-constructed AVCodecContext we own;
+        // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's
+        // use (we keep our own ref in `hw_device_ref` for cleanup).
+        unsafe {
+          let raw = ctx.as_mut_ptr();
+          (*raw).hw_device_ctx = av_buffer_ref(hw_device_ref);
+          (*raw).opaque = callback_state.cast();
+          (*raw).get_format = Some(get_hw_format);
+        }
+        (hw_device_ref, callback_state)
+      }
+    };
+
+    // Open the decoder. On any failure, release the resources we just
+    // allocated so we don't leak.
+    let opened = match ctx.decoder().open_as(codec).and_then(|o| o.video()) {
+      Ok(d) => d,
+      Err(e) => {
+        // SAFETY: we either allocated these in this function above or
+        // they are null; av_buffer_unref / Box::from_raw handle null
+        // explicitly (we check first).
+        unsafe {
+          let mut hw = hw_device_ref;
+          if !hw.is_null() {
+            av_buffer_unref(&mut hw);
+          }
+          if !callback_state.is_null() {
+            drop(Box::from_raw(callback_state));
+          }
+        }
+        return Err(Error::Ffmpeg(e));
+      }
+    };
+
+    Ok(Self {
+      inner: ManuallyDrop::new(opened),
+      backend,
+      hw_device_ref,
+      callback_state,
+      hw_frame: frame::Video::empty(),
+    })
+  }
+}
+
+impl Drop for VideoDecoder {
+  fn drop(&mut self) {
+    // Order matters: the codec context goes first, because until it is freed
+    // FFmpeg may still call `get_format`, which reads `callback_state` through
+    // `opaque`. Then the boxed callback state, then our own hw device reference
+    // (FFmpeg released its reference along with the codec context).
+    // SAFETY: `inner` is not used again after this point, and each raw pointer
+    // is either null or the one `try_open` stored in this struct.
+    unsafe {
+      ManuallyDrop::drop(&mut self.inner);
+      if !self.callback_state.is_null() {
+        drop(Box::from_raw(self.callback_state));
+      }
+      if !self.hw_device_ref.is_null() {
+        av_buffer_unref(&mut self.hw_device_ref);
+      }
+    }
+  }
+}
+
+#[allow(dead_code)] // only called from the unit test; exists for its `Send` bound check
+fn _assert_send() {
+  fn check<T: Send>() {}
+  check::<VideoDecoder>();
+}
+
+#[cfg(test)]
+mod tests {
+  use super::*;
+
+  #[test]
+  fn no_codec_for_unknown_id() {
+    // Only the rendering of `Error::NoCodec` is checked here. Driving
+    // `open` itself needs real stream parameters, so that behavior is
+    // left to the integration tests rather than faked with empty
+    // parameters.
+    let rendered = format!("{}", Error::NoCodec(codec::Id::None));
+    assert!(rendered.contains("no decoder"));
+  }
+
+  #[test]
+  fn videodecoder_is_send() {
+    _assert_send();
+  }
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..92cb2d1
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,37 @@
+use crate::backend::Backend;
+
+/// Crate result alias.
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Errors returned from [`crate::VideoDecoder`].
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+ /// An underlying FFmpeg error.
+ #[error("ffmpeg error: {0}")]
+ Ffmpeg(#[from] ffmpeg_next::Error),
+
+ /// `avcodec_find_decoder` returned null for the input codec id.
+ #[error("no decoder for codec id {0:?}")]
+ NoCodec(ffmpeg_next::codec::Id),
+
+ /// The codec does not advertise a hardware configuration matching the
+ /// requested backend (via `avcodec_get_hw_config`).
+ #[error("codec does not support backend {0:?}")]
+ BackendUnsupportedByCodec(Backend),
+
+ /// `av_hwdevice_ctx_create` failed for the requested backend.
+ #[error("hardware device init failed for {backend:?}: {source}")]
+ HwDeviceInitFailed {
+ /// Backend that failed to initialise.
+ backend: Backend,
+ /// Underlying FFmpeg error.
+ source: ffmpeg_next::Error,
+ },
+
+ /// Auto-probe exhausted every backend in the platform's order.
+ #[error("all backends failed; attempts: {attempts:?}")]
+ AllBackendsFailed {
+ /// Per-backend errors collected during probing, in the order tried.
+ attempts: Vec<(Backend, Box<Error>)>,
+ },
+}
diff --git a/src/ffi.rs b/src/ffi.rs
new file mode 100644
index 0000000..6020079
--- /dev/null
+++ b/src/ffi.rs
@@ -0,0 +1,70 @@
+//! FFI shims used by the decoder. Kept in one place so the unsafe surface is
+//! easy to audit.
+
+use ffmpeg_next::ffi::{
+ avcodec_get_hw_config, AVCodec, AVCodecContext, AVHWDeviceType, AVPixelFormat,
+ AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX,
+};
+
+/// State pointed to by `AVCodecContext::opaque` so [`get_hw_format`] can pick
+/// the correct hardware pixel format without globals. One instance per
+/// decoder; freed in [`crate::VideoDecoder::drop`].
+#[repr(C)]
+pub(crate) struct CallbackState {
+ pub(crate) wanted: AVPixelFormat,
+}
+
+/// `AVCodecContext::get_format` callback. FFmpeg invokes it with the list of
+/// pixel formats the codec is willing to output for the current stream. We
+/// pick the hardware format we wired up at open time, or [`AVPixelFormat::AV_PIX_FMT_NONE`]
+/// to signal "no usable format" (which causes FFmpeg to error out — the caller
+/// then sees a normal `ffmpeg::Error` and probes the next backend).
+pub(crate) unsafe extern "C" fn get_hw_format(
+ ctx: *mut AVCodecContext,
+ mut pix_fmts: *const AVPixelFormat,
+) -> AVPixelFormat {
+ debug_assert!(!ctx.is_null());
+ debug_assert!(!pix_fmts.is_null());
+
+ // SAFETY: opaque was set by `try_open` to a valid `Box<CallbackState>`
+ // pointer that outlives the codec context (we only free it after the
+ // codec context's drop runs).
+ let state = unsafe { (*ctx).opaque as *const CallbackState };
+ if state.is_null() {
+ return AVPixelFormat::AV_PIX_FMT_NONE;
+ }
+ let wanted = unsafe { (*state).wanted };
+
+ // Walk the offered list looking for our format.
+ while unsafe { *pix_fmts } != AVPixelFormat::AV_PIX_FMT_NONE {
+ if unsafe { *pix_fmts } == wanted {
+ return wanted;
+ }
+ pix_fmts = unsafe { pix_fmts.add(1) };
+ }
+ AVPixelFormat::AV_PIX_FMT_NONE
+}
+
+/// Walk the codec's `AVCodecHWConfig` table and return the hardware pixel
+/// format associated with `device_type`, if the codec advertises one that
+/// uses the `HW_DEVICE_CTX` setup method.
+pub(crate) fn find_hw_pix_fmt(
+ codec: *const AVCodec,
+ device_type: AVHWDeviceType,
+) -> Option<AVPixelFormat> {
+ debug_assert!(!codec.is_null());
+ let mut i = 0;
+ loop {
+ // SAFETY: `avcodec_get_hw_config` returns null past the end; we stop then.
+ let cfg = unsafe { avcodec_get_hw_config(codec, i) };
+ if cfg.is_null() {
+ return None;
+ }
+ let cfg = unsafe { *cfg };
+ let supports_device_ctx = cfg.methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0;
+ if supports_device_ctx && cfg.device_type == device_type {
+ return Some(cfg.pix_fmt);
+ }
+ i += 1;
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 0a58390..7d9c7bd 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,11 +1,25 @@
-//! A template for creating Rust open-source repo on GitHub
-#![cfg_attr(not(feature = "std"), no_std)]
+//! Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next`.
+//!
+//! [`VideoDecoder`] mirrors the surface of `ffmpeg::decoder::Video`
+//! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and silently picks the best
+//! hardware backend for the host platform, falling back to software if none open.
+//!
+//! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side. For
+//! hardware backends they are downloaded with `av_hwframe_transfer_data` (NV12
+//! for 8-bit input, P010 for 10-bit). For software backends the frame is in the
+//! codec's native format.
+//!
+//! Pixel-format conversion is intentionally out of scope; downstream code is
+//! expected to handle that (e.g. via `colconv`).
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(docsrs, allow(unused_attributes))]
#![deny(missing_docs)]
-#[cfg(all(not(feature = "std"), feature = "alloc"))]
-extern crate alloc as std;
+mod backend;
+mod decoder;
+mod error;
+mod ffi;
-#[cfg(feature = "std")]
-extern crate std;
+pub use backend::Backend;
+pub use decoder::VideoDecoder;
+pub use error::{Error, Result};
diff --git a/tests/decode.rs b/tests/decode.rs
new file mode 100644
index 0000000..a936ae3
--- /dev/null
+++ b/tests/decode.rs
@@ -0,0 +1,68 @@
+//! Integration test: open the auto-probed decoder against a real video file
+//! and decode the first 30 frames. Skipped (with a clear message) when no
+//! sample is configured.
+//!
+//! Set `HWDECODE_SAMPLE_VIDEO` to an absolute path to enable.
+
+use ffmpeg::{format, frame, media};
+use ffmpeg_next as ffmpeg;
+use hwdecode::VideoDecoder;
+
+const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
+
+#[test]
+fn auto_open_decodes_at_least_one_frame() {
+ let Some(path) = std::env::var_os(SAMPLE_ENV) else {
+ eprintln!("skipping: set {SAMPLE_ENV} to a video file path to run this test");
+ return;
+ };
+
+ ffmpeg::init().expect("ffmpeg init");
+
+ let mut input = format::input(&path).expect("open input");
+ let stream = input
+ .streams()
+ .best(media::Type::Video)
+ .expect("video stream");
+ let stream_index = stream.index();
+ let expected_w = unsafe { (*stream.parameters().as_ptr()).width as u32 };
+ let expected_h = unsafe { (*stream.parameters().as_ptr()).height as u32 };
+
+ let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder");
+ eprintln!("backend = {:?}", decoder.backend());
+
+ assert_eq!(decoder.width(), expected_w);
+ assert_eq!(decoder.height(), expected_h);
+
+ let mut frame = frame::Video::empty();
+ let mut count = 0_usize;
+ let target = 30_usize;
+
+ 'outer: for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ decoder.send_packet(&packet).expect("send packet");
+ loop {
+ match decoder.receive_frame(&mut frame) {
+ Ok(()) => {
+ assert_eq!(frame.width(), expected_w);
+ assert_eq!(frame.height(), expected_h);
+ count += 1;
+ if count >= target {
+ break 'outer;
+ }
+ }
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+ if errno == ffmpeg::error::EAGAIN =>
+ {
+ break;
+ }
+ Err(e) => panic!("receive_frame: {e}"),
+ }
+ }
+ }
+
+ assert!(count >= 1, "expected at least 1 decoded frame, got {count}");
+ eprintln!("decoded {count} frames");
+}
diff --git a/tests/foo.rs b/tests/foo.rs
deleted file mode 100644
index 8b13789..0000000
--- a/tests/foo.rs
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs
new file mode 100644
index 0000000..5aa37c9
--- /dev/null
+++ b/tests/hw_smoke.rs
@@ -0,0 +1,64 @@
+//! `#[ignore]`-gated smoke test that asserts the auto-probed backend is
+//! actually a hardware backend (not Software). Run with:
+//!
+//! ```sh
+//! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored
+//! ```
+
+use ffmpeg::{format, frame, media};
+use ffmpeg_next as ffmpeg;
+use hwdecode::{Backend, VideoDecoder};
+
+const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
+
+#[test]
+#[ignore = "requires HWDECODE_SAMPLE_VIDEO and a working hardware backend"]
+fn auto_probe_picks_hardware_backend() {
+ let path = std::env::var_os(SAMPLE_ENV).unwrap_or_else(|| panic!("{SAMPLE_ENV} not set"));
+
+ ffmpeg::init().expect("ffmpeg init");
+
+ let mut input = format::input(&path).expect("open input");
+ let stream = input
+ .streams()
+ .best(media::Type::Video)
+ .expect("video stream");
+ let stream_index = stream.index();
+
+ let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder");
+ eprintln!("auto-probe selected backend = {:?}", decoder.backend());
+ assert_ne!(
+ decoder.backend(),
+ Backend::Software,
+ "expected hardware backend; got Software"
+ );
+
+ // Verify we can actually decode at least one HW frame end-to-end.
+ let mut frame = frame::Video::empty();
+ let mut got_frame = false;
+ for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ decoder.send_packet(&packet).expect("send packet");
+ match decoder.receive_frame(&mut frame) {
+ Ok(()) => {
+ got_frame = true;
+ eprintln!(
+ "first hw frame: {}x{} fmt={:?}",
+ frame.width(),
+ frame.height(),
+ frame.format()
+ );
+ break;
+ }
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+ if errno == ffmpeg::error::EAGAIN =>
+ {
+ continue;
+ }
+ Err(e) => panic!("receive_frame: {e}"),
+ }
+ }
+ assert!(got_frame, "no frames decoded");
+}
From bfd9b525cba209ea1a8ec29cb79d1e3fffa8e628 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 15:56:13 +1200
Subject: [PATCH 02/27] update
---
benches/decode.rs | 32 +++-
src/decoder.rs | 420 +++++++++++++++++++++++++++++++++++-----------
src/ffi.rs | 112 +++++++++++--
tests/hw_smoke.rs | 20 ++-
4 files changed, 455 insertions(+), 129 deletions(-)
diff --git a/benches/decode.rs b/benches/decode.rs
index be7281d..2433de9 100644
--- a/benches/decode.rs
+++ b/benches/decode.rs
@@ -74,19 +74,37 @@ fn bench_decode(c: &mut Criterion) {
return;
};
- // Probe backend once to print which HW backend (if any) we'd be benching.
+ // Probe by decoding one frame so the probe collapses to the backend that
+ // actually produced output. Reading `backend()` before the first frame
+ // would observe the optimistically-selected value and mislabel HW runs
+ // that silently degraded.
let probed_backend = {
- let input = format::input(&path).expect("open input");
+ let mut input = format::input(&path).expect("open input");
let stream = input
.streams()
.best(media::Type::Video)
.expect("video stream");
- let dec = VideoDecoder::open(stream.parameters()).expect("auto-probe");
- let b = dec.backend();
- drop(dec);
- b
+ let stream_index = stream.index();
+ let mut dec = VideoDecoder::open(stream.parameters()).expect("auto-probe");
+ let mut frame = frame::Video::empty();
+ 'probe: for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ dec.send_packet(&packet).expect("probe send_packet");
+ match dec.receive_frame(&mut frame) {
+ Ok(()) => break 'probe,
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+ if errno == ffmpeg::error::EAGAIN =>
+ {
+ continue;
+ }
+ Err(e) => panic!("probe receive_frame: {e}"),
+ }
+ }
+ dec.backend()
};
- eprintln!("auto-probe selected backend: {probed_backend:?}");
+ eprintln!("auto-probe settled on backend: {probed_backend:?}");
let mut group = c.benchmark_group("decode");
group.measurement_time(Duration::from_secs(15));
diff --git a/src/decoder.rs b/src/decoder.rs
index 7a3a357..76fe3e4 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -3,8 +3,8 @@ use std::{mem::ManuallyDrop, ptr};
use ffmpeg_next::{
codec::{self, Context},
ffi::{
- av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_hwdevice_ctx_create,
- av_hwframe_transfer_data,
+ av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref,
+ av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVPixelFormat,
},
format::Pixel,
frame, Codec, Packet, Rational,
@@ -19,45 +19,128 @@ use crate::{
/// Hardware-accelerated video decoder with software fallback.
///
/// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface.
-/// Frames returned by [`Self::receive_frame`] are always CPU-side; for hardware
-/// backends they are downloaded with `av_hwframe_transfer_data` (NV12 / P010).
+/// Frames returned by [`Self::receive_frame`] are always CPU-side; for the
+/// hardware path they are downloaded with `av_hwframe_transfer_data` (NV12 /
+/// P010).
+///
+/// `open` does a true probe: each backend opens with a strict `get_format`
+/// callback, and on the first non-transient error the decoder is torn down
+/// and the next backend is tried with all packets seen so far replayed
+/// through it. Once the first frame is successfully received the probe
+/// collapses and subsequent calls go straight to the active backend.
pub struct VideoDecoder {
+ /// Live FFmpeg state for the currently active backend.
+ state: DecoderState,
+ /// Reusable frame buffer used for hw-side decoding before transfer / move.
+ hw_frame: frame::Video,
+ /// Probe state: present until the first frame is received from the active
+ /// backend, then `None`. While `Some`, packets are buffered for replay and
+ /// non-transient errors / decoder failures advance to the next backend.
+ probe: Option<ProbeState>,
+}
+
+/// Owned FFmpeg state for one open codec context. Has its own `Drop` so we
+/// can swap it out cleanly during a probe advance via `mem::replace`.
+struct DecoderState {
/// Wrapped FFmpeg decoder. `ManuallyDrop` so we can sequence its drop
- /// before freeing the callback state in our [`Drop`] impl.
+ /// before freeing the callback state.
inner: ManuallyDrop<ffmpeg_next::decoder::Video>,
+ /// Backend driving this state.
backend: Backend,
/// Owned reference produced by `av_hwdevice_ctx_create`. Null for software.
- hw_device_ref: *mut ffmpeg_next::ffi::AVBufferRef,
+ hw_device_ref: *mut AVBufferRef,
/// Owned `Box<CallbackState>` raw pointer; `AVCodecContext::opaque` aliases
/// it. Null for software.
callback_state: *mut CallbackState,
- /// Reusable frame buffer used for hw-side decoding before transfer.
- /// Unused on the software path (`receive_frame` writes the caller's frame
- /// directly).
- hw_frame: frame::Video,
+ /// Hardware pixel format we asked the decoder to produce. Compared (as
+ /// `i32` to avoid enum-discriminant UB) against each received frame's
+ /// format. `AV_PIX_FMT_NONE` for the software path.
+ hw_pix_fmt: AVPixelFormat,
+}
+
+/// State carried only during the probe window (before the first successful
+/// frame). Holds enough information to tear down the current decoder and
+/// retry with the next backend.
+struct ProbeState {
+ parameters: codec::Parameters,
+ codec: Codec,
+ /// Backends still to try, in order. Empty means "no more options after
+ /// the active one fails".
+ remaining_backends: Vec<Backend>,
+ /// Packets sent so far, kept for replay through the next backend.
+ buffered_packets: Vec<Packet>,
+ /// Whether `send_eof` has been called; replayed alongside packets.
+ eof_sent: bool,
}
-// SAFETY: All raw pointers are exclusively owned by the struct and never
-// shared. `ffmpeg::decoder::Video` itself is Send (its `Context` is `unsafe
-// impl Send`). The decoder is not safe for concurrent use, hence not `Sync`.
+// SAFETY: All raw pointers are exclusively owned by `DecoderState` and never
+// shared. `ffmpeg::decoder::Video` is itself `Send` (its `Context` carries an
+// `unsafe impl Send`). The decoder is not safe for concurrent use, hence not
+// `Sync`.
+unsafe impl Send for DecoderState {}
unsafe impl Send for VideoDecoder {}
+impl Drop for DecoderState {
+ fn drop(&mut self) {
+ // Order matters:
+ // 1. Drop the codec context first. While it lives, FFmpeg may invoke
+ // `get_format`, which dereferences `callback_state` via `opaque`.
+ // 2. Free the callback state heap allocation.
+ // 3. Release our hw device reference (FFmpeg released its own when
+ // the codec context was freed in step 1).
+ unsafe {
+ ManuallyDrop::drop(&mut self.inner);
+ if !self.callback_state.is_null() {
+ drop(Box::from_raw(self.callback_state));
+ self.callback_state = ptr::null_mut();
+ }
+ if !self.hw_device_ref.is_null() {
+ av_buffer_unref(&mut self.hw_device_ref);
+ }
+ }
+ }
+}
+
impl VideoDecoder {
- /// Auto-probe hardware backends in the platform's default order, falling
- /// back to software. The chosen backend is reported by [`Self::backend`].
+ /// Auto-probe hardware backends in the platform's default order.
+ ///
+ /// Each backend opens with a strict `get_format` callback. The first
+ /// backend whose `avcodec_open2` succeeds becomes active; if the first
+ /// frame from it fails (e.g. `get_format` returns `NONE` because the
+ /// backend can't handle this stream's profile/depth), the decoder is torn
+ /// down and the next backend is tried — packets sent so far are replayed
+ /// through the new decoder, transparently to the caller.
+ ///
+ /// [`Self::backend`] reflects whichever backend ultimately produced the
+ /// first frame. Software is the last entry in every probe order, so
+ /// `open` cannot return without a working decoder for codecs that
+ /// libavcodec supports at all.
pub fn open(parameters: codec::Parameters) -> Result<Self> {
let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id });
let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?;
+ let order = backend::probe_order();
- let mut attempts = Vec::new();
- for &backend in backend::probe_order() {
- match Self::try_open(parameters.clone(), codec, backend) {
- Ok(decoder) => {
- tracing::info!(?backend, "hwdecode: opened video decoder");
- return Ok(decoder);
+ let mut attempts: Vec<(Backend, Box<Error>)> = Vec::new();
+ for (i, &backend) in order.iter().enumerate() {
+ match Self::build_state(parameters.clone(), codec, backend) {
+ Ok(state) => {
+ tracing::info!(?backend, "hwdecode: opened video decoder (probing)");
+ let remaining = order[(i + 1)..].to_vec();
+ let probe = (!remaining.is_empty()).then(|| ProbeState {
+ parameters,
+ codec,
+ remaining_backends: remaining,
+ buffered_packets: Vec::new(),
+ eof_sent: false,
+ });
+ return Ok(Self {
+ state,
+ hw_frame: frame::Video::empty(),
+ probe,
+ });
}
Err(e) => {
- tracing::warn!(?backend, error = %e, "hwdecode: backend probe failed");
+ tracing::warn!(?backend, error = %e, "hwdecode: backend open failed");
attempts.push((backend, Box::new(e)));
}
}
@@ -66,113 +149,260 @@ impl VideoDecoder {
}
/// Open the decoder with a specific backend. No probe, no fallback.
- /// Returns an error if `backend` is not supported by the codec or fails to
- /// initialise.
+ ///
+ /// If `backend` is a hardware backend that the codec can't actually use
+ /// for this stream, the failure surfaces from
+ /// [`Self::receive_frame`] (the strict `get_format` callback returns
+ /// `AV_PIX_FMT_NONE`, the decoder errors out). The caller is responsible
+ /// for retrying with `Backend::Software` or another backend if desired.
pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result<Self> {
let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id });
let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?;
- Self::try_open(parameters, codec, backend)
+ let state = Self::build_state(parameters, codec, backend)?;
+ Ok(Self {
+ state,
+ hw_frame: frame::Video::empty(),
+ probe: None,
+ })
}
- /// The backend that opened this decoder.
+ /// The backend currently producing frames. While the probe is still in
+ /// progress (no frame received yet) this returns the optimistically
+ /// selected backend; after the first frame, it is the backend that
+ /// actually produced it. Once stable, never changes again.
pub fn backend(&self) -> Backend {
- self.backend
+ self.state.backend
}
/// Decoder width in pixels.
pub fn width(&self) -> u32 {
- self.inner.width()
+ self.state.inner.width()
}
/// Decoder height in pixels.
pub fn height(&self) -> u32 {
- self.inner.height()
+ self.state.inner.height()
}
- /// Current pixel format of the codec context. For HW backends this is the
+ /// Codec context's current pixel format. For HW backends this is the
/// hardware pixel format (e.g. `Pixel::VIDEOTOOLBOX`) once the first frame
- /// has been negotiated; the format of frames returned from
- /// [`Self::receive_frame`] is the *transferred* format (NV12 / P010) and
- /// must be read from the frame itself.
+ /// has been negotiated; the caller-facing format produced by
+ /// [`Self::receive_frame`] is the *transferred* format (NV12 / P010 for
+ /// HW, codec-native for SW) and must be read from the frame itself.
pub fn format(&self) -> Pixel {
- self.inner.format()
+ self.state.inner.format()
}
/// Codec context time base.
pub fn time_base(&self) -> Rational {
- self.inner.time_base()
+ self.state.inner.time_base()
}
/// Frame rate from the codec context, if known.
pub fn frame_rate(&self) -> Option<Rational> {
- self.inner.frame_rate()
+ self.state.inner.frame_rate()
}
- /// Submit a packet to the decoder.
+ /// Submit a packet to the decoder. While the probe is active the packet is
+ /// also buffered for potential replay through a fallback backend.
pub fn send_packet(&mut self, packet: &Packet) -> Result<()> {
- self.inner.send_packet(packet).map_err(Error::Ffmpeg)
+ if let Some(probe) = self.probe.as_mut() {
+ probe.buffered_packets.push(packet.clone());
+ }
+ self.state.inner.send_packet(packet).map_err(Error::Ffmpeg)
}
/// Signal end-of-stream to the decoder; remaining frames can be drained
- /// with [`Self::receive_frame`].
+ /// with [`Self::receive_frame`]. Recorded for replay if probe is active.
pub fn send_eof(&mut self) -> Result<()> {
- self.inner.send_eof().map_err(Error::Ffmpeg)
+ if let Some(probe) = self.probe.as_mut() {
+ probe.eof_sent = true;
+ }
+ self.state.inner.send_eof().map_err(Error::Ffmpeg)
}
/// Receive a CPU-side decoded frame.
///
- /// For hardware backends the frame is transferred from GPU memory via
+ /// On the hardware path the frame is transferred from GPU memory via
/// `av_hwframe_transfer_data` and frame metadata (pts, time_base, side
- /// data, ...) is copied with `av_frame_copy_props`. For the software
- /// backend this is a direct passthrough.
+ /// data, ...) is copied with `av_frame_copy_props`. The caller's frame is
+ /// always unref'd first so reuse across resolution changes or different
+ /// decoders is safe (mirrors `avcodec_receive_frame`'s own contract).
+ ///
+ /// While the probe window is open and the active backend produces a
+ /// non-transient error or a software-format frame instead of the
+ /// configured hardware format, the decoder is torn down and the next
+ /// backend in probe order is tried with all buffered packets replayed.
+ /// The caller observes only the eventual successful frame (or, if every
+ /// backend has been exhausted, the underlying error).
///
- /// Returns the same errors as `ffmpeg::decoder::Video::receive_frame`,
- /// e.g. `Error::Other { errno: EAGAIN }` when no frame is ready.
+ /// Returns the same transient signals as `ffmpeg::decoder::Video`:
+ /// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and
+ /// more packets must be sent, and `Error::Ffmpeg(Eof)` once fully drained.
pub fn receive_frame(&mut self, frame: &mut frame::Video) -> Result<()> {
- if self.backend == Backend::Software {
- return self.inner.receive_frame(frame).map_err(Error::Ffmpeg);
- }
+ loop {
+ let res = self.state.inner.receive_frame(&mut self.hw_frame);
+ match res {
+ Err(e) => {
+ if is_transient(&e) {
+ return Err(Error::Ffmpeg(e));
+ }
+ if self.probe.is_some() && self.advance_probe()? {
+ continue;
+ }
+ return Err(Error::Ffmpeg(e));
+ }
+ Ok(()) => {
+ // Compare format as i32 to avoid constructing an AVPixelFormat
+ // enum from an unvalidated integer. Library/header skew or a new
+ // hardware format would otherwise be UB.
+ let received_fmt: i32 = unsafe { (*self.hw_frame.as_ptr()).format };
- // HW path: receive into our reusable hw_frame, then transfer.
- self
- .inner
- .receive_frame(&mut self.hw_frame)
- .map_err(Error::Ffmpeg)?;
+ if self.state.backend == Backend::Software {
+ // Pure SW path: just hand over the frame.
+ unsafe {
+ av_frame_unref(frame.as_mut_ptr());
+ av_frame_move_ref(frame.as_mut_ptr(), self.hw_frame.as_mut_ptr());
+ }
+ self.probe = None;
+ return Ok(());
+ }
- // SAFETY: both frames are valid AVFrame pointers owned by us. transfer
- // allocates buffers on `frame` as needed; copy_props moves timing and
- // side data over (transfer_data does not).
- unsafe {
- let ret = av_hwframe_transfer_data(frame.as_mut_ptr(), self.hw_frame.as_ptr(), 0);
- if ret < 0 {
- return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
- }
- let ret = av_frame_copy_props(frame.as_mut_ptr(), self.hw_frame.as_ptr());
- if ret < 0 {
- return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
+ if received_fmt == self.state.hw_pix_fmt as i32 {
+ // True HW frame: download to CPU and copy timing/side data.
+ unsafe {
+ av_frame_unref(frame.as_mut_ptr());
+ let ret = av_hwframe_transfer_data(frame.as_mut_ptr(), self.hw_frame.as_ptr(), 0);
+ if ret < 0 {
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
+ }
+ let ret = av_frame_copy_props(frame.as_mut_ptr(), self.hw_frame.as_ptr());
+ if ret < 0 {
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
+ }
+ }
+ self.probe = None;
+ return Ok(());
+ }
+
+ // The decoder produced a CPU frame from a HW-opened context. With
+ // strict `get_format` this is unusual (the codec would normally
+ // error on get_format=NONE). If it does happen and we still have
+ // backends to try, treat it as a probe failure and advance.
+ if self.probe.is_some() && self.advance_probe()? {
+ continue;
+ }
+ // No fallback left; accept the SW frame and update the active
+ // backend so `backend()` reflects reality.
+ unsafe {
+ av_frame_unref(frame.as_mut_ptr());
+ av_frame_move_ref(frame.as_mut_ptr(), self.hw_frame.as_mut_ptr());
+ }
+ self.state.backend = Backend::Software;
+ self.probe = None;
+ return Ok(());
+ }
}
}
- Ok(())
}
- /// Flush internal buffers (e.g. after a seek).
+ /// Flush internal buffers (e.g. after a seek). Resets probe-time buffer if
+ /// active, since post-seek packets do not align with replayed history.
pub fn flush(&mut self) {
- self.inner.flush();
+ self.state.inner.flush();
+ if let Some(probe) = self.probe.as_mut() {
+ probe.buffered_packets.clear();
+ probe.eof_sent = false;
+ }
+ }
+
+ /// Tear down the active decoder and bring up the next backend in
+ /// `remaining_backends`, replaying buffered packets. Returns `true` if a
+ /// new backend was successfully installed (caller should retry the
+ /// receive); `false` if the probe is exhausted.
+ fn advance_probe(&mut self) -> Result<bool> {
+ let next_backend = match self.probe.as_mut() {
+ Some(probe) if !probe.remaining_backends.is_empty() => probe.remaining_backends.remove(0),
+ _ => return Ok(false),
+ };
+ let prev_backend = self.state.backend;
+ tracing::warn!(
+ from = ?prev_backend,
+ to = ?next_backend,
+ "hwdecode: backend rejected stream, advancing probe"
+ );
+
+ // Snapshot probe inputs before mutating self.
+ let (parameters, codec, buffered_packets, eof_sent) = {
+ let probe = self.probe.as_mut().expect("probe state");
+ (
+ probe.parameters.clone(),
+ probe.codec,
+ std::mem::take(&mut probe.buffered_packets),
+ probe.eof_sent,
+ )
+ };
+
+ // Build the new state. If this open fails, we fall through to advancing
+ // again — which is what the caller's loop will do once it sees the next
+ // probe iteration also fail. To keep semantics simple, propagate the
+ // open error directly: the caller's loop will see it as the decode
+ // error and return upward; in practice probe order ends in Software
+ // which always opens.
+ let new_state = Self::build_state(parameters, codec, next_backend)?;
+
+ // Replace state. The old DecoderState's Drop runs here, in order:
+ // codec context first, then callback_state box, then hw_device_ref.
+ self.state = new_state;
+
+ // hw_frame may hold residual data from the old decoder. Clear it so
+ // the next receive starts clean.
+ unsafe {
+ av_frame_unref(self.hw_frame.as_mut_ptr());
+ }
+
+ // Replay buffered packets and (if previously sent) EOF through the new
+ // decoder. We re-buffer them on the way through so a subsequent probe
+ // advance still has the full history.
+ let probe = self.probe.as_mut().expect("probe still present");
+ probe.buffered_packets.clear();
+ probe.eof_sent = false;
+
+ for pkt in buffered_packets {
+ // Mirror `send_packet`'s buffering behaviour.
+ probe.buffered_packets.push(pkt.clone());
+ self.state.inner.send_packet(&pkt).map_err(Error::Ffmpeg)?;
+ }
+ if eof_sent {
+ self.probe.as_mut().expect("probe still present").eof_sent = true;
+ self.state.inner.send_eof().map_err(Error::Ffmpeg)?;
+ }
+
+ Ok(true)
}
- /// Inner open: tries one backend exactly, no probing.
- fn try_open(parameters: codec::Parameters, codec: Codec, backend: Backend) -> Result<Self> {
+ /// Build raw FFmpeg state for one backend. Strict `get_format` (NONE on
+ /// missing HW format); cross-backend fallback is the caller's job.
+ fn build_state(
+ parameters: codec::Parameters,
+ codec: Codec,
+ backend: Backend,
+ ) -> Result<DecoderState> {
let mut ctx = Context::from_parameters(parameters)?;
- let (hw_device_ref, callback_state) = match backend.av_hwdevice_type() {
- None => (ptr::null_mut(), ptr::null_mut()),
+ let (hw_device_ref, callback_state, hw_pix_fmt) = match backend.av_hwdevice_type() {
+ None => (
+ ptr::null_mut(),
+ ptr::null_mut(),
+ AVPixelFormat::AV_PIX_FMT_NONE,
+ ),
Some(av_type) => {
// Verify the codec advertises this hwaccel.
let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type)
.ok_or(Error::BackendUnsupportedByCodec(backend))?;
// Create the device context.
- let mut hw_device_ref = ptr::null_mut();
+ let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut();
// SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill.
let ret = unsafe {
av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0)
@@ -184,8 +414,6 @@ impl VideoDecoder {
});
}
- // Wire up the codec context: a fresh ref for FFmpeg, a heap
- // pointer for the get_format callback to read.
let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt }));
// SAFETY: ctx is a freshly-constructed AVCodecContext we own;
// av_buffer_ref bumps the refcount of the device buffer for FFmpeg's
@@ -196,7 +424,7 @@ impl VideoDecoder {
(*raw).opaque = callback_state.cast();
(*raw).get_format = Some(get_hw_format);
}
- (hw_device_ref, callback_state)
+ (hw_device_ref, callback_state, hw_pix_fmt)
}
};
@@ -221,34 +449,21 @@ impl VideoDecoder {
}
};
- Ok(Self {
+ Ok(DecoderState {
inner: ManuallyDrop::new(opened),
backend,
hw_device_ref,
callback_state,
- hw_frame: frame::Video::empty(),
+ hw_pix_fmt,
})
}
}
-impl Drop for VideoDecoder {
- fn drop(&mut self) {
- // Order matters:
- // 1. Drop the codec context first. While it lives, FFmpeg may invoke
- // `get_format`, which dereferences `callback_state` via `opaque`.
- // 2. Free the callback state heap allocation.
- // 3. Release our hw device reference (FFmpeg released its own when
- // the codec context was freed in step 1).
- unsafe {
- ManuallyDrop::drop(&mut self.inner);
- if !self.callback_state.is_null() {
- drop(Box::from_raw(self.callback_state));
- }
- if !self.hw_device_ref.is_null() {
- av_buffer_unref(&mut self.hw_device_ref);
- }
- }
- }
+/// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame`
+/// and must not be treated as backend failures.
+fn is_transient(e: &ffmpeg_next::Error) -> bool {
+ matches!(e, ffmpeg_next::Error::Other { errno } if *errno == ffmpeg_next::error::EAGAIN)
+ || matches!(e, ffmpeg_next::Error::Eof)
}
#[allow(dead_code)]
@@ -263,10 +478,6 @@ mod tests {
#[test]
fn no_codec_for_unknown_id() {
- // Build a Parameters with an unknown id — easiest path is to allocate
- // empty parameters and inspect; here we just confirm Error::NoCodec
- // formats sensibly. (Open behavior is exercised by integration tests
- // because it requires real stream params.)
let err = Error::NoCodec(codec::Id::None);
assert!(format!("{err}").contains("no decoder"));
}
@@ -275,4 +486,15 @@ mod tests {
fn videodecoder_is_send() {
_assert_send();
}
+
+ #[test]
+ fn is_transient_recognises_eagain_and_eof() {
+ let eagain = ffmpeg_next::Error::Other {
+ errno: ffmpeg_next::error::EAGAIN,
+ };
+ assert!(is_transient(&eagain));
+ assert!(is_transient(&ffmpeg_next::Error::Eof));
+ let other = ffmpeg_next::Error::InvalidData;
+ assert!(!is_transient(&other));
+ }
}
diff --git a/src/ffi.rs b/src/ffi.rs
index 6020079..78ee80c 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -8,39 +8,48 @@ use ffmpeg_next::ffi::{
/// State pointed to by `AVCodecContext::opaque` so [`get_hw_format`] can pick
/// the correct hardware pixel format without globals. One instance per
-/// decoder; freed in [`crate::VideoDecoder::drop`].
+/// decoder; freed by [`crate::VideoDecoder`] after the codec context is
+/// dropped.
#[repr(C)]
pub(crate) struct CallbackState {
+ /// Hardware pixel format we want the decoder to produce.
pub(crate) wanted: AVPixelFormat,
}
/// `AVCodecContext::get_format` callback. FFmpeg invokes it with the list of
-/// pixel formats the codec is willing to output for the current stream. We
-/// pick the hardware format we wired up at open time, or [`AVPixelFormat::AV_PIX_FMT_NONE`]
-/// to signal "no usable format" (which causes FFmpeg to error out — the caller
-/// then sees a normal `ffmpeg::Error` and probes the next backend).
+/// pixel formats the codec is willing to output for the current stream.
+///
+/// Returns the configured hardware format if present; otherwise
+/// [`AVPixelFormat::AV_PIX_FMT_NONE`], which causes the decoder to fail. The
+/// failure surfaces as a normal `Error::Ffmpeg` from
+/// [`crate::VideoDecoder::receive_frame`]; for `VideoDecoder::open` callers
+/// the probe loop tears down and retries with the next backend (replaying
+/// buffered packets), so software fallback happens at the decoder level
+/// rather than silently in-context.
pub(crate) unsafe extern "C" fn get_hw_format(
ctx: *mut AVCodecContext,
- mut pix_fmts: *const AVPixelFormat,
+ pix_fmts: *const AVPixelFormat,
) -> AVPixelFormat {
debug_assert!(!ctx.is_null());
debug_assert!(!pix_fmts.is_null());
// SAFETY: opaque was set by `try_open` to a valid `Box<CallbackState>`
// pointer that outlives the codec context (we only free it after the
- // codec context's drop runs).
+ // codec context's drop runs). When opaque is null we treat the call as
+ // strict — a stray invocation cannot silently downgrade.
let state = unsafe { (*ctx).opaque as *const CallbackState };
- if state.is_null() {
- return AVPixelFormat::AV_PIX_FMT_NONE;
- }
- let wanted = unsafe { (*state).wanted };
+ let wanted = if state.is_null() {
+ AVPixelFormat::AV_PIX_FMT_NONE
+ } else {
+ unsafe { (*state).wanted }
+ };
- // Walk the offered list looking for our format.
- while unsafe { *pix_fmts } != AVPixelFormat::AV_PIX_FMT_NONE {
- if unsafe { *pix_fmts } == wanted {
+ let mut p = pix_fmts;
+ while unsafe { *p } != AVPixelFormat::AV_PIX_FMT_NONE {
+ if unsafe { *p } == wanted {
return wanted;
}
- pix_fmts = unsafe { pix_fmts.add(1) };
+ p = unsafe { p.add(1) };
}
AVPixelFormat::AV_PIX_FMT_NONE
}
@@ -68,3 +77,76 @@ pub(crate) fn find_hw_pix_fmt(
i += 1;
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::ptr;
+
+ // The callback derefs `(*ctx).opaque`, so we need a real-looking
+ // AVCodecContext. We construct a zeroed one (the callback only reads opaque).
+ struct FakeCtx(*mut AVCodecContext);
+ impl FakeCtx {
+ fn new(state: *mut CallbackState) -> Self {
+ let boxed: Box<AVCodecContext> = unsafe { Box::new(std::mem::zeroed()) };
+ let raw = Box::into_raw(boxed);
+ unsafe { (*raw).opaque = state.cast() };
+ Self(raw)
+ }
+ }
+ impl Drop for FakeCtx {
+ fn drop(&mut self) {
+ unsafe { drop(Box::from_raw(self.0)) };
+ }
+ }
+
+ fn run(state: &CallbackState, mut offered: Vec<AVPixelFormat>) -> AVPixelFormat {
+ offered.push(AVPixelFormat::AV_PIX_FMT_NONE);
+ let ctx = FakeCtx::new(state as *const _ as *mut _);
+ unsafe { get_hw_format(ctx.0, offered.as_ptr()) }
+ }
+
+ #[test]
+ fn returns_wanted_hw_format_when_offered() {
+ let state = CallbackState {
+ wanted: AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX,
+ };
+ let got = run(
+ &state,
+ vec![
+ AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX,
+ AVPixelFormat::AV_PIX_FMT_NV12,
+ ],
+ );
+ assert_eq!(got, AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX);
+ }
+
+ #[test]
+ fn returns_none_when_wanted_absent() {
+ let state = CallbackState {
+ wanted: AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX,
+ };
+ let got = run(
+ &state,
+ vec![
+ AVPixelFormat::AV_PIX_FMT_NV12,
+ AVPixelFormat::AV_PIX_FMT_YUV420P,
+ ],
+ );
+ assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE);
+ }
+
+ #[test]
+ fn null_opaque_is_treated_as_strict() {
+ let boxed: Box<AVCodecContext> = unsafe { Box::new(std::mem::zeroed()) };
+ let ctx_raw = Box::into_raw(boxed);
+ unsafe { (*ctx_raw).opaque = ptr::null_mut() };
+ let offered = [
+ AVPixelFormat::AV_PIX_FMT_NV12,
+ AVPixelFormat::AV_PIX_FMT_NONE,
+ ];
+ let got = unsafe { get_hw_format(ctx_raw, offered.as_ptr()) };
+ assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE);
+ unsafe { drop(Box::from_raw(ctx_raw)) };
+ }
+}
diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs
index 5aa37c9..3084faf 100644
--- a/tests/hw_smoke.rs
+++ b/tests/hw_smoke.rs
@@ -26,14 +26,12 @@ fn auto_probe_picks_hardware_backend() {
let stream_index = stream.index();
let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder");
- eprintln!("auto-probe selected backend = {:?}", decoder.backend());
- assert_ne!(
- decoder.backend(),
- Backend::Software,
- "expected hardware backend; got Software"
- );
+ eprintln!("auto-probe optimistic backend = {:?}", decoder.backend());
- // Verify we can actually decode at least one HW frame end-to-end.
+ // Decode at least one frame so the probe collapses, then check the
+ // backend that actually produced it. Checking `decoder.backend()` before
+ // any frame has been received would observe the optimistic pre-probe
+ // value and could false-pass when a HW backend silently degrades.
let mut frame = frame::Video::empty();
let mut got_frame = false;
for (s, packet) in input.packets() {
@@ -45,7 +43,8 @@ fn auto_probe_picks_hardware_backend() {
Ok(()) => {
got_frame = true;
eprintln!(
- "first hw frame: {}x{} fmt={:?}",
+ "first frame: backend={:?} {}x{} fmt={:?}",
+ decoder.backend(),
frame.width(),
frame.height(),
frame.format()
@@ -61,4 +60,9 @@ fn auto_probe_picks_hardware_backend() {
}
}
assert!(got_frame, "no frames decoded");
+ assert_ne!(
+ decoder.backend(),
+ Backend::Software,
+ "expected hardware backend after first frame; got Software"
+ );
}
From 9dc8542d5de68f57fd5e5f11184380802bfd12e6 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 16:23:16 +1200
Subject: [PATCH 03/27] update
---
benches/decode.rs | 8 +-
examples/decode.rs | 20 ++--
src/decoder.rs | 284 ++++++++++++++++++++++++++-------------------
src/frame.rs | 109 +++++++++++++++++
src/lib.rs | 2 +
tests/decode.rs | 10 +-
tests/hw_smoke.rs | 10 +-
7 files changed, 302 insertions(+), 141 deletions(-)
create mode 100644 src/frame.rs
diff --git a/benches/decode.rs b/benches/decode.rs
index 2433de9..82d0ba9 100644
--- a/benches/decode.rs
+++ b/benches/decode.rs
@@ -11,9 +11,9 @@
use std::{path::PathBuf, time::Duration};
use criterion::{criterion_group, criterion_main, Criterion};
-use ffmpeg::{format, frame, media};
+use ffmpeg::{format, media};
use ffmpeg_next as ffmpeg;
-use hwdecode::{Backend, VideoDecoder};
+use hwdecode::{Backend, Frame, VideoDecoder};
const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
@@ -36,7 +36,7 @@ fn decode_all(path: &PathBuf, backend: Backend) -> Result VideoDecoder::open(stream.parameters())?,
};
- let mut frame = frame::Video::empty();
+ let mut frame = Frame::empty();
let mut count = 0_usize;
let mut drain = |decoder: &mut VideoDecoder, count: &mut usize| -> Result<(), hwdecode::Error> {
@@ -86,7 +86,7 @@ fn bench_decode(c: &mut Criterion) {
.expect("video stream");
let stream_index = stream.index();
let mut dec = VideoDecoder::open(stream.parameters()).expect("auto-probe");
- let mut frame = frame::Video::empty();
+ let mut frame = Frame::empty();
'probe: for (s, packet) in input.packets() {
if s.index() != stream_index {
continue;
diff --git a/examples/decode.rs b/examples/decode.rs
index fa28582..69763bf 100644
--- a/examples/decode.rs
+++ b/examples/decode.rs
@@ -4,9 +4,9 @@
//! cargo run --release --example decode -- /path/to/video.mp4
//! ```
-use ffmpeg::{format, frame, media};
+use ffmpeg::{format, media};
use ffmpeg_next as ffmpeg;
-use hwdecode::VideoDecoder;
+use hwdecode::{Frame, VideoDecoder};
fn main() -> Result<(), Box<dyn std::error::Error>> {
let path = std::env::args()
@@ -24,26 +24,25 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut decoder = VideoDecoder::open(stream.parameters())?;
println!(
- "backend={:?} {}x{} codec_pix_fmt_initial={:?}",
+ "open: backend={:?} {}x{}",
decoder.backend(),
decoder.width(),
decoder.height(),
- decoder.format(),
);
- let mut frame = frame::Video::empty();
+ let mut frame = Frame::empty();
let mut count: u64 = 0;
- let drain = |decoder: &mut VideoDecoder, frame: &mut frame::Video, count: &mut u64| loop {
+ let drain = |decoder: &mut VideoDecoder, frame: &mut Frame, count: &mut u64| loop {
match decoder.receive_frame(frame) {
Ok(()) => {
*count += 1;
println!(
- "frame#{count} pts={:?} {}x{} fmt={:?}",
+ "frame#{count} pts={:?} {}x{} pix_fmt={}",
frame.pts(),
frame.width(),
frame.height(),
- frame.format(),
+ frame.pix_fmt(),
);
}
Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
@@ -69,6 +68,9 @@ fn main() -> Result<(), Box> {
decoder.send_eof()?;
drain(&mut decoder, &mut frame, &mut count);
- println!("decoded {count} frames");
+ println!(
+ "decoded {count} frames; final backend={:?}",
+ decoder.backend()
+ );
Ok(())
}
diff --git a/src/decoder.rs b/src/decoder.rs
index 76fe3e4..a776697 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -6,7 +6,6 @@ use ffmpeg_next::{
av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref,
av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVPixelFormat,
},
- format::Pixel,
frame, Codec, Packet, Rational,
};
@@ -14,24 +13,30 @@ use crate::{
backend::{self, Backend},
error::{Error, Result},
ffi::{find_hw_pix_fmt, get_hw_format, CallbackState},
+ frame::Frame,
};
/// Hardware-accelerated video decoder with software fallback.
///
/// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface.
-/// Frames returned by [`Self::receive_frame`] are always CPU-side; for the
-/// hardware path they are downloaded with `av_hwframe_transfer_data` (NV12 /
-/// P010).
+/// Decoded frames are returned through [`crate::Frame`], a CPU-side wrapper
+/// whose accessors avoid the `AVPixelFormat`-enum UB that an unvalidated read
+/// of FFmpeg's raw integer pixel formats can trigger.
///
/// `open` does a true probe: each backend opens with a strict `get_format`
-/// callback, and on the first non-transient error the decoder is torn down
-/// and the next backend is tried with all packets seen so far replayed
-/// through it. Once the first frame is successfully received the probe
-/// collapses and subsequent calls go straight to the active backend.
+/// callback. On the first non-transient error from a backend the decoder is
+/// torn down and the next backend in probe order is tried, with all packets
+/// seen so far replayed through it. The advance is *transactional* — the
+/// candidate backend must successfully build and accept the replayed packets
+/// before any probe state is consumed, so a failing backend in the middle of
+/// the order does not strand the caller without history. Once the first frame
+/// is delivered the probe collapses and subsequent calls go straight to the
+/// active backend.
pub struct VideoDecoder {
/// Live FFmpeg state for the currently active backend.
state: DecoderState,
/// Reusable frame buffer used for hw-side decoding before transfer / move.
+ /// Internal use only — never handed to callers.
hw_frame: frame::Video,
/// Probe state: present until the first frame is received from the active
/// backend, then `None`. While `Some`, packets are buffered for replay and
@@ -67,7 +72,9 @@ struct ProbeState {
/// Backends still to try, in order. Empty means "no more options after
/// the active one fails".
remaining_backends: Vec<Backend>,
- /// Packets sent so far, kept for replay through the next backend.
+ /// Packets sent so far, kept for replay through any candidate backend.
+ /// Preserved across failed candidates — only cleared when the probe
+ /// collapses on a successful first frame.
buffered_packets: Vec<Packet>,
/// Whether `send_eof` has been called; replayed alongside packets.
eof_sent: bool,
@@ -105,16 +112,18 @@ impl VideoDecoder {
/// Auto-probe hardware backends in the platform's default order.
///
/// Each backend opens with a strict `get_format` callback. The first
- /// backend whose `avcodec_open2` succeeds becomes active; if the first
- /// frame from it fails (e.g. `get_format` returns `NONE` because the
- /// backend can't handle this stream's profile/depth), the decoder is torn
- /// down and the next backend is tried — packets sent so far are replayed
- /// through the new decoder, transparently to the caller.
+ /// backend whose `avcodec_open2` succeeds becomes active; if its first
+ /// frame is unusable (decode error, transfer failure, or a CPU-format
+ /// frame from a HW context) the decoder is torn down and the next backend
+ /// is tried — packets sent so far are replayed through the new decoder
+ /// transparently. The probe advance is transactional: the next backend
+ /// must build *and* accept the replayed history before any probe state is
+ /// consumed, so a misbehaving middle backend cannot strand the caller.
///
/// [`Self::backend`] reflects whichever backend ultimately produced the
/// first frame. Software is the last entry in every probe order, so
- /// `open` cannot return without a working decoder for codecs that
- /// libavcodec supports at all.
+ /// `open` cannot return without a working decoder for any codec libavcodec
+ /// supports.
pub fn open(parameters: codec::Parameters) -> Result<Self> {
let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id });
let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?;
@@ -184,15 +193,6 @@ impl VideoDecoder {
self.state.inner.height()
}
- /// Codec context's current pixel format. For HW backends this is the
- /// hardware pixel format (e.g. `Pixel::VIDEOTOOLBOX`) once the first frame
- /// has been negotiated; the caller-facing format produced by
- /// [`Self::receive_frame`] is the *transferred* format (NV12 / P010 for
- /// HW, codec-native for SW) and must be read from the frame itself.
- pub fn format(&self) -> Pixel {
- self.state.inner.format()
- }
-
/// Codec context time base.
pub fn time_base(&self) -> Rational {
self.state.inner.time_base()
@@ -223,23 +223,22 @@ impl VideoDecoder {
/// Receive a CPU-side decoded frame.
///
- /// On the hardware path the frame is transferred from GPU memory via
- /// `av_hwframe_transfer_data` and frame metadata (pts, time_base, side
- /// data, ...) is copied with `av_frame_copy_props`. The caller's frame is
- /// always unref'd first so reuse across resolution changes or different
- /// decoders is safe (mirrors `avcodec_receive_frame`'s own contract).
+ /// On the hardware path the frame is downloaded with
+ /// `av_hwframe_transfer_data` and metadata is copied via
+ /// `av_frame_copy_props`. The caller's frame is always unref'd first, so
+ /// reuse across resolution changes or different decoders is safe.
///
- /// While the probe window is open and the active backend produces a
- /// non-transient error or a software-format frame instead of the
- /// configured hardware format, the decoder is torn down and the next
- /// backend in probe order is tried with all buffered packets replayed.
+ /// While the probe window is open, *any* non-transient failure (decode
+ /// error, transfer error, copy_props error, or a CPU-format frame from a
+ /// HW-opened context) tears down the current decoder and advances to the
+ /// next backend in probe order, replaying buffered packets through it.
/// The caller observes only the eventual successful frame (or, if every
/// backend has been exhausted, the underlying error).
///
/// Returns the same transient signals as `ffmpeg::decoder::Video`:
/// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and
/// more packets must be sent, and `Error::Ffmpeg(Eof)` once fully drained.
- pub fn receive_frame(&mut self, frame: &mut frame::Video) -> Result<()> {
+ pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<()> {
loop {
let res = self.state.inner.receive_frame(&mut self.hw_frame);
match res {
@@ -253,50 +252,56 @@ impl VideoDecoder {
return Err(Error::Ffmpeg(e));
}
Ok(()) => {
- // Compare format as i32 to avoid constructing an AVPixelFormat
- // enum from an unvalidated integer. Library/header skew or a new
- // hardware format would otherwise be UB.
+ // Read AVFrame.format as i32 — avoid constructing an
+ // AVPixelFormat enum from a raw integer (UB on library/header skew).
let received_fmt: i32 = unsafe { (*self.hw_frame.as_ptr()).format };
if self.state.backend == Backend::Software {
- // Pure SW path: just hand over the frame.
unsafe {
- av_frame_unref(frame.as_mut_ptr());
- av_frame_move_ref(frame.as_mut_ptr(), self.hw_frame.as_mut_ptr());
+ av_frame_unref(frame.as_inner_mut().as_mut_ptr());
+ av_frame_move_ref(
+ frame.as_inner_mut().as_mut_ptr(),
+ self.hw_frame.as_mut_ptr(),
+ );
}
self.probe = None;
return Ok(());
}
if received_fmt == self.state.hw_pix_fmt as i32 {
- // True HW frame: download to CPU and copy timing/side data.
- unsafe {
- av_frame_unref(frame.as_mut_ptr());
- let ret = av_hwframe_transfer_data(frame.as_mut_ptr(), self.hw_frame.as_ptr(), 0);
- if ret < 0 {
- return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
+ // True HW frame: try to download to CPU.
+ let transfer_result = unsafe { transfer_hw_frame(frame, &mut self.hw_frame) };
+ match transfer_result {
+ Ok(()) => {
+ self.probe = None;
+ return Ok(());
}
- let ret = av_frame_copy_props(frame.as_mut_ptr(), self.hw_frame.as_ptr());
- if ret < 0 {
- return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
+ Err(e) => {
+ // Transfer failures during the probe window are also
+ // backend-level failures — try the next backend.
+ if self.probe.is_some() && self.advance_probe()? {
+ unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) };
+ continue;
+ }
+ return Err(Error::Ffmpeg(e));
}
}
- self.probe = None;
- return Ok(());
}
- // The decoder produced a CPU frame from a HW-opened context. With
- // strict `get_format` this is unusual (the codec would normally
- // error on get_format=NONE). If it does happen and we still have
- // backends to try, treat it as a probe failure and advance.
+ // Decoder produced a CPU frame from a HW-opened context. With
+ // strict `get_format` this only happens if the codec ignores it
+ // (uncommon). Treat as a probe failure if we still have backends.
if self.probe.is_some() && self.advance_probe()? {
continue;
}
// No fallback left; accept the SW frame and update the active
// backend so `backend()` reflects reality.
unsafe {
- av_frame_unref(frame.as_mut_ptr());
- av_frame_move_ref(frame.as_mut_ptr(), self.hw_frame.as_mut_ptr());
+ av_frame_unref(frame.as_inner_mut().as_mut_ptr());
+ av_frame_move_ref(
+ frame.as_inner_mut().as_mut_ptr(),
+ self.hw_frame.as_mut_ptr(),
+ );
}
self.state.backend = Backend::Software;
self.probe = None;
@@ -316,69 +321,92 @@ impl VideoDecoder {
}
}
- /// Tear down the active decoder and bring up the next backend in
- /// `remaining_backends`, replaying buffered packets. Returns `true` if a
- /// new backend was successfully installed (caller should retry the
- /// receive); `false` if the probe is exhausted.
+ /// Try the next backend in `remaining_backends`. Transactional: a
+ /// candidate must successfully build and accept the replayed history
+ /// before any probe state is consumed. Backends that fail to build or
+ /// reject the replay are skipped (with `tracing::warn!`) and the loop
+ /// continues to the next one. Returns:
+ /// - `Ok(true)` when a candidate is installed and replay completed.
+ /// - `Ok(false)` when the probe is exhausted (no more backends to try).
+ /// - `Err(_)` is not produced by the body below: candidate build failures
+ /// and replay failures are absorbed, logged and skipped rather than
+ /// propagated. Call sites still apply `?` to the returned `Result`.
fn advance_probe(&mut self) -> Result<bool> {
- let next_backend = match self.probe.as_mut() {
- Some(probe) if !probe.remaining_backends.is_empty() => probe.remaining_backends.remove(0),
- _ => return Ok(false),
- };
- let prev_backend = self.state.backend;
- tracing::warn!(
- from = ?prev_backend,
- to = ?next_backend,
- "hwdecode: backend rejected stream, advancing probe"
- );
-
- // Snapshot probe inputs before mutating self.
- let (parameters, codec, buffered_packets, eof_sent) = {
- let probe = self.probe.as_mut().expect("probe state");
- (
- probe.parameters.clone(),
- probe.codec,
- std::mem::take(&mut probe.buffered_packets),
- probe.eof_sent,
- )
- };
-
- // Build the new state. If this open fails, we fall through to advancing
- // again — which is what the caller's loop will do once it sees the next
- // probe iteration also fail. To keep semantics simple, propagate the
- // open error directly: the caller's loop will see it as the decode
- // error and return upward; in practice probe order ends in Software
- // which always opens.
- let new_state = Self::build_state(parameters, codec, next_backend)?;
-
- // Replace state. The old DecoderState's Drop runs here, in order:
- // codec context first, then callback_state box, then hw_device_ref.
- self.state = new_state;
-
- // hw_frame may hold residual data from the old decoder. Clear it so
- // the next receive starts clean.
- unsafe {
- av_frame_unref(self.hw_frame.as_mut_ptr());
- }
+ loop {
+ // Snapshot inputs without mutating probe state.
+ let (next_backend, parameters, codec) = match self.probe.as_ref() {
+ Some(probe) if !probe.remaining_backends.is_empty() => (
+ probe.remaining_backends[0],
+ probe.parameters.clone(),
+ probe.codec,
+ ),
+ _ => return Ok(false),
+ };
+
+ let prev_backend = self.state.backend;
+ tracing::warn!(from = ?prev_backend, to = ?next_backend, "hwdecode: advancing probe");
+
+ // Build candidate. On failure, pop and continue without touching the
+ // packet buffer.
+ let mut candidate_state = match Self::build_state(parameters, codec, next_backend) {
+ Ok(s) => s,
+ Err(e) => {
+ tracing::warn!(?next_backend, error = %e, "hwdecode: candidate build failed");
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .remaining_backends
+ .remove(0);
+ continue;
+ }
+ };
+
+ // Replay buffered history through the candidate WITHOUT installing it.
+ // We borrow the buffer immutably; if replay fails the candidate's Drop
+ // releases the FFmpeg state and the buffer is preserved for the next
+ // attempt.
+ let replay_result: std::result::Result<(), ffmpeg_next::Error> = {
+ let probe = self.probe.as_ref().expect("probe state present");
+ let mut r: std::result::Result<(), ffmpeg_next::Error> = Ok(());
+ for pkt in &probe.buffered_packets {
+ if let Err(e) = candidate_state.inner.send_packet(pkt) {
+ r = Err(e);
+ break;
+ }
+ }
+ if r.is_ok() && probe.eof_sent {
+ if let Err(e) = candidate_state.inner.send_eof() {
+ r = Err(e);
+ }
+ }
+ r
+ };
+
+ if let Err(e) = replay_result {
+ tracing::warn!(?next_backend, error = %e, "hwdecode: candidate replay failed");
+ // Drop candidate explicitly so its FFI cleanup runs now.
+ drop(candidate_state);
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .remaining_backends
+ .remove(0);
+ continue;
+ }
- // Replay buffered packets and (if previously sent) EOF through the new
- // decoder. We re-buffer them on the way through so a subsequent probe
- // advance still has the full history.
- let probe = self.probe.as_mut().expect("probe still present");
- probe.buffered_packets.clear();
- probe.eof_sent = false;
-
- for pkt in buffered_packets {
- // Mirror `send_packet`'s buffering behaviour.
- probe.buffered_packets.push(pkt.clone());
- self.state.inner.send_packet(&pkt).map_err(Error::Ffmpeg)?;
- }
- if eof_sent {
- self.probe.as_mut().expect("probe still present").eof_sent = true;
- self.state.inner.send_eof().map_err(Error::Ffmpeg)?;
+ // Commit: install the candidate, clear residual hw_frame, pop backend.
+ self.state = candidate_state;
+ unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) };
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .remaining_backends
+ .remove(0);
+ return Ok(true);
}
-
- Ok(true)
}
/// Build raw FFmpeg state for one backend. Strict `get_format` (NONE on
@@ -459,6 +487,26 @@ impl VideoDecoder {
}
}
+/// Download a HW frame into a CPU [`Frame`]. Always unrefs the destination
+/// first so reuse across resolution changes is safe.
+unsafe fn transfer_hw_frame(
+ dst: &mut Frame,
+ src: &mut frame::Video,
+) -> std::result::Result<(), ffmpeg_next::Error> {
+ unsafe {
+ av_frame_unref(dst.as_inner_mut().as_mut_ptr());
+ let ret = av_hwframe_transfer_data(dst.as_inner_mut().as_mut_ptr(), src.as_ptr(), 0);
+ if ret < 0 {
+ return Err(ffmpeg_next::Error::from(ret));
+ }
+ let ret = av_frame_copy_props(dst.as_inner_mut().as_mut_ptr(), src.as_ptr());
+ if ret < 0 {
+ return Err(ffmpeg_next::Error::from(ret));
+ }
+ }
+ Ok(())
+}
+
/// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame`
/// and must not be treated as backend failures.
fn is_transient(e: &ffmpeg_next::Error) -> bool {
diff --git a/src/frame.rs b/src/frame.rs
new file mode 100644
index 0000000..f221b68
--- /dev/null
+++ b/src/frame.rs
@@ -0,0 +1,109 @@
+//! CPU-side decoded video frame.
+//!
+//! Wraps `ffmpeg_next::frame::Video` so callers cannot reach the upstream
+//! `format()` accessor, which constructs an `AVPixelFormat` enum from the
+//! raw integer FFmpeg writes into `AVFrame.format`. That conversion is UB
+//! when the value isn't in the bindgen-generated enum (library/header skew,
+//! a new pixel format added upstream, etc.). The wrapper exposes
+//! [`Frame::pix_fmt`] which reads the field as a plain `i32` — sound for any
+//! value FFmpeg can produce — and accessors are limited to fields whose
+//! reads do not invoke the same hazard.
+//!
+//! Compare formats against integer constants taken from the FFI layer, e.g.
+//!
+//! ```ignore
+//! use ffmpeg_next::ffi::AVPixelFormat;
+//! if frame.pix_fmt() == AVPixelFormat::AV_PIX_FMT_NV12 as i32 { ... }
+//! ```
+
+use ffmpeg_next::frame;
+
+/// CPU-side decoded video frame produced by [`crate::VideoDecoder`].
+pub struct Frame {
+ inner: frame::Video,
+}
+
+impl Frame {
+ /// Construct an empty frame, suitable as the destination passed to
+ /// [`crate::VideoDecoder::receive_frame`].
+ pub fn empty() -> Self {
+ Self {
+ inner: frame::Video::empty(),
+ }
+ }
+
+ /// Width in pixels.
+ pub fn width(&self) -> u32 {
+ self.inner.width()
+ }
+
+ /// Height in pixels.
+ pub fn height(&self) -> u32 {
+ self.inner.height()
+ }
+
+ /// Pixel format, returned as the raw `i32` value FFmpeg wrote to
+ /// `AVFrame.format`. Sound regardless of the linked FFmpeg version —
+ /// no `AVPixelFormat` enum is constructed.
+ ///
+ /// Compare against integer constants from `ffmpeg_next::ffi`, e.g.
+ /// `frame.pix_fmt() == AVPixelFormat::AV_PIX_FMT_NV12 as i32`.
+ pub fn pix_fmt(&self) -> i32 {
+ // SAFETY: `AVFrame.format` is bound as `c_int`; reading it yields a
+ // plain integer with no validity invariants.
+ unsafe { (*self.inner.as_ptr()).format }
+ }
+
+ /// Presentation timestamp in stream time base, or `None` if the frame
+ /// carries `AV_NOPTS_VALUE`.
+ pub fn pts(&self) -> Option<i64> {
+ self.inner.pts()
+ }
+
+ /// Number of populated planes (e.g. 3 for `YUV420P`, 2 for `NV12`).
+ pub fn planes(&self) -> usize {
+ self.inner.planes()
+ }
+
+ /// Bytes per row for `plane`. Panics if `plane >= planes()`.
+ pub fn stride(&self, plane: usize) -> usize {
+ self.inner.stride(plane)
+ }
+
+ /// Pixel data for `plane`. Panics if `plane >= planes()`.
+ pub fn data(&self, plane: usize) -> &[u8] {
+ self.inner.data(plane)
+ }
+
+ /// Crate-internal: hand the wrapped frame to FFmpeg / our decoder code.
+ pub(crate) fn as_inner_mut(&mut self) -> &mut frame::Video {
+ &mut self.inner
+ }
+}
+
+impl Default for Frame {
+ fn default() -> Self {
+ Self::empty()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn empty_frame_has_zero_dimensions_and_no_pts() {
+ let f = Frame::empty();
+ assert_eq!(f.width(), 0);
+ assert_eq!(f.height(), 0);
+ assert_eq!(f.pts(), None);
+ // AVFrame.format defaults to -1 (AV_PIX_FMT_NONE) for an empty frame.
+ assert_eq!(f.pix_fmt(), -1);
+ }
+
+ #[test]
+ fn frame_is_send() {
+ fn check<T: Send>() {}
+ check::<Frame>();
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 7d9c7bd..69a0660 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -19,7 +19,9 @@ mod backend;
mod decoder;
mod error;
mod ffi;
+mod frame;
pub use backend::Backend;
pub use decoder::VideoDecoder;
pub use error::{Error, Result};
+pub use frame::Frame;
diff --git a/tests/decode.rs b/tests/decode.rs
index a936ae3..bc15f30 100644
--- a/tests/decode.rs
+++ b/tests/decode.rs
@@ -4,9 +4,9 @@
//!
//! Set `HWDECODE_SAMPLE_VIDEO` to an absolute path to enable.
-use ffmpeg::{format, frame, media};
+use ffmpeg::{format, media};
use ffmpeg_next as ffmpeg;
-use hwdecode::VideoDecoder;
+use hwdecode::{Frame, VideoDecoder};
const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
@@ -29,12 +29,12 @@ fn auto_open_decodes_at_least_one_frame() {
let expected_h = unsafe { (*stream.parameters().as_ptr()).height as u32 };
let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder");
- eprintln!("backend = {:?}", decoder.backend());
+ eprintln!("optimistic backend = {:?}", decoder.backend());
assert_eq!(decoder.width(), expected_w);
assert_eq!(decoder.height(), expected_h);
- let mut frame = frame::Video::empty();
+ let mut frame = Frame::empty();
let mut count = 0_usize;
let target = 30_usize;
@@ -64,5 +64,5 @@ fn auto_open_decodes_at_least_one_frame() {
}
assert!(count >= 1, "expected at least 1 decoded frame, got {count}");
- eprintln!("decoded {count} frames");
+ eprintln!("decoded {count} frames via backend {:?}", decoder.backend());
}
diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs
index 3084faf..e734533 100644
--- a/tests/hw_smoke.rs
+++ b/tests/hw_smoke.rs
@@ -5,9 +5,9 @@
//! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored
//! ```
-use ffmpeg::{format, frame, media};
+use ffmpeg::{format, media};
use ffmpeg_next as ffmpeg;
-use hwdecode::{Backend, VideoDecoder};
+use hwdecode::{Backend, Frame, VideoDecoder};
const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
@@ -32,7 +32,7 @@ fn auto_probe_picks_hardware_backend() {
// backend that actually produced it. Checking `decoder.backend()` before
// any frame has been received would observe the optimistic pre-probe
// value and could false-pass when a HW backend silently degrades.
- let mut frame = frame::Video::empty();
+ let mut frame = Frame::empty();
let mut got_frame = false;
for (s, packet) in input.packets() {
if s.index() != stream_index {
@@ -43,11 +43,11 @@ fn auto_probe_picks_hardware_backend() {
Ok(()) => {
got_frame = true;
eprintln!(
- "first frame: backend={:?} {}x{} fmt={:?}",
+ "first frame: backend={:?} {}x{} pix_fmt={}",
decoder.backend(),
frame.width(),
frame.height(),
- frame.format()
+ frame.pix_fmt()
);
break;
}
From 979a2bfbe4a3ad87d1bfd1dbcc52d517c07a40bf Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 16:54:03 +1200
Subject: [PATCH 04/27] update
---
benches/decode.rs | 113 +++++++++++++++++++++----------
examples/decode.rs | 16 ++++-
src/backend.rs | 90 ++++++++++---------------
src/decoder.rs | 164 +++++++++++++++++----------------------------
src/error.rs | 7 +-
src/lib.rs | 1 +
src/pix_fmt.rs | 113 +++++++++++++++++++++++++++++++
tests/decode.rs | 12 +++-
tests/hw_smoke.rs | 9 +--
9 files changed, 319 insertions(+), 206 deletions(-)
create mode 100644 src/pix_fmt.rs
diff --git a/benches/decode.rs b/benches/decode.rs
index 82d0ba9..5f53a66 100644
--- a/benches/decode.rs
+++ b/benches/decode.rs
@@ -1,8 +1,9 @@
-//! Benchmark comparing software-only decode against the auto-probed
+//! Benchmark comparing software-only decode (via `ffmpeg-next` directly,
+//! since `hwdecode` is hardware-only) against `hwdecode`'s auto-probed
//! hardware backend on the same input file.
//!
//! Set `HWDECODE_SAMPLE_VIDEO` to a video file path. The hardware bench is
-//! skipped (with a notice) when the auto-probe falls back to software.
+//! skipped (with a notice) when no hardware backend is available on the host.
//!
//! ```sh
//! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench
@@ -11,9 +12,9 @@
use std::{path::PathBuf, time::Duration};
use criterion::{criterion_group, criterion_main, Criterion};
-use ffmpeg::{format, media};
+use ffmpeg::{codec::Context as CodecContext, format, frame, media};
use ffmpeg_next as ffmpeg;
-use hwdecode::{Backend, Frame, VideoDecoder};
+use hwdecode::{Frame, VideoDecoder};
const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
@@ -21,9 +22,8 @@ fn sample_path() -> Option {
std::env::var_os(SAMPLE_ENV).map(PathBuf::from)
}
-/// Decode every video frame in the file using `decoder`, returning the count.
-/// Re-opens the input each call so each iteration measures a full decode pass.
-fn decode_all(path: &PathBuf, backend: Backend) -> Result {
+/// Decode every frame using `hwdecode`'s auto-probed hardware backend.
+fn decode_all_hw(path: &PathBuf) -> Result {
let mut input = format::input(path).map_err(hwdecode::Error::Ffmpeg)?;
let stream = input
.streams()
@@ -31,11 +31,7 @@ fn decode_all(path: &PathBuf, backend: Backend) -> Result VideoDecoder::open_with(stream.parameters(), Backend::Software)?,
- _ => VideoDecoder::open(stream.parameters())?,
- };
-
+ let mut decoder = VideoDecoder::open(stream.parameters())?;
let mut frame = Frame::empty();
let mut count = 0_usize;
@@ -66,6 +62,46 @@ fn decode_all(path: &PathBuf, backend: Backend) -> Result Result {
+ let mut input = format::input(path)?;
+ let stream = input
+ .streams()
+ .best(media::Type::Video)
+ .ok_or(ffmpeg::Error::StreamNotFound)?;
+ let stream_index = stream.index();
+ let mut decoder = CodecContext::from_parameters(stream.parameters())?
+ .decoder()
+ .video()?;
+
+ let mut frame = frame::Video::empty();
+ let mut count = 0_usize;
+
+ let mut drain =
+ |decoder: &mut ffmpeg::decoder::Video, count: &mut usize| -> Result<(), ffmpeg::Error> {
+ loop {
+ match decoder.receive_frame(&mut frame) {
+ Ok(()) => *count += 1,
+ Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::error::EAGAIN => return Ok(()),
+ Err(ffmpeg::Error::Eof) => return Ok(()),
+ Err(e) => return Err(e),
+ }
+ }
+ };
+
+ for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ decoder.send_packet(&packet)?;
+ drain(&mut decoder, &mut count)?;
+ }
+ decoder.send_eof()?;
+ drain(&mut decoder, &mut count)?;
+ Ok(count)
+}
+
fn bench_decode(c: &mut Criterion) {
ffmpeg::init().expect("ffmpeg init");
@@ -75,9 +111,8 @@ fn bench_decode(c: &mut Criterion) {
};
// Probe by decoding one frame so the probe collapses to the backend that
- // actually produced output. Reading `backend()` before the first frame
- // would observe the optimistically-selected value and mislabel HW runs
- // that silently degraded.
+ // actually produced output. None means no HW backend is available — we
+ // skip the HW arm and bench SW only.
let probed_backend = {
let mut input = format::input(&path).expect("open input");
let stream = input
@@ -85,44 +120,50 @@ fn bench_decode(c: &mut Criterion) {
.best(media::Type::Video)
.expect("video stream");
let stream_index = stream.index();
- let mut dec = VideoDecoder::open(stream.parameters()).expect("auto-probe");
- let mut frame = Frame::empty();
- 'probe: for (s, packet) in input.packets() {
- if s.index() != stream_index {
- continue;
- }
- dec.send_packet(&packet).expect("probe send_packet");
- match dec.receive_frame(&mut frame) {
- Ok(()) => break 'probe,
- Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
- if errno == ffmpeg::error::EAGAIN =>
- {
- continue;
+ match VideoDecoder::open(stream.parameters()) {
+ Ok(mut dec) => {
+ let mut frame = Frame::empty();
+ 'probe: for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ dec.send_packet(&packet).expect("probe send_packet");
+ match dec.receive_frame(&mut frame) {
+ Ok(()) => break 'probe,
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+ if errno == ffmpeg::error::EAGAIN =>
+ {
+ continue;
+ }
+ Err(e) => panic!("probe receive_frame: {e}"),
+ }
}
- Err(e) => panic!("probe receive_frame: {e}"),
+ Some(dec.backend())
}
+ Err(hwdecode::Error::AllBackendsFailed { .. }) => None,
+ Err(e) => panic!("hwdecode probe: {e}"),
}
- dec.backend()
};
- eprintln!("auto-probe settled on backend: {probed_backend:?}");
+ match probed_backend {
+ Some(b) => eprintln!("auto-probe settled on backend: {b:?}"),
+ None => eprintln!("no hardware backend available — hardware bench will be skipped"),
+ }
let mut group = c.benchmark_group("decode");
group.measurement_time(Duration::from_secs(15));
group.sample_size(20);
group.bench_function("software", |b| {
- b.iter(|| decode_all(&path, Backend::Software).expect("software decode"))
+ b.iter(|| decode_all_sw(&path).expect("software decode"))
});
- if probed_backend != Backend::Software {
+ if probed_backend.is_some() {
group.bench_function("hardware", |b| {
b.iter(|| {
- let n = decode_all(&path, probed_backend).expect("hardware decode");
+ let n = decode_all_hw(&path).expect("hardware decode");
std::hint::black_box(n);
})
});
- } else {
- eprintln!("skipping hardware bench: auto-probe fell back to Software");
}
group.finish();
diff --git a/examples/decode.rs b/examples/decode.rs
index 69763bf..a1439d7 100644
--- a/examples/decode.rs
+++ b/examples/decode.rs
@@ -22,7 +22,21 @@ fn main() -> Result<(), Box> {
.ok_or("no video stream")?;
let stream_index = stream.index();
- let mut decoder = VideoDecoder::open(stream.parameters())?;
+ let mut decoder = match VideoDecoder::open(stream.parameters()) {
+ Ok(d) => d,
+ Err(hwdecode::Error::AllBackendsFailed { attempts }) => {
+ eprintln!(
+ "no hardware backend available; tried {} backend(s):",
+ attempts.len()
+ );
+ for (b, e) in &attempts {
+ eprintln!(" {b:?}: {e}");
+ }
+ eprintln!("(callers handle software fallback themselves — see ffmpeg::decoder::Video)");
+ return Ok(());
+ }
+ Err(e) => return Err(e.into()),
+ };
println!(
"open: backend={:?} {}x{}",
decoder.backend(),
diff --git a/src/backend.rs b/src/backend.rs
index cfcd48b..bce8699 100644
--- a/src/backend.rs
+++ b/src/backend.rs
@@ -1,11 +1,15 @@
use ffmpeg_next::{ffi::AVHWDeviceType, format::Pixel};
-/// Decoding backend selected (or forced) for a [`crate::VideoDecoder`].
+/// Hardware decoding backend.
+///
+/// `hwdecode` only manages **hardware** decoders — software fallback is
+/// out of scope. If no backend in [`probe_order`] for the current platform
+/// can decode a stream, [`crate::VideoDecoder::open`] returns
+/// [`crate::Error::AllBackendsFailed`] and the caller decides how to fall
+/// back (e.g. by opening an `ffmpeg::decoder::Video` directly).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Backend {
- /// Pure software decode via libavcodec.
- Software,
- /// Apple VideoToolbox (macOS, iOS, iPadOS, tvOS).
+ /// Apple VideoToolbox (macOS, iOS, iPadOS, tvOS, visionOS).
VideoToolbox,
/// Linux Video Acceleration API (Intel / AMD GPUs).
Vaapi,
@@ -16,37 +20,33 @@ pub enum Backend {
}
impl Backend {
- /// `AVHWDeviceType` corresponding to this backend, or `None` for
- /// [`Backend::Software`].
- pub(crate) fn av_hwdevice_type(self) -> Option {
+ /// `AVHWDeviceType` corresponding to this backend.
+ pub(crate) fn av_hwdevice_type(self) -> AVHWDeviceType {
match self {
- Self::Software => None,
- Self::VideoToolbox => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX),
- Self::Vaapi => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_VAAPI),
- Self::Cuda => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA),
- Self::D3d11va => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_D3D11VA),
+ Self::VideoToolbox => AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
+ Self::Vaapi => AVHWDeviceType::AV_HWDEVICE_TYPE_VAAPI,
+ Self::Cuda => AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA,
+ Self::D3d11va => AVHWDeviceType::AV_HWDEVICE_TYPE_D3D11VA,
}
}
/// Hardware pixel format the codec is expected to produce when this
- /// backend is in use. Used to inspect the result of `get_format`.
- /// `None` for [`Backend::Software`].
+ /// backend is in use. (The post-`av_hwframe_transfer_data` CPU format is
+ /// typically `NV12` or `P010LE`; this is the *pre-transfer* sentinel.)
#[allow(dead_code)] // surfaced for tests / future use
- pub(crate) fn hw_pixel_format(self) -> Option {
+ pub(crate) fn hw_pixel_format(self) -> Pixel {
match self {
- Self::Software => None,
- Self::VideoToolbox => Some(Pixel::VIDEOTOOLBOX),
- Self::Vaapi => Some(Pixel::VAAPI),
- Self::Cuda => Some(Pixel::CUDA),
- Self::D3d11va => Some(Pixel::D3D11),
+ Self::VideoToolbox => Pixel::VIDEOTOOLBOX,
+ Self::Vaapi => Pixel::VAAPI,
+ Self::Cuda => Pixel::CUDA,
+ Self::D3d11va => Pixel::D3D11,
}
}
}
-/// Probe order for `VideoDecoder::open` on the current target.
-///
-/// Always ends in [`Backend::Software`]; auto-probe never returns an empty
-/// list. Order is fixed at compile time per `target_os`.
+/// Probe order for `VideoDecoder::open` on the current target. Hardware
+/// backends only, in preference order. Empty for platforms with no known
+/// HW backend; on those `open()` returns `AllBackendsFailed` immediately.
pub(crate) fn probe_order() -> &'static [Backend] {
#[cfg(any(
target_os = "macos",
@@ -55,15 +55,15 @@ pub(crate) fn probe_order() -> &'static [Backend] {
target_os = "visionos",
))]
{
- &[Backend::VideoToolbox, Backend::Software]
+ &[Backend::VideoToolbox]
}
#[cfg(target_os = "linux")]
{
- &[Backend::Vaapi, Backend::Cuda, Backend::Software]
+ &[Backend::Vaapi, Backend::Cuda]
}
#[cfg(target_os = "windows")]
{
- &[Backend::D3d11va, Backend::Cuda, Backend::Software]
+ &[Backend::D3d11va, Backend::Cuda]
}
#[cfg(not(any(
target_os = "macos",
@@ -74,7 +74,7 @@ pub(crate) fn probe_order() -> &'static [Backend] {
target_os = "windows",
)))]
{
- &[Backend::Software]
+ &[]
}
}
@@ -83,55 +83,33 @@ mod tests {
use super::*;
#[test]
- fn probe_order_ends_in_software() {
- let order = probe_order();
- assert!(!order.is_empty());
- assert_eq!(*order.last().unwrap(), Backend::Software);
- }
-
- #[test]
- fn software_has_no_av_hwdevice_type() {
- assert!(Backend::Software.av_hwdevice_type().is_none());
- assert!(Backend::Software.hw_pixel_format().is_none());
- }
-
- #[test]
- fn hw_backends_have_av_hwdevice_type() {
+ fn all_backends_have_hwdevice_type_and_pix_fmt() {
for b in [
Backend::VideoToolbox,
Backend::Vaapi,
Backend::Cuda,
Backend::D3d11va,
] {
- assert!(
- b.av_hwdevice_type().is_some(),
- "{b:?} missing hwdevice type"
- );
- assert!(b.hw_pixel_format().is_some(), "{b:?} missing hw pix fmt");
+ let _ = b.av_hwdevice_type();
+ let _ = b.hw_pixel_format();
}
}
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "tvos"))]
#[test]
fn apple_probe_order() {
- assert_eq!(probe_order(), &[Backend::VideoToolbox, Backend::Software]);
+ assert_eq!(probe_order(), &[Backend::VideoToolbox]);
}
#[cfg(target_os = "linux")]
#[test]
fn linux_probe_order() {
- assert_eq!(
- probe_order(),
- &[Backend::Vaapi, Backend::Cuda, Backend::Software]
- );
+ assert_eq!(probe_order(), &[Backend::Vaapi, Backend::Cuda]);
}
#[cfg(target_os = "windows")]
#[test]
fn windows_probe_order() {
- assert_eq!(
- probe_order(),
- &[Backend::D3d11va, Backend::Cuda, Backend::Software]
- );
+ assert_eq!(probe_order(), &[Backend::D3d11va, Backend::Cuda]);
}
}
diff --git a/src/decoder.rs b/src/decoder.rs
index a776697..09f5c67 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -3,8 +3,8 @@ use std::{mem::ManuallyDrop, ptr};
use ffmpeg_next::{
codec::{self, Context},
ffi::{
- av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref,
- av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVPixelFormat,
+ av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_unref, av_hwdevice_ctx_create,
+ av_hwframe_transfer_data, AVBufferRef,
},
frame, Codec, Packet, Rational,
};
@@ -52,15 +52,11 @@ struct DecoderState {
inner: ManuallyDrop,
/// Backend driving this state.
backend: Backend,
- /// Owned reference produced by `av_hwdevice_ctx_create`. Null for software.
+ /// Owned reference produced by `av_hwdevice_ctx_create`.
hw_device_ref: *mut AVBufferRef,
- /// Owned `Box` raw pointer; `AVCodecContext::opaque` aliases
- /// it. Null for software.
+ /// Owned `Box` raw pointer; `AVCodecContext::opaque`
+ /// aliases it.
callback_state: *mut CallbackState,
- /// Hardware pixel format we asked the decoder to produce. Compared (as
- /// `i32` to avoid enum-discriminant UB) against each received frame's
- /// format. `AV_PIX_FMT_NONE` for the software path.
- hw_pix_fmt: AVPixelFormat,
}
/// State carried only during the probe window (before the first successful
@@ -223,17 +219,22 @@ impl VideoDecoder {
/// Receive a CPU-side decoded frame.
///
- /// On the hardware path the frame is downloaded with
- /// `av_hwframe_transfer_data` and metadata is copied via
- /// `av_frame_copy_props`. The caller's frame is always unref'd first, so
- /// reuse across resolution changes or different decoders is safe.
+ /// The frame is downloaded with `av_hwframe_transfer_data` and metadata
+ /// is copied via `av_frame_copy_props`. The caller's frame is always
+ /// unref'd first, so reuse across resolution changes or different
+ /// decoders is safe.
///
/// While the probe window is open, *any* non-transient failure (decode
/// error, transfer error, copy_props error, or a CPU-format frame from a
/// HW-opened context) tears down the current decoder and advances to the
- /// next backend in probe order, replaying buffered packets through it.
- /// The caller observes only the eventual successful frame (or, if every
- /// backend has been exhausted, the underlying error).
+ /// next hardware backend in probe order, replaying buffered packets
+ /// through it. The caller observes only the eventual successful frame
+ /// (or, if every backend has been exhausted, the underlying error).
+ ///
+ /// This crate is hardware-only: there is no software fallback inside the
+ /// decoder. If every backend is exhausted, the failure surfaces as the
+ /// last decoder or transfer error (for the degraded-CPU-frame case, the
+ /// failed transfer's error). Callers handle software fallback themselves.
///
/// Returns the same transient signals as `ffmpeg::decoder::Video`:
/// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and
@@ -252,60 +253,25 @@ impl VideoDecoder {
return Err(Error::Ffmpeg(e));
}
Ok(()) => {
- // Read AVFrame.format as i32 — avoid constructing an
- // AVPixelFormat enum from a raw integer (UB on library/header skew).
- let received_fmt: i32 = unsafe { (*self.hw_frame.as_ptr()).format };
-
- if self.state.backend == Backend::Software {
- unsafe {
- av_frame_unref(frame.as_inner_mut().as_mut_ptr());
- av_frame_move_ref(
- frame.as_inner_mut().as_mut_ptr(),
- self.hw_frame.as_mut_ptr(),
- );
+ // Always attempt the HW→CPU transfer. With strict `get_format`,
+ // libavcodec can only deliver frames in the wired-up HW format
+ // (or fail). If a misbehaving codec ever hands us a CPU-side
+ // frame anyway, `av_hwframe_transfer_data` returns AVERROR(EINVAL)
+ // (neither src nor dst has an AVHWFramesContext attached) and we
+ // route through the same error path below.
+ match unsafe { transfer_hw_frame(frame, &mut self.hw_frame) } {
+ Ok(()) => {
+ self.probe = None;
+ return Ok(());
}
- self.probe = None;
- return Ok(());
- }
-
- if received_fmt == self.state.hw_pix_fmt as i32 {
- // True HW frame: try to download to CPU.
- let transfer_result = unsafe { transfer_hw_frame(frame, &mut self.hw_frame) };
- match transfer_result {
- Ok(()) => {
- self.probe = None;
- return Ok(());
- }
- Err(e) => {
- // Transfer failures during the probe window are also
- // backend-level failures — try the next backend.
- if self.probe.is_some() && self.advance_probe()? {
- unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) };
- continue;
- }
- return Err(Error::Ffmpeg(e));
+ Err(e) => {
+ if self.probe.is_some() && self.advance_probe()? {
+ unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) };
+ continue;
}
+ return Err(Error::Ffmpeg(e));
}
}
-
- // Decoder produced a CPU frame from a HW-opened context. With
- // strict `get_format` this only happens if the codec ignores it
- // (uncommon). Treat as a probe failure if we still have backends.
- if self.probe.is_some() && self.advance_probe()? {
- continue;
- }
- // No fallback left; accept the SW frame and update the active
- // backend so `backend()` reflects reality.
- unsafe {
- av_frame_unref(frame.as_inner_mut().as_mut_ptr());
- av_frame_move_ref(
- frame.as_inner_mut().as_mut_ptr(),
- self.hw_frame.as_mut_ptr(),
- );
- }
- self.state.backend = Backend::Software;
- self.probe = None;
- return Ok(());
}
}
}
@@ -409,52 +375,43 @@ impl VideoDecoder {
}
}
- /// Build raw FFmpeg state for one backend. Strict `get_format` (NONE on
- /// missing HW format); cross-backend fallback is the caller's job.
+ /// Build raw FFmpeg state for one hardware backend. Strict `get_format`
+ /// (NONE on missing HW format); cross-backend fallback is the caller's job.
fn build_state(
parameters: codec::Parameters,
codec: Codec,
backend: Backend,
) -> Result {
let mut ctx = Context::from_parameters(parameters)?;
+ let av_type = backend.av_hwdevice_type();
- let (hw_device_ref, callback_state, hw_pix_fmt) = match backend.av_hwdevice_type() {
- None => (
- ptr::null_mut(),
- ptr::null_mut(),
- AVPixelFormat::AV_PIX_FMT_NONE,
- ),
- Some(av_type) => {
- // Verify the codec advertises this hwaccel.
- let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type)
- .ok_or(Error::BackendUnsupportedByCodec(backend))?;
-
- // Create the device context.
- let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut();
- // SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill.
- let ret = unsafe {
- av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0)
- };
- if ret < 0 {
- return Err(Error::HwDeviceInitFailed {
- backend,
- source: ffmpeg_next::Error::from(ret),
- });
- }
+ // Verify the codec advertises this hwaccel.
+ let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type)
+ .ok_or(Error::BackendUnsupportedByCodec(backend))?;
- let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt }));
- // SAFETY: ctx is a freshly-constructed AVCodecContext we own;
- // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's
- // use (we keep our own ref in `hw_device_ref` for cleanup).
- unsafe {
- let raw = ctx.as_mut_ptr();
- (*raw).hw_device_ctx = av_buffer_ref(hw_device_ref);
- (*raw).opaque = callback_state.cast();
- (*raw).get_format = Some(get_hw_format);
- }
- (hw_device_ref, callback_state, hw_pix_fmt)
- }
+ // Create the device context.
+ let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut();
+ // SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill.
+ let ret = unsafe {
+ av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0)
};
+ if ret < 0 {
+ return Err(Error::HwDeviceInitFailed {
+ backend,
+ source: ffmpeg_next::Error::from(ret),
+ });
+ }
+
+ let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt }));
+ // SAFETY: ctx is a freshly-constructed AVCodecContext we own;
+ // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's
+ // use (we keep our own ref in `hw_device_ref` for cleanup).
+ unsafe {
+ let raw = ctx.as_mut_ptr();
+ (*raw).hw_device_ctx = av_buffer_ref(hw_device_ref);
+ (*raw).opaque = callback_state.cast();
+ (*raw).get_format = Some(get_hw_format);
+ }
// Open the decoder. On any failure, release the resources we just
// allocated so we don't leak.
@@ -482,7 +439,6 @@ impl VideoDecoder {
backend,
hw_device_ref,
callback_state,
- hw_pix_fmt,
})
}
}
diff --git a/src/error.rs b/src/error.rs
index 92cb2d1..ef5373c 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -28,8 +28,11 @@ pub enum Error {
source: ffmpeg_next::Error,
},
- /// Auto-probe exhausted every backend in the platform's order.
- #[error("all backends failed; attempts: {attempts:?}")]
+ /// Auto-probe exhausted every backend in the platform's order. Empty
+ /// `attempts` means the platform has no hardware backends listed in
+ /// [`crate::Backend`] for the current `target_os` — callers must
+ /// fall back to a software decoder of their choice.
+ #[error("all hardware backends failed; attempts: {attempts:?}")]
AllBackendsFailed {
/// Per-backend errors collected during probing, in the order tried.
attempts: Vec<(Backend, Box)>,
diff --git a/src/lib.rs b/src/lib.rs
index 69a0660..e6c12ce 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -20,6 +20,7 @@ mod decoder;
mod error;
mod ffi;
mod frame;
+pub mod pix_fmt;
pub use backend::Backend;
pub use decoder::VideoDecoder;
diff --git a/src/pix_fmt.rs b/src/pix_fmt.rs
new file mode 100644
index 0000000..f3c594e
--- /dev/null
+++ b/src/pix_fmt.rs
@@ -0,0 +1,113 @@
+//! Stable `i32` constants for the pixel formats produced by `hwdecode`'s
+//! hardware decoders after `av_hwframe_transfer_data`.
+//!
+//! `Frame::pix_fmt()` returns the raw integer FFmpeg wrote to `AVFrame.format`
+//! (as a plain `i32` to avoid the enum-construction UB that an unvalidated
+//! cast would invoke). This module names the constants relevant to dispatch
+//! after a successful hardware decode.
+//!
+//! Because `hwdecode` is hardware-only, the formats listed here cover what
+//! the supported HW backends actually produce — the **NV** family (semi-
+//! planar 8-bit) and the **P0xx / P2xx / P4xx** family (semi-planar 10/12/16
+//! bit). VideoToolbox, VAAPI, NVDEC, and D3D11VA all download into one of
+//! these.
+//!
+//! Software-decoder output formats (`YUV420P`, `YUV422P`, `RGB24`, etc.) are
+//! intentionally **not** listed: callers handle software fallback outside
+//! this crate, and dispatch tables for those formats belong with the SW
+//! pipeline.
+//!
+//! For values not listed here, write `AVPixelFormat::AV_PIX_FMT_X as i32`
+//! directly — that's exactly the cast we use to define these constants.
+//!
+//! ```ignore
+//! use hwdecode::{pix_fmt, Frame};
+//! match frame.pix_fmt() {
+//! pix_fmt::NV12 => /* 8-bit 4:2:0 → colconv::frame::Nv12Frame */,
+//! pix_fmt::P010LE => /* 10-bit 4:2:0 → colconv::frame::PnFrame<10> */,
+//! other => unimplemented!("pix_fmt {other}"),
+//! }
+//! ```
+
+use ffmpeg_next::ffi::AVPixelFormat;
+
+// --- semi-planar YUV (NV*) — 8-bit hardware download outputs ----------------
+
+/// 4:2:0, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV12`). The
+/// dominant 8-bit HW download format on every supported backend.
+pub const NV12: i32 = AVPixelFormat::AV_PIX_FMT_NV12 as i32;
+/// 4:2:0, 8-bit, Y plane + interleaved Cr/Cb (`AV_PIX_FMT_NV21`).
+pub const NV21: i32 = AVPixelFormat::AV_PIX_FMT_NV21 as i32;
+/// 4:2:2, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV16`).
+pub const NV16: i32 = AVPixelFormat::AV_PIX_FMT_NV16 as i32;
+/// 4:4:4, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV24`).
+pub const NV24: i32 = AVPixelFormat::AV_PIX_FMT_NV24 as i32;
+
+// --- semi-planar YUV (P0xx) — 4:2:0 high-bit-depth HW downloads -------------
+
+/// 4:2:0, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P010LE`). The
+/// dominant 10-bit HW download format.
+pub const P010LE: i32 = AVPixelFormat::AV_PIX_FMT_P010LE as i32;
+/// 4:2:0, 10-bit, semi-planar big-endian (`AV_PIX_FMT_P010BE`).
+pub const P010BE: i32 = AVPixelFormat::AV_PIX_FMT_P010BE as i32;
+/// 4:2:0, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P012LE`).
+pub const P012LE: i32 = AVPixelFormat::AV_PIX_FMT_P012LE as i32;
+/// 4:2:0, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P016LE`).
+pub const P016LE: i32 = AVPixelFormat::AV_PIX_FMT_P016LE as i32;
+
+// --- semi-planar YUV (P2xx) — 4:2:2 high-bit-depth HW downloads -------------
+
+/// 4:2:2, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P210LE`).
+pub const P210LE: i32 = AVPixelFormat::AV_PIX_FMT_P210LE as i32;
+/// 4:2:2, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P212LE`, FFmpeg 6.1+).
+pub const P212LE: i32 = AVPixelFormat::AV_PIX_FMT_P212LE as i32;
+/// 4:2:2, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P216LE`).
+pub const P216LE: i32 = AVPixelFormat::AV_PIX_FMT_P216LE as i32;
+
+// --- semi-planar YUV (P4xx) — 4:4:4 high-bit-depth HW downloads -------------
+
+/// 4:4:4, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P410LE`).
+pub const P410LE: i32 = AVPixelFormat::AV_PIX_FMT_P410LE as i32;
+/// 4:4:4, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P412LE`, FFmpeg 6.1+).
+pub const P412LE: i32 = AVPixelFormat::AV_PIX_FMT_P412LE as i32;
+/// 4:4:4, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P416LE`).
+pub const P416LE: i32 = AVPixelFormat::AV_PIX_FMT_P416LE as i32;
+
+// --- sentinel ---------------------------------------------------------------
+
+/// Sentinel value FFmpeg writes to `AVFrame.format` for an unset frame
+/// (`AV_PIX_FMT_NONE`). A frame from [`crate::Frame::empty`] reports this
+/// from `pix_fmt()` until it is filled by a decoder.
+pub const NONE: i32 = AVPixelFormat::AV_PIX_FMT_NONE as i32;
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// Sanity check: the constants are casts of the binding discriminants, so
+ /// only the `NONE == -1` assertion pins an actual FFmpeg ABI value.
+ #[test]
+ fn constants_match_bindings() {
+ assert_eq!(NV12, AVPixelFormat::AV_PIX_FMT_NV12 as i32);
+ assert_eq!(P010LE, AVPixelFormat::AV_PIX_FMT_P010LE as i32);
+ assert_eq!(P416LE, AVPixelFormat::AV_PIX_FMT_P416LE as i32);
+ assert_eq!(NONE, -1, "AV_PIX_FMT_NONE must be -1 (FFmpeg ABI sentinel)");
+ }
+
+ #[test]
+ fn match_dispatch_compiles() {
+ fn classify(v: i32) -> &'static str {
+ match v {
+ NV12 => "nv12",
+ NV21 => "nv21",
+ P010LE => "p010le",
+ P210LE => "p210le",
+ P410LE => "p410le",
+ _ => "other",
+ }
+ }
+ assert_eq!(classify(NV12), "nv12");
+ assert_eq!(classify(P010LE), "p010le");
+ assert_eq!(classify(NONE), "other");
+ }
+}
diff --git a/tests/decode.rs b/tests/decode.rs
index bc15f30..10a8bcb 100644
--- a/tests/decode.rs
+++ b/tests/decode.rs
@@ -28,7 +28,17 @@ fn auto_open_decodes_at_least_one_frame() {
let expected_w = unsafe { (*stream.parameters().as_ptr()).width as u32 };
let expected_h = unsafe { (*stream.parameters().as_ptr()).height as u32 };
- let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder");
+ let mut decoder = match VideoDecoder::open(stream.parameters()) {
+ Ok(d) => d,
+ Err(hwdecode::Error::AllBackendsFailed { attempts }) => {
+ eprintln!(
+ "skipping: no hardware backend available ({} attempts)",
+ attempts.len()
+ );
+ return;
+ }
+ Err(e) => panic!("open decoder: {e}"),
+ };
eprintln!("optimistic backend = {:?}", decoder.backend());
assert_eq!(decoder.width(), expected_w);
diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs
index e734533..6e11765 100644
--- a/tests/hw_smoke.rs
+++ b/tests/hw_smoke.rs
@@ -7,7 +7,7 @@
use ffmpeg::{format, media};
use ffmpeg_next as ffmpeg;
-use hwdecode::{Backend, Frame, VideoDecoder};
+use hwdecode::{Frame, VideoDecoder};
const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
@@ -60,9 +60,6 @@ fn auto_probe_picks_hardware_backend() {
}
}
assert!(got_frame, "no frames decoded");
- assert_ne!(
- decoder.backend(),
- Backend::Software,
- "expected hardware backend after first frame; got Software"
- );
+ // hwdecode is hardware-only — `backend()` after a successful first frame
+ // is by construction one of the HW variants. Logged above for visibility.
}
From 189b6ba0161e29af5ae976593973d7c7fdd0c3fc Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 17:15:59 +1200
Subject: [PATCH 05/27] update
---
src/backend.rs | 17 +++---
src/decoder.rs | 38 +++++++++---
src/ffi.rs | 156 +++++++++++++++++++++++++++++++++--------------
src/frame.rs | 162 ++++++++++++++++++++++++++++++++++++++++---------
4 files changed, 284 insertions(+), 89 deletions(-)
diff --git a/src/backend.rs b/src/backend.rs
index bce8699..00cf82e 100644
--- a/src/backend.rs
+++ b/src/backend.rs
@@ -1,4 +1,4 @@
-use ffmpeg_next::{ffi::AVHWDeviceType, format::Pixel};
+use ffmpeg_next::ffi::{AVHWDeviceType, AVPixelFormat};
/// Hardware decoding backend.
///
@@ -33,13 +33,16 @@ impl Backend {
/// Hardware pixel format the codec is expected to produce when this
/// backend is in use. (The post-`av_hwframe_transfer_data` CPU format is
/// typically `NV12` or `P010LE`; this is the *pre-transfer* sentinel.)
- #[allow(dead_code)] // surfaced for tests / future use
- pub(crate) fn hw_pixel_format(self) -> Pixel {
+ ///
+ /// Returns an `AVPixelFormat` value constructed from a hardcoded constant
+ /// in our bindings — never reads an enum value supplied by FFmpeg, so
+ /// no enum-discriminant UB risk.
+ pub(crate) fn hw_pixel_format(self) -> AVPixelFormat {
match self {
- Self::VideoToolbox => Pixel::VIDEOTOOLBOX,
- Self::Vaapi => Pixel::VAAPI,
- Self::Cuda => Pixel::CUDA,
- Self::D3d11va => Pixel::D3D11,
+ Self::VideoToolbox => AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX,
+ Self::Vaapi => AVPixelFormat::AV_PIX_FMT_VAAPI,
+ Self::Cuda => AVPixelFormat::AV_PIX_FMT_CUDA,
+ Self::D3d11va => AVPixelFormat::AV_PIX_FMT_D3D11,
}
}
}
diff --git a/src/decoder.rs b/src/decoder.rs
index 09f5c67..d6cffe3 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -12,7 +12,7 @@ use ffmpeg_next::{
use crate::{
backend::{self, Backend},
error::{Error, Result},
- ffi::{find_hw_pix_fmt, get_hw_format, CallbackState},
+ ffi::{codec_supports_hwaccel, get_hw_format, CallbackState},
frame::Frame,
};
@@ -199,22 +199,32 @@ impl VideoDecoder {
self.state.inner.frame_rate()
}
- /// Submit a packet to the decoder. While the probe is active the packet is
- /// also buffered for potential replay through a fallback backend.
+ /// Submit a packet to the decoder. On success — and only on success —
+ /// the packet is buffered for potential replay through a fallback backend
+ /// while the probe is active. A failed send (including EAGAIN) does not
+ /// mutate replay state, so a later probe advance only replays history
+ /// FFmpeg actually accepted.
pub fn send_packet(&mut self, packet: &Packet) -> Result<()> {
+ self
+ .state
+ .inner
+ .send_packet(packet)
+ .map_err(Error::Ffmpeg)?;
if let Some(probe) = self.probe.as_mut() {
probe.buffered_packets.push(packet.clone());
}
- self.state.inner.send_packet(packet).map_err(Error::Ffmpeg)
+ Ok(())
}
/// Signal end-of-stream to the decoder; remaining frames can be drained
- /// with [`Self::receive_frame`]. Recorded for replay if probe is active.
+ /// with [`Self::receive_frame`]. Recorded for replay only if the underlying
+ /// `send_eof` succeeds.
pub fn send_eof(&mut self) -> Result<()> {
+ self.state.inner.send_eof().map_err(Error::Ffmpeg)?;
if let Some(probe) = self.probe.as_mut() {
probe.eof_sent = true;
}
- self.state.inner.send_eof().map_err(Error::Ffmpeg)
+ Ok(())
}
/// Receive a CPU-side decoded frame.
@@ -385,9 +395,14 @@ impl VideoDecoder {
let mut ctx = Context::from_parameters(parameters)?;
let av_type = backend.av_hwdevice_type();
- // Verify the codec advertises this hwaccel.
- let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type)
- .ok_or(Error::BackendUnsupportedByCodec(backend))?;
+ // Verify the codec advertises this hwaccel. We do *not* read the
+ // codec's advertised pix_fmt — we use the hardcoded constant from
+ // `Backend::hw_pixel_format` so no FFmpeg-supplied enum value is ever
+ // interpreted as `AVPixelFormat`.
+ if !codec_supports_hwaccel(unsafe { codec.as_ptr() }, av_type) {
+ return Err(Error::BackendUnsupportedByCodec(backend));
+ }
+ let hw_pix_fmt = backend.hw_pixel_format();
// Create the device context.
let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut();
@@ -402,7 +417,10 @@ impl VideoDecoder {
});
}
- let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt }));
+ let callback_state = Box::into_raw(Box::new(CallbackState {
+ wanted: hw_pix_fmt,
+ wanted_int: hw_pix_fmt as i32,
+ }));
// SAFETY: ctx is a freshly-constructed AVCodecContext we own;
// av_buffer_ref bumps the refcount of the device buffer for FFmpeg's
// use (we keep our own ref in `hw_device_ref` for cleanup).
diff --git a/src/ffi.rs b/src/ffi.rs
index 78ee80c..794d474 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -1,5 +1,15 @@
//! FFI shims used by the decoder. Kept in one place so the unsafe surface is
//! easy to audit.
+//!
+//! All reads of `AVPixelFormat` / `AVHWDeviceType` values returned by FFmpeg
+//! at runtime go through `ptr::read::<i32>` after a pointer cast, never
+//! through the bindgen-generated Rust enum. The enums are `#[repr(i32)]`
+//! and constructing them from a value not in the listed discriminants is
+//! undefined behavior — exactly the situation header/library skew creates.
+//! See the doc comments on individual functions for what is read as raw
+//! integer vs. constructed from a known constant.
+
+use std::ptr;
use ffmpeg_next::ffi::{
avcodec_get_hw_config, AVCodec, AVCodecContext, AVHWDeviceType, AVPixelFormat,
@@ -10,22 +20,29 @@ use ffmpeg_next::ffi::{
/// the correct hardware pixel format without globals. One instance per
/// decoder; freed by [`crate::VideoDecoder`] after the codec context is
/// dropped.
+///
+/// `wanted` is set from a hardcoded `AVPixelFormat` constant in our bindings
+/// (via `Backend::hw_pixel_format`), so it is always a valid enum value. We
+/// also store its raw `i32` so the callback can compare against the offered
+/// list without going through enum reads.
#[repr(C)]
pub(crate) struct CallbackState {
- /// Hardware pixel format we want the decoder to produce.
+ /// Hardware pixel format we want the decoder to produce. Constructed
+ /// from a known constant; safe to use as the callback's return value.
pub(crate) wanted: AVPixelFormat,
+ /// Same value as `wanted` cast to `i32`, cached so the callback's
+ /// pix_fmts walk doesn't have to convert per iteration.
+ pub(crate) wanted_int: i32,
}
/// `AVCodecContext::get_format` callback. FFmpeg invokes it with the list of
/// pixel formats the codec is willing to output for the current stream.
///
-/// Returns the configured hardware format if present; otherwise
-/// [`AVPixelFormat::AV_PIX_FMT_NONE`], which causes the decoder to fail. The
-/// failure surfaces as a normal `Error::Ffmpeg` from
-/// [`crate::VideoDecoder::receive_frame`]; for `VideoDecoder::open` callers
-/// the probe loop tears down and retries with the next backend (replaying
-/// buffered packets), so software fallback happens at the decoder level
-/// rather than silently in-context.
+/// The offered list is walked as `*const i32` (cast from `*const AVPixelFormat`)
+/// to avoid constructing the bindgen enum from values that may not be in our
+/// build's discriminant set. The return value is either `wanted` (a known
+/// constant) or `AV_PIX_FMT_NONE` (also a known constant) — both safe to
+/// produce as `AVPixelFormat`.
pub(crate) unsafe extern "C" fn get_hw_format(
ctx: *mut AVCodecContext,
pix_fmts: *const AVPixelFormat,
@@ -38,41 +55,68 @@ pub(crate) unsafe extern "C" fn get_hw_format(
// codec context's drop runs). When opaque is null we treat the call as
// strict — a stray invocation cannot silently downgrade.
let state = unsafe { (*ctx).opaque as *const CallbackState };
- let wanted = if state.is_null() {
- AVPixelFormat::AV_PIX_FMT_NONE
+ let (wanted, wanted_int) = if state.is_null() {
+ (
+ AVPixelFormat::AV_PIX_FMT_NONE,
+ AVPixelFormat::AV_PIX_FMT_NONE as i32,
+ )
} else {
- unsafe { (*state).wanted }
+ unsafe { ((*state).wanted, (*state).wanted_int) }
};
- let mut p = pix_fmts;
- while unsafe { *p } != AVPixelFormat::AV_PIX_FMT_NONE {
- if unsafe { *p } == wanted {
+ // Walk the offered list as i32. The pointer cast is sound because
+ // `AVPixelFormat` is `#[repr(i32)]` (same size and alignment as i32).
+ // Reading as i32 cannot be UB regardless of the value FFmpeg wrote.
+ let mut p = pix_fmts as *const i32;
+ let none_int = AVPixelFormat::AV_PIX_FMT_NONE as i32;
+ loop {
+ // SAFETY: FFmpeg guarantees the list is terminated by AV_PIX_FMT_NONE.
+ // We bail at the sentinel; reads up to and including it are in-bounds.
+ let v = unsafe { ptr::read(p) };
+ if v == none_int {
+ return AVPixelFormat::AV_PIX_FMT_NONE;
+ }
+ if v == wanted_int {
return wanted;
}
p = unsafe { p.add(1) };
}
- AVPixelFormat::AV_PIX_FMT_NONE
}
-/// Walk the codec's `AVCodecHWConfig` table and return the hardware pixel
-/// format associated with `device_type`, if the codec advertises one that
-/// uses the `HW_DEVICE_CTX` setup method.
-pub(crate) fn find_hw_pix_fmt(
- codec: *const AVCodec,
- device_type: AVHWDeviceType,
-) -> Option {
+/// Walk the codec's `AVCodecHWConfig` table and return whether the codec
+/// advertises support for `device_type` via the `HW_DEVICE_CTX` setup method.
+///
+/// We do not return the codec's advertised `pix_fmt` — we know it already
+/// from [`crate::backend::Backend::hw_pixel_format`] (a hardcoded constant
+/// from our bindings). All reads from the FFmpeg-supplied `AVCodecHWConfig`
+/// are performed as raw integers via `addr_of!` + `ptr::read::<i32>` to
+/// avoid copying or interpreting enum-typed fields whose runtime values
+/// might not match our build's discriminant set.
+pub(crate) fn codec_supports_hwaccel(codec: *const AVCodec, device_type: AVHWDeviceType) -> bool {
debug_assert!(!codec.is_null());
+ let device_type_int = device_type as i32;
let mut i = 0;
loop {
// SAFETY: `avcodec_get_hw_config` returns null past the end; we stop then.
let cfg = unsafe { avcodec_get_hw_config(codec, i) };
if cfg.is_null() {
- return None;
+ return false;
}
- let cfg = unsafe { *cfg };
- let supports_device_ctx = cfg.methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0;
- if supports_device_ctx && cfg.device_type == device_type {
- return Some(cfg.pix_fmt);
+ // Read each field as raw integer rather than copying the whole struct
+ // (which would interpret `pix_fmt` and `device_type` as their enum types).
+ // SAFETY: `cfg` is non-null and points to a valid `AVCodecHWConfig` for
+ // the lifetime of the call; `addr_of!` projects to a sized field; the
+ // `*const i32` cast is sound because `methods` is `c_int` (i32) and
+ // `device_type` is `AVHWDeviceType` (`#[repr(u32)]`, but FFmpeg's
+ // assigned values fit in i32 and the runtime layout is i32-sized).
+ let methods: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).methods)) };
+ let cfg_device_type_int: i32 =
+ unsafe { ptr::read(ptr::addr_of!((*cfg).device_type) as *const i32) };
+
+ if methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0
+ && cfg_device_type_int == device_type_int
+ {
+ return true;
}
i += 1;
}
@@ -81,7 +125,6 @@ pub(crate) fn find_hw_pix_fmt(
#[cfg(test)]
mod tests {
use super::*;
- use std::ptr;
// The callback derefs `(*ctx).opaque`, so we need a real-looking
// AVCodecContext. We construct a zeroed one (the callback only reads opaque).
@@ -100,22 +143,32 @@ mod tests {
}
}
- fn run(state: &CallbackState, mut offered: Vec) -> AVPixelFormat {
- offered.push(AVPixelFormat::AV_PIX_FMT_NONE);
+ fn make_state(wanted: AVPixelFormat) -> CallbackState {
+ CallbackState {
+ wanted,
+ wanted_int: wanted as i32,
+ }
+ }
+
+ fn run(state: &CallbackState, mut offered: Vec<i32>) -> AVPixelFormat {
+ // Build the offered list as raw i32, terminated by AV_PIX_FMT_NONE.
+ offered.push(AVPixelFormat::AV_PIX_FMT_NONE as i32);
let ctx = FakeCtx::new(state as *const _ as *mut _);
- unsafe { get_hw_format(ctx.0, offered.as_ptr()) }
+ // SAFETY: we cast the i32 buffer pointer to *const AVPixelFormat
+ // because that's the function's declared signature. The callback only
+ // ever reads through *const i32 internally, so this transit through
+ // *const AVPixelFormat is purely a type system formality.
+ unsafe { get_hw_format(ctx.0, offered.as_ptr() as *const AVPixelFormat) }
}
#[test]
fn returns_wanted_hw_format_when_offered() {
- let state = CallbackState {
- wanted: AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX,
- };
+ let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX);
let got = run(
&state,
vec![
- AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX,
- AVPixelFormat::AV_PIX_FMT_NV12,
+ AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX as i32,
+ AVPixelFormat::AV_PIX_FMT_NV12 as i32,
],
);
assert_eq!(got, AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX);
@@ -123,14 +176,12 @@ mod tests {
#[test]
fn returns_none_when_wanted_absent() {
- let state = CallbackState {
- wanted: AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX,
- };
+ let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX);
let got = run(
&state,
vec![
- AVPixelFormat::AV_PIX_FMT_NV12,
- AVPixelFormat::AV_PIX_FMT_YUV420P,
+ AVPixelFormat::AV_PIX_FMT_NV12 as i32,
+ AVPixelFormat::AV_PIX_FMT_YUV420P as i32,
],
);
assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE);
@@ -142,11 +193,28 @@ mod tests {
let ctx_raw = Box::into_raw(boxed);
unsafe { (*ctx_raw).opaque = ptr::null_mut() };
let offered = [
- AVPixelFormat::AV_PIX_FMT_NV12,
- AVPixelFormat::AV_PIX_FMT_NONE,
+ AVPixelFormat::AV_PIX_FMT_NV12 as i32,
+ AVPixelFormat::AV_PIX_FMT_NONE as i32,
];
- let got = unsafe { get_hw_format(ctx_raw, offered.as_ptr()) };
+ let got = unsafe { get_hw_format(ctx_raw, offered.as_ptr() as *const AVPixelFormat) };
assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE);
unsafe { drop(Box::from_raw(ctx_raw)) };
}
+
+ #[test]
+ fn unknown_offered_value_is_skipped_without_ub() {
+ // Simulate a header-skewed FFmpeg that offers a pixel-format value we
+ // don't have a binding constant for (e.g. some future format). The
+ // callback walks the list as i32 — no enum is constructed from that
+ // value, so this read is sound.
+ let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX);
+ let got = run(
+ &state,
+ vec![
+ 99_999_i32, // imaginary unknown
+ AVPixelFormat::AV_PIX_FMT_NV12 as i32,
+ ],
+ );
+ assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE);
+ }
}
diff --git a/src/frame.rs b/src/frame.rs
index f221b68..d6ceca6 100644
--- a/src/frame.rs
+++ b/src/frame.rs
@@ -1,23 +1,27 @@
//! CPU-side decoded video frame.
//!
-//! Wraps `ffmpeg_next::frame::Video` so callers cannot reach the upstream
-//! `format()` accessor, which constructs an `AVPixelFormat` enum from the
-//! raw integer FFmpeg writes into `AVFrame.format`. That conversion is UB
-//! when the value isn't in the bindgen-generated enum (library/header skew,
-//! a new pixel format added upstream, etc.). The wrapper exposes
-//! [`Frame::pix_fmt`] which reads the field as a plain `i32` — sound for any
-//! value FFmpeg can produce — and accessors are limited to fields whose
-//! reads do not invoke the same hazard.
+//! Wraps `ffmpeg_next::frame::Video`. All accessors read from raw `AVFrame`
+//! fields (`format`, `linesize`, `data`, `width`, `height`, `pts`) directly
+//! and never go through ffmpeg-next's `Video::format()` / `plane_height()`
+//! / `plane_width()` / `data()` — those construct `AVPixelFormat` from the
+//! frame's raw `format` integer via `transmute`, which is undefined behavior
+//! when the value isn't in the build's bindgen-generated discriminant set
+//! (the exact failure mode this crate is designed to survive).
//!
-//! Compare formats against integer constants taken from the FFI layer, e.g.
+//! Plane lengths for [`Frame::data`] are computed from a hardcoded chroma-
+//! subsampling table keyed on the safe `pix_fmt()` integer, covering only
+//! the formats `hwdecode` produces (the NV* and P0xx/P2xx/P4xx families
+//! after `av_hwframe_transfer_data`). For any other format, [`Frame::data`]
+//! returns `None` rather than guessing at a slice length.
//!
-//! ```ignore
-//! use ffmpeg_next::ffi::AVPixelFormat;
-//! if frame.pix_fmt() == AVPixelFormat::AV_PIX_FMT_NV12 as i32 { ... }
-//! ```
+//! Compare formats against integer constants in [`crate::pix_fmt`].
+
+use std::slice;
use ffmpeg_next::frame;
+use crate::pix_fmt;
+
/// CPU-side decoded video frame produced by [`crate::VideoDecoder`].
pub struct Frame {
inner: frame::Video,
@@ -34,45 +38,93 @@ impl Frame {
/// Width in pixels.
pub fn width(&self) -> u32 {
- self.inner.width()
+ // SAFETY: AVFrame.width is c_int; safe to read regardless of value.
+ unsafe { (*self.inner.as_ptr()).width as u32 }
}
/// Height in pixels.
pub fn height(&self) -> u32 {
- self.inner.height()
+ // SAFETY: AVFrame.height is c_int.
+ unsafe { (*self.inner.as_ptr()).height as u32 }
}
/// Pixel format, returned as the raw `i32` value FFmpeg wrote to
/// `AVFrame.format`. Sound regardless of the linked FFmpeg version —
/// no `AVPixelFormat` enum is constructed.
///
- /// Compare against integer constants from `ffmpeg_next::ffi`, e.g.
- /// `frame.pix_fmt() == AVPixelFormat::AV_PIX_FMT_NV12 as i32`.
+ /// Compare against constants in [`crate::pix_fmt`].
pub fn pix_fmt(&self) -> i32 {
- // SAFETY: `AVFrame.format` is bound as `c_int`; reading it yields a
- // plain integer with no validity invariants.
+ // SAFETY: AVFrame.format is bound as c_int.
unsafe { (*self.inner.as_ptr()).format }
}
- /// Presentation timestamp in stream time base, or `None` if the frame
- /// carries `AV_NOPTS_VALUE`.
+ /// Presentation timestamp in stream time base, or `None` for
+ /// `AV_NOPTS_VALUE`.
 pub fn pts(&self) -> Option<i64> {
+ // ffmpeg-next's Frame::pts performs no enum conversion; safe to use.
self.inner.pts()
}
- /// Number of populated planes (e.g. 3 for `YUV420P`, 2 for `NV12`).
+ /// Number of populated planes (1 for packed formats, 2 for NV12/P010,
+ /// 3 for planar YUV, etc.). Computed by scanning `linesize` for the
+ /// first zero entry — no enum reads.
pub fn planes(&self) -> usize {
- self.inner.planes()
+ // SAFETY: AVFrame.linesize is `[c_int; 8]`; reads are sound.
+ unsafe {
+ let linesize = &(*self.inner.as_ptr()).linesize;
+ for (i, ls) in linesize.iter().enumerate() {
+ if *ls == 0 {
+ return i;
+ }
+ }
+ linesize.len()
+ }
}
- /// Bytes per row for `plane`. Panics if `plane >= planes()`.
+ /// Bytes per row for `plane`. Reads `AVFrame.linesize[plane]` directly.
+ /// Panics if `plane >= planes()`.
pub fn stride(&self, plane: usize) -> usize {
- self.inner.stride(plane)
+ let n = self.planes();
+ assert!(
+ plane < n,
+ "stride: plane {plane} out of bounds (planes={n})"
+ );
+ // SAFETY: bounds-checked above; linesize is `[c_int; 8]`.
+ unsafe { (*self.inner.as_ptr()).linesize[plane] as usize }
}
- /// Pixel data for `plane`. Panics if `plane >= planes()`.
- pub fn data(&self, plane: usize) -> &[u8] {
- self.inner.data(plane)
+ /// Pixel data for `plane`.
+ ///
+ /// Returns `None` when the frame's pixel format is not one of the
+ /// hardware-output formats listed in [`crate::pix_fmt`] — we cannot
+ /// safely compute the plane size for an unknown layout. Returns `None`
+ /// for an out-of-bounds plane index, a null data pointer, or an empty
+ /// frame.
+ ///
+ /// Currently supported (post-`av_hwframe_transfer_data`):
+ /// - 4:2:0 semi-planar 8-bit: `NV12`, `NV21`
+ /// - 4:2:2 semi-planar 8-bit: `NV16`
+ /// - 4:4:4 semi-planar 8-bit: `NV24`
+ /// - 4:2:0 semi-planar 10/12/16-bit: `P010LE`/`P010BE`/`P012LE`/`P016LE`
+ /// - 4:2:2 semi-planar 10/12/16-bit: `P210LE`/`P212LE`/`P216LE`
+ /// - 4:4:4 semi-planar 10/12/16-bit: `P410LE`/`P412LE`/`P416LE`
+ pub fn data(&self, plane: usize) -> Option<&[u8]> {
+ if plane >= self.planes() {
+ return None;
+ }
+ let stride = self.stride(plane);
+ let plane_height = plane_height_for(self.pix_fmt(), plane, self.height() as usize)?;
+ let len = stride.checked_mul(plane_height)?;
+ // SAFETY: bounds-checked plane index above. We trust FFmpeg to populate
+ // `data[plane]` validly when `linesize[plane]` is non-zero (which we
+ // verified via `planes()`); null-check guards against edge cases.
+ unsafe {
+ let ptr = (*self.inner.as_ptr()).data[plane];
+ if ptr.is_null() {
+ return None;
+ }
+ Some(slice::from_raw_parts(ptr, len))
+ }
}
/// Crate-internal: hand the wrapped frame to FFmpeg / our decoder code.
@@ -87,6 +139,37 @@ impl Default for Frame {
}
}
+/// Number of rows in `plane` for a frame of `frame_height` and the given
+/// pixel format. `None` for formats not in the supported HW-output set.
+fn plane_height_for(pix_fmt_int: i32, plane: usize, frame_height: usize) -> Option<usize> {
+ match pix_fmt_int {
+ // 4:2:0 semi-planar — Y full height, chroma half height.
+ pix_fmt::NV12
+ | pix_fmt::NV21
+ | pix_fmt::P010LE
+ | pix_fmt::P010BE
+ | pix_fmt::P012LE
+ | pix_fmt::P016LE => match plane {
+ 0 => Some(frame_height),
+ 1 => Some(frame_height.div_ceil(2)),
+ _ => None,
+ },
+ // 4:2:2 / 4:4:4 semi-planar — both planes full height.
+ pix_fmt::NV16
+ | pix_fmt::NV24
+ | pix_fmt::P210LE
+ | pix_fmt::P212LE
+ | pix_fmt::P216LE
+ | pix_fmt::P410LE
+ | pix_fmt::P412LE
+ | pix_fmt::P416LE => match plane {
+ 0 | 1 => Some(frame_height),
+ _ => None,
+ },
+ _ => None,
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -99,6 +182,15 @@ mod tests {
assert_eq!(f.pts(), None);
// AVFrame.format defaults to -1 (AV_PIX_FMT_NONE) for an empty frame.
assert_eq!(f.pix_fmt(), -1);
+ // No active planes for an empty frame (all linesize entries are 0).
+ assert_eq!(f.planes(), 0);
+ }
+
+ #[test]
+ fn data_returns_none_for_unknown_format() {
+ let f = Frame::empty();
+ // pix_fmt is NONE (-1), not in the supported set.
+ assert!(f.data(0).is_none());
}
#[test]
@@ -106,4 +198,18 @@ mod tests {
fn check() {}
check::();
}
+
+ #[test]
+ fn plane_height_table_covers_supported_formats() {
+ // Spot-check the chroma subsampling table.
+ assert_eq!(plane_height_for(pix_fmt::NV12, 0, 1080), Some(1080));
+ assert_eq!(plane_height_for(pix_fmt::NV12, 1, 1080), Some(540));
+ assert_eq!(plane_height_for(pix_fmt::NV12, 1, 1081), Some(541));
+ assert_eq!(plane_height_for(pix_fmt::P010LE, 1, 1080), Some(540));
+ assert_eq!(plane_height_for(pix_fmt::NV16, 1, 1080), Some(1080));
+ assert_eq!(plane_height_for(pix_fmt::NV24, 1, 1080), Some(1080));
+ assert_eq!(plane_height_for(pix_fmt::P416LE, 1, 1080), Some(1080));
+ assert_eq!(plane_height_for(pix_fmt::NONE, 0, 1080), None);
+ assert_eq!(plane_height_for(pix_fmt::NV12, 2, 1080), None);
+ }
}
From fa30939968282e84a3752df080c96e9346307a88 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 18:09:51 +1200
Subject: [PATCH 06/27] update
---
src/decoder.rs | 261 +++++++++++++++++++++++++++++++++++++++++--------
src/error.rs | 9 +-
src/frame.rs | 99 ++++++++++++++++---
3 files changed, 315 insertions(+), 54 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index d6cffe3..fcaea44 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -1,14 +1,27 @@
-use std::{mem::ManuallyDrop, ptr};
+use std::{collections::VecDeque, mem::ManuallyDrop, ptr};
use ffmpeg_next::{
codec::{self, Context},
ffi::{
- av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_unref, av_hwdevice_ctx_create,
- av_hwframe_transfer_data, AVBufferRef,
+ av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref,
+ av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVCodec,
},
frame, Codec, Packet, Rational,
};
+/// Local FFI shim: `avcodec_find_decoder` declared with `c_int` instead of
+/// the bindgen `AVCodecID` enum. Constructing `AVCodecID` from a runtime
+/// integer that isn't in our build's discriminant set is UB; calling the
+/// C function with a raw int avoids that boundary entirely. Both Rust
+/// declarations resolve to the same C symbol at link time.
+mod c_shims {
+ use super::AVCodec;
+ use libc::c_int;
+ extern "C" {
+ pub fn avcodec_find_decoder(id: c_int) -> *const AVCodec;
+ }
+}
+
use crate::{
backend::{self, Backend},
error::{Error, Result},
@@ -42,6 +55,13 @@ pub struct VideoDecoder {
/// backend, then `None`. While `Some`, packets are buffered for replay and
/// non-transient errors / decoder failures advance to the next backend.
probe: Option,
+ /// CPU-side frames produced by a candidate decoder during probe replay
+ /// (when its internal queue filled and we had to drain output before the
+ /// next `send_packet`). Already transferred from the candidate's
+ /// `AVHWFramesContext` to a CPU frame, so they remain valid after the
+ /// candidate state is committed. [`Self::receive_frame`] dequeues these
+ /// FIFO before reading from `state.inner`.
+ pending_frames: VecDeque,
}
/// Owned FFmpeg state for one open codec context. Has its own `Drop` so we
@@ -121,8 +141,7 @@ impl VideoDecoder {
/// `open` cannot return without a working decoder for any codec libavcodec
/// supports.
pub fn open(parameters: codec::Parameters) -> Result {
- let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id });
- let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?;
+ let codec = find_decoder(&parameters)?;
let order = backend::probe_order();
let mut attempts: Vec<(Backend, Box)> = Vec::new();
@@ -142,6 +161,7 @@ impl VideoDecoder {
state,
hw_frame: frame::Video::empty(),
probe,
+ pending_frames: VecDeque::new(),
});
}
Err(e) => {
@@ -161,13 +181,13 @@ impl VideoDecoder {
/// `AV_PIX_FMT_NONE`, the decoder errors out). The caller is responsible
/// for retrying with `Backend::Software` or another backend if desired.
pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result {
- let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id });
- let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?;
+ let codec = find_decoder(&parameters)?;
let state = Self::build_state(parameters, codec, backend)?;
Ok(Self {
state,
hw_frame: frame::Video::empty(),
probe: None,
+ pending_frames: VecDeque::new(),
})
}
@@ -199,32 +219,64 @@ impl VideoDecoder {
self.state.inner.frame_rate()
}
- /// Submit a packet to the decoder. On success — and only on success —
- /// the packet is buffered for potential replay through a fallback backend
- /// while the probe is active. A failed send (including EAGAIN) does not
- /// mutate replay state, so a later probe advance only replays history
- /// FFmpeg actually accepted.
+ /// Submit a packet to the decoder.
+ ///
+ /// On success — and only on success — the packet is buffered for potential
+ /// replay through a fallback backend while the probe is active. EAGAIN
+ /// (decoder needs `receive_frame` to drain output first) propagates as
+ /// normal backpressure; the caller drains then retries.
+ ///
+ /// While the probe is active, a non-transient error (e.g. the active HW
+ /// backend rejecting this stream's geometry on first packet) advances the
+ /// probe to the next candidate and retries the packet there. The caller
+ /// observes only the eventual success or, if the probe is exhausted, the
+ /// final error.
pub fn send_packet(&mut self, packet: &Packet) -> Result<()> {
- self
- .state
- .inner
- .send_packet(packet)
- .map_err(Error::Ffmpeg)?;
- if let Some(probe) = self.probe.as_mut() {
- probe.buffered_packets.push(packet.clone());
+ loop {
+ match self.state.inner.send_packet(packet) {
+ Ok(()) => {
+ if let Some(probe) = self.probe.as_mut() {
+ probe.buffered_packets.push(packet.clone());
+ }
+ return Ok(());
+ }
+ Err(e) if is_transient(&e) => {
+ // Normal backpressure / EOF — pass through unchanged.
+ return Err(Error::Ffmpeg(e));
+ }
+ Err(e) => {
+ if self.probe.is_some() && self.advance_probe()? {
+ continue;
+ }
+ return Err(Error::Ffmpeg(e));
+ }
+ }
}
- Ok(())
}
- /// Signal end-of-stream to the decoder; remaining frames can be drained
- /// with [`Self::receive_frame`]. Recorded for replay only if the underlying
- /// `send_eof` succeeds.
+ /// Signal end-of-stream to the decoder.
+ ///
+ /// Recorded for replay only if the underlying `send_eof` succeeds. While
+ /// the probe is active, non-transient errors trigger probe advance and
+ /// retry, matching `send_packet`'s behaviour.
pub fn send_eof(&mut self) -> Result<()> {
- self.state.inner.send_eof().map_err(Error::Ffmpeg)?;
- if let Some(probe) = self.probe.as_mut() {
- probe.eof_sent = true;
+ loop {
+ match self.state.inner.send_eof() {
+ Ok(()) => {
+ if let Some(probe) = self.probe.as_mut() {
+ probe.eof_sent = true;
+ }
+ return Ok(());
+ }
+ Err(e) if is_transient(&e) => return Err(Error::Ffmpeg(e)),
+ Err(e) => {
+ if self.probe.is_some() && self.advance_probe()? {
+ continue;
+ }
+ return Err(Error::Ffmpeg(e));
+ }
+ }
}
- Ok(())
}
/// Receive a CPU-side decoded frame.
@@ -238,18 +290,25 @@ impl VideoDecoder {
/// error, transfer error, copy_props error, or a CPU-format frame from a
/// HW-opened context) tears down the current decoder and advances to the
/// next hardware backend in probe order, replaying buffered packets
- /// through it. The caller observes only the eventual successful frame
- /// (or, if every backend has been exhausted, the underlying error).
+ /// through it. Frames the candidate produced during replay (drained when
+ /// `send_packet` returned EAGAIN) are queued and delivered FIFO via this
+ /// method, so the caller never loses initial frames after a fallback.
///
/// This crate is hardware-only: there is no software fallback inside the
/// decoder. If every backend is exhausted, the failure surfaces as the
- /// last decoder error (or [`Error::HwBackendProducedSwFrame`] for the
- /// degraded-CPU-frame case). Callers handle software fallback themselves.
+ /// last decoder error. Callers handle software fallback themselves.
///
/// Returns the same transient signals as `ffmpeg::decoder::Video`:
/// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and
/// more packets must be sent, and `Error::Ffmpeg(Eof)` once fully drained.
pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<()> {
+ // Pre-drain frames queued during probe replay. They are already CPU-side
+ // (transferred at drain time, when the candidate's HW context was alive)
+ // so we just move them into the caller's slot.
+ if self.try_pop_pending(frame) {
+ return Ok(());
+ }
+
loop {
let res = self.state.inner.receive_frame(&mut self.hw_frame);
match res {
@@ -258,6 +317,11 @@ impl VideoDecoder {
return Err(Error::Ffmpeg(e));
}
if self.probe.is_some() && self.advance_probe()? {
+ // Probe advance may have populated `pending_frames`; deliver
+ // one of those before reading more from the new candidate.
+ if self.try_pop_pending(frame) {
+ return Ok(());
+ }
continue;
}
return Err(Error::Ffmpeg(e));
@@ -277,6 +341,9 @@ impl VideoDecoder {
Err(e) => {
if self.probe.is_some() && self.advance_probe()? {
unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) };
+ if self.try_pop_pending(frame) {
+ return Ok(());
+ }
continue;
}
return Err(Error::Ffmpeg(e));
@@ -287,6 +354,24 @@ impl VideoDecoder {
}
}
+ /// Pop one queued frame (produced by a candidate decoder during probe
+ /// replay) into the caller's slot. Returns `true` when a frame was
+ /// delivered, `false` when the queue was empty.
+ fn try_pop_pending(&mut self, frame: &mut Frame) -> bool {
+ let Some(mut buffered) = self.pending_frames.pop_front() else {
+ return false;
+ };
+ // SAFETY: `buffered` is a CPU-side AVFrame we previously transferred
+ // and pushed into the queue; both pointers are valid.
+ unsafe {
+ av_frame_unref(frame.as_inner_mut().as_mut_ptr());
+ av_frame_move_ref(frame.as_inner_mut().as_mut_ptr(), buffered.as_mut_ptr());
+ }
+ // Probe semantics: delivering a frame collapses the probe.
+ self.probe = None;
+ true
+ }
+
/// Flush internal buffers (e.g. after a seek). Resets probe-time buffer if
/// active, since post-seek packets do not align with replayed history.
pub fn flush(&mut self) {
@@ -342,13 +427,39 @@ impl VideoDecoder {
// We borrow the buffer immutably; if replay fails the candidate's Drop
// releases the FFmpeg state and the buffer is preserved for the next
// attempt.
+ //
+ // EAGAIN handling: `avcodec_send_packet` may return EAGAIN when its
+ // internal queue is full and the user is expected to drain output
+ // first (B-frame buffering, candidate-specific queue depth, etc.).
+ // This is normal flow — we drain frames out of the candidate, transfer
+ // each one to a CPU frame, and stash them in `local_pending`. After
+ // commit they move to `self.pending_frames` and are delivered FIFO
+ // by `receive_frame`, so the caller never loses initial frames.
+ let mut local_pending: VecDeque = VecDeque::new();
let replay_result: std::result::Result<(), ffmpeg_next::Error> = {
let probe = self.probe.as_ref().expect("probe state present");
+ let mut hw_buf = frame::Video::empty();
let mut r: std::result::Result<(), ffmpeg_next::Error> = Ok(());
- for pkt in &probe.buffered_packets {
- if let Err(e) = candidate_state.inner.send_packet(pkt) {
- r = Err(e);
- break;
+
+ 'replay: for pkt in &probe.buffered_packets {
+ loop {
+ match candidate_state.inner.send_packet(pkt) {
+ Ok(()) => break,
+ Err(e) if is_eagain(&e) => {
+ // Drain candidate output (transferring + queueing each frame)
+ // and retry the same packet.
+ if let Err(de) =
+ drain_into_pending(&mut candidate_state.inner, &mut hw_buf, &mut local_pending)
+ {
+ r = Err(de);
+ break 'replay;
+ }
+ }
+ Err(e) => {
+ r = Err(e);
+ break 'replay;
+ }
+ }
}
}
if r.is_ok() && probe.eof_sent {
@@ -361,8 +472,11 @@ impl VideoDecoder {
if let Err(e) = replay_result {
tracing::warn!(?next_backend, error = %e, "hwdecode: candidate replay failed");
- // Drop candidate explicitly so its FFI cleanup runs now.
+ // Drop candidate explicitly so its FFI cleanup runs now. Discard any
+ // frames we drained from this candidate — they're tied to a decoder
+ // we're throwing away.
drop(candidate_state);
+ drop(local_pending);
self
.probe
.as_mut()
@@ -372,9 +486,11 @@ impl VideoDecoder {
continue;
}
- // Commit: install the candidate, clear residual hw_frame, pop backend.
+ // Commit: install the candidate, clear residual hw_frame, queue the
+ // drained frames for the caller, and pop the now-active backend.
self.state = candidate_state;
unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) };
+ self.pending_frames.append(&mut local_pending);
self
.probe
.as_mut()
@@ -484,8 +600,75 @@ unsafe fn transfer_hw_frame(
/// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame`
/// and must not be treated as backend failures.
fn is_transient(e: &ffmpeg_next::Error) -> bool {
+ is_eagain(e) || matches!(e, ffmpeg_next::Error::Eof)
+}
+
+/// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine
+/// distinguishes "drain output and retry" from "stream over").
+fn is_eagain(e: &ffmpeg_next::Error) -> bool {
matches!(e, ffmpeg_next::Error::Other { errno } if *errno == ffmpeg_next::error::EAGAIN)
- || matches!(e, ffmpeg_next::Error::Eof)
+}
+
+/// Look up the decoder for `parameters` without going through the bindgen
+/// `AVCodecID` Rust enum. Reads the codec_id field as raw `u32` via
+/// `addr_of!` + `ptr::read` so a value not in our build's discriminant
+/// set never invokes UB.
+fn find_decoder(parameters: &codec::Parameters) -> Result {
+ // SAFETY: parameters owns a valid AVCodecParameters; addr_of! projects
+ // to the codec_id field; the *const u32 cast is sound because AVCodecID
+ // is `#[repr(u32)]` (same size and alignment as u32). Reading as u32
+ // cannot be UB regardless of the value FFmpeg wrote.
+ let raw_id: u32 =
+ unsafe { ptr::read(ptr::addr_of!((*parameters.as_ptr()).codec_id) as *const u32) };
+
+ // Call C `avcodec_find_decoder` via our local `c_int`-typed shim — we
+ // never construct an `AVCodecID` enum from `raw_id`. The C function
+ // returns NULL for unknown ids, which we surface as `Error::NoCodec`.
+ // SAFETY: avcodec_find_decoder is a pure FFmpeg lookup; passing any
+ // c_int is sound (returns NULL for unknown).
+ let codec_ptr = unsafe { c_shims::avcodec_find_decoder(raw_id as libc::c_int) };
+ if codec_ptr.is_null() {
+ return Err(Error::NoCodec(raw_id));
+ }
+ // SAFETY: codec_ptr is a non-null *const AVCodec into FFmpeg's static
+ // codec table; it lives for the duration of the program.
+ Ok(unsafe { Codec::wrap(codec_ptr) })
+}
+
+/// Drain output frames from a candidate decoder during probe replay,
+/// transferring each one from the candidate's HW context to a fresh CPU
+/// frame and queueing it. Returns `Ok(())` once the candidate signals
+/// EAGAIN/EOF. The transfer happens while the candidate is still alive
+/// (its `AVHWFramesContext` is reachable); the resulting CPU frames remain
+/// valid after the candidate is committed because they hold their own
+/// buffer references with no dependency on the original device context.
+fn drain_into_pending(
+ decoder: &mut ffmpeg_next::decoder::Video,
+ hw_buf: &mut frame::Video,
+ pending: &mut VecDeque,
+) -> std::result::Result<(), ffmpeg_next::Error> {
+ loop {
+ match decoder.receive_frame(hw_buf) {
+ Ok(()) => {
+ let mut cpu = frame::Video::empty();
+ // SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data
+ // allocates buffers on `cpu`. copy_props moves timing/side data over.
+ unsafe {
+ let r1 = av_hwframe_transfer_data(cpu.as_mut_ptr(), hw_buf.as_ptr(), 0);
+ if r1 < 0 {
+ return Err(ffmpeg_next::Error::from(r1));
+ }
+ let r2 = av_frame_copy_props(cpu.as_mut_ptr(), hw_buf.as_ptr());
+ if r2 < 0 {
+ return Err(ffmpeg_next::Error::from(r2));
+ }
+ }
+ pending.push_back(cpu);
+ }
+ Err(e) if is_transient(&e) => return Ok(()),
+ Err(e) => return Err(e),
+ }
+ }
}
#[allow(dead_code)]
@@ -500,7 +683,7 @@ mod tests {
#[test]
fn no_codec_for_unknown_id() {
- let err = Error::NoCodec(codec::Id::None);
+ let err = Error::NoCodec(0);
assert!(format!("{err}").contains("no decoder"));
}
diff --git a/src/error.rs b/src/error.rs
index ef5373c..955d215 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -10,9 +10,12 @@ pub enum Error {
#[error("ffmpeg error: {0}")]
Ffmpeg(#[from] ffmpeg_next::Error),
- /// `avcodec_find_decoder` returned null for the input codec id.
- #[error("no decoder for codec id {0:?}")]
- NoCodec(ffmpeg_next::codec::Id),
+ /// `avcodec_find_decoder` returned null for the input codec id. The id
+ /// is reported as the raw integer (`AVCodecID` discriminant) — we do not
+ /// construct the bindgen `AVCodecID` enum from a runtime value, since
+ /// values outside our build's discriminant set would invoke UB.
+ #[error("no decoder for codec id {0}")]
+ NoCodec(u32),
/// The codec does not advertise a hardware configuration matching the
/// requested backend (via `avcodec_get_hw_config`).
diff --git a/src/frame.rs b/src/frame.rs
index d6ceca6..15e903c 100644
--- a/src/frame.rs
+++ b/src/frame.rs
@@ -82,7 +82,9 @@ impl Frame {
}
/// Bytes per row for `plane`. Reads `AVFrame.linesize[plane]` directly.
- /// Panics if `plane >= planes()`.
+ /// Panics if `plane >= planes()` or the linesize is non-positive (FFmpeg
+ /// allows negative linesize for vertically-flipped formats; this crate
+ /// does not surface those — call [`Self::data`] first to test safely).
pub fn stride(&self, plane: usize) -> usize {
let n = self.planes();
assert!(
@@ -90,16 +92,29 @@ impl Frame {
"stride: plane {plane} out of bounds (planes={n})"
);
// SAFETY: bounds-checked above; linesize is `[c_int; 8]`.
- unsafe { (*self.inner.as_ptr()).linesize[plane] as usize }
+ let linesize: i32 = unsafe { (*self.inner.as_ptr()).linesize[plane] };
+ assert!(
+ linesize > 0,
+ "stride: non-positive linesize {linesize} for plane {plane} \
+ (negative linesize means vertically-flipped — not supported)"
+ );
+ linesize as usize
}
/// Pixel data for `plane`.
///
- /// Returns `None` when the frame's pixel format is not one of the
- /// hardware-output formats listed in [`crate::pix_fmt`] — we cannot
- /// safely compute the plane size for an unknown layout. Returns `None`
- /// for an out-of-bounds plane index, a null data pointer, or an empty
- /// frame.
+ /// Returns `None` for any of the following — never panics:
+ /// - The frame's pixel format is not one of the hardware-output formats
+ /// listed in [`crate::pix_fmt`] (we cannot safely compute the plane
+ /// size for an unknown layout).
+ /// - The plane index is out of range.
+ /// - `AVFrame.linesize[plane]` is `<= 0` (negative linesize signals
+ /// vertically-flipped FFmpeg layouts which we do not surface; zero is
+ /// "no plane").
+ /// - `AVFrame.height` is `<= 0`.
+ /// - The computed slice length would overflow or exceed `isize::MAX`
+ /// (a precondition of [`std::slice::from_raw_parts`]).
+ /// - The plane's data pointer is null.
///
/// Currently supported (post-`av_hwframe_transfer_data`):
/// - 4:2:0 semi-planar 8-bit: `NV12`, `NV21`
@@ -112,12 +127,26 @@ impl Frame {
if plane >= self.planes() {
return None;
}
- let stride = self.stride(plane);
- let plane_height = plane_height_for(self.pix_fmt(), plane, self.height() as usize)?;
+
+ // SAFETY: bounds-checked plane index; `linesize` and `height` are
+ // primitive c_int reads that cannot themselves be UB.
+ let linesize: i32 = unsafe { (*self.inner.as_ptr()).linesize[plane] };
+ let height_int: i32 = unsafe { (*self.inner.as_ptr()).height };
+ if linesize <= 0 || height_int <= 0 {
+ return None;
+ }
+ let stride = linesize as usize;
+
+ let plane_height = plane_height_for(self.pix_fmt(), plane, height_int as usize)?;
let len = stride.checked_mul(plane_height)?;
- // SAFETY: bounds-checked plane index above. We trust FFmpeg to populate
- // `data[plane]` validly when `linesize[plane]` is non-zero (which we
- // verified via `planes()`); null-check guards against edge cases.
+ if len > isize::MAX as usize {
+ return None;
+ }
+
+ // SAFETY: linesize > 0 and height > 0 verified; len <= isize::MAX
+ // verified — both preconditions of `slice::from_raw_parts`. We trust
+ // FFmpeg to populate `data[plane]` validly when linesize[plane] is
+ // non-zero; the null check is a final defensive guard.
unsafe {
let ptr = (*self.inner.as_ptr()).data[plane];
if ptr.is_null() {
@@ -193,6 +222,52 @@ mod tests {
assert!(f.data(0).is_none());
}
+ /// Synthesize a frame with a negative linesize (FFmpeg's vertical-flip
+ /// convention) and assert `data()` refuses to construct a slice. Without
+ /// the linesize > 0 check, the negative `i32 as usize` would produce a
+ /// huge positive length and `from_raw_parts` would be UB.
+ #[test]
+ fn data_returns_none_for_negative_linesize() {
+ let mut f = Frame::empty();
+ unsafe {
+ let raw = f.inner.as_mut_ptr();
+ (*raw).format = pix_fmt::NV12;
+ (*raw).width = 1920;
+ (*raw).height = 1080;
+ (*raw).linesize[0] = -1920; // vertically-flipped
+ (*raw).linesize[1] = -1920;
+ // data pointers stay null; `data()` would return None on the null
+ // check anyway, but should bail earlier on the linesize sign.
+ }
+ assert!(f.data(0).is_none());
+ assert!(f.data(1).is_none());
+ }
+
+ #[test]
+ fn data_returns_none_for_non_positive_height() {
+ let mut f = Frame::empty();
+ unsafe {
+ let raw = f.inner.as_mut_ptr();
+ (*raw).format = pix_fmt::NV12;
+ (*raw).width = 1920;
+ (*raw).height = 0;
+ (*raw).linesize[0] = 1920;
+ (*raw).linesize[1] = 1920;
+ }
+ assert!(f.data(0).is_none());
+ }
+
+ #[test]
+ #[should_panic(expected = "non-positive linesize")]
+ fn stride_panics_on_negative_linesize() {
+ let mut f = Frame::empty();
+ unsafe {
+ let raw = f.inner.as_mut_ptr();
+ (*raw).linesize[0] = -1920;
+ }
+ let _ = f.stride(0);
+ }
+
#[test]
fn frame_is_send() {
fn check() {}
From b48e5329253cdf4f6ae6740c950dd4486e0e898a Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 18:37:37 +1200
Subject: [PATCH 07/27] update
---
src/decoder.rs | 44 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 40 insertions(+), 4 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index fcaea44..0bd638e 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -313,9 +313,14 @@ impl VideoDecoder {
let res = self.state.inner.receive_frame(&mut self.hw_frame);
match res {
Err(e) => {
- if is_transient(&e) {
+ // EAGAIN is normal backpressure — pass through unconditionally.
+ if is_eagain(&e) {
return Err(Error::Ffmpeg(e));
}
+ // EOF and every other non-EAGAIN error: if we are still
+ // probing, treat it as candidate failure — a backend that drains
+ // to EOF without ever producing a frame should not silently
+ // present as "stream over" to the caller. Advance and retry.
if self.probe.is_some() && self.advance_probe()? {
// Probe advance may have populated `pending_frames`; deliver
// one of those before reading more from the new candidate.
@@ -324,6 +329,8 @@ impl VideoDecoder {
}
continue;
}
+ // Probe collapsed or exhausted — surface the error (including EOF
+ // for a genuinely empty stream).
return Err(Error::Ffmpeg(e));
}
Ok(()) => {
@@ -372,10 +379,20 @@ impl VideoDecoder {
true
}
- /// Flush internal buffers (e.g. after a seek). Resets probe-time buffer if
- /// active, since post-seek packets do not align with replayed history.
+ /// Flush internal buffers (e.g. after a seek).
+ ///
+ /// Discards every frame buffered by the decoder, every frame queued during
+ /// probe replay (`pending_frames`), and the residual `hw_frame` scratch
+ /// buffer. Probe-time replay state (buffered packets, EOF marker) is also
+ /// cleared since post-seek packets do not align with the previously
+ /// captured history. After a flush, the next `receive_frame` waits for new
+ /// post-seek input.
pub fn flush(&mut self) {
self.state.inner.flush();
+ // SAFETY: hw_frame is a valid AVFrame we own; av_frame_unref is a no-op
+ // for an already-empty frame.
+ unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) };
+ self.pending_frames.clear();
if let Some(probe) = self.probe.as_mut() {
probe.buffered_packets.clear();
probe.eof_sent = false;
@@ -540,9 +557,28 @@ impl VideoDecoder {
// SAFETY: ctx is a freshly-constructed AVCodecContext we own;
// av_buffer_ref bumps the refcount of the device buffer for FFmpeg's
// use (we keep our own ref in `hw_device_ref` for cleanup).
+ // av_buffer_ref returns NULL on allocation failure; we must check it
+ // before assigning, otherwise the codec context would be opened with a
+ // HW-flagged setup but no actual device reference.
+ let device_ref_for_ctx = unsafe { av_buffer_ref(hw_device_ref) };
+ if device_ref_for_ctx.is_null() {
+ // SAFETY: rolling back what we just allocated above. hw_device_ref
+ // is non-null (we checked after av_hwdevice_ctx_create); callback_state
+ // was just freshly Box::into_raw'd.
+ unsafe {
+ let mut hw = hw_device_ref;
+ av_buffer_unref(&mut hw);
+ drop(Box::from_raw(callback_state));
+ }
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ }));
+ }
+ // SAFETY: device_ref_for_ctx is a valid AVBufferRef* from av_buffer_ref;
+ // ctx is freshly built and owned by us.
unsafe {
let raw = ctx.as_mut_ptr();
- (*raw).hw_device_ctx = av_buffer_ref(hw_device_ref);
+ (*raw).hw_device_ctx = device_ref_for_ctx;
(*raw).opaque = callback_state.cast();
(*raw).get_format = Some(get_hw_format);
}
From 4dc0be9ed900f0d0ca2477ebdf052dddc5110a8a Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 18:52:46 +1200
Subject: [PATCH 08/27] update
---
src/decoder.rs | 49 +++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 47 insertions(+), 2 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index 0bd638e..542b36f 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -79,6 +79,20 @@ struct DecoderState {
callback_state: *mut CallbackState,
}
+/// Maximum number of packets we are willing to buffer for probe replay
+/// before abandoning the fallback safety net. Set high enough to absorb
+/// long B-frame GOPs and codec setup latency, low enough to bound memory
+/// against malicious / pathological streams that never produce a first
+/// frame.
+const MAX_PROBE_PACKETS: usize = 256;
+
+/// Maximum total compressed-byte size of buffered probe packets. Each
+/// `Packet` clone holds a refcounted reference to the demuxer's bitstream
+/// data — even though the clone itself is shallow, the underlying buffers
+/// stay alive until we drop them. 64 MiB is generous for normal video and
+/// gives untrusted media a hard ceiling.
+const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024;
+
/// State carried only during the probe window (before the first successful
/// frame). Holds enough information to tear down the current decoder and
/// retry with the next backend.
@@ -90,8 +104,12 @@ struct ProbeState {
remaining_backends: Vec,
/// Packets sent so far, kept for replay through any candidate backend.
/// Preserved across failed candidates — only cleared when the probe
- /// collapses on a successful first frame.
+ /// collapses on a successful first frame, or when the probe is
+ /// abandoned due to the size caps.
buffered_packets: Vec,
+ /// Cumulative size (in compressed bytes) of `buffered_packets`. Tracked
+ /// incrementally so we don't have to re-sum on every send.
+ buffered_bytes: usize,
/// Whether `send_eof` has been called; replayed alongside packets.
eof_sent: bool,
}
@@ -155,6 +173,7 @@ impl VideoDecoder {
codec,
remaining_backends: remaining,
buffered_packets: Vec::new(),
+ buffered_bytes: 0,
eof_sent: false,
});
return Ok(Self {
@@ -231,12 +250,37 @@ impl VideoDecoder {
/// probe to the next candidate and retries the packet there. The caller
/// observes only the eventual success or, if the probe is exhausted, the
/// final error.
+ ///
+ /// If the probe window grows beyond [`MAX_PROBE_PACKETS`] or
+ /// [`MAX_PROBE_PACKET_BYTES`] without producing a first frame (a stream
+ /// the active backend is silently mishandling, or pathological input),
+ /// the probe is **abandoned**: replay history is dropped, queued frames
+ /// are cleared, and `self.probe = None`. The active backend continues
+ /// serving the caller without fallback. A `tracing::warn!` records this
+ /// so it is visible in production logs.
pub fn send_packet(&mut self, packet: &Packet) -> Result<()> {
loop {
match self.state.inner.send_packet(packet) {
Ok(()) => {
if let Some(probe) = self.probe.as_mut() {
- probe.buffered_packets.push(packet.clone());
+ let pkt_size = packet.size();
+ let new_count = probe.buffered_packets.len() + 1;
+ let new_bytes = probe.buffered_bytes.saturating_add(pkt_size);
+ if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES {
+ tracing::warn!(
+ packets = new_count,
+ bytes = new_bytes,
+ max_packets = MAX_PROBE_PACKETS,
+ max_bytes = MAX_PROBE_PACKET_BYTES,
+ "hwdecode: probe window exceeded caps without first frame; \
+ abandoning fallback safety net"
+ );
+ self.probe = None;
+ self.pending_frames.clear();
+ } else {
+ probe.buffered_packets.push(packet.clone());
+ probe.buffered_bytes = new_bytes;
+ }
}
return Ok(());
}
@@ -395,6 +439,7 @@ impl VideoDecoder {
self.pending_frames.clear();
if let Some(probe) = self.probe.as_mut() {
probe.buffered_packets.clear();
+ probe.buffered_bytes = 0;
probe.eof_sent = false;
}
}
From 964e9e7c571269bce792094fa09bf566e1d4e32d Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 21:35:50 +1200
Subject: [PATCH 09/27] update
---
README.md | 4 +-
docs/design.md | 4 +-
src/decoder.rs | 112 +++++++++++++++++++++++++++++++++++++++++++++++--
src/frame.rs | 10 ++++-
4 files changed, 124 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index bcfb058..007eac3 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,9 @@ unconditionally.
## Build requirements
-- A system FFmpeg ≥ 4.x linkable via `pkg-config`. Verify with
+- A system FFmpeg ≥ **5.1** linkable via `pkg-config` (we reference
+ `AV_PIX_FMT_P212LE` / `AV_PIX_FMT_P412LE`, which were added in 5.1).
+ Tested against 8.1. Verify with
`ffmpeg -hwaccels` that your build has the backends you expect compiled in
(e.g. `videotoolbox` on macOS, `vaapi` / `cuda` on Linux,
`d3d11va` / `cuda` on Windows).
diff --git a/docs/design.md b/docs/design.md
index 056bb4f..6acc8c5 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -130,7 +130,9 @@ No other modules. Keep the surface small.
No platform-specific Cargo features. `cfg!(target_os = ...)` selects which `AVHWDeviceType` constants we even attempt — the FFI symbols are linked unconditionally via `ffmpeg-sys-next`.
-System FFmpeg ≥ 4.x. Verified against the user's macOS Homebrew build (FFmpeg 8.1, VideoToolbox enabled).
+System FFmpeg ≥ **5.1** (we reference `AV_PIX_FMT_P212LE` / `AV_PIX_FMT_P412LE`,
+added upstream in 5.1). Verified against the macOS Homebrew build (FFmpeg 8.1,
+VideoToolbox enabled).
## Testing
diff --git a/src/decoder.rs b/src/decoder.rs
index 542b36f..3a8067e 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -62,6 +62,10 @@ pub struct VideoDecoder {
/// candidate state is committed. [`Self::receive_frame`] dequeues these
/// FIFO before reading from `state.inner`.
pending_frames: VecDeque,
+ /// Per-decoder byte budget for [`Self::pending_frames`] during probe
+ /// replay. Defaults to [`DEFAULT_MAX_PROBE_PENDING_BYTES`]; override via
+ /// [`Self::with_max_probe_pending_bytes`].
+ max_probe_pending_bytes: usize,
}
/// Owned FFmpeg state for one open codec context. Has its own `Drop` so we
@@ -93,6 +97,30 @@ const MAX_PROBE_PACKETS: usize = 256;
/// gives untrusted media a hard ceiling.
const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024;
+/// Maximum number of CPU frames we are willing to queue from a candidate
+/// during probe replay. Each frame is a fully-allocated CPU buffer
+/// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so
+/// an unbounded queue would OOM on a candidate with a shallow internal
+/// queue against a deep replay history. Drained candidate frames in
+/// excess of this cap (or [`DEFAULT_MAX_PROBE_PENDING_BYTES`], whichever
+/// hits first) are discarded with a `tracing::warn!`; we still drain so
+/// `send_packet` can keep feeding the candidate.
+const MAX_PROBE_PENDING_FRAMES: usize = 16;
+
+/// Default byte budget for probe-replay drained frames. 256 MiB covers the
+/// full 16-frame count cap at 1080p NV12 (~3 MiB each) but not at 4K P010
+/// (~24 MiB each, 384 MiB for 16), so the byte cap fires first from 4K
+/// 10-bit upward (8K P010: ~96 MiB per frame → only ~2 frames fit).
+///
+/// Override per-decoder with [`VideoDecoder::with_max_probe_pending_bytes`]
+/// when targeting 8K+ workloads or memory-constrained environments.
+///
+/// TODO: when frames significantly exceed typical sizes, consider
+/// memmap-backed pending buffers (write transferred frames to a temp file
+/// or shared-memory segment) so the resident set stays bounded even when
+/// the byte cap is raised. Out of scope for v0.0.0.
+pub const DEFAULT_MAX_PROBE_PENDING_BYTES: usize = 256 * 1024 * 1024;
+
/// State carried only during the probe window (before the first successful
/// frame). Holds enough information to tear down the current decoder and
/// retry with the next backend.
@@ -181,6 +209,7 @@ impl VideoDecoder {
hw_frame: frame::Video::empty(),
probe,
pending_frames: VecDeque::new(),
+ max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES,
});
}
Err(e) => {
@@ -207,9 +236,30 @@ impl VideoDecoder {
hw_frame: frame::Video::empty(),
probe: None,
pending_frames: VecDeque::new(),
+ max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES,
})
}
+ /// Override the byte budget for probe-replay queued frames. Defaults to
+ /// [`DEFAULT_MAX_PROBE_PENDING_BYTES`]. Use a higher value when targeting
+ /// 8K+ workloads where 16 frames at full size could exceed the default;
+ /// use a lower value in memory-constrained services to bound peak
+ /// allocation more tightly.
+ ///
+ /// Setting after the first frame has been delivered is harmless but has
+ /// no observable effect — the probe has already collapsed and the cap
+ /// only applies during replay drain.
+ ///
+ /// Returns `self` for builder-style chaining:
+ /// ```ignore
+ /// let decoder = VideoDecoder::open(params)?
+ /// .with_max_probe_pending_bytes(1024 * 1024 * 1024); // 1 GiB
+ /// ```
+ pub fn with_max_probe_pending_bytes(mut self, bytes: usize) -> Self {
+ self.max_probe_pending_bytes = bytes;
+ self
+ }
+
/// The backend currently producing frames. While the probe is still in
/// progress (no frame received yet) this returns the optimistically
/// selected backend; after the first frame, it is the backend that
@@ -498,6 +548,8 @@ impl VideoDecoder {
// commit they move to `self.pending_frames` and are delivered FIFO
// by `receive_frame`, so the caller never loses initial frames.
let mut local_pending: VecDeque = VecDeque::new();
+ let mut local_pending_bytes: usize = 0;
+ let max_pending_bytes = self.max_probe_pending_bytes;
let replay_result: std::result::Result<(), ffmpeg_next::Error> = {
let probe = self.probe.as_ref().expect("probe state present");
let mut hw_buf = frame::Video::empty();
@@ -510,9 +562,13 @@ impl VideoDecoder {
Err(e) if is_eagain(&e) => {
// Drain candidate output (transferring + queueing each frame)
// and retry the same packet.
- if let Err(de) =
- drain_into_pending(&mut candidate_state.inner, &mut hw_buf, &mut local_pending)
- {
+ if let Err(de) = drain_into_pending(
+ &mut candidate_state.inner,
+ &mut hw_buf,
+ &mut local_pending,
+ &mut local_pending_bytes,
+ max_pending_bytes,
+ ) {
r = Err(de);
break 'replay;
}
@@ -727,10 +783,33 @@ fn drain_into_pending(
decoder: &mut ffmpeg_next::decoder::Video,
hw_buf: &mut frame::Video,
pending: &mut VecDeque,
+ pending_bytes: &mut usize,
+ max_bytes: usize,
) -> std::result::Result<(), ffmpeg_next::Error> {
loop {
match decoder.receive_frame(hw_buf) {
Ok(()) => {
+ // Either cap (count or bytes) closes the queue. We still drain so
+ // `send_packet` can resume on the next iteration; we just stop
+ // accumulating.
+ //
+ // TODO: at very large frame sizes (8K HDR P010, > ~96 MiB each)
+ // even a single retained frame is significant. Future direction:
+ // memmap-backed pending frames (write to a temp file or shared
+ // memory segment) so the resident set stays bounded even when the
+ // byte cap is raised. Out of scope for v0.0.0.
+ if pending.len() >= MAX_PROBE_PENDING_FRAMES || *pending_bytes >= max_bytes {
+ tracing::warn!(
+ frames = pending.len(),
+ bytes = *pending_bytes,
+ max_frames = MAX_PROBE_PENDING_FRAMES,
+ max_bytes = max_bytes,
+ "hwdecode: probe pending cap reached; discarding drained candidate frame"
+ );
+ // SAFETY: hw_buf is owned and valid; unref of an empty frame is a no-op.
+ unsafe { av_frame_unref(hw_buf.as_mut_ptr()) };
+ continue;
+ }
let mut cpu = frame::Video::empty();
// SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data
// allocates buffers on `cpu`. copy_props moves timing/side data over.
@@ -744,6 +823,7 @@ fn drain_into_pending(
return Err(ffmpeg_next::Error::from(r2));
}
}
+ *pending_bytes = pending_bytes.saturating_add(cpu_frame_bytes(&cpu));
pending.push_back(cpu);
}
Err(e) if is_transient(&e) => return Ok(()),
@@ -752,6 +832,32 @@ fn drain_into_pending(
}
}
+/// Approximate resident size of a CPU frame: sum of `linesize[plane] *
+/// plane_height` across populated planes. Returns 0 for unknown formats
+/// (we under-count rather than over-count, on the principle that under-
+/// counting only delays the cap firing, while over-counting could starve
+/// legitimate streams).
+fn cpu_frame_bytes(frame: &frame::Video) -> usize {
+ // SAFETY: AVFrame.height / format / linesize are c_int reads.
+ let (height, pix_fmt, linesizes) = unsafe {
+ let raw = frame.as_ptr();
+ ((*raw).height as usize, (*raw).format, (*raw).linesize)
+ };
+ let mut total: usize = 0;
+ for (plane, linesize) in linesizes.iter().enumerate() {
+ if *linesize <= 0 {
+ break;
+ }
+ let stride = *linesize as usize;
+ let Some(plane_h) = crate::frame::plane_height_for(pix_fmt, plane, height) else {
+ // Unknown format / unsupported plane index — bail out, accept under-count.
+ break;
+ };
+ total = total.saturating_add(stride.saturating_mul(plane_h));
+ }
+ total
+}
+
#[allow(dead_code)]
fn _assert_send() {
fn check() {}
diff --git a/src/frame.rs b/src/frame.rs
index 15e903c..22f7783 100644
--- a/src/frame.rs
+++ b/src/frame.rs
@@ -170,7 +170,15 @@ impl Default for Frame {
/// Number of rows in `plane` for a frame of `frame_height` and the given
/// pixel format. `None` for formats not in the supported HW-output set.
-fn plane_height_for(pix_fmt_int: i32, plane: usize, frame_height: usize) -> Option {
+///
+/// Crate-internal so the decoder's probe-replay accountant can compute
+/// per-frame byte sizes without re-implementing the chroma-subsampling
+/// table.
+pub(crate) fn plane_height_for(
+ pix_fmt_int: i32,
+ plane: usize,
+ frame_height: usize,
+) -> Option {
match pix_fmt_int {
// 4:2:0 semi-planar — Y full height, chroma half height.
pix_fmt::NV12
From d2d96a8b00d99aaa4c1f249477163eb47f5c814f Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 21:59:19 +1200
Subject: [PATCH 10/27] update
---
benches/decode.rs | 4 +--
examples/decode.rs | 2 +-
src/decoder.rs | 76 ++++++++++++++++++++++++++++++++++++----------
src/frame.rs | 39 +++++++++++++++---------
tests/decode.rs | 2 +-
tests/hw_smoke.rs | 2 +-
6 files changed, 90 insertions(+), 35 deletions(-)
diff --git a/benches/decode.rs b/benches/decode.rs
index 5f53a66..9e53f0a 100644
--- a/benches/decode.rs
+++ b/benches/decode.rs
@@ -32,7 +32,7 @@ fn decode_all_hw(path: &PathBuf) -> Result {
let stream_index = stream.index();
let mut decoder = VideoDecoder::open(stream.parameters())?;
- let mut frame = Frame::empty();
+ let mut frame = Frame::empty()?;
let mut count = 0_usize;
let mut drain = |decoder: &mut VideoDecoder, count: &mut usize| -> Result<(), hwdecode::Error> {
@@ -122,7 +122,7 @@ fn bench_decode(c: &mut Criterion) {
let stream_index = stream.index();
match VideoDecoder::open(stream.parameters()) {
Ok(mut dec) => {
- let mut frame = Frame::empty();
+ let mut frame = Frame::empty().expect("alloc probe frame");
'probe: for (s, packet) in input.packets() {
if s.index() != stream_index {
continue;
diff --git a/examples/decode.rs b/examples/decode.rs
index a1439d7..1d14de1 100644
--- a/examples/decode.rs
+++ b/examples/decode.rs
@@ -44,7 +44,7 @@ fn main() -> Result<(), Box> {
decoder.height(),
);
- let mut frame = Frame::empty();
+ let mut frame = Frame::empty()?;
let mut count: u64 = 0;
let drain = |decoder: &mut VideoDecoder, frame: &mut Frame, count: &mut u64| loop {
diff --git a/src/decoder.rs b/src/decoder.rs
index 3a8067e..a7739c2 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -206,7 +206,7 @@ impl VideoDecoder {
});
return Ok(Self {
state,
- hw_frame: frame::Video::empty(),
+ hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?,
probe,
pending_frames: VecDeque::new(),
max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES,
@@ -233,7 +233,7 @@ impl VideoDecoder {
let state = Self::build_state(parameters, codec, backend)?;
Ok(Self {
state,
- hw_frame: frame::Video::empty(),
+ hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?,
probe: None,
pending_frames: VecDeque::new(),
max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES,
@@ -552,7 +552,10 @@ impl VideoDecoder {
let max_pending_bytes = self.max_probe_pending_bytes;
let replay_result: std::result::Result<(), ffmpeg_next::Error> = {
let probe = self.probe.as_ref().expect("probe state present");
- let mut hw_buf = frame::Video::empty();
+ let mut hw_buf = match alloc_av_frame() {
+ Ok(f) => f,
+ Err(e) => return Err(Error::Ffmpeg(e)),
+ };
let mut r: std::result::Result<(), ffmpeg_next::Error> = Ok(());
'replay: for pkt in &probe.buffered_packets {
@@ -740,6 +743,21 @@ fn is_transient(e: &ffmpeg_next::Error) -> bool {
is_eagain(e) || matches!(e, ffmpeg_next::Error::Eof)
}
+/// Allocate a fresh `frame::Video`, checking that `av_frame_alloc` did not
+/// return NULL. ffmpeg-next's `frame::Video::empty()` does not surface that
+/// failure and the resulting null pointer would be UB on the next field
+/// access; this wrapper catches it and surfaces it as `ENOMEM`.
+fn alloc_av_frame() -> std::result::Result {
+ let inner = frame::Video::empty();
+ // SAFETY: as_ptr() just exposes the inner pointer for inspection.
+ if unsafe { inner.as_ptr() }.is_null() {
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ Ok(inner)
+}
+
/// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine
/// distinguishes "drain output and retry" from "stream over").
fn is_eagain(e: &ffmpeg_next::Error) -> bool {
@@ -810,7 +828,7 @@ fn drain_into_pending(
unsafe { av_frame_unref(hw_buf.as_mut_ptr()) };
continue;
}
- let mut cpu = frame::Video::empty();
+ let mut cpu = alloc_av_frame()?;
// SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data
// allocates buffers on `cpu`. copy_props moves timing/side data over.
unsafe {
@@ -823,8 +841,26 @@ fn drain_into_pending(
return Err(ffmpeg_next::Error::from(r2));
}
}
- *pending_bytes = pending_bytes.saturating_add(cpu_frame_bytes(&cpu));
- pending.push_back(cpu);
+ // Conservative byte-cap accounting: if we can't size this frame
+ // (unknown CPU pix_fmt — should not happen with strict get_format,
+ // but a misbehaving codec could surface one), discard rather than
+ // queue an unaccounted-for allocation. Never push something whose
+ // size we can't deduct from the budget.
+ match cpu_frame_bytes(&cpu) {
+ Some(bytes) => {
+ *pending_bytes = pending_bytes.saturating_add(bytes);
+ pending.push_back(cpu);
+ }
+ None => {
+ // SAFETY: AVFrame.format is c_int, safe to read.
+ let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format };
+ tracing::warn!(
+ pix_fmt,
+ "hwdecode: cannot size unknown CPU pix_fmt during replay; discarding drained frame"
+ );
+ // cpu drops here, freeing its buffers via Frame::drop.
+ }
+ }
}
Err(e) if is_transient(&e) => return Ok(()),
Err(e) => return Err(e),
@@ -833,29 +869,37 @@ fn drain_into_pending(
}
/// Approximate resident size of a CPU frame: sum of `linesize[plane] *
-/// plane_height` across populated planes. Returns 0 for unknown formats
-/// (we under-count rather than over-count, on the principle that under-
-/// counting only delays the cap firing, while over-counting could starve
-/// legitimate streams).
-fn cpu_frame_bytes(frame: &frame::Video) -> usize {
+/// plane_height` across populated planes.
+///
+/// Returns `None` for pixel formats not in our chroma-subsampling table,
+/// so the caller can refuse to queue an allocation it can't account for.
+/// Returning 0 for unknown formats would silently bypass the byte cap and
+/// let an unbounded number of large frames into `pending_frames`.
+fn cpu_frame_bytes(frame: &frame::Video) -> Option {
// SAFETY: AVFrame.height / format / linesize are c_int reads.
let (height, pix_fmt, linesizes) = unsafe {
let raw = frame.as_ptr();
((*raw).height as usize, (*raw).format, (*raw).linesize)
};
let mut total: usize = 0;
+ let mut any_plane = false;
for (plane, linesize) in linesizes.iter().enumerate() {
if *linesize <= 0 {
break;
}
+ any_plane = true;
let stride = *linesize as usize;
- let Some(plane_h) = crate::frame::plane_height_for(pix_fmt, plane, height) else {
- // Unknown format / unsupported plane index — bail out, accept under-count.
- break;
- };
+ // If we can't size *any* populated plane, the format is outside our
+ // table — refuse to size the frame at all (conservative; discarding
+ // is safer than under-counting against the byte cap).
+ let plane_h = crate::frame::plane_height_for(pix_fmt, plane, height)?;
total = total.saturating_add(stride.saturating_mul(plane_h));
}
- total
+ if !any_plane {
+ // Genuinely empty frame (no populated planes) — nothing to account for.
+ return Some(0);
+ }
+ Some(total)
}
#[allow(dead_code)]
diff --git a/src/frame.rs b/src/frame.rs
index 22f7783..65ec63d 100644
--- a/src/frame.rs
+++ b/src/frame.rs
@@ -20,7 +20,10 @@ use std::slice;
use ffmpeg_next::frame;
-use crate::pix_fmt;
+use crate::{
+ error::{Error, Result},
+ pix_fmt,
+};
/// CPU-side decoded video frame produced by [`crate::VideoDecoder`].
pub struct Frame {
@@ -30,10 +33,20 @@ pub struct Frame {
impl Frame {
/// Construct an empty frame, suitable as the destination passed to
/// [`crate::VideoDecoder::receive_frame`].
- pub fn empty() -> Self {
- Self {
- inner: frame::Video::empty(),
+ ///
+ /// Returns `Err(Error::Ffmpeg(Other { errno: ENOMEM }))` when the
+ /// underlying `av_frame_alloc()` returns NULL — `ffmpeg_next` does not
+ /// surface that failure, so we check it here rather than letting a null
+ /// pointer flow into the safe accessors and become UB on first read.
+ pub fn empty() -> Result<Self> {
+ // SAFETY: as_ptr() is safe; we just inspect the value (potentially null).
+ let inner = frame::Video::empty();
+ if unsafe { inner.as_ptr() }.is_null() {
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ }));
}
+ Ok(Self { inner })
}
/// Width in pixels.
@@ -162,11 +175,9 @@ impl Frame {
}
}
-impl Default for Frame {
- fn default() -> Self {
- Self::empty()
- }
-}
+// `Default` intentionally omitted: constructing a frame can fail (OOM
+// in `av_frame_alloc`), and a panicking `default()` would defeat the
+// safety stance of [`Frame::empty`]. Use `Frame::empty()?` directly.
/// Number of rows in `plane` for a frame of `frame_height` and the given
/// pixel format. `None` for formats not in the supported HW-output set.
@@ -213,7 +224,7 @@ mod tests {
#[test]
fn empty_frame_has_zero_dimensions_and_no_pts() {
- let f = Frame::empty();
+ let f = Frame::empty().expect("alloc");
assert_eq!(f.width(), 0);
assert_eq!(f.height(), 0);
assert_eq!(f.pts(), None);
@@ -225,7 +236,7 @@ mod tests {
#[test]
fn data_returns_none_for_unknown_format() {
- let f = Frame::empty();
+ let f = Frame::empty().expect("alloc");
// pix_fmt is NONE (-1), not in the supported set.
assert!(f.data(0).is_none());
}
@@ -236,7 +247,7 @@ mod tests {
/// huge positive length and `from_raw_parts` would be UB.
#[test]
fn data_returns_none_for_negative_linesize() {
- let mut f = Frame::empty();
+ let mut f = Frame::empty().expect("alloc");
unsafe {
let raw = f.inner.as_mut_ptr();
(*raw).format = pix_fmt::NV12;
@@ -253,7 +264,7 @@ mod tests {
#[test]
fn data_returns_none_for_non_positive_height() {
- let mut f = Frame::empty();
+ let mut f = Frame::empty().expect("alloc");
unsafe {
let raw = f.inner.as_mut_ptr();
(*raw).format = pix_fmt::NV12;
@@ -268,7 +279,7 @@ mod tests {
#[test]
#[should_panic(expected = "non-positive linesize")]
fn stride_panics_on_negative_linesize() {
- let mut f = Frame::empty();
+ let mut f = Frame::empty().expect("alloc");
unsafe {
let raw = f.inner.as_mut_ptr();
(*raw).linesize[0] = -1920;
diff --git a/tests/decode.rs b/tests/decode.rs
index 10a8bcb..2431ff1 100644
--- a/tests/decode.rs
+++ b/tests/decode.rs
@@ -44,7 +44,7 @@ fn auto_open_decodes_at_least_one_frame() {
assert_eq!(decoder.width(), expected_w);
assert_eq!(decoder.height(), expected_h);
- let mut frame = Frame::empty();
+ let mut frame = Frame::empty().expect("alloc frame");
let mut count = 0_usize;
let target = 30_usize;
diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs
index 6e11765..372c8f7 100644
--- a/tests/hw_smoke.rs
+++ b/tests/hw_smoke.rs
@@ -32,7 +32,7 @@ fn auto_probe_picks_hardware_backend() {
// backend that actually produced it. Checking `decoder.backend()` before
// any frame has been received would observe the optimistic pre-probe
// value and could false-pass when a HW backend silently degrades.
- let mut frame = Frame::empty();
+ let mut frame = Frame::empty().expect("alloc frame");
let mut got_frame = false;
for (s, packet) in input.packets() {
if s.index() != stream_index {
From e1899e63e1aea2aabb3da6cca5382d40ddf9bb8f Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 22:17:29 +1200
Subject: [PATCH 11/27] update
---
Cargo.toml | 2 +-
README.md | 8 +++++--
docs/design.md | 36 ++++++++++++++++++----------
src/decoder.rs | 64 +++++++++++++++++++++++++++++++++-----------------
src/lib.rs | 21 +++++++++--------
5 files changed, 84 insertions(+), 47 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 8e4ea79..7691656 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,7 @@ name = "hwdecode"
version = "0.0.0"
edition = "2021"
rust-version = "1.95"
-description = "Cross-platform hardware-accelerated video decoder built on top of ffmpeg-next, with auto-probe and software fallback."
+description = "Cross-platform hardware-only video decoder built on top of ffmpeg-next, with auto-probe across HW backends. Callers handle software fallback."
repository = "https://github.com/findit-ai/hwdecode"
homepage = "https://github.com/findit-ai/hwdecode"
documentation = "https://docs.rs/hwdecode"
diff --git a/README.md b/README.md
index 007eac3..c4e9108 100644
--- a/README.md
+++ b/README.md
@@ -50,13 +50,17 @@ while decoder.receive_frame(&mut frame).is_ok() {
}
```
-To force a specific backend (no probe, no fallback):
+To force a specific hardware backend (no probe, no fallback):
```rust
use hwdecode::{Backend, VideoDecoder};
-let decoder = VideoDecoder::open_with(parameters, Backend::Software)?;
+let decoder = VideoDecoder::open_with(parameters, Backend::VideoToolbox)?;
```
+`hwdecode` is hardware-only: there is no `Backend::Software`. If `open`
+returns `Error::AllBackendsFailed`, fall back to a software decoder
+yourself (typically `ffmpeg::decoder::Video`).
+
## Running tests and benches
The integration test and benchmark expect a real video file. Set
diff --git a/docs/design.md b/docs/design.md
index 6acc8c5..2c54aee 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -1,12 +1,20 @@
# hwdecode — design
-Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next` 8.1.
+Cross-platform **hardware-only** video decoder built on top of `ffmpeg-next` 8.1.
+
+> **Status note.** This document was the original spec from the brainstorm
+> phase and parts have evolved since: the crate is hardware-only (no
+> `Backend::Software`), `Frame` is its own safe wrapper, and several pixel-
+> format / safety details were tightened during review. For the canonical
+> behavior, read `src/lib.rs` and `README.md`. Sections below have been
+> trimmed where they conflicted; the spec is otherwise preserved as
+> historical context.
## Goals
- Drop-in replacement for `ffmpeg::decoder::Video` at the call site (`send_packet` / `receive_frame` / `send_eof` / `flush`).
-- Auto-probe the platform's hardware backends and silently fall back to software if none open. Caller never has to think about hwaccel availability.
-- Hand back native-format CPU frames (NV12/P010 from the HW path, codec-native from the SW path). Pixel-format conversion is the caller's responsibility (e.g. via `colconv`).
+- Auto-probe the platform's hardware backends. **No software fallback inside this crate** — callers handle that themselves (e.g. via `ffmpeg::decoder::Video`) when `open` returns `Error::AllBackendsFailed`.
+- Hand back native-format CPU frames (NV12/P010 from the HW path post-transfer). Pixel-format conversion is the caller's responsibility (e.g. via `colconv`).
- Cross-platform: macOS / iOS / iPadOS / tvOS, Linux (Intel/AMD/NVIDIA), Windows (any GPU + CUDA on NVIDIA).
## Non-goals
@@ -22,8 +30,10 @@ Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next`
pub struct VideoDecoder { /* private */ }
impl VideoDecoder {
- /// Auto-probe HW backends in platform order; fall back to software.
- /// On success, `backend()` reports the one that won.
+ /// Auto-probe HW backends in platform order. Returns
+ /// `Error::AllBackendsFailed` if no backend can decode this stream;
+ /// caller falls back to software decoder of choice. On success,
+ /// `backend()` reports the one that won.
pub fn open(parameters: ffmpeg::codec::Parameters) -> Result<Self, Error>;
/// Force a specific backend. No probe, no fallback.
@@ -39,11 +49,11 @@ impl VideoDecoder {
pub fn send_packet(&mut self, packet: &ffmpeg::Packet) -> Result<(), Error>;
pub fn send_eof(&mut self) -> Result<(), Error>;
- /// Receive a CPU-side frame. For HW backends, internally calls
+ /// Receive a CPU-side frame. Internally calls
/// `av_hwframe_transfer_data` and copies PTS/timing onto the result;
- /// output format is NV12 (8-bit) or P010 (10-bit). For SW, the frame
- /// is in the codec's native format.
- pub fn receive_frame(&mut self, frame: &mut ffmpeg::frame::Video) -> Result<(), Error>;
+ /// output format is NV12 (8-bit) or P010 (10-bit) per the HW backend's
+ /// `AVHWFramesContext::sw_format`.
+ pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<(), Error>;
pub fn flush(&mut self);
}
@@ -89,7 +99,7 @@ Always device 0 / system default (`av_hwdevice_ctx_create(.., NULL, ..)`). No en
### `get_format` callback
-A static `extern "C"` callback. The decoder context's `opaque` field points to a small heap-allocated `CallbackState { wanted: AVPixelFormat }`. The callback walks the offered `pix_fmts` list, returns `wanted` if present, else `AV_PIX_FMT_NONE` (which forces FFmpeg to retry with software). This is the standard pattern from `doc/examples/hw_decode.c`.
+A static `extern "C"` callback. The decoder context's `opaque` field points to a small heap-allocated `CallbackState`. The callback walks the offered `pix_fmts` list as raw `i32` (avoiding bindgen-enum UB on header skew), returns `wanted` if present, else `AV_PIX_FMT_NONE` (which causes the decoder to fail; the caller-side probe loop then tears down and tries the next hardware backend).
### Frame transfer
@@ -138,7 +148,7 @@ VideoToolbox enabled).
1. **Unit tests** (`src/backend.rs`, `src/error.rs`) — pure-Rust: probe-order construction per platform, `Backend` ↔ `AVHWDeviceType` mapping, error formatting.
2. **Integration** (`tests/decode.rs`) — opens a sample H.264 file via `ffmpeg::format::input`, decodes 30 frames through `VideoDecoder::open` (auto-probe), asserts frame count and dimensions. Sample path comes from env var `HWDECODE_SAMPLE_VIDEO`; test is skipped with a clear `eprintln!` if unset.
-3. **HW smoke** (`tests/hw_smoke.rs`, `#[ignore]`) — same decode, but additionally asserts `decoder.backend() != Backend::Software`. CI runs this on platform-matched runners.
+3. **HW smoke** (`tests/hw_smoke.rs`, `#[ignore]`) — same decode, asserts `decoder.backend()` returns one of the hardware variants (the enum no longer has a Software variant; this is a sanity check against accidental no-op selection). CI runs this on platform-matched runners.
Sample-file env var keeps the repo binary-free. Documented in `README.md`.
@@ -146,8 +156,8 @@ Sample-file env var keeps the repo binary-free. Documented in `README.md`.
`benches/decode.rs` (criterion) — two functions:
-- `bench_software_decode` — `VideoDecoder::open_with(.., Backend::Software)`, decode all frames of the sample, measure wall-clock per frame.
-- `bench_hardware_decode` — `VideoDecoder::open(..)` (auto-probe). Skipped (`return`) if `decoder.backend() == Backend::Software` (no HW available).
+- `bench_software_decode` — drives `ffmpeg::decoder::Video` directly (this crate has no software backend), decodes all frames, measures wall-clock per frame.
+- `bench_hardware_decode` — `VideoDecoder::open(..)` (auto-probe). Skipped if `open` returns `AllBackendsFailed` (no HW backend available on this host).
Both use the same `HWDECODE_SAMPLE_VIDEO` file. Bench prints which backend the HW run actually used, so results are interpretable across machines.
diff --git a/src/decoder.rs b/src/decoder.rs
index a7739c2..cbc995b 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -29,7 +29,13 @@ use crate::{
frame::Frame,
};
-/// Hardware-accelerated video decoder with software fallback.
+/// Hardware-accelerated video decoder.
+///
+/// Hardware-only — there is no software fallback inside this crate. If
+/// every hardware backend in the platform's probe order fails to open,
+/// `open` returns [`Error::AllBackendsFailed`] and the caller is
+/// responsible for falling back to a software decoder of their choice
+/// (e.g. `ffmpeg::decoder::Video`).
///
/// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface.
/// Decoded frames are returned through [`crate::Frame`], a CPU-side wrapper
@@ -183,9 +189,9 @@ impl VideoDecoder {
/// consumed, so a misbehaving middle backend cannot strand the caller.
///
/// [`Self::backend`] reflects whichever backend ultimately produced the
- /// first frame. Software is the last entry in every probe order, so
- /// `open` cannot return without a working decoder for any codec libavcodec
- /// supports.
+ /// first frame. If no hardware backend in the platform's probe order can
+ /// decode this stream, `open` returns [`Error::AllBackendsFailed`];
+ /// callers handle software fallback themselves.
pub fn open(parameters: codec::Parameters) -> Result<Self> {
let codec = find_decoder(&parameters)?;
let order = backend::probe_order();
@@ -223,11 +229,11 @@ impl VideoDecoder {
/// Open the decoder with a specific backend. No probe, no fallback.
///
- /// If `backend` is a hardware backend that the codec can't actually use
- /// for this stream, the failure surfaces from
- /// [`Self::receive_frame`] (the strict `get_format` callback returns
+ /// If `backend` cannot actually decode this stream, the failure surfaces
+ /// from [`Self::receive_frame`] (the strict `get_format` callback returns
/// `AV_PIX_FMT_NONE`, the decoder errors out). The caller is responsible
- /// for retrying with `Backend::Software` or another backend if desired.
+ /// for retrying with another hardware backend or falling back to a
+ /// software decoder of their choice (e.g. `ffmpeg::decoder::Video`).
pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result<Self> {
let codec = find_decoder(&parameters)?;
let state = Self::build_state(parameters, codec, backend)?;
@@ -807,9 +813,11 @@ fn drain_into_pending(
loop {
match decoder.receive_frame(hw_buf) {
Ok(()) => {
- // Either cap (count or bytes) closes the queue. We still drain so
- // `send_packet` can resume on the next iteration; we just stop
- // accumulating.
+ // Pre-transfer cap check: if we are already at or over either cap,
+ // the candidate is producing more than we can hold. Treat as an
+ // explicit candidate failure so `advance_probe` can try the next
+ // backend instead of committing a stream with silently-dropped
+ // frames in the middle.
//
// TODO: at very large frame sizes (8K HDR P010, > ~96 MiB each)
// even a single retained frame is significant. Future direction:
@@ -822,11 +830,13 @@ fn drain_into_pending(
bytes = *pending_bytes,
max_frames = MAX_PROBE_PENDING_FRAMES,
max_bytes = max_bytes,
- "hwdecode: probe pending cap reached; discarding drained candidate frame"
+ "hwdecode: probe pending cap reached; failing candidate replay"
);
// SAFETY: hw_buf is owned and valid; unref of an empty frame is a no-op.
unsafe { av_frame_unref(hw_buf.as_mut_ptr()) };
- continue;
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
}
let mut cpu = alloc_av_frame()?;
// SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data
@@ -841,14 +851,26 @@ fn drain_into_pending(
return Err(ffmpeg_next::Error::from(r2));
}
}
- // Conservative byte-cap accounting: if we can't size this frame
- // (unknown CPU pix_fmt — should not happen with strict get_format,
- // but a misbehaving codec could surface one), discard rather than
- // queue an unaccounted-for allocation. Never push something whose
- // size we can't deduct from the budget.
+ // Post-transfer accounting: size the frame and confirm we can fit
+ // it without exceeding the byte budget. If sizing fails (unknown
+ // pix_fmt) we still queue the frame — the count cap (16) bounds
+ // memory — but log that byte accounting under-counts.
match cpu_frame_bytes(&cpu) {
Some(bytes) => {
- *pending_bytes = pending_bytes.saturating_add(bytes);
+ let new_total = pending_bytes.saturating_add(bytes);
+ if new_total > max_bytes {
+ tracing::warn!(
+ pending_bytes = *pending_bytes,
+ frame_bytes = bytes,
+ max_bytes,
+ "hwdecode: queueing this frame would exceed byte cap; failing candidate replay"
+ );
+ // cpu drops here.
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ *pending_bytes = new_total;
pending.push_back(cpu);
}
None => {
@@ -856,9 +878,9 @@ fn drain_into_pending(
let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format };
tracing::warn!(
pix_fmt,
- "hwdecode: cannot size unknown CPU pix_fmt during replay; discarding drained frame"
+ "hwdecode: unknown CPU pix_fmt during replay; queueing without byte accounting (count cap still applies)"
);
- // cpu drops here, freeing its buffers via Frame::drop.
+ pending.push_back(cpu);
}
}
}
diff --git a/src/lib.rs b/src/lib.rs
index e6c12ce..b487132 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,16 +1,17 @@
-//! Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next`.
+//! Cross-platform **hardware** video decoder built on top of `ffmpeg-next`.
//!
//! [`VideoDecoder`] mirrors the surface of `ffmpeg::decoder::Video`
-//! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and silently picks the best
-//! hardware backend for the host platform, falling back to software if none open.
+//! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and auto-probes the
+//! host's hardware backends (VideoToolbox / VAAPI / NVDEC / D3D11VA).
+//! There is **no software fallback inside this crate** — if no hardware
+//! backend can decode the stream, [`VideoDecoder::open`] returns
+//! [`Error::AllBackendsFailed`] and the caller picks how to fall back
+//! (e.g. by opening an `ffmpeg::decoder::Video` directly).
//!
-//! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side. For
-//! hardware backends they are downloaded with `av_hwframe_transfer_data` (NV12
-//! for 8-bit input, P010 for 10-bit). For software backends the frame is in the
-//! codec's native format.
-//!
-//! Pixel-format conversion is intentionally out of scope; downstream code is
-//! expected to handle that (e.g. via `colconv`).
+//! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side
+//! and downloaded via `av_hwframe_transfer_data` (NV12 for 8-bit input,
+//! P010 for 10-bit). Pixel-format conversion is intentionally out of
+//! scope; downstream code handles that (e.g. via `colconv`).
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(docsrs, allow(unused_attributes))]
#![deny(missing_docs)]
From d351f1fb1a5539e54bc307067b01dfa9a36db960 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 22:31:58 +1200
Subject: [PATCH 12/27] update
---
src/decoder.rs | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index cbc995b..06b9504 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -202,8 +202,15 @@ impl VideoDecoder {
Ok(state) => {
tracing::info!(?backend, "hwdecode: opened video decoder (probing)");
let remaining = order[(i + 1)..].to_vec();
+ // Deep-copy the caller's `parameters` before storing in ProbeState.
+ // `codec::Parameters` from `stream.parameters()` carries an Rc
+ // owner pointing at the demuxer; moving that Rc to a worker
+ // thread (when VideoDecoder is sent) would race with the demuxer's
+ // Rc on the original thread. `Parameters::clone()` does
+ // `avcodec_parameters_copy` and returns a fully owned Parameters
+ // with `owner: None`, severing the link.
let probe = (!remaining.is_empty()).then(|| ProbeState {
- parameters,
+ parameters: parameters.clone(),
codec,
remaining_backends: remaining,
buffered_packets: Vec::new(),
@@ -852,9 +859,9 @@ fn drain_into_pending(
}
}
// Post-transfer accounting: size the frame and confirm we can fit
- // it without exceeding the byte budget. If sizing fails (unknown
- // pix_fmt) we still queue the frame — the count cap (16) bounds
- // memory — but log that byte accounting under-counts.
+ // it without exceeding the byte budget. Both cap-hit and inability
+ // to size the frame are treated as candidate failures, so the byte
+ // budget is *strict* — we never queue a frame we can't account for.
match cpu_frame_bytes(&cpu) {
Some(bytes) => {
let new_total = pending_bytes.saturating_add(bytes);
@@ -874,13 +881,21 @@ fn drain_into_pending(
pending.push_back(cpu);
}
None => {
+ // Unknown pix_fmt — we cannot bound this frame's contribution
+ // against the byte cap, so up to MAX_PROBE_PENDING_FRAMES of
+ // them could exhaust memory. Fail the candidate so probing
+ // tries the next backend rather than queueing untracked
+ // allocations.
// SAFETY: AVFrame.format is c_int, safe to read.
let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format };
tracing::warn!(
pix_fmt,
- "hwdecode: unknown CPU pix_fmt during replay; queueing without byte accounting (count cap still applies)"
+ "hwdecode: cannot size unknown CPU pix_fmt during replay; failing candidate"
);
- pending.push_back(cpu);
+ // cpu drops here.
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
}
}
}
From 3b888d81b77027bb2650e23234b15f32d55124cf Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 22:52:12 +1200
Subject: [PATCH 13/27] update
---
src/decoder.rs | 95 +++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 90 insertions(+), 5 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index 06b9504..cb325fd 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -1,10 +1,16 @@
use std::{collections::VecDeque, mem::ManuallyDrop, ptr};
use ffmpeg_next::{
- codec::{self, Context},
+ codec::{
+ self,
+ packet::{Mut as PacketMut, Ref as PacketRef},
+ Context,
+ },
ffi::{
av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref,
- av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVCodec,
+ av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_make_writable, av_packet_ref,
+ avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_to_context, AVBufferRef,
+ AVCodec,
},
frame, Codec, Packet, Rational,
};
@@ -341,8 +347,23 @@ impl VideoDecoder {
self.probe = None;
self.pending_frames.clear();
} else {
- probe.buffered_packets.push(packet.clone());
- probe.buffered_bytes = new_bytes;
+ // Use the checked clone — ffmpeg-next's `Packet::clone`
+ // discards av_packet_ref's return value and would silently
+ // store an empty packet on ENOMEM, corrupting future replay.
+ match try_clone_packet(packet) {
+ Ok(cloned) => {
+ probe.buffered_packets.push(cloned);
+ probe.buffered_bytes = new_bytes;
+ }
+ Err(e) => {
+ tracing::warn!(
+ error = %e,
+ "hwdecode: packet clone failed for probe history; abandoning fallback safety net"
+ );
+ self.probe = None;
+ self.pending_frames.clear();
+ }
+ }
}
}
return Ok(());
@@ -518,6 +539,14 @@ impl VideoDecoder {
/// on the very first inspection (e.g. a malformed `Parameters`); the
/// per-candidate failures during the loop are absorbed and logged.
fn advance_probe(&mut self) -> Result<bool> {
+ // Drop frames previously queued from the backend we're now abandoning.
+ // They came from a candidate that just failed for cause and cannot be
+ // trusted alongside frames we may queue from the next candidate. (If
+ // this method is called repeatedly via chained probe advances, this
+ // also keeps `pending_frames` from accumulating frames from multiple
+ // rejected backends.)
+ self.pending_frames.clear();
+
loop {
// Snapshot inputs without mutating probe state.
let (next_backend, parameters, codec) = match self.probe.as_ref() {
@@ -642,7 +671,10 @@ impl VideoDecoder {
codec: Codec,
backend: Backend,
) -> Result {
- let mut ctx = Context::from_parameters(parameters)?;
+ // Use our checked allocator instead of Context::from_parameters, which
+ // does not null-check avcodec_alloc_context3 and would feed a null
+ // AVCodecContext into FFmpeg under OOM.
+ let mut ctx = build_codec_context(&parameters)?;
let av_type = backend.av_hwdevice_type();
// Verify the codec advertises this hwaccel. We do *not* read the
@@ -771,6 +803,59 @@ fn alloc_av_frame() -> std::result::Result<frame::Video, ffmpeg_next::Error> {
Ok(inner)
}
+/// Build a fresh `Context` from `parameters`, checking the underlying
+/// `avcodec_alloc_context3` for NULL before passing it to
+/// `avcodec_parameters_to_context`. ffmpeg-next's `Context::from_parameters`
+/// skips that check and would feed a null pointer into FFmpeg under OOM —
+/// undefined behavior. This helper surfaces the failure as `ENOMEM` and
+/// frees the context if `parameters_to_context` itself errors.
+fn build_codec_context(parameters: &codec::Parameters) -> Result<Context> {
+ // SAFETY: avcodec_alloc_context3(NULL) returns a fresh AVCodecContext
+ // or NULL on allocation failure.
+ let ctx_ptr = unsafe { avcodec_alloc_context3(ptr::null()) };
+ if ctx_ptr.is_null() {
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ }));
+ }
+ // SAFETY: ctx_ptr is non-null and freshly allocated; parameters.as_ptr()
+ // returns a valid AVCodecParameters pointer; the function copies bytes
+ // out of parameters into the context.
+ let ret = unsafe { avcodec_parameters_to_context(ctx_ptr, parameters.as_ptr()) };
+ if ret < 0 {
+ // SAFETY: ctx_ptr was allocated by us and never handed to anyone else.
+ let mut p = ctx_ptr;
+ unsafe { avcodec_free_context(&mut p) };
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
+ }
+ // SAFETY: ctx_ptr is valid; passing `owner: None` means our wrapper owns
+ // the allocation and `Context::drop` will run `avcodec_free_context`.
+ Ok(unsafe { Context::wrap(ctx_ptr, None) })
+}
+
+/// Checked counterpart to `Packet::clone()`. ffmpeg-next's `clone_from`
+/// calls `av_packet_ref` and ignores the int return value; on `ENOMEM`
+/// the destination is left empty while the caller assumes the clone
+/// succeeded — corrupting any later replay history. This helper surfaces
+/// the AVERROR.
+fn try_clone_packet(src: &Packet) -> std::result::Result<Packet, ffmpeg_next::Error> {
+ let mut dst = Packet::empty();
+ // SAFETY: dst is a freshly zero-initialized Packet (av_init_packet inside
+ // Packet::empty); av_packet_ref initializes its data fields from src's
+ // refcounted buffer or returns AVERROR(ENOMEM) on failure.
+ let ret = unsafe { av_packet_ref(dst.as_mut_ptr(), src.as_ptr()) };
+ if ret < 0 {
+ return Err(ffmpeg_next::Error::from(ret));
+ }
+ // av_packet_make_writable allocates a writable copy if the buffer is
+ // shared. Can also fail with ENOMEM.
+ let ret = unsafe { av_packet_make_writable(dst.as_mut_ptr()) };
+ if ret < 0 {
+ return Err(ffmpeg_next::Error::from(ret));
+ }
+ Ok(dst)
+}
+
/// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine
/// distinguishes "drain output and retry" from "stream over").
fn is_eagain(e: &ffmpeg_next::Error) -> bool {
From a95968d1bf780b2dd3d130565deded5370cb94da Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 23:29:39 +1200
Subject: [PATCH 14/27] update
---
src/decoder.rs | 120 +++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 101 insertions(+), 19 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index cb325fd..aee8366 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -9,7 +9,8 @@ use ffmpeg_next::{
ffi::{
av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref,
av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_make_writable, av_packet_ref,
- avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_to_context, AVBufferRef,
+ avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_alloc,
+ avcodec_parameters_copy, avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef,
AVCodec,
},
frame, Codec, Packet, Rational,
@@ -204,7 +205,19 @@ impl VideoDecoder {
let mut attempts: Vec<(Backend, Box<Error>)> = Vec::new();
for (i, &backend) in order.iter().enumerate() {
- match Self::build_state(parameters.clone(), codec, backend) {
+ // Use the checked clone — ffmpeg-next's `Parameters::clone` does
+ // `avcodec_parameters_alloc` without a null check and ignores the
+ // return of `avcodec_parameters_copy`. Under OOM that path silently
+ // produces a Parameters with a null inner pointer.
+ let cloned_for_build = match try_clone_parameters(&parameters) {
+ Ok(p) => p,
+ Err(e) => {
+ tracing::warn!(?backend, error = %e, "hwdecode: parameters clone failed");
+ attempts.push((backend, Box::new(Error::Ffmpeg(e))));
+ continue;
+ }
+ };
+ match Self::build_state(cloned_for_build, codec, backend) {
Ok(state) => {
tracing::info!(?backend, "hwdecode: opened video decoder (probing)");
let remaining = order[(i + 1)..].to_vec();
@@ -212,17 +225,33 @@ impl VideoDecoder {
// `codec::Parameters` from `stream.parameters()` carries an Rc
// owner pointing at the demuxer; moving that Rc to a worker
// thread (when VideoDecoder is sent) would race with the demuxer's
- // Rc on the original thread. `Parameters::clone()` does
- // `avcodec_parameters_copy` and returns a fully owned Parameters
- // with `owner: None`, severing the link.
- let probe = (!remaining.is_empty()).then(|| ProbeState {
- parameters: parameters.clone(),
- codec,
- remaining_backends: remaining,
- buffered_packets: Vec::new(),
- buffered_bytes: 0,
- eof_sent: false,
- });
+ // Rc on the original thread. The checked clone copies the bytes
+ // into a fresh allocation with `owner: None`, severing the link.
+ //
+ // If the clone fails (ENOMEM), we keep the active `state` but
+ // skip probe setup — caller loses cross-backend fallback safety
+ // net but still gets a working decoder.
+ let probe = if remaining.is_empty() {
+ None
+ } else {
+ match try_clone_parameters(&parameters) {
+ Ok(probe_params) => Some(ProbeState {
+ parameters: probe_params,
+ codec,
+ remaining_backends: remaining,
+ buffered_packets: Vec::new(),
+ buffered_bytes: 0,
+ eof_sent: false,
+ }),
+ Err(e) => {
+ tracing::warn!(
+ error = %e,
+ "hwdecode: parameters clone failed for probe state; proceeding without fallback"
+ );
+ None
+ }
+ }
+ };
return Ok(Self {
state,
hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?,
@@ -548,13 +577,28 @@ impl VideoDecoder {
self.pending_frames.clear();
loop {
- // Snapshot inputs without mutating probe state.
+ // Snapshot inputs without mutating probe state. Use the checked
+ // clone helper rather than `Parameters::clone` (which masks ENOMEM).
let (next_backend, parameters, codec) = match self.probe.as_ref() {
- Some(probe) if !probe.remaining_backends.is_empty() => (
- probe.remaining_backends[0],
- probe.parameters.clone(),
- probe.codec,
- ),
+ Some(probe) if !probe.remaining_backends.is_empty() => {
+ let parameters = match try_clone_parameters(&probe.parameters) {
+ Ok(p) => p,
+ Err(e) => {
+ tracing::warn!(
+ error = %e,
+ "hwdecode: parameters clone failed during probe advance; popping backend and trying next"
+ );
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .remaining_backends
+ .remove(0);
+ continue;
+ }
+ };
+ (probe.remaining_backends[0], parameters, probe.codec)
+ }
_ => return Ok(false),
};
@@ -833,6 +877,44 @@ fn build_codec_context(parameters: &codec::Parameters) -> Result {
Ok(unsafe { Context::wrap(ctx_ptr, None) })
}
+/// Checked deep-clone of `codec::Parameters`. ffmpeg-next's
+/// `Parameters::clone` allocates via `avcodec_parameters_alloc` without
+/// checking for NULL and runs `avcodec_parameters_copy` without checking
+/// the return code. On `ENOMEM` the result is a `Parameters` with a null
+/// inner pointer, which becomes UB when later passed to FFmpeg.
+///
+/// This helper performs both calls explicitly, frees a partial allocation
+/// on failure, and surfaces the AVERROR. The returned `Parameters` has
+/// `owner: None`, severing any Rc link to the caller's demuxer (the
+/// reason we deep-clone in the first place — see Send safety in
+/// `VideoDecoder::open`).
+fn try_clone_parameters(
+ src: &codec::Parameters,
+) -> std::result::Result<codec::Parameters, ffmpeg_next::Error> {
+ // SAFETY: avcodec_parameters_alloc returns a fresh AVCodecParameters
+ // pointer or NULL on allocation failure.
+ let dst_ptr = unsafe { avcodec_parameters_alloc() };
+ if dst_ptr.is_null() {
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ // SAFETY: dst_ptr is non-null and freshly allocated; src.as_ptr() is
+ // a valid AVCodecParameters pointer; the function copies bytes from
+ // src into dst.
+ let ret = unsafe { avcodec_parameters_copy(dst_ptr, src.as_ptr()) };
+ if ret < 0 {
+ // SAFETY: dst_ptr was allocated by us and never handed out.
+ let mut p = dst_ptr;
+ unsafe { avcodec_parameters_free(&mut p) };
+ return Err(ffmpeg_next::Error::from(ret));
+ }
+ // SAFETY: dst_ptr is a valid AVCodecParameters; passing `owner: None`
+ // means our wrapper owns the allocation and `Parameters::drop` will
+ // call `avcodec_parameters_free`.
+ Ok(unsafe { codec::Parameters::wrap(dst_ptr, None) })
+}
+
/// Checked counterpart to `Packet::clone()`. ffmpeg-next's `clone_from`
/// calls `av_packet_ref` and ignores the int return value; on `ENOMEM`
/// the destination is left empty while the caller assumes the clone
From ab25046e7e9bf01fabf399e94bb647101131bb1c Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Sun, 26 Apr 2026 23:48:49 +1200
Subject: [PATCH 15/27] update
---
src/decoder.rs | 65 ++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 61 insertions(+), 4 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index aee8366..c9e99eb 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -832,6 +832,23 @@ fn is_transient(e: &ffmpeg_next::Error) -> bool {
is_eagain(e) || matches!(e, ffmpeg_next::Error::Eof)
}
+/// Reject a `codec::Parameters` whose inner `*mut AVCodecParameters` is
+/// null. This guards the public trust boundary: ffmpeg-next can produce
+/// such a `Parameters` under OOM (`Parameters::new()` does not check
+/// `avcodec_parameters_alloc`), and a safe caller can legally hand one
+/// in. Without this check, the very next `(*p.as_ptr()).field` read
+/// would be a null deref.
+fn ensure_parameters_non_null(parameters: &codec::Parameters) -> Result<()> {
+ // SAFETY: as_ptr() returns the inner *const AVCodecParameters; we just
+ // inspect the pointer value (no deref).
+ if unsafe { parameters.as_ptr() }.is_null() {
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ }));
+ }
+ Ok(())
+}
+
/// Allocate a fresh `frame::Video`, checking that `av_frame_alloc` did not
/// return NULL. ffmpeg-next's `frame::Video::empty()` does not surface that
/// failure and the resulting null pointer would be UB on the next field
@@ -854,6 +871,7 @@ fn alloc_av_frame() -> std::result::Result {
/// undefined behavior. This helper surfaces the failure as `ENOMEM` and
/// frees the context if `parameters_to_context` itself errors.
fn build_codec_context(parameters: &codec::Parameters) -> Result<Context> {
+ ensure_parameters_non_null(parameters)?;
// SAFETY: avcodec_alloc_context3(NULL) returns a fresh AVCodecContext
// or NULL on allocation failure.
let ctx_ptr = unsafe { avcodec_alloc_context3(ptr::null()) };
@@ -891,6 +909,13 @@ fn build_codec_context(parameters: &codec::Parameters) -> Result {
fn try_clone_parameters(
src: &codec::Parameters,
) -> std::result::Result<codec::Parameters, ffmpeg_next::Error> {
+ // Reject a null inner pointer at the boundary; a deref inside
+ // avcodec_parameters_copy below would otherwise be UB.
+ if unsafe { src.as_ptr() }.is_null() {
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
// SAFETY: avcodec_parameters_alloc returns a fresh AVCodecParameters
// pointer or NULL on allocation failure.
let dst_ptr = unsafe { avcodec_parameters_alloc() };
@@ -949,10 +974,11 @@ fn is_eagain(e: &ffmpeg_next::Error) -> bool {
/// `addr_of!` + `ptr::read` so a value not in our build's discriminant
/// set never invokes UB.
fn find_decoder(parameters: &codec::Parameters) -> Result<Codec> {
- // SAFETY: parameters owns a valid AVCodecParameters; addr_of! projects
- // to the codec_id field; the *const u32 cast is sound because AVCodecID
- // is `#[repr(u32)]` (same size and alignment as u32). Reading as u32
- // cannot be UB regardless of the value FFmpeg wrote.
+ ensure_parameters_non_null(parameters)?;
+ // SAFETY: parameters' inner pointer is non-null (checked above);
+ // addr_of! projects to the codec_id field; the *const u32 cast is sound
+ // because AVCodecID is `#[repr(u32)]` (same size and alignment as u32).
+ // Reading as u32 cannot be UB regardless of the value FFmpeg wrote.
let raw_id: u32 =
unsafe { ptr::read(ptr::addr_of!((*parameters.as_ptr()).codec_id) as *const u32) };
@@ -1137,4 +1163,35 @@ mod tests {
let other = ffmpeg_next::Error::InvalidData;
assert!(!is_transient(&other));
}
+
+ /// Regression: a `codec::Parameters` with a null inner pointer must be
+ /// rejected at the entrypoint, not deref'd. ffmpeg-next's
+ /// `Parameters::new()` does not check `avcodec_parameters_alloc()`, so a
+ /// safe caller can hand us such a value under OOM.
+ #[test]
+ fn open_rejects_null_parameters() {
+ // SAFETY: Parameters::wrap accepts any pointer; we explicitly construct
+ // one with null inner. avcodec_parameters_free is null-safe on Drop.
+ let null_params = unsafe { codec::Parameters::wrap(std::ptr::null_mut(), None) };
+ match VideoDecoder::open(null_params) {
+ Ok(_) => panic!("open should fail on null parameters"),
+ Err(Error::Ffmpeg(ffmpeg_next::Error::Other { errno })) => {
+ assert_eq!(errno, libc::ENOMEM, "expected ENOMEM, got {errno}");
+ }
+ Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"),
+ }
+ }
+
+ #[test]
+ fn open_with_rejects_null_parameters() {
+ // SAFETY: see open_rejects_null_parameters.
+ let null_params = unsafe { codec::Parameters::wrap(std::ptr::null_mut(), None) };
+ match VideoDecoder::open_with(null_params, Backend::VideoToolbox) {
+ Ok(_) => panic!("open_with should fail on null parameters"),
+ Err(Error::Ffmpeg(ffmpeg_next::Error::Other { errno })) => {
+ assert_eq!(errno, libc::ENOMEM, "expected ENOMEM, got {errno}");
+ }
+ Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"),
+ }
+ }
}
From 36a4729606e825af94973f1229d83d2969f56401 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 00:26:00 +1200
Subject: [PATCH 16/27] update
---
src/decoder.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 52 insertions(+), 5 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index c9e99eb..165f6d8 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -11,7 +11,7 @@ use ffmpeg_next::{
av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_make_writable, av_packet_ref,
avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_alloc,
avcodec_parameters_copy, avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef,
- AVCodec,
+ AVCodec, AVMediaType,
},
frame, Codec, Packet, Rational,
};
@@ -670,8 +670,30 @@ impl VideoDecoder {
}
}
if r.is_ok() && probe.eof_sent {
- if let Err(e) = candidate_state.inner.send_eof() {
- r = Err(e);
+ // `avcodec_send_packet(NULL)` (which `send_eof` becomes) can
+ // return EAGAIN with the same drain-output-first semantics as
+ // a regular send_packet. Loop drain+retry instead of failing
+ // the candidate on backpressure.
+ loop {
+ match candidate_state.inner.send_eof() {
+ Ok(()) => break,
+ Err(e) if is_eagain(&e) => {
+ if let Err(de) = drain_into_pending(
+ &mut candidate_state.inner,
+ &mut hw_buf,
+ &mut local_pending,
+ &mut local_pending_bytes,
+ max_pending_bytes,
+ ) {
+ r = Err(de);
+ break;
+ }
+ }
+ Err(e) => {
+ r = Err(e);
+ break;
+ }
+ }
}
}
r
@@ -778,8 +800,15 @@ impl VideoDecoder {
// Open the decoder. On any failure, release the resources we just
// allocated so we don't leak.
- let opened = match ctx.decoder().open_as(codec).and_then(|o| o.video()) {
- Ok(d) => d,
+ //
+ // We deliberately bypass `Opened::video()` because it calls
+ // `Context::medium()`, which reads `AVCodecContext.codec_type` as the
+ // bindgen `AVMediaType` enum — the same UB hazard we've been
+ // systematically removing. Instead: validate `codec_type` as a raw
+ // `c_int` ourselves, then construct the `decoder::Video` wrapper
+ // directly via its public tuple field.
+ let opened = match ctx.decoder().open_as(codec) {
+ Ok(o) => o,
Err(e) => {
// SAFETY: we either allocated these in this function above or
// they are null; av_buffer_unref / Box::from_raw handle null
@@ -797,6 +826,24 @@ impl VideoDecoder {
}
};
+ // Validate codec_type as a raw integer — never construct AVMediaType
+ // from an unvalidated runtime value.
+ // SAFETY: codec_type is bound as AVMediaType (`#[repr(i32)]`), same
+ // size and alignment as i32; reading the bytes as i32 cannot be UB.
+ let codec_type_int: i32 =
+ unsafe { ptr::read(ptr::addr_of!((*opened.as_ptr()).codec_type) as *const i32) };
+ let video_type_int: i32 = AVMediaType::AVMEDIA_TYPE_VIDEO as i32;
+ if codec_type_int != video_type_int {
+ // Not a video codec context — surface the same error
+ // `Opened::video()` would have, without going through enum
+ // construction. Cleanup runs via `opened`'s Drop.
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::InvalidData));
+ }
+ // SAFETY of construction: `decoder::Video` is `pub struct Video(pub Opened)`.
+ // We construct via the public field; this is the same wrapping
+ // `Opened::video()` does on success, just without the enum read.
+ let opened = ffmpeg_next::decoder::Video(opened);
+
Ok(DecoderState {
inner: ManuallyDrop::new(opened),
backend,
From e27758846eca038a0be9c3c265193ac8d9856ef3 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 01:31:36 +1200
Subject: [PATCH 17/27] update
---
Cargo.toml | 2 +-
README.md | 51 ++++++++++++++++++++++-----------
docs/design.md | 72 +++++++++--------------------------------------
src/decoder.rs | 28 +++++++++---------
tests/hw_smoke.rs | 25 ++++++++++++----
5 files changed, 82 insertions(+), 96 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 7691656..9a3b19a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "hwdecode"
-version = "0.0.0"
+version = "0.1.0"
edition = "2021"
rust-version = "1.95"
description = "Cross-platform hardware-only video decoder built on top of ffmpeg-next, with auto-probe across HW backends. Callers handle software fallback."
diff --git a/README.md b/README.md
index c4e9108..05e7c56 100644
--- a/README.md
+++ b/README.md
@@ -4,27 +4,32 @@ Cross-platform hardware-accelerated video decoder for Rust, built on top of
[`ffmpeg-next`](https://crates.io/crates/ffmpeg-next).
`VideoDecoder` mirrors the `send_packet` / `receive_frame` interface of
-`ffmpeg::decoder::Video` and silently picks the best hardware backend for the
-host platform, falling back to software if none open. Output frames are
-CPU-side — for HW backends they are downloaded with `av_hwframe_transfer_data`
-(NV12 for 8-bit, P010 for 10-bit). Pixel-format conversion is intentionally
-out of scope.
+`ffmpeg::decoder::Video` and auto-probes the host's hardware backends.
+This crate is **hardware-only** — there is no software fallback inside it.
+If no hardware backend can decode the stream, `VideoDecoder::open` returns
+`Error::AllBackendsFailed` and the caller decides how to fall back (typically
+by opening an `ffmpeg::decoder::Video` directly). Output frames are CPU-side,
+downloaded with `av_hwframe_transfer_data` (NV12 for 8-bit, P010 for 10-bit).
+Pixel-format conversion is intentionally out of scope.
## Backends
-| Target | Probe order |
+| Target | Probe order (HW only) |
| ------------------- | --------------------------------- |
-| macOS / iOS / tvOS | VideoToolbox → Software |
-| Linux | VAAPI → CUDA → Software |
-| Windows | D3D11VA → CUDA → Software |
-| other | Software |
+| macOS / iOS / tvOS | VideoToolbox |
+| Linux | VAAPI → CUDA |
+| Windows | D3D11VA → CUDA |
+| other | (none) |
+
+If `open` returns `Error::AllBackendsFailed`, software fallback is the
+caller's responsibility (this crate intentionally does not include one).
## Usage
-```rust
+```rust,no_run
use ffmpeg_next as ffmpeg;
-use ffmpeg::{format, frame, media};
-use hwdecode::VideoDecoder;
+use ffmpeg::{format, media};
+use hwdecode::{Frame, VideoDecoder};
ffmpeg::init()?;
@@ -32,15 +37,29 @@ let mut input = format::input(path)?;
let stream = input.streams().best(media::Type::Video).unwrap();
let stream_index = stream.index();
-let mut decoder = VideoDecoder::open(stream.parameters())?;
+// HW-only open. On AllBackendsFailed, fall back to software yourself.
+let mut decoder = match VideoDecoder::open(stream.parameters()) {
+ Ok(d) => d,
+ Err(hwdecode::Error::AllBackendsFailed { .. }) => {
+ // Caller-side software fallback.
+ let _sw = ffmpeg::codec::Context::from_parameters(stream.parameters())?
+ .decoder()
+ .video()?;
+ // ... drive _sw with send_packet / receive_frame yourself ...
+ return Ok(());
+ }
+ Err(e) => return Err(e.into()),
+};
println!("backend = {:?}", decoder.backend());
-let mut frame = frame::Video::empty();
+let mut frame = Frame::empty()?;
for (s, packet) in input.packets() {
if s.index() != stream_index { continue; }
decoder.send_packet(&packet)?;
while decoder.receive_frame(&mut frame).is_ok() {
- // frame.format() is NV12 / P010 (HW path) or codec-native (SW path)
+ // frame.pix_fmt() is the integer constant — match against
+ // hwdecode::pix_fmt::{NV12, P010LE, ...} and dispatch to your
+ // pixel-format pipeline (e.g. `colconv`).
// ... do something with frame ...
}
}
diff --git a/docs/design.md b/docs/design.md
index 2c54aee..521dd49 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -26,72 +26,26 @@ Cross-platform **hardware-only** video decoder built on top of `ffmpeg-next` 8.1
## Public API
-```rust
-pub struct VideoDecoder { /* private */ }
-
-impl VideoDecoder {
- /// Auto-probe HW backends in platform order. Returns
- /// `Error::AllBackendsFailed` if no backend can decode this stream;
- /// caller falls back to software decoder of choice. On success,
- /// `backend()` reports the one that won.
- pub fn open(parameters: ffmpeg::codec::Parameters) -> Result<Self, Error>;
-
- /// Force a specific backend. No probe, no fallback.
- pub fn open_with(parameters: ffmpeg::codec::Parameters, backend: Backend) -> Result<Self, Error>;
-
- pub fn backend(&self) -> Backend;
- pub fn width(&self) -> u32;
- pub fn height(&self) -> u32;
- pub fn format(&self) -> ffmpeg::format::Pixel;
- pub fn time_base(&self) -> ffmpeg::Rational;
- pub fn frame_rate(&self) -> ffmpeg::Rational;
-
- pub fn send_packet(&mut self, packet: &ffmpeg::Packet) -> Result<(), Error>;
- pub fn send_eof(&mut self) -> Result<(), Error>;
-
- /// Receive a CPU-side frame. Internally calls
- /// `av_hwframe_transfer_data` and copies PTS/timing onto the result;
- /// output format is NV12 (8-bit) or P010 (10-bit) per the HW backend's
- /// `AVHWFramesContext::sw_format`.
- pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<(), Error>;
-
- pub fn flush(&mut self);
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum Backend {
- Software,
- VideoToolbox, // macOS, iOS, iPadOS, tvOS
- Vaapi, // Linux (Intel/AMD)
- Cuda, // Linux/Windows (NVIDIA)
- D3d11va, // Windows
-}
-
-#[derive(Debug, thiserror::Error)]
-pub enum Error {
- #[error("ffmpeg error: {0}")]
- Ffmpeg(#[from] ffmpeg::Error),
- #[error("no decoder for codec id {0:?}")]
- NoCodec(ffmpeg::codec::Id),
- #[error("hardware device init failed for {backend:?}: {source}")]
- HwDeviceInitFailed { backend: Backend, source: ffmpeg::Error },
- #[error("all backends failed; attempts: {attempts:?}")]
- AllBackendsFailed { attempts: Vec<(Backend, ffmpeg::Error)> },
-}
-```
+> The original spec listed an inline API surface here. It diverged from the
+> shipping crate (`Backend::Software` was removed; `format() -> Pixel` was
+> removed in favor of `Frame::pix_fmt() -> i32`; the `Frame` wrapper
+> replaced `frame::Video`; `Error` gained / dropped variants). Rather than
+> keep stale signatures here, the canonical reference is `src/lib.rs` and
+> the public docs on each item. See the README for a runnable usage
+> example.
## Behavior
### Probe order
-| Target | Order tried |
+| Target | Order tried (HW only) |
| ------------------- | -------------------------------------------- |
-| macOS, iOS, tvOS | `[VideoToolbox, Software]` |
-| Linux | `[Vaapi, Cuda, Software]` |
-| Windows | `[D3d11va, Cuda, Software]` |
-| Other | `[Software]` |
+| macOS, iOS, tvOS | `[VideoToolbox]` |
+| Linux | `[Vaapi, Cuda]` |
+| Windows | `[D3d11va, Cuda]` |
+| Other | `[]` → `Error::AllBackendsFailed` |
-A HW backend is a candidate only if **(a)** its `AVHWDeviceType` device can be created via `av_hwdevice_ctx_create`, and **(b)** the codec advertises support via `avcodec_get_hw_config` matching that device type. The first candidate that fully opens wins. Each failure logs `tracing::warn!` with the backend and the underlying error and the loop tries the next.
+A HW backend is a candidate only if **(a)** its `AVHWDeviceType` device can be created via `av_hwdevice_ctx_create`, and **(b)** the codec advertises support via `avcodec_get_hw_config` matching that device type. The first candidate that fully opens wins. Each failure logs `tracing::warn!` with the backend and the underlying error and the loop tries the next. If every backend fails (or the platform has none), `open` returns `Error::AllBackendsFailed`; software fallback is the caller's responsibility.
### Device selection
diff --git a/src/decoder.rs b/src/decoder.rs
index 165f6d8..e146a18 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -8,10 +8,9 @@ use ffmpeg_next::{
},
ffi::{
av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref,
- av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_make_writable, av_packet_ref,
- avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_alloc,
- avcodec_parameters_copy, avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef,
- AVCodec, AVMediaType,
+ av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_ref, avcodec_alloc_context3,
+ avcodec_free_context, avcodec_parameters_alloc, avcodec_parameters_copy,
+ avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef, AVCodec, AVMediaType,
},
frame, Codec, Packet, Rational,
};
@@ -114,10 +113,12 @@ const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024;
/// during probe replay. Each frame is a fully-allocated CPU buffer
/// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so
/// an unbounded queue would OOM on a candidate with a shallow internal
-/// queue against a deep replay history. Drained candidate frames in
-/// excess of this cap (or [`DEFAULT_MAX_PROBE_PENDING_BYTES`], whichever
-/// hits first) are discarded with a `tracing::warn!`; we still drain so
-/// `send_packet` can keep feeding the candidate.
+/// queue against a deep replay history. This cap, together with
+/// [`DEFAULT_MAX_PROBE_PENDING_BYTES`], is enforced as a hard limit during
+/// replay: once either limit is reached, probe buffering fails for the
+/// candidate (returns `ENOMEM` from `drain_into_pending`) instead of
+/// queueing additional drained frames. The probe loop then advances to
+/// the next backend or returns `Error::AllBackendsFailed` if exhausted.
const MAX_PROBE_PENDING_FRAMES: usize = 16;
/// Default byte budget for probe-replay drained frames. 256 MiB is enough
@@ -991,7 +992,10 @@ fn try_clone_parameters(
/// calls `av_packet_ref` and ignores the int return value; on `ENOMEM`
/// the destination is left empty while the caller assumes the clone
/// succeeded — corrupting any later replay history. This helper surfaces
-/// the AVERROR.
+/// the AVERROR. The result is a refcounted shallow clone — the payload
+/// buffer is shared with `src` rather than deep-copied; the probe replay
+/// only sends packets through `avcodec_send_packet`, which does not
+/// require a writable buffer.
fn try_clone_packet(src: &Packet) -> std::result::Result<Packet, ffmpeg_next::Error> {
let mut dst = Packet::empty();
// SAFETY: dst is a freshly zero-initialized Packet (av_init_packet inside
@@ -1001,12 +1005,6 @@ fn try_clone_packet(src: &Packet) -> std::result::Result
Date: Mon, 27 Apr 2026 10:58:42 +1200
Subject: [PATCH 18/27] update
---
README.md | 16 ++-
src/decoder.rs | 177 +++++++++++++++++-------
src/frame.rs | 362 +++++++++++++++++++++++++++++++++++++++++--------
src/lib.rs | 10 +-
4 files changed, 456 insertions(+), 109 deletions(-)
diff --git a/README.md b/README.md
index 05e7c56..3da5fba 100644
--- a/README.md
+++ b/README.md
@@ -6,11 +6,17 @@ Cross-platform hardware-accelerated video decoder for Rust, built on top of
`VideoDecoder` mirrors the `send_packet` / `receive_frame` interface of
`ffmpeg::decoder::Video` and auto-probes the host's hardware backends.
This crate is **hardware-only** — there is no software fallback inside it.
-If no hardware backend can decode the stream, `VideoDecoder::open` returns
-`Error::AllBackendsFailed` and the caller decides how to fall back (typically
-by opening an `ffmpeg::decoder::Video` directly). Output frames are CPU-side,
-downloaded with `av_hwframe_transfer_data` (NV12 for 8-bit, P010 for 10-bit).
-Pixel-format conversion is intentionally out of scope.
+If no hardware backend can decode the stream, `Error::AllBackendsFailed`
+surfaces from `VideoDecoder::open` (when no backend opens) or from
+`receive_frame` / `send_packet` / `send_eof` (when the initially-opened
+backend fails at decode time and every remaining backend in the probe order
+also fails — the only way it surfaces on single-backend platforms like macOS).
+The caller decides how to fall back (typically by opening an
+`ffmpeg::decoder::Video` directly). Output frames are CPU-side, downloaded
+with `av_hwframe_transfer_data` (NV12 for 8-bit, P010 for 10-bit). Pixel-
+format conversion is intentionally out of scope; safe per-row access is via
+`Frame::row` / `Frame::rows` (clipped to visible byte width — never includes
+FFmpeg's per-row alignment padding).
## Backends
diff --git a/src/decoder.rs b/src/decoder.rs
index e146a18..2fe4f08 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -142,7 +142,9 @@ struct ProbeState {
parameters: codec::Parameters,
codec: Codec,
/// Backends still to try, in order. Empty means "no more options after
- /// the active one fails".
+ /// the active one fails" — `advance_probe` then surfaces
+ /// [`Error::AllBackendsFailed`] so the contract is the same on
+ /// single-backend platforms (e.g. macOS) as on multi-backend ones.
remaining_backends: Vec<Backend>,
/// Packets sent so far, kept for replay through any candidate backend.
/// Preserved across failed candidates — only cleared when the probe
@@ -154,6 +156,12 @@ struct ProbeState {
buffered_bytes: usize,
/// Whether `send_eof` has been called; replayed alongside packets.
eof_sent: bool,
+ /// Per-backend errors captured since the probe window opened. Pushed
+ /// whenever a backend's failure triggers `advance_probe` (the active
+ /// backend that just failed) or a candidate's build / replay rejects
+ /// it. Drained into [`Error::AllBackendsFailed`] when the probe
+ /// exhausts every option.
+ attempts: Vec<(Backend, Box<Error>)>,
}
// SAFETY: All raw pointers are exclusively owned by `DecoderState` and never
@@ -197,9 +205,21 @@ impl VideoDecoder {
/// consumed, so a misbehaving middle backend cannot strand the caller.
///
/// [`Self::backend`] reflects whichever backend ultimately produced the
- /// first frame. If no hardware backend in the platform's probe order can
- /// decode this stream, `open` returns [`Error::AllBackendsFailed`];
- /// callers handle software fallback themselves.
+ /// first frame.
+ ///
+ /// [`Error::AllBackendsFailed`] surfaces in two places, with the same
+ /// meaning ("no hardware backend can decode this stream — fall back to
+ /// software yourself"):
+ /// - From `open` itself, when no backend even opens.
+ /// - From [`Self::send_packet`] / [`Self::send_eof`] /
+ /// [`Self::receive_frame`], when the initially-opened backend fails
+ /// at decode time and every remaining backend in the probe order
+ /// either also fails or doesn't exist. On single-backend platforms
+ /// (e.g. macOS, where the order is `[VideoToolbox]`), this is the
+ /// only place a HW-only failure surfaces.
+ ///
+ /// In both cases, `attempts` carries the per-backend error log so the
+ /// caller can decide how to proceed with software fallback.
pub fn open(parameters: codec::Parameters) -> Result<Self> {
let codec = find_decoder(&parameters)?;
let order = backend::probe_order();
@@ -229,28 +249,33 @@ impl VideoDecoder {
// Rc on the original thread. The checked clone copies the bytes
// into a fresh allocation with `owner: None`, severing the link.
//
+ // We always create ProbeState — even when `remaining` is empty
+ // (single-backend platforms like macOS) — so that a first-frame
+ // failure on the only backend surfaces as
+ // `Error::AllBackendsFailed` from `receive_frame` /
+ // `send_packet` rather than as a raw FFmpeg error. That keeps
+ // the API contract the same regardless of how many HW backends
+ // the platform exposes.
+ //
// If the clone fails (ENOMEM), we keep the active `state` but
- // skip probe setup — caller loses cross-backend fallback safety
- // net but still gets a working decoder.
- let probe = if remaining.is_empty() {
- None
- } else {
- match try_clone_parameters(&parameters) {
- Ok(probe_params) => Some(ProbeState {
- parameters: probe_params,
- codec,
- remaining_backends: remaining,
- buffered_packets: Vec::new(),
- buffered_bytes: 0,
- eof_sent: false,
- }),
- Err(e) => {
- tracing::warn!(
- error = %e,
- "hwdecode: parameters clone failed for probe state; proceeding without fallback"
- );
- None
- }
+ // skip probe setup — caller loses the transactional probe /
+ // fallback safety net but still gets a working decoder.
+ let probe = match try_clone_parameters(&parameters) {
+ Ok(probe_params) => Some(ProbeState {
+ parameters: probe_params,
+ codec,
+ remaining_backends: remaining,
+ buffered_packets: Vec::new(),
+ buffered_bytes: 0,
+ eof_sent: false,
+ attempts: Vec::new(),
+ }),
+ Err(e) => {
+ tracing::warn!(
+ error = %e,
+ "hwdecode: parameters clone failed for probe state; proceeding without fallback"
+ );
+ None
}
};
return Ok(Self {
@@ -403,7 +428,10 @@ impl VideoDecoder {
return Err(Error::Ffmpeg(e));
}
Err(e) => {
- if self.probe.is_some() && self.advance_probe()? {
+ if self.probe.is_some() {
+ // advance_probe consumes the error into `attempts` and either
+ // installs a candidate (Ok) or surfaces AllBackendsFailed (Err).
+ self.advance_probe(Error::Ffmpeg(e))?;
continue;
}
return Err(Error::Ffmpeg(e));
@@ -428,7 +456,8 @@ impl VideoDecoder {
}
Err(e) if is_transient(&e) => return Err(Error::Ffmpeg(e)),
Err(e) => {
- if self.probe.is_some() && self.advance_probe()? {
+ if self.probe.is_some() {
+ self.advance_probe(Error::Ffmpeg(e))?;
continue;
}
return Err(Error::Ffmpeg(e));
@@ -453,8 +482,11 @@ impl VideoDecoder {
/// method, so the caller never loses initial frames after a fallback.
///
/// This crate is hardware-only: there is no software fallback inside the
- /// decoder. If every backend is exhausted, the failure surfaces as the
- /// last decoder error. Callers handle software fallback themselves.
+ /// decoder. When every backend in the probe order has been exhausted —
+ /// including the case of a single-backend platform whose only backend
+ /// failed — this returns [`Error::AllBackendsFailed`] with the per-
+ /// backend attempt log so the caller can branch into a software
+ /// decoder of their choice.
///
/// Returns the same transient signals as `ffmpeg::decoder::Video`:
/// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and
@@ -478,8 +510,11 @@ impl VideoDecoder {
// EOF (and every other non-transient error): if we are still
// probing, treat it as candidate failure — a backend that drains
// to EOF without ever producing a frame should not silently
- // present as "stream over" to the caller. Advance and retry.
- if self.probe.is_some() && self.advance_probe()? {
+ // present as "stream over" to the caller. Advance and retry; if
+ // every backend has been exhausted, advance_probe surfaces
+ // AllBackendsFailed and `?` propagates it.
+ if self.probe.is_some() {
+ self.advance_probe(Error::Ffmpeg(e))?;
// Probe advance may have populated `pending_frames`; deliver
// one of those before reading more from the new candidate.
if self.try_pop_pending(frame) {
@@ -487,7 +522,7 @@ impl VideoDecoder {
}
continue;
}
- // Probe collapsed or exhausted — surface the error (including EOF
+ // Probe collapsed already — surface the error (including EOF
// for a genuinely empty stream).
return Err(Error::Ffmpeg(e));
}
@@ -504,7 +539,8 @@ impl VideoDecoder {
return Ok(());
}
Err(e) => {
- if self.probe.is_some() && self.advance_probe()? {
+ if self.probe.is_some() {
+ self.advance_probe(Error::Ffmpeg(e))?;
unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) };
if self.try_pop_pending(frame) {
return Ok(());
@@ -561,14 +597,39 @@ impl VideoDecoder {
/// Try the next backend in `remaining_backends`. Transactional: a
/// candidate must successfully build and accept the replayed history
/// before any probe state is consumed. Backends that fail to build or
- /// reject the replay are skipped (with `tracing::warn!`) and the loop
- /// continues to the next one. Returns:
- /// - `Ok(true)` when a candidate is installed and replay completed.
- /// - `Ok(false)` when the probe is exhausted (no more backends to try).
- /// - `Err(_)` only for genuinely fatal conditions surfaced by `build_state`
- /// on the very first inspection (e.g. a malformed `Parameters`); the
- /// per-candidate failures during the loop are absorbed and logged.
- fn advance_probe(&mut self) -> Result<bool> {
+ /// reject the replay are recorded into `probe.attempts` and the loop
+ /// continues to the next one.
+ ///
+ /// `last_error` is the error that triggered this advance — i.e. the
+ /// failure of the currently active backend on `send_packet` /
+ /// `send_eof` / `receive_frame`. It is recorded against the active
+ /// backend before any candidate is tried so that a final
+ /// `AllBackendsFailed` carries the full attempt log including the
+ /// initially-opened backend's runtime failure.
+ ///
+ /// Returns:
+ /// - `Ok(())` when a candidate is installed and replay completed —
+ /// caller should retry the operation.
+ /// - `Err(Error::AllBackendsFailed { attempts })` when every remaining
+ /// backend has been exhausted (including the just-failed active one).
+ /// This is what the documented `open` contract promises, surfaced at
+ /// runtime so the caller can branch into a software fallback. On a
+ /// single-backend platform (e.g. macOS), this fires after the only
+ /// backend's first-frame failure; on multi-backend platforms it
+ /// fires after the last candidate's failure.
+ /// - `Err(_)` for other fatal conditions surfaced by probe machinery
+ /// itself (e.g. `alloc_av_frame` ENOMEM during replay drain).
+ fn advance_probe(&mut self, last_error: Error) -> Result<()> {
+ // Record the failure that triggered this advance against the active
+ // backend. If the probe was somehow already gone (shouldn't happen —
+ // call sites guard with `self.probe.is_some()`), just propagate the
+ // error so behaviour matches the pre-fix code path.
+ let active_backend = self.state.backend;
+ match self.probe.as_mut() {
+ Some(probe) => probe.attempts.push((active_backend, Box::new(last_error))),
+ None => return Err(last_error),
+ }
+
// Drop frames previously queued from the backend we're now abandoning.
// They came from a candidate that just failed for cause and cannot be
// trusted alongside frames we may queue from the next candidate. (If
@@ -589,25 +650,37 @@ impl VideoDecoder {
error = %e,
"hwdecode: parameters clone failed during probe advance; popping backend and trying next"
);
- self
+ let popped = self
.probe
.as_mut()
.expect("probe state present")
.remaining_backends
.remove(0);
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .attempts
+ .push((popped, Box::new(Error::Ffmpeg(e))));
continue;
}
};
(probe.remaining_backends[0], parameters, probe.codec)
}
- _ => return Ok(false),
+ // No more candidates — surface the accumulated attempt log as
+ // AllBackendsFailed so single- and multi-backend platforms have
+ // the same contract for "every HW backend failed."
+ _ => {
+ let attempts = self.probe.take().map(|p| p.attempts).unwrap_or_default();
+ return Err(Error::AllBackendsFailed { attempts });
+ }
};
let prev_backend = self.state.backend;
tracing::warn!(from = ?prev_backend, to = ?next_backend, "hwdecode: advancing probe");
- // Build candidate. On failure, pop and continue without touching the
- // packet buffer.
+ // Build candidate. On failure, record into attempts and continue
+ // without touching the packet buffer.
let mut candidate_state = match Self::build_state(parameters, codec, next_backend) {
Ok(s) => s,
Err(e) => {
@@ -618,6 +691,12 @@ impl VideoDecoder {
.expect("probe state present")
.remaining_backends
.remove(0);
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .attempts
+ .push((next_backend, Box::new(e)));
continue;
}
};
@@ -713,6 +792,12 @@ impl VideoDecoder {
.expect("probe state present")
.remaining_backends
.remove(0);
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .attempts
+ .push((next_backend, Box::new(Error::Ffmpeg(e))));
continue;
}
@@ -727,7 +812,7 @@ impl VideoDecoder {
.expect("probe state present")
.remaining_backends
.remove(0);
- return Ok(true);
+ return Ok(());
}
}
diff --git a/src/frame.rs b/src/frame.rs
index 65ec63d..3f48075 100644
--- a/src/frame.rs
+++ b/src/frame.rs
@@ -8,11 +8,24 @@
//! when the value isn't in the build's bindgen-generated discriminant set
//! (the exact failure mode this crate is designed to survive).
//!
-//! Plane lengths for [`Frame::data`] are computed from a hardcoded chroma-
-//! subsampling table keyed on the safe `pix_fmt()` integer, covering only
-//! the formats `hwdecode` produces (the NV* and P0xx/P2xx/P4xx families
-//! after `av_hwframe_transfer_data`). For any other format, [`Frame::data`]
-//! returns `None` rather than guessing at a slice length.
+//! Per-row sizes for [`Frame::row`] / [`Frame::rows`] are computed from
+//! hardcoded chroma-subsampling and bit-depth tables keyed on the safe
+//! `pix_fmt()` integer, covering only the formats `hwdecode` produces (the
+//! NV* and P0xx/P2xx/P4xx families after `av_hwframe_transfer_data`). For
+//! any other format, the row accessors return `None` rather than guessing
+//! at a slice length.
+//!
+//! Why per-row, not whole-plane: FFmpeg allocates each row at
+//! `linesize[plane]` ([`Frame::stride`]) bytes for SIMD alignment, but
+//! hardware transfer paths only initialize the first
+//! [`Frame::row_bytes`]`(plane)` of every row. Exposing a stride-inclusive
+//! `&[u8]` over an entire plane would let safe code observe those
+//! uninitialized padding bytes, which violates `slice::from_raw_parts`.
+//! Per-row slices are tightly clipped to the visible byte width so the
+//! safe API never hands out an uninitialized byte. Callers that need a
+//! single base pointer (e.g. SIMD pixel converters keyed off stride) can
+//! reach for [`Frame::as_ptr`] and consume `stride * plane_h` bytes
+//! themselves under their own `unsafe` contract.
//!
//! Compare formats against integer constants in [`crate::pix_fmt`].
@@ -114,59 +127,138 @@ impl Frame {
linesize as usize
}
- /// Pixel data for `plane`.
+ /// Visible byte width of `plane` — the number of initialized bytes at
+ /// the start of every row in that plane.
+ ///
+ /// Distinct from [`Self::stride`], which returns the FFmpeg `linesize`.
+ /// `linesize` is `>= row_bytes` and may include trailing alignment
+ /// padding bytes that FFmpeg's hardware transfer paths do not
+ /// initialize. `row_bytes` is what `slice::from_raw_parts` can safely
+ /// see.
+ ///
+ /// Returns `None` when the format is not in the supported HW-output set
+ /// (see crate `pix_fmt`) or the plane is out of range.
+ pub fn row_bytes(&self, plane: usize) -> Option<usize> {
+ if plane >= self.planes() {
+ return None;
+ }
+ plane_row_bytes_for(self.pix_fmt(), plane, self.width() as usize)
+ }
+
+ /// Pixel data for one row of `plane`, tightly clipped to the visible
+ /// byte width ([`Self::row_bytes`]).
+ ///
+ /// Excludes the trailing alignment padding that [`Self::stride`]
+ /// includes — those bytes are not guaranteed to be initialized by
+ /// FFmpeg's hardware transfer paths and must not be exposed through a
+ /// safe `&[u8]`.
///
/// Returns `None` for any of the following — never panics:
- /// - The frame's pixel format is not one of the hardware-output formats
- /// listed in [`crate::pix_fmt`] (we cannot safely compute the plane
- /// size for an unknown layout).
+ /// - The frame's pixel format is not one of the supported hardware-
+ /// output formats listed in [`crate::pix_fmt`].
/// - The plane index is out of range.
- /// - `AVFrame.linesize[plane]` is `<= 0` (negative linesize signals
- /// vertically-flipped FFmpeg layouts which we do not surface; zero is
- /// "no plane").
- /// - `AVFrame.height` is `<= 0`.
- /// - The computed slice length would overflow or exceed `isize::MAX`
- /// (a precondition of [`std::slice::from_raw_parts`]).
+ /// - `y` is past the plane's row count.
+ /// - `AVFrame.linesize[plane]` is `<= 0` or `AVFrame.height` is `<= 0`.
/// - The plane's data pointer is null.
+ /// - The plane size would overflow `isize::MAX`.
+ pub fn row(&self, plane: usize, y: usize) -> Option<&[u8]> {
+ let info = self.plane_info(plane)?;
+ if y >= info.plane_h {
+ return None;
+ }
+ // y < plane_h and plane_h * stride ≤ isize::MAX (verified in plane_info),
+ // so y * stride is bounded by (plane_h - 1) * stride ≤ isize::MAX.
+ let offset = y * info.stride;
+ // SAFETY:
+ // - `info.plane_ptr` is non-null (verified in plane_info).
+ // - `offset + row_bytes ≤ plane_h * stride`, which is the size of the
+ // FFmpeg allocation for this plane.
+ // - Bytes 0..row_bytes of every row are written by FFmpeg's HW
+ // transfer; the slice is fully initialized.
+ // - `row_bytes ≤ stride ≤ isize::MAX` per plane_info.
+ unsafe {
+ let row_ptr = info.plane_ptr.add(offset);
+ Some(slice::from_raw_parts(row_ptr, info.row_bytes))
+ }
+ }
+
+ /// Iterator over every row of `plane`. Each yielded slice has length
+ /// [`Self::row_bytes`]`(plane)` — never includes the trailing alignment
+ /// padding that lives within [`Self::stride`].
+ ///
+ /// Returns `None` under the same conditions as [`Self::row`].
+ pub fn rows(&self, plane: usize) -> Option<impl Iterator<Item = &[u8]> + '_> {
+ let info = self.plane_info(plane)?;
+ Some((0..info.plane_h).map(move |y| {
+ // Same bounds argument as `row()`.
+ let offset = y * info.stride;
+ // SAFETY: see `row()` — the same invariants hold here, and the
+ // iterator's lifetime is tied to `&self` so the pointer remains
+ // valid for every yielded slice.
+ unsafe { slice::from_raw_parts(info.plane_ptr.add(offset), info.row_bytes) }
+ }))
+ }
+
+ /// Raw base pointer to `plane`'s allocation, or `None` if the plane is
+ /// out of range or its data pointer is null.
+ ///
+ /// The returned pointer is valid for `stride(plane) * plane_height`
+ /// bytes, **but only the first [`Self::row_bytes`]`(plane)` bytes of
+ /// each row are guaranteed to be initialized.** The trailing per-row
+ /// alignment padding is uninitialized; callers performing wide SIMD
+ /// loads that read past `row_bytes` must mask the result and never
+ /// surface those bytes through a safe `&[u8]`.
///
- /// Currently supported (post-`av_hwframe_transfer_data`):
- /// - 4:2:0 semi-planar 8-bit: `NV12`, `NV21`
- /// - 4:2:2 semi-planar 8-bit: `NV16`
- /// - 4:4:4 semi-planar 8-bit: `NV24`
- /// - 4:2:0 semi-planar 10/12/16-bit: `P010LE`/`P010BE`/`P012LE`/`P016LE`
- /// - 4:2:2 semi-planar 10/12/16-bit: `P210LE`/`P212LE`/`P216LE`
- /// - 4:4:4 semi-planar 10/12/16-bit: `P410LE`/`P412LE`/`P416LE`
- pub fn data(&self, plane: usize) -> Option<&[u8]> {
+ /// This accessor exists for downstream pixel-format converters
+ /// (`colconv`) that work in `(ptr, stride, width, height)` quadruples;
+ /// safe code should prefer [`Self::row`] / [`Self::rows`].
+ pub fn as_ptr(&self, plane: usize) -> Option<*const u8> {
if plane >= self.planes() {
return None;
}
+ // SAFETY: plane index bounds-checked; AVFrame.data is `[*mut u8; 8]`.
+ let p = unsafe { (*self.inner.as_ptr()).data[plane] };
+ if p.is_null() {
+ None
+ } else {
+ Some(p)
+ }
+ }
- // SAFETY: bounds-checked plane index; `linesize` and `height` are
- // primitive c_int reads that cannot themselves be UB.
- let linesize: i32 = unsafe { (*self.inner.as_ptr()).linesize[plane] };
- let height_int: i32 = unsafe { (*self.inner.as_ptr()).height };
- if linesize <= 0 || height_int <= 0 {
+ /// Read every per-plane field needed by the row accessors with the
+ /// safety preconditions enforced once.
+ fn plane_info(&self, plane: usize) -> Option<PlaneInfo> {
if plane >= self.planes() {
return None;
}
- let stride = linesize as usize;
-
- let plane_height = plane_height_for(self.pix_fmt(), plane, height_int as usize)?;
- let len = stride.checked_mul(plane_height)?;
- if len > isize::MAX as usize {
+ // SAFETY: bounds-checked plane index; linesize/height/data are raw
+ // c_int / pointer reads that cannot themselves be UB.
+ let (stride_int, height_int, plane_ptr) = unsafe {
+ let raw = self.inner.as_ptr();
+ ((*raw).linesize[plane], (*raw).height, (*raw).data[plane])
+ };
+ if stride_int <= 0 || height_int <= 0 || plane_ptr.is_null() {
return None;
}
-
- // SAFETY: linesize > 0 and height > 0 verified; len <= isize::MAX
- // verified — both preconditions of `slice::from_raw_parts`. We trust
- // FFmpeg to populate `data[plane]` validly when linesize[plane] is
- // non-zero; the null check is a final defensive guard.
- unsafe {
- let ptr = (*self.inner.as_ptr()).data[plane];
- if ptr.is_null() {
- return None;
- }
- Some(slice::from_raw_parts(ptr, len))
+ let stride = stride_int as usize;
+ let plane_h = plane_height_for(self.pix_fmt(), plane, height_int as usize)?;
+ let row_bytes = plane_row_bytes_for(self.pix_fmt(), plane, self.width() as usize)?;
+ if row_bytes > stride {
+ return None;
}
+ // Bound the entire plane allocation to isize::MAX so any byte offset
+ // computed as `y * stride` (y < plane_h) stays representable, satisfying
+ // the safety contract of `pointer::add` and `slice::from_raw_parts`.
+ let plane_size = stride.checked_mul(plane_h)?;
+ if plane_size > isize::MAX as usize {
+ return None;
+ }
+ Some(PlaneInfo {
+ plane_ptr,
+ stride,
+ plane_h,
+ row_bytes,
+ })
}
/// Crate-internal: hand the wrapped frame to FFmpeg / our decoder code.
@@ -175,10 +267,68 @@ impl Frame {
}
}
+#[derive(Clone, Copy)]
+struct PlaneInfo {
+ plane_ptr: *const u8,
+ stride: usize,
+ plane_h: usize,
+ row_bytes: usize,
+}
+
// `Default` intentionally omitted: constructing a frame can fail (OOM
// in `av_frame_alloc`), and a panicking `default()` would defeat the
// safety stance of [`Frame::empty`]. Use `Frame::empty()?` directly.
+/// Visible byte width of `plane`'s rows for a frame of `frame_width` and
+/// the given pixel format. `None` for formats not in the supported HW-
+/// output set.
+///
+/// Distinct from `linesize` (FFmpeg's per-row stride, which may include
+/// alignment padding). HW transfer paths only initialize bytes
+/// `0..plane_row_bytes_for(...)` of each row; everything from there to
+/// `stride` is uninitialized padding and must not be exposed via
+/// `slice::from_raw_parts`.
+fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Option<usize> {
+ match pix_fmt_int {
+ // 8-bit semi-planar: Y at full width (1 byte/sample), UV interleaved
+ // at horizontally-subsampled chroma (4:2:0 / 4:2:2) with 2 bytes per
+ // chroma pair → both planes have row width == frame_width.
+ pix_fmt::NV12 | pix_fmt::NV21 | pix_fmt::NV16 => match plane {
+ 0 | 1 => Some(frame_width),
+ _ => None,
+ },
+ // 8-bit 4:4:4 semi-planar: chroma at full horizontal resolution,
+ // 2 bytes per pixel (1 byte U + 1 byte V).
+ pix_fmt::NV24 => match plane {
+ 0 => Some(frame_width),
+ 1 => Some(frame_width.checked_mul(2)?),
+ _ => None,
+ },
+ // 10/12/16-bit semi-planar 4:2:0 / 4:2:2: Y is 2 bytes/sample
+ // (high-bit-depth packed in 16-bit). UV interleaved at horizontally-
+ // subsampled chroma with 4 bytes per chroma pair (2 bytes U + 2 bytes
+ // V) → both planes have row width == 2 * frame_width.
+ pix_fmt::P010LE
+ | pix_fmt::P010BE
+ | pix_fmt::P012LE
+ | pix_fmt::P016LE
+ | pix_fmt::P210LE
+ | pix_fmt::P212LE
+ | pix_fmt::P216LE => match plane {
+ 0 | 1 => Some(frame_width.checked_mul(2)?),
+ _ => None,
+ },
+ // 10/12/16-bit 4:4:4 semi-planar: Y is 2 bytes/sample; UV at full
+ // horizontal resolution with 4 bytes per pixel (2 bytes U + 2 bytes V).
+ pix_fmt::P410LE | pix_fmt::P412LE | pix_fmt::P416LE => match plane {
+ 0 => Some(frame_width.checked_mul(2)?),
+ 1 => Some(frame_width.checked_mul(4)?),
+ _ => None,
+ },
+ _ => None,
+ }
+}
+
/// Number of rows in `plane` for a frame of `frame_height` and the given
/// pixel format. `None` for formats not in the supported HW-output set.
///
@@ -235,18 +385,20 @@ mod tests {
}
#[test]
- fn data_returns_none_for_unknown_format() {
+ fn row_returns_none_for_unknown_format() {
let f = Frame::empty().expect("alloc");
// pix_fmt is NONE (-1), not in the supported set.
- assert!(f.data(0).is_none());
+ assert!(f.row(0, 0).is_none());
+ assert!(f.rows(0).is_none());
+ assert!(f.row_bytes(0).is_none());
}
/// Synthesize a frame with a negative linesize (FFmpeg's vertical-flip
- /// convention) and assert `data()` refuses to construct a slice. Without
- /// the linesize > 0 check, the negative `i32 as usize` would produce a
- /// huge positive length and `from_raw_parts` would be UB.
+ /// convention) and assert the row accessors refuse to construct a slice.
+ /// Without the linesize > 0 check, the negative `i32 as usize` would
+ /// produce a huge positive length and `from_raw_parts` would be UB.
#[test]
- fn data_returns_none_for_negative_linesize() {
+ fn row_returns_none_for_negative_linesize() {
let mut f = Frame::empty().expect("alloc");
unsafe {
let raw = f.inner.as_mut_ptr();
@@ -255,15 +407,16 @@ mod tests {
(*raw).height = 1080;
(*raw).linesize[0] = -1920; // vertically-flipped
(*raw).linesize[1] = -1920;
- // data pointers stay null; `data()` would return None on the null
- // check anyway, but should bail earlier on the linesize sign.
+ // data pointers stay null; the accessors would also reject on null,
+ // but should bail earlier on the linesize sign.
}
- assert!(f.data(0).is_none());
- assert!(f.data(1).is_none());
+ assert!(f.row(0, 0).is_none());
+ assert!(f.row(1, 0).is_none());
+ assert!(f.rows(0).is_none());
}
#[test]
- fn data_returns_none_for_non_positive_height() {
+ fn row_returns_none_for_non_positive_height() {
let mut f = Frame::empty().expect("alloc");
unsafe {
let raw = f.inner.as_mut_ptr();
@@ -273,7 +426,81 @@ mod tests {
(*raw).linesize[0] = 1920;
(*raw).linesize[1] = 1920;
}
- assert!(f.data(0).is_none());
+ assert!(f.row(0, 0).is_none());
+ }
+
+ /// Synthesize a frame backed by a manually-allocated buffer with stride
+ /// strictly larger than visible row bytes (the exact case where
+ /// FFmpeg's HW transfer leaves trailing padding uninitialized) and
+ /// confirm the safe row accessor returns slices clipped to the visible
+ /// width.
+ #[test]
+ fn row_clips_to_visible_width_not_stride() {
+ use std::alloc::{alloc, dealloc, Layout};
+ let width = 64usize;
+ let height = 4usize;
+ // Stride > width: 16 bytes of padding per row in the Y plane.
+ let stride = 80usize;
+ let plane_size = stride * height;
+ // Allocate ourselves so we can fully control initialization. Fill
+ // bytes 0..width with 0xAA per row (the "valid pixel" range) and
+ // bytes width..stride with 0xFF (the simulated alignment padding —
+ // FFmpeg would leave these uninitialized; we set them to a sentinel
+ // that the test can detect if the safe slice ever exposes them).
+ let layout = Layout::from_size_align(plane_size, 32).unwrap();
+ let buf = unsafe { alloc(layout) };
+ assert!(!buf.is_null());
+ for y in 0..height {
+ let row = unsafe { buf.add(y * stride) };
+ for x in 0..width {
+ unsafe { *row.add(x) = 0xAA };
+ }
+ for x in width..stride {
+ unsafe { *row.add(x) = 0xFF };
+ }
+ }
+
+ let mut f = Frame::empty().expect("alloc");
+ unsafe {
+ let raw = f.inner.as_mut_ptr();
+ (*raw).format = pix_fmt::NV12;
+ (*raw).width = width as i32;
+ (*raw).height = height as i32;
+ (*raw).linesize[0] = stride as i32;
+ // linesize[1] = 0 keeps planes() at 1 so the test stays focused on
+ // plane 0 without owning a second allocation.
+ (*raw).data[0] = buf;
+ }
+
+ assert_eq!(f.row_bytes(0), Some(width));
+ assert_eq!(f.stride(0), stride);
+ let row0 = f.row(0, 0).expect("row 0");
+ assert_eq!(
+ row0.len(),
+ width,
+ "safe row must be clipped to visible width"
+ );
+ assert!(
+ row0.iter().all(|&b| b == 0xAA),
+ "row must not include padding sentinel 0xFF"
+ );
+
+ let collected: Vec<&[u8]> = f.rows(0).expect("rows iterator").collect();
+ assert_eq!(collected.len(), height);
+ for r in &collected {
+ assert_eq!(r.len(), width);
+ assert!(r.iter().all(|&b| b == 0xAA));
+ }
+
+ // Out-of-range row index returns None instead of panicking.
+ assert!(f.row(0, height).is_none());
+
+ // Detach the buffer before drop so AVFrame's own free path doesn't
+ // touch our manual allocation.
+ unsafe {
+ (*f.inner.as_mut_ptr()).data[0] = std::ptr::null_mut();
+ dealloc(buf, layout);
+ }
}
#[test]
@@ -306,4 +533,27 @@ mod tests {
assert_eq!(plane_height_for(pix_fmt::NONE, 0, 1080), None);
assert_eq!(plane_height_for(pix_fmt::NV12, 2, 1080), None);
}
+
+ #[test]
+ fn plane_row_bytes_table_covers_supported_formats() {
+ // 8-bit 4:2:0 / 4:2:2 — both planes at width.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 0, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV21, 1, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV16, 1, 1920), Some(1920));
+ // 8-bit 4:4:4 — chroma plane is 2 * width.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 0, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 1, 1920), Some(3840));
+ // 10/12/16-bit 4:2:0 / 4:2:2 — both planes at 2 * width.
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 0, 1920), Some(3840));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1920), Some(3840));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P210LE, 1, 1920), Some(3840));
+ // 10/12/16-bit 4:4:4 — Y is 2 * width, chroma is 4 * width.
+ assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 0, 1920), Some(3840));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 1, 1920), Some(7680));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P416LE, 1, 1920), Some(7680));
+ // Unsupported / out-of-range.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NONE, 0, 1920), None);
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 2, 1920), None);
+ }
}
diff --git a/src/lib.rs b/src/lib.rs
index b487132..3654016 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,8 +4,14 @@
//! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and auto-probes the
//! host's hardware backends (VideoToolbox / VAAPI / NVDEC / D3D11VA).
//! There is **no software fallback inside this crate** — if no hardware
-//! backend can decode the stream, [`VideoDecoder::open`] returns
-//! [`Error::AllBackendsFailed`] and the caller picks how to fall back
+//! backend can decode the stream, [`Error::AllBackendsFailed`] surfaces
+//! either from [`VideoDecoder::open`] (when no backend even opens) or
+//! from [`VideoDecoder::receive_frame`] / [`VideoDecoder::send_packet`] /
+//! [`VideoDecoder::send_eof`] (when the initially-opened backend or any
+//! later candidate fails at decode time and the probe order is
+//! exhausted). On single-backend platforms (e.g. macOS, where the order
+//! is `[VideoToolbox]`), only the runtime path can return it. The
+//! caller picks how to fall back to a software decoder of their choice
//! (e.g. by opening an `ffmpeg::decoder::Video` directly).
//!
//! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side
From b61c76a3badbf1a8458a0d392fdcb32f3ad3bf38 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 11:25:54 +1200
Subject: [PATCH 19/27] update
---
src/decoder.rs | 84 +++++++++++++++++++++++++++++++++++++++++++++++++-
src/frame.rs | 55 ++++++++++++++++++++++++++++-----
2 files changed, 130 insertions(+), 9 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index 2fe4f08..87fe847 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -387,7 +387,12 @@ impl VideoDecoder {
match self.state.inner.send_packet(packet) {
Ok(()) => {
if let Some(probe) = self.probe.as_mut() {
- let pkt_size = packet.size();
+ // `try_clone_packet` calls `av_packet_ref`, which deep-copies
+ // side data via `av_packet_copy_props`. The probe budget must
+ // include side-data bytes or a malicious stream can keep
+ // `packet.size()` tiny while attaching megabytes of side data
+ // per packet and inflate retention beyond the advertised cap.
+ let pkt_size = packet.size().saturating_add(packet_side_data_bytes(packet));
let new_count = probe.buffered_packets.len() + 1;
let new_bytes = probe.buffered_bytes.saturating_add(pkt_size);
if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES {
@@ -1093,6 +1098,38 @@ fn try_clone_packet(src: &Packet) -> std::result::Result
+fn packet_side_data_bytes(packet: &Packet) -> usize {
+ // SAFETY: AVPacket.side_data is `*mut AVPacketSideData` and
+ // side_data_elems is `c_int`; both are raw struct fields safe to read.
+ // Field projection (`.size`) does not reconstruct the enum-typed `type_`
+ // field, so the bindgen-enum UB hazard does not apply here.
+ unsafe {
+ let raw = packet.as_ptr();
+ let nel = (*raw).side_data_elems;
+ let arr = (*raw).side_data;
+ if arr.is_null() || nel <= 0 {
+ return 0;
+ }
+ let mut total: usize = 0;
+ for i in 0..(nel as usize) {
+ let entry = arr.add(i);
+ total = total.saturating_add((*entry).size);
+ }
+ total
+ }
+}
+
/// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine
/// distinguishes "drain output and retry" from "stream over").
fn is_eagain(e: &ffmpeg_next::Error) -> bool {
@@ -1324,4 +1361,49 @@ mod tests {
Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"),
}
}
+
+ /// `try_clone_packet` calls `av_packet_ref`, which deep-copies side
+ /// data via `av_packet_copy_props`. The probe budget therefore has to
+ /// include side-data bytes — otherwise a stream with a 16-byte payload
+ /// and a 1 MiB side-data attachment would only consume 16 bytes of the
+ /// 64 MiB budget per packet, and 256 buffered clones would retain
+ /// ~256 MiB of side data while logs claim a few KiB.
+ #[test]
+ fn packet_side_data_counts_against_probe_budget() {
+ use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType};
+
+ const PAYLOAD_SIZE: usize = 16;
+ const SIDE_DATA_SIZE: usize = 1024 * 1024; // 1 MiB
+
+ let mut packet = Packet::new(PAYLOAD_SIZE);
+ // SAFETY: packet is a freshly allocated AVPacket; av_packet_new_side_data
+ // attaches a fresh `SIDE_DATA_SIZE`-byte buffer of the requested type
+ // to it and returns a writable pointer (or NULL on OOM).
+ let p = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA,
+ SIDE_DATA_SIZE,
+ )
+ };
+ assert!(!p.is_null(), "av_packet_new_side_data returned NULL");
+
+ assert_eq!(packet.size(), PAYLOAD_SIZE);
+ let side = packet_side_data_bytes(&packet);
+ assert!(
+ side >= SIDE_DATA_SIZE,
+ "side-data accounting must include the attached buffer; got {side}"
+ );
+ let total = packet.size().saturating_add(side);
+ assert!(
+ total >= PAYLOAD_SIZE + SIDE_DATA_SIZE,
+ "probe budget must charge payload + side data; got {total}"
+ );
+ }
+
+ #[test]
+ fn packet_side_data_is_zero_when_no_side_data() {
+ let packet = Packet::new(64);
+ assert_eq!(packet_side_data_bytes(&packet), 0);
+ }
}
diff --git a/src/frame.rs b/src/frame.rs
index 3f48075..9b651af 100644
--- a/src/frame.rs
+++ b/src/frame.rs
@@ -290,15 +290,19 @@ struct PlaneInfo {
/// `slice::from_raw_parts`.
fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Option<usize> {
match pix_fmt_int {
- // 8-bit semi-planar: Y at full width (1 byte/sample), UV interleaved
- // at horizontally-subsampled chroma (4:2:0 / 4:2:2) with 2 bytes per
- // chroma pair → both planes have row width == frame_width.
+ // 8-bit semi-planar 4:2:0 / 4:2:2: Y at full width (1 byte/sample);
+ // UV interleaved at horizontally-subsampled chroma with `ceil(W/2)`
+ // U+V pairs at 2 bytes per pair. For even W the chroma row equals
+ // `W` bytes (the simple case); for odd W it must round *up* to the
+ // next even byte so the trailing chroma sample is not silently
+ // dropped on width = 2k+1 frames.
pix_fmt::NV12 | pix_fmt::NV21 | pix_fmt::NV16 => match plane {
- 0 | 1 => Some(frame_width),
+ 0 => Some(frame_width),
+ 1 => Some(frame_width.div_ceil(2).checked_mul(2)?),
_ => None,
},
// 8-bit 4:4:4 semi-planar: chroma at full horizontal resolution,
- // 2 bytes per pixel (1 byte U + 1 byte V).
+ // 2 bytes per pixel (1 byte U + 1 byte V) — no rounding required.
pix_fmt::NV24 => match plane {
0 => Some(frame_width),
1 => Some(frame_width.checked_mul(2)?),
@@ -306,8 +310,9 @@ fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Op
},
// 10/12/16-bit semi-planar 4:2:0 / 4:2:2: Y is 2 bytes/sample
// (high-bit-depth packed in 16-bit). UV interleaved at horizontally-
- // subsampled chroma with 4 bytes per chroma pair (2 bytes U + 2 bytes
- // V) → both planes have row width == 2 * frame_width.
+ // subsampled chroma with `ceil(W/2)` U+V pairs at 4 bytes per pair
+ // (2 bytes U + 2 bytes V). Same odd-width rounding as the 8-bit
+ // chroma path, scaled by 2 bytes per sample.
pix_fmt::P010LE
| pix_fmt::P010BE
| pix_fmt::P012LE
@@ -315,7 +320,8 @@ fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Op
| pix_fmt::P210LE
| pix_fmt::P212LE
| pix_fmt::P216LE => match plane {
- 0 | 1 => Some(frame_width.checked_mul(2)?),
+ 0 => Some(frame_width.checked_mul(2)?),
+ 1 => Some(frame_width.div_ceil(2).checked_mul(4)?),
_ => None,
},
// 10/12/16-bit 4:4:4 semi-planar: Y is 2 bytes/sample; UV at full
@@ -534,6 +540,39 @@ mod tests {
assert_eq!(plane_height_for(pix_fmt::NV12, 2, 1080), None);
}
+ /// 4:2:0 / 4:2:2 chroma planes carry `ceil(W/2)` U+V pairs per row.
+ /// For odd `W`, dropping the round-up silently truncates the last chroma
+ /// sample — and the safe row slice would expose a buffer one byte (8-bit)
+ /// or two bytes (high-bit-depth) shorter than the data FFmpeg actually
+ /// wrote. Y planes and 4:4:4 chroma planes are unaffected because their
+ /// row width is just `W` or a fixed multiple of `W`.
+ #[test]
+ fn plane_row_bytes_rounds_up_chroma_for_odd_widths() {
+ // 8-bit subsampled chroma — odd W gains one byte (the missing sample
+ // pair).
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1921), Some(1922));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV21, 1, 1921), Some(1922));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV16, 1, 1921), Some(1922));
+ // High-bit-depth subsampled chroma — odd W gains two bytes.
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010BE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P012LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P016LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P210LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P212LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P216LE, 1, 1921), Some(3844));
+ // Y planes always at full width regardless of subsampling.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 0, 1921), Some(1921));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 0, 1921), Some(3842));
+ // 4:4:4 chroma is at full horizontal resolution — no rounding.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 1, 1921), Some(3842));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 1, 1921), Some(7684));
+ // Even widths must still match the original (pre-fix) values so the
+ // change is purely additive on the dominant code path.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1920), Some(3840));
+ }
+
#[test]
fn plane_row_bytes_table_covers_supported_formats() {
// 8-bit 4:2:0 / 4:2:2 — both planes at width.
From bedf83627acb7874cfe33b1e32cb225b0ab51b91 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 11:51:14 +1200
Subject: [PATCH 20/27] update
---
src/decoder.rs | 84 +++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 79 insertions(+), 5 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index 87fe847..b99e3f7 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -1268,10 +1268,17 @@ fn drain_into_pending(
/// Approximate resident size of a CPU frame: sum of `linesize[plane] *
/// plane_height` across populated planes.
///
-/// Returns `None` for pixel formats not in our chroma-subsampling table,
-/// so the caller can refuse to queue an allocation it can't account for.
-/// Returning 0 for unknown formats would silently bypass the byte cap and
-/// let an unbounded number of large frames into `pending_frames`.
+/// Returns `None` for pixel formats not in our chroma-subsampling table or
+/// for frames whose `linesize` is negative — both signal an allocation we
+/// cannot account for, so the caller refuses to queue them. Returning 0
+/// in either case would silently bypass the byte cap and let an unbounded
+/// number of large frames into `pending_frames`.
+///
+/// Distinguishes `linesize == 0` (FFmpeg's sentinel for "no more populated
+/// planes" — terminates the scan) from `linesize < 0` (FFmpeg's vertically-
+/// flipped layout — `Frame::row` rejects those as unusable, so queueing one
+/// during probe replay would only delay the failure to the consumer side
+/// while wasting `|linesize| * plane_h` bytes of unaccounted memory).
fn cpu_frame_bytes(frame: &frame::Video) -> Option<usize> {
// SAFETY: AVFrame.height / format / linesize are c_int reads.
let (height, pix_fmt, linesizes) = unsafe {
@@ -1281,9 +1288,17 @@ fn cpu_frame_bytes(frame: &frame::Video) -> Option<usize> {
let mut total: usize = 0;
let mut any_plane = false;
for (plane, linesize) in linesizes.iter().enumerate() {
- if *linesize <= 0 {
+ if *linesize == 0 {
+ // End of populated planes — FFmpeg zeroes the trailing entries.
break;
}
+ if *linesize < 0 {
+ // Vertically-flipped layout — refuse to size so `drain_into_pending`
+ // fails the candidate. The same pre-fix code path silently returned
+ // `Some(0)` for a frame whose first plane was negative, allowing up
+ // to MAX_PROBE_PENDING_FRAMES frames of unaccounted memory.
+ return None;
+ }
any_plane = true;
let stride = *linesize as usize;
// If we can't size *any* populated plane, the format is outside our
@@ -1406,4 +1421,63 @@ mod tests {
let packet = Packet::new(64);
assert_eq!(packet_side_data_bytes(&packet), 0);
}
+
+ /// `cpu_frame_bytes` must refuse to size a frame whose first plane has
+ /// a negative `linesize`. Pre-fix, the loop break treated negative the
+ /// same as zero (FFmpeg's "no more populated planes" sentinel), so a
+ /// vertically-flipped frame returned `Some(0)` and `drain_into_pending`
+ /// would queue it as a 0-byte allocation — letting up to
+ /// `MAX_PROBE_PENDING_FRAMES` such frames bypass the configured byte
+ /// budget entirely.
+ #[test]
+ fn cpu_frame_bytes_rejects_negative_first_plane_linesize() {
+ let mut f = frame::Video::empty();
+ // SAFETY: f is freshly allocated; we set `format` to NV12 and the
+ // first plane's linesize negative (FFmpeg's vertical-flip convention).
+ // No backing data buffer is allocated — cpu_frame_bytes must reject
+ // before any pointer dereference.
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).format = crate::pix_fmt::NV12;
+ (*raw).width = 1920;
+ (*raw).height = 1080;
+ (*raw).linesize[0] = -1920;
+ (*raw).linesize[1] = -1920;
+ }
+ assert!(
+ cpu_frame_bytes(&f).is_none(),
+ "negative linesize must be unsizeable, not Some(0)"
+ );
+ }
+
+ /// Sanity-check the positive path: a synthesized NV12 frame with valid
+ /// linesizes must report the sum across populated planes (Y full height
+ /// + UV half height).
+ #[test]
+ fn cpu_frame_bytes_sums_populated_planes() {
+ let mut f = frame::Video::empty();
+ let stride = 1920usize;
+ let height = 1080usize;
+ // SAFETY: same scheme as above; we only mutate primitive struct fields.
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).format = crate::pix_fmt::NV12;
+ (*raw).width = 1920;
+ (*raw).height = height as i32;
+ (*raw).linesize[0] = stride as i32;
+ (*raw).linesize[1] = stride as i32;
+ }
+ let expected = stride * height + stride * (height / 2);
+ assert_eq!(cpu_frame_bytes(&f), Some(expected));
+ }
+
+ /// A frame with only a zero linesize in plane 0 is "no populated
+ /// planes" — must return `Some(0)`, not `None`. Distinguishes the
+ /// FFmpeg sentinel from the vertically-flipped layout.
+ #[test]
+ fn cpu_frame_bytes_zero_first_plane_returns_zero() {
+ let f = frame::Video::empty();
+ // Default-allocated empty AVFrame already has all linesizes zero.
+ assert_eq!(cpu_frame_bytes(&f), Some(0));
+ }
}
From 5447670bf89d3b693fdcb856a3376bda12cbabe5 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 12:37:20 +1200
Subject: [PATCH 21/27] update
---
src/decoder.rs | 97 ++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 95 insertions(+), 2 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index b99e3f7..1783924 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -404,8 +404,14 @@ impl VideoDecoder {
"hwdecode: probe window exceeded caps without first frame; \
abandoning fallback safety net"
);
+ // Abandon the *future* probe-buffering only. `pending_frames`
+ // belong to the currently active backend (possibly the
+ // candidate `advance_probe` committed earlier in this same
+ // `send_packet` call) and are valid output the caller will
+ // dequeue via `receive_frame`. Clearing them here would
+ // silently drop initial frames at exactly the cap-overflow /
+ // OOM-stress paths.
self.probe = None;
- self.pending_frames.clear();
} else {
// Use the checked clone — ffmpeg-next's `Packet::clone`
// discards av_packet_ref's return value and would silently
@@ -420,8 +426,10 @@ impl VideoDecoder {
error = %e,
"hwdecode: packet clone failed for probe history; abandoning fallback safety net"
);
+ // Same reasoning as the cap-overflow branch above:
+ // `pending_frames` are owned by the active backend, not
+ // the probe buffer, so they survive abandonment.
self.probe = None;
- self.pending_frames.clear();
}
}
}
@@ -1480,4 +1488,89 @@ mod tests {
// Default-allocated empty AVFrame already has all linesizes zero.
assert_eq!(cpu_frame_bytes(&f), Some(0));
}
+
+ /// Probe-abandon paths in `send_packet` (cap exceeded, packet clone
+ /// failed) must not drop frames already queued in `pending_frames`.
+ /// Those frames belong to the currently active backend — possibly a
+ /// candidate that `advance_probe` just committed earlier in the same
+ /// `send_packet` call — and are valid output the caller will dequeue
+ /// via `receive_frame`.
+ ///
+ /// Pre-fix, both abandon branches called `pending_frames.clear()`
+ /// alongside `self.probe = None;`, silently dropping initial frames at
+ /// exactly the cap-overflow / OOM-stress paths.
+ ///
+ /// Live HW required: a real `VideoDecoder` is the only way to construct
+ /// a valid `DecoderState` (its `Drop` invokes FFmpeg cleanup), and
+ /// `send_packet` must reach the Ok branch on a real decoder for the
+ /// cap check to fire.
+ #[test]
+ #[ignore = "requires HWDECODE_SAMPLE_VIDEO and a working hardware backend"]
+ fn cap_overflow_preserves_pending_frames_from_active_backend() {
+ use ffmpeg_next::{format, media};
+
+ let path = std::env::var_os("HWDECODE_SAMPLE_VIDEO")
+ .expect("HWDECODE_SAMPLE_VIDEO must be set for this test");
+
+ ffmpeg_next::init().expect("ffmpeg init");
+ let mut input = format::input(&path).expect("open input");
+ let stream_index = input
+ .streams()
+ .best(media::Type::Video)
+ .expect("video stream")
+ .index();
+ let stream_params = input
+ .streams()
+ .best(media::Type::Video)
+ .expect("video stream")
+ .parameters();
+
+ let mut decoder = VideoDecoder::open(stream_params).expect("open decoder");
+ assert!(
+ decoder.probe.is_some(),
+ "probe must be active immediately after open"
+ );
+
+ // Inject sentinel frames as if `advance_probe` had drained them from
+ // a freshly-committed candidate during this same send_packet call.
+ decoder.pending_frames.push_back(frame::Video::empty());
+ decoder.pending_frames.push_back(frame::Video::empty());
+ let pending_before = decoder.pending_frames.len();
+
+ // Fast-forward the probe state to the byte cap so the next successful
+ // send_packet trips the cap-overflow branch.
+ decoder
+ .probe
+ .as_mut()
+ .expect("probe present")
+ .buffered_bytes = MAX_PROBE_PACKET_BYTES;
+
+ // Feed video packets until one `send_packet` call succeeds. We don't
+ // care how many the underlying decoder rejects first; we only need to
+ // exercise the Ok branch's cap-overflow accounting at least once.
+ let mut hit_ok = false;
+ for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ if decoder.send_packet(&packet).is_ok() {
+ hit_ok = true;
+ break;
+ }
+ }
+ assert!(
+ hit_ok,
+ "expected at least one send_packet to succeed and trigger the cap-overflow branch"
+ );
+
+ assert!(
+ decoder.probe.is_none(),
+ "probe must be abandoned after cap overflow"
+ );
+ assert_eq!(
+ decoder.pending_frames.len(),
+ pending_before,
+ "pending_frames belong to the active backend; abandon must not drop them"
+ );
+ }
}
From 88a84d49aa1b7631c8162a507c82eceeede182ed Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 12:59:22 +1200
Subject: [PATCH 22/27] update
---
src/frame.rs | 68 ++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 50 insertions(+), 18 deletions(-)
diff --git a/src/frame.rs b/src/frame.rs
index 9b651af..4642184 100644
--- a/src/frame.rs
+++ b/src/frame.rs
@@ -199,30 +199,42 @@ impl Frame {
}))
}
- /// Raw base pointer to `plane`'s allocation, or `None` if the plane is
- /// out of range or its data pointer is null.
+ /// Raw base pointer to `plane`'s allocation, or `None` if the plane
+ /// fails the same layout validation [`Self::row`] applies.
///
- /// The returned pointer is valid for `stride(plane) * plane_height`
- /// bytes, **but only the first [`Self::row_bytes`]`(plane)` bytes of
- /// each row are guaranteed to be initialized.** The trailing per-row
- /// alignment padding is uninitialized; callers performing wide SIMD
- /// loads that read past `row_bytes` must mask the result and never
- /// surface those bytes through a safe `&[u8]`.
+ /// Returns `None` whenever any of the following is true:
+ /// - The plane index is out of range (`plane >= planes()`).
+ /// - The frame's pixel format is not in the supported HW-output set.
+ /// - `linesize[plane] <= 0`. **In particular, FFmpeg permits negative
+ /// linesizes for vertically-flipped frames with `data[n]` pointing
+ /// at the *end* of the image. Returning that pointer with the
+ /// advertised "valid for `stride * plane_h` bytes forward" contract
+ /// would let a downstream converter walk past the buffer.** This
+ /// accessor refuses the layout instead of handing back a pointer the
+ /// caller cannot safely interpret as forward-addressable.
+ /// - `height <= 0`, the data pointer is null, `row_bytes > stride`, or
+ /// the total plane size would overflow `isize::MAX`.
+ ///
+ /// On `Some(ptr)` the pointer is valid for
+ /// `stride(plane) * plane_height` *forward-addressable* bytes, and
+ /// only the first [`Self::row_bytes`]`(plane)` bytes of each row are
+ /// guaranteed to be initialized. The trailing per-row alignment padding
+ /// is uninitialized; callers performing wide SIMD loads that read past
+ /// `row_bytes` must mask the result and never surface those bytes
+ /// through a safe `&[u8]`.
///
/// This accessor exists for downstream pixel-format converters
/// (`colconv`) that work in `(ptr, stride, width, height)` quadruples;
/// safe code should prefer [`Self::row`] / [`Self::rows`].
pub fn as_ptr(&self, plane: usize) -> Option<*const u8> {
- if plane >= self.planes() {
- return None;
- }
- // SAFETY: plane index bounds-checked; AVFrame.data is `[*mut u8; 8]`.
- let p = unsafe { (*self.inner.as_ptr()).data[plane] };
- if p.is_null() {
- None
- } else {
- Some(p)
- }
+ // Share the full plane-layout validation so the unsafe escape hatch
+ // never escapes a layout that `row()` / `rows()` reject. Returning a
+ // pointer for a negative-stride frame (FFmpeg's vertical-flip
+ // convention, where `data[n]` points at the *end* of the image)
+ // would invite forward-walking out-of-bounds reads from a caller
+ // that trusts the documented "valid for stride × plane_h bytes"
+ // contract.
+ self.plane_info(plane).map(|info| info.plane_ptr)
}
/// Read every per-plane field needed by the row accessors with the
@@ -403,6 +415,11 @@ mod tests {
/// convention) and assert the row accessors refuse to construct a slice.
/// Without the linesize > 0 check, the negative `i32 as usize` would
/// produce a huge positive length and `from_raw_parts` would be UB.
+ ///
+ /// `as_ptr` shares the same validation — handing back the data pointer
+ /// for a negative-stride frame would let a downstream converter
+ /// following the "valid for stride × plane_h bytes forward" contract
+ /// walk past the buffer.
#[test]
fn row_returns_none_for_negative_linesize() {
let mut f = Frame::empty().expect("alloc");
@@ -419,6 +436,12 @@ mod tests {
assert!(f.row(0, 0).is_none());
assert!(f.row(1, 0).is_none());
assert!(f.rows(0).is_none());
+ assert!(
+ f.as_ptr(0).is_none(),
+ "as_ptr must share row()/rows() validation — a negative-stride \
+ frame must not leak a forward-readable plane pointer"
+ );
+ assert!(f.as_ptr(1).is_none());
}
#[test]
@@ -498,6 +521,15 @@ mod tests {
assert!(r.iter().all(|&b| b == 0xAA));
}
+ // `as_ptr` accepts the valid layout and returns the same base pointer
+ // FFmpeg wrote into `data[0]`, so SIMD callers can reach the plane
+ // through the documented unsafe contract.
+ assert_eq!(
+ f.as_ptr(0),
+ Some(buf as *const u8),
+ "as_ptr must surface the plane base for a valid forward-stride frame"
+ );
+
// Out-of-range row index returns None instead of panicking.
assert!(f.row(0, height).is_none());
From 3e10b96091961005b808a37c9ad97682742b5920 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 15:21:16 +1200
Subject: [PATCH 23/27] update
---
src/decoder.rs | 186 ++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 154 insertions(+), 32 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index 1783924..c609e89 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -1215,45 +1215,32 @@ fn drain_into_pending(
}
let mut cpu = alloc_av_frame()?;
// SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data
- // allocates buffers on `cpu`. copy_props moves timing/side data over.
+ // allocates buffers on `cpu`. We deliberately defer
+ // `av_frame_copy_props` until *after* the cap check below — that
+ // call deep-copies every AVFrameSideData entry, which a malicious
+ // stream can size in megabytes; allocating then discarding the
+ // copies on cap rejection is wasted work and a real allocator
+ // pressure source.
unsafe {
let r1 = av_hwframe_transfer_data(cpu.as_mut_ptr(), hw_buf.as_ptr(), 0);
if r1 < 0 {
return Err(ffmpeg_next::Error::from(r1));
}
- let r2 = av_frame_copy_props(cpu.as_mut_ptr(), hw_buf.as_ptr());
- if r2 < 0 {
- return Err(ffmpeg_next::Error::from(r2));
- }
}
- // Post-transfer accounting: size the frame and confirm we can fit
- // it without exceeding the byte budget. Both cap-hit and inability
- // to size the frame are treated as candidate failures, so the byte
- // budget is *strict* — we never queue a frame we can't account for.
- match cpu_frame_bytes(&cpu) {
- Some(bytes) => {
- let new_total = pending_bytes.saturating_add(bytes);
- if new_total > max_bytes {
- tracing::warn!(
- pending_bytes = *pending_bytes,
- frame_bytes = bytes,
- max_bytes,
- "hwdecode: queueing this frame would exceed byte cap; failing candidate replay"
- );
- // cpu drops here.
- return Err(ffmpeg_next::Error::Other {
- errno: libc::ENOMEM,
- });
- }
- *pending_bytes = new_total;
- pending.push_back(cpu);
- }
+ // Pre-copy_props accounting: size the frame's pixel storage and
+ // its (yet-to-be-copied) side data. Both cap-hit and inability to
+ // size the pixel layout are treated as candidate failures, so the
+ // byte budget is *strict* — we never queue a frame we can't fully
+ // account for, and we never pay the side-data deep copy on a
+ // frame we'd immediately drop.
+ let pixel_bytes = match cpu_frame_bytes(&cpu) {
+ Some(b) => b,
None => {
- // Unknown pix_fmt — we cannot bound this frame's contribution
- // against the byte cap, so up to MAX_PROBE_PENDING_FRAMES of
- // them could exhaust memory. Fail the candidate so probing
- // tries the next backend rather than queueing untracked
- // allocations.
+ // Unknown pix_fmt or vertically-flipped layout — we cannot
+ // bound this frame's contribution against the byte cap, so up
+ // to MAX_PROBE_PENDING_FRAMES of them could exhaust memory.
+ // Fail the candidate so probing tries the next backend
+ // rather than queueing untracked allocations.
// SAFETY: AVFrame.format is c_int, safe to read.
let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format };
tracing::warn!(
@@ -1265,7 +1252,34 @@ fn drain_into_pending(
errno: libc::ENOMEM,
});
}
+ };
+ let side_bytes = frame_side_data_bytes(hw_buf);
+ let frame_bytes = pixel_bytes.saturating_add(side_bytes);
+ let new_total = pending_bytes.saturating_add(frame_bytes);
+ if new_total > max_bytes {
+ tracing::warn!(
+ pending_bytes = *pending_bytes,
+ pixel_bytes,
+ side_bytes,
+ max_bytes,
+ "hwdecode: queueing this frame (pixels + side data) would exceed byte cap; \
+ failing candidate replay"
+ );
+ // cpu drops here without paying av_frame_copy_props.
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
}
+ // Cap check passed — now safe to pay the side-data deep copy.
+ // SAFETY: cpu and hw_buf are both valid AVFrames we own.
+ unsafe {
+ let r2 = av_frame_copy_props(cpu.as_mut_ptr(), hw_buf.as_ptr());
+ if r2 < 0 {
+ return Err(ffmpeg_next::Error::from(r2));
+ }
+ }
+ *pending_bytes = new_total;
+ pending.push_back(cpu);
}
Err(e) if is_transient(&e) => return Ok(()),
Err(e) => return Err(e),
@@ -1273,6 +1287,42 @@ fn drain_into_pending(
}
}
+/// Sum of `AVFrameSideData[i].size` across every entry attached to
+/// `frame`. `av_frame_copy_props` performs a deep copy of every side
+/// data buffer (allocates a fresh `AVBufferRef` per entry), so a
+/// candidate decoder that produces large per-frame metadata (HDR
+/// mastering display info, A53 closed captions, ICC profiles, dynamic
+/// HDR, motion vectors, …) would otherwise bypass the
+/// `max_probe_pending_bytes` cap because [`cpu_frame_bytes`] only
+/// accounts for pixel-plane storage.
+///
+/// Reads only the `size` field of each `AVFrameSideData` — never
+/// constructs the bindgen `AVFrameSideDataType` enum, so unknown side-
+/// data types from a future FFmpeg do not invoke UB.
+fn frame_side_data_bytes(frame: &frame::Video) -> usize {
+ // SAFETY: AVFrame.side_data is `*mut *mut AVFrameSideData` and
+ // nb_side_data is `c_int`; both are raw struct fields safe to read.
+ // Field projection through the indirected pointer touches only the
+ // primitive `usize` `.size` field (never `type_`).
+ unsafe {
+ let raw = frame.as_ptr();
+ let nb = (*raw).nb_side_data;
+ let arr = (*raw).side_data;
+ if arr.is_null() || nb <= 0 {
+ return 0;
+ }
+ let mut total: usize = 0;
+ for i in 0..(nb as usize) {
+ let entry = *arr.add(i);
+ if entry.is_null() {
+ continue;
+ }
+ total = total.saturating_add((*entry).size);
+ }
+ total
+ }
+}
+
/// Approximate resident size of a CPU frame: sum of `linesize[plane] *
/// plane_height` across populated planes.
///
@@ -1489,6 +1539,78 @@ mod tests {
assert_eq!(cpu_frame_bytes(&f), Some(0));
}
+ /// `av_frame_copy_props` deep-copies every AVFrameSideData attached
+ /// to the source frame. `frame_side_data_bytes` must surface that
+ /// retention so `drain_into_pending` can charge it against
+ /// `max_probe_pending_bytes` — otherwise a stream with megabytes of
+ /// per-frame metadata can queue up to `MAX_PROBE_PENDING_FRAMES`
+ /// frames and overshoot the configured cap by orders of magnitude.
+ #[test]
+ fn frame_side_data_bytes_counts_attached_buffers() {
+ use ffmpeg_next::ffi::{av_frame_new_side_data, AVFrameSideDataType};
+
+ const SIDE_DATA_SIZE: usize = 1024 * 1024; // 1 MiB
+
+ let mut f = frame::Video::empty();
+ // SAFETY: f is freshly allocated; av_frame_new_side_data attaches a
+ // fresh `SIDE_DATA_SIZE`-byte buffer of the requested type and returns
+ // a pointer to the entry (or NULL on OOM).
+ let p = unsafe {
+ av_frame_new_side_data(
+ f.as_mut_ptr(),
+ AVFrameSideDataType::AV_FRAME_DATA_SEI_UNREGISTERED,
+ SIDE_DATA_SIZE,
+ )
+ };
+ assert!(!p.is_null(), "av_frame_new_side_data returned NULL");
+
+ let bytes = frame_side_data_bytes(&f);
+ assert!(
+ bytes >= SIDE_DATA_SIZE,
+ "side-data accounting must include the attached buffer; got {bytes}"
+ );
+ }
+
+ #[test]
+ fn frame_side_data_bytes_is_zero_for_bare_frame() {
+ let f = frame::Video::empty();
+ assert_eq!(frame_side_data_bytes(&f), 0);
+ }
+
+ /// Multiple side-data entries must be summed, not just the first.
+ #[test]
+ fn frame_side_data_bytes_sums_all_entries() {
+ use ffmpeg_next::ffi::{av_frame_new_side_data, AVFrameSideDataType};
+
+ const ENTRY_A: usize = 256 * 1024; // 256 KiB
+ const ENTRY_B: usize = 512 * 1024; // 512 KiB
+
+ let mut f = frame::Video::empty();
+ // Two distinct types so neither call replaces the other.
+ let p1 = unsafe {
+ av_frame_new_side_data(
+ f.as_mut_ptr(),
+ AVFrameSideDataType::AV_FRAME_DATA_SEI_UNREGISTERED,
+ ENTRY_A,
+ )
+ };
+ let p2 = unsafe {
+ av_frame_new_side_data(
+ f.as_mut_ptr(),
+ AVFrameSideDataType::AV_FRAME_DATA_A53_CC,
+ ENTRY_B,
+ )
+ };
+ assert!(!p1.is_null() && !p2.is_null());
+
+ let bytes = frame_side_data_bytes(&f);
+ assert!(
+ bytes >= ENTRY_A + ENTRY_B,
+ "must sum across all side-data entries; got {bytes}, expected at least {}",
+ ENTRY_A + ENTRY_B
+ );
+ }
+
/// Probe-abandon paths in `send_packet` (cap exceeded, packet clone
/// failed) must not drop frames already queued in `pending_frames`.
/// Those frames belong to the currently active backend — possibly a
From cb8e9e63f2d949fbe9025dabe87c4e8a28d2168f Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 16:27:38 +1200
Subject: [PATCH 24/27] update
---
src/decoder.rs | 177 +++++++++++++++++++++++++++++++++++++++----------
src/ffi.rs | 72 +++++++++++++++++---
2 files changed, 203 insertions(+), 46 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index c609e89..bac87b2 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -842,14 +842,18 @@ impl VideoDecoder {
let mut ctx = build_codec_context(&parameters)?;
let av_type = backend.av_hwdevice_type();
- // Verify the codec advertises this hwaccel. We do *not* read the
- // codec's advertised pix_fmt — we use the hardcoded constant from
- // `Backend::hw_pixel_format` so no FFmpeg-supplied enum value is ever
- // interpreted as `AVPixelFormat`.
- if !codec_supports_hwaccel(unsafe { codec.as_ptr() }, av_type) {
+ // Verify the codec advertises this hwaccel **with the exact HW pix_fmt
+ // we're about to wire up in `get_format`**. FFmpeg's HW config table
+ // is keyed per (device_type, pix_fmt); a codec can advertise the same
+ // device with several HW pix_fmts, so matching only on device_type
+ // would let probing succeed for a backend whose pix_fmt the codec
+ // never offers — the failure would then surface deep inside the
+ // probe/decode loop. Matching the exact pix_fmt keeps the strict
+ // `get_format` honest and gives `open_with` a clean rejection.
+ let hw_pix_fmt = backend.hw_pixel_format();
+ if !codec_supports_hwaccel(unsafe { codec.as_ptr() }, av_type, hw_pix_fmt as i32) {
return Err(Error::BackendUnsupportedByCodec(backend));
}
- let hw_pix_fmt = backend.hw_pixel_format();
// Create the device context.
let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut();
@@ -868,6 +872,19 @@ impl VideoDecoder {
wanted: hw_pix_fmt,
wanted_int: hw_pix_fmt as i32,
}));
+ // RAII guard: from now until the end-of-function `into_owned()`, every
+ // early return — `av_buffer_ref` failure, `open_as` failure, codec_type
+ // mismatch, or any future error path added between here and the
+ // `DecoderState` construction — frees `hw_device_ref` and
+ // `callback_state` via the guard's Drop. Without it, each error site
+ // had to remember to clean up these two FFI-owned resources by hand;
+ // the codec_type-mismatch branch was missed and silently leaked one
+ // device ref + one heap allocation per bad input.
+ let guard = PartialBuildState {
+ hw_device_ref,
+ callback_state,
+ };
+
// SAFETY: ctx is a freshly-constructed AVCodecContext we own;
// av_buffer_ref bumps the refcount of the device buffer for FFmpeg's
// use (we keep our own ref in `hw_device_ref` for cleanup).
@@ -876,20 +893,18 @@ impl VideoDecoder {
// HW-flagged setup but no actual device reference.
let device_ref_for_ctx = unsafe { av_buffer_ref(hw_device_ref) };
if device_ref_for_ctx.is_null() {
- // SAFETY: rolling back what we just allocated above. hw_device_ref
- // is non-null (we checked after av_hwdevice_ctx_create); callback_state
- // was just freshly Box::into_raw'd.
- unsafe {
- let mut hw = hw_device_ref;
- av_buffer_unref(&mut hw);
- drop(Box::from_raw(callback_state));
- }
+ // guard's Drop frees hw_device_ref (the first ref) and callback_state.
return Err(Error::Ffmpeg(ffmpeg_next::Error::Other {
errno: libc::ENOMEM,
}));
}
// SAFETY: device_ref_for_ctx is a valid AVBufferRef* from av_buffer_ref;
- // ctx is freshly built and owned by us.
+ // ctx is freshly built and owned by us. After this point ctx aliases
+ // `callback_state` via `opaque` (FFmpeg never frees opaque, so
+ // `callback_state` ownership stays with us / the guard) and aliases
+ // `device_ref_for_ctx` (the second ref) via `hw_device_ctx` (FFmpeg
+ // unrefs that on codec context drop, independent of the guard's first
+ // ref).
unsafe {
let raw = ctx.as_mut_ptr();
(*raw).hw_device_ctx = device_ref_for_ctx;
@@ -897,8 +912,9 @@ impl VideoDecoder {
(*raw).get_format = Some(get_hw_format);
}
- // Open the decoder. On any failure, release the resources we just
- // allocated so we don't leak.
+ // Open the decoder. On failure `ctx`/`opened` Drop releases the codec
+ // context (and via that the second device ref); the guard releases the
+ // first device ref and the callback state.
//
// We deliberately bypass `Opened::video()` because it calls
// `Context::medium()`, which reads `AVCodecContext.codec_type` as the
@@ -906,24 +922,7 @@ impl VideoDecoder {
// systematically removing. Instead: validate `codec_type` as a raw
// `c_int` ourselves, then construct the `decoder::Video` wrapper
// directly via its public tuple field.
- let opened = match ctx.decoder().open_as(codec) {
- Ok(o) => o,
- Err(e) => {
- // SAFETY: we either allocated these in this function above or
- // they are null; av_buffer_unref / Box::from_raw handle null
- // explicitly (we check first).
- unsafe {
- let mut hw = hw_device_ref;
- if !hw.is_null() {
- av_buffer_unref(&mut hw);
- }
- if !callback_state.is_null() {
- drop(Box::from_raw(callback_state));
- }
- }
- return Err(Error::Ffmpeg(e));
- }
- };
+ let opened = ctx.decoder().open_as(codec).map_err(Error::Ffmpeg)?;
// Validate codec_type as a raw integer — never construct AVMediaType
// from an unvalidated runtime value.
@@ -935,7 +934,8 @@ impl VideoDecoder {
if codec_type_int != video_type_int {
// Not a video codec context — surface the same error
// `Opened::video()` would have, without going through enum
- // construction. Cleanup runs via `opened`'s Drop.
+ // construction. `opened`'s Drop releases the codec context; the
+ // guard releases the first hw_device_ref and the callback state.
return Err(Error::Ffmpeg(ffmpeg_next::Error::InvalidData));
}
// SAFETY of construction: `decoder::Video` is `pub struct Video(pub Opened)`.
@@ -943,6 +943,9 @@ impl VideoDecoder {
// `Opened::video()` does on success, just without the enum read.
let opened = ffmpeg_next::decoder::Video(opened);
+ // Disarm the guard and transfer ownership of both resources into the
+ // returned DecoderState (whose own Drop handles their lifetime).
+ let (hw_device_ref, callback_state) = guard.into_owned();
Ok(DecoderState {
inner: ManuallyDrop::new(opened),
backend,
@@ -952,6 +955,55 @@ impl VideoDecoder {
}
}
+/// RAII guard for the partially-owned FFmpeg state that
+/// [`VideoDecoder::build_state`] holds between the
+/// `av_hwdevice_ctx_create` and `Box::into_raw(CallbackState)`
+/// allocations and the final `DecoderState` construction.
+///
+/// If `build_state` returns `Err` for any reason in that window
+/// (`av_buffer_ref` ENOMEM, `open_as` failure, codec_type mismatch, or
+/// any future error path), this guard's `Drop` releases
+/// `hw_device_ref` — the first ref returned by `av_hwdevice_ctx_create`,
+/// distinct from the second ref FFmpeg unrefs when the codec context
+/// drops — and the boxed `CallbackState`, which FFmpeg never touches
+/// because `AVCodecContext::opaque` is purely user-owned.
+///
+/// Successful construction calls [`Self::into_owned`] to disarm the
+/// guard and hand both pointers to the new `DecoderState`.
+struct PartialBuildState {
+ hw_device_ref: *mut AVBufferRef,
+ callback_state: *mut CallbackState,
+}
+
+impl PartialBuildState {
+ /// Disarm the guard: return the owned pointers and replace the guard's
+ /// fields with null so its Drop is a no-op.
+ fn into_owned(mut self) -> (*mut AVBufferRef, *mut CallbackState) {
+ let hw = std::mem::replace(&mut self.hw_device_ref, ptr::null_mut());
+ let cb = std::mem::replace(&mut self.callback_state, ptr::null_mut());
+ (hw, cb)
+ }
+}
+
+impl Drop for PartialBuildState {
+ fn drop(&mut self) {
+ // SAFETY: pointers are either freshly allocated by `build_state` (via
+ // `av_hwdevice_ctx_create` and `Box::into_raw`) or null after
+ // `into_owned`. `Box::from_raw` must never see a null pointer, so that
+ // check is required; `av_buffer_unref` tolerates a null reference, but
+ // we check it too for symmetry. Both are sound on resources we own.
+ unsafe {
+ if !self.hw_device_ref.is_null() {
+ let mut hw = self.hw_device_ref;
+ av_buffer_unref(&mut hw);
+ }
+ if !self.callback_state.is_null() {
+ drop(Box::from_raw(self.callback_state));
+ }
+ }
+ }
+}
+
/// Download a HW frame into a CPU [`Frame`]. Always unrefs the destination
/// first so reuse across resolution changes is safe.
unsafe fn transfer_hw_frame(
@@ -1611,6 +1663,59 @@ mod tests {
);
}
+ /// `PartialBuildState`'s `Drop` must be a no-op when both pointers are
+ /// null — the disarmed-by-`into_owned` post-state. A panic / double-free
+ /// here would break the success path of every `build_state` call.
+ #[test]
+ fn partial_build_state_drop_is_no_op_on_null_pointers() {
+ let _g = PartialBuildState {
+ hw_device_ref: ptr::null_mut(),
+ callback_state: ptr::null_mut(),
+ };
+ // Drops at end of scope. Test passes if it doesn't panic / crash.
+ }
+
+ /// `into_owned` must return the original pointers and disarm the guard
+ /// (so the guard's Drop becomes a no-op and the caller can safely
+ /// transfer ownership to `DecoderState` without double-freeing).
+ #[test]
+ fn partial_build_state_into_owned_disarms_and_returns_originals() {
+ use ffmpeg_next::ffi::{av_buffer_alloc, av_buffer_unref, AVPixelFormat};
+
+ // SAFETY: av_buffer_alloc returns a fresh AVBufferRef* with refcount
+ // 1, or NULL on OOM. We free it ourselves below (after into_owned
+ // disarms the guard).
+ let hw_ptr = unsafe { av_buffer_alloc(64) };
+ assert!(!hw_ptr.is_null(), "av_buffer_alloc(64) returned NULL");
+ let cb_ptr = Box::into_raw(Box::new(CallbackState {
+ wanted: AVPixelFormat::AV_PIX_FMT_NONE,
+ wanted_int: AVPixelFormat::AV_PIX_FMT_NONE as i32,
+ }));
+
+ let g = PartialBuildState {
+ hw_device_ref: hw_ptr,
+ callback_state: cb_ptr,
+ };
+ let (hw_back, cb_back) = g.into_owned();
+ assert_eq!(
+ hw_back, hw_ptr,
+ "into_owned must return the original device ref"
+ );
+ assert_eq!(
+ cb_back, cb_ptr,
+ "into_owned must return the original callback box"
+ );
+
+ // Guard is now disarmed (its Drop ran with null pointers as soon as
+ // into_owned consumed it). We own the pointers and must free them.
+ // SAFETY: hw_ptr and cb_ptr are still the freshly-allocated values.
+ unsafe {
+ let mut hw = hw_back;
+ av_buffer_unref(&mut hw);
+ drop(Box::from_raw(cb_back));
+ }
+ }
+
/// Probe-abandon paths in `send_packet` (cap exceeded, packet clone
/// failed) must not drop frames already queued in `pending_frames`.
/// Those frames belong to the currently active backend — possibly a
diff --git a/src/ffi.rs b/src/ffi.rs
index 794d474..04aa50f 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -84,15 +84,28 @@ pub(crate) unsafe extern "C" fn get_hw_format(
}
/// Walk the codec's `AVCodecHWConfig` table and return whether the codec
-/// advertises support for `device_type` via the `HW_DEVICE_CTX` setup method.
+/// advertises support for `device_type` **with** `wanted_pix_fmt` via the
+/// `HW_DEVICE_CTX` setup method.
///
-/// We do not return the codec's advertised `pix_fmt` — we know it already
-/// from [`crate::backend::Backend::hw_pixel_format`] (a hardcoded constant
-/// from our bindings). All reads from the FFmpeg-supplied `AVCodecHWConfig`
-/// are performed as raw integers via `addr_of!` + `ptr::read::<i32>` to
-/// avoid copying or interpreting enum-typed fields whose runtime values
-/// might not match our build's discriminant set.
-pub(crate) fn codec_supports_hwaccel(codec: *const AVCodec, device_type: AVHWDeviceType) -> bool {
+/// FFmpeg's HW config table is keyed per (device_type, pix_fmt) pair: a
+/// codec can advertise the same device with several different hardware
+/// pixel formats (e.g. VAAPI codecs that offer both `AV_PIX_FMT_VAAPI`
+/// and `AV_PIX_FMT_DRM_PRIME`). Matching only on `device_type` would let
+/// us proceed to install a strict `get_format` callback for a format the
+/// codec never advertises, and the failure would surface deep inside the
+/// probe / decode path instead of up front. Requiring the codec to
+/// advertise the **exact** pix_fmt our `Backend` uses keeps the strict
+/// `get_format` honest and gives `open_with` a clean rejection signal.
+///
+/// All reads from the FFmpeg-supplied `AVCodecHWConfig` are performed as
+/// raw integers via `addr_of!` + `ptr::read::<i32>` to avoid copying or
+/// interpreting enum-typed fields whose runtime values might not match
+/// our build's discriminant set.
+pub(crate) fn codec_supports_hwaccel(
+ codec: *const AVCodec,
+ device_type: AVHWDeviceType,
+ wanted_pix_fmt: i32,
+) -> bool {
debug_assert!(!codec.is_null());
let device_type_int = device_type as i32;
let mut i = 0;
@@ -106,15 +119,18 @@ pub(crate) fn codec_supports_hwaccel(codec: *const AVCodec, device_type: AVHWDev
// (which would interpret `pix_fmt` and `device_type` as their enum types).
// SAFETY: `cfg` is non-null and points to a valid `AVCodecHWConfig` for
// the lifetime of the call; `addr_of!` projects to a sized field; the
- // `*const i32` cast is sound because `methods` is `c_int` (i32) and
+ // `*const i32` cast is sound because `methods` is `c_int` (i32),
// `device_type` is `AVHWDeviceType` (`#[repr(u32)]`, but FFmpeg's
- // assigned values fit in i32 and the runtime layout is i32-sized).
+ // assigned values fit in i32 and the runtime layout is i32-sized),
+ // and `pix_fmt` is `AVPixelFormat` (`#[repr(i32)]`).
let methods: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).methods)) };
let cfg_device_type_int: i32 =
unsafe { ptr::read(ptr::addr_of!((*cfg).device_type) as *const i32) };
+ let cfg_pix_fmt_int: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).pix_fmt) as *const i32) };
if methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0
&& cfg_device_type_int == device_type_int
+ && cfg_pix_fmt_int == wanted_pix_fmt
{
return true;
}
@@ -217,4 +233,40 @@ mod tests {
);
assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE);
}
+
+ /// `codec_supports_hwaccel` must reject a (device_type, pix_fmt) pair
+ /// that the codec does not advertise — even if the device alone is
+ /// listed. Without this check, the strict `get_format` callback would
+ /// be wired up for a HW pix_fmt the codec never offers and the failure
+ /// would surface deep inside the probe / decode path instead of at
+ /// `open_with` / probe-build time.
+ ///
+ /// macOS-only: the test relies on FFmpeg's H.264 decoder advertising
+ /// `(AV_HWDEVICE_TYPE_VIDEOTOOLBOX, AV_PIX_FMT_VIDEOTOOLBOX)`, which is
+ /// only present in builds with VideoToolbox compiled in.
+ #[cfg(target_os = "macos")]
+ #[test]
+ fn codec_supports_hwaccel_requires_matching_pix_fmt() {
+ use ffmpeg_next::ffi::{avcodec_find_decoder, AVCodecID, AVHWDeviceType, AVPixelFormat};
+
+ // SAFETY: AV_CODEC_ID_H264 is a known constant in our build's
+ // `AVCodecID` discriminant set; constructing it does not invoke the
+ // bindgen-enum UB we worry about for runtime-derived ids.
+ let codec_ptr = unsafe { avcodec_find_decoder(AVCodecID::AV_CODEC_ID_H264) };
+ assert!(!codec_ptr.is_null(), "H.264 decoder must be present");
+
+ let device = AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
+ let videotoolbox = AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX as i32;
+ let nv12 = AVPixelFormat::AV_PIX_FMT_NV12 as i32;
+
+ assert!(
+ codec_supports_hwaccel(codec_ptr, device, videotoolbox),
+ "VideoToolbox + AV_PIX_FMT_VIDEOTOOLBOX must be advertised by FFmpeg's H.264 decoder"
+ );
+ assert!(
+ !codec_supports_hwaccel(codec_ptr, device, nv12),
+ "VideoToolbox + AV_PIX_FMT_NV12 must NOT match the codec's HW config — \
+ the strict get_format would have no offered HW format to return"
+ );
+ }
}
From 65ae604a74556ee89a249e2e4c5125735a6006bb Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 16:51:15 +1200
Subject: [PATCH 25/27] update
---
src/decoder.rs | 326 +++++++++++++++++++++++++++----------------------
1 file changed, 179 insertions(+), 147 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index bac87b2..81db18a 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -7,10 +7,10 @@ use ffmpeg_next::{
Context,
},
ffi::{
- av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref,
- av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_ref, avcodec_alloc_context3,
- avcodec_free_context, avcodec_parameters_alloc, avcodec_parameters_copy,
- avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef, AVCodec, AVMediaType,
+ av_buffer_ref, av_buffer_unref, av_frame_move_ref, av_frame_unref, av_hwdevice_ctx_create,
+ av_hwframe_transfer_data, av_packet_ref, avcodec_alloc_context3, avcodec_free_context,
+ avcodec_parameters_alloc, avcodec_parameters_copy, avcodec_parameters_free,
+ avcodec_parameters_to_context, AVBufferRef, AVCodec, AVFrame, AVMediaType,
},
frame, Codec, Packet, Rational,
};
@@ -109,6 +109,29 @@ const MAX_PROBE_PACKETS: usize = 256;
/// gives untrusted media a hard ceiling.
const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024;
+/// Hard cap on the number of side-data entries we tolerate per buffered
+/// packet. `av_packet_ref` allocates an `AVPacketSideData` descriptor and
+/// an `AVBufferRef` per entry, so a packet stuffed with many tiny or
+/// zero-sized entries can consume significant memory in descriptor /
+/// allocator overhead even after [`packet_side_data_bytes`] charges
+/// [`SIDE_DATA_ENTRY_OVERHEAD`] bytes per entry. Refusing to clone such
+/// packets short-circuits the descriptor explosion path.
+///
+/// Sized for legitimate streams (typical video packets carry 0-5 side-
+/// data entries; SEI-heavy HEVC/AV1 maybe a dozen) while comfortably
+/// rejecting weaponised input.
+const MAX_PROBE_PACKET_SIDE_DATA_ENTRIES: usize = 64;
+
+/// Conservative per-side-data-entry overhead estimate used by both
+/// [`packet_side_data_bytes`] and the budget accounting in
+/// [`VideoDecoder::send_packet`]. Counts the `AVPacketSideData`
+/// descriptor (24 bytes per the FFmpeg 8.x bindings), the `AVBufferRef`
+/// FFmpeg allocates per entry, and a margin for malloc bookkeeping
+/// (header bytes, alignment slack). Setting it on the high side keeps
+/// the byte cap a true upper bound on retained memory; under-charging
+/// would let many tiny entries slip past the cap.
+const SIDE_DATA_ENTRY_OVERHEAD: usize = 80;
+
/// Maximum number of CPU frames we are willing to queue from a candidate
/// during probe replay. Each frame is a fully-allocated CPU buffer
/// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so
@@ -389,18 +412,32 @@ impl VideoDecoder {
if let Some(probe) = self.probe.as_mut() {
// `try_clone_packet` calls `av_packet_ref`, which deep-copies
// side data via `av_packet_copy_props`. The probe budget must
- // include side-data bytes or a malicious stream can keep
- // `packet.size()` tiny while attaching megabytes of side data
- // per packet and inflate retention beyond the advertised cap.
+ // include both descriptor + ref overhead per side-data entry
+ // (via `packet_side_data_bytes`) and a hard cap on the entry
+ // count itself — without the count cap, a packet stuffed with
+ // many tiny entries can dominate retained memory before the
+ // byte cap is even close to firing.
+ let side_count = packet_side_data_count(packet);
let pkt_size = packet.size().saturating_add(packet_side_data_bytes(packet));
let new_count = probe.buffered_packets.len() + 1;
let new_bytes = probe.buffered_bytes.saturating_add(pkt_size);
- if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES {
+ let entry_cap_exceeded = side_count > MAX_PROBE_PACKET_SIDE_DATA_ENTRIES;
+ if new_count > MAX_PROBE_PACKETS
+ || new_bytes > MAX_PROBE_PACKET_BYTES
+ || entry_cap_exceeded
+ {
tracing::warn!(
packets = new_count,
bytes = new_bytes,
+ side_data_entries = side_count,
max_packets = MAX_PROBE_PACKETS,
max_bytes = MAX_PROBE_PACKET_BYTES,
+ max_side_data_entries = MAX_PROBE_PACKET_SIDE_DATA_ENTRIES,
+ trigger = if entry_cap_exceeded {
+ "side_data_entry_cap"
+ } else {
+ "byte_or_packet_cap"
+ },
"hwdecode: probe window exceeded caps without first frame; \
abandoning fallback safety net"
);
@@ -1006,6 +1043,20 @@ impl Drop for PartialBuildState {
/// Download a HW frame into a CPU [`Frame`]. Always unrefs the destination
/// first so reuse across resolution changes is safe.
+///
+/// Deliberately does **not** call `av_frame_copy_props`. That FFmpeg
+/// helper deep-copies AVFrame side data (SEI, mastering display, ICC
+/// profiles, dynamic HDR, etc.), the metadata dict, and bumps both
+/// `opaque_ref` and `private_ref` on every receive — none of which
+/// `Frame` exposes via its public accessors. On a crafted stream with
+/// megabytes of per-frame metadata that would mean an unbounded
+/// allocation per receive, with no caller-visible benefit. We instead
+/// copy only the scalar fields the public API can read (today: `pts`);
+/// pixel layout (`width`, `height`, `format`, `linesize`, `data`) is
+/// already set by `av_hwframe_transfer_data`. If `Frame` ever grows
+/// accessors for timing extras (`duration`, `time_base`, `pkt_dts`) or
+/// color metadata, add those to `copy_frame_props_minimal` at the same
+/// time.
unsafe fn transfer_hw_frame(
dst: &mut Frame,
src: &mut frame::Video,
@@ -1016,14 +1067,27 @@ unsafe fn transfer_hw_frame(
if ret < 0 {
return Err(ffmpeg_next::Error::from(ret));
}
- let ret = av_frame_copy_props(dst.as_inner_mut().as_mut_ptr(), src.as_ptr());
- if ret < 0 {
- return Err(ffmpeg_next::Error::from(ret));
- }
+ copy_frame_props_minimal(dst.as_inner_mut().as_mut_ptr(), src.as_ptr());
}
Ok(())
}
+/// Bounded substitute for `av_frame_copy_props`. Copies only the scalar
+/// AVFrame fields the public `Frame` API needs from `src` to `dst` —
+/// today just `pts`. Skips every allocating field (`av_dict_copy` for
+/// `metadata`, `av_frame_new_side_data` + memcpy for each `side_data[i]`,
+/// `av_buffer_replace` for `opaque_ref` / `private_ref`) so the cost is
+/// O(1) per frame regardless of what the source attaches.
+///
+/// # Safety
+/// Both pointers must be valid `AVFrame` pointers we own; field
+/// projection touches only POD scalars, no enums or buffer refs.
+unsafe fn copy_frame_props_minimal(dst: *mut AVFrame, src: *const AVFrame) {
+ unsafe {
+ (*dst).pts = (*src).pts;
+ }
+}
+
/// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame`
/// and must not be treated as backend failures.
fn is_transient(e: &ffmpeg_next::Error) -> bool {
@@ -1181,8 +1245,12 @@ fn packet_side_data_bytes(packet: &Packet) -> usize {
if arr.is_null() || nel <= 0 {
return 0;
}
- let mut total: usize = 0;
- for i in 0..(nel as usize) {
+ let count = nel as usize;
+ // Descriptor + AVBufferRef + allocator overhead per entry — without
+ // this, a packet stuffed with many zero-size entries could slip past
+ // `MAX_PROBE_PACKET_BYTES` purely on descriptor cost.
+ let mut total = count.saturating_mul(SIDE_DATA_ENTRY_OVERHEAD);
+ for i in 0..count {
let entry = arr.add(i);
total = total.saturating_add((*entry).size);
}
@@ -1190,6 +1258,20 @@ fn packet_side_data_bytes(packet: &Packet) -> usize {
}
}
+/// Number of `AVPacketSideData` entries on `packet`. The probe buffer
+/// uses this to enforce [`MAX_PROBE_PACKET_SIDE_DATA_ENTRIES`] before
+/// cloning, so a packet whose entry count alone would dominate retained
+/// memory is rejected up front.
+fn packet_side_data_count(packet: &Packet) -> usize {
+ // SAFETY: side_data_elems is `c_int`, safe to read; clamp negatives to 0.
+ let nel = unsafe { (*packet.as_ptr()).side_data_elems };
+ if nel <= 0 {
+ 0
+ } else {
+ nel as usize
+ }
+}
+
/// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine
/// distinguishes "drain output and retry" from "stream over").
fn is_eagain(e: &ffmpeg_next::Error) -> bool {
@@ -1266,25 +1348,20 @@ fn drain_into_pending(
});
}
let mut cpu = alloc_av_frame()?;
- // SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data
- // allocates buffers on `cpu`. We deliberately defer
- // `av_frame_copy_props` until *after* the cap check below — that
- // call deep-copies every AVFrameSideData entry, which a malicious
- // stream can size in megabytes; allocating then discarding the
- // copies on cap rejection is wasted work and a real allocator
- // pressure source.
+ // SAFETY: hw_buf is a freshly-decoded HW frame;
+ // `av_hwframe_transfer_data` allocates pixel buffers on `cpu`.
+ // We use `copy_frame_props_minimal` (only `pts`) instead of
+ // `av_frame_copy_props` for the same reason as
+ // `transfer_hw_frame`: the public `Frame` API does not expose
+ // side data / metadata / opaque refs, so deep-copying them per
+ // frame is pure cost and an unbounded allocation source on
+ // attacker-controlled streams.
unsafe {
let r1 = av_hwframe_transfer_data(cpu.as_mut_ptr(), hw_buf.as_ptr(), 0);
if r1 < 0 {
return Err(ffmpeg_next::Error::from(r1));
}
}
- // Pre-copy_props accounting: size the frame's pixel storage and
- // its (yet-to-be-copied) side data. Both cap-hit and inability to
- // size the pixel layout are treated as candidate failures, so the
- // byte budget is *strict* — we never queue a frame we can't fully
- // account for, and we never pay the side-data deep copy on a
- // frame we'd immediately drop.
let pixel_bytes = match cpu_frame_bytes(&cpu) {
Some(b) => b,
None => {
@@ -1305,30 +1382,25 @@ fn drain_into_pending(
});
}
};
- let side_bytes = frame_side_data_bytes(hw_buf);
- let frame_bytes = pixel_bytes.saturating_add(side_bytes);
- let new_total = pending_bytes.saturating_add(frame_bytes);
+ let new_total = pending_bytes.saturating_add(pixel_bytes);
if new_total > max_bytes {
tracing::warn!(
pending_bytes = *pending_bytes,
pixel_bytes,
- side_bytes,
max_bytes,
- "hwdecode: queueing this frame (pixels + side data) would exceed byte cap; \
+ "hwdecode: queueing this frame would exceed byte cap; \
failing candidate replay"
);
- // cpu drops here without paying av_frame_copy_props.
+ // cpu drops here without ever paying a metadata deep copy.
return Err(ffmpeg_next::Error::Other {
errno: libc::ENOMEM,
});
}
- // Cap check passed — now safe to pay the side-data deep copy.
- // SAFETY: cpu and hw_buf are both valid AVFrames we own.
+ // Cap check passed — copy only the scalar AVFrame fields the
+ // public API needs. SAFETY: cpu and hw_buf are both valid
+ // AVFrames we own.
unsafe {
- let r2 = av_frame_copy_props(cpu.as_mut_ptr(), hw_buf.as_ptr());
- if r2 < 0 {
- return Err(ffmpeg_next::Error::from(r2));
- }
+ copy_frame_props_minimal(cpu.as_mut_ptr(), hw_buf.as_ptr());
}
*pending_bytes = new_total;
pending.push_back(cpu);
@@ -1339,42 +1411,6 @@ fn drain_into_pending(
}
}
-/// Sum of `AVFrameSideData[i].size` across every entry attached to
-/// `frame`. `av_frame_copy_props` performs a deep copy of every side
-/// data buffer (allocates a fresh `AVBufferRef` per entry), so a
-/// candidate decoder that produces large per-frame metadata (HDR
-/// mastering display info, A53 closed captions, ICC profiles, dynamic
-/// HDR, motion vectors, …) would otherwise bypass the
-/// `max_probe_pending_bytes` cap because [`cpu_frame_bytes`] only
-/// accounts for pixel-plane storage.
-///
-/// Reads only the `size` field of each `AVFrameSideData` — never
-/// constructs the bindgen `AVFrameSideDataType` enum, so unknown side-
-/// data types from a future FFmpeg do not invoke UB.
-fn frame_side_data_bytes(frame: &frame::Video) -> usize {
- // SAFETY: AVFrame.side_data is `*mut *mut AVFrameSideData` and
- // nb_side_data is `c_int`; both are raw struct fields safe to read.
- // Field projection through the indirected pointer touches only the
- // primitive `usize` `.size` field (never `type_`).
- unsafe {
- let raw = frame.as_ptr();
- let nb = (*raw).nb_side_data;
- let arr = (*raw).side_data;
- if arr.is_null() || nb <= 0 {
- return 0;
- }
- let mut total: usize = 0;
- for i in 0..(nb as usize) {
- let entry = *arr.add(i);
- if entry.is_null() {
- continue;
- }
- total = total.saturating_add((*entry).size);
- }
- total
- }
-}
-
/// Approximate resident size of a CPU frame: sum of `linesize[plane] *
/// plane_height` across populated planes.
///
@@ -1530,6 +1566,74 @@ mod tests {
fn packet_side_data_is_zero_when_no_side_data() {
let packet = Packet::new(64);
assert_eq!(packet_side_data_bytes(&packet), 0);
+ assert_eq!(packet_side_data_count(&packet), 0);
+ }
+
+ /// Packets with many tiny side-data entries must be charged the
+ /// per-entry descriptor + ref overhead, even when each entry's payload
+ /// `size` is zero. Without `SIDE_DATA_ENTRY_OVERHEAD`, a packet stuffed
+ /// with N zero-byte entries would charge 0 bytes against the budget
+ /// while `av_packet_ref` still allocates ~`N * 80` bytes of descriptor,
+ /// AVBufferRef, and allocator overhead per cloned copy.
+ #[test]
+ fn packet_side_data_bytes_charges_descriptor_overhead_for_zero_size_entries() {
+ use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType};
+
+ let mut packet = Packet::new(0);
+ // Attach two zero-byte entries of distinct types so neither call
+ // replaces the other.
+ let p1 = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA,
+ 0,
+ )
+ };
+ let p2 = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_PALETTE,
+ 0,
+ )
+ };
+ assert!(
+ !p1.is_null() && !p2.is_null(),
+ "av_packet_new_side_data NULL"
+ );
+
+ assert_eq!(packet_side_data_count(&packet), 2);
+ let bytes = packet_side_data_bytes(&packet);
+ assert!(
+ bytes >= 2 * SIDE_DATA_ENTRY_OVERHEAD,
+ "must charge descriptor overhead per entry even at zero payload; got {bytes}"
+ );
+ }
+
+ /// `MAX_PROBE_PACKET_SIDE_DATA_ENTRIES` is the cliff above which a
+ /// packet is rejected from the probe buffer regardless of byte total —
+ /// pure descriptor inflation is its own attack vector. Sanity-check
+ /// that `packet_side_data_count` reports the value the cap is checked
+ /// against.
+ #[test]
+ fn packet_side_data_count_reports_attached_entries() {
+ use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType};
+
+ let mut packet = Packet::new(0);
+ let _p1 = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA,
+ 4,
+ )
+ };
+ let _p2 = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_PALETTE,
+ 4,
+ )
+ };
+ assert_eq!(packet_side_data_count(&packet), 2);
}
/// `cpu_frame_bytes` must refuse to size a frame whose first plane has
@@ -1591,78 +1695,6 @@ mod tests {
assert_eq!(cpu_frame_bytes(&f), Some(0));
}
- /// `av_frame_copy_props` deep-copies every AVFrameSideData attached
- /// to the source frame. `frame_side_data_bytes` must surface that
- /// retention so `drain_into_pending` can charge it against
- /// `max_probe_pending_bytes` — otherwise a stream with megabytes of
- /// per-frame metadata can queue up to `MAX_PROBE_PENDING_FRAMES`
- /// frames and overshoot the configured cap by orders of magnitude.
- #[test]
- fn frame_side_data_bytes_counts_attached_buffers() {
- use ffmpeg_next::ffi::{av_frame_new_side_data, AVFrameSideDataType};
-
- const SIDE_DATA_SIZE: usize = 1024 * 1024; // 1 MiB
-
- let mut f = frame::Video::empty();
- // SAFETY: f is freshly allocated; av_frame_new_side_data attaches a
- // fresh `SIDE_DATA_SIZE`-byte buffer of the requested type and returns
- // a pointer to the entry (or NULL on OOM).
- let p = unsafe {
- av_frame_new_side_data(
- f.as_mut_ptr(),
- AVFrameSideDataType::AV_FRAME_DATA_SEI_UNREGISTERED,
- SIDE_DATA_SIZE,
- )
- };
- assert!(!p.is_null(), "av_frame_new_side_data returned NULL");
-
- let bytes = frame_side_data_bytes(&f);
- assert!(
- bytes >= SIDE_DATA_SIZE,
- "side-data accounting must include the attached buffer; got {bytes}"
- );
- }
-
- #[test]
- fn frame_side_data_bytes_is_zero_for_bare_frame() {
- let f = frame::Video::empty();
- assert_eq!(frame_side_data_bytes(&f), 0);
- }
-
- /// Multiple side-data entries must be summed, not just the first.
- #[test]
- fn frame_side_data_bytes_sums_all_entries() {
- use ffmpeg_next::ffi::{av_frame_new_side_data, AVFrameSideDataType};
-
- const ENTRY_A: usize = 256 * 1024; // 256 KiB
- const ENTRY_B: usize = 512 * 1024; // 512 KiB
-
- let mut f = frame::Video::empty();
- // Two distinct types so neither call replaces the other.
- let p1 = unsafe {
- av_frame_new_side_data(
- f.as_mut_ptr(),
- AVFrameSideDataType::AV_FRAME_DATA_SEI_UNREGISTERED,
- ENTRY_A,
- )
- };
- let p2 = unsafe {
- av_frame_new_side_data(
- f.as_mut_ptr(),
- AVFrameSideDataType::AV_FRAME_DATA_A53_CC,
- ENTRY_B,
- )
- };
- assert!(!p1.is_null() && !p2.is_null());
-
- let bytes = frame_side_data_bytes(&f);
- assert!(
- bytes >= ENTRY_A + ENTRY_B,
- "must sum across all side-data entries; got {bytes}, expected at least {}",
- ENTRY_A + ENTRY_B
- );
- }
-
/// `PartialBuildState`'s `Drop` must be a no-op when both pointers are
/// null — the disarmed-by-`into_owned` post-state. A panic / double-free
/// here would break the success path of every `build_state` call.
From 1dffd6fd82156316159b1bac94e0a56d613f1f0e Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 17:09:52 +1200
Subject: [PATCH 26/27] update
---
src/decoder.rs | 210 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 145 insertions(+), 65 deletions(-)
diff --git a/src/decoder.rs b/src/decoder.rs
index 81db18a..46a099a 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -410,63 +410,84 @@ impl VideoDecoder {
match self.state.inner.send_packet(packet) {
Ok(()) => {
if let Some(probe) = self.probe.as_mut() {
- // `try_clone_packet` calls `av_packet_ref`, which deep-copies
- // side data via `av_packet_copy_props`. The probe budget must
- // include both descriptor + ref overhead per side-data entry
- // (via `packet_side_data_bytes`) and a hard cap on the entry
- // count itself — without the count cap, a packet stuffed with
- // many tiny entries can dominate retained memory before the
- // byte cap is even close to firing.
+ // Step 1: reject by side-data entry count BEFORE walking the
+ // side-data array for byte accounting. `packet_side_data_bytes`
+ // dereferences each `AVPacket.side_data[i]` based on the
+ // FFmpeg-supplied `side_data_elems`; if that integer is
+ // corrupt or weaponised we don't want to walk it from the
+ // safe `send_packet` path. The byte helper still clamps its
+ // own walk to the cap as defense-in-depth, but checking the
+ // count first short-circuits the descriptor-explosion case
+ // entirely.
let side_count = packet_side_data_count(packet);
- let pkt_size = packet.size().saturating_add(packet_side_data_bytes(packet));
- let new_count = probe.buffered_packets.len() + 1;
- let new_bytes = probe.buffered_bytes.saturating_add(pkt_size);
- let entry_cap_exceeded = side_count > MAX_PROBE_PACKET_SIDE_DATA_ENTRIES;
- if new_count > MAX_PROBE_PACKETS
- || new_bytes > MAX_PROBE_PACKET_BYTES
- || entry_cap_exceeded
- {
+ if side_count > MAX_PROBE_PACKET_SIDE_DATA_ENTRIES {
tracing::warn!(
- packets = new_count,
- bytes = new_bytes,
side_data_entries = side_count,
- max_packets = MAX_PROBE_PACKETS,
- max_bytes = MAX_PROBE_PACKET_BYTES,
max_side_data_entries = MAX_PROBE_PACKET_SIDE_DATA_ENTRIES,
- trigger = if entry_cap_exceeded {
- "side_data_entry_cap"
- } else {
- "byte_or_packet_cap"
- },
- "hwdecode: probe window exceeded caps without first frame; \
- abandoning fallback safety net"
+ trigger = "side_data_entry_cap",
+ "hwdecode: packet side-data entry count exceeds cap; \
+ abandoning fallback safety net without byte accounting"
);
- // Abandon the *future* probe-buffering only. `pending_frames`
- // belong to the currently active backend (possibly the
- // candidate `advance_probe` committed earlier in this same
- // `send_packet` call) and are valid output the caller will
- // dequeue via `receive_frame`. Clearing them here would
- // silently drop initial frames at exactly the cap-overflow /
- // OOM-stress paths.
+ // Abandon the *future* probe-buffering only — see the byte/
+ // packet cap branch below for why `pending_frames` survives.
self.probe = None;
} else {
- // Use the checked clone — ffmpeg-next's `Packet::clone`
- // discards av_packet_ref's return value and would silently
- // store an empty packet on ENOMEM, corrupting future replay.
- match try_clone_packet(packet) {
- Ok(cloned) => {
- probe.buffered_packets.push(cloned);
- probe.buffered_bytes = new_bytes;
- }
- Err(e) => {
- tracing::warn!(
- error = %e,
- "hwdecode: packet clone failed for probe history; abandoning fallback safety net"
- );
- // Same reasoning as the cap-overflow branch above:
- // `pending_frames` are owned by the active backend, not
- // the probe buffer, so they survive abandonment.
- self.probe = None;
+ // Step 2: now safe to compute byte budget — `side_count`
+ // is bounded.
+ //
+ // `try_clone_packet` calls `av_packet_ref`, which deep-copies
+ // side data via `av_packet_copy_props`. The probe budget
+ // must include descriptor + ref overhead per side-data
+ // entry (via `packet_side_data_bytes`); without it, a
+ // packet stuffed with many tiny entries can dominate
+ // retained memory before the byte cap is even close to
+ // firing.
+ let pkt_size = packet.size().saturating_add(packet_side_data_bytes(
+ packet,
+ MAX_PROBE_PACKET_SIDE_DATA_ENTRIES,
+ ));
+ let new_count = probe.buffered_packets.len() + 1;
+ let new_bytes = probe.buffered_bytes.saturating_add(pkt_size);
+ if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES {
+ tracing::warn!(
+ packets = new_count,
+ bytes = new_bytes,
+ side_data_entries = side_count,
+ max_packets = MAX_PROBE_PACKETS,
+ max_bytes = MAX_PROBE_PACKET_BYTES,
+ trigger = "byte_or_packet_cap",
+ "hwdecode: probe window exceeded caps without first frame; \
+ abandoning fallback safety net"
+ );
+ // Abandon the *future* probe-buffering only.
+ // `pending_frames` belong to the currently active backend
+ // (possibly the candidate `advance_probe` committed
+ // earlier in this same `send_packet` call) and are valid
+ // output the caller will dequeue via `receive_frame`.
+ // Clearing them here would silently drop initial frames
+ // at exactly the cap-overflow / OOM-stress paths.
+ self.probe = None;
+ } else {
+ // Use the checked clone — ffmpeg-next's `Packet::clone`
+ // discards av_packet_ref's return value and would
+ // silently store an empty packet on ENOMEM, corrupting
+ // future replay.
+ match try_clone_packet(packet) {
+ Ok(cloned) => {
+ probe.buffered_packets.push(cloned);
+ probe.buffered_bytes = new_bytes;
+ }
+ Err(e) => {
+ tracing::warn!(
+ error = %e,
+ "hwdecode: packet clone failed for probe history; \
+ abandoning fallback safety net"
+ );
+ // Same reasoning as the cap-overflow branch above:
+ // `pending_frames` are owned by the active backend,
+ // not the probe buffer, so they survive abandonment.
+ self.probe = None;
+ }
}
}
}
@@ -1222,18 +1243,27 @@ fn try_clone_packet(src: &Packet) -> std::result::Result usize {
+fn packet_side_data_bytes(packet: &Packet, max_entries: usize) -> usize {
// SAFETY: AVPacket.side_data is `*mut AVPacketSideData` and
// side_data_elems is `c_int`; both are raw struct fields safe to read.
// Field projection (`.size`) does not reconstruct the enum-typed `type_`
@@ -1242,13 +1272,10 @@ fn packet_side_data_bytes(packet: &Packet) -> usize {
let raw = packet.as_ptr();
let nel = (*raw).side_data_elems;
let arr = (*raw).side_data;
- if arr.is_null() || nel <= 0 {
+ if arr.is_null() || nel <= 0 || max_entries == 0 {
return 0;
}
- let count = nel as usize;
- // Descriptor + AVBufferRef + allocator overhead per entry — without
- // this, a packet stuffed with many zero-size entries could slip past
- // `MAX_PROBE_PACKET_BYTES` purely on descriptor cost.
+ let count = (nel as usize).min(max_entries);
let mut total = count.saturating_mul(SIDE_DATA_ENTRY_OVERHEAD);
for i in 0..count {
let entry = arr.add(i);
@@ -1550,7 +1577,7 @@ mod tests {
assert!(!p.is_null(), "av_packet_new_side_data returned NULL");
assert_eq!(packet.size(), PAYLOAD_SIZE);
- let side = packet_side_data_bytes(&packet);
+ let side = packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES);
assert!(
side >= SIDE_DATA_SIZE,
"side-data accounting must include the attached buffer; got {side}"
@@ -1565,7 +1592,10 @@ mod tests {
#[test]
fn packet_side_data_is_zero_when_no_side_data() {
let packet = Packet::new(64);
- assert_eq!(packet_side_data_bytes(&packet), 0);
+ assert_eq!(
+ packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES),
+ 0
+ );
assert_eq!(packet_side_data_count(&packet), 0);
}
@@ -1602,13 +1632,63 @@ mod tests {
);
assert_eq!(packet_side_data_count(&packet), 2);
- let bytes = packet_side_data_bytes(&packet);
+ let bytes = packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES);
assert!(
bytes >= 2 * SIDE_DATA_ENTRY_OVERHEAD,
"must charge descriptor overhead per entry even at zero payload; got {bytes}"
);
}
+ /// `packet_side_data_bytes` must clamp its walk to `max_entries`
+ /// regardless of `side_data_elems`. Defense-in-depth: the caller is
+ /// expected to short-circuit packets whose count exceeds the cap, but
+ /// if a corrupt or weaponised packet ever does reach the helper, the
+ /// internal cap prevents an unbounded raw-pointer walk.
+ ///
+ /// This test attaches 5 entries of distinct types and asks the helper
+ /// to walk only the first 2. Result must equal exactly `2 * overhead +
+ /// (size_a + size_b)`, confirming entries 3-5 were not even read.
+ #[test]
+ fn packet_side_data_bytes_respects_max_entries_cap() {
+ use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType};
+
+ let mut packet = Packet::new(0);
+ // Five distinct side-data types so each `av_packet_new_side_data`
+ // call appends rather than replaces.
+ let types_and_sizes: [(AVPacketSideDataType, usize); 5] = [
+ (AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, 100),
+ (AVPacketSideDataType::AV_PKT_DATA_PALETTE, 200),
+ (AVPacketSideDataType::AV_PKT_DATA_REPLAYGAIN, 300),
+ (AVPacketSideDataType::AV_PKT_DATA_DISPLAYMATRIX, 400),
+ (AVPacketSideDataType::AV_PKT_DATA_STEREO3D, 500),
+ ];
+ for (ty, size) in types_and_sizes {
+ let p = unsafe { av_packet_new_side_data(packet.as_mut_ptr(), ty, size) };
+ assert!(!p.is_null(), "av_packet_new_side_data returned NULL");
+ }
+ assert_eq!(packet_side_data_count(&packet), 5);
+
+ let walked_2 = packet_side_data_bytes(&packet, 2);
+ let walked_5 = packet_side_data_bytes(&packet, 5);
+
+ assert_eq!(
+ walked_2,
+ 2 * SIDE_DATA_ENTRY_OVERHEAD + 100 + 200,
+ "max_entries=2 must walk exactly the first two entries"
+ );
+ assert_eq!(
+ walked_5,
+ 5 * SIDE_DATA_ENTRY_OVERHEAD + 100 + 200 + 300 + 400 + 500,
+ "max_entries=5 must walk all five entries"
+ );
+ // max_entries=0 short-circuits to 0.
+ assert_eq!(packet_side_data_bytes(&packet, 0), 0);
+ // max_entries larger than the actual count clamps to the actual count
+ // (no out-of-bounds walk past `side_data_elems`).
+ let walked_huge = packet_side_data_bytes(&packet, 1_000_000);
+ assert_eq!(walked_huge, walked_5);
+ }
+
/// `MAX_PROBE_PACKET_SIDE_DATA_ENTRIES` is the cliff above which a
/// packet is rejected from the probe buffer regardless of byte total —
/// pure descriptor inflation is its own attack vector. Sanity-check
From 62c6ff139bff15823cd7fa8db6803d4354835203 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Mon, 27 Apr 2026 17:30:03 +1200
Subject: [PATCH 27/27] update
---
src/decoder.rs | 169 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 169 insertions(+)
diff --git a/src/decoder.rs b/src/decoder.rs
index 46a099a..a241b68 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -132,6 +132,29 @@ const MAX_PROBE_PACKET_SIDE_DATA_ENTRIES: usize = 64;
/// would let many tiny entries slip past the cap.
const SIDE_DATA_ENTRY_OVERHEAD: usize = 80;
+/// Conservative upper-bound bytes-per-pixel multiplier used to estimate
+/// the size of a CPU frame **before** `av_hwframe_transfer_data`
+/// allocates its pixel buffers. Covers every HW download format this
+/// crate produces (worst case is `P416LE` / `P412LE` at 6 bytes/pixel
+/// for 16-bit 4:4:4 semi-planar) plus a margin for FFmpeg's per-row
+/// stride alignment (typically 32-byte aligned, ~5% extra at HD widths
+/// and below).
+///
+/// Used by [`drain_into_pending`] as a pre-transfer guard: if the
+/// product `width * height * WORST_CASE_BYTES_PER_PIXEL` would already
+/// push `pending_bytes` past `max_probe_pending_bytes`, the candidate
+/// replay refuses the frame *before* allocating. Without this, FFmpeg
+/// would perform the full HW→CPU download (potentially ~100 MiB for
+/// 8K HDR) and we would only reject the frame after RSS had already
+/// spiked. The post-transfer accounting via [`cpu_frame_bytes`] stays in
+/// place as a backstop using the frame's actual stride/format.
+///
+/// Over-charges true 4:2:0 NV12 / P010 frames (1.5 and 3 bytes/pixel,
+/// which dominate real workloads) by roughly 5.3x and 2.7x — that's the
+/// right side to err on. Callers feeding 8K+ workloads through the probe
+/// path can tune [`VideoDecoder::with_max_probe_pending_bytes`] upward.
+const WORST_CASE_BYTES_PER_PIXEL: usize = 8;
+
/// Maximum number of CPU frames we are willing to queue from a candidate
/// during probe replay. Each frame is a fully-allocated CPU buffer
/// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so
@@ -1374,6 +1397,54 @@ fn drain_into_pending(
errno: libc::ENOMEM,
});
}
+ // Pre-transfer size guard: `av_hwframe_transfer_data` will
+ // allocate the CPU buffer based on `hw_buf`'s dimensions. If a
+ // single frame's worst-case footprint already pushes past the
+ // cap, refuse the candidate **before** allocating so RSS does
+ // not spike on a frame we'd immediately drop. Uses a width *
+ // height * `WORST_CASE_BYTES_PER_PIXEL` upper bound; the
+ // post-transfer accounting via `cpu_frame_bytes` below stays in
+ // place as a backstop using the actual stride/format.
+ let estimated_bytes = match estimate_transfer_bytes(hw_buf) {
+ Some(b) => b,
+ None => {
+ // SAFETY: AVFrame.width/height are c_int reads.
+ let (w, h) = unsafe {
+ let raw = hw_buf.as_ptr();
+ ((*raw).width, (*raw).height)
+ };
+ tracing::warn!(
+ width = w,
+ height = h,
+ "hwdecode: HW frame dimensions invalid for sizing; failing candidate replay"
+ );
+ unsafe { av_frame_unref(hw_buf.as_mut_ptr()) };
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ };
+ let estimated_total = pending_bytes.saturating_add(estimated_bytes);
+ if estimated_total > max_bytes {
+ // SAFETY: AVFrame.width/height are c_int reads.
+ let (w, h) = unsafe {
+ let raw = hw_buf.as_ptr();
+ ((*raw).width, (*raw).height)
+ };
+ tracing::warn!(
+ pending_bytes = *pending_bytes,
+ estimated_bytes,
+ width = w,
+ height = h,
+ max_bytes = max_bytes,
+ "hwdecode: pre-transfer size estimate exceeds cap; \
+ refusing candidate replay before allocating CPU frame"
+ );
+ unsafe { av_frame_unref(hw_buf.as_mut_ptr()) };
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
let mut cpu = alloc_av_frame()?;
// SAFETY: hw_buf is a freshly-decoded HW frame;
// `av_hwframe_transfer_data` allocates pixel buffers on `cpu`.
@@ -1438,6 +1509,33 @@ fn drain_into_pending(
}
}
+/// Conservative upper-bound estimate of the bytes
+/// `av_hwframe_transfer_data` will allocate when downloading `hw_buf` to
+/// a CPU frame. Used by [`drain_into_pending`] as a pre-transfer guard
+/// so a candidate replay can refuse an over-budget frame *without* first
+/// paying the allocation. The estimate is `width * height *
+/// WORST_CASE_BYTES_PER_PIXEL` (see that constant for why we err on the
+/// high side), saturating at `usize::MAX` rather than wrapping.
+///
+/// Returns `None` when the frame's `width` or `height` is not strictly
+/// positive (caller treats as candidate failure — a HW frame with
+/// non-positive dimensions cannot be transferred meaningfully).
+fn estimate_transfer_bytes(hw_buf: &frame::Video) -> Option<usize> {
+    // SAFETY: AVFrame.width / height are c_int reads.
+    let (w, h) = unsafe {
+        let raw = hw_buf.as_ptr();
+        ((*raw).width, (*raw).height)
+    };
+    if w <= 0 || h <= 0 {
+        return None;
+    }
+    Some(
+        (w as usize)
+            .saturating_mul(h as usize)
+            .saturating_mul(WORST_CASE_BYTES_PER_PIXEL),
+    )
+}
+
/// Approximate resident size of a CPU frame: sum of `linesize[plane] *
/// plane_height` across populated planes.
///
@@ -1775,6 +1873,77 @@ mod tests {
assert_eq!(cpu_frame_bytes(&f), Some(0));
}
+ /// `estimate_transfer_bytes` is the pre-transfer size guard for
+ /// `drain_into_pending`: it must compute `width * height *
+ /// WORST_CASE_BYTES_PER_PIXEL` so the candidate replay can refuse a
+ /// frame *before* `av_hwframe_transfer_data` allocates.
+ #[test]
+ fn estimate_transfer_bytes_uses_worst_case_per_pixel() {
+ let mut f = frame::Video::empty();
+ // SAFETY: `f` is freshly allocated; width/height are plain integer fields.
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).width = 1920;
+ (*raw).height = 1080;
+ }
+ assert_eq!(
+ estimate_transfer_bytes(&f),
+ Some(1920 * 1080 * WORST_CASE_BYTES_PER_PIXEL),
+ );
+ }
+
+ /// Non-positive dimensions surface as `None` so `drain_into_pending`
+ /// fails the candidate before allocating anything. Unchecked, a zero
+ /// dimension would yield a 0-byte estimate that passes the cap check and
+ /// reaches the real transfer, while a negative one would wrap to a huge
+ /// `usize` on cast and saturate the estimate.
+ #[test]
+ fn estimate_transfer_bytes_rejects_non_positive_dimensions() {
+ let mut f = frame::Video::empty();
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).width = 0;
+ (*raw).height = 1080;
+ }
+ assert!(estimate_transfer_bytes(&f).is_none());
+
+ unsafe {
+ (*f.as_mut_ptr()).width = 1920;
+ (*f.as_mut_ptr()).height = -1;
+ }
+ assert!(estimate_transfer_bytes(&f).is_none());
+ }
+
+ /// 8K HDR P010 has actual ~96 MiB resident size; the estimate should
+ /// over-charge it (the right side to err on for a memory cap) while a
+ /// single frame still fits within the default
+ /// [`DEFAULT_MAX_PROBE_PENDING_BYTES`] cap (256 MiB), so a
+ /// default-configured decoder is not forced to reject 8K streams
+ /// outright.
+ #[test]
+ fn estimate_transfer_bytes_8k_fits_default_cap() {
+ let mut f = frame::Video::empty();
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).width = 7680;
+ (*raw).height = 4320;
+ }
+ let estimate = estimate_transfer_bytes(&f).expect("8K is sizable");
+ // 7680 * 4320 * 8 = 265_420_800 bytes (~253 MiB) — just under the cap.
+ assert!(
+ estimate <= DEFAULT_MAX_PROBE_PENDING_BYTES,
+ "8K estimate {estimate} must fit DEFAULT_MAX_PROBE_PENDING_BYTES \
+ {DEFAULT_MAX_PROBE_PENDING_BYTES}; otherwise the default cap rejects \
+ even a single 8K frame at probe time"
+ );
+ // And strictly larger than a typical 8K P010 (~96 MiB) so the guard
+ // is actually conservative, not under-charging.
+ assert!(
+ estimate > 96 * 1024 * 1024,
+ "estimate must over-charge real 8K P010 to bound the worst case; got {estimate}"
+ );
+ }
+
/// `PartialBuildState`'s `Drop` must be a no-op when both pointers are
/// null — the disarmed-by-`into_owned` post-state. A panic / double-free
/// here would break the success path of every `build_state` call.