From 87d9cdc10662d2d944349d642700e998ce217b8c Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 14:56:14 +1200 Subject: [PATCH 01/27] update --- CHANGELOG.md | 7 -- Cargo.toml | 40 +++---- README-zh_CN.md | 51 --------- README.md | 102 +++++++++++------ benches/decode.rs | 114 +++++++++++++++++++ benches/foo.rs | 1 - docs/design.md | 154 +++++++++++++++++++++++++ examples/decode.rs | 74 ++++++++++++ examples/foo.rs | 1 - src/backend.rs | 137 ++++++++++++++++++++++ src/decoder.rs | 278 +++++++++++++++++++++++++++++++++++++++++++++ src/error.rs | 37 ++++++ src/ffi.rs | 70 ++++++++++++ src/lib.rs | 26 ++++- tests/decode.rs | 68 +++++++++++ tests/foo.rs | 1 - tests/hw_smoke.rs | 64 +++++++++++ 17 files changed, 1106 insertions(+), 119 deletions(-) delete mode 100644 CHANGELOG.md delete mode 100644 README-zh_CN.md create mode 100644 benches/decode.rs delete mode 100644 benches/foo.rs create mode 100644 docs/design.md create mode 100644 examples/decode.rs delete mode 100644 examples/foo.rs create mode 100644 src/backend.rs create mode 100644 src/decoder.rs create mode 100644 src/error.rs create mode 100644 src/ffi.rs create mode 100644 tests/decode.rs delete mode 100644 tests/foo.rs create mode 100644 tests/hw_smoke.rs diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index bd7a668..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,7 +0,0 @@ -# UNRELEASED - -# 0.1.2 (January 6th, 2022) - -FEATURES - - diff --git a/Cargo.toml b/Cargo.toml index ff7fe91..8e4ea79 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,35 +1,37 @@ [package] -name = "template-rs" +name = "hwdecode" version = "0.0.0" edition = "2021" -repository = "https://github.com/al8n/template-rs" -homepage = "https://github.com/al8n/template-rs" -documentation = "https://docs.rs/template-rs" -description = "A template for creating Rust open-source repo on GitHub" +rust-version = "1.95" +description = "Cross-platform hardware-accelerated video decoder 
built on top of ffmpeg-next, with auto-probe and software fallback." +repository = "https://github.com/findit-ai/hwdecode" +homepage = "https://github.com/findit-ai/hwdecode" +documentation = "https://docs.rs/hwdecode" license = "MIT OR Apache-2.0" -rust-version = "1.73" - -[[bench]] -path = "benches/foo.rs" -name = "foo" -harness = false - -[features] -default = ["std"] -alloc = [] -std = [] [dependencies] +ffmpeg-next = { version = "8.1", default-features = false, features = ["codec", "format"] } +thiserror = "2" +tracing = "0.1" +libc = "0.2" [dev-dependencies] criterion = "0.8" -tempfile = "3" + +[[example]] +name = "decode" +path = "examples/decode.rs" + +[[bench]] +name = "decode" +path = "benches/decode.rs" +harness = false [profile.bench] opt-level = 3 debug = false codegen-units = 1 -lto = 'thin' +lto = "thin" incremental = false debug-assertions = false overflow-checks = false @@ -41,8 +43,6 @@ rustdoc-args = ["--cfg", "docsrs"] [lints.rust] rust_2018_idioms = "warn" -single_use_lifetimes = "warn" unexpected_cfgs = { level = "warn", check-cfg = [ - 'cfg(all_tests)', 'cfg(tarpaulin)', ] } diff --git a/README-zh_CN.md b/README-zh_CN.md deleted file mode 100644 index 7a07f4d..0000000 --- a/README-zh_CN.md +++ /dev/null @@ -1,51 +0,0 @@ -
-

template-rs

-
-
- -开源Rust代码库GitHub模版 - -[github][Github-url] -LoC -[Build][CI-url] -[codecov][codecov-url] - -[docs.rs][doc-url] -[crates.io][crates-url] -[crates.io][crates-url] -license - -[English][en-url] | 简体中文 - -
- -## Installation - -```toml -[dependencies] -template_rs = "0.1" -``` - -## Features - -- [x] 更快的创建GitHub开源Rust代码库 - -#### License - -`Template-rs` is under the terms of both the MIT license and the -Apache License (Version 2.0). - -See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT) for details. - -Copyright (c) 2021 Al Liu. - -[Github-url]: https://github.com/al8n/template-rs/ -[CI-url]: https://github.com/al8n/template/actions/workflows/template.yml -[doc-url]: https://docs.rs/template-rs -[crates-url]: https://crates.io/crates/template-rs -[codecov-url]: https://app.codecov.io/gh/al8n/template-rs/ -[license-url]: https://opensource.org/licenses/Apache-2.0 -[rustc-url]: https://github.com/rust-lang/rust/blob/master/RELEASES.md -[license-apache-url]: https://opensource.org/licenses/Apache-2.0 -[license-mit-url]: https://opensource.org/licenses/MIT -[en-url]: https://github.com/al8n/template-rs/tree/main/README.md diff --git a/README.md b/README.md index 1af27e2..bcfb058 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,84 @@ -
-

template-rs

-
-
+# hwdecode -A template for creating Rust open-source GitHub repo. +Cross-platform hardware-accelerated video decoder for Rust, built on top of +[`ffmpeg-next`](https://crates.io/crates/ffmpeg-next). -[github][Github-url] -LoC -[Build][CI-url] -[codecov][codecov-url] +`VideoDecoder` mirrors the `send_packet` / `receive_frame` interface of +`ffmpeg::decoder::Video` and silently picks the best hardware backend for the +host platform, falling back to software if none open. Output frames are +CPU-side — for HW backends they are downloaded with `av_hwframe_transfer_data` +(NV12 for 8-bit, P010 for 10-bit). Pixel-format conversion is intentionally +out of scope. -[docs.rs][doc-url] -[crates.io][crates-url] -[crates.io][crates-url] -license +## Backends -English | [简体中文][zh-cn-url] +| Target | Probe order | +| ------------------- | --------------------------------- | +| macOS / iOS / tvOS | VideoToolbox → Software | +| Linux | VAAPI → CUDA → Software | +| Windows | D3D11VA → CUDA → Software | +| other | Software | -
+## Usage -## Installation +```rust +use ffmpeg_next as ffmpeg; +use ffmpeg::{format, frame, media}; +use hwdecode::VideoDecoder; -```toml -[dependencies] -template_rs = "0.1" +ffmpeg::init()?; + +let mut input = format::input(path)?; +let stream = input.streams().best(media::Type::Video).unwrap(); +let stream_index = stream.index(); + +let mut decoder = VideoDecoder::open(stream.parameters())?; +println!("backend = {:?}", decoder.backend()); + +let mut frame = frame::Video::empty(); +for (s, packet) in input.packets() { + if s.index() != stream_index { continue; } + decoder.send_packet(&packet)?; + while decoder.receive_frame(&mut frame).is_ok() { + // frame.format() is NV12 / P010 (HW path) or codec-native (SW path) + // ... do something with frame ... + } +} +decoder.send_eof()?; +while decoder.receive_frame(&mut frame).is_ok() { + // ... drain ... +} ``` -## Features -- [x] Create a Rust open-source repo fast +To force a specific backend (no probe, no fallback): + +```rust +use hwdecode::{Backend, VideoDecoder}; +let decoder = VideoDecoder::open_with(parameters, Backend::Software)?; +``` + +## Running tests and benches + +The integration test and benchmark expect a real video file. Set +`HWDECODE_SAMPLE_VIDEO` to enable them: + +```sh +HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test +HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored +HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench +``` -#### License +Without the env var the integration test skips with a notice; unit tests run +unconditionally. -`template-rs` is under the terms of both the MIT license and the -Apache License (Version 2.0). +## Build requirements -See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT) for details. +- A system FFmpeg ≥ 4.x linkable via `pkg-config`. Verify with + `ffmpeg -hwaccels` that your build has the backends you expect compiled in + (e.g. `videotoolbox` on macOS, `vaapi` / `cuda` on Linux, + `d3d11va` / `cuda` on Windows). 
+- Rust ≥ 1.95. -Copyright (c) 2021 Al Liu. +## License -[Github-url]: https://github.com/al8n/template-rs/ -[CI-url]: https://github.com/al8n/template-rs/actions/workflows/ci.yml -[doc-url]: https://docs.rs/template-rs -[crates-url]: https://crates.io/crates/template-rs -[codecov-url]: https://app.codecov.io/gh/al8n/template-rs/ -[zh-cn-url]: https://github.com/al8n/template-rs/tree/main/README-zh_CN.md +MIT or Apache-2.0, at your option. diff --git a/benches/decode.rs b/benches/decode.rs new file mode 100644 index 0000000..be7281d --- /dev/null +++ b/benches/decode.rs @@ -0,0 +1,114 @@ +//! Benchmark comparing software-only decode against the auto-probed +//! hardware backend on the same input file. +//! +//! Set `HWDECODE_SAMPLE_VIDEO` to a video file path. The hardware bench is +//! skipped (with a notice) when the auto-probe falls back to software. +//! +//! ```sh +//! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench +//! ``` + +use std::{path::PathBuf, time::Duration}; + +use criterion::{criterion_group, criterion_main, Criterion}; +use ffmpeg::{format, frame, media}; +use ffmpeg_next as ffmpeg; +use hwdecode::{Backend, VideoDecoder}; + +const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; + +fn sample_path() -> Option { + std::env::var_os(SAMPLE_ENV).map(PathBuf::from) +} + +/// Decode every video frame in the file using `decoder`, returning the count. +/// Re-opens the input each call so each iteration measures a full decode pass. 
+fn decode_all(path: &PathBuf, backend: Backend) -> Result { + let mut input = format::input(path).map_err(hwdecode::Error::Ffmpeg)?; + let stream = input + .streams() + .best(media::Type::Video) + .ok_or(hwdecode::Error::Ffmpeg(ffmpeg::Error::StreamNotFound))?; + let stream_index = stream.index(); + + let mut decoder = match backend { + Backend::Software => VideoDecoder::open_with(stream.parameters(), Backend::Software)?, + _ => VideoDecoder::open(stream.parameters())?, + }; + + let mut frame = frame::Video::empty(); + let mut count = 0_usize; + + let mut drain = |decoder: &mut VideoDecoder, count: &mut usize| -> Result<(), hwdecode::Error> { + loop { + match decoder.receive_frame(&mut frame) { + Ok(()) => *count += 1, + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + return Ok(()); + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Eof)) => return Ok(()), + Err(e) => return Err(e), + } + } + }; + + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet)?; + drain(&mut decoder, &mut count)?; + } + decoder.send_eof()?; + drain(&mut decoder, &mut count)?; + Ok(count) +} + +fn bench_decode(c: &mut Criterion) { + ffmpeg::init().expect("ffmpeg init"); + + let Some(path) = sample_path() else { + eprintln!("skipping benches: set {SAMPLE_ENV} to a video file path"); + return; + }; + + // Probe backend once to print which HW backend (if any) we'd be benching. 
+ let probed_backend = { + let input = format::input(&path).expect("open input"); + let stream = input + .streams() + .best(media::Type::Video) + .expect("video stream"); + let dec = VideoDecoder::open(stream.parameters()).expect("auto-probe"); + let b = dec.backend(); + drop(dec); + b + }; + eprintln!("auto-probe selected backend: {probed_backend:?}"); + + let mut group = c.benchmark_group("decode"); + group.measurement_time(Duration::from_secs(15)); + group.sample_size(20); + + group.bench_function("software", |b| { + b.iter(|| decode_all(&path, Backend::Software).expect("software decode")) + }); + + if probed_backend != Backend::Software { + group.bench_function("hardware", |b| { + b.iter(|| { + let n = decode_all(&path, probed_backend).expect("hardware decode"); + std::hint::black_box(n); + }) + }); + } else { + eprintln!("skipping hardware bench: auto-probe fell back to Software"); + } + + group.finish(); +} + +criterion_group!(benches, bench_decode); +criterion_main!(benches); diff --git a/benches/foo.rs b/benches/foo.rs deleted file mode 100644 index f328e4d..0000000 --- a/benches/foo.rs +++ /dev/null @@ -1 +0,0 @@ -fn main() {} diff --git a/docs/design.md b/docs/design.md new file mode 100644 index 0000000..056bb4f --- /dev/null +++ b/docs/design.md @@ -0,0 +1,154 @@ +# hwdecode — design + +Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next` 8.1. + +## Goals + +- Drop-in replacement for `ffmpeg::decoder::Video` at the call site (`send_packet` / `receive_frame` / `send_eof` / `flush`). +- Auto-probe the platform's hardware backends and silently fall back to software if none open. Caller never has to think about hwaccel availability. +- Hand back native-format CPU frames (NV12/P010 from the HW path, codec-native from the SW path). Pixel-format conversion is the caller's responsibility (e.g. via `colconv`). +- Cross-platform: macOS / iOS / iPadOS / tvOS, Linux (Intel/AMD/NVIDIA), Windows (any GPU + CUDA on NVIDIA). 
+ +## Non-goals + +- Audio hardware decoding. Out of scope; software AAC/Opus/etc. is fast enough that the complexity isn't justified. +- Demuxing. Callers open files/streams themselves (e.g. via `findit-demuxer`) and feed packets in. +- Pixel-format conversion. Done downstream (`colconv`). +- Encoding. + +## Public API + +```rust +pub struct VideoDecoder { /* private */ } + +impl VideoDecoder { + /// Auto-probe HW backends in platform order; fall back to software. + /// On success, `backend()` reports the one that won. + pub fn open(parameters: ffmpeg::codec::Parameters) -> Result; + + /// Force a specific backend. No probe, no fallback. + pub fn open_with(parameters: ffmpeg::codec::Parameters, backend: Backend) -> Result; + + pub fn backend(&self) -> Backend; + pub fn width(&self) -> u32; + pub fn height(&self) -> u32; + pub fn format(&self) -> ffmpeg::format::Pixel; + pub fn time_base(&self) -> ffmpeg::Rational; + pub fn frame_rate(&self) -> ffmpeg::Rational; + + pub fn send_packet(&mut self, packet: &ffmpeg::Packet) -> Result<(), Error>; + pub fn send_eof(&mut self) -> Result<(), Error>; + + /// Receive a CPU-side frame. For HW backends, internally calls + /// `av_hwframe_transfer_data` and copies PTS/timing onto the result; + /// output format is NV12 (8-bit) or P010 (10-bit). For SW, the frame + /// is in the codec's native format. 
+ pub fn receive_frame(&mut self, frame: &mut ffmpeg::frame::Video) -> Result<(), Error>; + + pub fn flush(&mut self); +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Backend { + Software, + VideoToolbox, // macOS, iOS, iPadOS, tvOS + Vaapi, // Linux (Intel/AMD) + Cuda, // Linux/Windows (NVIDIA) + D3d11va, // Windows +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("ffmpeg error: {0}")] + Ffmpeg(#[from] ffmpeg::Error), + #[error("no decoder for codec id {0:?}")] + NoCodec(ffmpeg::codec::Id), + #[error("hardware device init failed for {backend:?}: {source}")] + HwDeviceInitFailed { backend: Backend, source: ffmpeg::Error }, + #[error("all backends failed; attempts: {attempts:?}")] + AllBackendsFailed { attempts: Vec<(Backend, ffmpeg::Error)> }, +} +``` + +## Behavior + +### Probe order + +| Target | Order tried | +| ------------------- | -------------------------------------------- | +| macOS, iOS, tvOS | `[VideoToolbox, Software]` | +| Linux | `[Vaapi, Cuda, Software]` | +| Windows | `[D3d11va, Cuda, Software]` | +| Other | `[Software]` | + +A HW backend is a candidate only if **(a)** its `AVHWDeviceType` device can be created via `av_hwdevice_ctx_create`, and **(b)** the codec advertises support via `avcodec_get_hw_config` matching that device type. The first candidate that fully opens wins. Each failure logs `tracing::warn!` with the backend and the underlying error and the loop tries the next. + +### Device selection + +Always device 0 / system default (`av_hwdevice_ctx_create(.., NULL, ..)`). No env var, no config knob in v1. Add later if the multi-GPU use case appears. + +### `get_format` callback + +A static `extern "C"` callback. The decoder context's `opaque` field points to a small heap-allocated `CallbackState { wanted: AVPixelFormat }`. The callback walks the offered `pix_fmts` list, returns `wanted` if present, else `AV_PIX_FMT_NONE` (which forces FFmpeg to retry with software). 
This is the standard pattern from `doc/examples/hw_decode.c`. + +### Frame transfer + +`receive_frame` always: + +1. Reads from the codec into an internal `hw_frame: ffmpeg::frame::Video` (allocated once, reused). +2. If the frame's format is the HW pix fmt, calls `av_hwframe_transfer_data(out, hw_frame, 0)` into the caller's `&mut frame`. Copies `pts`, `pkt_dts`, `time_base`, `duration` (FFmpeg does not transfer timing). +3. Otherwise (SW path or decoder fell back mid-stream), clones the frame into the caller's slot. + +### Threading + +`VideoDecoder: Send + !Sync`. Each instance owns its own `AVCodecContext` and `AVBufferRef*`. Multiple decoders can run on different threads; a single decoder is not concurrent. + +### Drop + +`Drop` calls `av_buffer_unref(&mut self.hw_device_ref)` if non-null, frees the boxed `CallbackState`, then lets `ffmpeg::decoder::Video`'s own Drop free the codec context. + +## Internals + +```text +src/ +├── lib.rs // re-exports + crate-level docs +├── error.rs // Error enum +├── backend.rs // Backend enum, probe order, AVHWDeviceType <-> Backend mapping +├── decoder.rs // VideoDecoder, open/open_with, send/receive +└── ffi.rs // get_format callback, av_hwdevice_ctx_create / transfer wrappers, + // avcodec_get_hw_config probe +``` + +No other modules. Keep the surface small. + +## Build & dependencies + +- `ffmpeg-next = { version = "8.1", default-features = false, features = ["codec", "format"] }` +- `thiserror = "2"` +- `tracing = "0.1"` +- `libc = "0.2"` + +No platform-specific Cargo features. `cfg!(target_os = ...)` selects which `AVHWDeviceType` constants we even attempt — the FFI symbols are linked unconditionally via `ffmpeg-sys-next`. + +System FFmpeg ≥ 4.x. Verified against the user's macOS Homebrew build (FFmpeg 8.1, VideoToolbox enabled). + +## Testing + +1. **Unit tests** (`src/backend.rs`, `src/error.rs`) — pure-Rust: probe-order construction per platform, `Backend` ↔ `AVHWDeviceType` mapping, error formatting. +2. 
**Integration** (`tests/decode.rs`) — opens a sample H.264 file via `ffmpeg::format::input`, decodes 30 frames through `VideoDecoder::open` (auto-probe), asserts frame count and dimensions. Sample path comes from env var `HWDECODE_SAMPLE_VIDEO`; test is skipped with a clear `eprintln!` if unset. +3. **HW smoke** (`tests/hw_smoke.rs`, `#[ignore]`) — same decode, but additionally asserts `decoder.backend() != Backend::Software`. CI runs this on platform-matched runners. + +Sample-file env var keeps the repo binary-free. Documented in `README.md`. + +## Benchmark + +`benches/decode.rs` (criterion) — two functions: + +- `bench_software_decode` — `VideoDecoder::open_with(.., Backend::Software)`, decode all frames of the sample, measure wall-clock per frame. +- `bench_hardware_decode` — `VideoDecoder::open(..)` (auto-probe). Skipped (`return`) if `decoder.backend() == Backend::Software` (no HW available). + +Both use the same `HWDECODE_SAMPLE_VIDEO` file. Bench prints which backend the HW run actually used, so results are interpretable across machines. + +## Examples + +`examples/decode.rs` — opens a path from `argv[1]` with `ffmpeg::format::input`, finds the best video stream, feeds packets through `VideoDecoder`, prints `(pts, width, height, format, backend)` for each frame. diff --git a/examples/decode.rs b/examples/decode.rs new file mode 100644 index 0000000..fa28582 --- /dev/null +++ b/examples/decode.rs @@ -0,0 +1,74 @@ +//! Decode every video frame in `argv[1]`, printing one line per frame. +//! +//! ```sh +//! cargo run --release --example decode -- /path/to/video.mp4 +//! 
``` + +use ffmpeg::{format, frame, media}; +use ffmpeg_next as ffmpeg; +use hwdecode::VideoDecoder; + +fn main() -> Result<(), Box> { + let path = std::env::args() + .nth(1) + .ok_or("usage: decode ")?; + + ffmpeg::init()?; + + let mut input = format::input(&path)?; + let stream = input + .streams() + .best(media::Type::Video) + .ok_or("no video stream")?; + let stream_index = stream.index(); + + let mut decoder = VideoDecoder::open(stream.parameters())?; + println!( + "backend={:?} {}x{} codec_pix_fmt_initial={:?}", + decoder.backend(), + decoder.width(), + decoder.height(), + decoder.format(), + ); + + let mut frame = frame::Video::empty(); + let mut count: u64 = 0; + + let drain = |decoder: &mut VideoDecoder, frame: &mut frame::Video, count: &mut u64| loop { + match decoder.receive_frame(frame) { + Ok(()) => { + *count += 1; + println!( + "frame#{count} pts={:?} {}x{} fmt={:?}", + frame.pts(), + frame.width(), + frame.height(), + frame.format(), + ); + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + break + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Eof)) => break, + Err(e) => { + eprintln!("decode error: {e}"); + break; + } + } + }; + + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet)?; + drain(&mut decoder, &mut frame, &mut count); + } + decoder.send_eof()?; + drain(&mut decoder, &mut frame, &mut count); + + println!("decoded {count} frames"); + Ok(()) +} diff --git a/examples/foo.rs b/examples/foo.rs deleted file mode 100644 index f328e4d..0000000 --- a/examples/foo.rs +++ /dev/null @@ -1 +0,0 @@ -fn main() {} diff --git a/src/backend.rs b/src/backend.rs new file mode 100644 index 0000000..cfcd48b --- /dev/null +++ b/src/backend.rs @@ -0,0 +1,137 @@ +use ffmpeg_next::{ffi::AVHWDeviceType, format::Pixel}; + +/// Decoding backend selected (or forced) for a [`crate::VideoDecoder`]. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Backend { + /// Pure software decode via libavcodec. + Software, + /// Apple VideoToolbox (macOS, iOS, iPadOS, tvOS). + VideoToolbox, + /// Linux Video Acceleration API (Intel / AMD GPUs). + Vaapi, + /// NVIDIA NVDEC via CUDA (Linux / Windows on NVIDIA hardware). + Cuda, + /// Microsoft Direct3D 11 Video Acceleration (Windows). + D3d11va, +} + +impl Backend { + /// `AVHWDeviceType` corresponding to this backend, or `None` for + /// [`Backend::Software`]. + pub(crate) fn av_hwdevice_type(self) -> Option { + match self { + Self::Software => None, + Self::VideoToolbox => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX), + Self::Vaapi => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_VAAPI), + Self::Cuda => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA), + Self::D3d11va => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_D3D11VA), + } + } + + /// Hardware pixel format the codec is expected to produce when this + /// backend is in use. Used to inspect the result of `get_format`. + /// `None` for [`Backend::Software`]. + #[allow(dead_code)] // surfaced for tests / future use + pub(crate) fn hw_pixel_format(self) -> Option { + match self { + Self::Software => None, + Self::VideoToolbox => Some(Pixel::VIDEOTOOLBOX), + Self::Vaapi => Some(Pixel::VAAPI), + Self::Cuda => Some(Pixel::CUDA), + Self::D3d11va => Some(Pixel::D3D11), + } + } +} + +/// Probe order for `VideoDecoder::open` on the current target. +/// +/// Always ends in [`Backend::Software`]; auto-probe never returns an empty +/// list. Order is fixed at compile time per `target_os`. 
+pub(crate) fn probe_order() -> &'static [Backend] { + #[cfg(any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ))] + { + &[Backend::VideoToolbox, Backend::Software] + } + #[cfg(target_os = "linux")] + { + &[Backend::Vaapi, Backend::Cuda, Backend::Software] + } + #[cfg(target_os = "windows")] + { + &[Backend::D3d11va, Backend::Cuda, Backend::Software] + } + #[cfg(not(any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + target_os = "linux", + target_os = "windows", + )))] + { + &[Backend::Software] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn probe_order_ends_in_software() { + let order = probe_order(); + assert!(!order.is_empty()); + assert_eq!(*order.last().unwrap(), Backend::Software); + } + + #[test] + fn software_has_no_av_hwdevice_type() { + assert!(Backend::Software.av_hwdevice_type().is_none()); + assert!(Backend::Software.hw_pixel_format().is_none()); + } + + #[test] + fn hw_backends_have_av_hwdevice_type() { + for b in [ + Backend::VideoToolbox, + Backend::Vaapi, + Backend::Cuda, + Backend::D3d11va, + ] { + assert!( + b.av_hwdevice_type().is_some(), + "{b:?} missing hwdevice type" + ); + assert!(b.hw_pixel_format().is_some(), "{b:?} missing hw pix fmt"); + } + } + + #[cfg(any(target_os = "macos", target_os = "ios", target_os = "tvos"))] + #[test] + fn apple_probe_order() { + assert_eq!(probe_order(), &[Backend::VideoToolbox, Backend::Software]); + } + + #[cfg(target_os = "linux")] + #[test] + fn linux_probe_order() { + assert_eq!( + probe_order(), + &[Backend::Vaapi, Backend::Cuda, Backend::Software] + ); + } + + #[cfg(target_os = "windows")] + #[test] + fn windows_probe_order() { + assert_eq!( + probe_order(), + &[Backend::D3d11va, Backend::Cuda, Backend::Software] + ); + } +} diff --git a/src/decoder.rs b/src/decoder.rs new file mode 100644 index 0000000..7a3a357 --- /dev/null +++ b/src/decoder.rs @@ -0,0 +1,278 @@ +use 
std::{mem::ManuallyDrop, ptr}; + +use ffmpeg_next::{ + codec::{self, Context}, + ffi::{ + av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_hwdevice_ctx_create, + av_hwframe_transfer_data, + }, + format::Pixel, + frame, Codec, Packet, Rational, +}; + +use crate::{ + backend::{self, Backend}, + error::{Error, Result}, + ffi::{find_hw_pix_fmt, get_hw_format, CallbackState}, +}; + +/// Hardware-accelerated video decoder with software fallback. +/// +/// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface. +/// Frames returned by [`Self::receive_frame`] are always CPU-side; for hardware +/// backends they are downloaded with `av_hwframe_transfer_data` (NV12 / P010). +pub struct VideoDecoder { + /// Wrapped FFmpeg decoder. `ManuallyDrop` so we can sequence its drop + /// before freeing the callback state in our [`Drop`] impl. + inner: ManuallyDrop, + backend: Backend, + /// Owned reference produced by `av_hwdevice_ctx_create`. Null for software. + hw_device_ref: *mut ffmpeg_next::ffi::AVBufferRef, + /// Owned `Box` raw pointer; `AVCodecContext::opaque` aliases + /// it. Null for software. + callback_state: *mut CallbackState, + /// Reusable frame buffer used for hw-side decoding before transfer. + /// Unused on the software path (`receive_frame` writes the caller's frame + /// directly). + hw_frame: frame::Video, +} + +// SAFETY: All raw pointers are exclusively owned by the struct and never +// shared. `ffmpeg::decoder::Video` itself is Send (its `Context` is `unsafe +// impl Send`). The decoder is not safe for concurrent use, hence not `Sync`. +unsafe impl Send for VideoDecoder {} + +impl VideoDecoder { + /// Auto-probe hardware backends in the platform's default order, falling + /// back to software. The chosen backend is reported by [`Self::backend`]. 
+ pub fn open(parameters: codec::Parameters) -> Result { + let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id }); + let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?; + + let mut attempts = Vec::new(); + for &backend in backend::probe_order() { + match Self::try_open(parameters.clone(), codec, backend) { + Ok(decoder) => { + tracing::info!(?backend, "hwdecode: opened video decoder"); + return Ok(decoder); + } + Err(e) => { + tracing::warn!(?backend, error = %e, "hwdecode: backend probe failed"); + attempts.push((backend, Box::new(e))); + } + } + } + Err(Error::AllBackendsFailed { attempts }) + } + + /// Open the decoder with a specific backend. No probe, no fallback. + /// Returns an error if `backend` is not supported by the codec or fails to + /// initialise. + pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result { + let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id }); + let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?; + Self::try_open(parameters, codec, backend) + } + + /// The backend that opened this decoder. + pub fn backend(&self) -> Backend { + self.backend + } + + /// Decoder width in pixels. + pub fn width(&self) -> u32 { + self.inner.width() + } + + /// Decoder height in pixels. + pub fn height(&self) -> u32 { + self.inner.height() + } + + /// Current pixel format of the codec context. For HW backends this is the + /// hardware pixel format (e.g. `Pixel::VIDEOTOOLBOX`) once the first frame + /// has been negotiated; the format of frames returned from + /// [`Self::receive_frame`] is the *transferred* format (NV12 / P010) and + /// must be read from the frame itself. + pub fn format(&self) -> Pixel { + self.inner.format() + } + + /// Codec context time base. + pub fn time_base(&self) -> Rational { + self.inner.time_base() + } + + /// Frame rate from the codec context, if known. 
+ pub fn frame_rate(&self) -> Option { + self.inner.frame_rate() + } + + /// Submit a packet to the decoder. + pub fn send_packet(&mut self, packet: &Packet) -> Result<()> { + self.inner.send_packet(packet).map_err(Error::Ffmpeg) + } + + /// Signal end-of-stream to the decoder; remaining frames can be drained + /// with [`Self::receive_frame`]. + pub fn send_eof(&mut self) -> Result<()> { + self.inner.send_eof().map_err(Error::Ffmpeg) + } + + /// Receive a CPU-side decoded frame. + /// + /// For hardware backends the frame is transferred from GPU memory via + /// `av_hwframe_transfer_data` and frame metadata (pts, time_base, side + /// data, ...) is copied with `av_frame_copy_props`. For the software + /// backend this is a direct passthrough. + /// + /// Returns the same errors as `ffmpeg::decoder::Video::receive_frame`, + /// e.g. `Error::Other { errno: EAGAIN }` when no frame is ready. + pub fn receive_frame(&mut self, frame: &mut frame::Video) -> Result<()> { + if self.backend == Backend::Software { + return self.inner.receive_frame(frame).map_err(Error::Ffmpeg); + } + + // HW path: receive into our reusable hw_frame, then transfer. + self + .inner + .receive_frame(&mut self.hw_frame) + .map_err(Error::Ffmpeg)?; + + // SAFETY: both frames are valid AVFrame pointers owned by us. transfer + // allocates buffers on `frame` as needed; copy_props moves timing and + // side data over (transfer_data does not). + unsafe { + let ret = av_hwframe_transfer_data(frame.as_mut_ptr(), self.hw_frame.as_ptr(), 0); + if ret < 0 { + return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); + } + let ret = av_frame_copy_props(frame.as_mut_ptr(), self.hw_frame.as_ptr()); + if ret < 0 { + return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); + } + } + Ok(()) + } + + /// Flush internal buffers (e.g. after a seek). + pub fn flush(&mut self) { + self.inner.flush(); + } + + /// Inner open: tries one backend exactly, no probing. 
+ fn try_open(parameters: codec::Parameters, codec: Codec, backend: Backend) -> Result { + let mut ctx = Context::from_parameters(parameters)?; + + let (hw_device_ref, callback_state) = match backend.av_hwdevice_type() { + None => (ptr::null_mut(), ptr::null_mut()), + Some(av_type) => { + // Verify the codec advertises this hwaccel. + let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type) + .ok_or(Error::BackendUnsupportedByCodec(backend))?; + + // Create the device context. + let mut hw_device_ref = ptr::null_mut(); + // SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill. + let ret = unsafe { + av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0) + }; + if ret < 0 { + return Err(Error::HwDeviceInitFailed { + backend, + source: ffmpeg_next::Error::from(ret), + }); + } + + // Wire up the codec context: a fresh ref for FFmpeg, a heap + // pointer for the get_format callback to read. + let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt })); + // SAFETY: ctx is a freshly-constructed AVCodecContext we own; + // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's + // use (we keep our own ref in `hw_device_ref` for cleanup). + unsafe { + let raw = ctx.as_mut_ptr(); + (*raw).hw_device_ctx = av_buffer_ref(hw_device_ref); + (*raw).opaque = callback_state.cast(); + (*raw).get_format = Some(get_hw_format); + } + (hw_device_ref, callback_state) + } + }; + + // Open the decoder. On any failure, release the resources we just + // allocated so we don't leak. + let opened = match ctx.decoder().open_as(codec).and_then(|o| o.video()) { + Ok(d) => d, + Err(e) => { + // SAFETY: we either allocated these in this function above or + // they are null; av_buffer_unref / Box::from_raw handle null + // explicitly (we check first). 
+ unsafe { + let mut hw = hw_device_ref; + if !hw.is_null() { + av_buffer_unref(&mut hw); + } + if !callback_state.is_null() { + drop(Box::from_raw(callback_state)); + } + } + return Err(Error::Ffmpeg(e)); + } + }; + + Ok(Self { + inner: ManuallyDrop::new(opened), + backend, + hw_device_ref, + callback_state, + hw_frame: frame::Video::empty(), + }) + } +} + +impl Drop for VideoDecoder { + fn drop(&mut self) { + // Order matters: + // 1. Drop the codec context first. While it lives, FFmpeg may invoke + // `get_format`, which dereferences `callback_state` via `opaque`. + // 2. Free the callback state heap allocation. + // 3. Release our hw device reference (FFmpeg released its own when + // the codec context was freed in step 1). + unsafe { + ManuallyDrop::drop(&mut self.inner); + if !self.callback_state.is_null() { + drop(Box::from_raw(self.callback_state)); + } + if !self.hw_device_ref.is_null() { + av_buffer_unref(&mut self.hw_device_ref); + } + } + } +} + +#[allow(dead_code)] +fn _assert_send() { + fn check() {} + check::(); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn no_codec_for_unknown_id() { + // Build a Parameters with an unknown id — easiest path is to allocate + // empty parameters and inspect; here we just confirm Error::NoCodec + // formats sensibly. (Open behavior is exercised by integration tests + // because it requires real stream params.) + let err = Error::NoCodec(codec::Id::None); + assert!(format!("{err}").contains("no decoder")); + } + + #[test] + fn videodecoder_is_send() { + _assert_send(); + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..92cb2d1 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,37 @@ +use crate::backend::Backend; + +/// Crate result alias. +pub type Result = std::result::Result; + +/// Errors returned from [`crate::VideoDecoder`]. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// An underlying FFmpeg error. 
+ #[error("ffmpeg error: {0}")] + Ffmpeg(#[from] ffmpeg_next::Error), + + /// `avcodec_find_decoder` returned null for the input codec id. + #[error("no decoder for codec id {0:?}")] + NoCodec(ffmpeg_next::codec::Id), + + /// The codec does not advertise a hardware configuration matching the + /// requested backend (via `avcodec_get_hw_config`). + #[error("codec does not support backend {0:?}")] + BackendUnsupportedByCodec(Backend), + + /// `av_hwdevice_ctx_create` failed for the requested backend. + #[error("hardware device init failed for {backend:?}: {source}")] + HwDeviceInitFailed { + /// Backend that failed to initialise. + backend: Backend, + /// Underlying FFmpeg error. + source: ffmpeg_next::Error, + }, + + /// Auto-probe exhausted every backend in the platform's order. + #[error("all backends failed; attempts: {attempts:?}")] + AllBackendsFailed { + /// Per-backend errors collected during probing, in the order tried. + attempts: Vec<(Backend, Box)>, + }, +} diff --git a/src/ffi.rs b/src/ffi.rs new file mode 100644 index 0000000..6020079 --- /dev/null +++ b/src/ffi.rs @@ -0,0 +1,70 @@ +//! FFI shims used by the decoder. Kept in one place so the unsafe surface is +//! easy to audit. + +use ffmpeg_next::ffi::{ + avcodec_get_hw_config, AVCodec, AVCodecContext, AVHWDeviceType, AVPixelFormat, + AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX, +}; + +/// State pointed to by `AVCodecContext::opaque` so [`get_hw_format`] can pick +/// the correct hardware pixel format without globals. One instance per +/// decoder; freed in [`crate::VideoDecoder::drop`]. +#[repr(C)] +pub(crate) struct CallbackState { + pub(crate) wanted: AVPixelFormat, +} + +/// `AVCodecContext::get_format` callback. FFmpeg invokes it with the list of +/// pixel formats the codec is willing to output for the current stream. 
We +/// pick the hardware format we wired up at open time, or [`AVPixelFormat::AV_PIX_FMT_NONE`] +/// to signal "no usable format" (which causes FFmpeg to error out — the caller +/// then sees a normal `ffmpeg::Error` and probes the next backend). +pub(crate) unsafe extern "C" fn get_hw_format( + ctx: *mut AVCodecContext, + mut pix_fmts: *const AVPixelFormat, +) -> AVPixelFormat { + debug_assert!(!ctx.is_null()); + debug_assert!(!pix_fmts.is_null()); + + // SAFETY: opaque was set by `try_open` to a valid `Box` + // pointer that outlives the codec context (we only free it after the + // codec context's drop runs). + let state = unsafe { (*ctx).opaque as *const CallbackState }; + if state.is_null() { + return AVPixelFormat::AV_PIX_FMT_NONE; + } + let wanted = unsafe { (*state).wanted }; + + // Walk the offered list looking for our format. + while unsafe { *pix_fmts } != AVPixelFormat::AV_PIX_FMT_NONE { + if unsafe { *pix_fmts } == wanted { + return wanted; + } + pix_fmts = unsafe { pix_fmts.add(1) }; + } + AVPixelFormat::AV_PIX_FMT_NONE +} + +/// Walk the codec's `AVCodecHWConfig` table and return the hardware pixel +/// format associated with `device_type`, if the codec advertises one that +/// uses the `HW_DEVICE_CTX` setup method. +pub(crate) fn find_hw_pix_fmt( + codec: *const AVCodec, + device_type: AVHWDeviceType, +) -> Option { + debug_assert!(!codec.is_null()); + let mut i = 0; + loop { + // SAFETY: `avcodec_get_hw_config` returns null past the end; we stop then. + let cfg = unsafe { avcodec_get_hw_config(codec, i) }; + if cfg.is_null() { + return None; + } + let cfg = unsafe { *cfg }; + let supports_device_ctx = cfg.methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0; + if supports_device_ctx && cfg.device_type == device_type { + return Some(cfg.pix_fmt); + } + i += 1; + } +} diff --git a/src/lib.rs b/src/lib.rs index 0a58390..7d9c7bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,25 @@ -//! 
A template for creating Rust open-source repo on GitHub -#![cfg_attr(not(feature = "std"), no_std)] +//! Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next`. +//! +//! [`VideoDecoder`] mirrors the surface of `ffmpeg::decoder::Video` +//! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and silently picks the best +//! hardware backend for the host platform, falling back to software if none open. +//! +//! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side. For +//! hardware backends they are downloaded with `av_hwframe_transfer_data` (NV12 +//! for 8-bit input, P010 for 10-bit). For software backends the frame is in the +//! codec's native format. +//! +//! Pixel-format conversion is intentionally out of scope; downstream code is +//! expected to handle that (e.g. via `colconv`). #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(docsrs, allow(unused_attributes))] #![deny(missing_docs)] -#[cfg(all(not(feature = "std"), feature = "alloc"))] -extern crate alloc as std; +mod backend; +mod decoder; +mod error; +mod ffi; -#[cfg(feature = "std")] -extern crate std; +pub use backend::Backend; +pub use decoder::VideoDecoder; +pub use error::{Error, Result}; diff --git a/tests/decode.rs b/tests/decode.rs new file mode 100644 index 0000000..a936ae3 --- /dev/null +++ b/tests/decode.rs @@ -0,0 +1,68 @@ +//! Integration test: open the auto-probed decoder against a real video file +//! and decode the first 30 frames. Skipped (with a clear message) when no +//! sample is configured. +//! +//! Set `HWDECODE_SAMPLE_VIDEO` to an absolute path to enable. 
+ +use ffmpeg::{format, frame, media}; +use ffmpeg_next as ffmpeg; +use hwdecode::VideoDecoder; + +const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; + +#[test] +fn auto_open_decodes_at_least_one_frame() { + let Some(path) = std::env::var_os(SAMPLE_ENV) else { + eprintln!("skipping: set {SAMPLE_ENV} to a video file path to run this test"); + return; + }; + + ffmpeg::init().expect("ffmpeg init"); + + let mut input = format::input(&path).expect("open input"); + let stream = input + .streams() + .best(media::Type::Video) + .expect("video stream"); + let stream_index = stream.index(); + let expected_w = unsafe { (*stream.parameters().as_ptr()).width as u32 }; + let expected_h = unsafe { (*stream.parameters().as_ptr()).height as u32 }; + + let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder"); + eprintln!("backend = {:?}", decoder.backend()); + + assert_eq!(decoder.width(), expected_w); + assert_eq!(decoder.height(), expected_h); + + let mut frame = frame::Video::empty(); + let mut count = 0_usize; + let target = 30_usize; + + 'outer: for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet).expect("send packet"); + loop { + match decoder.receive_frame(&mut frame) { + Ok(()) => { + assert_eq!(frame.width(), expected_w); + assert_eq!(frame.height(), expected_h); + count += 1; + if count >= target { + break 'outer; + } + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + break; + } + Err(e) => panic!("receive_frame: {e}"), + } + } + } + + assert!(count >= 1, "expected at least 1 decoded frame, got {count}"); + eprintln!("decoded {count} frames"); +} diff --git a/tests/foo.rs b/tests/foo.rs deleted file mode 100644 index 8b13789..0000000 --- a/tests/foo.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs new file mode 100644 index 0000000..5aa37c9 --- /dev/null +++ b/tests/hw_smoke.rs @@ -0,0 +1,64 
@@ +//! `#[ignore]`-gated smoke test that asserts the auto-probed backend is +//! actually a hardware backend (not Software). Run with: +//! +//! ```sh +//! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored +//! ``` + +use ffmpeg::{format, frame, media}; +use ffmpeg_next as ffmpeg; +use hwdecode::{Backend, VideoDecoder}; + +const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; + +#[test] +#[ignore = "requires HWDECODE_SAMPLE_VIDEO and a working hardware backend"] +fn auto_probe_picks_hardware_backend() { + let path = std::env::var_os(SAMPLE_ENV).unwrap_or_else(|| panic!("{SAMPLE_ENV} not set")); + + ffmpeg::init().expect("ffmpeg init"); + + let mut input = format::input(&path).expect("open input"); + let stream = input + .streams() + .best(media::Type::Video) + .expect("video stream"); + let stream_index = stream.index(); + + let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder"); + eprintln!("auto-probe selected backend = {:?}", decoder.backend()); + assert_ne!( + decoder.backend(), + Backend::Software, + "expected hardware backend; got Software" + ); + + // Verify we can actually decode at least one HW frame end-to-end. 
+ let mut frame = frame::Video::empty(); + let mut got_frame = false; + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet).expect("send packet"); + match decoder.receive_frame(&mut frame) { + Ok(()) => { + got_frame = true; + eprintln!( + "first hw frame: {}x{} fmt={:?}", + frame.width(), + frame.height(), + frame.format() + ); + break; + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + continue; + } + Err(e) => panic!("receive_frame: {e}"), + } + } + assert!(got_frame, "no frames decoded"); +} From bfd9b525cba209ea1a8ec29cb79d1e3fffa8e628 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 15:56:13 +1200 Subject: [PATCH 02/27] update --- benches/decode.rs | 32 +++- src/decoder.rs | 420 +++++++++++++++++++++++++++++++++++----------- src/ffi.rs | 112 +++++++++++-- tests/hw_smoke.rs | 20 ++- 4 files changed, 455 insertions(+), 129 deletions(-) diff --git a/benches/decode.rs b/benches/decode.rs index be7281d..2433de9 100644 --- a/benches/decode.rs +++ b/benches/decode.rs @@ -74,19 +74,37 @@ fn bench_decode(c: &mut Criterion) { return; }; - // Probe backend once to print which HW backend (if any) we'd be benching. + // Probe by decoding one frame so the probe collapses to the backend that + // actually produced output. Reading `backend()` before the first frame + // would observe the optimistically-selected value and mislabel HW runs + // that silently degraded. 
let probed_backend = { - let input = format::input(&path).expect("open input"); + let mut input = format::input(&path).expect("open input"); let stream = input .streams() .best(media::Type::Video) .expect("video stream"); - let dec = VideoDecoder::open(stream.parameters()).expect("auto-probe"); - let b = dec.backend(); - drop(dec); - b + let stream_index = stream.index(); + let mut dec = VideoDecoder::open(stream.parameters()).expect("auto-probe"); + let mut frame = frame::Video::empty(); + 'probe: for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + dec.send_packet(&packet).expect("probe send_packet"); + match dec.receive_frame(&mut frame) { + Ok(()) => break 'probe, + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + continue; + } + Err(e) => panic!("probe receive_frame: {e}"), + } + } + dec.backend() }; - eprintln!("auto-probe selected backend: {probed_backend:?}"); + eprintln!("auto-probe settled on backend: {probed_backend:?}"); let mut group = c.benchmark_group("decode"); group.measurement_time(Duration::from_secs(15)); diff --git a/src/decoder.rs b/src/decoder.rs index 7a3a357..76fe3e4 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -3,8 +3,8 @@ use std::{mem::ManuallyDrop, ptr}; use ffmpeg_next::{ codec::{self, Context}, ffi::{ - av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_hwdevice_ctx_create, - av_hwframe_transfer_data, + av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref, + av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVPixelFormat, }, format::Pixel, frame, Codec, Packet, Rational, @@ -19,45 +19,128 @@ use crate::{ /// Hardware-accelerated video decoder with software fallback. /// /// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface. 
-/// Frames returned by [`Self::receive_frame`] are always CPU-side; for hardware -/// backends they are downloaded with `av_hwframe_transfer_data` (NV12 / P010). +/// Frames returned by [`Self::receive_frame`] are always CPU-side; for the +/// hardware path they are downloaded with `av_hwframe_transfer_data` (NV12 / +/// P010). +/// +/// `open` does a true probe: each backend opens with a strict `get_format` +/// callback, and on the first non-transient error the decoder is torn down +/// and the next backend is tried with all packets seen so far replayed +/// through it. Once the first frame is successfully received the probe +/// collapses and subsequent calls go straight to the active backend. pub struct VideoDecoder { + /// Live FFmpeg state for the currently active backend. + state: DecoderState, + /// Reusable frame buffer used for hw-side decoding before transfer / move. + hw_frame: frame::Video, + /// Probe state: present until the first frame is received from the active + /// backend, then `None`. While `Some`, packets are buffered for replay and + /// non-transient errors / decoder failures advance to the next backend. + probe: Option, +} + +/// Owned FFmpeg state for one open codec context. Has its own `Drop` so we +/// can swap it out cleanly during a probe advance via `mem::replace`. +struct DecoderState { /// Wrapped FFmpeg decoder. `ManuallyDrop` so we can sequence its drop - /// before freeing the callback state in our [`Drop`] impl. + /// before freeing the callback state. inner: ManuallyDrop, + /// Backend driving this state. backend: Backend, /// Owned reference produced by `av_hwdevice_ctx_create`. Null for software. - hw_device_ref: *mut ffmpeg_next::ffi::AVBufferRef, + hw_device_ref: *mut AVBufferRef, /// Owned `Box` raw pointer; `AVCodecContext::opaque` aliases /// it. Null for software. callback_state: *mut CallbackState, - /// Reusable frame buffer used for hw-side decoding before transfer. 
- /// Unused on the software path (`receive_frame` writes the caller's frame - /// directly). - hw_frame: frame::Video, + /// Hardware pixel format we asked the decoder to produce. Compared (as + /// `i32` to avoid enum-discriminant UB) against each received frame's + /// format. `AV_PIX_FMT_NONE` for the software path. + hw_pix_fmt: AVPixelFormat, +} + +/// State carried only during the probe window (before the first successful +/// frame). Holds enough information to tear down the current decoder and +/// retry with the next backend. +struct ProbeState { + parameters: codec::Parameters, + codec: Codec, + /// Backends still to try, in order. Empty means "no more options after + /// the active one fails". + remaining_backends: Vec, + /// Packets sent so far, kept for replay through the next backend. + buffered_packets: Vec, + /// Whether `send_eof` has been called; replayed alongside packets. + eof_sent: bool, } -// SAFETY: All raw pointers are exclusively owned by the struct and never -// shared. `ffmpeg::decoder::Video` itself is Send (its `Context` is `unsafe -// impl Send`). The decoder is not safe for concurrent use, hence not `Sync`. +// SAFETY: All raw pointers are exclusively owned by `DecoderState` and never +// shared. `ffmpeg::decoder::Video` is itself `Send` (its `Context` carries an +// `unsafe impl Send`). The decoder is not safe for concurrent use, hence not +// `Sync`. +unsafe impl Send for DecoderState {} unsafe impl Send for VideoDecoder {} +impl Drop for DecoderState { + fn drop(&mut self) { + // Order matters: + // 1. Drop the codec context first. While it lives, FFmpeg may invoke + // `get_format`, which dereferences `callback_state` via `opaque`. + // 2. Free the callback state heap allocation. + // 3. Release our hw device reference (FFmpeg released its own when + // the codec context was freed in step 1). 
+ unsafe { + ManuallyDrop::drop(&mut self.inner); + if !self.callback_state.is_null() { + drop(Box::from_raw(self.callback_state)); + self.callback_state = ptr::null_mut(); + } + if !self.hw_device_ref.is_null() { + av_buffer_unref(&mut self.hw_device_ref); + } + } + } +} + impl VideoDecoder { - /// Auto-probe hardware backends in the platform's default order, falling - /// back to software. The chosen backend is reported by [`Self::backend`]. + /// Auto-probe hardware backends in the platform's default order. + /// + /// Each backend opens with a strict `get_format` callback. The first + /// backend whose `avcodec_open2` succeeds becomes active; if the first + /// frame from it fails (e.g. `get_format` returns `NONE` because the + /// backend can't handle this stream's profile/depth), the decoder is torn + /// down and the next backend is tried — packets sent so far are replayed + /// through the new decoder, transparently to the caller. + /// + /// [`Self::backend`] reflects whichever backend ultimately produced the + /// first frame. Software is the last entry in every probe order, so + /// `open` cannot return without a working decoder for codecs that + /// libavcodec supports at all. 
pub fn open(parameters: codec::Parameters) -> Result { let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id }); let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?; + let order = backend::probe_order(); - let mut attempts = Vec::new(); - for &backend in backend::probe_order() { - match Self::try_open(parameters.clone(), codec, backend) { - Ok(decoder) => { - tracing::info!(?backend, "hwdecode: opened video decoder"); - return Ok(decoder); + let mut attempts: Vec<(Backend, Box)> = Vec::new(); + for (i, &backend) in order.iter().enumerate() { + match Self::build_state(parameters.clone(), codec, backend) { + Ok(state) => { + tracing::info!(?backend, "hwdecode: opened video decoder (probing)"); + let remaining = order[(i + 1)..].to_vec(); + let probe = (!remaining.is_empty()).then(|| ProbeState { + parameters, + codec, + remaining_backends: remaining, + buffered_packets: Vec::new(), + eof_sent: false, + }); + return Ok(Self { + state, + hw_frame: frame::Video::empty(), + probe, + }); } Err(e) => { - tracing::warn!(?backend, error = %e, "hwdecode: backend probe failed"); + tracing::warn!(?backend, error = %e, "hwdecode: backend open failed"); attempts.push((backend, Box::new(e))); } } @@ -66,113 +149,260 @@ impl VideoDecoder { } /// Open the decoder with a specific backend. No probe, no fallback. - /// Returns an error if `backend` is not supported by the codec or fails to - /// initialise. + /// + /// If `backend` is a hardware backend that the codec can't actually use + /// for this stream, the failure surfaces from + /// [`Self::receive_frame`] (the strict `get_format` callback returns + /// `AV_PIX_FMT_NONE`, the decoder errors out). The caller is responsible + /// for retrying with `Backend::Software` or another backend if desired. 
pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result { let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id }); let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?; - Self::try_open(parameters, codec, backend) + let state = Self::build_state(parameters, codec, backend)?; + Ok(Self { + state, + hw_frame: frame::Video::empty(), + probe: None, + }) } - /// The backend that opened this decoder. + /// The backend currently producing frames. While the probe is still in + /// progress (no frame received yet) this returns the optimistically + /// selected backend; after the first frame, it is the backend that + /// actually produced it. Once stable, never changes again. pub fn backend(&self) -> Backend { - self.backend + self.state.backend } /// Decoder width in pixels. pub fn width(&self) -> u32 { - self.inner.width() + self.state.inner.width() } /// Decoder height in pixels. pub fn height(&self) -> u32 { - self.inner.height() + self.state.inner.height() } - /// Current pixel format of the codec context. For HW backends this is the + /// Codec context's current pixel format. For HW backends this is the /// hardware pixel format (e.g. `Pixel::VIDEOTOOLBOX`) once the first frame - /// has been negotiated; the format of frames returned from - /// [`Self::receive_frame`] is the *transferred* format (NV12 / P010) and - /// must be read from the frame itself. + /// has been negotiated; the caller-facing format produced by + /// [`Self::receive_frame`] is the *transferred* format (NV12 / P010 for + /// HW, codec-native for SW) and must be read from the frame itself. pub fn format(&self) -> Pixel { - self.inner.format() + self.state.inner.format() } /// Codec context time base. pub fn time_base(&self) -> Rational { - self.inner.time_base() + self.state.inner.time_base() } /// Frame rate from the codec context, if known. 
pub fn frame_rate(&self) -> Option { - self.inner.frame_rate() + self.state.inner.frame_rate() } - /// Submit a packet to the decoder. + /// Submit a packet to the decoder. While the probe is active the packet is + /// also buffered for potential replay through a fallback backend. pub fn send_packet(&mut self, packet: &Packet) -> Result<()> { - self.inner.send_packet(packet).map_err(Error::Ffmpeg) + if let Some(probe) = self.probe.as_mut() { + probe.buffered_packets.push(packet.clone()); + } + self.state.inner.send_packet(packet).map_err(Error::Ffmpeg) } /// Signal end-of-stream to the decoder; remaining frames can be drained - /// with [`Self::receive_frame`]. + /// with [`Self::receive_frame`]. Recorded for replay if probe is active. pub fn send_eof(&mut self) -> Result<()> { - self.inner.send_eof().map_err(Error::Ffmpeg) + if let Some(probe) = self.probe.as_mut() { + probe.eof_sent = true; + } + self.state.inner.send_eof().map_err(Error::Ffmpeg) } /// Receive a CPU-side decoded frame. /// - /// For hardware backends the frame is transferred from GPU memory via + /// On the hardware path the frame is transferred from GPU memory via /// `av_hwframe_transfer_data` and frame metadata (pts, time_base, side - /// data, ...) is copied with `av_frame_copy_props`. For the software - /// backend this is a direct passthrough. + /// data, ...) is copied with `av_frame_copy_props`. The caller's frame is + /// always unref'd first so reuse across resolution changes or different + /// decoders is safe (mirrors `avcodec_receive_frame`'s own contract). + /// + /// While the probe window is open and the active backend produces a + /// non-transient error or a software-format frame instead of the + /// configured hardware format, the decoder is torn down and the next + /// backend in probe order is tried with all buffered packets replayed. + /// The caller observes only the eventual successful frame (or, if every + /// backend has been exhausted, the underlying error). 
/// - /// Returns the same errors as `ffmpeg::decoder::Video::receive_frame`, - /// e.g. `Error::Other { errno: EAGAIN }` when no frame is ready. + /// Returns the same transient signals as `ffmpeg::decoder::Video`: + /// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and + /// more packets must be sent, and `Error::Ffmpeg(Eof)` once fully drained. pub fn receive_frame(&mut self, frame: &mut frame::Video) -> Result<()> { - if self.backend == Backend::Software { - return self.inner.receive_frame(frame).map_err(Error::Ffmpeg); - } + loop { + let res = self.state.inner.receive_frame(&mut self.hw_frame); + match res { + Err(e) => { + if is_transient(&e) { + return Err(Error::Ffmpeg(e)); + } + if self.probe.is_some() && self.advance_probe()? { + continue; + } + return Err(Error::Ffmpeg(e)); + } + Ok(()) => { + // Compare format as i32 to avoid constructing an AVPixelFormat + // enum from an unvalidated integer. Library/header skew or a new + // hardware format would otherwise be UB. + let received_fmt: i32 = unsafe { (*self.hw_frame.as_ptr()).format }; - // HW path: receive into our reusable hw_frame, then transfer. - self - .inner - .receive_frame(&mut self.hw_frame) - .map_err(Error::Ffmpeg)?; + if self.state.backend == Backend::Software { + // Pure SW path: just hand over the frame. + unsafe { + av_frame_unref(frame.as_mut_ptr()); + av_frame_move_ref(frame.as_mut_ptr(), self.hw_frame.as_mut_ptr()); + } + self.probe = None; + return Ok(()); + } - // SAFETY: both frames are valid AVFrame pointers owned by us. transfer - // allocates buffers on `frame` as needed; copy_props moves timing and - // side data over (transfer_data does not). 
- unsafe { - let ret = av_hwframe_transfer_data(frame.as_mut_ptr(), self.hw_frame.as_ptr(), 0); - if ret < 0 { - return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); - } - let ret = av_frame_copy_props(frame.as_mut_ptr(), self.hw_frame.as_ptr()); - if ret < 0 { - return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); + if received_fmt == self.state.hw_pix_fmt as i32 { + // True HW frame: download to CPU and copy timing/side data. + unsafe { + av_frame_unref(frame.as_mut_ptr()); + let ret = av_hwframe_transfer_data(frame.as_mut_ptr(), self.hw_frame.as_ptr(), 0); + if ret < 0 { + return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); + } + let ret = av_frame_copy_props(frame.as_mut_ptr(), self.hw_frame.as_ptr()); + if ret < 0 { + return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); + } + } + self.probe = None; + return Ok(()); + } + + // The decoder produced a CPU frame from a HW-opened context. With + // strict `get_format` this is unusual (the codec would normally + // error on get_format=NONE). If it does happen and we still have + // backends to try, treat it as a probe failure and advance. + if self.probe.is_some() && self.advance_probe()? { + continue; + } + // No fallback left; accept the SW frame and update the active + // backend so `backend()` reflects reality. + unsafe { + av_frame_unref(frame.as_mut_ptr()); + av_frame_move_ref(frame.as_mut_ptr(), self.hw_frame.as_mut_ptr()); + } + self.state.backend = Backend::Software; + self.probe = None; + return Ok(()); + } } } - Ok(()) } - /// Flush internal buffers (e.g. after a seek). + /// Flush internal buffers (e.g. after a seek). Resets probe-time buffer if + /// active, since post-seek packets do not align with replayed history. 
pub fn flush(&mut self) { - self.inner.flush(); + self.state.inner.flush(); + if let Some(probe) = self.probe.as_mut() { + probe.buffered_packets.clear(); + probe.eof_sent = false; + } + } + + /// Tear down the active decoder and bring up the next backend in + /// `remaining_backends`, replaying buffered packets. Returns `true` if a + /// new backend was successfully installed (caller should retry the + /// receive); `false` if the probe is exhausted. + fn advance_probe(&mut self) -> Result { + let next_backend = match self.probe.as_mut() { + Some(probe) if !probe.remaining_backends.is_empty() => probe.remaining_backends.remove(0), + _ => return Ok(false), + }; + let prev_backend = self.state.backend; + tracing::warn!( + from = ?prev_backend, + to = ?next_backend, + "hwdecode: backend rejected stream, advancing probe" + ); + + // Snapshot probe inputs before mutating self. + let (parameters, codec, buffered_packets, eof_sent) = { + let probe = self.probe.as_mut().expect("probe state"); + ( + probe.parameters.clone(), + probe.codec, + std::mem::take(&mut probe.buffered_packets), + probe.eof_sent, + ) + }; + + // Build the new state. If this open fails, we fall through to advancing + // again — which is what the caller's loop will do once it sees the next + // probe iteration also fail. To keep semantics simple, propagate the + // open error directly: the caller's loop will see it as the decode + // error and return upward; in practice probe order ends in Software + // which always opens. + let new_state = Self::build_state(parameters, codec, next_backend)?; + + // Replace state. The old DecoderState's Drop runs here, in order: + // codec context first, then callback_state box, then hw_device_ref. + self.state = new_state; + + // hw_frame may hold residual data from the old decoder. Clear it so + // the next receive starts clean. 
+ unsafe { + av_frame_unref(self.hw_frame.as_mut_ptr()); + } + + // Replay buffered packets and (if previously sent) EOF through the new + // decoder. We re-buffer them on the way through so a subsequent probe + // advance still has the full history. + let probe = self.probe.as_mut().expect("probe still present"); + probe.buffered_packets.clear(); + probe.eof_sent = false; + + for pkt in buffered_packets { + // Mirror `send_packet`'s buffering behaviour. + probe.buffered_packets.push(pkt.clone()); + self.state.inner.send_packet(&pkt).map_err(Error::Ffmpeg)?; + } + if eof_sent { + self.probe.as_mut().expect("probe still present").eof_sent = true; + self.state.inner.send_eof().map_err(Error::Ffmpeg)?; + } + + Ok(true) } - /// Inner open: tries one backend exactly, no probing. - fn try_open(parameters: codec::Parameters, codec: Codec, backend: Backend) -> Result { + /// Build raw FFmpeg state for one backend. Strict `get_format` (NONE on + /// missing HW format); cross-backend fallback is the caller's job. + fn build_state( + parameters: codec::Parameters, + codec: Codec, + backend: Backend, + ) -> Result { let mut ctx = Context::from_parameters(parameters)?; - let (hw_device_ref, callback_state) = match backend.av_hwdevice_type() { - None => (ptr::null_mut(), ptr::null_mut()), + let (hw_device_ref, callback_state, hw_pix_fmt) = match backend.av_hwdevice_type() { + None => ( + ptr::null_mut(), + ptr::null_mut(), + AVPixelFormat::AV_PIX_FMT_NONE, + ), Some(av_type) => { // Verify the codec advertises this hwaccel. let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type) .ok_or(Error::BackendUnsupportedByCodec(backend))?; // Create the device context. - let mut hw_device_ref = ptr::null_mut(); + let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut(); // SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill. 
let ret = unsafe { av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0) @@ -184,8 +414,6 @@ impl VideoDecoder { }); } - // Wire up the codec context: a fresh ref for FFmpeg, a heap - // pointer for the get_format callback to read. let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt })); // SAFETY: ctx is a freshly-constructed AVCodecContext we own; // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's @@ -196,7 +424,7 @@ impl VideoDecoder { (*raw).opaque = callback_state.cast(); (*raw).get_format = Some(get_hw_format); } - (hw_device_ref, callback_state) + (hw_device_ref, callback_state, hw_pix_fmt) } }; @@ -221,34 +449,21 @@ impl VideoDecoder { } }; - Ok(Self { + Ok(DecoderState { inner: ManuallyDrop::new(opened), backend, hw_device_ref, callback_state, - hw_frame: frame::Video::empty(), + hw_pix_fmt, }) } } -impl Drop for VideoDecoder { - fn drop(&mut self) { - // Order matters: - // 1. Drop the codec context first. While it lives, FFmpeg may invoke - // `get_format`, which dereferences `callback_state` via `opaque`. - // 2. Free the callback state heap allocation. - // 3. Release our hw device reference (FFmpeg released its own when - // the codec context was freed in step 1). - unsafe { - ManuallyDrop::drop(&mut self.inner); - if !self.callback_state.is_null() { - drop(Box::from_raw(self.callback_state)); - } - if !self.hw_device_ref.is_null() { - av_buffer_unref(&mut self.hw_device_ref); - } - } - } +/// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame` +/// and must not be treated as backend failures. 
+fn is_transient(e: &ffmpeg_next::Error) -> bool { + matches!(e, ffmpeg_next::Error::Other { errno } if *errno == ffmpeg_next::error::EAGAIN) + || matches!(e, ffmpeg_next::Error::Eof) } #[allow(dead_code)] @@ -263,10 +478,6 @@ mod tests { #[test] fn no_codec_for_unknown_id() { - // Build a Parameters with an unknown id — easiest path is to allocate - // empty parameters and inspect; here we just confirm Error::NoCodec - // formats sensibly. (Open behavior is exercised by integration tests - // because it requires real stream params.) let err = Error::NoCodec(codec::Id::None); assert!(format!("{err}").contains("no decoder")); } @@ -275,4 +486,15 @@ mod tests { fn videodecoder_is_send() { _assert_send(); } + + #[test] + fn is_transient_recognises_eagain_and_eof() { + let eagain = ffmpeg_next::Error::Other { + errno: ffmpeg_next::error::EAGAIN, + }; + assert!(is_transient(&eagain)); + assert!(is_transient(&ffmpeg_next::Error::Eof)); + let other = ffmpeg_next::Error::InvalidData; + assert!(!is_transient(&other)); + } } diff --git a/src/ffi.rs b/src/ffi.rs index 6020079..78ee80c 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -8,39 +8,48 @@ use ffmpeg_next::ffi::{ /// State pointed to by `AVCodecContext::opaque` so [`get_hw_format`] can pick /// the correct hardware pixel format without globals. One instance per -/// decoder; freed in [`crate::VideoDecoder::drop`]. +/// decoder; freed by [`crate::VideoDecoder`] after the codec context is +/// dropped. #[repr(C)] pub(crate) struct CallbackState { + /// Hardware pixel format we want the decoder to produce. pub(crate) wanted: AVPixelFormat, } /// `AVCodecContext::get_format` callback. FFmpeg invokes it with the list of -/// pixel formats the codec is willing to output for the current stream. 
We -/// pick the hardware format we wired up at open time, or [`AVPixelFormat::AV_PIX_FMT_NONE`] -/// to signal "no usable format" (which causes FFmpeg to error out — the caller -/// then sees a normal `ffmpeg::Error` and probes the next backend). +/// pixel formats the codec is willing to output for the current stream. +/// +/// Returns the configured hardware format if present; otherwise +/// [`AVPixelFormat::AV_PIX_FMT_NONE`], which causes the decoder to fail. The +/// failure surfaces as a normal `Error::Ffmpeg` from +/// [`crate::VideoDecoder::receive_frame`]; for `VideoDecoder::open` callers +/// the probe loop tears down and retries with the next backend (replaying +/// buffered packets), so software fallback happens at the decoder level +/// rather than silently in-context. pub(crate) unsafe extern "C" fn get_hw_format( ctx: *mut AVCodecContext, - mut pix_fmts: *const AVPixelFormat, + pix_fmts: *const AVPixelFormat, ) -> AVPixelFormat { debug_assert!(!ctx.is_null()); debug_assert!(!pix_fmts.is_null()); // SAFETY: opaque was set by `try_open` to a valid `Box` // pointer that outlives the codec context (we only free it after the - // codec context's drop runs). + // codec context's drop runs). When opaque is null we treat the call as + // strict — a stray invocation cannot silently downgrade. let state = unsafe { (*ctx).opaque as *const CallbackState }; - if state.is_null() { - return AVPixelFormat::AV_PIX_FMT_NONE; - } - let wanted = unsafe { (*state).wanted }; + let wanted = if state.is_null() { + AVPixelFormat::AV_PIX_FMT_NONE + } else { + unsafe { (*state).wanted } + }; - // Walk the offered list looking for our format. 
- while unsafe { *pix_fmts } != AVPixelFormat::AV_PIX_FMT_NONE { - if unsafe { *pix_fmts } == wanted { + let mut p = pix_fmts; + while unsafe { *p } != AVPixelFormat::AV_PIX_FMT_NONE { + if unsafe { *p } == wanted { return wanted; } - pix_fmts = unsafe { pix_fmts.add(1) }; + p = unsafe { p.add(1) }; } AVPixelFormat::AV_PIX_FMT_NONE } @@ -68,3 +77,76 @@ pub(crate) fn find_hw_pix_fmt( i += 1; } } + +#[cfg(test)] +mod tests { + use super::*; + use std::ptr; + + // The callback derefs `(*ctx).opaque`, so we need a real-looking + // AVCodecContext. We construct a zeroed one (the callback only reads opaque). + struct FakeCtx(*mut AVCodecContext); + impl FakeCtx { + fn new(state: *mut CallbackState) -> Self { + let boxed: Box = unsafe { Box::new(std::mem::zeroed()) }; + let raw = Box::into_raw(boxed); + unsafe { (*raw).opaque = state.cast() }; + Self(raw) + } + } + impl Drop for FakeCtx { + fn drop(&mut self) { + unsafe { drop(Box::from_raw(self.0)) }; + } + } + + fn run(state: &CallbackState, mut offered: Vec) -> AVPixelFormat { + offered.push(AVPixelFormat::AV_PIX_FMT_NONE); + let ctx = FakeCtx::new(state as *const _ as *mut _); + unsafe { get_hw_format(ctx.0, offered.as_ptr()) } + } + + #[test] + fn returns_wanted_hw_format_when_offered() { + let state = CallbackState { + wanted: AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX, + }; + let got = run( + &state, + vec![ + AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX, + AVPixelFormat::AV_PIX_FMT_NV12, + ], + ); + assert_eq!(got, AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX); + } + + #[test] + fn returns_none_when_wanted_absent() { + let state = CallbackState { + wanted: AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX, + }; + let got = run( + &state, + vec![ + AVPixelFormat::AV_PIX_FMT_NV12, + AVPixelFormat::AV_PIX_FMT_YUV420P, + ], + ); + assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE); + } + + #[test] + fn null_opaque_is_treated_as_strict() { + let boxed: Box = unsafe { Box::new(std::mem::zeroed()) }; + let ctx_raw = Box::into_raw(boxed); + unsafe { 
(*ctx_raw).opaque = ptr::null_mut() }; + let offered = [ + AVPixelFormat::AV_PIX_FMT_NV12, + AVPixelFormat::AV_PIX_FMT_NONE, + ]; + let got = unsafe { get_hw_format(ctx_raw, offered.as_ptr()) }; + assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE); + unsafe { drop(Box::from_raw(ctx_raw)) }; + } +} diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs index 5aa37c9..3084faf 100644 --- a/tests/hw_smoke.rs +++ b/tests/hw_smoke.rs @@ -26,14 +26,12 @@ fn auto_probe_picks_hardware_backend() { let stream_index = stream.index(); let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder"); - eprintln!("auto-probe selected backend = {:?}", decoder.backend()); - assert_ne!( - decoder.backend(), - Backend::Software, - "expected hardware backend; got Software" - ); + eprintln!("auto-probe optimistic backend = {:?}", decoder.backend()); - // Verify we can actually decode at least one HW frame end-to-end. + // Decode at least one frame so the probe collapses, then check the + // backend that actually produced it. Checking `decoder.backend()` before + // any frame has been received would observe the optimistic pre-probe + // value and could false-pass when a HW backend silently degrades. 
let mut frame = frame::Video::empty(); let mut got_frame = false; for (s, packet) in input.packets() { @@ -45,7 +43,8 @@ fn auto_probe_picks_hardware_backend() { Ok(()) => { got_frame = true; eprintln!( - "first hw frame: {}x{} fmt={:?}", + "first frame: backend={:?} {}x{} fmt={:?}", + decoder.backend(), frame.width(), frame.height(), frame.format() @@ -61,4 +60,9 @@ fn auto_probe_picks_hardware_backend() { } } assert!(got_frame, "no frames decoded"); + assert_ne!( + decoder.backend(), + Backend::Software, + "expected hardware backend after first frame; got Software" + ); } From 9dc8542d5de68f57fd5e5f11184380802bfd12e6 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 16:23:16 +1200 Subject: [PATCH 03/27] update --- benches/decode.rs | 8 +- examples/decode.rs | 20 ++-- src/decoder.rs | 284 ++++++++++++++++++++++++++------------------- src/frame.rs | 109 +++++++++++++++++ src/lib.rs | 2 + tests/decode.rs | 10 +- tests/hw_smoke.rs | 10 +- 7 files changed, 302 insertions(+), 141 deletions(-) create mode 100644 src/frame.rs diff --git a/benches/decode.rs b/benches/decode.rs index 2433de9..82d0ba9 100644 --- a/benches/decode.rs +++ b/benches/decode.rs @@ -11,9 +11,9 @@ use std::{path::PathBuf, time::Duration}; use criterion::{criterion_group, criterion_main, Criterion}; -use ffmpeg::{format, frame, media}; +use ffmpeg::{format, media}; use ffmpeg_next as ffmpeg; -use hwdecode::{Backend, VideoDecoder}; +use hwdecode::{Backend, Frame, VideoDecoder}; const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; @@ -36,7 +36,7 @@ fn decode_all(path: &PathBuf, backend: Backend) -> Result VideoDecoder::open(stream.parameters())?, }; - let mut frame = frame::Video::empty(); + let mut frame = Frame::empty(); let mut count = 0_usize; let mut drain = |decoder: &mut VideoDecoder, count: &mut usize| -> Result<(), hwdecode::Error> { @@ -86,7 +86,7 @@ fn bench_decode(c: &mut Criterion) { .expect("video stream"); let stream_index = 
stream.index(); let mut dec = VideoDecoder::open(stream.parameters()).expect("auto-probe"); - let mut frame = frame::Video::empty(); + let mut frame = Frame::empty(); 'probe: for (s, packet) in input.packets() { if s.index() != stream_index { continue; diff --git a/examples/decode.rs b/examples/decode.rs index fa28582..69763bf 100644 --- a/examples/decode.rs +++ b/examples/decode.rs @@ -4,9 +4,9 @@ //! cargo run --release --example decode -- /path/to/video.mp4 //! ``` -use ffmpeg::{format, frame, media}; +use ffmpeg::{format, media}; use ffmpeg_next as ffmpeg; -use hwdecode::VideoDecoder; +use hwdecode::{Frame, VideoDecoder}; fn main() -> Result<(), Box> { let path = std::env::args() @@ -24,26 +24,25 @@ fn main() -> Result<(), Box> { let mut decoder = VideoDecoder::open(stream.parameters())?; println!( - "backend={:?} {}x{} codec_pix_fmt_initial={:?}", + "open: backend={:?} {}x{}", decoder.backend(), decoder.width(), decoder.height(), - decoder.format(), ); - let mut frame = frame::Video::empty(); + let mut frame = Frame::empty(); let mut count: u64 = 0; - let drain = |decoder: &mut VideoDecoder, frame: &mut frame::Video, count: &mut u64| loop { + let drain = |decoder: &mut VideoDecoder, frame: &mut Frame, count: &mut u64| loop { match decoder.receive_frame(frame) { Ok(()) => { *count += 1; println!( - "frame#{count} pts={:?} {}x{} fmt={:?}", + "frame#{count} pts={:?} {}x{} pix_fmt={}", frame.pts(), frame.width(), frame.height(), - frame.format(), + frame.pix_fmt(), ); } Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) @@ -69,6 +68,9 @@ fn main() -> Result<(), Box> { decoder.send_eof()?; drain(&mut decoder, &mut frame, &mut count); - println!("decoded {count} frames"); + println!( + "decoded {count} frames; final backend={:?}", + decoder.backend() + ); Ok(()) } diff --git a/src/decoder.rs b/src/decoder.rs index 76fe3e4..a776697 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -6,7 +6,6 @@ use ffmpeg_next::{ av_buffer_ref, av_buffer_unref, 
av_frame_copy_props, av_frame_move_ref, av_frame_unref, av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVPixelFormat, }, - format::Pixel, frame, Codec, Packet, Rational, }; @@ -14,24 +13,30 @@ use crate::{ backend::{self, Backend}, error::{Error, Result}, ffi::{find_hw_pix_fmt, get_hw_format, CallbackState}, + frame::Frame, }; /// Hardware-accelerated video decoder with software fallback. /// /// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface. -/// Frames returned by [`Self::receive_frame`] are always CPU-side; for the -/// hardware path they are downloaded with `av_hwframe_transfer_data` (NV12 / -/// P010). +/// Decoded frames are returned through [`crate::Frame`], a CPU-side wrapper +/// whose accessors avoid the `AVPixelFormat`-enum UB that an unvalidated read +/// of FFmpeg's raw integer pixel formats can trigger. /// /// `open` does a true probe: each backend opens with a strict `get_format` -/// callback, and on the first non-transient error the decoder is torn down -/// and the next backend is tried with all packets seen so far replayed -/// through it. Once the first frame is successfully received the probe -/// collapses and subsequent calls go straight to the active backend. +/// callback. On the first non-transient error from a backend the decoder is +/// torn down and the next backend in probe order is tried, with all packets +/// seen so far replayed through it. The advance is *transactional* — the +/// candidate backend must successfully build and accept the replayed packets +/// before any probe state is consumed, so a failing backend in the middle of +/// the order does not strand the caller without history. Once the first frame +/// is delivered the probe collapses and subsequent calls go straight to the +/// active backend. pub struct VideoDecoder { /// Live FFmpeg state for the currently active backend. state: DecoderState, /// Reusable frame buffer used for hw-side decoding before transfer / move. 
+ /// Internal use only — never handed to callers. hw_frame: frame::Video, /// Probe state: present until the first frame is received from the active /// backend, then `None`. While `Some`, packets are buffered for replay and @@ -67,7 +72,9 @@ struct ProbeState { /// Backends still to try, in order. Empty means "no more options after /// the active one fails". remaining_backends: Vec, - /// Packets sent so far, kept for replay through the next backend. + /// Packets sent so far, kept for replay through any candidate backend. + /// Preserved across failed candidates — only cleared when the probe + /// collapses on a successful first frame. buffered_packets: Vec, /// Whether `send_eof` has been called; replayed alongside packets. eof_sent: bool, @@ -105,16 +112,18 @@ impl VideoDecoder { /// Auto-probe hardware backends in the platform's default order. /// /// Each backend opens with a strict `get_format` callback. The first - /// backend whose `avcodec_open2` succeeds becomes active; if the first - /// frame from it fails (e.g. `get_format` returns `NONE` because the - /// backend can't handle this stream's profile/depth), the decoder is torn - /// down and the next backend is tried — packets sent so far are replayed - /// through the new decoder, transparently to the caller. + /// backend whose `avcodec_open2` succeeds becomes active; if its first + /// frame is unusable (decode error, transfer failure, or a CPU-format + /// frame from a HW context) the decoder is torn down and the next backend + /// is tried — packets sent so far are replayed through the new decoder + /// transparently. The probe advance is transactional: the next backend + /// must build *and* accept the replayed history before any probe state is + /// consumed, so a misbehaving middle backend cannot strand the caller. /// /// [`Self::backend`] reflects whichever backend ultimately produced the /// first frame. 
Software is the last entry in every probe order, so - /// `open` cannot return without a working decoder for codecs that - /// libavcodec supports at all. + /// `open` cannot return without a working decoder for any codec libavcodec + /// supports. pub fn open(parameters: codec::Parameters) -> Result { let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id }); let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?; @@ -184,15 +193,6 @@ impl VideoDecoder { self.state.inner.height() } - /// Codec context's current pixel format. For HW backends this is the - /// hardware pixel format (e.g. `Pixel::VIDEOTOOLBOX`) once the first frame - /// has been negotiated; the caller-facing format produced by - /// [`Self::receive_frame`] is the *transferred* format (NV12 / P010 for - /// HW, codec-native for SW) and must be read from the frame itself. - pub fn format(&self) -> Pixel { - self.state.inner.format() - } - /// Codec context time base. pub fn time_base(&self) -> Rational { self.state.inner.time_base() @@ -223,23 +223,22 @@ impl VideoDecoder { /// Receive a CPU-side decoded frame. /// - /// On the hardware path the frame is transferred from GPU memory via - /// `av_hwframe_transfer_data` and frame metadata (pts, time_base, side - /// data, ...) is copied with `av_frame_copy_props`. The caller's frame is - /// always unref'd first so reuse across resolution changes or different - /// decoders is safe (mirrors `avcodec_receive_frame`'s own contract). + /// On the hardware path the frame is downloaded with + /// `av_hwframe_transfer_data` and metadata is copied via + /// `av_frame_copy_props`. The caller's frame is always unref'd first, so + /// reuse across resolution changes or different decoders is safe. 
/// - /// While the probe window is open and the active backend produces a - /// non-transient error or a software-format frame instead of the - /// configured hardware format, the decoder is torn down and the next - /// backend in probe order is tried with all buffered packets replayed. + /// While the probe window is open, *any* non-transient failure (decode + /// error, transfer error, copy_props error, or a CPU-format frame from a + /// HW-opened context) tears down the current decoder and advances to the + /// next backend in probe order, replaying buffered packets through it. /// The caller observes only the eventual successful frame (or, if every /// backend has been exhausted, the underlying error). /// /// Returns the same transient signals as `ffmpeg::decoder::Video`: /// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and /// more packets must be sent, and `Error::Ffmpeg(Eof)` once fully drained. - pub fn receive_frame(&mut self, frame: &mut frame::Video) -> Result<()> { + pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<()> { loop { let res = self.state.inner.receive_frame(&mut self.hw_frame); match res { @@ -253,50 +252,56 @@ impl VideoDecoder { return Err(Error::Ffmpeg(e)); } Ok(()) => { - // Compare format as i32 to avoid constructing an AVPixelFormat - // enum from an unvalidated integer. Library/header skew or a new - // hardware format would otherwise be UB. + // Read AVFrame.format as i32 — avoid constructing an + // AVPixelFormat enum from a raw integer (UB on library/header skew). let received_fmt: i32 = unsafe { (*self.hw_frame.as_ptr()).format }; if self.state.backend == Backend::Software { - // Pure SW path: just hand over the frame. 
unsafe { - av_frame_unref(frame.as_mut_ptr()); - av_frame_move_ref(frame.as_mut_ptr(), self.hw_frame.as_mut_ptr()); + av_frame_unref(frame.as_inner_mut().as_mut_ptr()); + av_frame_move_ref( + frame.as_inner_mut().as_mut_ptr(), + self.hw_frame.as_mut_ptr(), + ); } self.probe = None; return Ok(()); } if received_fmt == self.state.hw_pix_fmt as i32 { - // True HW frame: download to CPU and copy timing/side data. - unsafe { - av_frame_unref(frame.as_mut_ptr()); - let ret = av_hwframe_transfer_data(frame.as_mut_ptr(), self.hw_frame.as_ptr(), 0); - if ret < 0 { - return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); + // True HW frame: try to download to CPU. + let transfer_result = unsafe { transfer_hw_frame(frame, &mut self.hw_frame) }; + match transfer_result { + Ok(()) => { + self.probe = None; + return Ok(()); } - let ret = av_frame_copy_props(frame.as_mut_ptr(), self.hw_frame.as_ptr()); - if ret < 0 { - return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); + Err(e) => { + // Transfer failures during the probe window are also + // backend-level failures — try the next backend. + if self.probe.is_some() && self.advance_probe()? { + unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) }; + continue; + } + return Err(Error::Ffmpeg(e)); } } - self.probe = None; - return Ok(()); } - // The decoder produced a CPU frame from a HW-opened context. With - // strict `get_format` this is unusual (the codec would normally - // error on get_format=NONE). If it does happen and we still have - // backends to try, treat it as a probe failure and advance. + // Decoder produced a CPU frame from a HW-opened context. With + // strict `get_format` this only happens if the codec ignores it + // (uncommon). Treat as a probe failure if we still have backends. if self.probe.is_some() && self.advance_probe()? { continue; } // No fallback left; accept the SW frame and update the active // backend so `backend()` reflects reality. 
unsafe { - av_frame_unref(frame.as_mut_ptr()); - av_frame_move_ref(frame.as_mut_ptr(), self.hw_frame.as_mut_ptr()); + av_frame_unref(frame.as_inner_mut().as_mut_ptr()); + av_frame_move_ref( + frame.as_inner_mut().as_mut_ptr(), + self.hw_frame.as_mut_ptr(), + ); } self.state.backend = Backend::Software; self.probe = None; @@ -316,69 +321,92 @@ impl VideoDecoder { } } - /// Tear down the active decoder and bring up the next backend in - /// `remaining_backends`, replaying buffered packets. Returns `true` if a - /// new backend was successfully installed (caller should retry the - /// receive); `false` if the probe is exhausted. + /// Try the next backend in `remaining_backends`. Transactional: a + /// candidate must successfully build and accept the replayed history + /// before any probe state is consumed. Backends that fail to build or + /// reject the replay are skipped (with `tracing::warn!`) and the loop + /// continues to the next one. Returns: + /// - `Ok(true)` when a candidate is installed and replay completed. + /// - `Ok(false)` when the probe is exhausted (no more backends to try). + /// - `Err(_)` only for genuinely fatal conditions surfaced by `build_state` + /// on the very first inspection (e.g. a malformed `Parameters`); the + /// per-candidate failures during the loop are absorbed and logged. fn advance_probe(&mut self) -> Result { - let next_backend = match self.probe.as_mut() { - Some(probe) if !probe.remaining_backends.is_empty() => probe.remaining_backends.remove(0), - _ => return Ok(false), - }; - let prev_backend = self.state.backend; - tracing::warn!( - from = ?prev_backend, - to = ?next_backend, - "hwdecode: backend rejected stream, advancing probe" - ); - - // Snapshot probe inputs before mutating self. 
- let (parameters, codec, buffered_packets, eof_sent) = { - let probe = self.probe.as_mut().expect("probe state"); - ( - probe.parameters.clone(), - probe.codec, - std::mem::take(&mut probe.buffered_packets), - probe.eof_sent, - ) - }; - - // Build the new state. If this open fails, we fall through to advancing - // again — which is what the caller's loop will do once it sees the next - // probe iteration also fail. To keep semantics simple, propagate the - // open error directly: the caller's loop will see it as the decode - // error and return upward; in practice probe order ends in Software - // which always opens. - let new_state = Self::build_state(parameters, codec, next_backend)?; - - // Replace state. The old DecoderState's Drop runs here, in order: - // codec context first, then callback_state box, then hw_device_ref. - self.state = new_state; - - // hw_frame may hold residual data from the old decoder. Clear it so - // the next receive starts clean. - unsafe { - av_frame_unref(self.hw_frame.as_mut_ptr()); - } + loop { + // Snapshot inputs without mutating probe state. + let (next_backend, parameters, codec) = match self.probe.as_ref() { + Some(probe) if !probe.remaining_backends.is_empty() => ( + probe.remaining_backends[0], + probe.parameters.clone(), + probe.codec, + ), + _ => return Ok(false), + }; + + let prev_backend = self.state.backend; + tracing::warn!(from = ?prev_backend, to = ?next_backend, "hwdecode: advancing probe"); + + // Build candidate. On failure, pop and continue without touching the + // packet buffer. + let mut candidate_state = match Self::build_state(parameters, codec, next_backend) { + Ok(s) => s, + Err(e) => { + tracing::warn!(?next_backend, error = %e, "hwdecode: candidate build failed"); + self + .probe + .as_mut() + .expect("probe state present") + .remaining_backends + .remove(0); + continue; + } + }; + + // Replay buffered history through the candidate WITHOUT installing it. 
+ // We borrow the buffer immutably; if replay fails the candidate's Drop + // releases the FFmpeg state and the buffer is preserved for the next + // attempt. + let replay_result: std::result::Result<(), ffmpeg_next::Error> = { + let probe = self.probe.as_ref().expect("probe state present"); + let mut r: std::result::Result<(), ffmpeg_next::Error> = Ok(()); + for pkt in &probe.buffered_packets { + if let Err(e) = candidate_state.inner.send_packet(pkt) { + r = Err(e); + break; + } + } + if r.is_ok() && probe.eof_sent { + if let Err(e) = candidate_state.inner.send_eof() { + r = Err(e); + } + } + r + }; + + if let Err(e) = replay_result { + tracing::warn!(?next_backend, error = %e, "hwdecode: candidate replay failed"); + // Drop candidate explicitly so its FFI cleanup runs now. + drop(candidate_state); + self + .probe + .as_mut() + .expect("probe state present") + .remaining_backends + .remove(0); + continue; + } - // Replay buffered packets and (if previously sent) EOF through the new - // decoder. We re-buffer them on the way through so a subsequent probe - // advance still has the full history. - let probe = self.probe.as_mut().expect("probe still present"); - probe.buffered_packets.clear(); - probe.eof_sent = false; - - for pkt in buffered_packets { - // Mirror `send_packet`'s buffering behaviour. - probe.buffered_packets.push(pkt.clone()); - self.state.inner.send_packet(&pkt).map_err(Error::Ffmpeg)?; - } - if eof_sent { - self.probe.as_mut().expect("probe still present").eof_sent = true; - self.state.inner.send_eof().map_err(Error::Ffmpeg)?; + // Commit: install the candidate, clear residual hw_frame, pop backend. + self.state = candidate_state; + unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) }; + self + .probe + .as_mut() + .expect("probe state present") + .remaining_backends + .remove(0); + return Ok(true); } - - Ok(true) } /// Build raw FFmpeg state for one backend. 
Strict `get_format` (NONE on @@ -459,6 +487,26 @@ impl VideoDecoder { } } +/// Download a HW frame into a CPU [`Frame`]. Always unrefs the destination +/// first so reuse across resolution changes is safe. +unsafe fn transfer_hw_frame( + dst: &mut Frame, + src: &mut frame::Video, +) -> std::result::Result<(), ffmpeg_next::Error> { + unsafe { + av_frame_unref(dst.as_inner_mut().as_mut_ptr()); + let ret = av_hwframe_transfer_data(dst.as_inner_mut().as_mut_ptr(), src.as_ptr(), 0); + if ret < 0 { + return Err(ffmpeg_next::Error::from(ret)); + } + let ret = av_frame_copy_props(dst.as_inner_mut().as_mut_ptr(), src.as_ptr()); + if ret < 0 { + return Err(ffmpeg_next::Error::from(ret)); + } + } + Ok(()) +} + /// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame` /// and must not be treated as backend failures. fn is_transient(e: &ffmpeg_next::Error) -> bool { diff --git a/src/frame.rs b/src/frame.rs new file mode 100644 index 0000000..f221b68 --- /dev/null +++ b/src/frame.rs @@ -0,0 +1,109 @@ +//! CPU-side decoded video frame. +//! +//! Wraps `ffmpeg_next::frame::Video` so callers cannot reach the upstream +//! `format()` accessor, which constructs an `AVPixelFormat` enum from the +//! raw integer FFmpeg writes into `AVFrame.format`. That conversion is UB +//! when the value isn't in the bindgen-generated enum (library/header skew, +//! a new pixel format added upstream, etc.). The wrapper exposes +//! [`Frame::pix_fmt`] which reads the field as a plain `i32` — sound for any +//! value FFmpeg can produce — and accessors are limited to fields whose +//! reads do not invoke the same hazard. +//! +//! Compare formats against integer constants taken from the FFI layer, e.g. +//! +//! ```ignore +//! use ffmpeg_next::ffi::AVPixelFormat; +//! if frame.pix_fmt() == AVPixelFormat::AV_PIX_FMT_NV12 as i32 { ... } +//! ``` + +use ffmpeg_next::frame; + +/// CPU-side decoded video frame produced by [`crate::VideoDecoder`]. 
+pub struct Frame { + inner: frame::Video, +} + +impl Frame { + /// Construct an empty frame, suitable as the destination passed to + /// [`crate::VideoDecoder::receive_frame`]. + pub fn empty() -> Self { + Self { + inner: frame::Video::empty(), + } + } + + /// Width in pixels. + pub fn width(&self) -> u32 { + self.inner.width() + } + + /// Height in pixels. + pub fn height(&self) -> u32 { + self.inner.height() + } + + /// Pixel format, returned as the raw `i32` value FFmpeg wrote to + /// `AVFrame.format`. Sound regardless of the linked FFmpeg version — + /// no `AVPixelFormat` enum is constructed. + /// + /// Compare against integer constants from `ffmpeg_next::ffi`, e.g. + /// `frame.pix_fmt() == AVPixelFormat::AV_PIX_FMT_NV12 as i32`. + pub fn pix_fmt(&self) -> i32 { + // SAFETY: `AVFrame.format` is bound as `c_int`; reading it yields a + // plain integer with no validity invariants. + unsafe { (*self.inner.as_ptr()).format } + } + + /// Presentation timestamp in stream time base, or `None` if the frame + /// carries `AV_NOPTS_VALUE`. + pub fn pts(&self) -> Option { + self.inner.pts() + } + + /// Number of populated planes (e.g. 3 for `YUV420P`, 2 for `NV12`). + pub fn planes(&self) -> usize { + self.inner.planes() + } + + /// Bytes per row for `plane`. Panics if `plane >= planes()`. + pub fn stride(&self, plane: usize) -> usize { + self.inner.stride(plane) + } + + /// Pixel data for `plane`. Panics if `plane >= planes()`. + pub fn data(&self, plane: usize) -> &[u8] { + self.inner.data(plane) + } + + /// Crate-internal: hand the wrapped frame to FFmpeg / our decoder code. 
+ pub(crate) fn as_inner_mut(&mut self) -> &mut frame::Video { + &mut self.inner + } +} + +impl Default for Frame { + fn default() -> Self { + Self::empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_frame_has_zero_dimensions_and_no_pts() { + let f = Frame::empty(); + assert_eq!(f.width(), 0); + assert_eq!(f.height(), 0); + assert_eq!(f.pts(), None); + // AVFrame.format defaults to -1 (AV_PIX_FMT_NONE) for an empty frame. + assert_eq!(f.pix_fmt(), -1); + } + + #[test] + fn frame_is_send() { + fn check() {} + check::(); + } +} diff --git a/src/lib.rs b/src/lib.rs index 7d9c7bd..69a0660 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,9 @@ mod backend; mod decoder; mod error; mod ffi; +mod frame; pub use backend::Backend; pub use decoder::VideoDecoder; pub use error::{Error, Result}; +pub use frame::Frame; diff --git a/tests/decode.rs b/tests/decode.rs index a936ae3..bc15f30 100644 --- a/tests/decode.rs +++ b/tests/decode.rs @@ -4,9 +4,9 @@ //! //! Set `HWDECODE_SAMPLE_VIDEO` to an absolute path to enable. 
-use ffmpeg::{format, frame, media}; +use ffmpeg::{format, media}; use ffmpeg_next as ffmpeg; -use hwdecode::VideoDecoder; +use hwdecode::{Frame, VideoDecoder}; const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; @@ -29,12 +29,12 @@ fn auto_open_decodes_at_least_one_frame() { let expected_h = unsafe { (*stream.parameters().as_ptr()).height as u32 }; let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder"); - eprintln!("backend = {:?}", decoder.backend()); + eprintln!("optimistic backend = {:?}", decoder.backend()); assert_eq!(decoder.width(), expected_w); assert_eq!(decoder.height(), expected_h); - let mut frame = frame::Video::empty(); + let mut frame = Frame::empty(); let mut count = 0_usize; let target = 30_usize; @@ -64,5 +64,5 @@ fn auto_open_decodes_at_least_one_frame() { } assert!(count >= 1, "expected at least 1 decoded frame, got {count}"); - eprintln!("decoded {count} frames"); + eprintln!("decoded {count} frames via backend {:?}", decoder.backend()); } diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs index 3084faf..e734533 100644 --- a/tests/hw_smoke.rs +++ b/tests/hw_smoke.rs @@ -5,9 +5,9 @@ //! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored //! ``` -use ffmpeg::{format, frame, media}; +use ffmpeg::{format, media}; use ffmpeg_next as ffmpeg; -use hwdecode::{Backend, VideoDecoder}; +use hwdecode::{Backend, Frame, VideoDecoder}; const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; @@ -32,7 +32,7 @@ fn auto_probe_picks_hardware_backend() { // backend that actually produced it. Checking `decoder.backend()` before // any frame has been received would observe the optimistic pre-probe // value and could false-pass when a HW backend silently degrades. 
- let mut frame = frame::Video::empty(); + let mut frame = Frame::empty(); let mut got_frame = false; for (s, packet) in input.packets() { if s.index() != stream_index { @@ -43,11 +43,11 @@ fn auto_probe_picks_hardware_backend() { Ok(()) => { got_frame = true; eprintln!( - "first frame: backend={:?} {}x{} fmt={:?}", + "first frame: backend={:?} {}x{} pix_fmt={}", decoder.backend(), frame.width(), frame.height(), - frame.format() + frame.pix_fmt() ); break; } From 979a2bfbe4a3ad87d1bfd1dbcc52d517c07a40bf Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 16:54:03 +1200 Subject: [PATCH 04/27] update --- benches/decode.rs | 113 +++++++++++++++++++++---------- examples/decode.rs | 16 ++++- src/backend.rs | 90 ++++++++++--------------- src/decoder.rs | 164 +++++++++++++++++---------------------------- src/error.rs | 7 +- src/lib.rs | 1 + src/pix_fmt.rs | 113 +++++++++++++++++++++++++++++++ tests/decode.rs | 12 +++- tests/hw_smoke.rs | 9 +-- 9 files changed, 319 insertions(+), 206 deletions(-) create mode 100644 src/pix_fmt.rs diff --git a/benches/decode.rs b/benches/decode.rs index 82d0ba9..5f53a66 100644 --- a/benches/decode.rs +++ b/benches/decode.rs @@ -1,8 +1,9 @@ -//! Benchmark comparing software-only decode against the auto-probed +//! Benchmark comparing software-only decode (via `ffmpeg-next` directly, +//! since `hwdecode` is hardware-only) against `hwdecode`'s auto-probed //! hardware backend on the same input file. //! //! Set `HWDECODE_SAMPLE_VIDEO` to a video file path. The hardware bench is -//! skipped (with a notice) when the auto-probe falls back to software. +//! skipped (with a notice) when no hardware backend is available on the host. //! //! ```sh //! 
HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench @@ -11,9 +12,9 @@ use std::{path::PathBuf, time::Duration}; use criterion::{criterion_group, criterion_main, Criterion}; -use ffmpeg::{format, media}; +use ffmpeg::{codec::Context as CodecContext, format, frame, media}; use ffmpeg_next as ffmpeg; -use hwdecode::{Backend, Frame, VideoDecoder}; +use hwdecode::{Frame, VideoDecoder}; const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; @@ -21,9 +22,8 @@ fn sample_path() -> Option { std::env::var_os(SAMPLE_ENV).map(PathBuf::from) } -/// Decode every video frame in the file using `decoder`, returning the count. -/// Re-opens the input each call so each iteration measures a full decode pass. -fn decode_all(path: &PathBuf, backend: Backend) -> Result { +/// Decode every frame using `hwdecode`'s auto-probed hardware backend. +fn decode_all_hw(path: &PathBuf) -> Result { let mut input = format::input(path).map_err(hwdecode::Error::Ffmpeg)?; let stream = input .streams() @@ -31,11 +31,7 @@ fn decode_all(path: &PathBuf, backend: Backend) -> Result VideoDecoder::open_with(stream.parameters(), Backend::Software)?, - _ => VideoDecoder::open(stream.parameters())?, - }; - + let mut decoder = VideoDecoder::open(stream.parameters())?; let mut frame = Frame::empty(); let mut count = 0_usize; @@ -66,6 +62,46 @@ fn decode_all(path: &PathBuf, backend: Backend) -> Result Result { + let mut input = format::input(path)?; + let stream = input + .streams() + .best(media::Type::Video) + .ok_or(ffmpeg::Error::StreamNotFound)?; + let stream_index = stream.index(); + let mut decoder = CodecContext::from_parameters(stream.parameters())? 
+ .decoder() + .video()?; + + let mut frame = frame::Video::empty(); + let mut count = 0_usize; + + let mut drain = + |decoder: &mut ffmpeg::decoder::Video, count: &mut usize| -> Result<(), ffmpeg::Error> { + loop { + match decoder.receive_frame(&mut frame) { + Ok(()) => *count += 1, + Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::error::EAGAIN => return Ok(()), + Err(ffmpeg::Error::Eof) => return Ok(()), + Err(e) => return Err(e), + } + } + }; + + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet)?; + drain(&mut decoder, &mut count)?; + } + decoder.send_eof()?; + drain(&mut decoder, &mut count)?; + Ok(count) +} + fn bench_decode(c: &mut Criterion) { ffmpeg::init().expect("ffmpeg init"); @@ -75,9 +111,8 @@ fn bench_decode(c: &mut Criterion) { }; // Probe by decoding one frame so the probe collapses to the backend that - // actually produced output. Reading `backend()` before the first frame - // would observe the optimistically-selected value and mislabel HW runs - // that silently degraded. + // actually produced output. None means no HW backend is available — we + // skip the HW arm and bench SW only. 
let probed_backend = { let mut input = format::input(&path).expect("open input"); let stream = input @@ -85,44 +120,50 @@ fn bench_decode(c: &mut Criterion) { .best(media::Type::Video) .expect("video stream"); let stream_index = stream.index(); - let mut dec = VideoDecoder::open(stream.parameters()).expect("auto-probe"); - let mut frame = Frame::empty(); - 'probe: for (s, packet) in input.packets() { - if s.index() != stream_index { - continue; - } - dec.send_packet(&packet).expect("probe send_packet"); - match dec.receive_frame(&mut frame) { - Ok(()) => break 'probe, - Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) - if errno == ffmpeg::error::EAGAIN => - { - continue; + match VideoDecoder::open(stream.parameters()) { + Ok(mut dec) => { + let mut frame = Frame::empty(); + 'probe: for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + dec.send_packet(&packet).expect("probe send_packet"); + match dec.receive_frame(&mut frame) { + Ok(()) => break 'probe, + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + continue; + } + Err(e) => panic!("probe receive_frame: {e}"), + } } - Err(e) => panic!("probe receive_frame: {e}"), + Some(dec.backend()) } + Err(hwdecode::Error::AllBackendsFailed { .. 
}) => None, + Err(e) => panic!("hwdecode probe: {e}"), } - dec.backend() }; - eprintln!("auto-probe settled on backend: {probed_backend:?}"); + match probed_backend { + Some(b) => eprintln!("auto-probe settled on backend: {b:?}"), + None => eprintln!("no hardware backend available — hardware bench will be skipped"), + } let mut group = c.benchmark_group("decode"); group.measurement_time(Duration::from_secs(15)); group.sample_size(20); group.bench_function("software", |b| { - b.iter(|| decode_all(&path, Backend::Software).expect("software decode")) + b.iter(|| decode_all_sw(&path).expect("software decode")) }); - if probed_backend != Backend::Software { + if probed_backend.is_some() { group.bench_function("hardware", |b| { b.iter(|| { - let n = decode_all(&path, probed_backend).expect("hardware decode"); + let n = decode_all_hw(&path).expect("hardware decode"); std::hint::black_box(n); }) }); - } else { - eprintln!("skipping hardware bench: auto-probe fell back to Software"); } group.finish(); diff --git a/examples/decode.rs b/examples/decode.rs index 69763bf..a1439d7 100644 --- a/examples/decode.rs +++ b/examples/decode.rs @@ -22,7 +22,21 @@ fn main() -> Result<(), Box> { .ok_or("no video stream")?; let stream_index = stream.index(); - let mut decoder = VideoDecoder::open(stream.parameters())?; + let mut decoder = match VideoDecoder::open(stream.parameters()) { + Ok(d) => d, + Err(hwdecode::Error::AllBackendsFailed { attempts }) => { + eprintln!( + "no hardware backend available; tried {} backend(s):", + attempts.len() + ); + for (b, e) in &attempts { + eprintln!(" {b:?}: {e}"); + } + eprintln!("(callers handle software fallback themselves — see ffmpeg::decoder::Video)"); + return Ok(()); + } + Err(e) => return Err(e.into()), + }; println!( "open: backend={:?} {}x{}", decoder.backend(), diff --git a/src/backend.rs b/src/backend.rs index cfcd48b..bce8699 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,11 +1,15 @@ use ffmpeg_next::{ffi::AVHWDeviceType, 
format::Pixel}; -/// Decoding backend selected (or forced) for a [`crate::VideoDecoder`]. +/// Hardware decoding backend. +/// +/// `hwdecode` only manages **hardware** decoders — software fallback is +/// out of scope. If no backend in [`probe_order`] for the current platform +/// can decode a stream, [`crate::VideoDecoder::open`] returns +/// [`crate::Error::AllBackendsFailed`] and the caller decides how to fall +/// back (e.g. by opening an `ffmpeg::decoder::Video` directly). #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub enum Backend { - /// Pure software decode via libavcodec. - Software, - /// Apple VideoToolbox (macOS, iOS, iPadOS, tvOS). + /// Apple VideoToolbox (macOS, iOS, iPadOS, tvOS, visionOS). VideoToolbox, /// Linux Video Acceleration API (Intel / AMD GPUs). Vaapi, @@ -16,37 +20,33 @@ pub enum Backend { } impl Backend { - /// `AVHWDeviceType` corresponding to this backend, or `None` for - /// [`Backend::Software`]. - pub(crate) fn av_hwdevice_type(self) -> Option { + /// `AVHWDeviceType` corresponding to this backend. + pub(crate) fn av_hwdevice_type(self) -> AVHWDeviceType { match self { - Self::Software => None, - Self::VideoToolbox => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX), - Self::Vaapi => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_VAAPI), - Self::Cuda => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA), - Self::D3d11va => Some(AVHWDeviceType::AV_HWDEVICE_TYPE_D3D11VA), + Self::VideoToolbox => AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX, + Self::Vaapi => AVHWDeviceType::AV_HWDEVICE_TYPE_VAAPI, + Self::Cuda => AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA, + Self::D3d11va => AVHWDeviceType::AV_HWDEVICE_TYPE_D3D11VA, } } /// Hardware pixel format the codec is expected to produce when this - /// backend is in use. Used to inspect the result of `get_format`. - /// `None` for [`Backend::Software`]. + /// backend is in use. (The post-`av_hwframe_transfer_data` CPU format is + /// typically `NV12` or `P010LE`; this is the *pre-transfer* sentinel.) 
#[allow(dead_code)] // surfaced for tests / future use - pub(crate) fn hw_pixel_format(self) -> Option { + pub(crate) fn hw_pixel_format(self) -> Pixel { match self { - Self::Software => None, - Self::VideoToolbox => Some(Pixel::VIDEOTOOLBOX), - Self::Vaapi => Some(Pixel::VAAPI), - Self::Cuda => Some(Pixel::CUDA), - Self::D3d11va => Some(Pixel::D3D11), + Self::VideoToolbox => Pixel::VIDEOTOOLBOX, + Self::Vaapi => Pixel::VAAPI, + Self::Cuda => Pixel::CUDA, + Self::D3d11va => Pixel::D3D11, } } } -/// Probe order for `VideoDecoder::open` on the current target. -/// -/// Always ends in [`Backend::Software`]; auto-probe never returns an empty -/// list. Order is fixed at compile time per `target_os`. +/// Probe order for `VideoDecoder::open` on the current target. Hardware +/// backends only, in preference order. Empty for platforms with no known +/// HW backend; on those `open()` returns `AllBackendsFailed` immediately. pub(crate) fn probe_order() -> &'static [Backend] { #[cfg(any( target_os = "macos", @@ -55,15 +55,15 @@ pub(crate) fn probe_order() -> &'static [Backend] { target_os = "visionos", ))] { - &[Backend::VideoToolbox, Backend::Software] + &[Backend::VideoToolbox] } #[cfg(target_os = "linux")] { - &[Backend::Vaapi, Backend::Cuda, Backend::Software] + &[Backend::Vaapi, Backend::Cuda] } #[cfg(target_os = "windows")] { - &[Backend::D3d11va, Backend::Cuda, Backend::Software] + &[Backend::D3d11va, Backend::Cuda] } #[cfg(not(any( target_os = "macos", @@ -74,7 +74,7 @@ pub(crate) fn probe_order() -> &'static [Backend] { target_os = "windows", )))] { - &[Backend::Software] + &[] } } @@ -83,55 +83,33 @@ mod tests { use super::*; #[test] - fn probe_order_ends_in_software() { - let order = probe_order(); - assert!(!order.is_empty()); - assert_eq!(*order.last().unwrap(), Backend::Software); - } - - #[test] - fn software_has_no_av_hwdevice_type() { - assert!(Backend::Software.av_hwdevice_type().is_none()); - assert!(Backend::Software.hw_pixel_format().is_none()); - } - - 
#[test] - fn hw_backends_have_av_hwdevice_type() { + fn all_backends_have_hwdevice_type_and_pix_fmt() { for b in [ Backend::VideoToolbox, Backend::Vaapi, Backend::Cuda, Backend::D3d11va, ] { - assert!( - b.av_hwdevice_type().is_some(), - "{b:?} missing hwdevice type" - ); - assert!(b.hw_pixel_format().is_some(), "{b:?} missing hw pix fmt"); + let _ = b.av_hwdevice_type(); + let _ = b.hw_pixel_format(); } } #[cfg(any(target_os = "macos", target_os = "ios", target_os = "tvos"))] #[test] fn apple_probe_order() { - assert_eq!(probe_order(), &[Backend::VideoToolbox, Backend::Software]); + assert_eq!(probe_order(), &[Backend::VideoToolbox]); } #[cfg(target_os = "linux")] #[test] fn linux_probe_order() { - assert_eq!( - probe_order(), - &[Backend::Vaapi, Backend::Cuda, Backend::Software] - ); + assert_eq!(probe_order(), &[Backend::Vaapi, Backend::Cuda]); } #[cfg(target_os = "windows")] #[test] fn windows_probe_order() { - assert_eq!( - probe_order(), - &[Backend::D3d11va, Backend::Cuda, Backend::Software] - ); + assert_eq!(probe_order(), &[Backend::D3d11va, Backend::Cuda]); } } diff --git a/src/decoder.rs b/src/decoder.rs index a776697..09f5c67 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -3,8 +3,8 @@ use std::{mem::ManuallyDrop, ptr}; use ffmpeg_next::{ codec::{self, Context}, ffi::{ - av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref, - av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVPixelFormat, + av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_unref, av_hwdevice_ctx_create, + av_hwframe_transfer_data, AVBufferRef, }, frame, Codec, Packet, Rational, }; @@ -52,15 +52,11 @@ struct DecoderState { inner: ManuallyDrop, /// Backend driving this state. backend: Backend, - /// Owned reference produced by `av_hwdevice_ctx_create`. Null for software. + /// Owned reference produced by `av_hwdevice_ctx_create`. 
hw_device_ref: *mut AVBufferRef, - /// Owned `Box` raw pointer; `AVCodecContext::opaque` aliases - /// it. Null for software. + /// Owned `Box` raw pointer; `AVCodecContext::opaque` + /// aliases it. callback_state: *mut CallbackState, - /// Hardware pixel format we asked the decoder to produce. Compared (as - /// `i32` to avoid enum-discriminant UB) against each received frame's - /// format. `AV_PIX_FMT_NONE` for the software path. - hw_pix_fmt: AVPixelFormat, } /// State carried only during the probe window (before the first successful @@ -223,17 +219,22 @@ impl VideoDecoder { /// Receive a CPU-side decoded frame. /// - /// On the hardware path the frame is downloaded with - /// `av_hwframe_transfer_data` and metadata is copied via - /// `av_frame_copy_props`. The caller's frame is always unref'd first, so - /// reuse across resolution changes or different decoders is safe. + /// The frame is downloaded with `av_hwframe_transfer_data` and metadata + /// is copied via `av_frame_copy_props`. The caller's frame is always + /// unref'd first, so reuse across resolution changes or different + /// decoders is safe. /// /// While the probe window is open, *any* non-transient failure (decode /// error, transfer error, copy_props error, or a CPU-format frame from a /// HW-opened context) tears down the current decoder and advances to the - /// next backend in probe order, replaying buffered packets through it. - /// The caller observes only the eventual successful frame (or, if every - /// backend has been exhausted, the underlying error). + /// next hardware backend in probe order, replaying buffered packets + /// through it. The caller observes only the eventual successful frame + /// (or, if every backend has been exhausted, the underlying error). + /// + /// This crate is hardware-only: there is no software fallback inside the + /// decoder. 
If every backend is exhausted, the failure surfaces as the + /// last decoder error (or [`Error::HwBackendProducedSwFrame`] for the + /// degraded-CPU-frame case). Callers handle software fallback themselves. /// /// Returns the same transient signals as `ffmpeg::decoder::Video`: /// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and @@ -252,60 +253,25 @@ impl VideoDecoder { return Err(Error::Ffmpeg(e)); } Ok(()) => { - // Read AVFrame.format as i32 — avoid constructing an - // AVPixelFormat enum from a raw integer (UB on library/header skew). - let received_fmt: i32 = unsafe { (*self.hw_frame.as_ptr()).format }; - - if self.state.backend == Backend::Software { - unsafe { - av_frame_unref(frame.as_inner_mut().as_mut_ptr()); - av_frame_move_ref( - frame.as_inner_mut().as_mut_ptr(), - self.hw_frame.as_mut_ptr(), - ); + // Always attempt the HW→CPU transfer. With strict `get_format`, + // libavcodec can only deliver frames in the wired-up HW format + // (or fail). If a misbehaving codec ever hands us a CPU-side + // frame anyway, `av_hwframe_transfer_data` returns AVERROR(EINVAL) + // (neither src nor dst has an AVHWFramesContext attached) and we + // route through the same error path below. + match unsafe { transfer_hw_frame(frame, &mut self.hw_frame) } { + Ok(()) => { + self.probe = None; + return Ok(()); } - self.probe = None; - return Ok(()); - } - - if received_fmt == self.state.hw_pix_fmt as i32 { - // True HW frame: try to download to CPU. - let transfer_result = unsafe { transfer_hw_frame(frame, &mut self.hw_frame) }; - match transfer_result { - Ok(()) => { - self.probe = None; - return Ok(()); - } - Err(e) => { - // Transfer failures during the probe window are also - // backend-level failures — try the next backend. - if self.probe.is_some() && self.advance_probe()? { - unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) }; - continue; - } - return Err(Error::Ffmpeg(e)); + Err(e) => { + if self.probe.is_some() && self.advance_probe()? 
{ + unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) }; + continue; } + return Err(Error::Ffmpeg(e)); } } - - // Decoder produced a CPU frame from a HW-opened context. With - // strict `get_format` this only happens if the codec ignores it - // (uncommon). Treat as a probe failure if we still have backends. - if self.probe.is_some() && self.advance_probe()? { - continue; - } - // No fallback left; accept the SW frame and update the active - // backend so `backend()` reflects reality. - unsafe { - av_frame_unref(frame.as_inner_mut().as_mut_ptr()); - av_frame_move_ref( - frame.as_inner_mut().as_mut_ptr(), - self.hw_frame.as_mut_ptr(), - ); - } - self.state.backend = Backend::Software; - self.probe = None; - return Ok(()); } } } @@ -409,52 +375,43 @@ impl VideoDecoder { } } - /// Build raw FFmpeg state for one backend. Strict `get_format` (NONE on - /// missing HW format); cross-backend fallback is the caller's job. + /// Build raw FFmpeg state for one hardware backend. Strict `get_format` + /// (NONE on missing HW format); cross-backend fallback is the caller's job. fn build_state( parameters: codec::Parameters, codec: Codec, backend: Backend, ) -> Result { let mut ctx = Context::from_parameters(parameters)?; + let av_type = backend.av_hwdevice_type(); - let (hw_device_ref, callback_state, hw_pix_fmt) = match backend.av_hwdevice_type() { - None => ( - ptr::null_mut(), - ptr::null_mut(), - AVPixelFormat::AV_PIX_FMT_NONE, - ), - Some(av_type) => { - // Verify the codec advertises this hwaccel. - let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type) - .ok_or(Error::BackendUnsupportedByCodec(backend))?; - - // Create the device context. - let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut(); - // SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill. 
- let ret = unsafe { - av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0) - }; - if ret < 0 { - return Err(Error::HwDeviceInitFailed { - backend, - source: ffmpeg_next::Error::from(ret), - }); - } + // Verify the codec advertises this hwaccel. + let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type) + .ok_or(Error::BackendUnsupportedByCodec(backend))?; - let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt })); - // SAFETY: ctx is a freshly-constructed AVCodecContext we own; - // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's - // use (we keep our own ref in `hw_device_ref` for cleanup). - unsafe { - let raw = ctx.as_mut_ptr(); - (*raw).hw_device_ctx = av_buffer_ref(hw_device_ref); - (*raw).opaque = callback_state.cast(); - (*raw).get_format = Some(get_hw_format); - } - (hw_device_ref, callback_state, hw_pix_fmt) - } + // Create the device context. + let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut(); + // SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill. + let ret = unsafe { + av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0) }; + if ret < 0 { + return Err(Error::HwDeviceInitFailed { + backend, + source: ffmpeg_next::Error::from(ret), + }); + } + + let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt })); + // SAFETY: ctx is a freshly-constructed AVCodecContext we own; + // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's + // use (we keep our own ref in `hw_device_ref` for cleanup). + unsafe { + let raw = ctx.as_mut_ptr(); + (*raw).hw_device_ctx = av_buffer_ref(hw_device_ref); + (*raw).opaque = callback_state.cast(); + (*raw).get_format = Some(get_hw_format); + } // Open the decoder. On any failure, release the resources we just // allocated so we don't leak. 
@@ -482,7 +439,6 @@ impl VideoDecoder { backend, hw_device_ref, callback_state, - hw_pix_fmt, }) } } diff --git a/src/error.rs b/src/error.rs index 92cb2d1..ef5373c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -28,8 +28,11 @@ pub enum Error { source: ffmpeg_next::Error, }, - /// Auto-probe exhausted every backend in the platform's order. - #[error("all backends failed; attempts: {attempts:?}")] + /// Auto-probe exhausted every backend in the platform's order. Empty + /// `attempts` means the platform has no hardware backends listed in + /// [`crate::Backend`] for the current `target_os` — callers must + /// fall back to a software decoder of their choice. + #[error("all hardware backends failed; attempts: {attempts:?}")] AllBackendsFailed { /// Per-backend errors collected during probing, in the order tried. attempts: Vec<(Backend, Box)>, diff --git a/src/lib.rs b/src/lib.rs index 69a0660..e6c12ce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,6 +20,7 @@ mod decoder; mod error; mod ffi; mod frame; +pub mod pix_fmt; pub use backend::Backend; pub use decoder::VideoDecoder; diff --git a/src/pix_fmt.rs b/src/pix_fmt.rs new file mode 100644 index 0000000..f3c594e --- /dev/null +++ b/src/pix_fmt.rs @@ -0,0 +1,113 @@ +//! Stable `i32` constants for the pixel formats produced by `hwdecode`'s +//! hardware decoders after `av_hwframe_transfer_data`. +//! +//! `Frame::pix_fmt()` returns the raw integer FFmpeg wrote to `AVFrame.format` +//! (as a plain `i32` to avoid the enum-construction UB that an unvalidated +//! cast would invoke). This module names the constants relevant to dispatch +//! after a successful hardware decode. +//! +//! Because `hwdecode` is hardware-only, the formats listed here cover what +//! the supported HW backends actually produce — the **NV** family (semi- +//! planar 8-bit) and the **P0xx / P2xx / P4xx** family (semi-planar 10/12/16 +//! bit). VideoToolbox, VAAPI, NVDEC, and D3D11VA all download into one of +//! these. +//! +//! 
Software-decoder output formats (`YUV420P`, `YUV422P`, `RGB24`, etc.) are +//! intentionally **not** listed: callers handle software fallback outside +//! this crate, and dispatch tables for those formats belong with the SW +//! pipeline. +//! +//! For values not listed here, write `AVPixelFormat::AV_PIX_FMT_X as i32` +//! directly — that's exactly the cast we use to define these constants. +//! +//! ```ignore +//! use hwdecode::{pix_fmt, Frame}; +//! match frame.pix_fmt() { +//! pix_fmt::NV12 => /* 8-bit 4:2:0 → colconv::frame::Nv12Frame */, +//! pix_fmt::P010LE => /* 10-bit 4:2:0 → colconv::frame::PnFrame<10> */, +//! other => unimplemented!("pix_fmt {other}"), +//! } +//! ``` + +use ffmpeg_next::ffi::AVPixelFormat; + +// --- semi-planar YUV (NV*) — 8-bit hardware download outputs ---------------- + +/// 4:2:0, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV12`). The +/// dominant 8-bit HW download format on every supported backend. +pub const NV12: i32 = AVPixelFormat::AV_PIX_FMT_NV12 as i32; +/// 4:2:0, 8-bit, Y plane + interleaved Cr/Cb (`AV_PIX_FMT_NV21`). +pub const NV21: i32 = AVPixelFormat::AV_PIX_FMT_NV21 as i32; +/// 4:2:2, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV16`). +pub const NV16: i32 = AVPixelFormat::AV_PIX_FMT_NV16 as i32; +/// 4:4:4, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV24`). +pub const NV24: i32 = AVPixelFormat::AV_PIX_FMT_NV24 as i32; + +// --- semi-planar YUV (P0xx) — 4:2:0 high-bit-depth HW downloads ------------- + +/// 4:2:0, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P010LE`). The +/// dominant 10-bit HW download format. +pub const P010LE: i32 = AVPixelFormat::AV_PIX_FMT_P010LE as i32; +/// 4:2:0, 10-bit, semi-planar big-endian (`AV_PIX_FMT_P010BE`). +pub const P010BE: i32 = AVPixelFormat::AV_PIX_FMT_P010BE as i32; +/// 4:2:0, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P012LE`). 
+pub const P012LE: i32 = AVPixelFormat::AV_PIX_FMT_P012LE as i32; +/// 4:2:0, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P016LE`). +pub const P016LE: i32 = AVPixelFormat::AV_PIX_FMT_P016LE as i32; + +// --- semi-planar YUV (P2xx) — 4:2:2 high-bit-depth HW downloads ------------- + +/// 4:2:2, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P210LE`). +pub const P210LE: i32 = AVPixelFormat::AV_PIX_FMT_P210LE as i32; +/// 4:2:2, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P212LE`, FFmpeg 5.0+). +pub const P212LE: i32 = AVPixelFormat::AV_PIX_FMT_P212LE as i32; +/// 4:2:2, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P216LE`). +pub const P216LE: i32 = AVPixelFormat::AV_PIX_FMT_P216LE as i32; + +// --- semi-planar YUV (P4xx) — 4:4:4 high-bit-depth HW downloads ------------- + +/// 4:4:4, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P410LE`). +pub const P410LE: i32 = AVPixelFormat::AV_PIX_FMT_P410LE as i32; +/// 4:4:4, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P412LE`, FFmpeg 5.0+). +pub const P412LE: i32 = AVPixelFormat::AV_PIX_FMT_P412LE as i32; +/// 4:4:4, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P416LE`). +pub const P416LE: i32 = AVPixelFormat::AV_PIX_FMT_P416LE as i32; + +// --- sentinel --------------------------------------------------------------- + +/// Sentinel value FFmpeg writes to `AVFrame.format` for an unset frame +/// (`AV_PIX_FMT_NONE`). [`crate::Frame::empty`] returns this until the frame +/// is filled by a decoder. +pub const NONE: i32 = AVPixelFormat::AV_PIX_FMT_NONE as i32; + +#[cfg(test)] +mod tests { + use super::*; + + /// Regression check: if the underlying `AVPixelFormat` discriminants ever + /// change in `ffmpeg-sys-next`'s bindings, this catches it. 
+ #[test] + fn constants_match_bindings() { + assert_eq!(NV12, AVPixelFormat::AV_PIX_FMT_NV12 as i32); + assert_eq!(P010LE, AVPixelFormat::AV_PIX_FMT_P010LE as i32); + assert_eq!(P416LE, AVPixelFormat::AV_PIX_FMT_P416LE as i32); + assert_eq!(NONE, -1, "AV_PIX_FMT_NONE must be -1 (FFmpeg ABI sentinel)"); + } + + #[test] + fn match_dispatch_compiles() { + fn classify(v: i32) -> &'static str { + match v { + NV12 => "nv12", + NV21 => "nv21", + P010LE => "p010le", + P210LE => "p210le", + P410LE => "p410le", + _ => "other", + } + } + assert_eq!(classify(NV12), "nv12"); + assert_eq!(classify(P010LE), "p010le"); + assert_eq!(classify(NONE), "other"); + } +} diff --git a/tests/decode.rs b/tests/decode.rs index bc15f30..10a8bcb 100644 --- a/tests/decode.rs +++ b/tests/decode.rs @@ -28,7 +28,17 @@ fn auto_open_decodes_at_least_one_frame() { let expected_w = unsafe { (*stream.parameters().as_ptr()).width as u32 }; let expected_h = unsafe { (*stream.parameters().as_ptr()).height as u32 }; - let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder"); + let mut decoder = match VideoDecoder::open(stream.parameters()) { + Ok(d) => d, + Err(hwdecode::Error::AllBackendsFailed { attempts }) => { + eprintln!( + "skipping: no hardware backend available ({} attempts)", + attempts.len() + ); + return; + } + Err(e) => panic!("open decoder: {e}"), + }; eprintln!("optimistic backend = {:?}", decoder.backend()); assert_eq!(decoder.width(), expected_w); diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs index e734533..6e11765 100644 --- a/tests/hw_smoke.rs +++ b/tests/hw_smoke.rs @@ -7,7 +7,7 @@ use ffmpeg::{format, media}; use ffmpeg_next as ffmpeg; -use hwdecode::{Backend, Frame, VideoDecoder}; +use hwdecode::{Frame, VideoDecoder}; const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; @@ -60,9 +60,6 @@ fn auto_probe_picks_hardware_backend() { } } assert!(got_frame, "no frames decoded"); - assert_ne!( - decoder.backend(), - Backend::Software, - "expected hardware backend 
after first frame; got Software" - ); + // hwdecode is hardware-only — `backend()` after a successful first frame + // is by construction one of the HW variants. Logged above for visibility. } From 189b6ba0161e29af5ae976593973d7c7fdd0c3fc Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 17:15:59 +1200 Subject: [PATCH 05/27] update --- src/backend.rs | 17 +++--- src/decoder.rs | 38 +++++++++--- src/ffi.rs | 156 +++++++++++++++++++++++++++++++++-------------- src/frame.rs | 162 ++++++++++++++++++++++++++++++++++++++++--------- 4 files changed, 284 insertions(+), 89 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index bce8699..00cf82e 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,4 +1,4 @@ -use ffmpeg_next::{ffi::AVHWDeviceType, format::Pixel}; +use ffmpeg_next::ffi::{AVHWDeviceType, AVPixelFormat}; /// Hardware decoding backend. /// @@ -33,13 +33,16 @@ impl Backend { /// Hardware pixel format the codec is expected to produce when this /// backend is in use. (The post-`av_hwframe_transfer_data` CPU format is /// typically `NV12` or `P010LE`; this is the *pre-transfer* sentinel.) - #[allow(dead_code)] // surfaced for tests / future use - pub(crate) fn hw_pixel_format(self) -> Pixel { + /// + /// Returns an `AVPixelFormat` value constructed from a hardcoded constant + /// in our bindings — never reads an enum value supplied by FFmpeg, so + /// no enum-discriminant UB risk.
+ pub(crate) fn hw_pixel_format(self) -> AVPixelFormat { match self { - Self::VideoToolbox => Pixel::VIDEOTOOLBOX, - Self::Vaapi => Pixel::VAAPI, - Self::Cuda => Pixel::CUDA, - Self::D3d11va => Pixel::D3D11, + Self::VideoToolbox => AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX, + Self::Vaapi => AVPixelFormat::AV_PIX_FMT_VAAPI, + Self::Cuda => AVPixelFormat::AV_PIX_FMT_CUDA, + Self::D3d11va => AVPixelFormat::AV_PIX_FMT_D3D11, } } } diff --git a/src/decoder.rs b/src/decoder.rs index 09f5c67..d6cffe3 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -12,7 +12,7 @@ use ffmpeg_next::{ use crate::{ backend::{self, Backend}, error::{Error, Result}, - ffi::{find_hw_pix_fmt, get_hw_format, CallbackState}, + ffi::{codec_supports_hwaccel, get_hw_format, CallbackState}, frame::Frame, }; @@ -199,22 +199,32 @@ impl VideoDecoder { self.state.inner.frame_rate() } - /// Submit a packet to the decoder. While the probe is active the packet is - /// also buffered for potential replay through a fallback backend. + /// Submit a packet to the decoder. On success — and only on success — + /// the packet is buffered for potential replay through a fallback backend + /// while the probe is active. A failed send (including EAGAIN) does not + /// mutate replay state, so a later probe advance only replays history + /// FFmpeg actually accepted. pub fn send_packet(&mut self, packet: &Packet) -> Result<()> { + self + .state + .inner + .send_packet(packet) + .map_err(Error::Ffmpeg)?; if let Some(probe) = self.probe.as_mut() { probe.buffered_packets.push(packet.clone()); } - self.state.inner.send_packet(packet).map_err(Error::Ffmpeg) + Ok(()) } /// Signal end-of-stream to the decoder; remaining frames can be drained - /// with [`Self::receive_frame`]. Recorded for replay if probe is active. + /// with [`Self::receive_frame`]. Recorded for replay only if the underlying + /// `send_eof` succeeds. 
pub fn send_eof(&mut self) -> Result<()> { + self.state.inner.send_eof().map_err(Error::Ffmpeg)?; if let Some(probe) = self.probe.as_mut() { probe.eof_sent = true; } - self.state.inner.send_eof().map_err(Error::Ffmpeg) + Ok(()) } /// Receive a CPU-side decoded frame. @@ -385,9 +395,14 @@ impl VideoDecoder { let mut ctx = Context::from_parameters(parameters)?; let av_type = backend.av_hwdevice_type(); - // Verify the codec advertises this hwaccel. - let hw_pix_fmt = find_hw_pix_fmt(unsafe { codec.as_ptr() }, av_type) - .ok_or(Error::BackendUnsupportedByCodec(backend))?; + // Verify the codec advertises this hwaccel. We do *not* read the + // codec's advertised pix_fmt — we use the hardcoded constant from + // `Backend::hw_pixel_format` so no FFmpeg-supplied enum value is ever + // interpreted as `AVPixelFormat`. + if !codec_supports_hwaccel(unsafe { codec.as_ptr() }, av_type) { + return Err(Error::BackendUnsupportedByCodec(backend)); + } + let hw_pix_fmt = backend.hw_pixel_format(); // Create the device context. let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut(); @@ -402,7 +417,10 @@ impl VideoDecoder { }); } - let callback_state = Box::into_raw(Box::new(CallbackState { wanted: hw_pix_fmt })); + let callback_state = Box::into_raw(Box::new(CallbackState { + wanted: hw_pix_fmt, + wanted_int: hw_pix_fmt as i32, + })); // SAFETY: ctx is a freshly-constructed AVCodecContext we own; // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's // use (we keep our own ref in `hw_device_ref` for cleanup). diff --git a/src/ffi.rs b/src/ffi.rs index 78ee80c..794d474 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -1,5 +1,15 @@ //! FFI shims used by the decoder. Kept in one place so the unsafe surface is //! easy to audit. +//! +//! All reads of `AVPixelFormat` / `AVHWDeviceType` values returned by FFmpeg +//! at runtime go through `ptr::read::` after a pointer cast, never +//! through the bindgen-generated Rust enum. The enums are `#[repr(i32)]` +//! 
and constructing them from a value not in the listed discriminants is +//! undefined behavior — exactly the situation header/library skew creates. +//! See the doc comments on individual functions for what is read as raw +//! integer vs. constructed from a known constant. + +use std::ptr; use ffmpeg_next::ffi::{ avcodec_get_hw_config, AVCodec, AVCodecContext, AVHWDeviceType, AVPixelFormat, @@ -10,22 +20,29 @@ use ffmpeg_next::ffi::{ /// the correct hardware pixel format without globals. One instance per /// decoder; freed by [`crate::VideoDecoder`] after the codec context is /// dropped. +/// +/// `wanted` is set from a hardcoded `AVPixelFormat` constant in our bindings +/// (via `Backend::hw_pixel_format`), so it is always a valid enum value. We +/// also store its raw `i32` so the callback can compare against the offered +/// list without going through enum reads. #[repr(C)] pub(crate) struct CallbackState { - /// Hardware pixel format we want the decoder to produce. + /// Hardware pixel format we want the decoder to produce. Constructed + /// from a known constant; safe to use as the callback's return value. pub(crate) wanted: AVPixelFormat, + /// Same value as `wanted` cast to `i32`, cached so the callback's + /// pix_fmts walk doesn't have to convert per iteration. + pub(crate) wanted_int: i32, } /// `AVCodecContext::get_format` callback. FFmpeg invokes it with the list of /// pixel formats the codec is willing to output for the current stream. /// -/// Returns the configured hardware format if present; otherwise -/// [`AVPixelFormat::AV_PIX_FMT_NONE`], which causes the decoder to fail. The -/// failure surfaces as a normal `Error::Ffmpeg` from -/// [`crate::VideoDecoder::receive_frame`]; for `VideoDecoder::open` callers -/// the probe loop tears down and retries with the next backend (replaying -/// buffered packets), so software fallback happens at the decoder level -/// rather than silently in-context. 
+/// The offered list is walked as `*const i32` (cast from `*const AVPixelFormat`) +/// to avoid constructing the bindgen enum from values that may not be in our +/// build's discriminant set. The return value is either `wanted` (a known +/// constant) or `AV_PIX_FMT_NONE` (also a known constant) — both safe to +/// produce as `AVPixelFormat`. pub(crate) unsafe extern "C" fn get_hw_format( ctx: *mut AVCodecContext, pix_fmts: *const AVPixelFormat, @@ -38,41 +55,68 @@ pub(crate) unsafe extern "C" fn get_hw_format( // codec context's drop runs). When opaque is null we treat the call as // strict — a stray invocation cannot silently downgrade. let state = unsafe { (*ctx).opaque as *const CallbackState }; - let wanted = if state.is_null() { - AVPixelFormat::AV_PIX_FMT_NONE + let (wanted, wanted_int) = if state.is_null() { + ( + AVPixelFormat::AV_PIX_FMT_NONE, + AVPixelFormat::AV_PIX_FMT_NONE as i32, + ) } else { - unsafe { (*state).wanted } + unsafe { ((*state).wanted, (*state).wanted_int) } }; - let mut p = pix_fmts; - while unsafe { *p } != AVPixelFormat::AV_PIX_FMT_NONE { - if unsafe { *p } == wanted { + // Walk the offered list as i32. The pointer cast is sound because + // `AVPixelFormat` is `#[repr(i32)]` (same size and alignment as i32). + // Reading as i32 cannot be UB regardless of the value FFmpeg wrote. + let mut p = pix_fmts as *const i32; + let none_int = AVPixelFormat::AV_PIX_FMT_NONE as i32; + loop { + // SAFETY: FFmpeg guarantees the list is terminated by AV_PIX_FMT_NONE. + // We bail at the sentinel; reads up to and including it are in-bounds. + let v = unsafe { ptr::read(p) }; + if v == none_int { + return AVPixelFormat::AV_PIX_FMT_NONE; + } + if v == wanted_int { return wanted; } p = unsafe { p.add(1) }; } - AVPixelFormat::AV_PIX_FMT_NONE } -/// Walk the codec's `AVCodecHWConfig` table and return the hardware pixel -/// format associated with `device_type`, if the codec advertises one that -/// uses the `HW_DEVICE_CTX` setup method. 
-pub(crate) fn find_hw_pix_fmt( - codec: *const AVCodec, - device_type: AVHWDeviceType, -) -> Option { +/// Walk the codec's `AVCodecHWConfig` table and return whether the codec +/// advertises support for `device_type` via the `HW_DEVICE_CTX` setup method. +/// +/// We do not return the codec's advertised `pix_fmt` — we know it already +/// from [`crate::backend::Backend::hw_pixel_format`] (a hardcoded constant +/// from our bindings). All reads from the FFmpeg-supplied `AVCodecHWConfig` +/// are performed as raw integers via `addr_of!` + `ptr::read::` to +/// avoid copying or interpreting enum-typed fields whose runtime values +/// might not match our build's discriminant set. +pub(crate) fn codec_supports_hwaccel(codec: *const AVCodec, device_type: AVHWDeviceType) -> bool { debug_assert!(!codec.is_null()); + let device_type_int = device_type as i32; let mut i = 0; loop { // SAFETY: `avcodec_get_hw_config` returns null past the end; we stop then. let cfg = unsafe { avcodec_get_hw_config(codec, i) }; if cfg.is_null() { - return None; + return false; } - let cfg = unsafe { *cfg }; - let supports_device_ctx = cfg.methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0; - if supports_device_ctx && cfg.device_type == device_type { - return Some(cfg.pix_fmt); + // Read each field as raw integer rather than copying the whole struct + // (which would interpret `pix_fmt` and `device_type` as their enum types). + // SAFETY: `cfg` is non-null and points to a valid `AVCodecHWConfig` for + // the lifetime of the call; `addr_of!` projects to a sized field; the + // `*const i32` cast is sound because `methods` is `c_int` (i32) and + // `device_type` is `AVHWDeviceType` (`#[repr(u32)]`, but FFmpeg's + // assigned values fit in i32 and the runtime layout is i32-sized). 
+ let methods: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).methods)) }; + let cfg_device_type_int: i32 = + unsafe { ptr::read(ptr::addr_of!((*cfg).device_type) as *const i32) }; + + if methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0 + && cfg_device_type_int == device_type_int + { + return true; } i += 1; } @@ -81,7 +125,6 @@ pub(crate) fn find_hw_pix_fmt( #[cfg(test)] mod tests { use super::*; - use std::ptr; // The callback derefs `(*ctx).opaque`, so we need a real-looking // AVCodecContext. We construct a zeroed one (the callback only reads opaque). @@ -100,22 +143,32 @@ mod tests { } } - fn run(state: &CallbackState, mut offered: Vec) -> AVPixelFormat { - offered.push(AVPixelFormat::AV_PIX_FMT_NONE); + fn make_state(wanted: AVPixelFormat) -> CallbackState { + CallbackState { + wanted, + wanted_int: wanted as i32, + } + } + + fn run(state: &CallbackState, mut offered: Vec) -> AVPixelFormat { + // Build the offered list as raw i32, terminated by AV_PIX_FMT_NONE. + offered.push(AVPixelFormat::AV_PIX_FMT_NONE as i32); let ctx = FakeCtx::new(state as *const _ as *mut _); - unsafe { get_hw_format(ctx.0, offered.as_ptr()) } + // SAFETY: we cast the i32 buffer pointer to *const AVPixelFormat + // because that's the function's declared signature. The callback only + // ever reads through *const i32 internally, so this transit through + // *const AVPixelFormat is purely a type system formality. 
+ unsafe { get_hw_format(ctx.0, offered.as_ptr() as *const AVPixelFormat) } } #[test] fn returns_wanted_hw_format_when_offered() { - let state = CallbackState { - wanted: AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX, - }; + let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX); let got = run( &state, vec![ - AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX, - AVPixelFormat::AV_PIX_FMT_NV12, + AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX as i32, + AVPixelFormat::AV_PIX_FMT_NV12 as i32, ], ); assert_eq!(got, AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX); @@ -123,14 +176,12 @@ mod tests { #[test] fn returns_none_when_wanted_absent() { - let state = CallbackState { - wanted: AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX, - }; + let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX); let got = run( &state, vec![ - AVPixelFormat::AV_PIX_FMT_NV12, - AVPixelFormat::AV_PIX_FMT_YUV420P, + AVPixelFormat::AV_PIX_FMT_NV12 as i32, + AVPixelFormat::AV_PIX_FMT_YUV420P as i32, ], ); assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE); @@ -142,11 +193,28 @@ mod tests { let ctx_raw = Box::into_raw(boxed); unsafe { (*ctx_raw).opaque = ptr::null_mut() }; let offered = [ - AVPixelFormat::AV_PIX_FMT_NV12, - AVPixelFormat::AV_PIX_FMT_NONE, + AVPixelFormat::AV_PIX_FMT_NV12 as i32, + AVPixelFormat::AV_PIX_FMT_NONE as i32, ]; - let got = unsafe { get_hw_format(ctx_raw, offered.as_ptr()) }; + let got = unsafe { get_hw_format(ctx_raw, offered.as_ptr() as *const AVPixelFormat) }; assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE); unsafe { drop(Box::from_raw(ctx_raw)) }; } + + #[test] + fn unknown_offered_value_is_skipped_without_ub() { + // Simulate a header-skewed FFmpeg that offers a pixel-format value we + // don't have a binding constant for (e.g. some future format). The + // callback walks the list as i32 — no enum is constructed from that + // value, so this read is sound. 
+ let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX); + let got = run( + &state, + vec![ + 99_999_i32, // imaginary unknown + AVPixelFormat::AV_PIX_FMT_NV12 as i32, + ], + ); + assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE); + } } diff --git a/src/frame.rs b/src/frame.rs index f221b68..d6ceca6 100644 --- a/src/frame.rs +++ b/src/frame.rs @@ -1,23 +1,27 @@ //! CPU-side decoded video frame. //! -//! Wraps `ffmpeg_next::frame::Video` so callers cannot reach the upstream -//! `format()` accessor, which constructs an `AVPixelFormat` enum from the -//! raw integer FFmpeg writes into `AVFrame.format`. That conversion is UB -//! when the value isn't in the bindgen-generated enum (library/header skew, -//! a new pixel format added upstream, etc.). The wrapper exposes -//! [`Frame::pix_fmt`] which reads the field as a plain `i32` — sound for any -//! value FFmpeg can produce — and accessors are limited to fields whose -//! reads do not invoke the same hazard. +//! Wraps `ffmpeg_next::frame::Video`. All accessors read from raw `AVFrame` +//! fields (`format`, `linesize`, `data`, `width`, `height`, `pts`) directly +//! and never go through ffmpeg-next's `Video::format()` / `plane_height()` +//! / `plane_width()` / `data()` — those construct `AVPixelFormat` from the +//! frame's raw `format` integer via `transmute`, which is undefined behavior +//! when the value isn't in the build's bindgen-generated discriminant set +//! (the exact failure mode this crate is designed to survive). //! -//! Compare formats against integer constants taken from the FFI layer, e.g. +//! Plane lengths for [`Frame::data`] are computed from a hardcoded chroma- +//! subsampling table keyed on the safe `pix_fmt()` integer, covering only +//! the formats `hwdecode` produces (the NV* and P0xx/P2xx/P4xx families +//! after `av_hwframe_transfer_data`). For any other format, [`Frame::data`] +//! returns `None` rather than guessing at a slice length. //! -//! ```ignore -//! 
use ffmpeg_next::ffi::AVPixelFormat; -//! if frame.pix_fmt() == AVPixelFormat::AV_PIX_FMT_NV12 as i32 { ... } -//! ``` +//! Compare formats against integer constants in [`crate::pix_fmt`]. + +use std::slice; use ffmpeg_next::frame; +use crate::pix_fmt; + /// CPU-side decoded video frame produced by [`crate::VideoDecoder`]. pub struct Frame { inner: frame::Video, @@ -34,45 +38,93 @@ impl Frame { /// Width in pixels. pub fn width(&self) -> u32 { - self.inner.width() + // SAFETY: AVFrame.width is c_int; safe to read regardless of value. + unsafe { (*self.inner.as_ptr()).width as u32 } } /// Height in pixels. pub fn height(&self) -> u32 { - self.inner.height() + // SAFETY: AVFrame.height is c_int. + unsafe { (*self.inner.as_ptr()).height as u32 } } /// Pixel format, returned as the raw `i32` value FFmpeg wrote to /// `AVFrame.format`. Sound regardless of the linked FFmpeg version — /// no `AVPixelFormat` enum is constructed. /// - /// Compare against integer constants from `ffmpeg_next::ffi`, e.g. - /// `frame.pix_fmt() == AVPixelFormat::AV_PIX_FMT_NV12 as i32`. + /// Compare against constants in [`crate::pix_fmt`]. pub fn pix_fmt(&self) -> i32 { - // SAFETY: `AVFrame.format` is bound as `c_int`; reading it yields a - // plain integer with no validity invariants. + // SAFETY: AVFrame.format is bound as c_int. unsafe { (*self.inner.as_ptr()).format } } - /// Presentation timestamp in stream time base, or `None` if the frame - /// carries `AV_NOPTS_VALUE`. + /// Presentation timestamp in stream time base, or `None` for + /// `AV_NOPTS_VALUE`. pub fn pts(&self) -> Option { + // ffmpeg-next's Frame::pts performs no enum conversion; safe to use. self.inner.pts() } - /// Number of populated planes (e.g. 3 for `YUV420P`, 2 for `NV12`). + /// Number of populated planes (1 for packed formats, 2 for NV12/P010, + /// 3 for planar YUV, etc.). Computed by scanning `linesize` for the + /// first zero entry — no enum reads. 
pub fn planes(&self) -> usize { - self.inner.planes() + // SAFETY: AVFrame.linesize is `[c_int; 8]`; reads are sound. + unsafe { + let linesize = &(*self.inner.as_ptr()).linesize; + for (i, ls) in linesize.iter().enumerate() { + if *ls == 0 { + return i; + } + } + linesize.len() + } } - /// Bytes per row for `plane`. Panics if `plane >= planes()`. + /// Bytes per row for `plane`. Reads `AVFrame.linesize[plane]` directly. + /// Panics if `plane >= planes()`. pub fn stride(&self, plane: usize) -> usize { - self.inner.stride(plane) + let n = self.planes(); + assert!( + plane < n, + "stride: plane {plane} out of bounds (planes={n})" + ); + // SAFETY: bounds-checked above; linesize is `[c_int; 8]`. + unsafe { (*self.inner.as_ptr()).linesize[plane] as usize } } - /// Pixel data for `plane`. Panics if `plane >= planes()`. - pub fn data(&self, plane: usize) -> &[u8] { - self.inner.data(plane) + /// Pixel data for `plane`. + /// + /// Returns `None` when the frame's pixel format is not one of the + /// hardware-output formats listed in [`crate::pix_fmt`] — we cannot + /// safely compute the plane size for an unknown layout. Returns `None` + /// for an out-of-bounds plane index, a null data pointer, or an empty + /// frame. + /// + /// Currently supported (post-`av_hwframe_transfer_data`): + /// - 4:2:0 semi-planar 8-bit: `NV12`, `NV21` + /// - 4:2:2 semi-planar 8-bit: `NV16` + /// - 4:4:4 semi-planar 8-bit: `NV24` + /// - 4:2:0 semi-planar 10/12/16-bit: `P010LE`/`P010BE`/`P012LE`/`P016LE` + /// - 4:2:2 semi-planar 10/12/16-bit: `P210LE`/`P212LE`/`P216LE` + /// - 4:4:4 semi-planar 10/12/16-bit: `P410LE`/`P412LE`/`P416LE` + pub fn data(&self, plane: usize) -> Option<&[u8]> { + if plane >= self.planes() { + return None; + } + let stride = self.stride(plane); + let plane_height = plane_height_for(self.pix_fmt(), plane, self.height() as usize)?; + let len = stride.checked_mul(plane_height)?; + // SAFETY: bounds-checked plane index above. 
We trust FFmpeg to populate + // `data[plane]` validly when `linesize[plane]` is non-zero (which we + // verified via `planes()`); null-check guards against edge cases. + unsafe { + let ptr = (*self.inner.as_ptr()).data[plane]; + if ptr.is_null() { + return None; + } + Some(slice::from_raw_parts(ptr, len)) + } } /// Crate-internal: hand the wrapped frame to FFmpeg / our decoder code. @@ -87,6 +139,37 @@ impl Default for Frame { } } +/// Number of rows in `plane` for a frame of `frame_height` and the given +/// pixel format. `None` for formats not in the supported HW-output set. +fn plane_height_for(pix_fmt_int: i32, plane: usize, frame_height: usize) -> Option { + match pix_fmt_int { + // 4:2:0 semi-planar — Y full height, chroma half height. + pix_fmt::NV12 + | pix_fmt::NV21 + | pix_fmt::P010LE + | pix_fmt::P010BE + | pix_fmt::P012LE + | pix_fmt::P016LE => match plane { + 0 => Some(frame_height), + 1 => Some(frame_height.div_ceil(2)), + _ => None, + }, + // 4:2:2 / 4:4:4 semi-planar — both planes full height. + pix_fmt::NV16 + | pix_fmt::NV24 + | pix_fmt::P210LE + | pix_fmt::P212LE + | pix_fmt::P216LE + | pix_fmt::P410LE + | pix_fmt::P412LE + | pix_fmt::P416LE => match plane { + 0 | 1 => Some(frame_height), + _ => None, + }, + _ => None, + } +} + #[cfg(test)] mod tests { use super::*; @@ -99,6 +182,15 @@ mod tests { assert_eq!(f.pts(), None); // AVFrame.format defaults to -1 (AV_PIX_FMT_NONE) for an empty frame. assert_eq!(f.pix_fmt(), -1); + // No active planes for an empty frame (all linesize entries are 0). + assert_eq!(f.planes(), 0); + } + + #[test] + fn data_returns_none_for_unknown_format() { + let f = Frame::empty(); + // pix_fmt is NONE (-1), not in the supported set. + assert!(f.data(0).is_none()); } #[test] @@ -106,4 +198,18 @@ mod tests { fn check() {} check::(); } + + #[test] + fn plane_height_table_covers_supported_formats() { + // Spot-check the chroma subsampling table. 
+ assert_eq!(plane_height_for(pix_fmt::NV12, 0, 1080), Some(1080)); + assert_eq!(plane_height_for(pix_fmt::NV12, 1, 1080), Some(540)); + assert_eq!(plane_height_for(pix_fmt::NV12, 1, 1081), Some(541)); + assert_eq!(plane_height_for(pix_fmt::P010LE, 1, 1080), Some(540)); + assert_eq!(plane_height_for(pix_fmt::NV16, 1, 1080), Some(1080)); + assert_eq!(plane_height_for(pix_fmt::NV24, 1, 1080), Some(1080)); + assert_eq!(plane_height_for(pix_fmt::P416LE, 1, 1080), Some(1080)); + assert_eq!(plane_height_for(pix_fmt::NONE, 0, 1080), None); + assert_eq!(plane_height_for(pix_fmt::NV12, 2, 1080), None); + } } From fa30939968282e84a3752df080c96e9346307a88 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 18:09:51 +1200 Subject: [PATCH 06/27] update --- src/decoder.rs | 261 +++++++++++++++++++++++++++++++++++++++++-------- src/error.rs | 9 +- src/frame.rs | 99 ++++++++++++++++--- 3 files changed, 315 insertions(+), 54 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index d6cffe3..fcaea44 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -1,14 +1,27 @@ -use std::{mem::ManuallyDrop, ptr}; +use std::{collections::VecDeque, mem::ManuallyDrop, ptr}; use ffmpeg_next::{ codec::{self, Context}, ffi::{ - av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_unref, av_hwdevice_ctx_create, - av_hwframe_transfer_data, AVBufferRef, + av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref, + av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVCodec, }, frame, Codec, Packet, Rational, }; +/// Local FFI shim: `avcodec_find_decoder` declared with `c_int` instead of +/// the bindgen `AVCodecID` enum. Constructing `AVCodecID` from a runtime +/// integer that isn't in our build's discriminant set is UB; calling the +/// C function with a raw int avoids that boundary entirely. Both Rust +/// declarations resolve to the same C symbol at link time. 
+mod c_shims { + use super::AVCodec; + use libc::c_int; + extern "C" { + pub fn avcodec_find_decoder(id: c_int) -> *const AVCodec; + } +} + use crate::{ backend::{self, Backend}, error::{Error, Result}, @@ -42,6 +55,13 @@ pub struct VideoDecoder { /// backend, then `None`. While `Some`, packets are buffered for replay and /// non-transient errors / decoder failures advance to the next backend. probe: Option, + /// CPU-side frames produced by a candidate decoder during probe replay + /// (when its internal queue filled and we had to drain output before the + /// next `send_packet`). Already transferred from the candidate's + /// `AVHWFramesContext` to a CPU frame, so they remain valid after the + /// candidate state is committed. [`Self::receive_frame`] dequeues these + /// FIFO before reading from `state.inner`. + pending_frames: VecDeque, } /// Owned FFmpeg state for one open codec context. Has its own `Drop` so we @@ -121,8 +141,7 @@ impl VideoDecoder { /// `open` cannot return without a working decoder for any codec libavcodec /// supports. pub fn open(parameters: codec::Parameters) -> Result { - let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id }); - let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?; + let codec = find_decoder(¶meters)?; let order = backend::probe_order(); let mut attempts: Vec<(Backend, Box)> = Vec::new(); @@ -142,6 +161,7 @@ impl VideoDecoder { state, hw_frame: frame::Video::empty(), probe, + pending_frames: VecDeque::new(), }); } Err(e) => { @@ -161,13 +181,13 @@ impl VideoDecoder { /// `AV_PIX_FMT_NONE`, the decoder errors out). The caller is responsible /// for retrying with `Backend::Software` or another backend if desired. 
pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result { - let codec_id = codec::Id::from(unsafe { (*parameters.as_ptr()).codec_id }); - let codec = ffmpeg_next::decoder::find(codec_id).ok_or(Error::NoCodec(codec_id))?; + let codec = find_decoder(¶meters)?; let state = Self::build_state(parameters, codec, backend)?; Ok(Self { state, hw_frame: frame::Video::empty(), probe: None, + pending_frames: VecDeque::new(), }) } @@ -199,32 +219,64 @@ impl VideoDecoder { self.state.inner.frame_rate() } - /// Submit a packet to the decoder. On success — and only on success — - /// the packet is buffered for potential replay through a fallback backend - /// while the probe is active. A failed send (including EAGAIN) does not - /// mutate replay state, so a later probe advance only replays history - /// FFmpeg actually accepted. + /// Submit a packet to the decoder. + /// + /// On success — and only on success — the packet is buffered for potential + /// replay through a fallback backend while the probe is active. EAGAIN + /// (decoder needs `receive_frame` to drain output first) propagates as + /// normal backpressure; the caller drains then retries. + /// + /// While the probe is active, a non-transient error (e.g. the active HW + /// backend rejecting this stream's geometry on first packet) advances the + /// probe to the next candidate and retries the packet there. The caller + /// observes only the eventual success or, if the probe is exhausted, the + /// final error. 
pub fn send_packet(&mut self, packet: &Packet) -> Result<()> { - self - .state - .inner - .send_packet(packet) - .map_err(Error::Ffmpeg)?; - if let Some(probe) = self.probe.as_mut() { - probe.buffered_packets.push(packet.clone()); + loop { + match self.state.inner.send_packet(packet) { + Ok(()) => { + if let Some(probe) = self.probe.as_mut() { + probe.buffered_packets.push(packet.clone()); + } + return Ok(()); + } + Err(e) if is_transient(&e) => { + // Normal backpressure / EOF — pass through unchanged. + return Err(Error::Ffmpeg(e)); + } + Err(e) => { + if self.probe.is_some() && self.advance_probe()? { + continue; + } + return Err(Error::Ffmpeg(e)); + } + } } - Ok(()) } - /// Signal end-of-stream to the decoder; remaining frames can be drained - /// with [`Self::receive_frame`]. Recorded for replay only if the underlying - /// `send_eof` succeeds. + /// Signal end-of-stream to the decoder. + /// + /// Recorded for replay only if the underlying `send_eof` succeeds. While + /// the probe is active, non-transient errors trigger probe advance and + /// retry, matching `send_packet`'s behaviour. pub fn send_eof(&mut self) -> Result<()> { - self.state.inner.send_eof().map_err(Error::Ffmpeg)?; - if let Some(probe) = self.probe.as_mut() { - probe.eof_sent = true; + loop { + match self.state.inner.send_eof() { + Ok(()) => { + if let Some(probe) = self.probe.as_mut() { + probe.eof_sent = true; + } + return Ok(()); + } + Err(e) if is_transient(&e) => return Err(Error::Ffmpeg(e)), + Err(e) => { + if self.probe.is_some() && self.advance_probe()? { + continue; + } + return Err(Error::Ffmpeg(e)); + } + } } - Ok(()) } /// Receive a CPU-side decoded frame. @@ -238,18 +290,25 @@ impl VideoDecoder { /// error, transfer error, copy_props error, or a CPU-format frame from a /// HW-opened context) tears down the current decoder and advances to the /// next hardware backend in probe order, replaying buffered packets - /// through it. 
The caller observes only the eventual successful frame - /// (or, if every backend has been exhausted, the underlying error). + /// through it. Frames the candidate produced during replay (drained when + /// `send_packet` returned EAGAIN) are queued and delivered FIFO via this + /// method, so the caller never loses initial frames after a fallback. /// /// This crate is hardware-only: there is no software fallback inside the /// decoder. If every backend is exhausted, the failure surfaces as the - /// last decoder error (or [`Error::HwBackendProducedSwFrame`] for the - /// degraded-CPU-frame case). Callers handle software fallback themselves. + /// last decoder error. Callers handle software fallback themselves. /// /// Returns the same transient signals as `ffmpeg::decoder::Video`: /// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and /// more packets must be sent, and `Error::Ffmpeg(Eof)` once fully drained. pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<()> { + // Pre-drain frames queued during probe replay. They are already CPU-side + // (transferred at drain time, when the candidate's HW context was alive) + // so we just move them into the caller's slot. + if self.try_pop_pending(frame) { + return Ok(()); + } + loop { let res = self.state.inner.receive_frame(&mut self.hw_frame); match res { @@ -258,6 +317,11 @@ impl VideoDecoder { return Err(Error::Ffmpeg(e)); } if self.probe.is_some() && self.advance_probe()? { + // Probe advance may have populated `pending_frames`; deliver + // one of those before reading more from the new candidate. + if self.try_pop_pending(frame) { + return Ok(()); + } continue; } return Err(Error::Ffmpeg(e)); @@ -277,6 +341,9 @@ impl VideoDecoder { Err(e) => { if self.probe.is_some() && self.advance_probe()? 
{ unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) }; + if self.try_pop_pending(frame) { + return Ok(()); + } continue; } return Err(Error::Ffmpeg(e)); @@ -287,6 +354,24 @@ impl VideoDecoder { } } + /// Pop one queued frame (produced by a candidate decoder during probe + /// replay) into the caller's slot. Returns `true` when a frame was + /// delivered, `false` when the queue was empty. + fn try_pop_pending(&mut self, frame: &mut Frame) -> bool { + let Some(mut buffered) = self.pending_frames.pop_front() else { + return false; + }; + // SAFETY: `buffered` is a CPU-side AVFrame we previously transferred + // and pushed into the queue; both pointers are valid. + unsafe { + av_frame_unref(frame.as_inner_mut().as_mut_ptr()); + av_frame_move_ref(frame.as_inner_mut().as_mut_ptr(), buffered.as_mut_ptr()); + } + // Probe semantics: delivering a frame collapses the probe. + self.probe = None; + true + } + /// Flush internal buffers (e.g. after a seek). Resets probe-time buffer if /// active, since post-seek packets do not align with replayed history. pub fn flush(&mut self) { @@ -342,13 +427,39 @@ impl VideoDecoder { // We borrow the buffer immutably; if replay fails the candidate's Drop // releases the FFmpeg state and the buffer is preserved for the next // attempt. + // + // EAGAIN handling: `avcodec_send_packet` may return EAGAIN when its + // internal queue is full and the user is expected to drain output + // first (B-frame buffering, candidate-specific queue depth, etc.). + // This is normal flow — we drain frames out of the candidate, transfer + // each one to a CPU frame, and stash them in `local_pending`. After + // commit they move to `self.pending_frames` and are delivered FIFO + // by `receive_frame`, so the caller never loses initial frames. 
+ let mut local_pending: VecDeque = VecDeque::new(); let replay_result: std::result::Result<(), ffmpeg_next::Error> = { let probe = self.probe.as_ref().expect("probe state present"); + let mut hw_buf = frame::Video::empty(); let mut r: std::result::Result<(), ffmpeg_next::Error> = Ok(()); - for pkt in &probe.buffered_packets { - if let Err(e) = candidate_state.inner.send_packet(pkt) { - r = Err(e); - break; + + 'replay: for pkt in &probe.buffered_packets { + loop { + match candidate_state.inner.send_packet(pkt) { + Ok(()) => break, + Err(e) if is_eagain(&e) => { + // Drain candidate output (transferring + queueing each frame) + // and retry the same packet. + if let Err(de) = + drain_into_pending(&mut candidate_state.inner, &mut hw_buf, &mut local_pending) + { + r = Err(de); + break 'replay; + } + } + Err(e) => { + r = Err(e); + break 'replay; + } + } } } if r.is_ok() && probe.eof_sent { @@ -361,8 +472,11 @@ impl VideoDecoder { if let Err(e) = replay_result { tracing::warn!(?next_backend, error = %e, "hwdecode: candidate replay failed"); - // Drop candidate explicitly so its FFI cleanup runs now. + // Drop candidate explicitly so its FFI cleanup runs now. Discard any + // frames we drained from this candidate — they're tied to a decoder + // we're throwing away. drop(candidate_state); + drop(local_pending); self .probe .as_mut() @@ -372,9 +486,11 @@ impl VideoDecoder { continue; } - // Commit: install the candidate, clear residual hw_frame, pop backend. + // Commit: install the candidate, clear residual hw_frame, queue the + // drained frames for the caller, and pop the now-active backend. self.state = candidate_state; unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) }; + self.pending_frames.append(&mut local_pending); self .probe .as_mut() @@ -484,8 +600,75 @@ unsafe fn transfer_hw_frame( /// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame` /// and must not be treated as backend failures. 
fn is_transient(e: &ffmpeg_next::Error) -> bool { + is_eagain(e) || matches!(e, ffmpeg_next::Error::Eof) +} + +/// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine +/// distinguishes "drain output and retry" from "stream over"). +fn is_eagain(e: &ffmpeg_next::Error) -> bool { matches!(e, ffmpeg_next::Error::Other { errno } if *errno == ffmpeg_next::error::EAGAIN) - || matches!(e, ffmpeg_next::Error::Eof) +} + +/// Look up the decoder for `parameters` without going through the bindgen +/// `AVCodecID` Rust enum. Reads the codec_id field as raw `u32` via +/// `addr_of!` + `ptr::read` so a value not in our build's discriminant +/// set never invokes UB. +fn find_decoder(parameters: &codec::Parameters) -> Result { + // SAFETY: parameters owns a valid AVCodecParameters; addr_of! projects + // to the codec_id field; the *const u32 cast is sound because AVCodecID + // is `#[repr(u32)]` (same size and alignment as u32). Reading as u32 + // cannot be UB regardless of the value FFmpeg wrote. + let raw_id: u32 = + unsafe { ptr::read(ptr::addr_of!((*parameters.as_ptr()).codec_id) as *const u32) }; + + // Call C `avcodec_find_decoder` via our local `c_int`-typed shim — we + // never construct an `AVCodecID` enum from `raw_id`. The C function + // returns NULL for unknown ids, which we surface as `Error::NoCodec`. + // SAFETY: avcodec_find_decoder is a pure FFmpeg lookup; passing any + // c_int is sound (returns NULL for unknown). + let codec_ptr = unsafe { c_shims::avcodec_find_decoder(raw_id as libc::c_int) }; + if codec_ptr.is_null() { + return Err(Error::NoCodec(raw_id)); + } + // SAFETY: codec_ptr is a non-null *const AVCodec into FFmpeg's static + // codec table; it lives for the duration of the program. + Ok(unsafe { Codec::wrap(codec_ptr) }) +} + +/// Drain output frames from a candidate decoder during probe replay, +/// transferring each one from the candidate's HW context to a fresh CPU +/// frame and queueing it. 
Returns `Ok(())` once the candidate signals +/// EAGAIN/EOF. The transfer happens while the candidate is still alive +/// (its `AVHWFramesContext` is reachable); the resulting CPU frames remain +/// valid after the candidate is committed because they hold their own +/// buffer references with no dependency on the original device context. +fn drain_into_pending( + decoder: &mut ffmpeg_next::decoder::Video, + hw_buf: &mut frame::Video, + pending: &mut VecDeque, +) -> std::result::Result<(), ffmpeg_next::Error> { + loop { + match decoder.receive_frame(hw_buf) { + Ok(()) => { + let mut cpu = frame::Video::empty(); + // SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data + // allocates buffers on `cpu`. copy_props moves timing/side data over. + unsafe { + let r1 = av_hwframe_transfer_data(cpu.as_mut_ptr(), hw_buf.as_ptr(), 0); + if r1 < 0 { + return Err(ffmpeg_next::Error::from(r1)); + } + let r2 = av_frame_copy_props(cpu.as_mut_ptr(), hw_buf.as_ptr()); + if r2 < 0 { + return Err(ffmpeg_next::Error::from(r2)); + } + } + pending.push_back(cpu); + } + Err(e) if is_transient(&e) => return Ok(()), + Err(e) => return Err(e), + } + } } #[allow(dead_code)] @@ -500,7 +683,7 @@ mod tests { #[test] fn no_codec_for_unknown_id() { - let err = Error::NoCodec(codec::Id::None); + let err = Error::NoCodec(0); assert!(format!("{err}").contains("no decoder")); } diff --git a/src/error.rs b/src/error.rs index ef5373c..955d215 100644 --- a/src/error.rs +++ b/src/error.rs @@ -10,9 +10,12 @@ pub enum Error { #[error("ffmpeg error: {0}")] Ffmpeg(#[from] ffmpeg_next::Error), - /// `avcodec_find_decoder` returned null for the input codec id. - #[error("no decoder for codec id {0:?}")] - NoCodec(ffmpeg_next::codec::Id), + /// `avcodec_find_decoder` returned null for the input codec id. 
The id + /// is reported as the raw integer (`AVCodecID` discriminant) — we do not + /// construct the bindgen `AVCodecID` enum from a runtime value, since + /// values outside our build's discriminant set would invoke UB. + #[error("no decoder for codec id {0}")] + NoCodec(u32), /// The codec does not advertise a hardware configuration matching the /// requested backend (via `avcodec_get_hw_config`). diff --git a/src/frame.rs b/src/frame.rs index d6ceca6..15e903c 100644 --- a/src/frame.rs +++ b/src/frame.rs @@ -82,7 +82,9 @@ impl Frame { } /// Bytes per row for `plane`. Reads `AVFrame.linesize[plane]` directly. - /// Panics if `plane >= planes()`. + /// Panics if `plane >= planes()` or the linesize is non-positive (FFmpeg + /// allows negative linesize for vertically-flipped formats; this crate + /// does not surface those — call [`Self::data`] first to test safely). pub fn stride(&self, plane: usize) -> usize { let n = self.planes(); assert!( @@ -90,16 +92,29 @@ impl Frame { "stride: plane {plane} out of bounds (planes={n})" ); // SAFETY: bounds-checked above; linesize is `[c_int; 8]`. - unsafe { (*self.inner.as_ptr()).linesize[plane] as usize } + let linesize: i32 = unsafe { (*self.inner.as_ptr()).linesize[plane] }; + assert!( + linesize > 0, + "stride: non-positive linesize {linesize} for plane {plane} \ + (negative linesize means vertically-flipped — not supported)" + ); + linesize as usize } /// Pixel data for `plane`. /// - /// Returns `None` when the frame's pixel format is not one of the - /// hardware-output formats listed in [`crate::pix_fmt`] — we cannot - /// safely compute the plane size for an unknown layout. Returns `None` - /// for an out-of-bounds plane index, a null data pointer, or an empty - /// frame. + /// Returns `None` for any of the following — never panics: + /// - The frame's pixel format is not one of the hardware-output formats + /// listed in [`crate::pix_fmt`] (we cannot safely compute the plane + /// size for an unknown layout). 
+ /// - The plane index is out of range. + /// - `AVFrame.linesize[plane]` is `<= 0` (negative linesize signals + /// vertically-flipped FFmpeg layouts which we do not surface; zero is + /// "no plane"). + /// - `AVFrame.height` is `<= 0`. + /// - The computed slice length would overflow or exceed `isize::MAX` + /// (a precondition of [`std::slice::from_raw_parts`]). + /// - The plane's data pointer is null. /// /// Currently supported (post-`av_hwframe_transfer_data`): /// - 4:2:0 semi-planar 8-bit: `NV12`, `NV21` @@ -112,12 +127,26 @@ impl Frame { if plane >= self.planes() { return None; } - let stride = self.stride(plane); - let plane_height = plane_height_for(self.pix_fmt(), plane, self.height() as usize)?; + + // SAFETY: bounds-checked plane index; `linesize` and `height` are + // primitive c_int reads that cannot themselves be UB. + let linesize: i32 = unsafe { (*self.inner.as_ptr()).linesize[plane] }; + let height_int: i32 = unsafe { (*self.inner.as_ptr()).height }; + if linesize <= 0 || height_int <= 0 { + return None; + } + let stride = linesize as usize; + + let plane_height = plane_height_for(self.pix_fmt(), plane, height_int as usize)?; let len = stride.checked_mul(plane_height)?; - // SAFETY: bounds-checked plane index above. We trust FFmpeg to populate - // `data[plane]` validly when `linesize[plane]` is non-zero (which we - // verified via `planes()`); null-check guards against edge cases. + if len > isize::MAX as usize { + return None; + } + + // SAFETY: linesize > 0 and height > 0 verified; len <= isize::MAX + // verified — both preconditions of `slice::from_raw_parts`. We trust + // FFmpeg to populate `data[plane]` validly when linesize[plane] is + // non-zero; the null check is a final defensive guard. 
unsafe { let ptr = (*self.inner.as_ptr()).data[plane]; if ptr.is_null() { @@ -193,6 +222,52 @@ mod tests { assert!(f.data(0).is_none()); } + /// Synthesize a frame with a negative linesize (FFmpeg's vertical-flip + /// convention) and assert `data()` refuses to construct a slice. Without + /// the linesize > 0 check, the negative `i32 as usize` would produce a + /// huge positive length and `from_raw_parts` would be UB. + #[test] + fn data_returns_none_for_negative_linesize() { + let mut f = Frame::empty(); + unsafe { + let raw = f.inner.as_mut_ptr(); + (*raw).format = pix_fmt::NV12; + (*raw).width = 1920; + (*raw).height = 1080; + (*raw).linesize[0] = -1920; // vertically-flipped + (*raw).linesize[1] = -1920; + // data pointers stay null; `data()` would return None on the null + // check anyway, but should bail earlier on the linesize sign. + } + assert!(f.data(0).is_none()); + assert!(f.data(1).is_none()); + } + + #[test] + fn data_returns_none_for_non_positive_height() { + let mut f = Frame::empty(); + unsafe { + let raw = f.inner.as_mut_ptr(); + (*raw).format = pix_fmt::NV12; + (*raw).width = 1920; + (*raw).height = 0; + (*raw).linesize[0] = 1920; + (*raw).linesize[1] = 1920; + } + assert!(f.data(0).is_none()); + } + + #[test] + #[should_panic(expected = "non-positive linesize")] + fn stride_panics_on_negative_linesize() { + let mut f = Frame::empty(); + unsafe { + let raw = f.inner.as_mut_ptr(); + (*raw).linesize[0] = -1920; + } + let _ = f.stride(0); + } + #[test] fn frame_is_send() { fn check() {} From b48e5329253cdf4f6ae6740c950dd4486e0e898a Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 18:37:37 +1200 Subject: [PATCH 07/27] update --- src/decoder.rs | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index fcaea44..0bd638e 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -313,9 +313,14 @@ impl VideoDecoder { 
let res = self.state.inner.receive_frame(&mut self.hw_frame); match res { Err(e) => { - if is_transient(&e) { + // EAGAIN is normal backpressure — pass through unconditionally. + if is_eagain(&e) { return Err(Error::Ffmpeg(e)); } + // EOF (and every other non-EAGAIN error): if we are still + // probing, treat it as candidate failure — a backend that drains + // to EOF without ever producing a frame should not silently + // present as "stream over" to the caller. Advance and retry. if self.probe.is_some() && self.advance_probe()? { // Probe advance may have populated `pending_frames`; deliver // one of those before reading more from the new candidate. @@ -324,6 +329,8 @@ impl VideoDecoder { } continue; } + // Probe collapsed or exhausted — surface the error (including EOF + // for a genuinely empty stream). return Err(Error::Ffmpeg(e)); } Ok(()) => { @@ -372,10 +379,20 @@ impl VideoDecoder { true } - /// Flush internal buffers (e.g. after a seek). Resets probe-time buffer if - /// active, since post-seek packets do not align with replayed history. + /// Flush internal buffers (e.g. after a seek). + /// + /// Discards every frame buffered by the decoder, every frame queued during + /// probe replay (`pending_frames`), and the residual `hw_frame` scratch + /// buffer. Probe-time replay state (buffered packets, EOF marker) is also + /// cleared since post-seek packets do not align with the previously + /// captured history. After a flush, the next `receive_frame` waits for new + /// post-seek input. pub fn flush(&mut self) { self.state.inner.flush(); + // SAFETY: hw_frame is a valid AVFrame we own; av_frame_unref is a no-op + // for an already-empty frame. 
+ unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) }; + self.pending_frames.clear(); if let Some(probe) = self.probe.as_mut() { probe.buffered_packets.clear(); probe.eof_sent = false; @@ -540,9 +557,28 @@ impl VideoDecoder { // SAFETY: ctx is a freshly-constructed AVCodecContext we own; // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's // use (we keep our own ref in `hw_device_ref` for cleanup). + // av_buffer_ref returns NULL on allocation failure; we must check it + // before assigning, otherwise the codec context would be opened with a + // HW-flagged setup but no actual device reference. + let device_ref_for_ctx = unsafe { av_buffer_ref(hw_device_ref) }; + if device_ref_for_ctx.is_null() { + // SAFETY: rolling back what we just allocated above. hw_device_ref + // is non-null (we checked after av_hwdevice_ctx_create); callback_state + // was just freshly Box::into_raw'd. + unsafe { + let mut hw = hw_device_ref; + av_buffer_unref(&mut hw); + drop(Box::from_raw(callback_state)); + } + return Err(Error::Ffmpeg(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + })); + } + // SAFETY: device_ref_for_ctx is a valid AVBufferRef* from av_buffer_ref; + // ctx is freshly built and owned by us. 
unsafe { let raw = ctx.as_mut_ptr(); - (*raw).hw_device_ctx = av_buffer_ref(hw_device_ref); + (*raw).hw_device_ctx = device_ref_for_ctx; (*raw).opaque = callback_state.cast(); (*raw).get_format = Some(get_hw_format); } From 4dc0be9ed900f0d0ca2477ebdf052dddc5110a8a Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 18:52:46 +1200 Subject: [PATCH 08/27] update --- src/decoder.rs | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index 0bd638e..542b36f 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -79,6 +79,20 @@ struct DecoderState { callback_state: *mut CallbackState, } +/// Maximum number of packets we are willing to buffer for probe replay +/// before abandoning the fallback safety net. Set high enough to absorb +/// long B-frame GOPs and codec setup latency, low enough to bound memory +/// against malicious / pathological streams that never produce a first +/// frame. +const MAX_PROBE_PACKETS: usize = 256; + +/// Maximum total compressed-byte size of buffered probe packets. Each +/// `Packet` clone holds a refcounted reference to the demuxer's bitstream +/// data — even though the clone itself is shallow, the underlying buffers +/// stay alive until we drop them. 64 MiB is generous for normal video and +/// gives untrusted media a hard ceiling. +const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024; + /// State carried only during the probe window (before the first successful /// frame). Holds enough information to tear down the current decoder and /// retry with the next backend. @@ -90,8 +104,12 @@ struct ProbeState { remaining_backends: Vec, /// Packets sent so far, kept for replay through any candidate backend. /// Preserved across failed candidates — only cleared when the probe - /// collapses on a successful first frame. 
+ /// collapses on a successful first frame, or when the probe is + /// abandoned due to the size caps. buffered_packets: Vec, + /// Cumulative size (in compressed bytes) of `buffered_packets`. Tracked + /// incrementally so we don't have to re-sum on every send. + buffered_bytes: usize, /// Whether `send_eof` has been called; replayed alongside packets. eof_sent: bool, } @@ -155,6 +173,7 @@ impl VideoDecoder { codec, remaining_backends: remaining, buffered_packets: Vec::new(), + buffered_bytes: 0, eof_sent: false, }); return Ok(Self { @@ -231,12 +250,37 @@ impl VideoDecoder { /// probe to the next candidate and retries the packet there. The caller /// observes only the eventual success or, if the probe is exhausted, the /// final error. + /// + /// If the probe window grows beyond [`MAX_PROBE_PACKETS`] or + /// [`MAX_PROBE_PACKET_BYTES`] without producing a first frame (a stream + /// the active backend is silently mishandling, or pathological input), + /// the probe is **abandoned**: replay history is dropped, queued frames + /// are cleared, and `self.probe = None`. The active backend continues + /// serving the caller without fallback. A `tracing::warn!` records this + /// so it is visible in production logs. 
pub fn send_packet(&mut self, packet: &Packet) -> Result<()> { loop { match self.state.inner.send_packet(packet) { Ok(()) => { if let Some(probe) = self.probe.as_mut() { - probe.buffered_packets.push(packet.clone()); + let pkt_size = packet.size(); + let new_count = probe.buffered_packets.len() + 1; + let new_bytes = probe.buffered_bytes.saturating_add(pkt_size); + if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES { + tracing::warn!( + packets = new_count, + bytes = new_bytes, + max_packets = MAX_PROBE_PACKETS, + max_bytes = MAX_PROBE_PACKET_BYTES, + "hwdecode: probe window exceeded caps without first frame; \ + abandoning fallback safety net" + ); + self.probe = None; + self.pending_frames.clear(); + } else { + probe.buffered_packets.push(packet.clone()); + probe.buffered_bytes = new_bytes; + } } return Ok(()); } @@ -395,6 +439,7 @@ impl VideoDecoder { self.pending_frames.clear(); if let Some(probe) = self.probe.as_mut() { probe.buffered_packets.clear(); + probe.buffered_bytes = 0; probe.eof_sent = false; } } From 964e9e7c571269bce792094fa09bf566e1d4e32d Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 21:35:50 +1200 Subject: [PATCH 09/27] update --- README.md | 4 +- docs/design.md | 4 +- src/decoder.rs | 112 +++++++++++++++++++++++++++++++++++++++++++++++-- src/frame.rs | 10 ++++- 4 files changed, 124 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index bcfb058..007eac3 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,9 @@ unconditionally. ## Build requirements -- A system FFmpeg ≥ 4.x linkable via `pkg-config`. Verify with +- A system FFmpeg ≥ **5.1** linkable via `pkg-config` (we reference + `AV_PIX_FMT_P212LE` / `AV_PIX_FMT_P412LE`, which were added in 5.1). + Tested against 8.1. Verify with `ffmpeg -hwaccels` that your build has the backends you expect compiled in (e.g. `videotoolbox` on macOS, `vaapi` / `cuda` on Linux, `d3d11va` / `cuda` on Windows). 
diff --git a/docs/design.md b/docs/design.md index 056bb4f..6acc8c5 100644 --- a/docs/design.md +++ b/docs/design.md @@ -130,7 +130,9 @@ No other modules. Keep the surface small. No platform-specific Cargo features. `cfg!(target_os = ...)` selects which `AVHWDeviceType` constants we even attempt — the FFI symbols are linked unconditionally via `ffmpeg-sys-next`. -System FFmpeg ≥ 4.x. Verified against the user's macOS Homebrew build (FFmpeg 8.1, VideoToolbox enabled). +System FFmpeg ≥ **5.1** (we reference `AV_PIX_FMT_P212LE` / `AV_PIX_FMT_P412LE`, +added upstream in 5.1). Verified against the macOS Homebrew build (FFmpeg 8.1, +VideoToolbox enabled). ## Testing diff --git a/src/decoder.rs b/src/decoder.rs index 542b36f..3a8067e 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -62,6 +62,10 @@ pub struct VideoDecoder { /// candidate state is committed. [`Self::receive_frame`] dequeues these /// FIFO before reading from `state.inner`. pending_frames: VecDeque, + /// Per-decoder byte budget for [`Self::pending_frames`] during probe + /// replay. Defaults to [`DEFAULT_MAX_PROBE_PENDING_BYTES`]; override via + /// [`Self::with_max_probe_pending_bytes`]. + max_probe_pending_bytes: usize, } /// Owned FFmpeg state for one open codec context. Has its own `Drop` so we @@ -93,6 +97,30 @@ const MAX_PROBE_PACKETS: usize = 256; /// gives untrusted media a hard ceiling. const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024; +/// Maximum number of CPU frames we are willing to queue from a candidate +/// during probe replay. Each frame is a fully-allocated CPU buffer +/// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so +/// an unbounded queue would OOM on a candidate with a shallow internal +/// queue against a deep replay history. Drained candidate frames in +/// excess of this cap (or [`DEFAULT_MAX_PROBE_PENDING_BYTES`], whichever +/// hits first) are discarded with a `tracing::warn!`; we still drain so +/// `send_packet` can keep feeding the candidate. 
+const MAX_PROBE_PENDING_FRAMES: usize = 16; + +/// Default byte budget for probe-replay drained frames. 256 MiB holds +/// roughly 10 frames at 4K P010 (~24 MiB each; the 16-frame count cap alone +/// would admit ~384 MiB), so it is the cap that fires first for +/// high-resolution content (8K P010: ~96 MiB per frame → only ~2 frames fit). +/// +/// Override per-decoder with [`VideoDecoder::with_max_probe_pending_bytes`] +/// when targeting 8K+ workloads or memory-constrained environments. +/// +/// TODO: when frames significantly exceed typical sizes, consider +/// memmap-backed pending buffers (write transferred frames to a temp file +/// or shared-memory segment) so the resident set stays bounded even when +/// the byte cap is raised. Out of scope for v0.0.0. +pub const DEFAULT_MAX_PROBE_PENDING_BYTES: usize = 256 * 1024 * 1024; + /// State carried only during the probe window (before the first successful /// frame). Holds enough information to tear down the current decoder and /// retry with the next backend. @@ -181,6 +209,7 @@ impl VideoDecoder { hw_frame: frame::Video::empty(), probe, pending_frames: VecDeque::new(), + max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES, }); } Err(e) => { @@ -207,9 +236,30 @@ impl VideoDecoder { hw_frame: frame::Video::empty(), probe: None, pending_frames: VecDeque::new(), + max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES, }) } + /// Override the byte budget for probe-replay queued frames. Defaults to + /// [`DEFAULT_MAX_PROBE_PENDING_BYTES`]. Use a higher value when targeting + /// 8K+ workloads where 16 frames at full size could exceed the default; + /// use a lower value in memory-constrained services to bound peak + /// allocation more tightly. + /// + /// Setting after the first frame has been delivered is harmless but has + /// no observable effect — the probe has already collapsed and the cap + /// only applies during replay drain. 
+ /// + /// Returns `self` for builder-style chaining: + /// ```ignore + /// let decoder = VideoDecoder::open(params)? + /// .with_max_probe_pending_bytes(1024 * 1024 * 1024); // 1 GiB + /// ``` + pub fn with_max_probe_pending_bytes(mut self, bytes: usize) -> Self { + self.max_probe_pending_bytes = bytes; + self + } + /// The backend currently producing frames. While the probe is still in /// progress (no frame received yet) this returns the optimistically /// selected backend; after the first frame, it is the backend that @@ -498,6 +548,8 @@ impl VideoDecoder { // commit they move to `self.pending_frames` and are delivered FIFO // by `receive_frame`, so the caller never loses initial frames. let mut local_pending: VecDeque = VecDeque::new(); + let mut local_pending_bytes: usize = 0; + let max_pending_bytes = self.max_probe_pending_bytes; let replay_result: std::result::Result<(), ffmpeg_next::Error> = { let probe = self.probe.as_ref().expect("probe state present"); let mut hw_buf = frame::Video::empty(); @@ -510,9 +562,13 @@ impl VideoDecoder { Err(e) if is_eagain(&e) => { // Drain candidate output (transferring + queueing each frame) // and retry the same packet. - if let Err(de) = - drain_into_pending(&mut candidate_state.inner, &mut hw_buf, &mut local_pending) - { + if let Err(de) = drain_into_pending( + &mut candidate_state.inner, + &mut hw_buf, + &mut local_pending, + &mut local_pending_bytes, + max_pending_bytes, + ) { r = Err(de); break 'replay; } @@ -727,10 +783,33 @@ fn drain_into_pending( decoder: &mut ffmpeg_next::decoder::Video, hw_buf: &mut frame::Video, pending: &mut VecDeque, + pending_bytes: &mut usize, + max_bytes: usize, ) -> std::result::Result<(), ffmpeg_next::Error> { loop { match decoder.receive_frame(hw_buf) { Ok(()) => { + // Either cap (count or bytes) closes the queue. We still drain so + // `send_packet` can resume on the next iteration; we just stop + // accumulating. 
+ // + // TODO: at very large frame sizes (8K HDR P010, > ~96 MiB each) + // even a single retained frame is significant. Future direction: + // memmap-backed pending frames (write to a temp file or shared + // memory segment) so the resident set stays bounded even when the + // byte cap is raised. Out of scope for v0.0.0. + if pending.len() >= MAX_PROBE_PENDING_FRAMES || *pending_bytes >= max_bytes { + tracing::warn!( + frames = pending.len(), + bytes = *pending_bytes, + max_frames = MAX_PROBE_PENDING_FRAMES, + max_bytes = max_bytes, + "hwdecode: probe pending cap reached; discarding drained candidate frame" + ); + // SAFETY: hw_buf is owned and valid; unref of an empty frame is a no-op. + unsafe { av_frame_unref(hw_buf.as_mut_ptr()) }; + continue; + } let mut cpu = frame::Video::empty(); // SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data // allocates buffers on `cpu`. copy_props moves timing/side data over. @@ -744,6 +823,7 @@ fn drain_into_pending( return Err(ffmpeg_next::Error::from(r2)); } } + *pending_bytes = pending_bytes.saturating_add(cpu_frame_bytes(&cpu)); pending.push_back(cpu); } Err(e) if is_transient(&e) => return Ok(()), @@ -752,6 +832,32 @@ fn drain_into_pending( } } +/// Approximate resident size of a CPU frame: sum of `linesize[plane] * +/// plane_height` across populated planes. Returns 0 for unknown formats +/// (we under-count rather than over-count, on the principle that under- +/// counting only delays the cap firing, while over-counting could starve +/// legitimate streams). +fn cpu_frame_bytes(frame: &frame::Video) -> usize { + // SAFETY: AVFrame.height / format / linesize are c_int reads. 
+ let (height, pix_fmt, linesizes) = unsafe { + let raw = frame.as_ptr(); + ((*raw).height as usize, (*raw).format, (*raw).linesize) + }; + let mut total: usize = 0; + for (plane, linesize) in linesizes.iter().enumerate() { + if *linesize <= 0 { + break; + } + let stride = *linesize as usize; + let Some(plane_h) = crate::frame::plane_height_for(pix_fmt, plane, height) else { + // Unknown format / unsupported plane index — bail out, accept under-count. + break; + }; + total = total.saturating_add(stride.saturating_mul(plane_h)); + } + total +} + #[allow(dead_code)] fn _assert_send() { fn check() {} diff --git a/src/frame.rs b/src/frame.rs index 15e903c..22f7783 100644 --- a/src/frame.rs +++ b/src/frame.rs @@ -170,7 +170,15 @@ impl Default for Frame { /// Number of rows in `plane` for a frame of `frame_height` and the given /// pixel format. `None` for formats not in the supported HW-output set. -fn plane_height_for(pix_fmt_int: i32, plane: usize, frame_height: usize) -> Option { +/// +/// Crate-internal so the decoder's probe-replay accountant can compute +/// per-frame byte sizes without re-implementing the chroma-subsampling +/// table. +pub(crate) fn plane_height_for( + pix_fmt_int: i32, + plane: usize, + frame_height: usize, +) -> Option { match pix_fmt_int { // 4:2:0 semi-planar — Y full height, chroma half height. 
pix_fmt::NV12 From d2d96a8b00d99aaa4c1f249477163eb47f5c814f Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 21:59:19 +1200 Subject: [PATCH 10/27] update --- benches/decode.rs | 4 +-- examples/decode.rs | 2 +- src/decoder.rs | 76 ++++++++++++++++++++++++++++++++++++---------- src/frame.rs | 39 +++++++++++++++--------- tests/decode.rs | 2 +- tests/hw_smoke.rs | 2 +- 6 files changed, 90 insertions(+), 35 deletions(-) diff --git a/benches/decode.rs b/benches/decode.rs index 5f53a66..9e53f0a 100644 --- a/benches/decode.rs +++ b/benches/decode.rs @@ -32,7 +32,7 @@ fn decode_all_hw(path: &PathBuf) -> Result { let stream_index = stream.index(); let mut decoder = VideoDecoder::open(stream.parameters())?; - let mut frame = Frame::empty(); + let mut frame = Frame::empty()?; let mut count = 0_usize; let mut drain = |decoder: &mut VideoDecoder, count: &mut usize| -> Result<(), hwdecode::Error> { @@ -122,7 +122,7 @@ fn bench_decode(c: &mut Criterion) { let stream_index = stream.index(); match VideoDecoder::open(stream.parameters()) { Ok(mut dec) => { - let mut frame = Frame::empty(); + let mut frame = Frame::empty().expect("alloc probe frame"); 'probe: for (s, packet) in input.packets() { if s.index() != stream_index { continue; diff --git a/examples/decode.rs b/examples/decode.rs index a1439d7..1d14de1 100644 --- a/examples/decode.rs +++ b/examples/decode.rs @@ -44,7 +44,7 @@ fn main() -> Result<(), Box> { decoder.height(), ); - let mut frame = Frame::empty(); + let mut frame = Frame::empty()?; let mut count: u64 = 0; let drain = |decoder: &mut VideoDecoder, frame: &mut Frame, count: &mut u64| loop { diff --git a/src/decoder.rs b/src/decoder.rs index 3a8067e..a7739c2 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -206,7 +206,7 @@ impl VideoDecoder { }); return Ok(Self { state, - hw_frame: frame::Video::empty(), + hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?, probe, pending_frames: VecDeque::new(), 
max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES, @@ -233,7 +233,7 @@ impl VideoDecoder { let state = Self::build_state(parameters, codec, backend)?; Ok(Self { state, - hw_frame: frame::Video::empty(), + hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?, probe: None, pending_frames: VecDeque::new(), max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES, @@ -552,7 +552,10 @@ impl VideoDecoder { let max_pending_bytes = self.max_probe_pending_bytes; let replay_result: std::result::Result<(), ffmpeg_next::Error> = { let probe = self.probe.as_ref().expect("probe state present"); - let mut hw_buf = frame::Video::empty(); + let mut hw_buf = match alloc_av_frame() { + Ok(f) => f, + Err(e) => return Err(Error::Ffmpeg(e)), + }; let mut r: std::result::Result<(), ffmpeg_next::Error> = Ok(()); 'replay: for pkt in &probe.buffered_packets { @@ -740,6 +743,21 @@ fn is_transient(e: &ffmpeg_next::Error) -> bool { is_eagain(e) || matches!(e, ffmpeg_next::Error::Eof) } +/// Allocate a fresh `frame::Video`, checking that `av_frame_alloc` did not +/// return NULL. ffmpeg-next's `frame::Video::empty()` does not surface that +/// failure and the resulting null pointer would be UB on the next field +/// access; this wrapper catches it and surfaces it as `ENOMEM`. +fn alloc_av_frame() -> std::result::Result { + let inner = frame::Video::empty(); + // SAFETY: as_ptr() just exposes the inner pointer for inspection. + if unsafe { inner.as_ptr() }.is_null() { + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + Ok(inner) +} + /// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine /// distinguishes "drain output and retry" from "stream over"). 
fn is_eagain(e: &ffmpeg_next::Error) -> bool { @@ -810,7 +828,7 @@ fn drain_into_pending( unsafe { av_frame_unref(hw_buf.as_mut_ptr()) }; continue; } - let mut cpu = frame::Video::empty(); + let mut cpu = alloc_av_frame()?; // SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data // allocates buffers on `cpu`. copy_props moves timing/side data over. unsafe { @@ -823,8 +841,26 @@ fn drain_into_pending( return Err(ffmpeg_next::Error::from(r2)); } } - *pending_bytes = pending_bytes.saturating_add(cpu_frame_bytes(&cpu)); - pending.push_back(cpu); + // Conservative byte-cap accounting: if we can't size this frame + // (unknown CPU pix_fmt — should not happen with strict get_format, + // but a misbehaving codec could surface one), discard rather than + // queue an unaccounted-for allocation. Never push something whose + // size we can't deduct from the budget. + match cpu_frame_bytes(&cpu) { + Some(bytes) => { + *pending_bytes = pending_bytes.saturating_add(bytes); + pending.push_back(cpu); + } + None => { + // SAFETY: AVFrame.format is c_int, safe to read. + let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format }; + tracing::warn!( + pix_fmt, + "hwdecode: cannot size unknown CPU pix_fmt during replay; discarding drained frame" + ); + // cpu drops here, freeing its buffers via Frame::drop. + } + } } Err(e) if is_transient(&e) => return Ok(()), Err(e) => return Err(e), @@ -833,29 +869,37 @@ fn drain_into_pending( } /// Approximate resident size of a CPU frame: sum of `linesize[plane] * -/// plane_height` across populated planes. Returns 0 for unknown formats -/// (we under-count rather than over-count, on the principle that under- -/// counting only delays the cap firing, while over-counting could starve -/// legitimate streams). -fn cpu_frame_bytes(frame: &frame::Video) -> usize { +/// plane_height` across populated planes. 
+/// +/// Returns `None` for pixel formats not in our chroma-subsampling table, +/// so the caller can refuse to queue an allocation it can't account for. +/// Returning 0 for unknown formats would silently bypass the byte cap and +/// let an unbounded number of large frames into `pending_frames`. +fn cpu_frame_bytes(frame: &frame::Video) -> Option { // SAFETY: AVFrame.height / format / linesize are c_int reads. let (height, pix_fmt, linesizes) = unsafe { let raw = frame.as_ptr(); ((*raw).height as usize, (*raw).format, (*raw).linesize) }; let mut total: usize = 0; + let mut any_plane = false; for (plane, linesize) in linesizes.iter().enumerate() { if *linesize <= 0 { break; } + any_plane = true; let stride = *linesize as usize; - let Some(plane_h) = crate::frame::plane_height_for(pix_fmt, plane, height) else { - // Unknown format / unsupported plane index — bail out, accept under-count. - break; - }; + // If we can't size *any* populated plane, the format is outside our + // table — refuse to size the frame at all (conservative; discarding + // is safer than under-counting against the byte cap). + let plane_h = crate::frame::plane_height_for(pix_fmt, plane, height)?; total = total.saturating_add(stride.saturating_mul(plane_h)); } - total + if !any_plane { + // Genuinely empty frame (no populated planes) — nothing to account for. + return Some(0); + } + Some(total) } #[allow(dead_code)] diff --git a/src/frame.rs b/src/frame.rs index 22f7783..65ec63d 100644 --- a/src/frame.rs +++ b/src/frame.rs @@ -20,7 +20,10 @@ use std::slice; use ffmpeg_next::frame; -use crate::pix_fmt; +use crate::{ + error::{Error, Result}, + pix_fmt, +}; /// CPU-side decoded video frame produced by [`crate::VideoDecoder`]. pub struct Frame { @@ -30,10 +33,20 @@ pub struct Frame { impl Frame { /// Construct an empty frame, suitable as the destination passed to /// [`crate::VideoDecoder::receive_frame`]. 
- pub fn empty() -> Self { - Self { - inner: frame::Video::empty(), + /// + /// Returns `Err(Error::Ffmpeg(Other { errno: ENOMEM }))` when the + /// underlying `av_frame_alloc()` returns NULL — `ffmpeg_next` does not + /// surface that failure, so we check it here rather than letting a null + /// pointer flow into the safe accessors and become UB on first read. + pub fn empty() -> Result { + // SAFETY: as_ptr() is safe; we just inspect the value (potentially null). + let inner = frame::Video::empty(); + if unsafe { inner.as_ptr() }.is_null() { + return Err(Error::Ffmpeg(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + })); } + Ok(Self { inner }) } /// Width in pixels. @@ -162,11 +175,9 @@ impl Frame { } } -impl Default for Frame { - fn default() -> Self { - Self::empty() - } -} +// `Default` intentionally omitted: constructing a frame can fail (OOM +// in `av_frame_alloc`), and a panicking `default()` would defeat the +// safety stance of [`Frame::empty`]. Use `Frame::empty()?` directly. /// Number of rows in `plane` for a frame of `frame_height` and the given /// pixel format. `None` for formats not in the supported HW-output set. @@ -213,7 +224,7 @@ mod tests { #[test] fn empty_frame_has_zero_dimensions_and_no_pts() { - let f = Frame::empty(); + let f = Frame::empty().expect("alloc"); assert_eq!(f.width(), 0); assert_eq!(f.height(), 0); assert_eq!(f.pts(), None); @@ -225,7 +236,7 @@ mod tests { #[test] fn data_returns_none_for_unknown_format() { - let f = Frame::empty(); + let f = Frame::empty().expect("alloc"); // pix_fmt is NONE (-1), not in the supported set. assert!(f.data(0).is_none()); } @@ -236,7 +247,7 @@ mod tests { /// huge positive length and `from_raw_parts` would be UB. 
#[test] fn data_returns_none_for_negative_linesize() { - let mut f = Frame::empty(); + let mut f = Frame::empty().expect("alloc"); unsafe { let raw = f.inner.as_mut_ptr(); (*raw).format = pix_fmt::NV12; @@ -253,7 +264,7 @@ mod tests { #[test] fn data_returns_none_for_non_positive_height() { - let mut f = Frame::empty(); + let mut f = Frame::empty().expect("alloc"); unsafe { let raw = f.inner.as_mut_ptr(); (*raw).format = pix_fmt::NV12; @@ -268,7 +279,7 @@ mod tests { #[test] #[should_panic(expected = "non-positive linesize")] fn stride_panics_on_negative_linesize() { - let mut f = Frame::empty(); + let mut f = Frame::empty().expect("alloc"); unsafe { let raw = f.inner.as_mut_ptr(); (*raw).linesize[0] = -1920; diff --git a/tests/decode.rs b/tests/decode.rs index 10a8bcb..2431ff1 100644 --- a/tests/decode.rs +++ b/tests/decode.rs @@ -44,7 +44,7 @@ fn auto_open_decodes_at_least_one_frame() { assert_eq!(decoder.width(), expected_w); assert_eq!(decoder.height(), expected_h); - let mut frame = Frame::empty(); + let mut frame = Frame::empty().expect("alloc frame"); let mut count = 0_usize; let target = 30_usize; diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs index 6e11765..372c8f7 100644 --- a/tests/hw_smoke.rs +++ b/tests/hw_smoke.rs @@ -32,7 +32,7 @@ fn auto_probe_picks_hardware_backend() { // backend that actually produced it. Checking `decoder.backend()` before // any frame has been received would observe the optimistic pre-probe // value and could false-pass when a HW backend silently degrades. 
- let mut frame = Frame::empty(); + let mut frame = Frame::empty().expect("alloc frame"); let mut got_frame = false; for (s, packet) in input.packets() { if s.index() != stream_index { From e1899e63e1aea2aabb3da6cca5382d40ddf9bb8f Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:17:29 +1200 Subject: [PATCH 11/27] update --- Cargo.toml | 2 +- README.md | 8 +++++-- docs/design.md | 36 ++++++++++++++++++---------- src/decoder.rs | 64 +++++++++++++++++++++++++++++++++----------------- src/lib.rs | 21 +++++++++-------- 5 files changed, 84 insertions(+), 47 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8e4ea79..7691656 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "hwdecode" version = "0.0.0" edition = "2021" rust-version = "1.95" -description = "Cross-platform hardware-accelerated video decoder built on top of ffmpeg-next, with auto-probe and software fallback." +description = "Cross-platform hardware-only video decoder built on top of ffmpeg-next, with auto-probe across HW backends. Callers handle software fallback." repository = "https://github.com/findit-ai/hwdecode" homepage = "https://github.com/findit-ai/hwdecode" documentation = "https://docs.rs/hwdecode" diff --git a/README.md b/README.md index 007eac3..c4e9108 100644 --- a/README.md +++ b/README.md @@ -50,13 +50,17 @@ while decoder.receive_frame(&mut frame).is_ok() { } ``` -To force a specific backend (no probe, no fallback): +To force a specific hardware backend (no probe, no fallback): ```rust use hwdecode::{Backend, VideoDecoder}; -let decoder = VideoDecoder::open_with(parameters, Backend::Software)?; +let decoder = VideoDecoder::open_with(parameters, Backend::VideoToolbox)?; ``` +`hwdecode` is hardware-only: there is no `Backend::Software`. If `open` +returns `Error::AllBackendsFailed`, fall back to a software decoder +yourself (typically `ffmpeg::decoder::Video`). 
+ ## Running tests and benches The integration test and benchmark expect a real video file. Set diff --git a/docs/design.md b/docs/design.md index 6acc8c5..2c54aee 100644 --- a/docs/design.md +++ b/docs/design.md @@ -1,12 +1,20 @@ # hwdecode — design -Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next` 8.1. +Cross-platform **hardware-only** video decoder built on top of `ffmpeg-next` 8.1. + +> **Status note.** This document was the original spec from the brainstorm +> phase and parts have evolved since: the crate is hardware-only (no +> `Backend::Software`), `Frame` is its own safe wrapper, and several pixel- +> format / safety details were tightened during review. For the canonical +> behavior, read `src/lib.rs` and `README.md`. Sections below have been +> trimmed where they conflicted; the spec is otherwise preserved as +> historical context. ## Goals - Drop-in replacement for `ffmpeg::decoder::Video` at the call site (`send_packet` / `receive_frame` / `send_eof` / `flush`). -- Auto-probe the platform's hardware backends and silently fall back to software if none open. Caller never has to think about hwaccel availability. -- Hand back native-format CPU frames (NV12/P010 from the HW path, codec-native from the SW path). Pixel-format conversion is the caller's responsibility (e.g. via `colconv`). +- Auto-probe the platform's hardware backends. **No software fallback inside this crate** — callers handle that themselves (e.g. via `ffmpeg::decoder::Video`) when `open` returns `Error::AllBackendsFailed`. +- Hand back native-format CPU frames (NV12/P010 from the HW path post-transfer). Pixel-format conversion is the caller's responsibility (e.g. via `colconv`). - Cross-platform: macOS / iOS / iPadOS / tvOS, Linux (Intel/AMD/NVIDIA), Windows (any GPU + CUDA on NVIDIA). 
## Non-goals @@ -22,8 +30,10 @@ Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next` pub struct VideoDecoder { /* private */ } impl VideoDecoder { - /// Auto-probe HW backends in platform order; fall back to software. - /// On success, `backend()` reports the one that won. + /// Auto-probe HW backends in platform order. Returns + /// `Error::AllBackendsFailed` if no backend can decode this stream; + /// caller falls back to software decoder of choice. On success, + /// `backend()` reports the one that won. pub fn open(parameters: ffmpeg::codec::Parameters) -> Result; /// Force a specific backend. No probe, no fallback. @@ -39,11 +49,11 @@ impl VideoDecoder { pub fn send_packet(&mut self, packet: &ffmpeg::Packet) -> Result<(), Error>; pub fn send_eof(&mut self) -> Result<(), Error>; - /// Receive a CPU-side frame. For HW backends, internally calls + /// Receive a CPU-side frame. Internally calls /// `av_hwframe_transfer_data` and copies PTS/timing onto the result; - /// output format is NV12 (8-bit) or P010 (10-bit). For SW, the frame - /// is in the codec's native format. - pub fn receive_frame(&mut self, frame: &mut ffmpeg::frame::Video) -> Result<(), Error>; + /// output format is NV12 (8-bit) or P010 (10-bit) per the HW backend's + /// `AVHWFramesContext::sw_format`. + pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<(), Error>; pub fn flush(&mut self); } @@ -89,7 +99,7 @@ Always device 0 / system default (`av_hwdevice_ctx_create(.., NULL, ..)`). No en ### `get_format` callback -A static `extern "C"` callback. The decoder context's `opaque` field points to a small heap-allocated `CallbackState { wanted: AVPixelFormat }`. The callback walks the offered `pix_fmts` list, returns `wanted` if present, else `AV_PIX_FMT_NONE` (which forces FFmpeg to retry with software). This is the standard pattern from `doc/examples/hw_decode.c`. +A static `extern "C"` callback. 
The decoder context's `opaque` field points to a small heap-allocated `CallbackState`. The callback walks the offered `pix_fmts` list as raw `i32` (avoiding bindgen-enum UB on header skew), returns `wanted` if present, else `AV_PIX_FMT_NONE` (which causes the decoder to fail; the caller-side probe loop then tears down and tries the next hardware backend). ### Frame transfer @@ -138,7 +148,7 @@ VideoToolbox enabled). 1. **Unit tests** (`src/backend.rs`, `src/error.rs`) — pure-Rust: probe-order construction per platform, `Backend` ↔ `AVHWDeviceType` mapping, error formatting. 2. **Integration** (`tests/decode.rs`) — opens a sample H.264 file via `ffmpeg::format::input`, decodes 30 frames through `VideoDecoder::open` (auto-probe), asserts frame count and dimensions. Sample path comes from env var `HWDECODE_SAMPLE_VIDEO`; test is skipped with a clear `eprintln!` if unset. -3. **HW smoke** (`tests/hw_smoke.rs`, `#[ignore]`) — same decode, but additionally asserts `decoder.backend() != Backend::Software`. CI runs this on platform-matched runners. +3. **HW smoke** (`tests/hw_smoke.rs`, `#[ignore]`) — same decode, asserts `decoder.backend()` returns one of the hardware variants (the enum no longer has a Software variant; this is a sanity check against accidental no-op selection). CI runs this on platform-matched runners. Sample-file env var keeps the repo binary-free. Documented in `README.md`. @@ -146,8 +156,8 @@ Sample-file env var keeps the repo binary-free. Documented in `README.md`. `benches/decode.rs` (criterion) — two functions: -- `bench_software_decode` — `VideoDecoder::open_with(.., Backend::Software)`, decode all frames of the sample, measure wall-clock per frame. -- `bench_hardware_decode` — `VideoDecoder::open(..)` (auto-probe). Skipped (`return`) if `decoder.backend() == Backend::Software` (no HW available). 
+- `bench_software_decode` — drives `ffmpeg::decoder::Video` directly (this crate has no software backend), decodes all frames, measures wall-clock per frame. +- `bench_hardware_decode` — `VideoDecoder::open(..)` (auto-probe). Skipped if `open` returns `AllBackendsFailed` (no HW backend available on this host). Both use the same `HWDECODE_SAMPLE_VIDEO` file. Bench prints which backend the HW run actually used, so results are interpretable across machines. diff --git a/src/decoder.rs b/src/decoder.rs index a7739c2..cbc995b 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -29,7 +29,13 @@ use crate::{ frame::Frame, }; -/// Hardware-accelerated video decoder with software fallback. +/// Hardware-accelerated video decoder. +/// +/// Hardware-only — there is no software fallback inside this crate. If +/// every hardware backend in the platform's probe order fails to open, +/// `open` returns [`Error::AllBackendsFailed`] and the caller is +/// responsible for falling back to a software decoder of their choice +/// (e.g. `ffmpeg::decoder::Video`). /// /// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface. /// Decoded frames are returned through [`crate::Frame`], a CPU-side wrapper @@ -183,9 +189,9 @@ impl VideoDecoder { /// consumed, so a misbehaving middle backend cannot strand the caller. /// /// [`Self::backend`] reflects whichever backend ultimately produced the - /// first frame. Software is the last entry in every probe order, so - /// `open` cannot return without a working decoder for any codec libavcodec - /// supports. + /// first frame. If no hardware backend in the platform's probe order can + /// decode this stream, `open` returns [`Error::AllBackendsFailed`]; + /// callers handle software fallback themselves. pub fn open(parameters: codec::Parameters) -> Result { let codec = find_decoder(&parameters)?; let order = backend::probe_order(); @@ -223,11 +229,11 @@ impl VideoDecoder { /// Open the decoder with a specific backend.
No probe, no fallback. /// - /// If `backend` is a hardware backend that the codec can't actually use - /// for this stream, the failure surfaces from - /// [`Self::receive_frame`] (the strict `get_format` callback returns + /// If `backend` cannot actually decode this stream, the failure surfaces + /// from [`Self::receive_frame`] (the strict `get_format` callback returns /// `AV_PIX_FMT_NONE`, the decoder errors out). The caller is responsible - /// for retrying with `Backend::Software` or another backend if desired. + /// for retrying with another hardware backend or falling back to a + /// software decoder of their choice (e.g. `ffmpeg::decoder::Video`). pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result { let codec = find_decoder(&parameters)?; let state = Self::build_state(parameters, codec, backend)?; @@ -807,9 +813,11 @@ fn drain_into_pending( loop { match decoder.receive_frame(hw_buf) { Ok(()) => { - // Either cap (count or bytes) closes the queue. We still drain so - // `send_packet` can resume on the next iteration; we just stop - // accumulating. + // Pre-transfer cap check: if we are already at or over either cap, + // the candidate is producing more than we can hold. Treat as an + // explicit candidate failure so `advance_probe` can try the next + // backend instead of committing a stream with silently-dropped + // frames in the middle. // // TODO: at very large frame sizes (8K HDR P010, > ~96 MiB each) // even a single retained frame is significant. Future direction: @@ -822,11 +830,13 @@ fn drain_into_pending( bytes = *pending_bytes, max_frames = MAX_PROBE_PENDING_FRAMES, max_bytes = max_bytes, - "hwdecode: probe pending cap reached; discarding drained candidate frame" + "hwdecode: probe pending cap reached; failing candidate replay" ); // SAFETY: hw_buf is owned and valid; unref of an empty frame is a no-op.
unsafe { av_frame_unref(hw_buf.as_mut_ptr()) }; - continue; + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); } let mut cpu = alloc_av_frame()?; // SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data @@ -841,14 +851,26 @@ fn drain_into_pending( return Err(ffmpeg_next::Error::from(r2)); } } - // Conservative byte-cap accounting: if we can't size this frame - // (unknown CPU pix_fmt — should not happen with strict get_format, - // but a misbehaving codec could surface one), discard rather than - // queue an unaccounted-for allocation. Never push something whose - // size we can't deduct from the budget. + // Post-transfer accounting: size the frame and confirm we can fit + // it without exceeding the byte budget. If sizing fails (unknown + // pix_fmt) we still queue the frame — the count cap (16) bounds + // memory — but log that byte accounting under-counts. match cpu_frame_bytes(&cpu) { Some(bytes) => { - *pending_bytes = pending_bytes.saturating_add(bytes); + let new_total = pending_bytes.saturating_add(bytes); + if new_total > max_bytes { + tracing::warn!( + pending_bytes = *pending_bytes, + frame_bytes = bytes, + max_bytes, + "hwdecode: queueing this frame would exceed byte cap; failing candidate replay" + ); + // cpu drops here. + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + *pending_bytes = new_total; pending.push_back(cpu); } None => { @@ -856,9 +878,9 @@ fn drain_into_pending( let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format }; tracing::warn!( pix_fmt, - "hwdecode: cannot size unknown CPU pix_fmt during replay; discarding drained frame" + "hwdecode: unknown CPU pix_fmt during replay; queueing without byte accounting (count cap still applies)" ); - // cpu drops here, freeing its buffers via Frame::drop. + pending.push_back(cpu); } } } diff --git a/src/lib.rs b/src/lib.rs index e6c12ce..b487132 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,17 @@ -//! 
Cross-platform hardware-accelerated video decoder built on top of `ffmpeg-next`. +//! Cross-platform **hardware** video decoder built on top of `ffmpeg-next`. //! //! [`VideoDecoder`] mirrors the surface of `ffmpeg::decoder::Video` -//! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and silently picks the best -//! hardware backend for the host platform, falling back to software if none open. +//! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and auto-probes the +//! host's hardware backends (VideoToolbox / VAAPI / NVDEC / D3D11VA). +//! There is **no software fallback inside this crate** — if no hardware +//! backend can decode the stream, [`VideoDecoder::open`] returns +//! [`Error::AllBackendsFailed`] and the caller picks how to fall back +//! (e.g. by opening an `ffmpeg::decoder::Video` directly). //! -//! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side. For -//! hardware backends they are downloaded with `av_hwframe_transfer_data` (NV12 -//! for 8-bit input, P010 for 10-bit). For software backends the frame is in the -//! codec's native format. -//! -//! Pixel-format conversion is intentionally out of scope; downstream code is -//! expected to handle that (e.g. via `colconv`). +//! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side +//! and downloaded via `av_hwframe_transfer_data` (NV12 for 8-bit input, +//! P010 for 10-bit). Pixel-format conversion is intentionally out of +//! scope; downstream code handles that (e.g. via `colconv`). 
#![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(docsrs, allow(unused_attributes))] #![deny(missing_docs)] From d351f1fb1a5539e54bc307067b01dfa9a36db960 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:31:58 +1200 Subject: [PATCH 12/27] update --- src/decoder.rs | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index cbc995b..06b9504 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -202,8 +202,15 @@ impl VideoDecoder { Ok(state) => { tracing::info!(?backend, "hwdecode: opened video decoder (probing)"); let remaining = order[(i + 1)..].to_vec(); + // Deep-copy the caller's `parameters` before storing in ProbeState. + // `codec::Parameters` from `stream.parameters()` carries an Rc + // owner pointing at the demuxer; moving that Rc to a worker + // thread (when VideoDecoder is sent) would race with the demuxer's + // Rc on the original thread. `Parameters::clone()` does + // `avcodec_parameters_copy` and returns a fully owned Parameters + // with `owner: None`, severing the link. let probe = (!remaining.is_empty()).then(|| ProbeState { - parameters, + parameters: parameters.clone(), codec, remaining_backends: remaining, buffered_packets: Vec::new(), @@ -852,9 +859,9 @@ fn drain_into_pending( } } // Post-transfer accounting: size the frame and confirm we can fit - // it without exceeding the byte budget. If sizing fails (unknown - // pix_fmt) we still queue the frame — the count cap (16) bounds - // memory — but log that byte accounting under-counts. + // it without exceeding the byte budget. Both cap-hit and inability + // to size the frame are treated as candidate failures, so the byte + // budget is *strict* — we never queue a frame we can't account for. 
match cpu_frame_bytes(&cpu) { Some(bytes) => { let new_total = pending_bytes.saturating_add(bytes); @@ -874,13 +881,21 @@ fn drain_into_pending( pending.push_back(cpu); } None => { + // Unknown pix_fmt — we cannot bound this frame's contribution + // against the byte cap, so up to MAX_PROBE_PENDING_FRAMES of + // them could exhaust memory. Fail the candidate so probing + // tries the next backend rather than queueing untracked + // allocations. // SAFETY: AVFrame.format is c_int, safe to read. let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format }; tracing::warn!( pix_fmt, - "hwdecode: unknown CPU pix_fmt during replay; queueing without byte accounting (count cap still applies)" + "hwdecode: cannot size unknown CPU pix_fmt during replay; failing candidate" ); - pending.push_back(cpu); + // cpu drops here. + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); } } } From 3b888d81b77027bb2650e23234b15f32d55124cf Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:52:12 +1200 Subject: [PATCH 13/27] update --- src/decoder.rs | 95 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 5 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index 06b9504..cb325fd 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -1,10 +1,16 @@ use std::{collections::VecDeque, mem::ManuallyDrop, ptr}; use ffmpeg_next::{ - codec::{self, Context}, + codec::{ + self, + packet::{Mut as PacketMut, Ref as PacketRef}, + Context, + }, ffi::{ av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref, - av_hwdevice_ctx_create, av_hwframe_transfer_data, AVBufferRef, AVCodec, + av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_make_writable, av_packet_ref, + avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_to_context, AVBufferRef, + AVCodec, }, frame, Codec, Packet, Rational, }; @@ -341,8 +347,23 @@ impl VideoDecoder { self.probe = None; 
self.pending_frames.clear(); } else { - probe.buffered_packets.push(packet.clone()); - probe.buffered_bytes = new_bytes; + // Use the checked clone — ffmpeg-next's `Packet::clone` + // discards av_packet_ref's return value and would silently + // store an empty packet on ENOMEM, corrupting future replay. + match try_clone_packet(packet) { + Ok(cloned) => { + probe.buffered_packets.push(cloned); + probe.buffered_bytes = new_bytes; + } + Err(e) => { + tracing::warn!( + error = %e, + "hwdecode: packet clone failed for probe history; abandoning fallback safety net" + ); + self.probe = None; + self.pending_frames.clear(); + } + } } } return Ok(()); @@ -518,6 +539,14 @@ impl VideoDecoder { /// on the very first inspection (e.g. a malformed `Parameters`); the /// per-candidate failures during the loop are absorbed and logged. fn advance_probe(&mut self) -> Result { + // Drop frames previously queued from the backend we're now abandoning. + // They came from a candidate that just failed for cause and cannot be + // trusted alongside frames we may queue from the next candidate. (If + // this method is called repeatedly via chained probe advances, this + // also keeps `pending_frames` from accumulating frames from multiple + // rejected backends.) + self.pending_frames.clear(); + loop { // Snapshot inputs without mutating probe state. let (next_backend, parameters, codec) = match self.probe.as_ref() { @@ -642,7 +671,10 @@ impl VideoDecoder { codec: Codec, backend: Backend, ) -> Result { - let mut ctx = Context::from_parameters(parameters)?; + // Use our checked allocator instead of Context::from_parameters, which + // does not null-check avcodec_alloc_context3 and would feed a null + // AVCodecContext into FFmpeg under OOM. + let mut ctx = build_codec_context(&parameters)?; let av_type = backend.av_hwdevice_type(); // Verify the codec advertises this hwaccel.
We do *not* read the @@ -771,6 +803,59 @@ fn alloc_av_frame() -> std::result::Result { Ok(inner) } +/// Build a fresh `Context` from `parameters`, checking the underlying +/// `avcodec_alloc_context3` for NULL before passing it to +/// `avcodec_parameters_to_context`. ffmpeg-next's `Context::from_parameters` +/// skips that check and would feed a null pointer into FFmpeg under OOM — +/// undefined behavior. This helper surfaces the failure as `ENOMEM` and +/// frees the context if `parameters_to_context` itself errors. +fn build_codec_context(parameters: &codec::Parameters) -> Result { + // SAFETY: avcodec_alloc_context3(NULL) returns a fresh AVCodecContext + // or NULL on allocation failure. + let ctx_ptr = unsafe { avcodec_alloc_context3(ptr::null()) }; + if ctx_ptr.is_null() { + return Err(Error::Ffmpeg(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + })); + } + // SAFETY: ctx_ptr is non-null and freshly allocated; parameters.as_ptr() + // returns a valid AVCodecParameters pointer; the function copies bytes + // out of parameters into the context. + let ret = unsafe { avcodec_parameters_to_context(ctx_ptr, parameters.as_ptr()) }; + if ret < 0 { + // SAFETY: ctx_ptr was allocated by us and never handed to anyone else. + let mut p = ctx_ptr; + unsafe { avcodec_free_context(&mut p) }; + return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); + } + // SAFETY: ctx_ptr is valid; passing `owner: None` means our wrapper owns + // the allocation and `Context::drop` will run `avcodec_free_context`. + Ok(unsafe { Context::wrap(ctx_ptr, None) }) +} + +/// Checked counterpart to `Packet::clone()`. ffmpeg-next's `clone_from` +/// calls `av_packet_ref` and ignores the int return value; on `ENOMEM` +/// the destination is left empty while the caller assumes the clone +/// succeeded — corrupting any later replay history. This helper surfaces +/// the AVERROR. 
+fn try_clone_packet(src: &Packet) -> std::result::Result { + let mut dst = Packet::empty(); + // SAFETY: dst is a freshly zero-initialized Packet (av_init_packet inside + // Packet::empty); av_packet_ref initializes its data fields from src's + // refcounted buffer or returns AVERROR(ENOMEM) on failure. + let ret = unsafe { av_packet_ref(dst.as_mut_ptr(), src.as_ptr()) }; + if ret < 0 { + return Err(ffmpeg_next::Error::from(ret)); + } + // av_packet_make_writable allocates a writable copy if the buffer is + // shared. Can also fail with ENOMEM. + let ret = unsafe { av_packet_make_writable(dst.as_mut_ptr()) }; + if ret < 0 { + return Err(ffmpeg_next::Error::from(ret)); + } + Ok(dst) +} + /// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine /// distinguishes "drain output and retry" from "stream over"). fn is_eagain(e: &ffmpeg_next::Error) -> bool { From a95968d1bf780b2dd3d130565deded5370cb94da Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 23:29:39 +1200 Subject: [PATCH 14/27] update --- src/decoder.rs | 120 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 101 insertions(+), 19 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index cb325fd..aee8366 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -9,7 +9,8 @@ use ffmpeg_next::{ ffi::{ av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref, av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_make_writable, av_packet_ref, - avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_to_context, AVBufferRef, + avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_alloc, + avcodec_parameters_copy, avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef, AVCodec, }, frame, Codec, Packet, Rational, @@ -204,7 +205,19 @@ impl VideoDecoder { let mut attempts: Vec<(Backend, Box)> = Vec::new(); for (i, &backend) in order.iter().enumerate() { - match 
Self::build_state(parameters.clone(), codec, backend) { + // Use the checked clone — ffmpeg-next's `Parameters::clone` does + // `avcodec_parameters_alloc` without a null check and ignores the + // return of `avcodec_parameters_copy`. Under OOM that path silently + // produces a Parameters with a null inner pointer. + let cloned_for_build = match try_clone_parameters(&parameters) { + Ok(p) => p, + Err(e) => { + tracing::warn!(?backend, error = %e, "hwdecode: parameters clone failed"); + attempts.push((backend, Box::new(Error::Ffmpeg(e)))); + continue; + } + }; + match Self::build_state(cloned_for_build, codec, backend) { Ok(state) => { tracing::info!(?backend, "hwdecode: opened video decoder (probing)"); let remaining = order[(i + 1)..].to_vec(); @@ -212,17 +225,33 @@ impl VideoDecoder { // `codec::Parameters` from `stream.parameters()` carries an Rc // owner pointing at the demuxer; moving that Rc to a worker // thread (when VideoDecoder is sent) would race with the demuxer's - // Rc on the original thread. `Parameters::clone()` does - // `avcodec_parameters_copy` and returns a fully owned Parameters - // with `owner: None`, severing the link. - let probe = (!remaining.is_empty()).then(|| ProbeState { - parameters: parameters.clone(), - codec, - remaining_backends: remaining, - buffered_packets: Vec::new(), - buffered_bytes: 0, - eof_sent: false, - }); + // Rc on the original thread. The checked clone copies the bytes + // into a fresh allocation with `owner: None`, severing the link. + // + // If the clone fails (ENOMEM), we keep the active `state` but + // skip probe setup — caller loses cross-backend fallback safety + // net but still gets a working decoder.
+ let probe = if remaining.is_empty() { + None + } else { + match try_clone_parameters(&parameters) { + Ok(probe_params) => Some(ProbeState { + parameters: probe_params, + codec, + remaining_backends: remaining, + buffered_packets: Vec::new(), + buffered_bytes: 0, + eof_sent: false, + }), + Err(e) => { + tracing::warn!( + error = %e, + "hwdecode: parameters clone failed for probe state; proceeding without fallback" + ); + None + } + } + }; return Ok(Self { state, hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?, @@ -548,13 +577,28 @@ impl VideoDecoder { self.pending_frames.clear(); loop { - // Snapshot inputs without mutating probe state. + // Snapshot inputs without mutating probe state. Use the checked + // clone helper rather than `Parameters::clone` (which masks ENOMEM). let (next_backend, parameters, codec) = match self.probe.as_ref() { - Some(probe) if !probe.remaining_backends.is_empty() => ( - probe.remaining_backends[0], - probe.parameters.clone(), - probe.codec, - ), + Some(probe) if !probe.remaining_backends.is_empty() => { + let parameters = match try_clone_parameters(&probe.parameters) { + Ok(p) => p, + Err(e) => { + tracing::warn!( + error = %e, + "hwdecode: parameters clone failed during probe advance; popping backend and trying next" + ); + self + .probe + .as_mut() + .expect("probe state present") + .remaining_backends + .remove(0); + continue; + } + }; + (probe.remaining_backends[0], parameters, probe.codec) + } _ => return Ok(false), }; @@ -833,6 +877,44 @@ fn build_codec_context(parameters: &codec::Parameters) -> Result<Context> { Ok(unsafe { Context::wrap(ctx_ptr, None) }) } +/// Checked deep-clone of `codec::Parameters`. ffmpeg-next's +/// `Parameters::clone` allocates via `avcodec_parameters_alloc` without +/// checking for NULL and runs `avcodec_parameters_copy` without checking +/// the return code. On `ENOMEM` the result is a `Parameters` with a null +/// inner pointer, which becomes UB when later passed to FFmpeg.
+/// +/// This helper performs both calls explicitly, frees a partial allocation +/// on failure, and surfaces the AVERROR. The returned `Parameters` has +/// `owner: None`, severing any Rc link to the caller's demuxer (the +/// reason we deep-clone in the first place — see Send safety in +/// `VideoDecoder::open`). +fn try_clone_parameters( + src: &codec::Parameters, +) -> std::result::Result { + // SAFETY: avcodec_parameters_alloc returns a fresh AVCodecParameters + // pointer or NULL on allocation failure. + let dst_ptr = unsafe { avcodec_parameters_alloc() }; + if dst_ptr.is_null() { + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + // SAFETY: dst_ptr is non-null and freshly allocated; src.as_ptr() is + // a valid AVCodecParameters pointer; the function copies bytes from + // src into dst. + let ret = unsafe { avcodec_parameters_copy(dst_ptr, src.as_ptr()) }; + if ret < 0 { + // SAFETY: dst_ptr was allocated by us and never handed out. + let mut p = dst_ptr; + unsafe { avcodec_parameters_free(&mut p) }; + return Err(ffmpeg_next::Error::from(ret)); + } + // SAFETY: dst_ptr is a valid AVCodecParameters; passing `owner: None` + // means our wrapper owns the allocation and `Parameters::drop` will + // call `avcodec_parameters_free`. + Ok(unsafe { codec::Parameters::wrap(dst_ptr, None) }) +} + /// Checked counterpart to `Packet::clone()`. 
ffmpeg-next's `clone_from` /// calls `av_packet_ref` and ignores the int return value; on `ENOMEM` /// the destination is left empty while the caller assumes the clone From ab25046e7e9bf01fabf399e94bb647101131bb1c Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Sun, 26 Apr 2026 23:48:49 +1200 Subject: [PATCH 15/27] update --- src/decoder.rs | 65 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index aee8366..c9e99eb 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -832,6 +832,23 @@ fn is_transient(e: &ffmpeg_next::Error) -> bool { is_eagain(e) || matches!(e, ffmpeg_next::Error::Eof) } +/// Reject a `codec::Parameters` whose inner `*mut AVCodecParameters` is +/// null. This guards the public trust boundary: ffmpeg-next can produce +/// such a `Parameters` under OOM (`Parameters::new()` does not check +/// `avcodec_parameters_alloc`), and a safe caller can legally hand one +/// in. Without this check, the very next `(*p.as_ptr()).field` read +/// would be a null deref. +fn ensure_parameters_non_null(parameters: &codec::Parameters) -> Result<()> { + // SAFETY: as_ptr() returns the inner *const AVCodecParameters; we just + // inspect the pointer value (no deref). + if unsafe { parameters.as_ptr() }.is_null() { + return Err(Error::Ffmpeg(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + })); + } + Ok(()) +} + /// Allocate a fresh `frame::Video`, checking that `av_frame_alloc` did not /// return NULL. ffmpeg-next's `frame::Video::empty()` does not surface that /// failure and the resulting null pointer would be UB on the next field @@ -854,6 +871,7 @@ fn alloc_av_frame() -> std::result::Result { /// undefined behavior. This helper surfaces the failure as `ENOMEM` and /// frees the context if `parameters_to_context` itself errors. 
fn build_codec_context(parameters: &codec::Parameters) -> Result { + ensure_parameters_non_null(parameters)?; // SAFETY: avcodec_alloc_context3(NULL) returns a fresh AVCodecContext // or NULL on allocation failure. let ctx_ptr = unsafe { avcodec_alloc_context3(ptr::null()) }; @@ -891,6 +909,13 @@ fn build_codec_context(parameters: &codec::Parameters) -> Result { fn try_clone_parameters( src: &codec::Parameters, ) -> std::result::Result { + // Reject a null inner pointer at the boundary; a deref inside + // avcodec_parameters_copy below would otherwise be UB. + if unsafe { src.as_ptr() }.is_null() { + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } // SAFETY: avcodec_parameters_alloc returns a fresh AVCodecParameters // pointer or NULL on allocation failure. let dst_ptr = unsafe { avcodec_parameters_alloc() }; @@ -949,10 +974,11 @@ fn is_eagain(e: &ffmpeg_next::Error) -> bool { /// `addr_of!` + `ptr::read` so a value not in our build's discriminant /// set never invokes UB. fn find_decoder(parameters: &codec::Parameters) -> Result { - // SAFETY: parameters owns a valid AVCodecParameters; addr_of! projects - // to the codec_id field; the *const u32 cast is sound because AVCodecID - // is `#[repr(u32)]` (same size and alignment as u32). Reading as u32 - // cannot be UB regardless of the value FFmpeg wrote. + ensure_parameters_non_null(parameters)?; + // SAFETY: parameters' inner pointer is non-null (checked above); + // addr_of! projects to the codec_id field; the *const u32 cast is sound + // because AVCodecID is `#[repr(u32)]` (same size and alignment as u32). + // Reading as u32 cannot be UB regardless of the value FFmpeg wrote. 
let raw_id: u32 = unsafe { ptr::read(ptr::addr_of!((*parameters.as_ptr()).codec_id) as *const u32) }; @@ -1137,4 +1163,35 @@ mod tests { let other = ffmpeg_next::Error::InvalidData; assert!(!is_transient(&other)); } + + /// Regression: a `codec::Parameters` with a null inner pointer must be + /// rejected at the entrypoint, not deref'd. ffmpeg-next's + /// `Parameters::new()` does not check `avcodec_parameters_alloc()`, so a + /// safe caller can hand us such a value under OOM. + #[test] + fn open_rejects_null_parameters() { + // SAFETY: Parameters::wrap accepts any pointer; we explicitly construct + // one with null inner. avcodec_parameters_free is null-safe on Drop. + let null_params = unsafe { codec::Parameters::wrap(std::ptr::null_mut(), None) }; + match VideoDecoder::open(null_params) { + Ok(_) => panic!("open should fail on null parameters"), + Err(Error::Ffmpeg(ffmpeg_next::Error::Other { errno })) => { + assert_eq!(errno, libc::ENOMEM, "expected ENOMEM, got {errno}"); + } + Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"), + } + } + + #[test] + fn open_with_rejects_null_parameters() { + // SAFETY: see open_rejects_null_parameters. 
+ let null_params = unsafe { codec::Parameters::wrap(std::ptr::null_mut(), None) }; + match VideoDecoder::open_with(null_params, Backend::VideoToolbox) { + Ok(_) => panic!("open_with should fail on null parameters"), + Err(Error::Ffmpeg(ffmpeg_next::Error::Other { errno })) => { + assert_eq!(errno, libc::ENOMEM, "expected ENOMEM, got {errno}"); + } + Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"), + } + } } From 36a4729606e825af94973f1229d83d2969f56401 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 00:26:00 +1200 Subject: [PATCH 16/27] update --- src/decoder.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index c9e99eb..165f6d8 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -11,7 +11,7 @@ use ffmpeg_next::{ av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_make_writable, av_packet_ref, avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_alloc, avcodec_parameters_copy, avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef, - AVCodec, + AVCodec, AVMediaType, }, frame, Codec, Packet, Rational, }; @@ -670,8 +670,30 @@ impl VideoDecoder { } } if r.is_ok() && probe.eof_sent { - if let Err(e) = candidate_state.inner.send_eof() { - r = Err(e); + // `avcodec_send_packet(NULL)` (which `send_eof` becomes) can + // return EAGAIN with the same drain-output-first semantics as + // a regular send_packet. Loop drain+retry instead of failing + // the candidate on backpressure. 
+ loop { + match candidate_state.inner.send_eof() { + Ok(()) => break, + Err(e) if is_eagain(&e) => { + if let Err(de) = drain_into_pending( + &mut candidate_state.inner, + &mut hw_buf, + &mut local_pending, + &mut local_pending_bytes, + max_pending_bytes, + ) { + r = Err(de); + break; + } + } + Err(e) => { + r = Err(e); + break; + } + } } } r @@ -778,8 +800,15 @@ impl VideoDecoder { // Open the decoder. On any failure, release the resources we just // allocated so we don't leak. - let opened = match ctx.decoder().open_as(codec).and_then(|o| o.video()) { - Ok(d) => d, + // + // We deliberately bypass `Opened::video()` because it calls + // `Context::medium()`, which reads `AVCodecContext.codec_type` as the + // bindgen `AVMediaType` enum — the same UB hazard we've been + // systematically removing. Instead: validate `codec_type` as a raw + // `c_int` ourselves, then construct the `decoder::Video` wrapper + // directly via its public tuple field. + let opened = match ctx.decoder().open_as(codec) { + Ok(o) => o, Err(e) => { // SAFETY: we either allocated these in this function above or // they are null; av_buffer_unref / Box::from_raw handle null @@ -797,6 +826,24 @@ impl VideoDecoder { } }; + // Validate codec_type as a raw integer — never construct AVMediaType + // from an unvalidated runtime value. + // SAFETY: codec_type is bound as AVMediaType (`#[repr(i32)]`), same + // size and alignment as i32; reading the bytes as i32 cannot be UB. + let codec_type_int: i32 = + unsafe { ptr::read(ptr::addr_of!((*opened.as_ptr()).codec_type) as *const i32) }; + let video_type_int: i32 = AVMediaType::AVMEDIA_TYPE_VIDEO as i32; + if codec_type_int != video_type_int { + // Not a video codec context — surface the same error + // `Opened::video()` would have, without going through enum + // construction. Cleanup runs via `opened`'s Drop. 
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::InvalidData)); + } + // SAFETY of construction: `decoder::Video` is `pub struct Video(pub Opened)`. + // We construct via the public field; this is the same wrapping + // `Opened::video()` does on success, just without the enum read. + let opened = ffmpeg_next::decoder::Video(opened); + Ok(DecoderState { inner: ManuallyDrop::new(opened), backend, From e27758846eca038a0be9c3c265193ac8d9856ef3 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 01:31:36 +1200 Subject: [PATCH 17/27] update --- Cargo.toml | 2 +- README.md | 51 ++++++++++++++++++++++----------- docs/design.md | 72 +++++++++-------------------------------------- src/decoder.rs | 28 +++++++++--------- tests/hw_smoke.rs | 25 ++++++++++++---- 5 files changed, 82 insertions(+), 96 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7691656..9a3b19a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hwdecode" -version = "0.0.0" +version = "0.1.0" edition = "2021" rust-version = "1.95" description = "Cross-platform hardware-only video decoder built on top of ffmpeg-next, with auto-probe across HW backends. Callers handle software fallback." diff --git a/README.md b/README.md index c4e9108..05e7c56 100644 --- a/README.md +++ b/README.md @@ -4,27 +4,32 @@ Cross-platform hardware-accelerated video decoder for Rust, built on top of [`ffmpeg-next`](https://crates.io/crates/ffmpeg-next). `VideoDecoder` mirrors the `send_packet` / `receive_frame` interface of -`ffmpeg::decoder::Video` and silently picks the best hardware backend for the -host platform, falling back to software if none open. Output frames are -CPU-side — for HW backends they are downloaded with `av_hwframe_transfer_data` -(NV12 for 8-bit, P010 for 10-bit). Pixel-format conversion is intentionally -out of scope. +`ffmpeg::decoder::Video` and auto-probes the host's hardware backends. 
+This crate is **hardware-only** — there is no software fallback inside it. +If no hardware backend can decode the stream, `VideoDecoder::open` returns +`Error::AllBackendsFailed` and the caller decides how to fall back (typically +by opening an `ffmpeg::decoder::Video` directly). Output frames are CPU-side, +downloaded with `av_hwframe_transfer_data` (NV12 for 8-bit, P010 for 10-bit). +Pixel-format conversion is intentionally out of scope. ## Backends -| Target | Probe order | +| Target | Probe order (HW only) | | ------------------- | --------------------------------- | -| macOS / iOS / tvOS | VideoToolbox → Software | -| Linux | VAAPI → CUDA → Software | -| Windows | D3D11VA → CUDA → Software | -| other | Software | +| macOS / iOS / tvOS | VideoToolbox | +| Linux | VAAPI → CUDA | +| Windows | D3D11VA → CUDA | +| other | (none) | + +If `open` returns `Error::AllBackendsFailed`, software fallback is the +caller's responsibility (this crate intentionally does not include one). ## Usage -```rust +```rust,no_run use ffmpeg_next as ffmpeg; -use ffmpeg::{format, frame, media}; -use hwdecode::VideoDecoder; +use ffmpeg::{format, media}; +use hwdecode::{Frame, VideoDecoder}; ffmpeg::init()?; @@ -32,15 +37,29 @@ let mut input = format::input(path)?; let stream = input.streams().best(media::Type::Video).unwrap(); let stream_index = stream.index(); -let mut decoder = VideoDecoder::open(stream.parameters())?; +// HW-only open. On AllBackendsFailed, fall back to software yourself. +let mut decoder = match VideoDecoder::open(stream.parameters()) { + Ok(d) => d, + Err(hwdecode::Error::AllBackendsFailed { .. }) => { + // Caller-side software fallback. + let _sw = ffmpeg::codec::Context::from_parameters(stream.parameters())? + .decoder() + .video()?; + // ... drive _sw with send_packet / receive_frame yourself ... 
+ return Ok(()); + } + Err(e) => return Err(e.into()), +}; println!("backend = {:?}", decoder.backend()); -let mut frame = frame::Video::empty(); +let mut frame = Frame::empty()?; for (s, packet) in input.packets() { if s.index() != stream_index { continue; } decoder.send_packet(&packet)?; while decoder.receive_frame(&mut frame).is_ok() { - // frame.format() is NV12 / P010 (HW path) or codec-native (SW path) + // frame.pix_fmt() is the integer constant — match against + // hwdecode::pix_fmt::{NV12, P010LE, ...} and dispatch to your + // pixel-format pipeline (e.g. `colconv`). // ... do something with frame ... } } diff --git a/docs/design.md b/docs/design.md index 2c54aee..521dd49 100644 --- a/docs/design.md +++ b/docs/design.md @@ -26,72 +26,26 @@ Cross-platform **hardware-only** video decoder built on top of `ffmpeg-next` 8.1 ## Public API -```rust -pub struct VideoDecoder { /* private */ } - -impl VideoDecoder { - /// Auto-probe HW backends in platform order. Returns - /// `Error::AllBackendsFailed` if no backend can decode this stream; - /// caller falls back to software decoder of choice. On success, - /// `backend()` reports the one that won. - pub fn open(parameters: ffmpeg::codec::Parameters) -> Result; - - /// Force a specific backend. No probe, no fallback. - pub fn open_with(parameters: ffmpeg::codec::Parameters, backend: Backend) -> Result; - - pub fn backend(&self) -> Backend; - pub fn width(&self) -> u32; - pub fn height(&self) -> u32; - pub fn format(&self) -> ffmpeg::format::Pixel; - pub fn time_base(&self) -> ffmpeg::Rational; - pub fn frame_rate(&self) -> ffmpeg::Rational; - - pub fn send_packet(&mut self, packet: &ffmpeg::Packet) -> Result<(), Error>; - pub fn send_eof(&mut self) -> Result<(), Error>; - - /// Receive a CPU-side frame. Internally calls - /// `av_hwframe_transfer_data` and copies PTS/timing onto the result; - /// output format is NV12 (8-bit) or P010 (10-bit) per the HW backend's - /// `AVHWFramesContext::sw_format`. 
- pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<(), Error>; - - pub fn flush(&mut self); -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum Backend { - Software, - VideoToolbox, // macOS, iOS, iPadOS, tvOS - Vaapi, // Linux (Intel/AMD) - Cuda, // Linux/Windows (NVIDIA) - D3d11va, // Windows -} - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("ffmpeg error: {0}")] - Ffmpeg(#[from] ffmpeg::Error), - #[error("no decoder for codec id {0:?}")] - NoCodec(ffmpeg::codec::Id), - #[error("hardware device init failed for {backend:?}: {source}")] - HwDeviceInitFailed { backend: Backend, source: ffmpeg::Error }, - #[error("all backends failed; attempts: {attempts:?}")] - AllBackendsFailed { attempts: Vec<(Backend, ffmpeg::Error)> }, -} -``` +> The original spec listed an inline API surface here. It diverged from the +> shipping crate (`Backend::Software` was removed; `format() -> Pixel` was +> removed in favor of `Frame::pix_fmt() -> i32`; the `Frame` wrapper +> replaced `frame::Video`; `Error` gained / dropped variants). Rather than +> keep stale signatures here, the canonical reference is `src/lib.rs` and +> the public docs on each item. See the README for a runnable usage +> example. ## Behavior ### Probe order -| Target | Order tried | +| Target | Order tried (HW only) | | ------------------- | -------------------------------------------- | -| macOS, iOS, tvOS | `[VideoToolbox, Software]` | -| Linux | `[Vaapi, Cuda, Software]` | -| Windows | `[D3d11va, Cuda, Software]` | -| Other | `[Software]` | +| macOS, iOS, tvOS | `[VideoToolbox]` | +| Linux | `[Vaapi, Cuda]` | +| Windows | `[D3d11va, Cuda]` | +| Other | `[]` → `Error::AllBackendsFailed` | -A HW backend is a candidate only if **(a)** its `AVHWDeviceType` device can be created via `av_hwdevice_ctx_create`, and **(b)** the codec advertises support via `avcodec_get_hw_config` matching that device type. The first candidate that fully opens wins. 
Each failure logs `tracing::warn!` with the backend and the underlying error and the loop tries the next. +A HW backend is a candidate only if **(a)** its `AVHWDeviceType` device can be created via `av_hwdevice_ctx_create`, and **(b)** the codec advertises support via `avcodec_get_hw_config` matching that device type. The first candidate that fully opens wins. Each failure logs `tracing::warn!` with the backend and the underlying error and the loop tries the next. If every backend fails (or the platform has none), `open` returns `Error::AllBackendsFailed`; software fallback is the caller's responsibility. ### Device selection diff --git a/src/decoder.rs b/src/decoder.rs index 165f6d8..e146a18 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -8,10 +8,9 @@ use ffmpeg_next::{ }, ffi::{ av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref, - av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_make_writable, av_packet_ref, - avcodec_alloc_context3, avcodec_free_context, avcodec_parameters_alloc, - avcodec_parameters_copy, avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef, - AVCodec, AVMediaType, + av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_ref, avcodec_alloc_context3, + avcodec_free_context, avcodec_parameters_alloc, avcodec_parameters_copy, + avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef, AVCodec, AVMediaType, }, frame, Codec, Packet, Rational, }; @@ -114,10 +113,12 @@ const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024; /// during probe replay. Each frame is a fully-allocated CPU buffer /// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so /// an unbounded queue would OOM on a candidate with a shallow internal -/// queue against a deep replay history. 
Drained candidate frames in -/// excess of this cap (or [`DEFAULT_MAX_PROBE_PENDING_BYTES`], whichever -/// hits first) are discarded with a `tracing::warn!`; we still drain so -/// `send_packet` can keep feeding the candidate. +/// queue against a deep replay history. This cap, together with +/// [`DEFAULT_MAX_PROBE_PENDING_BYTES`], is enforced as a hard limit during +/// replay: once either limit is reached, probe buffering fails for the +/// candidate (returns `ENOMEM` from `drain_into_pending`) instead of +/// queueing additional drained frames. The probe loop then advances to +/// the next backend or returns `Error::AllBackendsFailed` if exhausted. const MAX_PROBE_PENDING_FRAMES: usize = 16; /// Default byte budget for probe-replay drained frames. 256 MiB is enough @@ -991,7 +992,10 @@ fn try_clone_parameters( /// calls `av_packet_ref` and ignores the int return value; on `ENOMEM` /// the destination is left empty while the caller assumes the clone /// succeeded — corrupting any later replay history. This helper surfaces -/// the AVERROR. +/// the AVERROR. The result is a refcounted shallow clone — the payload +/// buffer is shared with `src` rather than deep-copied; the probe replay +/// only sends packets through `avcodec_send_packet`, which does not +/// require a writable buffer. 
fn try_clone_packet(src: &Packet) -> std::result::Result { let mut dst = Packet::empty(); // SAFETY: dst is a freshly zero-initialized Packet (av_init_packet inside @@ -1001,12 +1005,6 @@ fn try_clone_packet(src: &Packet) -> std::result::Result Date: Mon, 27 Apr 2026 10:58:42 +1200 Subject: [PATCH 18/27] update --- README.md | 16 ++- src/decoder.rs | 177 +++++++++++++++++------- src/frame.rs | 362 +++++++++++++++++++++++++++++++++++++++++-------- src/lib.rs | 10 +- 4 files changed, 456 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index 05e7c56..3da5fba 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,17 @@ Cross-platform hardware-accelerated video decoder for Rust, built on top of `VideoDecoder` mirrors the `send_packet` / `receive_frame` interface of `ffmpeg::decoder::Video` and auto-probes the host's hardware backends. This crate is **hardware-only** — there is no software fallback inside it. -If no hardware backend can decode the stream, `VideoDecoder::open` returns -`Error::AllBackendsFailed` and the caller decides how to fall back (typically -by opening an `ffmpeg::decoder::Video` directly). Output frames are CPU-side, -downloaded with `av_hwframe_transfer_data` (NV12 for 8-bit, P010 for 10-bit). -Pixel-format conversion is intentionally out of scope. +If no hardware backend can decode the stream, `Error::AllBackendsFailed` +surfaces from `VideoDecoder::open` (when no backend opens) or from +`receive_frame` / `send_packet` / `send_eof` (when the initially-opened +backend fails at decode time and every remaining backend in the probe order +also fails — the only way it surfaces on single-backend platforms like macOS). +The caller decides how to fall back (typically by opening an +`ffmpeg::decoder::Video` directly). Output frames are CPU-side, downloaded +with `av_hwframe_transfer_data` (NV12 for 8-bit, P010 for 10-bit). 
Pixel- +format conversion is intentionally out of scope; safe per-row access is via +`Frame::row` / `Frame::rows` (clipped to visible byte width — never includes +FFmpeg's per-row alignment padding). ## Backends diff --git a/src/decoder.rs b/src/decoder.rs index e146a18..2fe4f08 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -142,7 +142,9 @@ struct ProbeState { parameters: codec::Parameters, codec: Codec, /// Backends still to try, in order. Empty means "no more options after - /// the active one fails". + /// the active one fails" — `advance_probe` then surfaces + /// [`Error::AllBackendsFailed`] so the contract is the same on + /// single-backend platforms (e.g. macOS) as on multi-backend ones. remaining_backends: Vec, /// Packets sent so far, kept for replay through any candidate backend. /// Preserved across failed candidates — only cleared when the probe @@ -154,6 +156,12 @@ struct ProbeState { buffered_bytes: usize, /// Whether `send_eof` has been called; replayed alongside packets. eof_sent: bool, + /// Per-backend errors captured since the probe window opened. Pushed + /// whenever a backend's failure triggers `advance_probe` (the active + /// backend that just failed) or a candidate's build / replay rejects + /// it. Drained into [`Error::AllBackendsFailed`] when the probe + /// exhausts every option. + attempts: Vec<(Backend, Box)>, } // SAFETY: All raw pointers are exclusively owned by `DecoderState` and never @@ -197,9 +205,21 @@ impl VideoDecoder { /// consumed, so a misbehaving middle backend cannot strand the caller. /// /// [`Self::backend`] reflects whichever backend ultimately produced the - /// first frame. If no hardware backend in the platform's probe order can - /// decode this stream, `open` returns [`Error::AllBackendsFailed`]; - /// callers handle software fallback themselves. + /// first frame. 
+ /// + /// [`Error::AllBackendsFailed`] surfaces in two places, with the same + /// meaning ("no hardware backend can decode this stream — fall back to + /// software yourself"): + /// - From `open` itself, when no backend even opens. + /// - From [`Self::send_packet`] / [`Self::send_eof`] / + /// [`Self::receive_frame`], when the initially-opened backend fails + /// at decode time and every remaining backend in the probe order + /// either also fails or doesn't exist. On single-backend platforms + /// (e.g. macOS, where the order is `[VideoToolbox]`), this is the + /// only place a HW-only failure surfaces. + /// + /// In both cases, `attempts` carries the per-backend error log so the + /// caller can decide how to proceed with software fallback. pub fn open(parameters: codec::Parameters) -> Result<Self> { let codec = find_decoder(&parameters)?; let order = backend::probe_order(); @@ -229,28 +249,33 @@ impl VideoDecoder { // Rc on the original thread. The checked clone copies the bytes // into a fresh allocation with `owner: None`, severing the link. // + // We always create ProbeState — even when `remaining` is empty + // (single-backend platforms like macOS) — so that a first-frame + // failure on the only backend surfaces as + // `Error::AllBackendsFailed` from `receive_frame` / + // `send_packet` rather than as a raw FFmpeg error. That keeps + // the API contract the same regardless of how many HW backends + // the platform exposes. + // // If the clone fails (ENOMEM), we keep the active `state` but - // skip probe setup — caller loses cross-backend fallback safety - // net but still gets a working decoder.
- let probe = if remaining.is_empty() { - None - } else { - match try_clone_parameters(&parameters) { - Ok(probe_params) => Some(ProbeState { - parameters: probe_params, - codec, - remaining_backends: remaining, - buffered_packets: Vec::new(), - buffered_bytes: 0, - eof_sent: false, - }), - Err(e) => { - tracing::warn!( - error = %e, - "hwdecode: parameters clone failed for probe state; proceeding without fallback" - ); - None - } + // skip probe setup — caller loses the transactional probe / + // fallback safety net but still gets a working decoder. + let probe = match try_clone_parameters(&parameters) { + Ok(probe_params) => Some(ProbeState { + parameters: probe_params, + codec, + remaining_backends: remaining, + buffered_packets: Vec::new(), + buffered_bytes: 0, + eof_sent: false, + attempts: Vec::new(), + }), + Err(e) => { + tracing::warn!( + error = %e, + "hwdecode: parameters clone failed for probe state; proceeding without fallback" + ); + None } }; return Ok(Self { @@ -403,7 +428,10 @@ impl VideoDecoder { return Err(Error::Ffmpeg(e)); } Err(e) => { - if self.probe.is_some() && self.advance_probe()? { + if self.probe.is_some() { + // advance_probe consumes the error into `attempts` and either + // installs a candidate (Ok) or surfaces AllBackendsFailed (Err). + self.advance_probe(Error::Ffmpeg(e))?; continue; } return Err(Error::Ffmpeg(e)); @@ -428,7 +456,8 @@ impl VideoDecoder { } Err(e) if is_transient(&e) => return Err(Error::Ffmpeg(e)), Err(e) => { - if self.probe.is_some() && self.advance_probe()? { + if self.probe.is_some() { + self.advance_probe(Error::Ffmpeg(e))?; continue; } return Err(Error::Ffmpeg(e)); @@ -453,8 +482,11 @@ impl VideoDecoder { /// method, so the caller never loses initial frames after a fallback. /// /// This crate is hardware-only: there is no software fallback inside the - /// decoder. If every backend is exhausted, the failure surfaces as the - /// last decoder error. Callers handle software fallback themselves. + /// decoder.
When every backend in the probe order has been exhausted — + /// including the case of a single-backend platform whose only backend + /// failed — this returns [`Error::AllBackendsFailed`] with the per- + /// backend attempt log so the caller can branch into a software + /// decoder of their choice. /// /// Returns the same transient signals as `ffmpeg::decoder::Video`: /// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and @@ -478,8 +510,11 @@ impl VideoDecoder { // EOF (and every other non-transient error): if we are still // probing, treat it as candidate failure — a backend that drains // to EOF without ever producing a frame should not silently - // present as "stream over" to the caller. Advance and retry. - if self.probe.is_some() && self.advance_probe()? { + // present as "stream over" to the caller. Advance and retry; if + // every backend has been exhausted, advance_probe surfaces + // AllBackendsFailed and `?` propagates it. + if self.probe.is_some() { + self.advance_probe(Error::Ffmpeg(e))?; // Probe advance may have populated `pending_frames`; deliver // one of those before reading more from the new candidate. if self.try_pop_pending(frame) { @@ -487,7 +522,7 @@ impl VideoDecoder { } continue; } - // Probe collapsed or exhausted — surface the error (including EOF + // Probe collapsed already — surface the error (including EOF // for a genuinely empty stream). return Err(Error::Ffmpeg(e)); } @@ -504,7 +539,8 @@ impl VideoDecoder { return Ok(()); } Err(e) => { - if self.probe.is_some() && self.advance_probe()? { + if self.probe.is_some() { + self.advance_probe(Error::Ffmpeg(e))?; unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) }; if self.try_pop_pending(frame) { return Ok(()); @@ -561,14 +597,39 @@ impl VideoDecoder { /// Try the next backend in `remaining_backends`. Transactional: a /// candidate must successfully build and accept the replayed history /// before any probe state is consumed. 
Backends that fail to build or - /// reject the replay are skipped (with `tracing::warn!`) and the loop - /// continues to the next one. Returns: - /// - `Ok(true)` when a candidate is installed and replay completed. - /// - `Ok(false)` when the probe is exhausted (no more backends to try). - /// - `Err(_)` only for genuinely fatal conditions surfaced by `build_state` - /// on the very first inspection (e.g. a malformed `Parameters`); the - /// per-candidate failures during the loop are absorbed and logged. - fn advance_probe(&mut self) -> Result { + /// reject the replay are recorded into `probe.attempts` and the loop + /// continues to the next one. + /// + /// `last_error` is the error that triggered this advance — i.e. the + /// failure of the currently active backend on `send_packet` / + /// `send_eof` / `receive_frame`. It is recorded against the active + /// backend before any candidate is tried so that a final + /// `AllBackendsFailed` carries the full attempt log including the + /// initially-opened backend's runtime failure. + /// + /// Returns: + /// - `Ok(())` when a candidate is installed and replay completed — + /// caller should retry the operation. + /// - `Err(Error::AllBackendsFailed { attempts })` when every remaining + /// backend has been exhausted (including the just-failed active one). + /// This is what the documented `open` contract promises, surfaced at + /// runtime so the caller can branch into a software fallback. On a + /// single-backend platform (e.g. macOS), this fires after the only + /// backend's first-frame failure; on multi-backend platforms it + /// fires after the last candidate's failure. + /// - `Err(_)` for other fatal conditions surfaced by probe machinery + /// itself (e.g. `alloc_av_frame` ENOMEM during replay drain). + fn advance_probe(&mut self, last_error: Error) -> Result<()> { + // Record the failure that triggered this advance against the active + // backend. 
If the probe was somehow already gone (shouldn't happen — + // call sites guard with `self.probe.is_some()`), just propagate the + // error so behaviour matches the pre-fix code path. + let active_backend = self.state.backend; + match self.probe.as_mut() { + Some(probe) => probe.attempts.push((active_backend, Box::new(last_error))), + None => return Err(last_error), + } + // Drop frames previously queued from the backend we're now abandoning. // They came from a candidate that just failed for cause and cannot be // trusted alongside frames we may queue from the next candidate. (If @@ -589,25 +650,37 @@ impl VideoDecoder { error = %e, "hwdecode: parameters clone failed during probe advance; popping backend and trying next" ); - self + let popped = self .probe .as_mut() .expect("probe state present") .remaining_backends .remove(0); + self + .probe + .as_mut() + .expect("probe state present") + .attempts + .push((popped, Box::new(Error::Ffmpeg(e)))); continue; } }; (probe.remaining_backends[0], parameters, probe.codec) } - _ => return Ok(false), + // No more candidates — surface the accumulated attempt log as + // AllBackendsFailed so single- and multi-backend platforms have + // the same contract for "every HW backend failed." + _ => { + let attempts = self.probe.take().map(|p| p.attempts).unwrap_or_default(); + return Err(Error::AllBackendsFailed { attempts }); + } }; let prev_backend = self.state.backend; tracing::warn!(from = ?prev_backend, to = ?next_backend, "hwdecode: advancing probe"); - // Build candidate. On failure, pop and continue without touching the - // packet buffer. + // Build candidate. On failure, record into attempts and continue + // without touching the packet buffer. 
let mut candidate_state = match Self::build_state(parameters, codec, next_backend) { Ok(s) => s, Err(e) => { @@ -618,6 +691,12 @@ impl VideoDecoder { .expect("probe state present") .remaining_backends .remove(0); + self + .probe + .as_mut() + .expect("probe state present") + .attempts + .push((next_backend, Box::new(e))); continue; } }; @@ -713,6 +792,12 @@ impl VideoDecoder { .expect("probe state present") .remaining_backends .remove(0); + self + .probe + .as_mut() + .expect("probe state present") + .attempts + .push((next_backend, Box::new(Error::Ffmpeg(e)))); continue; } @@ -727,7 +812,7 @@ impl VideoDecoder { .expect("probe state present") .remaining_backends .remove(0); - return Ok(true); + return Ok(()); } } diff --git a/src/frame.rs b/src/frame.rs index 65ec63d..3f48075 100644 --- a/src/frame.rs +++ b/src/frame.rs @@ -8,11 +8,24 @@ //! when the value isn't in the build's bindgen-generated discriminant set //! (the exact failure mode this crate is designed to survive). //! -//! Plane lengths for [`Frame::data`] are computed from a hardcoded chroma- -//! subsampling table keyed on the safe `pix_fmt()` integer, covering only -//! the formats `hwdecode` produces (the NV* and P0xx/P2xx/P4xx families -//! after `av_hwframe_transfer_data`). For any other format, [`Frame::data`] -//! returns `None` rather than guessing at a slice length. +//! Per-row sizes for [`Frame::row`] / [`Frame::rows`] are computed from +//! hardcoded chroma-subsampling and bit-depth tables keyed on the safe +//! `pix_fmt()` integer, covering only the formats `hwdecode` produces (the +//! NV* and P0xx/P2xx/P4xx families after `av_hwframe_transfer_data`). For +//! any other format, the row accessors return `None` rather than guessing +//! at a slice length. +//! +//! Why per-row, not whole-plane: FFmpeg allocates each row at +//! `linesize[plane]` ([`Frame::stride`]) bytes for SIMD alignment, but +//! hardware transfer paths only initialize the first +//! 
[`Frame::row_bytes`]`(plane)` of every row. Exposing a stride-inclusive +//! `&[u8]` over an entire plane would let safe code observe those +//! uninitialized padding bytes, which violates `slice::from_raw_parts`. +//! Per-row slices are tightly clipped to the visible byte width so the +//! safe API never hands out an uninitialized byte. Callers that need a +//! single base pointer (e.g. SIMD pixel converters keyed off stride) can +//! reach for [`Frame::as_ptr`] and consume `stride * plane_h` bytes +//! themselves under their own `unsafe` contract. //! //! Compare formats against integer constants in [`crate::pix_fmt`]. @@ -114,59 +127,138 @@ impl Frame { linesize as usize } - /// Pixel data for `plane`. + /// Visible byte width of `plane` — the number of initialized bytes at + /// the start of every row in that plane. + /// + /// Distinct from [`Self::stride`], which returns the FFmpeg `linesize`. + /// `linesize` is `>= row_bytes` and may include trailing alignment + /// padding bytes that FFmpeg's hardware transfer paths do not + /// initialize. `row_bytes` is what `slice::from_raw_parts` can safely + /// see. + /// + /// Returns `None` when the format is not in the supported HW-output set + /// (see crate `pix_fmt`) or the plane is out of range. + pub fn row_bytes(&self, plane: usize) -> Option { + if plane >= self.planes() { + return None; + } + plane_row_bytes_for(self.pix_fmt(), plane, self.width() as usize) + } + + /// Pixel data for one row of `plane`, tightly clipped to the visible + /// byte width ([`Self::row_bytes`]). + /// + /// Excludes the trailing alignment padding that [`Self::stride`] + /// includes — those bytes are not guaranteed to be initialized by + /// FFmpeg's hardware transfer paths and must not be exposed through a + /// safe `&[u8]`. 
/// /// Returns `None` for any of the following — never panics: - /// - The frame's pixel format is not one of the hardware-output formats - /// listed in [`crate::pix_fmt`] (we cannot safely compute the plane - /// size for an unknown layout). + /// - The frame's pixel format is not one of the supported hardware- + /// output formats listed in [`crate::pix_fmt`]. /// - The plane index is out of range. - /// - `AVFrame.linesize[plane]` is `<= 0` (negative linesize signals - /// vertically-flipped FFmpeg layouts which we do not surface; zero is - /// "no plane"). - /// - `AVFrame.height` is `<= 0`. - /// - The computed slice length would overflow or exceed `isize::MAX` - /// (a precondition of [`std::slice::from_raw_parts`]). + /// - `y` is past the plane's row count. + /// - `AVFrame.linesize[plane]` is `<= 0` or `AVFrame.height` is `<= 0`. /// - The plane's data pointer is null. + /// - The plane size would overflow `isize::MAX`. + pub fn row(&self, plane: usize, y: usize) -> Option<&[u8]> { + let info = self.plane_info(plane)?; + if y >= info.plane_h { + return None; + } + // y < plane_h and plane_h * stride ≤ isize::MAX (verified in plane_info), + // so y * stride is bounded by (plane_h - 1) * stride ≤ isize::MAX. + let offset = y * info.stride; + // SAFETY: + // - `info.plane_ptr` is non-null (verified in plane_info). + // - `offset + row_bytes ≤ plane_h * stride`, which is the size of the + // FFmpeg allocation for this plane. + // - Bytes 0..row_bytes of every row are written by FFmpeg's HW + // transfer; the slice is fully initialized. + // - `row_bytes ≤ stride ≤ isize::MAX` per plane_info. + unsafe { + let row_ptr = info.plane_ptr.add(offset); + Some(slice::from_raw_parts(row_ptr, info.row_bytes)) + } + } + + /// Iterator over every row of `plane`. Each yielded slice has length + /// [`Self::row_bytes`]`(plane)` — never includes the trailing alignment + /// padding that lives within [`Self::stride`]. 
+ /// + /// Returns `None` under the same conditions as [`Self::row`]. + pub fn rows(&self, plane: usize) -> Option + '_> { + let info = self.plane_info(plane)?; + Some((0..info.plane_h).map(move |y| { + // Same bounds argument as `row()`. + let offset = y * info.stride; + // SAFETY: see `row()` — the same invariants hold here, and the + // iterator's lifetime is tied to `&self` so the pointer remains + // valid for every yielded slice. + unsafe { slice::from_raw_parts(info.plane_ptr.add(offset), info.row_bytes) } + })) + } + + /// Raw base pointer to `plane`'s allocation, or `None` if the plane is + /// out of range or its data pointer is null. + /// + /// The returned pointer is valid for `stride(plane) * plane_height` + /// bytes, **but only the first [`Self::row_bytes`]`(plane)` bytes of + /// each row are guaranteed to be initialized.** The trailing per-row + /// alignment padding is uninitialized; callers performing wide SIMD + /// loads that read past `row_bytes` must mask the result and never + /// surface those bytes through a safe `&[u8]`. /// - /// Currently supported (post-`av_hwframe_transfer_data`): - /// - 4:2:0 semi-planar 8-bit: `NV12`, `NV21` - /// - 4:2:2 semi-planar 8-bit: `NV16` - /// - 4:4:4 semi-planar 8-bit: `NV24` - /// - 4:2:0 semi-planar 10/12/16-bit: `P010LE`/`P010BE`/`P012LE`/`P016LE` - /// - 4:2:2 semi-planar 10/12/16-bit: `P210LE`/`P212LE`/`P216LE` - /// - 4:4:4 semi-planar 10/12/16-bit: `P410LE`/`P412LE`/`P416LE` - pub fn data(&self, plane: usize) -> Option<&[u8]> { + /// This accessor exists for downstream pixel-format converters + /// (`colconv`) that work in `(ptr, stride, width, height)` quadruples; + /// safe code should prefer [`Self::row`] / [`Self::rows`]. + pub fn as_ptr(&self, plane: usize) -> Option<*const u8> { if plane >= self.planes() { return None; } + // SAFETY: plane index bounds-checked; AVFrame.data is `[*mut u8; 8]`. 
+ let p = unsafe { (*self.inner.as_ptr()).data[plane] }; + if p.is_null() { + None + } else { + Some(p) + } + } - // SAFETY: bounds-checked plane index; `linesize` and `height` are - // primitive c_int reads that cannot themselves be UB. - let linesize: i32 = unsafe { (*self.inner.as_ptr()).linesize[plane] }; - let height_int: i32 = unsafe { (*self.inner.as_ptr()).height }; - if linesize <= 0 || height_int <= 0 { + /// Read every per-plane field needed by the row accessors with the + /// safety preconditions enforced once. + fn plane_info(&self, plane: usize) -> Option { + if plane >= self.planes() { return None; } - let stride = linesize as usize; - - let plane_height = plane_height_for(self.pix_fmt(), plane, height_int as usize)?; - let len = stride.checked_mul(plane_height)?; - if len > isize::MAX as usize { + // SAFETY: bounds-checked plane index; linesize/height/data are raw + // c_int / pointer reads that cannot themselves be UB. + let (stride_int, height_int, plane_ptr) = unsafe { + let raw = self.inner.as_ptr(); + ((*raw).linesize[plane], (*raw).height, (*raw).data[plane]) + }; + if stride_int <= 0 || height_int <= 0 || plane_ptr.is_null() { return None; } - - // SAFETY: linesize > 0 and height > 0 verified; len <= isize::MAX - // verified — both preconditions of `slice::from_raw_parts`. We trust - // FFmpeg to populate `data[plane]` validly when linesize[plane] is - // non-zero; the null check is a final defensive guard. 
- unsafe { - let ptr = (*self.inner.as_ptr()).data[plane]; - if ptr.is_null() { - return None; - } - Some(slice::from_raw_parts(ptr, len)) + let stride = stride_int as usize; + let plane_h = plane_height_for(self.pix_fmt(), plane, height_int as usize)?; + let row_bytes = plane_row_bytes_for(self.pix_fmt(), plane, self.width() as usize)?; + if row_bytes > stride { + return None; } + // Bound the entire plane allocation to isize::MAX so any byte offset + // computed as `y * stride` (y < plane_h) stays representable, satisfying + // the safety contract of `pointer::add` and `slice::from_raw_parts`. + let plane_size = stride.checked_mul(plane_h)?; + if plane_size > isize::MAX as usize { + return None; + } + Some(PlaneInfo { + plane_ptr, + stride, + plane_h, + row_bytes, + }) } /// Crate-internal: hand the wrapped frame to FFmpeg / our decoder code. @@ -175,10 +267,68 @@ impl Frame { } } +#[derive(Clone, Copy)] +struct PlaneInfo { + plane_ptr: *const u8, + stride: usize, + plane_h: usize, + row_bytes: usize, +} + // `Default` intentionally omitted: constructing a frame can fail (OOM // in `av_frame_alloc`), and a panicking `default()` would defeat the // safety stance of [`Frame::empty`]. Use `Frame::empty()?` directly. +/// Visible byte width of `plane`'s rows for a frame of `frame_width` and +/// the given pixel format. `None` for formats not in the supported HW- +/// output set. +/// +/// Distinct from `linesize` (FFmpeg's per-row stride, which may include +/// alignment padding). HW transfer paths only initialize bytes +/// `0..plane_row_bytes_for(...)` of each row; everything from there to +/// `stride` is uninitialized padding and must not be exposed via +/// `slice::from_raw_parts`. 
+fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Option { + match pix_fmt_int { + // 8-bit semi-planar: Y at full width (1 byte/sample), UV interleaved + // at horizontally-subsampled chroma (4:2:0 / 4:2:2) with 2 bytes per + // chroma pair → both planes have row width == frame_width. + pix_fmt::NV12 | pix_fmt::NV21 | pix_fmt::NV16 => match plane { + 0 | 1 => Some(frame_width), + _ => None, + }, + // 8-bit 4:4:4 semi-planar: chroma at full horizontal resolution, + // 2 bytes per pixel (1 byte U + 1 byte V). + pix_fmt::NV24 => match plane { + 0 => Some(frame_width), + 1 => Some(frame_width.checked_mul(2)?), + _ => None, + }, + // 10/12/16-bit semi-planar 4:2:0 / 4:2:2: Y is 2 bytes/sample + // (high-bit-depth packed in 16-bit). UV interleaved at horizontally- + // subsampled chroma with 4 bytes per chroma pair (2 bytes U + 2 bytes + // V) → both planes have row width == 2 * frame_width. + pix_fmt::P010LE + | pix_fmt::P010BE + | pix_fmt::P012LE + | pix_fmt::P016LE + | pix_fmt::P210LE + | pix_fmt::P212LE + | pix_fmt::P216LE => match plane { + 0 | 1 => Some(frame_width.checked_mul(2)?), + _ => None, + }, + // 10/12/16-bit 4:4:4 semi-planar: Y is 2 bytes/sample; UV at full + // horizontal resolution with 4 bytes per pixel (2 bytes U + 2 bytes V). + pix_fmt::P410LE | pix_fmt::P412LE | pix_fmt::P416LE => match plane { + 0 => Some(frame_width.checked_mul(2)?), + 1 => Some(frame_width.checked_mul(4)?), + _ => None, + }, + _ => None, + } +} + /// Number of rows in `plane` for a frame of `frame_height` and the given /// pixel format. `None` for formats not in the supported HW-output set. /// @@ -235,18 +385,20 @@ mod tests { } #[test] - fn data_returns_none_for_unknown_format() { + fn row_returns_none_for_unknown_format() { let f = Frame::empty().expect("alloc"); // pix_fmt is NONE (-1), not in the supported set. 
- assert!(f.data(0).is_none()); + assert!(f.row(0, 0).is_none()); + assert!(f.rows(0).is_none()); + assert!(f.row_bytes(0).is_none()); } /// Synthesize a frame with a negative linesize (FFmpeg's vertical-flip - /// convention) and assert `data()` refuses to construct a slice. Without - /// the linesize > 0 check, the negative `i32 as usize` would produce a - /// huge positive length and `from_raw_parts` would be UB. + /// convention) and assert the row accessors refuse to construct a slice. + /// Without the linesize > 0 check, the negative `i32 as usize` would + /// produce a huge positive length and `from_raw_parts` would be UB. #[test] - fn data_returns_none_for_negative_linesize() { + fn row_returns_none_for_negative_linesize() { let mut f = Frame::empty().expect("alloc"); unsafe { let raw = f.inner.as_mut_ptr(); @@ -255,15 +407,16 @@ mod tests { (*raw).height = 1080; (*raw).linesize[0] = -1920; // vertically-flipped (*raw).linesize[1] = -1920; - // data pointers stay null; `data()` would return None on the null - // check anyway, but should bail earlier on the linesize sign. + // data pointers stay null; the accessors would also reject on null, + // but should bail earlier on the linesize sign. 
} - assert!(f.data(0).is_none()); - assert!(f.data(1).is_none()); + assert!(f.row(0, 0).is_none()); + assert!(f.row(1, 0).is_none()); + assert!(f.rows(0).is_none()); } #[test] - fn data_returns_none_for_non_positive_height() { + fn row_returns_none_for_non_positive_height() { let mut f = Frame::empty().expect("alloc"); unsafe { let raw = f.inner.as_mut_ptr(); @@ -273,7 +426,81 @@ mod tests { (*raw).linesize[0] = 1920; (*raw).linesize[1] = 1920; } - assert!(f.data(0).is_none()); + assert!(f.row(0, 0).is_none()); + } + + /// Synthesize a frame backed by a manually-allocated buffer with stride + /// strictly larger than visible row bytes (the exact case where + /// FFmpeg's HW transfer leaves trailing padding uninitialized) and + /// confirm the safe row accessor returns slices clipped to the visible + /// width. + #[test] + fn row_clips_to_visible_width_not_stride() { + use std::alloc::{alloc, dealloc, Layout}; + let width = 64usize; + let height = 4usize; + // Stride > width: 16 bytes of padding per row in the Y plane. + let stride = 80usize; + let plane_size = stride * height; + // Allocate ourselves so we can fully control initialization. Fill + // bytes 0..width with 0xAA per row (the "valid pixel" range) and + // bytes width..stride with 0xFF (the simulated alignment padding — + // FFmpeg would leave these uninitialized; we set them to a sentinel + // that the test can detect if the safe slice ever exposes them). 
+ let layout = Layout::from_size_align(plane_size, 32).unwrap(); + let buf = unsafe { alloc(layout) }; + assert!(!buf.is_null()); + for y in 0..height { + let row = unsafe { buf.add(y * stride) }; + for x in 0..width { + unsafe { *row.add(x) = 0xAA }; + } + for x in width..stride { + unsafe { *row.add(x) = 0xFF }; + } + } + + let mut f = Frame::empty().expect("alloc"); + unsafe { + let raw = f.inner.as_mut_ptr(); + (*raw).format = pix_fmt::NV12; + (*raw).width = width as i32; + (*raw).height = height as i32; + (*raw).linesize[0] = stride as i32; + // linesize[1] = 0 keeps planes() at 1 so the test stays focused on + // plane 0 without owning a second allocation. + (*raw).data[0] = buf; + } + + assert_eq!(f.row_bytes(0), Some(width)); + assert_eq!(f.stride(0), stride); + let row0 = f.row(0, 0).expect("row 0"); + assert_eq!( + row0.len(), + width, + "safe row must be clipped to visible width" + ); + assert!( + row0.iter().all(|&b| b == 0xAA), + "row must not include padding sentinel 0xFF" + ); + + let collected: Vec<&[u8]> = f.rows(0).expect("rows iterator").collect(); + assert_eq!(collected.len(), height); + for r in &collected { + assert_eq!(r.len(), width); + assert!(r.iter().all(|&b| b == 0xAA)); + } + + // Out-of-range row index returns None instead of panicking. + assert!(f.row(0, height).is_none()); + + // Detach the buffer before drop so AVFrame's own free path doesn't + // touch our manual allocation. + unsafe { + (*f.inner.as_mut_ptr()).data[0] = std::ptr::null_mut(); + dealloc(buf, layout); + } } #[test] @@ -306,4 +533,27 @@ mod tests { assert_eq!(plane_height_for(pix_fmt::NONE, 0, 1080), None); assert_eq!(plane_height_for(pix_fmt::NV12, 2, 1080), None); } + + #[test] + fn plane_row_bytes_table_covers_supported_formats() { + // 8-bit 4:2:0 / 4:2:2 — both planes at width. 
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 0, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV21, 1, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV16, 1, 1920), Some(1920)); + // 8-bit 4:4:4 — chroma plane is 2 * width. + assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 0, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 1, 1920), Some(3840)); + // 10/12/16-bit 4:2:0 / 4:2:2 — both planes at 2 * width. + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 0, 1920), Some(3840)); + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1920), Some(3840)); + assert_eq!(plane_row_bytes_for(pix_fmt::P210LE, 1, 1920), Some(3840)); + // 10/12/16-bit 4:4:4 — Y is 2 * width, chroma is 4 * width. + assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 0, 1920), Some(3840)); + assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 1, 1920), Some(7680)); + assert_eq!(plane_row_bytes_for(pix_fmt::P416LE, 1, 1920), Some(7680)); + // Unsupported / out-of-range. + assert_eq!(plane_row_bytes_for(pix_fmt::NONE, 0, 1920), None); + assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 2, 1920), None); + } } diff --git a/src/lib.rs b/src/lib.rs index b487132..3654016 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,8 +4,14 @@ //! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and auto-probes the //! host's hardware backends (VideoToolbox / VAAPI / NVDEC / D3D11VA). //! There is **no software fallback inside this crate** — if no hardware -//! backend can decode the stream, [`VideoDecoder::open`] returns -//! [`Error::AllBackendsFailed`] and the caller picks how to fall back +//! backend can decode the stream, [`Error::AllBackendsFailed`] surfaces +//! either from [`VideoDecoder::open`] (when no backend even opens) or +//! from [`VideoDecoder::receive_frame`] / [`VideoDecoder::send_packet`] / +//! [`VideoDecoder::send_eof`] (when the initially-opened backend or any +//! 
later candidate fails at decode time and the probe order is +//! exhausted). On single-backend platforms (e.g. macOS, where the order +//! is `[VideoToolbox]`), only the runtime path can return it. The +//! caller picks how to fall back to a software decoder of their choice //! (e.g. by opening an `ffmpeg::decoder::Video` directly). //! //! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side From b61c76a3badbf1a8458a0d392fdcb32f3ad3bf38 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 11:25:54 +1200 Subject: [PATCH 19/27] update --- src/decoder.rs | 84 +++++++++++++++++++++++++++++++++++++++++++++++++- src/frame.rs | 55 ++++++++++++++++++++++++++++----- 2 files changed, 130 insertions(+), 9 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index 2fe4f08..87fe847 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -387,7 +387,12 @@ impl VideoDecoder { match self.state.inner.send_packet(packet) { Ok(()) => { if let Some(probe) = self.probe.as_mut() { - let pkt_size = packet.size(); + // `try_clone_packet` calls `av_packet_ref`, which deep-copies + // side data via `av_packet_copy_props`. The probe budget must + // include side-data bytes or a malicious stream can keep + // `packet.size()` tiny while attaching megabytes of side data + // per packet and inflate retention beyond the advertised cap. + let pkt_size = packet.size().saturating_add(packet_side_data_bytes(packet)); let new_count = probe.buffered_packets.len() + 1; let new_bytes = probe.buffered_bytes.saturating_add(pkt_size); if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES { @@ -1093,6 +1098,38 @@ fn try_clone_packet(src: &Packet) -> std::result::Result usize { + // SAFETY: AVPacket.side_data is `*mut AVPacketSideData` and + // side_data_elems is `c_int`; both are raw struct fields safe to read. 
+ // Field projection (`.size`) does not reconstruct the enum-typed `type_` + // field, so the bindgen-enum UB hazard does not apply here. + unsafe { + let raw = packet.as_ptr(); + let nel = (*raw).side_data_elems; + let arr = (*raw).side_data; + if arr.is_null() || nel <= 0 { + return 0; + } + let mut total: usize = 0; + for i in 0..(nel as usize) { + let entry = arr.add(i); + total = total.saturating_add((*entry).size); + } + total + } +} + /// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine /// distinguishes "drain output and retry" from "stream over"). fn is_eagain(e: &ffmpeg_next::Error) -> bool { @@ -1324,4 +1361,49 @@ mod tests { Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"), } } + + /// `try_clone_packet` calls `av_packet_ref`, which deep-copies side + /// data via `av_packet_copy_props`. The probe budget therefore has to + /// include side-data bytes — otherwise a stream with a 16-byte payload + /// and a 1 MiB side-data attachment would only consume 16 bytes of the + /// 64 MiB budget per packet, and 256 buffered clones would retain + /// ~256 MiB of side data while logs claim a few KiB. + #[test] + fn packet_side_data_counts_against_probe_budget() { + use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType}; + + const PAYLOAD_SIZE: usize = 16; + const SIDE_DATA_SIZE: usize = 1024 * 1024; // 1 MiB + + let mut packet = Packet::new(PAYLOAD_SIZE); + // SAFETY: packet is a freshly allocated AVPacket; av_packet_new_side_data + // attaches a fresh `SIDE_DATA_SIZE`-byte buffer of the requested type + // to it and returns a writable pointer (or NULL on OOM). 
+ let p = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, + SIDE_DATA_SIZE, + ) + }; + assert!(!p.is_null(), "av_packet_new_side_data returned NULL"); + + assert_eq!(packet.size(), PAYLOAD_SIZE); + let side = packet_side_data_bytes(&packet); + assert!( + side >= SIDE_DATA_SIZE, + "side-data accounting must include the attached buffer; got {side}" + ); + let total = packet.size().saturating_add(side); + assert!( + total >= PAYLOAD_SIZE + SIDE_DATA_SIZE, + "probe budget must charge payload + side data; got {total}" + ); + } + + #[test] + fn packet_side_data_is_zero_when_no_side_data() { + let packet = Packet::new(64); + assert_eq!(packet_side_data_bytes(&packet), 0); + } } diff --git a/src/frame.rs b/src/frame.rs index 3f48075..9b651af 100644 --- a/src/frame.rs +++ b/src/frame.rs @@ -290,15 +290,19 @@ struct PlaneInfo { /// `slice::from_raw_parts`. fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Option { match pix_fmt_int { - // 8-bit semi-planar: Y at full width (1 byte/sample), UV interleaved - // at horizontally-subsampled chroma (4:2:0 / 4:2:2) with 2 bytes per - // chroma pair → both planes have row width == frame_width. + // 8-bit semi-planar 4:2:0 / 4:2:2: Y at full width (1 byte/sample); + // UV interleaved at horizontally-subsampled chroma with `ceil(W/2)` + // U+V pairs at 2 bytes per pair. For even W the chroma row equals + // `W` bytes (the simple case); for odd W it must round *up* to the + // next even byte so the trailing chroma sample is not silently + // dropped on width = 2k+1 frames. pix_fmt::NV12 | pix_fmt::NV21 | pix_fmt::NV16 => match plane { - 0 | 1 => Some(frame_width), + 0 => Some(frame_width), + 1 => Some(frame_width.div_ceil(2).checked_mul(2)?), _ => None, }, // 8-bit 4:4:4 semi-planar: chroma at full horizontal resolution, - // 2 bytes per pixel (1 byte U + 1 byte V). + // 2 bytes per pixel (1 byte U + 1 byte V) — no rounding required. 
pix_fmt::NV24 => match plane { 0 => Some(frame_width), 1 => Some(frame_width.checked_mul(2)?), @@ -306,8 +310,9 @@ fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Op }, // 10/12/16-bit semi-planar 4:2:0 / 4:2:2: Y is 2 bytes/sample // (high-bit-depth packed in 16-bit). UV interleaved at horizontally- - // subsampled chroma with 4 bytes per chroma pair (2 bytes U + 2 bytes - // V) → both planes have row width == 2 * frame_width. + // subsampled chroma with `ceil(W/2)` U+V pairs at 4 bytes per pair + // (2 bytes U + 2 bytes V). Same odd-width rounding as the 8-bit + // chroma path, scaled by 2 bytes per sample. pix_fmt::P010LE | pix_fmt::P010BE | pix_fmt::P012LE @@ -315,7 +320,8 @@ fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Op | pix_fmt::P210LE | pix_fmt::P212LE | pix_fmt::P216LE => match plane { - 0 | 1 => Some(frame_width.checked_mul(2)?), + 0 => Some(frame_width.checked_mul(2)?), + 1 => Some(frame_width.div_ceil(2).checked_mul(4)?), _ => None, }, // 10/12/16-bit 4:4:4 semi-planar: Y is 2 bytes/sample; UV at full @@ -534,6 +540,39 @@ mod tests { assert_eq!(plane_height_for(pix_fmt::NV12, 2, 1080), None); } + /// 4:2:0 / 4:2:2 chroma planes carry `ceil(W/2)` U+V pairs per row. + /// For odd `W`, dropping the round-up silently truncates the last chroma + /// sample — and the safe row slice would expose a buffer one byte (8-bit) + /// or two bytes (high-bit-depth) shorter than the data FFmpeg actually + /// wrote. Y planes and 4:4:4 chroma planes are unaffected because their + /// row count is just `W` or a fixed multiple of `W`. + #[test] + fn plane_row_bytes_rounds_up_chroma_for_odd_widths() { + // 8-bit subsampled chroma — odd W gains one byte (the missing sample + // pair). 
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1921), Some(1922)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV21, 1, 1921), Some(1922)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV16, 1, 1921), Some(1922)); + // High-bit-depth subsampled chroma — odd W gains two bytes. + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P010BE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P012LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P016LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P210LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P212LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P216LE, 1, 1921), Some(3844)); + // Y planes always at full width regardless of subsampling. + assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 0, 1921), Some(1921)); + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 0, 1921), Some(3842)); + // 4:4:4 chroma is at full horizontal resolution — no rounding. + assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 1, 1921), Some(3842)); + assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 1, 1921), Some(7684)); + // Even widths must still match the original (pre-fix) values so the + // change is purely additive on the dominant code path. + assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1920), Some(3840)); + } + #[test] fn plane_row_bytes_table_covers_supported_formats() { // 8-bit 4:2:0 / 4:2:2 — both planes at width. 
From bedf83627acb7874cfe33b1e32cb225b0ab51b91 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 11:51:14 +1200 Subject: [PATCH 20/27] update --- src/decoder.rs | 84 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index 87fe847..b99e3f7 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -1268,10 +1268,17 @@ fn drain_into_pending( /// Approximate resident size of a CPU frame: sum of `linesize[plane] * /// plane_height` across populated planes. /// -/// Returns `None` for pixel formats not in our chroma-subsampling table, -/// so the caller can refuse to queue an allocation it can't account for. -/// Returning 0 for unknown formats would silently bypass the byte cap and -/// let an unbounded number of large frames into `pending_frames`. +/// Returns `None` for pixel formats not in our chroma-subsampling table or +/// for frames whose `linesize` is negative — both signal an allocation we +/// cannot account for, so the caller refuses to queue them. Returning 0 +/// in either case would silently bypass the byte cap and let an unbounded +/// number of large frames into `pending_frames`. +/// +/// Distinguishes `linesize == 0` (FFmpeg's sentinel for "no more populated +/// planes" — terminates the scan) from `linesize < 0` (FFmpeg's vertically- +/// flipped layout — `Frame::row` rejects those as unusable, so queueing one +/// during probe replay would only delay the failure to the consumer side +/// while wasting `|linesize| * plane_h` bytes of unaccounted memory). fn cpu_frame_bytes(frame: &frame::Video) -> Option { // SAFETY: AVFrame.height / format / linesize are c_int reads. 
let (height, pix_fmt, linesizes) = unsafe { @@ -1281,9 +1288,17 @@ fn cpu_frame_bytes(frame: &frame::Video) -> Option<usize> { let mut total: usize = 0; let mut any_plane = false; for (plane, linesize) in linesizes.iter().enumerate() { - if *linesize <= 0 { + if *linesize == 0 { + // End of populated planes — FFmpeg zeroes the trailing entries. break; } + if *linesize < 0 { + // Vertically-flipped layout — refuse to size so `drain_into_pending` + // fails the candidate. The same pre-fix code path silently returned + // `Some(0)` for a frame whose first plane was negative, allowing up + // to MAX_PROBE_PENDING_FRAMES frames of unaccounted memory. + return None; + } any_plane = true; let stride = *linesize as usize; // If we can't size *any* populated plane, the format is outside our @@ -1406,4 +1421,63 @@ mod tests { let packet = Packet::new(64); assert_eq!(packet_side_data_bytes(&packet), 0); } + + /// `cpu_frame_bytes` must refuse to size a frame whose first plane has + /// a negative `linesize`. Pre-fix, the loop break treated negative the + /// same as zero (FFmpeg's "no more populated planes" sentinel), so a + /// vertically-flipped frame returned `Some(0)` and `drain_into_pending` + /// would queue it as a 0-byte allocation — letting up to + /// `MAX_PROBE_PENDING_FRAMES` such frames bypass the configured byte + /// budget entirely. + #[test] + fn cpu_frame_bytes_rejects_negative_first_plane_linesize() { + let mut f = frame::Video::empty(); + // SAFETY: f is freshly allocated; we set `format` to NV12 and the + // first plane's linesize negative (FFmpeg's vertical-flip convention). + // No backing data buffer is allocated — cpu_frame_bytes must reject + // before any pointer dereference. 
+ unsafe { + let raw = f.as_mut_ptr(); + (*raw).format = crate::pix_fmt::NV12; + (*raw).width = 1920; + (*raw).height = 1080; + (*raw).linesize[0] = -1920; + (*raw).linesize[1] = -1920; + } + assert!( + cpu_frame_bytes(&f).is_none(), + "negative linesize must be unsizeable, not Some(0)" + ); + } + + /// Sanity-check the positive path: a synthesized NV12 frame with valid + /// linesizes must report the sum across populated planes (Y full height + /// + UV half height). + #[test] + fn cpu_frame_bytes_sums_populated_planes() { + let mut f = frame::Video::empty(); + let stride = 1920usize; + let height = 1080usize; + // SAFETY: same scheme as above; we only mutate primitive struct fields. + unsafe { + let raw = f.as_mut_ptr(); + (*raw).format = crate::pix_fmt::NV12; + (*raw).width = 1920; + (*raw).height = height as i32; + (*raw).linesize[0] = stride as i32; + (*raw).linesize[1] = stride as i32; + } + let expected = stride * height + stride * (height / 2); + assert_eq!(cpu_frame_bytes(&f), Some(expected)); + } + + /// A frame with only a zero linesize in plane 0 is "no populated + /// planes" — must return `Some(0)`, not `None`. Distinguishes the + /// FFmpeg sentinel from the vertically-flipped layout. + #[test] + fn cpu_frame_bytes_zero_first_plane_returns_zero() { + let f = frame::Video::empty(); + // Default-allocated empty AVFrame already has all linesizes zero. 
+ assert_eq!(cpu_frame_bytes(&f), Some(0)); + } } From 5447670bf89d3b693fdcb856a3376bda12cbabe5 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 12:37:20 +1200 Subject: [PATCH 21/27] update --- src/decoder.rs | 97 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index b99e3f7..1783924 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -404,8 +404,14 @@ impl VideoDecoder { "hwdecode: probe window exceeded caps without first frame; \ abandoning fallback safety net" ); + // Abandon the *future* probe-buffering only. `pending_frames` + // belong to the currently active backend (possibly the + // candidate `advance_probe` committed earlier in this same + // `send_packet` call) and are valid output the caller will + // dequeue via `receive_frame`. Clearing them here would + // silently drop initial frames at exactly the cap-overflow / + // OOM-stress paths. self.probe = None; - self.pending_frames.clear(); } else { // Use the checked clone — ffmpeg-next's `Packet::clone` // discards av_packet_ref's return value and would silently @@ -420,8 +426,10 @@ impl VideoDecoder { error = %e, "hwdecode: packet clone failed for probe history; abandoning fallback safety net" ); + // Same reasoning as the cap-overflow branch above: + // `pending_frames` are owned by the active backend, not + // the probe buffer, so they survive abandonment. self.probe = None; - self.pending_frames.clear(); } } } @@ -1480,4 +1488,89 @@ mod tests { // Default-allocated empty AVFrame already has all linesizes zero. assert_eq!(cpu_frame_bytes(&f), Some(0)); } + + /// Probe-abandon paths in `send_packet` (cap exceeded, packet clone + /// failed) must not drop frames already queued in `pending_frames`. 
+ /// Those frames belong to the currently active backend — possibly a + /// candidate that `advance_probe` just committed earlier in the same + /// `send_packet` call — and are valid output the caller will dequeue + /// via `receive_frame`. + /// + /// Pre-fix, both abandon branches called `pending_frames.clear()` + /// alongside `self.probe = None;`, silently dropping initial frames at + /// exactly the cap-overflow / OOM-stress paths. + /// + /// Live HW required: a real `VideoDecoder` is the only way to construct + /// a valid `DecoderState` (its `Drop` invokes FFmpeg cleanup), and + /// `send_packet` must reach the Ok branch on a real decoder for the + /// cap check to fire. + #[test] + #[ignore = "requires HWDECODE_SAMPLE_VIDEO and a working hardware backend"] + fn cap_overflow_preserves_pending_frames_from_active_backend() { + use ffmpeg_next::{format, media}; + + let path = std::env::var_os("HWDECODE_SAMPLE_VIDEO") + .expect("HWDECODE_SAMPLE_VIDEO must be set for this test"); + + ffmpeg_next::init().expect("ffmpeg init"); + let mut input = format::input(&path).expect("open input"); + let stream_index = input + .streams() + .best(media::Type::Video) + .expect("video stream") + .index(); + let stream_params = input + .streams() + .best(media::Type::Video) + .expect("video stream") + .parameters(); + + let mut decoder = VideoDecoder::open(stream_params).expect("open decoder"); + assert!( + decoder.probe.is_some(), + "probe must be active immediately after open" + ); + + // Inject sentinel frames as if `advance_probe` had drained them from + // a freshly-committed candidate during this same send_packet call. + decoder.pending_frames.push_back(frame::Video::empty()); + decoder.pending_frames.push_back(frame::Video::empty()); + let pending_before = decoder.pending_frames.len(); + + // Fast-forward the probe state to the byte cap so the next successful + // send_packet trips the cap-overflow branch. 
+ decoder + .probe + .as_mut() + .expect("probe present") + .buffered_bytes = MAX_PROBE_PACKET_BYTES; + + // Find the first video packet and feed it. We don't care whether the + // underlying decoder actually accepts it cleanly; we only need to + // exercise the Ok branch's cap-overflow accounting at least once. + let mut hit_ok = false; + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + if decoder.send_packet(&packet).is_ok() { + hit_ok = true; + break; + } + } + assert!( + hit_ok, + "expected at least one send_packet to succeed and trigger the cap-overflow branch" + ); + + assert!( + decoder.probe.is_none(), + "probe must be abandoned after cap overflow" + ); + assert_eq!( + decoder.pending_frames.len(), + pending_before, + "pending_frames belong to the active backend; abandon must not drop them" + ); + } } From 88a84d49aa1b7631c8162a507c82eceeede182ed Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 12:59:22 +1200 Subject: [PATCH 22/27] update --- src/frame.rs | 68 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/src/frame.rs b/src/frame.rs index 9b651af..4642184 100644 --- a/src/frame.rs +++ b/src/frame.rs @@ -199,30 +199,42 @@ impl Frame { })) } - /// Raw base pointer to `plane`'s allocation, or `None` if the plane is - /// out of range or its data pointer is null. + /// Raw base pointer to `plane`'s allocation, or `None` if the plane + /// fails the same layout validation [`Self::row`] applies. /// - /// The returned pointer is valid for `stride(plane) * plane_height` - /// bytes, **but only the first [`Self::row_bytes`]`(plane)` bytes of - /// each row are guaranteed to be initialized.** The trailing per-row - /// alignment padding is uninitialized; callers performing wide SIMD - /// loads that read past `row_bytes` must mask the result and never - /// surface those bytes through a safe `&[u8]`. 
+ /// Returns `None` whenever any of the following is true: + /// - The plane index is out of range (`plane >= planes()`). + /// - The frame's pixel format is not in the supported HW-output set. + /// - `linesize[plane] <= 0`. **In particular, FFmpeg permits negative + /// linesizes for vertically-flipped frames with `data[n]` pointing + /// at the *end* of the image. Returning that pointer with the + /// advertised "valid for `stride * plane_h` bytes forward" contract + /// would let a downstream converter walk past the buffer.** This + /// accessor refuses the layout instead of handing back a pointer the + /// caller cannot safely interpret as forward-addressable. + /// - `height <= 0`, the data pointer is null, `row_bytes > stride`, or + /// the total plane size would overflow `isize::MAX`. + /// + /// On `Some(ptr)` the pointer is valid for + /// `stride(plane) * plane_height` *forward-addressable* bytes, and + /// only the first [`Self::row_bytes`]`(plane)` bytes of each row are + /// guaranteed to be initialized. The trailing per-row alignment padding + /// is uninitialized; callers performing wide SIMD loads that read past + /// `row_bytes` must mask the result and never surface those bytes + /// through a safe `&[u8]`. /// /// This accessor exists for downstream pixel-format converters /// (`colconv`) that work in `(ptr, stride, width, height)` quadruples; /// safe code should prefer [`Self::row`] / [`Self::rows`]. pub fn as_ptr(&self, plane: usize) -> Option<*const u8> { - if plane >= self.planes() { - return None; - } - // SAFETY: plane index bounds-checked; AVFrame.data is `[*mut u8; 8]`. - let p = unsafe { (*self.inner.as_ptr()).data[plane] }; - if p.is_null() { - None - } else { - Some(p) - } + // Share the full plane-layout validation so the unsafe escape hatch + // never escapes a layout that `row()` / `rows()` reject. 
Returning a + // pointer for a negative-stride frame (FFmpeg's vertical-flip + // convention, where `data[n]` points at the *end* of the image) + // would invite forward-walking out-of-bounds reads from a caller + // that trusts the documented "valid for stride × plane_h bytes" + // contract. + self.plane_info(plane).map(|info| info.plane_ptr) } /// Read every per-plane field needed by the row accessors with the @@ -403,6 +415,11 @@ mod tests { /// convention) and assert the row accessors refuse to construct a slice. /// Without the linesize > 0 check, the negative `i32 as usize` would /// produce a huge positive length and `from_raw_parts` would be UB. + /// + /// `as_ptr` shares the same validation — handing back the data pointer + /// for a negative-stride frame would let a downstream converter + /// following the "valid for stride × plane_h bytes forward" contract + /// walk past the buffer. #[test] fn row_returns_none_for_negative_linesize() { let mut f = Frame::empty().expect("alloc"); @@ -419,6 +436,12 @@ mod tests { assert!(f.row(0, 0).is_none()); assert!(f.row(1, 0).is_none()); assert!(f.rows(0).is_none()); + assert!( + f.as_ptr(0).is_none(), + "as_ptr must share row()/rows() validation — a negative-stride \ + frame must not leak a forward-readable plane pointer" + ); + assert!(f.as_ptr(1).is_none()); } #[test] @@ -498,6 +521,15 @@ mod tests { assert!(r.iter().all(|&b| b == 0xAA)); } + // `as_ptr` accepts the valid layout and returns the same base pointer + // FFmpeg wrote into `data[0]`, so SIMD callers can reach the plane + // through the documented unsafe contract. + assert_eq!( + f.as_ptr(0), + Some(buf as *const u8), + "as_ptr must surface the plane base for a valid forward-stride frame" + ); + // Out-of-range row index returns None instead of panicking. 
assert!(f.row(0, height).is_none()); From 3e10b96091961005b808a37c9ad97682742b5920 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 15:21:16 +1200 Subject: [PATCH 23/27] update --- src/decoder.rs | 186 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 154 insertions(+), 32 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index 1783924..c609e89 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -1215,45 +1215,32 @@ fn drain_into_pending( } let mut cpu = alloc_av_frame()?; // SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data - // allocates buffers on `cpu`. copy_props moves timing/side data over. + // allocates buffers on `cpu`. We deliberately defer + // `av_frame_copy_props` until *after* the cap check below — that + // call deep-copies every AVFrameSideData entry, which a malicious + // stream can size in megabytes; allocating then discarding the + // copies on cap rejection is wasted work and a real allocator + // pressure source. unsafe { let r1 = av_hwframe_transfer_data(cpu.as_mut_ptr(), hw_buf.as_ptr(), 0); if r1 < 0 { return Err(ffmpeg_next::Error::from(r1)); } - let r2 = av_frame_copy_props(cpu.as_mut_ptr(), hw_buf.as_ptr()); - if r2 < 0 { - return Err(ffmpeg_next::Error::from(r2)); - } } - // Post-transfer accounting: size the frame and confirm we can fit - // it without exceeding the byte budget. Both cap-hit and inability - // to size the frame are treated as candidate failures, so the byte - // budget is *strict* — we never queue a frame we can't account for. - match cpu_frame_bytes(&cpu) { - Some(bytes) => { - let new_total = pending_bytes.saturating_add(bytes); - if new_total > max_bytes { - tracing::warn!( - pending_bytes = *pending_bytes, - frame_bytes = bytes, - max_bytes, - "hwdecode: queueing this frame would exceed byte cap; failing candidate replay" - ); - // cpu drops here. 
- return Err(ffmpeg_next::Error::Other { - errno: libc::ENOMEM, - }); - } - *pending_bytes = new_total; - pending.push_back(cpu); - } + // Pre-copy_props accounting: size the frame's pixel storage and + // its (yet-to-be-copied) side data. Both cap-hit and inability to + // size the pixel layout are treated as candidate failures, so the + // byte budget is *strict* — we never queue a frame we can't fully + // account for, and we never pay the side-data deep copy on a + // frame we'd immediately drop. + let pixel_bytes = match cpu_frame_bytes(&cpu) { + Some(b) => b, None => { - // Unknown pix_fmt — we cannot bound this frame's contribution - // against the byte cap, so up to MAX_PROBE_PENDING_FRAMES of - // them could exhaust memory. Fail the candidate so probing - // tries the next backend rather than queueing untracked - // allocations. + // Unknown pix_fmt or vertically-flipped layout — we cannot + // bound this frame's contribution against the byte cap, so up + // to MAX_PROBE_PENDING_FRAMES of them could exhaust memory. + // Fail the candidate so probing tries the next backend + // rather than queueing untracked allocations. // SAFETY: AVFrame.format is c_int, safe to read. let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format }; tracing::warn!( @@ -1265,7 +1252,34 @@ fn drain_into_pending( errno: libc::ENOMEM, }); } + }; + let side_bytes = frame_side_data_bytes(hw_buf); + let frame_bytes = pixel_bytes.saturating_add(side_bytes); + let new_total = pending_bytes.saturating_add(frame_bytes); + if new_total > max_bytes { + tracing::warn!( + pending_bytes = *pending_bytes, + pixel_bytes, + side_bytes, + max_bytes, + "hwdecode: queueing this frame (pixels + side data) would exceed byte cap; \ + failing candidate replay" + ); + // cpu drops here without paying av_frame_copy_props. + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); } + // Cap check passed — now safe to pay the side-data deep copy. 
+ // SAFETY: cpu and hw_buf are both valid AVFrames we own. + unsafe { + let r2 = av_frame_copy_props(cpu.as_mut_ptr(), hw_buf.as_ptr()); + if r2 < 0 { + return Err(ffmpeg_next::Error::from(r2)); + } + } + *pending_bytes = new_total; + pending.push_back(cpu); } Err(e) if is_transient(&e) => return Ok(()), Err(e) => return Err(e), @@ -1273,6 +1287,42 @@ fn drain_into_pending( } } +/// Sum of `AVFrameSideData[i].size` across every entry attached to +/// `frame`. `av_frame_copy_props` performs a deep copy of every side +/// data buffer (allocates a fresh `AVBufferRef` per entry), so a +/// candidate decoder that produces large per-frame metadata (HDR +/// mastering display info, A53 closed captions, ICC profiles, dynamic +/// HDR, motion vectors, …) would otherwise bypass the +/// `max_probe_pending_bytes` cap because [`cpu_frame_bytes`] only +/// accounts for pixel-plane storage. +/// +/// Reads only the `size` field of each `AVFrameSideData` — never +/// constructs the bindgen `AVFrameSideDataType` enum, so unknown side- +/// data types from a future FFmpeg do not invoke UB. +fn frame_side_data_bytes(frame: &frame::Video) -> usize { + // SAFETY: AVFrame.side_data is `*mut *mut AVFrameSideData` and + // nb_side_data is `c_int`; both are raw struct fields safe to read. + // Field projection through the indirected pointer touches only the + // primitive `usize` `.size` field (never `type_`). + unsafe { + let raw = frame.as_ptr(); + let nb = (*raw).nb_side_data; + let arr = (*raw).side_data; + if arr.is_null() || nb <= 0 { + return 0; + } + let mut total: usize = 0; + for i in 0..(nb as usize) { + let entry = *arr.add(i); + if entry.is_null() { + continue; + } + total = total.saturating_add((*entry).size); + } + total + } +} + /// Approximate resident size of a CPU frame: sum of `linesize[plane] * /// plane_height` across populated planes. 
/// @@ -1489,6 +1539,78 @@ mod tests { assert_eq!(cpu_frame_bytes(&f), Some(0)); } + /// `av_frame_copy_props` deep-copies every AVFrameSideData attached + /// to the source frame. `frame_side_data_bytes` must surface that + /// retention so `drain_into_pending` can charge it against + /// `max_probe_pending_bytes` — otherwise a stream with megabytes of + /// per-frame metadata can queue up to `MAX_PROBE_PENDING_FRAMES` + /// frames and overshoot the configured cap by orders of magnitude. + #[test] + fn frame_side_data_bytes_counts_attached_buffers() { + use ffmpeg_next::ffi::{av_frame_new_side_data, AVFrameSideDataType}; + + const SIDE_DATA_SIZE: usize = 1024 * 1024; // 1 MiB + + let mut f = frame::Video::empty(); + // SAFETY: f is freshly allocated; av_frame_new_side_data attaches a + // fresh `SIDE_DATA_SIZE`-byte buffer of the requested type and returns + // a pointer to the entry (or NULL on OOM). + let p = unsafe { + av_frame_new_side_data( + f.as_mut_ptr(), + AVFrameSideDataType::AV_FRAME_DATA_SEI_UNREGISTERED, + SIDE_DATA_SIZE, + ) + }; + assert!(!p.is_null(), "av_frame_new_side_data returned NULL"); + + let bytes = frame_side_data_bytes(&f); + assert!( + bytes >= SIDE_DATA_SIZE, + "side-data accounting must include the attached buffer; got {bytes}" + ); + } + + #[test] + fn frame_side_data_bytes_is_zero_for_bare_frame() { + let f = frame::Video::empty(); + assert_eq!(frame_side_data_bytes(&f), 0); + } + + /// Multiple side-data entries must be summed, not just the first. + #[test] + fn frame_side_data_bytes_sums_all_entries() { + use ffmpeg_next::ffi::{av_frame_new_side_data, AVFrameSideDataType}; + + const ENTRY_A: usize = 256 * 1024; // 256 KiB + const ENTRY_B: usize = 512 * 1024; // 512 KiB + + let mut f = frame::Video::empty(); + // Two distinct types so neither call replaces the other. 
+ let p1 = unsafe { + av_frame_new_side_data( + f.as_mut_ptr(), + AVFrameSideDataType::AV_FRAME_DATA_SEI_UNREGISTERED, + ENTRY_A, + ) + }; + let p2 = unsafe { + av_frame_new_side_data( + f.as_mut_ptr(), + AVFrameSideDataType::AV_FRAME_DATA_A53_CC, + ENTRY_B, + ) + }; + assert!(!p1.is_null() && !p2.is_null()); + + let bytes = frame_side_data_bytes(&f); + assert!( + bytes >= ENTRY_A + ENTRY_B, + "must sum across all side-data entries; got {bytes}, expected at least {}", + ENTRY_A + ENTRY_B + ); + } + /// Probe-abandon paths in `send_packet` (cap exceeded, packet clone /// failed) must not drop frames already queued in `pending_frames`. /// Those frames belong to the currently active backend — possibly a From cb8e9e63f2d949fbe9025dabe87c4e8a28d2168f Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 16:27:38 +1200 Subject: [PATCH 24/27] update --- src/decoder.rs | 177 +++++++++++++++++++++++++++++++++++++++---------- src/ffi.rs | 72 +++++++++++++++++--- 2 files changed, 203 insertions(+), 46 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index c609e89..bac87b2 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -842,14 +842,18 @@ impl VideoDecoder { let mut ctx = build_codec_context(&parameters)?; let av_type = backend.av_hwdevice_type(); - // Verify the codec advertises this hwaccel. We do *not* read the - // codec's advertised pix_fmt — we use the hardcoded constant from - // `Backend::hw_pixel_format` so no FFmpeg-supplied enum value is ever - // interpreted as `AVPixelFormat`. - if !codec_supports_hwaccel(unsafe { codec.as_ptr() }, av_type) { + // Verify the codec advertises this hwaccel **with the exact HW pix_fmt + // we're about to wire up in `get_format`**. 
FFmpeg's HW config table + // is keyed per (device_type, pix_fmt); a codec can advertise the same + // device with several HW pix_fmts, so matching only on device_type + // would let probing succeed for a backend whose pix_fmt the codec + // never offers — the failure would then surface deep inside the + // probe/decode loop. Matching the exact pix_fmt keeps the strict + // `get_format` honest and gives `open_with` a clean rejection. + let hw_pix_fmt = backend.hw_pixel_format(); + if !codec_supports_hwaccel(unsafe { codec.as_ptr() }, av_type, hw_pix_fmt as i32) { return Err(Error::BackendUnsupportedByCodec(backend)); } - let hw_pix_fmt = backend.hw_pixel_format(); // Create the device context. let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut(); @@ -868,6 +872,19 @@ impl VideoDecoder { wanted: hw_pix_fmt, wanted_int: hw_pix_fmt as i32, })); + // RAII guard: from now until the end-of-function `into_owned()`, every + // early return — `av_buffer_ref` failure, `open_as` failure, codec_type + // mismatch, or any future error path added between here and the + // `DecoderState` construction — frees `hw_device_ref` and + // `callback_state` via the guard's Drop. Without it, each error site + // had to remember to clean up these two FFI-owned resources by hand; + // the codec_type-mismatch branch was missed and silently leaked one + // device ref + one heap allocation per bad input. + let guard = PartialBuildState { + hw_device_ref, + callback_state, + }; + // SAFETY: ctx is a freshly-constructed AVCodecContext we own; // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's // use (we keep our own ref in `hw_device_ref` for cleanup). @@ -876,20 +893,18 @@ impl VideoDecoder { // HW-flagged setup but no actual device reference. let device_ref_for_ctx = unsafe { av_buffer_ref(hw_device_ref) }; if device_ref_for_ctx.is_null() { - // SAFETY: rolling back what we just allocated above. 
hw_device_ref - // is non-null (we checked after av_hwdevice_ctx_create); callback_state - // was just freshly Box::into_raw'd. - unsafe { - let mut hw = hw_device_ref; - av_buffer_unref(&mut hw); - drop(Box::from_raw(callback_state)); - } + // guard's Drop frees hw_device_ref (the first ref) and callback_state. return Err(Error::Ffmpeg(ffmpeg_next::Error::Other { errno: libc::ENOMEM, })); } // SAFETY: device_ref_for_ctx is a valid AVBufferRef* from av_buffer_ref; - // ctx is freshly built and owned by us. + // ctx is freshly built and owned by us. After this point ctx aliases + // `callback_state` via `opaque` (FFmpeg never frees opaque, so + // `callback_state` ownership stays with us / the guard) and aliases + // `device_ref_for_ctx` (the second ref) via `hw_device_ctx` (FFmpeg + // unrefs that on codec context drop, independent of the guard's first + // ref). unsafe { let raw = ctx.as_mut_ptr(); (*raw).hw_device_ctx = device_ref_for_ctx; @@ -897,8 +912,9 @@ impl VideoDecoder { (*raw).get_format = Some(get_hw_format); } - // Open the decoder. On any failure, release the resources we just - // allocated so we don't leak. + // Open the decoder. On failure `ctx`/`opened` Drop releases the codec + // context (and via that the second device ref); the guard releases the + // first device ref and the callback state. // // We deliberately bypass `Opened::video()` because it calls // `Context::medium()`, which reads `AVCodecContext.codec_type` as the @@ -906,24 +922,7 @@ impl VideoDecoder { // systematically removing. Instead: validate `codec_type` as a raw // `c_int` ourselves, then construct the `decoder::Video` wrapper // directly via its public tuple field. - let opened = match ctx.decoder().open_as(codec) { - Ok(o) => o, - Err(e) => { - // SAFETY: we either allocated these in this function above or - // they are null; av_buffer_unref / Box::from_raw handle null - // explicitly (we check first). 
- unsafe { - let mut hw = hw_device_ref; - if !hw.is_null() { - av_buffer_unref(&mut hw); - } - if !callback_state.is_null() { - drop(Box::from_raw(callback_state)); - } - } - return Err(Error::Ffmpeg(e)); - } - }; + let opened = ctx.decoder().open_as(codec).map_err(Error::Ffmpeg)?; // Validate codec_type as a raw integer — never construct AVMediaType // from an unvalidated runtime value. @@ -935,7 +934,8 @@ impl VideoDecoder { if codec_type_int != video_type_int { // Not a video codec context — surface the same error // `Opened::video()` would have, without going through enum - // construction. Cleanup runs via `opened`'s Drop. + // construction. `opened`'s Drop releases the codec context; the + // guard releases the first hw_device_ref and the callback state. return Err(Error::Ffmpeg(ffmpeg_next::Error::InvalidData)); } // SAFETY of construction: `decoder::Video` is `pub struct Video(pub Opened)`. @@ -943,6 +943,9 @@ impl VideoDecoder { // `Opened::video()` does on success, just without the enum read. let opened = ffmpeg_next::decoder::Video(opened); + // Disarm the guard and transfer ownership of both resources into the + // returned DecoderState (whose own Drop handles their lifetime). + let (hw_device_ref, callback_state) = guard.into_owned(); Ok(DecoderState { inner: ManuallyDrop::new(opened), backend, @@ -952,6 +955,55 @@ impl VideoDecoder { } } +/// RAII guard for the partially-owned FFmpeg state that +/// [`VideoDecoder::build_state`] holds between the +/// `av_hwdevice_ctx_create` and `Box::into_raw(CallbackState)` +/// allocations and the final `DecoderState` construction. 
+/// +/// If `build_state` returns `Err` for any reason in that window +/// (`av_buffer_ref` ENOMEM, `open_as` failure, codec_type mismatch, or +/// any future error path), this guard's `Drop` releases +/// `hw_device_ref` — the first ref returned by `av_hwdevice_ctx_create`, +/// distinct from the second ref FFmpeg unrefs when the codec context +/// drops — and the boxed `CallbackState`, which FFmpeg never touches +/// because `AVCodecContext::opaque` is purely user-owned. +/// +/// Successful construction calls [`Self::into_owned`] to disarm the +/// guard and hand both pointers to the new `DecoderState`. +struct PartialBuildState { + hw_device_ref: *mut AVBufferRef, + callback_state: *mut CallbackState, +} + +impl PartialBuildState { + /// Disarm the guard: return the owned pointers and replace the guard's + /// fields with null so its Drop is a no-op. + fn into_owned(mut self) -> (*mut AVBufferRef, *mut CallbackState) { + let hw = std::mem::replace(&mut self.hw_device_ref, ptr::null_mut()); + let cb = std::mem::replace(&mut self.callback_state, ptr::null_mut()); + (hw, cb) + } +} + +impl Drop for PartialBuildState { + fn drop(&mut self) { + // SAFETY: pointers are either freshly allocated by `build_state` (via + // `av_hwdevice_ctx_create` and `Box::into_raw`) or null after + // `into_owned`. Both `av_buffer_unref` and `Box::from_raw` need the + // null check we apply here; both are otherwise sound on resources we + // own. + unsafe { + if !self.hw_device_ref.is_null() { + let mut hw = self.hw_device_ref; + av_buffer_unref(&mut hw); + } + if !self.callback_state.is_null() { + drop(Box::from_raw(self.callback_state)); + } + } + } +} + /// Download a HW frame into a CPU [`Frame`]. Always unrefs the destination /// first so reuse across resolution changes is safe. unsafe fn transfer_hw_frame( @@ -1611,6 +1663,59 @@ mod tests { ); } + /// `PartialBuildState`'s `Drop` must be a no-op when both pointers are + /// null — the disarmed-by-`into_owned` post-state. 
A panic / double-free + /// here would break the success path of every `build_state` call. + #[test] + fn partial_build_state_drop_is_no_op_on_null_pointers() { + let _g = PartialBuildState { + hw_device_ref: ptr::null_mut(), + callback_state: ptr::null_mut(), + }; + // Drops at end of scope. Test passes if it doesn't panic / crash. + } + + /// `into_owned` must return the original pointers and disarm the guard + /// (so the guard's Drop becomes a no-op and the caller can safely + /// transfer ownership to `DecoderState` without double-freeing). + #[test] + fn partial_build_state_into_owned_disarms_and_returns_originals() { + use ffmpeg_next::ffi::{av_buffer_alloc, av_buffer_unref, AVPixelFormat}; + + // SAFETY: av_buffer_alloc returns a fresh AVBufferRef* with refcount + // 1, or NULL on OOM. We free it ourselves below (after into_owned + // disarms the guard). + let hw_ptr = unsafe { av_buffer_alloc(64) }; + assert!(!hw_ptr.is_null(), "av_buffer_alloc(64) returned NULL"); + let cb_ptr = Box::into_raw(Box::new(CallbackState { + wanted: AVPixelFormat::AV_PIX_FMT_NONE, + wanted_int: AVPixelFormat::AV_PIX_FMT_NONE as i32, + })); + + let g = PartialBuildState { + hw_device_ref: hw_ptr, + callback_state: cb_ptr, + }; + let (hw_back, cb_back) = g.into_owned(); + assert_eq!( + hw_back, hw_ptr, + "into_owned must return the original device ref" + ); + assert_eq!( + cb_back, cb_ptr, + "into_owned must return the original callback box" + ); + + // Guard is now disarmed (its Drop ran with null pointers as soon as + // into_owned consumed it). We own the pointers and must free them. + // SAFETY: hw_ptr and cb_ptr are still the freshly-allocated values. + unsafe { + let mut hw = hw_back; + av_buffer_unref(&mut hw); + drop(Box::from_raw(cb_back)); + } + } + /// Probe-abandon paths in `send_packet` (cap exceeded, packet clone /// failed) must not drop frames already queued in `pending_frames`. 
/// Those frames belong to the currently active backend — possibly a diff --git a/src/ffi.rs b/src/ffi.rs index 794d474..04aa50f 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -84,15 +84,28 @@ pub(crate) unsafe extern "C" fn get_hw_format( } /// Walk the codec's `AVCodecHWConfig` table and return whether the codec -/// advertises support for `device_type` via the `HW_DEVICE_CTX` setup method. +/// advertises support for `device_type` **with** `wanted_pix_fmt` via the +/// `HW_DEVICE_CTX` setup method. /// -/// We do not return the codec's advertised `pix_fmt` — we know it already -/// from [`crate::backend::Backend::hw_pixel_format`] (a hardcoded constant -/// from our bindings). All reads from the FFmpeg-supplied `AVCodecHWConfig` -/// are performed as raw integers via `addr_of!` + `ptr::read::<i32>` to -/// avoid copying or interpreting enum-typed fields whose runtime values -/// might not match our build's discriminant set. -pub(crate) fn codec_supports_hwaccel(codec: *const AVCodec, device_type: AVHWDeviceType) -> bool { +/// FFmpeg's HW config table is keyed per (device_type, pix_fmt) pair: a +/// codec can advertise the same device with several different hardware +/// pixel formats (e.g. VAAPI codecs that offer both `AV_PIX_FMT_VAAPI` +/// and `AV_PIX_FMT_DRM_PRIME`). Matching only on `device_type` would let +/// us proceed to install a strict `get_format` callback for a format the +/// codec never advertises, and the failure would surface deep inside the +/// probe / decode path instead of up front. Requiring the codec to +/// advertise the **exact** pix_fmt our `Backend` uses keeps the strict +/// `get_format` honest and gives `open_with` a clean rejection signal. +/// +/// All reads from the FFmpeg-supplied `AVCodecHWConfig` are performed as +/// raw integers via `addr_of!` + `ptr::read::<i32>` to avoid copying or +/// interpreting enum-typed fields whose runtime values might not match +/// our build's discriminant set.
+pub(crate) fn codec_supports_hwaccel( + codec: *const AVCodec, + device_type: AVHWDeviceType, + wanted_pix_fmt: i32, +) -> bool { debug_assert!(!codec.is_null()); let device_type_int = device_type as i32; let mut i = 0; @@ -106,15 +119,18 @@ pub(crate) fn codec_supports_hwaccel(codec: *const AVCodec, device_type: AVHWDev // (which would interpret `pix_fmt` and `device_type` as their enum types). // SAFETY: `cfg` is non-null and points to a valid `AVCodecHWConfig` for // the lifetime of the call; `addr_of!` projects to a sized field; the - // `*const i32` cast is sound because `methods` is `c_int` (i32) and + // `*const i32` cast is sound because `methods` is `c_int` (i32), // `device_type` is `AVHWDeviceType` (`#[repr(u32)]`, but FFmpeg's - // assigned values fit in i32 and the runtime layout is i32-sized). + // assigned values fit in i32 and the runtime layout is i32-sized), + // and `pix_fmt` is `AVPixelFormat` (`#[repr(i32)]`). let methods: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).methods)) }; let cfg_device_type_int: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).device_type) as *const i32) }; + let cfg_pix_fmt_int: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).pix_fmt) as *const i32) }; if methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0 && cfg_device_type_int == device_type_int + && cfg_pix_fmt_int == wanted_pix_fmt { return true; } @@ -217,4 +233,40 @@ mod tests { ); assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE); } + + /// `codec_supports_hwaccel` must reject a (device_type, pix_fmt) pair + /// that the codec does not advertise — even if the device alone is + /// listed. Without this check, the strict `get_format` callback would + /// be wired up for a HW pix_fmt the codec never offers and the failure + /// would surface deep inside the probe / decode path instead of at + /// `open_with` / probe-build time. 
+ /// + /// macOS-only: the test relies on FFmpeg's H.264 decoder advertising + /// `(AV_HWDEVICE_TYPE_VIDEOTOOLBOX, AV_PIX_FMT_VIDEOTOOLBOX)`, which is + /// only present in builds with VideoToolbox compiled in. + #[cfg(target_os = "macos")] + #[test] + fn codec_supports_hwaccel_requires_matching_pix_fmt() { + use ffmpeg_next::ffi::{avcodec_find_decoder, AVCodecID, AVHWDeviceType, AVPixelFormat}; + + // SAFETY: AV_CODEC_ID_H264 is a known constant in our build's + // `AVCodecID` discriminant set; constructing it does not invoke the + // bindgen-enum UB we worry about for runtime-derived ids. + let codec_ptr = unsafe { avcodec_find_decoder(AVCodecID::AV_CODEC_ID_H264) }; + assert!(!codec_ptr.is_null(), "H.264 decoder must be present"); + + let device = AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX; + let videotoolbox = AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX as i32; + let nv12 = AVPixelFormat::AV_PIX_FMT_NV12 as i32; + + assert!( + codec_supports_hwaccel(codec_ptr, device, videotoolbox), + "VideoToolbox + AV_PIX_FMT_VIDEOTOOLBOX must be advertised by FFmpeg's H.264 decoder" + ); + assert!( + !codec_supports_hwaccel(codec_ptr, device, nv12), + "VideoToolbox + AV_PIX_FMT_NV12 must NOT match the codec's HW config — \ + the strict get_format would have no offered HW format to return" + ); + } } From 65ae604a74556ee89a249e2e4c5125735a6006bb Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 16:51:15 +1200 Subject: [PATCH 25/27] update --- src/decoder.rs | 326 +++++++++++++++++++++++++++---------------------- 1 file changed, 179 insertions(+), 147 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index bac87b2..81db18a 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -7,10 +7,10 @@ use ffmpeg_next::{ Context, }, ffi::{ - av_buffer_ref, av_buffer_unref, av_frame_copy_props, av_frame_move_ref, av_frame_unref, - av_hwdevice_ctx_create, av_hwframe_transfer_data, av_packet_ref, avcodec_alloc_context3, - 
avcodec_free_context, avcodec_parameters_alloc, avcodec_parameters_copy, - avcodec_parameters_free, avcodec_parameters_to_context, AVBufferRef, AVCodec, AVMediaType, + av_buffer_ref, av_buffer_unref, av_frame_move_ref, av_frame_unref, av_hwdevice_ctx_create, + av_hwframe_transfer_data, av_packet_ref, avcodec_alloc_context3, avcodec_free_context, + avcodec_parameters_alloc, avcodec_parameters_copy, avcodec_parameters_free, + avcodec_parameters_to_context, AVBufferRef, AVCodec, AVFrame, AVMediaType, }, frame, Codec, Packet, Rational, }; @@ -109,6 +109,29 @@ const MAX_PROBE_PACKETS: usize = 256; /// gives untrusted media a hard ceiling. const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024; +/// Hard cap on the number of side-data entries we tolerate per buffered +/// packet. `av_packet_ref` allocates an `AVPacketSideData` descriptor and +/// an `AVBufferRef` per entry, so a packet stuffed with many tiny or +/// zero-sized entries can consume significant memory in descriptor / +/// allocator overhead even after [`packet_side_data_bytes`] charges +/// [`SIDE_DATA_ENTRY_OVERHEAD`] bytes per entry. Refusing to clone such +/// packets short-circuits the descriptor explosion path. +/// +/// Sized for legitimate streams (typical video packets carry 0-5 side- +/// data entries; SEI-heavy HEVC/AV1 maybe a dozen) while comfortably +/// rejecting weaponised input. +const MAX_PROBE_PACKET_SIDE_DATA_ENTRIES: usize = 64; + +/// Conservative per-side-data-entry overhead estimate used by both +/// [`packet_side_data_bytes`] and the budget accounting in +/// [`VideoDecoder::send_packet`]. Counts the `AVPacketSideData` +/// descriptor (24 bytes per the FFmpeg 8.x bindings), the `AVBufferRef` +/// FFmpeg allocates per entry, and a margin for malloc bookkeeping +/// (header bytes, alignment slack). Setting it on the high side keeps +/// the byte cap a true upper bound on retained memory; under-charging +/// would let many tiny entries slip past the cap. 
+const SIDE_DATA_ENTRY_OVERHEAD: usize = 80; + /// Maximum number of CPU frames we are willing to queue from a candidate /// during probe replay. Each frame is a fully-allocated CPU buffer /// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so @@ -389,18 +412,32 @@ impl VideoDecoder { if let Some(probe) = self.probe.as_mut() { // `try_clone_packet` calls `av_packet_ref`, which deep-copies // side data via `av_packet_copy_props`. The probe budget must - // include side-data bytes or a malicious stream can keep - // `packet.size()` tiny while attaching megabytes of side data - // per packet and inflate retention beyond the advertised cap. + // include both descriptor + ref overhead per side-data entry + // (via `packet_side_data_bytes`) and a hard cap on the entry + // count itself — without the count cap, a packet stuffed with + // many tiny entries can dominate retained memory before the + // byte cap is even close to firing. + let side_count = packet_side_data_count(packet); let pkt_size = packet.size().saturating_add(packet_side_data_bytes(packet)); let new_count = probe.buffered_packets.len() + 1; let new_bytes = probe.buffered_bytes.saturating_add(pkt_size); - if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES { + let entry_cap_exceeded = side_count > MAX_PROBE_PACKET_SIDE_DATA_ENTRIES; + if new_count > MAX_PROBE_PACKETS + || new_bytes > MAX_PROBE_PACKET_BYTES + || entry_cap_exceeded + { tracing::warn!( packets = new_count, bytes = new_bytes, + side_data_entries = side_count, max_packets = MAX_PROBE_PACKETS, max_bytes = MAX_PROBE_PACKET_BYTES, + max_side_data_entries = MAX_PROBE_PACKET_SIDE_DATA_ENTRIES, + trigger = if entry_cap_exceeded { + "side_data_entry_cap" + } else { + "byte_or_packet_cap" + }, "hwdecode: probe window exceeded caps without first frame; \ abandoning fallback safety net" ); @@ -1006,6 +1043,20 @@ impl Drop for PartialBuildState { /// Download a HW frame into a CPU [`Frame`]. 
Always unrefs the destination /// first so reuse across resolution changes is safe. +/// +/// Deliberately does **not** call `av_frame_copy_props`. That FFmpeg +/// helper deep-copies AVFrame side data (SEI, mastering display, ICC +/// profiles, dynamic HDR, etc.), the metadata dict, and bumps both +/// `opaque_ref` and `private_ref` on every receive — none of which +/// `Frame` exposes via its public accessors. On a crafted stream with +/// megabytes of per-frame metadata that would mean an unbounded +/// allocation per receive, with no caller-visible benefit. We instead +/// copy only the scalar fields the public API can read (today: `pts`); +/// pixel layout (`width`, `height`, `format`, `linesize`, `data`) is +/// already set by `av_hwframe_transfer_data`. If `Frame` ever grows +/// accessors for timing extras (`duration`, `time_base`, `pkt_dts`) or +/// color metadata, add those to `copy_frame_props_minimal` at the same +/// time. unsafe fn transfer_hw_frame( dst: &mut Frame, src: &mut frame::Video, @@ -1016,14 +1067,27 @@ unsafe fn transfer_hw_frame( if ret < 0 { return Err(ffmpeg_next::Error::from(ret)); } - let ret = av_frame_copy_props(dst.as_inner_mut().as_mut_ptr(), src.as_ptr()); - if ret < 0 { - return Err(ffmpeg_next::Error::from(ret)); - } + copy_frame_props_minimal(dst.as_inner_mut().as_mut_ptr(), src.as_ptr()); } Ok(()) } +/// Bounded substitute for `av_frame_copy_props`. Copies only the scalar +/// AVFrame fields the public `Frame` API needs from `src` to `dst` — +/// today just `pts`. Skips every allocating field (`av_dict_copy` for +/// `metadata`, `av_frame_new_side_data` + memcpy for each `side_data[i]`, +/// `av_buffer_replace` for `opaque_ref` / `private_ref`) so the cost is +/// O(1) per frame regardless of what the source attaches. +/// +/// # Safety +/// Both pointers must be valid `AVFrame` pointers we own; field +/// projection touches only POD scalars, no enums or buffer refs. 
+unsafe fn copy_frame_props_minimal(dst: *mut AVFrame, src: *const AVFrame) { + unsafe { + (*dst).pts = (*src).pts; + } +} + /// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame` /// and must not be treated as backend failures. fn is_transient(e: &ffmpeg_next::Error) -> bool { @@ -1181,8 +1245,12 @@ fn packet_side_data_bytes(packet: &Packet) -> usize { if arr.is_null() || nel <= 0 { return 0; } - let mut total: usize = 0; - for i in 0..(nel as usize) { + let count = nel as usize; + // Descriptor + AVBufferRef + allocator overhead per entry — without + // this, a packet stuffed with many zero-size entries could slip past + // `MAX_PROBE_PACKET_BYTES` purely on descriptor cost. + let mut total = count.saturating_mul(SIDE_DATA_ENTRY_OVERHEAD); + for i in 0..count { let entry = arr.add(i); total = total.saturating_add((*entry).size); } @@ -1190,6 +1258,20 @@ fn packet_side_data_bytes(packet: &Packet) -> usize { } } +/// Number of `AVPacketSideData` entries on `packet`. The probe buffer +/// uses this to enforce [`MAX_PROBE_PACKET_SIDE_DATA_ENTRIES`] before +/// cloning, so a packet whose entry count alone would dominate retained +/// memory is rejected up front. +fn packet_side_data_count(packet: &Packet) -> usize { + // SAFETY: side_data_elems is `c_int`, safe to read; clamp negatives to 0. + let nel = unsafe { (*packet.as_ptr()).side_data_elems }; + if nel <= 0 { + 0 + } else { + nel as usize + } +} + /// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine /// distinguishes "drain output and retry" from "stream over"). fn is_eagain(e: &ffmpeg_next::Error) -> bool { @@ -1266,25 +1348,20 @@ fn drain_into_pending( }); } let mut cpu = alloc_av_frame()?; - // SAFETY: hw_buf is a freshly-decoded HW frame; av_hwframe_transfer_data - // allocates buffers on `cpu`. 
We deliberately defer - // `av_frame_copy_props` until *after* the cap check below — that - // call deep-copies every AVFrameSideData entry, which a malicious - // stream can size in megabytes; allocating then discarding the - // copies on cap rejection is wasted work and a real allocator - // pressure source. + // SAFETY: hw_buf is a freshly-decoded HW frame; + // `av_hwframe_transfer_data` allocates pixel buffers on `cpu`. + // We use `copy_frame_props_minimal` (only `pts`) instead of + // `av_frame_copy_props` for the same reason as + // `transfer_hw_frame`: the public `Frame` API does not expose + // side data / metadata / opaque refs, so deep-copying them per + // frame is pure cost and an unbounded allocation source on + // attacker-controlled streams. unsafe { let r1 = av_hwframe_transfer_data(cpu.as_mut_ptr(), hw_buf.as_ptr(), 0); if r1 < 0 { return Err(ffmpeg_next::Error::from(r1)); } } - // Pre-copy_props accounting: size the frame's pixel storage and - // its (yet-to-be-copied) side data. Both cap-hit and inability to - // size the pixel layout are treated as candidate failures, so the - // byte budget is *strict* — we never queue a frame we can't fully - // account for, and we never pay the side-data deep copy on a - // frame we'd immediately drop. let pixel_bytes = match cpu_frame_bytes(&cpu) { Some(b) => b, None => { @@ -1305,30 +1382,25 @@ fn drain_into_pending( }); } }; - let side_bytes = frame_side_data_bytes(hw_buf); - let frame_bytes = pixel_bytes.saturating_add(side_bytes); - let new_total = pending_bytes.saturating_add(frame_bytes); + let new_total = pending_bytes.saturating_add(pixel_bytes); if new_total > max_bytes { tracing::warn!( pending_bytes = *pending_bytes, pixel_bytes, - side_bytes, max_bytes, - "hwdecode: queueing this frame (pixels + side data) would exceed byte cap; \ + "hwdecode: queueing this frame would exceed byte cap; \ failing candidate replay" ); - // cpu drops here without paying av_frame_copy_props. 
+ // cpu drops here without ever paying a metadata deep copy. return Err(ffmpeg_next::Error::Other { errno: libc::ENOMEM, }); } - // Cap check passed — now safe to pay the side-data deep copy. - // SAFETY: cpu and hw_buf are both valid AVFrames we own. + // Cap check passed — copy only the scalar AVFrame fields the + // public API needs. SAFETY: cpu and hw_buf are both valid + // AVFrames we own. unsafe { - let r2 = av_frame_copy_props(cpu.as_mut_ptr(), hw_buf.as_ptr()); - if r2 < 0 { - return Err(ffmpeg_next::Error::from(r2)); - } + copy_frame_props_minimal(cpu.as_mut_ptr(), hw_buf.as_ptr()); } *pending_bytes = new_total; pending.push_back(cpu); @@ -1339,42 +1411,6 @@ fn drain_into_pending( } } -/// Sum of `AVFrameSideData[i].size` across every entry attached to -/// `frame`. `av_frame_copy_props` performs a deep copy of every side -/// data buffer (allocates a fresh `AVBufferRef` per entry), so a -/// candidate decoder that produces large per-frame metadata (HDR -/// mastering display info, A53 closed captions, ICC profiles, dynamic -/// HDR, motion vectors, …) would otherwise bypass the -/// `max_probe_pending_bytes` cap because [`cpu_frame_bytes`] only -/// accounts for pixel-plane storage. -/// -/// Reads only the `size` field of each `AVFrameSideData` — never -/// constructs the bindgen `AVFrameSideDataType` enum, so unknown side- -/// data types from a future FFmpeg do not invoke UB. -fn frame_side_data_bytes(frame: &frame::Video) -> usize { - // SAFETY: AVFrame.side_data is `*mut *mut AVFrameSideData` and - // nb_side_data is `c_int`; both are raw struct fields safe to read. - // Field projection through the indirected pointer touches only the - // primitive `usize` `.size` field (never `type_`). 
- unsafe { - let raw = frame.as_ptr(); - let nb = (*raw).nb_side_data; - let arr = (*raw).side_data; - if arr.is_null() || nb <= 0 { - return 0; - } - let mut total: usize = 0; - for i in 0..(nb as usize) { - let entry = *arr.add(i); - if entry.is_null() { - continue; - } - total = total.saturating_add((*entry).size); - } - total - } -} - /// Approximate resident size of a CPU frame: sum of `linesize[plane] * /// plane_height` across populated planes. /// @@ -1530,6 +1566,74 @@ mod tests { fn packet_side_data_is_zero_when_no_side_data() { let packet = Packet::new(64); assert_eq!(packet_side_data_bytes(&packet), 0); + assert_eq!(packet_side_data_count(&packet), 0); + } + + /// Packets with many tiny side-data entries must be charged the + /// per-entry descriptor + ref overhead, even when each entry's payload + /// `size` is zero. Without `SIDE_DATA_ENTRY_OVERHEAD`, a packet stuffed + /// with N zero-byte entries would charge 0 bytes against the budget + /// while `av_packet_ref` still allocates ~`N * 80` bytes of descriptor + /// + AVBufferRef + allocator overhead per cloned copy. + #[test] + fn packet_side_data_bytes_charges_descriptor_overhead_for_zero_size_entries() { + use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType}; + + let mut packet = Packet::new(0); + // Attach two zero-byte entries of distinct types so neither call + // replaces the other. 
+ let p1 = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, + 0, + ) + }; + let p2 = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_PALETTE, + 0, + ) + }; + assert!( + !p1.is_null() && !p2.is_null(), + "av_packet_new_side_data NULL" + ); + + assert_eq!(packet_side_data_count(&packet), 2); + let bytes = packet_side_data_bytes(&packet); + assert!( + bytes >= 2 * SIDE_DATA_ENTRY_OVERHEAD, + "must charge descriptor overhead per entry even at zero payload; got {bytes}" + ); + } + + /// `MAX_PROBE_PACKET_SIDE_DATA_ENTRIES` is the cliff above which a + /// packet is rejected from the probe buffer regardless of byte total — + /// pure descriptor inflation is its own attack vector. Sanity-check + /// that `packet_side_data_count` reports the value the cap is checked + /// against. + #[test] + fn packet_side_data_count_reports_attached_entries() { + use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType}; + + let mut packet = Packet::new(0); + let _p1 = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, + 4, + ) + }; + let _p2 = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_PALETTE, + 4, + ) + }; + assert_eq!(packet_side_data_count(&packet), 2); } /// `cpu_frame_bytes` must refuse to size a frame whose first plane has @@ -1591,78 +1695,6 @@ mod tests { assert_eq!(cpu_frame_bytes(&f), Some(0)); } - /// `av_frame_copy_props` deep-copies every AVFrameSideData attached - /// to the source frame. `frame_side_data_bytes` must surface that - /// retention so `drain_into_pending` can charge it against - /// `max_probe_pending_bytes` — otherwise a stream with megabytes of - /// per-frame metadata can queue up to `MAX_PROBE_PENDING_FRAMES` - /// frames and overshoot the configured cap by orders of magnitude. 
- #[test] - fn frame_side_data_bytes_counts_attached_buffers() { - use ffmpeg_next::ffi::{av_frame_new_side_data, AVFrameSideDataType}; - - const SIDE_DATA_SIZE: usize = 1024 * 1024; // 1 MiB - - let mut f = frame::Video::empty(); - // SAFETY: f is freshly allocated; av_frame_new_side_data attaches a - // fresh `SIDE_DATA_SIZE`-byte buffer of the requested type and returns - // a pointer to the entry (or NULL on OOM). - let p = unsafe { - av_frame_new_side_data( - f.as_mut_ptr(), - AVFrameSideDataType::AV_FRAME_DATA_SEI_UNREGISTERED, - SIDE_DATA_SIZE, - ) - }; - assert!(!p.is_null(), "av_frame_new_side_data returned NULL"); - - let bytes = frame_side_data_bytes(&f); - assert!( - bytes >= SIDE_DATA_SIZE, - "side-data accounting must include the attached buffer; got {bytes}" - ); - } - - #[test] - fn frame_side_data_bytes_is_zero_for_bare_frame() { - let f = frame::Video::empty(); - assert_eq!(frame_side_data_bytes(&f), 0); - } - - /// Multiple side-data entries must be summed, not just the first. - #[test] - fn frame_side_data_bytes_sums_all_entries() { - use ffmpeg_next::ffi::{av_frame_new_side_data, AVFrameSideDataType}; - - const ENTRY_A: usize = 256 * 1024; // 256 KiB - const ENTRY_B: usize = 512 * 1024; // 512 KiB - - let mut f = frame::Video::empty(); - // Two distinct types so neither call replaces the other. - let p1 = unsafe { - av_frame_new_side_data( - f.as_mut_ptr(), - AVFrameSideDataType::AV_FRAME_DATA_SEI_UNREGISTERED, - ENTRY_A, - ) - }; - let p2 = unsafe { - av_frame_new_side_data( - f.as_mut_ptr(), - AVFrameSideDataType::AV_FRAME_DATA_A53_CC, - ENTRY_B, - ) - }; - assert!(!p1.is_null() && !p2.is_null()); - - let bytes = frame_side_data_bytes(&f); - assert!( - bytes >= ENTRY_A + ENTRY_B, - "must sum across all side-data entries; got {bytes}, expected at least {}", - ENTRY_A + ENTRY_B - ); - } - /// `PartialBuildState`'s `Drop` must be a no-op when both pointers are /// null — the disarmed-by-`into_owned` post-state. 
A panic / double-free /// here would break the success path of every `build_state` call. From 1dffd6fd82156316159b1bac94e0a56d613f1f0e Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 17:09:52 +1200 Subject: [PATCH 26/27] update --- src/decoder.rs | 210 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 145 insertions(+), 65 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index 81db18a..46a099a 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -410,63 +410,84 @@ impl VideoDecoder { match self.state.inner.send_packet(packet) { Ok(()) => { if let Some(probe) = self.probe.as_mut() { - // `try_clone_packet` calls `av_packet_ref`, which deep-copies - // side data via `av_packet_copy_props`. The probe budget must - // include both descriptor + ref overhead per side-data entry - // (via `packet_side_data_bytes`) and a hard cap on the entry - // count itself — without the count cap, a packet stuffed with - // many tiny entries can dominate retained memory before the - // byte cap is even close to firing. + // Step 1: reject by side-data entry count BEFORE walking the + // side-data array for byte accounting. `packet_side_data_bytes` + // dereferences each `AVPacket.side_data[i]` based on the + // FFmpeg-supplied `side_data_elems`; if that integer is + // corrupt or weaponised we don't want to walk it from the + // safe `send_packet` path. The byte helper still clamps its + // own walk to the cap as defense-in-depth, but checking the + // count first short-circuits the descriptor-explosion case + // entirely. 
let side_count = packet_side_data_count(packet); - let pkt_size = packet.size().saturating_add(packet_side_data_bytes(packet)); - let new_count = probe.buffered_packets.len() + 1; - let new_bytes = probe.buffered_bytes.saturating_add(pkt_size); - let entry_cap_exceeded = side_count > MAX_PROBE_PACKET_SIDE_DATA_ENTRIES; - if new_count > MAX_PROBE_PACKETS - || new_bytes > MAX_PROBE_PACKET_BYTES - || entry_cap_exceeded - { + if side_count > MAX_PROBE_PACKET_SIDE_DATA_ENTRIES { tracing::warn!( - packets = new_count, - bytes = new_bytes, side_data_entries = side_count, - max_packets = MAX_PROBE_PACKETS, - max_bytes = MAX_PROBE_PACKET_BYTES, max_side_data_entries = MAX_PROBE_PACKET_SIDE_DATA_ENTRIES, - trigger = if entry_cap_exceeded { - "side_data_entry_cap" - } else { - "byte_or_packet_cap" - }, - "hwdecode: probe window exceeded caps without first frame; \ - abandoning fallback safety net" + trigger = "side_data_entry_cap", + "hwdecode: packet side-data entry count exceeds cap; \ + abandoning fallback safety net without byte accounting" ); - // Abandon the *future* probe-buffering only. `pending_frames` - // belong to the currently active backend (possibly the - // candidate `advance_probe` committed earlier in this same - // `send_packet` call) and are valid output the caller will - // dequeue via `receive_frame`. Clearing them here would - // silently drop initial frames at exactly the cap-overflow / - // OOM-stress paths. + // Abandon the *future* probe-buffering only — see the byte/ + // packet cap branch below for why `pending_frames` survives. self.probe = None; } else { - // Use the checked clone — ffmpeg-next's `Packet::clone` - // discards av_packet_ref's return value and would silently - // store an empty packet on ENOMEM, corrupting future replay. 
- match try_clone_packet(packet) { - Ok(cloned) => { - probe.buffered_packets.push(cloned); - probe.buffered_bytes = new_bytes; - } - Err(e) => { - tracing::warn!( - error = %e, - "hwdecode: packet clone failed for probe history; abandoning fallback safety net" - ); - // Same reasoning as the cap-overflow branch above: - // `pending_frames` are owned by the active backend, not - // the probe buffer, so they survive abandonment. - self.probe = None; + // Step 2: now safe to compute byte budget — `side_count` + // is bounded. + // + // `try_clone_packet` calls `av_packet_ref`, which deep-copies + // side data via `av_packet_copy_props`. The probe budget + // must include descriptor + ref overhead per side-data + // entry (via `packet_side_data_bytes`); without it, a + // packet stuffed with many tiny entries can dominate + // retained memory before the byte cap is even close to + // firing. + let pkt_size = packet.size().saturating_add(packet_side_data_bytes( + packet, + MAX_PROBE_PACKET_SIDE_DATA_ENTRIES, + )); + let new_count = probe.buffered_packets.len() + 1; + let new_bytes = probe.buffered_bytes.saturating_add(pkt_size); + if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES { + tracing::warn!( + packets = new_count, + bytes = new_bytes, + side_data_entries = side_count, + max_packets = MAX_PROBE_PACKETS, + max_bytes = MAX_PROBE_PACKET_BYTES, + trigger = "byte_or_packet_cap", + "hwdecode: probe window exceeded caps without first frame; \ + abandoning fallback safety net" + ); + // Abandon the *future* probe-buffering only. + // `pending_frames` belong to the currently active backend + // (possibly the candidate `advance_probe` committed + // earlier in this same `send_packet` call) and are valid + // output the caller will dequeue via `receive_frame`. + // Clearing them here would silently drop initial frames + // at exactly the cap-overflow / OOM-stress paths. 
+ self.probe = None; + } else { + // Use the checked clone — ffmpeg-next's `Packet::clone` + // discards av_packet_ref's return value and would + // silently store an empty packet on ENOMEM, corrupting + // future replay. + match try_clone_packet(packet) { + Ok(cloned) => { + probe.buffered_packets.push(cloned); + probe.buffered_bytes = new_bytes; + } + Err(e) => { + tracing::warn!( + error = %e, + "hwdecode: packet clone failed for probe history; \ + abandoning fallback safety net" + ); + // Same reasoning as the cap-overflow branch above: + // `pending_frames` are owned by the active backend, + // not the probe buffer, so they survive abandonment. + self.probe = None; + } } } } @@ -1222,18 +1243,27 @@ fn try_clone_packet(src: &Packet) -> std::result::Result usize { +fn packet_side_data_bytes(packet: &Packet, max_entries: usize) -> usize { // SAFETY: AVPacket.side_data is `*mut AVPacketSideData` and // side_data_elems is `c_int`; both are raw struct fields safe to read. // Field projection (`.size`) does not reconstruct the enum-typed `type_` @@ -1242,13 +1272,10 @@ fn packet_side_data_bytes(packet: &Packet) -> usize { let raw = packet.as_ptr(); let nel = (*raw).side_data_elems; let arr = (*raw).side_data; - if arr.is_null() || nel <= 0 { + if arr.is_null() || nel <= 0 || max_entries == 0 { return 0; } - let count = nel as usize; - // Descriptor + AVBufferRef + allocator overhead per entry — without - // this, a packet stuffed with many zero-size entries could slip past - // `MAX_PROBE_PACKET_BYTES` purely on descriptor cost. 
+ let count = (nel as usize).min(max_entries); let mut total = count.saturating_mul(SIDE_DATA_ENTRY_OVERHEAD); for i in 0..count { let entry = arr.add(i); @@ -1550,7 +1577,7 @@ mod tests { assert!(!p.is_null(), "av_packet_new_side_data returned NULL"); assert_eq!(packet.size(), PAYLOAD_SIZE); - let side = packet_side_data_bytes(&packet); + let side = packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES); assert!( side >= SIDE_DATA_SIZE, "side-data accounting must include the attached buffer; got {side}" @@ -1565,7 +1592,10 @@ mod tests { #[test] fn packet_side_data_is_zero_when_no_side_data() { let packet = Packet::new(64); - assert_eq!(packet_side_data_bytes(&packet), 0); + assert_eq!( + packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES), + 0 + ); assert_eq!(packet_side_data_count(&packet), 0); } @@ -1602,13 +1632,63 @@ mod tests { ); assert_eq!(packet_side_data_count(&packet), 2); - let bytes = packet_side_data_bytes(&packet); + let bytes = packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES); assert!( bytes >= 2 * SIDE_DATA_ENTRY_OVERHEAD, "must charge descriptor overhead per entry even at zero payload; got {bytes}" ); } + /// `packet_side_data_bytes` must clamp its walk to `max_entries` + /// regardless of `side_data_elems`. Defense-in-depth: the caller is + /// expected to short-circuit packets whose count exceeds the cap, but + /// if a corrupt or weaponised packet ever does reach the helper, the + /// internal cap prevents an unbounded raw-pointer walk. + /// + /// This test attaches 5 entries of distinct types and asks the helper + /// to walk only the first 2. Result must equal exactly `2 * overhead + + /// (size_a + size_b)`, confirming entries 3-5 were not even read. 
+ #[test] + fn packet_side_data_bytes_respects_max_entries_cap() { + use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType}; + + let mut packet = Packet::new(0); + // Five distinct side-data types so each `av_packet_new_side_data` + // call appends rather than replaces. + let types_and_sizes: [(AVPacketSideDataType, usize); 5] = [ + (AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, 100), + (AVPacketSideDataType::AV_PKT_DATA_PALETTE, 200), + (AVPacketSideDataType::AV_PKT_DATA_REPLAYGAIN, 300), + (AVPacketSideDataType::AV_PKT_DATA_DISPLAYMATRIX, 400), + (AVPacketSideDataType::AV_PKT_DATA_STEREO3D, 500), + ]; + for (ty, size) in types_and_sizes { + let p = unsafe { av_packet_new_side_data(packet.as_mut_ptr(), ty, size) }; + assert!(!p.is_null(), "av_packet_new_side_data returned NULL"); + } + assert_eq!(packet_side_data_count(&packet), 5); + + let walked_2 = packet_side_data_bytes(&packet, 2); + let walked_5 = packet_side_data_bytes(&packet, 5); + + assert_eq!( + walked_2, + 2 * SIDE_DATA_ENTRY_OVERHEAD + 100 + 200, + "max_entries=2 must walk exactly the first two entries" + ); + assert_eq!( + walked_5, + 5 * SIDE_DATA_ENTRY_OVERHEAD + 100 + 200 + 300 + 400 + 500, + "max_entries=5 must walk all five entries" + ); + // max_entries=0 short-circuits to 0. + assert_eq!(packet_side_data_bytes(&packet, 0), 0); + // max_entries larger than the actual count clamps to the actual count + // (no out-of-bounds walk past `side_data_elems`). + let walked_huge = packet_side_data_bytes(&packet, 1_000_000); + assert_eq!(walked_huge, walked_5); + } + /// `MAX_PROBE_PACKET_SIDE_DATA_ENTRIES` is the cliff above which a /// packet is rejected from the probe buffer regardless of byte total — /// pure descriptor inflation is its own attack vector. 
Sanity-check From 62c6ff139bff15823cd7fa8db6803d4354835203 Mon Sep 17 00:00:00 2001 From: uqio <276879906+uqio@users.noreply.github.com> Date: Mon, 27 Apr 2026 17:30:03 +1200 Subject: [PATCH 27/27] update --- src/decoder.rs | 169 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/src/decoder.rs b/src/decoder.rs index 46a099a..a241b68 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -132,6 +132,29 @@ const MAX_PROBE_PACKET_SIDE_DATA_ENTRIES: usize = 64; /// would let many tiny entries slip past the cap. const SIDE_DATA_ENTRY_OVERHEAD: usize = 80; +/// Conservative upper-bound bytes-per-pixel multiplier used to estimate +/// the size of a CPU frame **before** `av_hwframe_transfer_data` +/// allocates its pixel buffers. Covers every HW download format this +/// crate produces (worst case is `P416LE` / `P412LE` at 6 bytes/pixel +/// for 16-bit 4:4:4 semi-planar) plus a margin for FFmpeg's per-row +/// stride alignment (typically 32-byte aligned, ~5% extra at HD widths +/// and below). +/// +/// Used by [`drain_into_pending`] as a pre-transfer guard: if the +/// product `width * height * WORST_CASE_BYTES_PER_PIXEL` would already +/// push `pending_bytes` past `max_probe_pending_bytes`, the candidate +/// replay refuses the frame *before* allocating. Without this, FFmpeg +/// would perform the full HW→CPU download (potentially ~100 MiB for +/// 8K HDR) and we would only reject the frame after RSS had already +/// spiked. The post-transfer accounting via [`cpu_frame_bytes`] stays in +/// place as a backstop using the frame's actual stride/format. +/// +/// Slightly over-charges true 4:2:0 NV12 / P010 frames (which dominate +/// real workloads) — that's the right side to err on. Callers feeding +/// 8K+ workloads through the probe path can tune +/// [`VideoDecoder::with_max_probe_pending_bytes`] upward to compensate. 
+const WORST_CASE_BYTES_PER_PIXEL: usize = 8; + /// Maximum number of CPU frames we are willing to queue from a candidate /// during probe replay. Each frame is a fully-allocated CPU buffer /// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so @@ -1374,6 +1397,54 @@ fn drain_into_pending( errno: libc::ENOMEM, }); } + // Pre-transfer size guard: `av_hwframe_transfer_data` will + // allocate the CPU buffer based on `hw_buf`'s dimensions. If a + // single frame's worst-case footprint already pushes past the + // cap, refuse the candidate **before** allocating so RSS does + // not spike on a frame we'd immediately drop. Uses a width * + // height * `WORST_CASE_BYTES_PER_PIXEL` upper bound; the + // post-transfer accounting via `cpu_frame_bytes` below stays in + // place as a backstop using the actual stride/format. + let estimated_bytes = match estimate_transfer_bytes(hw_buf) { + Some(b) => b, + None => { + // SAFETY: AVFrame.width/height are c_int reads. + let (w, h) = unsafe { + let raw = hw_buf.as_ptr(); + ((*raw).width, (*raw).height) + }; + tracing::warn!( + width = w, + height = h, + "hwdecode: HW frame dimensions invalid for sizing; failing candidate replay" + ); + unsafe { av_frame_unref(hw_buf.as_mut_ptr()) }; + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + }; + let estimated_total = pending_bytes.saturating_add(estimated_bytes); + if estimated_total > max_bytes { + // SAFETY: AVFrame.width/height are c_int reads. 
+ let (w, h) = unsafe { + let raw = hw_buf.as_ptr(); + ((*raw).width, (*raw).height) + }; + tracing::warn!( + pending_bytes = *pending_bytes, + estimated_bytes, + width = w, + height = h, + max_bytes = max_bytes, + "hwdecode: pre-transfer size estimate exceeds cap; \ + refusing candidate replay before allocating CPU frame" + ); + unsafe { av_frame_unref(hw_buf.as_mut_ptr()) }; + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } let mut cpu = alloc_av_frame()?; // SAFETY: hw_buf is a freshly-decoded HW frame; // `av_hwframe_transfer_data` allocates pixel buffers on `cpu`. @@ -1438,6 +1509,33 @@ fn drain_into_pending( } } +/// Conservative upper-bound estimate of the bytes +/// `av_hwframe_transfer_data` will allocate when downloading `hw_buf` to +/// a CPU frame. Used by [`drain_into_pending`] as a pre-transfer guard +/// so a candidate replay can refuse a frame whose footprint would +/// exceed the byte budget *without* first paying the allocation. The +/// estimate is `width * height * WORST_CASE_BYTES_PER_PIXEL` — see that +/// constant for why we err on the high side. +/// +/// Returns `None` when the frame's `width` or `height` are not strictly +/// positive (caller treats as candidate failure — a HW frame with +/// non-positive dimensions cannot be transferred meaningfully). +fn estimate_transfer_bytes(hw_buf: &frame::Video) -> Option<usize> { + // SAFETY: AVFrame.width / height are c_int reads. + let (w, h) = unsafe { + let raw = hw_buf.as_ptr(); + ((*raw).width, (*raw).height) + }; + if w <= 0 || h <= 0 { + return None; + } + Some( + (w as usize) + .saturating_mul(h as usize) + .saturating_mul(WORST_CASE_BYTES_PER_PIXEL), + ) +} + /// Approximate resident size of a CPU frame: sum of `linesize[plane] * /// plane_height` across populated planes.
/// @@ -1775,6 +1873,77 @@ mod tests { assert_eq!(cpu_frame_bytes(&f), Some(0)); } + /// `estimate_transfer_bytes` is the pre-transfer size guard for + /// `drain_into_pending`: it must compute `width * height * + /// WORST_CASE_BYTES_PER_PIXEL` so the candidate replay can refuse a + /// frame *before* `av_hwframe_transfer_data` allocates. + #[test] + fn estimate_transfer_bytes_uses_worst_case_per_pixel() { + let mut f = frame::Video::empty(); + // SAFETY: f is freshly allocated; we set width/height directly. + unsafe { + let raw = f.as_mut_ptr(); + (*raw).width = 1920; + (*raw).height = 1080; + } + assert_eq!( + estimate_transfer_bytes(&f), + Some(1920 * 1080 * WORST_CASE_BYTES_PER_PIXEL), + ); + } + + /// Non-positive dimensions surface as `None` so `drain_into_pending` + /// fails the candidate before allocating anything. A zero-width or + /// zero-height frame would silently yield a 0-byte estimate under the + /// raw multiplication, letting the cap check pass and exposing the + /// allocation path to whatever the actual transfer would do. + #[test] + fn estimate_transfer_bytes_rejects_non_positive_dimensions() { + let mut f = frame::Video::empty(); + unsafe { + let raw = f.as_mut_ptr(); + (*raw).width = 0; + (*raw).height = 1080; + } + assert!(estimate_transfer_bytes(&f).is_none()); + + unsafe { + (*f.as_mut_ptr()).width = 1920; + (*f.as_mut_ptr()).height = -1; + } + assert!(estimate_transfer_bytes(&f).is_none()); + } + + /// 8K HDR P010 has actual ~96 MiB resident size; the estimate should + /// over-charge it (the right side to err on for a memory cap) while + /// still fitting within the configurable + /// [`DEFAULT_MAX_PROBE_PENDING_BYTES`] cap (256 MiB) for a single + /// frame so a default-configured decoder is not forced to reject 8K + /// streams outright. 
+ #[test] + fn estimate_transfer_bytes_8k_fits_default_cap() { + let mut f = frame::Video::empty(); + unsafe { + let raw = f.as_mut_ptr(); + (*raw).width = 7680; + (*raw).height = 4320; + } + let estimate = estimate_transfer_bytes(&f).expect("8K is sizable"); + // ~253 MiB (7680 * 4320 * 8 B) — just under the default cap. + assert!( + estimate <= DEFAULT_MAX_PROBE_PENDING_BYTES, + "8K estimate {estimate} must fit DEFAULT_MAX_PROBE_PENDING_BYTES \ + {DEFAULT_MAX_PROBE_PENDING_BYTES}; otherwise the default cap rejects \ + even a single 8K frame at probe time" + ); + // And strictly larger than a typical 8K P010 (~96 MiB) so the guard + // is actually conservative, not under-charging. + assert!( + estimate > 96 * 1024 * 1024, + "estimate must over-charge real 8K P010 to bound the worst case; got {estimate}" + ); + } + /// `PartialBuildState`'s `Drop` must be a no-op when both pointers are /// null — the disarmed-by-`into_owned` post-state. A panic / double-free /// here would break the success path of every `build_state` call.