diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index bd7a668..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,7 +0,0 @@ -# UNRELEASED - -# 0.1.2 (January 6th, 2022) - -FEATURES - - diff --git a/Cargo.toml b/Cargo.toml index ff7fe91..9a3b19a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,35 +1,37 @@ [package] -name = "template-rs" -version = "0.0.0" +name = "hwdecode" +version = "0.1.0" edition = "2021" -repository = "https://github.com/al8n/template-rs" -homepage = "https://github.com/al8n/template-rs" -documentation = "https://docs.rs/template-rs" -description = "A template for creating Rust open-source repo on GitHub" +rust-version = "1.95" +description = "Cross-platform hardware-only video decoder built on top of ffmpeg-next, with auto-probe across HW backends. Callers handle software fallback." +repository = "https://github.com/findit-ai/hwdecode" +homepage = "https://github.com/findit-ai/hwdecode" +documentation = "https://docs.rs/hwdecode" license = "MIT OR Apache-2.0" -rust-version = "1.73" - -[[bench]] -path = "benches/foo.rs" -name = "foo" -harness = false - -[features] -default = ["std"] -alloc = [] -std = [] [dependencies] +ffmpeg-next = { version = "8.1", default-features = false, features = ["codec", "format"] } +thiserror = "2" +tracing = "0.1" +libc = "0.2" [dev-dependencies] criterion = "0.8" -tempfile = "3" + +[[example]] +name = "decode" +path = "examples/decode.rs" + +[[bench]] +name = "decode" +path = "benches/decode.rs" +harness = false [profile.bench] opt-level = 3 debug = false codegen-units = 1 -lto = 'thin' +lto = "thin" incremental = false debug-assertions = false overflow-checks = false @@ -41,8 +43,6 @@ rustdoc-args = ["--cfg", "docsrs"] [lints.rust] rust_2018_idioms = "warn" -single_use_lifetimes = "warn" unexpected_cfgs = { level = "warn", check-cfg = [ - 'cfg(all_tests)', 'cfg(tarpaulin)', ] } diff --git a/README-zh_CN.md b/README-zh_CN.md deleted file mode 100644 index 7a07f4d..0000000 --- a/README-zh_CN.md +++ 
/dev/null @@ -1,51 +0,0 @@ -
-

template-rs

-
-
- -开源Rust代码库GitHub模版 - -[github][Github-url] -LoC -[Build][CI-url] -[codecov][codecov-url] - -[docs.rs][doc-url] -[crates.io][crates-url] -[crates.io][crates-url] -license - -[English][en-url] | 简体中文 - -
- -## Installation - -```toml -[dependencies] -template_rs = "0.1" -``` - -## Features - -- [x] 更快的创建GitHub开源Rust代码库 - -#### License - -`Template-rs` is under the terms of both the MIT license and the -Apache License (Version 2.0). - -See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT) for details. - -Copyright (c) 2021 Al Liu. - -[Github-url]: https://github.com/al8n/template-rs/ -[CI-url]: https://github.com/al8n/template/actions/workflows/template.yml -[doc-url]: https://docs.rs/template-rs -[crates-url]: https://crates.io/crates/template-rs -[codecov-url]: https://app.codecov.io/gh/al8n/template-rs/ -[license-url]: https://opensource.org/licenses/Apache-2.0 -[rustc-url]: https://github.com/rust-lang/rust/blob/master/RELEASES.md -[license-apache-url]: https://opensource.org/licenses/Apache-2.0 -[license-mit-url]: https://opensource.org/licenses/MIT -[en-url]: https://github.com/al8n/template-rs/tree/main/README.md diff --git a/README.md b/README.md index 1af27e2..3da5fba 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,115 @@ -
-

template-rs

-
-
+# hwdecode -A template for creating Rust open-source GitHub repo. +Cross-platform hardware-accelerated video decoder for Rust, built on top of +[`ffmpeg-next`](https://crates.io/crates/ffmpeg-next). -[github][Github-url] -LoC -[Build][CI-url] -[codecov][codecov-url] +`VideoDecoder` mirrors the `send_packet` / `receive_frame` interface of +`ffmpeg::decoder::Video` and auto-probes the host's hardware backends. +This crate is **hardware-only** — there is no software fallback inside it. +If no hardware backend can decode the stream, `Error::AllBackendsFailed` +surfaces from `VideoDecoder::open` (when no backend opens) or from +`receive_frame` / `send_packet` / `send_eof` (when the initially-opened +backend fails at decode time and every remaining backend in the probe order +also fails — the only way it surfaces on single-backend platforms like macOS). +The caller decides how to fall back (typically by opening an +`ffmpeg::decoder::Video` directly). Output frames are CPU-side, downloaded +with `av_hwframe_transfer_data` (NV12 for 8-bit, P010 for 10-bit). Pixel- +format conversion is intentionally out of scope; safe per-row access is via +`Frame::row` / `Frame::rows` (clipped to visible byte width — never includes +FFmpeg's per-row alignment padding). -[docs.rs][doc-url] -[crates.io][crates-url] -[crates.io][crates-url] -license +## Backends -English | [简体中文][zh-cn-url] +| Target | Probe order (HW only) | +| ------------------- | --------------------------------- | +| macOS / iOS / tvOS | VideoToolbox | +| Linux | VAAPI → CUDA | +| Windows | D3D11VA → CUDA | +| other | (none) | -
+If `open` returns `Error::AllBackendsFailed`, software fallback is the +caller's responsibility (this crate intentionally does not include one). -## Installation +## Usage -```toml -[dependencies] -template_rs = "0.1" +```rust,no_run +use ffmpeg_next as ffmpeg; +use ffmpeg::{format, media}; +use hwdecode::{Frame, VideoDecoder}; + +ffmpeg::init()?; + +let mut input = format::input(path)?; +let stream = input.streams().best(media::Type::Video).unwrap(); +let stream_index = stream.index(); + +// HW-only open. On AllBackendsFailed, fall back to software yourself. +let mut decoder = match VideoDecoder::open(stream.parameters()) { + Ok(d) => d, + Err(hwdecode::Error::AllBackendsFailed { .. }) => { + // Caller-side software fallback. + let _sw = ffmpeg::codec::Context::from_parameters(stream.parameters())? + .decoder() + .video()?; + // ... drive _sw with send_packet / receive_frame yourself ... + return Ok(()); + } + Err(e) => return Err(e.into()), +}; +println!("backend = {:?}", decoder.backend()); + +let mut frame = Frame::empty()?; +for (s, packet) in input.packets() { + if s.index() != stream_index { continue; } + decoder.send_packet(&packet)?; + while decoder.receive_frame(&mut frame).is_ok() { + // frame.pix_fmt() is the integer constant — match against + // hwdecode::pix_fmt::{NV12, P010LE, ...} and dispatch to your + // pixel-format pipeline (e.g. `colconv`). + // ... do something with frame ... + } +} +decoder.send_eof()?; +while decoder.receive_frame(&mut frame).is_ok() { + // ... drain ... +} ``` -## Features -- [x] Create a Rust open-source repo fast +To force a specific hardware backend (no probe, no fallback): + +```rust +use hwdecode::{Backend, VideoDecoder}; +let decoder = VideoDecoder::open_with(parameters, Backend::VideoToolbox)?; +``` + +`hwdecode` is hardware-only: there is no `Backend::Software`. If `open` +returns `Error::AllBackendsFailed`, fall back to a software decoder +yourself (typically `ffmpeg::decoder::Video`). 
+ +## Running tests and benches + +The integration test and benchmark expect a real video file. Set +`HWDECODE_SAMPLE_VIDEO` to enable them: + +```sh +HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test +HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored +HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench +``` -#### License +Without the env var the integration test skips with a notice; unit tests run +unconditionally. -`template-rs` is under the terms of both the MIT license and the -Apache License (Version 2.0). +## Build requirements -See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT) for details. +- A system FFmpeg ≥ **5.1** linkable via `pkg-config` (we reference + `AV_PIX_FMT_P212LE` / `AV_PIX_FMT_P412LE`, which were added in 5.1). + Tested against 8.1. Verify with + `ffmpeg -hwaccels` that your build has the backends you expect compiled in + (e.g. `videotoolbox` on macOS, `vaapi` / `cuda` on Linux, + `d3d11va` / `cuda` on Windows). +- Rust ≥ 1.95. -Copyright (c) 2021 Al Liu. +## License -[Github-url]: https://github.com/al8n/template-rs/ -[CI-url]: https://github.com/al8n/template-rs/actions/workflows/ci.yml -[doc-url]: https://docs.rs/template-rs -[crates-url]: https://crates.io/crates/template-rs -[codecov-url]: https://app.codecov.io/gh/al8n/template-rs/ -[zh-cn-url]: https://github.com/al8n/template-rs/tree/main/README-zh_CN.md +MIT or Apache-2.0, at your option. diff --git a/benches/decode.rs b/benches/decode.rs new file mode 100644 index 0000000..9e53f0a --- /dev/null +++ b/benches/decode.rs @@ -0,0 +1,173 @@ +//! Benchmark comparing software-only decode (via `ffmpeg-next` directly, +//! since `hwdecode` is hardware-only) against `hwdecode`'s auto-probed +//! hardware backend on the same input file. +//! +//! Set `HWDECODE_SAMPLE_VIDEO` to a video file path. The hardware bench is +//! skipped (with a notice) when no hardware backend is available on the host. +//! +//! ```sh +//! 
HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench +//! ``` + +use std::{path::PathBuf, time::Duration}; + +use criterion::{criterion_group, criterion_main, Criterion}; +use ffmpeg::{codec::Context as CodecContext, format, frame, media}; +use ffmpeg_next as ffmpeg; +use hwdecode::{Frame, VideoDecoder}; + +const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; + +fn sample_path() -> Option { + std::env::var_os(SAMPLE_ENV).map(PathBuf::from) +} + +/// Decode every frame using `hwdecode`'s auto-probed hardware backend. +fn decode_all_hw(path: &PathBuf) -> Result { + let mut input = format::input(path).map_err(hwdecode::Error::Ffmpeg)?; + let stream = input + .streams() + .best(media::Type::Video) + .ok_or(hwdecode::Error::Ffmpeg(ffmpeg::Error::StreamNotFound))?; + let stream_index = stream.index(); + + let mut decoder = VideoDecoder::open(stream.parameters())?; + let mut frame = Frame::empty()?; + let mut count = 0_usize; + + let mut drain = |decoder: &mut VideoDecoder, count: &mut usize| -> Result<(), hwdecode::Error> { + loop { + match decoder.receive_frame(&mut frame) { + Ok(()) => *count += 1, + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + return Ok(()); + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Eof)) => return Ok(()), + Err(e) => return Err(e), + } + } + }; + + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet)?; + drain(&mut decoder, &mut count)?; + } + decoder.send_eof()?; + drain(&mut decoder, &mut count)?; + Ok(count) +} + +/// Decode every frame using a plain software `ffmpeg-next` decoder. Used as +/// the SW baseline since `hwdecode` no longer exposes a software backend. 
+fn decode_all_sw(path: &PathBuf) -> Result { + let mut input = format::input(path)?; + let stream = input + .streams() + .best(media::Type::Video) + .ok_or(ffmpeg::Error::StreamNotFound)?; + let stream_index = stream.index(); + let mut decoder = CodecContext::from_parameters(stream.parameters())? + .decoder() + .video()?; + + let mut frame = frame::Video::empty(); + let mut count = 0_usize; + + let mut drain = + |decoder: &mut ffmpeg::decoder::Video, count: &mut usize| -> Result<(), ffmpeg::Error> { + loop { + match decoder.receive_frame(&mut frame) { + Ok(()) => *count += 1, + Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::error::EAGAIN => return Ok(()), + Err(ffmpeg::Error::Eof) => return Ok(()), + Err(e) => return Err(e), + } + } + }; + + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet)?; + drain(&mut decoder, &mut count)?; + } + decoder.send_eof()?; + drain(&mut decoder, &mut count)?; + Ok(count) +} + +fn bench_decode(c: &mut Criterion) { + ffmpeg::init().expect("ffmpeg init"); + + let Some(path) = sample_path() else { + eprintln!("skipping benches: set {SAMPLE_ENV} to a video file path"); + return; + }; + + // Probe by decoding one frame so the probe collapses to the backend that + // actually produced output. None means no HW backend is available — we + // skip the HW arm and bench SW only. 
+ let probed_backend = { + let mut input = format::input(&path).expect("open input"); + let stream = input + .streams() + .best(media::Type::Video) + .expect("video stream"); + let stream_index = stream.index(); + match VideoDecoder::open(stream.parameters()) { + Ok(mut dec) => { + let mut frame = Frame::empty().expect("alloc probe frame"); + 'probe: for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + dec.send_packet(&packet).expect("probe send_packet"); + match dec.receive_frame(&mut frame) { + Ok(()) => break 'probe, + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + continue; + } + Err(e) => panic!("probe receive_frame: {e}"), + } + } + Some(dec.backend()) + } + Err(hwdecode::Error::AllBackendsFailed { .. }) => None, + Err(e) => panic!("hwdecode probe: {e}"), + } + }; + match probed_backend { + Some(b) => eprintln!("auto-probe settled on backend: {b:?}"), + None => eprintln!("no hardware backend available — hardware bench will be skipped"), + } + + let mut group = c.benchmark_group("decode"); + group.measurement_time(Duration::from_secs(15)); + group.sample_size(20); + + group.bench_function("software", |b| { + b.iter(|| decode_all_sw(&path).expect("software decode")) + }); + + if probed_backend.is_some() { + group.bench_function("hardware", |b| { + b.iter(|| { + let n = decode_all_hw(&path).expect("hardware decode"); + std::hint::black_box(n); + }) + }); + } + + group.finish(); +} + +criterion_group!(benches, bench_decode); +criterion_main!(benches); diff --git a/benches/foo.rs b/benches/foo.rs deleted file mode 100644 index f328e4d..0000000 --- a/benches/foo.rs +++ /dev/null @@ -1 +0,0 @@ -fn main() {} diff --git a/docs/design.md b/docs/design.md new file mode 100644 index 0000000..521dd49 --- /dev/null +++ b/docs/design.md @@ -0,0 +1,120 @@ +# hwdecode — design + +Cross-platform **hardware-only** video decoder built on top of `ffmpeg-next` 8.1. 
+ +> **Status note.** This document was the original spec from the brainstorm +> phase and parts have evolved since: the crate is hardware-only (no +> `Backend::Software`), `Frame` is its own safe wrapper, and several pixel- +> format / safety details were tightened during review. For the canonical +> behavior, read `src/lib.rs` and `README.md`. Sections below have been +> trimmed where they conflicted; the spec is otherwise preserved as +> historical context. + +## Goals + +- Drop-in replacement for `ffmpeg::decoder::Video` at the call site (`send_packet` / `receive_frame` / `send_eof` / `flush`). +- Auto-probe the platform's hardware backends. **No software fallback inside this crate** — callers handle that themselves (e.g. via `ffmpeg::decoder::Video`) when `open` returns `Error::AllBackendsFailed`. +- Hand back native-format CPU frames (NV12/P010 from the HW path post-transfer). Pixel-format conversion is the caller's responsibility (e.g. via `colconv`). +- Cross-platform: macOS / iOS / iPadOS / tvOS, Linux (Intel/AMD/NVIDIA), Windows (any GPU + CUDA on NVIDIA). + +## Non-goals + +- Audio hardware decoding. Out of scope; software AAC/Opus/etc. is fast enough that the complexity isn't justified. +- Demuxing. Callers open files/streams themselves (e.g. via `findit-demuxer`) and feed packets in. +- Pixel-format conversion. Done downstream (`colconv`). +- Encoding. + +## Public API + +> The original spec listed an inline API surface here. It diverged from the +> shipping crate (`Backend::Software` was removed; `format() -> Pixel` was +> removed in favor of `Frame::pix_fmt() -> i32`; the `Frame` wrapper +> replaced `frame::Video`; `Error` gained / dropped variants). Rather than +> keep stale signatures here, the canonical reference is `src/lib.rs` and +> the public docs on each item. See the README for a runnable usage +> example. 
+ +## Behavior + +### Probe order + +| Target | Order tried (HW only) | +| ------------------- | -------------------------------------------- | +| macOS, iOS, tvOS | `[VideoToolbox]` | +| Linux | `[Vaapi, Cuda]` | +| Windows | `[D3d11va, Cuda]` | +| Other | `[]` → `Error::AllBackendsFailed` | + +A HW backend is a candidate only if **(a)** its `AVHWDeviceType` device can be created via `av_hwdevice_ctx_create`, and **(b)** the codec advertises support via `avcodec_get_hw_config` matching that device type. The first candidate that fully opens wins. Each failure logs `tracing::warn!` with the backend and the underlying error and the loop tries the next. If every backend fails (or the platform has none), `open` returns `Error::AllBackendsFailed`; software fallback is the caller's responsibility. + +### Device selection + +Always device 0 / system default (`av_hwdevice_ctx_create(.., NULL, ..)`). No env var, no config knob in v1. Add later if the multi-GPU use case appears. + +### `get_format` callback + +A static `extern "C"` callback. The decoder context's `opaque` field points to a small heap-allocated `CallbackState`. The callback walks the offered `pix_fmts` list as raw `i32` (avoiding bindgen-enum UB on header skew), returns `wanted` if present, else `AV_PIX_FMT_NONE` (which causes the decoder to fail; the caller-side probe loop then tears down and tries the next hardware backend). + +### Frame transfer + +`receive_frame` always: + +1. Reads from the codec into an internal `hw_frame: ffmpeg::frame::Video` (allocated once, reused). +2. If the frame's format is the HW pix fmt, calls `av_hwframe_transfer_data(out, hw_frame, 0)` into the caller's `&mut frame`. Copies `pts`, `pkt_dts`, `time_base`, `duration` (FFmpeg does not transfer timing). +3. Otherwise (SW path or decoder fell back mid-stream), clones the frame into the caller's slot. + +### Threading + +`VideoDecoder: Send + !Sync`. Each instance owns its own `AVCodecContext` and `AVBufferRef*`. 
Multiple decoders can run on different threads; a single decoder is not concurrent. + +### Drop + +`Drop` calls `av_buffer_unref(&mut self.hw_device_ref)` if non-null, frees the boxed `CallbackState`, then lets `ffmpeg::decoder::Video`'s own Drop free the codec context. + +## Internals + +```text +src/ +├── lib.rs // re-exports + crate-level docs +├── error.rs // Error enum +├── backend.rs // Backend enum, probe order, AVHWDeviceType <-> Backend mapping +├── decoder.rs // VideoDecoder, open/open_with, send/receive +└── ffi.rs // get_format callback, av_hwdevice_ctx_create / transfer wrappers, + // avcodec_get_hw_config probe +``` + +No other modules. Keep the surface small. + +## Build & dependencies + +- `ffmpeg-next = { version = "8.1", default-features = false, features = ["codec", "format"] }` +- `thiserror = "2"` +- `tracing = "0.1"` +- `libc = "0.2"` + +No platform-specific Cargo features. `cfg!(target_os = ...)` selects which `AVHWDeviceType` constants we even attempt — the FFI symbols are linked unconditionally via `ffmpeg-sys-next`. + +System FFmpeg ≥ **5.1** (we reference `AV_PIX_FMT_P212LE` / `AV_PIX_FMT_P412LE`, +added upstream in 5.1). Verified against the macOS Homebrew build (FFmpeg 8.1, +VideoToolbox enabled). + +## Testing + +1. **Unit tests** (`src/backend.rs`, `src/error.rs`) — pure-Rust: probe-order construction per platform, `Backend` ↔ `AVHWDeviceType` mapping, error formatting. +2. **Integration** (`tests/decode.rs`) — opens a sample H.264 file via `ffmpeg::format::input`, decodes 30 frames through `VideoDecoder::open` (auto-probe), asserts frame count and dimensions. Sample path comes from env var `HWDECODE_SAMPLE_VIDEO`; test is skipped with a clear `eprintln!` if unset. +3. **HW smoke** (`tests/hw_smoke.rs`, `#[ignore]`) — same decode, asserts `decoder.backend()` returns one of the hardware variants (the enum no longer has a Software variant; this is a sanity check against accidental no-op selection). 
CI runs this on platform-matched runners. + +Sample-file env var keeps the repo binary-free. Documented in `README.md`. + +## Benchmark + +`benches/decode.rs` (criterion) — one `bench_decode` entry point that registers two benchmarks in the `decode` group: + +- `software` — drives `ffmpeg::decoder::Video` directly (this crate has no software backend), decodes all frames, measures wall-clock per full-file decode. +- `hardware` — `VideoDecoder::open(..)` (auto-probe). Skipped if `open` returns `AllBackendsFailed` (no HW backend available on this host). + +Both use the same `HWDECODE_SAMPLE_VIDEO` file. Bench prints which backend the HW run actually used, so results are interpretable across machines. + +## Examples + +`examples/decode.rs` — opens a path from `argv[1]` with `ffmpeg::format::input`, finds the best video stream, feeds packets through `VideoDecoder`, prints `pts`, `width`x`height` and `pix_fmt` for each frame, plus the active backend at open and after the last frame. diff --git a/examples/decode.rs b/examples/decode.rs new file mode 100644 index 0000000..1d14de1 --- /dev/null +++ b/examples/decode.rs @@ -0,0 +1,90 @@ +//! Decode every video frame in `argv[1]`, printing one line per frame. +//! +//! ```sh +//! cargo run --release --example decode -- /path/to/video.mp4 +//! 
``` + +use ffmpeg::{format, media}; +use ffmpeg_next as ffmpeg; +use hwdecode::{Frame, VideoDecoder}; + +fn main() -> Result<(), Box> { + let path = std::env::args() + .nth(1) + .ok_or("usage: decode ")?; + + ffmpeg::init()?; + + let mut input = format::input(&path)?; + let stream = input + .streams() + .best(media::Type::Video) + .ok_or("no video stream")?; + let stream_index = stream.index(); + + let mut decoder = match VideoDecoder::open(stream.parameters()) { + Ok(d) => d, + Err(hwdecode::Error::AllBackendsFailed { attempts }) => { + eprintln!( + "no hardware backend available; tried {} backend(s):", + attempts.len() + ); + for (b, e) in &attempts { + eprintln!(" {b:?}: {e}"); + } + eprintln!("(callers handle software fallback themselves — see ffmpeg::decoder::Video)"); + return Ok(()); + } + Err(e) => return Err(e.into()), + }; + println!( + "open: backend={:?} {}x{}", + decoder.backend(), + decoder.width(), + decoder.height(), + ); + + let mut frame = Frame::empty()?; + let mut count: u64 = 0; + + let drain = |decoder: &mut VideoDecoder, frame: &mut Frame, count: &mut u64| loop { + match decoder.receive_frame(frame) { + Ok(()) => { + *count += 1; + println!( + "frame#{count} pts={:?} {}x{} pix_fmt={}", + frame.pts(), + frame.width(), + frame.height(), + frame.pix_fmt(), + ); + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + break + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Eof)) => break, + Err(e) => { + eprintln!("decode error: {e}"); + break; + } + } + }; + + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet)?; + drain(&mut decoder, &mut frame, &mut count); + } + decoder.send_eof()?; + drain(&mut decoder, &mut frame, &mut count); + + println!( + "decoded {count} frames; final backend={:?}", + decoder.backend() + ); + Ok(()) +} diff --git a/examples/foo.rs b/examples/foo.rs deleted file mode 100644 index f328e4d..0000000 --- 
a/examples/foo.rs +++ /dev/null @@ -1 +0,0 @@ -fn main() {} diff --git a/src/backend.rs b/src/backend.rs new file mode 100644 index 0000000..00cf82e --- /dev/null +++ b/src/backend.rs @@ -0,0 +1,118 @@ +use ffmpeg_next::ffi::{AVHWDeviceType, AVPixelFormat}; + +/// Hardware decoding backend. +/// +/// `hwdecode` only manages **hardware** decoders — software fallback is +/// out of scope. If no backend in [`probe_order`] for the current platform +/// can decode a stream, [`crate::VideoDecoder::open`] returns +/// [`crate::Error::AllBackendsFailed`] and the caller decides how to fall +/// back (e.g. by opening an `ffmpeg::decoder::Video` directly). +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Backend { + /// Apple VideoToolbox (macOS, iOS, iPadOS, tvOS, visionOS). + VideoToolbox, + /// Linux Video Acceleration API (Intel / AMD GPUs). + Vaapi, + /// NVIDIA NVDEC via CUDA (Linux / Windows on NVIDIA hardware). + Cuda, + /// Microsoft Direct3D 11 Video Acceleration (Windows). + D3d11va, +} + +impl Backend { + /// `AVHWDeviceType` corresponding to this backend. + pub(crate) fn av_hwdevice_type(self) -> AVHWDeviceType { + match self { + Self::VideoToolbox => AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX, + Self::Vaapi => AVHWDeviceType::AV_HWDEVICE_TYPE_VAAPI, + Self::Cuda => AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA, + Self::D3d11va => AVHWDeviceType::AV_HWDEVICE_TYPE_D3D11VA, + } + } + + /// Hardware pixel format the codec is expected to produce when this + /// backend is in use. (The post-`av_hwframe_transfer_data` CPU format is + /// typically `NV12` or `P010LE`; this is the *pre-transfer* sentinel.) + /// + /// Returns a `AVPixelFormat` value constructed from a hardcoded constant + /// in our bindings — never reads an enum value supplied by FFmpeg, so + /// no enum-discriminant UB risk. 
+ pub(crate) fn hw_pixel_format(self) -> AVPixelFormat { + match self { + Self::VideoToolbox => AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX, + Self::Vaapi => AVPixelFormat::AV_PIX_FMT_VAAPI, + Self::Cuda => AVPixelFormat::AV_PIX_FMT_CUDA, + Self::D3d11va => AVPixelFormat::AV_PIX_FMT_D3D11, + } + } +} + +/// Probe order for `VideoDecoder::open` on the current target. Hardware +/// backends only, in preference order. Empty for platforms with no known +/// HW backend; on those `open()` returns `AllBackendsFailed` immediately. +pub(crate) fn probe_order() -> &'static [Backend] { + #[cfg(any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + ))] + { + &[Backend::VideoToolbox] + } + #[cfg(target_os = "linux")] + { + &[Backend::Vaapi, Backend::Cuda] + } + #[cfg(target_os = "windows")] + { + &[Backend::D3d11va, Backend::Cuda] + } + #[cfg(not(any( + target_os = "macos", + target_os = "ios", + target_os = "tvos", + target_os = "visionos", + target_os = "linux", + target_os = "windows", + )))] + { + &[] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn all_backends_have_hwdevice_type_and_pix_fmt() { + for b in [ + Backend::VideoToolbox, + Backend::Vaapi, + Backend::Cuda, + Backend::D3d11va, + ] { + let _ = b.av_hwdevice_type(); + let _ = b.hw_pixel_format(); + } + } + + #[cfg(any(target_os = "macos", target_os = "ios", target_os = "tvos"))] + #[test] + fn apple_probe_order() { + assert_eq!(probe_order(), &[Backend::VideoToolbox]); + } + + #[cfg(target_os = "linux")] + #[test] + fn linux_probe_order() { + assert_eq!(probe_order(), &[Backend::Vaapi, Backend::Cuda]); + } + + #[cfg(target_os = "windows")] + #[test] + fn windows_probe_order() { + assert_eq!(probe_order(), &[Backend::D3d11va, Backend::Cuda]); + } +} diff --git a/src/decoder.rs b/src/decoder.rs new file mode 100644 index 0000000..a241b68 --- /dev/null +++ b/src/decoder.rs @@ -0,0 +1,2084 @@ +use std::{collections::VecDeque, mem::ManuallyDrop, ptr}; + +use 
ffmpeg_next::{ + codec::{ + self, + packet::{Mut as PacketMut, Ref as PacketRef}, + Context, + }, + ffi::{ + av_buffer_ref, av_buffer_unref, av_frame_move_ref, av_frame_unref, av_hwdevice_ctx_create, + av_hwframe_transfer_data, av_packet_ref, avcodec_alloc_context3, avcodec_free_context, + avcodec_parameters_alloc, avcodec_parameters_copy, avcodec_parameters_free, + avcodec_parameters_to_context, AVBufferRef, AVCodec, AVFrame, AVMediaType, + }, + frame, Codec, Packet, Rational, +}; + +/// Local FFI shim: `avcodec_find_decoder` declared with `c_int` instead of +/// the bindgen `AVCodecID` enum. Constructing `AVCodecID` from a runtime +/// integer that isn't in our build's discriminant set is UB; calling the +/// C function with a raw int avoids that boundary entirely. Both Rust +/// declarations resolve to the same C symbol at link time. +mod c_shims { + use super::AVCodec; + use libc::c_int; + extern "C" { + pub fn avcodec_find_decoder(id: c_int) -> *const AVCodec; + } +} + +use crate::{ + backend::{self, Backend}, + error::{Error, Result}, + ffi::{codec_supports_hwaccel, get_hw_format, CallbackState}, + frame::Frame, +}; + +/// Hardware-accelerated video decoder. +/// +/// Hardware-only — there is no software fallback inside this crate. If +/// every hardware backend in the platform's probe order fails to open, +/// `open` returns [`Error::AllBackendsFailed`] and the caller is +/// responsible for falling back to a software decoder of their choice +/// (e.g. `ffmpeg::decoder::Video`). +/// +/// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface. +/// Decoded frames are returned through [`crate::Frame`], a CPU-side wrapper +/// whose accessors avoid the `AVPixelFormat`-enum UB that an unvalidated read +/// of FFmpeg's raw integer pixel formats can trigger. +/// +/// `open` does a true probe: each backend opens with a strict `get_format` +/// callback. 
On the first non-transient error from a backend the decoder is +/// torn down and the next backend in probe order is tried, with all packets +/// seen so far replayed through it. The advance is *transactional* — the +/// candidate backend must successfully build and accept the replayed packets +/// before any probe state is consumed, so a failing backend in the middle of +/// the order does not strand the caller without history. Once the first frame +/// is delivered the probe collapses and subsequent calls go straight to the +/// active backend. +pub struct VideoDecoder { + /// Live FFmpeg state for the currently active backend. + state: DecoderState, + /// Reusable frame buffer used for hw-side decoding before transfer / move. + /// Internal use only — never handed to callers. + hw_frame: frame::Video, + /// Probe state: present until the first frame is received from the active + /// backend, then `None`. While `Some`, packets are buffered for replay and + /// non-transient errors / decoder failures advance to the next backend. + probe: Option, + /// CPU-side frames produced by a candidate decoder during probe replay + /// (when its internal queue filled and we had to drain output before the + /// next `send_packet`). Already transferred from the candidate's + /// `AVHWFramesContext` to a CPU frame, so they remain valid after the + /// candidate state is committed. [`Self::receive_frame`] dequeues these + /// FIFO before reading from `state.inner`. + pending_frames: VecDeque, + /// Per-decoder byte budget for [`Self::pending_frames`] during probe + /// replay. Defaults to [`DEFAULT_MAX_PROBE_PENDING_BYTES`]; override via + /// [`Self::with_max_probe_pending_bytes`]. + max_probe_pending_bytes: usize, +} + +/// Owned FFmpeg state for one open codec context. Has its own `Drop` so we +/// can swap it out cleanly during a probe advance via `mem::replace`. +struct DecoderState { + /// Wrapped FFmpeg decoder. 
`ManuallyDrop` so we can sequence its drop + /// before freeing the callback state. + inner: ManuallyDrop, + /// Backend driving this state. + backend: Backend, + /// Owned reference produced by `av_hwdevice_ctx_create`. + hw_device_ref: *mut AVBufferRef, + /// Owned `Box` raw pointer; `AVCodecContext::opaque` + /// aliases it. + callback_state: *mut CallbackState, +} + +/// Maximum number of packets we are willing to buffer for probe replay +/// before abandoning the fallback safety net. Set high enough to absorb +/// long B-frame GOPs and codec setup latency, low enough to bound memory +/// against malicious / pathological streams that never produce a first +/// frame. +const MAX_PROBE_PACKETS: usize = 256; + +/// Maximum total compressed-byte size of buffered probe packets. Each +/// `Packet` clone holds a refcounted reference to the demuxer's bitstream +/// data — even though the clone itself is shallow, the underlying buffers +/// stay alive until we drop them. 64 MiB is generous for normal video and +/// gives untrusted media a hard ceiling. +const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024; + +/// Hard cap on the number of side-data entries we tolerate per buffered +/// packet. `av_packet_ref` allocates an `AVPacketSideData` descriptor and +/// an `AVBufferRef` per entry, so a packet stuffed with many tiny or +/// zero-sized entries can consume significant memory in descriptor / +/// allocator overhead even after [`packet_side_data_bytes`] charges +/// [`SIDE_DATA_ENTRY_OVERHEAD`] bytes per entry. Refusing to clone such +/// packets short-circuits the descriptor explosion path. +/// +/// Sized for legitimate streams (typical video packets carry 0-5 side- +/// data entries; SEI-heavy HEVC/AV1 maybe a dozen) while comfortably +/// rejecting weaponised input. 
const MAX_PROBE_PACKET_SIDE_DATA_ENTRIES: usize = 64;

/// Conservative per-side-data-entry overhead estimate, in bytes, used by
/// both [`packet_side_data_bytes`] and the budget accounting in
/// [`VideoDecoder::send_packet`]. Counts the `AVPacketSideData`
/// descriptor (24 bytes per the FFmpeg 8.x bindings), the `AVBufferRef`
/// FFmpeg allocates per entry, and a margin for malloc bookkeeping
/// (header bytes, alignment slack). Setting it on the high side keeps
/// the byte cap a true upper bound on retained memory; under-charging
/// would let many tiny entries slip past the cap.
const SIDE_DATA_ENTRY_OVERHEAD: usize = 80;

/// Conservative upper-bound bytes-per-pixel multiplier used to estimate
/// the size of a CPU frame **before** `av_hwframe_transfer_data`
/// allocates its pixel buffers. Covers every HW download format this
/// crate produces (worst case is `P416LE` / `P412LE` at 6 bytes/pixel
/// for 16-bit 4:4:4 semi-planar) plus a margin for FFmpeg's per-row
/// stride alignment (typically 32-byte aligned, ~5% extra at HD widths
/// and below).
///
/// Used by [`drain_into_pending`] as a pre-transfer guard: if the
/// product `width * height * WORST_CASE_BYTES_PER_PIXEL` would already
/// push `pending_bytes` past `max_probe_pending_bytes`, the candidate
/// replay refuses the frame *before* allocating. Without this, FFmpeg
/// would perform the full HW→CPU download (potentially ~100 MiB for
/// 8K HDR) and we would only reject the frame after RSS had already
/// spiked. The post-transfer accounting via [`cpu_frame_bytes`] stays in
/// place as a backstop using the frame's actual stride/format.
///
/// Deliberately over-charges true 4:2:0 NV12 / P010 frames (which dominate
/// real workloads) — that's the right side to err on. Callers feeding
/// 8K+ workloads through the probe path can tune
/// [`VideoDecoder::with_max_probe_pending_bytes`] upward to compensate.
+const WORST_CASE_BYTES_PER_PIXEL: usize = 8; + +/// Maximum number of CPU frames we are willing to queue from a candidate +/// during probe replay. Each frame is a fully-allocated CPU buffer +/// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so +/// an unbounded queue would OOM on a candidate with a shallow internal +/// queue against a deep replay history. This cap, together with +/// [`DEFAULT_MAX_PROBE_PENDING_BYTES`], is enforced as a hard limit during +/// replay: once either limit is reached, probe buffering fails for the +/// candidate (returns `ENOMEM` from `drain_into_pending`) instead of +/// queueing additional drained frames. The probe loop then advances to +/// the next backend or returns `Error::AllBackendsFailed` if exhausted. +const MAX_PROBE_PENDING_FRAMES: usize = 16; + +/// Default byte budget for probe-replay drained frames. 256 MiB is enough +/// for 16 frames at 4K P010 (~24 MiB each = 384 MiB worst case under the +/// count cap), and is the cap that fires first for very high-resolution +/// content (8K P010: ~96 MiB per frame → only ~2 frames fit). +/// +/// Override per-decoder with [`VideoDecoder::with_max_probe_pending_bytes`] +/// when targeting 8K+ workloads or memory-constrained environments. +/// +/// TODO: when frames significantly exceed typical sizes, consider +/// memmap-backed pending buffers (write transferred frames to a temp file +/// or shared-memory segment) so the resident set stays bounded even when +/// the byte cap is raised. Out of scope for v0.0.0. +pub const DEFAULT_MAX_PROBE_PENDING_BYTES: usize = 256 * 1024 * 1024; + +/// State carried only during the probe window (before the first successful +/// frame). Holds enough information to tear down the current decoder and +/// retry with the next backend. +struct ProbeState { + parameters: codec::Parameters, + codec: Codec, + /// Backends still to try, in order. 
Empty means "no more options after + /// the active one fails" — `advance_probe` then surfaces + /// [`Error::AllBackendsFailed`] so the contract is the same on + /// single-backend platforms (e.g. macOS) as on multi-backend ones. + remaining_backends: Vec, + /// Packets sent so far, kept for replay through any candidate backend. + /// Preserved across failed candidates — only cleared when the probe + /// collapses on a successful first frame, or when the probe is + /// abandoned due to the size caps. + buffered_packets: Vec, + /// Cumulative size (in compressed bytes) of `buffered_packets`. Tracked + /// incrementally so we don't have to re-sum on every send. + buffered_bytes: usize, + /// Whether `send_eof` has been called; replayed alongside packets. + eof_sent: bool, + /// Per-backend errors captured since the probe window opened. Pushed + /// whenever a backend's failure triggers `advance_probe` (the active + /// backend that just failed) or a candidate's build / replay rejects + /// it. Drained into [`Error::AllBackendsFailed`] when the probe + /// exhausts every option. + attempts: Vec<(Backend, Box)>, +} + +// SAFETY: All raw pointers are exclusively owned by `DecoderState` and never +// shared. `ffmpeg::decoder::Video` is itself `Send` (its `Context` carries an +// `unsafe impl Send`). The decoder is not safe for concurrent use, hence not +// `Sync`. +unsafe impl Send for DecoderState {} +unsafe impl Send for VideoDecoder {} + +impl Drop for DecoderState { + fn drop(&mut self) { + // Order matters: + // 1. Drop the codec context first. While it lives, FFmpeg may invoke + // `get_format`, which dereferences `callback_state` via `opaque`. + // 2. Free the callback state heap allocation. + // 3. Release our hw device reference (FFmpeg released its own when + // the codec context was freed in step 1). 
+ unsafe { + ManuallyDrop::drop(&mut self.inner); + if !self.callback_state.is_null() { + drop(Box::from_raw(self.callback_state)); + self.callback_state = ptr::null_mut(); + } + if !self.hw_device_ref.is_null() { + av_buffer_unref(&mut self.hw_device_ref); + } + } + } +} + +impl VideoDecoder { + /// Auto-probe hardware backends in the platform's default order. + /// + /// Each backend opens with a strict `get_format` callback. The first + /// backend whose `avcodec_open2` succeeds becomes active; if its first + /// frame is unusable (decode error, transfer failure, or a CPU-format + /// frame from a HW context) the decoder is torn down and the next backend + /// is tried — packets sent so far are replayed through the new decoder + /// transparently. The probe advance is transactional: the next backend + /// must build *and* accept the replayed history before any probe state is + /// consumed, so a misbehaving middle backend cannot strand the caller. + /// + /// [`Self::backend`] reflects whichever backend ultimately produced the + /// first frame. + /// + /// [`Error::AllBackendsFailed`] surfaces in two places, with the same + /// meaning ("no hardware backend can decode this stream — fall back to + /// software yourself"): + /// - From `open` itself, when no backend even opens. + /// - From [`Self::send_packet`] / [`Self::send_eof`] / + /// [`Self::receive_frame`], when the initially-opened backend fails + /// at decode time and every remaining backend in the probe order + /// either also fails or doesn't exist. On single-backend platforms + /// (e.g. macOS, where the order is `[VideoToolbox]`), this is the + /// only place a HW-only failure surfaces. + /// + /// In both cases, `attempts` carries the per-backend error log so the + /// caller can decide how to proceed with software fallback. 
+ pub fn open(parameters: codec::Parameters) -> Result { + let codec = find_decoder(¶meters)?; + let order = backend::probe_order(); + + let mut attempts: Vec<(Backend, Box)> = Vec::new(); + for (i, &backend) in order.iter().enumerate() { + // Use the checked clone — ffmpeg-next's `Parameters::clone` does + // `avcodec_parameters_alloc` without a null check and ignores the + // return of `avcodec_parameters_copy`. Under OOM that path silently + // produces a Parameters with a null inner pointer. + let cloned_for_build = match try_clone_parameters(¶meters) { + Ok(p) => p, + Err(e) => { + tracing::warn!(?backend, error = %e, "hwdecode: parameters clone failed"); + attempts.push((backend, Box::new(Error::Ffmpeg(e)))); + continue; + } + }; + match Self::build_state(cloned_for_build, codec, backend) { + Ok(state) => { + tracing::info!(?backend, "hwdecode: opened video decoder (probing)"); + let remaining = order[(i + 1)..].to_vec(); + // Deep-copy the caller's `parameters` before storing in ProbeState. + // `codec::Parameters` from `stream.parameters()` carries an Rc + // owner pointing at the demuxer; moving that Rc to a worker + // thread (when VideoDecoder is sent) would race with the demuxer's + // Rc on the original thread. The checked clone copies the bytes + // into a fresh allocation with `owner: None`, severing the link. + // + // We always create ProbeState — even when `remaining` is empty + // (single-backend platforms like macOS) — so that a first-frame + // failure on the only backend surfaces as + // `Error::AllBackendsFailed` from `receive_frame` / + // `send_packet` rather than as a raw FFmpeg error. That keeps + // the API contract the same regardless of how many HW backends + // the platform exposes. + // + // If the clone fails (ENOMEM), we keep the active `state` but + // skip probe setup — caller loses the transactional probe / + // fallback safety net but still gets a working decoder. 
+ let probe = match try_clone_parameters(¶meters) { + Ok(probe_params) => Some(ProbeState { + parameters: probe_params, + codec, + remaining_backends: remaining, + buffered_packets: Vec::new(), + buffered_bytes: 0, + eof_sent: false, + attempts: Vec::new(), + }), + Err(e) => { + tracing::warn!( + error = %e, + "hwdecode: parameters clone failed for probe state; proceeding without fallback" + ); + None + } + }; + return Ok(Self { + state, + hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?, + probe, + pending_frames: VecDeque::new(), + max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES, + }); + } + Err(e) => { + tracing::warn!(?backend, error = %e, "hwdecode: backend open failed"); + attempts.push((backend, Box::new(e))); + } + } + } + Err(Error::AllBackendsFailed { attempts }) + } + + /// Open the decoder with a specific backend. No probe, no fallback. + /// + /// If `backend` cannot actually decode this stream, the failure surfaces + /// from [`Self::receive_frame`] (the strict `get_format` callback returns + /// `AV_PIX_FMT_NONE`, the decoder errors out). The caller is responsible + /// for retrying with another hardware backend or falling back to a + /// software decoder of their choice (e.g. `ffmpeg::decoder::Video`). + pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result { + let codec = find_decoder(¶meters)?; + let state = Self::build_state(parameters, codec, backend)?; + Ok(Self { + state, + hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?, + probe: None, + pending_frames: VecDeque::new(), + max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES, + }) + } + + /// Override the byte budget for probe-replay queued frames. Defaults to + /// [`DEFAULT_MAX_PROBE_PENDING_BYTES`]. Use a higher value when targeting + /// 8K+ workloads where 16 frames at full size could exceed the default; + /// use a lower value in memory-constrained services to bound peak + /// allocation more tightly. 
+ /// + /// Setting after the first frame has been delivered is harmless but has + /// no observable effect — the probe has already collapsed and the cap + /// only applies during replay drain. + /// + /// Returns `self` for builder-style chaining: + /// ```ignore + /// let decoder = VideoDecoder::open(params)? + /// .with_max_probe_pending_bytes(1024 * 1024 * 1024); // 1 GiB + /// ``` + pub fn with_max_probe_pending_bytes(mut self, bytes: usize) -> Self { + self.max_probe_pending_bytes = bytes; + self + } + + /// The backend currently producing frames. While the probe is still in + /// progress (no frame received yet) this returns the optimistically + /// selected backend; after the first frame, it is the backend that + /// actually produced it. Once stable, never changes again. + pub fn backend(&self) -> Backend { + self.state.backend + } + + /// Decoder width in pixels. + pub fn width(&self) -> u32 { + self.state.inner.width() + } + + /// Decoder height in pixels. + pub fn height(&self) -> u32 { + self.state.inner.height() + } + + /// Codec context time base. + pub fn time_base(&self) -> Rational { + self.state.inner.time_base() + } + + /// Frame rate from the codec context, if known. + pub fn frame_rate(&self) -> Option { + self.state.inner.frame_rate() + } + + /// Submit a packet to the decoder. + /// + /// On success — and only on success — the packet is buffered for potential + /// replay through a fallback backend while the probe is active. EAGAIN + /// (decoder needs `receive_frame` to drain output first) propagates as + /// normal backpressure; the caller drains then retries. + /// + /// While the probe is active, a non-transient error (e.g. the active HW + /// backend rejecting this stream's geometry on first packet) advances the + /// probe to the next candidate and retries the packet there. The caller + /// observes only the eventual success or, if the probe is exhausted, the + /// final error. 
+ /// + /// If the probe window grows beyond [`MAX_PROBE_PACKETS`] or + /// [`MAX_PROBE_PACKET_BYTES`] without producing a first frame (a stream + /// the active backend is silently mishandling, or pathological input), + /// the probe is **abandoned**: replay history is dropped, queued frames + /// are cleared, and `self.probe = None`. The active backend continues + /// serving the caller without fallback. A `tracing::warn!` records this + /// so it is visible in production logs. + pub fn send_packet(&mut self, packet: &Packet) -> Result<()> { + loop { + match self.state.inner.send_packet(packet) { + Ok(()) => { + if let Some(probe) = self.probe.as_mut() { + // Step 1: reject by side-data entry count BEFORE walking the + // side-data array for byte accounting. `packet_side_data_bytes` + // dereferences each `AVPacket.side_data[i]` based on the + // FFmpeg-supplied `side_data_elems`; if that integer is + // corrupt or weaponised we don't want to walk it from the + // safe `send_packet` path. The byte helper still clamps its + // own walk to the cap as defense-in-depth, but checking the + // count first short-circuits the descriptor-explosion case + // entirely. + let side_count = packet_side_data_count(packet); + if side_count > MAX_PROBE_PACKET_SIDE_DATA_ENTRIES { + tracing::warn!( + side_data_entries = side_count, + max_side_data_entries = MAX_PROBE_PACKET_SIDE_DATA_ENTRIES, + trigger = "side_data_entry_cap", + "hwdecode: packet side-data entry count exceeds cap; \ + abandoning fallback safety net without byte accounting" + ); + // Abandon the *future* probe-buffering only — see the byte/ + // packet cap branch below for why `pending_frames` survives. + self.probe = None; + } else { + // Step 2: now safe to compute byte budget — `side_count` + // is bounded. + // + // `try_clone_packet` calls `av_packet_ref`, which deep-copies + // side data via `av_packet_copy_props`. 
The probe budget + // must include descriptor + ref overhead per side-data + // entry (via `packet_side_data_bytes`); without it, a + // packet stuffed with many tiny entries can dominate + // retained memory before the byte cap is even close to + // firing. + let pkt_size = packet.size().saturating_add(packet_side_data_bytes( + packet, + MAX_PROBE_PACKET_SIDE_DATA_ENTRIES, + )); + let new_count = probe.buffered_packets.len() + 1; + let new_bytes = probe.buffered_bytes.saturating_add(pkt_size); + if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES { + tracing::warn!( + packets = new_count, + bytes = new_bytes, + side_data_entries = side_count, + max_packets = MAX_PROBE_PACKETS, + max_bytes = MAX_PROBE_PACKET_BYTES, + trigger = "byte_or_packet_cap", + "hwdecode: probe window exceeded caps without first frame; \ + abandoning fallback safety net" + ); + // Abandon the *future* probe-buffering only. + // `pending_frames` belong to the currently active backend + // (possibly the candidate `advance_probe` committed + // earlier in this same `send_packet` call) and are valid + // output the caller will dequeue via `receive_frame`. + // Clearing them here would silently drop initial frames + // at exactly the cap-overflow / OOM-stress paths. + self.probe = None; + } else { + // Use the checked clone — ffmpeg-next's `Packet::clone` + // discards av_packet_ref's return value and would + // silently store an empty packet on ENOMEM, corrupting + // future replay. + match try_clone_packet(packet) { + Ok(cloned) => { + probe.buffered_packets.push(cloned); + probe.buffered_bytes = new_bytes; + } + Err(e) => { + tracing::warn!( + error = %e, + "hwdecode: packet clone failed for probe history; \ + abandoning fallback safety net" + ); + // Same reasoning as the cap-overflow branch above: + // `pending_frames` are owned by the active backend, + // not the probe buffer, so they survive abandonment. 
+ self.probe = None; + } + } + } + } + } + return Ok(()); + } + Err(e) if is_transient(&e) => { + // Normal backpressure / EOF — pass through unchanged. + return Err(Error::Ffmpeg(e)); + } + Err(e) => { + if self.probe.is_some() { + // advance_probe consumes the error into `attempts` and either + // installs a candidate (Ok) or surfaces AllBackendsFailed (Err). + self.advance_probe(Error::Ffmpeg(e))?; + continue; + } + return Err(Error::Ffmpeg(e)); + } + } + } + } + + /// Signal end-of-stream to the decoder. + /// + /// Recorded for replay only if the underlying `send_eof` succeeds. While + /// the probe is active, non-transient errors trigger probe advance and + /// retry, matching `send_packet`'s behaviour. + pub fn send_eof(&mut self) -> Result<()> { + loop { + match self.state.inner.send_eof() { + Ok(()) => { + if let Some(probe) = self.probe.as_mut() { + probe.eof_sent = true; + } + return Ok(()); + } + Err(e) if is_transient(&e) => return Err(Error::Ffmpeg(e)), + Err(e) => { + if self.probe.is_some() { + self.advance_probe(Error::Ffmpeg(e))?; + continue; + } + return Err(Error::Ffmpeg(e)); + } + } + } + } + + /// Receive a CPU-side decoded frame. + /// + /// The frame is downloaded with `av_hwframe_transfer_data` and metadata + /// is copied via `av_frame_copy_props`. The caller's frame is always + /// unref'd first, so reuse across resolution changes or different + /// decoders is safe. + /// + /// While the probe window is open, *any* non-transient failure (decode + /// error, transfer error, copy_props error, or a CPU-format frame from a + /// HW-opened context) tears down the current decoder and advances to the + /// next hardware backend in probe order, replaying buffered packets + /// through it. Frames the candidate produced during replay (drained when + /// `send_packet` returned EAGAIN) are queued and delivered FIFO via this + /// method, so the caller never loses initial frames after a fallback. 
+ /// + /// This crate is hardware-only: there is no software fallback inside the + /// decoder. When every backend in the probe order has been exhausted — + /// including the case of a single-backend platform whose only backend + /// failed — this returns [`Error::AllBackendsFailed`] with the per- + /// backend attempt log so the caller can branch into a software + /// decoder of their choice. + /// + /// Returns the same transient signals as `ffmpeg::decoder::Video`: + /// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and + /// more packets must be sent, and `Error::Ffmpeg(Eof)` once fully drained. + pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<()> { + // Pre-drain frames queued during probe replay. They are already CPU-side + // (transferred at drain time, when the candidate's HW context was alive) + // so we just move them into the caller's slot. + if self.try_pop_pending(frame) { + return Ok(()); + } + + loop { + let res = self.state.inner.receive_frame(&mut self.hw_frame); + match res { + Err(e) => { + // EAGAIN is normal backpressure — pass through unconditionally. + if is_eagain(&e) { + return Err(Error::Ffmpeg(e)); + } + // EOF (and every other non-transient error): if we are still + // probing, treat it as candidate failure — a backend that drains + // to EOF without ever producing a frame should not silently + // present as "stream over" to the caller. Advance and retry; if + // every backend has been exhausted, advance_probe surfaces + // AllBackendsFailed and `?` propagates it. + if self.probe.is_some() { + self.advance_probe(Error::Ffmpeg(e))?; + // Probe advance may have populated `pending_frames`; deliver + // one of those before reading more from the new candidate. + if self.try_pop_pending(frame) { + return Ok(()); + } + continue; + } + // Probe collapsed already — surface the error (including EOF + // for a genuinely empty stream). 
+ return Err(Error::Ffmpeg(e)); + } + Ok(()) => { + // Always attempt the HW→CPU transfer. With strict `get_format`, + // libavcodec can only deliver frames in the wired-up HW format + // (or fail). If a misbehaving codec ever hands us a CPU-side + // frame anyway, `av_hwframe_transfer_data` returns AVERROR(EINVAL) + // (neither src nor dst has an AVHWFramesContext attached) and we + // route through the same error path below. + match unsafe { transfer_hw_frame(frame, &mut self.hw_frame) } { + Ok(()) => { + self.probe = None; + return Ok(()); + } + Err(e) => { + if self.probe.is_some() { + self.advance_probe(Error::Ffmpeg(e))?; + unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) }; + if self.try_pop_pending(frame) { + return Ok(()); + } + continue; + } + return Err(Error::Ffmpeg(e)); + } + } + } + } + } + } + + /// Pop one queued frame (produced by a candidate decoder during probe + /// replay) into the caller's slot. Returns `true` when a frame was + /// delivered, `false` when the queue was empty. + fn try_pop_pending(&mut self, frame: &mut Frame) -> bool { + let Some(mut buffered) = self.pending_frames.pop_front() else { + return false; + }; + // SAFETY: `buffered` is a CPU-side AVFrame we previously transferred + // and pushed into the queue; both pointers are valid. + unsafe { + av_frame_unref(frame.as_inner_mut().as_mut_ptr()); + av_frame_move_ref(frame.as_inner_mut().as_mut_ptr(), buffered.as_mut_ptr()); + } + // Probe semantics: delivering a frame collapses the probe. + self.probe = None; + true + } + + /// Flush internal buffers (e.g. after a seek). + /// + /// Discards every frame buffered by the decoder, every frame queued during + /// probe replay (`pending_frames`), and the residual `hw_frame` scratch + /// buffer. Probe-time replay state (buffered packets, EOF marker) is also + /// cleared since post-seek packets do not align with the previously + /// captured history. 
After a flush, the next `receive_frame` waits for new + /// post-seek input. + pub fn flush(&mut self) { + self.state.inner.flush(); + // SAFETY: hw_frame is a valid AVFrame we own; av_frame_unref is a no-op + // for an already-empty frame. + unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) }; + self.pending_frames.clear(); + if let Some(probe) = self.probe.as_mut() { + probe.buffered_packets.clear(); + probe.buffered_bytes = 0; + probe.eof_sent = false; + } + } + + /// Try the next backend in `remaining_backends`. Transactional: a + /// candidate must successfully build and accept the replayed history + /// before any probe state is consumed. Backends that fail to build or + /// reject the replay are recorded into `probe.attempts` and the loop + /// continues to the next one. + /// + /// `last_error` is the error that triggered this advance — i.e. the + /// failure of the currently active backend on `send_packet` / + /// `send_eof` / `receive_frame`. It is recorded against the active + /// backend before any candidate is tried so that a final + /// `AllBackendsFailed` carries the full attempt log including the + /// initially-opened backend's runtime failure. + /// + /// Returns: + /// - `Ok(())` when a candidate is installed and replay completed — + /// caller should retry the operation. + /// - `Err(Error::AllBackendsFailed { attempts })` when every remaining + /// backend has been exhausted (including the just-failed active one). + /// This is what the documented `open` contract promises, surfaced at + /// runtime so the caller can branch into a software fallback. On a + /// single-backend platform (e.g. macOS), this fires after the only + /// backend's first-frame failure; on multi-backend platforms it + /// fires after the last candidate's failure. + /// - `Err(_)` for other fatal conditions surfaced by probe machinery + /// itself (e.g. `alloc_av_frame` ENOMEM during replay drain). 
fn advance_probe(&mut self, last_error: Error) -> Result<()> {
  // Record the failure that triggered this advance against the active
  // backend. If the probe was somehow already gone (shouldn't happen —
  // call sites guard with `self.probe.is_some()`), just propagate the
  // error unchanged.
  let active_backend = self.state.backend;
  let Some(probe) = self.probe.as_mut() else {
    return Err(last_error);
  };
  probe.attempts.push((active_backend, Box::new(last_error)));

  // Drop frames previously queued from the backend we're now abandoning.
  // They came from a candidate that just failed for cause and cannot be
  // trusted alongside frames we may queue from the next candidate. (If
  // this method is called repeatedly via chained probe advances, this
  // also keeps `pending_frames` from accumulating frames from multiple
  // rejected backends.)
  self.pending_frames.clear();

  loop {
    // Peek at the next candidate without mutating probe state.
    let next_backend = match self
      .probe
      .as_ref()
      .and_then(|p| p.remaining_backends.first().copied())
    {
      Some(backend) => backend,
      // No more candidates — surface the accumulated attempt log as
      // AllBackendsFailed so single- and multi-backend platforms have
      // the same contract for "every HW backend failed."
      None => {
        let attempts = self.probe.take().map(|p| p.attempts).unwrap_or_default();
        return Err(Error::AllBackendsFailed { attempts });
      }
    };

    // Snapshot inputs. Use the checked clone helper rather than
    // `Parameters::clone` (which masks ENOMEM).
    let probe = self.probe.as_ref().expect("probe state present");
    let codec = probe.codec;
    let parameters = match try_clone_parameters(&probe.parameters) {
      Ok(p) => p,
      Err(e) => {
        tracing::warn!(
          error = %e,
          "hwdecode: parameters clone failed during probe advance; popping backend and trying next"
        );
        self.reject_next_candidate(Error::Ffmpeg(e));
        continue;
      }
    };

    let prev_backend = self.state.backend;
    tracing::warn!(from = ?prev_backend, to = ?next_backend, "hwdecode: advancing probe");

    // Build candidate. On failure, record into attempts and continue
    // without touching the packet buffer.
    let mut candidate_state = match Self::build_state(parameters, codec, next_backend) {
      Ok(s) => s,
      Err(e) => {
        tracing::warn!(?next_backend, error = %e, "hwdecode: candidate build failed");
        self.reject_next_candidate(e);
        continue;
      }
    };

    // Scratch frame for draining the candidate. Failing to allocate it is
    // fatal for the probe machinery itself, not a candidate failure;
    // `candidate_state` is released by its Drop on this early return.
    let mut hw_buf = alloc_av_frame().map_err(Error::Ffmpeg)?;

    // Replay buffered history through the candidate WITHOUT installing it.
    // We borrow the buffer immutably; if replay fails the candidate's Drop
    // releases the FFmpeg state and the buffer is preserved for the next
    // attempt.
    //
    // EAGAIN handling: `avcodec_send_packet` may return EAGAIN when its
    // internal queue is full and the user is expected to drain output
    // first (B-frame buffering, candidate-specific queue depth, etc.).
    // This is normal flow — we drain frames out of the candidate, transfer
    // each one to a CPU frame, and stash them in `local_pending`. After
    // commit they move to `self.pending_frames` and are delivered FIFO
    // by `receive_frame`, so the caller never loses initial frames.
    let mut local_pending = VecDeque::new();
    let mut local_pending_bytes: usize = 0;
    let max_pending_bytes = self.max_probe_pending_bytes;
    let probe = self.probe.as_ref().expect("probe state present");
    let replay_result = (|| -> std::result::Result<(), ffmpeg_next::Error> {
      for pkt in &probe.buffered_packets {
        loop {
          match candidate_state.inner.send_packet(pkt) {
            Ok(()) => break,
            Err(e) if is_eagain(&e) => {
              // Drain candidate output (transferring + queueing each
              // frame) and retry the same packet.
              drain_into_pending(
                &mut candidate_state.inner,
                &mut hw_buf,
                &mut local_pending,
                &mut local_pending_bytes,
                max_pending_bytes,
              )?;
            }
            Err(e) => return Err(e),
          }
        }
      }
      if probe.eof_sent {
        // `avcodec_send_packet(NULL)` (which `send_eof` becomes) can
        // return EAGAIN with the same drain-output-first semantics as
        // a regular send_packet. Loop drain+retry instead of failing
        // the candidate on backpressure.
        loop {
          match candidate_state.inner.send_eof() {
            Ok(()) => break,
            Err(e) if is_eagain(&e) => {
              drain_into_pending(
                &mut candidate_state.inner,
                &mut hw_buf,
                &mut local_pending,
                &mut local_pending_bytes,
                max_pending_bytes,
              )?;
            }
            Err(e) => return Err(e),
          }
        }
      }
      Ok(())
    })();

    if let Err(e) = replay_result {
      tracing::warn!(?next_backend, error = %e, "hwdecode: candidate replay failed");
      // Drop candidate explicitly so its FFI cleanup runs now. Discard any
      // frames we drained from this candidate — they're tied to a decoder
      // we're throwing away.
      drop(candidate_state);
      drop(local_pending);
      self.reject_next_candidate(Error::Ffmpeg(e));
      continue;
    }

    // Commit: install the candidate (the previous state's Drop tears the
    // failed backend down), clear residual hw_frame, queue the drained
    // frames for the caller, and pop the now-active backend.
    self.state = candidate_state;
    // SAFETY: hw_frame is a valid AVFrame we own; unref on an empty frame
    // is a no-op.
    unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) };
    self.pending_frames.append(&mut local_pending);
    self
      .probe
      .as_mut()
      .expect("probe state present")
      .remaining_backends
      .remove(0);
    return Ok(());
  }
}

/// Pop the candidate at the head of `remaining_backends` and log `error`
/// against it in the probe's attempt list. Only called from
/// `advance_probe` while the probe is present and has a candidate left.
fn reject_next_candidate(&mut self, error: Error) {
  let probe = self.probe.as_mut().expect("probe state present");
  let rejected = probe.remaining_backends.remove(0);
  probe.attempts.push((rejected, Box::new(error)));
}

/// Build raw FFmpeg state for one hardware backend. Strict `get_format`
/// (NONE on missing HW format); cross-backend fallback is the caller's job.
fn build_state(
  parameters: codec::Parameters,
  codec: Codec,
  backend: Backend,
) -> Result<DecoderState> {
  // Use our checked allocator instead of Context::from_parameters, which
  // does not null-check avcodec_alloc_context3 and would feed a null
  // AVCodecContext into FFmpeg under OOM.
  let mut ctx = build_codec_context(&parameters)?;
  let av_type = backend.av_hwdevice_type();

  // Verify the codec advertises this hwaccel **with the exact HW pix_fmt
  // we're about to wire up in `get_format`**. FFmpeg's HW config table
  // is keyed per (device_type, pix_fmt); a codec can advertise the same
  // device with several HW pix_fmts, so matching only on device_type
  // would let probing succeed for a backend whose pix_fmt the codec
  // never offers — the failure would then surface deep inside the
  // probe/decode loop. Matching the exact pix_fmt keeps the strict
  // `get_format` honest and gives `open_with` a clean rejection.
+ let hw_pix_fmt = backend.hw_pixel_format(); + if !codec_supports_hwaccel(unsafe { codec.as_ptr() }, av_type, hw_pix_fmt as i32) { + return Err(Error::BackendUnsupportedByCodec(backend)); + } + + // Create the device context. + let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut(); + // SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill. + let ret = unsafe { + av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0) + }; + if ret < 0 { + return Err(Error::HwDeviceInitFailed { + backend, + source: ffmpeg_next::Error::from(ret), + }); + } + + let callback_state = Box::into_raw(Box::new(CallbackState { + wanted: hw_pix_fmt, + wanted_int: hw_pix_fmt as i32, + })); + // RAII guard: from now until the end-of-function `into_owned()`, every + // early return — `av_buffer_ref` failure, `open_as` failure, codec_type + // mismatch, or any future error path added between here and the + // `DecoderState` construction — frees `hw_device_ref` and + // `callback_state` via the guard's Drop. Without it, each error site + // had to remember to clean up these two FFI-owned resources by hand; + // the codec_type-mismatch branch was missed and silently leaked one + // device ref + one heap allocation per bad input. + let guard = PartialBuildState { + hw_device_ref, + callback_state, + }; + + // SAFETY: ctx is a freshly-constructed AVCodecContext we own; + // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's + // use (we keep our own ref in `hw_device_ref` for cleanup). + // av_buffer_ref returns NULL on allocation failure; we must check it + // before assigning, otherwise the codec context would be opened with a + // HW-flagged setup but no actual device reference. + let device_ref_for_ctx = unsafe { av_buffer_ref(hw_device_ref) }; + if device_ref_for_ctx.is_null() { + // guard's Drop frees hw_device_ref (the first ref) and callback_state. 
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + })); + } + // SAFETY: device_ref_for_ctx is a valid AVBufferRef* from av_buffer_ref; + // ctx is freshly built and owned by us. After this point ctx aliases + // `callback_state` via `opaque` (FFmpeg never frees opaque, so + // `callback_state` ownership stays with us / the guard) and aliases + // `device_ref_for_ctx` (the second ref) via `hw_device_ctx` (FFmpeg + // unrefs that on codec context drop, independent of the guard's first + // ref). + unsafe { + let raw = ctx.as_mut_ptr(); + (*raw).hw_device_ctx = device_ref_for_ctx; + (*raw).opaque = callback_state.cast(); + (*raw).get_format = Some(get_hw_format); + } + + // Open the decoder. On failure `ctx`/`opened` Drop releases the codec + // context (and via that the second device ref); the guard releases the + // first device ref and the callback state. + // + // We deliberately bypass `Opened::video()` because it calls + // `Context::medium()`, which reads `AVCodecContext.codec_type` as the + // bindgen `AVMediaType` enum — the same UB hazard we've been + // systematically removing. Instead: validate `codec_type` as a raw + // `c_int` ourselves, then construct the `decoder::Video` wrapper + // directly via its public tuple field. + let opened = ctx.decoder().open_as(codec).map_err(Error::Ffmpeg)?; + + // Validate codec_type as a raw integer — never construct AVMediaType + // from an unvalidated runtime value. + // SAFETY: codec_type is bound as AVMediaType (`#[repr(i32)]`), same + // size and alignment as i32; reading the bytes as i32 cannot be UB. + let codec_type_int: i32 = + unsafe { ptr::read(ptr::addr_of!((*opened.as_ptr()).codec_type) as *const i32) }; + let video_type_int: i32 = AVMediaType::AVMEDIA_TYPE_VIDEO as i32; + if codec_type_int != video_type_int { + // Not a video codec context — surface the same error + // `Opened::video()` would have, without going through enum + // construction. 
`opened`'s Drop releases the codec context; the + // guard releases the first hw_device_ref and the callback state. + return Err(Error::Ffmpeg(ffmpeg_next::Error::InvalidData)); + } + // SAFETY of construction: `decoder::Video` is `pub struct Video(pub Opened)`. + // We construct via the public field; this is the same wrapping + // `Opened::video()` does on success, just without the enum read. + let opened = ffmpeg_next::decoder::Video(opened); + + // Disarm the guard and transfer ownership of both resources into the + // returned DecoderState (whose own Drop handles their lifetime). + let (hw_device_ref, callback_state) = guard.into_owned(); + Ok(DecoderState { + inner: ManuallyDrop::new(opened), + backend, + hw_device_ref, + callback_state, + }) + } +} + +/// RAII guard for the partially-owned FFmpeg state that +/// [`VideoDecoder::build_state`] holds between the +/// `av_hwdevice_ctx_create` and `Box::into_raw(CallbackState)` +/// allocations and the final `DecoderState` construction. +/// +/// If `build_state` returns `Err` for any reason in that window +/// (`av_buffer_ref` ENOMEM, `open_as` failure, codec_type mismatch, or +/// any future error path), this guard's `Drop` releases +/// `hw_device_ref` — the first ref returned by `av_hwdevice_ctx_create`, +/// distinct from the second ref FFmpeg unrefs when the codec context +/// drops — and the boxed `CallbackState`, which FFmpeg never touches +/// because `AVCodecContext::opaque` is purely user-owned. +/// +/// Successful construction calls [`Self::into_owned`] to disarm the +/// guard and hand both pointers to the new `DecoderState`. +struct PartialBuildState { + hw_device_ref: *mut AVBufferRef, + callback_state: *mut CallbackState, +} + +impl PartialBuildState { + /// Disarm the guard: return the owned pointers and replace the guard's + /// fields with null so its Drop is a no-op. 
+ fn into_owned(mut self) -> (*mut AVBufferRef, *mut CallbackState) { + let hw = std::mem::replace(&mut self.hw_device_ref, ptr::null_mut()); + let cb = std::mem::replace(&mut self.callback_state, ptr::null_mut()); + (hw, cb) + } +} + +impl Drop for PartialBuildState { + fn drop(&mut self) { + // SAFETY: pointers are either freshly allocated by `build_state` (via + // `av_hwdevice_ctx_create` and `Box::into_raw`) or null after + // `into_owned`. Both `av_buffer_unref` and `Box::from_raw` need the + // null check we apply here; both are otherwise sound on resources we + // own. + unsafe { + if !self.hw_device_ref.is_null() { + let mut hw = self.hw_device_ref; + av_buffer_unref(&mut hw); + } + if !self.callback_state.is_null() { + drop(Box::from_raw(self.callback_state)); + } + } + } +} + +/// Download a HW frame into a CPU [`Frame`]. Always unrefs the destination +/// first so reuse across resolution changes is safe. +/// +/// Deliberately does **not** call `av_frame_copy_props`. That FFmpeg +/// helper deep-copies AVFrame side data (SEI, mastering display, ICC +/// profiles, dynamic HDR, etc.), the metadata dict, and bumps both +/// `opaque_ref` and `private_ref` on every receive — none of which +/// `Frame` exposes via its public accessors. On a crafted stream with +/// megabytes of per-frame metadata that would mean an unbounded +/// allocation per receive, with no caller-visible benefit. We instead +/// copy only the scalar fields the public API can read (today: `pts`); +/// pixel layout (`width`, `height`, `format`, `linesize`, `data`) is +/// already set by `av_hwframe_transfer_data`. If `Frame` ever grows +/// accessors for timing extras (`duration`, `time_base`, `pkt_dts`) or +/// color metadata, add those to `copy_frame_props_minimal` at the same +/// time. 
+unsafe fn transfer_hw_frame( + dst: &mut Frame, + src: &mut frame::Video, +) -> std::result::Result<(), ffmpeg_next::Error> { + unsafe { + av_frame_unref(dst.as_inner_mut().as_mut_ptr()); + let ret = av_hwframe_transfer_data(dst.as_inner_mut().as_mut_ptr(), src.as_ptr(), 0); + if ret < 0 { + return Err(ffmpeg_next::Error::from(ret)); + } + copy_frame_props_minimal(dst.as_inner_mut().as_mut_ptr(), src.as_ptr()); + } + Ok(()) +} + +/// Bounded substitute for `av_frame_copy_props`. Copies only the scalar +/// AVFrame fields the public `Frame` API needs from `src` to `dst` — +/// today just `pts`. Skips every allocating field (`av_dict_copy` for +/// `metadata`, `av_frame_new_side_data` + memcpy for each `side_data[i]`, +/// `av_buffer_replace` for `opaque_ref` / `private_ref`) so the cost is +/// O(1) per frame regardless of what the source attaches. +/// +/// # Safety +/// Both pointers must be valid `AVFrame` pointers we own; field +/// projection touches only POD scalars, no enums or buffer refs. +unsafe fn copy_frame_props_minimal(dst: *mut AVFrame, src: *const AVFrame) { + unsafe { + (*dst).pts = (*src).pts; + } +} + +/// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame` +/// and must not be treated as backend failures. +fn is_transient(e: &ffmpeg_next::Error) -> bool { + is_eagain(e) || matches!(e, ffmpeg_next::Error::Eof) +} + +/// Reject a `codec::Parameters` whose inner `*mut AVCodecParameters` is +/// null. This guards the public trust boundary: ffmpeg-next can produce +/// such a `Parameters` under OOM (`Parameters::new()` does not check +/// `avcodec_parameters_alloc`), and a safe caller can legally hand one +/// in. Without this check, the very next `(*p.as_ptr()).field` read +/// would be a null deref. +fn ensure_parameters_non_null(parameters: &codec::Parameters) -> Result<()> { + // SAFETY: as_ptr() returns the inner *const AVCodecParameters; we just + // inspect the pointer value (no deref). 
+ if unsafe { parameters.as_ptr() }.is_null() { + return Err(Error::Ffmpeg(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + })); + } + Ok(()) +} + +/// Allocate a fresh `frame::Video`, checking that `av_frame_alloc` did not +/// return NULL. ffmpeg-next's `frame::Video::empty()` does not surface that +/// failure and the resulting null pointer would be UB on the next field +/// access; this wrapper catches it and surfaces it as `ENOMEM`. +fn alloc_av_frame() -> std::result::Result { + let inner = frame::Video::empty(); + // SAFETY: as_ptr() just exposes the inner pointer for inspection. + if unsafe { inner.as_ptr() }.is_null() { + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + Ok(inner) +} + +/// Build a fresh `Context` from `parameters`, checking the underlying +/// `avcodec_alloc_context3` for NULL before passing it to +/// `avcodec_parameters_to_context`. ffmpeg-next's `Context::from_parameters` +/// skips that check and would feed a null pointer into FFmpeg under OOM — +/// undefined behavior. This helper surfaces the failure as `ENOMEM` and +/// frees the context if `parameters_to_context` itself errors. +fn build_codec_context(parameters: &codec::Parameters) -> Result { + ensure_parameters_non_null(parameters)?; + // SAFETY: avcodec_alloc_context3(NULL) returns a fresh AVCodecContext + // or NULL on allocation failure. + let ctx_ptr = unsafe { avcodec_alloc_context3(ptr::null()) }; + if ctx_ptr.is_null() { + return Err(Error::Ffmpeg(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + })); + } + // SAFETY: ctx_ptr is non-null and freshly allocated; parameters.as_ptr() + // returns a valid AVCodecParameters pointer; the function copies bytes + // out of parameters into the context. + let ret = unsafe { avcodec_parameters_to_context(ctx_ptr, parameters.as_ptr()) }; + if ret < 0 { + // SAFETY: ctx_ptr was allocated by us and never handed to anyone else. 
+ let mut p = ctx_ptr; + unsafe { avcodec_free_context(&mut p) }; + return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret))); + } + // SAFETY: ctx_ptr is valid; passing `owner: None` means our wrapper owns + // the allocation and `Context::drop` will run `avcodec_free_context`. + Ok(unsafe { Context::wrap(ctx_ptr, None) }) +} + +/// Checked deep-clone of `codec::Parameters`. ffmpeg-next's +/// `Parameters::clone` allocates via `avcodec_parameters_alloc` without +/// checking for NULL and runs `avcodec_parameters_copy` without checking +/// the return code. On `ENOMEM` the result is a `Parameters` with a null +/// inner pointer, which becomes UB when later passed to FFmpeg. +/// +/// This helper performs both calls explicitly, frees a partial allocation +/// on failure, and surfaces the AVERROR. The returned `Parameters` has +/// `owner: None`, severing any Rc link to the caller's demuxer (the +/// reason we deep-clone in the first place — see Send safety in +/// `VideoDecoder::open`). +fn try_clone_parameters( + src: &codec::Parameters, +) -> std::result::Result { + // Reject a null inner pointer at the boundary; a deref inside + // avcodec_parameters_copy below would otherwise be UB. + if unsafe { src.as_ptr() }.is_null() { + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + // SAFETY: avcodec_parameters_alloc returns a fresh AVCodecParameters + // pointer or NULL on allocation failure. + let dst_ptr = unsafe { avcodec_parameters_alloc() }; + if dst_ptr.is_null() { + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + // SAFETY: dst_ptr is non-null and freshly allocated; src.as_ptr() is + // a valid AVCodecParameters pointer; the function copies bytes from + // src into dst. + let ret = unsafe { avcodec_parameters_copy(dst_ptr, src.as_ptr()) }; + if ret < 0 { + // SAFETY: dst_ptr was allocated by us and never handed out. 
+ let mut p = dst_ptr; + unsafe { avcodec_parameters_free(&mut p) }; + return Err(ffmpeg_next::Error::from(ret)); + } + // SAFETY: dst_ptr is a valid AVCodecParameters; passing `owner: None` + // means our wrapper owns the allocation and `Parameters::drop` will + // call `avcodec_parameters_free`. + Ok(unsafe { codec::Parameters::wrap(dst_ptr, None) }) +} + +/// Checked counterpart to `Packet::clone()`. ffmpeg-next's `clone_from` +/// calls `av_packet_ref` and ignores the int return value; on `ENOMEM` +/// the destination is left empty while the caller assumes the clone +/// succeeded — corrupting any later replay history. This helper surfaces +/// the AVERROR. The result is a refcounted shallow clone — the payload +/// buffer is shared with `src` rather than deep-copied; the probe replay +/// only sends packets through `avcodec_send_packet`, which does not +/// require a writable buffer. +fn try_clone_packet(src: &Packet) -> std::result::Result { + let mut dst = Packet::empty(); + // SAFETY: dst is a freshly zero-initialized Packet (av_init_packet inside + // Packet::empty); av_packet_ref initializes its data fields from src's + // refcounted buffer or returns AVERROR(ENOMEM) on failure. + let ret = unsafe { av_packet_ref(dst.as_mut_ptr(), src.as_ptr()) }; + if ret < 0 { + return Err(ffmpeg_next::Error::from(ret)); + } + Ok(dst) +} + +/// Sum of `AVPacket.side_data[i].size` across every entry, plus +/// `nb_entries * SIDE_DATA_ENTRY_OVERHEAD` (descriptor + AVBufferRef + +/// allocator bookkeeping per entry). `av_packet_ref` performs a deep +/// copy of side data via `av_packet_copy_props`, so each probe-buffered +/// clone retains every one of these bytes. Charging both keeps +/// `MAX_PROBE_PACKET_BYTES` a true upper bound — without the overhead, +/// many zero-size entries slip past the cap on pure descriptor cost. +/// +/// Walks at most `max_entries` entries even when `side_data_elems` +/// reports a larger count. 
Defense-in-depth against a corrupt or hostile +/// packet whose `side_data_elems` lies about the actual array length: +/// the caller is expected to also reject any packet whose count exceeds +/// the cap (so the inflated clone is never created), but bounding the +/// walk here means a stale or weaponised value can never trigger an +/// unbounded raw-pointer scan from the safe API. +/// +/// Reads only the `size` field of each `AVPacketSideData` entry — never +/// touches the bindgen `AVPacketSideDataType` enum, so no UB even if a +/// future FFmpeg adds a side-data type discriminant our build doesn't +/// know. +fn packet_side_data_bytes(packet: &Packet, max_entries: usize) -> usize { + // SAFETY: AVPacket.side_data is `*mut AVPacketSideData` and + // side_data_elems is `c_int`; both are raw struct fields safe to read. + // Field projection (`.size`) does not reconstruct the enum-typed `type_` + // field, so the bindgen-enum UB hazard does not apply here. + unsafe { + let raw = packet.as_ptr(); + let nel = (*raw).side_data_elems; + let arr = (*raw).side_data; + if arr.is_null() || nel <= 0 || max_entries == 0 { + return 0; + } + let count = (nel as usize).min(max_entries); + let mut total = count.saturating_mul(SIDE_DATA_ENTRY_OVERHEAD); + for i in 0..count { + let entry = arr.add(i); + total = total.saturating_add((*entry).size); + } + total + } +} + +/// Number of `AVPacketSideData` entries on `packet`. The probe buffer +/// uses this to enforce [`MAX_PROBE_PACKET_SIDE_DATA_ENTRIES`] before +/// cloning, so a packet whose entry count alone would dominate retained +/// memory is rejected up front. +fn packet_side_data_count(packet: &Packet) -> usize { + // SAFETY: side_data_elems is `c_int`, safe to read; clamp negatives to 0. 
+ let nel = unsafe { (*packet.as_ptr()).side_data_elems }; + if nel <= 0 { + 0 + } else { + nel as usize + } +} + +/// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine +/// distinguishes "drain output and retry" from "stream over"). +fn is_eagain(e: &ffmpeg_next::Error) -> bool { + matches!(e, ffmpeg_next::Error::Other { errno } if *errno == ffmpeg_next::error::EAGAIN) +} + +/// Look up the decoder for `parameters` without going through the bindgen +/// `AVCodecID` Rust enum. Reads the codec_id field as raw `u32` via +/// `addr_of!` + `ptr::read` so a value not in our build's discriminant +/// set never invokes UB. +fn find_decoder(parameters: &codec::Parameters) -> Result { + ensure_parameters_non_null(parameters)?; + // SAFETY: parameters' inner pointer is non-null (checked above); + // addr_of! projects to the codec_id field; the *const u32 cast is sound + // because AVCodecID is `#[repr(u32)]` (same size and alignment as u32). + // Reading as u32 cannot be UB regardless of the value FFmpeg wrote. + let raw_id: u32 = + unsafe { ptr::read(ptr::addr_of!((*parameters.as_ptr()).codec_id) as *const u32) }; + + // Call C `avcodec_find_decoder` via our local `c_int`-typed shim — we + // never construct an `AVCodecID` enum from `raw_id`. The C function + // returns NULL for unknown ids, which we surface as `Error::NoCodec`. + // SAFETY: avcodec_find_decoder is a pure FFmpeg lookup; passing any + // c_int is sound (returns NULL for unknown). + let codec_ptr = unsafe { c_shims::avcodec_find_decoder(raw_id as libc::c_int) }; + if codec_ptr.is_null() { + return Err(Error::NoCodec(raw_id)); + } + // SAFETY: codec_ptr is a non-null *const AVCodec into FFmpeg's static + // codec table; it lives for the duration of the program. + Ok(unsafe { Codec::wrap(codec_ptr) }) +} + +/// Drain output frames from a candidate decoder during probe replay, +/// transferring each one from the candidate's HW context to a fresh CPU +/// frame and queueing it. 
Returns `Ok(())` once the candidate signals +/// EAGAIN/EOF. The transfer happens while the candidate is still alive +/// (its `AVHWFramesContext` is reachable); the resulting CPU frames remain +/// valid after the candidate is committed because they hold their own +/// buffer references with no dependency on the original device context. +fn drain_into_pending( + decoder: &mut ffmpeg_next::decoder::Video, + hw_buf: &mut frame::Video, + pending: &mut VecDeque, + pending_bytes: &mut usize, + max_bytes: usize, +) -> std::result::Result<(), ffmpeg_next::Error> { + loop { + match decoder.receive_frame(hw_buf) { + Ok(()) => { + // Pre-transfer cap check: if we are already at or over either cap, + // the candidate is producing more than we can hold. Treat as an + // explicit candidate failure so `advance_probe` can try the next + // backend instead of committing a stream with silently-dropped + // frames in the middle. + // + // TODO: at very large frame sizes (8K HDR P010, > ~96 MiB each) + // even a single retained frame is significant. Future direction: + // memmap-backed pending frames (write to a temp file or shared + // memory segment) so the resident set stays bounded even when the + // byte cap is raised. Out of scope for v0.0.0. + if pending.len() >= MAX_PROBE_PENDING_FRAMES || *pending_bytes >= max_bytes { + tracing::warn!( + frames = pending.len(), + bytes = *pending_bytes, + max_frames = MAX_PROBE_PENDING_FRAMES, + max_bytes = max_bytes, + "hwdecode: probe pending cap reached; failing candidate replay" + ); + // SAFETY: hw_buf is owned and valid; unref of an empty frame is a no-op. + unsafe { av_frame_unref(hw_buf.as_mut_ptr()) }; + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + // Pre-transfer size guard: `av_hwframe_transfer_data` will + // allocate the CPU buffer based on `hw_buf`'s dimensions. 
If a + // single frame's worst-case footprint already pushes past the + // cap, refuse the candidate **before** allocating so RSS does + // not spike on a frame we'd immediately drop. Uses a width * + // height * `WORST_CASE_BYTES_PER_PIXEL` upper bound; the + // post-transfer accounting via `cpu_frame_bytes` below stays in + // place as a backstop using the actual stride/format. + let estimated_bytes = match estimate_transfer_bytes(hw_buf) { + Some(b) => b, + None => { + // SAFETY: AVFrame.width/height are c_int reads. + let (w, h) = unsafe { + let raw = hw_buf.as_ptr(); + ((*raw).width, (*raw).height) + }; + tracing::warn!( + width = w, + height = h, + "hwdecode: HW frame dimensions invalid for sizing; failing candidate replay" + ); + unsafe { av_frame_unref(hw_buf.as_mut_ptr()) }; + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + }; + let estimated_total = pending_bytes.saturating_add(estimated_bytes); + if estimated_total > max_bytes { + // SAFETY: AVFrame.width/height are c_int reads. + let (w, h) = unsafe { + let raw = hw_buf.as_ptr(); + ((*raw).width, (*raw).height) + }; + tracing::warn!( + pending_bytes = *pending_bytes, + estimated_bytes, + width = w, + height = h, + max_bytes = max_bytes, + "hwdecode: pre-transfer size estimate exceeds cap; \ + refusing candidate replay before allocating CPU frame" + ); + unsafe { av_frame_unref(hw_buf.as_mut_ptr()) }; + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + let mut cpu = alloc_av_frame()?; + // SAFETY: hw_buf is a freshly-decoded HW frame; + // `av_hwframe_transfer_data` allocates pixel buffers on `cpu`. + // We use `copy_frame_props_minimal` (only `pts`) instead of + // `av_frame_copy_props` for the same reason as + // `transfer_hw_frame`: the public `Frame` API does not expose + // side data / metadata / opaque refs, so deep-copying them per + // frame is pure cost and an unbounded allocation source on + // attacker-controlled streams. 
+ unsafe { + let r1 = av_hwframe_transfer_data(cpu.as_mut_ptr(), hw_buf.as_ptr(), 0); + if r1 < 0 { + return Err(ffmpeg_next::Error::from(r1)); + } + } + let pixel_bytes = match cpu_frame_bytes(&cpu) { + Some(b) => b, + None => { + // Unknown pix_fmt or vertically-flipped layout — we cannot + // bound this frame's contribution against the byte cap, so up + // to MAX_PROBE_PENDING_FRAMES of them could exhaust memory. + // Fail the candidate so probing tries the next backend + // rather than queueing untracked allocations. + // SAFETY: AVFrame.format is c_int, safe to read. + let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format }; + tracing::warn!( + pix_fmt, + "hwdecode: cannot size unknown CPU pix_fmt during replay; failing candidate" + ); + // cpu drops here. + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + }; + let new_total = pending_bytes.saturating_add(pixel_bytes); + if new_total > max_bytes { + tracing::warn!( + pending_bytes = *pending_bytes, + pixel_bytes, + max_bytes, + "hwdecode: queueing this frame would exceed byte cap; \ + failing candidate replay" + ); + // cpu drops here without ever paying a metadata deep copy. + return Err(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + }); + } + // Cap check passed — copy only the scalar AVFrame fields the + // public API needs. SAFETY: cpu and hw_buf are both valid + // AVFrames we own. + unsafe { + copy_frame_props_minimal(cpu.as_mut_ptr(), hw_buf.as_ptr()); + } + *pending_bytes = new_total; + pending.push_back(cpu); + } + Err(e) if is_transient(&e) => return Ok(()), + Err(e) => return Err(e), + } + } +} + +/// Conservative upper-bound estimate of the bytes +/// `av_hwframe_transfer_data` will allocate when downloading `hw_buf` to +/// a CPU frame. Used by [`drain_into_pending`] as a pre-transfer guard +/// so a candidate replay can refuse a frame whose footprint would +/// exceed the byte budget *without* first paying the allocation. 
The +/// estimate is `width * height * WORST_CASE_BYTES_PER_PIXEL` — see that +/// constant for why we err on the high side. +/// +/// Returns `None` when the frame's `width` or `height` are not strictly +/// positive (caller treats as candidate failure — a HW frame with +/// non-positive dimensions cannot be transferred meaningfully). +fn estimate_transfer_bytes(hw_buf: &frame::Video) -> Option { + // SAFETY: AVFrame.width / height are c_int reads. + let (w, h) = unsafe { + let raw = hw_buf.as_ptr(); + ((*raw).width, (*raw).height) + }; + if w <= 0 || h <= 0 { + return None; + } + Some( + (w as usize) + .saturating_mul(h as usize) + .saturating_mul(WORST_CASE_BYTES_PER_PIXEL), + ) +} + +/// Approximate resident size of a CPU frame: sum of `linesize[plane] * +/// plane_height` across populated planes. +/// +/// Returns `None` for pixel formats not in our chroma-subsampling table or +/// for frames whose `linesize` is negative — both signal an allocation we +/// cannot account for, so the caller refuses to queue them. Returning 0 +/// in either case would silently bypass the byte cap and let an unbounded +/// number of large frames into `pending_frames`. +/// +/// Distinguishes `linesize == 0` (FFmpeg's sentinel for "no more populated +/// planes" — terminates the scan) from `linesize < 0` (FFmpeg's vertically- +/// flipped layout — `Frame::row` rejects those as unusable, so queueing one +/// during probe replay would only delay the failure to the consumer side +/// while wasting `|linesize| * plane_h` bytes of unaccounted memory). +fn cpu_frame_bytes(frame: &frame::Video) -> Option { + // SAFETY: AVFrame.height / format / linesize are c_int reads. 
+ let (height, pix_fmt, linesizes) = unsafe { + let raw = frame.as_ptr(); + ((*raw).height as usize, (*raw).format, (*raw).linesize) + }; + let mut total: usize = 0; + let mut any_plane = false; + for (plane, linesize) in linesizes.iter().enumerate() { + if *linesize == 0 { + // End of populated planes — FFmpeg zeroes the trailing entries. + break; + } + if *linesize < 0 { + // Vertically-flipped layout — refuse to size so `drain_into_pending` + // fails the candidate. The same pre-fix code path silently returned + // `Some(0)` for a frame whose first plane was negative, allowing up + // to MAX_PROBE_PENDING_FRAMES frames of unaccounted memory. + return None; + } + any_plane = true; + let stride = *linesize as usize; + // If we can't size *any* populated plane, the format is outside our + // table — refuse to size the frame at all (conservative; discarding + // is safer than under-counting against the byte cap). + let plane_h = crate::frame::plane_height_for(pix_fmt, plane, height)?; + total = total.saturating_add(stride.saturating_mul(plane_h)); + } + if !any_plane { + // Genuinely empty frame (no populated planes) — nothing to account for. + return Some(0); + } + Some(total) +} + +#[allow(dead_code)] +fn _assert_send() { + fn check() {} + check::(); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn no_codec_for_unknown_id() { + let err = Error::NoCodec(0); + assert!(format!("{err}").contains("no decoder")); + } + + #[test] + fn videodecoder_is_send() { + _assert_send(); + } + + #[test] + fn is_transient_recognises_eagain_and_eof() { + let eagain = ffmpeg_next::Error::Other { + errno: ffmpeg_next::error::EAGAIN, + }; + assert!(is_transient(&eagain)); + assert!(is_transient(&ffmpeg_next::Error::Eof)); + let other = ffmpeg_next::Error::InvalidData; + assert!(!is_transient(&other)); + } + + /// Regression: a `codec::Parameters` with a null inner pointer must be + /// rejected at the entrypoint, not deref'd. 
ffmpeg-next's + /// `Parameters::new()` does not check `avcodec_parameters_alloc()`, so a + /// safe caller can hand us such a value under OOM. + #[test] + fn open_rejects_null_parameters() { + // SAFETY: Parameters::wrap accepts any pointer; we explicitly construct + // one with null inner. avcodec_parameters_free is null-safe on Drop. + let null_params = unsafe { codec::Parameters::wrap(std::ptr::null_mut(), None) }; + match VideoDecoder::open(null_params) { + Ok(_) => panic!("open should fail on null parameters"), + Err(Error::Ffmpeg(ffmpeg_next::Error::Other { errno })) => { + assert_eq!(errno, libc::ENOMEM, "expected ENOMEM, got {errno}"); + } + Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"), + } + } + + #[test] + fn open_with_rejects_null_parameters() { + // SAFETY: see open_rejects_null_parameters. + let null_params = unsafe { codec::Parameters::wrap(std::ptr::null_mut(), None) }; + match VideoDecoder::open_with(null_params, Backend::VideoToolbox) { + Ok(_) => panic!("open_with should fail on null parameters"), + Err(Error::Ffmpeg(ffmpeg_next::Error::Other { errno })) => { + assert_eq!(errno, libc::ENOMEM, "expected ENOMEM, got {errno}"); + } + Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"), + } + } + + /// `try_clone_packet` calls `av_packet_ref`, which deep-copies side + /// data via `av_packet_copy_props`. The probe budget therefore has to + /// include side-data bytes — otherwise a stream with a 16-byte payload + /// and a 1 MiB side-data attachment would only consume 16 bytes of the + /// 64 MiB budget per packet, and 256 buffered clones would retain + /// ~256 MiB of side data while logs claim a few KiB. 
+ #[test] + fn packet_side_data_counts_against_probe_budget() { + use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType}; + + const PAYLOAD_SIZE: usize = 16; + const SIDE_DATA_SIZE: usize = 1024 * 1024; // 1 MiB + + let mut packet = Packet::new(PAYLOAD_SIZE); + // SAFETY: packet is a freshly allocated AVPacket; av_packet_new_side_data + // attaches a fresh `SIDE_DATA_SIZE`-byte buffer of the requested type + // to it and returns a writable pointer (or NULL on OOM). + let p = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, + SIDE_DATA_SIZE, + ) + }; + assert!(!p.is_null(), "av_packet_new_side_data returned NULL"); + + assert_eq!(packet.size(), PAYLOAD_SIZE); + let side = packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES); + assert!( + side >= SIDE_DATA_SIZE, + "side-data accounting must include the attached buffer; got {side}" + ); + let total = packet.size().saturating_add(side); + assert!( + total >= PAYLOAD_SIZE + SIDE_DATA_SIZE, + "probe budget must charge payload + side data; got {total}" + ); + } + + #[test] + fn packet_side_data_is_zero_when_no_side_data() { + let packet = Packet::new(64); + assert_eq!( + packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES), + 0 + ); + assert_eq!(packet_side_data_count(&packet), 0); + } + + /// Packets with many tiny side-data entries must be charged the + /// per-entry descriptor + ref overhead, even when each entry's payload + /// `size` is zero. Without `SIDE_DATA_ENTRY_OVERHEAD`, a packet stuffed + /// with N zero-byte entries would charge 0 bytes against the budget + /// while `av_packet_ref` still allocates ~`N * 80` bytes of descriptor + /// + AVBufferRef + allocator overhead per cloned copy. 
+ #[test] + fn packet_side_data_bytes_charges_descriptor_overhead_for_zero_size_entries() { + use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType}; + + let mut packet = Packet::new(0); + // Attach two zero-byte entries of distinct types so neither call + // replaces the other. + let p1 = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, + 0, + ) + }; + let p2 = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_PALETTE, + 0, + ) + }; + assert!( + !p1.is_null() && !p2.is_null(), + "av_packet_new_side_data NULL" + ); + + assert_eq!(packet_side_data_count(&packet), 2); + let bytes = packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES); + assert!( + bytes >= 2 * SIDE_DATA_ENTRY_OVERHEAD, + "must charge descriptor overhead per entry even at zero payload; got {bytes}" + ); + } + + /// `packet_side_data_bytes` must clamp its walk to `max_entries` + /// regardless of `side_data_elems`. Defense-in-depth: the caller is + /// expected to short-circuit packets whose count exceeds the cap, but + /// if a corrupt or weaponised packet ever does reach the helper, the + /// internal cap prevents an unbounded raw-pointer walk. + /// + /// This test attaches 5 entries of distinct types and asks the helper + /// to walk only the first 2. Result must equal exactly `2 * overhead + + /// (size_a + size_b)`, confirming entries 3-5 were not even read. + #[test] + fn packet_side_data_bytes_respects_max_entries_cap() { + use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType}; + + let mut packet = Packet::new(0); + // Five distinct side-data types so each `av_packet_new_side_data` + // call appends rather than replaces. 
+ let types_and_sizes: [(AVPacketSideDataType, usize); 5] = [ + (AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, 100), + (AVPacketSideDataType::AV_PKT_DATA_PALETTE, 200), + (AVPacketSideDataType::AV_PKT_DATA_REPLAYGAIN, 300), + (AVPacketSideDataType::AV_PKT_DATA_DISPLAYMATRIX, 400), + (AVPacketSideDataType::AV_PKT_DATA_STEREO3D, 500), + ]; + for (ty, size) in types_and_sizes { + let p = unsafe { av_packet_new_side_data(packet.as_mut_ptr(), ty, size) }; + assert!(!p.is_null(), "av_packet_new_side_data returned NULL"); + } + assert_eq!(packet_side_data_count(&packet), 5); + + let walked_2 = packet_side_data_bytes(&packet, 2); + let walked_5 = packet_side_data_bytes(&packet, 5); + + assert_eq!( + walked_2, + 2 * SIDE_DATA_ENTRY_OVERHEAD + 100 + 200, + "max_entries=2 must walk exactly the first two entries" + ); + assert_eq!( + walked_5, + 5 * SIDE_DATA_ENTRY_OVERHEAD + 100 + 200 + 300 + 400 + 500, + "max_entries=5 must walk all five entries" + ); + // max_entries=0 short-circuits to 0. + assert_eq!(packet_side_data_bytes(&packet, 0), 0); + // max_entries larger than the actual count clamps to the actual count + // (no out-of-bounds walk past `side_data_elems`). + let walked_huge = packet_side_data_bytes(&packet, 1_000_000); + assert_eq!(walked_huge, walked_5); + } + + /// `MAX_PROBE_PACKET_SIDE_DATA_ENTRIES` is the cliff above which a + /// packet is rejected from the probe buffer regardless of byte total — + /// pure descriptor inflation is its own attack vector. Sanity-check + /// that `packet_side_data_count` reports the value the cap is checked + /// against. 
+ #[test] + fn packet_side_data_count_reports_attached_entries() { + use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType}; + + let mut packet = Packet::new(0); + let _p1 = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, + 4, + ) + }; + let _p2 = unsafe { + av_packet_new_side_data( + packet.as_mut_ptr(), + AVPacketSideDataType::AV_PKT_DATA_PALETTE, + 4, + ) + }; + assert_eq!(packet_side_data_count(&packet), 2); + } + + /// `cpu_frame_bytes` must refuse to size a frame whose first plane has + /// a negative `linesize`. Pre-fix, the loop break treated negative the + /// same as zero (FFmpeg's "no more populated planes" sentinel), so a + /// vertically-flipped frame returned `Some(0)` and `drain_into_pending` + /// would queue it as a 0-byte allocation — letting up to + /// `MAX_PROBE_PENDING_FRAMES` such frames bypass the configured byte + /// budget entirely. + #[test] + fn cpu_frame_bytes_rejects_negative_first_plane_linesize() { + let mut f = frame::Video::empty(); + // SAFETY: f is freshly allocated; we set `format` to NV12 and the + // first plane's linesize negative (FFmpeg's vertical-flip convention). + // No backing data buffer is allocated — cpu_frame_bytes must reject + // before any pointer dereference. + unsafe { + let raw = f.as_mut_ptr(); + (*raw).format = crate::pix_fmt::NV12; + (*raw).width = 1920; + (*raw).height = 1080; + (*raw).linesize[0] = -1920; + (*raw).linesize[1] = -1920; + } + assert!( + cpu_frame_bytes(&f).is_none(), + "negative linesize must be unsizeable, not Some(0)" + ); + } + + /// Sanity-check the positive path: a synthesized NV12 frame with valid + /// linesizes must report the sum across populated planes (Y full height + /// + UV half height). 
+ #[test] + fn cpu_frame_bytes_sums_populated_planes() { + let mut f = frame::Video::empty(); + let stride = 1920usize; + let height = 1080usize; + // SAFETY: same scheme as above; we only mutate primitive struct fields. + unsafe { + let raw = f.as_mut_ptr(); + (*raw).format = crate::pix_fmt::NV12; + (*raw).width = 1920; + (*raw).height = height as i32; + (*raw).linesize[0] = stride as i32; + (*raw).linesize[1] = stride as i32; + } + let expected = stride * height + stride * (height / 2); + assert_eq!(cpu_frame_bytes(&f), Some(expected)); + } + + /// A frame with only a zero linesize in plane 0 is "no populated + /// planes" — must return `Some(0)`, not `None`. Distinguishes the + /// FFmpeg sentinel from the vertically-flipped layout. + #[test] + fn cpu_frame_bytes_zero_first_plane_returns_zero() { + let f = frame::Video::empty(); + // Default-allocated empty AVFrame already has all linesizes zero. + assert_eq!(cpu_frame_bytes(&f), Some(0)); + } + + /// `estimate_transfer_bytes` is the pre-transfer size guard for + /// `drain_into_pending`: it must compute `width * height * + /// WORST_CASE_BYTES_PER_PIXEL` so the candidate replay can refuse a + /// frame *before* `av_hwframe_transfer_data` allocates. + #[test] + fn estimate_transfer_bytes_uses_worst_case_per_pixel() { + let mut f = frame::Video::empty(); + // SAFETY: f is freshly allocated; we set width/height directly. + unsafe { + let raw = f.as_mut_ptr(); + (*raw).width = 1920; + (*raw).height = 1080; + } + assert_eq!( + estimate_transfer_bytes(&f), + Some(1920 * 1080 * WORST_CASE_BYTES_PER_PIXEL), + ); + } + + /// Non-positive dimensions surface as `None` so `drain_into_pending` + /// fails the candidate before allocating anything. A zero-width or + /// zero-height frame would silently yield a 0-byte estimate under the + /// raw multiplication, letting the cap check pass and exposing the + /// allocation path to whatever the actual transfer would do. 
+ #[test] + fn estimate_transfer_bytes_rejects_non_positive_dimensions() { + let mut f = frame::Video::empty(); + unsafe { + let raw = f.as_mut_ptr(); + (*raw).width = 0; + (*raw).height = 1080; + } + assert!(estimate_transfer_bytes(&f).is_none()); + + unsafe { + (*f.as_mut_ptr()).width = 1920; + (*f.as_mut_ptr()).height = -1; + } + assert!(estimate_transfer_bytes(&f).is_none()); + } + + /// 8K HDR P010 has actual ~96 MiB resident size; the estimate should + /// over-charge it (the right side to err on for a memory cap) while + /// still fitting within the configurable + /// [`DEFAULT_MAX_PROBE_PENDING_BYTES`] cap (256 MiB) for a single + /// frame so a default-configured decoder is not forced to reject 8K + /// streams outright. + #[test] + fn estimate_transfer_bytes_8k_fits_default_cap() { + let mut f = frame::Video::empty(); + unsafe { + let raw = f.as_mut_ptr(); + (*raw).width = 7680; + (*raw).height = 4320; + } + let estimate = estimate_transfer_bytes(&f).expect("8K is sizable"); + // ~256 MiB exactly — at-or-just-under the default cap. + assert!( + estimate <= DEFAULT_MAX_PROBE_PENDING_BYTES, + "8K estimate {estimate} must fit DEFAULT_MAX_PROBE_PENDING_BYTES \ + {DEFAULT_MAX_PROBE_PENDING_BYTES}; otherwise the default cap rejects \ + even a single 8K frame at probe time" + ); + // And strictly larger than a typical 8K P010 (~96 MiB) so the guard + // is actually conservative, not under-charging. + assert!( + estimate > 96 * 1024 * 1024, + "estimate must over-charge real 8K P010 to bound the worst case; got {estimate}" + ); + } + + /// `PartialBuildState`'s `Drop` must be a no-op when both pointers are + /// null — the disarmed-by-`into_owned` post-state. A panic / double-free + /// here would break the success path of every `build_state` call. + #[test] + fn partial_build_state_drop_is_no_op_on_null_pointers() { + let _g = PartialBuildState { + hw_device_ref: ptr::null_mut(), + callback_state: ptr::null_mut(), + }; + // Drops at end of scope. 
Test passes if it doesn't panic / crash. + } + + /// `into_owned` must return the original pointers and disarm the guard + /// (so the guard's Drop becomes a no-op and the caller can safely + /// transfer ownership to `DecoderState` without double-freeing). + #[test] + fn partial_build_state_into_owned_disarms_and_returns_originals() { + use ffmpeg_next::ffi::{av_buffer_alloc, av_buffer_unref, AVPixelFormat}; + + // SAFETY: av_buffer_alloc returns a fresh AVBufferRef* with refcount + // 1, or NULL on OOM. We free it ourselves below (after into_owned + // disarms the guard). + let hw_ptr = unsafe { av_buffer_alloc(64) }; + assert!(!hw_ptr.is_null(), "av_buffer_alloc(64) returned NULL"); + let cb_ptr = Box::into_raw(Box::new(CallbackState { + wanted: AVPixelFormat::AV_PIX_FMT_NONE, + wanted_int: AVPixelFormat::AV_PIX_FMT_NONE as i32, + })); + + let g = PartialBuildState { + hw_device_ref: hw_ptr, + callback_state: cb_ptr, + }; + let (hw_back, cb_back) = g.into_owned(); + assert_eq!( + hw_back, hw_ptr, + "into_owned must return the original device ref" + ); + assert_eq!( + cb_back, cb_ptr, + "into_owned must return the original callback box" + ); + + // Guard is now disarmed (its Drop ran with null pointers as soon as + // into_owned consumed it). We own the pointers and must free them. + // SAFETY: hw_ptr and cb_ptr are still the freshly-allocated values. + unsafe { + let mut hw = hw_back; + av_buffer_unref(&mut hw); + drop(Box::from_raw(cb_back)); + } + } + + /// Probe-abandon paths in `send_packet` (cap exceeded, packet clone + /// failed) must not drop frames already queued in `pending_frames`. + /// Those frames belong to the currently active backend — possibly a + /// candidate that `advance_probe` just committed earlier in the same + /// `send_packet` call — and are valid output the caller will dequeue + /// via `receive_frame`. 
+ /// + /// Pre-fix, both abandon branches called `pending_frames.clear()` + /// alongside `self.probe = None;`, silently dropping initial frames at + /// exactly the cap-overflow / OOM-stress paths. + /// + /// Live HW required: a real `VideoDecoder` is the only way to construct + /// a valid `DecoderState` (its `Drop` invokes FFmpeg cleanup), and + /// `send_packet` must reach the Ok branch on a real decoder for the + /// cap check to fire. + #[test] + #[ignore = "requires HWDECODE_SAMPLE_VIDEO and a working hardware backend"] + fn cap_overflow_preserves_pending_frames_from_active_backend() { + use ffmpeg_next::{format, media}; + + let path = std::env::var_os("HWDECODE_SAMPLE_VIDEO") + .expect("HWDECODE_SAMPLE_VIDEO must be set for this test"); + + ffmpeg_next::init().expect("ffmpeg init"); + let mut input = format::input(&path).expect("open input"); + let stream_index = input + .streams() + .best(media::Type::Video) + .expect("video stream") + .index(); + let stream_params = input + .streams() + .best(media::Type::Video) + .expect("video stream") + .parameters(); + + let mut decoder = VideoDecoder::open(stream_params).expect("open decoder"); + assert!( + decoder.probe.is_some(), + "probe must be active immediately after open" + ); + + // Inject sentinel frames as if `advance_probe` had drained them from + // a freshly-committed candidate during this same send_packet call. + decoder.pending_frames.push_back(frame::Video::empty()); + decoder.pending_frames.push_back(frame::Video::empty()); + let pending_before = decoder.pending_frames.len(); + + // Fast-forward the probe state to the byte cap so the next successful + // send_packet trips the cap-overflow branch. + decoder + .probe + .as_mut() + .expect("probe present") + .buffered_bytes = MAX_PROBE_PACKET_BYTES; + + // Find the first video packet and feed it. 
We don't care whether the + // underlying decoder actually accepts it cleanly; we only need to + // exercise the Ok branch's cap-overflow accounting at least once. + let mut hit_ok = false; + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + if decoder.send_packet(&packet).is_ok() { + hit_ok = true; + break; + } + } + assert!( + hit_ok, + "expected at least one send_packet to succeed and trigger the cap-overflow branch" + ); + + assert!( + decoder.probe.is_none(), + "probe must be abandoned after cap overflow" + ); + assert_eq!( + decoder.pending_frames.len(), + pending_before, + "pending_frames belong to the active backend; abandon must not drop them" + ); + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..955d215 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,43 @@ +use crate::backend::Backend; + +/// Crate result alias. +pub type Result = std::result::Result; + +/// Errors returned from [`crate::VideoDecoder`]. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// An underlying FFmpeg error. + #[error("ffmpeg error: {0}")] + Ffmpeg(#[from] ffmpeg_next::Error), + + /// `avcodec_find_decoder` returned null for the input codec id. The id + /// is reported as the raw integer (`AVCodecID` discriminant) — we do not + /// construct the bindgen `AVCodecID` enum from a runtime value, since + /// values outside our build's discriminant set would invoke UB. + #[error("no decoder for codec id {0}")] + NoCodec(u32), + + /// The codec does not advertise a hardware configuration matching the + /// requested backend (via `avcodec_get_hw_config`). + #[error("codec does not support backend {0:?}")] + BackendUnsupportedByCodec(Backend), + + /// `av_hwdevice_ctx_create` failed for the requested backend. + #[error("hardware device init failed for {backend:?}: {source}")] + HwDeviceInitFailed { + /// Backend that failed to initialise. + backend: Backend, + /// Underlying FFmpeg error. 
+ source: ffmpeg_next::Error, + }, + + /// Auto-probe exhausted every backend in the platform's order. Empty + /// `attempts` means the platform has no hardware backends listed in + /// [`crate::Backend`] for the current `target_os` — callers must + /// fall back to a software decoder of their choice. + #[error("all hardware backends failed; attempts: {attempts:?}")] + AllBackendsFailed { + /// Per-backend errors collected during probing, in the order tried. + attempts: Vec<(Backend, Box)>, + }, +} diff --git a/src/ffi.rs b/src/ffi.rs new file mode 100644 index 0000000..04aa50f --- /dev/null +++ b/src/ffi.rs @@ -0,0 +1,272 @@ +//! FFI shims used by the decoder. Kept in one place so the unsafe surface is +//! easy to audit. +//! +//! All reads of `AVPixelFormat` / `AVHWDeviceType` values returned by FFmpeg +//! at runtime go through `ptr::read::` after a pointer cast, never +//! through the bindgen-generated Rust enum. The enums are `#[repr(i32)]` +//! and constructing them from a value not in the listed discriminants is +//! undefined behavior — exactly the situation header/library skew creates. +//! See the doc comments on individual functions for what is read as raw +//! integer vs. constructed from a known constant. + +use std::ptr; + +use ffmpeg_next::ffi::{ + avcodec_get_hw_config, AVCodec, AVCodecContext, AVHWDeviceType, AVPixelFormat, + AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX, +}; + +/// State pointed to by `AVCodecContext::opaque` so [`get_hw_format`] can pick +/// the correct hardware pixel format without globals. One instance per +/// decoder; freed by [`crate::VideoDecoder`] after the codec context is +/// dropped. +/// +/// `wanted` is set from a hardcoded `AVPixelFormat` constant in our bindings +/// (via `Backend::hw_pixel_format`), so it is always a valid enum value. We +/// also store its raw `i32` so the callback can compare against the offered +/// list without going through enum reads. 
+#[repr(C)] +pub(crate) struct CallbackState { + /// Hardware pixel format we want the decoder to produce. Constructed + /// from a known constant; safe to use as the callback's return value. + pub(crate) wanted: AVPixelFormat, + /// Same value as `wanted` cast to `i32`, cached so the callback's + /// pix_fmts walk doesn't have to convert per iteration. + pub(crate) wanted_int: i32, +} + +/// `AVCodecContext::get_format` callback. FFmpeg invokes it with the list of +/// pixel formats the codec is willing to output for the current stream. +/// +/// The offered list is walked as `*const i32` (cast from `*const AVPixelFormat`) +/// to avoid constructing the bindgen enum from values that may not be in our +/// build's discriminant set. The return value is either `wanted` (a known +/// constant) or `AV_PIX_FMT_NONE` (also a known constant) — both safe to +/// produce as `AVPixelFormat`. +pub(crate) unsafe extern "C" fn get_hw_format( + ctx: *mut AVCodecContext, + pix_fmts: *const AVPixelFormat, +) -> AVPixelFormat { + debug_assert!(!ctx.is_null()); + debug_assert!(!pix_fmts.is_null()); + + // SAFETY: opaque was set by `try_open` to a valid `Box` + // pointer that outlives the codec context (we only free it after the + // codec context's drop runs). When opaque is null we treat the call as + // strict — a stray invocation cannot silently downgrade. + let state = unsafe { (*ctx).opaque as *const CallbackState }; + let (wanted, wanted_int) = if state.is_null() { + ( + AVPixelFormat::AV_PIX_FMT_NONE, + AVPixelFormat::AV_PIX_FMT_NONE as i32, + ) + } else { + unsafe { ((*state).wanted, (*state).wanted_int) } + }; + + // Walk the offered list as i32. The pointer cast is sound because + // `AVPixelFormat` is `#[repr(i32)]` (same size and alignment as i32). + // Reading as i32 cannot be UB regardless of the value FFmpeg wrote. 
+ let mut p = pix_fmts as *const i32; + let none_int = AVPixelFormat::AV_PIX_FMT_NONE as i32; + loop { + // SAFETY: FFmpeg guarantees the list is terminated by AV_PIX_FMT_NONE. + // We bail at the sentinel; reads up to and including it are in-bounds. + let v = unsafe { ptr::read(p) }; + if v == none_int { + return AVPixelFormat::AV_PIX_FMT_NONE; + } + if v == wanted_int { + return wanted; + } + p = unsafe { p.add(1) }; + } +} + +/// Walk the codec's `AVCodecHWConfig` table and return whether the codec +/// advertises support for `device_type` **with** `wanted_pix_fmt` via the +/// `HW_DEVICE_CTX` setup method. +/// +/// FFmpeg's HW config table is keyed per (device_type, pix_fmt) pair: a +/// codec can advertise the same device with several different hardware +/// pixel formats (e.g. VAAPI codecs that offer both `AV_PIX_FMT_VAAPI` +/// and `AV_PIX_FMT_DRM_PRIME`). Matching only on `device_type` would let +/// us proceed to install a strict `get_format` callback for a format the +/// codec never advertises, and the failure would surface deep inside the +/// probe / decode path instead of up front. Requiring the codec to +/// advertise the **exact** pix_fmt our `Backend` uses keeps the strict +/// `get_format` honest and gives `open_with` a clean rejection signal. +/// +/// All reads from the FFmpeg-supplied `AVCodecHWConfig` are performed as +/// raw integers via `addr_of!` + `ptr::read::` to avoid copying or +/// interpreting enum-typed fields whose runtime values might not match +/// our build's discriminant set. +pub(crate) fn codec_supports_hwaccel( + codec: *const AVCodec, + device_type: AVHWDeviceType, + wanted_pix_fmt: i32, +) -> bool { + debug_assert!(!codec.is_null()); + let device_type_int = device_type as i32; + let mut i = 0; + loop { + // SAFETY: `avcodec_get_hw_config` returns null past the end; we stop then. 
+ let cfg = unsafe { avcodec_get_hw_config(codec, i) }; + if cfg.is_null() { + return false; + } + // Read each field as raw integer rather than copying the whole struct + // (which would interpret `pix_fmt` and `device_type` as their enum types). + // SAFETY: `cfg` is non-null and points to a valid `AVCodecHWConfig` for + // the lifetime of the call; `addr_of!` projects to a sized field; the + // `*const i32` cast is sound because `methods` is `c_int` (i32), + // `device_type` is `AVHWDeviceType` (`#[repr(u32)]`, but FFmpeg's + // assigned values fit in i32 and the runtime layout is i32-sized), + // and `pix_fmt` is `AVPixelFormat` (`#[repr(i32)]`). + let methods: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).methods)) }; + let cfg_device_type_int: i32 = + unsafe { ptr::read(ptr::addr_of!((*cfg).device_type) as *const i32) }; + let cfg_pix_fmt_int: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).pix_fmt) as *const i32) }; + + if methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0 + && cfg_device_type_int == device_type_int + && cfg_pix_fmt_int == wanted_pix_fmt + { + return true; + } + i += 1; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // The callback derefs `(*ctx).opaque`, so we need a real-looking + // AVCodecContext. We construct a zeroed one (the callback only reads opaque). + struct FakeCtx(*mut AVCodecContext); + impl FakeCtx { + fn new(state: *mut CallbackState) -> Self { + let boxed: Box = unsafe { Box::new(std::mem::zeroed()) }; + let raw = Box::into_raw(boxed); + unsafe { (*raw).opaque = state.cast() }; + Self(raw) + } + } + impl Drop for FakeCtx { + fn drop(&mut self) { + unsafe { drop(Box::from_raw(self.0)) }; + } + } + + fn make_state(wanted: AVPixelFormat) -> CallbackState { + CallbackState { + wanted, + wanted_int: wanted as i32, + } + } + + fn run(state: &CallbackState, mut offered: Vec) -> AVPixelFormat { + // Build the offered list as raw i32, terminated by AV_PIX_FMT_NONE. 
+ offered.push(AVPixelFormat::AV_PIX_FMT_NONE as i32); + let ctx = FakeCtx::new(state as *const _ as *mut _); + // SAFETY: we cast the i32 buffer pointer to *const AVPixelFormat + // because that's the function's declared signature. The callback only + // ever reads through *const i32 internally, so this transit through + // *const AVPixelFormat is purely a type system formality. + unsafe { get_hw_format(ctx.0, offered.as_ptr() as *const AVPixelFormat) } + } + + #[test] + fn returns_wanted_hw_format_when_offered() { + let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX); + let got = run( + &state, + vec![ + AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX as i32, + AVPixelFormat::AV_PIX_FMT_NV12 as i32, + ], + ); + assert_eq!(got, AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX); + } + + #[test] + fn returns_none_when_wanted_absent() { + let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX); + let got = run( + &state, + vec![ + AVPixelFormat::AV_PIX_FMT_NV12 as i32, + AVPixelFormat::AV_PIX_FMT_YUV420P as i32, + ], + ); + assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE); + } + + #[test] + fn null_opaque_is_treated_as_strict() { + let boxed: Box = unsafe { Box::new(std::mem::zeroed()) }; + let ctx_raw = Box::into_raw(boxed); + unsafe { (*ctx_raw).opaque = ptr::null_mut() }; + let offered = [ + AVPixelFormat::AV_PIX_FMT_NV12 as i32, + AVPixelFormat::AV_PIX_FMT_NONE as i32, + ]; + let got = unsafe { get_hw_format(ctx_raw, offered.as_ptr() as *const AVPixelFormat) }; + assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE); + unsafe { drop(Box::from_raw(ctx_raw)) }; + } + + #[test] + fn unknown_offered_value_is_skipped_without_ub() { + // Simulate a header-skewed FFmpeg that offers a pixel-format value we + // don't have a binding constant for (e.g. some future format). The + // callback walks the list as i32 — no enum is constructed from that + // value, so this read is sound. 
+ let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX); + let got = run( + &state, + vec![ + 99_999_i32, // imaginary unknown + AVPixelFormat::AV_PIX_FMT_NV12 as i32, + ], + ); + assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE); + } + + /// `codec_supports_hwaccel` must reject a (device_type, pix_fmt) pair + /// that the codec does not advertise — even if the device alone is + /// listed. Without this check, the strict `get_format` callback would + /// be wired up for a HW pix_fmt the codec never offers and the failure + /// would surface deep inside the probe / decode path instead of at + /// `open_with` / probe-build time. + /// + /// macOS-only: the test relies on FFmpeg's H.264 decoder advertising + /// `(AV_HWDEVICE_TYPE_VIDEOTOOLBOX, AV_PIX_FMT_VIDEOTOOLBOX)`, which is + /// only present in builds with VideoToolbox compiled in. + #[cfg(target_os = "macos")] + #[test] + fn codec_supports_hwaccel_requires_matching_pix_fmt() { + use ffmpeg_next::ffi::{avcodec_find_decoder, AVCodecID, AVHWDeviceType, AVPixelFormat}; + + // SAFETY: AV_CODEC_ID_H264 is a known constant in our build's + // `AVCodecID` discriminant set; constructing it does not invoke the + // bindgen-enum UB we worry about for runtime-derived ids. 
+ let codec_ptr = unsafe { avcodec_find_decoder(AVCodecID::AV_CODEC_ID_H264) }; + assert!(!codec_ptr.is_null(), "H.264 decoder must be present"); + + let device = AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX; + let videotoolbox = AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX as i32; + let nv12 = AVPixelFormat::AV_PIX_FMT_NV12 as i32; + + assert!( + codec_supports_hwaccel(codec_ptr, device, videotoolbox), + "VideoToolbox + AV_PIX_FMT_VIDEOTOOLBOX must be advertised by FFmpeg's H.264 decoder" + ); + assert!( + !codec_supports_hwaccel(codec_ptr, device, nv12), + "VideoToolbox + AV_PIX_FMT_NV12 must NOT match the codec's HW config — \ + the strict get_format would have no offered HW format to return" + ); + } +} diff --git a/src/frame.rs b/src/frame.rs new file mode 100644 index 0000000..4642184 --- /dev/null +++ b/src/frame.rs @@ -0,0 +1,630 @@ +//! CPU-side decoded video frame. +//! +//! Wraps `ffmpeg_next::frame::Video`. All accessors read from raw `AVFrame` +//! fields (`format`, `linesize`, `data`, `width`, `height`, `pts`) directly +//! and never go through ffmpeg-next's `Video::format()` / `plane_height()` +//! / `plane_width()` / `data()` — those construct `AVPixelFormat` from the +//! frame's raw `format` integer via `transmute`, which is undefined behavior +//! when the value isn't in the build's bindgen-generated discriminant set +//! (the exact failure mode this crate is designed to survive). +//! +//! Per-row sizes for [`Frame::row`] / [`Frame::rows`] are computed from +//! hardcoded chroma-subsampling and bit-depth tables keyed on the safe +//! `pix_fmt()` integer, covering only the formats `hwdecode` produces (the +//! NV* and P0xx/P2xx/P4xx families after `av_hwframe_transfer_data`). For +//! any other format, the row accessors return `None` rather than guessing +//! at a slice length. +//! +//! Why per-row, not whole-plane: FFmpeg allocates each row at +//! `linesize[plane]` ([`Frame::stride`]) bytes for SIMD alignment, but +//! 
hardware transfer paths only initialize the first +//! [`Frame::row_bytes`]`(plane)` of every row. Exposing a stride-inclusive +//! `&[u8]` over an entire plane would let safe code observe those +//! uninitialized padding bytes, which violates `slice::from_raw_parts`. +//! Per-row slices are tightly clipped to the visible byte width so the +//! safe API never hands out an uninitialized byte. Callers that need a +//! single base pointer (e.g. SIMD pixel converters keyed off stride) can +//! reach for [`Frame::as_ptr`] and consume `stride * plane_h` bytes +//! themselves under their own `unsafe` contract. +//! +//! Compare formats against integer constants in [`crate::pix_fmt`]. + +use std::slice; + +use ffmpeg_next::frame; + +use crate::{ + error::{Error, Result}, + pix_fmt, +}; + +/// CPU-side decoded video frame produced by [`crate::VideoDecoder`]. +pub struct Frame { + inner: frame::Video, +} + +impl Frame { + /// Construct an empty frame, suitable as the destination passed to + /// [`crate::VideoDecoder::receive_frame`]. + /// + /// Returns `Err(Error::Ffmpeg(Other { errno: ENOMEM }))` when the + /// underlying `av_frame_alloc()` returns NULL — `ffmpeg_next` does not + /// surface that failure, so we check it here rather than letting a null + /// pointer flow into the safe accessors and become UB on first read. + pub fn empty() -> Result { + // SAFETY: as_ptr() is safe; we just inspect the value (potentially null). + let inner = frame::Video::empty(); + if unsafe { inner.as_ptr() }.is_null() { + return Err(Error::Ffmpeg(ffmpeg_next::Error::Other { + errno: libc::ENOMEM, + })); + } + Ok(Self { inner }) + } + + /// Width in pixels. + pub fn width(&self) -> u32 { + // SAFETY: AVFrame.width is c_int; safe to read regardless of value. + unsafe { (*self.inner.as_ptr()).width as u32 } + } + + /// Height in pixels. + pub fn height(&self) -> u32 { + // SAFETY: AVFrame.height is c_int. 
+ unsafe { (*self.inner.as_ptr()).height as u32 } + } + + /// Pixel format, returned as the raw `i32` value FFmpeg wrote to + /// `AVFrame.format`. Sound regardless of the linked FFmpeg version — + /// no `AVPixelFormat` enum is constructed. + /// + /// Compare against constants in [`crate::pix_fmt`]. + pub fn pix_fmt(&self) -> i32 { + // SAFETY: AVFrame.format is bound as c_int. + unsafe { (*self.inner.as_ptr()).format } + } + + /// Presentation timestamp in stream time base, or `None` for + /// `AV_NOPTS_VALUE`. + pub fn pts(&self) -> Option { + // ffmpeg-next's Frame::pts performs no enum conversion; safe to use. + self.inner.pts() + } + + /// Number of populated planes (1 for packed formats, 2 for NV12/P010, + /// 3 for planar YUV, etc.). Computed by scanning `linesize` for the + /// first zero entry — no enum reads. + pub fn planes(&self) -> usize { + // SAFETY: AVFrame.linesize is `[c_int; 8]`; reads are sound. + unsafe { + let linesize = &(*self.inner.as_ptr()).linesize; + for (i, ls) in linesize.iter().enumerate() { + if *ls == 0 { + return i; + } + } + linesize.len() + } + } + + /// Bytes per row for `plane`. Reads `AVFrame.linesize[plane]` directly. + /// Panics if `plane >= planes()` or the linesize is non-positive (FFmpeg + /// allows negative linesize for vertically-flipped formats; this crate + /// does not surface those — call [`Self::data`] first to test safely). + pub fn stride(&self, plane: usize) -> usize { + let n = self.planes(); + assert!( + plane < n, + "stride: plane {plane} out of bounds (planes={n})" + ); + // SAFETY: bounds-checked above; linesize is `[c_int; 8]`. + let linesize: i32 = unsafe { (*self.inner.as_ptr()).linesize[plane] }; + assert!( + linesize > 0, + "stride: non-positive linesize {linesize} for plane {plane} \ + (negative linesize means vertically-flipped — not supported)" + ); + linesize as usize + } + + /// Visible byte width of `plane` — the number of initialized bytes at + /// the start of every row in that plane. 
+ /// + /// Distinct from [`Self::stride`], which returns the FFmpeg `linesize`. + /// `linesize` is `>= row_bytes` and may include trailing alignment + /// padding bytes that FFmpeg's hardware transfer paths do not + /// initialize. `row_bytes` is what `slice::from_raw_parts` can safely + /// see. + /// + /// Returns `None` when the format is not in the supported HW-output set + /// (see crate `pix_fmt`) or the plane is out of range. + pub fn row_bytes(&self, plane: usize) -> Option { + if plane >= self.planes() { + return None; + } + plane_row_bytes_for(self.pix_fmt(), plane, self.width() as usize) + } + + /// Pixel data for one row of `plane`, tightly clipped to the visible + /// byte width ([`Self::row_bytes`]). + /// + /// Excludes the trailing alignment padding that [`Self::stride`] + /// includes — those bytes are not guaranteed to be initialized by + /// FFmpeg's hardware transfer paths and must not be exposed through a + /// safe `&[u8]`. + /// + /// Returns `None` for any of the following — never panics: + /// - The frame's pixel format is not one of the supported hardware- + /// output formats listed in [`crate::pix_fmt`]. + /// - The plane index is out of range. + /// - `y` is past the plane's row count. + /// - `AVFrame.linesize[plane]` is `<= 0` or `AVFrame.height` is `<= 0`. + /// - The plane's data pointer is null. + /// - The plane size would overflow `isize::MAX`. + pub fn row(&self, plane: usize, y: usize) -> Option<&[u8]> { + let info = self.plane_info(plane)?; + if y >= info.plane_h { + return None; + } + // y < plane_h and plane_h * stride ≤ isize::MAX (verified in plane_info), + // so y * stride is bounded by (plane_h - 1) * stride ≤ isize::MAX. + let offset = y * info.stride; + // SAFETY: + // - `info.plane_ptr` is non-null (verified in plane_info). + // - `offset + row_bytes ≤ plane_h * stride`, which is the size of the + // FFmpeg allocation for this plane. 
+ // - Bytes 0..row_bytes of every row are written by FFmpeg's HW + // transfer; the slice is fully initialized. + // - `row_bytes ≤ stride ≤ isize::MAX` per plane_info. + unsafe { + let row_ptr = info.plane_ptr.add(offset); + Some(slice::from_raw_parts(row_ptr, info.row_bytes)) + } + } + + /// Iterator over every row of `plane`. Each yielded slice has length + /// [`Self::row_bytes`]`(plane)` — never includes the trailing alignment + /// padding that lives within [`Self::stride`]. + /// + /// Returns `None` under the same conditions as [`Self::row`]. + pub fn rows(&self, plane: usize) -> Option + '_> { + let info = self.plane_info(plane)?; + Some((0..info.plane_h).map(move |y| { + // Same bounds argument as `row()`. + let offset = y * info.stride; + // SAFETY: see `row()` — the same invariants hold here, and the + // iterator's lifetime is tied to `&self` so the pointer remains + // valid for every yielded slice. + unsafe { slice::from_raw_parts(info.plane_ptr.add(offset), info.row_bytes) } + })) + } + + /// Raw base pointer to `plane`'s allocation, or `None` if the plane + /// fails the same layout validation [`Self::row`] applies. + /// + /// Returns `None` whenever any of the following is true: + /// - The plane index is out of range (`plane >= planes()`). + /// - The frame's pixel format is not in the supported HW-output set. + /// - `linesize[plane] <= 0`. **In particular, FFmpeg permits negative + /// linesizes for vertically-flipped frames with `data[n]` pointing + /// at the *end* of the image. Returning that pointer with the + /// advertised "valid for `stride * plane_h` bytes forward" contract + /// would let a downstream converter walk past the buffer.** This + /// accessor refuses the layout instead of handing back a pointer the + /// caller cannot safely interpret as forward-addressable. + /// - `height <= 0`, the data pointer is null, `row_bytes > stride`, or + /// the total plane size would overflow `isize::MAX`. 
+ /// + /// On `Some(ptr)` the pointer is valid for + /// `stride(plane) * plane_height` *forward-addressable* bytes, and + /// only the first [`Self::row_bytes`]`(plane)` bytes of each row are + /// guaranteed to be initialized. The trailing per-row alignment padding + /// is uninitialized; callers performing wide SIMD loads that read past + /// `row_bytes` must mask the result and never surface those bytes + /// through a safe `&[u8]`. + /// + /// This accessor exists for downstream pixel-format converters + /// (`colconv`) that work in `(ptr, stride, width, height)` quadruples; + /// safe code should prefer [`Self::row`] / [`Self::rows`]. + pub fn as_ptr(&self, plane: usize) -> Option<*const u8> { + // Share the full plane-layout validation so the unsafe escape hatch + // never escapes a layout that `row()` / `rows()` reject. Returning a + // pointer for a negative-stride frame (FFmpeg's vertical-flip + // convention, where `data[n]` points at the *end* of the image) + // would invite forward-walking out-of-bounds reads from a caller + // that trusts the documented "valid for stride × plane_h bytes" + // contract. + self.plane_info(plane).map(|info| info.plane_ptr) + } + + /// Read every per-plane field needed by the row accessors with the + /// safety preconditions enforced once. + fn plane_info(&self, plane: usize) -> Option { + if plane >= self.planes() { + return None; + } + // SAFETY: bounds-checked plane index; linesize/height/data are raw + // c_int / pointer reads that cannot themselves be UB. 
+ let (stride_int, height_int, plane_ptr) = unsafe { + let raw = self.inner.as_ptr(); + ((*raw).linesize[plane], (*raw).height, (*raw).data[plane]) + }; + if stride_int <= 0 || height_int <= 0 || plane_ptr.is_null() { + return None; + } + let stride = stride_int as usize; + let plane_h = plane_height_for(self.pix_fmt(), plane, height_int as usize)?; + let row_bytes = plane_row_bytes_for(self.pix_fmt(), plane, self.width() as usize)?; + if row_bytes > stride { + return None; + } + // Bound the entire plane allocation to isize::MAX so any byte offset + // computed as `y * stride` (y < plane_h) stays representable, satisfying + // the safety contract of `pointer::add` and `slice::from_raw_parts`. + let plane_size = stride.checked_mul(plane_h)?; + if plane_size > isize::MAX as usize { + return None; + } + Some(PlaneInfo { + plane_ptr, + stride, + plane_h, + row_bytes, + }) + } + + /// Crate-internal: hand the wrapped frame to FFmpeg / our decoder code. + pub(crate) fn as_inner_mut(&mut self) -> &mut frame::Video { + &mut self.inner + } +} + +#[derive(Clone, Copy)] +struct PlaneInfo { + plane_ptr: *const u8, + stride: usize, + plane_h: usize, + row_bytes: usize, +} + +// `Default` intentionally omitted: constructing a frame can fail (OOM +// in `av_frame_alloc`), and a panicking `default()` would defeat the +// safety stance of [`Frame::empty`]. Use `Frame::empty()?` directly. + +/// Visible byte width of `plane`'s rows for a frame of `frame_width` and +/// the given pixel format. `None` for formats not in the supported HW- +/// output set. +/// +/// Distinct from `linesize` (FFmpeg's per-row stride, which may include +/// alignment padding). HW transfer paths only initialize bytes +/// `0..plane_row_bytes_for(...)` of each row; everything from there to +/// `stride` is uninitialized padding and must not be exposed via +/// `slice::from_raw_parts`. 
+fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Option { + match pix_fmt_int { + // 8-bit semi-planar 4:2:0 / 4:2:2: Y at full width (1 byte/sample); + // UV interleaved at horizontally-subsampled chroma with `ceil(W/2)` + // U+V pairs at 2 bytes per pair. For even W the chroma row equals + // `W` bytes (the simple case); for odd W it must round *up* to the + // next even byte so the trailing chroma sample is not silently + // dropped on width = 2k+1 frames. + pix_fmt::NV12 | pix_fmt::NV21 | pix_fmt::NV16 => match plane { + 0 => Some(frame_width), + 1 => Some(frame_width.div_ceil(2).checked_mul(2)?), + _ => None, + }, + // 8-bit 4:4:4 semi-planar: chroma at full horizontal resolution, + // 2 bytes per pixel (1 byte U + 1 byte V) — no rounding required. + pix_fmt::NV24 => match plane { + 0 => Some(frame_width), + 1 => Some(frame_width.checked_mul(2)?), + _ => None, + }, + // 10/12/16-bit semi-planar 4:2:0 / 4:2:2: Y is 2 bytes/sample + // (high-bit-depth packed in 16-bit). UV interleaved at horizontally- + // subsampled chroma with `ceil(W/2)` U+V pairs at 4 bytes per pair + // (2 bytes U + 2 bytes V). Same odd-width rounding as the 8-bit + // chroma path, scaled by 2 bytes per sample. + pix_fmt::P010LE + | pix_fmt::P010BE + | pix_fmt::P012LE + | pix_fmt::P016LE + | pix_fmt::P210LE + | pix_fmt::P212LE + | pix_fmt::P216LE => match plane { + 0 => Some(frame_width.checked_mul(2)?), + 1 => Some(frame_width.div_ceil(2).checked_mul(4)?), + _ => None, + }, + // 10/12/16-bit 4:4:4 semi-planar: Y is 2 bytes/sample; UV at full + // horizontal resolution with 4 bytes per pixel (2 bytes U + 2 bytes V). + pix_fmt::P410LE | pix_fmt::P412LE | pix_fmt::P416LE => match plane { + 0 => Some(frame_width.checked_mul(2)?), + 1 => Some(frame_width.checked_mul(4)?), + _ => None, + }, + _ => None, + } +} + +/// Number of rows in `plane` for a frame of `frame_height` and the given +/// pixel format. `None` for formats not in the supported HW-output set. 
+/// +/// Crate-internal so the decoder's probe-replay accountant can compute +/// per-frame byte sizes without re-implementing the chroma-subsampling +/// table. +pub(crate) fn plane_height_for( + pix_fmt_int: i32, + plane: usize, + frame_height: usize, +) -> Option { + match pix_fmt_int { + // 4:2:0 semi-planar — Y full height, chroma half height. + pix_fmt::NV12 + | pix_fmt::NV21 + | pix_fmt::P010LE + | pix_fmt::P010BE + | pix_fmt::P012LE + | pix_fmt::P016LE => match plane { + 0 => Some(frame_height), + 1 => Some(frame_height.div_ceil(2)), + _ => None, + }, + // 4:2:2 / 4:4:4 semi-planar — both planes full height. + pix_fmt::NV16 + | pix_fmt::NV24 + | pix_fmt::P210LE + | pix_fmt::P212LE + | pix_fmt::P216LE + | pix_fmt::P410LE + | pix_fmt::P412LE + | pix_fmt::P416LE => match plane { + 0 | 1 => Some(frame_height), + _ => None, + }, + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_frame_has_zero_dimensions_and_no_pts() { + let f = Frame::empty().expect("alloc"); + assert_eq!(f.width(), 0); + assert_eq!(f.height(), 0); + assert_eq!(f.pts(), None); + // AVFrame.format defaults to -1 (AV_PIX_FMT_NONE) for an empty frame. + assert_eq!(f.pix_fmt(), -1); + // No active planes for an empty frame (all linesize entries are 0). + assert_eq!(f.planes(), 0); + } + + #[test] + fn row_returns_none_for_unknown_format() { + let f = Frame::empty().expect("alloc"); + // pix_fmt is NONE (-1), not in the supported set. + assert!(f.row(0, 0).is_none()); + assert!(f.rows(0).is_none()); + assert!(f.row_bytes(0).is_none()); + } + + /// Synthesize a frame with a negative linesize (FFmpeg's vertical-flip + /// convention) and assert the row accessors refuse to construct a slice. + /// Without the linesize > 0 check, the negative `i32 as usize` would + /// produce a huge positive length and `from_raw_parts` would be UB. 
+ /// + /// `as_ptr` shares the same validation — handing back the data pointer + /// for a negative-stride frame would let a downstream converter + /// following the "valid for stride × plane_h bytes forward" contract + /// walk past the buffer. + #[test] + fn row_returns_none_for_negative_linesize() { + let mut f = Frame::empty().expect("alloc"); + unsafe { + let raw = f.inner.as_mut_ptr(); + (*raw).format = pix_fmt::NV12; + (*raw).width = 1920; + (*raw).height = 1080; + (*raw).linesize[0] = -1920; // vertically-flipped + (*raw).linesize[1] = -1920; + // data pointers stay null; the accessors would also reject on null, + // but should bail earlier on the linesize sign. + } + assert!(f.row(0, 0).is_none()); + assert!(f.row(1, 0).is_none()); + assert!(f.rows(0).is_none()); + assert!( + f.as_ptr(0).is_none(), + "as_ptr must share row()/rows() validation — a negative-stride \ + frame must not leak a forward-readable plane pointer" + ); + assert!(f.as_ptr(1).is_none()); + } + + #[test] + fn row_returns_none_for_non_positive_height() { + let mut f = Frame::empty().expect("alloc"); + unsafe { + let raw = f.inner.as_mut_ptr(); + (*raw).format = pix_fmt::NV12; + (*raw).width = 1920; + (*raw).height = 0; + (*raw).linesize[0] = 1920; + (*raw).linesize[1] = 1920; + } + assert!(f.row(0, 0).is_none()); + } + + /// Synthesize a frame backed by a manually-allocated buffer with stride + /// strictly larger than visible row bytes (the exact case where + /// FFmpeg's HW transfer leaves trailing padding uninitialized) and + /// confirm the safe row accessor returns slices clipped to the visible + /// width. + #[test] + fn row_clips_to_visible_width_not_stride() { + use std::alloc::{alloc, dealloc, Layout}; + let width = 64usize; + let height = 4usize; + // Stride > width: 16 bytes of padding per row in the Y plane. + let stride = 80usize; + let plane_size = stride * height; + // Allocate ourselves so we can fully control initialization. 
Fill + // bytes 0..width with 0xAA per row (the "valid pixel" range) and + // bytes width..stride with 0xFF (the simulated alignment padding — + // FFmpeg would leave these uninitialized; we set them to a sentinel + // that the test can detect if the safe slice ever exposes them). + let layout = Layout::from_size_align(plane_size, 32).unwrap(); + let buf = unsafe { alloc(layout) }; + assert!(!buf.is_null()); + for y in 0..height { + let row = unsafe { buf.add(y * stride) }; + for x in 0..width { + unsafe { *row.add(x) = 0xAA }; + } + for x in width..stride { + unsafe { *row.add(x) = 0xFF }; + } + } + + let mut f = Frame::empty().expect("alloc"); + unsafe { + let raw = f.inner.as_mut_ptr(); + (*raw).format = pix_fmt::NV12; + (*raw).width = width as i32; + (*raw).height = height as i32; + (*raw).linesize[0] = stride as i32; + // linesize[1] = 0 keeps planes() at 1 so the test stays focused on + // plane 0 without owning a second allocation. + (*raw).data[0] = buf; + } + + assert_eq!(f.row_bytes(0), Some(width)); + assert_eq!(f.stride(0), stride); + let row0 = f.row(0, 0).expect("row 0"); + assert_eq!( + row0.len(), + width, + "safe row must be clipped to visible width" + ); + assert!( + row0.iter().all(|&b| b == 0xAA), + "row must not include padding sentinel 0xFF" + ); + + let collected: Vec<&[u8]> = f.rows(0).expect("rows iterator").collect(); + assert_eq!(collected.len(), height); + for r in &collected { + assert_eq!(r.len(), width); + assert!(r.iter().all(|&b| b == 0xAA)); + } + + // `as_ptr` accepts the valid layout and returns the same base pointer + // FFmpeg wrote into `data[0]`, so SIMD callers can reach the plane + // through the documented unsafe contract. + assert_eq!( + f.as_ptr(0), + Some(buf as *const u8), + "as_ptr must surface the plane base for a valid forward-stride frame" + ); + + // Out-of-range row index returns None instead of panicking. 
+ assert!(f.row(0, height).is_none()); + + // Detach the buffer before drop so AVFrame's own free path doesn't + // touch our manual allocation. + unsafe { + (*f.inner.as_mut_ptr()).data[0] = std::ptr::null_mut(); + dealloc(buf, layout); + } + } + + #[test] + #[should_panic(expected = "non-positive linesize")] + fn stride_panics_on_negative_linesize() { + let mut f = Frame::empty().expect("alloc"); + unsafe { + let raw = f.inner.as_mut_ptr(); + (*raw).linesize[0] = -1920; + } + let _ = f.stride(0); + } + + #[test] + fn frame_is_send() { + fn check() {} + check::(); + } + + #[test] + fn plane_height_table_covers_supported_formats() { + // Spot-check the chroma subsampling table. + assert_eq!(plane_height_for(pix_fmt::NV12, 0, 1080), Some(1080)); + assert_eq!(plane_height_for(pix_fmt::NV12, 1, 1080), Some(540)); + assert_eq!(plane_height_for(pix_fmt::NV12, 1, 1081), Some(541)); + assert_eq!(plane_height_for(pix_fmt::P010LE, 1, 1080), Some(540)); + assert_eq!(plane_height_for(pix_fmt::NV16, 1, 1080), Some(1080)); + assert_eq!(plane_height_for(pix_fmt::NV24, 1, 1080), Some(1080)); + assert_eq!(plane_height_for(pix_fmt::P416LE, 1, 1080), Some(1080)); + assert_eq!(plane_height_for(pix_fmt::NONE, 0, 1080), None); + assert_eq!(plane_height_for(pix_fmt::NV12, 2, 1080), None); + } + + /// 4:2:0 / 4:2:2 chroma planes carry `ceil(W/2)` U+V pairs per row. + /// For odd `W`, dropping the round-up silently truncates the last chroma + /// sample — and the safe row slice would expose a buffer one byte (8-bit) + /// or two bytes (high-bit-depth) shorter than the data FFmpeg actually + /// wrote. Y planes and 4:4:4 chroma planes are unaffected because their + /// row count is just `W` or a fixed multiple of `W`. + #[test] + fn plane_row_bytes_rounds_up_chroma_for_odd_widths() { + // 8-bit subsampled chroma — odd W gains one byte (the missing sample + // pair). 
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1921), Some(1922)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV21, 1, 1921), Some(1922)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV16, 1, 1921), Some(1922)); + // High-bit-depth subsampled chroma — odd W gains two bytes. + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P010BE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P012LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P016LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P210LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P212LE, 1, 1921), Some(3844)); + assert_eq!(plane_row_bytes_for(pix_fmt::P216LE, 1, 1921), Some(3844)); + // Y planes always at full width regardless of subsampling. + assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 0, 1921), Some(1921)); + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 0, 1921), Some(3842)); + // 4:4:4 chroma is at full horizontal resolution — no rounding. + assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 1, 1921), Some(3842)); + assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 1, 1921), Some(7684)); + // Even widths must still match the original (pre-fix) values so the + // change is purely additive on the dominant code path. + assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1920), Some(3840)); + } + + #[test] + fn plane_row_bytes_table_covers_supported_formats() { + // 8-bit 4:2:0 / 4:2:2 — both planes at width. + assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 0, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV21, 1, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV16, 1, 1920), Some(1920)); + // 8-bit 4:4:4 — chroma plane is 2 * width. 
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 0, 1920), Some(1920)); + assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 1, 1920), Some(3840)); + // 10/12/16-bit 4:2:0 / 4:2:2 — both planes at 2 * width. + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 0, 1920), Some(3840)); + assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1920), Some(3840)); + assert_eq!(plane_row_bytes_for(pix_fmt::P210LE, 1, 1920), Some(3840)); + // 10/12/16-bit 4:4:4 — Y is 2 * width, chroma is 4 * width. + assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 0, 1920), Some(3840)); + assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 1, 1920), Some(7680)); + assert_eq!(plane_row_bytes_for(pix_fmt::P416LE, 1, 1920), Some(7680)); + // Unsupported / out-of-range. + assert_eq!(plane_row_bytes_for(pix_fmt::NONE, 0, 1920), None); + assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 2, 1920), None); + } +} diff --git a/src/lib.rs b/src/lib.rs index 0a58390..3654016 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,35 @@ -//! A template for creating Rust open-source repo on GitHub -#![cfg_attr(not(feature = "std"), no_std)] +//! Cross-platform **hardware** video decoder built on top of `ffmpeg-next`. +//! +//! [`VideoDecoder`] mirrors the surface of `ffmpeg::decoder::Video` +//! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and auto-probes the +//! host's hardware backends (VideoToolbox / VAAPI / NVDEC / D3D11VA). +//! There is **no software fallback inside this crate** — if no hardware +//! backend can decode the stream, [`Error::AllBackendsFailed`] surfaces +//! either from [`VideoDecoder::open`] (when no backend even opens) or +//! from [`VideoDecoder::receive_frame`] / [`VideoDecoder::send_packet`] / +//! [`VideoDecoder::send_eof`] (when the initially-opened backend or any +//! later candidate fails at decode time and the probe order is +//! exhausted). On single-backend platforms (e.g. macOS, where the order +//! is `[VideoToolbox]`), only the runtime path can return it. The +//! 
caller picks how to fall back to a software decoder of their choice +//! (e.g. by opening an `ffmpeg::decoder::Video` directly). +//! +//! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side +//! and downloaded via `av_hwframe_transfer_data` (NV12 for 8-bit input, +//! P010 for 10-bit). Pixel-format conversion is intentionally out of +//! scope; downstream code handles that (e.g. via `colconv`). #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(docsrs, allow(unused_attributes))] #![deny(missing_docs)] -#[cfg(all(not(feature = "std"), feature = "alloc"))] -extern crate alloc as std; +mod backend; +mod decoder; +mod error; +mod ffi; +mod frame; +pub mod pix_fmt; -#[cfg(feature = "std")] -extern crate std; +pub use backend::Backend; +pub use decoder::VideoDecoder; +pub use error::{Error, Result}; +pub use frame::Frame; diff --git a/src/pix_fmt.rs b/src/pix_fmt.rs new file mode 100644 index 0000000..f3c594e --- /dev/null +++ b/src/pix_fmt.rs @@ -0,0 +1,113 @@ +//! Stable `i32` constants for the pixel formats produced by `hwdecode`'s +//! hardware decoders after `av_hwframe_transfer_data`. +//! +//! `Frame::pix_fmt()` returns the raw integer FFmpeg wrote to `AVFrame.format` +//! (as a plain `i32` to avoid the enum-construction UB that an unvalidated +//! cast would invoke). This module names the constants relevant to dispatch +//! after a successful hardware decode. +//! +//! Because `hwdecode` is hardware-only, the formats listed here cover what +//! the supported HW backends actually produce — the **NV** family (semi- +//! planar 8-bit) and the **P0xx / P2xx / P4xx** family (semi-planar 10/12/16 +//! bit). VideoToolbox, VAAPI, NVDEC, and D3D11VA all download into one of +//! these. +//! +//! Software-decoder output formats (`YUV420P`, `YUV422P`, `RGB24`, etc.) are +//! intentionally **not** listed: callers handle software fallback outside +//! this crate, and dispatch tables for those formats belong with the SW +//! pipeline. +//! +//! 
For values not listed here, write `AVPixelFormat::AV_PIX_FMT_X as i32` +//! directly — that's exactly the cast we use to define these constants. +//! +//! ```ignore +//! use hwdecode::{pix_fmt, Frame}; +//! match frame.pix_fmt() { +//! pix_fmt::NV12 => /* 8-bit 4:2:0 → colconv::frame::Nv12Frame */, +//! pix_fmt::P010LE => /* 10-bit 4:2:0 → colconv::frame::PnFrame<10> */, +//! other => unimplemented!("pix_fmt {other}"), +//! } +//! ``` + +use ffmpeg_next::ffi::AVPixelFormat; + +// --- semi-planar YUV (NV*) — 8-bit hardware download outputs ---------------- + +/// 4:2:0, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV12`). The +/// dominant 8-bit HW download format on every supported backend. +pub const NV12: i32 = AVPixelFormat::AV_PIX_FMT_NV12 as i32; +/// 4:2:0, 8-bit, Y plane + interleaved Cr/Cb (`AV_PIX_FMT_NV21`). +pub const NV21: i32 = AVPixelFormat::AV_PIX_FMT_NV21 as i32; +/// 4:2:2, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV16`). +pub const NV16: i32 = AVPixelFormat::AV_PIX_FMT_NV16 as i32; +/// 4:4:4, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV24`). +pub const NV24: i32 = AVPixelFormat::AV_PIX_FMT_NV24 as i32; + +// --- semi-planar YUV (P0xx) — 4:2:0 high-bit-depth HW downloads ------------- + +/// 4:2:0, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P010LE`). The +/// dominant 10-bit HW download format. +pub const P010LE: i32 = AVPixelFormat::AV_PIX_FMT_P010LE as i32; +/// 4:2:0, 10-bit, semi-planar big-endian (`AV_PIX_FMT_P010BE`). +pub const P010BE: i32 = AVPixelFormat::AV_PIX_FMT_P010BE as i32; +/// 4:2:0, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P012LE`). +pub const P012LE: i32 = AVPixelFormat::AV_PIX_FMT_P012LE as i32; +/// 4:2:0, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P016LE`). +pub const P016LE: i32 = AVPixelFormat::AV_PIX_FMT_P016LE as i32; + +// --- semi-planar YUV (P2xx) — 4:2:2 high-bit-depth HW downloads ------------- + +/// 4:2:2, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P210LE`). 
+pub const P210LE: i32 = AVPixelFormat::AV_PIX_FMT_P210LE as i32; +/// 4:2:2, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P212LE`, FFmpeg 5.0+). +pub const P212LE: i32 = AVPixelFormat::AV_PIX_FMT_P212LE as i32; +/// 4:2:2, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P216LE`). +pub const P216LE: i32 = AVPixelFormat::AV_PIX_FMT_P216LE as i32; + +// --- semi-planar YUV (P4xx) — 4:4:4 high-bit-depth HW downloads ------------- + +/// 4:4:4, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P410LE`). +pub const P410LE: i32 = AVPixelFormat::AV_PIX_FMT_P410LE as i32; +/// 4:4:4, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P412LE`, FFmpeg 5.0+). +pub const P412LE: i32 = AVPixelFormat::AV_PIX_FMT_P412LE as i32; +/// 4:4:4, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P416LE`). +pub const P416LE: i32 = AVPixelFormat::AV_PIX_FMT_P416LE as i32; + +// --- sentinel --------------------------------------------------------------- + +/// Sentinel value FFmpeg writes to `AVFrame.format` for an unset frame +/// (`AV_PIX_FMT_NONE`). [`crate::Frame::empty`] returns this until the frame +/// is filled by a decoder. +pub const NONE: i32 = AVPixelFormat::AV_PIX_FMT_NONE as i32; + +#[cfg(test)] +mod tests { + use super::*; + + /// Regression check: if the underlying `AVPixelFormat` discriminants ever + /// change in `ffmpeg-sys-next`'s bindings, this catches it. 
+ #[test] + fn constants_match_bindings() { + assert_eq!(NV12, AVPixelFormat::AV_PIX_FMT_NV12 as i32); + assert_eq!(P010LE, AVPixelFormat::AV_PIX_FMT_P010LE as i32); + assert_eq!(P416LE, AVPixelFormat::AV_PIX_FMT_P416LE as i32); + assert_eq!(NONE, -1, "AV_PIX_FMT_NONE must be -1 (FFmpeg ABI sentinel)"); + } + + #[test] + fn match_dispatch_compiles() { + fn classify(v: i32) -> &'static str { + match v { + NV12 => "nv12", + NV21 => "nv21", + P010LE => "p010le", + P210LE => "p210le", + P410LE => "p410le", + _ => "other", + } + } + assert_eq!(classify(NV12), "nv12"); + assert_eq!(classify(P010LE), "p010le"); + assert_eq!(classify(NONE), "other"); + } +} diff --git a/tests/decode.rs b/tests/decode.rs new file mode 100644 index 0000000..2431ff1 --- /dev/null +++ b/tests/decode.rs @@ -0,0 +1,78 @@ +//! Integration test: open the auto-probed decoder against a real video file +//! and decode the first 30 frames. Skipped (with a clear message) when no +//! sample is configured. +//! +//! Set `HWDECODE_SAMPLE_VIDEO` to an absolute path to enable. 
+ +use ffmpeg::{format, media}; +use ffmpeg_next as ffmpeg; +use hwdecode::{Frame, VideoDecoder}; + +const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; + +#[test] +fn auto_open_decodes_at_least_one_frame() { + let Some(path) = std::env::var_os(SAMPLE_ENV) else { + eprintln!("skipping: set {SAMPLE_ENV} to a video file path to run this test"); + return; + }; + + ffmpeg::init().expect("ffmpeg init"); + + let mut input = format::input(&path).expect("open input"); + let stream = input + .streams() + .best(media::Type::Video) + .expect("video stream"); + let stream_index = stream.index(); + let expected_w = unsafe { (*stream.parameters().as_ptr()).width as u32 }; + let expected_h = unsafe { (*stream.parameters().as_ptr()).height as u32 }; + + let mut decoder = match VideoDecoder::open(stream.parameters()) { + Ok(d) => d, + Err(hwdecode::Error::AllBackendsFailed { attempts }) => { + eprintln!( + "skipping: no hardware backend available ({} attempts)", + attempts.len() + ); + return; + } + Err(e) => panic!("open decoder: {e}"), + }; + eprintln!("optimistic backend = {:?}", decoder.backend()); + + assert_eq!(decoder.width(), expected_w); + assert_eq!(decoder.height(), expected_h); + + let mut frame = Frame::empty().expect("alloc frame"); + let mut count = 0_usize; + let target = 30_usize; + + 'outer: for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet).expect("send packet"); + loop { + match decoder.receive_frame(&mut frame) { + Ok(()) => { + assert_eq!(frame.width(), expected_w); + assert_eq!(frame.height(), expected_h); + count += 1; + if count >= target { + break 'outer; + } + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + break; + } + Err(e) => panic!("receive_frame: {e}"), + } + } + } + + assert!(count >= 1, "expected at least 1 decoded frame, got {count}"); + eprintln!("decoded {count} frames via backend {:?}", decoder.backend()); +} diff --git 
a/tests/foo.rs b/tests/foo.rs deleted file mode 100644 index 8b13789..0000000 --- a/tests/foo.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs new file mode 100644 index 0000000..50d67ca --- /dev/null +++ b/tests/hw_smoke.rs @@ -0,0 +1,80 @@ +//! `#[ignore]`-gated smoke test that exercises end-to-end hardware decode +//! against a real video file: opens the auto-probed decoder, drives it +//! until the first frame is delivered, and asserts the active backend is +//! one of the supported HW variants. Run with: +//! +//! ```sh +//! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored +//! ``` + +use ffmpeg::{format, media}; +use ffmpeg_next as ffmpeg; +use hwdecode::{Backend, Frame, VideoDecoder}; + +const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO"; + +#[test] +#[ignore = "requires HWDECODE_SAMPLE_VIDEO and a working hardware backend"] +fn auto_probe_picks_hardware_backend() { + let path = std::env::var_os(SAMPLE_ENV).unwrap_or_else(|| panic!("{SAMPLE_ENV} not set")); + + ffmpeg::init().expect("ffmpeg init"); + + let mut input = format::input(&path).expect("open input"); + let stream = input + .streams() + .best(media::Type::Video) + .expect("video stream"); + let stream_index = stream.index(); + + let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder"); + eprintln!("auto-probe optimistic backend = {:?}", decoder.backend()); + + // Decode at least one frame so the probe collapses, then check the + // backend that actually produced it. Checking `decoder.backend()` before + // any frame has been received would observe the optimistic pre-probe + // value and could false-pass when a HW backend silently degrades. 
+ let mut frame = Frame::empty().expect("alloc frame"); + let mut got_frame = false; + for (s, packet) in input.packets() { + if s.index() != stream_index { + continue; + } + decoder.send_packet(&packet).expect("send packet"); + match decoder.receive_frame(&mut frame) { + Ok(()) => { + got_frame = true; + eprintln!( + "first frame: backend={:?} {}x{} pix_fmt={}", + decoder.backend(), + frame.width(), + frame.height(), + frame.pix_fmt() + ); + break; + } + Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno })) + if errno == ffmpeg::error::EAGAIN => + { + continue; + } + Err(e) => panic!("receive_frame: {e}"), + } + } + assert!(got_frame, "no frames decoded"); + // After the probe collapses, `backend()` reports the backend that + // actually produced the first frame. Make the doc-comment claim + // explicit: it must be one of the HW variants. Today the enum is + // exhaustively HW-only, so `matches!` here is tautological — but it + // documents intent and would catch a future regression that + // reintroduces a non-HW variant or leaves the active state + // mis-classified. + let backend = decoder.backend(); + assert!( + matches!( + backend, + Backend::VideoToolbox | Backend::Vaapi | Backend::Cuda | Backend::D3d11va + ), + "expected HW backend, got {backend:?}" + ); +}