diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index bd7a668..0000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# UNRELEASED
-
-# 0.1.2 (January 6th, 2022)
-
-FEATURES
-
-
diff --git a/Cargo.toml b/Cargo.toml
index ff7fe91..9a3b19a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,35 +1,37 @@
[package]
-name = "template-rs"
-version = "0.0.0"
+name = "hwdecode"
+version = "0.1.0"
edition = "2021"
-repository = "https://github.com/al8n/template-rs"
-homepage = "https://github.com/al8n/template-rs"
-documentation = "https://docs.rs/template-rs"
-description = "A template for creating Rust open-source repo on GitHub"
+rust-version = "1.95"
+description = "Cross-platform hardware-only video decoder built on top of ffmpeg-next, with auto-probe across HW backends. Callers handle software fallback."
+repository = "https://github.com/findit-ai/hwdecode"
+homepage = "https://github.com/findit-ai/hwdecode"
+documentation = "https://docs.rs/hwdecode"
license = "MIT OR Apache-2.0"
-rust-version = "1.73"
-
-[[bench]]
-path = "benches/foo.rs"
-name = "foo"
-harness = false
-
-[features]
-default = ["std"]
-alloc = []
-std = []
[dependencies]
+ffmpeg-next = { version = "8.1", default-features = false, features = ["codec", "format"] }
+thiserror = "2"
+tracing = "0.1"
+libc = "0.2"
[dev-dependencies]
criterion = "0.8"
-tempfile = "3"
+
+[[example]]
+name = "decode"
+path = "examples/decode.rs"
+
+[[bench]]
+name = "decode"
+path = "benches/decode.rs"
+harness = false
[profile.bench]
opt-level = 3
debug = false
codegen-units = 1
-lto = 'thin'
+lto = "thin"
incremental = false
debug-assertions = false
overflow-checks = false
@@ -41,8 +43,6 @@ rustdoc-args = ["--cfg", "docsrs"]
[lints.rust]
rust_2018_idioms = "warn"
-single_use_lifetimes = "warn"
unexpected_cfgs = { level = "warn", check-cfg = [
- 'cfg(all_tests)',
'cfg(tarpaulin)',
] }
diff --git a/README-zh_CN.md b/README-zh_CN.md
deleted file mode 100644
index 7a07f4d..0000000
--- a/README-zh_CN.md
+++ /dev/null
@@ -1,51 +0,0 @@
-
-
template-rs
-
-
-
-开源Rust代码库GitHub模版
-
-[

][Github-url]
-

-[

][CI-url]
-[

][codecov-url]
-
-[

][doc-url]
-[

][crates-url]
-[

][crates-url]
-

-
-[English][en-url] | 简体中文
-
-
-
-## Installation
-
-```toml
-[dependencies]
-template_rs = "0.1"
-```
-
-## Features
-
-- [x] 更快的创建GitHub开源Rust代码库
-
-#### License
-
-`Template-rs` is under the terms of both the MIT license and the
-Apache License (Version 2.0).
-
-See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT) for details.
-
-Copyright (c) 2021 Al Liu.
-
-[Github-url]: https://github.com/al8n/template-rs/
-[CI-url]: https://github.com/al8n/template/actions/workflows/template.yml
-[doc-url]: https://docs.rs/template-rs
-[crates-url]: https://crates.io/crates/template-rs
-[codecov-url]: https://app.codecov.io/gh/al8n/template-rs/
-[license-url]: https://opensource.org/licenses/Apache-2.0
-[rustc-url]: https://github.com/rust-lang/rust/blob/master/RELEASES.md
-[license-apache-url]: https://opensource.org/licenses/Apache-2.0
-[license-mit-url]: https://opensource.org/licenses/MIT
-[en-url]: https://github.com/al8n/template-rs/tree/main/README.md
diff --git a/README.md b/README.md
index 1af27e2..3da5fba 100644
--- a/README.md
+++ b/README.md
@@ -1,46 +1,115 @@
-
-
template-rs
-
-
+# hwdecode
-A template for creating Rust open-source GitHub repo.
+Cross-platform hardware-accelerated video decoder for Rust, built on top of
+[`ffmpeg-next`](https://crates.io/crates/ffmpeg-next).
-[

][Github-url]
-

-[

][CI-url]
-[

][codecov-url]
+`VideoDecoder` mirrors the `send_packet` / `receive_frame` interface of
+`ffmpeg::decoder::Video` and auto-probes the host's hardware backends.
+This crate is **hardware-only** — there is no software fallback inside it.
+If no hardware backend can decode the stream, `Error::AllBackendsFailed`
+surfaces from `VideoDecoder::open` (when no backend opens) or from
+`receive_frame` / `send_packet` / `send_eof` (when the initially-opened
+backend fails at decode time and every remaining backend in the probe order
+also fails — the only way it surfaces on single-backend platforms like macOS).
+The caller decides how to fall back (typically by opening an
+`ffmpeg::decoder::Video` directly). Output frames are CPU-side, downloaded
+with `av_hwframe_transfer_data` (NV12 for 8-bit, P010 for 10-bit). Pixel-
+format conversion is intentionally out of scope; safe per-row access is via
+`Frame::row` / `Frame::rows` (clipped to visible byte width — never includes
+FFmpeg's per-row alignment padding).
-[

][doc-url]
-[

][crates-url]
-[

][crates-url]
-

+## Backends
-English | [简体中文][zh-cn-url]
+| Target | Probe order (HW only) |
+| ------------------- | --------------------------------- |
+| macOS / iOS / tvOS | VideoToolbox |
+| Linux | VAAPI → CUDA |
+| Windows | D3D11VA → CUDA |
+| other | (none) |
-
+If `open` returns `Error::AllBackendsFailed`, software fallback is the
+caller's responsibility (this crate intentionally does not include one).
-## Installation
+## Usage
-```toml
-[dependencies]
-template_rs = "0.1"
+```rust,no_run
+use ffmpeg_next as ffmpeg;
+use ffmpeg::{format, media};
+use hwdecode::{Frame, VideoDecoder};
+
+ffmpeg::init()?;
+
+let mut input = format::input(path)?;
+let stream = input.streams().best(media::Type::Video).unwrap();
+let stream_index = stream.index();
+
+// HW-only open. On AllBackendsFailed, fall back to software yourself.
+let mut decoder = match VideoDecoder::open(stream.parameters()) {
+ Ok(d) => d,
+ Err(hwdecode::Error::AllBackendsFailed { .. }) => {
+ // Caller-side software fallback.
+ let _sw = ffmpeg::codec::Context::from_parameters(stream.parameters())?
+ .decoder()
+ .video()?;
+ // ... drive _sw with send_packet / receive_frame yourself ...
+ return Ok(());
+ }
+ Err(e) => return Err(e.into()),
+};
+println!("backend = {:?}", decoder.backend());
+
+let mut frame = Frame::empty()?;
+for (s, packet) in input.packets() {
+ if s.index() != stream_index { continue; }
+ decoder.send_packet(&packet)?;
+ while decoder.receive_frame(&mut frame).is_ok() {
+ // frame.pix_fmt() is the integer constant — match against
+ // hwdecode::pix_fmt::{NV12, P010LE, ...} and dispatch to your
+ // pixel-format pipeline (e.g. `colconv`).
+ // ... do something with frame ...
+ }
+}
+decoder.send_eof()?;
+while decoder.receive_frame(&mut frame).is_ok() {
+ // ... drain ...
+}
```
-## Features
-- [x] Create a Rust open-source repo fast
+To force a specific hardware backend (no probe, no fallback):
+
+```rust
+use hwdecode::{Backend, VideoDecoder};
+let decoder = VideoDecoder::open_with(parameters, Backend::VideoToolbox)?;
+```
+
+`hwdecode` is hardware-only: there is no `Backend::Software`. If `open`
+returns `Error::AllBackendsFailed`, fall back to a software decoder
+yourself (typically `ffmpeg::decoder::Video`).
+
+## Running tests and benches
+
+The integration test and benchmark expect a real video file. Set
+`HWDECODE_SAMPLE_VIDEO` to enable them:
+
+```sh
+HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test
+HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored
+HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench
+```
-#### License
+Without the env var the integration test skips with a notice; unit tests run
+unconditionally.
-`template-rs` is under the terms of both the MIT license and the
-Apache License (Version 2.0).
+## Build requirements
-See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT) for details.
+- A system FFmpeg ≥ **5.1** linkable via `pkg-config` (we reference
+ `AV_PIX_FMT_P212LE` / `AV_PIX_FMT_P412LE`, which were added in 5.1).
+ Tested against 8.1. Verify with
+ `ffmpeg -hwaccels` that your build has the backends you expect compiled in
+ (e.g. `videotoolbox` on macOS, `vaapi` / `cuda` on Linux,
+ `d3d11va` / `cuda` on Windows).
+- Rust ≥ 1.95.
-Copyright (c) 2021 Al Liu.
+## License
-[Github-url]: https://github.com/al8n/template-rs/
-[CI-url]: https://github.com/al8n/template-rs/actions/workflows/ci.yml
-[doc-url]: https://docs.rs/template-rs
-[crates-url]: https://crates.io/crates/template-rs
-[codecov-url]: https://app.codecov.io/gh/al8n/template-rs/
-[zh-cn-url]: https://github.com/al8n/template-rs/tree/main/README-zh_CN.md
+MIT or Apache-2.0, at your option.
diff --git a/benches/decode.rs b/benches/decode.rs
new file mode 100644
index 0000000..9e53f0a
--- /dev/null
+++ b/benches/decode.rs
@@ -0,0 +1,173 @@
+//! Benchmark comparing software-only decode (via `ffmpeg-next` directly,
+//! since `hwdecode` is hardware-only) against `hwdecode`'s auto-probed
+//! hardware backend on the same input file.
+//!
+//! Set `HWDECODE_SAMPLE_VIDEO` to a video file path. The hardware bench is
+//! skipped (with a notice) when no hardware backend is available on the host.
+//!
+//! ```sh
+//! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo bench
+//! ```
+
+use std::{path::PathBuf, time::Duration};
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use ffmpeg::{codec::Context as CodecContext, format, frame, media};
+use ffmpeg_next as ffmpeg;
+use hwdecode::{Frame, VideoDecoder};
+
+const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
+
+fn sample_path() -> Option {
+ std::env::var_os(SAMPLE_ENV).map(PathBuf::from)
+}
+
+/// Decode every frame using `hwdecode`'s auto-probed hardware backend.
+fn decode_all_hw(path: &PathBuf) -> Result {
+ let mut input = format::input(path).map_err(hwdecode::Error::Ffmpeg)?;
+ let stream = input
+ .streams()
+ .best(media::Type::Video)
+ .ok_or(hwdecode::Error::Ffmpeg(ffmpeg::Error::StreamNotFound))?;
+ let stream_index = stream.index();
+
+ let mut decoder = VideoDecoder::open(stream.parameters())?;
+ let mut frame = Frame::empty()?;
+ let mut count = 0_usize;
+
+ let mut drain = |decoder: &mut VideoDecoder, count: &mut usize| -> Result<(), hwdecode::Error> {
+ loop {
+ match decoder.receive_frame(&mut frame) {
+ Ok(()) => *count += 1,
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+ if errno == ffmpeg::error::EAGAIN =>
+ {
+ return Ok(());
+ }
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Eof)) => return Ok(()),
+ Err(e) => return Err(e),
+ }
+ }
+ };
+
+ for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ decoder.send_packet(&packet)?;
+ drain(&mut decoder, &mut count)?;
+ }
+ decoder.send_eof()?;
+ drain(&mut decoder, &mut count)?;
+ Ok(count)
+}
+
+/// Decode every frame using a plain software `ffmpeg-next` decoder. Used as
+/// the SW baseline since `hwdecode` no longer exposes a software backend.
+fn decode_all_sw(path: &PathBuf) -> Result {
+ let mut input = format::input(path)?;
+ let stream = input
+ .streams()
+ .best(media::Type::Video)
+ .ok_or(ffmpeg::Error::StreamNotFound)?;
+ let stream_index = stream.index();
+ let mut decoder = CodecContext::from_parameters(stream.parameters())?
+ .decoder()
+ .video()?;
+
+ let mut frame = frame::Video::empty();
+ let mut count = 0_usize;
+
+ let mut drain =
+ |decoder: &mut ffmpeg::decoder::Video, count: &mut usize| -> Result<(), ffmpeg::Error> {
+ loop {
+ match decoder.receive_frame(&mut frame) {
+ Ok(()) => *count += 1,
+ Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::error::EAGAIN => return Ok(()),
+ Err(ffmpeg::Error::Eof) => return Ok(()),
+ Err(e) => return Err(e),
+ }
+ }
+ };
+
+ for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ decoder.send_packet(&packet)?;
+ drain(&mut decoder, &mut count)?;
+ }
+ decoder.send_eof()?;
+ drain(&mut decoder, &mut count)?;
+ Ok(count)
+}
+
+fn bench_decode(c: &mut Criterion) {
+ ffmpeg::init().expect("ffmpeg init");
+
+ let Some(path) = sample_path() else {
+ eprintln!("skipping benches: set {SAMPLE_ENV} to a video file path");
+ return;
+ };
+
+ // Probe by decoding one frame so the probe collapses to the backend that
+ // actually produced output. None means no HW backend is available — we
+ // skip the HW arm and bench SW only.
+ let probed_backend = {
+ let mut input = format::input(&path).expect("open input");
+ let stream = input
+ .streams()
+ .best(media::Type::Video)
+ .expect("video stream");
+ let stream_index = stream.index();
+ match VideoDecoder::open(stream.parameters()) {
+ Ok(mut dec) => {
+ let mut frame = Frame::empty().expect("alloc probe frame");
+ 'probe: for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ dec.send_packet(&packet).expect("probe send_packet");
+ match dec.receive_frame(&mut frame) {
+ Ok(()) => break 'probe,
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+ if errno == ffmpeg::error::EAGAIN =>
+ {
+ continue;
+ }
+ Err(e) => panic!("probe receive_frame: {e}"),
+ }
+ }
+ Some(dec.backend())
+ }
+ Err(hwdecode::Error::AllBackendsFailed { .. }) => None,
+ Err(e) => panic!("hwdecode probe: {e}"),
+ }
+ };
+ match probed_backend {
+ Some(b) => eprintln!("auto-probe settled on backend: {b:?}"),
+ None => eprintln!("no hardware backend available — hardware bench will be skipped"),
+ }
+
+ let mut group = c.benchmark_group("decode");
+ group.measurement_time(Duration::from_secs(15));
+ group.sample_size(20);
+
+ group.bench_function("software", |b| {
+ b.iter(|| decode_all_sw(&path).expect("software decode"))
+ });
+
+ if probed_backend.is_some() {
+ group.bench_function("hardware", |b| {
+ b.iter(|| {
+ let n = decode_all_hw(&path).expect("hardware decode");
+ std::hint::black_box(n);
+ })
+ });
+ }
+
+ group.finish();
+}
+
+criterion_group!(benches, bench_decode);
+criterion_main!(benches);
diff --git a/benches/foo.rs b/benches/foo.rs
deleted file mode 100644
index f328e4d..0000000
--- a/benches/foo.rs
+++ /dev/null
@@ -1 +0,0 @@
-fn main() {}
diff --git a/docs/design.md b/docs/design.md
new file mode 100644
index 0000000..521dd49
--- /dev/null
+++ b/docs/design.md
@@ -0,0 +1,120 @@
+# hwdecode — design
+
+Cross-platform **hardware-only** video decoder built on top of `ffmpeg-next` 8.1.
+
+> **Status note.** This document was the original spec from the brainstorm
+> phase and parts have evolved since: the crate is hardware-only (no
+> `Backend::Software`), `Frame` is its own safe wrapper, and several pixel-
+> format / safety details were tightened during review. For the canonical
+> behavior, read `src/lib.rs` and `README.md`. Sections below have been
+> trimmed where they conflicted; the spec is otherwise preserved as
+> historical context.
+
+## Goals
+
+- Drop-in replacement for `ffmpeg::decoder::Video` at the call site (`send_packet` / `receive_frame` / `send_eof` / `flush`).
+- Auto-probe the platform's hardware backends. **No software fallback inside this crate** — callers handle that themselves (e.g. via `ffmpeg::decoder::Video`) when `open` returns `Error::AllBackendsFailed`.
+- Hand back native-format CPU frames (NV12/P010 from the HW path post-transfer). Pixel-format conversion is the caller's responsibility (e.g. via `colconv`).
+- Cross-platform: macOS / iOS / iPadOS / tvOS, Linux (Intel/AMD/NVIDIA), Windows (any GPU + CUDA on NVIDIA).
+
+## Non-goals
+
+- Audio hardware decoding. Out of scope; software AAC/Opus/etc. is fast enough that the complexity isn't justified.
+- Demuxing. Callers open files/streams themselves (e.g. via `findit-demuxer`) and feed packets in.
+- Pixel-format conversion. Done downstream (`colconv`).
+- Encoding.
+
+## Public API
+
+> The original spec listed an inline API surface here. It diverged from the
+> shipping crate (`Backend::Software` was removed; `format() -> Pixel` was
+> removed in favor of `Frame::pix_fmt() -> i32`; the `Frame` wrapper
+> replaced `frame::Video`; `Error` gained / dropped variants). Rather than
+> keep stale signatures here, the canonical reference is `src/lib.rs` and
+> the public docs on each item. See the README for a runnable usage
+> example.
+
+## Behavior
+
+### Probe order
+
+| Target | Order tried (HW only) |
+| ------------------- | -------------------------------------------- |
+| macOS, iOS, tvOS | `[VideoToolbox]` |
+| Linux | `[Vaapi, Cuda]` |
+| Windows | `[D3d11va, Cuda]` |
+| Other | `[]` → `Error::AllBackendsFailed` |
+
+A HW backend is a candidate only if **(a)** its `AVHWDeviceType` device can be created via `av_hwdevice_ctx_create`, and **(b)** the codec advertises support via `avcodec_get_hw_config` matching that device type. The first candidate that fully opens wins. Each failure logs `tracing::warn!` with the backend and the underlying error and the loop tries the next. If every backend fails (or the platform has none), `open` returns `Error::AllBackendsFailed`; software fallback is the caller's responsibility.
+
+### Device selection
+
+Always device 0 / system default (`av_hwdevice_ctx_create(.., NULL, ..)`). No env var, no config knob in v1. Add later if the multi-GPU use case appears.
+
+### `get_format` callback
+
+A static `extern "C"` callback. The decoder context's `opaque` field points to a small heap-allocated `CallbackState`. The callback walks the offered `pix_fmts` list as raw `i32` (avoiding bindgen-enum UB on header skew), returns `wanted` if present, else `AV_PIX_FMT_NONE` (which causes the decoder to fail; the caller-side probe loop then tears down and tries the next hardware backend).
+
+### Frame transfer
+
+`receive_frame` always:
+
+1. Reads from the codec into an internal `hw_frame: ffmpeg::frame::Video` (allocated once, reused).
+2. If the frame's format is the HW pix fmt, calls `av_hwframe_transfer_data(out, hw_frame, 0)` into the caller's `&mut frame`. Copies `pts`, `pkt_dts`, `time_base`, `duration` (FFmpeg does not transfer timing).
+3. Otherwise (SW path or decoder fell back mid-stream), clones the frame into the caller's slot.
+
+### Threading
+
+`VideoDecoder: Send + !Sync`. Each instance owns its own `AVCodecContext` and `AVBufferRef*`. Multiple decoders can run on different threads; a single decoder is not concurrent.
+
+### Drop
+
+`Drop` calls `av_buffer_unref(&mut self.hw_device_ref)` if non-null, frees the boxed `CallbackState`, then lets `ffmpeg::decoder::Video`'s own Drop free the codec context.
+
+## Internals
+
+```text
+src/
+├── lib.rs // re-exports + crate-level docs
+├── error.rs // Error enum
+├── backend.rs // Backend enum, probe order, AVHWDeviceType <-> Backend mapping
+├── decoder.rs // VideoDecoder, open/open_with, send/receive
+└── ffi.rs // get_format callback, av_hwdevice_ctx_create / transfer wrappers,
+ // avcodec_get_hw_config probe
+```
+
+No other modules. Keep the surface small.
+
+## Build & dependencies
+
+- `ffmpeg-next = { version = "8.1", default-features = false, features = ["codec", "format"] }`
+- `thiserror = "2"`
+- `tracing = "0.1"`
+- `libc = "0.2"`
+
+No platform-specific Cargo features. `cfg!(target_os = ...)` selects which `AVHWDeviceType` constants we even attempt — the FFI symbols are linked unconditionally via `ffmpeg-sys-next`.
+
+System FFmpeg ≥ **5.1** (we reference `AV_PIX_FMT_P212LE` / `AV_PIX_FMT_P412LE`,
+added upstream in 5.1). Verified against the macOS Homebrew build (FFmpeg 8.1,
+VideoToolbox enabled).
+
+## Testing
+
+1. **Unit tests** (`src/backend.rs`, `src/error.rs`) — pure-Rust: probe-order construction per platform, `Backend` ↔ `AVHWDeviceType` mapping, error formatting.
+2. **Integration** (`tests/decode.rs`) — opens a sample H.264 file via `ffmpeg::format::input`, decodes 30 frames through `VideoDecoder::open` (auto-probe), asserts frame count and dimensions. Sample path comes from env var `HWDECODE_SAMPLE_VIDEO`; test is skipped with a clear `eprintln!` if unset.
+3. **HW smoke** (`tests/hw_smoke.rs`, `#[ignore]`) — same decode, asserts `decoder.backend()` returns one of the hardware variants (the enum no longer has a Software variant; this is a sanity check against accidental no-op selection). CI runs this on platform-matched runners.
+
+Sample-file env var keeps the repo binary-free. Documented in `README.md`.
+
+## Benchmark
+
+`benches/decode.rs` (criterion) — two functions:
+
+- `bench_software_decode` — drives `ffmpeg::decoder::Video` directly (this crate has no software backend), decodes all frames, measures wall-clock per frame.
+- `bench_hardware_decode` — `VideoDecoder::open(..)` (auto-probe). Skipped if `open` returns `AllBackendsFailed` (no HW backend available on this host).
+
+Both use the same `HWDECODE_SAMPLE_VIDEO` file. Bench prints which backend the HW run actually used, so results are interpretable across machines.
+
+## Examples
+
+`examples/decode.rs` — opens a path from `argv[1]` with `ffmpeg::format::input`, finds the best video stream, feeds packets through `VideoDecoder`, prints `(pts, width, height, format, backend)` for each frame.
diff --git a/examples/decode.rs b/examples/decode.rs
new file mode 100644
index 0000000..1d14de1
--- /dev/null
+++ b/examples/decode.rs
@@ -0,0 +1,90 @@
+//! Decode every video frame in `argv[1]`, printing one line per frame.
+//!
+//! ```sh
+//! cargo run --release --example decode -- /path/to/video.mp4
+//! ```
+
+use ffmpeg::{format, media};
+use ffmpeg_next as ffmpeg;
+use hwdecode::{Frame, VideoDecoder};
+
+fn main() -> Result<(), Box> {
+ let path = std::env::args()
+ .nth(1)
+ .ok_or("usage: decode ")?;
+
+ ffmpeg::init()?;
+
+ let mut input = format::input(&path)?;
+ let stream = input
+ .streams()
+ .best(media::Type::Video)
+ .ok_or("no video stream")?;
+ let stream_index = stream.index();
+
+ let mut decoder = match VideoDecoder::open(stream.parameters()) {
+ Ok(d) => d,
+ Err(hwdecode::Error::AllBackendsFailed { attempts }) => {
+ eprintln!(
+ "no hardware backend available; tried {} backend(s):",
+ attempts.len()
+ );
+ for (b, e) in &attempts {
+ eprintln!(" {b:?}: {e}");
+ }
+ eprintln!("(callers handle software fallback themselves — see ffmpeg::decoder::Video)");
+ return Ok(());
+ }
+ Err(e) => return Err(e.into()),
+ };
+ println!(
+ "open: backend={:?} {}x{}",
+ decoder.backend(),
+ decoder.width(),
+ decoder.height(),
+ );
+
+ let mut frame = Frame::empty()?;
+ let mut count: u64 = 0;
+
+ let drain = |decoder: &mut VideoDecoder, frame: &mut Frame, count: &mut u64| loop {
+ match decoder.receive_frame(frame) {
+ Ok(()) => {
+ *count += 1;
+ println!(
+ "frame#{count} pts={:?} {}x{} pix_fmt={}",
+ frame.pts(),
+ frame.width(),
+ frame.height(),
+ frame.pix_fmt(),
+ );
+ }
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+ if errno == ffmpeg::error::EAGAIN =>
+ {
+ break
+ }
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Eof)) => break,
+ Err(e) => {
+ eprintln!("decode error: {e}");
+ break;
+ }
+ }
+ };
+
+ for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ decoder.send_packet(&packet)?;
+ drain(&mut decoder, &mut frame, &mut count);
+ }
+ decoder.send_eof()?;
+ drain(&mut decoder, &mut frame, &mut count);
+
+ println!(
+ "decoded {count} frames; final backend={:?}",
+ decoder.backend()
+ );
+ Ok(())
+}
diff --git a/examples/foo.rs b/examples/foo.rs
deleted file mode 100644
index f328e4d..0000000
--- a/examples/foo.rs
+++ /dev/null
@@ -1 +0,0 @@
-fn main() {}
diff --git a/src/backend.rs b/src/backend.rs
new file mode 100644
index 0000000..00cf82e
--- /dev/null
+++ b/src/backend.rs
@@ -0,0 +1,118 @@
+use ffmpeg_next::ffi::{AVHWDeviceType, AVPixelFormat};
+
+/// Hardware decoding backend.
+///
+/// `hwdecode` only manages **hardware** decoders — software fallback is
+/// out of scope. If no backend in [`probe_order`] for the current platform
+/// can decode a stream, [`crate::VideoDecoder::open`] returns
+/// [`crate::Error::AllBackendsFailed`] and the caller decides how to fall
+/// back (e.g. by opening an `ffmpeg::decoder::Video` directly).
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Backend {
+ /// Apple VideoToolbox (macOS, iOS, iPadOS, tvOS, visionOS).
+ VideoToolbox,
+ /// Linux Video Acceleration API (Intel / AMD GPUs).
+ Vaapi,
+ /// NVIDIA NVDEC via CUDA (Linux / Windows on NVIDIA hardware).
+ Cuda,
+ /// Microsoft Direct3D 11 Video Acceleration (Windows).
+ D3d11va,
+}
+
+impl Backend {
+ /// `AVHWDeviceType` corresponding to this backend.
+ pub(crate) fn av_hwdevice_type(self) -> AVHWDeviceType {
+ match self {
+ Self::VideoToolbox => AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
+ Self::Vaapi => AVHWDeviceType::AV_HWDEVICE_TYPE_VAAPI,
+ Self::Cuda => AVHWDeviceType::AV_HWDEVICE_TYPE_CUDA,
+ Self::D3d11va => AVHWDeviceType::AV_HWDEVICE_TYPE_D3D11VA,
+ }
+ }
+
+ /// Hardware pixel format the codec is expected to produce when this
+ /// backend is in use. (The post-`av_hwframe_transfer_data` CPU format is
+ /// typically `NV12` or `P010LE`; this is the *pre-transfer* sentinel.)
+ ///
+ /// Returns a `AVPixelFormat` value constructed from a hardcoded constant
+ /// in our bindings — never reads an enum value supplied by FFmpeg, so
+ /// no enum-discriminant UB risk.
+ pub(crate) fn hw_pixel_format(self) -> AVPixelFormat {
+ match self {
+ Self::VideoToolbox => AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX,
+ Self::Vaapi => AVPixelFormat::AV_PIX_FMT_VAAPI,
+ Self::Cuda => AVPixelFormat::AV_PIX_FMT_CUDA,
+ Self::D3d11va => AVPixelFormat::AV_PIX_FMT_D3D11,
+ }
+ }
+}
+
+/// Probe order for `VideoDecoder::open` on the current target. Hardware
+/// backends only, in preference order. Empty for platforms with no known
+/// HW backend; on those `open()` returns `AllBackendsFailed` immediately.
+pub(crate) fn probe_order() -> &'static [Backend] {
+ #[cfg(any(
+ target_os = "macos",
+ target_os = "ios",
+ target_os = "tvos",
+ target_os = "visionos",
+ ))]
+ {
+ &[Backend::VideoToolbox]
+ }
+ #[cfg(target_os = "linux")]
+ {
+ &[Backend::Vaapi, Backend::Cuda]
+ }
+ #[cfg(target_os = "windows")]
+ {
+ &[Backend::D3d11va, Backend::Cuda]
+ }
+ #[cfg(not(any(
+ target_os = "macos",
+ target_os = "ios",
+ target_os = "tvos",
+ target_os = "visionos",
+ target_os = "linux",
+ target_os = "windows",
+ )))]
+ {
+ &[]
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn all_backends_have_hwdevice_type_and_pix_fmt() {
+ for b in [
+ Backend::VideoToolbox,
+ Backend::Vaapi,
+ Backend::Cuda,
+ Backend::D3d11va,
+ ] {
+ let _ = b.av_hwdevice_type();
+ let _ = b.hw_pixel_format();
+ }
+ }
+
+ #[cfg(any(target_os = "macos", target_os = "ios", target_os = "tvos"))]
+ #[test]
+ fn apple_probe_order() {
+ assert_eq!(probe_order(), &[Backend::VideoToolbox]);
+ }
+
+ #[cfg(target_os = "linux")]
+ #[test]
+ fn linux_probe_order() {
+ assert_eq!(probe_order(), &[Backend::Vaapi, Backend::Cuda]);
+ }
+
+ #[cfg(target_os = "windows")]
+ #[test]
+ fn windows_probe_order() {
+ assert_eq!(probe_order(), &[Backend::D3d11va, Backend::Cuda]);
+ }
+}
diff --git a/src/decoder.rs b/src/decoder.rs
new file mode 100644
index 0000000..a241b68
--- /dev/null
+++ b/src/decoder.rs
@@ -0,0 +1,2084 @@
+use std::{collections::VecDeque, mem::ManuallyDrop, ptr};
+
+use ffmpeg_next::{
+ codec::{
+ self,
+ packet::{Mut as PacketMut, Ref as PacketRef},
+ Context,
+ },
+ ffi::{
+ av_buffer_ref, av_buffer_unref, av_frame_move_ref, av_frame_unref, av_hwdevice_ctx_create,
+ av_hwframe_transfer_data, av_packet_ref, avcodec_alloc_context3, avcodec_free_context,
+ avcodec_parameters_alloc, avcodec_parameters_copy, avcodec_parameters_free,
+ avcodec_parameters_to_context, AVBufferRef, AVCodec, AVFrame, AVMediaType,
+ },
+ frame, Codec, Packet, Rational,
+};
+
+/// Local FFI shim: `avcodec_find_decoder` declared with `c_int` instead of
+/// the bindgen `AVCodecID` enum. Constructing `AVCodecID` from a runtime
+/// integer that isn't in our build's discriminant set is UB; calling the
+/// C function with a raw int avoids that boundary entirely. Both Rust
+/// declarations resolve to the same C symbol at link time.
+mod c_shims {
+ use super::AVCodec;
+ use libc::c_int;
+ extern "C" {
+ pub fn avcodec_find_decoder(id: c_int) -> *const AVCodec;
+ }
+}
+
+use crate::{
+ backend::{self, Backend},
+ error::{Error, Result},
+ ffi::{codec_supports_hwaccel, get_hw_format, CallbackState},
+ frame::Frame,
+};
+
+/// Hardware-accelerated video decoder.
+///
+/// Hardware-only — there is no software fallback inside this crate. If
+/// every hardware backend in the platform's probe order fails to open,
+/// `open` returns [`Error::AllBackendsFailed`] and the caller is
+/// responsible for falling back to a software decoder of their choice
+/// (e.g. `ffmpeg::decoder::Video`).
+///
+/// Mirrors `ffmpeg::decoder::Video`'s `send_packet`/`receive_frame` interface.
+/// Decoded frames are returned through [`crate::Frame`], a CPU-side wrapper
+/// whose accessors avoid the `AVPixelFormat`-enum UB that an unvalidated read
+/// of FFmpeg's raw integer pixel formats can trigger.
+///
+/// `open` does a true probe: each backend opens with a strict `get_format`
+/// callback. On the first non-transient error from a backend the decoder is
+/// torn down and the next backend in probe order is tried, with all packets
+/// seen so far replayed through it. The advance is *transactional* — the
+/// candidate backend must successfully build and accept the replayed packets
+/// before any probe state is consumed, so a failing backend in the middle of
+/// the order does not strand the caller without history. Once the first frame
+/// is delivered the probe collapses and subsequent calls go straight to the
+/// active backend.
+pub struct VideoDecoder {
+ /// Live FFmpeg state for the currently active backend.
+ state: DecoderState,
+ /// Reusable frame buffer used for hw-side decoding before transfer / move.
+ /// Internal use only — never handed to callers.
+ hw_frame: frame::Video,
+ /// Probe state: present until the first frame is received from the active
+ /// backend, then `None`. While `Some`, packets are buffered for replay and
+ /// non-transient errors / decoder failures advance to the next backend.
+ probe: Option,
+ /// CPU-side frames produced by a candidate decoder during probe replay
+ /// (when its internal queue filled and we had to drain output before the
+ /// next `send_packet`). Already transferred from the candidate's
+ /// `AVHWFramesContext` to a CPU frame, so they remain valid after the
+ /// candidate state is committed. [`Self::receive_frame`] dequeues these
+ /// FIFO before reading from `state.inner`.
+ pending_frames: VecDeque,
+ /// Per-decoder byte budget for [`Self::pending_frames`] during probe
+ /// replay. Defaults to [`DEFAULT_MAX_PROBE_PENDING_BYTES`]; override via
+ /// [`Self::with_max_probe_pending_bytes`].
+ max_probe_pending_bytes: usize,
+}
+
+/// Owned FFmpeg state for one open codec context. Has its own `Drop` so we
+/// can swap it out cleanly during a probe advance via `mem::replace`.
+struct DecoderState {
+ /// Wrapped FFmpeg decoder. `ManuallyDrop` so we can sequence its drop
+ /// before freeing the callback state.
+ inner: ManuallyDrop,
+ /// Backend driving this state.
+ backend: Backend,
+ /// Owned reference produced by `av_hwdevice_ctx_create`.
+ hw_device_ref: *mut AVBufferRef,
+ /// Owned `Box` raw pointer; `AVCodecContext::opaque`
+ /// aliases it.
+ callback_state: *mut CallbackState,
+}
+
+/// Maximum number of packets we are willing to buffer for probe replay
+/// before abandoning the fallback safety net. Set high enough to absorb
+/// long B-frame GOPs and codec setup latency, low enough to bound memory
+/// against malicious / pathological streams that never produce a first
+/// frame.
+const MAX_PROBE_PACKETS: usize = 256;
+
+/// Maximum total compressed-byte size of buffered probe packets. Each
+/// `Packet` clone holds a refcounted reference to the demuxer's bitstream
+/// data — even though the clone itself is shallow, the underlying buffers
+/// stay alive until we drop them. 64 MiB is generous for normal video and
+/// gives untrusted media a hard ceiling.
+const MAX_PROBE_PACKET_BYTES: usize = 64 * 1024 * 1024;
+
+/// Hard cap on the number of side-data entries we tolerate per buffered
+/// packet. `av_packet_ref` allocates an `AVPacketSideData` descriptor and
+/// an `AVBufferRef` per entry, so a packet stuffed with many tiny or
+/// zero-sized entries can consume significant memory in descriptor /
+/// allocator overhead even after [`packet_side_data_bytes`] charges
+/// [`SIDE_DATA_ENTRY_OVERHEAD`] bytes per entry. Refusing to clone such
+/// packets short-circuits the descriptor explosion path.
+///
+/// Sized for legitimate streams (typical video packets carry 0-5 side-
+/// data entries; SEI-heavy HEVC/AV1 maybe a dozen) while comfortably
+/// rejecting weaponised input.
+const MAX_PROBE_PACKET_SIDE_DATA_ENTRIES: usize = 64;
+
+/// Conservative per-side-data-entry overhead estimate used by both
+/// [`packet_side_data_bytes`] and the budget accounting in
+/// [`VideoDecoder::send_packet`]. Counts the `AVPacketSideData`
+/// descriptor (24 bytes per the FFmpeg 8.x bindings), the `AVBufferRef`
+/// FFmpeg allocates per entry, and a margin for malloc bookkeeping
+/// (header bytes, alignment slack). Setting it on the high side keeps
+/// the byte cap a true upper bound on retained memory; under-charging
+/// would let many tiny entries slip past the cap.
+const SIDE_DATA_ENTRY_OVERHEAD: usize = 80;
+
+/// Conservative upper-bound bytes-per-pixel multiplier used to estimate
+/// the size of a CPU frame **before** `av_hwframe_transfer_data`
+/// allocates its pixel buffers. Covers every HW download format this
+/// crate produces (worst case is `P416LE` / `P412LE` at 6 bytes/pixel
+/// for 16-bit 4:4:4 semi-planar) plus a margin for FFmpeg's per-row
+/// stride alignment (typically 32-byte aligned, ~5% extra at HD widths
+/// and below).
+///
+/// Used by [`drain_into_pending`] as a pre-transfer guard: if the
+/// product `width * height * WORST_CASE_BYTES_PER_PIXEL` would already
+/// push `pending_bytes` past `max_probe_pending_bytes`, the candidate
+/// replay refuses the frame *before* allocating. Without this, FFmpeg
+/// would perform the full HW→CPU download (potentially ~100 MiB for
+/// 8K HDR) and we would only reject the frame after RSS had already
+/// spiked. The post-transfer accounting via [`cpu_frame_bytes`] stays in
+/// place as a backstop using the frame's actual stride/format.
+///
+/// Slightly over-charges true 4:2:0 NV12 / P010 frames (which dominate
+/// real workloads) — that's the right side to err on. Callers feeding
+/// 8K+ workloads through the probe path can tune
+/// [`VideoDecoder::with_max_probe_pending_bytes`] upward to compensate.
+const WORST_CASE_BYTES_PER_PIXEL: usize = 8;
+
+/// Maximum number of CPU frames we are willing to queue from a candidate
+/// during probe replay. Each frame is a fully-allocated CPU buffer
+/// (~3 MiB for 1080p NV12, ~24 MiB for 4K P010, ~96 MiB for 8K P010), so
+/// an unbounded queue would OOM on a candidate with a shallow internal
+/// queue against a deep replay history. This cap, together with
+/// [`DEFAULT_MAX_PROBE_PENDING_BYTES`], is enforced as a hard limit during
+/// replay: once either limit is reached, probe buffering fails for the
+/// candidate (returns `ENOMEM` from `drain_into_pending`) instead of
+/// queueing additional drained frames. The probe loop then advances to
+/// the next backend or returns `Error::AllBackendsFailed` if exhausted.
+const MAX_PROBE_PENDING_FRAMES: usize = 16;
+
+/// Default byte budget for probe-replay drained frames. 256 MiB is enough
+/// for 16 frames at 4K P010 (~24 MiB each = 384 MiB worst case under the
+/// count cap), and is the cap that fires first for very high-resolution
+/// content (8K P010: ~96 MiB per frame → only ~2 frames fit).
+///
+/// Override per-decoder with [`VideoDecoder::with_max_probe_pending_bytes`]
+/// when targeting 8K+ workloads or memory-constrained environments.
+///
+/// TODO: when frames significantly exceed typical sizes, consider
+/// memmap-backed pending buffers (write transferred frames to a temp file
+/// or shared-memory segment) so the resident set stays bounded even when
+/// the byte cap is raised. Out of scope for v0.0.0.
+pub const DEFAULT_MAX_PROBE_PENDING_BYTES: usize = 256 * 1024 * 1024;
+
+/// State carried only during the probe window (before the first successful
+/// frame). Holds enough information to tear down the current decoder and
+/// retry with the next backend.
+struct ProbeState {
+ parameters: codec::Parameters,
+ codec: Codec,
+ /// Backends still to try, in order. Empty means "no more options after
+ /// the active one fails" — `advance_probe` then surfaces
+ /// [`Error::AllBackendsFailed`] so the contract is the same on
+ /// single-backend platforms (e.g. macOS) as on multi-backend ones.
+ remaining_backends: Vec,
+ /// Packets sent so far, kept for replay through any candidate backend.
+ /// Preserved across failed candidates — only cleared when the probe
+ /// collapses on a successful first frame, or when the probe is
+ /// abandoned due to the size caps.
+ buffered_packets: Vec,
+ /// Cumulative size (in compressed bytes) of `buffered_packets`. Tracked
+ /// incrementally so we don't have to re-sum on every send.
+ buffered_bytes: usize,
+ /// Whether `send_eof` has been called; replayed alongside packets.
+ eof_sent: bool,
+ /// Per-backend errors captured since the probe window opened. Pushed
+ /// whenever a backend's failure triggers `advance_probe` (the active
+ /// backend that just failed) or a candidate's build / replay rejects
+ /// it. Drained into [`Error::AllBackendsFailed`] when the probe
+ /// exhausts every option.
+ attempts: Vec<(Backend, Box)>,
+}
+
+// SAFETY: All raw pointers are exclusively owned by `DecoderState` and never
+// shared. `ffmpeg::decoder::Video` is itself `Send` (its `Context` carries an
+// `unsafe impl Send`). The decoder is not safe for concurrent use, hence not
+// `Sync`.
+unsafe impl Send for DecoderState {}
+unsafe impl Send for VideoDecoder {}
+
+impl Drop for DecoderState {
+ fn drop(&mut self) {
+ // Order matters:
+ // 1. Drop the codec context first. While it lives, FFmpeg may invoke
+ // `get_format`, which dereferences `callback_state` via `opaque`.
+ // 2. Free the callback state heap allocation.
+ // 3. Release our hw device reference (FFmpeg released its own when
+ // the codec context was freed in step 1).
+ unsafe {
+ ManuallyDrop::drop(&mut self.inner);
+ if !self.callback_state.is_null() {
+ drop(Box::from_raw(self.callback_state));
+ self.callback_state = ptr::null_mut();
+ }
+ if !self.hw_device_ref.is_null() {
+ av_buffer_unref(&mut self.hw_device_ref);
+ }
+ }
+ }
+}
+
+impl VideoDecoder {
+ /// Auto-probe hardware backends in the platform's default order.
+ ///
+ /// Each backend opens with a strict `get_format` callback. The first
+ /// backend whose `avcodec_open2` succeeds becomes active; if its first
+ /// frame is unusable (decode error, transfer failure, or a CPU-format
+ /// frame from a HW context) the decoder is torn down and the next backend
+ /// is tried — packets sent so far are replayed through the new decoder
+ /// transparently. The probe advance is transactional: the next backend
+ /// must build *and* accept the replayed history before any probe state is
+ /// consumed, so a misbehaving middle backend cannot strand the caller.
+ ///
+ /// [`Self::backend`] reflects whichever backend ultimately produced the
+ /// first frame.
+ ///
+ /// [`Error::AllBackendsFailed`] surfaces in two places, with the same
+ /// meaning ("no hardware backend can decode this stream — fall back to
+ /// software yourself"):
+ /// - From `open` itself, when no backend even opens.
+ /// - From [`Self::send_packet`] / [`Self::send_eof`] /
+ /// [`Self::receive_frame`], when the initially-opened backend fails
+ /// at decode time and every remaining backend in the probe order
+ /// either also fails or doesn't exist. On single-backend platforms
+ /// (e.g. macOS, where the order is `[VideoToolbox]`), this is the
+ /// only place a HW-only failure surfaces.
+ ///
+ /// In both cases, `attempts` carries the per-backend error log so the
+ /// caller can decide how to proceed with software fallback.
+ pub fn open(parameters: codec::Parameters) -> Result {
+ let codec = find_decoder(¶meters)?;
+ let order = backend::probe_order();
+
+ let mut attempts: Vec<(Backend, Box)> = Vec::new();
+ for (i, &backend) in order.iter().enumerate() {
+ // Use the checked clone — ffmpeg-next's `Parameters::clone` does
+ // `avcodec_parameters_alloc` without a null check and ignores the
+ // return of `avcodec_parameters_copy`. Under OOM that path silently
+ // produces a Parameters with a null inner pointer.
+ let cloned_for_build = match try_clone_parameters(¶meters) {
+ Ok(p) => p,
+ Err(e) => {
+ tracing::warn!(?backend, error = %e, "hwdecode: parameters clone failed");
+ attempts.push((backend, Box::new(Error::Ffmpeg(e))));
+ continue;
+ }
+ };
+ match Self::build_state(cloned_for_build, codec, backend) {
+ Ok(state) => {
+ tracing::info!(?backend, "hwdecode: opened video decoder (probing)");
+ let remaining = order[(i + 1)..].to_vec();
+ // Deep-copy the caller's `parameters` before storing in ProbeState.
+ // `codec::Parameters` from `stream.parameters()` carries an Rc
+ // owner pointing at the demuxer; moving that Rc to a worker
+ // thread (when VideoDecoder is sent) would race with the demuxer's
+ // Rc on the original thread. The checked clone copies the bytes
+ // into a fresh allocation with `owner: None`, severing the link.
+ //
+ // We always create ProbeState — even when `remaining` is empty
+ // (single-backend platforms like macOS) — so that a first-frame
+ // failure on the only backend surfaces as
+ // `Error::AllBackendsFailed` from `receive_frame` /
+ // `send_packet` rather than as a raw FFmpeg error. That keeps
+ // the API contract the same regardless of how many HW backends
+ // the platform exposes.
+ //
+ // If the clone fails (ENOMEM), we keep the active `state` but
+ // skip probe setup — caller loses the transactional probe /
+ // fallback safety net but still gets a working decoder.
+ let probe = match try_clone_parameters(¶meters) {
+ Ok(probe_params) => Some(ProbeState {
+ parameters: probe_params,
+ codec,
+ remaining_backends: remaining,
+ buffered_packets: Vec::new(),
+ buffered_bytes: 0,
+ eof_sent: false,
+ attempts: Vec::new(),
+ }),
+ Err(e) => {
+ tracing::warn!(
+ error = %e,
+ "hwdecode: parameters clone failed for probe state; proceeding without fallback"
+ );
+ None
+ }
+ };
+ return Ok(Self {
+ state,
+ hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?,
+ probe,
+ pending_frames: VecDeque::new(),
+ max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES,
+ });
+ }
+ Err(e) => {
+ tracing::warn!(?backend, error = %e, "hwdecode: backend open failed");
+ attempts.push((backend, Box::new(e)));
+ }
+ }
+ }
+ Err(Error::AllBackendsFailed { attempts })
+ }
+
+ /// Open the decoder with a specific backend. No probe, no fallback.
+ ///
+ /// If `backend` cannot actually decode this stream, the failure surfaces
+ /// from [`Self::receive_frame`] (the strict `get_format` callback returns
+ /// `AV_PIX_FMT_NONE`, the decoder errors out). The caller is responsible
+ /// for retrying with another hardware backend or falling back to a
+ /// software decoder of their choice (e.g. `ffmpeg::decoder::Video`).
+ pub fn open_with(parameters: codec::Parameters, backend: Backend) -> Result {
+ let codec = find_decoder(¶meters)?;
+ let state = Self::build_state(parameters, codec, backend)?;
+ Ok(Self {
+ state,
+ hw_frame: alloc_av_frame().map_err(Error::Ffmpeg)?,
+ probe: None,
+ pending_frames: VecDeque::new(),
+ max_probe_pending_bytes: DEFAULT_MAX_PROBE_PENDING_BYTES,
+ })
+ }
+
+ /// Override the byte budget for probe-replay queued frames. Defaults to
+ /// [`DEFAULT_MAX_PROBE_PENDING_BYTES`]. Use a higher value when targeting
+ /// 8K+ workloads where 16 frames at full size could exceed the default;
+ /// use a lower value in memory-constrained services to bound peak
+ /// allocation more tightly.
+ ///
+ /// Setting after the first frame has been delivered is harmless but has
+ /// no observable effect — the probe has already collapsed and the cap
+ /// only applies during replay drain.
+ ///
+ /// Returns `self` for builder-style chaining:
+ /// ```ignore
+ /// let decoder = VideoDecoder::open(params)?
+ /// .with_max_probe_pending_bytes(1024 * 1024 * 1024); // 1 GiB
+ /// ```
+ pub fn with_max_probe_pending_bytes(mut self, bytes: usize) -> Self {
+ self.max_probe_pending_bytes = bytes;
+ self
+ }
+
+ /// The backend currently producing frames. While the probe is still in
+ /// progress (no frame received yet) this returns the optimistically
+ /// selected backend; after the first frame, it is the backend that
+ /// actually produced it. Once stable, never changes again.
+ pub fn backend(&self) -> Backend {
+ self.state.backend
+ }
+
+ /// Decoder width in pixels.
+ pub fn width(&self) -> u32 {
+ self.state.inner.width()
+ }
+
+ /// Decoder height in pixels.
+ pub fn height(&self) -> u32 {
+ self.state.inner.height()
+ }
+
+ /// Codec context time base.
+ pub fn time_base(&self) -> Rational {
+ self.state.inner.time_base()
+ }
+
+ /// Frame rate from the codec context, if known.
+ pub fn frame_rate(&self) -> Option {
+ self.state.inner.frame_rate()
+ }
+
+ /// Submit a packet to the decoder.
+ ///
+ /// On success — and only on success — the packet is buffered for potential
+ /// replay through a fallback backend while the probe is active. EAGAIN
+ /// (decoder needs `receive_frame` to drain output first) propagates as
+ /// normal backpressure; the caller drains then retries.
+ ///
+ /// While the probe is active, a non-transient error (e.g. the active HW
+ /// backend rejecting this stream's geometry on first packet) advances the
+ /// probe to the next candidate and retries the packet there. The caller
+ /// observes only the eventual success or, if the probe is exhausted, the
+ /// final error.
+ ///
+ /// If the probe window grows beyond [`MAX_PROBE_PACKETS`] or
+ /// [`MAX_PROBE_PACKET_BYTES`] without producing a first frame (a stream
+ /// the active backend is silently mishandling, or pathological input),
+ /// the probe is **abandoned**: replay history is dropped, queued frames
+ /// are cleared, and `self.probe = None`. The active backend continues
+ /// serving the caller without fallback. A `tracing::warn!` records this
+ /// so it is visible in production logs.
+ pub fn send_packet(&mut self, packet: &Packet) -> Result<()> {
+ loop {
+ match self.state.inner.send_packet(packet) {
+ Ok(()) => {
+ if let Some(probe) = self.probe.as_mut() {
+ // Step 1: reject by side-data entry count BEFORE walking the
+ // side-data array for byte accounting. `packet_side_data_bytes`
+ // dereferences each `AVPacket.side_data[i]` based on the
+ // FFmpeg-supplied `side_data_elems`; if that integer is
+ // corrupt or weaponised we don't want to walk it from the
+ // safe `send_packet` path. The byte helper still clamps its
+ // own walk to the cap as defense-in-depth, but checking the
+ // count first short-circuits the descriptor-explosion case
+ // entirely.
+ let side_count = packet_side_data_count(packet);
+ if side_count > MAX_PROBE_PACKET_SIDE_DATA_ENTRIES {
+ tracing::warn!(
+ side_data_entries = side_count,
+ max_side_data_entries = MAX_PROBE_PACKET_SIDE_DATA_ENTRIES,
+ trigger = "side_data_entry_cap",
+ "hwdecode: packet side-data entry count exceeds cap; \
+ abandoning fallback safety net without byte accounting"
+ );
+ // Abandon the *future* probe-buffering only — see the byte/
+ // packet cap branch below for why `pending_frames` survives.
+ self.probe = None;
+ } else {
+ // Step 2: now safe to compute byte budget — `side_count`
+ // is bounded.
+ //
+ // `try_clone_packet` calls `av_packet_ref`, which deep-copies
+ // side data via `av_packet_copy_props`. The probe budget
+ // must include descriptor + ref overhead per side-data
+ // entry (via `packet_side_data_bytes`); without it, a
+ // packet stuffed with many tiny entries can dominate
+ // retained memory before the byte cap is even close to
+ // firing.
+ let pkt_size = packet.size().saturating_add(packet_side_data_bytes(
+ packet,
+ MAX_PROBE_PACKET_SIDE_DATA_ENTRIES,
+ ));
+ let new_count = probe.buffered_packets.len() + 1;
+ let new_bytes = probe.buffered_bytes.saturating_add(pkt_size);
+ if new_count > MAX_PROBE_PACKETS || new_bytes > MAX_PROBE_PACKET_BYTES {
+ tracing::warn!(
+ packets = new_count,
+ bytes = new_bytes,
+ side_data_entries = side_count,
+ max_packets = MAX_PROBE_PACKETS,
+ max_bytes = MAX_PROBE_PACKET_BYTES,
+ trigger = "byte_or_packet_cap",
+ "hwdecode: probe window exceeded caps without first frame; \
+ abandoning fallback safety net"
+ );
+ // Abandon the *future* probe-buffering only.
+ // `pending_frames` belong to the currently active backend
+ // (possibly the candidate `advance_probe` committed
+ // earlier in this same `send_packet` call) and are valid
+ // output the caller will dequeue via `receive_frame`.
+ // Clearing them here would silently drop initial frames
+ // at exactly the cap-overflow / OOM-stress paths.
+ self.probe = None;
+ } else {
+ // Use the checked clone — ffmpeg-next's `Packet::clone`
+ // discards av_packet_ref's return value and would
+ // silently store an empty packet on ENOMEM, corrupting
+ // future replay.
+ match try_clone_packet(packet) {
+ Ok(cloned) => {
+ probe.buffered_packets.push(cloned);
+ probe.buffered_bytes = new_bytes;
+ }
+ Err(e) => {
+ tracing::warn!(
+ error = %e,
+ "hwdecode: packet clone failed for probe history; \
+ abandoning fallback safety net"
+ );
+ // Same reasoning as the cap-overflow branch above:
+ // `pending_frames` are owned by the active backend,
+ // not the probe buffer, so they survive abandonment.
+ self.probe = None;
+ }
+ }
+ }
+ }
+ }
+ return Ok(());
+ }
+ Err(e) if is_transient(&e) => {
+ // Normal backpressure / EOF — pass through unchanged.
+ return Err(Error::Ffmpeg(e));
+ }
+ Err(e) => {
+ if self.probe.is_some() {
+ // advance_probe consumes the error into `attempts` and either
+ // installs a candidate (Ok) or surfaces AllBackendsFailed (Err).
+ self.advance_probe(Error::Ffmpeg(e))?;
+ continue;
+ }
+ return Err(Error::Ffmpeg(e));
+ }
+ }
+ }
+ }
+
+ /// Signal end-of-stream to the decoder.
+ ///
+ /// Recorded for replay only if the underlying `send_eof` succeeds. While
+ /// the probe is active, non-transient errors trigger probe advance and
+ /// retry, matching `send_packet`'s behaviour.
+ pub fn send_eof(&mut self) -> Result<()> {
+ loop {
+ match self.state.inner.send_eof() {
+ Ok(()) => {
+ if let Some(probe) = self.probe.as_mut() {
+ probe.eof_sent = true;
+ }
+ return Ok(());
+ }
+ Err(e) if is_transient(&e) => return Err(Error::Ffmpeg(e)),
+ Err(e) => {
+ if self.probe.is_some() {
+ self.advance_probe(Error::Ffmpeg(e))?;
+ continue;
+ }
+ return Err(Error::Ffmpeg(e));
+ }
+ }
+ }
+ }
+
+ /// Receive a CPU-side decoded frame.
+ ///
+ /// The frame is downloaded with `av_hwframe_transfer_data` and metadata
+ /// is copied via `av_frame_copy_props`. The caller's frame is always
+ /// unref'd first, so reuse across resolution changes or different
+ /// decoders is safe.
+ ///
+ /// While the probe window is open, *any* non-transient failure (decode
+ /// error, transfer error, copy_props error, or a CPU-format frame from a
+ /// HW-opened context) tears down the current decoder and advances to the
+ /// next hardware backend in probe order, replaying buffered packets
+ /// through it. Frames the candidate produced during replay (drained when
+ /// `send_packet` returned EAGAIN) are queued and delivered FIFO via this
+ /// method, so the caller never loses initial frames after a fallback.
+ ///
+ /// This crate is hardware-only: there is no software fallback inside the
+ /// decoder. When every backend in the probe order has been exhausted —
+ /// including the case of a single-backend platform whose only backend
+ /// failed — this returns [`Error::AllBackendsFailed`] with the per-
+ /// backend attempt log so the caller can branch into a software
+ /// decoder of their choice.
+ ///
+ /// Returns the same transient signals as `ffmpeg::decoder::Video`:
+ /// `Error::Ffmpeg(Other { errno: EAGAIN })` when no frame is ready and
+ /// more packets must be sent, and `Error::Ffmpeg(Eof)` once fully drained.
+ pub fn receive_frame(&mut self, frame: &mut Frame) -> Result<()> {
+ // Pre-drain frames queued during probe replay. They are already CPU-side
+ // (transferred at drain time, when the candidate's HW context was alive)
+ // so we just move them into the caller's slot.
+ if self.try_pop_pending(frame) {
+ return Ok(());
+ }
+
+ loop {
+ let res = self.state.inner.receive_frame(&mut self.hw_frame);
+ match res {
+ Err(e) => {
+ // EAGAIN is normal backpressure — pass through unconditionally.
+ if is_eagain(&e) {
+ return Err(Error::Ffmpeg(e));
+ }
+ // EOF (and every other non-transient error): if we are still
+ // probing, treat it as candidate failure — a backend that drains
+ // to EOF without ever producing a frame should not silently
+ // present as "stream over" to the caller. Advance and retry; if
+ // every backend has been exhausted, advance_probe surfaces
+ // AllBackendsFailed and `?` propagates it.
+ if self.probe.is_some() {
+ self.advance_probe(Error::Ffmpeg(e))?;
+ // Probe advance may have populated `pending_frames`; deliver
+ // one of those before reading more from the new candidate.
+ if self.try_pop_pending(frame) {
+ return Ok(());
+ }
+ continue;
+ }
+ // Probe collapsed already — surface the error (including EOF
+ // for a genuinely empty stream).
+ return Err(Error::Ffmpeg(e));
+ }
+ Ok(()) => {
+ // Always attempt the HW→CPU transfer. With strict `get_format`,
+ // libavcodec can only deliver frames in the wired-up HW format
+ // (or fail). If a misbehaving codec ever hands us a CPU-side
+ // frame anyway, `av_hwframe_transfer_data` returns AVERROR(EINVAL)
+ // (neither src nor dst has an AVHWFramesContext attached) and we
+ // route through the same error path below.
+ match unsafe { transfer_hw_frame(frame, &mut self.hw_frame) } {
+ Ok(()) => {
+ self.probe = None;
+ return Ok(());
+ }
+ Err(e) => {
+ if self.probe.is_some() {
+ self.advance_probe(Error::Ffmpeg(e))?;
+ unsafe { av_frame_unref(frame.as_inner_mut().as_mut_ptr()) };
+ if self.try_pop_pending(frame) {
+ return Ok(());
+ }
+ continue;
+ }
+ return Err(Error::Ffmpeg(e));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /// Pop one queued frame (produced by a candidate decoder during probe
+ /// replay) into the caller's slot. Returns `true` when a frame was
+ /// delivered, `false` when the queue was empty.
+ fn try_pop_pending(&mut self, frame: &mut Frame) -> bool {
+ let Some(mut buffered) = self.pending_frames.pop_front() else {
+ return false;
+ };
+ // SAFETY: `buffered` is a CPU-side AVFrame we previously transferred
+ // and pushed into the queue; both pointers are valid.
+ unsafe {
+ av_frame_unref(frame.as_inner_mut().as_mut_ptr());
+ av_frame_move_ref(frame.as_inner_mut().as_mut_ptr(), buffered.as_mut_ptr());
+ }
+ // Probe semantics: delivering a frame collapses the probe.
+ self.probe = None;
+ true
+ }
+
+ /// Flush internal buffers (e.g. after a seek).
+ ///
+ /// Discards every frame buffered by the decoder, every frame queued during
+ /// probe replay (`pending_frames`), and the residual `hw_frame` scratch
+ /// buffer. Probe-time replay state (buffered packets, EOF marker) is also
+ /// cleared since post-seek packets do not align with the previously
+ /// captured history. After a flush, the next `receive_frame` waits for new
+ /// post-seek input.
+ pub fn flush(&mut self) {
+ self.state.inner.flush();
+ // SAFETY: hw_frame is a valid AVFrame we own; av_frame_unref is a no-op
+ // for an already-empty frame.
+ unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) };
+ self.pending_frames.clear();
+ if let Some(probe) = self.probe.as_mut() {
+ probe.buffered_packets.clear();
+ probe.buffered_bytes = 0;
+ probe.eof_sent = false;
+ }
+ }
+
+ /// Try the next backend in `remaining_backends`. Transactional: a
+ /// candidate must successfully build and accept the replayed history
+ /// before any probe state is consumed. Backends that fail to build or
+ /// reject the replay are recorded into `probe.attempts` and the loop
+ /// continues to the next one.
+ ///
+ /// `last_error` is the error that triggered this advance — i.e. the
+ /// failure of the currently active backend on `send_packet` /
+ /// `send_eof` / `receive_frame`. It is recorded against the active
+ /// backend before any candidate is tried so that a final
+ /// `AllBackendsFailed` carries the full attempt log including the
+ /// initially-opened backend's runtime failure.
+ ///
+ /// Returns:
+ /// - `Ok(())` when a candidate is installed and replay completed —
+ /// caller should retry the operation.
+ /// - `Err(Error::AllBackendsFailed { attempts })` when every remaining
+ /// backend has been exhausted (including the just-failed active one).
+ /// This is what the documented `open` contract promises, surfaced at
+ /// runtime so the caller can branch into a software fallback. On a
+ /// single-backend platform (e.g. macOS), this fires after the only
+ /// backend's first-frame failure; on multi-backend platforms it
+ /// fires after the last candidate's failure.
+ /// - `Err(_)` for other fatal conditions surfaced by probe machinery
+ /// itself (e.g. `alloc_av_frame` ENOMEM during replay drain).
+ fn advance_probe(&mut self, last_error: Error) -> Result<()> {
+ // Record the failure that triggered this advance against the active
+ // backend. If the probe was somehow already gone (shouldn't happen —
+ // call sites guard with `self.probe.is_some()`), just propagate the
+ // error so behaviour matches the pre-fix code path.
+ let active_backend = self.state.backend;
+ match self.probe.as_mut() {
+ Some(probe) => probe.attempts.push((active_backend, Box::new(last_error))),
+ None => return Err(last_error),
+ }
+
+ // Drop frames previously queued from the backend we're now abandoning.
+ // They came from a candidate that just failed for cause and cannot be
+ // trusted alongside frames we may queue from the next candidate. (If
+ // this method is called repeatedly via chained probe advances, this
+ // also keeps `pending_frames` from accumulating frames from multiple
+ // rejected backends.)
+ self.pending_frames.clear();
+
+ loop {
+ // Snapshot inputs without mutating probe state. Use the checked
+ // clone helper rather than `Parameters::clone` (which masks ENOMEM).
+ let (next_backend, parameters, codec) = match self.probe.as_ref() {
+ Some(probe) if !probe.remaining_backends.is_empty() => {
+ let parameters = match try_clone_parameters(&probe.parameters) {
+ Ok(p) => p,
+ Err(e) => {
+ tracing::warn!(
+ error = %e,
+ "hwdecode: parameters clone failed during probe advance; popping backend and trying next"
+ );
+ let popped = self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .remaining_backends
+ .remove(0);
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .attempts
+ .push((popped, Box::new(Error::Ffmpeg(e))));
+ continue;
+ }
+ };
+ (probe.remaining_backends[0], parameters, probe.codec)
+ }
+ // No more candidates — surface the accumulated attempt log as
+ // AllBackendsFailed so single- and multi-backend platforms have
+ // the same contract for "every HW backend failed."
+ _ => {
+ let attempts = self.probe.take().map(|p| p.attempts).unwrap_or_default();
+ return Err(Error::AllBackendsFailed { attempts });
+ }
+ };
+
+ let prev_backend = self.state.backend;
+ tracing::warn!(from = ?prev_backend, to = ?next_backend, "hwdecode: advancing probe");
+
+ // Build candidate. On failure, record into attempts and continue
+ // without touching the packet buffer.
+ let mut candidate_state = match Self::build_state(parameters, codec, next_backend) {
+ Ok(s) => s,
+ Err(e) => {
+ tracing::warn!(?next_backend, error = %e, "hwdecode: candidate build failed");
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .remaining_backends
+ .remove(0);
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .attempts
+ .push((next_backend, Box::new(e)));
+ continue;
+ }
+ };
+
+ // Replay buffered history through the candidate WITHOUT installing it.
+ // We borrow the buffer immutably; if replay fails the candidate's Drop
+ // releases the FFmpeg state and the buffer is preserved for the next
+ // attempt.
+ //
+ // EAGAIN handling: `avcodec_send_packet` may return EAGAIN when its
+ // internal queue is full and the user is expected to drain output
+ // first (B-frame buffering, candidate-specific queue depth, etc.).
+ // This is normal flow — we drain frames out of the candidate, transfer
+ // each one to a CPU frame, and stash them in `local_pending`. After
+ // commit they move to `self.pending_frames` and are delivered FIFO
+ // by `receive_frame`, so the caller never loses initial frames.
+ let mut local_pending: VecDeque = VecDeque::new();
+ let mut local_pending_bytes: usize = 0;
+ let max_pending_bytes = self.max_probe_pending_bytes;
+ let replay_result: std::result::Result<(), ffmpeg_next::Error> = {
+ let probe = self.probe.as_ref().expect("probe state present");
+ let mut hw_buf = match alloc_av_frame() {
+ Ok(f) => f,
+ Err(e) => return Err(Error::Ffmpeg(e)),
+ };
+ let mut r: std::result::Result<(), ffmpeg_next::Error> = Ok(());
+
+ 'replay: for pkt in &probe.buffered_packets {
+ loop {
+ match candidate_state.inner.send_packet(pkt) {
+ Ok(()) => break,
+ Err(e) if is_eagain(&e) => {
+ // Drain candidate output (transferring + queueing each frame)
+ // and retry the same packet.
+ if let Err(de) = drain_into_pending(
+ &mut candidate_state.inner,
+ &mut hw_buf,
+ &mut local_pending,
+ &mut local_pending_bytes,
+ max_pending_bytes,
+ ) {
+ r = Err(de);
+ break 'replay;
+ }
+ }
+ Err(e) => {
+ r = Err(e);
+ break 'replay;
+ }
+ }
+ }
+ }
+ if r.is_ok() && probe.eof_sent {
+ // `avcodec_send_packet(NULL)` (which `send_eof` becomes) can
+ // return EAGAIN with the same drain-output-first semantics as
+ // a regular send_packet. Loop drain+retry instead of failing
+ // the candidate on backpressure.
+ loop {
+ match candidate_state.inner.send_eof() {
+ Ok(()) => break,
+ Err(e) if is_eagain(&e) => {
+ if let Err(de) = drain_into_pending(
+ &mut candidate_state.inner,
+ &mut hw_buf,
+ &mut local_pending,
+ &mut local_pending_bytes,
+ max_pending_bytes,
+ ) {
+ r = Err(de);
+ break;
+ }
+ }
+ Err(e) => {
+ r = Err(e);
+ break;
+ }
+ }
+ }
+ }
+ r
+ };
+
+ if let Err(e) = replay_result {
+ tracing::warn!(?next_backend, error = %e, "hwdecode: candidate replay failed");
+ // Drop candidate explicitly so its FFI cleanup runs now. Discard any
+ // frames we drained from this candidate — they're tied to a decoder
+ // we're throwing away.
+ drop(candidate_state);
+ drop(local_pending);
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .remaining_backends
+ .remove(0);
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .attempts
+ .push((next_backend, Box::new(Error::Ffmpeg(e))));
+ continue;
+ }
+
+ // Commit: install the candidate, clear residual hw_frame, queue the
+ // drained frames for the caller, and pop the now-active backend.
+ self.state = candidate_state;
+ unsafe { av_frame_unref(self.hw_frame.as_mut_ptr()) };
+ self.pending_frames.append(&mut local_pending);
+ self
+ .probe
+ .as_mut()
+ .expect("probe state present")
+ .remaining_backends
+ .remove(0);
+ return Ok(());
+ }
+ }
+
+ /// Build raw FFmpeg state for one hardware backend. Strict `get_format`
+ /// (NONE on missing HW format); cross-backend fallback is the caller's job.
+ fn build_state(
+ parameters: codec::Parameters,
+ codec: Codec,
+ backend: Backend,
+ ) -> Result {
+ // Use our checked allocator instead of Context::from_parameters, which
+ // does not null-check avcodec_alloc_context3 and would feed a null
+ // AVCodecContext into FFmpeg under OOM.
+ let mut ctx = build_codec_context(¶meters)?;
+ let av_type = backend.av_hwdevice_type();
+
+ // Verify the codec advertises this hwaccel **with the exact HW pix_fmt
+ // we're about to wire up in `get_format`**. FFmpeg's HW config table
+ // is keyed per (device_type, pix_fmt); a codec can advertise the same
+ // device with several HW pix_fmts, so matching only on device_type
+ // would let probing succeed for a backend whose pix_fmt the codec
+ // never offers — the failure would then surface deep inside the
+ // probe/decode loop. Matching the exact pix_fmt keeps the strict
+ // `get_format` honest and gives `open_with` a clean rejection.
+ let hw_pix_fmt = backend.hw_pixel_format();
+ if !codec_supports_hwaccel(unsafe { codec.as_ptr() }, av_type, hw_pix_fmt as i32) {
+ return Err(Error::BackendUnsupportedByCodec(backend));
+ }
+
+ // Create the device context.
+ let mut hw_device_ref: *mut AVBufferRef = ptr::null_mut();
+ // SAFETY: `hw_device_ref` is a stack ptr we hand FFmpeg to fill.
+ let ret = unsafe {
+ av_hwdevice_ctx_create(&mut hw_device_ref, av_type, ptr::null(), ptr::null_mut(), 0)
+ };
+ if ret < 0 {
+ return Err(Error::HwDeviceInitFailed {
+ backend,
+ source: ffmpeg_next::Error::from(ret),
+ });
+ }
+
+ let callback_state = Box::into_raw(Box::new(CallbackState {
+ wanted: hw_pix_fmt,
+ wanted_int: hw_pix_fmt as i32,
+ }));
+ // RAII guard: from now until the end-of-function `into_owned()`, every
+ // early return — `av_buffer_ref` failure, `open_as` failure, codec_type
+ // mismatch, or any future error path added between here and the
+ // `DecoderState` construction — frees `hw_device_ref` and
+ // `callback_state` via the guard's Drop. Without it, each error site
+ // had to remember to clean up these two FFI-owned resources by hand;
+ // the codec_type-mismatch branch was missed and silently leaked one
+ // device ref + one heap allocation per bad input.
+ let guard = PartialBuildState {
+ hw_device_ref,
+ callback_state,
+ };
+
+ // SAFETY: ctx is a freshly-constructed AVCodecContext we own;
+ // av_buffer_ref bumps the refcount of the device buffer for FFmpeg's
+ // use (we keep our own ref in `hw_device_ref` for cleanup).
+ // av_buffer_ref returns NULL on allocation failure; we must check it
+ // before assigning, otherwise the codec context would be opened with a
+ // HW-flagged setup but no actual device reference.
+ let device_ref_for_ctx = unsafe { av_buffer_ref(hw_device_ref) };
+ if device_ref_for_ctx.is_null() {
+ // guard's Drop frees hw_device_ref (the first ref) and callback_state.
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ }));
+ }
+ // SAFETY: device_ref_for_ctx is a valid AVBufferRef* from av_buffer_ref;
+ // ctx is freshly built and owned by us. After this point ctx aliases
+ // `callback_state` via `opaque` (FFmpeg never frees opaque, so
+ // `callback_state` ownership stays with us / the guard) and aliases
+ // `device_ref_for_ctx` (the second ref) via `hw_device_ctx` (FFmpeg
+ // unrefs that on codec context drop, independent of the guard's first
+ // ref).
+ unsafe {
+ let raw = ctx.as_mut_ptr();
+ (*raw).hw_device_ctx = device_ref_for_ctx;
+ (*raw).opaque = callback_state.cast();
+ (*raw).get_format = Some(get_hw_format);
+ }
+
+ // Open the decoder. On failure `ctx`/`opened` Drop releases the codec
+ // context (and via that the second device ref); the guard releases the
+ // first device ref and the callback state.
+ //
+ // We deliberately bypass `Opened::video()` because it calls
+ // `Context::medium()`, which reads `AVCodecContext.codec_type` as the
+ // bindgen `AVMediaType` enum — the same UB hazard we've been
+ // systematically removing. Instead: validate `codec_type` as a raw
+ // `c_int` ourselves, then construct the `decoder::Video` wrapper
+ // directly via its public tuple field.
+ let opened = ctx.decoder().open_as(codec).map_err(Error::Ffmpeg)?;
+
+ // Validate codec_type as a raw integer — never construct AVMediaType
+ // from an unvalidated runtime value.
+ // SAFETY: codec_type is bound as AVMediaType (`#[repr(i32)]`), same
+ // size and alignment as i32; reading the bytes as i32 cannot be UB.
+ let codec_type_int: i32 =
+ unsafe { ptr::read(ptr::addr_of!((*opened.as_ptr()).codec_type) as *const i32) };
+ let video_type_int: i32 = AVMediaType::AVMEDIA_TYPE_VIDEO as i32;
+ if codec_type_int != video_type_int {
+ // Not a video codec context — surface the same error
+ // `Opened::video()` would have, without going through enum
+ // construction. `opened`'s Drop releases the codec context; the
+ // guard releases the first hw_device_ref and the callback state.
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::InvalidData));
+ }
+ // SAFETY of construction: `decoder::Video` is `pub struct Video(pub Opened)`.
+ // We construct via the public field; this is the same wrapping
+ // `Opened::video()` does on success, just without the enum read.
+ let opened = ffmpeg_next::decoder::Video(opened);
+
+ // Disarm the guard and transfer ownership of both resources into the
+ // returned DecoderState (whose own Drop handles their lifetime).
+ let (hw_device_ref, callback_state) = guard.into_owned();
+ Ok(DecoderState {
+ inner: ManuallyDrop::new(opened),
+ backend,
+ hw_device_ref,
+ callback_state,
+ })
+ }
+}
+
+/// RAII guard for the partially-owned FFmpeg state that
+/// [`VideoDecoder::build_state`] holds between the
+/// `av_hwdevice_ctx_create` and `Box::into_raw(CallbackState)`
+/// allocations and the final `DecoderState` construction.
+///
+/// If `build_state` returns `Err` for any reason in that window
+/// (`av_buffer_ref` ENOMEM, `open_as` failure, codec_type mismatch, or
+/// any future error path), this guard's `Drop` releases
+/// `hw_device_ref` — the first ref returned by `av_hwdevice_ctx_create`,
+/// distinct from the second ref FFmpeg unrefs when the codec context
+/// drops — and the boxed `CallbackState`, which FFmpeg never touches
+/// because `AVCodecContext::opaque` is purely user-owned.
+///
+/// Successful construction calls [`Self::into_owned`] to disarm the
+/// guard and hand both pointers to the new `DecoderState`.
+struct PartialBuildState {
+ hw_device_ref: *mut AVBufferRef,
+ callback_state: *mut CallbackState,
+}
+
+impl PartialBuildState {
+ /// Disarm the guard: return the owned pointers and replace the guard's
+ /// fields with null so its Drop is a no-op.
+ fn into_owned(mut self) -> (*mut AVBufferRef, *mut CallbackState) {
+ let hw = std::mem::replace(&mut self.hw_device_ref, ptr::null_mut());
+ let cb = std::mem::replace(&mut self.callback_state, ptr::null_mut());
+ (hw, cb)
+ }
+}
+
+impl Drop for PartialBuildState {
+ fn drop(&mut self) {
+ // SAFETY: pointers are either freshly allocated by `build_state` (via
+ // `av_hwdevice_ctx_create` and `Box::into_raw`) or null after
+ // `into_owned`. Both `av_buffer_unref` and `Box::from_raw` need the
+ // null check we apply here; both are otherwise sound on resources we
+ // own.
+ unsafe {
+ if !self.hw_device_ref.is_null() {
+ let mut hw = self.hw_device_ref;
+ av_buffer_unref(&mut hw);
+ }
+ if !self.callback_state.is_null() {
+ drop(Box::from_raw(self.callback_state));
+ }
+ }
+ }
+}
+
+/// Download a HW frame into a CPU [`Frame`]. Always unrefs the destination
+/// first so reuse across resolution changes is safe.
+///
+/// Deliberately does **not** call `av_frame_copy_props`. That FFmpeg
+/// helper deep-copies AVFrame side data (SEI, mastering display, ICC
+/// profiles, dynamic HDR, etc.), the metadata dict, and bumps both
+/// `opaque_ref` and `private_ref` on every receive — none of which
+/// `Frame` exposes via its public accessors. On a crafted stream with
+/// megabytes of per-frame metadata that would mean an unbounded
+/// allocation per receive, with no caller-visible benefit. We instead
+/// copy only the scalar fields the public API can read (today: `pts`);
+/// pixel layout (`width`, `height`, `format`, `linesize`, `data`) is
+/// already set by `av_hwframe_transfer_data`. If `Frame` ever grows
+/// accessors for timing extras (`duration`, `time_base`, `pkt_dts`) or
+/// color metadata, add those to `copy_frame_props_minimal` at the same
+/// time.
+unsafe fn transfer_hw_frame(
+ dst: &mut Frame,
+ src: &mut frame::Video,
+) -> std::result::Result<(), ffmpeg_next::Error> {
+ unsafe {
+ av_frame_unref(dst.as_inner_mut().as_mut_ptr());
+ let ret = av_hwframe_transfer_data(dst.as_inner_mut().as_mut_ptr(), src.as_ptr(), 0);
+ if ret < 0 {
+ return Err(ffmpeg_next::Error::from(ret));
+ }
+ copy_frame_props_minimal(dst.as_inner_mut().as_mut_ptr(), src.as_ptr());
+ }
+ Ok(())
+}
+
+/// Bounded substitute for `av_frame_copy_props`. Copies only the scalar
+/// AVFrame fields the public `Frame` API needs from `src` to `dst` —
+/// today just `pts`. Skips every allocating field (`av_dict_copy` for
+/// `metadata`, `av_frame_new_side_data` + memcpy for each `side_data[i]`,
+/// `av_buffer_replace` for `opaque_ref` / `private_ref`) so the cost is
+/// O(1) per frame regardless of what the source attaches.
+///
+/// # Safety
+/// Both pointers must be valid `AVFrame` pointers we own; field
+/// projection touches only POD scalars, no enums or buffer refs.
+unsafe fn copy_frame_props_minimal(dst: *mut AVFrame, src: *const AVFrame) {
+ unsafe {
+ (*dst).pts = (*src).pts;
+ }
+}
+
+/// `EAGAIN` and `EOF` are normal flow signals from `avcodec_receive_frame`
+/// and must not be treated as backend failures.
+fn is_transient(e: &ffmpeg_next::Error) -> bool {
+ is_eagain(e) || matches!(e, ffmpeg_next::Error::Eof)
+}
+
+/// Reject a `codec::Parameters` whose inner `*mut AVCodecParameters` is
+/// null. This guards the public trust boundary: ffmpeg-next can produce
+/// such a `Parameters` under OOM (`Parameters::new()` does not check
+/// `avcodec_parameters_alloc`), and a safe caller can legally hand one
+/// in. Without this check, the very next `(*p.as_ptr()).field` read
+/// would be a null deref.
+fn ensure_parameters_non_null(parameters: &codec::Parameters) -> Result<()> {
+ // SAFETY: as_ptr() returns the inner *const AVCodecParameters; we just
+ // inspect the pointer value (no deref).
+ if unsafe { parameters.as_ptr() }.is_null() {
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ }));
+ }
+ Ok(())
+}
+
+/// Allocate a fresh `frame::Video`, checking that `av_frame_alloc` did not
+/// return NULL. ffmpeg-next's `frame::Video::empty()` does not surface that
+/// failure and the resulting null pointer would be UB on the next field
+/// access; this wrapper catches it and surfaces it as `ENOMEM`.
+fn alloc_av_frame() -> std::result::Result {
+ let inner = frame::Video::empty();
+ // SAFETY: as_ptr() just exposes the inner pointer for inspection.
+ if unsafe { inner.as_ptr() }.is_null() {
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ Ok(inner)
+}
+
+/// Build a fresh `Context` from `parameters`, checking the underlying
+/// `avcodec_alloc_context3` for NULL before passing it to
+/// `avcodec_parameters_to_context`. ffmpeg-next's `Context::from_parameters`
+/// skips that check and would feed a null pointer into FFmpeg under OOM —
+/// undefined behavior. This helper surfaces the failure as `ENOMEM` and
+/// frees the context if `parameters_to_context` itself errors.
+fn build_codec_context(parameters: &codec::Parameters) -> Result {
+ ensure_parameters_non_null(parameters)?;
+ // SAFETY: avcodec_alloc_context3(NULL) returns a fresh AVCodecContext
+ // or NULL on allocation failure.
+ let ctx_ptr = unsafe { avcodec_alloc_context3(ptr::null()) };
+ if ctx_ptr.is_null() {
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ }));
+ }
+ // SAFETY: ctx_ptr is non-null and freshly allocated; parameters.as_ptr()
+ // returns a valid AVCodecParameters pointer; the function copies bytes
+ // out of parameters into the context.
+ let ret = unsafe { avcodec_parameters_to_context(ctx_ptr, parameters.as_ptr()) };
+ if ret < 0 {
+ // SAFETY: ctx_ptr was allocated by us and never handed to anyone else.
+ let mut p = ctx_ptr;
+ unsafe { avcodec_free_context(&mut p) };
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::from(ret)));
+ }
+ // SAFETY: ctx_ptr is valid; passing `owner: None` means our wrapper owns
+ // the allocation and `Context::drop` will run `avcodec_free_context`.
+ Ok(unsafe { Context::wrap(ctx_ptr, None) })
+}
+
+/// Checked deep-clone of `codec::Parameters`. ffmpeg-next's
+/// `Parameters::clone` allocates via `avcodec_parameters_alloc` without
+/// checking for NULL and runs `avcodec_parameters_copy` without checking
+/// the return code. On `ENOMEM` the result is a `Parameters` with a null
+/// inner pointer, which becomes UB when later passed to FFmpeg.
+///
+/// This helper performs both calls explicitly, frees a partial allocation
+/// on failure, and surfaces the AVERROR. The returned `Parameters` has
+/// `owner: None`, severing any Rc link to the caller's demuxer (the
+/// reason we deep-clone in the first place — see Send safety in
+/// `VideoDecoder::open`).
+fn try_clone_parameters(
+ src: &codec::Parameters,
+) -> std::result::Result {
+ // Reject a null inner pointer at the boundary; a deref inside
+ // avcodec_parameters_copy below would otherwise be UB.
+ if unsafe { src.as_ptr() }.is_null() {
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ // SAFETY: avcodec_parameters_alloc returns a fresh AVCodecParameters
+ // pointer or NULL on allocation failure.
+ let dst_ptr = unsafe { avcodec_parameters_alloc() };
+ if dst_ptr.is_null() {
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ // SAFETY: dst_ptr is non-null and freshly allocated; src.as_ptr() is
+ // a valid AVCodecParameters pointer; the function copies bytes from
+ // src into dst.
+ let ret = unsafe { avcodec_parameters_copy(dst_ptr, src.as_ptr()) };
+ if ret < 0 {
+ // SAFETY: dst_ptr was allocated by us and never handed out.
+ let mut p = dst_ptr;
+ unsafe { avcodec_parameters_free(&mut p) };
+ return Err(ffmpeg_next::Error::from(ret));
+ }
+ // SAFETY: dst_ptr is a valid AVCodecParameters; passing `owner: None`
+ // means our wrapper owns the allocation and `Parameters::drop` will
+ // call `avcodec_parameters_free`.
+ Ok(unsafe { codec::Parameters::wrap(dst_ptr, None) })
+}
+
+/// Checked counterpart to `Packet::clone()`. ffmpeg-next's `clone_from`
+/// calls `av_packet_ref` and ignores the int return value; on `ENOMEM`
+/// the destination is left empty while the caller assumes the clone
+/// succeeded — corrupting any later replay history. This helper surfaces
+/// the AVERROR. The result is a refcounted shallow clone — the payload
+/// buffer is shared with `src` rather than deep-copied; the probe replay
+/// only sends packets through `avcodec_send_packet`, which does not
+/// require a writable buffer.
+fn try_clone_packet(src: &Packet) -> std::result::Result {
+ let mut dst = Packet::empty();
+ // SAFETY: dst is a freshly zero-initialized Packet (av_init_packet inside
+ // Packet::empty); av_packet_ref initializes its data fields from src's
+ // refcounted buffer or returns AVERROR(ENOMEM) on failure.
+ let ret = unsafe { av_packet_ref(dst.as_mut_ptr(), src.as_ptr()) };
+ if ret < 0 {
+ return Err(ffmpeg_next::Error::from(ret));
+ }
+ Ok(dst)
+}
+
+/// Sum of `AVPacket.side_data[i].size` across every entry, plus
+/// `nb_entries * SIDE_DATA_ENTRY_OVERHEAD` (descriptor + AVBufferRef +
+/// allocator bookkeeping per entry). `av_packet_ref` performs a deep
+/// copy of side data via `av_packet_copy_props`, so each probe-buffered
+/// clone retains every one of these bytes. Charging both keeps
+/// `MAX_PROBE_PACKET_BYTES` a true upper bound — without the overhead,
+/// many zero-size entries slip past the cap on pure descriptor cost.
+///
+/// Walks at most `max_entries` entries even when `side_data_elems`
+/// reports a larger count. Defense-in-depth against a corrupt or hostile
+/// packet whose `side_data_elems` lies about the actual array length:
+/// the caller is expected to also reject any packet whose count exceeds
+/// the cap (so the inflated clone is never created), but bounding the
+/// walk here means a stale or weaponised value can never trigger an
+/// unbounded raw-pointer scan from the safe API.
+///
+/// Reads only the `size` field of each `AVPacketSideData` entry — never
+/// touches the bindgen `AVPacketSideDataType` enum, so no UB even if a
+/// future FFmpeg adds a side-data type discriminant our build doesn't
+/// know.
+fn packet_side_data_bytes(packet: &Packet, max_entries: usize) -> usize {
+ // SAFETY: AVPacket.side_data is `*mut AVPacketSideData` and
+ // side_data_elems is `c_int`; both are raw struct fields safe to read.
+ // Field projection (`.size`) does not reconstruct the enum-typed `type_`
+ // field, so the bindgen-enum UB hazard does not apply here.
+ unsafe {
+ let raw = packet.as_ptr();
+ let nel = (*raw).side_data_elems;
+ let arr = (*raw).side_data;
+ if arr.is_null() || nel <= 0 || max_entries == 0 {
+ return 0;
+ }
+ let count = (nel as usize).min(max_entries);
+ let mut total = count.saturating_mul(SIDE_DATA_ENTRY_OVERHEAD);
+ for i in 0..count {
+ let entry = arr.add(i);
+ total = total.saturating_add((*entry).size);
+ }
+ total
+ }
+}
+
+/// Number of `AVPacketSideData` entries on `packet`. The probe buffer
+/// uses this to enforce [`MAX_PROBE_PACKET_SIDE_DATA_ENTRIES`] before
+/// cloning, so a packet whose entry count alone would dominate retained
+/// memory is rejected up front.
+fn packet_side_data_count(packet: &Packet) -> usize {
+ // SAFETY: side_data_elems is `c_int`, safe to read; clamp negatives to 0.
+ let nel = unsafe { (*packet.as_ptr()).side_data_elems };
+ if nel <= 0 {
+ 0
+ } else {
+ nel as usize
+ }
+}
+
+/// Just `EAGAIN` (separate from EOF — the FFmpeg send/receive state machine
+/// distinguishes "drain output and retry" from "stream over").
+fn is_eagain(e: &ffmpeg_next::Error) -> bool {
+ matches!(e, ffmpeg_next::Error::Other { errno } if *errno == ffmpeg_next::error::EAGAIN)
+}
+
+/// Look up the decoder for `parameters` without going through the bindgen
+/// `AVCodecID` Rust enum. Reads the codec_id field as raw `u32` via
+/// `addr_of!` + `ptr::read` so a value not in our build's discriminant
+/// set never invokes UB.
+fn find_decoder(parameters: &codec::Parameters) -> Result {
+ ensure_parameters_non_null(parameters)?;
+ // SAFETY: parameters' inner pointer is non-null (checked above);
+ // addr_of! projects to the codec_id field; the *const u32 cast is sound
+ // because AVCodecID is `#[repr(u32)]` (same size and alignment as u32).
+ // Reading as u32 cannot be UB regardless of the value FFmpeg wrote.
+ let raw_id: u32 =
+ unsafe { ptr::read(ptr::addr_of!((*parameters.as_ptr()).codec_id) as *const u32) };
+
+ // Call C `avcodec_find_decoder` via our local `c_int`-typed shim — we
+ // never construct an `AVCodecID` enum from `raw_id`. The C function
+ // returns NULL for unknown ids, which we surface as `Error::NoCodec`.
+ // SAFETY: avcodec_find_decoder is a pure FFmpeg lookup; passing any
+ // c_int is sound (returns NULL for unknown).
+ let codec_ptr = unsafe { c_shims::avcodec_find_decoder(raw_id as libc::c_int) };
+ if codec_ptr.is_null() {
+ return Err(Error::NoCodec(raw_id));
+ }
+ // SAFETY: codec_ptr is a non-null *const AVCodec into FFmpeg's static
+ // codec table; it lives for the duration of the program.
+ Ok(unsafe { Codec::wrap(codec_ptr) })
+}
+
+/// Drain output frames from a candidate decoder during probe replay,
+/// transferring each one from the candidate's HW context to a fresh CPU
+/// frame and queueing it. Returns `Ok(())` once the candidate signals
+/// EAGAIN/EOF. The transfer happens while the candidate is still alive
+/// (its `AVHWFramesContext` is reachable); the resulting CPU frames remain
+/// valid after the candidate is committed because they hold their own
+/// buffer references with no dependency on the original device context.
+fn drain_into_pending(
+ decoder: &mut ffmpeg_next::decoder::Video,
+ hw_buf: &mut frame::Video,
+ pending: &mut VecDeque,
+ pending_bytes: &mut usize,
+ max_bytes: usize,
+) -> std::result::Result<(), ffmpeg_next::Error> {
+ loop {
+ match decoder.receive_frame(hw_buf) {
+ Ok(()) => {
+ // Pre-transfer cap check: if we are already at or over either cap,
+ // the candidate is producing more than we can hold. Treat as an
+ // explicit candidate failure so `advance_probe` can try the next
+ // backend instead of committing a stream with silently-dropped
+ // frames in the middle.
+ //
+ // TODO: at very large frame sizes (8K HDR P010, > ~96 MiB each)
+ // even a single retained frame is significant. Future direction:
+ // memmap-backed pending frames (write to a temp file or shared
+ // memory segment) so the resident set stays bounded even when the
+ // byte cap is raised. Out of scope for v0.0.0.
+ if pending.len() >= MAX_PROBE_PENDING_FRAMES || *pending_bytes >= max_bytes {
+ tracing::warn!(
+ frames = pending.len(),
+ bytes = *pending_bytes,
+ max_frames = MAX_PROBE_PENDING_FRAMES,
+ max_bytes = max_bytes,
+ "hwdecode: probe pending cap reached; failing candidate replay"
+ );
+ // SAFETY: hw_buf is owned and valid; unref of an empty frame is a no-op.
+ unsafe { av_frame_unref(hw_buf.as_mut_ptr()) };
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ // Pre-transfer size guard: `av_hwframe_transfer_data` will
+ // allocate the CPU buffer based on `hw_buf`'s dimensions. If a
+ // single frame's worst-case footprint already pushes past the
+ // cap, refuse the candidate **before** allocating so RSS does
+ // not spike on a frame we'd immediately drop. Uses a width *
+ // height * `WORST_CASE_BYTES_PER_PIXEL` upper bound; the
+ // post-transfer accounting via `cpu_frame_bytes` below stays in
+ // place as a backstop using the actual stride/format.
+ let estimated_bytes = match estimate_transfer_bytes(hw_buf) {
+ Some(b) => b,
+ None => {
+ // SAFETY: AVFrame.width/height are c_int reads.
+ let (w, h) = unsafe {
+ let raw = hw_buf.as_ptr();
+ ((*raw).width, (*raw).height)
+ };
+ tracing::warn!(
+ width = w,
+ height = h,
+ "hwdecode: HW frame dimensions invalid for sizing; failing candidate replay"
+ );
+ unsafe { av_frame_unref(hw_buf.as_mut_ptr()) };
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ };
+ let estimated_total = pending_bytes.saturating_add(estimated_bytes);
+ if estimated_total > max_bytes {
+ // SAFETY: AVFrame.width/height are c_int reads.
+ let (w, h) = unsafe {
+ let raw = hw_buf.as_ptr();
+ ((*raw).width, (*raw).height)
+ };
+ tracing::warn!(
+ pending_bytes = *pending_bytes,
+ estimated_bytes,
+ width = w,
+ height = h,
+ max_bytes = max_bytes,
+ "hwdecode: pre-transfer size estimate exceeds cap; \
+ refusing candidate replay before allocating CPU frame"
+ );
+ unsafe { av_frame_unref(hw_buf.as_mut_ptr()) };
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ let mut cpu = alloc_av_frame()?;
+ // SAFETY: hw_buf is a freshly-decoded HW frame;
+ // `av_hwframe_transfer_data` allocates pixel buffers on `cpu`.
+ // We use `copy_frame_props_minimal` (only `pts`) instead of
+ // `av_frame_copy_props` for the same reason as
+ // `transfer_hw_frame`: the public `Frame` API does not expose
+ // side data / metadata / opaque refs, so deep-copying them per
+ // frame is pure cost and an unbounded allocation source on
+ // attacker-controlled streams.
+ unsafe {
+ let r1 = av_hwframe_transfer_data(cpu.as_mut_ptr(), hw_buf.as_ptr(), 0);
+ if r1 < 0 {
+ return Err(ffmpeg_next::Error::from(r1));
+ }
+ }
+ let pixel_bytes = match cpu_frame_bytes(&cpu) {
+ Some(b) => b,
+ None => {
+ // Unknown pix_fmt or vertically-flipped layout — we cannot
+ // bound this frame's contribution against the byte cap, so up
+ // to MAX_PROBE_PENDING_FRAMES of them could exhaust memory.
+ // Fail the candidate so probing tries the next backend
+ // rather than queueing untracked allocations.
+ // SAFETY: AVFrame.format is c_int, safe to read.
+ let pix_fmt: i32 = unsafe { (*cpu.as_ptr()).format };
+ tracing::warn!(
+ pix_fmt,
+ "hwdecode: cannot size unknown CPU pix_fmt during replay; failing candidate"
+ );
+ // cpu drops here.
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ };
+ let new_total = pending_bytes.saturating_add(pixel_bytes);
+ if new_total > max_bytes {
+ tracing::warn!(
+ pending_bytes = *pending_bytes,
+ pixel_bytes,
+ max_bytes,
+ "hwdecode: queueing this frame would exceed byte cap; \
+ failing candidate replay"
+ );
+ // cpu drops here without ever paying a metadata deep copy.
+ return Err(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ });
+ }
+ // Cap check passed — copy only the scalar AVFrame fields the
+ // public API needs. SAFETY: cpu and hw_buf are both valid
+ // AVFrames we own.
+ unsafe {
+ copy_frame_props_minimal(cpu.as_mut_ptr(), hw_buf.as_ptr());
+ }
+ *pending_bytes = new_total;
+ pending.push_back(cpu);
+ }
+ Err(e) if is_transient(&e) => return Ok(()),
+ Err(e) => return Err(e),
+ }
+ }
+}
+
+/// Conservative upper-bound estimate of the bytes
+/// `av_hwframe_transfer_data` will allocate when downloading `hw_buf` to
+/// a CPU frame. Used by [`drain_into_pending`] as a pre-transfer guard
+/// so a candidate replay can refuse a frame whose footprint would
+/// exceed the byte budget *without* first paying the allocation. The
+/// estimate is `width * height * WORST_CASE_BYTES_PER_PIXEL` — see that
+/// constant for why we err on the high side.
+///
+/// Returns `None` when the frame's `width` or `height` are not strictly
+/// positive (caller treats as candidate failure — a HW frame with
+/// non-positive dimensions cannot be transferred meaningfully).
+fn estimate_transfer_bytes(hw_buf: &frame::Video) -> Option {
+ // SAFETY: AVFrame.width / height are c_int reads.
+ let (w, h) = unsafe {
+ let raw = hw_buf.as_ptr();
+ ((*raw).width, (*raw).height)
+ };
+ if w <= 0 || h <= 0 {
+ return None;
+ }
+ Some(
+ (w as usize)
+ .saturating_mul(h as usize)
+ .saturating_mul(WORST_CASE_BYTES_PER_PIXEL),
+ )
+}
+
+/// Approximate resident size of a CPU frame: sum of `linesize[plane] *
+/// plane_height` across populated planes.
+///
+/// Returns `None` for pixel formats not in our chroma-subsampling table or
+/// for frames whose `linesize` is negative — both signal an allocation we
+/// cannot account for, so the caller refuses to queue them. Returning 0
+/// in either case would silently bypass the byte cap and let an unbounded
+/// number of large frames into `pending_frames`.
+///
+/// Distinguishes `linesize == 0` (FFmpeg's sentinel for "no more populated
+/// planes" — terminates the scan) from `linesize < 0` (FFmpeg's vertically-
+/// flipped layout — `Frame::row` rejects those as unusable, so queueing one
+/// during probe replay would only delay the failure to the consumer side
+/// while wasting `|linesize| * plane_h` bytes of unaccounted memory).
+fn cpu_frame_bytes(frame: &frame::Video) -> Option {
+ // SAFETY: AVFrame.height / format / linesize are c_int reads.
+ let (height, pix_fmt, linesizes) = unsafe {
+ let raw = frame.as_ptr();
+ ((*raw).height as usize, (*raw).format, (*raw).linesize)
+ };
+ let mut total: usize = 0;
+ let mut any_plane = false;
+ for (plane, linesize) in linesizes.iter().enumerate() {
+ if *linesize == 0 {
+ // End of populated planes — FFmpeg zeroes the trailing entries.
+ break;
+ }
+ if *linesize < 0 {
+ // Vertically-flipped layout — refuse to size so `drain_into_pending`
+ // fails the candidate. The same pre-fix code path silently returned
+ // `Some(0)` for a frame whose first plane was negative, allowing up
+ // to MAX_PROBE_PENDING_FRAMES frames of unaccounted memory.
+ return None;
+ }
+ any_plane = true;
+ let stride = *linesize as usize;
+ // If we can't size *any* populated plane, the format is outside our
+ // table — refuse to size the frame at all (conservative; discarding
+ // is safer than under-counting against the byte cap).
+ let plane_h = crate::frame::plane_height_for(pix_fmt, plane, height)?;
+ total = total.saturating_add(stride.saturating_mul(plane_h));
+ }
+ if !any_plane {
+ // Genuinely empty frame (no populated planes) — nothing to account for.
+ return Some(0);
+ }
+ Some(total)
+}
+
+#[allow(dead_code)]
+fn _assert_send() {
+ fn check() {}
+ check::();
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn no_codec_for_unknown_id() {
+ let err = Error::NoCodec(0);
+ assert!(format!("{err}").contains("no decoder"));
+ }
+
+ #[test]
+ fn videodecoder_is_send() {
+ _assert_send();
+ }
+
+ #[test]
+ fn is_transient_recognises_eagain_and_eof() {
+ let eagain = ffmpeg_next::Error::Other {
+ errno: ffmpeg_next::error::EAGAIN,
+ };
+ assert!(is_transient(&eagain));
+ assert!(is_transient(&ffmpeg_next::Error::Eof));
+ let other = ffmpeg_next::Error::InvalidData;
+ assert!(!is_transient(&other));
+ }
+
+ /// Regression: a `codec::Parameters` with a null inner pointer must be
+ /// rejected at the entrypoint, not deref'd. ffmpeg-next's
+ /// `Parameters::new()` does not check `avcodec_parameters_alloc()`, so a
+ /// safe caller can hand us such a value under OOM.
+ #[test]
+ fn open_rejects_null_parameters() {
+ // SAFETY: Parameters::wrap accepts any pointer; we explicitly construct
+ // one with null inner. avcodec_parameters_free is null-safe on Drop.
+ let null_params = unsafe { codec::Parameters::wrap(std::ptr::null_mut(), None) };
+ match VideoDecoder::open(null_params) {
+ Ok(_) => panic!("open should fail on null parameters"),
+ Err(Error::Ffmpeg(ffmpeg_next::Error::Other { errno })) => {
+ assert_eq!(errno, libc::ENOMEM, "expected ENOMEM, got {errno}");
+ }
+ Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"),
+ }
+ }
+
+ #[test]
+ fn open_with_rejects_null_parameters() {
+ // SAFETY: see open_rejects_null_parameters.
+ let null_params = unsafe { codec::Parameters::wrap(std::ptr::null_mut(), None) };
+ match VideoDecoder::open_with(null_params, Backend::VideoToolbox) {
+ Ok(_) => panic!("open_with should fail on null parameters"),
+ Err(Error::Ffmpeg(ffmpeg_next::Error::Other { errno })) => {
+ assert_eq!(errno, libc::ENOMEM, "expected ENOMEM, got {errno}");
+ }
+ Err(other) => panic!("expected Ffmpeg(Other {{ ENOMEM }}), got {other:?}"),
+ }
+ }
+
+ /// `try_clone_packet` calls `av_packet_ref`, which deep-copies side
+ /// data via `av_packet_copy_props`. The probe budget therefore has to
+ /// include side-data bytes — otherwise a stream with a 16-byte payload
+ /// and a 1 MiB side-data attachment would only consume 16 bytes of the
+ /// 64 MiB budget per packet, and 256 buffered clones would retain
+ /// ~256 MiB of side data while logs claim a few KiB.
+ #[test]
+ fn packet_side_data_counts_against_probe_budget() {
+ use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType};
+
+ const PAYLOAD_SIZE: usize = 16;
+ const SIDE_DATA_SIZE: usize = 1024 * 1024; // 1 MiB
+
+ let mut packet = Packet::new(PAYLOAD_SIZE);
+ // SAFETY: packet is a freshly allocated AVPacket; av_packet_new_side_data
+ // attaches a fresh `SIDE_DATA_SIZE`-byte buffer of the requested type
+ // to it and returns a writable pointer (or NULL on OOM).
+ let p = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA,
+ SIDE_DATA_SIZE,
+ )
+ };
+ assert!(!p.is_null(), "av_packet_new_side_data returned NULL");
+
+ assert_eq!(packet.size(), PAYLOAD_SIZE);
+ let side = packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES);
+ assert!(
+ side >= SIDE_DATA_SIZE,
+ "side-data accounting must include the attached buffer; got {side}"
+ );
+ let total = packet.size().saturating_add(side);
+ assert!(
+ total >= PAYLOAD_SIZE + SIDE_DATA_SIZE,
+ "probe budget must charge payload + side data; got {total}"
+ );
+ }
+
+ #[test]
+ fn packet_side_data_is_zero_when_no_side_data() {
+ let packet = Packet::new(64);
+ assert_eq!(
+ packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES),
+ 0
+ );
+ assert_eq!(packet_side_data_count(&packet), 0);
+ }
+
+ /// Packets with many tiny side-data entries must be charged the
+ /// per-entry descriptor + ref overhead, even when each entry's payload
+ /// `size` is zero. Without `SIDE_DATA_ENTRY_OVERHEAD`, a packet stuffed
+ /// with N zero-byte entries would charge 0 bytes against the budget
+ /// while `av_packet_ref` still allocates ~`N * 80` bytes of descriptor
+ /// + AVBufferRef + allocator overhead per cloned copy.
+ #[test]
+ fn packet_side_data_bytes_charges_descriptor_overhead_for_zero_size_entries() {
+ use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType};
+
+ let mut packet = Packet::new(0);
+ // Attach two zero-byte entries of distinct types so neither call
+ // replaces the other.
+ let p1 = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA,
+ 0,
+ )
+ };
+ let p2 = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_PALETTE,
+ 0,
+ )
+ };
+ assert!(
+ !p1.is_null() && !p2.is_null(),
+ "av_packet_new_side_data NULL"
+ );
+
+ assert_eq!(packet_side_data_count(&packet), 2);
+ let bytes = packet_side_data_bytes(&packet, MAX_PROBE_PACKET_SIDE_DATA_ENTRIES);
+ assert!(
+ bytes >= 2 * SIDE_DATA_ENTRY_OVERHEAD,
+ "must charge descriptor overhead per entry even at zero payload; got {bytes}"
+ );
+ }
+
+ /// `packet_side_data_bytes` must clamp its walk to `max_entries`
+ /// regardless of `side_data_elems`. Defense-in-depth: the caller is
+ /// expected to short-circuit packets whose count exceeds the cap, but
+ /// if a corrupt or weaponised packet ever does reach the helper, the
+ /// internal cap prevents an unbounded raw-pointer walk.
+ ///
+ /// This test attaches 5 entries of distinct types and asks the helper
+ /// to walk only the first 2. Result must equal exactly `2 * overhead +
+ /// (size_a + size_b)`, confirming entries 3-5 were not even read.
+ #[test]
+ fn packet_side_data_bytes_respects_max_entries_cap() {
+ use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType};
+
+ let mut packet = Packet::new(0);
+ // Five distinct side-data types so each `av_packet_new_side_data`
+ // call appends rather than replaces.
+ let types_and_sizes: [(AVPacketSideDataType, usize); 5] = [
+ (AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA, 100),
+ (AVPacketSideDataType::AV_PKT_DATA_PALETTE, 200),
+ (AVPacketSideDataType::AV_PKT_DATA_REPLAYGAIN, 300),
+ (AVPacketSideDataType::AV_PKT_DATA_DISPLAYMATRIX, 400),
+ (AVPacketSideDataType::AV_PKT_DATA_STEREO3D, 500),
+ ];
+ for (ty, size) in types_and_sizes {
+ let p = unsafe { av_packet_new_side_data(packet.as_mut_ptr(), ty, size) };
+ assert!(!p.is_null(), "av_packet_new_side_data returned NULL");
+ }
+ assert_eq!(packet_side_data_count(&packet), 5);
+
+ let walked_2 = packet_side_data_bytes(&packet, 2);
+ let walked_5 = packet_side_data_bytes(&packet, 5);
+
+ assert_eq!(
+ walked_2,
+ 2 * SIDE_DATA_ENTRY_OVERHEAD + 100 + 200,
+ "max_entries=2 must walk exactly the first two entries"
+ );
+ assert_eq!(
+ walked_5,
+ 5 * SIDE_DATA_ENTRY_OVERHEAD + 100 + 200 + 300 + 400 + 500,
+ "max_entries=5 must walk all five entries"
+ );
+ // max_entries=0 short-circuits to 0.
+ assert_eq!(packet_side_data_bytes(&packet, 0), 0);
+ // max_entries larger than the actual count clamps to the actual count
+ // (no out-of-bounds walk past `side_data_elems`).
+ let walked_huge = packet_side_data_bytes(&packet, 1_000_000);
+ assert_eq!(walked_huge, walked_5);
+ }
+
+ /// `MAX_PROBE_PACKET_SIDE_DATA_ENTRIES` is the cliff above which a
+ /// packet is rejected from the probe buffer regardless of byte total —
+ /// pure descriptor inflation is its own attack vector. Sanity-check
+ /// that `packet_side_data_count` reports the value the cap is checked
+ /// against.
+ #[test]
+ fn packet_side_data_count_reports_attached_entries() {
+ use ffmpeg_next::ffi::{av_packet_new_side_data, AVPacketSideDataType};
+
+ let mut packet = Packet::new(0);
+ let _p1 = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_NEW_EXTRADATA,
+ 4,
+ )
+ };
+ let _p2 = unsafe {
+ av_packet_new_side_data(
+ packet.as_mut_ptr(),
+ AVPacketSideDataType::AV_PKT_DATA_PALETTE,
+ 4,
+ )
+ };
+ assert_eq!(packet_side_data_count(&packet), 2);
+ }
+
+ /// `cpu_frame_bytes` must refuse to size a frame whose first plane has
+ /// a negative `linesize`. Pre-fix, the loop break treated negative the
+ /// same as zero (FFmpeg's "no more populated planes" sentinel), so a
+ /// vertically-flipped frame returned `Some(0)` and `drain_into_pending`
+ /// would queue it as a 0-byte allocation — letting up to
+ /// `MAX_PROBE_PENDING_FRAMES` such frames bypass the configured byte
+ /// budget entirely.
+ #[test]
+ fn cpu_frame_bytes_rejects_negative_first_plane_linesize() {
+ let mut f = frame::Video::empty();
+ // SAFETY: f is freshly allocated; we set `format` to NV12 and the
+ // first plane's linesize negative (FFmpeg's vertical-flip convention).
+ // No backing data buffer is allocated — cpu_frame_bytes must reject
+ // before any pointer dereference.
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).format = crate::pix_fmt::NV12;
+ (*raw).width = 1920;
+ (*raw).height = 1080;
+ (*raw).linesize[0] = -1920;
+ (*raw).linesize[1] = -1920;
+ }
+ assert!(
+ cpu_frame_bytes(&f).is_none(),
+ "negative linesize must be unsizeable, not Some(0)"
+ );
+ }
+
+ /// Sanity-check the positive path: a synthesized NV12 frame with valid
+ /// linesizes must report the sum across populated planes (Y full height
+ /// + UV half height).
+ #[test]
+ fn cpu_frame_bytes_sums_populated_planes() {
+ let mut f = frame::Video::empty();
+ let stride = 1920usize;
+ let height = 1080usize;
+ // SAFETY: same scheme as above; we only mutate primitive struct fields.
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).format = crate::pix_fmt::NV12;
+ (*raw).width = 1920;
+ (*raw).height = height as i32;
+ (*raw).linesize[0] = stride as i32;
+ (*raw).linesize[1] = stride as i32;
+ }
+ let expected = stride * height + stride * (height / 2);
+ assert_eq!(cpu_frame_bytes(&f), Some(expected));
+ }
+
+ /// A frame with only a zero linesize in plane 0 is "no populated
+ /// planes" — must return `Some(0)`, not `None`. Distinguishes the
+ /// FFmpeg sentinel from the vertically-flipped layout.
+ #[test]
+ fn cpu_frame_bytes_zero_first_plane_returns_zero() {
+ let f = frame::Video::empty();
+ // Default-allocated empty AVFrame already has all linesizes zero.
+ assert_eq!(cpu_frame_bytes(&f), Some(0));
+ }
+
+ /// `estimate_transfer_bytes` is the pre-transfer size guard for
+ /// `drain_into_pending`: it must compute `width * height *
+ /// WORST_CASE_BYTES_PER_PIXEL` so the candidate replay can refuse a
+ /// frame *before* `av_hwframe_transfer_data` allocates.
+ #[test]
+ fn estimate_transfer_bytes_uses_worst_case_per_pixel() {
+ let mut f = frame::Video::empty();
+ // SAFETY: f is freshly allocated; we set width/height directly.
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).width = 1920;
+ (*raw).height = 1080;
+ }
+ assert_eq!(
+ estimate_transfer_bytes(&f),
+ Some(1920 * 1080 * WORST_CASE_BYTES_PER_PIXEL),
+ );
+ }
+
+ /// Non-positive dimensions surface as `None` so `drain_into_pending`
+ /// fails the candidate before allocating anything. A zero-width or
+ /// zero-height frame would silently yield a 0-byte estimate under the
+ /// raw multiplication, letting the cap check pass and exposing the
+ /// allocation path to whatever the actual transfer would do.
+ #[test]
+ fn estimate_transfer_bytes_rejects_non_positive_dimensions() {
+ let mut f = frame::Video::empty();
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).width = 0;
+ (*raw).height = 1080;
+ }
+ assert!(estimate_transfer_bytes(&f).is_none());
+
+ unsafe {
+ (*f.as_mut_ptr()).width = 1920;
+ (*f.as_mut_ptr()).height = -1;
+ }
+ assert!(estimate_transfer_bytes(&f).is_none());
+ }
+
+ /// 8K HDR P010 has actual ~96 MiB resident size; the estimate should
+ /// over-charge it (the right side to err on for a memory cap) while
+ /// still fitting within the configurable
+ /// [`DEFAULT_MAX_PROBE_PENDING_BYTES`] cap (256 MiB) for a single
+ /// frame so a default-configured decoder is not forced to reject 8K
+ /// streams outright.
+ #[test]
+ fn estimate_transfer_bytes_8k_fits_default_cap() {
+ let mut f = frame::Video::empty();
+ unsafe {
+ let raw = f.as_mut_ptr();
+ (*raw).width = 7680;
+ (*raw).height = 4320;
+ }
+ let estimate = estimate_transfer_bytes(&f).expect("8K is sizable");
+ // ~256 MiB exactly — at-or-just-under the default cap.
+ assert!(
+ estimate <= DEFAULT_MAX_PROBE_PENDING_BYTES,
+ "8K estimate {estimate} must fit DEFAULT_MAX_PROBE_PENDING_BYTES \
+ {DEFAULT_MAX_PROBE_PENDING_BYTES}; otherwise the default cap rejects \
+ even a single 8K frame at probe time"
+ );
+ // And strictly larger than a typical 8K P010 (~96 MiB) so the guard
+ // is actually conservative, not under-charging.
+ assert!(
+ estimate > 96 * 1024 * 1024,
+ "estimate must over-charge real 8K P010 to bound the worst case; got {estimate}"
+ );
+ }
+
+ /// `PartialBuildState`'s `Drop` must be a no-op when both pointers are
+ /// null — the disarmed-by-`into_owned` post-state. A panic / double-free
+ /// here would break the success path of every `build_state` call.
+ #[test]
+ fn partial_build_state_drop_is_no_op_on_null_pointers() {
+ let _g = PartialBuildState {
+ hw_device_ref: ptr::null_mut(),
+ callback_state: ptr::null_mut(),
+ };
+ // Drops at end of scope. Test passes if it doesn't panic / crash.
+ }
+
+ /// `into_owned` must return the original pointers and disarm the guard
+ /// (so the guard's Drop becomes a no-op and the caller can safely
+ /// transfer ownership to `DecoderState` without double-freeing).
+ #[test]
+ fn partial_build_state_into_owned_disarms_and_returns_originals() {
+ use ffmpeg_next::ffi::{av_buffer_alloc, av_buffer_unref, AVPixelFormat};
+
+ // SAFETY: av_buffer_alloc returns a fresh AVBufferRef* with refcount
+ // 1, or NULL on OOM. We free it ourselves below (after into_owned
+ // disarms the guard).
+ let hw_ptr = unsafe { av_buffer_alloc(64) };
+ assert!(!hw_ptr.is_null(), "av_buffer_alloc(64) returned NULL");
+ let cb_ptr = Box::into_raw(Box::new(CallbackState {
+ wanted: AVPixelFormat::AV_PIX_FMT_NONE,
+ wanted_int: AVPixelFormat::AV_PIX_FMT_NONE as i32,
+ }));
+
+ let g = PartialBuildState {
+ hw_device_ref: hw_ptr,
+ callback_state: cb_ptr,
+ };
+ let (hw_back, cb_back) = g.into_owned();
+ assert_eq!(
+ hw_back, hw_ptr,
+ "into_owned must return the original device ref"
+ );
+ assert_eq!(
+ cb_back, cb_ptr,
+ "into_owned must return the original callback box"
+ );
+
+ // Guard is now disarmed (its Drop ran with null pointers as soon as
+ // into_owned consumed it). We own the pointers and must free them.
+ // SAFETY: hw_ptr and cb_ptr are still the freshly-allocated values.
+ unsafe {
+ let mut hw = hw_back;
+ av_buffer_unref(&mut hw);
+ drop(Box::from_raw(cb_back));
+ }
+ }
+
+ /// Probe-abandon paths in `send_packet` (cap exceeded, packet clone
+ /// failed) must not drop frames already queued in `pending_frames`.
+ /// Those frames belong to the currently active backend — possibly a
+ /// candidate that `advance_probe` just committed earlier in the same
+ /// `send_packet` call — and are valid output the caller will dequeue
+ /// via `receive_frame`.
+ ///
+ /// Pre-fix, both abandon branches called `pending_frames.clear()`
+ /// alongside `self.probe = None;`, silently dropping initial frames at
+ /// exactly the cap-overflow / OOM-stress paths.
+ ///
+ /// Live HW required: a real `VideoDecoder` is the only way to construct
+ /// a valid `DecoderState` (its `Drop` invokes FFmpeg cleanup), and
+ /// `send_packet` must reach the Ok branch on a real decoder for the
+ /// cap check to fire.
+ #[test]
+ #[ignore = "requires HWDECODE_SAMPLE_VIDEO and a working hardware backend"]
+ fn cap_overflow_preserves_pending_frames_from_active_backend() {
+ use ffmpeg_next::{format, media};
+
+ let path = std::env::var_os("HWDECODE_SAMPLE_VIDEO")
+ .expect("HWDECODE_SAMPLE_VIDEO must be set for this test");
+
+ ffmpeg_next::init().expect("ffmpeg init");
+ let mut input = format::input(&path).expect("open input");
+ let stream_index = input
+ .streams()
+ .best(media::Type::Video)
+ .expect("video stream")
+ .index();
+ let stream_params = input
+ .streams()
+ .best(media::Type::Video)
+ .expect("video stream")
+ .parameters();
+
+ let mut decoder = VideoDecoder::open(stream_params).expect("open decoder");
+ assert!(
+ decoder.probe.is_some(),
+ "probe must be active immediately after open"
+ );
+
+ // Inject sentinel frames as if `advance_probe` had drained them from
+ // a freshly-committed candidate during this same send_packet call.
+ decoder.pending_frames.push_back(frame::Video::empty());
+ decoder.pending_frames.push_back(frame::Video::empty());
+ let pending_before = decoder.pending_frames.len();
+
+ // Fast-forward the probe state to the byte cap so the next successful
+ // send_packet trips the cap-overflow branch.
+ decoder
+ .probe
+ .as_mut()
+ .expect("probe present")
+ .buffered_bytes = MAX_PROBE_PACKET_BYTES;
+
+ // Find the first video packet and feed it. We don't care whether the
+ // underlying decoder actually accepts it cleanly; we only need to
+ // exercise the Ok branch's cap-overflow accounting at least once.
+ let mut hit_ok = false;
+ for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ if decoder.send_packet(&packet).is_ok() {
+ hit_ok = true;
+ break;
+ }
+ }
+ assert!(
+ hit_ok,
+ "expected at least one send_packet to succeed and trigger the cap-overflow branch"
+ );
+
+ assert!(
+ decoder.probe.is_none(),
+ "probe must be abandoned after cap overflow"
+ );
+ assert_eq!(
+ decoder.pending_frames.len(),
+ pending_before,
+ "pending_frames belong to the active backend; abandon must not drop them"
+ );
+ }
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..955d215
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,43 @@
+use crate::backend::Backend;
+
+/// Crate result alias.
+pub type Result = std::result::Result;
+
+/// Errors returned from [`crate::VideoDecoder`].
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+ /// An underlying FFmpeg error.
+ #[error("ffmpeg error: {0}")]
+ Ffmpeg(#[from] ffmpeg_next::Error),
+
+ /// `avcodec_find_decoder` returned null for the input codec id. The id
+ /// is reported as the raw integer (`AVCodecID` discriminant) — we do not
+ /// construct the bindgen `AVCodecID` enum from a runtime value, since
+ /// values outside our build's discriminant set would invoke UB.
+ #[error("no decoder for codec id {0}")]
+ NoCodec(u32),
+
+ /// The codec does not advertise a hardware configuration matching the
+ /// requested backend (via `avcodec_get_hw_config`).
+ #[error("codec does not support backend {0:?}")]
+ BackendUnsupportedByCodec(Backend),
+
+ /// `av_hwdevice_ctx_create` failed for the requested backend.
+ #[error("hardware device init failed for {backend:?}: {source}")]
+ HwDeviceInitFailed {
+ /// Backend that failed to initialise.
+ backend: Backend,
+ /// Underlying FFmpeg error.
+ source: ffmpeg_next::Error,
+ },
+
+ /// Auto-probe exhausted every backend in the platform's order. Empty
+ /// `attempts` means the platform has no hardware backends listed in
+ /// [`crate::Backend`] for the current `target_os` — callers must
+ /// fall back to a software decoder of their choice.
+ #[error("all hardware backends failed; attempts: {attempts:?}")]
+ AllBackendsFailed {
+ /// Per-backend errors collected during probing, in the order tried.
+ attempts: Vec<(Backend, Box)>,
+ },
+}
diff --git a/src/ffi.rs b/src/ffi.rs
new file mode 100644
index 0000000..04aa50f
--- /dev/null
+++ b/src/ffi.rs
@@ -0,0 +1,272 @@
+//! FFI shims used by the decoder. Kept in one place so the unsafe surface is
+//! easy to audit.
+//!
+//! All reads of `AVPixelFormat` / `AVHWDeviceType` values returned by FFmpeg
+//! at runtime go through `ptr::read::` after a pointer cast, never
+//! through the bindgen-generated Rust enum. The enums are `#[repr(i32)]`
+//! and constructing them from a value not in the listed discriminants is
+//! undefined behavior — exactly the situation header/library skew creates.
+//! See the doc comments on individual functions for what is read as raw
+//! integer vs. constructed from a known constant.
+
+use std::ptr;
+
+use ffmpeg_next::ffi::{
+ avcodec_get_hw_config, AVCodec, AVCodecContext, AVHWDeviceType, AVPixelFormat,
+ AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX,
+};
+
+/// State pointed to by `AVCodecContext::opaque` so [`get_hw_format`] can pick
+/// the correct hardware pixel format without globals. One instance per
+/// decoder; freed by [`crate::VideoDecoder`] after the codec context is
+/// dropped.
+///
+/// `wanted` is set from a hardcoded `AVPixelFormat` constant in our bindings
+/// (via `Backend::hw_pixel_format`), so it is always a valid enum value. We
+/// also store its raw `i32` so the callback can compare against the offered
+/// list without going through enum reads.
+#[repr(C)]
+pub(crate) struct CallbackState {
+ /// Hardware pixel format we want the decoder to produce. Constructed
+ /// from a known constant; safe to use as the callback's return value.
+ pub(crate) wanted: AVPixelFormat,
+ /// Same value as `wanted` cast to `i32`, cached so the callback's
+ /// pix_fmts walk doesn't have to convert per iteration.
+ pub(crate) wanted_int: i32,
+}
+
+/// `AVCodecContext::get_format` callback. FFmpeg invokes it with the list of
+/// pixel formats the codec is willing to output for the current stream.
+///
+/// The offered list is walked as `*const i32` (cast from `*const AVPixelFormat`)
+/// to avoid constructing the bindgen enum from values that may not be in our
+/// build's discriminant set. The return value is either `wanted` (a known
+/// constant) or `AV_PIX_FMT_NONE` (also a known constant) — both safe to
+/// produce as `AVPixelFormat`.
+pub(crate) unsafe extern "C" fn get_hw_format(
+ ctx: *mut AVCodecContext,
+ pix_fmts: *const AVPixelFormat,
+) -> AVPixelFormat {
+ debug_assert!(!ctx.is_null());
+ debug_assert!(!pix_fmts.is_null());
+
+ // SAFETY: opaque was set by `try_open` to a valid `Box`
+ // pointer that outlives the codec context (we only free it after the
+ // codec context's drop runs). When opaque is null we treat the call as
+ // strict — a stray invocation cannot silently downgrade.
+ let state = unsafe { (*ctx).opaque as *const CallbackState };
+ let (wanted, wanted_int) = if state.is_null() {
+ (
+ AVPixelFormat::AV_PIX_FMT_NONE,
+ AVPixelFormat::AV_PIX_FMT_NONE as i32,
+ )
+ } else {
+ unsafe { ((*state).wanted, (*state).wanted_int) }
+ };
+
+ // Walk the offered list as i32. The pointer cast is sound because
+ // `AVPixelFormat` is `#[repr(i32)]` (same size and alignment as i32).
+ // Reading as i32 cannot be UB regardless of the value FFmpeg wrote.
+ let mut p = pix_fmts as *const i32;
+ let none_int = AVPixelFormat::AV_PIX_FMT_NONE as i32;
+ loop {
+ // SAFETY: FFmpeg guarantees the list is terminated by AV_PIX_FMT_NONE.
+ // We bail at the sentinel; reads up to and including it are in-bounds.
+ let v = unsafe { ptr::read(p) };
+ if v == none_int {
+ return AVPixelFormat::AV_PIX_FMT_NONE;
+ }
+ if v == wanted_int {
+ return wanted;
+ }
+ p = unsafe { p.add(1) };
+ }
+}
+
+/// Walk the codec's `AVCodecHWConfig` table and return whether the codec
+/// advertises support for `device_type` **with** `wanted_pix_fmt` via the
+/// `HW_DEVICE_CTX` setup method.
+///
+/// FFmpeg's HW config table is keyed per (device_type, pix_fmt) pair: a
+/// codec can advertise the same device with several different hardware
+/// pixel formats (e.g. VAAPI codecs that offer both `AV_PIX_FMT_VAAPI`
+/// and `AV_PIX_FMT_DRM_PRIME`). Matching only on `device_type` would let
+/// us proceed to install a strict `get_format` callback for a format the
+/// codec never advertises, and the failure would surface deep inside the
+/// probe / decode path instead of up front. Requiring the codec to
+/// advertise the **exact** pix_fmt our `Backend` uses keeps the strict
+/// `get_format` honest and gives `open_with` a clean rejection signal.
+///
+/// All reads from the FFmpeg-supplied `AVCodecHWConfig` are performed as
+/// raw integers via `addr_of!` + `ptr::read::` to avoid copying or
+/// interpreting enum-typed fields whose runtime values might not match
+/// our build's discriminant set.
+pub(crate) fn codec_supports_hwaccel(
+ codec: *const AVCodec,
+ device_type: AVHWDeviceType,
+ wanted_pix_fmt: i32,
+) -> bool {
+ debug_assert!(!codec.is_null());
+ let device_type_int = device_type as i32;
+ let mut i = 0;
+ loop {
+ // SAFETY: `avcodec_get_hw_config` returns null past the end; we stop then.
+ let cfg = unsafe { avcodec_get_hw_config(codec, i) };
+ if cfg.is_null() {
+ return false;
+ }
+ // Read each field as raw integer rather than copying the whole struct
+ // (which would interpret `pix_fmt` and `device_type` as their enum types).
+ // SAFETY: `cfg` is non-null and points to a valid `AVCodecHWConfig` for
+ // the lifetime of the call; `addr_of!` projects to a sized field; the
+ // `*const i32` cast is sound because `methods` is `c_int` (i32),
+ // `device_type` is `AVHWDeviceType` (`#[repr(u32)]`, but FFmpeg's
+ // assigned values fit in i32 and the runtime layout is i32-sized),
+ // and `pix_fmt` is `AVPixelFormat` (`#[repr(i32)]`).
+ let methods: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).methods)) };
+ let cfg_device_type_int: i32 =
+ unsafe { ptr::read(ptr::addr_of!((*cfg).device_type) as *const i32) };
+ let cfg_pix_fmt_int: i32 = unsafe { ptr::read(ptr::addr_of!((*cfg).pix_fmt) as *const i32) };
+
+ if methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0
+ && cfg_device_type_int == device_type_int
+ && cfg_pix_fmt_int == wanted_pix_fmt
+ {
+ return true;
+ }
+ i += 1;
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // The callback derefs `(*ctx).opaque`, so we need a real-looking
+ // AVCodecContext. We construct a zeroed one (the callback only reads opaque).
+ struct FakeCtx(*mut AVCodecContext);
+ impl FakeCtx {
+ fn new(state: *mut CallbackState) -> Self {
+ let boxed: Box = unsafe { Box::new(std::mem::zeroed()) };
+ let raw = Box::into_raw(boxed);
+ unsafe { (*raw).opaque = state.cast() };
+ Self(raw)
+ }
+ }
+ impl Drop for FakeCtx {
+ fn drop(&mut self) {
+ unsafe { drop(Box::from_raw(self.0)) };
+ }
+ }
+
+ fn make_state(wanted: AVPixelFormat) -> CallbackState {
+ CallbackState {
+ wanted,
+ wanted_int: wanted as i32,
+ }
+ }
+
+ fn run(state: &CallbackState, mut offered: Vec) -> AVPixelFormat {
+ // Build the offered list as raw i32, terminated by AV_PIX_FMT_NONE.
+ offered.push(AVPixelFormat::AV_PIX_FMT_NONE as i32);
+ let ctx = FakeCtx::new(state as *const _ as *mut _);
+ // SAFETY: we cast the i32 buffer pointer to *const AVPixelFormat
+ // because that's the function's declared signature. The callback only
+ // ever reads through *const i32 internally, so this transit through
+ // *const AVPixelFormat is purely a type system formality.
+ unsafe { get_hw_format(ctx.0, offered.as_ptr() as *const AVPixelFormat) }
+ }
+
+ #[test]
+ fn returns_wanted_hw_format_when_offered() {
+ let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX);
+ let got = run(
+ &state,
+ vec![
+ AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX as i32,
+ AVPixelFormat::AV_PIX_FMT_NV12 as i32,
+ ],
+ );
+ assert_eq!(got, AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX);
+ }
+
+ #[test]
+ fn returns_none_when_wanted_absent() {
+ let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX);
+ let got = run(
+ &state,
+ vec![
+ AVPixelFormat::AV_PIX_FMT_NV12 as i32,
+ AVPixelFormat::AV_PIX_FMT_YUV420P as i32,
+ ],
+ );
+ assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE);
+ }
+
+ #[test]
+ fn null_opaque_is_treated_as_strict() {
+ let boxed: Box = unsafe { Box::new(std::mem::zeroed()) };
+ let ctx_raw = Box::into_raw(boxed);
+ unsafe { (*ctx_raw).opaque = ptr::null_mut() };
+ let offered = [
+ AVPixelFormat::AV_PIX_FMT_NV12 as i32,
+ AVPixelFormat::AV_PIX_FMT_NONE as i32,
+ ];
+ let got = unsafe { get_hw_format(ctx_raw, offered.as_ptr() as *const AVPixelFormat) };
+ assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE);
+ unsafe { drop(Box::from_raw(ctx_raw)) };
+ }
+
+ #[test]
+ fn unknown_offered_value_is_skipped_without_ub() {
+ // Simulate a header-skewed FFmpeg that offers a pixel-format value we
+ // don't have a binding constant for (e.g. some future format). The
+ // callback walks the list as i32 — no enum is constructed from that
+ // value, so this read is sound.
+ let state = make_state(AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX);
+ let got = run(
+ &state,
+ vec![
+ 99_999_i32, // imaginary unknown
+ AVPixelFormat::AV_PIX_FMT_NV12 as i32,
+ ],
+ );
+ assert_eq!(got, AVPixelFormat::AV_PIX_FMT_NONE);
+ }
+
+ /// `codec_supports_hwaccel` must reject a (device_type, pix_fmt) pair
+ /// that the codec does not advertise — even if the device alone is
+ /// listed. Without this check, the strict `get_format` callback would
+ /// be wired up for a HW pix_fmt the codec never offers and the failure
+ /// would surface deep inside the probe / decode path instead of at
+ /// `open_with` / probe-build time.
+ ///
+ /// macOS-only: the test relies on FFmpeg's H.264 decoder advertising
+ /// `(AV_HWDEVICE_TYPE_VIDEOTOOLBOX, AV_PIX_FMT_VIDEOTOOLBOX)`, which is
+ /// only present in builds with VideoToolbox compiled in.
+ #[cfg(target_os = "macos")]
+ #[test]
+ fn codec_supports_hwaccel_requires_matching_pix_fmt() {
+ use ffmpeg_next::ffi::{avcodec_find_decoder, AVCodecID, AVHWDeviceType, AVPixelFormat};
+
+ // SAFETY: AV_CODEC_ID_H264 is a known constant in our build's
+ // `AVCodecID` discriminant set; constructing it does not invoke the
+ // bindgen-enum UB we worry about for runtime-derived ids.
+ let codec_ptr = unsafe { avcodec_find_decoder(AVCodecID::AV_CODEC_ID_H264) };
+ assert!(!codec_ptr.is_null(), "H.264 decoder must be present");
+
+ let device = AVHWDeviceType::AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
+ let videotoolbox = AVPixelFormat::AV_PIX_FMT_VIDEOTOOLBOX as i32;
+ let nv12 = AVPixelFormat::AV_PIX_FMT_NV12 as i32;
+
+ assert!(
+ codec_supports_hwaccel(codec_ptr, device, videotoolbox),
+ "VideoToolbox + AV_PIX_FMT_VIDEOTOOLBOX must be advertised by FFmpeg's H.264 decoder"
+ );
+ assert!(
+ !codec_supports_hwaccel(codec_ptr, device, nv12),
+ "VideoToolbox + AV_PIX_FMT_NV12 must NOT match the codec's HW config — \
+ the strict get_format would have no offered HW format to return"
+ );
+ }
+}
diff --git a/src/frame.rs b/src/frame.rs
new file mode 100644
index 0000000..4642184
--- /dev/null
+++ b/src/frame.rs
@@ -0,0 +1,630 @@
+//! CPU-side decoded video frame.
+//!
+//! Wraps `ffmpeg_next::frame::Video`. All accessors read from raw `AVFrame`
+//! fields (`format`, `linesize`, `data`, `width`, `height`, `pts`) directly
+//! and never go through ffmpeg-next's `Video::format()` / `plane_height()`
+//! / `plane_width()` / `data()` — those construct `AVPixelFormat` from the
+//! frame's raw `format` integer via `transmute`, which is undefined behavior
+//! when the value isn't in the build's bindgen-generated discriminant set
+//! (the exact failure mode this crate is designed to survive).
+//!
+//! Per-row sizes for [`Frame::row`] / [`Frame::rows`] are computed from
+//! hardcoded chroma-subsampling and bit-depth tables keyed on the safe
+//! `pix_fmt()` integer, covering only the formats `hwdecode` produces (the
+//! NV* and P0xx/P2xx/P4xx families after `av_hwframe_transfer_data`). For
+//! any other format, the row accessors return `None` rather than guessing
+//! at a slice length.
+//!
+//! Why per-row, not whole-plane: FFmpeg allocates each row at
+//! `linesize[plane]` ([`Frame::stride`]) bytes for SIMD alignment, but
+//! hardware transfer paths only initialize the first
+//! [`Frame::row_bytes`]`(plane)` of every row. Exposing a stride-inclusive
+//! `&[u8]` over an entire plane would let safe code observe those
+//! uninitialized padding bytes, which violates `slice::from_raw_parts`.
+//! Per-row slices are tightly clipped to the visible byte width so the
+//! safe API never hands out an uninitialized byte. Callers that need a
+//! single base pointer (e.g. SIMD pixel converters keyed off stride) can
+//! reach for [`Frame::as_ptr`] and consume `stride * plane_h` bytes
+//! themselves under their own `unsafe` contract.
+//!
+//! Compare formats against integer constants in [`crate::pix_fmt`].
+
+use std::slice;
+
+use ffmpeg_next::frame;
+
+use crate::{
+ error::{Error, Result},
+ pix_fmt,
+};
+
+/// CPU-side decoded video frame produced by [`crate::VideoDecoder`].
+pub struct Frame {
+ inner: frame::Video,
+}
+
+impl Frame {
+ /// Construct an empty frame, suitable as the destination passed to
+ /// [`crate::VideoDecoder::receive_frame`].
+ ///
+ /// Returns `Err(Error::Ffmpeg(Other { errno: ENOMEM }))` when the
+ /// underlying `av_frame_alloc()` returns NULL — `ffmpeg_next` does not
+ /// surface that failure, so we check it here rather than letting a null
+ /// pointer flow into the safe accessors and become UB on first read.
+ pub fn empty() -> Result {
+ // SAFETY: as_ptr() is safe; we just inspect the value (potentially null).
+ let inner = frame::Video::empty();
+ if unsafe { inner.as_ptr() }.is_null() {
+ return Err(Error::Ffmpeg(ffmpeg_next::Error::Other {
+ errno: libc::ENOMEM,
+ }));
+ }
+ Ok(Self { inner })
+ }
+
+ /// Width in pixels.
+ pub fn width(&self) -> u32 {
+ // SAFETY: AVFrame.width is c_int; safe to read regardless of value.
+ unsafe { (*self.inner.as_ptr()).width as u32 }
+ }
+
+ /// Height in pixels.
+ pub fn height(&self) -> u32 {
+ // SAFETY: AVFrame.height is c_int.
+ unsafe { (*self.inner.as_ptr()).height as u32 }
+ }
+
+ /// Pixel format, returned as the raw `i32` value FFmpeg wrote to
+ /// `AVFrame.format`. Sound regardless of the linked FFmpeg version —
+ /// no `AVPixelFormat` enum is constructed.
+ ///
+ /// Compare against constants in [`crate::pix_fmt`].
+ pub fn pix_fmt(&self) -> i32 {
+ // SAFETY: AVFrame.format is bound as c_int.
+ unsafe { (*self.inner.as_ptr()).format }
+ }
+
+ /// Presentation timestamp in stream time base, or `None` for
+ /// `AV_NOPTS_VALUE`.
+ pub fn pts(&self) -> Option {
+ // ffmpeg-next's Frame::pts performs no enum conversion; safe to use.
+ self.inner.pts()
+ }
+
+ /// Number of populated planes (1 for packed formats, 2 for NV12/P010,
+ /// 3 for planar YUV, etc.). Computed by scanning `linesize` for the
+ /// first zero entry — no enum reads.
+ pub fn planes(&self) -> usize {
+ // SAFETY: AVFrame.linesize is `[c_int; 8]`; reads are sound.
+ unsafe {
+ let linesize = &(*self.inner.as_ptr()).linesize;
+ for (i, ls) in linesize.iter().enumerate() {
+ if *ls == 0 {
+ return i;
+ }
+ }
+ linesize.len()
+ }
+ }
+
+ /// Bytes per row for `plane`. Reads `AVFrame.linesize[plane]` directly.
+ /// Panics if `plane >= planes()` or the linesize is non-positive (FFmpeg
+ /// allows negative linesize for vertically-flipped formats; this crate
+ /// does not surface those — call [`Self::data`] first to test safely).
+ pub fn stride(&self, plane: usize) -> usize {
+ let n = self.planes();
+ assert!(
+ plane < n,
+ "stride: plane {plane} out of bounds (planes={n})"
+ );
+ // SAFETY: bounds-checked above; linesize is `[c_int; 8]`.
+ let linesize: i32 = unsafe { (*self.inner.as_ptr()).linesize[plane] };
+ assert!(
+ linesize > 0,
+ "stride: non-positive linesize {linesize} for plane {plane} \
+ (negative linesize means vertically-flipped — not supported)"
+ );
+ linesize as usize
+ }
+
+ /// Visible byte width of `plane` — the number of initialized bytes at
+ /// the start of every row in that plane.
+ ///
+ /// Distinct from [`Self::stride`], which returns the FFmpeg `linesize`.
+ /// `linesize` is `>= row_bytes` and may include trailing alignment
+ /// padding bytes that FFmpeg's hardware transfer paths do not
+ /// initialize. `row_bytes` is what `slice::from_raw_parts` can safely
+ /// see.
+ ///
+ /// Returns `None` when the format is not in the supported HW-output set
+ /// (see crate `pix_fmt`) or the plane is out of range.
+ pub fn row_bytes(&self, plane: usize) -> Option {
+ if plane >= self.planes() {
+ return None;
+ }
+ plane_row_bytes_for(self.pix_fmt(), plane, self.width() as usize)
+ }
+
+ /// Pixel data for one row of `plane`, tightly clipped to the visible
+ /// byte width ([`Self::row_bytes`]).
+ ///
+ /// Excludes the trailing alignment padding that [`Self::stride`]
+ /// includes — those bytes are not guaranteed to be initialized by
+ /// FFmpeg's hardware transfer paths and must not be exposed through a
+ /// safe `&[u8]`.
+ ///
+ /// Returns `None` for any of the following — never panics:
+ /// - The frame's pixel format is not one of the supported hardware-
+ /// output formats listed in [`crate::pix_fmt`].
+ /// - The plane index is out of range.
+ /// - `y` is past the plane's row count.
+ /// - `AVFrame.linesize[plane]` is `<= 0` or `AVFrame.height` is `<= 0`.
+ /// - The plane's data pointer is null.
+ /// - The plane size would overflow `isize::MAX`.
+ pub fn row(&self, plane: usize, y: usize) -> Option<&[u8]> {
+ let info = self.plane_info(plane)?;
+ if y >= info.plane_h {
+ return None;
+ }
+ // y < plane_h and plane_h * stride ≤ isize::MAX (verified in plane_info),
+ // so y * stride is bounded by (plane_h - 1) * stride ≤ isize::MAX.
+ let offset = y * info.stride;
+ // SAFETY:
+ // - `info.plane_ptr` is non-null (verified in plane_info).
+ // - `offset + row_bytes ≤ plane_h * stride`, which is the size of the
+ // FFmpeg allocation for this plane.
+ // - Bytes 0..row_bytes of every row are written by FFmpeg's HW
+ // transfer; the slice is fully initialized.
+ // - `row_bytes ≤ stride ≤ isize::MAX` per plane_info.
+ unsafe {
+ let row_ptr = info.plane_ptr.add(offset);
+ Some(slice::from_raw_parts(row_ptr, info.row_bytes))
+ }
+ }
+
+ /// Iterator over every row of `plane`. Each yielded slice has length
+ /// [`Self::row_bytes`]`(plane)` — never includes the trailing alignment
+ /// padding that lives within [`Self::stride`].
+ ///
+ /// Returns `None` under the same conditions as [`Self::row`].
+ pub fn rows(&self, plane: usize) -> Option + '_> {
+ let info = self.plane_info(plane)?;
+ Some((0..info.plane_h).map(move |y| {
+ // Same bounds argument as `row()`.
+ let offset = y * info.stride;
+ // SAFETY: see `row()` — the same invariants hold here, and the
+ // iterator's lifetime is tied to `&self` so the pointer remains
+ // valid for every yielded slice.
+ unsafe { slice::from_raw_parts(info.plane_ptr.add(offset), info.row_bytes) }
+ }))
+ }
+
+ /// Raw base pointer to `plane`'s allocation, or `None` if the plane
+ /// fails the same layout validation [`Self::row`] applies.
+ ///
+ /// Returns `None` whenever any of the following is true:
+ /// - The plane index is out of range (`plane >= planes()`).
+ /// - The frame's pixel format is not in the supported HW-output set.
+ /// - `linesize[plane] <= 0`. **In particular, FFmpeg permits negative
+ /// linesizes for vertically-flipped frames with `data[n]` pointing
+ /// at the *end* of the image. Returning that pointer with the
+ /// advertised "valid for `stride * plane_h` bytes forward" contract
+ /// would let a downstream converter walk past the buffer.** This
+ /// accessor refuses the layout instead of handing back a pointer the
+ /// caller cannot safely interpret as forward-addressable.
+ /// - `height <= 0`, the data pointer is null, `row_bytes > stride`, or
+ /// the total plane size would overflow `isize::MAX`.
+ ///
+ /// On `Some(ptr)` the pointer is valid for
+ /// `stride(plane) * plane_height` *forward-addressable* bytes, and
+ /// only the first [`Self::row_bytes`]`(plane)` bytes of each row are
+ /// guaranteed to be initialized. The trailing per-row alignment padding
+ /// is uninitialized; callers performing wide SIMD loads that read past
+ /// `row_bytes` must mask the result and never surface those bytes
+ /// through a safe `&[u8]`.
+ ///
+ /// This accessor exists for downstream pixel-format converters
+ /// (`colconv`) that work in `(ptr, stride, width, height)` quadruples;
+ /// safe code should prefer [`Self::row`] / [`Self::rows`].
+ pub fn as_ptr(&self, plane: usize) -> Option<*const u8> {
+ // Share the full plane-layout validation so the unsafe escape hatch
+ // never escapes a layout that `row()` / `rows()` reject. Returning a
+ // pointer for a negative-stride frame (FFmpeg's vertical-flip
+ // convention, where `data[n]` points at the *end* of the image)
+ // would invite forward-walking out-of-bounds reads from a caller
+ // that trusts the documented "valid for stride × plane_h bytes"
+ // contract.
+ self.plane_info(plane).map(|info| info.plane_ptr)
+ }
+
+ /// Read every per-plane field needed by the row accessors with the
+ /// safety preconditions enforced once.
+ fn plane_info(&self, plane: usize) -> Option {
+ if plane >= self.planes() {
+ return None;
+ }
+ // SAFETY: bounds-checked plane index; linesize/height/data are raw
+ // c_int / pointer reads that cannot themselves be UB.
+ let (stride_int, height_int, plane_ptr) = unsafe {
+ let raw = self.inner.as_ptr();
+ ((*raw).linesize[plane], (*raw).height, (*raw).data[plane])
+ };
+ if stride_int <= 0 || height_int <= 0 || plane_ptr.is_null() {
+ return None;
+ }
+ let stride = stride_int as usize;
+ let plane_h = plane_height_for(self.pix_fmt(), plane, height_int as usize)?;
+ let row_bytes = plane_row_bytes_for(self.pix_fmt(), plane, self.width() as usize)?;
+ if row_bytes > stride {
+ return None;
+ }
+ // Bound the entire plane allocation to isize::MAX so any byte offset
+ // computed as `y * stride` (y < plane_h) stays representable, satisfying
+ // the safety contract of `pointer::add` and `slice::from_raw_parts`.
+ let plane_size = stride.checked_mul(plane_h)?;
+ if plane_size > isize::MAX as usize {
+ return None;
+ }
+ Some(PlaneInfo {
+ plane_ptr,
+ stride,
+ plane_h,
+ row_bytes,
+ })
+ }
+
+ /// Crate-internal: hand the wrapped frame to FFmpeg / our decoder code.
+ pub(crate) fn as_inner_mut(&mut self) -> &mut frame::Video {
+ &mut self.inner
+ }
+}
+
+#[derive(Clone, Copy)]
+struct PlaneInfo {
+ plane_ptr: *const u8,
+ stride: usize,
+ plane_h: usize,
+ row_bytes: usize,
+}
+
+// `Default` intentionally omitted: constructing a frame can fail (OOM
+// in `av_frame_alloc`), and a panicking `default()` would defeat the
+// safety stance of [`Frame::empty`]. Use `Frame::empty()?` directly.
+
+/// Visible byte width of `plane`'s rows for a frame of `frame_width` and
+/// the given pixel format. `None` for formats not in the supported HW-
+/// output set.
+///
+/// Distinct from `linesize` (FFmpeg's per-row stride, which may include
+/// alignment padding). HW transfer paths only initialize bytes
+/// `0..plane_row_bytes_for(...)` of each row; everything from there to
+/// `stride` is uninitialized padding and must not be exposed via
+/// `slice::from_raw_parts`.
+fn plane_row_bytes_for(pix_fmt_int: i32, plane: usize, frame_width: usize) -> Option {
+ match pix_fmt_int {
+ // 8-bit semi-planar 4:2:0 / 4:2:2: Y at full width (1 byte/sample);
+ // UV interleaved at horizontally-subsampled chroma with `ceil(W/2)`
+ // U+V pairs at 2 bytes per pair. For even W the chroma row equals
+ // `W` bytes (the simple case); for odd W it must round *up* to the
+ // next even byte so the trailing chroma sample is not silently
+ // dropped on width = 2k+1 frames.
+ pix_fmt::NV12 | pix_fmt::NV21 | pix_fmt::NV16 => match plane {
+ 0 => Some(frame_width),
+ 1 => Some(frame_width.div_ceil(2).checked_mul(2)?),
+ _ => None,
+ },
+ // 8-bit 4:4:4 semi-planar: chroma at full horizontal resolution,
+ // 2 bytes per pixel (1 byte U + 1 byte V) — no rounding required.
+ pix_fmt::NV24 => match plane {
+ 0 => Some(frame_width),
+ 1 => Some(frame_width.checked_mul(2)?),
+ _ => None,
+ },
+ // 10/12/16-bit semi-planar 4:2:0 / 4:2:2: Y is 2 bytes/sample
+ // (high-bit-depth packed in 16-bit). UV interleaved at horizontally-
+ // subsampled chroma with `ceil(W/2)` U+V pairs at 4 bytes per pair
+ // (2 bytes U + 2 bytes V). Same odd-width rounding as the 8-bit
+ // chroma path, scaled by 2 bytes per sample.
+ pix_fmt::P010LE
+ | pix_fmt::P010BE
+ | pix_fmt::P012LE
+ | pix_fmt::P016LE
+ | pix_fmt::P210LE
+ | pix_fmt::P212LE
+ | pix_fmt::P216LE => match plane {
+ 0 => Some(frame_width.checked_mul(2)?),
+ 1 => Some(frame_width.div_ceil(2).checked_mul(4)?),
+ _ => None,
+ },
+ // 10/12/16-bit 4:4:4 semi-planar: Y is 2 bytes/sample; UV at full
+ // horizontal resolution with 4 bytes per pixel (2 bytes U + 2 bytes V).
+ pix_fmt::P410LE | pix_fmt::P412LE | pix_fmt::P416LE => match plane {
+ 0 => Some(frame_width.checked_mul(2)?),
+ 1 => Some(frame_width.checked_mul(4)?),
+ _ => None,
+ },
+ _ => None,
+ }
+}
+
+/// Number of rows in `plane` for a frame of `frame_height` and the given
+/// pixel format. `None` for formats not in the supported HW-output set.
+///
+/// Crate-internal so the decoder's probe-replay accountant can compute
+/// per-frame byte sizes without re-implementing the chroma-subsampling
+/// table.
+pub(crate) fn plane_height_for(
+ pix_fmt_int: i32,
+ plane: usize,
+ frame_height: usize,
+) -> Option {
+ match pix_fmt_int {
+ // 4:2:0 semi-planar — Y full height, chroma half height.
+ pix_fmt::NV12
+ | pix_fmt::NV21
+ | pix_fmt::P010LE
+ | pix_fmt::P010BE
+ | pix_fmt::P012LE
+ | pix_fmt::P016LE => match plane {
+ 0 => Some(frame_height),
+ 1 => Some(frame_height.div_ceil(2)),
+ _ => None,
+ },
+ // 4:2:2 / 4:4:4 semi-planar — both planes full height.
+ pix_fmt::NV16
+ | pix_fmt::NV24
+ | pix_fmt::P210LE
+ | pix_fmt::P212LE
+ | pix_fmt::P216LE
+ | pix_fmt::P410LE
+ | pix_fmt::P412LE
+ | pix_fmt::P416LE => match plane {
+ 0 | 1 => Some(frame_height),
+ _ => None,
+ },
+ _ => None,
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn empty_frame_has_zero_dimensions_and_no_pts() {
+ let f = Frame::empty().expect("alloc");
+ assert_eq!(f.width(), 0);
+ assert_eq!(f.height(), 0);
+ assert_eq!(f.pts(), None);
+ // AVFrame.format defaults to -1 (AV_PIX_FMT_NONE) for an empty frame.
+ assert_eq!(f.pix_fmt(), -1);
+ // No active planes for an empty frame (all linesize entries are 0).
+ assert_eq!(f.planes(), 0);
+ }
+
+ #[test]
+ fn row_returns_none_for_unknown_format() {
+ let f = Frame::empty().expect("alloc");
+ // pix_fmt is NONE (-1), not in the supported set.
+ assert!(f.row(0, 0).is_none());
+ assert!(f.rows(0).is_none());
+ assert!(f.row_bytes(0).is_none());
+ }
+
+ /// Synthesize a frame with a negative linesize (FFmpeg's vertical-flip
+ /// convention) and assert the row accessors refuse to construct a slice.
+ /// Without the linesize > 0 check, the negative `i32 as usize` would
+ /// produce a huge positive length and `from_raw_parts` would be UB.
+ ///
+ /// `as_ptr` shares the same validation — handing back the data pointer
+ /// for a negative-stride frame would let a downstream converter
+ /// following the "valid for stride × plane_h bytes forward" contract
+ /// walk past the buffer.
+ #[test]
+ fn row_returns_none_for_negative_linesize() {
+ let mut f = Frame::empty().expect("alloc");
+ unsafe {
+ let raw = f.inner.as_mut_ptr();
+ (*raw).format = pix_fmt::NV12;
+ (*raw).width = 1920;
+ (*raw).height = 1080;
+ (*raw).linesize[0] = -1920; // vertically-flipped
+ (*raw).linesize[1] = -1920;
+ // data pointers stay null; the accessors would also reject on null,
+ // but should bail earlier on the linesize sign.
+ }
+ assert!(f.row(0, 0).is_none());
+ assert!(f.row(1, 0).is_none());
+ assert!(f.rows(0).is_none());
+ assert!(
+ f.as_ptr(0).is_none(),
+ "as_ptr must share row()/rows() validation — a negative-stride \
+ frame must not leak a forward-readable plane pointer"
+ );
+ assert!(f.as_ptr(1).is_none());
+ }
+
+ #[test]
+ fn row_returns_none_for_non_positive_height() {
+ let mut f = Frame::empty().expect("alloc");
+ unsafe {
+ let raw = f.inner.as_mut_ptr();
+ (*raw).format = pix_fmt::NV12;
+ (*raw).width = 1920;
+ (*raw).height = 0;
+ (*raw).linesize[0] = 1920;
+ (*raw).linesize[1] = 1920;
+ }
+ assert!(f.row(0, 0).is_none());
+ }
+
+ /// Synthesize a frame backed by a manually-allocated buffer with stride
+ /// strictly larger than visible row bytes (the exact case where
+ /// FFmpeg's HW transfer leaves trailing padding uninitialized) and
+ /// confirm the safe row accessor returns slices clipped to the visible
+ /// width.
+ #[test]
+ fn row_clips_to_visible_width_not_stride() {
+ use std::alloc::{alloc, dealloc, Layout};
+ let width = 64usize;
+ let height = 4usize;
+ // Stride > width: 16 bytes of padding per row in the Y plane.
+ let stride = 80usize;
+ let plane_size = stride * height;
+ // Allocate ourselves so we can fully control initialization. Fill
+ // bytes 0..width with 0xAA per row (the "valid pixel" range) and
+ // bytes width..stride with 0xFF (the simulated alignment padding —
+ // FFmpeg would leave these uninitialized; we set them to a sentinel
+ // that the test can detect if the safe slice ever exposes them).
+ let layout = Layout::from_size_align(plane_size, 32).unwrap();
+ let buf = unsafe { alloc(layout) };
+ assert!(!buf.is_null());
+ for y in 0..height {
+ let row = unsafe { buf.add(y * stride) };
+ for x in 0..width {
+ unsafe { *row.add(x) = 0xAA };
+ }
+ for x in width..stride {
+ unsafe { *row.add(x) = 0xFF };
+ }
+ }
+
+ let mut f = Frame::empty().expect("alloc");
+ unsafe {
+ let raw = f.inner.as_mut_ptr();
+ (*raw).format = pix_fmt::NV12;
+ (*raw).width = width as i32;
+ (*raw).height = height as i32;
+ (*raw).linesize[0] = stride as i32;
+ // linesize[1] = 0 keeps planes() at 1 so the test stays focused on
+ // plane 0 without owning a second allocation.
+ (*raw).data[0] = buf;
+ }
+
+ assert_eq!(f.row_bytes(0), Some(width));
+ assert_eq!(f.stride(0), stride);
+ let row0 = f.row(0, 0).expect("row 0");
+ assert_eq!(
+ row0.len(),
+ width,
+ "safe row must be clipped to visible width"
+ );
+ assert!(
+ row0.iter().all(|&b| b == 0xAA),
+ "row must not include padding sentinel 0xFF"
+ );
+
+ let collected: Vec<&[u8]> = f.rows(0).expect("rows iterator").collect();
+ assert_eq!(collected.len(), height);
+ for r in &collected {
+ assert_eq!(r.len(), width);
+ assert!(r.iter().all(|&b| b == 0xAA));
+ }
+
+ // `as_ptr` accepts the valid layout and returns the same base pointer
+ // FFmpeg wrote into `data[0]`, so SIMD callers can reach the plane
+ // through the documented unsafe contract.
+ assert_eq!(
+ f.as_ptr(0),
+ Some(buf as *const u8),
+ "as_ptr must surface the plane base for a valid forward-stride frame"
+ );
+
+ // Out-of-range row index returns None instead of panicking.
+ assert!(f.row(0, height).is_none());
+
+ // Detach the buffer before drop so AVFrame's own free path doesn't
+ // touch our manual allocation.
+ unsafe {
+ (*f.inner.as_mut_ptr()).data[0] = std::ptr::null_mut();
+ dealloc(buf, layout);
+ }
+ }
+
+ #[test]
+ #[should_panic(expected = "non-positive linesize")]
+ fn stride_panics_on_negative_linesize() {
+ let mut f = Frame::empty().expect("alloc");
+ unsafe {
+ let raw = f.inner.as_mut_ptr();
+ (*raw).linesize[0] = -1920;
+ }
+ let _ = f.stride(0);
+ }
+
+ #[test]
+ fn frame_is_send() {
+ fn check() {}
+ check::();
+ }
+
+ #[test]
+ fn plane_height_table_covers_supported_formats() {
+ // Spot-check the chroma subsampling table.
+ assert_eq!(plane_height_for(pix_fmt::NV12, 0, 1080), Some(1080));
+ assert_eq!(plane_height_for(pix_fmt::NV12, 1, 1080), Some(540));
+ assert_eq!(plane_height_for(pix_fmt::NV12, 1, 1081), Some(541));
+ assert_eq!(plane_height_for(pix_fmt::P010LE, 1, 1080), Some(540));
+ assert_eq!(plane_height_for(pix_fmt::NV16, 1, 1080), Some(1080));
+ assert_eq!(plane_height_for(pix_fmt::NV24, 1, 1080), Some(1080));
+ assert_eq!(plane_height_for(pix_fmt::P416LE, 1, 1080), Some(1080));
+ assert_eq!(plane_height_for(pix_fmt::NONE, 0, 1080), None);
+ assert_eq!(plane_height_for(pix_fmt::NV12, 2, 1080), None);
+ }
+
+ /// 4:2:0 / 4:2:2 chroma planes carry `ceil(W/2)` U+V pairs per row.
+ /// For odd `W`, dropping the round-up silently truncates the last chroma
+ /// sample — and the safe row slice would expose a buffer one byte (8-bit)
+ /// or two bytes (high-bit-depth) shorter than the data FFmpeg actually
+ /// wrote. Y planes and 4:4:4 chroma planes are unaffected because their
+ /// row count is just `W` or a fixed multiple of `W`.
+ #[test]
+ fn plane_row_bytes_rounds_up_chroma_for_odd_widths() {
+ // 8-bit subsampled chroma — odd W gains one byte (the missing sample
+ // pair).
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1921), Some(1922));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV21, 1, 1921), Some(1922));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV16, 1, 1921), Some(1922));
+ // High-bit-depth subsampled chroma — odd W gains two bytes.
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010BE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P012LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P016LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P210LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P212LE, 1, 1921), Some(3844));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P216LE, 1, 1921), Some(3844));
+ // Y planes always at full width regardless of subsampling.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 0, 1921), Some(1921));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 0, 1921), Some(3842));
+ // 4:4:4 chroma is at full horizontal resolution — no rounding.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 1, 1921), Some(3842));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 1, 1921), Some(7684));
+ // Even widths must still match the original (pre-fix) values so the
+ // change is purely additive on the dominant code path.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1920), Some(3840));
+ }
+
+ #[test]
+ fn plane_row_bytes_table_covers_supported_formats() {
+ // 8-bit 4:2:0 / 4:2:2 — both planes at width.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 0, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 1, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV21, 1, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV16, 1, 1920), Some(1920));
+ // 8-bit 4:4:4 — chroma plane is 2 * width.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 0, 1920), Some(1920));
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV24, 1, 1920), Some(3840));
+ // 10/12/16-bit 4:2:0 / 4:2:2 — both planes at 2 * width.
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 0, 1920), Some(3840));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P010LE, 1, 1920), Some(3840));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P210LE, 1, 1920), Some(3840));
+ // 10/12/16-bit 4:4:4 — Y is 2 * width, chroma is 4 * width.
+ assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 0, 1920), Some(3840));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P410LE, 1, 1920), Some(7680));
+ assert_eq!(plane_row_bytes_for(pix_fmt::P416LE, 1, 1920), Some(7680));
+ // Unsupported / out-of-range.
+ assert_eq!(plane_row_bytes_for(pix_fmt::NONE, 0, 1920), None);
+ assert_eq!(plane_row_bytes_for(pix_fmt::NV12, 2, 1920), None);
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 0a58390..3654016 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,11 +1,35 @@
-//! A template for creating Rust open-source repo on GitHub
-#![cfg_attr(not(feature = "std"), no_std)]
+//! Cross-platform **hardware** video decoder built on top of `ffmpeg-next`.
+//!
+//! [`VideoDecoder`] mirrors the surface of `ffmpeg::decoder::Video`
+//! (`send_packet`/`receive_frame`/`send_eof`/`flush`) and auto-probes the
+//! host's hardware backends (VideoToolbox / VAAPI / NVDEC / D3D11VA).
+//! There is **no software fallback inside this crate** — if no hardware
+//! backend can decode the stream, [`Error::AllBackendsFailed`] surfaces
+//! either from [`VideoDecoder::open`] (when no backend even opens) or
+//! from [`VideoDecoder::receive_frame`] / [`VideoDecoder::send_packet`] /
+//! [`VideoDecoder::send_eof`] (when the initially-opened backend or any
+//! later candidate fails at decode time and the probe order is
+//! exhausted). On single-backend platforms (e.g. macOS, where the order
+//! is `[VideoToolbox]`), only the runtime path can return it. The
+//! caller picks how to fall back to a software decoder of their choice
+//! (e.g. by opening an `ffmpeg::decoder::Video` directly).
+//!
+//! Output frames returned by [`VideoDecoder::receive_frame`] are CPU-side
+//! and downloaded via `av_hwframe_transfer_data` (NV12 for 8-bit input,
+//! P010 for 10-bit). Pixel-format conversion is intentionally out of
+//! scope; downstream code handles that (e.g. via `colconv`).
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(docsrs, allow(unused_attributes))]
#![deny(missing_docs)]
-#[cfg(all(not(feature = "std"), feature = "alloc"))]
-extern crate alloc as std;
+mod backend;
+mod decoder;
+mod error;
+mod ffi;
+mod frame;
+pub mod pix_fmt;
-#[cfg(feature = "std")]
-extern crate std;
+pub use backend::Backend;
+pub use decoder::VideoDecoder;
+pub use error::{Error, Result};
+pub use frame::Frame;
diff --git a/src/pix_fmt.rs b/src/pix_fmt.rs
new file mode 100644
index 0000000..f3c594e
--- /dev/null
+++ b/src/pix_fmt.rs
@@ -0,0 +1,113 @@
+//! Stable `i32` constants for the pixel formats produced by `hwdecode`'s
+//! hardware decoders after `av_hwframe_transfer_data`.
+//!
+//! `Frame::pix_fmt()` returns the raw integer FFmpeg wrote to `AVFrame.format`
+//! (as a plain `i32` to avoid the enum-construction UB that an unvalidated
+//! cast would invoke). This module names the constants relevant to dispatch
+//! after a successful hardware decode.
+//!
+//! Because `hwdecode` is hardware-only, the formats listed here cover what
+//! the supported HW backends actually produce — the **NV** family (semi-
+//! planar 8-bit) and the **P0xx / P2xx / P4xx** family (semi-planar 10/12/16
+//! bit). VideoToolbox, VAAPI, NVDEC, and D3D11VA all download into one of
+//! these.
+//!
+//! Software-decoder output formats (`YUV420P`, `YUV422P`, `RGB24`, etc.) are
+//! intentionally **not** listed: callers handle software fallback outside
+//! this crate, and dispatch tables for those formats belong with the SW
+//! pipeline.
+//!
+//! For values not listed here, write `AVPixelFormat::AV_PIX_FMT_X as i32`
+//! directly — that's exactly the cast we use to define these constants.
+//!
+//! ```ignore
+//! use hwdecode::{pix_fmt, Frame};
+//! match frame.pix_fmt() {
+//! pix_fmt::NV12 => /* 8-bit 4:2:0 → colconv::frame::Nv12Frame */,
+//! pix_fmt::P010LE => /* 10-bit 4:2:0 → colconv::frame::PnFrame<10> */,
+//! other => unimplemented!("pix_fmt {other}"),
+//! }
+//! ```
+
+use ffmpeg_next::ffi::AVPixelFormat;
+
+// --- semi-planar YUV (NV*) — 8-bit hardware download outputs ----------------
+
+/// 4:2:0, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV12`). The
+/// dominant 8-bit HW download format on every supported backend.
+pub const NV12: i32 = AVPixelFormat::AV_PIX_FMT_NV12 as i32;
+/// 4:2:0, 8-bit, Y plane + interleaved Cr/Cb (`AV_PIX_FMT_NV21`).
+pub const NV21: i32 = AVPixelFormat::AV_PIX_FMT_NV21 as i32;
+/// 4:2:2, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV16`).
+pub const NV16: i32 = AVPixelFormat::AV_PIX_FMT_NV16 as i32;
+/// 4:4:4, 8-bit, Y plane + interleaved Cb/Cr (`AV_PIX_FMT_NV24`).
+pub const NV24: i32 = AVPixelFormat::AV_PIX_FMT_NV24 as i32;
+
+// --- semi-planar YUV (P0xx) — 4:2:0 high-bit-depth HW downloads -------------
+
+/// 4:2:0, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P010LE`). The
+/// dominant 10-bit HW download format.
+pub const P010LE: i32 = AVPixelFormat::AV_PIX_FMT_P010LE as i32;
+/// 4:2:0, 10-bit, semi-planar big-endian (`AV_PIX_FMT_P010BE`).
+pub const P010BE: i32 = AVPixelFormat::AV_PIX_FMT_P010BE as i32;
+/// 4:2:0, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P012LE`).
+pub const P012LE: i32 = AVPixelFormat::AV_PIX_FMT_P012LE as i32;
+/// 4:2:0, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P016LE`).
+pub const P016LE: i32 = AVPixelFormat::AV_PIX_FMT_P016LE as i32;
+
+// --- semi-planar YUV (P2xx) — 4:2:2 high-bit-depth HW downloads -------------
+
+/// 4:2:2, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P210LE`).
+pub const P210LE: i32 = AVPixelFormat::AV_PIX_FMT_P210LE as i32;
+/// 4:2:2, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P212LE`, FFmpeg 5.0+).
+pub const P212LE: i32 = AVPixelFormat::AV_PIX_FMT_P212LE as i32;
+/// 4:2:2, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P216LE`).
+pub const P216LE: i32 = AVPixelFormat::AV_PIX_FMT_P216LE as i32;
+
+// --- semi-planar YUV (P4xx) — 4:4:4 high-bit-depth HW downloads -------------
+
+/// 4:4:4, 10-bit, semi-planar little-endian (`AV_PIX_FMT_P410LE`).
+pub const P410LE: i32 = AVPixelFormat::AV_PIX_FMT_P410LE as i32;
+/// 4:4:4, 12-bit, semi-planar little-endian (`AV_PIX_FMT_P412LE`, FFmpeg 5.0+).
+pub const P412LE: i32 = AVPixelFormat::AV_PIX_FMT_P412LE as i32;
+/// 4:4:4, 16-bit, semi-planar little-endian (`AV_PIX_FMT_P416LE`).
+pub const P416LE: i32 = AVPixelFormat::AV_PIX_FMT_P416LE as i32;
+
+// --- sentinel ---------------------------------------------------------------
+
+/// Sentinel value FFmpeg writes to `AVFrame.format` for an unset frame
+/// (`AV_PIX_FMT_NONE`). [`crate::Frame::empty`] returns this until the frame
+/// is filled by a decoder.
+pub const NONE: i32 = AVPixelFormat::AV_PIX_FMT_NONE as i32;
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// Regression check: if the underlying `AVPixelFormat` discriminants ever
+ /// change in `ffmpeg-sys-next`'s bindings, this catches it.
+ #[test]
+ fn constants_match_bindings() {
+ assert_eq!(NV12, AVPixelFormat::AV_PIX_FMT_NV12 as i32);
+ assert_eq!(P010LE, AVPixelFormat::AV_PIX_FMT_P010LE as i32);
+ assert_eq!(P416LE, AVPixelFormat::AV_PIX_FMT_P416LE as i32);
+ assert_eq!(NONE, -1, "AV_PIX_FMT_NONE must be -1 (FFmpeg ABI sentinel)");
+ }
+
+ #[test]
+ fn match_dispatch_compiles() {
+ fn classify(v: i32) -> &'static str {
+ match v {
+ NV12 => "nv12",
+ NV21 => "nv21",
+ P010LE => "p010le",
+ P210LE => "p210le",
+ P410LE => "p410le",
+ _ => "other",
+ }
+ }
+ assert_eq!(classify(NV12), "nv12");
+ assert_eq!(classify(P010LE), "p010le");
+ assert_eq!(classify(NONE), "other");
+ }
+}
diff --git a/tests/decode.rs b/tests/decode.rs
new file mode 100644
index 0000000..2431ff1
--- /dev/null
+++ b/tests/decode.rs
@@ -0,0 +1,78 @@
+//! Integration test: open the auto-probed decoder against a real video file
+//! and decode the first 30 frames. Skipped (with a clear message) when no
+//! sample is configured.
+//!
+//! Set `HWDECODE_SAMPLE_VIDEO` to an absolute path to enable.
+
+use ffmpeg::{format, media};
+use ffmpeg_next as ffmpeg;
+use hwdecode::{Frame, VideoDecoder};
+
+const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
+
+#[test]
+fn auto_open_decodes_at_least_one_frame() {
+ let Some(path) = std::env::var_os(SAMPLE_ENV) else {
+ eprintln!("skipping: set {SAMPLE_ENV} to a video file path to run this test");
+ return;
+ };
+
+ ffmpeg::init().expect("ffmpeg init");
+
+ let mut input = format::input(&path).expect("open input");
+ let stream = input
+ .streams()
+ .best(media::Type::Video)
+ .expect("video stream");
+ let stream_index = stream.index();
+ let expected_w = unsafe { (*stream.parameters().as_ptr()).width as u32 };
+ let expected_h = unsafe { (*stream.parameters().as_ptr()).height as u32 };
+
+ let mut decoder = match VideoDecoder::open(stream.parameters()) {
+ Ok(d) => d,
+ Err(hwdecode::Error::AllBackendsFailed { attempts }) => {
+ eprintln!(
+ "skipping: no hardware backend available ({} attempts)",
+ attempts.len()
+ );
+ return;
+ }
+ Err(e) => panic!("open decoder: {e}"),
+ };
+ eprintln!("optimistic backend = {:?}", decoder.backend());
+
+ assert_eq!(decoder.width(), expected_w);
+ assert_eq!(decoder.height(), expected_h);
+
+ let mut frame = Frame::empty().expect("alloc frame");
+ let mut count = 0_usize;
+ let target = 30_usize;
+
+ 'outer: for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ decoder.send_packet(&packet).expect("send packet");
+ loop {
+ match decoder.receive_frame(&mut frame) {
+ Ok(()) => {
+ assert_eq!(frame.width(), expected_w);
+ assert_eq!(frame.height(), expected_h);
+ count += 1;
+ if count >= target {
+ break 'outer;
+ }
+ }
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+ if errno == ffmpeg::error::EAGAIN =>
+ {
+ break;
+ }
+ Err(e) => panic!("receive_frame: {e}"),
+ }
+ }
+ }
+
+ assert!(count >= 1, "expected at least 1 decoded frame, got {count}");
+ eprintln!("decoded {count} frames via backend {:?}", decoder.backend());
+}
diff --git a/tests/foo.rs b/tests/foo.rs
deleted file mode 100644
index 8b13789..0000000
--- a/tests/foo.rs
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/hw_smoke.rs b/tests/hw_smoke.rs
new file mode 100644
index 0000000..50d67ca
--- /dev/null
+++ b/tests/hw_smoke.rs
@@ -0,0 +1,80 @@
+//! `#[ignore]`-gated smoke test that exercises end-to-end hardware decode
+//! against a real video file: opens the auto-probed decoder, drives it
+//! until the first frame is delivered, and asserts the active backend is
+//! one of the supported HW variants. Run with:
+//!
+//! ```sh
+//! HWDECODE_SAMPLE_VIDEO=/path/to/clip.mp4 cargo test --test hw_smoke -- --ignored
+//! ```
+
+use ffmpeg::{format, media};
+use ffmpeg_next as ffmpeg;
+use hwdecode::{Backend, Frame, VideoDecoder};
+
+const SAMPLE_ENV: &str = "HWDECODE_SAMPLE_VIDEO";
+
+#[test]
+#[ignore = "requires HWDECODE_SAMPLE_VIDEO and a working hardware backend"]
+fn auto_probe_picks_hardware_backend() {
+ let path = std::env::var_os(SAMPLE_ENV).unwrap_or_else(|| panic!("{SAMPLE_ENV} not set"));
+
+ ffmpeg::init().expect("ffmpeg init");
+
+ let mut input = format::input(&path).expect("open input");
+ let stream = input
+ .streams()
+ .best(media::Type::Video)
+ .expect("video stream");
+ let stream_index = stream.index();
+
+ let mut decoder = VideoDecoder::open(stream.parameters()).expect("open decoder");
+ eprintln!("auto-probe optimistic backend = {:?}", decoder.backend());
+
+ // Decode at least one frame so the probe collapses, then check the
+ // backend that actually produced it. Checking `decoder.backend()` before
+ // any frame has been received would observe the optimistic pre-probe
+ // value and could false-pass when a HW backend silently degrades.
+ let mut frame = Frame::empty().expect("alloc frame");
+ let mut got_frame = false;
+ for (s, packet) in input.packets() {
+ if s.index() != stream_index {
+ continue;
+ }
+ decoder.send_packet(&packet).expect("send packet");
+ match decoder.receive_frame(&mut frame) {
+ Ok(()) => {
+ got_frame = true;
+ eprintln!(
+ "first frame: backend={:?} {}x{} pix_fmt={}",
+ decoder.backend(),
+ frame.width(),
+ frame.height(),
+ frame.pix_fmt()
+ );
+ break;
+ }
+ Err(hwdecode::Error::Ffmpeg(ffmpeg::Error::Other { errno }))
+ if errno == ffmpeg::error::EAGAIN =>
+ {
+ continue;
+ }
+ Err(e) => panic!("receive_frame: {e}"),
+ }
+ }
+ assert!(got_frame, "no frames decoded");
+ // After the probe collapses, `backend()` reports the backend that
+ // actually produced the first frame. Make the doc-comment claim
+ // explicit: it must be one of the HW variants. Today the enum is
+ // exhaustively HW-only, so `matches!` here is tautological — but it
+ // documents intent and would catch a future regression that
+ // reintroduces a non-HW variant or leaves the active state
+ // mis-classified.
+ let backend = decoder.backend();
+ assert!(
+ matches!(
+ backend,
+ Backend::VideoToolbox | Backend::Vaapi | Backend::Cuda | Backend::D3d11va
+ ),
+ "expected HW backend, got {backend:?}"
+ );
+}