diff --git a/src/sinker/mixed.rs b/src/sinker/mixed.rs index 53c6e2c..146db35 100644 --- a/src/sinker/mixed.rs +++ b/src/sinker/mixed.rs @@ -1110,12 +1110,12 @@ impl<'a> MixedSinker<'a, Yuv420p> { /// /// ```compile_fail /// // Attaching RGBA to a sink that doesn't write it is rejected - /// // at compile time. Nv16 (4:2:2 semi‑planar) has not yet been + /// // at compile time. Yuv444p (4:4:4 planar) has not yet been /// // wired for RGBA; once that lands the negative example here /// // moves to the next not‑yet‑wired format. - /// use colconv::{sinker::MixedSinker, yuv::Nv16}; + /// use colconv::{sinker::MixedSinker, yuv::Yuv444p}; /// let mut buf = vec![0u8; 16 * 8 * 4]; - /// let _ = MixedSinker::::new(16, 8).with_rgba(&mut buf); + /// let _ = MixedSinker::::new(16, 8).with_rgba(&mut buf); /// ``` #[cfg_attr(not(tarpaulin), inline(always))] pub fn with_rgba(mut self, buf: &'a mut [u8]) -> Result { @@ -1333,7 +1333,40 @@ impl PixelSink for MixedSinker<'_, Yuv420p> { // 4:2:2 is 4:2:0's vertical-axis twin: same per-row chroma shape // (half-width U / V, one pair per Y pair), just one chroma row per Y // row instead of one per two. This impl reuses `yuv_420_to_rgb_row` -// — no new kernels needed. +// (and `yuv_420_to_rgba_row` for the RGBA path) — no new kernels +// needed. + +impl<'a> MixedSinker<'a, Yuv422p> { + /// Attaches a packed 32‑bit RGBA output buffer. + /// + /// Only available on sinker types whose `PixelSink` impl writes + /// RGBA — see [`MixedSinker::::with_rgba`] for the same + /// rationale and constraints. Yuv422p has no alpha plane, so every + /// alpha byte is filled with `0xFF` (opaque). + /// + /// Returns `Err(RgbaBufferTooShort)` if + /// `buf.len() < width × height × 4`, or `Err(GeometryOverflow)` on + /// 32‑bit targets when the product overflows. + #[cfg_attr(not(tarpaulin), inline(always))] + pub fn with_rgba(mut self, buf: &'a mut [u8]) -> Result { + self.set_rgba(buf)?; + Ok(self) + } + + /// In-place variant of [`with_rgba`](Self::with_rgba). + #[cfg_attr(not(tarpaulin), inline(always))] + pub fn set_rgba(&mut self, buf: &'a mut [u8]) -> Result<&mut Self, MixedSinkerError> { + let expected = self.frame_bytes(4)?; + if buf.len() < expected { + return Err(MixedSinkerError::RgbaBufferTooShort { + expected, + actual: buf.len(), + }); + } + self.rgba = Some(buf); + Ok(self) + } +} impl Yuv422pSink for MixedSinker<'_, Yuv422p> {} @@ -1390,6 +1423,7 @@ impl PixelSink for MixedSinker<'_, Yuv422p> { let Self { rgb, + rgba, luma, hsv, rgb_scratch, @@ -1403,6 +1437,32 @@ impl PixelSink for MixedSinker<'_, Yuv422p> { luma[one_plane_start..one_plane_end].copy_from_slice(&row.y()[..w]); } + // Native RGBA: independent kernel run, separate from RGB. Default + // alpha = 0xFF since Yuv422p has no alpha plane. Reuses the + // Yuv420p RGBA dispatcher — 4:2:2's per-row contract is + // identical (half-width chroma, one pair per Y pair). + if let Some(buf) = rgba.as_deref_mut() { + let rgba_plane_end = + one_plane_end + .checked_mul(4) + .ok_or(MixedSinkerError::GeometryOverflow { + width: w, + height: h, + channels: 4, + })?; + let rgba_plane_start = one_plane_start * 4; + yuv_420_to_rgba_row( + row.y(), + row.u_half(), + row.v_half(), + &mut buf[rgba_plane_start..rgba_plane_end], + w, + row.matrix(), + row.full_range(), + use_simd, + ); + } + let want_rgb = rgb.is_some(); let want_hsv = hsv.is_some(); if !want_rgb && !want_hsv { @@ -1786,7 +1846,40 @@ impl Yuv420pSink for MixedSinker<'_, Yuv420p> {} // // 4:2:2 is 4:2:0's vertical‑axis twin: one UV row per Y row instead of // one per two. Per‑row math is identical, so this impl calls the same -// `nv12_to_rgb_row` dispatcher — no new kernels needed. +// `nv12_to_rgb_row` / `nv12_to_rgba_row` dispatchers — no new kernels +// needed. + +impl<'a> MixedSinker<'a, Nv16> { + /// Attaches a packed 32‑bit RGBA output buffer. + /// + /// Only available on sinker types whose `PixelSink` impl writes + /// RGBA — see [`MixedSinker::::with_rgba`] for the same + /// rationale and constraints. NV16 has no alpha plane, so every + /// alpha byte is filled with `0xFF` (opaque). + /// + /// Returns `Err(RgbaBufferTooShort)` if + /// `buf.len() < width × height × 4`, or `Err(GeometryOverflow)` on + /// 32‑bit targets when the product overflows. + #[cfg_attr(not(tarpaulin), inline(always))] + pub fn with_rgba(mut self, buf: &'a mut [u8]) -> Result { + self.set_rgba(buf)?; + Ok(self) + } + + /// In-place variant of [`with_rgba`](Self::with_rgba). + #[cfg_attr(not(tarpaulin), inline(always))] + pub fn set_rgba(&mut self, buf: &'a mut [u8]) -> Result<&mut Self, MixedSinkerError> { + let expected = self.frame_bytes(4)?; + if buf.len() < expected { + return Err(MixedSinkerError::RgbaBufferTooShort { + expected, + actual: buf.len(), + }); + } + self.rgba = Some(buf); + Ok(self) + } +} impl Nv16Sink for MixedSinker<'_, Nv16> {} @@ -1837,6 +1930,7 @@ impl PixelSink for MixedSinker<'_, Nv16> { let Self { rgb, + rgba, luma, hsv, rgb_scratch, @@ -1850,6 +1944,30 @@ impl PixelSink for MixedSinker<'_, Nv16> { luma[one_plane_start..one_plane_end].copy_from_slice(&row.y()[..w]); } + // Native RGBA: independent kernel run, separate from RGB. Default + // alpha = 0xFF since NV16 has no alpha plane. Reuses the NV12 + // RGBA dispatcher — 4:2:2's row contract is identical. + if let Some(buf) = rgba.as_deref_mut() { + let rgba_plane_end = + one_plane_end + .checked_mul(4) + .ok_or(MixedSinkerError::GeometryOverflow { + width: w, + height: h, + channels: 4, + })?; + let rgba_plane_start = one_plane_start * 4; + nv12_to_rgba_row( + row.y(), + row.uv(), + &mut buf[rgba_plane_start..rgba_plane_end], + w, + row.matrix(), + row.full_range(), + use_simd, + ); + } + let want_rgb = rgb.is_some(); let want_hsv = hsv.is_some(); if !want_rgb && !want_hsv { @@ -8893,6 +9011,189 @@ mod tests { assert_eq!(rgb_nv16, rgb_nv12); } + // ---- NV16 RGBA (Ship 8 PR 3) tests -------------------------------------- + // + // NV16 reuses the NV12 `_to_rgba_row` dispatcher (4:2:2's row + // contract is identical to NV12's). Tests mirror the NV12 set; + // the cross-format invariant against NV12 (with duplicated + // chroma rows so 4:2:0 vertical upsample matches NV16's per-row + // chroma) catches any wiring regression specific to the NV16 + // walker that the kernel-level tests don't cover. + + #[test] + #[cfg_attr( + miri, + ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri" + )] + fn nv16_rgba_only_converts_gray_to_gray_with_opaque_alpha() { + let (yp, uvp) = solid_nv16_frame(16, 8, 128, 128, 128); + let src = Nv16Frame::new(&yp, &uvp, 16, 8, 16, 16); + + let mut rgba = std::vec![0u8; 16 * 8 * 4]; + let mut sink = MixedSinker::::new(16, 8) + .with_rgba(&mut rgba) + .unwrap(); + nv16_to(&src, true, ColorMatrix::Bt601, &mut sink).unwrap(); + + for px in rgba.chunks(4) { + assert!(px[0].abs_diff(128) <= 1, "R"); + assert_eq!(px[0], px[1], "RGB monochromatic"); + assert_eq!(px[1], px[2], "RGB monochromatic"); + assert_eq!(px[3], 0xFF, "alpha must default to opaque"); + } + } + + #[test] + #[cfg_attr( + miri, + ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri" + )] + fn nv16_with_rgb_and_with_rgba_produce_byte_identical_rgb_bytes() { + let w = 32usize; + let h = 16usize; + let (yp, uvp) = solid_nv16_frame(w as u32, h as u32, 180, 60, 200); + let src = Nv16Frame::new(&yp, &uvp, w as u32, h as u32, w as u32, w as u32); + + let mut rgb = std::vec![0u8; w * h * 3]; + let mut rgba = std::vec![0u8; w * h * 4]; + let mut sink = MixedSinker::::new(w, h) + .with_rgb(&mut rgb) + .unwrap() + .with_rgba(&mut rgba) + .unwrap(); + nv16_to(&src, true, ColorMatrix::Bt601, &mut sink).unwrap(); + + for i in 0..(w * h) { + assert_eq!(rgba[i * 4], rgb[i * 3], "R differs at pixel {i}"); + assert_eq!(rgba[i * 4 + 1], rgb[i * 3 + 1], "G differs at pixel {i}"); + assert_eq!(rgba[i * 4 + 2], rgb[i * 3 + 2], "B differs at pixel {i}"); + assert_eq!(rgba[i * 4 + 3], 0xFF, "A not opaque at pixel {i}"); + } + } + + #[test] + fn nv16_rgba_buffer_too_short_returns_err() { + let mut rgba_short = std::vec![0u8; 16 * 8 * 4 - 1]; + let result = MixedSinker::::new(16, 8).with_rgba(&mut rgba_short); + let Err(err) = result else { + panic!("expected RgbaBufferTooShort error"); + }; + assert!(matches!( + err, + MixedSinkerError::RgbaBufferTooShort { + expected: 512, + actual: 511, + } + )); + } + + #[test] + #[cfg_attr( + miri, + ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri" + )] + fn nv16_rgba_simd_matches_scalar_with_random_yuv() { + // NV16 reuses the NV12 RGBA kernel; this test pins the wiring + // regardless of which tier the dispatcher picks. Width 1922 + + // height 4 to exercise both main loop and tail per backend. + let w = 1922usize; + let h = 4usize; + let mut yp = std::vec![0u8; w * h]; + let mut uvp = std::vec![0u8; w * h]; // NV16 UV is full-height + pseudo_random_u8(&mut yp, 0xC001_C0DE); + pseudo_random_u8(&mut uvp, 0xCAFE_F00D); + let src = Nv16Frame::new(&yp, &uvp, w as u32, h as u32, w as u32, w as u32); + + for &matrix in &[ + ColorMatrix::Bt601, + ColorMatrix::Bt709, + ColorMatrix::Bt2020Ncl, + ColorMatrix::YCgCo, + ] { + for &full_range in &[true, false] { + let mut rgba_simd = std::vec![0u8; w * h * 4]; + let mut rgba_scalar = std::vec![0u8; w * h * 4]; + + let mut s_simd = MixedSinker::::new(w, h) + .with_rgba(&mut rgba_simd) + .unwrap(); + nv16_to(&src, full_range, matrix, &mut s_simd).unwrap(); + + let mut s_scalar = MixedSinker::::new(w, h) + .with_rgba(&mut rgba_scalar) + .unwrap(); + s_scalar.set_simd(false); + nv16_to(&src, full_range, matrix, &mut s_scalar).unwrap(); + + if rgba_simd != rgba_scalar { + let mismatch = rgba_simd + .iter() + .zip(rgba_scalar.iter()) + .position(|(a, b)| a != b) + .unwrap(); + let pixel = mismatch / 4; + let channel = ["R", "G", "B", "A"][mismatch % 4]; + panic!( + "NV16 RGBA SIMD ≠ scalar at byte {mismatch} (px {pixel} {channel}) for matrix={matrix:?} full_range={full_range}: simd={} scalar={}", + rgba_simd[mismatch], rgba_scalar[mismatch] + ); + } + } + } + } + + #[test] + #[cfg_attr( + miri, + ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri" + )] + fn nv16_rgba_matches_nv12_rgba_with_duplicated_chroma() { + // Cross-format invariant on the RGBA path. Mirrors the existing + // `nv16_matches_nv12_mixed_sinker_with_duplicated_chroma` for + // RGB: duplicating NV16 chroma rows pairwise so the 4:2:0 + // vertical upsample matches NV16's per-row chroma must yield + // byte-identical RGBA. Catches NV16-vs-NV12 wiring regressions + // specific to the new RGBA path. + let w = 32usize; + let h = 16usize; + let yp: Vec = (0..w * h).map(|i| ((i * 37 + 11) & 0xFF) as u8).collect(); + let uv_nv16: Vec = (0..w * h).map(|i| ((i * 53 + 23) & 0xFF) as u8).collect(); + let mut uv_nv12 = std::vec![0u8; w * h / 2]; + for c_row in 0..h / 2 { + let src_row = c_row * 2; + uv_nv12[c_row * w..(c_row + 1) * w].copy_from_slice(&uv_nv16[src_row * w..(src_row + 1) * w]); + } + let mut uv_nv16_aligned = uv_nv16.clone(); + for c_row in 0..h / 2 { + let even_row = c_row * 2; + let odd_row = even_row + 1; + let (even, odd) = uv_nv16_aligned.split_at_mut(odd_row * w); + odd[..w].copy_from_slice(&even[even_row * w..even_row * w + w]); + } + let nv16_src = Nv16Frame::new( + &yp, + &uv_nv16_aligned, + w as u32, + h as u32, + w as u32, + w as u32, + ); + let nv12_src = Nv12Frame::new(&yp, &uv_nv12, w as u32, h as u32, w as u32, w as u32); + + let mut rgba_nv16 = std::vec![0u8; w * h * 4]; + let mut rgba_nv12 = std::vec![0u8; w * h * 4]; + let mut s_nv16 = MixedSinker::::new(w, h) + .with_rgba(&mut rgba_nv16) + .unwrap(); + let mut s_nv12 = MixedSinker::::new(w, h) + .with_rgba(&mut rgba_nv12) + .unwrap(); + nv16_to(&nv16_src, false, ColorMatrix::Bt709, &mut s_nv16).unwrap(); + nv12_to(&nv12_src, false, ColorMatrix::Bt709, &mut s_nv12).unwrap(); + + assert_eq!(rgba_nv16, rgba_nv12); + } + #[test] fn nv16_odd_width_sink_returns_err_at_begin_frame() { let mut rgb = std::vec![0u8; 15 * 8 * 3]; @@ -11102,6 +11403,181 @@ mod tests { assert_eq!(rgb422, rgb420); } + // ---- Yuv422p RGBA (Ship 8 PR 3) tests ----------------------------------- + // + // Yuv422p reuses the Yuv420p `_to_rgba_row` dispatcher (same row + // contract). Tests mirror the Yuv420p RGBA set; the cross-format + // invariant against Yuv420p (with solid chroma so 4:2:0 vertical + // upsample matches Yuv422p's per-row chroma) catches walker + // regressions specific to the Yuv422p RGBA wiring. + + #[test] + #[cfg_attr( + miri, + ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri" + )] + fn yuv422p_rgba_only_converts_gray_to_gray_with_opaque_alpha() { + let (yp, up, vp) = solid_yuv422p_frame(16, 8, 128, 128, 128); + let src = Yuv422pFrame::new(&yp, &up, &vp, 16, 8, 16, 8, 8); + + let mut rgba = std::vec![0u8; 16 * 8 * 4]; + let mut sink = MixedSinker::::new(16, 8) + .with_rgba(&mut rgba) + .unwrap(); + yuv422p_to(&src, true, ColorMatrix::Bt601, &mut sink).unwrap(); + + for px in rgba.chunks(4) { + assert!(px[0].abs_diff(128) <= 1, "R"); + assert_eq!(px[0], px[1], "RGB monochromatic"); + assert_eq!(px[1], px[2], "RGB monochromatic"); + assert_eq!(px[3], 0xFF, "alpha must default to opaque"); + } + } + + #[test] + #[cfg_attr( + miri, + ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri" + )] + fn yuv422p_with_rgb_and_with_rgba_produce_byte_identical_rgb_bytes() { + let w = 32u32; + let h = 16u32; + let ws = w as usize; + let hs = h as usize; + let (yp, up, vp) = solid_yuv422p_frame(w, h, 180, 60, 200); + let src = Yuv422pFrame::new(&yp, &up, &vp, w, h, w, w / 2, w / 2); + + let mut rgb = std::vec![0u8; ws * hs * 3]; + let mut rgba = std::vec![0u8; ws * hs * 4]; + let mut sink = MixedSinker::::new(ws, hs) + .with_rgb(&mut rgb) + .unwrap() + .with_rgba(&mut rgba) + .unwrap(); + yuv422p_to(&src, true, ColorMatrix::Bt601, &mut sink).unwrap(); + + for i in 0..(ws * hs) { + assert_eq!(rgba[i * 4], rgb[i * 3], "R differs at pixel {i}"); + assert_eq!(rgba[i * 4 + 1], rgb[i * 3 + 1], "G differs at pixel {i}"); + assert_eq!(rgba[i * 4 + 2], rgb[i * 3 + 2], "B differs at pixel {i}"); + assert_eq!(rgba[i * 4 + 3], 0xFF, "A not opaque at pixel {i}"); + } + } + + #[test] + fn yuv422p_rgba_buffer_too_short_returns_err() { + let mut rgba_short = std::vec![0u8; 16 * 8 * 4 - 1]; + let result = MixedSinker::::new(16, 8).with_rgba(&mut rgba_short); + let Err(err) = result else { + panic!("expected RgbaBufferTooShort error"); + }; + assert!(matches!( + err, + MixedSinkerError::RgbaBufferTooShort { + expected: 512, + actual: 511, + } + )); + } + + #[test] + #[cfg_attr( + miri, + ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri" + )] + fn yuv422p_rgba_simd_matches_scalar_with_random_yuv() { + // Random per-pixel YUV across all matrices × both ranges. Width + // 1922 forces both the SIMD main loop AND a scalar tail across + // every backend block size (16/32/64). 4:2:2 chroma is full- + // height, so up/vp use `w/2 × h` instead of `w/2 × h/2`. + let w = 1922usize; + let h = 4usize; + let mut yp = std::vec![0u8; w * h]; + let mut up = std::vec![0u8; (w / 2) * h]; + let mut vp = std::vec![0u8; (w / 2) * h]; + pseudo_random_u8(&mut yp, 0xC001_C0DE); + pseudo_random_u8(&mut up, 0xCAFE_F00D); + pseudo_random_u8(&mut vp, 0xDEAD_BEEF); + let src = Yuv422pFrame::new( + &yp, + &up, + &vp, + w as u32, + h as u32, + w as u32, + (w / 2) as u32, + (w / 2) as u32, + ); + + for &matrix in &[ + ColorMatrix::Bt601, + ColorMatrix::Bt709, + ColorMatrix::Bt2020Ncl, + ColorMatrix::YCgCo, + ] { + for &full_range in &[true, false] { + let mut rgba_simd = std::vec![0u8; w * h * 4]; + let mut rgba_scalar = std::vec![0u8; w * h * 4]; + + let mut s_simd = MixedSinker::::new(w, h) + .with_rgba(&mut rgba_simd) + .unwrap(); + yuv422p_to(&src, full_range, matrix, &mut s_simd).unwrap(); + + let mut s_scalar = MixedSinker::::new(w, h) + .with_rgba(&mut rgba_scalar) + .unwrap(); + s_scalar.set_simd(false); + yuv422p_to(&src, full_range, matrix, &mut s_scalar).unwrap(); + + if rgba_simd != rgba_scalar { + let mismatch = rgba_simd + .iter() + .zip(rgba_scalar.iter()) + .position(|(a, b)| a != b) + .unwrap(); + let pixel = mismatch / 4; + let channel = ["R", "G", "B", "A"][mismatch % 4]; + panic!( + "Yuv422p RGBA SIMD ≠ scalar at byte {mismatch} (px {pixel} {channel}) for matrix={matrix:?} full_range={full_range}: simd={} scalar={}", + rgba_simd[mismatch], rgba_scalar[mismatch] + ); + } + } + } + } + + #[test] + #[cfg_attr( + miri, + ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri" + )] + fn yuv422p_rgba_matches_yuv420p_rgba_when_chroma_matches() { + // 4:2:2 and 4:2:0 differ only in vertical chroma walk. With + // solid chroma planes they must produce identical RGBA — same + // shape as the existing `yuv422p_matches_yuv420p_luma_when_chroma_matches` + // RGB-path test for the new RGBA path. + let w = 32u32; + let h = 8u32; + let (yp, up422, vp422) = solid_yuv422p_frame(w, h, 140, 100, 160); + let src422 = Yuv422pFrame::new(&yp, &up422, &vp422, w, h, w, w / 2, w / 2); + + let (yp420, up420, vp420) = solid_yuv420p_frame(w, h, 140, 100, 160); + let src420 = Yuv420pFrame::new(&yp420, &up420, &vp420, w, h, w, w / 2, w / 2); + + let mut rgba422 = std::vec![0u8; (w * h * 4) as usize]; + let mut rgba420 = std::vec![0u8; (w * h * 4) as usize]; + let mut s422 = MixedSinker::::new(w as usize, h as usize) + .with_rgba(&mut rgba422) + .unwrap(); + let mut s420 = MixedSinker::::new(w as usize, h as usize) + .with_rgba(&mut rgba420) + .unwrap(); + yuv422p_to(&src422, true, ColorMatrix::Bt709, &mut s422).unwrap(); + yuv420p_to(&src420, true, ColorMatrix::Bt709, &mut s420).unwrap(); + assert_eq!(rgba422, rgba420); + } + // ---- 9-bit family + 4:4:0 family sanity tests ------------------------ fn solid_yuv440p_frame(