diff --git a/common/libs/VkCodecUtils/VkImageResource.cpp b/common/libs/VkCodecUtils/VkImageResource.cpp index 0ea91333..aa7d17ff 100644 --- a/common/libs/VkCodecUtils/VkImageResource.cpp +++ b/common/libs/VkCodecUtils/VkImageResource.cpp @@ -15,9 +15,11 @@ */ #include +#include #include "VkCodecUtils/HelpersDispatchTable.h" #include "VkCodecUtils/Helpers.h" #include "VkCodecUtils/VulkanDeviceContext.h" +#include "VkCodecUtils/VulkanSamplerYcbcrConversion.h" #include "nvidia_utils/vulkan/ycbcrvkinfo.h" #include "VkImageResource.h" @@ -190,16 +192,50 @@ VkResult VkImageResourceView::Create(const VulkanDeviceContext* vkDevCtx, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; viewInfo.subresourceRange = imageSubresourceRange; viewInfo.flags = 0; + + const VkMpFormatInfo* mpInfo = YcbcrVkFormatInfo(viewInfo.format); + VkSamplerYcbcrConversionInfo ycbcrInfo = {}; + ycbcrInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO; + + // Owned locally until handed off to VkImageResourceView at the end. + // The conversion handle must outlive the image view that references it in pNext. 
+ std::unique_ptr<VulkanSamplerYcbcrConversion> samplerYcbcrConversion; + + if (mpInfo && (imageResource->GetImageCreateInfo().usage & VK_IMAGE_USAGE_SAMPLED_BIT)) { + const VkSamplerYcbcrConversionCreateInfo defaultSamplerYcbcrConversionCreateInfo = { + VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, + nullptr, + imageResource->GetImageCreateInfo().format, + VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709, + VK_SAMPLER_YCBCR_RANGE_ITU_NARROW, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY }, + VK_CHROMA_LOCATION_MIDPOINT, + VK_CHROMA_LOCATION_MIDPOINT, + VK_FILTER_LINEAR, + VK_FALSE + }; + + samplerYcbcrConversion = std::make_unique<VulkanSamplerYcbcrConversion>(); + VkResult result = samplerYcbcrConversion->CreateVulkanSampler(vkDevCtx, nullptr, &defaultSamplerYcbcrConversionCreateInfo); + if (result != VK_SUCCESS) { + return result; + } + + ycbcrInfo.conversion = samplerYcbcrConversion->GetSamplerYcbcrConversion(); + viewInfo.pNext = &ycbcrInfo; + } + VkResult result = vkDevCtx->CreateImageView(device, &viewInfo, nullptr, &imageViews[numViews]); if (result != VK_SUCCESS) { return result; } numViews++; - const VkMpFormatInfo* mpInfo = YcbcrVkFormatInfo(viewInfo.format); if (mpInfo) { uint32_t numPlanes = 0; // Create separate image views for Y and CbCr planes + viewInfo.pNext = nullptr; viewInfo.format = mpInfo->vkPlaneFormat[numPlanes]; // For the Y plane viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT << numPlanes; result = vkDevCtx->CreateImageView(device, &viewInfo, nullptr, &imageViews[numViews]); @@ -234,7 +270,8 @@ VkResult VkImageResourceView::Create(const VulkanDeviceContext* vkDevCtx, imageResourceView = new VkImageResourceView(vkDevCtx, imageResource, numViews, numViews - 1, - imageViews, imageSubresourceRange); + imageViews, imageSubresourceRange, + samplerYcbcrConversion.release()); return result; } @@ -248,6 +285,10 @@ VkImageResourceView::~VkImageResourceView() } } + // Destroy ycbcr conversion after all image views 
referencing it have been destroyed. + delete m_samplerYcbcrConversion; + m_samplerYcbcrConversion = nullptr; + m_imageResource = nullptr; m_vkDevCtx = nullptr; } diff --git a/common/libs/VkCodecUtils/VkImageResource.h b/common/libs/VkCodecUtils/VkImageResource.h index 0c4c0ac8..f945f4d7 100644 --- a/common/libs/VkCodecUtils/VkImageResource.h +++ b/common/libs/VkCodecUtils/VkImageResource.h @@ -21,6 +21,8 @@ #include "VkCodecUtils/VkVideoRefCountBase.h" #include "VkCodecUtils/VulkanDeviceMemoryImpl.h" +class VulkanSamplerYcbcrConversion; + class VkImageResource : public VkVideoRefCountBase { public: @@ -162,15 +164,19 @@ class VkImageResourceView : public VkVideoRefCountBase VkImageSubresourceRange m_imageSubresourceRange; uint32_t m_numViews; uint32_t m_numPlanes; + // Owned; must outlive m_imageViews that reference it via pNext. + VulkanSamplerYcbcrConversion* m_samplerYcbcrConversion; VkImageResourceView(const VulkanDeviceContext* vkDevCtx, VkSharedBaseObj& imageResource, uint32_t numViews, uint32_t numPlanes, - VkImageView imageViews[4], VkImageSubresourceRange &imageSubresourceRange) + VkImageView imageViews[4], VkImageSubresourceRange &imageSubresourceRange, + VulkanSamplerYcbcrConversion* samplerYcbcrConversion = nullptr) : m_refCount(0), m_vkDevCtx(vkDevCtx), m_imageResource(imageResource), m_imageViews{VK_NULL_HANDLE}, m_imageSubresourceRange(imageSubresourceRange), - m_numViews(numViews), m_numPlanes(numPlanes) + m_numViews(numViews), m_numPlanes(numPlanes), + m_samplerYcbcrConversion(samplerYcbcrConversion) { for (uint32_t imageViewIndx = 0; imageViewIndx < m_numViews; imageViewIndx++) { m_imageViews[imageViewIndx] = imageViews[imageViewIndx]; diff --git a/common/libs/VkCodecUtils/VulkanBistreamBufferImpl.cpp b/common/libs/VkCodecUtils/VulkanBistreamBufferImpl.cpp index cb6d265b..8674acbc 100644 --- a/common/libs/VkCodecUtils/VulkanBistreamBufferImpl.cpp +++ b/common/libs/VkCodecUtils/VulkanBistreamBufferImpl.cpp @@ -222,7 +222,7 @@ VkDeviceSize 
VulkanBitstreamBufferImpl::GetOffsetAlignment() const VkDeviceSize VulkanBitstreamBufferImpl::GetSizeAlignment() const { - return m_vulkanDeviceMemory->GetMemoryRequirements().alignment; + return m_bufferSizeAlignment; } VkDeviceSize VulkanBitstreamBufferImpl::Resize(VkDeviceSize newSize, VkDeviceSize copySize, VkDeviceSize copyOffset) diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp index 197916cd..a9e9427e 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp +++ b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp @@ -20,11 +20,13 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include // std::find_if @@ -392,10 +394,85 @@ VkResult VulkanDeviceContext::InitVkInstance(const char * pAppName, VkInstance v return result; } +// Known validation layer false positives for Vulkan Video decode operations. +// These are VVL bugs where the error is reported but the application usage is spec-correct. +// Matching the pattern from nvpro_core2/nvvk/context.cpp g_ignoredValidationMessageIds[]. +// See: https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/11531 +// See: https://github.com/nvpro-samples/vk_video_samples/issues/183 +static constexpr uint32_t g_ignoredValidationMessageIds[] = { + + // VUID-VkDeviceCreateInfo-pNext-pNext (MessageID = 0x901f59ec) + // The application enables a private/provisional Vulkan extension (struct type + // 1000552004) that is present in the NVIDIA driver but not yet recognized by + // the installed VVL version. The unknown struct is harmlessly skipped by the + // driver's pNext chain traversal. Will resolve when VVL headers are updated. + 0x901f59ec, + + // VUID-VkImageViewCreateInfo-image-01762 (MessageID = 0x6516b437) + // VVL false positive for video-profile-bound multi-planar images. 
+ // The DPB images ARE created with VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT + // (VulkanVideoImagePool.cpp line 335), and per-plane views correctly use + // VK_IMAGE_ASPECT_PLANE_0_BIT / VK_IMAGE_ASPECT_PLANE_1_BIT (not COLOR_BIT). + // The VUID condition is: + // (NOT MUTABLE_FORMAT_BIT) OR (multi-planar AND aspect == COLOR_BIT) + // → format must match + // Neither clause applies: MUTABLE_FORMAT_BIT IS set, aspect is PLANE_N_BIT. + // VVL 1.4.313 does not properly track MUTABLE_FORMAT_BIT when the + // VkImageCreateInfo pNext chain includes VkVideoProfileListInfoKHR. + 0x6516b437, + + // VUID-vkCmdBeginVideoCodingKHR-slotIndex-07239 (MessageID = 0xc36d9e29) + // Cascading VVL false positive from the VUID-01762 issue above. + // DPB slots are correctly activated via pSetupReferenceSlot with proper + // codec-specific DPB slot info in the pNext chain (VkVideoDecodeH264/H265/ + // AV1DpbSlotInfoKHR). Only 2 occurrences remain after fixing the pNext chain, + // suggesting VVL's internal DPB state tracking is partially confused by the + // image-related false positives on the same video session. + // Decoding works correctly on all tested hardware. + 0xc36d9e29, + + // VUID-vkCmdDecodeVideoKHR-pDecodeInfo-07139 (MessageID = 0xe9634196) + // H.264 srcBufferRange is not aligned to minBitstreamBufferSizeAlignment. + // NVDEC's H.264 NAL scanner uses srcBufferRange to bound its start-code scan. + // Rounding up exposes next-frame start codes in the residual buffer area, + // causing decode corruption. H.265/AV1/VP9 are properly aligned. + // The proper fix is to handle alignment in the H.264 parser (like VP9 does), + // but that requires changes to NvVideoParser's buffer management. 
+ 0xe9634196, +}; + bool VulkanDeviceContext::DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT, uint64_t, size_t, - int32_t, const char *layer_prefix, const char *msg) + int32_t msg_code, const char *layer_prefix, const char *msg) { + // Allow developers to bypass all VVL suppressions for regression hunts. + static const bool s_suppressionsDisabled = + (std::getenv("VKVS_DISABLE_VVL_SUPPRESSION") != nullptr); + + static std::mutex s_suppressMutex; + static std::unordered_set<uint32_t> s_firstSeen; + + // Suppress known validation layer false positives (see explanations above). + // Print the first occurrence of each suppressed id so developers retain + // visibility that a suppression is active; subsequent occurrences stay silent. + if (!s_suppressionsDisabled) { + for (uint32_t ignoredId : g_ignoredValidationMessageIds) { + if (static_cast<uint32_t>(msg_code) == ignoredId) { + bool firstOccurrence = false; + { + std::lock_guard<std::mutex> lock(s_suppressMutex); + firstOccurrence = s_firstSeen.insert(ignoredId).second; + } + if (firstOccurrence) { + fprintf(stderr, + "[VVL-suppress] %s: %s (messageId=0x%08x, suppressing further occurrences)\n", + layer_prefix, msg, ignoredId); + } + return false; + } + } + } + LogPriority prio = LOG_WARN; if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) prio = LOG_ERR; @@ -808,8 +885,13 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues, pNext = (VkBaseInStructure*)&videoDecodeVP9Feature; } + VkPhysicalDeviceSamplerYcbcrConversionFeatures samplerYcbcrConversionFeatures { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES, + pNext, + VK_FALSE + }; + VkPhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeatures { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, - pNext, + &samplerYcbcrConversionFeatures, VK_FALSE }; @@ -834,6 +916,8 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues, CHECK_VULKAN_FEATURE(timelineSemaphoreFeatures.timelineSemaphore, 
"timelineSemaphore", false); CHECK_VULKAN_FEATURE(videoMaintenance1Features.videoMaintenance1, "videoMaintenance1", true); CHECK_VULKAN_FEATURE(synchronization2Features.synchronization2, "synchronization2", false); + CHECK_VULKAN_FEATURE(samplerYcbcrConversionFeatures.samplerYcbcrConversion, + "samplerYcbcrConversion", false); CHECK_VULKAN_FEATURE(((videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) != 0) == (videoEncodeAV1Feature.videoEncodeAV1 != VK_FALSE), "videoEncodeAV1", false); CHECK_VULKAN_FEATURE(((videoCodecs & VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) != 0) == diff --git a/common/libs/VkCodecUtils/VulkanDisplayFrame.h b/common/libs/VkCodecUtils/VulkanDisplayFrame.h index c86f5ea0..2348eef0 100644 --- a/common/libs/VkCodecUtils/VulkanDisplayFrame.h +++ b/common/libs/VkCodecUtils/VulkanDisplayFrame.h @@ -52,6 +52,7 @@ class VulkanDisplayFrame int32_t submittedVideoQueueIndex; uint32_t hasConsummerSignalFence : 1; uint32_t hasConsummerSignalSemaphore : 1; + VkImageLayout outputImageLayout; // Layout of the decoded output image (DPB in coincide mode, DST in distinct) void Reset() { @@ -79,6 +80,7 @@ class VulkanDisplayFrame timestamp = 0; hasConsummerSignalFence = false; hasConsummerSignalSemaphore = false; + outputImageLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; // For debugging decodeOrder = 0; displayOrder = 0; @@ -105,6 +107,7 @@ class VulkanDisplayFrame , submittedVideoQueueIndex() , hasConsummerSignalFence() , hasConsummerSignalSemaphore() + , outputImageLayout(VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR) {} virtual ~VulkanDisplayFrame() { diff --git a/common/libs/VkCodecUtils/VulkanFrame.cpp b/common/libs/VkCodecUtils/VulkanFrame.cpp index eee41db8..cfa4fa05 100644 --- a/common/libs/VkCodecUtils/VulkanFrame.cpp +++ b/common/libs/VkCodecUtils/VulkanFrame.cpp @@ -436,7 +436,7 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, m_videoRenderer->m_useTestImage); VkImageResourceView* pView = inFrame ? 
imageResourceView : (VkImageResourceView*)nullptr; - vulkanVideoUtils::ImageResourceInfo rtImage(pView, VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR); + vulkanVideoUtils::ImageResourceInfo rtImage(pView, inFrame ? inFrame->outputImageLayout : VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR); const vulkanVideoUtils::ImageResourceInfo* pRtImage = doTestPatternFrame ? &m_videoRenderer->m_testFrameImage : &rtImage; VkFence frameConsumerDoneFence = doTestPatternFrame ? VkFence() : inFrame->frameConsumerDoneFence; int32_t displayWidth = doTestPatternFrame ? pRtImage->imageWidth : inFrame->displayWidth; @@ -618,9 +618,7 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; waitSemaphoreInfos[waitSemaphoreCount].semaphore = inFrame->frameCompleteSemaphore; waitSemaphoreInfos[waitSemaphoreCount].value = inFrame->frameCompleteDoneSemValue; - waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR | - VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; waitSemaphoreCount++; diff --git a/common/libs/VkCodecUtils/VulkanSamplerYcbcrConversion.h b/common/libs/VkCodecUtils/VulkanSamplerYcbcrConversion.h index e3129633..7dcfa861 100644 --- a/common/libs/VkCodecUtils/VulkanSamplerYcbcrConversion.h +++ b/common/libs/VkCodecUtils/VulkanSamplerYcbcrConversion.h @@ -60,6 +60,10 @@ class VulkanSamplerYcbcrConversion { return m_sampler; } + VkSamplerYcbcrConversion GetSamplerYcbcrConversion() const { + return m_samplerYcbcrConversion; + } + const VkSamplerYcbcrConversionCreateInfo& GetSamplerYcbcrConversionCreateInfo() const { return m_samplerYcbcrConversionCreateInfo; diff --git a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp index e2cdf9ce..3f2f752f 100644 --- 
a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp @@ -692,14 +692,14 @@ VkResult VulkanPerDrawContext::RecordCommandBuffer(VkCommandBuffer cmdBuffer, if (pFormatInfo == NULL) { // Non-planar input image. setImageLayout(m_vkDevCtx, cmdBuffer, inputImageToDrawFrom->image, - VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + inputImageToDrawFrom->imageLayout, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_IMAGE_ASPECT_COLOR_BIT); } else { // Multi-planar input image. for (uint32_t planeIndx = 0; (planeIndx < (uint32_t)pFormatInfo->planesLayout.numberOfExtraPlanes + 1); planeIndx++) { setImageLayout(m_vkDevCtx, cmdBuffer, inputImageToDrawFrom->image, - VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + inputImageToDrawFrom->imageLayout, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, (VK_IMAGE_ASPECT_PLANE_0_BIT_KHR << planeIndx)); @@ -821,14 +821,14 @@ VkResult VulkanPerDrawContext::RecordCommandBuffer(VkCommandBuffer cmdBuffer, if (pFormatInfo == NULL) { // Non-planar input image. setImageLayout(m_vkDevCtx, cmdBuffer, inputImageToDrawFrom->image, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, inputImageToDrawFrom->imageLayout, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, VK_IMAGE_ASPECT_COLOR_BIT); } else { // Multi-planar input image. 
for (uint32_t planeIndx = 0; (planeIndx < (uint32_t)pFormatInfo->planesLayout.numberOfExtraPlanes + 1); planeIndx++) { setImageLayout(m_vkDevCtx, cmdBuffer, inputImageToDrawFrom->image, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, inputImageToDrawFrom->imageLayout, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, (VK_IMAGE_ASPECT_PLANE_0_BIT_KHR << planeIndx)); diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.cpp index 32afdb37..08fd7c58 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.cpp @@ -262,12 +262,19 @@ VkResult VkParserVideoPictureParameters::UpdateParametersObject(const StdVideoPi return VK_ERROR_INITIALIZATION_FAILED; } - updateInfo.updateSequenceCount = std::max(pStdVideoPictureParametersSet->GetUpdateSequenceCount(), updateInfo.updateSequenceCount); - + // Per Vulkan spec (VUID-vkUpdateVideoSessionParametersKHR-pUpdateInfo-07215): + // updateSequenceCount must equal the current update sequence counter of + // videoSessionParameters plus one. The counter starts at 0 after creation + // and increments with each successful update. Track it with m_updateCount. 
+ updateInfo.updateSequenceCount = ++m_updateCount; VkResult result = m_vkDevCtx->UpdateVideoSessionParametersKHR(*m_vkDevCtx, m_sessionParameters, &updateInfo); + if (result != VK_SUCCESS) { + // Rollback the counter on failure so the next attempt uses the same value + --m_updateCount; + } if (result == VK_SUCCESS) { diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.h b/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.h index aa916ad8..24775022 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.h +++ b/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.h @@ -141,6 +141,7 @@ class VkParserVideoPictureParameters : public VkVideoRefCountBase { std::bitset m_ppsIdsUsed; std::bitset m_av1SpsIdsUsed; VkSharedBaseObj m_templatePictureParameters; // needed only for the create + uint32_t m_updateCount{}; // Vulkan session parameters update sequence counter std::queue> m_pictureParametersQueue; VkSharedBaseObj m_lastPictParamsQueue[StdVideoPictureParametersSet::NUM_OF_TYPES]; diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index f1bf485f..ba6568e9 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -810,12 +810,59 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(pCurrFrameDecParams->bitstreamData->GetMaxSize() >= pCurrFrameDecParams->bitstreamDataLen); pCurrFrameDecParams->decodeFrameInfo.srcBuffer = pCurrFrameDecParams->bitstreamData->GetBuffer(); - //assert(pCurrFrameDecParams->bitstreamDataOffset == 0); assert(pCurrFrameDecParams->firstSliceIndex == 0); - // TODO: Assert if bitstreamDataOffset is aligned to VkVideoCapabilitiesKHR::minBitstreamBufferOffsetAlignment - pCurrFrameDecParams->decodeFrameInfo.srcBufferOffset = pCurrFrameDecParams->bitstreamDataOffset; - // TODO: 
Assert if bitstreamDataLen is aligned to VkVideoCapabilitiesKHR::minBitstreamBufferSizeAlignment - pCurrFrameDecParams->decodeFrameInfo.srcBufferRange = pCurrFrameDecParams->bitstreamDataLen; + + // Verify bitstream buffer alignment invariants. + // The parser's buffer management (swapBitstreamBuffer / GetBitstreamBuffer) ensures: + // - Buffers are allocated with size rounded up to minBitstreamBufferSizeAlignment + // - Residual data is copied to offset 0 of a new aligned buffer + // - bitstreamDataOffset is 0 for H.264/H.265/AV1 (set in end_of_picture) + // - VP9 aligns offset in the parser (VulkanVP9Decoder.cpp:261) + const VkDeviceSize sizeAlignment = pCurrFrameDecParams->bitstreamData->GetSizeAlignment(); + const VkDeviceSize bufferMaxSize = pCurrFrameDecParams->bitstreamData->GetMaxSize(); + assert(sizeAlignment > 0 && (sizeAlignment & (sizeAlignment - 1)) == 0 && + "minBitstreamBufferSizeAlignment must be a non-zero power of two"); + assert((bufferMaxSize & (sizeAlignment - 1)) == 0 && + "bitstream buffer max size must be aligned to sizeAlignment"); + + // srcBufferOffset: must be 0 (H.264/H.265/AV1) or aligned (VP9). + // These codecs don't use non-zero offsets in the parser's end_of_picture. + VkDeviceSize srcOffset = pCurrFrameDecParams->bitstreamDataOffset; + // Safety: force to 0 for codecs that should not have non-zero offset + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + assert(srcOffset == 0 && "non-zero bitstreamDataOffset for non-VP9 codec"); + if (srcOffset != 0) { + fprintf(stderr, "WARNING: bitstreamDataOffset=%zu is non-zero for non-VP9 codec, forcing to 0\n", + (size_t)srcOffset); + srcOffset = 0; + } + } + + // srcBufferRange alignment to minBitstreamBufferSizeAlignment. + // The bytes beyond bitstreamDataLen contain the next frame's residual data + // (swapBitstreamBuffer copies it after decode returns), so we cannot zero-fill. 
+ // + // H.264: NVDEC's NAL scanner uses srcBufferRange to bound its start-code scan. + // Rounding up exposes the next frame's start codes in the residual area, + // causing decode corruption. Pass exact bitstreamDataLen for H.264. + // H.265/AV1: Use slice segment offsets / tile sizes exclusively, so rounding + // up is safe -- the residual data is ignored by the HW decoder. + // VP9: Already aligned by the parser (VulkanVP9Decoder.cpp:259). + VkDeviceSize srcRange = pCurrFrameDecParams->bitstreamDataLen; + const bool canAlignRange = (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) && (sizeAlignment != 0) && ((sizeAlignment & (sizeAlignment - 1)) == 0); + VkDeviceSize alignedRange; + if (canAlignRange) { + alignedRange = (srcRange + (sizeAlignment - 1)) & ~(sizeAlignment - 1); + if (srcOffset + alignedRange > bufferMaxSize) { + alignedRange = bufferMaxSize - srcOffset; + } + } else { + // H.264, or an alignment that is zero / not a power of two (the assert on sizeAlignment is compiled out in release builds and the mask math would yield a bogus range): pass exact range; suppress VUID-07139 in g_ignoredValidationMessageIds + alignedRange = srcRange; + } + + pCurrFrameDecParams->decodeFrameInfo.srcBufferOffset = srcOffset; + pCurrFrameDecParams->decodeFrameInfo.srcBufferRange = alignedRange; VkVideoBeginCodingInfoKHR decodeBeginInfo = { VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR }; decodeBeginInfo.pNext = pCurrFrameDecParams->beginCodingInfoPictureParametersExt; @@ -1276,9 +1323,7 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; waitSemaphoreInfos[waitSemaphoreCount].semaphore = consumerCompleteSemaphore; waitSemaphoreInfos[waitSemaphoreCount].value = frameSynchronizationInfo.frameConsumerDoneTimelineValue; - waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR | - VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; waitSemaphoreCount++; } diff --git 
a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp index 289941b2..f5075ca7 100644 --- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp +++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp @@ -425,7 +425,6 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if ((pFrameSynchronizationInfo->syncOnFrameConsumerDoneFence == 1) && - (m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore == 0) && (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence == 1) && (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence != VK_NULL_HANDLE)) { @@ -529,6 +528,25 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { pDecodedFrame->imageViews[VulkanDisplayFrame::IMAGE_VIEW_TYPE_OPTIMAL_DISPLAY].view = m_perFrameDecodeImageSet[pictureIndex].GetImageView(displayOutImageType); pDecodedFrame->imageViews[VulkanDisplayFrame::IMAGE_VIEW_TYPE_OPTIMAL_DISPLAY].singleLevelView = m_perFrameDecodeImageSet[pictureIndex].GetSingleLevelImageView(displayOutImageType); pDecodedFrame->imageViews[VulkanDisplayFrame::IMAGE_VIEW_TYPE_OPTIMAL_DISPLAY].inUse = true; + + VulkanVideoFrameBuffer::PictureResourceInfo displayResInfo{}; + m_perFrameDecodeImageSet[pictureIndex].GetImageSetNewLayout( + displayOutImageType, VK_IMAGE_LAYOUT_MAX_ENUM, + nullptr, &displayResInfo); + VkImageLayout trackedLayout = displayResInfo.currentImageLayout; + if (trackedLayout == VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR || + trackedLayout == VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR) { + pDecodedFrame->outputImageLayout = trackedLayout; + } else { + // The tracker should always report DPB_KHR (coincided mode) + // or DST_KHR (distinct mode) at dequeue time. Anything else + // would mean a previous pass left the image in an unexpected + // state and the default DST_KHR transition would be wrong. 
+ fprintf(stderr, + "WARNING: unexpected output image layout %d at dequeue, " + "defaulting to VIDEO_DECODE_DST_KHR\n", + (int)trackedLayout); + } } } @@ -946,14 +964,23 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, } } - // Create timeline semaphores instead of binary semaphores + VkExportSemaphoreCreateInfo exportInfo = {}; + exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; + exportInfo.pNext = nullptr; +#ifdef _WIN32 + exportInfo.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; +#else + exportInfo.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; +#endif + VkSemaphoreTypeCreateInfo timelineCreateInfo = {}; timelineCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO; - timelineCreateInfo.pNext = nullptr; + timelineCreateInfo.pNext = &exportInfo; timelineCreateInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE; timelineCreateInfo.initialValue = 0ULL; VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &timelineCreateInfo }; + if (m_frameCompleteSemaphore == VK_NULL_HANDLE) { result = vkDevCtx->CreateSemaphore(*vkDevCtx, &semInfo, nullptr, &m_frameCompleteSemaphore); assert(result == VK_SUCCESS);