From d3e8058606a60270878ce564ca6ba8282d34b85d Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Sun, 9 Mar 2025 23:53:42 -0700 Subject: [PATCH 1/7] decode: Round up width/height of the chroma plane For chroma resolution in 420 or 422 format, round width/height to the next integer when calculating from luma width and height. --- common/libs/VkCodecUtils/VkVideoFrameToFile.cpp | 8 ++++---- common/libs/VkCodecUtils/VulkanVideoUtils.cpp | 2 +- vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index 26d78757..31b51c29 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -336,7 +336,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { - secondaryPlaneHeight /= 2; + secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2; } VkImageSubresource subResource = {}; @@ -383,12 +383,12 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight; yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel; if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[1].rowPitch /= 2; + yuvPlaneLayouts[1].rowPitch = (yuvPlaneLayouts[1].rowPitch + 1) / 2; } yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight); yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel; if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[2].rowPitch /= 2; + yuvPlaneLayouts[2].rowPitch = (yuvPlaneLayouts[2].rowPitch + 1) / 2; } // Copy the luma plane @@ -410,7 +410,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { for (uint32_t plane = numCompatiblePlanes; plane < numPlanes; plane++) { const uint32_t srcPlane = 
std::min(plane, mpInfo->planesLayout.numberOfExtraPlanes); uint8_t* pDst = pOutBuffer + yuvPlaneLayouts[plane].offset; - const int32_t planeWidth = mpInfo->planesLayout.secondaryPlaneSubsampledX ? frameWidth / 2 : frameWidth; + const int32_t planeWidth = mpInfo->planesLayout.secondaryPlaneSubsampledX ? (frameWidth + 1) / 2 : frameWidth; for (int32_t height = 0; height < secondaryPlaneHeight; height++) { const uint8_t* pSrc; diff --git a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp index 39f3b6f1..e2cdf9ce 100644 --- a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp @@ -254,7 +254,7 @@ VkResult ImageObject::CopyYuvToVkImage(uint32_t numPlanes, const uint8_t* yuvPla } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { - cbimageHeight /= 2; + cbimageHeight = (cbimageHeight + 1) / 2; } if (mpInfo && !isUnnormalizedRgba) { diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp index d79ed014..29e2a36d 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp @@ -1306,12 +1306,12 @@ VkResult VkVideoEncoder::CopyLinearToOptimalImage(VkCommandBuffer& commandBuffer copyRegion[0].dstSubresource.layerCount = 1; copyRegion[1].extent.width = copyRegion[0].extent.width; if (mpInfo->planesLayout.secondaryPlaneSubsampledX != 0) { - copyRegion[1].extent.width /= 2; + copyRegion[1].extent.width = (copyRegion[1].extent.width + 1) / 2; } copyRegion[1].extent.height = copyRegion[0].extent.height; if (mpInfo->planesLayout.secondaryPlaneSubsampledY != 0) { - copyRegion[1].extent.height /= 2; + copyRegion[1].extent.height = (copyRegion[1].extent.height + 1) / 2; } copyRegion[1].extent.depth = 1; From 49e137444ca0c0ffc878e51f49a86ed76bccca3f Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Tue, 18 Mar 2025 00:14:41 -0700 Subject: [PATCH 
2/7] decode: Remove assert when fence times out Remove assert when fence times out but the timeout value is less than the total wait time. --- common/libs/VkCodecUtils/Helpers.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/common/libs/VkCodecUtils/Helpers.h b/common/libs/VkCodecUtils/Helpers.h index 4218d36d..e4c70abb 100644 --- a/common/libs/VkCodecUtils/Helpers.h +++ b/common/libs/VkCodecUtils/Helpers.h @@ -238,23 +238,21 @@ inline VkResult WaitAndResetFence(const VkInterfaceFunctions* vkIf, VkDevice dev while (fenceTotalWaitTimeout >= fenceCurrentWaitTimeout) { - result = vkIf->WaitForFences(device, 1, &fence, true, fenceWaitTimeout); - if (result != VK_SUCCESS) { - fprintf(stderr, "\t **** WARNING: fence %s(%llu) is not done after %llu nSec with result 0x%x ****\n", - fenceName, (long long unsigned int)fence, (long long unsigned int)fenceWaitTimeout, result); - assert(!"Fence is not signaled yet after more than 100 mSec wait"); - } + fenceCurrentWaitTimeout += fenceWaitTimeout; - if (result != VK_TIMEOUT) { - break; + result = vkIf->WaitForFences(device, 1, &fence, true, fenceWaitTimeout); + if (result == VK_TIMEOUT) { + fprintf(stderr, "\t **** WARNING: fence %s(%llu) is not done after %llu mSec with result 0x%x ****\n", + fenceName, (long long unsigned int)fence, (long long unsigned int)fenceCurrentWaitTimeout/(1000ULL * 1000ULL), result); + } else { + break; // either success or an error occurred } - fenceCurrentWaitTimeout += fenceWaitTimeout; } if (result != VK_SUCCESS) { - fprintf(stderr, "\t **** ERROR: fence %s(%llu) is not done after %llu nSec with result 0x%x ****\n", - fenceName, (long long unsigned int)fence, (long long unsigned int)fenceTotalWaitTimeout, vkIf->GetFenceStatus(device, fence)); + fprintf(stderr, "\t **** ERROR: fence %s(%llu) is not done after %llu mSec with result 0x%x ****\n", + fenceName, (long long unsigned int)fence, (long long unsigned int)fenceTotalWaitTimeout/(1000ULL * 1000ULL), 
vkIf->GetFenceStatus(device, fence)); assert(!"Fence is not signaled yet after more than 100 mSec wait"); } From 5753d4bc8dae37f8475423ebe8714c12e592b03c Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Tue, 18 Mar 2025 03:21:57 -0700 Subject: [PATCH 3/7] common: Provide separate destructor implementation Instead of defining the destructor inline, provide its implementation in a separate source file to make GCC 13 happy in release mode. Also fix other compilation errors in release mode. Signed-off-by: Raju Konda --- common/libs/VkCodecUtils/VkImageResource.cpp | 5 +++++ common/libs/VkCodecUtils/VkImageResource.h | 2 +- common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp | 5 +++++ common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h | 2 +- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/common/libs/VkCodecUtils/VkImageResource.cpp b/common/libs/VkCodecUtils/VkImageResource.cpp index 302ba8a1..0ea91333 100644 --- a/common/libs/VkCodecUtils/VkImageResource.cpp +++ b/common/libs/VkCodecUtils/VkImageResource.cpp @@ -88,6 +88,11 @@ VkImageResource::VkImageResource(const VulkanDeviceContext* vkDevCtx, } } +VkImageResource::~VkImageResource() +{ + Destroy(); +} + VkResult VkImageResource::Create(const VulkanDeviceContext* vkDevCtx, const VkImageCreateInfo* pImageCreateInfo, VkMemoryPropertyFlags memoryPropertyFlags, diff --git a/common/libs/VkCodecUtils/VkImageResource.h b/common/libs/VkCodecUtils/VkImageResource.h index 314a2f01..0c4c0ac8 100644 --- a/common/libs/VkCodecUtils/VkImageResource.h +++ b/common/libs/VkCodecUtils/VkImageResource.h @@ -113,7 +113,7 @@ class VkImageResource : public VkVideoRefCountBase void Destroy(); - virtual ~VkImageResource() { Destroy(); } + virtual ~VkImageResource(); }; class VkImageResourceView : public VkVideoRefCountBase diff --git a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp index db05c81a..f78b0de8 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp 
+++ b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp @@ -410,3 +410,8 @@ const uint8_t* VulkanDeviceMemoryImpl::GetReadOnlyDataPtr(VkDeviceSize offset, V maxSize = m_memoryRequirements.size - offset; return readData; } + +VulkanDeviceMemoryImpl::~VulkanDeviceMemoryImpl() +{ + Deinitialize(); +} diff --git a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h index 6b94e38e..f7d1c2d7 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h +++ b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h @@ -106,7 +106,7 @@ class VulkanDeviceMemoryImpl : public VkVideoRefCountBase void Deinitialize(); - virtual ~VulkanDeviceMemoryImpl() { Deinitialize(); } + virtual ~VulkanDeviceMemoryImpl(); private: std::atomic m_refCount; From 48b501e0bef1780779f0bf8ba0b408361d1fb39e Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Mon, 28 Apr 2025 00:17:59 -0700 Subject: [PATCH 4/7] decode: Calculate chroma plane width separately Calculate the chroma plane width separately, and add the U and V widths to get the final UV plane width in NV12. 
--- .../libs/VkCodecUtils/VkVideoFrameToFile.cpp | 14 ++++++-------- .../VkCodecUtils/VulkanVideoProcessor.cpp | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index 31b51c29..097439d7 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -328,6 +328,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize); assert(readImagePtr != nullptr); + int32_t secondaryPlaneWidth = frameWidth; int32_t secondaryPlaneHeight = frameHeight; int32_t imageHeight = frameHeight; bool isUnnormalizedRgba = false; @@ -335,6 +336,9 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { isUnnormalizedRgba = true; } + if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { + secondaryPlaneWidth = (secondaryPlaneWidth + 1) / 2; + } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2; } @@ -381,15 +385,9 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { yuvPlaneLayouts[0].offset = 0; yuvPlaneLayouts[0].rowPitch = frameWidth * bytesPerPixel; yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight; - yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[1].rowPitch = (yuvPlaneLayouts[1].rowPitch + 1) / 2; - } + yuvPlaneLayouts[1].rowPitch = secondaryPlaneWidth * bytesPerPixel; yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight); - yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[2].rowPitch = (yuvPlaneLayouts[2].rowPitch + 1) / 2; - } + 
yuvPlaneLayouts[2].rowPitch = secondaryPlaneWidth * bytesPerPixel; // Copy the luma plane const uint32_t numCompatiblePlanes = 1; diff --git a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp index 3122b062..97d3906f 100644 --- a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp @@ -382,6 +382,7 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize); assert(readImagePtr != nullptr); + int32_t secondaryPlaneWidth = frameWidth; int32_t secondaryPlaneHeight = frameHeight; int32_t imageHeight = frameHeight; bool isUnnormalizedRgba = false; @@ -389,8 +390,11 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt isUnnormalizedRgba = true; } + if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { + secondaryPlaneWidth = (secondaryPlaneWidth + 1) / 2; + } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { - secondaryPlaneHeight /= 2; + secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2; } VkImageSubresource subResource = {}; @@ -439,15 +443,9 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt yuvPlaneLayouts[0].offset = 0; yuvPlaneLayouts[0].rowPitch = frameWidth * bytesPerPixel; yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight; - yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[1].rowPitch /= 2; - } + yuvPlaneLayouts[1].rowPitch = secondaryPlaneWidth * bytesPerPixel; yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight); - yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[2].rowPitch /= 2; - } + 
yuvPlaneLayouts[2].rowPitch = secondaryPlaneWidth * bytesPerPixel; // Copy the luma plane, always assume the 422 or 444 formats and src CbCr always is interleaved (shares the same plane). uint32_t numCompatiblePlanes = 1; @@ -642,6 +640,7 @@ VkResult VulkanVideoProcessor::CreateParser(const char*, static const VkExtensionProperties h264StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION }; static const VkExtensionProperties h265StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION }; static const VkExtensionProperties av1StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION }; + static const VkExtensionProperties vp9StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION }; const VkExtensionProperties* pStdExtensionVersion = NULL; if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { @@ -650,6 +649,8 @@ VkResult VulkanVideoProcessor::CreateParser(const char*, pStdExtensionVersion = &h265StdExtensionVersion; } else if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { pStdExtensionVersion = &av1StdExtensionVersion; + } else if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pStdExtensionVersion = &vp9StdExtensionVersion; } else { assert(!"Unsupported Codec Type"); return VK_ERROR_FORMAT_NOT_SUPPORTED; From 62fe6707a55d2c7e8d8ee89105c583af8a23003b Mon Sep 17 00:00:00 2001 From: "Tony Zlatinski (NVIDIA Corporation)" Date: Fri, 2 May 2025 18:37:19 +0000 Subject: [PATCH 5/7] common: Switch to VkSubmitInfo2KHR and use TL semaphores Use VkSubmitInfo2KHR and use TL semaphores in both encoder and decoder. 
--- .../include/VkVideoCore/DecodeFrameBufferIf.h | 10 ++ .../libs/VkCodecUtils/VulkanDeviceContext.cpp | 78 ++++++++- .../libs/VkCodecUtils/VulkanDeviceContext.h | 16 +- common/libs/VkCodecUtils/VulkanDisplayFrame.h | 18 +- common/libs/VkCodecUtils/VulkanFilter.h | 144 +++++++++++++-- common/libs/VkCodecUtils/VulkanFrame.cpp | 108 ++++++++---- .../libs/VkCodecUtils/VulkanSemaphoreDump.h | 90 ++++++++++ .../libs/VkVideoDecoder/VkVideoDecoder.cpp | 150 ++++++++-------- .../VulkanVideoFrameBuffer.cpp | 164 +++++++++++------- .../VulkanVideoFrameBuffer.h | 8 +- vk_video_encoder/demos/vk-video-enc/Main.cpp | 1 + .../libs/VkVideoEncoder/VkVideoEncoder.cpp | 153 +++++++++++----- vk_video_encoder/src/vulkan_video_encoder.cpp | 1 + 13 files changed, 696 insertions(+), 245 deletions(-) create mode 100644 common/libs/VkCodecUtils/VulkanSemaphoreDump.h diff --git a/common/include/VkVideoCore/DecodeFrameBufferIf.h b/common/include/VkVideoCore/DecodeFrameBufferIf.h index 60393c38..fc99a4f5 100644 --- a/common/include/VkVideoCore/DecodeFrameBufferIf.h +++ b/common/include/VkVideoCore/DecodeFrameBufferIf.h @@ -107,6 +107,16 @@ class DecodeFrameBufferIf } }; + enum SemSyncTypeIdx : uint64_t { SEM_SYNC_TYPE_IDX_DECODE = (1ULL << 0), // Decode operation was signaled + SEM_SYNC_TYPE_IDX_DISPLAY = (1ULL << 0), // Display operation was signaled + SEM_SYNC_TYPE_IDX_FILTER = (1ULL << 1), // Filter operation was signaled + SEM_SYNC_TYPE_IDX_SHIFT = 2, // Shift semaphore counter value left + }; + + static uint64_t GetSemaphoreValue(SemSyncTypeIdx semSyncType, uint64_t semOrder) { + return (semOrder << SEM_SYNC_TYPE_IDX_SHIFT) | semSyncType; + } + }; #endif /* _VKVIDEOCORE_DECODEFRAMEBUFFERIF_H_ */ diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp index f13d598e..c85c2814 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp +++ b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp @@ -214,11 +214,27 @@ VkResult 
VulkanDeviceContext::AddReqDeviceExtensions(const char* const* required break; } m_requestedDeviceExtensions.push_back(name); + if (verbose) { + std::cout << "Added required device extension: " << name << std::endl; + } } return VK_SUCCESS; } +VkResult VulkanDeviceContext::AddReqDeviceExtension(const char* requiredDeviceExtension, bool verbose) +{ + if (requiredDeviceExtension) { + m_requestedDeviceExtensions.push_back(requiredDeviceExtension); + if (verbose) { + std::cout << "Added required device extension: " << requiredDeviceExtension << std::endl; + } + } + + return VK_SUCCESS; +} + + // optional device extensions VkResult VulkanDeviceContext::AddOptDeviceExtensions(const char* const* optionalDeviceExtensions, bool verbose) { @@ -229,6 +245,9 @@ VkResult VulkanDeviceContext::AddOptDeviceExtensions(const char* const* optional break; } m_optDeviceExtensions.push_back(name); + if (verbose) { + std::cout << "Added optional device extension: " << name << std::endl; + } } return VK_SUCCESS; @@ -712,26 +731,57 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues, devInfo.queueCreateInfoCount++; } + VkPhysicalDeviceVideoDecodeVP9FeaturesKHR videoDecodeVP9Feature { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR, + nullptr, + false // videoDecodeVP9 + }; + VkPhysicalDeviceVideoEncodeAV1FeaturesKHR videoEncodeAV1Feature { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_AV1_FEATURES_KHR, nullptr, false // videoEncodeAV1 - }; + }; + // Chain only the structures that are requested + VkBaseInStructure* pNext = nullptr; + if (videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) { + videoEncodeAV1Feature.pNext = pNext; + pNext = (VkBaseInStructure*)&videoEncodeAV1Feature; + } + if (videoCodecs & VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + videoDecodeVP9Feature.pNext = pNext; + pNext = (VkBaseInStructure*)&videoDecodeVP9Feature; + } + VkPhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeatures { 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, + pNext, + VK_FALSE + }; VkPhysicalDeviceVideoMaintenance1FeaturesKHR videoMaintenance1Features { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR, - ((videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) != 0) ? - &videoEncodeAV1Feature : - nullptr, - false}; + &timelineSemaphoreFeatures, + VK_FALSE + }; VkPhysicalDeviceSynchronization2Features synchronization2Features { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES, &videoMaintenance1Features, - false + VK_FALSE }; VkPhysicalDeviceFeatures2 deviceFeatures { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, &synchronization2Features}; GetPhysicalDeviceFeatures2(m_physDevice, &deviceFeatures); + + assert(timelineSemaphoreFeatures.timelineSemaphore); + assert(videoMaintenance1Features.videoMaintenance1); + assert(synchronization2Features.synchronization2); + assert(((videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) != 0) == + (videoEncodeAV1Feature.videoEncodeAV1 != VK_FALSE)); + assert(((videoCodecs & VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) != 0) == + (videoDecodeVP9Feature.videoDecodeVP9 != VK_FALSE)); + + // Validate feature support here. + // TODO: Currntly this method is receiving all codec bits irrespective of the codec that is required to decode/encode provided input. + // Provide only required codec and features and validate the support. 
+ devInfo.pNext = &deviceFeatures; if ((numDecodeQueues > 0) && @@ -987,6 +1037,7 @@ VkResult VulkanDeviceContext::PopulateDeviceExtensions() VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, VkInstance vkInstance, + VkVideoCodecOperationFlagsKHR videoCodecs, bool enableWsi, bool enableWsiDirectMode, bool enableValidation, @@ -1020,6 +1071,7 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; @@ -1039,6 +1091,7 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, nullptr }; @@ -1070,6 +1123,19 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, /********** End WSI instance extensions support *******************************************/ #endif // VIDEO_DISPLAY_QUEUE_SUPPORT + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME); + } + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME); + } + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME); + } + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME); + } + VkResult result = InitVulkanDevice(pAppName, vkInstance, enbaleVerboseDump); if (result != VK_SUCCESS) { printf("Could not initialize the Vulkan device!\n"); diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.h b/common/libs/VkCodecUtils/VulkanDeviceContext.h index a3cf53a5..97325f1d 100644 --- 
a/common/libs/VkCodecUtils/VulkanDeviceContext.h +++ b/common/libs/VkCodecUtils/VulkanDeviceContext.h @@ -24,6 +24,7 @@ #include #include #include "VkShell/VkWsiDisplay.h" +#include "VkCodecUtils/VulkanSemaphoreDump.h" class VulkanDeviceContext : public vk::VkInterfaceFunctions { @@ -157,11 +158,22 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { }; VkResult MultiThreadedQueueSubmit(const QueueFamilySubmitType submitType, const int32_t queueIndex, - uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) const + uint32_t submitCount, const VkSubmitInfo2KHR* pSubmits, VkFence fence, + const char* submissionName = nullptr, + uint64_t decodeEncodeOrder = UINT64_MAX, + uint64_t displayInputOrder = UINT64_MAX) const { MtQueueMutex queue(this, submitType, queueIndex); if (queue) { - return QueueSubmit(queue, submitCount, pSubmits, fence); + + // Dump semaphore info for debugging + if (false) { + for (uint32_t i = 0; i < submitCount; i++) { + VulkanSemaphoreDump::DumpSemaphoreInfo(pSubmits[i], submissionName, decodeEncodeOrder, displayInputOrder); + } + } + + return QueueSubmit2KHR(queue, submitCount, pSubmits, fence); } else { return VK_ERROR_INITIALIZATION_FAILED; } diff --git a/common/libs/VkCodecUtils/VulkanDisplayFrame.h b/common/libs/VkCodecUtils/VulkanDisplayFrame.h index 246183c9..c86f5ea0 100644 --- a/common/libs/VkCodecUtils/VulkanDisplayFrame.h +++ b/common/libs/VkCodecUtils/VulkanDisplayFrame.h @@ -41,7 +41,9 @@ class VulkanDisplayFrame VkFence frameCompleteFence; // If valid, the fence is signaled when the decoder or encoder is done decoding / encoding the frame. VkFence frameConsumerDoneFence; // If valid, the fence is signaled when the consumer (graphics, compute or display) is done using the frame. VkSemaphore frameCompleteSemaphore; // If valid, the semaphore is signaled when the decoder or encoder is done decoding / encoding the frame. 
- VkSemaphore frameConsumerDoneSemaphore; // If valid, the semaphore is signaled when the consumer (graphics, compute or display) is done using the frame. + VkSemaphore consumerCompleteSemaphore; // If valid, the semaphore is signaled when the consumer (graphics, compute or display) is done using the frame. + uint64_t frameCompleteDoneSemValue; // The semaphore is signaled by the decoder or the decoder's filter when this semaphore value has been reached. + uint64_t frameConsumerDoneSemValue; // The semaphore is signaled by the consumer (graphics, compute or display) when this semaphore value has been reached. VkQueryPool queryPool; // queryPool handle used for the video queries. int32_t startQueryId; // query Id used for the this frame. uint32_t numQueries; // usually one query per frame @@ -64,10 +66,12 @@ class VulkanDisplayFrame imageViews[imageTypeIdx].inUse = false; } } - frameCompleteFence = VkFence(); - frameConsumerDoneFence = VkFence(); - frameCompleteSemaphore = VkSemaphore(); - frameConsumerDoneSemaphore = VkSemaphore(); + frameCompleteFence = VK_NULL_HANDLE; + frameConsumerDoneFence = VK_NULL_HANDLE; + frameCompleteSemaphore = VK_NULL_HANDLE; + consumerCompleteSemaphore = VK_NULL_HANDLE; + frameCompleteDoneSemValue = (0ULL); // Frame 0 signaled by the decoder and/or filter + frameConsumerDoneSemValue = (0ULL); // Frame 0 signaled by the consumer queryPool = VkQueryPool(); startQueryId = 0; numQueries = 0; @@ -92,7 +96,9 @@ class VulkanDisplayFrame , frameCompleteFence() , frameConsumerDoneFence() , frameCompleteSemaphore() - , frameConsumerDoneSemaphore() + , consumerCompleteSemaphore() + , frameCompleteDoneSemValue(0ULL) + , frameConsumerDoneSemValue(0ULL) , queryPool() , startQueryId() , numQueries() diff --git a/common/libs/VkCodecUtils/VulkanFilter.h b/common/libs/VkCodecUtils/VulkanFilter.h index 2670304c..bb88799b 100644 --- a/common/libs/VkCodecUtils/VulkanFilter.h +++ b/common/libs/VkCodecUtils/VulkanFilter.h @@ -24,6 +24,7 @@ #include 
"VkCodecUtils/VulkanShaderCompiler.h" #include "VkCodecUtils/VkImageResource.h" #include "VkCodecUtils/VulkanCommandBufferPool.h" +#include "VkCodecUtils/VulkanSemaphoreDump.h" struct VulkanShaderInput { const std::string shader; @@ -34,6 +35,9 @@ struct VulkanShaderInput { class VulkanFilter : public VulkanCommandBufferPool { public: + // Constants moved inside the class as static constexpr + static constexpr uint32_t MAX_SEMAPHORES = 4; + static constexpr uint32_t MAX_CMD_BUFFERS = 4; VulkanFilter(const VulkanDeviceContext* vkDevCtx, uint32_t queueFamilyIndex, @@ -76,40 +80,146 @@ class VulkanFilter : public VulkanCommandBufferPool uint32_t bufferIdx) = 0; virtual VkResult SubmitCommandBuffer(uint32_t commandBufferCount, - const VkCommandBuffer* pCommandBuffers, + const VkCommandBuffer* pCommandBuffers, uint32_t waitSemaphoreCount, const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags2KHR* pWaitStageMasks, uint32_t signalSemaphoreCount, const VkSemaphore* pSignalSemaphores, + const VkPipelineStageFlags2KHR* pSignalStageMasks, VkFence filterCompleteFence) const { - assert(m_queue != VK_NULL_HANDLE); + assert(commandBufferCount <= MAX_CMD_BUFFERS); + assert(waitSemaphoreCount <= MAX_SEMAPHORES); + assert(signalSemaphoreCount <= MAX_SEMAPHORES); + + // Prepare command buffer info on stack + VkCommandBufferSubmitInfoKHR cmdBufferInfos[MAX_CMD_BUFFERS]; + for (uint32_t i = 0; i < commandBufferCount; i++) { + cmdBufferInfos[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos[i].pNext = nullptr; + cmdBufferInfos[i].commandBuffer = pCommandBuffers[i]; + cmdBufferInfos[i].deviceMask = 0; + } - // Wait for rendering finished - VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + // Prepare wait semaphore info on stack + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + 
waitSemaphoreInfos[i].pNext = nullptr; + waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i]; + waitSemaphoreInfos[i].value = 0; // Binary semaphore + waitSemaphoreInfos[i].stageMask = pWaitStageMasks[i]; + waitSemaphoreInfos[i].deviceIndex = 0; + } - // Submit compute commands - VkSubmitInfo submitInfo {}; - submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submitInfo.pCommandBuffers = pCommandBuffers; - submitInfo.commandBufferCount = commandBufferCount; - submitInfo.waitSemaphoreCount = waitSemaphoreCount; - submitInfo.pWaitSemaphores = pWaitSemaphores; - submitInfo.pWaitDstStageMask = &waitStageMask; - submitInfo.signalSemaphoreCount = signalSemaphoreCount; - submitInfo.pSignalSemaphores = pSignalSemaphores; + // Prepare signal semaphore info on stack + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[i].pNext = nullptr; + signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i]; + signalSemaphoreInfos[i].value = 0; // Binary semaphore + signalSemaphoreInfos[i].stageMask = pSignalStageMasks[i]; + signalSemaphoreInfos[i].deviceIndex = 0; + } + + // Submit info + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + submitInfo.pNext = nullptr; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = commandBufferCount; + submitInfo.pCommandBufferInfos = cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; + + if (false) { + // Dump semaphore info for debugging + VulkanSemaphoreDump::DumpSemaphoreInfo(submitInfo, "DECODE FILTER", 0); + } assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, filterCompleteFence)); - VkResult result = 
m_vkDevCtx->QueueSubmit(m_queue, 1, &submitInfo, filterCompleteFence); + VkResult result = m_vkDevCtx->QueueSubmit2KHR(m_queue, 1, &submitInfo, filterCompleteFence); + + return result; + } + + virtual VkResult SubmitCommandBuffer(uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers, + uint32_t waitSemaphoreCount, + const VkSemaphore* pWaitSemaphores, + const uint64_t* pWaitSemaphoreValues, + const VkPipelineStageFlags2KHR* pWaitStageMasks, + uint32_t signalSemaphoreCount, + const VkSemaphore* pSignalSemaphores, + const uint64_t* pSignalSemaphoreValues, + const VkPipelineStageFlags2KHR* pSignalStageMasks, + VkFence filterCompleteFence) const + { + assert(m_queue != VK_NULL_HANDLE); + assert(commandBufferCount <= MAX_CMD_BUFFERS); + assert(waitSemaphoreCount <= MAX_SEMAPHORES); + assert(signalSemaphoreCount <= MAX_SEMAPHORES); + + // Prepare command buffer info on stack + VkCommandBufferSubmitInfoKHR cmdBufferInfos[MAX_CMD_BUFFERS]; + for (uint32_t i = 0; i < commandBufferCount; i++) { + cmdBufferInfos[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos[i].pNext = nullptr; + cmdBufferInfos[i].commandBuffer = pCommandBuffers[i]; + cmdBufferInfos[i].deviceMask = 0; + } - if (result != VK_SUCCESS) { - return result; + // Prepare wait semaphore info on stack + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[i].pNext = nullptr; + waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i]; + waitSemaphoreInfos[i].value = pWaitSemaphoreValues[i]; // Timeline value + waitSemaphoreInfos[i].stageMask = pWaitStageMasks[i]; + waitSemaphoreInfos[i].deviceIndex = 0; } + // Prepare signal semaphore info on stack + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + signalSemaphoreInfos[i].sType = 
VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[i].pNext = nullptr; + signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i]; + signalSemaphoreInfos[i].value = pSignalSemaphoreValues[i]; // Timeline value + signalSemaphoreInfos[i].stageMask = pSignalStageMasks[i]; + signalSemaphoreInfos[i].deviceIndex = 0; + } + + // Submit info + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + submitInfo.pNext = nullptr; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = commandBufferCount; + submitInfo.pCommandBufferInfos = cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; + + if (false) { + // Dump semaphore info for debugging + VulkanSemaphoreDump::DumpSemaphoreInfo(submitInfo, "DECODE FILTER", 0); + } + + assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, filterCompleteFence)); + VkResult result = m_vkDevCtx->QueueSubmit2KHR(m_queue, 1, &submitInfo, filterCompleteFence); + return result; } + protected: VulkanShaderCompiler m_vulkanShaderCompiler; uint32_t m_queueFamilyIndex; diff --git a/common/libs/VkCodecUtils/VulkanFrame.cpp b/common/libs/VkCodecUtils/VulkanFrame.cpp index 2e87885d..b95bdf96 100644 --- a/common/libs/VkCodecUtils/VulkanFrame.cpp +++ b/common/libs/VkCodecUtils/VulkanFrame.cpp @@ -18,6 +18,7 @@ #include #include #include +#include // Added for std::this_thread::sleep_for #include "VkCodecUtils/Helpers.h" #include "VkCodecUtils/VulkanDeviceContext.h" @@ -25,6 +26,7 @@ #include "VkCodecUtils/VulkanVideoUtils.h" #include "VulkanFrame.h" #include "VkVideoCore/DecodeFrameBufferIf.h" +#include "VkCodecUtils/VulkanSemaphoreDump.h" template VulkanFrame::VulkanFrame(const VulkanDeviceContext* vkDevCtx) @@ -420,6 +422,7 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, if 
(renderIndex < 0) { renderIndex = -renderIndex; } + vulkanVideoUtils::VulkanPerDrawContext* pPerDrawContext = m_videoRenderer->m_renderInfo.GetDrawContext(renderIndex); VkSharedBaseObj imageResourceView; @@ -583,54 +586,77 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, } } - const uint32_t maxWaitSemaphores = 2; - uint32_t numWaitSemaphores = 0; - VkSemaphore waitSemaphores[maxWaitSemaphores] = {}; + const uint32_t waitSemaphoreMaxCount = 2; + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[waitSemaphoreMaxCount]{}; + + const uint32_t signalSemaphoreMaxCount = 2; + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[signalSemaphoreMaxCount]{}; - assert(waitSemaphoreCount <= 1); - if ((waitSemaphoreCount > 0) && (pWaitSemaphores != nullptr)) { - waitSemaphores[numWaitSemaphores++] = *pWaitSemaphores; + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[i].pNext = nullptr; + waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i]; + waitSemaphoreInfos[i].value = 0; // Binary semaphore + waitSemaphoreInfos[i].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + waitSemaphoreInfos[i].deviceIndex = 0; } - if (inFrame && (inFrame->frameCompleteSemaphore != VkSemaphore())) { - waitSemaphores[numWaitSemaphores++] = inFrame->frameCompleteSemaphore; + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[i].pNext = nullptr; + signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i]; + signalSemaphoreInfos[i].value = 0; // Binary semaphore + signalSemaphoreInfos[i].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + signalSemaphoreInfos[i].deviceIndex = 0; } - assert(numWaitSemaphores <= maxWaitSemaphores); - const uint32_t maxSignalSemaphores = 2; - uint32_t numSignalSemaphores = 0; - VkSemaphore signalSemaphores[maxSignalSemaphores] = {}; + if (inFrame && 
(inFrame->frameCompleteSemaphore != VK_NULL_HANDLE)) { - assert(signalSemaphoreCount <= 1); - if ((signalSemaphoreCount > 0) && (pSignalSemaphores != nullptr)) { - signalSemaphores[numSignalSemaphores++] = *pSignalSemaphores; - } + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = inFrame->frameCompleteSemaphore; + waitSemaphoreInfos[waitSemaphoreCount].value = inFrame->frameCompleteDoneSemValue; + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR | + VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; + waitSemaphoreCount++; + + signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr; + signalSemaphoreInfos[signalSemaphoreCount].semaphore = inFrame->consumerCompleteSemaphore; + signalSemaphoreInfos[signalSemaphoreCount].value = inFrame->frameConsumerDoneSemValue; + signalSemaphoreInfos[signalSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; + signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0; + signalSemaphoreCount++; - if (inFrame && (inFrame->frameConsumerDoneSemaphore != VkSemaphore())) { - signalSemaphores[numSignalSemaphores++] = inFrame->frameConsumerDoneSemaphore; inFrame->hasConsummerSignalSemaphore = true; } - assert(numSignalSemaphores <= maxSignalSemaphores); + + assert(waitSemaphoreCount <= waitSemaphoreMaxCount); + assert(signalSemaphoreCount <= signalSemaphoreMaxCount); if (frameConsumerDoneFence != VkFence()) { inFrame->hasConsummerSignalFence = true; } - - // Wait for the image to be owned and signal for render completion - VkPipelineStageFlags primaryCmdSubmitWaitStages[2] = { VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - 
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT }; - VkSubmitInfo primaryCmdSubmitInfo = VkSubmitInfo(); - primaryCmdSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - primaryCmdSubmitInfo.pWaitDstStageMask = primaryCmdSubmitWaitStages; - primaryCmdSubmitInfo.commandBufferCount = 1; - - primaryCmdSubmitInfo.waitSemaphoreCount = numWaitSemaphores; - primaryCmdSubmitInfo.pWaitSemaphores = numWaitSemaphores ? waitSemaphores : NULL; - primaryCmdSubmitInfo.pCommandBuffers = pPerDrawContext->commandBuffer.GetCommandBuffer(); - - primaryCmdSubmitInfo.signalSemaphoreCount = numSignalSemaphores; - primaryCmdSubmitInfo.pSignalSemaphores = numSignalSemaphores ? signalSemaphores : NULL; + VkCommandBufferSubmitInfoKHR cmdBufferInfos; + cmdBufferInfos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos.pNext = nullptr; + cmdBufferInfos.commandBuffer = *pPerDrawContext->commandBuffer.GetCommandBuffer(); + cmdBufferInfos.deviceMask = 0; + + // Submit info + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + submitInfo.pNext = nullptr; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; // For fence/sync debugging if (false && inFrame && inFrame->frameCompleteFence) { @@ -646,7 +672,14 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, } } - result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::GRAPHICS, 0, 1, &primaryCmdSubmitInfo, frameConsumerDoneFence); + result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::GRAPHICS, + 0, // queueIndex + 1, // submitCount + &submitInfo, + frameConsumerDoneFence, + "Graphics Submit", + (inFrame != nullptr) ? inFrame->decodeOrder : UINT64_MAX, + (inFrame != nullptr) ? 
inFrame->displayOrder : UINT64_MAX); if (result != VK_SUCCESS) { assert(result == VK_SUCCESS); fprintf(stderr, "\nERROR: MultiThreadedQueueSubmit() result: 0x%x\n", result); @@ -676,6 +709,11 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, m_frameDataIndex = (m_frameDataIndex + 1) % m_frameData.size(); + if (false) { + // Add a 20ms sleep + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + return result; } diff --git a/common/libs/VkCodecUtils/VulkanSemaphoreDump.h b/common/libs/VkCodecUtils/VulkanSemaphoreDump.h new file mode 100644 index 00000000..6e1b8913 --- /dev/null +++ b/common/libs/VkCodecUtils/VulkanSemaphoreDump.h @@ -0,0 +1,90 @@ +/* +* Copyright 2024 NVIDIA Corporation. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#pragma once + +#include +#include +#include + +namespace VulkanSemaphoreDump { + +/** + * @brief Dumps the semaphore information from a VkSubmitInfo2KHR structure + * + * @param submitInfo The VkSubmitInfo2KHR structure containing semaphore information + * @param submissionName Optional name to identify the submission (e.g., "DECODE", "COMPUTE") + * @param decodeOrder Optional decode order number or identifier (uint64_t) + * @param displayOrder Optional display order number or identifier (uint64_t) + */ +inline void DumpSemaphoreInfo( + const VkSubmitInfo2KHR& submitInfo, + const char* submissionName = nullptr, + uint64_t decodeEncodeOrder = UINT64_MAX, + uint64_t displayInputOrder = UINT64_MAX) +{ + + std::cout << "----------------------------\n"; + + if (submissionName) { + std::cout << submissionName << " "; + } + + std::cout << "TL Semaphore sync"; + + if (decodeEncodeOrder != UINT64_MAX) { + std::cout << " (decode / encode = " << decodeEncodeOrder; + if (displayInputOrder != UINT64_MAX) { + std::cout << ", display / input = " << displayInputOrder; + } + std::cout << ")"; + } else if (displayInputOrder != UINT64_MAX) { + std::cout << " (display / input = " << displayInputOrder << ")"; + } + + std::cout << ":\n"; + + // Dump wait semaphores + for (uint32_t i = 0; i < submitInfo.waitSemaphoreInfoCount; i++) { + const VkSemaphoreSubmitInfoKHR& semInfo = submitInfo.pWaitSemaphoreInfos[i]; + std::cout << " Wait sem[" << i << "]: " << semInfo.semaphore + << " value = " << semInfo.value + << " stage = 0x" << std::hex << semInfo.stageMask << std::dec; + + if (semInfo.deviceIndex > 0) { + std::cout << " deviceIndex=" << semInfo.deviceIndex; + } + std::cout << std::endl; + } + + // Dump signal semaphores + for (uint32_t i = 0; i < submitInfo.signalSemaphoreInfoCount; i++) { + const VkSemaphoreSubmitInfoKHR& semInfo = submitInfo.pSignalSemaphoreInfos[i]; + std::cout << " Signal sem[" << i << "]: " << semInfo.semaphore + << " value = " << semInfo.value + << " stage 
= 0x" << std::hex << semInfo.stageMask << std::dec; + + if (semInfo.deviceIndex > 0) { + std::cout << " deviceIndex = " << semInfo.deviceIndex; + } + std::cout << std::endl; + } + + std::cout << "----------------------------" << std::endl; +} + + +} // namespace VulkanSemaphoreDump diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index dc980474..630232e7 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -951,6 +951,8 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters VulkanVideoFrameBuffer::FrameSynchronizationInfo frameSynchronizationInfo = VulkanVideoFrameBuffer::FrameSynchronizationInfo(); frameSynchronizationInfo.hasFrameCompleteSignalFence = true; frameSynchronizationInfo.hasFrameCompleteSignalSemaphore = true; + frameSynchronizationInfo.hasFilterSignalSemaphore = m_enableDecodeComputeFilter; + frameSynchronizationInfo.hasFrameConsumerSignalSemaphore = false; frameSynchronizationInfo.syncOnFrameCompleteFence = true; frameSynchronizationInfo.syncOnFrameConsumerDoneFence = true; frameSynchronizationInfo.imageSpecsIndex = m_imageSpecsIndex; @@ -1039,14 +1041,9 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, frameSynchronizationInfo.frameCompleteFence)); VkFence frameCompleteFence = frameSynchronizationInfo.frameCompleteFence; - VkSemaphore frameCompleteSemaphore = frameSynchronizationInfo.frameCompleteSemaphore; - VkSemaphore frameConsumerDoneSemaphore = frameSynchronizationInfo.frameConsumerDoneSemaphore; - // By default, the frameCompleteSemaphore is the videoDecodeCompleteSemaphore. - // If the video frame filter is enabled, since it is executed after the decoder's queue, - // the filter will provide its own semaphore for the video decoder to signal, instead. 
- // Then the frameCompleteSemaphore will be signaled by the filter of its completion. + VkSemaphore videoDecodeCompleteSemaphore = frameSynchronizationInfo.frameCompleteSemaphore; + VkSemaphore consumerCompleteSemaphore = frameSynchronizationInfo.consumerCompleteSemaphore; VkFence videoDecodeCompleteFence = frameCompleteFence; - VkSemaphore videoDecodeCompleteSemaphore = frameCompleteSemaphore; VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; @@ -1136,34 +1133,43 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(filterCmdBuffer != nullptr); - // frameCompleteSemaphore is the semaphore that the filter is going to signal on completion when enabled. - // The videoDecodeCompleteSemaphore semaphore will be signaled by the decoder and then used by the filter to wait on. - + // videoDecodeCompleteFence is the fence that the filter is going to signal on completion when enabled. 
videoDecodeCompleteFence = filterCmdBuffer->GetFence(); - videoDecodeCompleteSemaphore = filterCmdBuffer->GetSemaphore(); } const uint32_t waitSemaphoreMaxCount = 3; - VkSemaphore waitSemaphores[waitSemaphoreMaxCount] = { VK_NULL_HANDLE }; + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[waitSemaphoreMaxCount]{}; const uint32_t signalSemaphoreMaxCount = 3; - VkSemaphore signalSemaphores[signalSemaphoreMaxCount] = { VK_NULL_HANDLE }; + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[signalSemaphoreMaxCount]{}; uint32_t waitSemaphoreCount = 0; - if (frameConsumerDoneSemaphore != VK_NULL_HANDLE) { - waitSemaphores[waitSemaphoreCount] = frameConsumerDoneSemaphore; + uint32_t signalSemaphoreCount = 0; + + if (consumerCompleteSemaphore != VK_NULL_HANDLE) { + + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = consumerCompleteSemaphore; + waitSemaphoreInfos[waitSemaphoreCount].value = frameSynchronizationInfo.frameConsumerDoneTimelineValue; + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR | + VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; waitSemaphoreCount++; } - uint32_t signalSemaphoreCount = 0; if (videoDecodeCompleteSemaphore != VK_NULL_HANDLE) { - signalSemaphores[signalSemaphoreCount] = videoDecodeCompleteSemaphore; + + signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr; + signalSemaphoreInfos[signalSemaphoreCount].semaphore = videoDecodeCompleteSemaphore; + signalSemaphoreInfos[signalSemaphoreCount].value = frameSynchronizationInfo.decodeCompleteTimelineValue; + signalSemaphoreInfos[signalSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + 
signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0; signalSemaphoreCount++; } - uint64_t waitTlSemaphoresValues[waitSemaphoreMaxCount] = { 0 /* ignored for binary semaphores */ }; - uint64_t signalTlSemaphoresValues[signalSemaphoreMaxCount] = { 0 /* ignored for binary semaphores */ }; - VkTimelineSemaphoreSubmitInfo timelineSemaphoreInfos = {}; if (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) { if (m_dumpDecodeData) { @@ -1172,67 +1178,53 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters std::cout << "\t TL semaphore value: " << currSemValue << ", status: " << semResult << std::endl; } - waitSemaphores[waitSemaphoreCount] = m_hwLoadBalancingTimelineSemaphore; - waitTlSemaphoresValues[waitSemaphoreCount] = m_decodePicCount - 1; // wait for the previous value to be signaled + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = m_hwLoadBalancingTimelineSemaphore; + waitSemaphoreInfos[waitSemaphoreCount].value = m_decodePicCount - 1; // wait for the previous value to be signaled + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; waitSemaphoreCount++; - signalSemaphores[signalSemaphoreCount] = m_hwLoadBalancingTimelineSemaphore; - signalTlSemaphoresValues[signalSemaphoreCount] = m_decodePicCount; // signal the current m_decodePicCount value + signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr; + signalSemaphoreInfos[signalSemaphoreCount].semaphore = m_hwLoadBalancingTimelineSemaphore; + signalSemaphoreInfos[signalSemaphoreCount].value = m_decodePicCount; // signal the current m_decodePicCount value + signalSemaphoreInfos[signalSemaphoreCount].stageMask = 
VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0; signalSemaphoreCount++; - timelineSemaphoreInfos.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; - timelineSemaphoreInfos.pNext = NULL; assert(waitSemaphoreCount < waitSemaphoreMaxCount); - timelineSemaphoreInfos.waitSemaphoreValueCount = waitSemaphoreCount; - timelineSemaphoreInfos.pWaitSemaphoreValues = waitTlSemaphoresValues; assert(signalSemaphoreCount < signalSemaphoreMaxCount); - timelineSemaphoreInfos.signalSemaphoreValueCount = signalSemaphoreCount; - timelineSemaphoreInfos.pSignalSemaphoreValues = signalTlSemaphoresValues; - if (m_dumpDecodeData) { - std::cout << "\t Wait for: " << (waitSemaphoreCount ? waitTlSemaphoresValues[waitSemaphoreCount - 1] : 0) << - ", signal at " << signalTlSemaphoresValues[signalSemaphoreCount - 1] << std::endl; - } } assert(waitSemaphoreCount <= waitSemaphoreMaxCount); assert(signalSemaphoreCount <= signalSemaphoreMaxCount); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoDecodeSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.pNext = (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) ? 
&timelineSemaphoreInfos : nullptr; - submitInfo.waitSemaphoreCount = waitSemaphoreCount; - submitInfo.pWaitSemaphores = waitSemaphores; - submitInfo.pWaitDstStageMask = &videoDecodeSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = &frameDataSlot.commandBuffer; - submitInfo.signalSemaphoreCount = signalSemaphoreCount; - submitInfo.pSignalSemaphores = signalSemaphores; - - if (m_dumpDecodeData) { - if (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) { - std::cout << "\t\t waitSemaphoreValueCount: " << timelineSemaphoreInfos.waitSemaphoreValueCount << std::endl; - std::cout << "\t pWaitSemaphoreValues: " << timelineSemaphoreInfos.pWaitSemaphoreValues[0] << ", " << - timelineSemaphoreInfos.pWaitSemaphoreValues[1] << ", " << - timelineSemaphoreInfos.pWaitSemaphoreValues[2] << std::endl; - std::cout << "\t\t signalSemaphoreValueCount: " << timelineSemaphoreInfos.signalSemaphoreValueCount << std::endl; - std::cout << "\t pSignalSemaphoreValues: " << timelineSemaphoreInfos.pSignalSemaphoreValues[0] << ", " << - timelineSemaphoreInfos.pSignalSemaphoreValues[1] << ", " << - timelineSemaphoreInfos.pSignalSemaphoreValues[2] << std::endl; - } - - std::cout << "\t waitSemaphoreCount: " << submitInfo.waitSemaphoreCount << std::endl; - std::cout << "\t\t pWaitSemaphores: " << submitInfo.pWaitSemaphores[0] << ", " << - submitInfo.pWaitSemaphores[1] << ", " << - submitInfo.pWaitSemaphores[2] << std::endl; - std::cout << "\t signalSemaphoreCount: " << submitInfo.signalSemaphoreCount << std::endl; - std::cout << "\t\t pSignalSemaphores: " << submitInfo.pSignalSemaphores[0] << ", " << - submitInfo.pSignalSemaphores[1] << ", " << - submitInfo.pSignalSemaphores[2] << std::endl << std::endl; - } + VkCommandBufferSubmitInfoKHR cmdBufferInfos; + cmdBufferInfos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos.pNext = nullptr; + cmdBufferInfos.commandBuffer = frameDataSlot.commandBuffer; + cmdBufferInfos.deviceMask = 0; + + 
// Submit info + VkSubmitInfo2KHR submitInfo { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, videoDecodeCompleteFence)); - VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::DECODE, m_currentVideoQueueIndx, - 1, &submitInfo, videoDecodeCompleteFence); + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::DECODE, + m_currentVideoQueueIndx, + 1, + &submitInfo, + videoDecodeCompleteFence, + "Video Decode", + picNumInDecodeOrder); assert(result == VK_SUCCESS); if (result != VK_SUCCESS) { return -1; @@ -1368,11 +1360,23 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters result = filterCmdBuffer->EndCommandBufferRecording(cmdBuf); assert(result == VK_SUCCESS); - if (false) std::cout << currPicIdx << " : OUT view: " << outputImageView->GetImageView() << ", signalSem: " << frameCompleteSemaphore << std::endl << std::flush; - assert(videoDecodeCompleteSemaphore != frameCompleteSemaphore); - result = m_yuvFilter->SubmitCommandBuffer(1, filterCmdBuffer->GetCommandBuffer(), - 1, &videoDecodeCompleteSemaphore, - 1, &frameCompleteSemaphore, + // Wait for the decoder to complete. + const VkPipelineStageFlags2KHR waitDecoderStageMasks = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + + // Signal the compute stage after done. 
+ const uint64_t computeCompleteTimelineValue = frameSynchronizationInfo.filterCompleteTimelineValue; + const VkPipelineStageFlags2KHR signalComputeStageMasks = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR; + + result = m_yuvFilter->SubmitCommandBuffer(1, // commandBufferCount + filterCmdBuffer->GetCommandBuffer(), + 1, // waitSemaphoreCount + &videoDecodeCompleteSemaphore, + &frameSynchronizationInfo.decodeCompleteTimelineValue, + &waitDecoderStageMasks, + 1, // signalSemaphoreCount + &videoDecodeCompleteSemaphore, + &computeCompleteTimelineValue, + &signalComputeStageMasks, frameCompleteFence); assert(result == VK_SUCCESS); filterCmdBuffer->SetCommandBufferSubmitted(); diff --git a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp index f8e925b0..2550bd7e 100644 --- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp +++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp @@ -51,18 +51,17 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { NvPerFrameDecodeResources() : m_picDispInfo() , m_frameCompleteFence() - , m_frameCompleteSemaphore() , m_frameConsumerDoneFence() - , m_frameConsumerDoneSemaphore() + , m_frameCompleteTimelineValue() + , m_frameConsumerDoneTimelineValue() , m_imageSpecsIndex() , m_hasFrameCompleteSignalFence(false) , m_hasFrameCompleteSignalSemaphore(false) , m_hasConsummerSignalFence(false) - , m_hasConsummerSignalSemaphore(false) + , m_useConsummerSignalSemaphore(false) , m_inDecodeQueue(false) , m_inDisplayQueue(false) , m_ownedByConsummer(false) - , m_vkDevCtx() , m_imageViewState() { } @@ -75,14 +74,14 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { VkResult init( const VulkanDeviceContext* vkDevCtx); - void Deinit(); + void Deinit(const VulkanDeviceContext* vkDevCtx); NvPerFrameDecodeResources (const NvPerFrameDecodeResources &srcObj) = delete; NvPerFrameDecodeResources 
(NvPerFrameDecodeResources &&srcObj) = delete; ~NvPerFrameDecodeResources() { - Deinit(); + Deinit(nullptr); } VkSharedBaseObj& GetImageView(uint8_t imageTypeIdx) { @@ -149,14 +148,14 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { VkParserDecodePictureInfo m_picDispInfo; VkFence m_frameCompleteFence; - VkSemaphore m_frameCompleteSemaphore; VkFence m_frameConsumerDoneFence; - VkSemaphore m_frameConsumerDoneSemaphore; + uint64_t m_frameCompleteTimelineValue; + uint64_t m_frameConsumerDoneTimelineValue; DecodeFrameBufferIf::ImageSpecsIndex m_imageSpecsIndex; uint32_t m_hasFrameCompleteSignalFence : 1; uint32_t m_hasFrameCompleteSignalSemaphore : 1; uint32_t m_hasConsummerSignalFence : 1; - uint32_t m_hasConsummerSignalSemaphore : 1; + uint32_t m_useConsummerSignalSemaphore : 1; uint32_t m_inDecodeQueue : 1; uint32_t m_inDisplayQueue : 1; uint32_t m_ownedByConsummer : 1; @@ -171,8 +170,8 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { // The filter's pool node VkSharedBaseObj filterPoolNode; + private: - const VulkanDeviceContext* m_vkDevCtx; std::array m_imageViewState; }; @@ -180,7 +179,10 @@ class NvPerFrameDecodeImageSet { public: NvPerFrameDecodeImageSet() - : m_queueFamilyIndex((uint32_t)-1) + : m_vkDevCtx() + , m_queueFamilyIndex((uint32_t)-1) + , m_frameCompleteSemaphore() + , m_consumerCompleteSemaphore() , m_numImages(0) , m_maxNumImageTypeIdx(0) , m_perFrameDecodeResources(VulkanVideoFrameBuffer::maxImages) @@ -195,11 +197,12 @@ class NvPerFrameDecodeImageSet { const std::array& imageSpecs, uint32_t queueFamilyIndex); - void Deinit(); + void Deinit(const VulkanDeviceContext* vkDevCtx); ~NvPerFrameDecodeImageSet() { - Deinit(); + Deinit(m_vkDevCtx); + m_vkDevCtx = nullptr; } NvPerFrameDecodeResources& operator[](unsigned int index) @@ -258,8 +261,13 @@ class NvPerFrameDecodeImageSet { } private: + const VulkanDeviceContext* m_vkDevCtx; uint32_t m_queueFamilyIndex; VkVideoCoreProfile m_videoProfile; +public: + VkSemaphore 
m_frameCompleteSemaphore; + VkSemaphore m_consumerCompleteSemaphore; +private: uint32_t m_numImages; uint32_t m_maxNumImageTypeIdx; std::vector m_perFrameDecodeResources; @@ -372,7 +380,7 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { m_ownedByDisplayMask = 0; m_frameNumInDisplayOrder = 0; - m_perFrameDecodeImageSet.Deinit(); + m_perFrameDecodeImageSet.Deinit(m_vkDevCtx); if (m_queryPool != VkQueryPool()) { m_vkDevCtx->DestroyQueryPool(*m_vkDevCtx, m_queryPool, NULL); @@ -417,10 +425,9 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if ((pFrameSynchronizationInfo->syncOnFrameConsumerDoneFence == 1) && - ((m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore == 0) || - (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneSemaphore == VK_NULL_HANDLE)) && - (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence == 1) && - (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence != VK_NULL_HANDLE)) { + (m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore == 0) && + (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence == 1) && + (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence != VK_NULL_HANDLE)) { vk::WaitAndResetFence(m_vkDevCtx, *m_vkDevCtx, m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence, true, "frameConsumerDoneFence"); @@ -456,15 +463,35 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if (pFrameSynchronizationInfo->hasFrameCompleteSignalSemaphore) { - pFrameSynchronizationInfo->frameCompleteSemaphore = m_perFrameDecodeImageSet[picId].m_frameCompleteSemaphore; - if (pFrameSynchronizationInfo->frameCompleteSemaphore) { + pFrameSynchronizationInfo->frameCompleteSemaphore = m_perFrameDecodeImageSet.m_frameCompleteSemaphore; + if (pFrameSynchronizationInfo->frameCompleteSemaphore != VK_NULL_HANDLE) { + + pFrameSynchronizationInfo->decodeCompleteTimelineValue = DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DECODE, + 
m_perFrameDecodeImageSet[picId].m_decodeOrder); + + if (pFrameSynchronizationInfo->hasFilterSignalSemaphore) { + pFrameSynchronizationInfo->filterCompleteTimelineValue = DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_FILTER, + m_perFrameDecodeImageSet[picId].m_decodeOrder); + + m_perFrameDecodeImageSet[picId].m_frameCompleteTimelineValue = pFrameSynchronizationInfo->filterCompleteTimelineValue; + + } else { + + m_perFrameDecodeImageSet[picId].m_frameCompleteTimelineValue = pFrameSynchronizationInfo->decodeCompleteTimelineValue; + + } + m_perFrameDecodeImageSet[picId].m_hasFrameCompleteSignalSemaphore = true; } } - if (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore) { - pFrameSynchronizationInfo->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneSemaphore; - m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = false; + if (m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore) { + pFrameSynchronizationInfo->hasFrameConsumerSignalSemaphore = true; + pFrameSynchronizationInfo->consumerCompleteSemaphore = m_perFrameDecodeImageSet.m_consumerCompleteSemaphore; + pFrameSynchronizationInfo->frameConsumerDoneTimelineValue = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneTimelineValue; + m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore = false; } pFrameSynchronizationInfo->queryPool = m_queryPool; @@ -529,14 +556,20 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if (m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore) { - pDecodedFrame->frameCompleteSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteSemaphore; + pDecodedFrame->frameCompleteSemaphore = m_perFrameDecodeImageSet.m_frameCompleteSemaphore; + pDecodedFrame->frameCompleteDoneSemValue = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteTimelineValue; m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore = false; + + 
pDecodedFrame->consumerCompleteSemaphore = m_perFrameDecodeImageSet.m_consumerCompleteSemaphore; + pDecodedFrame->frameConsumerDoneSemValue = DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DISPLAY, + m_perFrameDecodeImageSet[pictureIndex].m_displayOrder); + } else { - pDecodedFrame->frameCompleteSemaphore = VkSemaphore(); + pDecodedFrame->frameCompleteSemaphore = VK_NULL_HANDLE; } pDecodedFrame->frameConsumerDoneFence = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneFence; - pDecodedFrame->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneSemaphore; pDecodedFrame->timestamp = m_perFrameDecodeImageSet[pictureIndex].m_timestamp; pDecodedFrame->decodeOrder = m_perFrameDecodeImageSet[pictureIndex].m_decodeOrder; @@ -572,7 +605,13 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { m_perFrameDecodeImageSet[picId].Release(); m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence = pDecodedFrameRelease->hasConsummerSignalFence; - m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = pDecodedFrameRelease->hasConsummerSignalSemaphore; + m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore = pDecodedFrameRelease->hasConsummerSignalSemaphore; + if (pDecodedFrameRelease->hasConsummerSignalSemaphore) { + m_perFrameDecodeImageSet[picId].m_frameConsumerDoneTimelineValue = + DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DISPLAY, + pDecodedFrameRelease->displayOrder); + } } return 0; } @@ -648,7 +687,7 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { std::lock_guard lock(m_displayQueueMutex); for (unsigned int resId = 0; resId < numResources; resId++) { if ((uint32_t)indexes[resId] < m_perFrameDecodeImageSet.size()) { - m_perFrameDecodeImageSet[indexes[resId]].Deinit(); + m_perFrameDecodeImageSet[indexes[resId]].Deinit(m_vkDevCtx); } } return (int32_t)m_perFrameDecodeImageSet.size(); @@ -785,8 +824,6 @@ VkResult 
NvPerFrameDecodeResources::CreateImage( const VulkanDeviceContext* vkDe } if (!ImageExist(pImageSpec->imageTypeIdx) || m_imageViewState[pImageSpec->imageTypeIdx].recreateImage) { - assert(m_vkDevCtx != nullptr); - m_imageViewState[pImageSpec->imageTypeIdx].currentLayerLayout = pImageSpec->createInfo.initialLayout; VkSharedBaseObj imageResource; @@ -839,21 +876,13 @@ VkResult NvPerFrameDecodeResources::CreateImage( const VulkanDeviceContext* vkDe VkResult NvPerFrameDecodeResources::init(const VulkanDeviceContext* vkDevCtx) { - m_vkDevCtx = vkDevCtx; - // The fence waited on for the first frame should be signaled. const VkFenceCreateInfo fenceFrameCompleteInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT }; - VkResult result = m_vkDevCtx->CreateFence(*m_vkDevCtx, &fenceFrameCompleteInfo, nullptr, &m_frameCompleteFence); + VkResult result = vkDevCtx->CreateFence(*vkDevCtx, &fenceFrameCompleteInfo, nullptr, &m_frameCompleteFence); const VkFenceCreateInfo fenceInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr }; - result = m_vkDevCtx->CreateFence(*m_vkDevCtx, &fenceInfo, nullptr, &m_frameConsumerDoneFence); - assert(result == VK_SUCCESS); - - const VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr }; - result = m_vkDevCtx->CreateSemaphore(*m_vkDevCtx, &semInfo, nullptr, &m_frameCompleteSemaphore); - assert(result == VK_SUCCESS); - result = m_vkDevCtx->CreateSemaphore(*m_vkDevCtx, &semInfo, nullptr, &m_frameConsumerDoneSemaphore); + result = vkDevCtx->CreateFence(*vkDevCtx, &fenceInfo, nullptr, &m_frameConsumerDoneFence); assert(result == VK_SUCCESS); Reset(); @@ -861,49 +890,35 @@ VkResult NvPerFrameDecodeResources::init(const VulkanDeviceContext* vkDevCtx) return result; } -void NvPerFrameDecodeResources::Deinit() +void NvPerFrameDecodeResources::Deinit(const VulkanDeviceContext* vkDevCtx) { bitstreamData = nullptr; stdPps = nullptr; stdSps = nullptr; stdVps = nullptr; - if (m_vkDevCtx == nullptr) { 
+ if (vkDevCtx == nullptr) { assert ((m_frameCompleteFence == VK_NULL_HANDLE) && - (m_frameConsumerDoneFence == VK_NULL_HANDLE) && - (m_frameCompleteSemaphore == VK_NULL_HANDLE) && - (m_frameConsumerDoneSemaphore == VK_NULL_HANDLE)); + (m_frameConsumerDoneFence == VK_NULL_HANDLE)); return; } if (m_frameCompleteFence != VkFence()) { - m_vkDevCtx->DestroyFence(*m_vkDevCtx, m_frameCompleteFence, nullptr); + vkDevCtx->DestroyFence(*vkDevCtx, m_frameCompleteFence, nullptr); m_frameCompleteFence = VkFence(); } if (m_frameConsumerDoneFence != VkFence()) { - m_vkDevCtx->DestroyFence(*m_vkDevCtx, m_frameConsumerDoneFence, nullptr); + vkDevCtx->DestroyFence(*vkDevCtx, m_frameConsumerDoneFence, nullptr); m_frameConsumerDoneFence = VkFence(); } - if (m_frameCompleteSemaphore != VkSemaphore()) { - m_vkDevCtx->DestroySemaphore(*m_vkDevCtx, m_frameCompleteSemaphore, nullptr); - m_frameCompleteSemaphore = VkSemaphore(); - } - - if (m_frameConsumerDoneSemaphore != VkSemaphore()) { - m_vkDevCtx->DestroySemaphore(*m_vkDevCtx, m_frameConsumerDoneSemaphore, nullptr); - m_frameConsumerDoneSemaphore = VkSemaphore(); - } - for (uint32_t imageTypeIdx = 0; imageTypeIdx < DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES; imageTypeIdx++) { m_imageViewState[imageTypeIdx].view = nullptr; m_imageViewState[imageTypeIdx].singleLevelView = nullptr; } - m_vkDevCtx = nullptr; - Reset(); } @@ -919,6 +934,8 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, return -1; } + m_vkDevCtx = vkDevCtx; + for (uint32_t imageIndex = m_numImages; imageIndex < numImages; imageIndex++) { VkResult result = m_perFrameDecodeResources[imageIndex].init(vkDevCtx); assert(result == VK_SUCCESS); @@ -927,6 +944,20 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, } } + // Create timeline semaphores instead of binary semaphores + VkSemaphoreTypeCreateInfo timelineCreateInfo = {}; + timelineCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO; + 
timelineCreateInfo.pNext = nullptr; + timelineCreateInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE; + timelineCreateInfo.initialValue = 0ULL; + + VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &timelineCreateInfo }; + VkResult result = vkDevCtx->CreateSemaphore(*vkDevCtx, &semInfo, nullptr, &m_frameCompleteSemaphore); + assert(result == VK_SUCCESS); + + result = vkDevCtx->CreateSemaphore(*vkDevCtx, &semInfo, nullptr, &m_consumerCompleteSemaphore); + assert(result == VK_SUCCESS); + m_videoProfile.InitFromProfile(pDecodeProfile); m_queueFamilyIndex = queueFamilyIndex; @@ -1048,10 +1079,21 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, return (int32_t)numImages; } -void NvPerFrameDecodeImageSet::Deinit() +void NvPerFrameDecodeImageSet::Deinit(const VulkanDeviceContext* vkDevCtx) { + + if (m_frameCompleteSemaphore != VK_NULL_HANDLE) { + m_vkDevCtx->DestroySemaphore(*vkDevCtx, m_frameCompleteSemaphore, nullptr); + m_frameCompleteSemaphore = VK_NULL_HANDLE; + } + + if (m_consumerCompleteSemaphore != VK_NULL_HANDLE) { + m_vkDevCtx->DestroySemaphore(*vkDevCtx, m_consumerCompleteSemaphore, nullptr); + m_consumerCompleteSemaphore = VK_NULL_HANDLE; + } + for (size_t ndx = 0; ndx < m_numImages; ndx++) { - m_perFrameDecodeResources[ndx].Deinit(); + m_perFrameDecodeResources[ndx].Deinit(vkDevCtx); } for (uint32_t imageTypeIdx = 0; imageTypeIdx < DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES; imageTypeIdx++) { diff --git a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h index 863d3a4f..e622bb7f 100644 --- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h +++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h @@ -66,14 +66,20 @@ class VulkanVideoFrameBuffer : public IVulkanVideoFrameBufferParserCb { struct FrameSynchronizationInfo { VkFence frameCompleteFence; VkSemaphore 
frameCompleteSemaphore; + VkSemaphore consumerCompleteSemaphore; VkFence frameConsumerDoneFence; - VkSemaphore frameConsumerDoneSemaphore; + uint64_t frameConsumerDoneTimelineValue; + uint64_t decodeCompleteTimelineValue; + uint64_t filterCompleteTimelineValue; VkQueryPool queryPool; uint32_t startQueryId; uint32_t numQueries; DecodeFrameBufferIf::ImageSpecsIndex imageSpecsIndex; uint32_t hasFrameCompleteSignalFence : 1; + uint32_t hasFrameConsumerSignalSemaphore : 1; uint32_t hasFrameCompleteSignalSemaphore : 1; + // post processing filter + uint32_t hasFilterSignalSemaphore : 1; uint32_t syncOnFrameCompleteFence : 1; uint32_t syncOnFrameConsumerDoneFence : 1; }; diff --git a/vk_video_encoder/demos/vk-video-enc/Main.cpp b/vk_video_encoder/demos/vk-video-enc/Main.cpp index 37b046a6..259260f5 100644 --- a/vk_video_encoder/demos/vk-video-enc/Main.cpp +++ b/vk_video_encoder/demos/vk-video-enc/Main.cpp @@ -53,6 +53,7 @@ int main(int argc, char** argv) VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp index 29e2a36d..e4f71cd8 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp @@ -441,22 +441,36 @@ VkResult VkVideoEncoder::SubmitStagedQpMap(VkSharedBaseObjqpMapCmdBuffer->GetCommandBuffer(); VkSemaphore frameCompleteSemaphore = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore(); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoTransferSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.waitSemaphoreCount = 0; - submitInfo.pWaitSemaphores = nullptr; - submitInfo.pWaitDstStageMask = &videoTransferSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers 
= pCmdBuf; - submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr; - submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR }; + cmdBufferInfo.commandBuffer = *pCmdBuf; + cmdBufferInfo.deviceMask = 0; + + VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + signalSemaphoreInfo.semaphore = frameCompleteSemaphore; + signalSemaphoreInfo.value = 0; // Binary semaphore + signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; // Signal after transfer operations complete + signalSemaphoreInfo.deviceIndex = 0; + + VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = 0; + submitInfo.pWaitSemaphoreInfos = nullptr; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfo; + submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &signalSemaphoreInfo : nullptr; VkFence queueCompleteFence = encodeFrameInfo->qpMapCmdBuffer->GetFence(); assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence)); + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(((m_vkDevCtx->GetVideoEncodeQueueFlag() & VK_QUEUE_TRANSFER_BIT) != 0) ? 
- VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER, - 0, 1, &submitInfo, - queueCompleteFence); + VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER, + 0, // queueIndex + 1, // submitCount + &submitInfo, queueCompleteFence, + "Encode Staging QpMap", + m_encodeEncodeFrameNum, + m_encodeInputFrameNum); encodeFrameInfo->qpMapCmdBuffer->SetCommandBufferSubmitted(); bool syncCpuAfterStaging = false; @@ -475,15 +489,24 @@ VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObjinputCmdBuffer->GetCommandBuffer(); VkSemaphore frameCompleteSemaphore = encodeFrameInfo->inputCmdBuffer->GetSemaphore(); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoTransferSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.waitSemaphoreCount = 0; - submitInfo.pWaitSemaphores = nullptr; - submitInfo.pWaitDstStageMask = &videoTransferSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = pCmdBuf; - submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr; - submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
1 : 0; + VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR }; + cmdBufferInfo.commandBuffer = *pCmdBuf; + cmdBufferInfo.deviceMask = 0; + + VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + signalSemaphoreInfo.semaphore = frameCompleteSemaphore; + signalSemaphoreInfo.value = 0; // Binary semaphore + signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; // Signal after transfer operations complete + signalSemaphoreInfo.deviceIndex = 0; + + VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = 0; + submitInfo.pWaitSemaphoreInfos = nullptr; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfo; + submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &signalSemaphoreInfo : nullptr; VkFence queueCompleteFence = encodeFrameInfo->inputCmdBuffer->GetFence(); assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence)); @@ -491,9 +514,15 @@ VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObjGetVideoEncodeQueueFlag() & VK_QUEUE_TRANSFER_BIT) != 0) ? 
VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER); + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(submitType, - 0, 1, &submitInfo, - queueCompleteFence); + 0, // queueIndex + 1, // submitCount + &submitInfo, + queueCompleteFence, + "Encode Staging Input", + m_encodeEncodeFrameNum, + m_encodeInputFrameNum); encodeFrameInfo->inputCmdBuffer->SetCommandBufferSubmitted(); bool syncCpuAfterStaging = false; @@ -1602,38 +1631,74 @@ VkResult VkVideoEncoder::SubmitVideoCodingCmds(VkSharedBaseObjencodeCmdBuffer != nullptr); + const VkCommandBuffer* pCmdBuf = encodeFrameInfo->encodeCmdBuffer->GetCommandBuffer(); + // The encode operation complete semaphore is not needed at this point. + VkSemaphore frameCompleteSemaphore = VK_NULL_HANDLE; // encodeFrameInfo->encodeCmdBuffer->GetSemaphore(); + + // Create command buffer submit info + VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR }; + cmdBufferInfo.commandBuffer = *pCmdBuf; + cmdBufferInfo.deviceMask = 0; + + + + // Create wait semaphore submit infos // If we are processing the input staging, wait for it's semaphore // to be done before processing the input frame with the encoder. 
- VkSemaphore inputWaitSemaphore[2] = { VK_NULL_HANDLE }; + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[2]{}; uint32_t waitSemaphoreCount = 0; if (encodeFrameInfo->inputCmdBuffer) { - inputWaitSemaphore[waitSemaphoreCount++] = encodeFrameInfo->inputCmdBuffer->GetSemaphore(); + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = encodeFrameInfo->inputCmdBuffer->GetSemaphore(); + waitSemaphoreInfos[waitSemaphoreCount].value = 0; // Binary semaphore + // Use transfer bit since these semaphores come from transfer operations + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; + waitSemaphoreCount++; } if (encodeFrameInfo->qpMapCmdBuffer) { - inputWaitSemaphore[waitSemaphoreCount++] = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore(); - } - - const VkCommandBuffer* pCmdBuf = encodeFrameInfo->encodeCmdBuffer->GetCommandBuffer(); - // The encode operation complete semaphore is not needed at this point. - VkSemaphore frameCompleteSemaphore = VK_NULL_HANDLE; // encodeFrameInfo->encodeCmdBuffer->GetSemaphore(); - - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoEncodeSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.pWaitSemaphores = (waitSemaphoreCount > 0) ? inputWaitSemaphore : nullptr; - submitInfo.waitSemaphoreCount = waitSemaphoreCount; - submitInfo.pWaitDstStageMask = &videoEncodeSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = pCmdBuf; - submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr; - submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
1 : 0; + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore(); + waitSemaphoreInfos[waitSemaphoreCount].value = 0; // Binary semaphore + // Use transfer bit since these semaphores come from transfer operations + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; + waitSemaphoreCount++; + } + + // Create signal semaphore submit info if needed + VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + if (frameCompleteSemaphore != VK_NULL_HANDLE) { + signalSemaphoreInfo.semaphore = frameCompleteSemaphore; + signalSemaphoreInfo.value = 0; // Binary semaphore + signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR; + signalSemaphoreInfo.deviceIndex = 0; + } + + // Create submit info + VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = (waitSemaphoreCount > 0) ? waitSemaphoreInfos : nullptr; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfo; + submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
&signalSemaphoreInfo : nullptr; VkFence queueCompleteFence = encodeFrameInfo->encodeCmdBuffer->GetFence(); assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence)); - VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::ENCODE, 0, - 1, &submitInfo, - queueCompleteFence); + + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::ENCODE, + 0, // queueIndex + 1, // submitCount + &submitInfo, + queueCompleteFence, + "Video Encode", + m_encodeEncodeFrameNum, + m_encodeInputFrameNum); encodeFrameInfo->encodeCmdBuffer->SetCommandBufferSubmitted(); bool syncCpuAfterEncoding = false; diff --git a/vk_video_encoder/src/vulkan_video_encoder.cpp b/vk_video_encoder/src/vulkan_video_encoder.cpp index 18831f2a..196caf3a 100644 --- a/vk_video_encoder/src/vulkan_video_encoder.cpp +++ b/vk_video_encoder/src/vulkan_video_encoder.cpp @@ -106,6 +106,7 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; From 9f58f6b26024eb007c38dad9bfc749817328afcf Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Mon, 30 Dec 2024 04:26:15 -0800 Subject: [PATCH 6/7] decode: Add vp9 decoder support Update vp9 decode headers VK_KHR_video_decode_vp9 spec update Fix offset of frames in super frame for parsing and decoding VP9 Decode: Derive UsePrevFrameMvs VP9 Decode: Use previous loop filter values when not coded Fix self assignment of renderHeight when frame and render sizes are different resolution change fixes Handle alignment of bitstream offset for frames in super frame VP9 Decode: Use correct constants for DPB state arrays Fix output DISTINCT case for VP9 After recent changes related to codedExtent in VP9, the output distinct case was broken, since the codedExtent for the dstPictureResource is not set. 
In the COINCIDE case, the dstPictureResource is copied from the reference resource. Fix preparing ref_frame_sign_bias bitmask Display an existing frame when show_existing_frame is set and skip HW decode Fix parsing of profile Fix conversion of 12-bit chroma format from ffmpeg values to vulkan video. Unify display logic for show_frame and show_existing_frame. App runs fine if a frame is displayed once or not displayed. It crashes if a frame is displayed more than once (example clip: vp90-2-10-show-existing-frame.webm). Fix odd resolution frame decoding VulkanVideoDecoder: Fix VP9 include name The VP9 include file uses upper case. VP9 Decoder: use different bitstreamBuffer than the buffer used in previous frame decoding Use override for VulkanVP9Decoder Remove useless av1 includes Remove StdVideoDecodeVP9ReferenceInfo Cleanup ParseFrameHeader interface Set UsePrevFrameMvs to 0 when frame size is changed Parse VP9 10bit profile correctly in header parser Make sure to retrieve the VP9 10bit profile correctly as value 2 in the parser VP9Decode: fill BitDepth in pStdColorConfig Remove BitDepth in VkParserVp9PictureData Use maximum of frame width/height and render width/height for vp9 session VP9Decoder: set codecProfile in nvsi nvsi.codecProfile was always 0 and might be used by a parser client (CTS). Enable requested video codec extension by adding it to required extension list. Enable maintenance1 extension. Add it to optional extension list. Cleanup code to get codec type to choose decode extension list earlier. 
Align source buffer range to the multiple of minBitstreamBufferSizeAlignment Remove the local headers --- .../include/VkVideoCore/VkVideoCoreProfile.h | 45 +- .../VkVideoCore/VulkanVideoCapabilities.h | 51 +- common/libs/VkCodecUtils/DecoderConfig.h | 5 +- .../libs/VkCodecUtils/VulkanDeviceContext.h | 33 +- .../libs/VkCodecUtils/VulkanVideoSession.cpp | 4 + vk_video_decoder/demos/vk-video-dec/Main.cpp | 106 +- .../vkvideo_parser/VulkanVideoParserIf.h | 91 +- .../NvVideoParser/include/VulkanVP9Decoder.h | 1583 +--------------- .../NvVideoParser/src/VulkanVP9Decoder.cpp | 1599 ++++++++--------- .../NvVideoParser/src/VulkanVideoDecoder.cpp | 15 +- .../libs/VkDecoderUtils/FFmpegDemuxer.cpp | 26 +- .../libs/VkVideoDecoder/VkVideoDecoder.cpp | 37 +- .../libs/VkVideoParser/VulkanVideoParser.cpp | 320 +++- vk_video_decoder/src/vulkan_video_decoder.cpp | 37 +- .../test/vulkan-video-dec/Main.cpp | 84 +- .../test/vulkan-video-simple-dec/Main.cpp | 2 + vk_video_encoder/demos/vk-video-enc/Main.cpp | 52 +- .../libs/VkVideoEncoder/VkEncoderConfig.h | 2 - vk_video_encoder/src/vulkan_video_encoder.cpp | 36 +- 19 files changed, 1415 insertions(+), 2713 deletions(-) diff --git a/common/include/VkVideoCore/VkVideoCoreProfile.h b/common/include/VkVideoCore/VkVideoCoreProfile.h index 7483d8ce..55ea56e7 100644 --- a/common/include/VkVideoCore/VkVideoCoreProfile.h +++ b/common/include/VkVideoCore/VkVideoCoreProfile.h @@ -50,7 +50,8 @@ class VkVideoCoreProfile { return (videoCodecOperations & (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); @@ -100,12 +101,26 @@ class VkVideoCoreProfile m_av1DecodeProfile = *pProfileExt; } else { // Use default ext profile 
parameters - m_av1DecodeProfile.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR; + m_av1DecodeProfile.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR; m_av1DecodeProfile.stdProfile = STD_VIDEO_AV1_PROFILE_MAIN; } m_profile.pNext = &m_av1DecodeProfile; m_av1DecodeProfile.pNext = NULL; - + } else if (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + VkVideoDecodeVP9ProfileInfoKHR const * pProfileExt = (VkVideoDecodeVP9ProfileInfoKHR const *)pVideoProfileExt; + if (pProfileExt && (pProfileExt->sType != VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR)) { + m_profile.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + return false; + } + if (pProfileExt) { + m_vp9DecodeProfile = *pProfileExt; + } else { + // Use default ext profile parameters + m_vp9DecodeProfile.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR; + m_vp9DecodeProfile.stdProfile = STD_VIDEO_VP9_PROFILE_0; + } + m_profile.pNext = &m_vp9DecodeProfile; + m_vp9DecodeProfile.pNext = NULL; } else if (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR) { VkVideoEncodeH264ProfileInfoKHR const * pProfileExt = (VkVideoEncodeH264ProfileInfoKHR const *)pVideoProfileExt; if (pProfileExt && (pProfileExt->sType != VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR)) { @@ -205,6 +220,7 @@ class VkVideoCoreProfile VkVideoDecodeH264ProfileInfoKHR decodeH264ProfilesRequest; VkVideoDecodeH265ProfileInfoKHR decodeH265ProfilesRequest; VkVideoDecodeAV1ProfileInfoKHR decodeAV1ProfilesRequest; + VkVideoDecodeVP9ProfileInfoKHR decodeVP9ProfilesRequest; VkVideoEncodeH264ProfileInfoKHR encodeH264ProfilesRequest; VkVideoEncodeH265ProfileInfoKHR encodeH265ProfilesRequest; VkVideoEncodeAV1ProfileInfoKHR encodeAV1ProfilesRequest; @@ -243,6 +259,13 @@ class VkVideoCoreProfile STD_VIDEO_H265_PROFILE_IDC_INVALID : (StdVideoH265ProfileIdc)videoH26xProfileIdc; pVideoProfileExt = (VkBaseInStructure*)&decodeH265ProfilesRequest; + } else if 
(videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + decodeVP9ProfilesRequest.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR; + decodeVP9ProfilesRequest.pNext = NULL; + decodeVP9ProfilesRequest.stdProfile = (videoH26xProfileIdc == 0) ? + STD_VIDEO_VP9_PROFILE_0 : + (StdVideoVP9Profile)videoH26xProfileIdc; + pVideoProfileExt = (VkBaseInStructure*)&decodeVP9ProfilesRequest; } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR) { encodeH264ProfilesRequest.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR; encodeH264ProfilesRequest.pNext = pEncodeUsageInfo; @@ -287,7 +310,9 @@ class VkVideoCoreProfile bool IsDecodeCodecType() const { return ((m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) || - (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR)); + (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) || + (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) || + (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR)); } operator bool() const @@ -340,6 +365,15 @@ class VkVideoCoreProfile } } + const VkVideoDecodeVP9ProfileInfoKHR* GetDecodeVP9Profile() const + { + if (m_vp9DecodeProfile.sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR) { + return &m_vp9DecodeProfile; + } else { + return NULL; + } + } + const VkVideoEncodeH264ProfileInfoKHR* GetEncodeH264Profile() const { if (m_h264EncodeProfile.sType == VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR) { @@ -605,6 +639,8 @@ class VkVideoCoreProfile return "decode h.265"; case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: return "decode av1"; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + return "decode vp9"; case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: return "encode h.264"; case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: @@ -769,6 +805,7 @@ class VkVideoCoreProfile 
VkVideoDecodeH264ProfileInfoKHR m_h264DecodeProfile; VkVideoDecodeH265ProfileInfoKHR m_h265DecodeProfile; VkVideoDecodeAV1ProfileInfoKHR m_av1DecodeProfile; + VkVideoDecodeVP9ProfileInfoKHR m_vp9DecodeProfile; VkVideoEncodeH264ProfileInfoKHR m_h264EncodeProfile; VkVideoEncodeH265ProfileInfoKHR m_h265EncodeProfile; VkVideoEncodeAV1ProfileInfoKHR m_av1EncodeProfile; diff --git a/common/include/VkVideoCore/VulkanVideoCapabilities.h b/common/include/VkVideoCore/VulkanVideoCapabilities.h index b703298b..a2cc4af9 100644 --- a/common/include/VkVideoCore/VulkanVideoCapabilities.h +++ b/common/include/VkVideoCore/VulkanVideoCapabilities.h @@ -38,6 +38,7 @@ class VulkanVideoCapabilities VkVideoDecodeH264CapabilitiesKHR h264Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, nullptr }; VkVideoDecodeH265CapabilitiesKHR h265Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, nullptr }; VkVideoDecodeAV1CapabilitiesKHR av1Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_KHR, nullptr }; + VkVideoDecodeVP9CapabilitiesKHR vp9Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR, nullptr }; if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { videoDecodeCapabilities.pNext = &h264Capabilities; @@ -45,6 +46,8 @@ class VulkanVideoCapabilities videoDecodeCapabilities.pNext = &h265Capabilities; } else if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { videoDecodeCapabilities.pNext = &av1Capabilities; + } else if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + videoDecodeCapabilities.pNext = &vp9Capabilities; } else { assert(!"Unsupported codec"); return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR; @@ -197,6 +200,16 @@ class VulkanVideoCapabilities } } break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + { + assert(pVideoDecodeCapabilities->pNext); + const VkVideoDecodeVP9CapabilitiesKHR* pVP9Capabilities = 
(VkVideoDecodeVP9CapabilitiesKHR*)pVideoDecodeCapabilities->pNext; + assert(pVP9Capabilities->sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR); + if (pVP9Capabilities->sType != VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR) { + return VK_ERROR_INITIALIZATION_FAILED; + } + } + break; case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: { assert(pVideoEncodeCapabilities->pNext); @@ -277,6 +290,26 @@ class VulkanVideoCapabilities assert(!"Unsupported h.265 STD version"); return VK_ERROR_INCOMPATIBLE_DRIVER; } + } else if (videoProfile.GetCodecType() == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + const VkVideoDecodeAV1CapabilitiesKHR* pAV1DecCapabilities = (VkVideoDecodeAV1CapabilitiesKHR*)pVideoDecodeCapabilities->pNext; + std::cout << "\t\t\t" << "maxLevelIdc: " << pAV1DecCapabilities->maxLevel << std::endl; + if (strncmp(pVideoCapabilities->stdHeaderVersion.extensionName, + VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, + sizeof (pVideoCapabilities->stdHeaderVersion.extensionName) - 1U) || + (pVideoCapabilities->stdHeaderVersion.specVersion != VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION)) { + assert(!"Unsupported AV1 STD version"); + return VK_ERROR_INCOMPATIBLE_DRIVER; + } + } else if (videoProfile.GetCodecType() == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + const VkVideoDecodeVP9CapabilitiesKHR* pVP9DecCapabilities = (VkVideoDecodeVP9CapabilitiesKHR*)pVideoDecodeCapabilities->pNext; + std::cout << "\t\t\t" << "maxLevelIdc: " << pVP9DecCapabilities->maxLevel << std::endl; + if (strncmp(pVideoCapabilities->stdHeaderVersion.extensionName, + VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, + sizeof (pVideoCapabilities->stdHeaderVersion.extensionName) - 1U) || + (pVideoCapabilities->stdHeaderVersion.specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { + assert(!"Unsupported VP9 STD version"); + return VK_ERROR_INCOMPATIBLE_DRIVER; + } } else { assert(!"Unsupported codec"); } @@ -354,8 +387,12 @@ class 
VulkanVideoCapabilities int32_t* pVideoQueueFamily, VkQueueFlags queueFlagsRequired = ( VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR), VkVideoCodecOperationFlagsKHR videoCodeOperations = - ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | + ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)) { std::vector queues; @@ -429,6 +466,16 @@ class VulkanVideoCapabilities &videoDecodeCapabilities); } + static VkResult GetDecodeVP9Capabilities(const VulkanDeviceContext* vkDevCtx, uint32_t, + const VkVideoProfileInfoKHR& videoProfile, + VkVideoCapabilitiesKHR &videoDecodeCapabilities) + { + videoDecodeCapabilities.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + return vkDevCtx->GetPhysicalDeviceVideoCapabilitiesKHR(vkDevCtx->getPhysicalDevice(), + &videoProfile, + &videoDecodeCapabilities); + } + static VkResult GetEncodeH264Capabilities(const VulkanDeviceContext* vkDevCtx, uint32_t, const VkVideoProfileInfoKHR& videoProfile, VkVideoCapabilitiesKHR &videoEncodeCapabilities, diff --git a/common/libs/VkCodecUtils/DecoderConfig.h b/common/libs/VkCodecUtils/DecoderConfig.h index 4d06a1d5..b0f14a59 100644 --- a/common/libs/VkCodecUtils/DecoderConfig.h +++ b/common/libs/VkCodecUtils/DecoderConfig.h @@ -75,7 +75,6 @@ struct DecoderConfig { directMode = false; enableHwLoadBalancing = false; selectVideoWithComputeQueue = false; - enableVideoEncoder = false; outputy4m = false; outputcrcPerFrame = false; outputcrc = false; @@ -137,6 +136,9 @@ struct DecoderConfig { } else if (strcmp(args[0], "av1") == 0) { forceParserType = 
VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR; return true; + } else if ((strcmp(args[0], "vp9") == 0)) { + forceParserType = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; + return true; } else { std::cerr << "Invalid codec \"" << args[0] << "\"" << std::endl; return false; @@ -470,7 +472,6 @@ struct DecoderConfig { uint32_t noPresent : 1; uint32_t enableHwLoadBalancing : 1; uint32_t selectVideoWithComputeQueue : 1; - uint32_t enableVideoEncoder : 1; uint32_t outputy4m : 1; uint32_t outputcrc : 1; uint32_t outputcrcPerFrame : 1; diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.h b/common/libs/VkCodecUtils/VulkanDeviceContext.h index 97325f1d..6e83e33f 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceContext.h +++ b/common/libs/VkCodecUtils/VulkanDeviceContext.h @@ -51,6 +51,21 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { MAX_QUEUE_FAMILIES = 6, // Gfx, Present, Compute, Transfer, Decode, Encode }; + static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_DECODE = + VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; + + static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_ENCODE = + VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR; + + static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_ALL = + VIDEO_CODEC_OPERATIONS_DECODE | + VIDEO_CODEC_OPERATIONS_ENCODE; + VulkanDeviceContext(); VkInstance getInstance() const { @@ -230,6 +245,7 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { VkResult InitVulkanDecoderDevice(const char * pAppName, VkInstance vkInstance = VK_NULL_HANDLE, + VkVideoCodecOperationFlagsKHR videoCodecs = VIDEO_CODEC_OPERATIONS_ALL, bool enableWsi = false, bool enableWsiDirectMode = false, bool enableValidation = false, @@ -243,6 +259,7 @@ class 
VulkanDeviceContext : public vk::VkInterfaceFunctions { VkResult AddReqInstanceExtension(const char* requiredInstanceExtension, bool verbose = false); VkResult CheckAllInstanceExtensions(bool verbose = false); VkResult AddReqDeviceExtensions(const char* const* requiredDeviceExtensions, bool verbose = false); + VkResult AddReqDeviceExtension(const char* requiredDeviceExtension, bool verbose = false); VkResult AddOptDeviceExtensions(const char* const* optionalDeviceExtensions, bool verbose = false); bool HasAllDeviceExtensions(VkPhysicalDevice physDevice, const char* printMissingDeviceExt = nullptr); @@ -260,26 +277,16 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { const VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_TRANSFER_BIT, const VkVideoCodecOperationFlagsKHR requestVideoDecodeQueueOperations = - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), + VIDEO_CODEC_OPERATIONS_DECODE, const VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_TRANSFER_BIT, const VkVideoCodecOperationFlagsKHR requestVideoEncodeQueueOperations = - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR), + VIDEO_CODEC_OPERATIONS_ENCODE, VkPhysicalDevice vkPhysicalDevice = VK_NULL_HANDLE); VkResult CreateVulkanDevice(int32_t numDecodeQueues = 1, int32_t numEncodeQueues = 0, - VkVideoCodecOperationFlagsKHR videoCodecs = - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) | - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR), + VkVideoCodecOperationFlagsKHR videoCodecs = VIDEO_CODEC_OPERATIONS_ALL, bool createTransferQueue = false, bool 
createGraphicsQueue = false, bool createPresentQueue = false, diff --git a/common/libs/VkCodecUtils/VulkanVideoSession.cpp b/common/libs/VkCodecUtils/VulkanVideoSession.cpp index 021ec538..3a8935d7 100644 --- a/common/libs/VkCodecUtils/VulkanVideoSession.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoSession.cpp @@ -39,6 +39,7 @@ VkResult VulkanVideoSession::Create(const VulkanDeviceContext* vkDevCtx, static const VkExtensionProperties h264DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION }; static const VkExtensionProperties h265DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION }; static const VkExtensionProperties av1DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION }; + static const VkExtensionProperties vp9DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION }; static const VkExtensionProperties h264EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION }; static const VkExtensionProperties h265EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION }; static const VkExtensionProperties av1EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_SPEC_VERSION }; @@ -63,6 +64,9 @@ VkResult VulkanVideoSession::Create(const VulkanDeviceContext* vkDevCtx, case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: createInfo.pStdHeaderVersion = &av1DecodeStdExtensionVersion; break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + createInfo.pStdHeaderVersion = &vp9DecodeStdExtensionVersion; + break; case 
VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: createInfo.pStdHeaderVersion = &h264EncodeStdExtensionVersion; break; diff --git a/vk_video_decoder/demos/vk-video-dec/Main.cpp b/vk_video_decoder/demos/vk-video-dec/Main.cpp index 8579362e..3e499c32 100644 --- a/vk_video_decoder/demos/vk-video-dec/Main.cpp +++ b/vk_video_decoder/demos/vk-video-dec/Main.cpp @@ -28,14 +28,33 @@ #include "VkShell/Shell.h" #include "VkCodecUtils/VkVideoFrameOutput.h" -int main(int argc, const char **argv) { +int main(int argc, const char **argv) +{ DecoderConfig decoderConfig(argv[0]); decoderConfig.ParseArgs(argc, argv); + VkSharedBaseObj videoStreamDemuxer; + VkResult result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), + decoderConfig.forceParserType, + decoderConfig.enableStreamDemuxing, + decoderConfig.initialWidth, + decoderConfig.initialHeight, + decoderConfig.initialBitdepth, + videoStreamDemuxer); + if (result != VK_SUCCESS) { + assert(!"Can't initialize the VideoStreamDemuxer!"); + return -1; + } + + VkVideoCodecOperationFlagsKHR videoCodecOperation = (decoderConfig.forceParserType != VK_VIDEO_CODEC_OPERATION_NONE_KHR) ? 
+ decoderConfig.forceParserType : + videoStreamDemuxer->GetVideoCodec(); + VulkanDeviceContext vkDevCtxt; - VkResult result = vkDevCtxt.InitVulkanDecoderDevice(decoderConfig.appName.c_str(), + result = vkDevCtxt.InitVulkanDecoderDevice(decoderConfig.appName.c_str(), VK_NULL_HANDLE, + videoCodecOperation, !decoderConfig.noPresent, decoderConfig.directMode, decoderConfig.validate, @@ -54,16 +73,8 @@ int main(int argc, const char **argv) { VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR; - VkQueueFlags requestVideoEncodeQueueMask = 0; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - } - if (decoderConfig.selectVideoWithComputeQueue) { requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -71,17 +82,6 @@ int main(int argc, const char **argv) { requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT; } - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs | - (decoderConfig.enableVideoEncoder ? 
videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR); - if (!decoderConfig.noPresent) { VkSharedBaseObj displayShell; @@ -98,17 +98,12 @@ int main(int argc, const char **argv) { result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT | requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoDecodeQueueMask), displayShell, requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), - requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + videoCodecOperation, + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); @@ -117,30 +112,15 @@ int main(int argc, const char **argv) { assert(displayShell->PhysDeviceCanPresent(vkDevCtxt.getPhysicalDevice(), vkDevCtxt.GetPresentQueueFamilyIdx())); - vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - decoderConfig.enableVideoEncoder ? 
1 : 0, // num encode queues - videoCodecs, - false, // createTransferQueue - true, // createGraphicsQueue - true, // createDisplayQueue + vkDevCtxt.CreateVulkanDevice(numDecodeQueues, // numDecodeQueues + 0, // num encode queues + videoCodecOperation, // videoCodecs + false, // createTransferQueue + true, // createGraphicsQueue + true, // createDisplayQueue requestVideoComputeQueueMask != 0 // createComputeQueue ); - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } - VkSharedBaseObj vulkanVideoProcessor; result = VulkanVideoProcessor::Create(decoderConfig, &vkDevCtxt, vulkanVideoProcessor); if (result != VK_SUCCESS) { @@ -176,8 +156,7 @@ int main(int argc, const char **argv) { result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_TRANSFER_BIT | requestVideoDecodeQueueMask | - requestVideoComputeQueueMask | - requestVideoEncodeQueueMask), + requestVideoComputeQueueMask), nullptr, requestVideoDecodeQueueMask); if (result != VK_SUCCESS) { @@ -187,9 +166,9 @@ int main(int argc, const char **argv) { } - result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - 0, // num encode queues - videoCodecs, + result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues, // numDecodeQueues + 0, // num encode queues + videoCodecOperation, // videoCodecs // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
@@ -204,21 +183,6 @@ int main(int argc, const char **argv) { return -1; } - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } - VkSharedBaseObj vulkanVideoProcessor; result = VulkanVideoProcessor::Create(decoderConfig, &vkDevCtxt, vulkanVideoProcessor); if (result != VK_SUCCESS) { diff --git a/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h b/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h index 21ad0fed..d5141c1b 100644 --- a/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h +++ b/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h @@ -269,61 +269,6 @@ typedef struct VkParserHevcPictureData { } VkParserHevcPictureData; -typedef struct VkParserVp9PictureData { - uint32_t width; - uint32_t height; - - // Frame Indexes - VkPicIf* pLastRef; - VkPicIf* pGoldenRef; - VkPicIf* pAltRef; - - uint32_t keyFrame; - uint32_t version; - uint32_t showFrame; - uint32_t errorResilient; - uint32_t bit_depth_minus8; - uint32_t colorSpace; - uint32_t subsamplingX; - uint32_t subsamplingY; - uint32_t activeRefIdx[3]; - uint32_t intraOnly; - uint32_t resetFrameContext; - uint32_t frameParallelDecoding; - uint32_t refreshFrameFlags; - uint8_t refFrameSignBias[4]; - uint32_t frameContextIdx; - uint32_t allow_high_precision_mv; - uint32_t mcomp_filter_type; - uint32_t loopFilterLevel; - uint32_t loopFilterSharpness; - uint32_t log2_tile_columns; - uint32_t log2_tile_rows; - int32_t mbRefLfDelta[4]; - int32_t mbModeLfDelta[2]; - int32_t segmentMapTemporalUpdate; - uint8_t segmentFeatureEnable[8][4]; - uint8_t mb_segment_tree_probs[7]; - uint8_t segment_pred_probs[3]; - int16_t 
segmentFeatureData[8][4]; - uint32_t scaledWidth; - uint32_t scaledHeight; - uint32_t scalingActive; - uint32_t segmentEnabled; - uint32_t prevIsKeyFrame; - uint32_t PrevShowFrame; - uint32_t modeRefLfEnabled; - int32_t qpYAc; - int32_t qpYDc; - int32_t qpChDc; - int32_t qpChAc; - uint32_t segmentMapUpdate; - uint32_t segmentFeatureMode; - uint32_t refreshEntropyProbs; - uint32_t frameTagSize; - uint32_t offsetToDctParts; -} VkParserVp9PictureData; - struct VkParserAv1PictureData { // The picture info structure is mostly pointing at other // structures defining the coding tool parameters. Those @@ -373,6 +318,42 @@ struct VkParserAv1PictureData { uint32_t frame_height; }; +typedef struct VkParserVp9PictureData { + + StdVideoDecodeVP9PictureInfo stdPictureInfo; + StdVideoVP9ColorConfig stdColorConfig; + StdVideoVP9LoopFilter stdLoopFilter; + StdVideoVP9Segmentation stdSegmentation; + + // frame dimensions + uint32_t FrameWidth, FrameHeight; + uint32_t MiCols, MiRows; + uint32_t Sb64Cols, Sb64Rows; + uint32_t renderWidth, renderHeight; + + // display details + uint8_t frame_to_show_map_idx; + bool show_existing_frame; + + // references + uint8_t ref_frame_idx[STD_VIDEO_VP9_REFS_PER_FRAME]; + uint8_t pic_idx[STD_VIDEO_VP9_NUM_REF_FRAMES]; + VkPicIf* pLastRef; + VkPicIf* pGoldenRef; + VkPicIf* pAltRef; + + // other derived parameters + bool FrameIsIntra; + uint8_t ChromaFormat; + uint32_t numTiles; + uint32_t compressedHeaderSize; + + // bitstream divisions + uint32_t uncompressedHeaderOffset; + uint32_t compressedHeaderOffset; + uint32_t tilesOffset; +} VkParserVp9PictureData; + typedef struct VkParserPictureData { int32_t PicWidthInMbs; // Coded Frame Size int32_t FrameHeightInMbs; // Coded Frame Height diff --git a/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h b/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h index 142f7db8..503f5827 100644 --- a/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h +++ 
b/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h @@ -22,31 +22,36 @@ #include "VulkanVideoDecoder.h" -typedef enum { - EIGHTTAP_SMOOTH, - EIGHTTAP, - EIGHTTAP_SHARP, - BILINEAR, - SWITCHABLE /* should be the last one */ -} INTERPOLATIONFILTERTYPE; - -typedef enum { - //NONE = -1, - INTRA_FRAME = 0, - LAST_FRAME = 1, - GOLDEN_FRAME = 2, - ALTREF_FRAME = 3, - VP9_MAX_REF_FRAMES = 4 -}MV_REFERENCE_FRAME; - -typedef enum { - ONLY_4X4 = 0, - ALLOW_8X8 = 1, - ALLOW_16X16 = 2, - ALLOW_32X32 = 3, - TX_MODE_SELECT = 4, - NB_TXFM_MODES = 5, -} TXFM_MODE; +#define VP9_FRAME_MARKER 2 +#define VP9_FRAME_SYNC_CODE 0x498342 +#define VP9_MAX_PRBABILITY 255 +#define VP9_MIN_TILE_WIDTH_B64 4 +#define VP9_MAX_TILE_WIDTH_B64 64 +#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) +#define ALIGN_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) + +#define VP9_BUFFER_POOL_MAX_SIZE 10 +#define VP9_MAX_NUM_SPATIAL_LAYERS 4 + +#define VP9_CHECK_FRAME_MARKER { \ + if (u(2) != VP9_FRAME_MARKER) { \ + assert(!"Invalid frame marker");\ + return false; \ + } \ +} + +#define VP9_CHECK_ZERO_BIT { \ + if (u(1) != 0) { \ + assert(!"Invalid syntax"); \ + return false; \ + } \ +} + +#define VP9_CHECK_FRAME_SYNC_CODE { \ + if (u(24) != VP9_FRAME_SYNC_CODE) { \ + assert(!"Invalid frame sync code"); \ + } \ +} // Segment level features. 
typedef enum { @@ -57,1492 +62,78 @@ typedef enum { SEG_LVL_MAX = 4 // Number of MB level features supported } SEG_LVL_FEATURES; -typedef enum { - SINGLE_PREDICTION_ONLY = 0, - COMP_PREDICTION_ONLY = 1, - HYBRID_PREDICTION = 2, - NB_PREDICTION_TYPES = 3, -} COMPPREDMODE_TYPE; - -/* Symbols for coding which components are zero jointly */ -typedef enum { - MV_JOINT_ZERO = 0, /* Zero vector */ - MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ - MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ - MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ -} MV_JOINT_TYPE; - -/* Symbols for coding magnitude class of nonzero components */ -typedef enum { - MV_CLASS_0 = 0, /* (0, 2] integer pel */ - MV_CLASS_1 = 1, /* (2, 4] integer pel */ - MV_CLASS_2 = 2, /* (4, 8] integer pel */ - MV_CLASS_3 = 3, /* (8, 16] integer pel */ - MV_CLASS_4 = 4, /* (16, 32] integer pel */ - MV_CLASS_5 = 5, /* (32, 64] integer pel */ - MV_CLASS_6 = 6, /* (64, 128] integer pel */ - MV_CLASS_7 = 7, /* (128, 256] integer pel */ - MV_CLASS_8 = 8, /* (256, 512] integer pel */ - MV_CLASS_9 = 9, /* (512, 1024] integer pel */ - MV_CLASS_10 = 10, /* (1024,2048] integer pel */ -} MV_CLASS_TYPE; - -typedef enum PARTITION_TYPE { - PARTITION_NONE, - PARTITION_HORZ, - PARTITION_VERT, - PARTITION_SPLIT, - PARTITION_TYPES -} PARTITION_TYPE; - - -typedef enum -{ - DC_PRED, /* average of above and left pixels */ - V_PRED, /* vertical prediction */ - H_PRED, /* horizontal prediction */ - D45_PRED, /* Directional 45 deg prediction [anti-clockwise from 0 deg hor] */ - D135_PRED, /* Directional 135 deg prediction [anti-clockwise from 0 deg hor] */ - D117_PRED, /* Directional 112 deg prediction [anti-clockwise from 0 deg hor] */ - D153_PRED, /* Directional 157 deg prediction [anti-clockwise from 0 deg hor] */ - D27_PRED, /* Directional 22 deg prediction [anti-clockwise from 0 deg hor] */ - D63_PRED, /* Directional 67 deg prediction [anti-clockwise from 0 deg hor] */ - TM_PRED, /* Truemotion prediction */ - NEARESTMV, - 
NEARMV, - ZEROMV, - NEWMV, - SPLITMV, - MB_MODE_COUNT -} MB_PREDICTION_MODE; - -typedef enum { - KEY_FRAME = 0, - INTER_FRAME = 1, - NUM_FRAME_TYPES, -} FRAME_TYPE; - -// Segment level features. -typedef enum { - TX_4X4 = 0, // 4x4 dct transform - TX_8X8 = 1, // 8x8 dct transform - TX_16X16 = 2, // 16x16 dct transform - TX_32X32 = 3, // 32x32 dct transform - TX_SIZE_MAX_SB, // Number of transforms available to SBs -} TX_SIZE; - -#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) - -#define BIG_NUM 0xffff -#define MIN_TILE_WIDTH_B64 4 -#define MAX_TILE_WIDTH_B64 64 -#define MI_SIZE_LOG2 3 -#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) -#define ALIGN_POWER_OF_TWO(value, n) \ - (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) -#define VP9_MB_LVL_MAX 2 -#define VP9_MAX_MB_SEGMENTS 4 -#define VP9_MB_FEATURE_TREE_PROBS 3 -#define MAX_REF_LF_DELTAS 4 -#define MAX_MODE_LF_DELTAS 2 //for vp8 its 4 -#define ALLOWED_REFS_PER_FRAME 3 -#define NUM_REF_FRAMES 8 -#define NUM_REF_FRAMES_LG2 3 -#define NUM_FRAME_CONTEXTS_LG2 2 -#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6) -#define MIN_TILE_WIDTH 256 -#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6) -//#define MAX_TILE_WIDTH 4096 -#define MAX_MB_SEGMENTS 8 -#define MB_SEG_TREE_PROBS (MAX_MB_SEGMENTS-1) -#define MAX_PROB 255 -#define PREDICTION_PROBS 3 -#define TX_SIZE_CONTEXTS 2 -#define PARTITION_PLOFFSET 4 // number of probability models per block size -#define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) -#define BLOCK_SIZE_GROUPS 4 -#define VP9_INTRA_MODES 10/* (TM_PRED + 1) */ -#define COMP_PRED_CONTEXTS 2 -/* Entropy nodes above is divided in two parts, first three probs in part1 - * and the modeled probs in part2. Part1 is padded so that tables align with - * 32 byte addresses, so there is four bytes for each table. 
*/ -#define ENTROPY_NODES_PART1 4 -#define ENTROPY_NODES_PART2 8 -#define INTER_MODE_CONTEXTS 7 -#define VP9_SWITCHABLE_FILTERS 3 /* number of switchable filters */ -#define COMP_PRED_CONTEXTS 2 -#define INTRA_INTER_CONTEXTS 4 -#define COMP_INTER_CONTEXTS 5 -#define REF_CONTEXTS 5 -#define VP9_BLOCK_TYPES 2 -#define VP9_REF_TYPES 2 // intra=0, inter=1 -#define VP9_COEF_BANDS 6 -#define VP9_PREV_COEF_CONTEXTS 6 -#define MBSKIP_CONTEXTS 3 -#define COEF_UPDATE_PROB 252 -#define VP9_PROB_HALF 128 -#define VP9_NMV_UPDATE_PROB 252 -#define VP9_MV_UPDATE_PRECISION 7 -#define MV_JOINTS 4 -#define MV_CLASSES 11 -#define CLASS0_BITS 1 -#define CLASS0_SIZE (1 << CLASS0_BITS) -#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) -/* The first nodes of the entropy probs are unconstrained, the rest are - * modeled with statistic distribution. */ -#define UNCONSTRAINED_NODES 3 -#define MODEL_NODES (VP9_ENTROPY_NODES - UNCONSTRAINED_NODES) -#define PIVOT_NODE 2 // which node is pivot -#define COEFPROB_MODELS 128 -#define END_OF_STREAM 0xFFFFFFFFU -#define VP9_DEF_UPDATE_PROB 252 -#define MODULUS_PARAM 13 -#define OK 0 //HANTRO_OK -#define NOK 1 //HANTRO_NOK -#define CHECK_END_OF_STREAM(s) if((s)==END_OF_STREAM) return (s) -#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV) -#define VP9_REF_LIST_SIZE 8 -#define SEGMENT_DELTADATA 0 -#define SEGMENT_ABSDATA 1 -#define MAXQ 255 -#define LOTS_OF_BITS 0x40000000 -#define BD_VALUE_SIZE ((int32_t)sizeof(VP9_BD_VALUE)*CHAR_BIT) - -#define VP9_ENTROPY_NODES 11 -#define COEF_COUNT_SAT 24 -#define COEF_MAX_UPDATE_FACTOR 112 -#define COEF_COUNT_SAT_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_KEY 112 -#define COEF_COUNT_SAT_AFTER_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 -#define MODE_COUNT_SAT 20 -#define MODE_MAX_UPDATE_FACTOR 128 -#define MAX_PROBS 32 -#define MVREF_COUNT_SAT 20 -#define MVREF_MAX_UPDATE_FACTOR 128 -#define MV_COUNT_SAT 20 -#define MV_MAX_UPDATE_FACTOR 128 - -/* Coefficient token alphabet */ - -#define ZERO_TOKEN 0 /* 0 
Extra Bits 0+0 */ -#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */ -#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */ -#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */ -#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ -#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ -#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ -#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ -#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ -#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ -#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 13+1 */ -#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ -#define MAX_ENTROPY_TOKENS 12 -#define FRAME_CONTEXTS_LOG2 2 -#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) - -#define DCT_EOB_MODEL_TOKEN 3 /* EOB Extra Bits 0+0 */ - -typedef signed char vp9_tree_index; - -static const int32_t seg_feature_data_signed[SEG_LVL_MAX] = {1, 1, 0, 0}; -static const int32_t seg_feature_data_max[SEG_LVL_MAX] = {MAXQ, 63, 3, 0}; - -#define NVDEC_VP9HWPAD(x, y) unsigned char x[y] - -typedef struct { - /* last bytes of address 41 */ - unsigned char joints[3]; - unsigned char sign[2]; - /* address 42 */ - unsigned char class0[2][1]; - unsigned char fp[2][3]; - unsigned char class0_hp[2]; - unsigned char hp[2]; - unsigned char classes[2][10]; - /* address 43 */ - unsigned char class0_fp[2][2][3]; - unsigned char bits[2][10]; - -} nvdec_nmv_context; - -/* Adaptive entropy contexts, padding elements are added to have - * 256 bit aligned tables for HW access. - * Compile with TRACE_PROB_TABLES to print bases for each table. 
*/ -typedef struct nvdec_vp9AdaptiveEntropyProbs_s -{ - /* address 32 */ - unsigned char inter_mode_prob[7][4]; - unsigned char intra_inter_prob[4]; - - /* address 33 */ - unsigned char uv_mode_prob[10][8]; - unsigned char tx8x8_prob[2][1]; - unsigned char tx16x16_prob[2][2]; - unsigned char tx32x32_prob[2][3]; - unsigned char sb_ymode_probB[4][1]; - unsigned char sb_ymode_prob[4][8]; - - /* address 37 */ - unsigned char partition_prob[2][16][4]; - - /* address 41 */ - unsigned char uv_mode_probB[10][1]; - unsigned char switchable_interp_prob[4][2]; - unsigned char comp_inter_prob[5]; - unsigned char mbskip_probs[3]; - NVDEC_VP9HWPAD(pad1, 1); - - nvdec_nmv_context nmvc; - - /* address 44 */ - unsigned char single_ref_prob[5][2]; - unsigned char comp_ref_prob[5]; - NVDEC_VP9HWPAD(pad2, 17); - - /* address 45 */ - unsigned char probCoeffs[2][2][6][6][4]; - unsigned char probCoeffs8x8[2][2][6][6][4]; - unsigned char probCoeffs16x16[2][2][6][6][4]; - unsigned char probCoeffs32x32[2][2][6][6][4]; - -} nvdec_vp9AdaptiveEntropyProbs_t; - -typedef struct nvdec_vp9EntropyProbs_s -{ - /* Default keyframe probs */ - /* Table formatted for 256b memory, probs 0to7 for all tables followed by - * probs 8toN for all tables. - * Compile with TRACE_PROB_TABLES to print bases for each table. 
*/ - - unsigned char kf_bmode_prob[10][10][8]; - - /* Address 25 */ - unsigned char kf_bmode_probB[10][10][1]; - unsigned char ref_pred_probs[3]; - unsigned char mb_segment_tree_probs[7]; - unsigned char segment_pred_probs[3]; - unsigned char ref_scores[4]; - unsigned char prob_comppred[2]; - NVDEC_VP9HWPAD(pad1, 9); - - /* Address 29 */ - unsigned char kf_uv_mode_prob[10][8]; - unsigned char kf_uv_mode_probB[10][1]; - NVDEC_VP9HWPAD(pad2, 6); - - nvdec_vp9AdaptiveEntropyProbs_t a; /* Probs with backward adaptation */ - - -} nvdec_vp9EntropyProbs_t; - -typedef struct { - unsigned int joints[4]; - unsigned int sign[2][2]; - unsigned int classes[2][11]; - unsigned int class0[2][2]; - unsigned int bits[2][10][2]; - unsigned int class0_fp[2][2][4]; - unsigned int fp[2][4]; - unsigned int class0_hp[2][2]; - unsigned int hp[2][2]; - -} nvdec_nmv_context_counts; - -typedef struct nvdec_vp9EntropyCounts_s -{ - unsigned int inter_mode_counts[7][3][2]; - unsigned int sb_ymode_counts[4][10]; - unsigned int uv_mode_counts[10][10]; - unsigned int partition_counts[16][4]; - unsigned int switchable_interp_counts[4][3]; - unsigned int intra_inter_count[4][2]; - unsigned int comp_inter_count[5][2]; - unsigned int single_ref_count[5][2][2]; - unsigned int comp_ref_count[5][2]; - unsigned int tx32x32_count[2][4]; - unsigned int tx16x16_count[2][3]; - unsigned int tx8x8_count[2][2]; - unsigned int mbskip_count[3][2]; - - nvdec_nmv_context_counts nmvcount; - - unsigned int countCoeffs[2][2][6][6][4]; - unsigned int countCoeffs8x8[2][2][6][6][4]; - unsigned int countCoeffs16x16[2][2][6][6][4]; - unsigned int countCoeffs32x32[2][2][6][6][4]; - - unsigned int countEobs[4][2][2][6][6]; - -} nvdec_vp9EntropyCounts_t; - -// Structure required to update Forward and Backward probabilities -typedef struct _vp9_prob_update_s -{ - nvdec_vp9EntropyProbs_t *pProbTab; - nvdec_vp9EntropyCounts_t *pCtxCounters; - unsigned char keyFrame : 1; - unsigned char prevIsKeyFrame : 1; - unsigned char 
resolutionChange : 1; - unsigned char errorResilient : 1; - unsigned char prevShowFrame : 1; - unsigned char intraOnly : 1; - unsigned char reserved2 : 2; - char lossless; - char transform_mode; - char allow_high_precision_mv; - char mcomp_filter_type; - char comp_pred_mode; - unsigned char FrameParallelDecoding; - unsigned char RefreshEntropyProbs; - uint32_t resetFrameContext; - uint32_t frameContextIdx; - uint32_t offsetToDctParts; - uint32_t allow_comp_inter_inter; - uint32_t probsDecoded; -} vp9_prob_update_s; - -typedef uint32_t VP9_BD_VALUE; - -typedef struct { - uint32_t buffer_end; - uint32_t buffer; - int32_t value; - int32_t count; - uint32_t range; - uint32_t pos; -} vp9_reader; - -const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ -{ - -DCT_EOB_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, 6, /* 2 = ONE */ - 8, 12, /* 3 = LOW_VAL */ - -TWO_TOKEN, 10, /* 4 = TWO */ - -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ - 14, 16, /* 6 = HIGH_LOW */ - -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ - 18, 20, /* 8 = CAT_THREEFOUR */ - -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ - -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ -}; - -const vp9_tree_index vp9_coefmodel_tree[6] = { - -DCT_EOB_MODEL_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, -TWO_TOKEN, /* 2 = ONE */ -}; +typedef struct _vp9_ref_frames_s { + VkPicIf* buffer; + StdVideoVP9FrameType frame_type; + bool segmentation_enabled; +} vp9_ref_frames_s; -const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = { - -0, 2, - -1, -2 -}; - -const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = { - -MV_JOINT_ZERO, 2, - -MV_JOINT_HNZVZ, 4, - -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ -}; - -const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = { - -0, -1, -}; - -const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = { - -MV_CLASS_0, 2, - -MV_CLASS_1, 4, - 6, 8, - 
-MV_CLASS_2, -MV_CLASS_3, - 10, 12, - -MV_CLASS_4, -MV_CLASS_5, - -MV_CLASS_6, 14, - 16, 18, - -MV_CLASS_7, -MV_CLASS_8, - -MV_CLASS_9, -MV_CLASS_10, -}; - -const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = { - -0, 2, - -1, 4, - -2, -3 -}; - -static const uint32_t vp9dx_bitreader_norm[256] = +class VulkanVP9Decoder : public VulkanVideoDecoder { - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -//***************************************************************** -//vp9_entropymode.c -typedef uint8_t vp9_prob; -//typedef uint8_t vp9_tree_index; // typedef i8 vp9_tree_index -static const vp9_prob default_kf_uv_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1] = { - { 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */, - { 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */, - { 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */, - { 120, 11, 50, 123, 163, 135, 64, 77, 103 } /* y = d45 */, - { 113, 9, 36, 155, 111, 157, 32, 44, 161 } /* y = d135 */, - { 116, 9, 55, 176, 76, 96, 37, 61, 149 } /* y = d117 */, - { 115, 9, 28, 141, 161, 167, 21, 25, 193 } /* y = d153 */, - { 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d27 */, - { 116, 12, 64, 120, 140, 125, 49, 115, 121 } /* y = d63 */, - { 102, 19, 66, 162, 182, 122, 35, 59, 128 } /* y 
= tm */ -}; - -static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS] - [VP9_INTRA_MODES - 1] = { - { 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* block_size < 8x8 */, - { 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* block_size < 16x16 */, - { 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* block_size < 32x32 */, - { 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* block_size >= 32x32 */ -}; - -static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1] = { - { 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */, - { 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */, - { 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */, - { 97, 5, 44, 131, 176, 139, 48, 68, 97 } /* y = d45 */, - { 83, 5, 42, 156, 111, 152, 26, 49, 152 } /* y = d135 */, - { 80, 5, 58, 178, 74, 83, 33, 62, 145 } /* y = d117 */, - { 86, 5, 32, 154, 192, 168, 14, 22, 163 } /* y = d153 */, - { 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d27 */, - { 77, 7, 64, 116, 132, 122, 37, 126, 120 } /* y = d63 */, - { 101, 21, 107, 181, 192, 103, 19, 67, 125 } /* y = tm */ -}; - -static const uint8_t vp9_default_inter_mode_prob[INTER_MODE_CONTEXTS][4] = { - {2, 173, 34, 0}, // 0 = both zero mv - {7, 145, 85, 0}, // 1 = one zero mv + one a predicted mv - {7, 166, 63, 0}, // 2 = two predicted mvs - {7, 94, 66, 0}, // 3 = one predicted/zero and one new mv - {8, 64, 46, 0}, // 4 = two new mvs - {17, 81, 31, 0}, // 5 = one intra neighbour + x - {25, 29, 30, 0}, // 6 = two intra neighbours -}; -static const vp9_prob vp9_partition_probs[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS] - [PARTITION_TYPES] = { /* 1 byte padding */ - { /* frame_type = keyframe */ - /* 8x8 -> 4x4 */ - { 158, 97, 94, 0 } /* a/l both not split */, - { 93, 24, 99, 0 } /* a split, l not split */, - { 85, 119, 44, 0 } /* l split, a not split */, - { 62, 59, 67, 0 } /* a/l both split */, - /* 16x16 -> 8x8 */ - { 149, 53, 53, 0 } /* a/l both not split */, - { 94, 20, 48, 0 } /* a split, l not split */, - { 83, 53, 24, 0 } /* l 
split, a not split */, - { 52, 18, 18, 0 } /* a/l both split */, - /* 32x32 -> 16x16 */ - { 150, 40, 39, 0 } /* a/l both not split */, - { 78, 12, 26, 0 } /* a split, l not split */, - { 67, 33, 11, 0 } /* l split, a not split */, - { 24, 7, 5, 0 } /* a/l both split */, - /* 64x64 -> 32x32 */ - { 174, 35, 49, 0 } /* a/l both not split */, - { 68, 11, 27, 0 } /* a split, l not split */, - { 57, 15, 9, 0 } /* l split, a not split */, - { 12, 3, 3, 0 } /* a/l both split */ - }, { /* frame_type = interframe */ - /* 8x8 -> 4x4 */ - { 199, 122, 141, 0 } /* a/l both not split */, - { 147, 63, 159, 0 } /* a split, l not split */, - { 148, 133, 118, 0 } /* l split, a not split */, - { 121, 104, 114, 0 } /* a/l both split */, - /* 16x16 -> 8x8 */ - { 174, 73, 87, 0 } /* a/l both not split */, - { 92, 41, 83, 0 } /* a split, l not split */, - { 82, 99, 50, 0 } /* l split, a not split */, - { 53, 39, 39, 0 } /* a/l both split */, - /* 32x32 -> 16x16 */ - { 177, 58, 59, 0 } /* a/l both not split */, - { 68, 26, 63, 0 } /* a split, l not split */, - { 52, 79, 25, 0 } /* l split, a not split */, - { 17, 14, 12, 0 } /* a/l both split */, - /* 64x64 -> 32x32 */ - { 222, 34, 30, 0 } /* a/l both not split */, - { 72, 16, 44, 0 } /* a split, l not split */, - { 58, 32, 12, 0 } /* l split, a not split */, - { 10, 7, 6, 0 } /* a/l both split */ - } -}; -static const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = { - -DC_PRED, 2, // 0 = DC_NODE - -TM_PRED, 4, // 1 = TM_NODE - -V_PRED, 6, // 2 = V_NODE - 8, 12, // 3 = COM_NODE - -H_PRED, 10, // 4 = H_NODE - -D135_PRED, -D117_PRED, // 5 = D135_NODE - -D45_PRED, 14, // 6 = D45_NODE - -D63_PRED, 16, // 7 = D63_NODE - -D153_PRED, -D27_PRED // 8 = D153_NODE -}; - -static const vp9_tree_index vp9_partition_tree[6] = { - -PARTITION_NONE, 2, - -PARTITION_HORZ, 4, - -PARTITION_VERT, -PARTITION_SPLIT -}; - -static const vp9_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { - 9, 102, 187, 225 -}; - -static const vp9_prob 
default_comp_inter_p[COMP_INTER_CONTEXTS] = { - 239, 183, 119, 96, 41 -}; - -static const vp9_prob default_comp_ref_p[REF_CONTEXTS] = { - 50, 126, 123, 221, 226 -}; - -static const vp9_prob default_single_ref_p[REF_CONTEXTS][2] = { - { 33, 16 }, - { 77, 74 }, - { 142, 142 }, - { 172, 170 }, - { 238, 247 } -}; - -static const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1] - [VP9_SWITCHABLE_FILTERS-1] = { - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 149, 144, }, -}; -static const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_CONTEXTS] - [TX_SIZE_MAX_SB - 1] = { - { 3, 136, 37, }, - { 5, 52, 13, }, -}; -static const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_CONTEXTS] - [TX_SIZE_MAX_SB - 2] = { - { 20, 152, }, - { 15, 101, }, -}; -static const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_CONTEXTS] - [TX_SIZE_MAX_SB - 3] = { - { 100, }, - { 66, }, -}; -static const vp9_prob vp9_default_mbskip_probs[MBSKIP_CONTEXTS] = { //its C0..shud be f8?? - 192, 128, 64 -}; - -static const nvdec_nmv_context vp9_default_nmv_context = { - {32, 64, 96}, /* joints */ - {128, 128}, /* sign */ - {{216},{208}}, /* class0 */ - {{64, 96, 64},{64, 96, 64}}, /* fp */ - {160,160}, /* class0_hp bit */ - {128,128}, /* hp */ - {{224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, - {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}}, /* class */ - {{{128, 128, 64}, {96, 112, 64}}, - {{128, 128, 64}, {96, 112, 64}}}, /* class0_fp */ - {{136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}}, /* bits */ -}; +protected: + VkParserVp9PictureData m_PicData; -static const int32_t vp9_seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 }; -static const int32_t vp9_seg_feature_data_max[SEG_LVL_MAX] = { 255, 63, 3, 0 }; -typedef uint8_t vp9_coeff_probs[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES]; + VkPicIf* m_pCurrPic; + VkPicIf* m_pOutFrame[VP9_MAX_NUM_SPATIAL_LAYERS]; -static const vp9_coeff_probs 
default_coef_probs_4x4[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 195, 29, 183 }, - { 84, 49, 136 }, - { 8, 42, 71 } - }, { /* Coeff Band 1 */ - { 31, 107, 169 }, - { 35, 99, 159 }, - { 17, 82, 140 }, - { 8, 66, 114 }, - { 2, 44, 76 }, - { 1, 19, 32 } - }, { /* Coeff Band 2 */ - { 40, 132, 201 }, - { 29, 114, 187 }, - { 13, 91, 157 }, - { 7, 75, 127 }, - { 3, 58, 95 }, - { 1, 28, 47 } - }, { /* Coeff Band 3 */ - { 69, 142, 221 }, - { 42, 122, 201 }, - { 15, 91, 159 }, - { 6, 67, 121 }, - { 1, 42, 77 }, - { 1, 17, 31 } - }, { /* Coeff Band 4 */ - { 102, 148, 228 }, - { 67, 117, 204 }, - { 17, 82, 154 }, - { 6, 59, 114 }, - { 2, 39, 75 }, - { 1, 15, 29 } - }, { /* Coeff Band 5 */ - { 156, 57, 233 }, - { 119, 57, 212 }, - { 58, 48, 163 }, - { 29, 40, 124 }, - { 12, 30, 81 }, - { 3, 12, 31 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 191, 107, 226 }, - { 124, 117, 204 }, - { 25, 99, 155 } - }, { /* Coeff Band 1 */ - { 29, 148, 210 }, - { 37, 126, 194 }, - { 8, 93, 157 }, - { 2, 68, 118 }, - { 1, 39, 69 }, - { 1, 17, 33 } - }, { /* Coeff Band 2 */ - { 41, 151, 213 }, - { 27, 123, 193 }, - { 3, 82, 144 }, - { 1, 58, 105 }, - { 1, 32, 60 }, - { 1, 13, 26 } - }, { /* Coeff Band 3 */ - { 59, 159, 220 }, - { 23, 126, 198 }, - { 4, 88, 151 }, - { 1, 66, 114 }, - { 1, 38, 71 }, - { 1, 18, 34 } - }, { /* Coeff Band 4 */ - { 114, 136, 232 }, - { 51, 114, 207 }, - { 11, 83, 155 }, - { 3, 56, 105 }, - { 1, 33, 65 }, - { 1, 17, 34 } - }, { /* Coeff Band 5 */ - { 149, 65, 234 }, - { 121, 57, 215 }, - { 61, 49, 166 }, - { 28, 36, 114 }, - { 12, 25, 76 }, - { 3, 16, 42 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 214, 49, 220 }, - { 132, 63, 188 }, - { 42, 65, 137 } - }, { /* Coeff Band 1 */ - { 85, 137, 221 }, - { 104, 131, 216 }, - { 49, 111, 192 }, - { 21, 87, 155 }, - { 2, 49, 87 }, - { 1, 16, 28 } - }, { /* Coeff Band 2 */ - { 89, 163, 230 }, - { 90, 137, 220 }, - { 29, 100, 183 }, - { 10, 70, 
135 }, - { 2, 42, 81 }, - { 1, 17, 33 } - }, { /* Coeff Band 3 */ - { 108, 167, 237 }, - { 55, 133, 222 }, - { 15, 97, 179 }, - { 4, 72, 135 }, - { 1, 45, 85 }, - { 1, 19, 38 } - }, { /* Coeff Band 4 */ - { 124, 146, 240 }, - { 66, 124, 224 }, - { 17, 88, 175 }, - { 4, 58, 122 }, - { 1, 36, 75 }, - { 1, 18, 37 } - }, { /* Coeff Band 5 */ - { 141, 79, 241 }, - { 126, 70, 227 }, - { 66, 58, 182 }, - { 30, 44, 136 }, - { 12, 34, 96 }, - { 2, 20, 47 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 229, 99, 249 }, - { 143, 111, 235 }, - { 46, 109, 192 } - }, { /* Coeff Band 1 */ - { 82, 158, 236 }, - { 94, 146, 224 }, - { 25, 117, 191 }, - { 9, 87, 149 }, - { 3, 56, 99 }, - { 1, 33, 57 } - }, { /* Coeff Band 2 */ - { 83, 167, 237 }, - { 68, 145, 222 }, - { 10, 103, 177 }, - { 2, 72, 131 }, - { 1, 41, 79 }, - { 1, 20, 39 } - }, { /* Coeff Band 3 */ - { 99, 167, 239 }, - { 47, 141, 224 }, - { 10, 104, 178 }, - { 2, 73, 133 }, - { 1, 44, 85 }, - { 1, 22, 47 } - }, { /* Coeff Band 4 */ - { 127, 145, 243 }, - { 71, 129, 228 }, - { 17, 93, 177 }, - { 3, 61, 124 }, - { 1, 41, 84 }, - { 1, 21, 52 } - }, { /* Coeff Band 5 */ - { 157, 78, 244 }, - { 140, 72, 231 }, - { 69, 58, 184 }, - { 31, 44, 137 }, - { 14, 38, 105 }, - { 8, 23, 61 } - } - } - } -}; -static const vp9_coeff_probs default_coef_probs_8x8[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 125, 34, 187 }, - { 52, 41, 133 }, - { 6, 31, 56 } - }, { /* Coeff Band 1 */ - { 37, 109, 153 }, - { 51, 102, 147 }, - { 23, 87, 128 }, - { 8, 67, 101 }, - { 1, 41, 63 }, - { 1, 19, 29 } - }, { /* Coeff Band 2 */ - { 31, 154, 185 }, - { 17, 127, 175 }, - { 6, 96, 145 }, - { 2, 73, 114 }, - { 1, 51, 82 }, - { 1, 28, 45 } - }, { /* Coeff Band 3 */ - { 23, 163, 200 }, - { 10, 131, 185 }, - { 2, 93, 148 }, - { 1, 67, 111 }, - { 1, 41, 69 }, - { 1, 14, 24 } - }, { /* Coeff Band 4 */ - { 29, 176, 217 }, - { 12, 145, 201 }, - { 3, 101, 156 }, - { 1, 69, 111 }, - { 1, 39, 63 }, - { 1, 14, 23 } - }, 
{ /* Coeff Band 5 */ - { 57, 192, 233 }, - { 25, 154, 215 }, - { 6, 109, 167 }, - { 3, 78, 118 }, - { 1, 48, 69 }, - { 1, 21, 29 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 202, 105, 245 }, - { 108, 106, 216 }, - { 18, 90, 144 } - }, { /* Coeff Band 1 */ - { 33, 172, 219 }, - { 64, 149, 206 }, - { 14, 117, 177 }, - { 5, 90, 141 }, - { 2, 61, 95 }, - { 1, 37, 57 } - }, { /* Coeff Band 2 */ - { 33, 179, 220 }, - { 11, 140, 198 }, - { 1, 89, 148 }, - { 1, 60, 104 }, - { 1, 33, 57 }, - { 1, 12, 21 } - }, { /* Coeff Band 3 */ - { 30, 181, 221 }, - { 8, 141, 198 }, - { 1, 87, 145 }, - { 1, 58, 100 }, - { 1, 31, 55 }, - { 1, 12, 20 } - }, { /* Coeff Band 4 */ - { 32, 186, 224 }, - { 7, 142, 198 }, - { 1, 86, 143 }, - { 1, 58, 100 }, - { 1, 31, 55 }, - { 1, 12, 22 } - }, { /* Coeff Band 5 */ - { 57, 192, 227 }, - { 20, 143, 204 }, - { 3, 96, 154 }, - { 1, 68, 112 }, - { 1, 42, 69 }, - { 1, 19, 32 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 212, 35, 215 }, - { 113, 47, 169 }, - { 29, 48, 105 } - }, { /* Coeff Band 1 */ - { 74, 129, 203 }, - { 106, 120, 203 }, - { 49, 107, 178 }, - { 19, 84, 144 }, - { 4, 50, 84 }, - { 1, 15, 25 } - }, { /* Coeff Band 2 */ - { 71, 172, 217 }, - { 44, 141, 209 }, - { 15, 102, 173 }, - { 6, 76, 133 }, - { 2, 51, 89 }, - { 1, 24, 42 } - }, { /* Coeff Band 3 */ - { 64, 185, 231 }, - { 31, 148, 216 }, - { 8, 103, 175 }, - { 3, 74, 131 }, - { 1, 46, 81 }, - { 1, 18, 30 } - }, { /* Coeff Band 4 */ - { 65, 196, 235 }, - { 25, 157, 221 }, - { 5, 105, 174 }, - { 1, 67, 120 }, - { 1, 38, 69 }, - { 1, 15, 30 } - }, { /* Coeff Band 5 */ - { 65, 204, 238 }, - { 30, 156, 224 }, - { 7, 107, 177 }, - { 2, 70, 124 }, - { 1, 42, 73 }, - { 1, 18, 34 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 225, 86, 251 }, - { 144, 104, 235 }, - { 42, 99, 181 } - }, { /* Coeff Band 1 */ - { 85, 175, 239 }, - { 112, 165, 229 }, - { 29, 136, 200 }, - { 12, 103, 162 }, - { 6, 77, 123 }, - { 2, 53, 84 } - }, { /* Coeff Band 2 */ 
- { 75, 183, 239 }, - { 30, 155, 221 }, - { 3, 106, 171 }, - { 1, 74, 128 }, - { 1, 44, 76 }, - { 1, 17, 28 } - }, { /* Coeff Band 3 */ - { 73, 185, 240 }, - { 27, 159, 222 }, - { 2, 107, 172 }, - { 1, 75, 127 }, - { 1, 42, 73 }, - { 1, 17, 29 } - }, { /* Coeff Band 4 */ - { 62, 190, 238 }, - { 21, 159, 222 }, - { 2, 107, 172 }, - { 1, 72, 122 }, - { 1, 40, 71 }, - { 1, 18, 32 } - }, { /* Coeff Band 5 */ - { 61, 199, 240 }, - { 27, 161, 226 }, - { 4, 113, 180 }, - { 1, 76, 129 }, - { 1, 46, 80 }, - { 1, 23, 41 } - } - } - } -}; -static const vp9_coeff_probs default_coef_probs_16x16[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 7, 27, 153 }, - { 5, 30, 95 }, - { 1, 16, 30 } - }, { /* Coeff Band 1 */ - { 50, 75, 127 }, - { 57, 75, 124 }, - { 27, 67, 108 }, - { 10, 54, 86 }, - { 1, 33, 52 }, - { 1, 12, 18 } - }, { /* Coeff Band 2 */ - { 43, 125, 151 }, - { 26, 108, 148 }, - { 7, 83, 122 }, - { 2, 59, 89 }, - { 1, 38, 60 }, - { 1, 17, 27 } - }, { /* Coeff Band 3 */ - { 23, 144, 163 }, - { 13, 112, 154 }, - { 2, 75, 117 }, - { 1, 50, 81 }, - { 1, 31, 51 }, - { 1, 14, 23 } - }, { /* Coeff Band 4 */ - { 18, 162, 185 }, - { 6, 123, 171 }, - { 1, 78, 125 }, - { 1, 51, 86 }, - { 1, 31, 54 }, - { 1, 14, 23 } - }, { /* Coeff Band 5 */ - { 15, 199, 227 }, - { 3, 150, 204 }, - { 1, 91, 146 }, - { 1, 55, 95 }, - { 1, 30, 53 }, - { 1, 11, 20 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 19, 55, 240 }, - { 19, 59, 196 }, - { 3, 52, 105 } - }, { /* Coeff Band 1 */ - { 41, 166, 207 }, - { 104, 153, 199 }, - { 31, 123, 181 }, - { 14, 101, 152 }, - { 5, 72, 106 }, - { 1, 36, 52 } - }, { /* Coeff Band 2 */ - { 35, 176, 211 }, - { 12, 131, 190 }, - { 2, 88, 144 }, - { 1, 60, 101 }, - { 1, 36, 60 }, - { 1, 16, 28 } - }, { /* Coeff Band 3 */ - { 28, 183, 213 }, - { 8, 134, 191 }, - { 1, 86, 142 }, - { 1, 56, 96 }, - { 1, 30, 53 }, - { 1, 12, 20 } - }, { /* Coeff Band 4 */ - { 20, 190, 215 }, - { 4, 135, 192 }, - { 1, 84, 139 }, - { 1, 53, 91 
}, - { 1, 28, 49 }, - { 1, 11, 20 } - }, { /* Coeff Band 5 */ - { 13, 196, 216 }, - { 2, 137, 192 }, - { 1, 86, 143 }, - { 1, 57, 99 }, - { 1, 32, 56 }, - { 1, 13, 24 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 211, 29, 217 }, - { 96, 47, 156 }, - { 22, 43, 87 } - }, { /* Coeff Band 1 */ - { 78, 120, 193 }, - { 111, 116, 186 }, - { 46, 102, 164 }, - { 15, 80, 128 }, - { 2, 49, 76 }, - { 1, 18, 28 } - }, { /* Coeff Band 2 */ - { 71, 161, 203 }, - { 42, 132, 192 }, - { 10, 98, 150 }, - { 3, 69, 109 }, - { 1, 44, 70 }, - { 1, 18, 29 } - }, { /* Coeff Band 3 */ - { 57, 186, 211 }, - { 30, 140, 196 }, - { 4, 93, 146 }, - { 1, 62, 102 }, - { 1, 38, 65 }, - { 1, 16, 27 } - }, { /* Coeff Band 4 */ - { 47, 199, 217 }, - { 14, 145, 196 }, - { 1, 88, 142 }, - { 1, 57, 98 }, - { 1, 36, 62 }, - { 1, 15, 26 } - }, { /* Coeff Band 5 */ - { 26, 219, 229 }, - { 5, 155, 207 }, - { 1, 94, 151 }, - { 1, 60, 104 }, - { 1, 36, 62 }, - { 1, 16, 28 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 233, 29, 248 }, - { 146, 47, 220 }, - { 43, 52, 140 } - }, { /* Coeff Band 1 */ - { 100, 163, 232 }, - { 179, 161, 222 }, - { 63, 142, 204 }, - { 37, 113, 174 }, - { 26, 89, 137 }, - { 18, 68, 97 } - }, { /* Coeff Band 2 */ - { 85, 181, 230 }, - { 32, 146, 209 }, - { 7, 100, 164 }, - { 3, 71, 121 }, - { 1, 45, 77 }, - { 1, 18, 30 } - }, { /* Coeff Band 3 */ - { 65, 187, 230 }, - { 20, 148, 207 }, - { 2, 97, 159 }, - { 1, 68, 116 }, - { 1, 40, 70 }, - { 1, 14, 29 } - }, { /* Coeff Band 4 */ - { 40, 194, 227 }, - { 8, 147, 204 }, - { 1, 94, 155 }, - { 1, 65, 112 }, - { 1, 39, 66 }, - { 1, 14, 26 } - }, { /* Coeff Band 5 */ - { 16, 208, 228 }, - { 3, 151, 207 }, - { 1, 98, 160 }, - { 1, 67, 117 }, - { 1, 41, 74 }, - { 1, 17, 31 } - } - } - } -}; -static const vp9_coeff_probs default_coef_probs_32x32[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 17, 38, 140 }, - { 7, 34, 80 }, - { 1, 17, 29 } - }, { /* Coeff Band 1 */ - { 
37, 75, 128 }, - { 41, 76, 128 }, - { 26, 66, 116 }, - { 12, 52, 94 }, - { 2, 32, 55 }, - { 1, 10, 16 } - }, { /* Coeff Band 2 */ - { 50, 127, 154 }, - { 37, 109, 152 }, - { 16, 82, 121 }, - { 5, 59, 85 }, - { 1, 35, 54 }, - { 1, 13, 20 } - }, { /* Coeff Band 3 */ - { 40, 142, 167 }, - { 17, 110, 157 }, - { 2, 71, 112 }, - { 1, 44, 72 }, - { 1, 27, 45 }, - { 1, 11, 17 } - }, { /* Coeff Band 4 */ - { 30, 175, 188 }, - { 9, 124, 169 }, - { 1, 74, 116 }, - { 1, 48, 78 }, - { 1, 30, 49 }, - { 1, 11, 18 } - }, { /* Coeff Band 5 */ - { 10, 222, 223 }, - { 2, 150, 194 }, - { 1, 83, 128 }, - { 1, 48, 79 }, - { 1, 27, 45 }, - { 1, 11, 17 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 36, 41, 235 }, - { 29, 36, 193 }, - { 10, 27, 111 } - }, { /* Coeff Band 1 */ - { 85, 165, 222 }, - { 177, 162, 215 }, - { 110, 135, 195 }, - { 57, 113, 168 }, - { 23, 83, 120 }, - { 10, 49, 61 } - }, { /* Coeff Band 2 */ - { 85, 190, 223 }, - { 36, 139, 200 }, - { 5, 90, 146 }, - { 1, 60, 103 }, - { 1, 38, 65 }, - { 1, 18, 30 } - }, { /* Coeff Band 3 */ - { 72, 202, 223 }, - { 23, 141, 199 }, - { 2, 86, 140 }, - { 1, 56, 97 }, - { 1, 36, 61 }, - { 1, 16, 27 } - }, { /* Coeff Band 4 */ - { 55, 218, 225 }, - { 13, 145, 200 }, - { 1, 86, 141 }, - { 1, 57, 99 }, - { 1, 35, 61 }, - { 1, 13, 22 } - }, { /* Coeff Band 5 */ - { 15, 235, 212 }, - { 1, 132, 184 }, - { 1, 84, 139 }, - { 1, 57, 97 }, - { 1, 34, 56 }, - { 1, 14, 23 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 181, 21, 201 }, - { 61, 37, 123 }, - { 10, 38, 71 } - }, { /* Coeff Band 1 */ - { 47, 106, 172 }, - { 95, 104, 173 }, - { 42, 93, 159 }, - { 18, 77, 131 }, - { 4, 50, 81 }, - { 1, 17, 23 } - }, { /* Coeff Band 2 */ - { 62, 147, 199 }, - { 44, 130, 189 }, - { 28, 102, 154 }, - { 18, 75, 115 }, - { 2, 44, 65 }, - { 1, 12, 19 } - }, { /* Coeff Band 3 */ - { 55, 153, 210 }, - { 24, 130, 194 }, - { 3, 93, 146 }, - { 1, 61, 97 }, - { 1, 31, 50 }, - { 1, 10, 16 } - }, { /* Coeff Band 4 */ - { 49, 186, 
223 }, - { 17, 148, 204 }, - { 1, 96, 142 }, - { 1, 53, 83 }, - { 1, 26, 44 }, - { 1, 11, 17 } - }, { /* Coeff Band 5 */ - { 13, 217, 212 }, - { 2, 136, 180 }, - { 1, 78, 124 }, - { 1, 50, 83 }, - { 1, 29, 49 }, - { 1, 14, 23 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 197, 13, 247 }, - { 82, 17, 222 }, - { 25, 17, 162 } - }, { /* Coeff Band 1 */ - { 126, 186, 247 }, - { 234, 191, 243 }, - { 176, 177, 234 }, - { 104, 158, 220 }, - { 66, 128, 186 }, - { 55, 90, 137 } - }, { /* Coeff Band 2 */ - { 111, 197, 242 }, - { 46, 158, 219 }, - { 9, 104, 171 }, - { 2, 65, 125 }, - { 1, 44, 80 }, - { 1, 17, 91 } - }, { /* Coeff Band 3 */ - { 104, 208, 245 }, - { 39, 168, 224 }, - { 3, 109, 162 }, - { 1, 79, 124 }, - { 1, 50, 102 }, - { 1, 43, 102 } - }, { /* Coeff Band 4 */ - { 84, 220, 246 }, - { 31, 177, 231 }, - { 2, 115, 180 }, - { 1, 79, 134 }, - { 1, 55, 77 }, - { 1, 60, 79 } - }, { /* Coeff Band 5 */ - { 43, 243, 240 }, - { 8, 180, 217 }, - { 1, 115, 166 }, - { 1, 84, 121 }, - { 1, 51, 67 }, - { 1, 16, 6 } - } - } - } -}; + int m_frameIdx; + int m_dataSize; + int m_frameSize; + bool m_frameSizeChanged; -static const uint8_t vp9_kf_default_bmode_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES] - [VP9_INTRA_MODES-1] = { - { /* above = dc */ - { 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */, - { 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */, - { 73, 32, 19, 187, 222, 215, 46, 34, 100 } /* left = h */, - { 91, 30, 32, 116, 121, 186, 93, 86, 94 } /* left = d45 */, - { 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */, - { 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */, - { 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */, - { 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */, - { 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */, - { 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */ - }, { /* above = v */ - { 63, 36, 126, 146, 123, 158, 60, 90, 96 } /* left = dc */, - { 43, 46, 168, 134, 107, 128, 
69, 142, 92 } /* left = v */, - { 44, 29, 68, 159, 201, 177, 50, 57, 77 } /* left = h */, - { 58, 38, 76, 114, 97, 172, 78, 133, 92 } /* left = d45 */, - { 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */, - { 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */, - { 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */, - { 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */, - { 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */, - { 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */ - }, { /* above = h */ - { 82, 26, 26, 171, 208, 204, 44, 32, 105 } /* left = dc */, - { 55, 44, 68, 166, 179, 192, 57, 57, 108 } /* left = v */, - { 42, 26, 11, 199, 241, 228, 23, 15, 85 } /* left = h */, - { 68, 42, 19, 131, 160, 199, 55, 52, 83 } /* left = d45 */, - { 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */, - { 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */, - { 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */, - { 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */, - { 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 */, - { 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */ - }, { /* above = d45 */ - { 103, 26, 36, 129, 132, 201, 83, 80, 93 } /* left = dc */, - { 59, 38, 83, 112, 103, 162, 98, 136, 90 } /* left = v */, - { 62, 30, 23, 158, 200, 207, 59, 57, 50 } /* left = h */, - { 67, 30, 29, 84, 86, 191, 102, 91, 59 } /* left = d45 */, - { 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */, - { 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */, - { 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */, - { 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */, - { 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */, - { 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */ - }, { /* above = d135 */ - { 69, 23, 29, 128, 83, 199, 46, 44, 101 } /* left = dc */, - { 53, 40, 55, 139, 69, 183, 61, 80, 110 } /* left = v */, - { 40, 29, 19, 161, 180, 207, 
43, 24, 91 } /* left = h */, - { 60, 34, 19, 105, 61, 198, 53, 64, 89 } /* left = d45 */, - { 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */, - { 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */, - { 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */, - { 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */, - { 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */, - { 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */ - }, { /* above = d117 */ - { 64, 19, 37, 156, 66, 138, 49, 95, 133 } /* left = dc */, - { 46, 27, 80, 150, 55, 124, 55, 121, 135 } /* left = v */, - { 36, 23, 27, 165, 149, 166, 54, 64, 118 } /* left = h */, - { 53, 21, 36, 131, 63, 163, 60, 109, 81 } /* left = d45 */, - { 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */, - { 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */, - { 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */, - { 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */, - { 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */, - { 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */ - }, { /* above = d153 */ - { 75, 17, 22, 136, 138, 185, 32, 34, 166 } /* left = dc */, - { 56, 39, 58, 133, 117, 173, 48, 53, 187 } /* left = v */, - { 35, 21, 12, 161, 212, 207, 20, 23, 145 } /* left = h */, - { 56, 29, 19, 117, 109, 181, 55, 68, 112 } /* left = d45 */, - { 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */, - { 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */, - { 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */, - { 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */, - { 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */, - { 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */ - }, { /* above = d27 */ - { 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */, - { 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */, - { 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */, - { 68, 36, 17, 106, 102, 206, 59, 
74, 74 } /* left = d45 */, - { 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */, - { 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */, - { 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */, - { 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */, - { 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */, - { 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */ - }, { /* above = d63 */ - { 78, 23, 39, 111, 117, 170, 74, 124, 94 } /* left = dc */, - { 48, 34, 86, 101, 92, 146, 78, 179, 134 } /* left = v */, - { 47, 22, 24, 138, 187, 178, 68, 69, 59 } /* left = h */, - { 56, 25, 33, 105, 112, 187, 95, 177, 129 } /* left = d45 */, - { 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */, - { 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */, - { 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */, - { 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */, - { 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */, - { 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */ - }, { /* above = tm */ - { 65, 70, 60, 155, 159, 199, 61, 60, 81 } /* left = dc */, - { 44, 78, 115, 132, 119, 173, 71, 112, 93 } /* left = v */, - { 39, 38, 21, 184, 227, 206, 42, 32, 64 } /* left = h */, - { 58, 47, 36, 124, 137, 193, 80, 82, 78 } /* left = d45 */, - { 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */, - { 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */, - { 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */, - { 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */, - { 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */, - { 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */ - } -}; + int m_rtOrigWidth; + int m_rtOrigHeight; + bool m_pictureStarted; + bool m_bitstreamComplete; -class VulkanVP9Decoder : public VulkanVideoDecoder -{ -protected: - vp9_reader reader; - nvdec_vp9EntropyProbs_t m_EntropyLast[FRAME_CONTEXTS]; - nvdec_vp9AdaptiveEntropyProbs_t m_PrevCtx; - const unsigned 
char* m_pCompressedHeader; + // Parsing state for compute_image_size() side effects + int m_lastFrameWidth; + int m_lastFrameHeight; + bool m_lastShowFrame; - void vp9_init_mbmode_probs(vp9_prob_update_s *pProbSetup); - vp9_prob weighted_prob(int32_t prob1, int32_t prob2, int32_t factor); - vp9_prob clip_prob(uint32_t p); - vp9_prob get_prob(uint32_t num, uint32_t den); - vp9_prob get_binary_prob(uint32_t n0, uint32_t n1); - uint32_t convert_distribution(uint32_t i, - const vp9_tree_index * tree, - uint8_t probs[], - uint32_t branch_ct[][2], - const uint32_t num_events[], - uint32_t tok0_offset); - void vp9_tree_probs_from_distribution(const vp9_tree_index* tree, - uint8_t probs [ /* n-1 */ ], - uint32_t branch_ct [ /* n-1 */ ] [2], - const uint32_t num_events[ /* n */ ], - uint32_t tok0_offset); - void update_coef_probs(uint8_t dst_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint8_t pre_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint32_t coef_counts[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES+1], - uint32_t (*eob_counts)[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS], - int32_t count_sat, int32_t update_factor); - void adaptCoefProbs(vp9_prob_update_s *pProbSetup); - int32_t update_mode_ct(vp9_prob pre_prob, vp9_prob prob, uint32_t branch_ct[2]); - int32_t update_mode_ct2(vp9_prob pre_prob, uint32_t branch_ct[2]); - void update_mode_probs(int32_t n_modes, - const vp9_tree_index *tree, uint32_t *cnt, - vp9_prob *pre_probs, vp9_prob *pre_probsB, - vp9_prob *dst_probs, vp9_prob *dst_probsB, - uint32_t tok0_offset); - void tx_counts_to_branch_counts_32x32(uint32_t *tx_count_32x32p, uint32_t (*ct_32x32p)[2]); - void tx_counts_to_branch_counts_16x16(uint32_t *tx_count_16x16p, uint32_t (*ct_16x16p)[2]); - void tx_counts_to_branch_counts_8x8(uint32_t *tx_count_8x8p, uint32_t (*ct_8x8p)[2]); - 
void adaptModeProbs(vp9_prob_update_s *pProbSetup); - void adaptModeContext(vp9_prob_update_s *pProbSetup); - uint32_t adapt_probs(uint32_t i, - const signed char* tree, - vp9_prob this_probs[], - const vp9_prob last_probs[], - const uint32_t num_events[]); - void adapt_prob(vp9_prob *dest, vp9_prob prep, uint32_t ct[2]); - void adaptNmvProbs(vp9_prob_update_s *pProbSetup); + // Last used loop filter parameters + int8_t m_loopFilterRefDeltas[STD_VIDEO_VP9_MAX_REF_FRAMES]; + int8_t m_loopFilterModeDeltas[STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS]; + vp9_ref_frames_s m_pBuffers[VP9_BUFFER_POOL_MAX_SIZE]; + protected: - void vp9_reader_fill(); - int32_t vp9_reader_init (uint32_t size); - int32_t vp9_read_bit(); - int32_t vp9_read(int32_t probability); - int32_t vp9_read_literal(int32_t bits); - uint32_t ParseCompressedVP9(); - int32_t get_unsigned_bits(uint32_t num_values); - uint32_t swGetBitsUnsignedMax( uint32_t maxValue); - vp9_prob vp9hwdReadProbDiffUpdate(uint8_t oldp); - int32_t vp9_inv_recenter_nonneg(int32_t v, int32_t m); - int32_t inv_remap_prob(int32_t v, int32_t m); - int32_t merge_index(int32_t v, int32_t n, int32_t modulus); - uint32_t BoolDecodeUniform(uint32_t n); - uint32_t vp9hwdDecodeSubExp(uint32_t k, uint32_t num_syms); - uint32_t vp9hwdDecodeCoeffUpdate(uint8_t probCoeffs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1]); - uint32_t vp9hwdDecodeMvUpdate(vp9_prob_update_s *pProbSetup); - void update_nmv(vp9_prob *const p, const vp9_prob upd_p); + void UpdateFramePointers(VkPicIf* currentPicture); + bool AddBuffertoOutputQueue(VkPicIf* pDispPic); + void AddBuffertoDispQueue(VkPicIf* pDispPic); + virtual void lEndPicture(VkPicIf* pDispPic); + void EndOfStream() override; public: VulkanVP9Decoder(VkVideoCodecOperationFlagBitsKHR std); - void ResetProbs(vp9_prob_update_s *pProbSetup); - void GetProbs(vp9_prob_update_s *pProbSetup); - uint32_t UpdateForwardProbability(vp9_prob_update_s *pProbSetup, const 
unsigned char* pCompressed_Header); - void UpdateBackwardProbability(vp9_prob_update_s *pProbSetup); + ~VulkanVP9Decoder(); // TODO: Need to implement these functions. - bool IsPictureBoundary(int32_t) { return true; }; - int32_t ParseNalUnit() { return NALU_UNKNOWN; }; + bool IsPictureBoundary(int32_t) override { return true; }; + int32_t ParseNalUnit() override { return NALU_UNKNOWN; }; bool DecodePicture(VkParserPictureData *) { return false; }; - void InitParser() {} - bool BeginPicture(VkParserPictureData *) { return false; } - void CreatePrivateContext() {} - void FreeContext() {} + void InitParser() override; + bool BeginPicture(VkParserPictureData *) override; + void CreatePrivateContext() override {} + void FreeContext() override {} + +private: + bool ParseByteStream(const VkParserBitstreamPacket* pck, size_t* pParsedBtes) override; + bool ParseFrameHeader(uint32_t framesize); + bool ParseUncompressedHeader(); + bool ParseColorConfig(); + void ParseFrameAndRenderSize(); + void ParseFrameAndRenderSizeWithRefs(); + void ComputeImageSize(); + void ParseLoopFilterParams(); + void ParseQuantizationParams(); + int32_t ReadDeltaQ(); + void ParseSegmentationParams(); + uint8_t CalcMinLog2TileCols(); + uint8_t CalcMaxLog2TileCols(); + void ParseTileInfo(); + void ParseSuperFrameIndex(const uint8_t* data, uint32_t data_sz, uint32_t sizes[8], uint32_t* count); + }; #endif // _VP9_PROBMANAGER_H_ diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp index 99452952..701e4c07 100644 --- a/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp +++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp @@ -20,1044 +20,889 @@ VulkanVP9Decoder::VulkanVP9Decoder(VkVideoCodecOperationFlagBitsKHR std) : VulkanVideoDecoder(std) -{ - memset(&m_EntropyLast, 0, sizeof(m_EntropyLast)); - memset(&m_PrevCtx, 0, sizeof(m_PrevCtx)); - memset(&reader, 0, sizeof(vp9_reader)); - 
m_pCompressedHeader = NULL; -} -void VulkanVP9Decoder::vp9_init_mbmode_probs(vp9_prob_update_s *pProbSetup) -{ - uint32_t i, j; - - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - { - for (j = 0; j < 8; j++) - pProbSetup->pProbTab->a.sb_ymode_prob[i][j] = default_if_y_probs[i][j]; - pProbSetup->pProbTab->a.sb_ymode_probB[i][0] = default_if_y_probs[i][8]; - } - - for (i = 0; i < VP9_INTRA_MODES; i++) - { - for (j = 0; j < 8; j++) - pProbSetup->pProbTab->kf_uv_mode_prob[i][j] = default_kf_uv_probs[i][j]; - pProbSetup->pProbTab->kf_uv_mode_probB[i][0] = default_kf_uv_probs[i][8]; - - for (j = 0; j < 8; j++) - pProbSetup->pProbTab->a.uv_mode_prob[i][j] = default_if_uv_probs[i][j]; - pProbSetup->pProbTab->a.uv_mode_probB[i][0] = default_if_uv_probs[i][8]; - } - - memcpy(pProbSetup->pProbTab->a.switchable_interp_prob, vp9_switchable_interp_prob, - sizeof(vp9_switchable_interp_prob)); - memcpy(pProbSetup->pProbTab->a.partition_prob, vp9_partition_probs, - sizeof(vp9_partition_probs)); - memcpy(pProbSetup->pProbTab->a.intra_inter_prob, default_intra_inter_p, - sizeof(default_intra_inter_p)); - memcpy(pProbSetup->pProbTab->a.comp_inter_prob, default_comp_inter_p, - sizeof(default_comp_inter_p)); - memcpy(pProbSetup->pProbTab->a.comp_ref_prob, default_comp_ref_p, - sizeof(default_comp_ref_p)); - memcpy(pProbSetup->pProbTab->a.single_ref_prob, default_single_ref_p, - sizeof(default_single_ref_p)); - memcpy(pProbSetup->pProbTab->a.tx32x32_prob, vp9_default_tx_probs_32x32p, - sizeof(vp9_default_tx_probs_32x32p)); - memcpy(pProbSetup->pProbTab->a.tx16x16_prob, vp9_default_tx_probs_16x16p, - sizeof(vp9_default_tx_probs_16x16p)); - memcpy(pProbSetup->pProbTab->a.tx8x8_prob, vp9_default_tx_probs_8x8p, - sizeof(vp9_default_tx_probs_8x8p)); - memcpy(pProbSetup->pProbTab->a.mbskip_probs, vp9_default_mbskip_probs, - sizeof(vp9_default_mbskip_probs)); - - for (i = 0; i < VP9_INTRA_MODES; i++) - { - for (j = 0; j < VP9_INTRA_MODES; j++) - { - memcpy(pProbSetup->pProbTab->kf_bmode_prob[i][j], 
vp9_kf_default_bmode_probs[i][j], 8); - pProbSetup->pProbTab->kf_bmode_probB[i][j][0] = vp9_kf_default_bmode_probs[i][j][8]; - } - } + , m_PicData() + , m_pCurrPic() + , m_frameIdx(-1) + , m_dataSize() + , m_frameSize() + , m_frameSizeChanged() + , m_rtOrigWidth() + , m_rtOrigHeight() + , m_pictureStarted() + , m_bitstreamComplete(true) + , m_lastFrameWidth(0) + , m_lastFrameHeight(0) + , m_lastShowFrame(false) + , m_pBuffers() { } -void VulkanVP9Decoder::ResetProbs(vp9_prob_update_s *pProbSetup) +VulkanVP9Decoder::~VulkanVP9Decoder() { - //reset segmentMap (buffers going to HWIF_SEGMENT_READ_BASE_LSB and HWIF_SEGMENT_WRITE_BASE_LSB) - - uint32_t i, j, k, l, m; - - memcpy(pProbSetup->pProbTab->a.inter_mode_prob, vp9_default_inter_mode_prob, sizeof(vp9_default_inter_mode_prob)); - vp9_init_mbmode_probs(pProbSetup); - memcpy(&pProbSetup->pProbTab->a.nmvc, &vp9_default_nmv_context, sizeof(nvdec_nmv_context)); - - /* Copy the default probs into two separate prob tables: part1 and part2. */ - - for( i = 0; i < VP9_BLOCK_TYPES; i++ ) { - for ( j = 0; j < VP9_REF_TYPES; j++ ) { - for ( k = 0; k < VP9_COEF_BANDS; k++ ) { - for ( l = 0; l < VP9_PREV_COEF_CONTEXTS; l++ ) { - if (l >= 3 && k == 0) - continue; - - for ( m = 0; m < UNCONSTRAINED_NODES; m++ ) { - pProbSetup->pProbTab->a.probCoeffs[i][j][k][l][m] = - default_coef_probs_4x4[i][j][k][l][m]; - pProbSetup->pProbTab->a.probCoeffs8x8[i][j][k][l][m] = - default_coef_probs_8x8[i][j][k][l][m]; - pProbSetup->pProbTab->a.probCoeffs16x16[i][j][k][l][m] = - default_coef_probs_16x16[i][j][k][l][m]; - pProbSetup->pProbTab->a.probCoeffs32x32[i][j][k][l][m] = - default_coef_probs_32x32[i][j][k][l][m]; - } - } - } - } - } - - /* Store the default probs for all saved contexts */ - if (pProbSetup->keyFrame || pProbSetup->errorResilient || pProbSetup->resetFrameContext == 3) - { - for (i = 0; i < FRAME_CONTEXTS; i++) - memcpy( &m_EntropyLast[i], pProbSetup->pProbTab, sizeof(nvdec_vp9EntropyProbs_t)); - } - else if 
(pProbSetup->resetFrameContext == 2) - memcpy( &m_EntropyLast[pProbSetup->frameContextIdx], pProbSetup->pProbTab, sizeof(nvdec_vp9EntropyProbs_t)); } -void VulkanVP9Decoder::GetProbs(vp9_prob_update_s *pProbSetup) +void VulkanVP9Decoder::InitParser() { - memcpy(pProbSetup->pProbTab, &m_EntropyLast[pProbSetup->frameContextIdx], sizeof(m_EntropyLast[pProbSetup->frameContextIdx])); + m_bNoStartCodes = true; + m_bEmulBytesPresent = false; + m_pCurrPic = nullptr; + m_bitstreamComplete = true; + m_pictureStarted = false; + EndOfStream(); } -///////////////////////////////////////////////////////////////////////////////// - - -void VulkanVP9Decoder::vp9_reader_fill() +void VulkanVP9Decoder::EndOfStream() { - vp9_reader *r = &reader; - uint32_t buffer_end = r->buffer_end; - uint32_t buffer = r->buffer; - VP9_BD_VALUE value = r->value; - int32_t count = r->count; - int32_t shift = BD_VALUE_SIZE - 8 - (count + 8); - int32_t loop_end = 0; - const int32_t bits_left = (int32_t)((buffer_end - buffer)*CHAR_BIT); - const int32_t x = shift + CHAR_BIT - bits_left; - if (x >= 0) { - count += LOTS_OF_BITS; - loop_end = x; + if (m_pCurrPic) { + m_pCurrPic->Release(); + m_pCurrPic = nullptr; } - if (x < 0 || bits_left) - { - while (shift >= loop_end) - { - count += CHAR_BIT; - uint8_t temp = m_pCompressedHeader[r->pos++]; //u( 8); - value |= (VP9_BD_VALUE)temp << shift; - shift -= CHAR_BIT; - buffer++; + for (int i = 0; i < 8; i++) { + if (m_pBuffers[i].buffer) { + m_pBuffers[i].buffer->Release(); + m_pBuffers[i].buffer = nullptr; } } - r->buffer = buffer; - r->value = value; - r->count = count; -} - -int32_t VulkanVP9Decoder::vp9_reader_init(uint32_t size) -{ - int32_t marker_bit = 0; - vp9_reader *r = &reader; - r->buffer_end = 0 + size; - r->buffer = 0; - r->value = 0; - r->count = -8; - r->range = 255; - r->pos = 0; - - vp9_reader_fill(); - marker_bit = vp9_read_bit(); - return marker_bit != 0; } -int32_t VulkanVP9Decoder::vp9_read_bit() +bool VulkanVP9Decoder::ParseByteStream(const 
VkParserBitstreamPacket* pck, size_t* pParsedBytes) { - return vp9_read( 128); -} + const uint8_t* pDataIn = (uint8_t*)pck->pByteStream; + int dataSize = (int)pck->nDataLength; -int32_t VulkanVP9Decoder::vp9_read(int32_t probability) -{ - - vp9_reader *br = &reader; - uint32_t bit = 0; - VP9_BD_VALUE value; - VP9_BD_VALUE bigsplit; - int32_t count; - uint32_t range; - uint32_t split = 1 + (((br->range - 1) * probability) >> 8); - if (br->count < 0) - vp9_reader_fill(); - value = br->value; - count = br->count; - bigsplit = (VP9_BD_VALUE)split << (BD_VALUE_SIZE - 8); - - range = split; - if (value >= bigsplit) - { - range = br->range - split; - value = value - bigsplit; - bit = 1; + if (pParsedBytes) { + *pParsedBytes = 0; } - uint32_t shift = vp9dx_bitreader_norm[range]; - range <<= shift; - value <<= shift; - count -= shift; - br->value = value; - br->count = count; - br->range = range; - return bit; -} - -int32_t VulkanVP9Decoder::vp9_read_literal( int32_t bits) -{ - int32_t z = 0, bit; - for (bit = bits - 1; bit >= 0; bit--) - { - z |= vp9_read_bit() << bit; + // Use different bitstreamBuffer than the previous frames bitstreamBuffer + // TODO: Make sure that the bitstreamBuffer is not in use. 
+ VkSharedBaseObj bitstreamBuffer; + assert(m_pClient); + m_pClient->GetBitstreamBuffer(m_bitstreamDataLen, + m_bufferOffsetAlignment, m_bufferSizeAlignment, + nullptr, 0, bitstreamBuffer); + assert(bitstreamBuffer); + if (!bitstreamBuffer) { + return false; } - return z; -} -//////////////////////////////////////////////////////////////////////////////////// -//Forward Update -uint32_t VulkanVP9Decoder::UpdateForwardProbability(vp9_prob_update_s *pProbSetup, const unsigned char* pCompressed_Header) -{ - nvdec_vp9EntropyProbs_t *fc = pProbSetup->pProbTab; // Frame context - - uint32_t tmp, i, j, k; + m_bitstreamDataLen = m_bitstreamData.SetBitstreamBuffer(bitstreamBuffer); + m_bitstreamData.ResetStreamMarkers(); - m_pCompressedHeader = pCompressed_Header; - m_PrevCtx = pProbSetup->pProbTab->a; - - if (vp9_reader_init(pProbSetup->offsetToDctParts) != 0) - { - return NOK; - } - - if (pProbSetup->lossless) - pProbSetup->transform_mode = ONLY_4X4; - else - { - pProbSetup->transform_mode = vp9_read_literal( 2); - if (pProbSetup->transform_mode == ALLOW_32X32) - pProbSetup->transform_mode += vp9_read_literal( 1); - if (pProbSetup->transform_mode == TX_MODE_SELECT) - { - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) - { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.tx8x8_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.tx16x16_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.tx32x32_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } - } + if (m_bitstreamData.GetBitstreamBuffer() == nullptr) { + // make sure we're 
initialized + return false; } - // Coefficient probability update - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs); + m_nCallbackEventCount = 0; - if( tmp != OK ) return (tmp); - if (pProbSetup->transform_mode > ONLY_4X4) { - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs8x8); - if( tmp != OK ) return (tmp); - } - if (pProbSetup->transform_mode > ALLOW_8X8) { - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs16x16); - if( tmp != OK ) return (tmp); - } - if (pProbSetup->transform_mode > ALLOW_16X16) { - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs32x32); - if( tmp != OK ) return (tmp); + // Handle discontinuity + if (pck->bDiscontinuity) { + memset(&m_nalu, 0, sizeof(m_nalu)); + memset(&m_PTSQueue, 0, sizeof(m_PTSQueue)); + m_bDiscontinuityReported = true; + m_pictureStarted = false; } - pProbSetup->probsDecoded = 1; + if (pck->bPTSValid) { + m_PTSQueue[m_lPTSPos].bPTSValid = true; + m_PTSQueue[m_lPTSPos].llPTS = pck->llPTS; + m_PTSQueue[m_lPTSPos].llPTSPos = m_llParsedBytes; + m_PTSQueue[m_lPTSPos].bDiscontinuity = m_bDiscontinuityReported; + m_bDiscontinuityReported = false; + m_lPTSPos = (m_lPTSPos + 1) % MAX_QUEUED_PTS; + } - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - fc->a.mbskip_probs[k] = vp9hwdReadProbDiffUpdate( fc->a.mbskip_probs[k]); - } + if (pck->pByteStream && pck->nDataLength && m_frameIdx == -1) { + memset(&m_PicData, 0, sizeof(VkParserVp9PictureData)); + m_frameIdx++; } - if(!pProbSetup->keyFrame) - { - for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - for (j = 0; j < VP9_INTER_MODES - 1; j++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.inter_mode_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } + while ((dataSize > 0) || m_pictureStarted) { + if (!m_pictureStarted) { + if (m_bitstreamComplete) { + // fill bitstreambuffer from start + // assuming parser will get bitstream per frame from demuxer + m_frameSize = dataSize; + m_nalu.start_offset = 0; + 
m_nalu.end_offset = 0; } - } - if (pProbSetup->mcomp_filter_type == SWITCHABLE) { - for (j = 0; j < VP9_SWITCHABLE_FILTERS+1; ++j) { - for (i = 0; i < VP9_SWITCHABLE_FILTERS-1; ++i) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.switchable_interp_prob[j][i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } + if (((VkDeviceSize)dataSize > m_bitstreamDataLen) && !resizeBitstreamBuffer(dataSize - m_bitstreamDataLen)) { + return false; } - } - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.intra_inter_prob[i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); + if (dataSize >= (m_frameSize - m_nalu.end_offset)) { + memcpy(m_bitstreamData.GetBitstreamPtr() + m_nalu.end_offset, pDataIn, m_frameSize - m_nalu.end_offset); + m_pictureStarted = true; + pDataIn += (m_frameSize - (int)m_nalu.end_offset); + dataSize -= (m_frameSize - (int)m_nalu.end_offset); + m_nalu.end_offset = m_frameSize; + m_bitstreamComplete = true; + } else { + memcpy(m_bitstreamData.GetBitstreamPtr() + m_nalu.end_offset, pDataIn, dataSize); + m_nalu.end_offset += dataSize; + pDataIn += dataSize; + dataSize = 0; + m_bitstreamComplete = false; } - } - - // Compound prediction mode probabilities - if (pProbSetup->allow_comp_inter_inter) { - tmp = vp9_read_literal( 1); - pProbSetup->comp_pred_mode = tmp; - if(tmp) { - tmp = vp9_read_literal( 1); - pProbSetup->comp_pred_mode += tmp; - if (pProbSetup->comp_pred_mode == HYBRID_PREDICTION) - { - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.comp_inter_prob[i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); + } else { + uint32_t frames_processed = 0; + uint32_t sizeparsed = 0, framesdone = 0; + + uint32_t frame_size = m_frameSize; + + const uint8_t* data_start = m_bitstreamData.GetBitstreamPtr(); + const uint8_t* data_end = data_start + m_frameSize; + uint32_t data_size = m_frameSize; 
+ uint32_t frames_in_superframe, frame_sizes[8]; + + ParseSuperFrameIndex(data_start, data_size, frame_sizes, &frames_in_superframe); + + do { + // Skip over the superframe index, if present + if ((data_size > 0) && ((data_start[0] & 0xe0) == 0xc0)) { + const uint8_t marker = data_start[0]; + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const uint32_t index_sz = 2 + mag * frames; + + if ((data_size >= index_sz) && (data_start[index_sz - 1] == marker)) { + data_start += index_sz; + data_size -= index_sz; + if (data_start < data_end) { + continue; + } else { + break; } } } - } - } else { - pProbSetup->comp_pred_mode = SINGLE_PREDICTION_ONLY; - } - if (pProbSetup->comp_pred_mode != COMP_PREDICTION_ONLY) { - for (i = 0; i < REF_CONTEXTS; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.single_ref_prob[i][0]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.single_ref_prob[i][1]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } + // Use the correct size for this frame, if an index is present + if (frames_in_superframe > 0) { + frame_size = frame_sizes[frames_processed]; + if (data_size < frame_size) { + // Invalid frame size in index + return false; + } + data_size = frame_size; + m_nalu.start_offset = sizeparsed; - if (pProbSetup->comp_pred_mode != SINGLE_PREDICTION_ONLY) { - for (i = 0; i < REF_CONTEXTS; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.comp_ref_prob[i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); } - } - } - // Superblock intra luma pred mode probabilities - for(j = 0 ; j < BLOCK_SIZE_GROUPS; ++j) - { - for( i = 0 ; i < 8; ++i ) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - fc->a.sb_ymode_prob[j][i] = vp9hwdReadProbDiffUpdate( - fc->a.sb_ymode_prob[j][i]); - } - } - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - 
fc->a.sb_ymode_probB[j][0] = vp9hwdReadProbDiffUpdate( - fc->a.sb_ymode_probB[j][0]); - } - } + ParseFrameHeader(frame_size); - for (j = 0; j < NUM_PARTITION_CONTEXTS; j++) { - for (i = 0; i < PARTITION_TYPES - 1; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.partition_prob[INTER_FRAME][j][i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); + if (frames_in_superframe > 0) { + sizeparsed += frame_sizes[framesdone]; + framesdone++; } - } + data_start += data_size; + while (data_start < data_end && *data_start == 0) { + data_start++; + } + + data_size = (int)(data_end - data_start); + frames_processed += 1; + } while (data_start < data_end); + + m_frameIdx++; + m_pictureStarted = false; } - // Motion vector tree update - tmp = vp9hwdDecodeMvUpdate(pProbSetup); - if( tmp != OK ) - return (tmp); } - return (OK); -} + if (pck->bEOS) { + end_of_stream(); + } -void VulkanVP9Decoder::update_nmv( vp9_prob *const p, const vp9_prob upd_p) -{ - uint32_t tmp = vp9_read( upd_p); - if (tmp) { -#if 1 //def LOW_PRECISION_MV_UPDATE - *p = (vp9_read_literal( 7) << 1) | 1; -#else - *p = vp9_read_literal( 8); -#endif + if (pParsedBytes) { + *pParsedBytes = pck->nDataLength; } + + return true; } -uint32_t VulkanVP9Decoder::vp9hwdDecodeMvUpdate(vp9_prob_update_s *pProbSetup) + +bool VulkanVP9Decoder::ParseFrameHeader(uint32_t framesize) { - uint32_t i, j, k; - nvdec_nmv_context *mvctx = &pProbSetup->pProbTab->a.nmvc; + m_llNaluStartLocation = m_llParsedBytes; + m_llFrameStartLocation = m_llNaluStartLocation; + m_llParsedBytes += framesize; + //m_pSliceOffsets[0] = 0; -#if 0 - tmp = vp9_read_literal( 1); - if (!tmp) return HANTRO_OK; -#endif + init_dbits(); + //parse uncompressed header + if(!ParseUncompressedHeader()) + { + assert((!"Error in ParseUncompressedVP9\n")); + return 0; + } + if (m_PicData.show_existing_frame == true) { + // display an existing frame + VkPicIf* pDispPic = m_pBuffers[m_PicData.frame_to_show_map_idx].buffer; + if (pDispPic) { + 
pDispPic->AddRef(); + } + + AddBuffertoOutputQueue(pDispPic); - for (j = 0; j < MV_JOINTS - 1; ++j) { - update_nmv( &mvctx->joints[j], - VP9_NMV_UPDATE_PROB); + return 0; } - for (i = 0; i < 2; ++i) { - update_nmv( &mvctx->sign[i], VP9_NMV_UPDATE_PROB); - for (j = 0; j < MV_CLASSES - 1; ++j) { - update_nmv( &mvctx->classes[i][j], VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < CLASS0_SIZE - 1; ++j) { - update_nmv( &mvctx->class0[i][j], VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < MV_OFFSET_BITS; ++j) { - update_nmv( &mvctx->bits[i][j], VP9_NMV_UPDATE_PROB); - } + + // handle bitstream start offset alignment (for super frame) + uint32_t addOffset = m_nalu.start_offset & (m_bufferOffsetAlignment - 1); + m_PicData.uncompressedHeaderOffset += addOffset; + m_PicData.compressedHeaderOffset += addOffset; + m_PicData.tilesOffset += addOffset; + + *m_pVkPictureData = VkParserPictureData(); + m_pVkPictureData->CodecSpecific.vp9 = m_PicData; + m_pVkPictureData->numSlices = m_PicData.numTiles; + m_pVkPictureData->bitstreamDataLen = (framesize + addOffset + m_bufferSizeAlignment - 1) & ~(m_bufferSizeAlignment - 1); // buffer is already aligned so, no issues. 
+ m_pVkPictureData->bitstreamData = m_bitstreamData.GetBitstreamBuffer(); + m_pVkPictureData->bitstreamDataOffset = m_nalu.start_offset & ~((int64_t)m_bufferOffsetAlignment - 1); + + if (!BeginPicture(m_pVkPictureData)) { + assert(!"BeginPicture failed"); + return false; } - for (i = 0; i < 2; ++i) { - for (j = 0; j < CLASS0_SIZE; ++j) { - for (k = 0; k < 3; ++k) - update_nmv( &mvctx->class0_fp[i][j][k], VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < 3; ++j) { - update_nmv( &mvctx->fp[i][j], VP9_NMV_UPDATE_PROB); - } + bool bSkipped = false; + if (m_pClient != nullptr) { + // Notify client + if (!m_pClient->DecodePicture(m_pVkPictureData)) { + bSkipped = true; + // WARNING: skipped decoding current picture; + } else { + m_nCallbackEventCount++; + } + } else { + // WARNING: no valid render target for current picture } - if (pProbSetup->allow_high_precision_mv) { - for (i = 0; i < 2; ++i) { - update_nmv( &mvctx->class0_hp[i], VP9_NMV_UPDATE_PROB); - update_nmv( &mvctx->hp[i], VP9_NMV_UPDATE_PROB); - } + //m_PicData.prevIsKeyFrame = m_PicData.keyFrame; + //m_PicData.PrevShowFrame = m_PicData.showFrame; + UpdateFramePointers(m_pCurrPic); + + if (m_PicData.stdPictureInfo.flags.show_frame && !bSkipped) { + // Call back codec for post-decode event (display the decoded frame) + AddBuffertoOutputQueue(m_pCurrPic); + m_pCurrPic = nullptr; + } else { + m_pCurrPic->Release(); + m_pCurrPic = nullptr; } - return (OK); + return 1; } -uint32_t VulkanVP9Decoder::vp9hwdDecodeCoeffUpdate( - uint8_t probCoeffs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1]) +void VulkanVP9Decoder::UpdateFramePointers(VkPicIf* currentPicture) { - uint32_t i, j, k, l, m; - uint32_t tmp; - tmp = vp9_read_literal( 1); - if (!tmp) return OK; - for( i = 0; i < VP9_BLOCK_TYPES; i++ ) - { - for ( j = 0; j < VP9_REF_TYPES; j++ ) - { - for ( k = 0; k < VP9_COEF_BANDS; k++ ) - { - for ( l = 0; l < VP9_PREV_COEF_CONTEXTS; l++ ) - { - if (l >= 3 && k == 0) - continue; - - 
for ( m = 0; m < UNCONSTRAINED_NODES; m++ ) - { - tmp = vp9_read( 252); - CHECK_END_OF_STREAM(tmp); - if ( tmp ) - { - uint8_t old, latest; - old = probCoeffs[i][j][k][l][m]; - latest = vp9hwdReadProbDiffUpdate( old); - CHECK_END_OF_STREAM(tmp); - - probCoeffs[i][j][k][l][m] = latest; - } - } - } + StdVideoDecodeVP9PictureInfo* const pStdPicInfo = &m_PicData.stdPictureInfo; + + uint32_t mask, ref_index = 0; + + for (mask = pStdPicInfo->refresh_frame_flags; mask; mask >>= 1) { + if (mask & 1) { + if (m_pBuffers[ref_index].buffer) { + m_pBuffers[ref_index].buffer->Release(); + } + m_pBuffers[ref_index].buffer = currentPicture; + + if (m_pBuffers[ref_index].buffer) { + m_pBuffers[ref_index].buffer->AddRef(); } } + ++ref_index; } - return (OK); -} -int32_t VulkanVP9Decoder::get_unsigned_bits(uint32_t num_values) -{ - int32_t cat = 0; - if (num_values <= 1) - return 0; - num_values--; - while(num_values > 0) - { - cat++; - num_values >>= 1; - } - return cat; + // Invalidate these references until the next frame starts. + //for (int i = 0; i < ALLOWED_REFS_PER_FRAME; i++) { + // pFrameInfo->activeRefIdx[i] = 0xffff; + //} } -uint32_t VulkanVP9Decoder::BoolDecodeUniform( uint32_t n) +bool VulkanVP9Decoder::AddBuffertoOutputQueue(VkPicIf* pDispPic) { - int32_t value, v; - int32_t l = get_unsigned_bits(n); - int32_t m = (1 << l) - n; - if (!l) return 0; - value = vp9_read_literal( l - 1); - if (value >= m) { - v = vp9_read_literal( 1); - value = (value << 1) - m + v; - } - return value; + AddBuffertoDispQueue(pDispPic); + lEndPicture(pDispPic); + + return true; } -uint32_t VulkanVP9Decoder::vp9hwdDecodeSubExp( uint32_t k, uint32_t num_syms) +void VulkanVP9Decoder::AddBuffertoDispQueue(VkPicIf* pDispPic) { - uint32_t i=0, mk=0, value=0; - while (1) { - int32_t b = (i ? 
k + i - 1 : k); - uint32_t a = (1 << b); - if (num_syms <= mk + 3 * a) { - value = BoolDecodeUniform( num_syms - mk) + mk; + int lDisp = 0; + + // Find an entry in m_DispInfo + for (int i = 0; i < MAX_DELAY; i++) { + if (m_DispInfo[i].pPicBuf == pDispPic) { + lDisp = i; break; - } else { - value = vp9_read_bit(); - if (value) { - i++; - mk += a; - } else { - value = vp9_read_literal( b) + mk; - break; - } + } + if ((m_DispInfo[i].pPicBuf == nullptr) + || ((m_DispInfo[lDisp].pPicBuf != nullptr) && (m_DispInfo[i].llPTS - m_DispInfo[lDisp].llPTS < 0))) { + lDisp = i; } } - return value; -} + m_DispInfo[lDisp].pPicBuf = pDispPic; + m_DispInfo[lDisp].bSkipped = false; + m_DispInfo[lDisp].lPOC = 0; + m_DispInfo[lDisp].lNumFields = 2; -int32_t VulkanVP9Decoder::merge_index(int32_t v, int32_t n, int32_t modulus) -{ - int32_t max1 = (n - 1 - modulus / 2) / modulus + 1; - if (v < max1) v = v * modulus + modulus / 2; - else - { - int32_t w; - v -= max1; - w = v; - v += (v + modulus - modulus / 2) / modulus; - while (v % modulus == modulus / 2 || - w != v - (v + modulus - modulus / 2) / modulus) v++; + // Find a PTS in the list + unsigned int ndx = m_lPTSPos; + m_DispInfo[lDisp].llPTS = m_llExpectedPTS; // Will be updated later on + + for (int k = 0; k < MAX_QUEUED_PTS; k++) { + if ((m_PTSQueue[ndx].bPTSValid) && (m_PTSQueue[ndx].llPTSPos - m_llFrameStartLocation <= (m_bNoStartCodes?0:3))) { + m_DispInfo[lDisp].bPTSValid = true; + m_DispInfo[lDisp].llPTS = m_PTSQueue[ndx].llPTS; + m_PTSQueue[ndx].bPTSValid = false; + } + ndx = (ndx + 1) % MAX_QUEUED_PTS; } - return v; } -int32_t VulkanVP9Decoder::vp9_inv_recenter_nonneg(int32_t v, int32_t m) +void VulkanVP9Decoder::lEndPicture(VkPicIf* pDispPic) { - if (v > (m << 1)) return v; - else if ((v & 1) == 0) return (v >> 1) + m; - else return m - ((v + 1) >> 1); -} + if (pDispPic) { + display_picture(pDispPic); + pDispPic->Release(); + } -int32_t VulkanVP9Decoder::inv_remap_prob(int32_t v, int32_t m) -{ - const int32_t n = 255; - v = 
merge_index(v, n - 1, MODULUS_PARAM); - m--; - if ((m << 1) <= n) - return 1 + vp9_inv_recenter_nonneg(v + 1, m); - else - return n - vp9_inv_recenter_nonneg(v + 1, n - 1 - m); } -vp9_prob VulkanVP9Decoder::vp9hwdReadProbDiffUpdate( uint8_t oldp) + +bool VulkanVP9Decoder::ParseUncompressedHeader() { - int32_t p; - int32_t delp = vp9hwdDecodeSubExp( 4, 255 ); - p = (vp9_prob)inv_remap_prob(delp, oldp); - return p; -} + VkParserVp9PictureData *pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9ColorConfig* pStdColorConfig = &m_PicData.stdColorConfig; + StdVideoVP9LoopFilter* pStdLoopFilter = &m_PicData.stdLoopFilter; + m_frameSizeChanged = false; -//Backward update + VP9_CHECK_FRAME_MARKER; + uint32_t profile = u(1); + profile |= u(1) << 1; + pStdPicInfo->profile = (StdVideoVP9Profile)profile; + if (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3) { + if (u(1) != 0) { + assert(!"Invalid syntax"); + return false; + } + } -// this function assumes prob1 and prob2 are already within [1,255] range -vp9_prob VulkanVP9Decoder::weighted_prob(int32_t prob1, int32_t prob2, int32_t factor) -{ - return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); -} + pPicData->show_existing_frame = u(1); + if (pPicData->show_existing_frame) { + pPicData->frame_to_show_map_idx = u(3); + //U32 frame_to_show = vp9parser->m_pBuffers[idx_to_show]; + //Handle direct show: CHECK + pPicData->uncompressedHeaderOffset = (consumed_bits() + 7) >> 3; + pPicData->compressedHeaderSize = 0; + pStdPicInfo->refresh_frame_flags = 0; + pStdLoopFilter->loop_filter_level = 0; + return true; + } -vp9_prob VulkanVP9Decoder::clip_prob(uint32_t p) -{ - return (vp9_prob)((p > 255) ? 255u : (p < 1) ? 1u : p); -} + pStdPicInfo->frame_type = (StdVideoVP9FrameType)u(1); + pStdPicInfo->flags.show_frame = u(1); + pStdPicInfo->flags.error_resilient_mode = u(1); -vp9_prob VulkanVP9Decoder::get_prob(uint32_t num, uint32_t den) -{ - return (den == 0) ? 
128u : clip_prob((num * 256 + (den >> 1)) / den); -} + if (pStdPicInfo->frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY) { + VP9_CHECK_FRAME_SYNC_CODE; + ParseColorConfig(); + ParseFrameAndRenderSize(); + pStdPicInfo->refresh_frame_flags = (1 << STD_VIDEO_VP9_NUM_REF_FRAMES) - 1; + pPicData->FrameIsIntra = true; -vp9_prob VulkanVP9Decoder::get_binary_prob(uint32_t n0, uint32_t n1) -{ - return get_prob(n0, n0 + n1); -} + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; ++i) { + pPicData->ref_frame_idx[i] = 0; + } + } else { // non key frame + pStdPicInfo->flags.intra_only = pStdPicInfo->flags.show_frame ? 0 : u(1); + pPicData->FrameIsIntra = pStdPicInfo->flags.intra_only; + pStdPicInfo->reset_frame_context = pStdPicInfo->flags.error_resilient_mode ? 0 : u(2); + + if (pStdPicInfo->flags.intra_only == 1) { + VP9_CHECK_FRAME_SYNC_CODE; + if (pStdPicInfo->profile > STD_VIDEO_VP9_PROFILE_0) { + ParseColorConfig(); + } else { + pStdColorConfig->color_space = STD_VIDEO_VP9_COLOR_SPACE_BT_601; + pStdColorConfig->subsampling_x = 1; + pStdColorConfig->subsampling_y = 1; + pStdColorConfig->BitDepth = 8; + } -uint32_t VulkanVP9Decoder::convert_distribution(uint32_t i, - const vp9_tree_index * tree, - vp9_prob probs[], - uint32_t branch_ct[][2], - const uint32_t num_events[], - uint32_t tok0_offset) -{ - uint32_t left, right; + pStdPicInfo->refresh_frame_flags = u(STD_VIDEO_VP9_NUM_REF_FRAMES); //for non key frame refresh only some - if (tree[i] <= 0) - { - left = num_events[-tree[i] - tok0_offset]; + ParseFrameAndRenderSize(); + } else { // inter frame + pStdPicInfo->refresh_frame_flags = u(STD_VIDEO_VP9_NUM_REF_FRAMES); + + pStdPicInfo->ref_frame_sign_bias_mask = 0; + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { + pPicData->ref_frame_idx[i] = u(3); + pStdPicInfo->ref_frame_sign_bias_mask |= (u(1) << (STD_VIDEO_VP9_REFERENCE_NAME_LAST_FRAME + i)); + } + + ParseFrameAndRenderSizeWithRefs(); + + pStdPicInfo->flags.allow_high_precision_mv = u(1); + + // interpolation 
filter + bool is_filter_switchable = u(1); //mb_switchable_mcomp_filt + if (is_filter_switchable) { + pStdPicInfo->interpolation_filter = STD_VIDEO_VP9_INTERPOLATION_FILTER_SWITCHABLE; + } else { + const StdVideoVP9InterpolationFilter literal_to_filter[] = { + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SMOOTH, + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP, + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SHARP, + STD_VIDEO_VP9_INTERPOLATION_FILTER_BILINEAR }; + pStdPicInfo->interpolation_filter = literal_to_filter[u(2)]; + } + } } - else - { - left = convert_distribution(tree[i], tree, probs, branch_ct, num_events, tok0_offset); + + if (pStdPicInfo->flags.error_resilient_mode == 0) { + /* Refresh entropy probs, + * 0 == this frame probs are used only for this frame decoding, + * 1 == this frame probs will be stored for future reference */ + pStdPicInfo->flags.refresh_frame_context = u(1); + pStdPicInfo->flags.frame_parallel_decoding_mode = u(1); + } else { + pStdPicInfo->flags.refresh_frame_context = 0; + pStdPicInfo->flags.frame_parallel_decoding_mode = 1; } - if (tree[i + 1] <= 0) - { - right = num_events[-tree[i + 1] - tok0_offset]; + + pStdPicInfo->frame_context_idx = u(2); + + if ((pPicData->FrameIsIntra == 1) || (pStdPicInfo->flags.error_resilient_mode == 1)) { + StdVideoVP9Segmentation* pStdSegment = &pPicData->stdSegmentation; + ///* Clear all previous segment data */ + memset(pStdSegment->FeatureEnabled, 0, sizeof(pStdSegment->FeatureEnabled)); + memset(pStdSegment->FeatureData, 0, sizeof(pStdSegment->FeatureData)); + pStdPicInfo->frame_context_idx = 0; } - else - { - right = convert_distribution(tree[i + 1], tree, probs, branch_ct, num_events, tok0_offset); + + ParseLoopFilterParams(); + ParseQuantizationParams(); + ParseSegmentationParams(); + ParseTileInfo(); + + pPicData->compressedHeaderSize = u(16); + + pPicData->uncompressedHeaderOffset = 0; + pPicData->compressedHeaderOffset = (consumed_bits() + 7) >> 3; + pPicData->tilesOffset = 
pPicData->compressedHeaderOffset + pPicData->compressedHeaderSize; + + pPicData->ChromaFormat = (pStdColorConfig->subsampling_x == 1) && (pStdColorConfig->subsampling_y == 1) ? 1 : 0; + assert(pPicData->ChromaFormat); // TODO: support only YUV420 + + return true; +} + +bool VulkanVP9Decoder::ParseColorConfig() +{ + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9ColorConfig* pStdColorConfig = &m_PicData.stdColorConfig; + + if (pStdPicInfo->profile >= STD_VIDEO_VP9_PROFILE_2) { + pStdColorConfig->BitDepth = u(1) ? 12 : 10; + } else { + pStdColorConfig->BitDepth = 8; + } + + pStdColorConfig->color_space = (StdVideoVP9ColorSpace)u(3); + + if (pStdColorConfig->color_space != STD_VIDEO_VP9_COLOR_SPACE_RGB) { + pStdColorConfig->flags.color_range = u(1); + if ((pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_1) || + (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3)) { + pStdColorConfig->subsampling_x = u(1); + pStdColorConfig->subsampling_y = u(1); + VP9_CHECK_ZERO_BIT + } else { + pStdColorConfig->subsampling_x = 1; + pStdColorConfig->subsampling_y = 1; + } + } else { + pStdColorConfig->flags.color_range = 1; + if ((pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_1) || + (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3)) { + pStdColorConfig->subsampling_x = 0; + pStdColorConfig->subsampling_y = 0; + VP9_CHECK_ZERO_BIT + } } - probs[i>>1] = get_binary_prob(left, right); - branch_ct[i>>1][0] = left; - branch_ct[i>>1][1] = right; - return left + right; + return true; } -void VulkanVP9Decoder::vp9_tree_probs_from_distribution(const vp9_tree_index * tree, - vp9_prob probs [ /* n-1 */ ], - uint32_t branch_ct [ /* n-1 */ ] [2], - const uint32_t num_events[ /* n */ ], - uint32_t tok0_offset) +void VulkanVP9Decoder::ParseFrameAndRenderSize() { - convert_distribution(0, tree, probs, branch_ct, num_events, tok0_offset); + VkParserVp9PictureData *pPicData = &m_PicData; + + pPicData->FrameWidth = u(16) + 1; + pPicData->FrameHeight = u(16) + 1; + + 
ComputeImageSize(); + + if (u(1) == 1) { // render_and_frame_size_different + pPicData->renderWidth = u(16) + 1; + pPicData->renderHeight = u(16) + 1; + } else { + pPicData->renderWidth = pPicData->FrameWidth; + pPicData->renderHeight = pPicData->FrameHeight; + } } -void VulkanVP9Decoder::update_coef_probs(uint8_t dst_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint8_t pre_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint32_t coef_counts[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES+1], - uint32_t (*eob_counts)[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS], - int32_t count_sat, int32_t update_factor) +void VulkanVP9Decoder::ParseFrameAndRenderSizeWithRefs() { - int32_t t, i, j, k, l, count; - uint32_t branch_ct[VP9_ENTROPY_NODES][2]; - vp9_prob coef_probs[VP9_ENTROPY_NODES]; - int32_t factor; + VkParserVp9PictureData* pPicData = &m_PicData; - //int32_t brancharr[VP9_BLOCK_TYPES][VP9_REF_TYPES][36][VP9_PREV_COEF_CONTEXTS] = {0}; - //int32_t coeffprobarr[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS] = {0}; - //memset(brancharr, 0, sizeof(int32_t)*VP9_BLOCK_TYPES*VP9_REF_TYPES*VP9_COEF_BANDS*VP9_PREV_COEF_CONTEXTS); - //memset(coeffprobarr, 0, sizeof(int32_t)*VP9_BLOCK_TYPES*VP9_REF_TYPES*VP9_COEF_BANDS*VP9_PREV_COEF_CONTEXTS); + bool found_ref = false; - for (i = 0; i < VP9_BLOCK_TYPES; ++i) - { - for (j = 0; j < VP9_REF_TYPES; ++j) - { - for (k = 0; k < VP9_COEF_BANDS; ++k) - { - for (l = 0; l < VP9_PREV_COEF_CONTEXTS; ++l) - { - if (l >= 3 && k == 0) - continue; - vp9_tree_probs_from_distribution(vp9_coefmodel_tree, - coef_probs, branch_ct, - coef_counts[i][j][k][l], 0); - branch_ct[0][1] = eob_counts[i][j][k][l] - branch_ct[0][0]; - coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]); - //brancharr[i][j][k][l] = branch_ct[0][1]; - 
//coeffprobarr[i][j][k][l] = coef_probs[0]; - for (t = 0; t < UNCONSTRAINED_NODES; ++t) - { - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - dst_coef_probs[i][j][k][l][t] = weighted_prob(pre_coef_probs[i][j][k][l][t], coef_probs[t], factor); - } - } + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; ++i) { + found_ref = u(1); + if (found_ref) { + VkPicIf* pRefPic = m_pBuffers[pPicData->ref_frame_idx[i]].buffer; + if (pRefPic != nullptr) { + pPicData->FrameWidth = pRefPic->decodeWidth; + pPicData->FrameHeight = pRefPic->decodeHeight; + + ComputeImageSize(); + } + + if (u(1) == 1) { // render_and_frame_size_different + pPicData->renderWidth = u(16) + 1; + pPicData->renderHeight = u(16) + 1; + } else { + pPicData->renderWidth = pPicData->FrameWidth; + pPicData->renderHeight = pPicData->FrameHeight; } + + break; } } + if (!found_ref) { + ParseFrameAndRenderSize(); + } } -void VulkanVP9Decoder::adaptCoefProbs(vp9_prob_update_s *pProbSetup) +void VulkanVP9Decoder::ComputeImageSize() { - int32_t update_factor; /* denominator 256 */ - int32_t count_sat; + VkParserVp9PictureData* pPicData = &m_PicData; - if(pProbSetup->keyFrame) - { - update_factor = COEF_MAX_UPDATE_FACTOR_KEY; - count_sat = COEF_COUNT_SAT_KEY; - } - else if (pProbSetup->prevIsKeyFrame) - { - update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; // adapt quickly - count_sat = COEF_COUNT_SAT_AFTER_KEY; - } - else - { - update_factor = COEF_MAX_UPDATE_FACTOR; - count_sat = COEF_COUNT_SAT; + // compute_image_size() + pPicData->MiCols = (pPicData->FrameWidth + 7) >> 3; + pPicData->MiRows = (pPicData->FrameHeight + 7) >> 3; + pPicData->Sb64Cols = (pPicData->MiCols + 7) >> 3; + pPicData->Sb64Rows = (pPicData->MiRows + 7) >> 3; + + // compute_image_size() side effects (7.2.6) + if (((uint32_t)m_lastFrameHeight != pPicData->FrameHeight) || ((uint32_t)m_lastFrameWidth != pPicData->FrameWidth)) { + m_frameSizeChanged = true; + 
pPicData->stdPictureInfo.flags.UsePrevFrameMvs = false; + } else { /* 2.a, 2.b */ + bool intraOnly = pPicData->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY || pPicData->stdPictureInfo.flags.intra_only; + pPicData->stdPictureInfo.flags.UsePrevFrameMvs = m_lastShowFrame && /* 2.c */ + pPicData->stdPictureInfo.flags.error_resilient_mode == 0 && /* 2.d */ + !intraOnly /* 2.e */; } + m_lastFrameHeight = pPicData->FrameHeight; + m_lastFrameWidth = pPicData->FrameWidth; + m_lastShowFrame = pPicData->stdPictureInfo.flags.show_frame; - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs, - m_PrevCtx.probCoeffs, - pProbSetup->pCtxCounters->countCoeffs, - pProbSetup->pCtxCounters->countEobs[TX_4X4], - count_sat, update_factor); - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs8x8, - m_PrevCtx.probCoeffs8x8, - pProbSetup->pCtxCounters->countCoeffs8x8, - pProbSetup->pCtxCounters->countEobs[TX_8X8], - count_sat, update_factor); - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs16x16, - m_PrevCtx.probCoeffs16x16, - pProbSetup->pCtxCounters->countCoeffs16x16, - pProbSetup->pCtxCounters->countEobs[TX_16X16], - count_sat, update_factor); - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs32x32, - m_PrevCtx.probCoeffs32x32, - pProbSetup->pCtxCounters->countCoeffs32x32, - pProbSetup->pCtxCounters->countEobs[TX_32X32], - count_sat, update_factor); } -int32_t VulkanVP9Decoder::update_mode_ct(vp9_prob pre_prob, vp9_prob prob, uint32_t branch_ct[2]) +void VulkanVP9Decoder::ParseLoopFilterParams() { - int32_t factor, count = branch_ct[0] + branch_ct[1]; - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - return weighted_prob(pre_prob, prob, factor); -} + VkParserVp9PictureData *pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo *pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9LoopFilter* pStdLoopFilter = &m_PicData.stdLoopFilter; -int32_t VulkanVP9Decoder::update_mode_ct2(vp9_prob pre_prob, uint32_t branch_ct[2]) -{ - return update_mode_ct(pre_prob, get_binary_prob(branch_ct[0], branch_ct[1]), branch_ct); -} + if (pPicData->FrameIsIntra || (pStdPicInfo->flags.error_resilient_mode == 1)) { + // setup_past_independence() for loop filter params + memset(m_loopFilterRefDeltas, 0, sizeof(m_loopFilterRefDeltas)); + memset(m_loopFilterModeDeltas, 0, sizeof(m_loopFilterModeDeltas)); + m_loopFilterRefDeltas[0] = 1; + m_loopFilterRefDeltas[1] = 0; + m_loopFilterRefDeltas[2] = -1; + m_loopFilterRefDeltas[3] = -1; + } -void VulkanVP9Decoder::update_mode_probs(int32_t n_modes, - const vp9_tree_index *tree, uint32_t *cnt, - vp9_prob *pre_probs, vp9_prob *pre_probsB, - vp9_prob *dst_probs, vp9_prob *dst_probsB, - uint32_t tok0_offset) -{ - vp9_prob probs[MAX_PROBS]; - uint32_t branch_ct[MAX_PROBS][2]; - int32_t t, count, factor; + pStdLoopFilter->loop_filter_level = u(6); + pStdLoopFilter->loop_filter_sharpness = u(3); - assert(n_modes - 1 < MAX_PROBS); - vp9_tree_probs_from_distribution(tree, probs, branch_ct, cnt, tok0_offset); - for (t = 0; t < n_modes - 1; ++t) - { - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - if (t < 8 || dst_probsB == NULL) - dst_probs[t] = weighted_prob(pre_probs[t], probs[t], factor); - else - dst_probsB[t-8] = weighted_prob(pre_probsB[t-8], probs[t], factor); + pStdLoopFilter->flags.loop_filter_delta_enabled = u(1); + if (pStdLoopFilter->flags.loop_filter_delta_enabled) { + + pStdLoopFilter->flags.loop_filter_delta_update = u(1); + + if (pStdLoopFilter->flags.loop_filter_delta_update) { + + for (int i = 0; i < STD_VIDEO_VP9_MAX_REF_FRAMES; i++) { + uint8_t update_ref_delta = u(1); + pStdLoopFilter->update_ref_delta |= update_ref_delta << i; + if (update_ref_delta == 1) { + m_loopFilterRefDeltas[i] = u(6); + if (u(1)) { // sign + m_loopFilterRefDeltas[i] = -m_loopFilterRefDeltas[i]; + } + } + } + + for (int i = 0; i < STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS; i++) { + uint8_t update_mode_delta = u( 1); + pStdLoopFilter->update_mode_delta |= update_mode_delta << i; + if (update_mode_delta) { + m_loopFilterModeDeltas[i] = u(6); + if(u(1)) { // sign + m_loopFilterModeDeltas[i] = -m_loopFilterRefDeltas[i]; + } + } + } + } } -} -void VulkanVP9Decoder::tx_counts_to_branch_counts_32x32(uint32_t *tx_count_32x32p, - uint32_t (*ct_32x32p)[2]) -{ - ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; - ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32]; - ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; - ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32]; - ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; - ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; + memcpy(pStdLoopFilter->loop_filter_ref_deltas, m_loopFilterRefDeltas, sizeof(m_loopFilterRefDeltas)); + memcpy(pStdLoopFilter->loop_filter_mode_deltas, m_loopFilterModeDeltas, sizeof(m_loopFilterModeDeltas)); } -void VulkanVP9Decoder::tx_counts_to_branch_counts_16x16(uint32_t *tx_count_16x16p, - uint32_t (*ct_16x16p)[2]) +void VulkanVP9Decoder::ParseQuantizationParams() { - 
ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; - ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16]; - ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; - ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; + VkParserVp9PictureData *pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &pPicData->stdPictureInfo; + + pStdPicInfo->base_q_idx = u(8); + pStdPicInfo->delta_q_y_dc = ReadDeltaQ(); + pStdPicInfo->delta_q_uv_dc = ReadDeltaQ(); + pStdPicInfo->delta_q_uv_ac = ReadDeltaQ(); } -void VulkanVP9Decoder::tx_counts_to_branch_counts_8x8(uint32_t *tx_count_8x8p, - uint32_t (*ct_8x8p)[2]) +int32_t VulkanVP9Decoder::ReadDeltaQ() { - ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; - ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; + int32_t delta; + if (u(1)) { + delta = u(4); + if (u(1)) { + delta = -delta; + } + return delta; + } else { + return 0; + } } -void VulkanVP9Decoder::adaptModeProbs(vp9_prob_update_s *pProbSetup) +void VulkanVP9Decoder::ParseSegmentationParams() { - uint32_t i, j; - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - pProbSetup->pProbTab->a.intra_inter_prob[i] = update_mode_ct2(m_PrevCtx.intra_inter_prob[i], pProbSetup->pCtxCounters->intra_inter_count[i]); - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - pProbSetup->pProbTab->a.comp_inter_prob[i] = update_mode_ct2(m_PrevCtx.comp_inter_prob[i], pProbSetup->pCtxCounters->comp_inter_count[i]); - for (i = 0; i < REF_CONTEXTS; i++) - pProbSetup->pProbTab->a.comp_ref_prob[i] = update_mode_ct2(m_PrevCtx.comp_ref_prob[i], pProbSetup->pCtxCounters->comp_ref_count[i]); - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - pProbSetup->pProbTab->a.single_ref_prob[i][j] = update_mode_ct2(m_PrevCtx.single_ref_prob[i][j], pProbSetup->pCtxCounters->single_ref_count[i][j]); - - for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) - { - update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, - pProbSetup->pCtxCounters->sb_ymode_counts[i], - m_PrevCtx.sb_ymode_prob[i], m_PrevCtx.sb_ymode_probB[i], - 
pProbSetup->pProbTab->a.sb_ymode_prob[i], pProbSetup->pProbTab->a.sb_ymode_probB[i], 0); - } - for (i = 0; i < VP9_INTRA_MODES; ++i) - { - update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, - pProbSetup->pCtxCounters->uv_mode_counts[i], - m_PrevCtx.uv_mode_prob[i], - m_PrevCtx.uv_mode_probB[i], - pProbSetup->pProbTab->a.uv_mode_prob[i], - pProbSetup->pProbTab->a.uv_mode_probB[i], 0); - } - for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) - update_mode_probs(PARTITION_TYPES, vp9_partition_tree, - pProbSetup->pCtxCounters->partition_counts[i], - m_PrevCtx.partition_prob[INTER_FRAME][i], NULL, - pProbSetup->pProbTab->a.partition_prob[INTER_FRAME][i], NULL, 0); + uint8_t segmentation_feature_bits[STD_VIDEO_VP9_SEG_LVL_MAX] = { 8, 6, 2, 0}; + uint8_t segmentation_feature_signed[STD_VIDEO_VP9_SEG_LVL_MAX] = {1, 1, 0, 0}; - if (pProbSetup->mcomp_filter_type == SWITCHABLE) - { - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i) - { - update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree, - pProbSetup->pCtxCounters->switchable_interp_counts[i], - m_PrevCtx.switchable_interp_prob[i], NULL, - pProbSetup->pProbTab->a.switchable_interp_prob[i], NULL, 0); - } + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9Segmentation* pSegment = &m_PicData.stdSegmentation; + + pSegment->flags.segmentation_update_map = 0; + pSegment->flags.segmentation_temporal_update = 0; + + pStdPicInfo->flags.segmentation_enabled = u(1); + if (pStdPicInfo->flags.segmentation_enabled == 0) { + return; } - if (pProbSetup->transform_mode == TX_MODE_SELECT) - { - uint32_t branch_ct_8x8p[TX_SIZE_MAX_SB - 3][2]; - uint32_t branch_ct_16x16p[TX_SIZE_MAX_SB - 2][2]; - uint32_t branch_ct_32x32p[TX_SIZE_MAX_SB - 1][2]; - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - tx_counts_to_branch_counts_8x8(pProbSetup->pCtxCounters->tx8x8_count[i], branch_ct_8x8p); - for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) - { - int32_t factor; - int32_t count = branch_ct_8x8p[j][0] + 
branch_ct_8x8p[j][1]; - vp9_prob prob = get_binary_prob(branch_ct_8x8p[j][0], branch_ct_8x8p[j][1]); - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - pProbSetup->pProbTab->a.tx8x8_prob[i][j] = weighted_prob(m_PrevCtx.tx8x8_prob[i][j], prob, factor); - } + pSegment->flags.segmentation_update_map = u(1); + + if (pSegment->flags.segmentation_update_map == 1) { + + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_TREE_PROBS; i++) { + uint8_t prob_coded = u(1); + pSegment->segmentation_tree_probs[i] = (prob_coded == 1) ? u(8) : VP9_MAX_PRBABILITY; } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - tx_counts_to_branch_counts_16x16(pProbSetup->pCtxCounters->tx16x16_count[i], branch_ct_16x16p); - for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) - { - int32_t factor; - int32_t count = branch_ct_16x16p[j][0] + branch_ct_16x16p[j][1]; - vp9_prob prob = get_binary_prob(branch_ct_16x16p[j][0], branch_ct_16x16p[j][1]); - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - pProbSetup->pProbTab->a.tx16x16_prob[i][j] = weighted_prob(m_PrevCtx.tx16x16_prob[i][j], prob, factor); + + pSegment->flags.segmentation_temporal_update = u(1); + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_PRED_PROB; i++) { + if (pSegment->flags.segmentation_temporal_update) { + uint8_t prob_coded = u(1); + pSegment->segmentation_pred_prob[i] = (prob_coded == 1) ? u(8) : VP9_MAX_PRBABILITY; + } else { + pSegment->segmentation_pred_prob[i] = VP9_MAX_PRBABILITY; } } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - tx_counts_to_branch_counts_32x32(pProbSetup->pCtxCounters->tx32x32_count[i], branch_ct_32x32p); - for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) - { - int32_t factor; - int32_t count = branch_ct_32x32p[j][0] + branch_ct_32x32p[j][1]; - vp9_prob prob = get_binary_prob(branch_ct_32x32p[j][0], branch_ct_32x32p[j][1]); - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - pProbSetup->pProbTab->a.tx32x32_prob[i][j] = weighted_prob(m_PrevCtx.tx32x32_prob[i][j], prob, factor); + } + + pSegment->flags.segmentation_update_data = u(1); + if (pSegment->flags.segmentation_update_data == 1) { + pSegment->flags.segmentation_abs_or_delta_update = u(1); + + /* Clear all previous segment data */ + memset(pSegment->FeatureEnabled, 0, sizeof(pSegment->FeatureEnabled)); + memset(pSegment->FeatureData, 0, sizeof(pSegment->FeatureData)); + + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTS; i++) { + for (int j = 0; j < STD_VIDEO_VP9_SEG_LVL_MAX; j++) { + uint8_t feature_enabled = u(1); + pSegment->FeatureEnabled[i] |= (feature_enabled << j); + + if (feature_enabled == 1) { + pSegment->FeatureData[i][j] = u(segmentation_feature_bits[j]); + + if (segmentation_feature_signed[j] == 1) { + if (u(1) == 1) { + pSegment->FeatureData[i][j] = -pSegment->FeatureData[i][j]; + } + } + } } } + + } // segmentation_update_data +} + +uint8_t VulkanVP9Decoder::CalcMinLog2TileCols() +{ + VkParserVp9PictureData* pPicData = &m_PicData; + uint8_t minLog2 = 0; + + while (((uint32_t)VP9_MAX_TILE_WIDTH_B64 << minLog2) < pPicData->Sb64Cols) { + minLog2++; } - for (i = 0; i < MBSKIP_CONTEXTS; ++i) - pProbSetup->pProbTab->a.mbskip_probs[i] = update_mode_ct2(m_PrevCtx.mbskip_probs[i],pProbSetup->pCtxCounters->mbskip_count[i]); + + return minLog2; } -void VulkanVP9Decoder::adaptModeContext(vp9_prob_update_s *pProbSetup) +uint8_t VulkanVP9Decoder::CalcMaxLog2TileCols() { - uint32_t i, j; - uint32_t (*mode_ct)[VP9_INTER_MODES - 1][2] = pProbSetup->pCtxCounters->inter_mode_counts; + VkParserVp9PictureData* pPicData = &m_PicData; + uint8_t maxLog2 = 1; - for (j = 0; j < INTER_MODE_CONTEXTS; j++) - { - for (i = 0; i < VP9_INTER_MODES - 1; i++) - { - int32_t count = mode_ct[j][i][0] + mode_ct[j][i][1], factor; - count = count > MVREF_COUNT_SAT ? 
MVREF_COUNT_SAT : count; - factor = (MVREF_MAX_UPDATE_FACTOR * count / MVREF_COUNT_SAT); - pProbSetup->pProbTab->a.inter_mode_prob[j][i] = weighted_prob(m_PrevCtx.inter_mode_prob[j][i], - get_binary_prob(mode_ct[j][i][0], mode_ct[j][i][1]), - factor); - } + while ((pPicData->Sb64Cols >> maxLog2) >= VP9_MIN_TILE_WIDTH_B64) { + maxLog2++; } + + return maxLog2 - 1; } -uint32_t VulkanVP9Decoder::adapt_probs(uint32_t i, - const signed char* tree, - vp9_prob this_probs[], - const vp9_prob last_probs[], - const uint32_t num_events[]) +void VulkanVP9Decoder::ParseTileInfo() { - vp9_prob this_prob; - uint32_t weight; + VkParserVp9PictureData* pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; - const uint32_t left = tree[i] <= 0 ? num_events[-tree[i]] : adapt_probs(tree[i], tree, this_probs, last_probs, num_events); - const uint32_t right = tree[i + 1] <= 0 ? num_events[-tree[i + 1]] : adapt_probs(tree[i + 1], tree, this_probs, last_probs, num_events); - weight = left + right; - if (weight) - { - this_prob = get_binary_prob(left, right); - weight = weight > MV_COUNT_SAT ? 
MV_COUNT_SAT : weight; - this_prob = weighted_prob(last_probs[i >> 1], this_prob, MV_MAX_UPDATE_FACTOR * weight / MV_COUNT_SAT); + uint8_t minLog2TileCols = CalcMinLog2TileCols(); + uint8_t maxLog2TileCols = CalcMaxLog2TileCols(); + + pStdPicInfo->tile_cols_log2 = minLog2TileCols; + + while (pStdPicInfo->tile_cols_log2 < maxLog2TileCols) { + if (u(1) == 1) { // increment_tile_cols_log2 + pStdPicInfo->tile_cols_log2++; + } else { + break; + } } - else - { - this_prob = last_probs[i >> 1]; + + pStdPicInfo->tile_rows_log2 = u(1); + if (pStdPicInfo->tile_rows_log2 == 1) { + pStdPicInfo->tile_rows_log2 += u(1); } - this_probs[i >> 1] = this_prob; - return left + right; + + pPicData->numTiles = (1 << pStdPicInfo->tile_rows_log2) * (1 << pStdPicInfo->tile_cols_log2); } -void VulkanVP9Decoder::adapt_prob(vp9_prob *dest, vp9_prob prep, uint32_t ct[2]) +void VulkanVP9Decoder::ParseSuperFrameIndex(const uint8_t* data, uint32_t data_sz, uint32_t frame_sizes[8], uint32_t* frame_count) { - const int32_t count = std::min(ct[0] + ct[1], MV_COUNT_SAT); - if (count) - { - const vp9_prob newp = get_binary_prob(ct[0], ct[1]); - const int32_t factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT; - *dest = weighted_prob(prep, newp, factor); + uint8_t final_byte = data[data_sz - 1]; + *frame_count = 0; + + if ((final_byte & 0xe0) == 0xc0) { + const uint32_t frames = (final_byte & 0x7) + 1; + const uint32_t mag = ((final_byte >> 3) & 0x3) + 1; + const uint32_t index_sz = 2 + mag * frames; + + if (data_sz >= index_sz && data[data_sz - index_sz] == final_byte) { + // found a valid superframe index + const uint8_t* x = data + data_sz - index_sz + 1; + for (uint32_t i = 0; i < frames; i++) { + uint32_t this_sz = 0; + for (uint32_t j = 0; j < mag; j++) { + this_sz |= (*x++) << (j * 8); + } + frame_sizes[i] = this_sz; + } + *frame_count = frames; + } } - else - *dest = prep; } -void VulkanVP9Decoder::adaptNmvProbs(vp9_prob_update_s *pProbSetup) -{ - uint32_t usehp = 
pProbSetup->allow_high_precision_mv; - uint32_t i, j; - - adapt_probs(0, vp9_mv_joint_tree, - pProbSetup->pProbTab->a.nmvc.joints, - m_PrevCtx.nmvc.joints, - pProbSetup->pCtxCounters->nmvcount.joints); - for (i = 0; i < 2; ++i) - { - adapt_prob(&pProbSetup->pProbTab->a.nmvc.sign[i], - m_PrevCtx.nmvc.sign[i], - pProbSetup->pCtxCounters->nmvcount.sign[i]); - adapt_probs(0, vp9_mv_class_tree, - pProbSetup->pProbTab->a.nmvc.classes[i], - m_PrevCtx.nmvc.classes[i], - pProbSetup->pCtxCounters->nmvcount.classes[i]); - adapt_probs(0, vp9_mv_class0_tree, - pProbSetup->pProbTab->a.nmvc.class0[i], - m_PrevCtx.nmvc.class0[i], - pProbSetup->pCtxCounters->nmvcount.class0[i]); - for (j = 0; j < MV_OFFSET_BITS; ++j) - { - adapt_prob(&pProbSetup->pProbTab->a.nmvc.bits[i][j], - m_PrevCtx.nmvc.bits[i][j], - pProbSetup->pCtxCounters->nmvcount.bits[i][j]); +bool VulkanVP9Decoder::BeginPicture(VkParserPictureData* pnvpd) +{ + VkParserVp9PictureData* const pPicDataVP9 = &pnvpd->CodecSpecific.vp9; + StdVideoVP9ColorConfig* pStdColorConfig = &pPicDataVP9->stdColorConfig; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + + uint32_t width = pPicDataVP9->FrameWidth; + uint32_t height = pPicDataVP9->FrameHeight; + + VkParserSequenceInfo nvsi = m_ExtSeqInfo; + nvsi.eCodec = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; + nvsi.nChromaFormat = pPicDataVP9->ChromaFormat; + nvsi.nMaxWidth = std::max(width, pPicDataVP9->renderWidth); + nvsi.nMaxHeight = std::max(height, pPicDataVP9->renderHeight); + nvsi.nCodedWidth = width; + nvsi.nCodedHeight = height; + nvsi.nDisplayWidth = pPicDataVP9->renderWidth; + nvsi.nDisplayHeight = pPicDataVP9->renderHeight; + nvsi.lDARWidth = pPicDataVP9->renderWidth; + nvsi.lDARHeight = pPicDataVP9->renderHeight; + nvsi.bProgSeq = true; // VP9 doesn't have explicit interlaced coding. 
+ nvsi.nMinNumDecodeSurfaces = 9; + nvsi.uBitDepthLumaMinus8 = pStdColorConfig->BitDepth - 8; + nvsi.uBitDepthChromaMinus8 = pStdColorConfig->BitDepth - 8; + nvsi.codecProfile = pStdPicInfo->profile; + + // Reset decoder only if decode RT orig width is less than required coded width + if ((nvsi.nMaxWidth > m_rtOrigWidth) || (nvsi.nMaxHeight > m_rtOrigHeight)) { + m_rtOrigWidth = nvsi.nMaxWidth; + m_rtOrigHeight = nvsi.nMaxHeight; + + for (int i = 0; i < 8; i++) { + if (m_pBuffers[i].buffer != nullptr) { + m_pBuffers[i].buffer->Release(); + m_pBuffers[i].buffer = nullptr; + } } - for (j = 0; j < CLASS0_SIZE; ++j) - { - adapt_probs(0, vp9_mv_fp_tree, - pProbSetup->pProbTab->a.nmvc.class0_fp[i][j], - m_PrevCtx.nmvc.class0_fp[i][j], - pProbSetup->pCtxCounters->nmvcount.class0_fp[i][j]); + if (m_pCurrPic != nullptr) { + m_pCurrPic->Release(); + m_pCurrPic = nullptr; } - adapt_probs(0, vp9_mv_fp_tree, - pProbSetup->pProbTab->a.nmvc.fp[i], - m_PrevCtx.nmvc.fp[i], - pProbSetup->pCtxCounters->nmvcount.fp[i]); } - if (usehp) - { - for (i = 0; i < 2; ++i) - { - adapt_prob(&pProbSetup->pProbTab->a.nmvc.class0_hp[i], - m_PrevCtx.nmvc.class0_hp[i], - pProbSetup->pCtxCounters->nmvcount.class0_hp[i]); - adapt_prob(&pProbSetup->pProbTab->a.nmvc.hp[i], - m_PrevCtx.nmvc.hp[i], - pProbSetup->pCtxCounters->nmvcount.hp[i]); - } + + if (!init_sequence(&nvsi)) { + assert(!"init_sequence failed!"); + return false; } -} -void VulkanVP9Decoder::UpdateBackwardProbability(vp9_prob_update_s *pProbSetup) -{ - if (!pProbSetup->errorResilient && !pProbSetup->FrameParallelDecoding) - { - adaptCoefProbs(pProbSetup); //vp9_adapt_coef_probs - if(!pProbSetup->keyFrame && !pProbSetup->intraOnly) - { - adaptModeProbs(pProbSetup); //vp9_adapt_mode_probs - adaptModeContext(pProbSetup); - adaptNmvProbs(pProbSetup); //vp9_adapt_mv_probs - } + // Allocate a buffer for the current picture + if (m_pCurrPic == nullptr) { + m_pClient->AllocPictureBuffer(&m_pCurrPic); + assert(m_pCurrPic); + + 
m_pCurrPic->decodeWidth = width; + m_pCurrPic->decodeHeight = height; } - //vp9hwdStoreProbs - if (pProbSetup->RefreshEntropyProbs) - { - memcpy(&m_EntropyLast[pProbSetup->frameContextIdx], pProbSetup->pProbTab, sizeof(m_EntropyLast[pProbSetup->frameContextIdx])); + + pnvpd->PicWidthInMbs = nvsi.nCodedWidth >> 4; + pnvpd->FrameHeightInMbs = nvsi.nCodedHeight >> 4; + pnvpd->pCurrPic = m_pCurrPic; + pnvpd->progressive_frame = 1; + pnvpd->ref_pic_flag = 1; + pnvpd->intra_pic_flag = pPicDataVP9->FrameIsIntra; + pnvpd->chroma_format = pPicDataVP9->ChromaFormat; + + // Reference slots information + for (int i = 0; i < STD_VIDEO_VP9_NUM_REF_FRAMES; i++) { + vkPicBuffBase* pb = reinterpret_cast(m_pBuffers[i].buffer); + pPicDataVP9->pic_idx[i] = pb ? pb->m_picIdx : -1; } - //VP9HwdUpdateRefs + + return true; } diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp index 135af873..83b968ef 100644 --- a/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp +++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp @@ -20,9 +20,6 @@ #include "nvVulkanVideoUtils.h" #include "nvVulkanVideoParser.h" #include -#ifdef ENABLE_VP9_DECODER -#include -#endif VulkanVideoDecoder::VulkanVideoDecoder(VkVideoCodecOperationFlagBitsKHR std) : m_refCount(0) @@ -646,6 +643,7 @@ void VulkanVideoDecoder::end_of_stream() #include "VulkanH264Decoder.h" #include "VulkanH265Decoder.h" #include "VulkanAV1Decoder.h" +#include "VulkanVP9Decoder.h" static nvParserLogFuncType gParserLogFunc = nullptr; static int gLogLevel = 0; @@ -739,12 +737,17 @@ VkResult CreateVulkanVideoDecodeParser(VkVideoCodecOperationFlagBitsKHR videoCod } nvVideoDecodeParser = VkSharedBaseObj(new VulkanAV1Decoder(videoCodecOperation, pParserPictureData->isAnnexB)); break; -#ifdef ENABLE_VP9_DECODER case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: - // TODO: This will not work and is only here as a placeholder to get the 
compiler to include and link the class. + if ((pStdExtensionVersion == nullptr) || + (0 != strcmp(pStdExtensionVersion->extensionName, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME)) || + (pStdExtensionVersion->specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { + nvParserErrorLog("The requested decoder VP9 Codec STD version is NOT supported\n"); + nvParserErrorLog("The supported decoder VP9 Codec STD version is verion %d of %s\n", + VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME); + return VK_ERROR_INCOMPATIBLE_DRIVER; + } nvVideoDecodeParser = VkSharedBaseObj(new VulkanVP9Decoder(videoCodecOperation)); break; -#endif default: nvParserErrorLog("Unsupported codec type!!!\n"); } diff --git a/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp b/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp index 37888fcc..c93a5141 100644 --- a/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp +++ b/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp @@ -140,6 +140,8 @@ class FFmpegDemuxer : public VideoStreamDemuxer { bsf = av_bsf_get_by_name("hevc_mp4toannexb"); } else if (videoCodec == AV_CODEC_ID_AV1) { bsf = av_bsf_get_by_name("av1_metadata"); + } else if (videoCodec == AV_CODEC_ID_VP9) { + bsf = av_bsf_get_by_name("vp9_metadata"); } if (!bsf) { @@ -286,6 +288,10 @@ class FFmpegDemuxer : public VideoStreamDemuxer { videoCodecId = AV_CODEC_ID_H264; } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) { videoCodecId = AV_CODEC_ID_HEVC; + } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + videoCodecId = AV_CODEC_ID_AV1; + } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + videoCodecId = AV_CODEC_ID_VP9; } } @@ -307,12 +313,8 @@ class FFmpegDemuxer : public VideoStreamDemuxer { case AV_CODEC_ID_H264 : return VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR; case AV_CODEC_ID_HEVC : return 
VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR; case AV_CODEC_ID_VP8 : assert(false); return VkVideoCodecOperationFlagBitsKHR(0); - #ifdef VK_EXT_video_decode_vp9 case AV_CODEC_ID_VP9 : return VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; - #endif // VK_EXT_video_decode_vp9 - #ifdef vulkan_video_codec_av1std_decode case AV_CODEC_ID_AV1 : return VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR; - #endif case AV_CODEC_ID_MJPEG : assert(false); return VkVideoCodecOperationFlagBitsKHR(0); default : assert(false); return VkVideoCodecOperationFlagBitsKHR(0); } @@ -365,6 +367,7 @@ class FFmpegDemuxer : public VideoStreamDemuxer { case AV_PIX_FMT_YUVJ420P: ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting color_range case AV_PIX_FMT_YUV420P: ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples) case AV_PIX_FMT_YUV420P10LE: ///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian + case AV_PIX_FMT_YUV420P12LE: ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian case AV_PIX_FMT_YUV420P16LE: ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian case AV_PIX_FMT_YUV420P16BE: ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR; @@ -391,7 +394,7 @@ class FFmpegDemuxer : public VideoStreamDemuxer { virtual uint32_t GetProfileIdc() const { - switch (FFmpegToVkCodecOperation(videoCodec)) { + switch ((uint32_t)FFmpegToVkCodecOperation(videoCodec)) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { switch(profile) { @@ -431,6 +434,19 @@ class FFmpegDemuxer : public VideoStreamDemuxer { } } break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + { + switch(profile) { + case STD_VIDEO_VP9_PROFILE_0: + case STD_VIDEO_VP9_PROFILE_1: + case STD_VIDEO_VP9_PROFILE_2: + case STD_VIDEO_VP9_PROFILE_3: + break; + default: + std::cerr << "\nInvalid VP9 profile: " << profile << 
std::endl; + } + } + break; default: std::cerr << "\nInvalid codec type: " << FFmpegToVkCodecOperation(videoCodec) << std::endl; } diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index 630232e7..57a2c367 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -39,12 +39,8 @@ const char* VkVideoDecoder::GetVideoCodecString(VkVideoCodecOperationFlagBitsKHR { VK_VIDEO_CODEC_OPERATION_NONE_KHR, "None" }, { VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR, "AVC/H.264" }, { VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR, "H.265/HEVC" }, -#ifdef VK_EXT_video_decode_vp9 { VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR, "VP9" }, -#endif // VK_EXT_video_decode_vp9 -#ifdef vulkan_video_codec_av1std { VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR, "AV1" }, -#endif // VK_EXT_video_decode_av1 }; for (unsigned i = 0; i < sizeof(aCodecName) / sizeof(aCodecName[0]); i++) { @@ -126,6 +122,7 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR + | VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR ); assert(videoCodecs != VK_VIDEO_CODEC_OPERATION_NONE_KHR); @@ -637,12 +634,12 @@ int VkVideoDecoder::CopyOptimalToLinearImage(VkCommandBuffer& commandBuffer, copyRegion[0].dstSubresource.layerCount = 1; copyRegion[1].extent.width = copyRegion[0].extent.width; if (mpInfo->planesLayout.secondaryPlaneSubsampledX != 0) { - copyRegion[1].extent.width /= 2; + copyRegion[1].extent.width = (copyRegion[1].extent.width + 1) / 2; } copyRegion[1].extent.height = copyRegion[0].extent.height; if (mpInfo->planesLayout.secondaryPlaneSubsampledY != 0) { - copyRegion[1].extent.height /= 2; + copyRegion[1].extent.height = (copyRegion[1].extent.height + 1) / 2; } copyRegion[1].extent.depth = 1; @@ -706,7 
+703,7 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(pCurrFrameDecParams->bitstreamData->GetMaxSize() >= pCurrFrameDecParams->bitstreamDataLen); pCurrFrameDecParams->decodeFrameInfo.srcBuffer = pCurrFrameDecParams->bitstreamData->GetBuffer(); - assert(pCurrFrameDecParams->bitstreamDataOffset == 0); + //assert(pCurrFrameDecParams->bitstreamDataOffset == 0); assert(pCurrFrameDecParams->firstSliceIndex == 0); // TODO: Assert if bitstreamDataOffset is aligned to VkVideoCapabilitiesKHR::minBitstreamBufferOffsetAlignment pCurrFrameDecParams->decodeFrameInfo.srcBufferOffset = pCurrFrameDecParams->bitstreamDataOffset; @@ -774,7 +771,12 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } pCurrFrameDecParams->dpbSetupPictureResource.codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode. - pCurrFrameDecParams->dpbSetupPictureResource.codedExtent = m_codedExtent; + // Setup picture may have different resolution compared to previous frames in VP9 + // So, set the codedExtent earlier in VP9 specific code and skip it here. + // TODO: Do the same for other codedcs + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent = m_codedExtent; + } if (dpbSetupPictureResourceInfo.currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED) { imageBarriers[numDpbBarriers] = dpbBarrierTemplates[0]; @@ -816,7 +818,14 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } pOutputPictureResource->codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode. - pOutputPictureResource->codedExtent = m_codedExtent; + // Setup picture may have different resolution compared to previous frames in VP9 + // So, set the codedExtent earlier in VP9 specific code and skip it here. 
+ // TODO: Do the same for other codedcs + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pOutputPictureResource->codedExtent = m_codedExtent; + } else { + pOutputPictureResource->codedExtent = pCurrFrameDecParams->dpbSetupPictureResource.codedExtent; + } // For Output Distinct transition the image to DECODE_DST if (pOutputPictureResourceInfo->currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED) { @@ -909,9 +918,11 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters if (pictureResourcesInfo[resId].image != VK_NULL_HANDLE) { - // FIXME: m_codedExtent should have already be populated in in the + // FIXME: m_codedExtent should have already be populated in the // picture resource above from the FB. - pCurrFrameDecParams->pictureResources[resId].codedExtent = m_codedExtent; + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pCurrFrameDecParams->pictureResources[resId].codedExtent = m_codedExtent; + } // FIXME: This parameter must to be adjusted based on the interlaced mode. 
pCurrFrameDecParams->pictureResources[resId].codedOffset = { 0, 0 }; } @@ -958,7 +969,9 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters frameSynchronizationInfo.imageSpecsIndex = m_imageSpecsIndex; VkSharedBaseObj currentVkPictureParameters; - if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { // AV1 + if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + decodeBeginInfo.videoSessionParameters = VK_NULL_HANDLE; + } else if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { // AV1 bool valid = pCurrFrameDecParams->pStdSps->GetClientObject(currentVkPictureParameters); assert(valid); diff --git a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp index 129ec34a..d1646934 100644 --- a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp +++ b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp @@ -173,6 +173,30 @@ struct nvVideoAV1PicParameters { nvVideoDecodeAV1DpbSlotInfo dpbRefList[nvVideoDecodeAV1DpbSlotInfo::TOTAL_REFS_PER_FRAME + 1]; }; + +struct nvVideoDecodeVP9DpbSlotInfo +{ + enum { + // Number of reference frame types (including intra type) + TOTAL_REFS_PER_FRAME = 8, + }; + VkExtent2D codedExtent{}; + + void Invalidate() { memset(this, 0x00, sizeof(*this)); } + + // Set the STD data here for VP9. 
+ +}; + +struct nvVideoVP9PicParameters { + StdVideoDecodeVP9PictureInfo stdPictureInfo; + StdVideoVP9ColorConfig stdColorConfig; + StdVideoVP9Segmentation stdSegment; + StdVideoVP9LoopFilter stdLoopFilter; + VkVideoDecodeVP9PictureInfoKHR vkPictureInfo{ VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR, nullptr, &stdPictureInfo }; + nvVideoDecodeVP9DpbSlotInfo dpbRefList[nvVideoDecodeVP9DpbSlotInfo::TOTAL_REFS_PER_FRAME + 1]; +}; + static vkPicBuffBase* GetPic(VkPicIf* pPicBuf) { return (vkPicBuffBase*)pPicBuf; @@ -550,9 +574,9 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, // Vulkan Video parser.cpp -- maintains its own indices. // We can use more indices in the parser than the spec. (There is a max of 8 but we can use 16) - // Reason for single structure for DPB -- the array is passed in the callback (in the proxy of the processor) - // It checks which references are in use. + // It checks which references are in use. // 2nd Finds which DPB references were assigned before - and reuses indices. 
- // The local array maintains the + // The local array maintains the pRefPicInfo->flags.disable_frame_end_update_cdf = ; pRefPicInfo->flags.segmentation_enabled = ; pRefPicInfo->base_q_idx = ; @@ -574,6 +598,14 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, } } + void setVP9PictureData(nvVideoDecodeVP9DpbSlotInfo* pDpbSlotInfo, + VkVideoReferenceSlotInfoKHR* pReferenceSlots, + uint32_t dpbEntryIdx, uint32_t dpbSlotIndex) + { + // TODO: VP9 dpb management + assert(0); + } + } dpbH264Entry; virtual int32_t AddRef(); @@ -685,6 +717,14 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, VkVideoReferenceSlotInfoKHR* pReferenceSlots, int8_t* pGopReferenceImagesIndexes, int32_t* pCurrAllocatedSlotIndex); + uint32_t FillDpbVP9State(const VkParserPictureData* pd, + VkParserVp9PictureData* pin, + nvVideoDecodeVP9DpbSlotInfo* pDpbSlotInfo, + StdVideoDecodeVP9PictureInfo* pStdPictureInfo, + uint32_t maxRefPictures, + VkVideoReferenceSlotInfoKHR* pReferenceSlots, + int8_t* pGopReferenceImagesIndexes, + int32_t* pCurrAllocatedSlotIndex); int8_t AllocateDpbSlotForCurrentH264( vkPicBuffBase* pPic, StdVideoDecodeH264PictureInfoFlags currPicFlags, @@ -693,7 +733,8 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, int8_t presetDpbSlot); int8_t AllocateDpbSlotForCurrentAV1(vkPicBuffBase* pPic, bool isReference, int8_t presetDpbSlot); - + int8_t AllocateDpbSlotForCurrentVP9(vkPicBuffBase* pPic, bool isReference, + int8_t presetDpbSlot); protected: VkSharedBaseObj m_vkParser; @@ -944,6 +985,7 @@ VkResult VulkanVideoParser::Initialize( static const VkExtensionProperties h264StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION }; static const VkExtensionProperties h265StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION }; static const VkExtensionProperties av1StdExtensionVersion = { 
VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION }; + static const VkExtensionProperties vp9StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION }; const VkExtensionProperties* pStdExtensionVersion = NULL; if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { @@ -952,6 +994,8 @@ VkResult VulkanVideoParser::Initialize( pStdExtensionVersion = &h265StdExtensionVersion; } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { pStdExtensionVersion = &av1StdExtensionVersion; + } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pStdExtensionVersion = &vp9StdExtensionVersion; } else { assert(!"Unsupported codec type"); return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR; @@ -1098,6 +1142,14 @@ int32_t VulkanVideoParser::BeginSequence(const VkParserSequenceInfo* pnvsi) if (pnvsi->eCodec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { maxDpbSlots = 9; + if ((pnvsi->nCodedWidth <= m_nvsi.nCodedWidth) && (pnvsi->nCodedHeight <= m_nvsi.nCodedHeight)) { + return 1; + } + } else if (pnvsi->eCodec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + maxDpbSlots = 9; + if ((pnvsi->nMaxWidth <= m_nvsi.nMaxWidth) && (pnvsi->nMaxHeight <= m_nvsi.nMaxHeight)) { + return 1; + } } uint32_t configDpbSlots = (pnvsi->nMinNumDpbSlots > 0) ? 
pnvsi->nMinNumDpbSlots : maxDpbSlots; @@ -1120,8 +1172,8 @@ int32_t VulkanVideoParser::BeginSequence(const VkParserSequenceInfo* pnvsi) } m_nvsi = *pnvsi; - m_nvsi.nMaxWidth = pnvsi->nCodedWidth; - m_nvsi.nMaxHeight = pnvsi->nCodedHeight; + m_nvsi.nMaxWidth = pnvsi->nMaxWidth; + m_nvsi.nMaxHeight = pnvsi->nMaxHeight; m_maxNumDecodeSurfaces = pnvsi->nMinNumDecodeSurfaces; @@ -1814,7 +1866,7 @@ uint32_t VulkanVideoParser::FillDpbAV1State( uint8_t yellowSquare[] = { 0xf0, 0x9f, 0x9f, 0xa8, 0x00 }; printf("\nSlotsInUse: "); for (int i = 0; i < 9; i++) { - printf("%-2s ", (slotsInUse & (1<ref_frame_idx[i]); + } + printf("\n"); + + printf("m_pictureToDpbSlotMap: "); + for (int i = 0; i < MAX_FRM_CNT; i++) { + printf("%02d ", i); + } + printf("\nm_pictureToDpbSlotMap: "); + for (int i = 0; i < MAX_FRM_CNT; i++) { + printf("%02d ", m_pictureToDpbSlotMap[i]); + } + printf("\n"); + + printf("ref_frame_picture: "); + for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) { + printf("%02d ", inIdx); + } + printf("\nref_frame_picture: "); + for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) { + int8_t picIdx = pin->pic_idx[inIdx]; + printf("%02d ", picIdx); + } + printf("\n"); + } + + bool isKeyFrame = (pin->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY); + + // It doesn't look like this tracking is needed. + int8_t activeReferences[32]; + memset(activeReferences, 0, sizeof(activeReferences)); + for (size_t refName = 0; refName < STD_VIDEO_VP9_REFS_PER_FRAME; refName++) { + int8_t picIdx = isKeyFrame ? 
-1 : pin->pic_idx[pin->ref_frame_idx[refName]]; + if (picIdx < 0) { + //pKhr->referenceNameSlotIndices[refName] = -1; + continue; + } + int8_t dpbSlot = GetPicDpbSlot(picIdx); + assert(dpbSlot >= 0); + //pKhr->referenceNameSlotIndices[refName] = dpbSlot; + activeReferences[dpbSlot]++; + //hdr.delta_frame_id_minus_1[dpbSlot] = pin->delta_frame_id_minus_1[pin->ref_frame_idx[i]]; + } + + for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) { + int8_t picIdx = isKeyFrame ? -1 : pin->pic_idx[inIdx]; + int8_t dpbSlot = -1; + if ((picIdx >= 0) && !(refDpbUsedAndValidMask & (1 << picIdx))) { + dpbSlot = GetPicDpbSlot(picIdx); + + assert(dpbSlot >= 0); // There is still content hitting this assert. + if (dpbSlot < 0) { + continue; + } + + refDpbUsedAndValidMask |= (1 << picIdx); + m_dpb[dpbSlot].MarkInUse(m_nCurrentPictureID); + if (activeReferences[dpbSlot] == 0) { + continue; + } + + pReferenceSlots[referenceIndex].sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR; + pReferenceSlots[referenceIndex].pNext = nullptr; + pReferenceSlots[referenceIndex].slotIndex = dpbSlot; + pGopReferenceImagesIndexes[referenceIndex] = picIdx; + + VkExtent2D &codedExtent = pDpbSlotInfo[referenceIndex].codedExtent; + codedExtent.width = m_dpb[dpbSlot].getPictureResource()->decodeWidth; + codedExtent.height = m_dpb[dpbSlot].getPictureResource()->decodeHeight; + + referenceIndex++; + } + } + + if (m_dumpDpbData) { + printf(";;; pReferenceSlots (%d): ", referenceIndex); + for (size_t i =0 ;i < referenceIndex; i++) { + printf("%02d ", pReferenceSlots[i].slotIndex); + } + printf("\n"); + } + + ResetPicDpbSlots(refDpbUsedAndValidMask); + + // Take into account the reference picture now. 
+ int8_t currPicIdx = GetPicIdx(pd->pCurrPic); + assert(currPicIdx >= 0); + if (currPicIdx >= 0) { + refDpbUsedAndValidMask |= (1 << currPicIdx); + } + + // NOTE(charlie): Most likely we can consider isReference = refresh_frame_flags != 0; + // However, the AMD fw interface appears to always need a setup slot & a destination resource, + // so it's not clear what to properly do in that case. + int8_t dpbSlot = AllocateDpbSlotForCurrentAV1(GetPic(pd->pCurrPic), + true /* isReference */, pd->current_dpb_id); + + assert(dpbSlot >= 0); + + *pCurrAllocatedSlotIndex = dpbSlot; + assert(!(dpbSlot < 0)); + if (dpbSlot >= 0) { + assert(pd->ref_pic_flag); + } + + if (m_dumpDpbData) { + printf("SlotsInUse: "); + uint32_t slotsInUse = m_dpb.getSlotInUseMask(); + for (int i = 0; i < 9; i++) { + printf("%02d ", i); + } + uint8_t greenSquare[] = { 0xf0, 0x9f, 0x9f, 0xa9, 0x00 }; + uint8_t redSquare[] = { 0xf0, 0x9f, 0x9f, 0xa5, 0x00 }; + uint8_t yellowSquare[] = { 0xf0, 0x9f, 0x9f, 0xa8, 0x00 }; + printf("\nSlotsInUse: "); + for (int i = 0; i < 9; i++) { + printf("%-2s ", (slotsInUse & (1<= 0); SetPicDpbSlot(currPicIdx, dpbSlot); // Assign the dpbSlot to the current picture index. m_dpb[dpbSlot].setPictureResource(pPic, m_nCurrentPictureID); // m_nCurrentPictureID is our main index. 
@@ -1942,6 +2148,7 @@ bool VulkanVideoParser::DecodePicture( nvVideoH264PicParameters h264; nvVideoH265PicParameters hevc; nvVideoAV1PicParameters av1; + nvVideoVP9PicParameters vp9; // }; if (m_decoderHandler == NULL) { @@ -2245,7 +2452,7 @@ bool VulkanVideoParser::DecodePicture( } nvVideoDecodeAV1DpbSlotInfo* dpbSlotsAv1 = av1.dpbRefList; - pCurrFrameDecParams->numGopReferenceSlots = + pCurrFrameDecParams->numGopReferenceSlots = FillDpbAV1State(pd, pin, dpbSlotsAv1, @@ -2317,10 +2524,98 @@ bool VulkanVideoParser::DecodePicture( pin->tileInfo.pMiRowStarts = pin->MiRowStarts; pDecodePictureInfo->flags.applyFilmGrain = pin->std_info.flags.apply_grain; + + } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + + VkParserVp9PictureData* pin = &pd->CodecSpecific.vp9; + + vp9 = nvVideoVP9PicParameters(); + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &vp9.stdPictureInfo; + VkVideoDecodeVP9PictureInfoKHR* pVkPicInfo = &vp9.vkPictureInfo; + nvVideoDecodeVP9DpbSlotInfo* pNvDpbSlotInfo = vp9.dpbRefList; + + // Copy std data and link pointers + memcpy(pStdPicInfo, &pin->stdPictureInfo, sizeof(StdVideoDecodeVP9PictureInfo)); + memcpy(&vp9.stdColorConfig, &pin->stdColorConfig, sizeof(StdVideoVP9ColorConfig)); + pStdPicInfo->pColorConfig = &vp9.stdColorConfig; + if (pStdPicInfo->flags.segmentation_enabled == 1) { + memcpy(&vp9.stdSegment, &pin->stdSegmentation, sizeof(StdVideoVP9Segmentation)); + pStdPicInfo->pSegmentation = &vp9.stdSegment; + } + memcpy(&vp9.stdLoopFilter, &pin->stdLoopFilter, sizeof(StdVideoVP9LoopFilter)); + pStdPicInfo->pLoopFilter = &vp9.stdLoopFilter; + + pVkPicInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR; + pVkPicInfo->pStdPictureInfo = pStdPicInfo; + + VkVideoDecodeInfoKHR* pKhrDecodeInfo = &pCurrFrameDecParams->decodeFrameInfo; + pKhrDecodeInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR; + pKhrDecodeInfo->pNext = pVkPicInfo; + + // dpb slots + pCurrFrameDecParams->numGopReferenceSlots = FillDpbVP9State(pd, 
+ pin, + pNvDpbSlotInfo, + pStdPicInfo, + 9, + referenceSlots, + pCurrFrameDecParams->pGopReferenceImagesIndexes, + &setupReferenceSlot.slotIndex); + + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.width = pin->FrameWidth; + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.height = pin->FrameHeight; + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + } + + if (pCurrFrameDecParams->numGopReferenceSlots) { + assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); + for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; + dpbEntryIdx++) { + pCurrFrameDecParams->pictureResources[dpbEntryIdx].sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR; + pCurrFrameDecParams->pictureResources[dpbEntryIdx].codedExtent = pNvDpbSlotInfo[dpbEntryIdx].codedExtent; + referenceSlots[dpbEntryIdx].pPictureResource = &pCurrFrameDecParams->pictureResources[dpbEntryIdx]; + } + + pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = referenceSlots; + pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = pCurrFrameDecParams->numGopReferenceSlots; + } else { + pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = nullptr; + pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; + } + + // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface. + //pDecodePictureInfo->videoFrameType = static_cast(pin->frame_type); + pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan? + + bool isKeyFrame = pin->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY; + for (size_t i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { + int8_t picIdx = isKeyFrame ? 
-1 : pin->pic_idx[pin->ref_frame_idx[i]]; + if (picIdx < 0) { + pVkPicInfo->referenceNameSlotIndices[i] = -1; + continue; + } + + int8_t dpbSlot = GetPicDpbSlot(picIdx); + assert(dpbSlot >= 0); + pVkPicInfo->referenceNameSlotIndices[i] = dpbSlot; + } + + pVkPicInfo->uncompressedHeaderOffset = pin->uncompressedHeaderOffset; + pVkPicInfo->compressedHeaderOffset = pin->compressedHeaderOffset; + pVkPicInfo->tilesOffset = pin->tilesOffset; + + // Use the current frame's width and height for display and writing to output + pDecodePictureInfo->displayWidth = pin->FrameWidth; + pDecodePictureInfo->displayHeight = pin->FrameHeight; } - pDecodePictureInfo->displayWidth = m_nvsi.nDisplayWidth; - pDecodePictureInfo->displayHeight = m_nvsi.nDisplayHeight; + if (m_codecType != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pDecodePictureInfo->displayWidth = m_nvsi.nDisplayWidth; + pDecodePictureInfo->displayHeight = m_nvsi.nDisplayHeight; + } bRet = (m_decoderHandler->DecodePictureWithParameters(pCurrFrameDecParams, pDecodePictureInfo) >= 0); @@ -2405,6 +2700,11 @@ VkResult vulkanCreateVideoParser( assert(!"Decoder AV1 Codec version is NOT supported"); return VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR; } + } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + if (!pStdExtensionVersion || strcmp(pStdExtensionVersion->extensionName, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME) || (pStdExtensionVersion->specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { + assert(!"Decoder VP9 Codec version is NOT supported"); + return VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR; + } } else { assert(!"Decoder Codec is NOT supported"); return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR; diff --git a/vk_video_decoder/src/vulkan_video_decoder.cpp b/vk_video_decoder/src/vulkan_video_decoder.cpp index e26115cd..a0018a46 100644 --- a/vk_video_decoder/src/vulkan_video_decoder.cpp +++ b/vk_video_decoder/src/vulkan_video_decoder.cpp @@ -155,16 +155,8 
@@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR; - VkQueueFlags requestVideoEncodeQueueMask = 0; - if (m_decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - } - if (m_decoderConfig.selectVideoWithComputeQueue) { requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (m_decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -172,16 +164,7 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT; } - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs | - (m_decoderConfig.enableVideoEncoder ? videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR); + VkVideoCodecOperationFlagsKHR videoCodecOperation = videoStreamDemuxer->GetVideoCodec(); const bool supportsShellPresent = ((!m_decoderConfig.noPresent == false) && (pWsiDisplay != nullptr)); const bool createGraphicsQueue = supportsShellPresent ? 
true : false; @@ -196,17 +179,12 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, ( VK_QUEUE_TRANSFER_BIT | requestGraphicsQueueMask | requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoDecodeQueueMask), pWsiDisplay, requestVideoDecodeQueueMask, - ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), - requestVideoEncodeQueueMask, - ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR), + videoCodecOperation, + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, vkPhysicalDevice); if (result != VK_SUCCESS) { @@ -216,8 +194,8 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, } m_vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - m_decoderConfig.enableVideoEncoder ? 1 : 0, // num encode queues - videoCodecs, + 0, // num encode queues + videoCodecOperation, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
@@ -264,6 +242,7 @@ VkResult CreateVulkanVideoDecoder(VkInstance vkInstance, VkPhysicalDevice vkPhys case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: { } diff --git a/vk_video_decoder/test/vulkan-video-dec/Main.cpp b/vk_video_decoder/test/vulkan-video-dec/Main.cpp index 2c5d4d0a..5a02d1b3 100644 --- a/vk_video_decoder/test/vulkan-video-dec/Main.cpp +++ b/vk_video_decoder/test/vulkan-video-dec/Main.cpp @@ -60,6 +60,20 @@ int main(int argc, const char** argv) return -1; } + + VkSharedBaseObj videoStreamDemuxer; + result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), + decoderConfig.forceParserType, + decoderConfig.enableStreamDemuxing, + decoderConfig.initialWidth, + decoderConfig.initialHeight, + decoderConfig.initialBitdepth, + videoStreamDemuxer); + if (result != VK_SUCCESS) { + assert(!"Can't initialize the VideoStreamDemuxer!"); + return result; + } + const int32_t numDecodeQueues = ((decoderConfig.queueId != 0) || (decoderConfig.enableHwLoadBalancing != 0)) ? 
-1 : // all available HW decoders @@ -67,16 +81,8 @@ int main(int argc, const char** argv) VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR; - VkQueueFlags requestVideoEncodeQueueMask = 0; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - } - if (decoderConfig.selectVideoWithComputeQueue) { requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -84,16 +90,9 @@ int main(int argc, const char** argv) requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT; } - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs | - (decoderConfig.enableVideoEncoder ? videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR); + VkVideoCodecOperationFlagsKHR videoCodec = decoderConfig.forceParserType != VK_VIDEO_CODEC_OPERATION_NONE_KHR ? 
+ decoderConfig.forceParserType : + videoStreamDemuxer->GetVideoCodec(); if (!decoderConfig.noPresent) { @@ -111,17 +110,12 @@ int main(int argc, const char** argv) result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT | requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoDecodeQueueMask), displayShell, requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), - requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + videoCodec, + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); return -1; @@ -130,27 +124,14 @@ int main(int argc, const char** argv) vkDevCtxt.GetPresentQueueFamilyIdx())); vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - decoderConfig.enableVideoEncoder ? 
1 : 0, // num encode queues - videoCodecs, + 0, // num encode queues + videoCodec, false, // createTransferQueue true, // createGraphicsQueue true, // createDisplayQueue requestVideoComputeQueueMask != 0 // createComputeQueue ); - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } VkSharedBaseObj frameToFile; if (!decoderConfig.outputFileName.empty()) { @@ -194,8 +175,7 @@ int main(int argc, const char** argv) result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_TRANSFER_BIT | requestVideoDecodeQueueMask | - requestVideoComputeQueueMask | - requestVideoEncodeQueueMask), + requestVideoComputeQueueMask), nullptr, requestVideoDecodeQueueMask); if (result != VK_SUCCESS) { @@ -205,7 +185,7 @@ int main(int argc, const char** argv) result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues, 0, // num encode queues - videoCodecs, + videoCodec, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
@@ -219,20 +199,6 @@ int main(int argc, const char** argv) return -1; } - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } - VkSharedBaseObj frameToFile; if (!decoderConfig.outputFileName.empty()) { const char* crcOutputFile = decoderConfig.outputcrcPerFrame ? decoderConfig.crcOutputFileName.c_str() : nullptr; diff --git a/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp b/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp index 8b4e845b..56bba249 100644 --- a/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp +++ b/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp @@ -114,6 +114,8 @@ int main(int argc, const char** argv) break; case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + break; default: std::cout << "Simple decoder does not support demuxing " << "and the decoder type must be set with --codec " diff --git a/vk_video_encoder/demos/vk-video-enc/Main.cpp b/vk_video_encoder/demos/vk-video-enc/Main.cpp index 259260f5..1a589fe0 100644 --- a/vk_video_encoder/demos/vk-video-enc/Main.cpp +++ b/vk_video_encoder/demos/vk-video-enc/Main.cpp @@ -52,7 +52,6 @@ int main(int argc, char** argv) VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, - VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; @@ -71,6 +70,7 @@ int main(int argc, char** argv) VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, nullptr }; @@ -123,17 
+123,9 @@ int main(int argc, char** argv) VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - VkQueueFlags requestVideoDecodeQueueMask = 0; - if (encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_VIDEO_DECODE_BIT_KHR | - VK_QUEUE_TRANSFER_BIT; - } if (encoderConfig->selectVideoWithComputeQueue) { requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -158,19 +150,6 @@ int main(int argc, char** argv) return -1; } - - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoEncodeCodecs | - encoderConfig->enableVideoDecoder ? 
videoDecodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR; - - VkSharedBaseObj encoder; // the encoder's instance if (supportsDisplay && encoderConfig->enableFramePresent) { @@ -186,14 +165,11 @@ int main(int argc, char** argv) result = vkDevCtxt.InitPhysicalDevice(encoderConfig->deviceId, encoderConfig->GetDeviceUUID(), (VK_QUEUE_GRAPHICS_BIT | - requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoComputeQueueMask | + requestVideoEncodeQueueMask), displayShell, - requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, requestVideoEncodeQueueMask, (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | @@ -206,9 +182,9 @@ int main(int argc, char** argv) assert(displayShell->PhysDeviceCanPresent(vkDevCtxt.getPhysicalDevice(), vkDevCtxt.GetPresentQueueFamilyIdx())); - result = vkDevCtxt.CreateVulkanDevice(encoderConfig->enableVideoDecoder ? 
1 : 0, // num decode queues + result = vkDevCtxt.CreateVulkanDevice(0, // num decode queues numEncodeQueues, // num encode queues - videoCodecs, + encoderConfig->codec, false, // createTransferQueue true, // createGraphicsQueue true, // createDisplayQueue @@ -241,26 +217,22 @@ int main(int argc, char** argv) // No display presentation and no decoder - just the encoder result = vkDevCtxt.InitPhysicalDevice(encoderConfig->deviceId, encoderConfig->GetDeviceUUID(), (requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | requestVideoEncodeQueueMask | VK_QUEUE_TRANSFER_BIT), nullptr, - requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR), + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + encoderConfig->codec); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); return -1; } - result = vkDevCtxt.CreateVulkanDevice(encoderConfig->enableVideoDecoder ? 1 : 0, // num decode queues + result = vkDevCtxt.CreateVulkanDevice(0, // num decode queues numEncodeQueues, // num encode queues - videoCodecs, + encoderConfig->codec, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h index c7de5367..e826064a 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h @@ -762,7 +762,6 @@ struct EncoderConfig : public VkVideoRefCountBase { uint32_t verboseMsg : 1; uint32_t enableFramePresent : 1; uint32_t enableFrameDirectModePresent : 1; - uint32_t enableVideoDecoder : 1; uint32_t enableHwLoadBalancing : 1; uint32_t selectVideoWithComputeQueue : 1; uint32_t enablePreprocessComputeFilter : 1; @@ -857,7 +856,6 @@ struct EncoderConfig : public VkVideoRefCountBase { , verboseMsg(false) , enableFramePresent(false) , enableFrameDirectModePresent(false) - , enableVideoDecoder(false) , enableHwLoadBalancing(false) , selectVideoWithComputeQueue(false) , enablePreprocessComputeFilter(true) diff --git a/vk_video_encoder/src/vulkan_video_encoder.cpp b/vk_video_encoder/src/vulkan_video_encoder.cpp index 196caf3a..803d9da6 100644 --- a/vk_video_encoder/src/vulkan_video_encoder.cpp +++ b/vk_video_encoder/src/vulkan_video_encoder.cpp @@ -115,6 +115,7 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, nullptr }; @@ -141,17 +142,8 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - VkQueueFlags requestVideoDecodeQueueMask = 0; - if (m_encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_VIDEO_DECODE_BIT_KHR | - VK_QUEUE_TRANSFER_BIT; - } - if (m_encoderConfig->selectVideoWithComputeQueue) { requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (m_encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - 
} } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -162,17 +154,13 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid // No display presentation and no decoder - just the encoder result = m_vkDevCtxt.InitPhysicalDevice(m_encoderConfig->deviceId, m_encoderConfig->GetDeviceUUID(), ( requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | requestVideoEncodeQueueMask | VK_QUEUE_TRANSFER_BIT), nullptr, - requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR), + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + videoCodecOperation); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); @@ -184,21 +172,9 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid -1 : // all available HW encoders 1; // only one HW encoder instance - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoEncodeCodecs | - (m_encoderConfig->enableVideoDecoder ? videoDecodeCodecs : (uint32_t)VK_VIDEO_CODEC_OPERATION_NONE_KHR); - - - result = m_vkDevCtxt.CreateVulkanDevice(m_encoderConfig->enableVideoDecoder ? 1 : 0, // num decode queues + result = m_vkDevCtxt.CreateVulkanDevice(0, // num decode queues numEncodeQueues, // num encode queues - videoCodecs, + videoCodecOperation, // If no graphics or compute queue is requested, only video queues // will be created. 
Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. From 321f336862d1d29beda15d2e7f89f10c1456eda9 Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Mon, 28 Apr 2025 00:00:58 -0700 Subject: [PATCH 7/7] decode: Include pSetupReferenceSlot in the VkVideoBeginCodingInfoKHR Include pSetupReferenceSlot in the VkVideoBeginCodingInfoKHR::pReferenceSlots list when it is not null. --- .../libs/VkVideoDecoder/VkVideoDecoder.cpp | 8 ++- .../libs/VkVideoParser/VulkanVideoParser.cpp | 66 ++++++++++++------- 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index 57a2c367..6bff5ce5 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -937,8 +937,12 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } } - decodeBeginInfo.referenceSlotCount = pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount; - decodeBeginInfo.pReferenceSlots = pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots; + // Add setup reference slot details to decodeBeginInfo + decodeBeginInfo.referenceSlotCount = pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount + + (pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot ? 1 : 0); + decodeBeginInfo.pReferenceSlots = (pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount > 0) ? + pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots : + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot; m_imageSpecsIndex.displayOut = ((m_dpbAndOutputCoincide == VK_TRUE) && !(pDecodePictureInfo->flags.applyFilmGrain == VK_TRUE)) ? 
diff --git a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp index d1646934..bbb68e66 100644 --- a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp +++ b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp @@ -2279,11 +2279,6 @@ bool VulkanVideoParser::DecodePicture( h264.stdPictureInfo.flags, &setupReferenceSlot.slotIndex); // TODO: Remove it is for debugging only. Reserved fields must be set to "0". pout->stdPictureInfo.reserved1 = pCurrFrameDecParams->numGopReferenceSlots; - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; @@ -2299,6 +2294,15 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = NULL; pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) { @@ -2388,11 +2392,6 @@ bool VulkanVideoParser::DecodePicture( referenceSlots, pCurrFrameDecParams->pGopReferenceImagesIndexes, 
&setupReferenceSlot.slotIndex); - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); @@ -2410,6 +2409,16 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } + if (m_dumpParserData) { for (int32_t i = 0; i < HEVC_MAX_DPB_SLOTS; i++) { std::cout << "\tdpbIndex: " << i; @@ -2462,12 +2471,6 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->pGopReferenceImagesIndexes, &setupReferenceSlot.slotIndex); - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } - if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; @@ -2483,6 +2486,17 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + + 
assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } + // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface. //pDecodePictureInfo->videoFrameType = static_cast(pin->frame_type); pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan? @@ -2562,14 +2576,6 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->pGopReferenceImagesIndexes, &setupReferenceSlot.slotIndex); - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.width = pin->FrameWidth; - pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.height = pin->FrameHeight; - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } - if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; @@ -2586,6 +2592,18 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.width = pin->FrameWidth; + 
pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.height = pin->FrameHeight; + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } + // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface. //pDecodePictureInfo->videoFrameType = static_cast(pin->frame_type); pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan?