diff --git a/common/include/VkVideoCore/DecodeFrameBufferIf.h b/common/include/VkVideoCore/DecodeFrameBufferIf.h index 60393c38..fc99a4f5 100644 --- a/common/include/VkVideoCore/DecodeFrameBufferIf.h +++ b/common/include/VkVideoCore/DecodeFrameBufferIf.h @@ -107,6 +107,16 @@ class DecodeFrameBufferIf } }; + enum SemSyncTypeIdx : uint64_t { SEM_SYNC_TYPE_IDX_DECODE = (1ULL << 0), // Decode operation was signaled + SEM_SYNC_TYPE_IDX_DISPLAY = (1ULL << 0), // Display operation was signaled + SEM_SYNC_TYPE_IDX_FILTER = (1ULL << 1), // Filter operation was signaled + SEM_SYNC_TYPE_IDX_SHIFT = 2, // Shift semaphore counter value left + }; + + static uint64_t GetSemaphoreValue(SemSyncTypeIdx semSyncType, uint64_t semOrder) { + return (semOrder << SEM_SYNC_TYPE_IDX_SHIFT) | semSyncType; + } + }; #endif /* _VKVIDEOCORE_DECODEFRAMEBUFFERIF_H_ */ diff --git a/common/include/VkVideoCore/VkVideoCoreProfile.h b/common/include/VkVideoCore/VkVideoCoreProfile.h index 7483d8ce..55ea56e7 100644 --- a/common/include/VkVideoCore/VkVideoCoreProfile.h +++ b/common/include/VkVideoCore/VkVideoCoreProfile.h @@ -50,7 +50,8 @@ class VkVideoCoreProfile { return (videoCodecOperations & (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); @@ -100,12 +101,26 @@ class VkVideoCoreProfile m_av1DecodeProfile = *pProfileExt; } else { // Use default ext profile parameters - m_av1DecodeProfile.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR; + m_av1DecodeProfile.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR; m_av1DecodeProfile.stdProfile = STD_VIDEO_AV1_PROFILE_MAIN; } m_profile.pNext = &m_av1DecodeProfile; m_av1DecodeProfile.pNext = NULL; - + } else if 
(m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + VkVideoDecodeVP9ProfileInfoKHR const * pProfileExt = (VkVideoDecodeVP9ProfileInfoKHR const *)pVideoProfileExt; + if (pProfileExt && (pProfileExt->sType != VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR)) { + m_profile.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + return false; + } + if (pProfileExt) { + m_vp9DecodeProfile = *pProfileExt; + } else { + // Use default ext profile parameters + m_vp9DecodeProfile.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR; + m_vp9DecodeProfile.stdProfile = STD_VIDEO_VP9_PROFILE_0; + } + m_profile.pNext = &m_vp9DecodeProfile; + m_vp9DecodeProfile.pNext = NULL; } else if (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR) { VkVideoEncodeH264ProfileInfoKHR const * pProfileExt = (VkVideoEncodeH264ProfileInfoKHR const *)pVideoProfileExt; if (pProfileExt && (pProfileExt->sType != VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR)) { @@ -205,6 +220,7 @@ class VkVideoCoreProfile VkVideoDecodeH264ProfileInfoKHR decodeH264ProfilesRequest; VkVideoDecodeH265ProfileInfoKHR decodeH265ProfilesRequest; VkVideoDecodeAV1ProfileInfoKHR decodeAV1ProfilesRequest; + VkVideoDecodeVP9ProfileInfoKHR decodeVP9ProfilesRequest; VkVideoEncodeH264ProfileInfoKHR encodeH264ProfilesRequest; VkVideoEncodeH265ProfileInfoKHR encodeH265ProfilesRequest; VkVideoEncodeAV1ProfileInfoKHR encodeAV1ProfilesRequest; @@ -243,6 +259,13 @@ class VkVideoCoreProfile STD_VIDEO_H265_PROFILE_IDC_INVALID : (StdVideoH265ProfileIdc)videoH26xProfileIdc; pVideoProfileExt = (VkBaseInStructure*)&decodeH265ProfilesRequest; + } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + decodeVP9ProfilesRequest.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR; + decodeVP9ProfilesRequest.pNext = NULL; + decodeVP9ProfilesRequest.stdProfile = (videoH26xProfileIdc == 0) ? 
+ STD_VIDEO_VP9_PROFILE_0 : + (StdVideoVP9Profile)videoH26xProfileIdc; + pVideoProfileExt = (VkBaseInStructure*)&decodeVP9ProfilesRequest; } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR) { encodeH264ProfilesRequest.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR; encodeH264ProfilesRequest.pNext = pEncodeUsageInfo; @@ -287,7 +310,9 @@ class VkVideoCoreProfile bool IsDecodeCodecType() const { return ((m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) || - (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR)); + (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) || + (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) || + (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR)); } operator bool() const @@ -340,6 +365,15 @@ class VkVideoCoreProfile } } + const VkVideoDecodeVP9ProfileInfoKHR* GetDecodeVP9Profile() const + { + if (m_vp9DecodeProfile.sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR) { + return &m_vp9DecodeProfile; + } else { + return NULL; + } + } + const VkVideoEncodeH264ProfileInfoKHR* GetEncodeH264Profile() const { if (m_h264EncodeProfile.sType == VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR) { @@ -605,6 +639,8 @@ class VkVideoCoreProfile return "decode h.265"; case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: return "decode av1"; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + return "decode vp9"; case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: return "encode h.264"; case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: @@ -769,6 +805,7 @@ class VkVideoCoreProfile VkVideoDecodeH264ProfileInfoKHR m_h264DecodeProfile; VkVideoDecodeH265ProfileInfoKHR m_h265DecodeProfile; VkVideoDecodeAV1ProfileInfoKHR m_av1DecodeProfile; + VkVideoDecodeVP9ProfileInfoKHR m_vp9DecodeProfile; VkVideoEncodeH264ProfileInfoKHR m_h264EncodeProfile; 
VkVideoEncodeH265ProfileInfoKHR m_h265EncodeProfile; VkVideoEncodeAV1ProfileInfoKHR m_av1EncodeProfile; diff --git a/common/include/VkVideoCore/VulkanVideoCapabilities.h b/common/include/VkVideoCore/VulkanVideoCapabilities.h index b703298b..a2cc4af9 100644 --- a/common/include/VkVideoCore/VulkanVideoCapabilities.h +++ b/common/include/VkVideoCore/VulkanVideoCapabilities.h @@ -38,6 +38,7 @@ class VulkanVideoCapabilities VkVideoDecodeH264CapabilitiesKHR h264Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, nullptr }; VkVideoDecodeH265CapabilitiesKHR h265Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, nullptr }; VkVideoDecodeAV1CapabilitiesKHR av1Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_KHR, nullptr }; + VkVideoDecodeVP9CapabilitiesKHR vp9Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR, nullptr }; if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { videoDecodeCapabilities.pNext = &h264Capabilities; @@ -45,6 +46,8 @@ class VulkanVideoCapabilities videoDecodeCapabilities.pNext = &h265Capabilities; } else if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { videoDecodeCapabilities.pNext = &av1Capabilities; + } else if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + videoDecodeCapabilities.pNext = &vp9Capabilities; } else { assert(!"Unsupported codec"); return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR; @@ -197,6 +200,16 @@ class VulkanVideoCapabilities } } break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + { + assert(pVideoDecodeCapabilities->pNext); + const VkVideoDecodeVP9CapabilitiesKHR* pVP9Capabilities = (VkVideoDecodeVP9CapabilitiesKHR*)pVideoDecodeCapabilities->pNext; + assert(pVP9Capabilities->sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR); + if (pVP9Capabilities->sType != VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR) { + return VK_ERROR_INITIALIZATION_FAILED; + } + } + break; case 
VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: { assert(pVideoEncodeCapabilities->pNext); @@ -277,6 +290,26 @@ class VulkanVideoCapabilities assert(!"Unsupported h.265 STD version"); return VK_ERROR_INCOMPATIBLE_DRIVER; } + } else if (videoProfile.GetCodecType() == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + const VkVideoDecodeAV1CapabilitiesKHR* pAV1DecCapabilities = (VkVideoDecodeAV1CapabilitiesKHR*)pVideoDecodeCapabilities->pNext; + std::cout << "\t\t\t" << "maxLevelIdc: " << pAV1DecCapabilities->maxLevel << std::endl; + if (strncmp(pVideoCapabilities->stdHeaderVersion.extensionName, + VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, + sizeof (pVideoCapabilities->stdHeaderVersion.extensionName) - 1U) || + (pVideoCapabilities->stdHeaderVersion.specVersion != VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION)) { + assert(!"Unsupported AV1 STD version"); + return VK_ERROR_INCOMPATIBLE_DRIVER; + } + } else if (videoProfile.GetCodecType() == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + const VkVideoDecodeVP9CapabilitiesKHR* pVP9DecCapabilities = (VkVideoDecodeVP9CapabilitiesKHR*)pVideoDecodeCapabilities->pNext; + std::cout << "\t\t\t" << "maxLevelIdc: " << pVP9DecCapabilities->maxLevel << std::endl; + if (strncmp(pVideoCapabilities->stdHeaderVersion.extensionName, + VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, + sizeof (pVideoCapabilities->stdHeaderVersion.extensionName) - 1U) || + (pVideoCapabilities->stdHeaderVersion.specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { + assert(!"Unsupported VP9 STD version"); + return VK_ERROR_INCOMPATIBLE_DRIVER; + } } else { assert(!"Unsupported codec"); } @@ -354,8 +387,12 @@ class VulkanVideoCapabilities int32_t* pVideoQueueFamily, VkQueueFlags queueFlagsRequired = ( VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR), VkVideoCodecOperationFlagsKHR videoCodeOperations = - ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - 
VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | + ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)) { std::vector queues; @@ -429,6 +466,16 @@ class VulkanVideoCapabilities &videoDecodeCapabilities); } + static VkResult GetDecodeVP9Capabilities(const VulkanDeviceContext* vkDevCtx, uint32_t, + const VkVideoProfileInfoKHR& videoProfile, + VkVideoCapabilitiesKHR &videoDecodeCapabilities) + { + videoDecodeCapabilities.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + return vkDevCtx->GetPhysicalDeviceVideoCapabilitiesKHR(vkDevCtx->getPhysicalDevice(), + &videoProfile, + &videoDecodeCapabilities); + } + static VkResult GetEncodeH264Capabilities(const VulkanDeviceContext* vkDevCtx, uint32_t, const VkVideoProfileInfoKHR& videoProfile, VkVideoCapabilitiesKHR &videoEncodeCapabilities, diff --git a/common/libs/VkCodecUtils/DecoderConfig.h b/common/libs/VkCodecUtils/DecoderConfig.h index 4d06a1d5..b0f14a59 100644 --- a/common/libs/VkCodecUtils/DecoderConfig.h +++ b/common/libs/VkCodecUtils/DecoderConfig.h @@ -75,7 +75,6 @@ struct DecoderConfig { directMode = false; enableHwLoadBalancing = false; selectVideoWithComputeQueue = false; - enableVideoEncoder = false; outputy4m = false; outputcrcPerFrame = false; outputcrc = false; @@ -137,6 +136,9 @@ struct DecoderConfig { } else if (strcmp(args[0], "av1") == 0) { forceParserType = VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR; return true; + } else if ((strcmp(args[0], "vp9") == 0)) { + forceParserType = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; + return true; } else { std::cerr << "Invalid codec \"" << args[0] << "\"" << std::endl; return false; @@ -470,7 +472,6 @@ struct DecoderConfig { uint32_t 
noPresent : 1; uint32_t enableHwLoadBalancing : 1; uint32_t selectVideoWithComputeQueue : 1; - uint32_t enableVideoEncoder : 1; uint32_t outputy4m : 1; uint32_t outputcrc : 1; uint32_t outputcrcPerFrame : 1; diff --git a/common/libs/VkCodecUtils/Helpers.h b/common/libs/VkCodecUtils/Helpers.h index 4218d36d..e4c70abb 100644 --- a/common/libs/VkCodecUtils/Helpers.h +++ b/common/libs/VkCodecUtils/Helpers.h @@ -238,23 +238,21 @@ inline VkResult WaitAndResetFence(const VkInterfaceFunctions* vkIf, VkDevice dev while (fenceTotalWaitTimeout >= fenceCurrentWaitTimeout) { - result = vkIf->WaitForFences(device, 1, &fence, true, fenceWaitTimeout); - if (result != VK_SUCCESS) { - fprintf(stderr, "\t **** WARNING: fence %s(%llu) is not done after %llu nSec with result 0x%x ****\n", - fenceName, (long long unsigned int)fence, (long long unsigned int)fenceWaitTimeout, result); - assert(!"Fence is not signaled yet after more than 100 mSec wait"); - } + fenceCurrentWaitTimeout += fenceWaitTimeout; - if (result != VK_TIMEOUT) { - break; + result = vkIf->WaitForFences(device, 1, &fence, true, fenceWaitTimeout); + if (result == VK_TIMEOUT) { + fprintf(stderr, "\t **** WARNING: fence %s(%llu) is not done after %llu mSec with result 0x%x ****\n", + fenceName, (long long unsigned int)fence, (long long unsigned int)fenceCurrentWaitTimeout/(1000ULL * 1000ULL), result); + } else { + break; // either success or an error occurred } - fenceCurrentWaitTimeout += fenceWaitTimeout; } if (result != VK_SUCCESS) { - fprintf(stderr, "\t **** ERROR: fence %s(%llu) is not done after %llu nSec with result 0x%x ****\n", - fenceName, (long long unsigned int)fence, (long long unsigned int)fenceTotalWaitTimeout, vkIf->GetFenceStatus(device, fence)); + fprintf(stderr, "\t **** ERROR: fence %s(%llu) is not done after %llu mSec with result 0x%x ****\n", + fenceName, (long long unsigned int)fence, (long long unsigned int)fenceTotalWaitTimeout/(1000ULL * 1000ULL), vkIf->GetFenceStatus(device, fence)); 
assert(!"Fence is not signaled yet after more than 100 mSec wait"); } diff --git a/common/libs/VkCodecUtils/VkImageResource.cpp b/common/libs/VkCodecUtils/VkImageResource.cpp index 302ba8a1..0ea91333 100644 --- a/common/libs/VkCodecUtils/VkImageResource.cpp +++ b/common/libs/VkCodecUtils/VkImageResource.cpp @@ -88,6 +88,11 @@ VkImageResource::VkImageResource(const VulkanDeviceContext* vkDevCtx, } } +VkImageResource::~VkImageResource() +{ + Destroy(); +} + VkResult VkImageResource::Create(const VulkanDeviceContext* vkDevCtx, const VkImageCreateInfo* pImageCreateInfo, VkMemoryPropertyFlags memoryPropertyFlags, diff --git a/common/libs/VkCodecUtils/VkImageResource.h b/common/libs/VkCodecUtils/VkImageResource.h index 314a2f01..0c4c0ac8 100644 --- a/common/libs/VkCodecUtils/VkImageResource.h +++ b/common/libs/VkCodecUtils/VkImageResource.h @@ -113,7 +113,7 @@ class VkImageResource : public VkVideoRefCountBase void Destroy(); - virtual ~VkImageResource() { Destroy(); } + virtual ~VkImageResource(); }; class VkImageResourceView : public VkVideoRefCountBase diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index 26d78757..097439d7 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -328,6 +328,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize); assert(readImagePtr != nullptr); + int32_t secondaryPlaneWidth = frameWidth; int32_t secondaryPlaneHeight = frameHeight; int32_t imageHeight = frameHeight; bool isUnnormalizedRgba = false; @@ -335,8 +336,11 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { isUnnormalizedRgba = true; } + if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { + secondaryPlaneWidth = (secondaryPlaneWidth + 1) / 2; + } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { - secondaryPlaneHeight 
/= 2; + secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2; } VkImageSubresource subResource = {}; @@ -381,15 +385,9 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { yuvPlaneLayouts[0].offset = 0; yuvPlaneLayouts[0].rowPitch = frameWidth * bytesPerPixel; yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight; - yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[1].rowPitch /= 2; - } + yuvPlaneLayouts[1].rowPitch = secondaryPlaneWidth * bytesPerPixel; yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight); - yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[2].rowPitch /= 2; - } + yuvPlaneLayouts[2].rowPitch = secondaryPlaneWidth * bytesPerPixel; // Copy the luma plane const uint32_t numCompatiblePlanes = 1; @@ -410,7 +408,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { for (uint32_t plane = numCompatiblePlanes; plane < numPlanes; plane++) { const uint32_t srcPlane = std::min(plane, mpInfo->planesLayout.numberOfExtraPlanes); uint8_t* pDst = pOutBuffer + yuvPlaneLayouts[plane].offset; - const int32_t planeWidth = mpInfo->planesLayout.secondaryPlaneSubsampledX ? frameWidth / 2 : frameWidth; + const int32_t planeWidth = mpInfo->planesLayout.secondaryPlaneSubsampledX ? 
(frameWidth + 1) / 2 : frameWidth; for (int32_t height = 0; height < secondaryPlaneHeight; height++) { const uint8_t* pSrc; diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp index f13d598e..c85c2814 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp +++ b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp @@ -214,11 +214,27 @@ VkResult VulkanDeviceContext::AddReqDeviceExtensions(const char* const* required break; } m_requestedDeviceExtensions.push_back(name); + if (verbose) { + std::cout << "Added required device extension: " << name << std::endl; + } } return VK_SUCCESS; } +VkResult VulkanDeviceContext::AddReqDeviceExtension(const char* requiredDeviceExtension, bool verbose) +{ + if (requiredDeviceExtension) { + m_requestedDeviceExtensions.push_back(requiredDeviceExtension); + if (verbose) { + std::cout << "Added required device extension: " << requiredDeviceExtension << std::endl; + } + } + + return VK_SUCCESS; +} + + // optional device extensions VkResult VulkanDeviceContext::AddOptDeviceExtensions(const char* const* optionalDeviceExtensions, bool verbose) { @@ -229,6 +245,9 @@ VkResult VulkanDeviceContext::AddOptDeviceExtensions(const char* const* optional break; } m_optDeviceExtensions.push_back(name); + if (verbose) { + std::cout << "Added optional device extension: " << name << std::endl; + } } return VK_SUCCESS; @@ -712,26 +731,57 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues, devInfo.queueCreateInfoCount++; } + VkPhysicalDeviceVideoDecodeVP9FeaturesKHR videoDecodeVP9Feature { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR, + nullptr, + false // videoDecodeVP9 + }; + VkPhysicalDeviceVideoEncodeAV1FeaturesKHR videoEncodeAV1Feature { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_AV1_FEATURES_KHR, nullptr, false // videoEncodeAV1 - }; + }; + // Chain only the structures that are requested + VkBaseInStructure* pNext = nullptr; + if 
(videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) { + videoEncodeAV1Feature.pNext = pNext; + pNext = (VkBaseInStructure*)&videoEncodeAV1Feature; + } + if (videoCodecs & VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + videoDecodeVP9Feature.pNext = pNext; + pNext = (VkBaseInStructure*)&videoDecodeVP9Feature; + } + VkPhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeatures { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, + pNext, + VK_FALSE + }; VkPhysicalDeviceVideoMaintenance1FeaturesKHR videoMaintenance1Features { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR, - ((videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) != 0) ? - &videoEncodeAV1Feature : - nullptr, - false}; + &timelineSemaphoreFeatures, + VK_FALSE + }; VkPhysicalDeviceSynchronization2Features synchronization2Features { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES, &videoMaintenance1Features, - false + VK_FALSE }; VkPhysicalDeviceFeatures2 deviceFeatures { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, &synchronization2Features}; GetPhysicalDeviceFeatures2(m_physDevice, &deviceFeatures); + + assert(timelineSemaphoreFeatures.timelineSemaphore); + assert(videoMaintenance1Features.videoMaintenance1); + assert(synchronization2Features.synchronization2); + assert(((videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) != 0) == + (videoEncodeAV1Feature.videoEncodeAV1 != VK_FALSE)); + assert(((videoCodecs & VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) != 0) == + (videoDecodeVP9Feature.videoDecodeVP9 != VK_FALSE)); + + // Validate feature support here. + // TODO: Currently this method is receiving all codec bits irrespective of the codec that is required to decode/encode provided input. + // Provide only required codec and features and validate the support. 
+ devInfo.pNext = &deviceFeatures; if ((numDecodeQueues > 0) && @@ -987,6 +1037,7 @@ VkResult VulkanDeviceContext::PopulateDeviceExtensions() VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, VkInstance vkInstance, + VkVideoCodecOperationFlagsKHR videoCodecs, bool enableWsi, bool enableWsiDirectMode, bool enableValidation, @@ -1020,6 +1071,7 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; @@ -1039,6 +1091,7 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, nullptr }; @@ -1070,6 +1123,19 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, /********** End WSI instance extensions support *******************************************/ #endif // VIDEO_DISPLAY_QUEUE_SUPPORT + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME); + } + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME); + } + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME); + } + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME); + } + VkResult result = InitVulkanDevice(pAppName, vkInstance, enbaleVerboseDump); if (result != VK_SUCCESS) { printf("Could not initialize the Vulkan device!\n"); diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.h b/common/libs/VkCodecUtils/VulkanDeviceContext.h index a3cf53a5..6e83e33f 100644 --- 
a/common/libs/VkCodecUtils/VulkanDeviceContext.h +++ b/common/libs/VkCodecUtils/VulkanDeviceContext.h @@ -24,6 +24,7 @@ #include #include #include "VkShell/VkWsiDisplay.h" +#include "VkCodecUtils/VulkanSemaphoreDump.h" class VulkanDeviceContext : public vk::VkInterfaceFunctions { @@ -50,6 +51,21 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { MAX_QUEUE_FAMILIES = 6, // Gfx, Present, Compute, Transfer, Decode, Encode }; + static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_DECODE = + VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; + + static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_ENCODE = + VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR; + + static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_ALL = + VIDEO_CODEC_OPERATIONS_DECODE | + VIDEO_CODEC_OPERATIONS_ENCODE; + VulkanDeviceContext(); VkInstance getInstance() const { @@ -157,11 +173,22 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { }; VkResult MultiThreadedQueueSubmit(const QueueFamilySubmitType submitType, const int32_t queueIndex, - uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) const + uint32_t submitCount, const VkSubmitInfo2KHR* pSubmits, VkFence fence, + const char* submissionName = nullptr, + uint64_t decodeEncodeOrder = UINT64_MAX, + uint64_t displayInputOrder = UINT64_MAX) const { MtQueueMutex queue(this, submitType, queueIndex); if (queue) { - return QueueSubmit(queue, submitCount, pSubmits, fence); + + // Dump semaphore info for debugging + if (false) { + for (uint32_t i = 0; i < submitCount; i++) { + VulkanSemaphoreDump::DumpSemaphoreInfo(pSubmits[i], submissionName, decodeEncodeOrder, displayInputOrder); + } + } + + return QueueSubmit2KHR(queue, submitCount, 
pSubmits, fence); } else { return VK_ERROR_INITIALIZATION_FAILED; } @@ -218,6 +245,7 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { VkResult InitVulkanDecoderDevice(const char * pAppName, VkInstance vkInstance = VK_NULL_HANDLE, + VkVideoCodecOperationFlagsKHR videoCodecs = VIDEO_CODEC_OPERATIONS_ALL, bool enableWsi = false, bool enableWsiDirectMode = false, bool enableValidation = false, @@ -231,6 +259,7 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { VkResult AddReqInstanceExtension(const char* requiredInstanceExtension, bool verbose = false); VkResult CheckAllInstanceExtensions(bool verbose = false); VkResult AddReqDeviceExtensions(const char* const* requiredDeviceExtensions, bool verbose = false); + VkResult AddReqDeviceExtension(const char* requiredDeviceExtension, bool verbose = false); VkResult AddOptDeviceExtensions(const char* const* optionalDeviceExtensions, bool verbose = false); bool HasAllDeviceExtensions(VkPhysicalDevice physDevice, const char* printMissingDeviceExt = nullptr); @@ -248,26 +277,16 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { const VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_TRANSFER_BIT, const VkVideoCodecOperationFlagsKHR requestVideoDecodeQueueOperations = - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), + VIDEO_CODEC_OPERATIONS_DECODE, const VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_TRANSFER_BIT, const VkVideoCodecOperationFlagsKHR requestVideoEncodeQueueOperations = - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR), + VIDEO_CODEC_OPERATIONS_ENCODE, VkPhysicalDevice vkPhysicalDevice = VK_NULL_HANDLE); VkResult CreateVulkanDevice(int32_t numDecodeQueues = 1, int32_t numEncodeQueues = 0, - 
VkVideoCodecOperationFlagsKHR videoCodecs = - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) | - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR), + VkVideoCodecOperationFlagsKHR videoCodecs = VIDEO_CODEC_OPERATIONS_ALL, bool createTransferQueue = false, bool createGraphicsQueue = false, bool createPresentQueue = false, diff --git a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp index db05c81a..f78b0de8 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp +++ b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp @@ -410,3 +410,8 @@ const uint8_t* VulkanDeviceMemoryImpl::GetReadOnlyDataPtr(VkDeviceSize offset, V maxSize = m_memoryRequirements.size - offset; return readData; } + +VulkanDeviceMemoryImpl::~VulkanDeviceMemoryImpl() +{ + Deinitialize(); +} diff --git a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h index 6b94e38e..f7d1c2d7 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h +++ b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h @@ -106,7 +106,7 @@ class VulkanDeviceMemoryImpl : public VkVideoRefCountBase void Deinitialize(); - virtual ~VulkanDeviceMemoryImpl() { Deinitialize(); } + virtual ~VulkanDeviceMemoryImpl(); private: std::atomic m_refCount; diff --git a/common/libs/VkCodecUtils/VulkanDisplayFrame.h b/common/libs/VkCodecUtils/VulkanDisplayFrame.h index 246183c9..c86f5ea0 100644 --- a/common/libs/VkCodecUtils/VulkanDisplayFrame.h +++ b/common/libs/VkCodecUtils/VulkanDisplayFrame.h @@ -41,7 +41,9 @@ class VulkanDisplayFrame VkFence frameCompleteFence; // If valid, the fence is signaled when the decoder or encoder is done decoding / encoding the frame. 
VkFence frameConsumerDoneFence; // If valid, the fence is signaled when the consumer (graphics, compute or display) is done using the frame. VkSemaphore frameCompleteSemaphore; // If valid, the semaphore is signaled when the decoder or encoder is done decoding / encoding the frame. - VkSemaphore frameConsumerDoneSemaphore; // If valid, the semaphore is signaled when the consumer (graphics, compute or display) is done using the frame. + VkSemaphore consumerCompleteSemaphore; // If valid, the semaphore is signaled when the consumer (graphics, compute or display) is done using the frame. + uint64_t frameCompleteDoneSemValue; // The semaphore is signaled by the decoder or the decoder's filter when this semaphore value has been reached. + uint64_t frameConsumerDoneSemValue; // The semaphore is signaled by the consumer (graphics, compute or display) when this semaphore value has been reached. VkQueryPool queryPool; // queryPool handle used for the video queries. int32_t startQueryId; // query Id used for the this frame. 
uint32_t numQueries; // usually one query per frame @@ -64,10 +66,12 @@ class VulkanDisplayFrame imageViews[imageTypeIdx].inUse = false; } } - frameCompleteFence = VkFence(); - frameConsumerDoneFence = VkFence(); - frameCompleteSemaphore = VkSemaphore(); - frameConsumerDoneSemaphore = VkSemaphore(); + frameCompleteFence = VK_NULL_HANDLE; + frameConsumerDoneFence = VK_NULL_HANDLE; + frameCompleteSemaphore = VK_NULL_HANDLE; + consumerCompleteSemaphore = VK_NULL_HANDLE; + frameCompleteDoneSemValue = (0ULL); // Frame 0 signaled by the decoder and/or filter + frameConsumerDoneSemValue = (0ULL); // Frame 0 signaled by the consumer queryPool = VkQueryPool(); startQueryId = 0; numQueries = 0; @@ -92,7 +96,9 @@ class VulkanDisplayFrame , frameCompleteFence() , frameConsumerDoneFence() , frameCompleteSemaphore() - , frameConsumerDoneSemaphore() + , consumerCompleteSemaphore() + , frameCompleteDoneSemValue(0ULL) + , frameConsumerDoneSemValue(0ULL) , queryPool() , startQueryId() , numQueries() diff --git a/common/libs/VkCodecUtils/VulkanFilter.h b/common/libs/VkCodecUtils/VulkanFilter.h index 2670304c..bb88799b 100644 --- a/common/libs/VkCodecUtils/VulkanFilter.h +++ b/common/libs/VkCodecUtils/VulkanFilter.h @@ -24,6 +24,7 @@ #include "VkCodecUtils/VulkanShaderCompiler.h" #include "VkCodecUtils/VkImageResource.h" #include "VkCodecUtils/VulkanCommandBufferPool.h" +#include "VkCodecUtils/VulkanSemaphoreDump.h" struct VulkanShaderInput { const std::string shader; @@ -34,6 +35,9 @@ struct VulkanShaderInput { class VulkanFilter : public VulkanCommandBufferPool { public: + // Constants moved inside the class as static constexpr + static constexpr uint32_t MAX_SEMAPHORES = 4; + static constexpr uint32_t MAX_CMD_BUFFERS = 4; VulkanFilter(const VulkanDeviceContext* vkDevCtx, uint32_t queueFamilyIndex, @@ -76,40 +80,146 @@ class VulkanFilter : public VulkanCommandBufferPool uint32_t bufferIdx) = 0; virtual VkResult SubmitCommandBuffer(uint32_t commandBufferCount, - const 
VkCommandBuffer* pCommandBuffers, + const VkCommandBuffer* pCommandBuffers, uint32_t waitSemaphoreCount, const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags2KHR* pWaitStageMasks, uint32_t signalSemaphoreCount, const VkSemaphore* pSignalSemaphores, + const VkPipelineStageFlags2KHR* pSignalStageMasks, VkFence filterCompleteFence) const { - assert(m_queue != VK_NULL_HANDLE); + assert(commandBufferCount <= MAX_CMD_BUFFERS); + assert(waitSemaphoreCount <= MAX_SEMAPHORES); + assert(signalSemaphoreCount <= MAX_SEMAPHORES); + + // Prepare command buffer info on stack + VkCommandBufferSubmitInfoKHR cmdBufferInfos[MAX_CMD_BUFFERS]; + for (uint32_t i = 0; i < commandBufferCount; i++) { + cmdBufferInfos[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos[i].pNext = nullptr; + cmdBufferInfos[i].commandBuffer = pCommandBuffers[i]; + cmdBufferInfos[i].deviceMask = 0; + } - // Wait for rendering finished - VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + // Prepare wait semaphore info on stack + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[i].pNext = nullptr; + waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i]; + waitSemaphoreInfos[i].value = 0; // Binary semaphore + waitSemaphoreInfos[i].stageMask = pWaitStageMasks[i]; + waitSemaphoreInfos[i].deviceIndex = 0; + } - // Submit compute commands - VkSubmitInfo submitInfo {}; - submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submitInfo.pCommandBuffers = pCommandBuffers; - submitInfo.commandBufferCount = commandBufferCount; - submitInfo.waitSemaphoreCount = waitSemaphoreCount; - submitInfo.pWaitSemaphores = pWaitSemaphores; - submitInfo.pWaitDstStageMask = &waitStageMask; - submitInfo.signalSemaphoreCount = signalSemaphoreCount; - submitInfo.pSignalSemaphores = pSignalSemaphores; + // Prepare 
signal semaphore info on stack + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[i].pNext = nullptr; + signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i]; + signalSemaphoreInfos[i].value = 0; // Binary semaphore + signalSemaphoreInfos[i].stageMask = pSignalStageMasks[i]; + signalSemaphoreInfos[i].deviceIndex = 0; + } + + // Submit info + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + submitInfo.pNext = nullptr; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = commandBufferCount; + submitInfo.pCommandBufferInfos = cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; + + if (false) { + // Dump semaphore info for debugging + VulkanSemaphoreDump::DumpSemaphoreInfo(submitInfo, "DECODE FILTER", 0); + } assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, filterCompleteFence)); - VkResult result = m_vkDevCtx->QueueSubmit(m_queue, 1, &submitInfo, filterCompleteFence); + VkResult result = m_vkDevCtx->QueueSubmit2KHR(m_queue, 1, &submitInfo, filterCompleteFence); + + return result; + } + + virtual VkResult SubmitCommandBuffer(uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers, + uint32_t waitSemaphoreCount, + const VkSemaphore* pWaitSemaphores, + const uint64_t* pWaitSemaphoreValues, + const VkPipelineStageFlags2KHR* pWaitStageMasks, + uint32_t signalSemaphoreCount, + const VkSemaphore* pSignalSemaphores, + const uint64_t* pSignalSemaphoreValues, + const VkPipelineStageFlags2KHR* pSignalStageMasks, + VkFence filterCompleteFence) const + { + assert(m_queue != VK_NULL_HANDLE); + assert(commandBufferCount <= 
MAX_CMD_BUFFERS); + assert(waitSemaphoreCount <= MAX_SEMAPHORES); + assert(signalSemaphoreCount <= MAX_SEMAPHORES); + + // Prepare command buffer info on stack + VkCommandBufferSubmitInfoKHR cmdBufferInfos[MAX_CMD_BUFFERS]; + for (uint32_t i = 0; i < commandBufferCount; i++) { + cmdBufferInfos[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos[i].pNext = nullptr; + cmdBufferInfos[i].commandBuffer = pCommandBuffers[i]; + cmdBufferInfos[i].deviceMask = 0; + } - if (result != VK_SUCCESS) { - return result; + // Prepare wait semaphore info on stack + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[i].pNext = nullptr; + waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i]; + waitSemaphoreInfos[i].value = pWaitSemaphoreValues[i]; // Timeline value + waitSemaphoreInfos[i].stageMask = pWaitStageMasks[i]; + waitSemaphoreInfos[i].deviceIndex = 0; } + // Prepare signal semaphore info on stack + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[i].pNext = nullptr; + signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i]; + signalSemaphoreInfos[i].value = pSignalSemaphoreValues[i]; // Timeline value + signalSemaphoreInfos[i].stageMask = pSignalStageMasks[i]; + signalSemaphoreInfos[i].deviceIndex = 0; + } + + // Submit info + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + submitInfo.pNext = nullptr; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = commandBufferCount; + submitInfo.pCommandBufferInfos = cmdBufferInfos; + 
submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; + + if (false) { + // Dump semaphore info for debugging + VulkanSemaphoreDump::DumpSemaphoreInfo(submitInfo, "DECODE FILTER", 0); + } + + assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, filterCompleteFence)); + VkResult result = m_vkDevCtx->QueueSubmit2KHR(m_queue, 1, &submitInfo, filterCompleteFence); + return result; } + protected: VulkanShaderCompiler m_vulkanShaderCompiler; uint32_t m_queueFamilyIndex; diff --git a/common/libs/VkCodecUtils/VulkanFrame.cpp b/common/libs/VkCodecUtils/VulkanFrame.cpp index 2e87885d..b95bdf96 100644 --- a/common/libs/VkCodecUtils/VulkanFrame.cpp +++ b/common/libs/VkCodecUtils/VulkanFrame.cpp @@ -18,6 +18,7 @@ #include #include #include +#include // Added for std::this_thread::sleep_for #include "VkCodecUtils/Helpers.h" #include "VkCodecUtils/VulkanDeviceContext.h" @@ -25,6 +26,7 @@ #include "VkCodecUtils/VulkanVideoUtils.h" #include "VulkanFrame.h" #include "VkVideoCore/DecodeFrameBufferIf.h" +#include "VkCodecUtils/VulkanSemaphoreDump.h" template VulkanFrame::VulkanFrame(const VulkanDeviceContext* vkDevCtx) @@ -420,6 +422,7 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, if (renderIndex < 0) { renderIndex = -renderIndex; } + vulkanVideoUtils::VulkanPerDrawContext* pPerDrawContext = m_videoRenderer->m_renderInfo.GetDrawContext(renderIndex); VkSharedBaseObj imageResourceView; @@ -583,54 +586,77 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, } } - const uint32_t maxWaitSemaphores = 2; - uint32_t numWaitSemaphores = 0; - VkSemaphore waitSemaphores[maxWaitSemaphores] = {}; + const uint32_t waitSemaphoreMaxCount = 2; + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[waitSemaphoreMaxCount]{}; + + const uint32_t signalSemaphoreMaxCount = 2; + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[signalSemaphoreMaxCount]{}; - assert(waitSemaphoreCount <= 1); - if ((waitSemaphoreCount > 0) && 
(pWaitSemaphores != nullptr)) { - waitSemaphores[numWaitSemaphores++] = *pWaitSemaphores; + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[i].pNext = nullptr; + waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i]; + waitSemaphoreInfos[i].value = 0; // Binary semaphore + waitSemaphoreInfos[i].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + waitSemaphoreInfos[i].deviceIndex = 0; } - if (inFrame && (inFrame->frameCompleteSemaphore != VkSemaphore())) { - waitSemaphores[numWaitSemaphores++] = inFrame->frameCompleteSemaphore; + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[i].pNext = nullptr; + signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i]; + signalSemaphoreInfos[i].value = 0; // Binary semaphore + signalSemaphoreInfos[i].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + signalSemaphoreInfos[i].deviceIndex = 0; } - assert(numWaitSemaphores <= maxWaitSemaphores); - const uint32_t maxSignalSemaphores = 2; - uint32_t numSignalSemaphores = 0; - VkSemaphore signalSemaphores[maxSignalSemaphores] = {}; + if (inFrame && (inFrame->frameCompleteSemaphore != VK_NULL_HANDLE)) { - assert(signalSemaphoreCount <= 1); - if ((signalSemaphoreCount > 0) && (pSignalSemaphores != nullptr)) { - signalSemaphores[numSignalSemaphores++] = *pSignalSemaphores; - } + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = inFrame->frameCompleteSemaphore; + waitSemaphoreInfos[waitSemaphoreCount].value = inFrame->frameCompleteDoneSemValue; + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR | + VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR; + 
waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; + waitSemaphoreCount++; + + signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr; + signalSemaphoreInfos[signalSemaphoreCount].semaphore = inFrame->consumerCompleteSemaphore; + signalSemaphoreInfos[signalSemaphoreCount].value = inFrame->frameConsumerDoneSemValue; + signalSemaphoreInfos[signalSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; + signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0; + signalSemaphoreCount++; - if (inFrame && (inFrame->frameConsumerDoneSemaphore != VkSemaphore())) { - signalSemaphores[numSignalSemaphores++] = inFrame->frameConsumerDoneSemaphore; inFrame->hasConsummerSignalSemaphore = true; } - assert(numSignalSemaphores <= maxSignalSemaphores); + + assert(waitSemaphoreCount <= waitSemaphoreMaxCount); + assert(signalSemaphoreCount <= signalSemaphoreMaxCount); if (frameConsumerDoneFence != VkFence()) { inFrame->hasConsummerSignalFence = true; } - - // Wait for the image to be owned and signal for render completion - VkPipelineStageFlags primaryCmdSubmitWaitStages[2] = { VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT }; - VkSubmitInfo primaryCmdSubmitInfo = VkSubmitInfo(); - primaryCmdSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - primaryCmdSubmitInfo.pWaitDstStageMask = primaryCmdSubmitWaitStages; - primaryCmdSubmitInfo.commandBufferCount = 1; - - primaryCmdSubmitInfo.waitSemaphoreCount = numWaitSemaphores; - primaryCmdSubmitInfo.pWaitSemaphores = numWaitSemaphores ? waitSemaphores : NULL; - primaryCmdSubmitInfo.pCommandBuffers = pPerDrawContext->commandBuffer.GetCommandBuffer(); - - primaryCmdSubmitInfo.signalSemaphoreCount = numSignalSemaphores; - primaryCmdSubmitInfo.pSignalSemaphores = numSignalSemaphores ? 
signalSemaphores : NULL; + VkCommandBufferSubmitInfoKHR cmdBufferInfos; + cmdBufferInfos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos.pNext = nullptr; + cmdBufferInfos.commandBuffer = *pPerDrawContext->commandBuffer.GetCommandBuffer(); + cmdBufferInfos.deviceMask = 0; + + // Submit info + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + submitInfo.pNext = nullptr; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; // For fence/sync debugging if (false && inFrame && inFrame->frameCompleteFence) { @@ -646,7 +672,14 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, } } - result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::GRAPHICS, 0, 1, &primaryCmdSubmitInfo, frameConsumerDoneFence); + result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::GRAPHICS, + 0, // queueIndex + 1, // submitCount + &submitInfo, + frameConsumerDoneFence, + "Graphics Submit", + (inFrame != nullptr) ? inFrame->decodeOrder : UINT64_MAX, + (inFrame != nullptr) ? 
inFrame->displayOrder : UINT64_MAX); if (result != VK_SUCCESS) { assert(result == VK_SUCCESS); fprintf(stderr, "\nERROR: MultiThreadedQueueSubmit() result: 0x%x\n", result); @@ -676,6 +709,11 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, m_frameDataIndex = (m_frameDataIndex + 1) % m_frameData.size(); + if (false) { + // Add a 20ms sleep + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + return result; } diff --git a/common/libs/VkCodecUtils/VulkanSemaphoreDump.h b/common/libs/VkCodecUtils/VulkanSemaphoreDump.h new file mode 100644 index 00000000..6e1b8913 --- /dev/null +++ b/common/libs/VkCodecUtils/VulkanSemaphoreDump.h @@ -0,0 +1,90 @@ +/* +* Copyright 2024 NVIDIA Corporation. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#pragma once + +#include +#include +#include + +namespace VulkanSemaphoreDump { + +/** + * @brief Dumps the semaphore information from a VkSubmitInfo2KHR structure + * + * @param submitInfo The VkSubmitInfo2KHR structure containing semaphore information + * @param submissionName Optional name to identify the submission (e.g., "DECODE", "COMPUTE") + * @param decodeEncodeOrder Optional decode / encode order number or identifier (uint64_t) + * @param displayInputOrder Optional display / input order number or identifier (uint64_t) + */ +inline void DumpSemaphoreInfo( + const VkSubmitInfo2KHR& submitInfo, + const char* submissionName = nullptr, + uint64_t decodeEncodeOrder = UINT64_MAX, + uint64_t displayInputOrder = UINT64_MAX) +{ + + std::cout << "----------------------------\n"; + + if (submissionName) { + std::cout << submissionName << " "; + } + + std::cout << "TL Semaphore sync"; + + if (decodeEncodeOrder != UINT64_MAX) { + std::cout << " (decode / encode = " << decodeEncodeOrder; + if (displayInputOrder != UINT64_MAX) { + std::cout << ", display / input = " << displayInputOrder; + } + std::cout << ")"; + } else if (displayInputOrder != UINT64_MAX) { + std::cout << " (display / input = " << displayInputOrder << ")"; + } + + std::cout << ":\n"; + + // Dump wait semaphores + for (uint32_t i = 0; i < submitInfo.waitSemaphoreInfoCount; i++) { + const VkSemaphoreSubmitInfoKHR& semInfo = submitInfo.pWaitSemaphoreInfos[i]; + std::cout << " Wait sem[" << i << "]: " << semInfo.semaphore + << " value = " << semInfo.value + << " stage = 0x" << std::hex << semInfo.stageMask << std::dec; + + if (semInfo.deviceIndex > 0) { + std::cout << " deviceIndex=" << semInfo.deviceIndex; + } + std::cout << std::endl; + } + + // Dump signal semaphores + for (uint32_t i = 0; i < submitInfo.signalSemaphoreInfoCount; i++) { + const VkSemaphoreSubmitInfoKHR& semInfo = submitInfo.pSignalSemaphoreInfos[i]; + std::cout << " Signal sem[" << i << "]: " << semInfo.semaphore + << " value = " << semInfo.value + << " stage 
= 0x" << std::hex << semInfo.stageMask << std::dec; + + if (semInfo.deviceIndex > 0) { + std::cout << " deviceIndex = " << semInfo.deviceIndex; + } + std::cout << std::endl; + } + + std::cout << "----------------------------" << std::endl; +} + + +} // namespace VulkanSemaphoreDump diff --git a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp index 3122b062..97d3906f 100644 --- a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp @@ -382,6 +382,7 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize); assert(readImagePtr != nullptr); + int32_t secondaryPlaneWidth = frameWidth; int32_t secondaryPlaneHeight = frameHeight; int32_t imageHeight = frameHeight; bool isUnnormalizedRgba = false; @@ -389,8 +390,11 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt isUnnormalizedRgba = true; } + if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { + secondaryPlaneWidth = (secondaryPlaneWidth + 1) / 2; + } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { - secondaryPlaneHeight /= 2; + secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2; } VkImageSubresource subResource = {}; @@ -439,15 +443,9 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt yuvPlaneLayouts[0].offset = 0; yuvPlaneLayouts[0].rowPitch = frameWidth * bytesPerPixel; yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight; - yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[1].rowPitch /= 2; - } + yuvPlaneLayouts[1].rowPitch = secondaryPlaneWidth * bytesPerPixel; yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight); - 
yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[2].rowPitch /= 2; - } + yuvPlaneLayouts[2].rowPitch = secondaryPlaneWidth * bytesPerPixel; // Copy the luma plane, always assume the 422 or 444 formats and src CbCr always is interleaved (shares the same plane). uint32_t numCompatiblePlanes = 1; @@ -642,6 +640,7 @@ VkResult VulkanVideoProcessor::CreateParser(const char*, static const VkExtensionProperties h264StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION }; static const VkExtensionProperties h265StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION }; static const VkExtensionProperties av1StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION }; + static const VkExtensionProperties vp9StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION }; const VkExtensionProperties* pStdExtensionVersion = NULL; if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { @@ -650,6 +649,8 @@ VkResult VulkanVideoProcessor::CreateParser(const char*, pStdExtensionVersion = &h265StdExtensionVersion; } else if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { pStdExtensionVersion = &av1StdExtensionVersion; + } else if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pStdExtensionVersion = &vp9StdExtensionVersion; } else { assert(!"Unsupported Codec Type"); return VK_ERROR_FORMAT_NOT_SUPPORTED; diff --git a/common/libs/VkCodecUtils/VulkanVideoSession.cpp b/common/libs/VkCodecUtils/VulkanVideoSession.cpp index 021ec538..3a8935d7 100644 --- a/common/libs/VkCodecUtils/VulkanVideoSession.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoSession.cpp @@ -39,6 +39,7 @@ 
VkResult VulkanVideoSession::Create(const VulkanDeviceContext* vkDevCtx, static const VkExtensionProperties h264DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION }; static const VkExtensionProperties h265DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION }; static const VkExtensionProperties av1DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION }; + static const VkExtensionProperties vp9DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION }; static const VkExtensionProperties h264EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION }; static const VkExtensionProperties h265EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION }; static const VkExtensionProperties av1EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_SPEC_VERSION }; @@ -63,6 +64,9 @@ VkResult VulkanVideoSession::Create(const VulkanDeviceContext* vkDevCtx, case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: createInfo.pStdHeaderVersion = &av1DecodeStdExtensionVersion; break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + createInfo.pStdHeaderVersion = &vp9DecodeStdExtensionVersion; + break; case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: createInfo.pStdHeaderVersion = &h264EncodeStdExtensionVersion; break; diff --git a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp index 39f3b6f1..e2cdf9ce 100644 --- a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp +++ 
b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp @@ -254,7 +254,7 @@ VkResult ImageObject::CopyYuvToVkImage(uint32_t numPlanes, const uint8_t* yuvPla } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { - cbimageHeight /= 2; + cbimageHeight = (cbimageHeight + 1) / 2; } if (mpInfo && !isUnnormalizedRgba) { diff --git a/vk_video_decoder/demos/vk-video-dec/Main.cpp b/vk_video_decoder/demos/vk-video-dec/Main.cpp index 8579362e..3e499c32 100644 --- a/vk_video_decoder/demos/vk-video-dec/Main.cpp +++ b/vk_video_decoder/demos/vk-video-dec/Main.cpp @@ -28,14 +28,33 @@ #include "VkShell/Shell.h" #include "VkCodecUtils/VkVideoFrameOutput.h" -int main(int argc, const char **argv) { +int main(int argc, const char **argv) +{ DecoderConfig decoderConfig(argv[0]); decoderConfig.ParseArgs(argc, argv); + VkSharedBaseObj videoStreamDemuxer; + VkResult result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), + decoderConfig.forceParserType, + decoderConfig.enableStreamDemuxing, + decoderConfig.initialWidth, + decoderConfig.initialHeight, + decoderConfig.initialBitdepth, + videoStreamDemuxer); + if (result != VK_SUCCESS) { + assert(!"Can't initialize the VideoStreamDemuxer!"); + return -1; + } + + VkVideoCodecOperationFlagsKHR videoCodecOperation = (decoderConfig.forceParserType != VK_VIDEO_CODEC_OPERATION_NONE_KHR) ? 
+ decoderConfig.forceParserType : + videoStreamDemuxer->GetVideoCodec(); + VulkanDeviceContext vkDevCtxt; - VkResult result = vkDevCtxt.InitVulkanDecoderDevice(decoderConfig.appName.c_str(), + result = vkDevCtxt.InitVulkanDecoderDevice(decoderConfig.appName.c_str(), VK_NULL_HANDLE, + videoCodecOperation, !decoderConfig.noPresent, decoderConfig.directMode, decoderConfig.validate, @@ -54,16 +73,8 @@ int main(int argc, const char **argv) { VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR; - VkQueueFlags requestVideoEncodeQueueMask = 0; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - } - if (decoderConfig.selectVideoWithComputeQueue) { requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -71,17 +82,6 @@ int main(int argc, const char **argv) { requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT; } - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs | - (decoderConfig.enableVideoEncoder ? 
videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR); - if (!decoderConfig.noPresent) { VkSharedBaseObj displayShell; @@ -98,17 +98,12 @@ int main(int argc, const char **argv) { result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT | requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoDecodeQueueMask), displayShell, requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), - requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + videoCodecOperation, + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); @@ -117,30 +112,15 @@ int main(int argc, const char **argv) { assert(displayShell->PhysDeviceCanPresent(vkDevCtxt.getPhysicalDevice(), vkDevCtxt.GetPresentQueueFamilyIdx())); - vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - decoderConfig.enableVideoEncoder ? 
1 : 0, // num encode queues - videoCodecs, - false, // createTransferQueue - true, // createGraphicsQueue - true, // createDisplayQueue + vkDevCtxt.CreateVulkanDevice(numDecodeQueues, // numDecodeQueues + 0, // num encode queues + videoCodecOperation, // videoCodecs + false, // createTransferQueue + true, // createGraphicsQueue + true, // createDisplayQueue requestVideoComputeQueueMask != 0 // createComputeQueue ); - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } - VkSharedBaseObj vulkanVideoProcessor; result = VulkanVideoProcessor::Create(decoderConfig, &vkDevCtxt, vulkanVideoProcessor); if (result != VK_SUCCESS) { @@ -176,8 +156,7 @@ int main(int argc, const char **argv) { result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_TRANSFER_BIT | requestVideoDecodeQueueMask | - requestVideoComputeQueueMask | - requestVideoEncodeQueueMask), + requestVideoComputeQueueMask), nullptr, requestVideoDecodeQueueMask); if (result != VK_SUCCESS) { @@ -187,9 +166,9 @@ int main(int argc, const char **argv) { } - result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - 0, // num encode queues - videoCodecs, + result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues, // numDecodeQueues + 0, // num encode queues + videoCodecOperation, // videoCodecs // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
@@ -204,21 +183,6 @@ int main(int argc, const char **argv) { return -1; } - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } - VkSharedBaseObj vulkanVideoProcessor; result = VulkanVideoProcessor::Create(decoderConfig, &vkDevCtxt, vulkanVideoProcessor); if (result != VK_SUCCESS) { diff --git a/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h b/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h index 21ad0fed..d5141c1b 100644 --- a/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h +++ b/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h @@ -269,61 +269,6 @@ typedef struct VkParserHevcPictureData { } VkParserHevcPictureData; -typedef struct VkParserVp9PictureData { - uint32_t width; - uint32_t height; - - // Frame Indexes - VkPicIf* pLastRef; - VkPicIf* pGoldenRef; - VkPicIf* pAltRef; - - uint32_t keyFrame; - uint32_t version; - uint32_t showFrame; - uint32_t errorResilient; - uint32_t bit_depth_minus8; - uint32_t colorSpace; - uint32_t subsamplingX; - uint32_t subsamplingY; - uint32_t activeRefIdx[3]; - uint32_t intraOnly; - uint32_t resetFrameContext; - uint32_t frameParallelDecoding; - uint32_t refreshFrameFlags; - uint8_t refFrameSignBias[4]; - uint32_t frameContextIdx; - uint32_t allow_high_precision_mv; - uint32_t mcomp_filter_type; - uint32_t loopFilterLevel; - uint32_t loopFilterSharpness; - uint32_t log2_tile_columns; - uint32_t log2_tile_rows; - int32_t mbRefLfDelta[4]; - int32_t mbModeLfDelta[2]; - int32_t segmentMapTemporalUpdate; - uint8_t segmentFeatureEnable[8][4]; - uint8_t mb_segment_tree_probs[7]; - uint8_t segment_pred_probs[3]; - int16_t 
segmentFeatureData[8][4]; - uint32_t scaledWidth; - uint32_t scaledHeight; - uint32_t scalingActive; - uint32_t segmentEnabled; - uint32_t prevIsKeyFrame; - uint32_t PrevShowFrame; - uint32_t modeRefLfEnabled; - int32_t qpYAc; - int32_t qpYDc; - int32_t qpChDc; - int32_t qpChAc; - uint32_t segmentMapUpdate; - uint32_t segmentFeatureMode; - uint32_t refreshEntropyProbs; - uint32_t frameTagSize; - uint32_t offsetToDctParts; -} VkParserVp9PictureData; - struct VkParserAv1PictureData { // The picture info structure is mostly pointing at other // structures defining the coding tool parameters. Those @@ -373,6 +318,42 @@ struct VkParserAv1PictureData { uint32_t frame_height; }; +typedef struct VkParserVp9PictureData { + + StdVideoDecodeVP9PictureInfo stdPictureInfo; + StdVideoVP9ColorConfig stdColorConfig; + StdVideoVP9LoopFilter stdLoopFilter; + StdVideoVP9Segmentation stdSegmentation; + + // frame dimensions + uint32_t FrameWidth, FrameHeight; + uint32_t MiCols, MiRows; + uint32_t Sb64Cols, Sb64Rows; + uint32_t renderWidth, renderHeight; + + // display details + uint8_t frame_to_show_map_idx; + bool show_existing_frame; + + // references + uint8_t ref_frame_idx[STD_VIDEO_VP9_REFS_PER_FRAME]; + uint8_t pic_idx[STD_VIDEO_VP9_NUM_REF_FRAMES]; + VkPicIf* pLastRef; + VkPicIf* pGoldenRef; + VkPicIf* pAltRef; + + // other derived parameters + bool FrameIsIntra; + uint8_t ChromaFormat; + uint32_t numTiles; + uint32_t compressedHeaderSize; + + // bitstream divisions + uint32_t uncompressedHeaderOffset; + uint32_t compressedHeaderOffset; + uint32_t tilesOffset; +} VkParserVp9PictureData; + typedef struct VkParserPictureData { int32_t PicWidthInMbs; // Coded Frame Size int32_t FrameHeightInMbs; // Coded Frame Height diff --git a/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h b/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h index 142f7db8..503f5827 100644 --- a/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h +++ 
b/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h @@ -22,31 +22,36 @@ #include "VulkanVideoDecoder.h" -typedef enum { - EIGHTTAP_SMOOTH, - EIGHTTAP, - EIGHTTAP_SHARP, - BILINEAR, - SWITCHABLE /* should be the last one */ -} INTERPOLATIONFILTERTYPE; - -typedef enum { - //NONE = -1, - INTRA_FRAME = 0, - LAST_FRAME = 1, - GOLDEN_FRAME = 2, - ALTREF_FRAME = 3, - VP9_MAX_REF_FRAMES = 4 -}MV_REFERENCE_FRAME; - -typedef enum { - ONLY_4X4 = 0, - ALLOW_8X8 = 1, - ALLOW_16X16 = 2, - ALLOW_32X32 = 3, - TX_MODE_SELECT = 4, - NB_TXFM_MODES = 5, -} TXFM_MODE; +#define VP9_FRAME_MARKER 2 +#define VP9_FRAME_SYNC_CODE 0x498342 +#define VP9_MAX_PRBABILITY 255 +#define VP9_MIN_TILE_WIDTH_B64 4 +#define VP9_MAX_TILE_WIDTH_B64 64 +#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) +#define ALIGN_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) + +#define VP9_BUFFER_POOL_MAX_SIZE 10 +#define VP9_MAX_NUM_SPATIAL_LAYERS 4 + +#define VP9_CHECK_FRAME_MARKER { \ + if (u(2) != VP9_FRAME_MARKER) { \ + assert(!"Invalid frame marker");\ + return false; \ + } \ +} + +#define VP9_CHECK_ZERO_BIT { \ + if (u(1) != 0) { \ + assert(!"Invalid syntax"); \ + return false; \ + } \ +} + +#define VP9_CHECK_FRAME_SYNC_CODE { \ + if (u(24) != VP9_FRAME_SYNC_CODE) { \ + assert(!"Invalid frame sync code"); \ + } \ +} // Segment level features. 
typedef enum { @@ -57,1492 +62,78 @@ typedef enum { SEG_LVL_MAX = 4 // Number of MB level features supported } SEG_LVL_FEATURES; -typedef enum { - SINGLE_PREDICTION_ONLY = 0, - COMP_PREDICTION_ONLY = 1, - HYBRID_PREDICTION = 2, - NB_PREDICTION_TYPES = 3, -} COMPPREDMODE_TYPE; - -/* Symbols for coding which components are zero jointly */ -typedef enum { - MV_JOINT_ZERO = 0, /* Zero vector */ - MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ - MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ - MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ -} MV_JOINT_TYPE; - -/* Symbols for coding magnitude class of nonzero components */ -typedef enum { - MV_CLASS_0 = 0, /* (0, 2] integer pel */ - MV_CLASS_1 = 1, /* (2, 4] integer pel */ - MV_CLASS_2 = 2, /* (4, 8] integer pel */ - MV_CLASS_3 = 3, /* (8, 16] integer pel */ - MV_CLASS_4 = 4, /* (16, 32] integer pel */ - MV_CLASS_5 = 5, /* (32, 64] integer pel */ - MV_CLASS_6 = 6, /* (64, 128] integer pel */ - MV_CLASS_7 = 7, /* (128, 256] integer pel */ - MV_CLASS_8 = 8, /* (256, 512] integer pel */ - MV_CLASS_9 = 9, /* (512, 1024] integer pel */ - MV_CLASS_10 = 10, /* (1024,2048] integer pel */ -} MV_CLASS_TYPE; - -typedef enum PARTITION_TYPE { - PARTITION_NONE, - PARTITION_HORZ, - PARTITION_VERT, - PARTITION_SPLIT, - PARTITION_TYPES -} PARTITION_TYPE; - - -typedef enum -{ - DC_PRED, /* average of above and left pixels */ - V_PRED, /* vertical prediction */ - H_PRED, /* horizontal prediction */ - D45_PRED, /* Directional 45 deg prediction [anti-clockwise from 0 deg hor] */ - D135_PRED, /* Directional 135 deg prediction [anti-clockwise from 0 deg hor] */ - D117_PRED, /* Directional 112 deg prediction [anti-clockwise from 0 deg hor] */ - D153_PRED, /* Directional 157 deg prediction [anti-clockwise from 0 deg hor] */ - D27_PRED, /* Directional 22 deg prediction [anti-clockwise from 0 deg hor] */ - D63_PRED, /* Directional 67 deg prediction [anti-clockwise from 0 deg hor] */ - TM_PRED, /* Truemotion prediction */ - NEARESTMV, - 
NEARMV, - ZEROMV, - NEWMV, - SPLITMV, - MB_MODE_COUNT -} MB_PREDICTION_MODE; - -typedef enum { - KEY_FRAME = 0, - INTER_FRAME = 1, - NUM_FRAME_TYPES, -} FRAME_TYPE; - -// Segment level features. -typedef enum { - TX_4X4 = 0, // 4x4 dct transform - TX_8X8 = 1, // 8x8 dct transform - TX_16X16 = 2, // 16x16 dct transform - TX_32X32 = 3, // 32x32 dct transform - TX_SIZE_MAX_SB, // Number of transforms available to SBs -} TX_SIZE; - -#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) - -#define BIG_NUM 0xffff -#define MIN_TILE_WIDTH_B64 4 -#define MAX_TILE_WIDTH_B64 64 -#define MI_SIZE_LOG2 3 -#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) -#define ALIGN_POWER_OF_TWO(value, n) \ - (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) -#define VP9_MB_LVL_MAX 2 -#define VP9_MAX_MB_SEGMENTS 4 -#define VP9_MB_FEATURE_TREE_PROBS 3 -#define MAX_REF_LF_DELTAS 4 -#define MAX_MODE_LF_DELTAS 2 //for vp8 its 4 -#define ALLOWED_REFS_PER_FRAME 3 -#define NUM_REF_FRAMES 8 -#define NUM_REF_FRAMES_LG2 3 -#define NUM_FRAME_CONTEXTS_LG2 2 -#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6) -#define MIN_TILE_WIDTH 256 -#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6) -//#define MAX_TILE_WIDTH 4096 -#define MAX_MB_SEGMENTS 8 -#define MB_SEG_TREE_PROBS (MAX_MB_SEGMENTS-1) -#define MAX_PROB 255 -#define PREDICTION_PROBS 3 -#define TX_SIZE_CONTEXTS 2 -#define PARTITION_PLOFFSET 4 // number of probability models per block size -#define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) -#define BLOCK_SIZE_GROUPS 4 -#define VP9_INTRA_MODES 10/* (TM_PRED + 1) */ -#define COMP_PRED_CONTEXTS 2 -/* Entropy nodes above is divided in two parts, first three probs in part1 - * and the modeled probs in part2. Part1 is padded so that tables align with - * 32 byte addresses, so there is four bytes for each table. 
*/ -#define ENTROPY_NODES_PART1 4 -#define ENTROPY_NODES_PART2 8 -#define INTER_MODE_CONTEXTS 7 -#define VP9_SWITCHABLE_FILTERS 3 /* number of switchable filters */ -#define COMP_PRED_CONTEXTS 2 -#define INTRA_INTER_CONTEXTS 4 -#define COMP_INTER_CONTEXTS 5 -#define REF_CONTEXTS 5 -#define VP9_BLOCK_TYPES 2 -#define VP9_REF_TYPES 2 // intra=0, inter=1 -#define VP9_COEF_BANDS 6 -#define VP9_PREV_COEF_CONTEXTS 6 -#define MBSKIP_CONTEXTS 3 -#define COEF_UPDATE_PROB 252 -#define VP9_PROB_HALF 128 -#define VP9_NMV_UPDATE_PROB 252 -#define VP9_MV_UPDATE_PRECISION 7 -#define MV_JOINTS 4 -#define MV_CLASSES 11 -#define CLASS0_BITS 1 -#define CLASS0_SIZE (1 << CLASS0_BITS) -#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) -/* The first nodes of the entropy probs are unconstrained, the rest are - * modeled with statistic distribution. */ -#define UNCONSTRAINED_NODES 3 -#define MODEL_NODES (VP9_ENTROPY_NODES - UNCONSTRAINED_NODES) -#define PIVOT_NODE 2 // which node is pivot -#define COEFPROB_MODELS 128 -#define END_OF_STREAM 0xFFFFFFFFU -#define VP9_DEF_UPDATE_PROB 252 -#define MODULUS_PARAM 13 -#define OK 0 //HANTRO_OK -#define NOK 1 //HANTRO_NOK -#define CHECK_END_OF_STREAM(s) if((s)==END_OF_STREAM) return (s) -#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV) -#define VP9_REF_LIST_SIZE 8 -#define SEGMENT_DELTADATA 0 -#define SEGMENT_ABSDATA 1 -#define MAXQ 255 -#define LOTS_OF_BITS 0x40000000 -#define BD_VALUE_SIZE ((int32_t)sizeof(VP9_BD_VALUE)*CHAR_BIT) - -#define VP9_ENTROPY_NODES 11 -#define COEF_COUNT_SAT 24 -#define COEF_MAX_UPDATE_FACTOR 112 -#define COEF_COUNT_SAT_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_KEY 112 -#define COEF_COUNT_SAT_AFTER_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 -#define MODE_COUNT_SAT 20 -#define MODE_MAX_UPDATE_FACTOR 128 -#define MAX_PROBS 32 -#define MVREF_COUNT_SAT 20 -#define MVREF_MAX_UPDATE_FACTOR 128 -#define MV_COUNT_SAT 20 -#define MV_MAX_UPDATE_FACTOR 128 - -/* Coefficient token alphabet */ - -#define ZERO_TOKEN 0 /* 0 
Extra Bits 0+0 */ -#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */ -#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */ -#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */ -#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ -#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ -#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ -#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ -#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ -#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ -#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 13+1 */ -#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ -#define MAX_ENTROPY_TOKENS 12 -#define FRAME_CONTEXTS_LOG2 2 -#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) - -#define DCT_EOB_MODEL_TOKEN 3 /* EOB Extra Bits 0+0 */ - -typedef signed char vp9_tree_index; - -static const int32_t seg_feature_data_signed[SEG_LVL_MAX] = {1, 1, 0, 0}; -static const int32_t seg_feature_data_max[SEG_LVL_MAX] = {MAXQ, 63, 3, 0}; - -#define NVDEC_VP9HWPAD(x, y) unsigned char x[y] - -typedef struct { - /* last bytes of address 41 */ - unsigned char joints[3]; - unsigned char sign[2]; - /* address 42 */ - unsigned char class0[2][1]; - unsigned char fp[2][3]; - unsigned char class0_hp[2]; - unsigned char hp[2]; - unsigned char classes[2][10]; - /* address 43 */ - unsigned char class0_fp[2][2][3]; - unsigned char bits[2][10]; - -} nvdec_nmv_context; - -/* Adaptive entropy contexts, padding elements are added to have - * 256 bit aligned tables for HW access. - * Compile with TRACE_PROB_TABLES to print bases for each table. 
*/ -typedef struct nvdec_vp9AdaptiveEntropyProbs_s -{ - /* address 32 */ - unsigned char inter_mode_prob[7][4]; - unsigned char intra_inter_prob[4]; - - /* address 33 */ - unsigned char uv_mode_prob[10][8]; - unsigned char tx8x8_prob[2][1]; - unsigned char tx16x16_prob[2][2]; - unsigned char tx32x32_prob[2][3]; - unsigned char sb_ymode_probB[4][1]; - unsigned char sb_ymode_prob[4][8]; - - /* address 37 */ - unsigned char partition_prob[2][16][4]; - - /* address 41 */ - unsigned char uv_mode_probB[10][1]; - unsigned char switchable_interp_prob[4][2]; - unsigned char comp_inter_prob[5]; - unsigned char mbskip_probs[3]; - NVDEC_VP9HWPAD(pad1, 1); - - nvdec_nmv_context nmvc; - - /* address 44 */ - unsigned char single_ref_prob[5][2]; - unsigned char comp_ref_prob[5]; - NVDEC_VP9HWPAD(pad2, 17); - - /* address 45 */ - unsigned char probCoeffs[2][2][6][6][4]; - unsigned char probCoeffs8x8[2][2][6][6][4]; - unsigned char probCoeffs16x16[2][2][6][6][4]; - unsigned char probCoeffs32x32[2][2][6][6][4]; - -} nvdec_vp9AdaptiveEntropyProbs_t; - -typedef struct nvdec_vp9EntropyProbs_s -{ - /* Default keyframe probs */ - /* Table formatted for 256b memory, probs 0to7 for all tables followed by - * probs 8toN for all tables. - * Compile with TRACE_PROB_TABLES to print bases for each table. 
*/ - - unsigned char kf_bmode_prob[10][10][8]; - - /* Address 25 */ - unsigned char kf_bmode_probB[10][10][1]; - unsigned char ref_pred_probs[3]; - unsigned char mb_segment_tree_probs[7]; - unsigned char segment_pred_probs[3]; - unsigned char ref_scores[4]; - unsigned char prob_comppred[2]; - NVDEC_VP9HWPAD(pad1, 9); - - /* Address 29 */ - unsigned char kf_uv_mode_prob[10][8]; - unsigned char kf_uv_mode_probB[10][1]; - NVDEC_VP9HWPAD(pad2, 6); - - nvdec_vp9AdaptiveEntropyProbs_t a; /* Probs with backward adaptation */ - - -} nvdec_vp9EntropyProbs_t; - -typedef struct { - unsigned int joints[4]; - unsigned int sign[2][2]; - unsigned int classes[2][11]; - unsigned int class0[2][2]; - unsigned int bits[2][10][2]; - unsigned int class0_fp[2][2][4]; - unsigned int fp[2][4]; - unsigned int class0_hp[2][2]; - unsigned int hp[2][2]; - -} nvdec_nmv_context_counts; - -typedef struct nvdec_vp9EntropyCounts_s -{ - unsigned int inter_mode_counts[7][3][2]; - unsigned int sb_ymode_counts[4][10]; - unsigned int uv_mode_counts[10][10]; - unsigned int partition_counts[16][4]; - unsigned int switchable_interp_counts[4][3]; - unsigned int intra_inter_count[4][2]; - unsigned int comp_inter_count[5][2]; - unsigned int single_ref_count[5][2][2]; - unsigned int comp_ref_count[5][2]; - unsigned int tx32x32_count[2][4]; - unsigned int tx16x16_count[2][3]; - unsigned int tx8x8_count[2][2]; - unsigned int mbskip_count[3][2]; - - nvdec_nmv_context_counts nmvcount; - - unsigned int countCoeffs[2][2][6][6][4]; - unsigned int countCoeffs8x8[2][2][6][6][4]; - unsigned int countCoeffs16x16[2][2][6][6][4]; - unsigned int countCoeffs32x32[2][2][6][6][4]; - - unsigned int countEobs[4][2][2][6][6]; - -} nvdec_vp9EntropyCounts_t; - -// Structure required to update Forward and Backward probabilities -typedef struct _vp9_prob_update_s -{ - nvdec_vp9EntropyProbs_t *pProbTab; - nvdec_vp9EntropyCounts_t *pCtxCounters; - unsigned char keyFrame : 1; - unsigned char prevIsKeyFrame : 1; - unsigned char 
resolutionChange : 1; - unsigned char errorResilient : 1; - unsigned char prevShowFrame : 1; - unsigned char intraOnly : 1; - unsigned char reserved2 : 2; - char lossless; - char transform_mode; - char allow_high_precision_mv; - char mcomp_filter_type; - char comp_pred_mode; - unsigned char FrameParallelDecoding; - unsigned char RefreshEntropyProbs; - uint32_t resetFrameContext; - uint32_t frameContextIdx; - uint32_t offsetToDctParts; - uint32_t allow_comp_inter_inter; - uint32_t probsDecoded; -} vp9_prob_update_s; - -typedef uint32_t VP9_BD_VALUE; - -typedef struct { - uint32_t buffer_end; - uint32_t buffer; - int32_t value; - int32_t count; - uint32_t range; - uint32_t pos; -} vp9_reader; - -const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ -{ - -DCT_EOB_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, 6, /* 2 = ONE */ - 8, 12, /* 3 = LOW_VAL */ - -TWO_TOKEN, 10, /* 4 = TWO */ - -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ - 14, 16, /* 6 = HIGH_LOW */ - -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ - 18, 20, /* 8 = CAT_THREEFOUR */ - -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ - -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ -}; - -const vp9_tree_index vp9_coefmodel_tree[6] = { - -DCT_EOB_MODEL_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, -TWO_TOKEN, /* 2 = ONE */ -}; +typedef struct _vp9_ref_frames_s { + VkPicIf* buffer; + StdVideoVP9FrameType frame_type; + bool segmentation_enabled; +} vp9_ref_frames_s; -const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = { - -0, 2, - -1, -2 -}; - -const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = { - -MV_JOINT_ZERO, 2, - -MV_JOINT_HNZVZ, 4, - -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ -}; - -const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = { - -0, -1, -}; - -const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = { - -MV_CLASS_0, 2, - -MV_CLASS_1, 4, - 6, 8, - 
-MV_CLASS_2, -MV_CLASS_3, - 10, 12, - -MV_CLASS_4, -MV_CLASS_5, - -MV_CLASS_6, 14, - 16, 18, - -MV_CLASS_7, -MV_CLASS_8, - -MV_CLASS_9, -MV_CLASS_10, -}; - -const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = { - -0, 2, - -1, 4, - -2, -3 -}; - -static const uint32_t vp9dx_bitreader_norm[256] = +class VulkanVP9Decoder : public VulkanVideoDecoder { - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -//***************************************************************** -//vp9_entropymode.c -typedef uint8_t vp9_prob; -//typedef uint8_t vp9_tree_index; // typedef i8 vp9_tree_index -static const vp9_prob default_kf_uv_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1] = { - { 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */, - { 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */, - { 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */, - { 120, 11, 50, 123, 163, 135, 64, 77, 103 } /* y = d45 */, - { 113, 9, 36, 155, 111, 157, 32, 44, 161 } /* y = d135 */, - { 116, 9, 55, 176, 76, 96, 37, 61, 149 } /* y = d117 */, - { 115, 9, 28, 141, 161, 167, 21, 25, 193 } /* y = d153 */, - { 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d27 */, - { 116, 12, 64, 120, 140, 125, 49, 115, 121 } /* y = d63 */, - { 102, 19, 66, 162, 182, 122, 35, 59, 128 } /* y 
= tm */ -}; - -static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS] - [VP9_INTRA_MODES - 1] = { - { 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* block_size < 8x8 */, - { 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* block_size < 16x16 */, - { 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* block_size < 32x32 */, - { 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* block_size >= 32x32 */ -}; - -static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1] = { - { 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */, - { 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */, - { 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */, - { 97, 5, 44, 131, 176, 139, 48, 68, 97 } /* y = d45 */, - { 83, 5, 42, 156, 111, 152, 26, 49, 152 } /* y = d135 */, - { 80, 5, 58, 178, 74, 83, 33, 62, 145 } /* y = d117 */, - { 86, 5, 32, 154, 192, 168, 14, 22, 163 } /* y = d153 */, - { 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d27 */, - { 77, 7, 64, 116, 132, 122, 37, 126, 120 } /* y = d63 */, - { 101, 21, 107, 181, 192, 103, 19, 67, 125 } /* y = tm */ -}; - -static const uint8_t vp9_default_inter_mode_prob[INTER_MODE_CONTEXTS][4] = { - {2, 173, 34, 0}, // 0 = both zero mv - {7, 145, 85, 0}, // 1 = one zero mv + one a predicted mv - {7, 166, 63, 0}, // 2 = two predicted mvs - {7, 94, 66, 0}, // 3 = one predicted/zero and one new mv - {8, 64, 46, 0}, // 4 = two new mvs - {17, 81, 31, 0}, // 5 = one intra neighbour + x - {25, 29, 30, 0}, // 6 = two intra neighbours -}; -static const vp9_prob vp9_partition_probs[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS] - [PARTITION_TYPES] = { /* 1 byte padding */ - { /* frame_type = keyframe */ - /* 8x8 -> 4x4 */ - { 158, 97, 94, 0 } /* a/l both not split */, - { 93, 24, 99, 0 } /* a split, l not split */, - { 85, 119, 44, 0 } /* l split, a not split */, - { 62, 59, 67, 0 } /* a/l both split */, - /* 16x16 -> 8x8 */ - { 149, 53, 53, 0 } /* a/l both not split */, - { 94, 20, 48, 0 } /* a split, l not split */, - { 83, 53, 24, 0 } /* l 
split, a not split */, - { 52, 18, 18, 0 } /* a/l both split */, - /* 32x32 -> 16x16 */ - { 150, 40, 39, 0 } /* a/l both not split */, - { 78, 12, 26, 0 } /* a split, l not split */, - { 67, 33, 11, 0 } /* l split, a not split */, - { 24, 7, 5, 0 } /* a/l both split */, - /* 64x64 -> 32x32 */ - { 174, 35, 49, 0 } /* a/l both not split */, - { 68, 11, 27, 0 } /* a split, l not split */, - { 57, 15, 9, 0 } /* l split, a not split */, - { 12, 3, 3, 0 } /* a/l both split */ - }, { /* frame_type = interframe */ - /* 8x8 -> 4x4 */ - { 199, 122, 141, 0 } /* a/l both not split */, - { 147, 63, 159, 0 } /* a split, l not split */, - { 148, 133, 118, 0 } /* l split, a not split */, - { 121, 104, 114, 0 } /* a/l both split */, - /* 16x16 -> 8x8 */ - { 174, 73, 87, 0 } /* a/l both not split */, - { 92, 41, 83, 0 } /* a split, l not split */, - { 82, 99, 50, 0 } /* l split, a not split */, - { 53, 39, 39, 0 } /* a/l both split */, - /* 32x32 -> 16x16 */ - { 177, 58, 59, 0 } /* a/l both not split */, - { 68, 26, 63, 0 } /* a split, l not split */, - { 52, 79, 25, 0 } /* l split, a not split */, - { 17, 14, 12, 0 } /* a/l both split */, - /* 64x64 -> 32x32 */ - { 222, 34, 30, 0 } /* a/l both not split */, - { 72, 16, 44, 0 } /* a split, l not split */, - { 58, 32, 12, 0 } /* l split, a not split */, - { 10, 7, 6, 0 } /* a/l both split */ - } -}; -static const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = { - -DC_PRED, 2, // 0 = DC_NODE - -TM_PRED, 4, // 1 = TM_NODE - -V_PRED, 6, // 2 = V_NODE - 8, 12, // 3 = COM_NODE - -H_PRED, 10, // 4 = H_NODE - -D135_PRED, -D117_PRED, // 5 = D135_NODE - -D45_PRED, 14, // 6 = D45_NODE - -D63_PRED, 16, // 7 = D63_NODE - -D153_PRED, -D27_PRED // 8 = D153_NODE -}; - -static const vp9_tree_index vp9_partition_tree[6] = { - -PARTITION_NONE, 2, - -PARTITION_HORZ, 4, - -PARTITION_VERT, -PARTITION_SPLIT -}; - -static const vp9_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { - 9, 102, 187, 225 -}; - -static const vp9_prob 
default_comp_inter_p[COMP_INTER_CONTEXTS] = { - 239, 183, 119, 96, 41 -}; - -static const vp9_prob default_comp_ref_p[REF_CONTEXTS] = { - 50, 126, 123, 221, 226 -}; - -static const vp9_prob default_single_ref_p[REF_CONTEXTS][2] = { - { 33, 16 }, - { 77, 74 }, - { 142, 142 }, - { 172, 170 }, - { 238, 247 } -}; - -static const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1] - [VP9_SWITCHABLE_FILTERS-1] = { - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 149, 144, }, -}; -static const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_CONTEXTS] - [TX_SIZE_MAX_SB - 1] = { - { 3, 136, 37, }, - { 5, 52, 13, }, -}; -static const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_CONTEXTS] - [TX_SIZE_MAX_SB - 2] = { - { 20, 152, }, - { 15, 101, }, -}; -static const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_CONTEXTS] - [TX_SIZE_MAX_SB - 3] = { - { 100, }, - { 66, }, -}; -static const vp9_prob vp9_default_mbskip_probs[MBSKIP_CONTEXTS] = { //its C0..shud be f8?? - 192, 128, 64 -}; - -static const nvdec_nmv_context vp9_default_nmv_context = { - {32, 64, 96}, /* joints */ - {128, 128}, /* sign */ - {{216},{208}}, /* class0 */ - {{64, 96, 64},{64, 96, 64}}, /* fp */ - {160,160}, /* class0_hp bit */ - {128,128}, /* hp */ - {{224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, - {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}}, /* class */ - {{{128, 128, 64}, {96, 112, 64}}, - {{128, 128, 64}, {96, 112, 64}}}, /* class0_fp */ - {{136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}}, /* bits */ -}; +protected: + VkParserVp9PictureData m_PicData; -static const int32_t vp9_seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 }; -static const int32_t vp9_seg_feature_data_max[SEG_LVL_MAX] = { 255, 63, 3, 0 }; -typedef uint8_t vp9_coeff_probs[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES]; + VkPicIf* m_pCurrPic; + VkPicIf* m_pOutFrame[VP9_MAX_NUM_SPATIAL_LAYERS]; -static const vp9_coeff_probs 
default_coef_probs_4x4[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 195, 29, 183 }, - { 84, 49, 136 }, - { 8, 42, 71 } - }, { /* Coeff Band 1 */ - { 31, 107, 169 }, - { 35, 99, 159 }, - { 17, 82, 140 }, - { 8, 66, 114 }, - { 2, 44, 76 }, - { 1, 19, 32 } - }, { /* Coeff Band 2 */ - { 40, 132, 201 }, - { 29, 114, 187 }, - { 13, 91, 157 }, - { 7, 75, 127 }, - { 3, 58, 95 }, - { 1, 28, 47 } - }, { /* Coeff Band 3 */ - { 69, 142, 221 }, - { 42, 122, 201 }, - { 15, 91, 159 }, - { 6, 67, 121 }, - { 1, 42, 77 }, - { 1, 17, 31 } - }, { /* Coeff Band 4 */ - { 102, 148, 228 }, - { 67, 117, 204 }, - { 17, 82, 154 }, - { 6, 59, 114 }, - { 2, 39, 75 }, - { 1, 15, 29 } - }, { /* Coeff Band 5 */ - { 156, 57, 233 }, - { 119, 57, 212 }, - { 58, 48, 163 }, - { 29, 40, 124 }, - { 12, 30, 81 }, - { 3, 12, 31 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 191, 107, 226 }, - { 124, 117, 204 }, - { 25, 99, 155 } - }, { /* Coeff Band 1 */ - { 29, 148, 210 }, - { 37, 126, 194 }, - { 8, 93, 157 }, - { 2, 68, 118 }, - { 1, 39, 69 }, - { 1, 17, 33 } - }, { /* Coeff Band 2 */ - { 41, 151, 213 }, - { 27, 123, 193 }, - { 3, 82, 144 }, - { 1, 58, 105 }, - { 1, 32, 60 }, - { 1, 13, 26 } - }, { /* Coeff Band 3 */ - { 59, 159, 220 }, - { 23, 126, 198 }, - { 4, 88, 151 }, - { 1, 66, 114 }, - { 1, 38, 71 }, - { 1, 18, 34 } - }, { /* Coeff Band 4 */ - { 114, 136, 232 }, - { 51, 114, 207 }, - { 11, 83, 155 }, - { 3, 56, 105 }, - { 1, 33, 65 }, - { 1, 17, 34 } - }, { /* Coeff Band 5 */ - { 149, 65, 234 }, - { 121, 57, 215 }, - { 61, 49, 166 }, - { 28, 36, 114 }, - { 12, 25, 76 }, - { 3, 16, 42 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 214, 49, 220 }, - { 132, 63, 188 }, - { 42, 65, 137 } - }, { /* Coeff Band 1 */ - { 85, 137, 221 }, - { 104, 131, 216 }, - { 49, 111, 192 }, - { 21, 87, 155 }, - { 2, 49, 87 }, - { 1, 16, 28 } - }, { /* Coeff Band 2 */ - { 89, 163, 230 }, - { 90, 137, 220 }, - { 29, 100, 183 }, - { 10, 70, 
135 }, - { 2, 42, 81 }, - { 1, 17, 33 } - }, { /* Coeff Band 3 */ - { 108, 167, 237 }, - { 55, 133, 222 }, - { 15, 97, 179 }, - { 4, 72, 135 }, - { 1, 45, 85 }, - { 1, 19, 38 } - }, { /* Coeff Band 4 */ - { 124, 146, 240 }, - { 66, 124, 224 }, - { 17, 88, 175 }, - { 4, 58, 122 }, - { 1, 36, 75 }, - { 1, 18, 37 } - }, { /* Coeff Band 5 */ - { 141, 79, 241 }, - { 126, 70, 227 }, - { 66, 58, 182 }, - { 30, 44, 136 }, - { 12, 34, 96 }, - { 2, 20, 47 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 229, 99, 249 }, - { 143, 111, 235 }, - { 46, 109, 192 } - }, { /* Coeff Band 1 */ - { 82, 158, 236 }, - { 94, 146, 224 }, - { 25, 117, 191 }, - { 9, 87, 149 }, - { 3, 56, 99 }, - { 1, 33, 57 } - }, { /* Coeff Band 2 */ - { 83, 167, 237 }, - { 68, 145, 222 }, - { 10, 103, 177 }, - { 2, 72, 131 }, - { 1, 41, 79 }, - { 1, 20, 39 } - }, { /* Coeff Band 3 */ - { 99, 167, 239 }, - { 47, 141, 224 }, - { 10, 104, 178 }, - { 2, 73, 133 }, - { 1, 44, 85 }, - { 1, 22, 47 } - }, { /* Coeff Band 4 */ - { 127, 145, 243 }, - { 71, 129, 228 }, - { 17, 93, 177 }, - { 3, 61, 124 }, - { 1, 41, 84 }, - { 1, 21, 52 } - }, { /* Coeff Band 5 */ - { 157, 78, 244 }, - { 140, 72, 231 }, - { 69, 58, 184 }, - { 31, 44, 137 }, - { 14, 38, 105 }, - { 8, 23, 61 } - } - } - } -}; -static const vp9_coeff_probs default_coef_probs_8x8[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 125, 34, 187 }, - { 52, 41, 133 }, - { 6, 31, 56 } - }, { /* Coeff Band 1 */ - { 37, 109, 153 }, - { 51, 102, 147 }, - { 23, 87, 128 }, - { 8, 67, 101 }, - { 1, 41, 63 }, - { 1, 19, 29 } - }, { /* Coeff Band 2 */ - { 31, 154, 185 }, - { 17, 127, 175 }, - { 6, 96, 145 }, - { 2, 73, 114 }, - { 1, 51, 82 }, - { 1, 28, 45 } - }, { /* Coeff Band 3 */ - { 23, 163, 200 }, - { 10, 131, 185 }, - { 2, 93, 148 }, - { 1, 67, 111 }, - { 1, 41, 69 }, - { 1, 14, 24 } - }, { /* Coeff Band 4 */ - { 29, 176, 217 }, - { 12, 145, 201 }, - { 3, 101, 156 }, - { 1, 69, 111 }, - { 1, 39, 63 }, - { 1, 14, 23 } - }, 
{ /* Coeff Band 5 */ - { 57, 192, 233 }, - { 25, 154, 215 }, - { 6, 109, 167 }, - { 3, 78, 118 }, - { 1, 48, 69 }, - { 1, 21, 29 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 202, 105, 245 }, - { 108, 106, 216 }, - { 18, 90, 144 } - }, { /* Coeff Band 1 */ - { 33, 172, 219 }, - { 64, 149, 206 }, - { 14, 117, 177 }, - { 5, 90, 141 }, - { 2, 61, 95 }, - { 1, 37, 57 } - }, { /* Coeff Band 2 */ - { 33, 179, 220 }, - { 11, 140, 198 }, - { 1, 89, 148 }, - { 1, 60, 104 }, - { 1, 33, 57 }, - { 1, 12, 21 } - }, { /* Coeff Band 3 */ - { 30, 181, 221 }, - { 8, 141, 198 }, - { 1, 87, 145 }, - { 1, 58, 100 }, - { 1, 31, 55 }, - { 1, 12, 20 } - }, { /* Coeff Band 4 */ - { 32, 186, 224 }, - { 7, 142, 198 }, - { 1, 86, 143 }, - { 1, 58, 100 }, - { 1, 31, 55 }, - { 1, 12, 22 } - }, { /* Coeff Band 5 */ - { 57, 192, 227 }, - { 20, 143, 204 }, - { 3, 96, 154 }, - { 1, 68, 112 }, - { 1, 42, 69 }, - { 1, 19, 32 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 212, 35, 215 }, - { 113, 47, 169 }, - { 29, 48, 105 } - }, { /* Coeff Band 1 */ - { 74, 129, 203 }, - { 106, 120, 203 }, - { 49, 107, 178 }, - { 19, 84, 144 }, - { 4, 50, 84 }, - { 1, 15, 25 } - }, { /* Coeff Band 2 */ - { 71, 172, 217 }, - { 44, 141, 209 }, - { 15, 102, 173 }, - { 6, 76, 133 }, - { 2, 51, 89 }, - { 1, 24, 42 } - }, { /* Coeff Band 3 */ - { 64, 185, 231 }, - { 31, 148, 216 }, - { 8, 103, 175 }, - { 3, 74, 131 }, - { 1, 46, 81 }, - { 1, 18, 30 } - }, { /* Coeff Band 4 */ - { 65, 196, 235 }, - { 25, 157, 221 }, - { 5, 105, 174 }, - { 1, 67, 120 }, - { 1, 38, 69 }, - { 1, 15, 30 } - }, { /* Coeff Band 5 */ - { 65, 204, 238 }, - { 30, 156, 224 }, - { 7, 107, 177 }, - { 2, 70, 124 }, - { 1, 42, 73 }, - { 1, 18, 34 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 225, 86, 251 }, - { 144, 104, 235 }, - { 42, 99, 181 } - }, { /* Coeff Band 1 */ - { 85, 175, 239 }, - { 112, 165, 229 }, - { 29, 136, 200 }, - { 12, 103, 162 }, - { 6, 77, 123 }, - { 2, 53, 84 } - }, { /* Coeff Band 2 */ 
- { 75, 183, 239 }, - { 30, 155, 221 }, - { 3, 106, 171 }, - { 1, 74, 128 }, - { 1, 44, 76 }, - { 1, 17, 28 } - }, { /* Coeff Band 3 */ - { 73, 185, 240 }, - { 27, 159, 222 }, - { 2, 107, 172 }, - { 1, 75, 127 }, - { 1, 42, 73 }, - { 1, 17, 29 } - }, { /* Coeff Band 4 */ - { 62, 190, 238 }, - { 21, 159, 222 }, - { 2, 107, 172 }, - { 1, 72, 122 }, - { 1, 40, 71 }, - { 1, 18, 32 } - }, { /* Coeff Band 5 */ - { 61, 199, 240 }, - { 27, 161, 226 }, - { 4, 113, 180 }, - { 1, 76, 129 }, - { 1, 46, 80 }, - { 1, 23, 41 } - } - } - } -}; -static const vp9_coeff_probs default_coef_probs_16x16[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 7, 27, 153 }, - { 5, 30, 95 }, - { 1, 16, 30 } - }, { /* Coeff Band 1 */ - { 50, 75, 127 }, - { 57, 75, 124 }, - { 27, 67, 108 }, - { 10, 54, 86 }, - { 1, 33, 52 }, - { 1, 12, 18 } - }, { /* Coeff Band 2 */ - { 43, 125, 151 }, - { 26, 108, 148 }, - { 7, 83, 122 }, - { 2, 59, 89 }, - { 1, 38, 60 }, - { 1, 17, 27 } - }, { /* Coeff Band 3 */ - { 23, 144, 163 }, - { 13, 112, 154 }, - { 2, 75, 117 }, - { 1, 50, 81 }, - { 1, 31, 51 }, - { 1, 14, 23 } - }, { /* Coeff Band 4 */ - { 18, 162, 185 }, - { 6, 123, 171 }, - { 1, 78, 125 }, - { 1, 51, 86 }, - { 1, 31, 54 }, - { 1, 14, 23 } - }, { /* Coeff Band 5 */ - { 15, 199, 227 }, - { 3, 150, 204 }, - { 1, 91, 146 }, - { 1, 55, 95 }, - { 1, 30, 53 }, - { 1, 11, 20 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 19, 55, 240 }, - { 19, 59, 196 }, - { 3, 52, 105 } - }, { /* Coeff Band 1 */ - { 41, 166, 207 }, - { 104, 153, 199 }, - { 31, 123, 181 }, - { 14, 101, 152 }, - { 5, 72, 106 }, - { 1, 36, 52 } - }, { /* Coeff Band 2 */ - { 35, 176, 211 }, - { 12, 131, 190 }, - { 2, 88, 144 }, - { 1, 60, 101 }, - { 1, 36, 60 }, - { 1, 16, 28 } - }, { /* Coeff Band 3 */ - { 28, 183, 213 }, - { 8, 134, 191 }, - { 1, 86, 142 }, - { 1, 56, 96 }, - { 1, 30, 53 }, - { 1, 12, 20 } - }, { /* Coeff Band 4 */ - { 20, 190, 215 }, - { 4, 135, 192 }, - { 1, 84, 139 }, - { 1, 53, 91 
}, - { 1, 28, 49 }, - { 1, 11, 20 } - }, { /* Coeff Band 5 */ - { 13, 196, 216 }, - { 2, 137, 192 }, - { 1, 86, 143 }, - { 1, 57, 99 }, - { 1, 32, 56 }, - { 1, 13, 24 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 211, 29, 217 }, - { 96, 47, 156 }, - { 22, 43, 87 } - }, { /* Coeff Band 1 */ - { 78, 120, 193 }, - { 111, 116, 186 }, - { 46, 102, 164 }, - { 15, 80, 128 }, - { 2, 49, 76 }, - { 1, 18, 28 } - }, { /* Coeff Band 2 */ - { 71, 161, 203 }, - { 42, 132, 192 }, - { 10, 98, 150 }, - { 3, 69, 109 }, - { 1, 44, 70 }, - { 1, 18, 29 } - }, { /* Coeff Band 3 */ - { 57, 186, 211 }, - { 30, 140, 196 }, - { 4, 93, 146 }, - { 1, 62, 102 }, - { 1, 38, 65 }, - { 1, 16, 27 } - }, { /* Coeff Band 4 */ - { 47, 199, 217 }, - { 14, 145, 196 }, - { 1, 88, 142 }, - { 1, 57, 98 }, - { 1, 36, 62 }, - { 1, 15, 26 } - }, { /* Coeff Band 5 */ - { 26, 219, 229 }, - { 5, 155, 207 }, - { 1, 94, 151 }, - { 1, 60, 104 }, - { 1, 36, 62 }, - { 1, 16, 28 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 233, 29, 248 }, - { 146, 47, 220 }, - { 43, 52, 140 } - }, { /* Coeff Band 1 */ - { 100, 163, 232 }, - { 179, 161, 222 }, - { 63, 142, 204 }, - { 37, 113, 174 }, - { 26, 89, 137 }, - { 18, 68, 97 } - }, { /* Coeff Band 2 */ - { 85, 181, 230 }, - { 32, 146, 209 }, - { 7, 100, 164 }, - { 3, 71, 121 }, - { 1, 45, 77 }, - { 1, 18, 30 } - }, { /* Coeff Band 3 */ - { 65, 187, 230 }, - { 20, 148, 207 }, - { 2, 97, 159 }, - { 1, 68, 116 }, - { 1, 40, 70 }, - { 1, 14, 29 } - }, { /* Coeff Band 4 */ - { 40, 194, 227 }, - { 8, 147, 204 }, - { 1, 94, 155 }, - { 1, 65, 112 }, - { 1, 39, 66 }, - { 1, 14, 26 } - }, { /* Coeff Band 5 */ - { 16, 208, 228 }, - { 3, 151, 207 }, - { 1, 98, 160 }, - { 1, 67, 117 }, - { 1, 41, 74 }, - { 1, 17, 31 } - } - } - } -}; -static const vp9_coeff_probs default_coef_probs_32x32[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 17, 38, 140 }, - { 7, 34, 80 }, - { 1, 17, 29 } - }, { /* Coeff Band 1 */ - { 
37, 75, 128 }, - { 41, 76, 128 }, - { 26, 66, 116 }, - { 12, 52, 94 }, - { 2, 32, 55 }, - { 1, 10, 16 } - }, { /* Coeff Band 2 */ - { 50, 127, 154 }, - { 37, 109, 152 }, - { 16, 82, 121 }, - { 5, 59, 85 }, - { 1, 35, 54 }, - { 1, 13, 20 } - }, { /* Coeff Band 3 */ - { 40, 142, 167 }, - { 17, 110, 157 }, - { 2, 71, 112 }, - { 1, 44, 72 }, - { 1, 27, 45 }, - { 1, 11, 17 } - }, { /* Coeff Band 4 */ - { 30, 175, 188 }, - { 9, 124, 169 }, - { 1, 74, 116 }, - { 1, 48, 78 }, - { 1, 30, 49 }, - { 1, 11, 18 } - }, { /* Coeff Band 5 */ - { 10, 222, 223 }, - { 2, 150, 194 }, - { 1, 83, 128 }, - { 1, 48, 79 }, - { 1, 27, 45 }, - { 1, 11, 17 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 36, 41, 235 }, - { 29, 36, 193 }, - { 10, 27, 111 } - }, { /* Coeff Band 1 */ - { 85, 165, 222 }, - { 177, 162, 215 }, - { 110, 135, 195 }, - { 57, 113, 168 }, - { 23, 83, 120 }, - { 10, 49, 61 } - }, { /* Coeff Band 2 */ - { 85, 190, 223 }, - { 36, 139, 200 }, - { 5, 90, 146 }, - { 1, 60, 103 }, - { 1, 38, 65 }, - { 1, 18, 30 } - }, { /* Coeff Band 3 */ - { 72, 202, 223 }, - { 23, 141, 199 }, - { 2, 86, 140 }, - { 1, 56, 97 }, - { 1, 36, 61 }, - { 1, 16, 27 } - }, { /* Coeff Band 4 */ - { 55, 218, 225 }, - { 13, 145, 200 }, - { 1, 86, 141 }, - { 1, 57, 99 }, - { 1, 35, 61 }, - { 1, 13, 22 } - }, { /* Coeff Band 5 */ - { 15, 235, 212 }, - { 1, 132, 184 }, - { 1, 84, 139 }, - { 1, 57, 97 }, - { 1, 34, 56 }, - { 1, 14, 23 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 181, 21, 201 }, - { 61, 37, 123 }, - { 10, 38, 71 } - }, { /* Coeff Band 1 */ - { 47, 106, 172 }, - { 95, 104, 173 }, - { 42, 93, 159 }, - { 18, 77, 131 }, - { 4, 50, 81 }, - { 1, 17, 23 } - }, { /* Coeff Band 2 */ - { 62, 147, 199 }, - { 44, 130, 189 }, - { 28, 102, 154 }, - { 18, 75, 115 }, - { 2, 44, 65 }, - { 1, 12, 19 } - }, { /* Coeff Band 3 */ - { 55, 153, 210 }, - { 24, 130, 194 }, - { 3, 93, 146 }, - { 1, 61, 97 }, - { 1, 31, 50 }, - { 1, 10, 16 } - }, { /* Coeff Band 4 */ - { 49, 186, 
223 }, - { 17, 148, 204 }, - { 1, 96, 142 }, - { 1, 53, 83 }, - { 1, 26, 44 }, - { 1, 11, 17 } - }, { /* Coeff Band 5 */ - { 13, 217, 212 }, - { 2, 136, 180 }, - { 1, 78, 124 }, - { 1, 50, 83 }, - { 1, 29, 49 }, - { 1, 14, 23 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 197, 13, 247 }, - { 82, 17, 222 }, - { 25, 17, 162 } - }, { /* Coeff Band 1 */ - { 126, 186, 247 }, - { 234, 191, 243 }, - { 176, 177, 234 }, - { 104, 158, 220 }, - { 66, 128, 186 }, - { 55, 90, 137 } - }, { /* Coeff Band 2 */ - { 111, 197, 242 }, - { 46, 158, 219 }, - { 9, 104, 171 }, - { 2, 65, 125 }, - { 1, 44, 80 }, - { 1, 17, 91 } - }, { /* Coeff Band 3 */ - { 104, 208, 245 }, - { 39, 168, 224 }, - { 3, 109, 162 }, - { 1, 79, 124 }, - { 1, 50, 102 }, - { 1, 43, 102 } - }, { /* Coeff Band 4 */ - { 84, 220, 246 }, - { 31, 177, 231 }, - { 2, 115, 180 }, - { 1, 79, 134 }, - { 1, 55, 77 }, - { 1, 60, 79 } - }, { /* Coeff Band 5 */ - { 43, 243, 240 }, - { 8, 180, 217 }, - { 1, 115, 166 }, - { 1, 84, 121 }, - { 1, 51, 67 }, - { 1, 16, 6 } - } - } - } -}; + int m_frameIdx; + int m_dataSize; + int m_frameSize; + bool m_frameSizeChanged; -static const uint8_t vp9_kf_default_bmode_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES] - [VP9_INTRA_MODES-1] = { - { /* above = dc */ - { 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */, - { 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */, - { 73, 32, 19, 187, 222, 215, 46, 34, 100 } /* left = h */, - { 91, 30, 32, 116, 121, 186, 93, 86, 94 } /* left = d45 */, - { 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */, - { 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */, - { 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */, - { 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */, - { 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */, - { 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */ - }, { /* above = v */ - { 63, 36, 126, 146, 123, 158, 60, 90, 96 } /* left = dc */, - { 43, 46, 168, 134, 107, 128, 
69, 142, 92 } /* left = v */, - { 44, 29, 68, 159, 201, 177, 50, 57, 77 } /* left = h */, - { 58, 38, 76, 114, 97, 172, 78, 133, 92 } /* left = d45 */, - { 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */, - { 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */, - { 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */, - { 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */, - { 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */, - { 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */ - }, { /* above = h */ - { 82, 26, 26, 171, 208, 204, 44, 32, 105 } /* left = dc */, - { 55, 44, 68, 166, 179, 192, 57, 57, 108 } /* left = v */, - { 42, 26, 11, 199, 241, 228, 23, 15, 85 } /* left = h */, - { 68, 42, 19, 131, 160, 199, 55, 52, 83 } /* left = d45 */, - { 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */, - { 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */, - { 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */, - { 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */, - { 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 */, - { 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */ - }, { /* above = d45 */ - { 103, 26, 36, 129, 132, 201, 83, 80, 93 } /* left = dc */, - { 59, 38, 83, 112, 103, 162, 98, 136, 90 } /* left = v */, - { 62, 30, 23, 158, 200, 207, 59, 57, 50 } /* left = h */, - { 67, 30, 29, 84, 86, 191, 102, 91, 59 } /* left = d45 */, - { 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */, - { 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */, - { 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */, - { 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */, - { 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */, - { 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */ - }, { /* above = d135 */ - { 69, 23, 29, 128, 83, 199, 46, 44, 101 } /* left = dc */, - { 53, 40, 55, 139, 69, 183, 61, 80, 110 } /* left = v */, - { 40, 29, 19, 161, 180, 207, 
43, 24, 91 } /* left = h */, - { 60, 34, 19, 105, 61, 198, 53, 64, 89 } /* left = d45 */, - { 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */, - { 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */, - { 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */, - { 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */, - { 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */, - { 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */ - }, { /* above = d117 */ - { 64, 19, 37, 156, 66, 138, 49, 95, 133 } /* left = dc */, - { 46, 27, 80, 150, 55, 124, 55, 121, 135 } /* left = v */, - { 36, 23, 27, 165, 149, 166, 54, 64, 118 } /* left = h */, - { 53, 21, 36, 131, 63, 163, 60, 109, 81 } /* left = d45 */, - { 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */, - { 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */, - { 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */, - { 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */, - { 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */, - { 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */ - }, { /* above = d153 */ - { 75, 17, 22, 136, 138, 185, 32, 34, 166 } /* left = dc */, - { 56, 39, 58, 133, 117, 173, 48, 53, 187 } /* left = v */, - { 35, 21, 12, 161, 212, 207, 20, 23, 145 } /* left = h */, - { 56, 29, 19, 117, 109, 181, 55, 68, 112 } /* left = d45 */, - { 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */, - { 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */, - { 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */, - { 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */, - { 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */, - { 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */ - }, { /* above = d27 */ - { 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */, - { 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */, - { 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */, - { 68, 36, 17, 106, 102, 206, 59, 
74, 74 } /* left = d45 */, - { 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */, - { 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */, - { 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */, - { 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */, - { 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */, - { 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */ - }, { /* above = d63 */ - { 78, 23, 39, 111, 117, 170, 74, 124, 94 } /* left = dc */, - { 48, 34, 86, 101, 92, 146, 78, 179, 134 } /* left = v */, - { 47, 22, 24, 138, 187, 178, 68, 69, 59 } /* left = h */, - { 56, 25, 33, 105, 112, 187, 95, 177, 129 } /* left = d45 */, - { 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */, - { 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */, - { 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */, - { 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */, - { 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */, - { 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */ - }, { /* above = tm */ - { 65, 70, 60, 155, 159, 199, 61, 60, 81 } /* left = dc */, - { 44, 78, 115, 132, 119, 173, 71, 112, 93 } /* left = v */, - { 39, 38, 21, 184, 227, 206, 42, 32, 64 } /* left = h */, - { 58, 47, 36, 124, 137, 193, 80, 82, 78 } /* left = d45 */, - { 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */, - { 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */, - { 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */, - { 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */, - { 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */, - { 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */ - } -}; + int m_rtOrigWidth; + int m_rtOrigHeight; + bool m_pictureStarted; + bool m_bitstreamComplete; -class VulkanVP9Decoder : public VulkanVideoDecoder -{ -protected: - vp9_reader reader; - nvdec_vp9EntropyProbs_t m_EntropyLast[FRAME_CONTEXTS]; - nvdec_vp9AdaptiveEntropyProbs_t m_PrevCtx; - const unsigned 
char* m_pCompressedHeader; + // Parsing state for compute_image_size() side effects + int m_lastFrameWidth; + int m_lastFrameHeight; + bool m_lastShowFrame; - void vp9_init_mbmode_probs(vp9_prob_update_s *pProbSetup); - vp9_prob weighted_prob(int32_t prob1, int32_t prob2, int32_t factor); - vp9_prob clip_prob(uint32_t p); - vp9_prob get_prob(uint32_t num, uint32_t den); - vp9_prob get_binary_prob(uint32_t n0, uint32_t n1); - uint32_t convert_distribution(uint32_t i, - const vp9_tree_index * tree, - uint8_t probs[], - uint32_t branch_ct[][2], - const uint32_t num_events[], - uint32_t tok0_offset); - void vp9_tree_probs_from_distribution(const vp9_tree_index* tree, - uint8_t probs [ /* n-1 */ ], - uint32_t branch_ct [ /* n-1 */ ] [2], - const uint32_t num_events[ /* n */ ], - uint32_t tok0_offset); - void update_coef_probs(uint8_t dst_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint8_t pre_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint32_t coef_counts[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES+1], - uint32_t (*eob_counts)[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS], - int32_t count_sat, int32_t update_factor); - void adaptCoefProbs(vp9_prob_update_s *pProbSetup); - int32_t update_mode_ct(vp9_prob pre_prob, vp9_prob prob, uint32_t branch_ct[2]); - int32_t update_mode_ct2(vp9_prob pre_prob, uint32_t branch_ct[2]); - void update_mode_probs(int32_t n_modes, - const vp9_tree_index *tree, uint32_t *cnt, - vp9_prob *pre_probs, vp9_prob *pre_probsB, - vp9_prob *dst_probs, vp9_prob *dst_probsB, - uint32_t tok0_offset); - void tx_counts_to_branch_counts_32x32(uint32_t *tx_count_32x32p, uint32_t (*ct_32x32p)[2]); - void tx_counts_to_branch_counts_16x16(uint32_t *tx_count_16x16p, uint32_t (*ct_16x16p)[2]); - void tx_counts_to_branch_counts_8x8(uint32_t *tx_count_8x8p, uint32_t (*ct_8x8p)[2]); - 
void adaptModeProbs(vp9_prob_update_s *pProbSetup); - void adaptModeContext(vp9_prob_update_s *pProbSetup); - uint32_t adapt_probs(uint32_t i, - const signed char* tree, - vp9_prob this_probs[], - const vp9_prob last_probs[], - const uint32_t num_events[]); - void adapt_prob(vp9_prob *dest, vp9_prob prep, uint32_t ct[2]); - void adaptNmvProbs(vp9_prob_update_s *pProbSetup); + // Last used loop filter parameters + int8_t m_loopFilterRefDeltas[STD_VIDEO_VP9_MAX_REF_FRAMES]; + int8_t m_loopFilterModeDeltas[STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS]; + vp9_ref_frames_s m_pBuffers[VP9_BUFFER_POOL_MAX_SIZE]; + protected: - void vp9_reader_fill(); - int32_t vp9_reader_init (uint32_t size); - int32_t vp9_read_bit(); - int32_t vp9_read(int32_t probability); - int32_t vp9_read_literal(int32_t bits); - uint32_t ParseCompressedVP9(); - int32_t get_unsigned_bits(uint32_t num_values); - uint32_t swGetBitsUnsignedMax( uint32_t maxValue); - vp9_prob vp9hwdReadProbDiffUpdate(uint8_t oldp); - int32_t vp9_inv_recenter_nonneg(int32_t v, int32_t m); - int32_t inv_remap_prob(int32_t v, int32_t m); - int32_t merge_index(int32_t v, int32_t n, int32_t modulus); - uint32_t BoolDecodeUniform(uint32_t n); - uint32_t vp9hwdDecodeSubExp(uint32_t k, uint32_t num_syms); - uint32_t vp9hwdDecodeCoeffUpdate(uint8_t probCoeffs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1]); - uint32_t vp9hwdDecodeMvUpdate(vp9_prob_update_s *pProbSetup); - void update_nmv(vp9_prob *const p, const vp9_prob upd_p); + void UpdateFramePointers(VkPicIf* currentPicture); + bool AddBuffertoOutputQueue(VkPicIf* pDispPic); + void AddBuffertoDispQueue(VkPicIf* pDispPic); + virtual void lEndPicture(VkPicIf* pDispPic); + void EndOfStream() override; public: VulkanVP9Decoder(VkVideoCodecOperationFlagBitsKHR std); - void ResetProbs(vp9_prob_update_s *pProbSetup); - void GetProbs(vp9_prob_update_s *pProbSetup); - uint32_t UpdateForwardProbability(vp9_prob_update_s *pProbSetup, const 
unsigned char* pCompressed_Header); - void UpdateBackwardProbability(vp9_prob_update_s *pProbSetup); + ~VulkanVP9Decoder(); // TODO: Need to implement these functions. - bool IsPictureBoundary(int32_t) { return true; }; - int32_t ParseNalUnit() { return NALU_UNKNOWN; }; + bool IsPictureBoundary(int32_t) override { return true; }; + int32_t ParseNalUnit() override { return NALU_UNKNOWN; }; bool DecodePicture(VkParserPictureData *) { return false; }; - void InitParser() {} - bool BeginPicture(VkParserPictureData *) { return false; } - void CreatePrivateContext() {} - void FreeContext() {} + void InitParser() override; + bool BeginPicture(VkParserPictureData *) override; + void CreatePrivateContext() override {} + void FreeContext() override {} + +private: + bool ParseByteStream(const VkParserBitstreamPacket* pck, size_t* pParsedBtes) override; + bool ParseFrameHeader(uint32_t framesize); + bool ParseUncompressedHeader(); + bool ParseColorConfig(); + void ParseFrameAndRenderSize(); + void ParseFrameAndRenderSizeWithRefs(); + void ComputeImageSize(); + void ParseLoopFilterParams(); + void ParseQuantizationParams(); + int32_t ReadDeltaQ(); + void ParseSegmentationParams(); + uint8_t CalcMinLog2TileCols(); + uint8_t CalcMaxLog2TileCols(); + void ParseTileInfo(); + void ParseSuperFrameIndex(const uint8_t* data, uint32_t data_sz, uint32_t sizes[8], uint32_t* count); + }; #endif // _VP9_PROBMANAGER_H_ diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp index 99452952..701e4c07 100644 --- a/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp +++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp @@ -20,1044 +20,889 @@ VulkanVP9Decoder::VulkanVP9Decoder(VkVideoCodecOperationFlagBitsKHR std) : VulkanVideoDecoder(std) -{ - memset(&m_EntropyLast, 0, sizeof(m_EntropyLast)); - memset(&m_PrevCtx, 0, sizeof(m_PrevCtx)); - memset(&reader, 0, sizeof(vp9_reader)); - 
m_pCompressedHeader = NULL; -} -void VulkanVP9Decoder::vp9_init_mbmode_probs(vp9_prob_update_s *pProbSetup) -{ - uint32_t i, j; - - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - { - for (j = 0; j < 8; j++) - pProbSetup->pProbTab->a.sb_ymode_prob[i][j] = default_if_y_probs[i][j]; - pProbSetup->pProbTab->a.sb_ymode_probB[i][0] = default_if_y_probs[i][8]; - } - - for (i = 0; i < VP9_INTRA_MODES; i++) - { - for (j = 0; j < 8; j++) - pProbSetup->pProbTab->kf_uv_mode_prob[i][j] = default_kf_uv_probs[i][j]; - pProbSetup->pProbTab->kf_uv_mode_probB[i][0] = default_kf_uv_probs[i][8]; - - for (j = 0; j < 8; j++) - pProbSetup->pProbTab->a.uv_mode_prob[i][j] = default_if_uv_probs[i][j]; - pProbSetup->pProbTab->a.uv_mode_probB[i][0] = default_if_uv_probs[i][8]; - } - - memcpy(pProbSetup->pProbTab->a.switchable_interp_prob, vp9_switchable_interp_prob, - sizeof(vp9_switchable_interp_prob)); - memcpy(pProbSetup->pProbTab->a.partition_prob, vp9_partition_probs, - sizeof(vp9_partition_probs)); - memcpy(pProbSetup->pProbTab->a.intra_inter_prob, default_intra_inter_p, - sizeof(default_intra_inter_p)); - memcpy(pProbSetup->pProbTab->a.comp_inter_prob, default_comp_inter_p, - sizeof(default_comp_inter_p)); - memcpy(pProbSetup->pProbTab->a.comp_ref_prob, default_comp_ref_p, - sizeof(default_comp_ref_p)); - memcpy(pProbSetup->pProbTab->a.single_ref_prob, default_single_ref_p, - sizeof(default_single_ref_p)); - memcpy(pProbSetup->pProbTab->a.tx32x32_prob, vp9_default_tx_probs_32x32p, - sizeof(vp9_default_tx_probs_32x32p)); - memcpy(pProbSetup->pProbTab->a.tx16x16_prob, vp9_default_tx_probs_16x16p, - sizeof(vp9_default_tx_probs_16x16p)); - memcpy(pProbSetup->pProbTab->a.tx8x8_prob, vp9_default_tx_probs_8x8p, - sizeof(vp9_default_tx_probs_8x8p)); - memcpy(pProbSetup->pProbTab->a.mbskip_probs, vp9_default_mbskip_probs, - sizeof(vp9_default_mbskip_probs)); - - for (i = 0; i < VP9_INTRA_MODES; i++) - { - for (j = 0; j < VP9_INTRA_MODES; j++) - { - memcpy(pProbSetup->pProbTab->kf_bmode_prob[i][j], 
vp9_kf_default_bmode_probs[i][j], 8); - pProbSetup->pProbTab->kf_bmode_probB[i][j][0] = vp9_kf_default_bmode_probs[i][j][8]; - } - } + , m_PicData() + , m_pCurrPic() + , m_frameIdx(-1) + , m_dataSize() + , m_frameSize() + , m_frameSizeChanged() + , m_rtOrigWidth() + , m_rtOrigHeight() + , m_pictureStarted() + , m_bitstreamComplete(true) + , m_lastFrameWidth(0) + , m_lastFrameHeight(0) + , m_lastShowFrame(false) + , m_pBuffers() { } -void VulkanVP9Decoder::ResetProbs(vp9_prob_update_s *pProbSetup) +VulkanVP9Decoder::~VulkanVP9Decoder() { - //reset segmentMap (buffers going to HWIF_SEGMENT_READ_BASE_LSB and HWIF_SEGMENT_WRITE_BASE_LSB) - - uint32_t i, j, k, l, m; - - memcpy(pProbSetup->pProbTab->a.inter_mode_prob, vp9_default_inter_mode_prob, sizeof(vp9_default_inter_mode_prob)); - vp9_init_mbmode_probs(pProbSetup); - memcpy(&pProbSetup->pProbTab->a.nmvc, &vp9_default_nmv_context, sizeof(nvdec_nmv_context)); - - /* Copy the default probs into two separate prob tables: part1 and part2. */ - - for( i = 0; i < VP9_BLOCK_TYPES; i++ ) { - for ( j = 0; j < VP9_REF_TYPES; j++ ) { - for ( k = 0; k < VP9_COEF_BANDS; k++ ) { - for ( l = 0; l < VP9_PREV_COEF_CONTEXTS; l++ ) { - if (l >= 3 && k == 0) - continue; - - for ( m = 0; m < UNCONSTRAINED_NODES; m++ ) { - pProbSetup->pProbTab->a.probCoeffs[i][j][k][l][m] = - default_coef_probs_4x4[i][j][k][l][m]; - pProbSetup->pProbTab->a.probCoeffs8x8[i][j][k][l][m] = - default_coef_probs_8x8[i][j][k][l][m]; - pProbSetup->pProbTab->a.probCoeffs16x16[i][j][k][l][m] = - default_coef_probs_16x16[i][j][k][l][m]; - pProbSetup->pProbTab->a.probCoeffs32x32[i][j][k][l][m] = - default_coef_probs_32x32[i][j][k][l][m]; - } - } - } - } - } - - /* Store the default probs for all saved contexts */ - if (pProbSetup->keyFrame || pProbSetup->errorResilient || pProbSetup->resetFrameContext == 3) - { - for (i = 0; i < FRAME_CONTEXTS; i++) - memcpy( &m_EntropyLast[i], pProbSetup->pProbTab, sizeof(nvdec_vp9EntropyProbs_t)); - } - else if 
(pProbSetup->resetFrameContext == 2) - memcpy( &m_EntropyLast[pProbSetup->frameContextIdx], pProbSetup->pProbTab, sizeof(nvdec_vp9EntropyProbs_t)); } -void VulkanVP9Decoder::GetProbs(vp9_prob_update_s *pProbSetup) +void VulkanVP9Decoder::InitParser() { - memcpy(pProbSetup->pProbTab, &m_EntropyLast[pProbSetup->frameContextIdx], sizeof(m_EntropyLast[pProbSetup->frameContextIdx])); + m_bNoStartCodes = true; + m_bEmulBytesPresent = false; + m_pCurrPic = nullptr; + m_bitstreamComplete = true; + m_pictureStarted = false; + EndOfStream(); } -///////////////////////////////////////////////////////////////////////////////// - - -void VulkanVP9Decoder::vp9_reader_fill() +void VulkanVP9Decoder::EndOfStream() { - vp9_reader *r = &reader; - uint32_t buffer_end = r->buffer_end; - uint32_t buffer = r->buffer; - VP9_BD_VALUE value = r->value; - int32_t count = r->count; - int32_t shift = BD_VALUE_SIZE - 8 - (count + 8); - int32_t loop_end = 0; - const int32_t bits_left = (int32_t)((buffer_end - buffer)*CHAR_BIT); - const int32_t x = shift + CHAR_BIT - bits_left; - if (x >= 0) { - count += LOTS_OF_BITS; - loop_end = x; + if (m_pCurrPic) { + m_pCurrPic->Release(); + m_pCurrPic = nullptr; } - if (x < 0 || bits_left) - { - while (shift >= loop_end) - { - count += CHAR_BIT; - uint8_t temp = m_pCompressedHeader[r->pos++]; //u( 8); - value |= (VP9_BD_VALUE)temp << shift; - shift -= CHAR_BIT; - buffer++; + for (int i = 0; i < 8; i++) { + if (m_pBuffers[i].buffer) { + m_pBuffers[i].buffer->Release(); + m_pBuffers[i].buffer = nullptr; } } - r->buffer = buffer; - r->value = value; - r->count = count; -} - -int32_t VulkanVP9Decoder::vp9_reader_init(uint32_t size) -{ - int32_t marker_bit = 0; - vp9_reader *r = &reader; - r->buffer_end = 0 + size; - r->buffer = 0; - r->value = 0; - r->count = -8; - r->range = 255; - r->pos = 0; - - vp9_reader_fill(); - marker_bit = vp9_read_bit(); - return marker_bit != 0; } -int32_t VulkanVP9Decoder::vp9_read_bit() +bool VulkanVP9Decoder::ParseByteStream(const 
VkParserBitstreamPacket* pck, size_t* pParsedBytes) { - return vp9_read( 128); -} + const uint8_t* pDataIn = (uint8_t*)pck->pByteStream; + int dataSize = (int)pck->nDataLength; -int32_t VulkanVP9Decoder::vp9_read(int32_t probability) -{ - - vp9_reader *br = &reader; - uint32_t bit = 0; - VP9_BD_VALUE value; - VP9_BD_VALUE bigsplit; - int32_t count; - uint32_t range; - uint32_t split = 1 + (((br->range - 1) * probability) >> 8); - if (br->count < 0) - vp9_reader_fill(); - value = br->value; - count = br->count; - bigsplit = (VP9_BD_VALUE)split << (BD_VALUE_SIZE - 8); - - range = split; - if (value >= bigsplit) - { - range = br->range - split; - value = value - bigsplit; - bit = 1; + if (pParsedBytes) { + *pParsedBytes = 0; } - uint32_t shift = vp9dx_bitreader_norm[range]; - range <<= shift; - value <<= shift; - count -= shift; - br->value = value; - br->count = count; - br->range = range; - return bit; -} - -int32_t VulkanVP9Decoder::vp9_read_literal( int32_t bits) -{ - int32_t z = 0, bit; - for (bit = bits - 1; bit >= 0; bit--) - { - z |= vp9_read_bit() << bit; + // Use different bitstreamBuffer than the previous frames bitstreamBuffer + // TODO: Make sure that the bitstreamBuffer is not in use. 
+ VkSharedBaseObj bitstreamBuffer; + assert(m_pClient); + m_pClient->GetBitstreamBuffer(m_bitstreamDataLen, + m_bufferOffsetAlignment, m_bufferSizeAlignment, + nullptr, 0, bitstreamBuffer); + assert(bitstreamBuffer); + if (!bitstreamBuffer) { + return false; } - return z; -} -//////////////////////////////////////////////////////////////////////////////////// -//Forward Update -uint32_t VulkanVP9Decoder::UpdateForwardProbability(vp9_prob_update_s *pProbSetup, const unsigned char* pCompressed_Header) -{ - nvdec_vp9EntropyProbs_t *fc = pProbSetup->pProbTab; // Frame context - - uint32_t tmp, i, j, k; + m_bitstreamDataLen = m_bitstreamData.SetBitstreamBuffer(bitstreamBuffer); + m_bitstreamData.ResetStreamMarkers(); - m_pCompressedHeader = pCompressed_Header; - m_PrevCtx = pProbSetup->pProbTab->a; - - if (vp9_reader_init(pProbSetup->offsetToDctParts) != 0) - { - return NOK; - } - - if (pProbSetup->lossless) - pProbSetup->transform_mode = ONLY_4X4; - else - { - pProbSetup->transform_mode = vp9_read_literal( 2); - if (pProbSetup->transform_mode == ALLOW_32X32) - pProbSetup->transform_mode += vp9_read_literal( 1); - if (pProbSetup->transform_mode == TX_MODE_SELECT) - { - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) - { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.tx8x8_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.tx16x16_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.tx32x32_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } - } + if (m_bitstreamData.GetBitstreamBuffer() == nullptr) { + // make sure we're 
initialized + return false; } - // Coefficient probability update - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs); + m_nCallbackEventCount = 0; - if( tmp != OK ) return (tmp); - if (pProbSetup->transform_mode > ONLY_4X4) { - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs8x8); - if( tmp != OK ) return (tmp); - } - if (pProbSetup->transform_mode > ALLOW_8X8) { - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs16x16); - if( tmp != OK ) return (tmp); - } - if (pProbSetup->transform_mode > ALLOW_16X16) { - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs32x32); - if( tmp != OK ) return (tmp); + // Handle discontinuity + if (pck->bDiscontinuity) { + memset(&m_nalu, 0, sizeof(m_nalu)); + memset(&m_PTSQueue, 0, sizeof(m_PTSQueue)); + m_bDiscontinuityReported = true; + m_pictureStarted = false; } - pProbSetup->probsDecoded = 1; + if (pck->bPTSValid) { + m_PTSQueue[m_lPTSPos].bPTSValid = true; + m_PTSQueue[m_lPTSPos].llPTS = pck->llPTS; + m_PTSQueue[m_lPTSPos].llPTSPos = m_llParsedBytes; + m_PTSQueue[m_lPTSPos].bDiscontinuity = m_bDiscontinuityReported; + m_bDiscontinuityReported = false; + m_lPTSPos = (m_lPTSPos + 1) % MAX_QUEUED_PTS; + } - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - fc->a.mbskip_probs[k] = vp9hwdReadProbDiffUpdate( fc->a.mbskip_probs[k]); - } + if (pck->pByteStream && pck->nDataLength && m_frameIdx == -1) { + memset(&m_PicData, 0, sizeof(VkParserVp9PictureData)); + m_frameIdx++; } - if(!pProbSetup->keyFrame) - { - for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - for (j = 0; j < VP9_INTER_MODES - 1; j++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.inter_mode_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } + while ((dataSize > 0) || m_pictureStarted) { + if (!m_pictureStarted) { + if (m_bitstreamComplete) { + // fill bitstreambuffer from start + // assuming parser will get bitstream per frame from demuxer + m_frameSize = dataSize; + m_nalu.start_offset = 0; + 
m_nalu.end_offset = 0; } - } - if (pProbSetup->mcomp_filter_type == SWITCHABLE) { - for (j = 0; j < VP9_SWITCHABLE_FILTERS+1; ++j) { - for (i = 0; i < VP9_SWITCHABLE_FILTERS-1; ++i) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.switchable_interp_prob[j][i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } + if (((VkDeviceSize)dataSize > m_bitstreamDataLen) && !resizeBitstreamBuffer(dataSize - m_bitstreamDataLen)) { + return false; } - } - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.intra_inter_prob[i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); + if (dataSize >= (m_frameSize - m_nalu.end_offset)) { + memcpy(m_bitstreamData.GetBitstreamPtr() + m_nalu.end_offset, pDataIn, m_frameSize - m_nalu.end_offset); + m_pictureStarted = true; + pDataIn += (m_frameSize - (int)m_nalu.end_offset); + dataSize -= (m_frameSize - (int)m_nalu.end_offset); + m_nalu.end_offset = m_frameSize; + m_bitstreamComplete = true; + } else { + memcpy(m_bitstreamData.GetBitstreamPtr() + m_nalu.end_offset, pDataIn, dataSize); + m_nalu.end_offset += dataSize; + pDataIn += dataSize; + dataSize = 0; + m_bitstreamComplete = false; } - } - - // Compound prediction mode probabilities - if (pProbSetup->allow_comp_inter_inter) { - tmp = vp9_read_literal( 1); - pProbSetup->comp_pred_mode = tmp; - if(tmp) { - tmp = vp9_read_literal( 1); - pProbSetup->comp_pred_mode += tmp; - if (pProbSetup->comp_pred_mode == HYBRID_PREDICTION) - { - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.comp_inter_prob[i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); + } else { + uint32_t frames_processed = 0; + uint32_t sizeparsed = 0, framesdone = 0; + + uint32_t frame_size = m_frameSize; + + const uint8_t* data_start = m_bitstreamData.GetBitstreamPtr(); + const uint8_t* data_end = data_start + m_frameSize; + uint32_t data_size = m_frameSize; 
+ uint32_t frames_in_superframe, frame_sizes[8]; + + ParseSuperFrameIndex(data_start, data_size, frame_sizes, &frames_in_superframe); + + do { + // Skip over the superframe index, if present + if ((data_size > 0) && ((data_start[0] & 0xe0) == 0xc0)) { + const uint8_t marker = data_start[0]; + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const uint32_t index_sz = 2 + mag * frames; + + if ((data_size >= index_sz) && (data_start[index_sz - 1] == marker)) { + data_start += index_sz; + data_size -= index_sz; + if (data_start < data_end) { + continue; + } else { + break; } } } - } - } else { - pProbSetup->comp_pred_mode = SINGLE_PREDICTION_ONLY; - } - if (pProbSetup->comp_pred_mode != COMP_PREDICTION_ONLY) { - for (i = 0; i < REF_CONTEXTS; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.single_ref_prob[i][0]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.single_ref_prob[i][1]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } + // Use the correct size for this frame, if an index is present + if (frames_in_superframe > 0) { + frame_size = frame_sizes[frames_processed]; + if (data_size < frame_size) { + // Invalid frame size in index + return false; + } + data_size = frame_size; + m_nalu.start_offset = sizeparsed; - if (pProbSetup->comp_pred_mode != SINGLE_PREDICTION_ONLY) { - for (i = 0; i < REF_CONTEXTS; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.comp_ref_prob[i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); } - } - } - // Superblock intra luma pred mode probabilities - for(j = 0 ; j < BLOCK_SIZE_GROUPS; ++j) - { - for( i = 0 ; i < 8; ++i ) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - fc->a.sb_ymode_prob[j][i] = vp9hwdReadProbDiffUpdate( - fc->a.sb_ymode_prob[j][i]); - } - } - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - 
fc->a.sb_ymode_probB[j][0] = vp9hwdReadProbDiffUpdate( - fc->a.sb_ymode_probB[j][0]); - } - } + ParseFrameHeader(frame_size); - for (j = 0; j < NUM_PARTITION_CONTEXTS; j++) { - for (i = 0; i < PARTITION_TYPES - 1; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.partition_prob[INTER_FRAME][j][i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); + if (frames_in_superframe > 0) { + sizeparsed += frame_sizes[framesdone]; + framesdone++; } - } + data_start += data_size; + while (data_start < data_end && *data_start == 0) { + data_start++; + } + + data_size = (int)(data_end - data_start); + frames_processed += 1; + } while (data_start < data_end); + + m_frameIdx++; + m_pictureStarted = false; } - // Motion vector tree update - tmp = vp9hwdDecodeMvUpdate(pProbSetup); - if( tmp != OK ) - return (tmp); } - return (OK); -} + if (pck->bEOS) { + end_of_stream(); + } -void VulkanVP9Decoder::update_nmv( vp9_prob *const p, const vp9_prob upd_p) -{ - uint32_t tmp = vp9_read( upd_p); - if (tmp) { -#if 1 //def LOW_PRECISION_MV_UPDATE - *p = (vp9_read_literal( 7) << 1) | 1; -#else - *p = vp9_read_literal( 8); -#endif + if (pParsedBytes) { + *pParsedBytes = pck->nDataLength; } + + return true; } -uint32_t VulkanVP9Decoder::vp9hwdDecodeMvUpdate(vp9_prob_update_s *pProbSetup) + +bool VulkanVP9Decoder::ParseFrameHeader(uint32_t framesize) { - uint32_t i, j, k; - nvdec_nmv_context *mvctx = &pProbSetup->pProbTab->a.nmvc; + m_llNaluStartLocation = m_llParsedBytes; + m_llFrameStartLocation = m_llNaluStartLocation; + m_llParsedBytes += framesize; + //m_pSliceOffsets[0] = 0; -#if 0 - tmp = vp9_read_literal( 1); - if (!tmp) return HANTRO_OK; -#endif + init_dbits(); + //parse uncompressed header + if(!ParseUncompressedHeader()) + { + assert((!"Error in ParseUncompressedVP9\n")); + return 0; + } + if (m_PicData.show_existing_frame == true) { + // display an existing frame + VkPicIf* pDispPic = m_pBuffers[m_PicData.frame_to_show_map_idx].buffer; + if (pDispPic) { + 
pDispPic->AddRef(); + } + + AddBuffertoOutputQueue(pDispPic); - for (j = 0; j < MV_JOINTS - 1; ++j) { - update_nmv( &mvctx->joints[j], - VP9_NMV_UPDATE_PROB); + return 0; } - for (i = 0; i < 2; ++i) { - update_nmv( &mvctx->sign[i], VP9_NMV_UPDATE_PROB); - for (j = 0; j < MV_CLASSES - 1; ++j) { - update_nmv( &mvctx->classes[i][j], VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < CLASS0_SIZE - 1; ++j) { - update_nmv( &mvctx->class0[i][j], VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < MV_OFFSET_BITS; ++j) { - update_nmv( &mvctx->bits[i][j], VP9_NMV_UPDATE_PROB); - } + + // handle bitstream start offset alignment (for super frame) + uint32_t addOffset = m_nalu.start_offset & (m_bufferOffsetAlignment - 1); + m_PicData.uncompressedHeaderOffset += addOffset; + m_PicData.compressedHeaderOffset += addOffset; + m_PicData.tilesOffset += addOffset; + + *m_pVkPictureData = VkParserPictureData(); + m_pVkPictureData->CodecSpecific.vp9 = m_PicData; + m_pVkPictureData->numSlices = m_PicData.numTiles; + m_pVkPictureData->bitstreamDataLen = (framesize + addOffset + m_bufferSizeAlignment - 1) & ~(m_bufferSizeAlignment - 1); // buffer is already aligned so, no issues. 
+ m_pVkPictureData->bitstreamData = m_bitstreamData.GetBitstreamBuffer(); + m_pVkPictureData->bitstreamDataOffset = m_nalu.start_offset & ~((int64_t)m_bufferOffsetAlignment - 1); + + if (!BeginPicture(m_pVkPictureData)) { + assert(!"BeginPicture failed"); + return false; } - for (i = 0; i < 2; ++i) { - for (j = 0; j < CLASS0_SIZE; ++j) { - for (k = 0; k < 3; ++k) - update_nmv( &mvctx->class0_fp[i][j][k], VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < 3; ++j) { - update_nmv( &mvctx->fp[i][j], VP9_NMV_UPDATE_PROB); - } + bool bSkipped = false; + if (m_pClient != nullptr) { + // Notify client + if (!m_pClient->DecodePicture(m_pVkPictureData)) { + bSkipped = true; + // WARNING: skipped decoding current picture; + } else { + m_nCallbackEventCount++; + } + } else { + // WARNING: no valid render target for current picture } - if (pProbSetup->allow_high_precision_mv) { - for (i = 0; i < 2; ++i) { - update_nmv( &mvctx->class0_hp[i], VP9_NMV_UPDATE_PROB); - update_nmv( &mvctx->hp[i], VP9_NMV_UPDATE_PROB); - } + //m_PicData.prevIsKeyFrame = m_PicData.keyFrame; + //m_PicData.PrevShowFrame = m_PicData.showFrame; + UpdateFramePointers(m_pCurrPic); + + if (m_PicData.stdPictureInfo.flags.show_frame && !bSkipped) { + // Call back codec for post-decode event (display the decoded frame) + AddBuffertoOutputQueue(m_pCurrPic); + m_pCurrPic = nullptr; + } else { + m_pCurrPic->Release(); + m_pCurrPic = nullptr; } - return (OK); + return 1; } -uint32_t VulkanVP9Decoder::vp9hwdDecodeCoeffUpdate( - uint8_t probCoeffs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1]) +void VulkanVP9Decoder::UpdateFramePointers(VkPicIf* currentPicture) { - uint32_t i, j, k, l, m; - uint32_t tmp; - tmp = vp9_read_literal( 1); - if (!tmp) return OK; - for( i = 0; i < VP9_BLOCK_TYPES; i++ ) - { - for ( j = 0; j < VP9_REF_TYPES; j++ ) - { - for ( k = 0; k < VP9_COEF_BANDS; k++ ) - { - for ( l = 0; l < VP9_PREV_COEF_CONTEXTS; l++ ) - { - if (l >= 3 && k == 0) - continue; - - 
for ( m = 0; m < UNCONSTRAINED_NODES; m++ ) - { - tmp = vp9_read( 252); - CHECK_END_OF_STREAM(tmp); - if ( tmp ) - { - uint8_t old, latest; - old = probCoeffs[i][j][k][l][m]; - latest = vp9hwdReadProbDiffUpdate( old); - CHECK_END_OF_STREAM(tmp); - - probCoeffs[i][j][k][l][m] = latest; - } - } - } + StdVideoDecodeVP9PictureInfo* const pStdPicInfo = &m_PicData.stdPictureInfo; + + uint32_t mask, ref_index = 0; + + for (mask = pStdPicInfo->refresh_frame_flags; mask; mask >>= 1) { + if (mask & 1) { + if (m_pBuffers[ref_index].buffer) { + m_pBuffers[ref_index].buffer->Release(); + } + m_pBuffers[ref_index].buffer = currentPicture; + + if (m_pBuffers[ref_index].buffer) { + m_pBuffers[ref_index].buffer->AddRef(); } } + ++ref_index; } - return (OK); -} -int32_t VulkanVP9Decoder::get_unsigned_bits(uint32_t num_values) -{ - int32_t cat = 0; - if (num_values <= 1) - return 0; - num_values--; - while(num_values > 0) - { - cat++; - num_values >>= 1; - } - return cat; + // Invalidate these references until the next frame starts. + //for (int i = 0; i < ALLOWED_REFS_PER_FRAME; i++) { + // pFrameInfo->activeRefIdx[i] = 0xffff; + //} } -uint32_t VulkanVP9Decoder::BoolDecodeUniform( uint32_t n) +bool VulkanVP9Decoder::AddBuffertoOutputQueue(VkPicIf* pDispPic) { - int32_t value, v; - int32_t l = get_unsigned_bits(n); - int32_t m = (1 << l) - n; - if (!l) return 0; - value = vp9_read_literal( l - 1); - if (value >= m) { - v = vp9_read_literal( 1); - value = (value << 1) - m + v; - } - return value; + AddBuffertoDispQueue(pDispPic); + lEndPicture(pDispPic); + + return true; } -uint32_t VulkanVP9Decoder::vp9hwdDecodeSubExp( uint32_t k, uint32_t num_syms) +void VulkanVP9Decoder::AddBuffertoDispQueue(VkPicIf* pDispPic) { - uint32_t i=0, mk=0, value=0; - while (1) { - int32_t b = (i ? 
k + i - 1 : k); - uint32_t a = (1 << b); - if (num_syms <= mk + 3 * a) { - value = BoolDecodeUniform( num_syms - mk) + mk; + int lDisp = 0; + + // Find an entry in m_DispInfo + for (int i = 0; i < MAX_DELAY; i++) { + if (m_DispInfo[i].pPicBuf == pDispPic) { + lDisp = i; break; - } else { - value = vp9_read_bit(); - if (value) { - i++; - mk += a; - } else { - value = vp9_read_literal( b) + mk; - break; - } + } + if ((m_DispInfo[i].pPicBuf == nullptr) + || ((m_DispInfo[lDisp].pPicBuf != nullptr) && (m_DispInfo[i].llPTS - m_DispInfo[lDisp].llPTS < 0))) { + lDisp = i; } } - return value; -} + m_DispInfo[lDisp].pPicBuf = pDispPic; + m_DispInfo[lDisp].bSkipped = false; + m_DispInfo[lDisp].lPOC = 0; + m_DispInfo[lDisp].lNumFields = 2; -int32_t VulkanVP9Decoder::merge_index(int32_t v, int32_t n, int32_t modulus) -{ - int32_t max1 = (n - 1 - modulus / 2) / modulus + 1; - if (v < max1) v = v * modulus + modulus / 2; - else - { - int32_t w; - v -= max1; - w = v; - v += (v + modulus - modulus / 2) / modulus; - while (v % modulus == modulus / 2 || - w != v - (v + modulus - modulus / 2) / modulus) v++; + // Find a PTS in the list + unsigned int ndx = m_lPTSPos; + m_DispInfo[lDisp].llPTS = m_llExpectedPTS; // Will be updated later on + + for (int k = 0; k < MAX_QUEUED_PTS; k++) { + if ((m_PTSQueue[ndx].bPTSValid) && (m_PTSQueue[ndx].llPTSPos - m_llFrameStartLocation <= (m_bNoStartCodes?0:3))) { + m_DispInfo[lDisp].bPTSValid = true; + m_DispInfo[lDisp].llPTS = m_PTSQueue[ndx].llPTS; + m_PTSQueue[ndx].bPTSValid = false; + } + ndx = (ndx + 1) % MAX_QUEUED_PTS; } - return v; } -int32_t VulkanVP9Decoder::vp9_inv_recenter_nonneg(int32_t v, int32_t m) +void VulkanVP9Decoder::lEndPicture(VkPicIf* pDispPic) { - if (v > (m << 1)) return v; - else if ((v & 1) == 0) return (v >> 1) + m; - else return m - ((v + 1) >> 1); -} + if (pDispPic) { + display_picture(pDispPic); + pDispPic->Release(); + } -int32_t VulkanVP9Decoder::inv_remap_prob(int32_t v, int32_t m) -{ - const int32_t n = 255; - v = 
merge_index(v, n - 1, MODULUS_PARAM); - m--; - if ((m << 1) <= n) - return 1 + vp9_inv_recenter_nonneg(v + 1, m); - else - return n - vp9_inv_recenter_nonneg(v + 1, n - 1 - m); } -vp9_prob VulkanVP9Decoder::vp9hwdReadProbDiffUpdate( uint8_t oldp) + +bool VulkanVP9Decoder::ParseUncompressedHeader() { - int32_t p; - int32_t delp = vp9hwdDecodeSubExp( 4, 255 ); - p = (vp9_prob)inv_remap_prob(delp, oldp); - return p; -} + VkParserVp9PictureData *pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9ColorConfig* pStdColorConfig = &m_PicData.stdColorConfig; + StdVideoVP9LoopFilter* pStdLoopFilter = &m_PicData.stdLoopFilter; + m_frameSizeChanged = false; -//Backward update + VP9_CHECK_FRAME_MARKER; + uint32_t profile = u(1); + profile |= u(1) << 1; + pStdPicInfo->profile = (StdVideoVP9Profile)profile; + if (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3) { + if (u(1) != 0) { + assert(!"Invalid syntax"); + return false; + } + } -// this function assumes prob1 and prob2 are already within [1,255] range -vp9_prob VulkanVP9Decoder::weighted_prob(int32_t prob1, int32_t prob2, int32_t factor) -{ - return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); -} + pPicData->show_existing_frame = u(1); + if (pPicData->show_existing_frame) { + pPicData->frame_to_show_map_idx = u(3); + //U32 frame_to_show = vp9parser->m_pBuffers[idx_to_show]; + //Handle direct show: CHECK + pPicData->uncompressedHeaderOffset = (consumed_bits() + 7) >> 3; + pPicData->compressedHeaderSize = 0; + pStdPicInfo->refresh_frame_flags = 0; + pStdLoopFilter->loop_filter_level = 0; + return true; + } -vp9_prob VulkanVP9Decoder::clip_prob(uint32_t p) -{ - return (vp9_prob)((p > 255) ? 255u : (p < 1) ? 1u : p); -} + pStdPicInfo->frame_type = (StdVideoVP9FrameType)u(1); + pStdPicInfo->flags.show_frame = u(1); + pStdPicInfo->flags.error_resilient_mode = u(1); -vp9_prob VulkanVP9Decoder::get_prob(uint32_t num, uint32_t den) -{ - return (den == 0) ? 
128u : clip_prob((num * 256 + (den >> 1)) / den); -} + if (pStdPicInfo->frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY) { + VP9_CHECK_FRAME_SYNC_CODE; + ParseColorConfig(); + ParseFrameAndRenderSize(); + pStdPicInfo->refresh_frame_flags = (1 << STD_VIDEO_VP9_NUM_REF_FRAMES) - 1; + pPicData->FrameIsIntra = true; -vp9_prob VulkanVP9Decoder::get_binary_prob(uint32_t n0, uint32_t n1) -{ - return get_prob(n0, n0 + n1); -} + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; ++i) { + pPicData->ref_frame_idx[i] = 0; + } + } else { // non key frame + pStdPicInfo->flags.intra_only = pStdPicInfo->flags.show_frame ? 0 : u(1); + pPicData->FrameIsIntra = pStdPicInfo->flags.intra_only; + pStdPicInfo->reset_frame_context = pStdPicInfo->flags.error_resilient_mode ? 0 : u(2); + + if (pStdPicInfo->flags.intra_only == 1) { + VP9_CHECK_FRAME_SYNC_CODE; + if (pStdPicInfo->profile > STD_VIDEO_VP9_PROFILE_0) { + ParseColorConfig(); + } else { + pStdColorConfig->color_space = STD_VIDEO_VP9_COLOR_SPACE_BT_601; + pStdColorConfig->subsampling_x = 1; + pStdColorConfig->subsampling_y = 1; + pStdColorConfig->BitDepth = 8; + } -uint32_t VulkanVP9Decoder::convert_distribution(uint32_t i, - const vp9_tree_index * tree, - vp9_prob probs[], - uint32_t branch_ct[][2], - const uint32_t num_events[], - uint32_t tok0_offset) -{ - uint32_t left, right; + pStdPicInfo->refresh_frame_flags = u(STD_VIDEO_VP9_NUM_REF_FRAMES); //for non key frame refresh only some - if (tree[i] <= 0) - { - left = num_events[-tree[i] - tok0_offset]; + ParseFrameAndRenderSize(); + } else { // inter frame + pStdPicInfo->refresh_frame_flags = u(STD_VIDEO_VP9_NUM_REF_FRAMES); + + pStdPicInfo->ref_frame_sign_bias_mask = 0; + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { + pPicData->ref_frame_idx[i] = u(3); + pStdPicInfo->ref_frame_sign_bias_mask |= (u(1) << (STD_VIDEO_VP9_REFERENCE_NAME_LAST_FRAME + i)); + } + + ParseFrameAndRenderSizeWithRefs(); + + pStdPicInfo->flags.allow_high_precision_mv = u(1); + + // interpolation 
filter + bool is_filter_switchable = u(1); //mb_switchable_mcomp_filt + if (is_filter_switchable) { + pStdPicInfo->interpolation_filter = STD_VIDEO_VP9_INTERPOLATION_FILTER_SWITCHABLE; + } else { + const StdVideoVP9InterpolationFilter literal_to_filter[] = { + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SMOOTH, + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP, + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SHARP, + STD_VIDEO_VP9_INTERPOLATION_FILTER_BILINEAR }; + pStdPicInfo->interpolation_filter = literal_to_filter[u(2)]; + } + } } - else - { - left = convert_distribution(tree[i], tree, probs, branch_ct, num_events, tok0_offset); + + if (pStdPicInfo->flags.error_resilient_mode == 0) { + /* Refresh entropy probs, + * 0 == this frame probs are used only for this frame decoding, + * 1 == this frame probs will be stored for future reference */ + pStdPicInfo->flags.refresh_frame_context = u(1); + pStdPicInfo->flags.frame_parallel_decoding_mode = u(1); + } else { + pStdPicInfo->flags.refresh_frame_context = 0; + pStdPicInfo->flags.frame_parallel_decoding_mode = 1; } - if (tree[i + 1] <= 0) - { - right = num_events[-tree[i + 1] - tok0_offset]; + + pStdPicInfo->frame_context_idx = u(2); + + if ((pPicData->FrameIsIntra == 1) || (pStdPicInfo->flags.error_resilient_mode == 1)) { + StdVideoVP9Segmentation* pStdSegment = &pPicData->stdSegmentation; + ///* Clear all previous segment data */ + memset(pStdSegment->FeatureEnabled, 0, sizeof(pStdSegment->FeatureEnabled)); + memset(pStdSegment->FeatureData, 0, sizeof(pStdSegment->FeatureData)); + pStdPicInfo->frame_context_idx = 0; } - else - { - right = convert_distribution(tree[i + 1], tree, probs, branch_ct, num_events, tok0_offset); + + ParseLoopFilterParams(); + ParseQuantizationParams(); + ParseSegmentationParams(); + ParseTileInfo(); + + pPicData->compressedHeaderSize = u(16); + + pPicData->uncompressedHeaderOffset = 0; + pPicData->compressedHeaderOffset = (consumed_bits() + 7) >> 3; + pPicData->tilesOffset = 
pPicData->compressedHeaderOffset + pPicData->compressedHeaderSize; + + pPicData->ChromaFormat = (pStdColorConfig->subsampling_x == 1) && (pStdColorConfig->subsampling_y == 1) ? 1 : 0; + assert(pPicData->ChromaFormat); // TODO: support only YUV420 + + return true; +} + +bool VulkanVP9Decoder::ParseColorConfig() +{ + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9ColorConfig* pStdColorConfig = &m_PicData.stdColorConfig; + + if (pStdPicInfo->profile >= STD_VIDEO_VP9_PROFILE_2) { + pStdColorConfig->BitDepth = u(1) ? 12 : 10; + } else { + pStdColorConfig->BitDepth = 8; + } + + pStdColorConfig->color_space = (StdVideoVP9ColorSpace)u(3); + + if (pStdColorConfig->color_space != STD_VIDEO_VP9_COLOR_SPACE_RGB) { + pStdColorConfig->flags.color_range = u(1); + if ((pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_1) || + (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3)) { + pStdColorConfig->subsampling_x = u(1); + pStdColorConfig->subsampling_y = u(1); + VP9_CHECK_ZERO_BIT + } else { + pStdColorConfig->subsampling_x = 1; + pStdColorConfig->subsampling_y = 1; + } + } else { + pStdColorConfig->flags.color_range = 1; + if ((pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_1) || + (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3)) { + pStdColorConfig->subsampling_x = 0; + pStdColorConfig->subsampling_y = 0; + VP9_CHECK_ZERO_BIT + } } - probs[i>>1] = get_binary_prob(left, right); - branch_ct[i>>1][0] = left; - branch_ct[i>>1][1] = right; - return left + right; + return true; } -void VulkanVP9Decoder::vp9_tree_probs_from_distribution(const vp9_tree_index * tree, - vp9_prob probs [ /* n-1 */ ], - uint32_t branch_ct [ /* n-1 */ ] [2], - const uint32_t num_events[ /* n */ ], - uint32_t tok0_offset) +void VulkanVP9Decoder::ParseFrameAndRenderSize() { - convert_distribution(0, tree, probs, branch_ct, num_events, tok0_offset); + VkParserVp9PictureData *pPicData = &m_PicData; + + pPicData->FrameWidth = u(16) + 1; + pPicData->FrameHeight = u(16) + 1; + + 
ComputeImageSize(); + + if (u(1) == 1) { // render_and_frame_size_different + pPicData->renderWidth = u(16) + 1; + pPicData->renderHeight = u(16) + 1; + } else { + pPicData->renderWidth = pPicData->FrameWidth; + pPicData->renderHeight = pPicData->FrameHeight; + } } -void VulkanVP9Decoder::update_coef_probs(uint8_t dst_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint8_t pre_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint32_t coef_counts[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES+1], - uint32_t (*eob_counts)[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS], - int32_t count_sat, int32_t update_factor) +void VulkanVP9Decoder::ParseFrameAndRenderSizeWithRefs() { - int32_t t, i, j, k, l, count; - uint32_t branch_ct[VP9_ENTROPY_NODES][2]; - vp9_prob coef_probs[VP9_ENTROPY_NODES]; - int32_t factor; + VkParserVp9PictureData* pPicData = &m_PicData; - //int32_t brancharr[VP9_BLOCK_TYPES][VP9_REF_TYPES][36][VP9_PREV_COEF_CONTEXTS] = {0}; - //int32_t coeffprobarr[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS] = {0}; - //memset(brancharr, 0, sizeof(int32_t)*VP9_BLOCK_TYPES*VP9_REF_TYPES*VP9_COEF_BANDS*VP9_PREV_COEF_CONTEXTS); - //memset(coeffprobarr, 0, sizeof(int32_t)*VP9_BLOCK_TYPES*VP9_REF_TYPES*VP9_COEF_BANDS*VP9_PREV_COEF_CONTEXTS); + bool found_ref = false; - for (i = 0; i < VP9_BLOCK_TYPES; ++i) - { - for (j = 0; j < VP9_REF_TYPES; ++j) - { - for (k = 0; k < VP9_COEF_BANDS; ++k) - { - for (l = 0; l < VP9_PREV_COEF_CONTEXTS; ++l) - { - if (l >= 3 && k == 0) - continue; - vp9_tree_probs_from_distribution(vp9_coefmodel_tree, - coef_probs, branch_ct, - coef_counts[i][j][k][l], 0); - branch_ct[0][1] = eob_counts[i][j][k][l] - branch_ct[0][0]; - coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]); - //brancharr[i][j][k][l] = branch_ct[0][1]; - 
//coeffprobarr[i][j][k][l] = coef_probs[0]; - for (t = 0; t < UNCONSTRAINED_NODES; ++t) - { - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - dst_coef_probs[i][j][k][l][t] = weighted_prob(pre_coef_probs[i][j][k][l][t], coef_probs[t], factor); - } - } + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; ++i) { + found_ref = u(1); + if (found_ref) { + VkPicIf* pRefPic = m_pBuffers[pPicData->ref_frame_idx[i]].buffer; + if (pRefPic != nullptr) { + pPicData->FrameWidth = pRefPic->decodeWidth; + pPicData->FrameHeight = pRefPic->decodeHeight; + + ComputeImageSize(); + } + + if (u(1) == 1) { // render_and_frame_size_different + pPicData->renderWidth = u(16) + 1; + pPicData->renderHeight = u(16) + 1; + } else { + pPicData->renderWidth = pPicData->FrameWidth; + pPicData->renderHeight = pPicData->FrameHeight; } + + break; } } + if (!found_ref) { + ParseFrameAndRenderSize(); + } } -void VulkanVP9Decoder::adaptCoefProbs(vp9_prob_update_s *pProbSetup) +void VulkanVP9Decoder::ComputeImageSize() { - int32_t update_factor; /* denominator 256 */ - int32_t count_sat; + VkParserVp9PictureData* pPicData = &m_PicData; - if(pProbSetup->keyFrame) - { - update_factor = COEF_MAX_UPDATE_FACTOR_KEY; - count_sat = COEF_COUNT_SAT_KEY; - } - else if (pProbSetup->prevIsKeyFrame) - { - update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; // adapt quickly - count_sat = COEF_COUNT_SAT_AFTER_KEY; - } - else - { - update_factor = COEF_MAX_UPDATE_FACTOR; - count_sat = COEF_COUNT_SAT; + // compute_image_size() + pPicData->MiCols = (pPicData->FrameWidth + 7) >> 3; + pPicData->MiRows = (pPicData->FrameHeight + 7) >> 3; + pPicData->Sb64Cols = (pPicData->MiCols + 7) >> 3; + pPicData->Sb64Rows = (pPicData->MiRows + 7) >> 3; + + // compute_image_size() side effects (7.2.6) + if (((uint32_t)m_lastFrameHeight != pPicData->FrameHeight) || ((uint32_t)m_lastFrameWidth != pPicData->FrameWidth)) { + m_frameSizeChanged = true; + 
pPicData->stdPictureInfo.flags.UsePrevFrameMvs = false; + } else { /* 2.a, 2.b */ + bool intraOnly = pPicData->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY || pPicData->stdPictureInfo.flags.intra_only; + pPicData->stdPictureInfo.flags.UsePrevFrameMvs = m_lastShowFrame && /* 2.c */ + pPicData->stdPictureInfo.flags.error_resilient_mode == 0 && /* 2.d */ + !intraOnly /* 2.e */; } + m_lastFrameHeight = pPicData->FrameHeight; + m_lastFrameWidth = pPicData->FrameWidth; + m_lastShowFrame = pPicData->stdPictureInfo.flags.show_frame; - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs, - m_PrevCtx.probCoeffs, - pProbSetup->pCtxCounters->countCoeffs, - pProbSetup->pCtxCounters->countEobs[TX_4X4], - count_sat, update_factor); - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs8x8, - m_PrevCtx.probCoeffs8x8, - pProbSetup->pCtxCounters->countCoeffs8x8, - pProbSetup->pCtxCounters->countEobs[TX_8X8], - count_sat, update_factor); - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs16x16, - m_PrevCtx.probCoeffs16x16, - pProbSetup->pCtxCounters->countCoeffs16x16, - pProbSetup->pCtxCounters->countEobs[TX_16X16], - count_sat, update_factor); - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs32x32, - m_PrevCtx.probCoeffs32x32, - pProbSetup->pCtxCounters->countCoeffs32x32, - pProbSetup->pCtxCounters->countEobs[TX_32X32], - count_sat, update_factor); } -int32_t VulkanVP9Decoder::update_mode_ct(vp9_prob pre_prob, vp9_prob prob, uint32_t branch_ct[2]) +void VulkanVP9Decoder::ParseLoopFilterParams() { - int32_t factor, count = branch_ct[0] + branch_ct[1]; - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - return weighted_prob(pre_prob, prob, factor); -} + VkParserVp9PictureData *pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo *pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9LoopFilter* pStdLoopFilter = &m_PicData.stdLoopFilter; -int32_t VulkanVP9Decoder::update_mode_ct2(vp9_prob pre_prob, uint32_t branch_ct[2]) -{ - return update_mode_ct(pre_prob, get_binary_prob(branch_ct[0], branch_ct[1]), branch_ct); -} + if (pPicData->FrameIsIntra || (pStdPicInfo->flags.error_resilient_mode == 1)) { + // setup_past_independence() for loop filter params + memset(m_loopFilterRefDeltas, 0, sizeof(m_loopFilterRefDeltas)); + memset(m_loopFilterModeDeltas, 0, sizeof(m_loopFilterModeDeltas)); + m_loopFilterRefDeltas[0] = 1; + m_loopFilterRefDeltas[1] = 0; + m_loopFilterRefDeltas[2] = -1; + m_loopFilterRefDeltas[3] = -1; + } -void VulkanVP9Decoder::update_mode_probs(int32_t n_modes, - const vp9_tree_index *tree, uint32_t *cnt, - vp9_prob *pre_probs, vp9_prob *pre_probsB, - vp9_prob *dst_probs, vp9_prob *dst_probsB, - uint32_t tok0_offset) -{ - vp9_prob probs[MAX_PROBS]; - uint32_t branch_ct[MAX_PROBS][2]; - int32_t t, count, factor; + pStdLoopFilter->loop_filter_level = u(6); + pStdLoopFilter->loop_filter_sharpness = u(3); - assert(n_modes - 1 < MAX_PROBS); - vp9_tree_probs_from_distribution(tree, probs, branch_ct, cnt, tok0_offset); - for (t = 0; t < n_modes - 1; ++t) - { - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - if (t < 8 || dst_probsB == NULL) - dst_probs[t] = weighted_prob(pre_probs[t], probs[t], factor); - else - dst_probsB[t-8] = weighted_prob(pre_probsB[t-8], probs[t], factor); + pStdLoopFilter->flags.loop_filter_delta_enabled = u(1); + if (pStdLoopFilter->flags.loop_filter_delta_enabled) { + + pStdLoopFilter->flags.loop_filter_delta_update = u(1); + + if (pStdLoopFilter->flags.loop_filter_delta_update) { + + for (int i = 0; i < STD_VIDEO_VP9_MAX_REF_FRAMES; i++) { + uint8_t update_ref_delta = u(1); + pStdLoopFilter->update_ref_delta |= update_ref_delta << i; + if (update_ref_delta == 1) { + m_loopFilterRefDeltas[i] = u(6); + if (u(1)) { // sign + m_loopFilterRefDeltas[i] = -m_loopFilterRefDeltas[i]; + } + } + } + + for (int i = 0; i < STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS; i++) { + uint8_t update_mode_delta = u( 1); + pStdLoopFilter->update_mode_delta |= update_mode_delta << i; + if (update_mode_delta) { + m_loopFilterModeDeltas[i] = u(6); + if(u(1)) { // sign + m_loopFilterModeDeltas[i] = -m_loopFilterModeDeltas[i]; + } + } + } + } } -} -void VulkanVP9Decoder::tx_counts_to_branch_counts_32x32(uint32_t *tx_count_32x32p, - uint32_t (*ct_32x32p)[2]) -{ - ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; - ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32]; - ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; - ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32]; - ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; - ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; + memcpy(pStdLoopFilter->loop_filter_ref_deltas, m_loopFilterRefDeltas, sizeof(m_loopFilterRefDeltas)); + memcpy(pStdLoopFilter->loop_filter_mode_deltas, m_loopFilterModeDeltas, sizeof(m_loopFilterModeDeltas)); } -void VulkanVP9Decoder::tx_counts_to_branch_counts_16x16(uint32_t *tx_count_16x16p, - uint32_t (*ct_16x16p)[2]) +void VulkanVP9Decoder::ParseQuantizationParams() { -
ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; - ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16]; - ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; - ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; + VkParserVp9PictureData *pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &pPicData->stdPictureInfo; + + pStdPicInfo->base_q_idx = u(8); + pStdPicInfo->delta_q_y_dc = ReadDeltaQ(); + pStdPicInfo->delta_q_uv_dc = ReadDeltaQ(); + pStdPicInfo->delta_q_uv_ac = ReadDeltaQ(); } -void VulkanVP9Decoder::tx_counts_to_branch_counts_8x8(uint32_t *tx_count_8x8p, - uint32_t (*ct_8x8p)[2]) +int32_t VulkanVP9Decoder::ReadDeltaQ() { - ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; - ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; + int32_t delta; + if (u(1)) { + delta = u(4); + if (u(1)) { + delta = -delta; + } + return delta; + } else { + return 0; + } } -void VulkanVP9Decoder::adaptModeProbs(vp9_prob_update_s *pProbSetup) +void VulkanVP9Decoder::ParseSegmentationParams() { - uint32_t i, j; - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - pProbSetup->pProbTab->a.intra_inter_prob[i] = update_mode_ct2(m_PrevCtx.intra_inter_prob[i], pProbSetup->pCtxCounters->intra_inter_count[i]); - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - pProbSetup->pProbTab->a.comp_inter_prob[i] = update_mode_ct2(m_PrevCtx.comp_inter_prob[i], pProbSetup->pCtxCounters->comp_inter_count[i]); - for (i = 0; i < REF_CONTEXTS; i++) - pProbSetup->pProbTab->a.comp_ref_prob[i] = update_mode_ct2(m_PrevCtx.comp_ref_prob[i], pProbSetup->pCtxCounters->comp_ref_count[i]); - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - pProbSetup->pProbTab->a.single_ref_prob[i][j] = update_mode_ct2(m_PrevCtx.single_ref_prob[i][j], pProbSetup->pCtxCounters->single_ref_count[i][j]); - - for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) - { - update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, - pProbSetup->pCtxCounters->sb_ymode_counts[i], - m_PrevCtx.sb_ymode_prob[i], m_PrevCtx.sb_ymode_probB[i], - 
pProbSetup->pProbTab->a.sb_ymode_prob[i], pProbSetup->pProbTab->a.sb_ymode_probB[i], 0); - } - for (i = 0; i < VP9_INTRA_MODES; ++i) - { - update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, - pProbSetup->pCtxCounters->uv_mode_counts[i], - m_PrevCtx.uv_mode_prob[i], - m_PrevCtx.uv_mode_probB[i], - pProbSetup->pProbTab->a.uv_mode_prob[i], - pProbSetup->pProbTab->a.uv_mode_probB[i], 0); - } - for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) - update_mode_probs(PARTITION_TYPES, vp9_partition_tree, - pProbSetup->pCtxCounters->partition_counts[i], - m_PrevCtx.partition_prob[INTER_FRAME][i], NULL, - pProbSetup->pProbTab->a.partition_prob[INTER_FRAME][i], NULL, 0); + uint8_t segmentation_feature_bits[STD_VIDEO_VP9_SEG_LVL_MAX] = { 8, 6, 2, 0}; + uint8_t segmentation_feature_signed[STD_VIDEO_VP9_SEG_LVL_MAX] = {1, 1, 0, 0}; - if (pProbSetup->mcomp_filter_type == SWITCHABLE) - { - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i) - { - update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree, - pProbSetup->pCtxCounters->switchable_interp_counts[i], - m_PrevCtx.switchable_interp_prob[i], NULL, - pProbSetup->pProbTab->a.switchable_interp_prob[i], NULL, 0); - } + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9Segmentation* pSegment = &m_PicData.stdSegmentation; + + pSegment->flags.segmentation_update_map = 0; + pSegment->flags.segmentation_temporal_update = 0; + + pStdPicInfo->flags.segmentation_enabled = u(1); + if (pStdPicInfo->flags.segmentation_enabled == 0) { + return; } - if (pProbSetup->transform_mode == TX_MODE_SELECT) - { - uint32_t branch_ct_8x8p[TX_SIZE_MAX_SB - 3][2]; - uint32_t branch_ct_16x16p[TX_SIZE_MAX_SB - 2][2]; - uint32_t branch_ct_32x32p[TX_SIZE_MAX_SB - 1][2]; - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - tx_counts_to_branch_counts_8x8(pProbSetup->pCtxCounters->tx8x8_count[i], branch_ct_8x8p); - for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) - { - int32_t factor; - int32_t count = branch_ct_8x8p[j][0] + 
branch_ct_8x8p[j][1]; - vp9_prob prob = get_binary_prob(branch_ct_8x8p[j][0], branch_ct_8x8p[j][1]); - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - pProbSetup->pProbTab->a.tx8x8_prob[i][j] = weighted_prob(m_PrevCtx.tx8x8_prob[i][j], prob, factor); - } + pSegment->flags.segmentation_update_map = u(1); + + if (pSegment->flags.segmentation_update_map == 1) { + + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_TREE_PROBS; i++) { + uint8_t prob_coded = u(1); + pSegment->segmentation_tree_probs[i] = (prob_coded == 1) ? u(8) : VP9_MAX_PRBABILITY; } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - tx_counts_to_branch_counts_16x16(pProbSetup->pCtxCounters->tx16x16_count[i], branch_ct_16x16p); - for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) - { - int32_t factor; - int32_t count = branch_ct_16x16p[j][0] + branch_ct_16x16p[j][1]; - vp9_prob prob = get_binary_prob(branch_ct_16x16p[j][0], branch_ct_16x16p[j][1]); - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - pProbSetup->pProbTab->a.tx16x16_prob[i][j] = weighted_prob(m_PrevCtx.tx16x16_prob[i][j], prob, factor); + + pSegment->flags.segmentation_temporal_update = u(1); + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_PRED_PROB; i++) { + if (pSegment->flags.segmentation_temporal_update) { + uint8_t prob_coded = u(1); + pSegment->segmentation_pred_prob[i] = (prob_coded == 1) ? u(8) : VP9_MAX_PRBABILITY; + } else { + pSegment->segmentation_pred_prob[i] = VP9_MAX_PRBABILITY; } } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - tx_counts_to_branch_counts_32x32(pProbSetup->pCtxCounters->tx32x32_count[i], branch_ct_32x32p); - for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) - { - int32_t factor; - int32_t count = branch_ct_32x32p[j][0] + branch_ct_32x32p[j][1]; - vp9_prob prob = get_binary_prob(branch_ct_32x32p[j][0], branch_ct_32x32p[j][1]); - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - pProbSetup->pProbTab->a.tx32x32_prob[i][j] = weighted_prob(m_PrevCtx.tx32x32_prob[i][j], prob, factor); + } + + pSegment->flags.segmentation_update_data = u(1); + if (pSegment->flags.segmentation_update_data == 1) { + pSegment->flags.segmentation_abs_or_delta_update = u(1); + + /* Clear all previous segment data */ + memset(pSegment->FeatureEnabled, 0, sizeof(pSegment->FeatureEnabled)); + memset(pSegment->FeatureData, 0, sizeof(pSegment->FeatureData)); + + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTS; i++) { + for (int j = 0; j < STD_VIDEO_VP9_SEG_LVL_MAX; j++) { + uint8_t feature_enabled = u(1); + pSegment->FeatureEnabled[i] |= (feature_enabled << j); + + if (feature_enabled == 1) { + pSegment->FeatureData[i][j] = u(segmentation_feature_bits[j]); + + if (segmentation_feature_signed[j] == 1) { + if (u(1) == 1) { + pSegment->FeatureData[i][j] = -pSegment->FeatureData[i][j]; + } + } + } } } + + } // segmentation_update_data +} + +uint8_t VulkanVP9Decoder::CalcMinLog2TileCols() +{ + VkParserVp9PictureData* pPicData = &m_PicData; + uint8_t minLog2 = 0; + + while (((uint32_t)VP9_MAX_TILE_WIDTH_B64 << minLog2) < pPicData->Sb64Cols) { + minLog2++; } - for (i = 0; i < MBSKIP_CONTEXTS; ++i) - pProbSetup->pProbTab->a.mbskip_probs[i] = update_mode_ct2(m_PrevCtx.mbskip_probs[i],pProbSetup->pCtxCounters->mbskip_count[i]); + + return minLog2; } -void VulkanVP9Decoder::adaptModeContext(vp9_prob_update_s *pProbSetup) +uint8_t VulkanVP9Decoder::CalcMaxLog2TileCols() { - uint32_t i, j; - uint32_t (*mode_ct)[VP9_INTER_MODES - 1][2] = pProbSetup->pCtxCounters->inter_mode_counts; + VkParserVp9PictureData* pPicData = &m_PicData; + uint8_t maxLog2 = 1; - for (j = 0; j < INTER_MODE_CONTEXTS; j++) - { - for (i = 0; i < VP9_INTER_MODES - 1; i++) - { - int32_t count = mode_ct[j][i][0] + mode_ct[j][i][1], factor; - count = count > MVREF_COUNT_SAT ? 
MVREF_COUNT_SAT : count; - factor = (MVREF_MAX_UPDATE_FACTOR * count / MVREF_COUNT_SAT); - pProbSetup->pProbTab->a.inter_mode_prob[j][i] = weighted_prob(m_PrevCtx.inter_mode_prob[j][i], - get_binary_prob(mode_ct[j][i][0], mode_ct[j][i][1]), - factor); - } + while ((pPicData->Sb64Cols >> maxLog2) >= VP9_MIN_TILE_WIDTH_B64) { + maxLog2++; } + + return maxLog2 - 1; } -uint32_t VulkanVP9Decoder::adapt_probs(uint32_t i, - const signed char* tree, - vp9_prob this_probs[], - const vp9_prob last_probs[], - const uint32_t num_events[]) +void VulkanVP9Decoder::ParseTileInfo() { - vp9_prob this_prob; - uint32_t weight; + VkParserVp9PictureData* pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; - const uint32_t left = tree[i] <= 0 ? num_events[-tree[i]] : adapt_probs(tree[i], tree, this_probs, last_probs, num_events); - const uint32_t right = tree[i + 1] <= 0 ? num_events[-tree[i + 1]] : adapt_probs(tree[i + 1], tree, this_probs, last_probs, num_events); - weight = left + right; - if (weight) - { - this_prob = get_binary_prob(left, right); - weight = weight > MV_COUNT_SAT ? 
MV_COUNT_SAT : weight; - this_prob = weighted_prob(last_probs[i >> 1], this_prob, MV_MAX_UPDATE_FACTOR * weight / MV_COUNT_SAT); + uint8_t minLog2TileCols = CalcMinLog2TileCols(); + uint8_t maxLog2TileCols = CalcMaxLog2TileCols(); + + pStdPicInfo->tile_cols_log2 = minLog2TileCols; + + while (pStdPicInfo->tile_cols_log2 < maxLog2TileCols) { + if (u(1) == 1) { // increment_tile_cols_log2 + pStdPicInfo->tile_cols_log2++; + } else { + break; + } } - else - { - this_prob = last_probs[i >> 1]; + + pStdPicInfo->tile_rows_log2 = u(1); + if (pStdPicInfo->tile_rows_log2 == 1) { + pStdPicInfo->tile_rows_log2 += u(1); } - this_probs[i >> 1] = this_prob; - return left + right; + + pPicData->numTiles = (1 << pStdPicInfo->tile_rows_log2) * (1 << pStdPicInfo->tile_cols_log2); } -void VulkanVP9Decoder::adapt_prob(vp9_prob *dest, vp9_prob prep, uint32_t ct[2]) +void VulkanVP9Decoder::ParseSuperFrameIndex(const uint8_t* data, uint32_t data_sz, uint32_t frame_sizes[8], uint32_t* frame_count) { - const int32_t count = std::min(ct[0] + ct[1], MV_COUNT_SAT); - if (count) - { - const vp9_prob newp = get_binary_prob(ct[0], ct[1]); - const int32_t factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT; - *dest = weighted_prob(prep, newp, factor); + uint8_t final_byte = data[data_sz - 1]; + *frame_count = 0; + + if ((final_byte & 0xe0) == 0xc0) { + const uint32_t frames = (final_byte & 0x7) + 1; + const uint32_t mag = ((final_byte >> 3) & 0x3) + 1; + const uint32_t index_sz = 2 + mag * frames; + + if (data_sz >= index_sz && data[data_sz - index_sz] == final_byte) { + // found a valid superframe index + const uint8_t* x = data + data_sz - index_sz + 1; + for (uint32_t i = 0; i < frames; i++) { + uint32_t this_sz = 0; + for (uint32_t j = 0; j < mag; j++) { + this_sz |= (*x++) << (j * 8); + } + frame_sizes[i] = this_sz; + } + *frame_count = frames; + } } - else - *dest = prep; } -void VulkanVP9Decoder::adaptNmvProbs(vp9_prob_update_s *pProbSetup) -{ - uint32_t usehp = 
pProbSetup->allow_high_precision_mv; - uint32_t i, j; - - adapt_probs(0, vp9_mv_joint_tree, - pProbSetup->pProbTab->a.nmvc.joints, - m_PrevCtx.nmvc.joints, - pProbSetup->pCtxCounters->nmvcount.joints); - for (i = 0; i < 2; ++i) - { - adapt_prob(&pProbSetup->pProbTab->a.nmvc.sign[i], - m_PrevCtx.nmvc.sign[i], - pProbSetup->pCtxCounters->nmvcount.sign[i]); - adapt_probs(0, vp9_mv_class_tree, - pProbSetup->pProbTab->a.nmvc.classes[i], - m_PrevCtx.nmvc.classes[i], - pProbSetup->pCtxCounters->nmvcount.classes[i]); - adapt_probs(0, vp9_mv_class0_tree, - pProbSetup->pProbTab->a.nmvc.class0[i], - m_PrevCtx.nmvc.class0[i], - pProbSetup->pCtxCounters->nmvcount.class0[i]); - for (j = 0; j < MV_OFFSET_BITS; ++j) - { - adapt_prob(&pProbSetup->pProbTab->a.nmvc.bits[i][j], - m_PrevCtx.nmvc.bits[i][j], - pProbSetup->pCtxCounters->nmvcount.bits[i][j]); +bool VulkanVP9Decoder::BeginPicture(VkParserPictureData* pnvpd) +{ + VkParserVp9PictureData* const pPicDataVP9 = &pnvpd->CodecSpecific.vp9; + StdVideoVP9ColorConfig* pStdColorConfig = &pPicDataVP9->stdColorConfig; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + + uint32_t width = pPicDataVP9->FrameWidth; + uint32_t height = pPicDataVP9->FrameHeight; + + VkParserSequenceInfo nvsi = m_ExtSeqInfo; + nvsi.eCodec = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; + nvsi.nChromaFormat = pPicDataVP9->ChromaFormat; + nvsi.nMaxWidth = std::max(width, pPicDataVP9->renderWidth); + nvsi.nMaxHeight = std::max(height, pPicDataVP9->renderHeight); + nvsi.nCodedWidth = width; + nvsi.nCodedHeight = height; + nvsi.nDisplayWidth = pPicDataVP9->renderWidth; + nvsi.nDisplayHeight = pPicDataVP9->renderHeight; + nvsi.lDARWidth = pPicDataVP9->renderWidth; + nvsi.lDARHeight = pPicDataVP9->renderHeight; + nvsi.bProgSeq = true; // VP9 doesn't have explicit interlaced coding. 
+ nvsi.nMinNumDecodeSurfaces = 9; + nvsi.uBitDepthLumaMinus8 = pStdColorConfig->BitDepth - 8; + nvsi.uBitDepthChromaMinus8 = pStdColorConfig->BitDepth - 8; + nvsi.codecProfile = pStdPicInfo->profile; + + // Reset decoder only if decode RT orig width is less than required coded width + if ((nvsi.nMaxWidth > m_rtOrigWidth) || (nvsi.nMaxHeight > m_rtOrigHeight)) { + m_rtOrigWidth = nvsi.nMaxWidth; + m_rtOrigHeight = nvsi.nMaxHeight; + + for (int i = 0; i < 8; i++) { + if (m_pBuffers[i].buffer != nullptr) { + m_pBuffers[i].buffer->Release(); + m_pBuffers[i].buffer = nullptr; + } } - for (j = 0; j < CLASS0_SIZE; ++j) - { - adapt_probs(0, vp9_mv_fp_tree, - pProbSetup->pProbTab->a.nmvc.class0_fp[i][j], - m_PrevCtx.nmvc.class0_fp[i][j], - pProbSetup->pCtxCounters->nmvcount.class0_fp[i][j]); + if (m_pCurrPic != nullptr) { + m_pCurrPic->Release(); + m_pCurrPic = nullptr; } - adapt_probs(0, vp9_mv_fp_tree, - pProbSetup->pProbTab->a.nmvc.fp[i], - m_PrevCtx.nmvc.fp[i], - pProbSetup->pCtxCounters->nmvcount.fp[i]); } - if (usehp) - { - for (i = 0; i < 2; ++i) - { - adapt_prob(&pProbSetup->pProbTab->a.nmvc.class0_hp[i], - m_PrevCtx.nmvc.class0_hp[i], - pProbSetup->pCtxCounters->nmvcount.class0_hp[i]); - adapt_prob(&pProbSetup->pProbTab->a.nmvc.hp[i], - m_PrevCtx.nmvc.hp[i], - pProbSetup->pCtxCounters->nmvcount.hp[i]); - } + + if (!init_sequence(&nvsi)) { + assert(!"init_sequence failed!"); + return false; } -} -void VulkanVP9Decoder::UpdateBackwardProbability(vp9_prob_update_s *pProbSetup) -{ - if (!pProbSetup->errorResilient && !pProbSetup->FrameParallelDecoding) - { - adaptCoefProbs(pProbSetup); //vp9_adapt_coef_probs - if(!pProbSetup->keyFrame && !pProbSetup->intraOnly) - { - adaptModeProbs(pProbSetup); //vp9_adapt_mode_probs - adaptModeContext(pProbSetup); - adaptNmvProbs(pProbSetup); //vp9_adapt_mv_probs - } + // Allocate a buffer for the current picture + if (m_pCurrPic == nullptr) { + m_pClient->AllocPictureBuffer(&m_pCurrPic); + assert(m_pCurrPic); + + 
m_pCurrPic->decodeWidth = width; + m_pCurrPic->decodeHeight = height; } - //vp9hwdStoreProbs - if (pProbSetup->RefreshEntropyProbs) - { - memcpy(&m_EntropyLast[pProbSetup->frameContextIdx], pProbSetup->pProbTab, sizeof(m_EntropyLast[pProbSetup->frameContextIdx])); + + pnvpd->PicWidthInMbs = nvsi.nCodedWidth >> 4; + pnvpd->FrameHeightInMbs = nvsi.nCodedHeight >> 4; + pnvpd->pCurrPic = m_pCurrPic; + pnvpd->progressive_frame = 1; + pnvpd->ref_pic_flag = 1; + pnvpd->intra_pic_flag = pPicDataVP9->FrameIsIntra; + pnvpd->chroma_format = pPicDataVP9->ChromaFormat; + + // Reference slots information + for (int i = 0; i < STD_VIDEO_VP9_NUM_REF_FRAMES; i++) { + vkPicBuffBase* pb = reinterpret_cast(m_pBuffers[i].buffer); + pPicDataVP9->pic_idx[i] = pb ? pb->m_picIdx : -1; } - //VP9HwdUpdateRefs + + return true; } diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp index 135af873..83b968ef 100644 --- a/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp +++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp @@ -20,9 +20,6 @@ #include "nvVulkanVideoUtils.h" #include "nvVulkanVideoParser.h" #include -#ifdef ENABLE_VP9_DECODER -#include -#endif VulkanVideoDecoder::VulkanVideoDecoder(VkVideoCodecOperationFlagBitsKHR std) : m_refCount(0) @@ -646,6 +643,7 @@ void VulkanVideoDecoder::end_of_stream() #include "VulkanH264Decoder.h" #include "VulkanH265Decoder.h" #include "VulkanAV1Decoder.h" +#include "VulkanVP9Decoder.h" static nvParserLogFuncType gParserLogFunc = nullptr; static int gLogLevel = 0; @@ -739,12 +737,17 @@ VkResult CreateVulkanVideoDecodeParser(VkVideoCodecOperationFlagBitsKHR videoCod } nvVideoDecodeParser = VkSharedBaseObj(new VulkanAV1Decoder(videoCodecOperation, pParserPictureData->isAnnexB)); break; -#ifdef ENABLE_VP9_DECODER case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: - // TODO: This will not work and is only here as a placeholder to get the 
compiler to include and link the class. + if ((pStdExtensionVersion == nullptr) || + (0 != strcmp(pStdExtensionVersion->extensionName, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME)) || + (pStdExtensionVersion->specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { + nvParserErrorLog("The requested decoder VP9 Codec STD version is NOT supported\n"); + nvParserErrorLog("The supported decoder VP9 Codec STD version is verion %d of %s\n", + VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME); + return VK_ERROR_INCOMPATIBLE_DRIVER; + } nvVideoDecodeParser = VkSharedBaseObj(new VulkanVP9Decoder(videoCodecOperation)); break; -#endif default: nvParserErrorLog("Unsupported codec type!!!\n"); } diff --git a/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp b/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp index 37888fcc..c93a5141 100644 --- a/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp +++ b/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp @@ -140,6 +140,8 @@ class FFmpegDemuxer : public VideoStreamDemuxer { bsf = av_bsf_get_by_name("hevc_mp4toannexb"); } else if (videoCodec == AV_CODEC_ID_AV1) { bsf = av_bsf_get_by_name("av1_metadata"); + } else if (videoCodec == AV_CODEC_ID_VP9) { + bsf = av_bsf_get_by_name("vp9_metadata"); } if (!bsf) { @@ -286,6 +288,10 @@ class FFmpegDemuxer : public VideoStreamDemuxer { videoCodecId = AV_CODEC_ID_H264; } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) { videoCodecId = AV_CODEC_ID_HEVC; + } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + videoCodecId = AV_CODEC_ID_AV1; + } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + videoCodecId = AV_CODEC_ID_VP9; } } @@ -307,12 +313,8 @@ class FFmpegDemuxer : public VideoStreamDemuxer { case AV_CODEC_ID_H264 : return VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR; case AV_CODEC_ID_HEVC : return 
VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR; case AV_CODEC_ID_VP8 : assert(false); return VkVideoCodecOperationFlagBitsKHR(0); - #ifdef VK_EXT_video_decode_vp9 case AV_CODEC_ID_VP9 : return VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; - #endif // VK_EXT_video_decode_vp9 - #ifdef vulkan_video_codec_av1std_decode case AV_CODEC_ID_AV1 : return VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR; - #endif case AV_CODEC_ID_MJPEG : assert(false); return VkVideoCodecOperationFlagBitsKHR(0); default : assert(false); return VkVideoCodecOperationFlagBitsKHR(0); } @@ -365,6 +367,7 @@ class FFmpegDemuxer : public VideoStreamDemuxer { case AV_PIX_FMT_YUVJ420P: ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting color_range case AV_PIX_FMT_YUV420P: ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples) case AV_PIX_FMT_YUV420P10LE: ///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian + case AV_PIX_FMT_YUV420P12LE: ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian case AV_PIX_FMT_YUV420P16LE: ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian case AV_PIX_FMT_YUV420P16BE: ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR; @@ -391,7 +394,7 @@ class FFmpegDemuxer : public VideoStreamDemuxer { virtual uint32_t GetProfileIdc() const { - switch (FFmpegToVkCodecOperation(videoCodec)) { + switch ((uint32_t)FFmpegToVkCodecOperation(videoCodec)) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { switch(profile) { @@ -431,6 +434,19 @@ class FFmpegDemuxer : public VideoStreamDemuxer { } } break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + { + switch(profile) { + case STD_VIDEO_VP9_PROFILE_0: + case STD_VIDEO_VP9_PROFILE_1: + case STD_VIDEO_VP9_PROFILE_2: + case STD_VIDEO_VP9_PROFILE_3: + break; + default: + std::cerr << "\nInvalid VP9 profile: " << profile << 
std::endl; + } + } + break; default: std::cerr << "\nInvalid codec type: " << FFmpegToVkCodecOperation(videoCodec) << std::endl; } diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index dc980474..6bff5ce5 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -39,12 +39,8 @@ const char* VkVideoDecoder::GetVideoCodecString(VkVideoCodecOperationFlagBitsKHR { VK_VIDEO_CODEC_OPERATION_NONE_KHR, "None" }, { VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR, "AVC/H.264" }, { VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR, "H.265/HEVC" }, -#ifdef VK_EXT_video_decode_vp9 { VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR, "VP9" }, -#endif // VK_EXT_video_decode_vp9 -#ifdef vulkan_video_codec_av1std { VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR, "AV1" }, -#endif // VK_EXT_video_decode_av1 }; for (unsigned i = 0; i < sizeof(aCodecName) / sizeof(aCodecName[0]); i++) { @@ -126,6 +122,7 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR + | VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR ); assert(videoCodecs != VK_VIDEO_CODEC_OPERATION_NONE_KHR); @@ -637,12 +634,12 @@ int VkVideoDecoder::CopyOptimalToLinearImage(VkCommandBuffer& commandBuffer, copyRegion[0].dstSubresource.layerCount = 1; copyRegion[1].extent.width = copyRegion[0].extent.width; if (mpInfo->planesLayout.secondaryPlaneSubsampledX != 0) { - copyRegion[1].extent.width /= 2; + copyRegion[1].extent.width = (copyRegion[1].extent.width + 1) / 2; } copyRegion[1].extent.height = copyRegion[0].extent.height; if (mpInfo->planesLayout.secondaryPlaneSubsampledY != 0) { - copyRegion[1].extent.height /= 2; + copyRegion[1].extent.height = (copyRegion[1].extent.height + 1) / 2; } copyRegion[1].extent.depth = 1; @@ -706,7 
+703,7 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(pCurrFrameDecParams->bitstreamData->GetMaxSize() >= pCurrFrameDecParams->bitstreamDataLen); pCurrFrameDecParams->decodeFrameInfo.srcBuffer = pCurrFrameDecParams->bitstreamData->GetBuffer(); - assert(pCurrFrameDecParams->bitstreamDataOffset == 0); + //assert(pCurrFrameDecParams->bitstreamDataOffset == 0); assert(pCurrFrameDecParams->firstSliceIndex == 0); // TODO: Assert if bitstreamDataOffset is aligned to VkVideoCapabilitiesKHR::minBitstreamBufferOffsetAlignment pCurrFrameDecParams->decodeFrameInfo.srcBufferOffset = pCurrFrameDecParams->bitstreamDataOffset; @@ -774,7 +771,12 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } pCurrFrameDecParams->dpbSetupPictureResource.codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode. - pCurrFrameDecParams->dpbSetupPictureResource.codedExtent = m_codedExtent; + // Setup picture may have different resolution compared to previous frames in VP9 + // So, set the codedExtent earlier in VP9 specific code and skip it here. + // TODO: Do the same for other codedcs + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent = m_codedExtent; + } if (dpbSetupPictureResourceInfo.currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED) { imageBarriers[numDpbBarriers] = dpbBarrierTemplates[0]; @@ -816,7 +818,14 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } pOutputPictureResource->codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode. - pOutputPictureResource->codedExtent = m_codedExtent; + // Setup picture may have different resolution compared to previous frames in VP9 + // So, set the codedExtent earlier in VP9 specific code and skip it here. 
+ // TODO: Do the same for other codedcs + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pOutputPictureResource->codedExtent = m_codedExtent; + } else { + pOutputPictureResource->codedExtent = pCurrFrameDecParams->dpbSetupPictureResource.codedExtent; + } // For Output Distinct transition the image to DECODE_DST if (pOutputPictureResourceInfo->currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED) { @@ -909,9 +918,11 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters if (pictureResourcesInfo[resId].image != VK_NULL_HANDLE) { - // FIXME: m_codedExtent should have already be populated in in the + // FIXME: m_codedExtent should have already be populated in the // picture resource above from the FB. - pCurrFrameDecParams->pictureResources[resId].codedExtent = m_codedExtent; + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pCurrFrameDecParams->pictureResources[resId].codedExtent = m_codedExtent; + } // FIXME: This parameter must to be adjusted based on the interlaced mode. pCurrFrameDecParams->pictureResources[resId].codedOffset = { 0, 0 }; } @@ -926,8 +937,12 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } } - decodeBeginInfo.referenceSlotCount = pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount; - decodeBeginInfo.pReferenceSlots = pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots; + // Add setup reference slot details to decodeBeginInfo + decodeBeginInfo.referenceSlotCount = pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount + + (pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot ? 1 : 0); + decodeBeginInfo.pReferenceSlots = (pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount > 0) ? 
+ pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots : + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot; m_imageSpecsIndex.displayOut = ((m_dpbAndOutputCoincide == VK_TRUE) && !(pDecodePictureInfo->flags.applyFilmGrain == VK_TRUE)) ? @@ -951,12 +966,16 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters VulkanVideoFrameBuffer::FrameSynchronizationInfo frameSynchronizationInfo = VulkanVideoFrameBuffer::FrameSynchronizationInfo(); frameSynchronizationInfo.hasFrameCompleteSignalFence = true; frameSynchronizationInfo.hasFrameCompleteSignalSemaphore = true; + frameSynchronizationInfo.hasFilterSignalSemaphore = m_enableDecodeComputeFilter; + frameSynchronizationInfo.hasFrameConsumerSignalSemaphore = false; frameSynchronizationInfo.syncOnFrameCompleteFence = true; frameSynchronizationInfo.syncOnFrameConsumerDoneFence = true; frameSynchronizationInfo.imageSpecsIndex = m_imageSpecsIndex; VkSharedBaseObj currentVkPictureParameters; - if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { // AV1 + if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + decodeBeginInfo.videoSessionParameters = VK_NULL_HANDLE; + } else if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { // AV1 bool valid = pCurrFrameDecParams->pStdSps->GetClientObject(currentVkPictureParameters); assert(valid); @@ -1039,14 +1058,9 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, frameSynchronizationInfo.frameCompleteFence)); VkFence frameCompleteFence = frameSynchronizationInfo.frameCompleteFence; - VkSemaphore frameCompleteSemaphore = frameSynchronizationInfo.frameCompleteSemaphore; - VkSemaphore frameConsumerDoneSemaphore = frameSynchronizationInfo.frameConsumerDoneSemaphore; - // By default, the frameCompleteSemaphore is the videoDecodeCompleteSemaphore. 
- // If the video frame filter is enabled, since it is executed after the decoder's queue, - // the filter will provide its own semaphore for the video decoder to signal, instead. - // Then the frameCompleteSemaphore will be signaled by the filter of its completion. + VkSemaphore videoDecodeCompleteSemaphore = frameSynchronizationInfo.frameCompleteSemaphore; + VkSemaphore consumerCompleteSemaphore = frameSynchronizationInfo.consumerCompleteSemaphore; VkFence videoDecodeCompleteFence = frameCompleteFence; - VkSemaphore videoDecodeCompleteSemaphore = frameCompleteSemaphore; VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; @@ -1136,34 +1150,43 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(filterCmdBuffer != nullptr); - // frameCompleteSemaphore is the semaphore that the filter is going to signal on completion when enabled. - // The videoDecodeCompleteSemaphore semaphore will be signaled by the decoder and then used by the filter to wait on. - + // videoDecodeCompleteFence is the fence that the filter is going to signal on completion when enabled. 
videoDecodeCompleteFence = filterCmdBuffer->GetFence(); - videoDecodeCompleteSemaphore = filterCmdBuffer->GetSemaphore(); } const uint32_t waitSemaphoreMaxCount = 3; - VkSemaphore waitSemaphores[waitSemaphoreMaxCount] = { VK_NULL_HANDLE }; + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[waitSemaphoreMaxCount]{}; const uint32_t signalSemaphoreMaxCount = 3; - VkSemaphore signalSemaphores[signalSemaphoreMaxCount] = { VK_NULL_HANDLE }; + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[signalSemaphoreMaxCount]{}; uint32_t waitSemaphoreCount = 0; - if (frameConsumerDoneSemaphore != VK_NULL_HANDLE) { - waitSemaphores[waitSemaphoreCount] = frameConsumerDoneSemaphore; + uint32_t signalSemaphoreCount = 0; + + if (consumerCompleteSemaphore != VK_NULL_HANDLE) { + + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = consumerCompleteSemaphore; + waitSemaphoreInfos[waitSemaphoreCount].value = frameSynchronizationInfo.frameConsumerDoneTimelineValue; + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR | + VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; waitSemaphoreCount++; } - uint32_t signalSemaphoreCount = 0; if (videoDecodeCompleteSemaphore != VK_NULL_HANDLE) { - signalSemaphores[signalSemaphoreCount] = videoDecodeCompleteSemaphore; + + signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr; + signalSemaphoreInfos[signalSemaphoreCount].semaphore = videoDecodeCompleteSemaphore; + signalSemaphoreInfos[signalSemaphoreCount].value = frameSynchronizationInfo.decodeCompleteTimelineValue; + signalSemaphoreInfos[signalSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + 
signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0; signalSemaphoreCount++; } - uint64_t waitTlSemaphoresValues[waitSemaphoreMaxCount] = { 0 /* ignored for binary semaphores */ }; - uint64_t signalTlSemaphoresValues[signalSemaphoreMaxCount] = { 0 /* ignored for binary semaphores */ }; - VkTimelineSemaphoreSubmitInfo timelineSemaphoreInfos = {}; if (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) { if (m_dumpDecodeData) { @@ -1172,67 +1195,53 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters std::cout << "\t TL semaphore value: " << currSemValue << ", status: " << semResult << std::endl; } - waitSemaphores[waitSemaphoreCount] = m_hwLoadBalancingTimelineSemaphore; - waitTlSemaphoresValues[waitSemaphoreCount] = m_decodePicCount - 1; // wait for the previous value to be signaled + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = m_hwLoadBalancingTimelineSemaphore; + waitSemaphoreInfos[waitSemaphoreCount].value = m_decodePicCount - 1; // wait for the previous value to be signaled + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; waitSemaphoreCount++; - signalSemaphores[signalSemaphoreCount] = m_hwLoadBalancingTimelineSemaphore; - signalTlSemaphoresValues[signalSemaphoreCount] = m_decodePicCount; // signal the current m_decodePicCount value + signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr; + signalSemaphoreInfos[signalSemaphoreCount].semaphore = m_hwLoadBalancingTimelineSemaphore; + signalSemaphoreInfos[signalSemaphoreCount].value = m_decodePicCount; // signal the current m_decodePicCount value + signalSemaphoreInfos[signalSemaphoreCount].stageMask = 
VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0; signalSemaphoreCount++; - timelineSemaphoreInfos.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; - timelineSemaphoreInfos.pNext = NULL; assert(waitSemaphoreCount < waitSemaphoreMaxCount); - timelineSemaphoreInfos.waitSemaphoreValueCount = waitSemaphoreCount; - timelineSemaphoreInfos.pWaitSemaphoreValues = waitTlSemaphoresValues; assert(signalSemaphoreCount < signalSemaphoreMaxCount); - timelineSemaphoreInfos.signalSemaphoreValueCount = signalSemaphoreCount; - timelineSemaphoreInfos.pSignalSemaphoreValues = signalTlSemaphoresValues; - if (m_dumpDecodeData) { - std::cout << "\t Wait for: " << (waitSemaphoreCount ? waitTlSemaphoresValues[waitSemaphoreCount - 1] : 0) << - ", signal at " << signalTlSemaphoresValues[signalSemaphoreCount - 1] << std::endl; - } } assert(waitSemaphoreCount <= waitSemaphoreMaxCount); assert(signalSemaphoreCount <= signalSemaphoreMaxCount); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoDecodeSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.pNext = (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) ? 
&timelineSemaphoreInfos : nullptr; - submitInfo.waitSemaphoreCount = waitSemaphoreCount; - submitInfo.pWaitSemaphores = waitSemaphores; - submitInfo.pWaitDstStageMask = &videoDecodeSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = &frameDataSlot.commandBuffer; - submitInfo.signalSemaphoreCount = signalSemaphoreCount; - submitInfo.pSignalSemaphores = signalSemaphores; - - if (m_dumpDecodeData) { - if (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) { - std::cout << "\t\t waitSemaphoreValueCount: " << timelineSemaphoreInfos.waitSemaphoreValueCount << std::endl; - std::cout << "\t pWaitSemaphoreValues: " << timelineSemaphoreInfos.pWaitSemaphoreValues[0] << ", " << - timelineSemaphoreInfos.pWaitSemaphoreValues[1] << ", " << - timelineSemaphoreInfos.pWaitSemaphoreValues[2] << std::endl; - std::cout << "\t\t signalSemaphoreValueCount: " << timelineSemaphoreInfos.signalSemaphoreValueCount << std::endl; - std::cout << "\t pSignalSemaphoreValues: " << timelineSemaphoreInfos.pSignalSemaphoreValues[0] << ", " << - timelineSemaphoreInfos.pSignalSemaphoreValues[1] << ", " << - timelineSemaphoreInfos.pSignalSemaphoreValues[2] << std::endl; - } - - std::cout << "\t waitSemaphoreCount: " << submitInfo.waitSemaphoreCount << std::endl; - std::cout << "\t\t pWaitSemaphores: " << submitInfo.pWaitSemaphores[0] << ", " << - submitInfo.pWaitSemaphores[1] << ", " << - submitInfo.pWaitSemaphores[2] << std::endl; - std::cout << "\t signalSemaphoreCount: " << submitInfo.signalSemaphoreCount << std::endl; - std::cout << "\t\t pSignalSemaphores: " << submitInfo.pSignalSemaphores[0] << ", " << - submitInfo.pSignalSemaphores[1] << ", " << - submitInfo.pSignalSemaphores[2] << std::endl << std::endl; - } + VkCommandBufferSubmitInfoKHR cmdBufferInfos; + cmdBufferInfos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos.pNext = nullptr; + cmdBufferInfos.commandBuffer = frameDataSlot.commandBuffer; + cmdBufferInfos.deviceMask = 0; + + 
// Submit info + VkSubmitInfo2KHR submitInfo { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, videoDecodeCompleteFence)); - VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::DECODE, m_currentVideoQueueIndx, - 1, &submitInfo, videoDecodeCompleteFence); + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::DECODE, + m_currentVideoQueueIndx, + 1, + &submitInfo, + videoDecodeCompleteFence, + "Video Decode", + picNumInDecodeOrder); assert(result == VK_SUCCESS); if (result != VK_SUCCESS) { return -1; @@ -1368,11 +1377,23 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters result = filterCmdBuffer->EndCommandBufferRecording(cmdBuf); assert(result == VK_SUCCESS); - if (false) std::cout << currPicIdx << " : OUT view: " << outputImageView->GetImageView() << ", signalSem: " << frameCompleteSemaphore << std::endl << std::flush; - assert(videoDecodeCompleteSemaphore != frameCompleteSemaphore); - result = m_yuvFilter->SubmitCommandBuffer(1, filterCmdBuffer->GetCommandBuffer(), - 1, &videoDecodeCompleteSemaphore, - 1, &frameCompleteSemaphore, + // Wait for the decoder to complete. + const VkPipelineStageFlags2KHR waitDecoderStageMasks = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + + // Signal the compute stage after done. 
+ const uint64_t computeCompleteTimelineValue = frameSynchronizationInfo.filterCompleteTimelineValue; + const VkPipelineStageFlags2KHR signalComputeStageMasks = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR; + + result = m_yuvFilter->SubmitCommandBuffer(1, // commandBufferCount + filterCmdBuffer->GetCommandBuffer(), + 1, // waitSemaphoreCount + &videoDecodeCompleteSemaphore, + &frameSynchronizationInfo.decodeCompleteTimelineValue, + &waitDecoderStageMasks, + 1, // signalSemaphoreCount + &videoDecodeCompleteSemaphore, + &computeCompleteTimelineValue, + &signalComputeStageMasks, frameCompleteFence); assert(result == VK_SUCCESS); filterCmdBuffer->SetCommandBufferSubmitted(); diff --git a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp index 129ec34a..bbb68e66 100644 --- a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp +++ b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp @@ -173,6 +173,30 @@ struct nvVideoAV1PicParameters { nvVideoDecodeAV1DpbSlotInfo dpbRefList[nvVideoDecodeAV1DpbSlotInfo::TOTAL_REFS_PER_FRAME + 1]; }; + +struct nvVideoDecodeVP9DpbSlotInfo +{ + enum { + // Number of reference frame types (including intra type) + TOTAL_REFS_PER_FRAME = 8, + }; + VkExtent2D codedExtent{}; + + void Invalidate() { memset(this, 0x00, sizeof(*this)); } + + // Set the STD data here for VP9. 
+ +}; + +struct nvVideoVP9PicParameters { + StdVideoDecodeVP9PictureInfo stdPictureInfo; + StdVideoVP9ColorConfig stdColorConfig; + StdVideoVP9Segmentation stdSegment; + StdVideoVP9LoopFilter stdLoopFilter; + VkVideoDecodeVP9PictureInfoKHR vkPictureInfo{ VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR, nullptr, &stdPictureInfo }; + nvVideoDecodeVP9DpbSlotInfo dpbRefList[nvVideoDecodeVP9DpbSlotInfo::TOTAL_REFS_PER_FRAME + 1]; +}; + static vkPicBuffBase* GetPic(VkPicIf* pPicBuf) { return (vkPicBuffBase*)pPicBuf; @@ -550,9 +574,9 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, // Vulkan Video parser.cpp -- maintains its own indices. // We can use more indices in the parser than the spec. (Ther eis a max of 8 but we can use 16) // Reason for single structure for DPB -- the array is passed in the callback (in the proxy of the processor) - // It checks which references are in use. + // It checks which references are in use. // 2nd Finds which DPB references were assigned before - and reuses indices. 
- // The local array maintains the + // The local array maintains the pRefPicInfo->flags.disable_frame_end_update_cdf = ; pRefPicInfo->flags.segmentation_enabled = ; pRefPicInfo->base_q_idx = ; @@ -574,6 +598,14 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, } } + void setVP9PictureData(nvVideoDecodeVP9DpbSlotInfo* pDpbSlotInfo, + VkVideoReferenceSlotInfoKHR* pReferenceSlots, + uint32_t dpbEntryIdx, uint32_t dpbSlotIndex) + { + // TODO: VP9 dpb management + assert(0); + } + } dpbH264Entry; virtual int32_t AddRef(); @@ -685,6 +717,14 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, VkVideoReferenceSlotInfoKHR* pReferenceSlots, int8_t* pGopReferenceImagesIndexes, int32_t* pCurrAllocatedSlotIndex); + uint32_t FillDpbVP9State(const VkParserPictureData* pd, + VkParserVp9PictureData* pin, + nvVideoDecodeVP9DpbSlotInfo* pDpbSlotInfo, + StdVideoDecodeVP9PictureInfo* pStdPictureInfo, + uint32_t maxRefPictures, + VkVideoReferenceSlotInfoKHR* pReferenceSlots, + int8_t* pGopReferenceImagesIndexes, + int32_t* pCurrAllocatedSlotIndex); int8_t AllocateDpbSlotForCurrentH264( vkPicBuffBase* pPic, StdVideoDecodeH264PictureInfoFlags currPicFlags, @@ -693,7 +733,8 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, int8_t presetDpbSlot); int8_t AllocateDpbSlotForCurrentAV1(vkPicBuffBase* pPic, bool isReference, int8_t presetDpbSlot); - + int8_t AllocateDpbSlotForCurrentVP9(vkPicBuffBase* pPic, bool isReference, + int8_t presetDpbSlot); protected: VkSharedBaseObj m_vkParser; @@ -944,6 +985,7 @@ VkResult VulkanVideoParser::Initialize( static const VkExtensionProperties h264StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION }; static const VkExtensionProperties h265StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION }; static const VkExtensionProperties av1StdExtensionVersion = { 
VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION }; + static const VkExtensionProperties vp9StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION }; const VkExtensionProperties* pStdExtensionVersion = NULL; if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { @@ -952,6 +994,8 @@ VkResult VulkanVideoParser::Initialize( pStdExtensionVersion = &h265StdExtensionVersion; } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { pStdExtensionVersion = &av1StdExtensionVersion; + } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pStdExtensionVersion = &vp9StdExtensionVersion; } else { assert(!"Unsupported codec type"); return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR; @@ -1098,6 +1142,14 @@ int32_t VulkanVideoParser::BeginSequence(const VkParserSequenceInfo* pnvsi) if (pnvsi->eCodec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { maxDpbSlots = 9; + if ((pnvsi->nCodedWidth <= m_nvsi.nCodedWidth) && (pnvsi->nCodedHeight <= m_nvsi.nCodedHeight)) { + return 1; + } + } else if (pnvsi->eCodec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + maxDpbSlots = 9; + if ((pnvsi->nMaxWidth <= m_nvsi.nMaxWidth) && (pnvsi->nMaxHeight <= m_nvsi.nMaxHeight)) { + return 1; + } } uint32_t configDpbSlots = (pnvsi->nMinNumDpbSlots > 0) ? 
pnvsi->nMinNumDpbSlots : maxDpbSlots; @@ -1120,8 +1172,8 @@ int32_t VulkanVideoParser::BeginSequence(const VkParserSequenceInfo* pnvsi) } m_nvsi = *pnvsi; - m_nvsi.nMaxWidth = pnvsi->nCodedWidth; - m_nvsi.nMaxHeight = pnvsi->nCodedHeight; + m_nvsi.nMaxWidth = pnvsi->nMaxWidth; + m_nvsi.nMaxHeight = pnvsi->nMaxHeight; m_maxNumDecodeSurfaces = pnvsi->nMinNumDecodeSurfaces; @@ -1814,7 +1866,7 @@ uint32_t VulkanVideoParser::FillDpbAV1State( uint8_t yellowSquare[] = { 0xf0, 0x9f, 0x9f, 0xa8, 0x00 }; printf("\nSlotsInUse: "); for (int i = 0; i < 9; i++) { - printf("%-2s ", (slotsInUse & (1<ref_frame_idx[i]); + } + printf("\n"); + + printf("m_pictureToDpbSlotMap: "); + for (int i = 0; i < MAX_FRM_CNT; i++) { + printf("%02d ", i); + } + printf("\nm_pictureToDpbSlotMap: "); + for (int i = 0; i < MAX_FRM_CNT; i++) { + printf("%02d ", m_pictureToDpbSlotMap[i]); + } + printf("\n"); + + printf("ref_frame_picture: "); + for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) { + printf("%02d ", inIdx); + } + printf("\nref_frame_picture: "); + for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) { + int8_t picIdx = pin->pic_idx[inIdx]; + printf("%02d ", picIdx); + } + printf("\n"); + } + + bool isKeyFrame = (pin->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY); + + // It doesn't look like this tracking is needed. + int8_t activeReferences[32]; + memset(activeReferences, 0, sizeof(activeReferences)); + for (size_t refName = 0; refName < STD_VIDEO_VP9_REFS_PER_FRAME; refName++) { + int8_t picIdx = isKeyFrame ? 
-1 : pin->pic_idx[pin->ref_frame_idx[refName]]; + if (picIdx < 0) { + //pKhr->referenceNameSlotIndices[refName] = -1; + continue; + } + int8_t dpbSlot = GetPicDpbSlot(picIdx); + assert(dpbSlot >= 0); + //pKhr->referenceNameSlotIndices[refName] = dpbSlot; + activeReferences[dpbSlot]++; + //hdr.delta_frame_id_minus_1[dpbSlot] = pin->delta_frame_id_minus_1[pin->ref_frame_idx[i]]; + } + + for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) { + int8_t picIdx = isKeyFrame ? -1 : pin->pic_idx[inIdx]; + int8_t dpbSlot = -1; + if ((picIdx >= 0) && !(refDpbUsedAndValidMask & (1 << picIdx))) { + dpbSlot = GetPicDpbSlot(picIdx); + + assert(dpbSlot >= 0); // There is still content hitting this assert. + if (dpbSlot < 0) { + continue; + } + + refDpbUsedAndValidMask |= (1 << picIdx); + m_dpb[dpbSlot].MarkInUse(m_nCurrentPictureID); + if (activeReferences[dpbSlot] == 0) { + continue; + } + + pReferenceSlots[referenceIndex].sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR; + pReferenceSlots[referenceIndex].pNext = nullptr; + pReferenceSlots[referenceIndex].slotIndex = dpbSlot; + pGopReferenceImagesIndexes[referenceIndex] = picIdx; + + VkExtent2D &codedExtent = pDpbSlotInfo[referenceIndex].codedExtent; + codedExtent.width = m_dpb[dpbSlot].getPictureResource()->decodeWidth; + codedExtent.height = m_dpb[dpbSlot].getPictureResource()->decodeHeight; + + referenceIndex++; + } + } + + if (m_dumpDpbData) { + printf(";;; pReferenceSlots (%d): ", referenceIndex); + for (size_t i =0 ;i < referenceIndex; i++) { + printf("%02d ", pReferenceSlots[i].slotIndex); + } + printf("\n"); + } + + ResetPicDpbSlots(refDpbUsedAndValidMask); + + // Take into account the reference picture now. 
+ int8_t currPicIdx = GetPicIdx(pd->pCurrPic); + assert(currPicIdx >= 0); + if (currPicIdx >= 0) { + refDpbUsedAndValidMask |= (1 << currPicIdx); + } + + // NOTE(charlie): Most likely we can consider isReference = refresh_frame_flags != 0; + // However, the AMD fw interface appears to always need a setup slot & a destination resource, + // so it's not clear what to properly do in that case. + int8_t dpbSlot = AllocateDpbSlotForCurrentAV1(GetPic(pd->pCurrPic), + true /* isReference */, pd->current_dpb_id); + + assert(dpbSlot >= 0); + + *pCurrAllocatedSlotIndex = dpbSlot; + assert(!(dpbSlot < 0)); + if (dpbSlot >= 0) { + assert(pd->ref_pic_flag); + } + + if (m_dumpDpbData) { + printf("SlotsInUse: "); + uint32_t slotsInUse = m_dpb.getSlotInUseMask(); + for (int i = 0; i < 9; i++) { + printf("%02d ", i); + } + uint8_t greenSquare[] = { 0xf0, 0x9f, 0x9f, 0xa9, 0x00 }; + uint8_t redSquare[] = { 0xf0, 0x9f, 0x9f, 0xa5, 0x00 }; + uint8_t yellowSquare[] = { 0xf0, 0x9f, 0x9f, 0xa8, 0x00 }; + printf("\nSlotsInUse: "); + for (int i = 0; i < 9; i++) { + printf("%-2s ", (slotsInUse & (1<= 0); SetPicDpbSlot(currPicIdx, dpbSlot); // Assign the dpbSlot to the current picture index. m_dpb[dpbSlot].setPictureResource(pPic, m_nCurrentPictureID); // m_nCurrentPictureID is our main index. @@ -1942,6 +2148,7 @@ bool VulkanVideoParser::DecodePicture( nvVideoH264PicParameters h264; nvVideoH265PicParameters hevc; nvVideoAV1PicParameters av1; + nvVideoVP9PicParameters vp9; // }; if (m_decoderHandler == NULL) { @@ -2072,11 +2279,6 @@ bool VulkanVideoParser::DecodePicture( h264.stdPictureInfo.flags, &setupReferenceSlot.slotIndex); // TODO: Remove it is for debugging only. Reserved fields must be set to "0". 
pout->stdPictureInfo.reserved1 = pCurrFrameDecParams->numGopReferenceSlots; - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; @@ -2092,6 +2294,15 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = NULL; pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) { @@ -2181,11 +2392,6 @@ bool VulkanVideoParser::DecodePicture( referenceSlots, pCurrFrameDecParams->pGopReferenceImagesIndexes, &setupReferenceSlot.slotIndex); - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); @@ -2203,6 +2409,16 @@ bool VulkanVideoParser::DecodePicture( 
pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } + if (m_dumpParserData) { for (int32_t i = 0; i < HEVC_MAX_DPB_SLOTS; i++) { std::cout << "\tdpbIndex: " << i; @@ -2245,7 +2461,7 @@ bool VulkanVideoParser::DecodePicture( } nvVideoDecodeAV1DpbSlotInfo* dpbSlotsAv1 = av1.dpbRefList; - pCurrFrameDecParams->numGopReferenceSlots = + pCurrFrameDecParams->numGopReferenceSlots = FillDpbAV1State(pd, pin, dpbSlotsAv1, @@ -2255,12 +2471,6 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->pGopReferenceImagesIndexes, &setupReferenceSlot.slotIndex); - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } - if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; @@ -2276,6 +2486,17 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = 
&setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } + // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface. //pDecodePictureInfo->videoFrameType = static_cast(pin->frame_type); pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan? @@ -2317,10 +2538,102 @@ bool VulkanVideoParser::DecodePicture( pin->tileInfo.pMiRowStarts = pin->MiRowStarts; pDecodePictureInfo->flags.applyFilmGrain = pin->std_info.flags.apply_grain; + + } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + + VkParserVp9PictureData* pin = &pd->CodecSpecific.vp9; + + vp9 = nvVideoVP9PicParameters(); + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &vp9.stdPictureInfo; + VkVideoDecodeVP9PictureInfoKHR* pVkPicInfo = &vp9.vkPictureInfo; + nvVideoDecodeVP9DpbSlotInfo* pNvDpbSlotInfo = vp9.dpbRefList; + + // Copy std data and link pointers + memcpy(pStdPicInfo, &pin->stdPictureInfo, sizeof(StdVideoDecodeVP9PictureInfo)); + memcpy(&vp9.stdColorConfig, &pin->stdColorConfig, sizeof(StdVideoVP9ColorConfig)); + pStdPicInfo->pColorConfig = &vp9.stdColorConfig; + if (pStdPicInfo->flags.segmentation_enabled == 1) { + memcpy(&vp9.stdSegment, &pin->stdSegmentation, sizeof(StdVideoVP9Segmentation)); + pStdPicInfo->pSegmentation = &vp9.stdSegment; + } + memcpy(&vp9.stdLoopFilter, &pin->stdLoopFilter, sizeof(StdVideoVP9LoopFilter)); + pStdPicInfo->pLoopFilter = &vp9.stdLoopFilter; + + pVkPicInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR; + pVkPicInfo->pStdPictureInfo = pStdPicInfo; + + VkVideoDecodeInfoKHR* pKhrDecodeInfo = &pCurrFrameDecParams->decodeFrameInfo; + pKhrDecodeInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR; + pKhrDecodeInfo->pNext = pVkPicInfo; + + // dpb slots + 
pCurrFrameDecParams->numGopReferenceSlots = FillDpbVP9State(pd, + pin, + pNvDpbSlotInfo, + pStdPicInfo, + 9, + referenceSlots, + pCurrFrameDecParams->pGopReferenceImagesIndexes, + &setupReferenceSlot.slotIndex); + + if (pCurrFrameDecParams->numGopReferenceSlots) { + assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); + for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; + dpbEntryIdx++) { + pCurrFrameDecParams->pictureResources[dpbEntryIdx].sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR; + pCurrFrameDecParams->pictureResources[dpbEntryIdx].codedExtent = pNvDpbSlotInfo[dpbEntryIdx].codedExtent; + referenceSlots[dpbEntryIdx].pPictureResource = &pCurrFrameDecParams->pictureResources[dpbEntryIdx]; + } + + pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = referenceSlots; + pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = pCurrFrameDecParams->numGopReferenceSlots; + } else { + pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = nullptr; + pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; + } + + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.width = pin->FrameWidth; + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.height = pin->FrameHeight; + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } + + // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface. 
+ //pDecodePictureInfo->videoFrameType = static_cast(pin->frame_type); + pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan? + + bool isKeyFrame = pin->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY; + for (size_t i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { + int8_t picIdx = isKeyFrame ? -1 : pin->pic_idx[pin->ref_frame_idx[i]]; + if (picIdx < 0) { + pVkPicInfo->referenceNameSlotIndices[i] = -1; + continue; + } + + int8_t dpbSlot = GetPicDpbSlot(picIdx); + assert(dpbSlot >= 0); + pVkPicInfo->referenceNameSlotIndices[i] = dpbSlot; + } + + pVkPicInfo->uncompressedHeaderOffset = pin->uncompressedHeaderOffset; + pVkPicInfo->compressedHeaderOffset = pin->compressedHeaderOffset; + pVkPicInfo->tilesOffset = pin->tilesOffset; + + // Use current frames with and height for display and writing to output + pDecodePictureInfo->displayWidth = pin->FrameWidth; + pDecodePictureInfo->displayHeight = pin->FrameHeight; } - pDecodePictureInfo->displayWidth = m_nvsi.nDisplayWidth; - pDecodePictureInfo->displayHeight = m_nvsi.nDisplayHeight; + if (m_codecType != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pDecodePictureInfo->displayWidth = m_nvsi.nDisplayWidth; + pDecodePictureInfo->displayHeight = m_nvsi.nDisplayHeight; + } bRet = (m_decoderHandler->DecodePictureWithParameters(pCurrFrameDecParams, pDecodePictureInfo) >= 0); @@ -2405,6 +2718,11 @@ VkResult vulkanCreateVideoParser( assert(!"Decoder AV1 Codec version is NOT supported"); return VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR; } + } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + if (!pStdExtensionVersion || strcmp(pStdExtensionVersion->extensionName, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME) || (pStdExtensionVersion->specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { + assert(!"Decoder VP9 Codec version is NOT supported"); + return VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR; + } } else { assert(!"Decoder Codec is NOT 
supported"); return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR; diff --git a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp index f8e925b0..2550bd7e 100644 --- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp +++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp @@ -51,18 +51,17 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { NvPerFrameDecodeResources() : m_picDispInfo() , m_frameCompleteFence() - , m_frameCompleteSemaphore() , m_frameConsumerDoneFence() - , m_frameConsumerDoneSemaphore() + , m_frameCompleteTimelineValue() + , m_frameConsumerDoneTimelineValue() , m_imageSpecsIndex() , m_hasFrameCompleteSignalFence(false) , m_hasFrameCompleteSignalSemaphore(false) , m_hasConsummerSignalFence(false) - , m_hasConsummerSignalSemaphore(false) + , m_useConsummerSignalSemaphore(false) , m_inDecodeQueue(false) , m_inDisplayQueue(false) , m_ownedByConsummer(false) - , m_vkDevCtx() , m_imageViewState() { } @@ -75,14 +74,14 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { VkResult init( const VulkanDeviceContext* vkDevCtx); - void Deinit(); + void Deinit(const VulkanDeviceContext* vkDevCtx); NvPerFrameDecodeResources (const NvPerFrameDecodeResources &srcObj) = delete; NvPerFrameDecodeResources (NvPerFrameDecodeResources &&srcObj) = delete; ~NvPerFrameDecodeResources() { - Deinit(); + Deinit(nullptr); } VkSharedBaseObj& GetImageView(uint8_t imageTypeIdx) { @@ -149,14 +148,14 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { VkParserDecodePictureInfo m_picDispInfo; VkFence m_frameCompleteFence; - VkSemaphore m_frameCompleteSemaphore; VkFence m_frameConsumerDoneFence; - VkSemaphore m_frameConsumerDoneSemaphore; + uint64_t m_frameCompleteTimelineValue; + uint64_t m_frameConsumerDoneTimelineValue; DecodeFrameBufferIf::ImageSpecsIndex m_imageSpecsIndex; uint32_t m_hasFrameCompleteSignalFence : 1; 
uint32_t m_hasFrameCompleteSignalSemaphore : 1; uint32_t m_hasConsummerSignalFence : 1; - uint32_t m_hasConsummerSignalSemaphore : 1; + uint32_t m_useConsummerSignalSemaphore : 1; uint32_t m_inDecodeQueue : 1; uint32_t m_inDisplayQueue : 1; uint32_t m_ownedByConsummer : 1; @@ -171,8 +170,8 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { // The filter's pool node VkSharedBaseObj filterPoolNode; + private: - const VulkanDeviceContext* m_vkDevCtx; std::array m_imageViewState; }; @@ -180,7 +179,10 @@ class NvPerFrameDecodeImageSet { public: NvPerFrameDecodeImageSet() - : m_queueFamilyIndex((uint32_t)-1) + : m_vkDevCtx() + , m_queueFamilyIndex((uint32_t)-1) + , m_frameCompleteSemaphore() + , m_consumerCompleteSemaphore() , m_numImages(0) , m_maxNumImageTypeIdx(0) , m_perFrameDecodeResources(VulkanVideoFrameBuffer::maxImages) @@ -195,11 +197,12 @@ class NvPerFrameDecodeImageSet { const std::array& imageSpecs, uint32_t queueFamilyIndex); - void Deinit(); + void Deinit(const VulkanDeviceContext* vkDevCtx); ~NvPerFrameDecodeImageSet() { - Deinit(); + Deinit(m_vkDevCtx); + m_vkDevCtx = nullptr; } NvPerFrameDecodeResources& operator[](unsigned int index) @@ -258,8 +261,13 @@ class NvPerFrameDecodeImageSet { } private: + const VulkanDeviceContext* m_vkDevCtx; uint32_t m_queueFamilyIndex; VkVideoCoreProfile m_videoProfile; +public: + VkSemaphore m_frameCompleteSemaphore; + VkSemaphore m_consumerCompleteSemaphore; +private: uint32_t m_numImages; uint32_t m_maxNumImageTypeIdx; std::vector m_perFrameDecodeResources; @@ -372,7 +380,7 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { m_ownedByDisplayMask = 0; m_frameNumInDisplayOrder = 0; - m_perFrameDecodeImageSet.Deinit(); + m_perFrameDecodeImageSet.Deinit(m_vkDevCtx); if (m_queryPool != VkQueryPool()) { m_vkDevCtx->DestroyQueryPool(*m_vkDevCtx, m_queryPool, NULL); @@ -417,10 +425,9 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if ((pFrameSynchronizationInfo->syncOnFrameConsumerDoneFence 
== 1) && - ((m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore == 0) || - (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneSemaphore == VK_NULL_HANDLE)) && - (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence == 1) && - (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence != VK_NULL_HANDLE)) { + (m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore == 0) && + (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence == 1) && + (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence != VK_NULL_HANDLE)) { vk::WaitAndResetFence(m_vkDevCtx, *m_vkDevCtx, m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence, true, "frameConsumerDoneFence"); @@ -456,15 +463,35 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if (pFrameSynchronizationInfo->hasFrameCompleteSignalSemaphore) { - pFrameSynchronizationInfo->frameCompleteSemaphore = m_perFrameDecodeImageSet[picId].m_frameCompleteSemaphore; - if (pFrameSynchronizationInfo->frameCompleteSemaphore) { + pFrameSynchronizationInfo->frameCompleteSemaphore = m_perFrameDecodeImageSet.m_frameCompleteSemaphore; + if (pFrameSynchronizationInfo->frameCompleteSemaphore != VK_NULL_HANDLE) { + + pFrameSynchronizationInfo->decodeCompleteTimelineValue = DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DECODE, + m_perFrameDecodeImageSet[picId].m_decodeOrder); + + if (pFrameSynchronizationInfo->hasFilterSignalSemaphore) { + pFrameSynchronizationInfo->filterCompleteTimelineValue = DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_FILTER, + m_perFrameDecodeImageSet[picId].m_decodeOrder); + + m_perFrameDecodeImageSet[picId].m_frameCompleteTimelineValue = pFrameSynchronizationInfo->filterCompleteTimelineValue; + + } else { + + m_perFrameDecodeImageSet[picId].m_frameCompleteTimelineValue = pFrameSynchronizationInfo->decodeCompleteTimelineValue; + + } + m_perFrameDecodeImageSet[picId].m_hasFrameCompleteSignalSemaphore = true; } } - if 
(m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore) { - pFrameSynchronizationInfo->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneSemaphore; - m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = false; + if (m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore) { + pFrameSynchronizationInfo->hasFrameConsumerSignalSemaphore = true; + pFrameSynchronizationInfo->consumerCompleteSemaphore = m_perFrameDecodeImageSet.m_consumerCompleteSemaphore; + pFrameSynchronizationInfo->frameConsumerDoneTimelineValue = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneTimelineValue; + m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore = false; } pFrameSynchronizationInfo->queryPool = m_queryPool; @@ -529,14 +556,20 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if (m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore) { - pDecodedFrame->frameCompleteSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteSemaphore; + pDecodedFrame->frameCompleteSemaphore = m_perFrameDecodeImageSet.m_frameCompleteSemaphore; + pDecodedFrame->frameCompleteDoneSemValue = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteTimelineValue; m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore = false; + + pDecodedFrame->consumerCompleteSemaphore = m_perFrameDecodeImageSet.m_consumerCompleteSemaphore; + pDecodedFrame->frameConsumerDoneSemValue = DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DISPLAY, + m_perFrameDecodeImageSet[pictureIndex].m_displayOrder); + } else { - pDecodedFrame->frameCompleteSemaphore = VkSemaphore(); + pDecodedFrame->frameCompleteSemaphore = VK_NULL_HANDLE; } pDecodedFrame->frameConsumerDoneFence = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneFence; - pDecodedFrame->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneSemaphore; pDecodedFrame->timestamp = 
m_perFrameDecodeImageSet[pictureIndex].m_timestamp; pDecodedFrame->decodeOrder = m_perFrameDecodeImageSet[pictureIndex].m_decodeOrder; @@ -572,7 +605,13 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { m_perFrameDecodeImageSet[picId].Release(); m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence = pDecodedFrameRelease->hasConsummerSignalFence; - m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = pDecodedFrameRelease->hasConsummerSignalSemaphore; + m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore = pDecodedFrameRelease->hasConsummerSignalSemaphore; + if (pDecodedFrameRelease->hasConsummerSignalSemaphore) { + m_perFrameDecodeImageSet[picId].m_frameConsumerDoneTimelineValue = + DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DISPLAY, + pDecodedFrameRelease->displayOrder); + } } return 0; } @@ -648,7 +687,7 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { std::lock_guard lock(m_displayQueueMutex); for (unsigned int resId = 0; resId < numResources; resId++) { if ((uint32_t)indexes[resId] < m_perFrameDecodeImageSet.size()) { - m_perFrameDecodeImageSet[indexes[resId]].Deinit(); + m_perFrameDecodeImageSet[indexes[resId]].Deinit(m_vkDevCtx); } } return (int32_t)m_perFrameDecodeImageSet.size(); @@ -785,8 +824,6 @@ VkResult NvPerFrameDecodeResources::CreateImage( const VulkanDeviceContext* vkDe } if (!ImageExist(pImageSpec->imageTypeIdx) || m_imageViewState[pImageSpec->imageTypeIdx].recreateImage) { - assert(m_vkDevCtx != nullptr); - m_imageViewState[pImageSpec->imageTypeIdx].currentLayerLayout = pImageSpec->createInfo.initialLayout; VkSharedBaseObj imageResource; @@ -839,21 +876,13 @@ VkResult NvPerFrameDecodeResources::CreateImage( const VulkanDeviceContext* vkDe VkResult NvPerFrameDecodeResources::init(const VulkanDeviceContext* vkDevCtx) { - m_vkDevCtx = vkDevCtx; - // The fence waited on for the first frame should be signaled. 
const VkFenceCreateInfo fenceFrameCompleteInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT }; - VkResult result = m_vkDevCtx->CreateFence(*m_vkDevCtx, &fenceFrameCompleteInfo, nullptr, &m_frameCompleteFence); + VkResult result = vkDevCtx->CreateFence(*vkDevCtx, &fenceFrameCompleteInfo, nullptr, &m_frameCompleteFence); const VkFenceCreateInfo fenceInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr }; - result = m_vkDevCtx->CreateFence(*m_vkDevCtx, &fenceInfo, nullptr, &m_frameConsumerDoneFence); - assert(result == VK_SUCCESS); - - const VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr }; - result = m_vkDevCtx->CreateSemaphore(*m_vkDevCtx, &semInfo, nullptr, &m_frameCompleteSemaphore); - assert(result == VK_SUCCESS); - result = m_vkDevCtx->CreateSemaphore(*m_vkDevCtx, &semInfo, nullptr, &m_frameConsumerDoneSemaphore); + result = vkDevCtx->CreateFence(*vkDevCtx, &fenceInfo, nullptr, &m_frameConsumerDoneFence); assert(result == VK_SUCCESS); Reset(); @@ -861,49 +890,35 @@ VkResult NvPerFrameDecodeResources::init(const VulkanDeviceContext* vkDevCtx) return result; } -void NvPerFrameDecodeResources::Deinit() +void NvPerFrameDecodeResources::Deinit(const VulkanDeviceContext* vkDevCtx) { bitstreamData = nullptr; stdPps = nullptr; stdSps = nullptr; stdVps = nullptr; - if (m_vkDevCtx == nullptr) { + if (vkDevCtx == nullptr) { assert ((m_frameCompleteFence == VK_NULL_HANDLE) && - (m_frameConsumerDoneFence == VK_NULL_HANDLE) && - (m_frameCompleteSemaphore == VK_NULL_HANDLE) && - (m_frameConsumerDoneSemaphore == VK_NULL_HANDLE)); + (m_frameConsumerDoneFence == VK_NULL_HANDLE)); return; } if (m_frameCompleteFence != VkFence()) { - m_vkDevCtx->DestroyFence(*m_vkDevCtx, m_frameCompleteFence, nullptr); + vkDevCtx->DestroyFence(*vkDevCtx, m_frameCompleteFence, nullptr); m_frameCompleteFence = VkFence(); } if (m_frameConsumerDoneFence != VkFence()) { - m_vkDevCtx->DestroyFence(*m_vkDevCtx, 
m_frameConsumerDoneFence, nullptr); + vkDevCtx->DestroyFence(*vkDevCtx, m_frameConsumerDoneFence, nullptr); m_frameConsumerDoneFence = VkFence(); } - if (m_frameCompleteSemaphore != VkSemaphore()) { - m_vkDevCtx->DestroySemaphore(*m_vkDevCtx, m_frameCompleteSemaphore, nullptr); - m_frameCompleteSemaphore = VkSemaphore(); - } - - if (m_frameConsumerDoneSemaphore != VkSemaphore()) { - m_vkDevCtx->DestroySemaphore(*m_vkDevCtx, m_frameConsumerDoneSemaphore, nullptr); - m_frameConsumerDoneSemaphore = VkSemaphore(); - } - for (uint32_t imageTypeIdx = 0; imageTypeIdx < DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES; imageTypeIdx++) { m_imageViewState[imageTypeIdx].view = nullptr; m_imageViewState[imageTypeIdx].singleLevelView = nullptr; } - m_vkDevCtx = nullptr; - Reset(); } @@ -919,6 +934,8 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, return -1; } + m_vkDevCtx = vkDevCtx; + for (uint32_t imageIndex = m_numImages; imageIndex < numImages; imageIndex++) { VkResult result = m_perFrameDecodeResources[imageIndex].init(vkDevCtx); assert(result == VK_SUCCESS); @@ -927,6 +944,20 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, } } + // Create timeline semaphores instead of binary semaphores + VkSemaphoreTypeCreateInfo timelineCreateInfo = {}; + timelineCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO; + timelineCreateInfo.pNext = nullptr; + timelineCreateInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE; + timelineCreateInfo.initialValue = 0ULL; + + VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &timelineCreateInfo }; + VkResult result = vkDevCtx->CreateSemaphore(*vkDevCtx, &semInfo, nullptr, &m_frameCompleteSemaphore); + assert(result == VK_SUCCESS); + + result = vkDevCtx->CreateSemaphore(*vkDevCtx, &semInfo, nullptr, &m_consumerCompleteSemaphore); + assert(result == VK_SUCCESS); + m_videoProfile.InitFromProfile(pDecodeProfile); m_queueFamilyIndex = queueFamilyIndex; @@ 
-1048,10 +1079,21 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, return (int32_t)numImages; } -void NvPerFrameDecodeImageSet::Deinit() +void NvPerFrameDecodeImageSet::Deinit(const VulkanDeviceContext* vkDevCtx) { + + if (m_frameCompleteSemaphore != VK_NULL_HANDLE) { + m_vkDevCtx->DestroySemaphore(*vkDevCtx, m_frameCompleteSemaphore, nullptr); + m_frameCompleteSemaphore = VK_NULL_HANDLE; + } + + if (m_consumerCompleteSemaphore != VK_NULL_HANDLE) { + m_vkDevCtx->DestroySemaphore(*vkDevCtx, m_consumerCompleteSemaphore, nullptr); + m_consumerCompleteSemaphore = VK_NULL_HANDLE; + } + for (size_t ndx = 0; ndx < m_numImages; ndx++) { - m_perFrameDecodeResources[ndx].Deinit(); + m_perFrameDecodeResources[ndx].Deinit(vkDevCtx); } for (uint32_t imageTypeIdx = 0; imageTypeIdx < DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES; imageTypeIdx++) { diff --git a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h index 863d3a4f..e622bb7f 100644 --- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h +++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h @@ -66,14 +66,20 @@ class VulkanVideoFrameBuffer : public IVulkanVideoFrameBufferParserCb { struct FrameSynchronizationInfo { VkFence frameCompleteFence; VkSemaphore frameCompleteSemaphore; + VkSemaphore consumerCompleteSemaphore; VkFence frameConsumerDoneFence; - VkSemaphore frameConsumerDoneSemaphore; + uint64_t frameConsumerDoneTimelineValue; + uint64_t decodeCompleteTimelineValue; + uint64_t filterCompleteTimelineValue; VkQueryPool queryPool; uint32_t startQueryId; uint32_t numQueries; DecodeFrameBufferIf::ImageSpecsIndex imageSpecsIndex; uint32_t hasFrameCompleteSignalFence : 1; + uint32_t hasFrameConsumerSignalSemaphore : 1; uint32_t hasFrameCompleteSignalSemaphore : 1; + // post processing filter + uint32_t hasFilterSignalSemaphore : 1; uint32_t 
syncOnFrameCompleteFence : 1; uint32_t syncOnFrameConsumerDoneFence : 1; }; diff --git a/vk_video_decoder/src/vulkan_video_decoder.cpp b/vk_video_decoder/src/vulkan_video_decoder.cpp index e26115cd..a0018a46 100644 --- a/vk_video_decoder/src/vulkan_video_decoder.cpp +++ b/vk_video_decoder/src/vulkan_video_decoder.cpp @@ -155,16 +155,8 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR; - VkQueueFlags requestVideoEncodeQueueMask = 0; - if (m_decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - } - if (m_decoderConfig.selectVideoWithComputeQueue) { requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (m_decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -172,16 +164,7 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT; } - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs | - (m_decoderConfig.enableVideoEncoder ? videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR); + VkVideoCodecOperationFlagsKHR videoCodecOperation = videoStreamDemuxer->GetVideoCodec(); const bool supportsShellPresent = ((!m_decoderConfig.noPresent == false) && (pWsiDisplay != nullptr)); const bool createGraphicsQueue = supportsShellPresent ? 
true : false; @@ -196,17 +179,12 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, ( VK_QUEUE_TRANSFER_BIT | requestGraphicsQueueMask | requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoDecodeQueueMask), pWsiDisplay, requestVideoDecodeQueueMask, - ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), - requestVideoEncodeQueueMask, - ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR), + videoCodecOperation, + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, vkPhysicalDevice); if (result != VK_SUCCESS) { @@ -216,8 +194,8 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, } m_vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - m_decoderConfig.enableVideoEncoder ? 1 : 0, // num encode queues - videoCodecs, + 0, // num encode queues + videoCodecOperation, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
@@ -264,6 +242,7 @@ VkResult CreateVulkanVideoDecoder(VkInstance vkInstance, VkPhysicalDevice vkPhys case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: { } diff --git a/vk_video_decoder/test/vulkan-video-dec/Main.cpp b/vk_video_decoder/test/vulkan-video-dec/Main.cpp index 2c5d4d0a..5a02d1b3 100644 --- a/vk_video_decoder/test/vulkan-video-dec/Main.cpp +++ b/vk_video_decoder/test/vulkan-video-dec/Main.cpp @@ -60,6 +60,20 @@ int main(int argc, const char** argv) return -1; } + + VkSharedBaseObj videoStreamDemuxer; + result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), + decoderConfig.forceParserType, + decoderConfig.enableStreamDemuxing, + decoderConfig.initialWidth, + decoderConfig.initialHeight, + decoderConfig.initialBitdepth, + videoStreamDemuxer); + if (result != VK_SUCCESS) { + assert(!"Can't initialize the VideoStreamDemuxer!"); + return result; + } + const int32_t numDecodeQueues = ((decoderConfig.queueId != 0) || (decoderConfig.enableHwLoadBalancing != 0)) ? 
-1 : // all available HW decoders @@ -67,16 +81,8 @@ int main(int argc, const char** argv) VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR; - VkQueueFlags requestVideoEncodeQueueMask = 0; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - } - if (decoderConfig.selectVideoWithComputeQueue) { requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -84,16 +90,9 @@ int main(int argc, const char** argv) requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT; } - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs | - (decoderConfig.enableVideoEncoder ? videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR); + VkVideoCodecOperationFlagsKHR videoCodec = decoderConfig.forceParserType != VK_VIDEO_CODEC_OPERATION_NONE_KHR ? 
+ decoderConfig.forceParserType : + videoStreamDemuxer->GetVideoCodec(); if (!decoderConfig.noPresent) { @@ -111,17 +110,12 @@ int main(int argc, const char** argv) result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT | requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoDecodeQueueMask), displayShell, requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), - requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + videoCodec, + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); return -1; @@ -130,27 +124,14 @@ int main(int argc, const char** argv) vkDevCtxt.GetPresentQueueFamilyIdx())); vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - decoderConfig.enableVideoEncoder ? 
1 : 0, // num encode queues - videoCodecs, + 0, // num encode queues + videoCodec, false, // createTransferQueue true, // createGraphicsQueue true, // createDisplayQueue requestVideoComputeQueueMask != 0 // createComputeQueue ); - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } VkSharedBaseObj frameToFile; if (!decoderConfig.outputFileName.empty()) { @@ -194,8 +175,7 @@ int main(int argc, const char** argv) result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_TRANSFER_BIT | requestVideoDecodeQueueMask | - requestVideoComputeQueueMask | - requestVideoEncodeQueueMask), + requestVideoComputeQueueMask), nullptr, requestVideoDecodeQueueMask); if (result != VK_SUCCESS) { @@ -205,7 +185,7 @@ int main(int argc, const char** argv) result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues, 0, // num encode queues - videoCodecs, + videoCodec, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
@@ -219,20 +199,6 @@ int main(int argc, const char** argv) return -1; } - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } - VkSharedBaseObj frameToFile; if (!decoderConfig.outputFileName.empty()) { const char* crcOutputFile = decoderConfig.outputcrcPerFrame ? decoderConfig.crcOutputFileName.c_str() : nullptr; diff --git a/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp b/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp index 8b4e845b..56bba249 100644 --- a/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp +++ b/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp @@ -114,6 +114,8 @@ int main(int argc, const char** argv) break; case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + break; default: std::cout << "Simple decoder does not support demuxing " << "and the decoder type must be set with --codec " diff --git a/vk_video_encoder/demos/vk-video-enc/Main.cpp b/vk_video_encoder/demos/vk-video-enc/Main.cpp index 37b046a6..1a589fe0 100644 --- a/vk_video_encoder/demos/vk-video-enc/Main.cpp +++ b/vk_video_encoder/demos/vk-video-enc/Main.cpp @@ -52,7 +52,7 @@ int main(int argc, char** argv) VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, - VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; @@ -70,6 +70,7 @@ int main(int argc, char** argv) VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, nullptr }; @@ -122,17 
+123,9 @@ int main(int argc, char** argv) VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - VkQueueFlags requestVideoDecodeQueueMask = 0; - if (encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_VIDEO_DECODE_BIT_KHR | - VK_QUEUE_TRANSFER_BIT; - } if (encoderConfig->selectVideoWithComputeQueue) { requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -157,19 +150,6 @@ int main(int argc, char** argv) return -1; } - - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoEncodeCodecs | - encoderConfig->enableVideoDecoder ? 
videoDecodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR; - - VkSharedBaseObj encoder; // the encoder's instance if (supportsDisplay && encoderConfig->enableFramePresent) { @@ -185,14 +165,11 @@ int main(int argc, char** argv) result = vkDevCtxt.InitPhysicalDevice(encoderConfig->deviceId, encoderConfig->GetDeviceUUID(), (VK_QUEUE_GRAPHICS_BIT | - requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoComputeQueueMask | + requestVideoEncodeQueueMask), displayShell, - requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, requestVideoEncodeQueueMask, (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | @@ -205,9 +182,9 @@ int main(int argc, char** argv) assert(displayShell->PhysDeviceCanPresent(vkDevCtxt.getPhysicalDevice(), vkDevCtxt.GetPresentQueueFamilyIdx())); - result = vkDevCtxt.CreateVulkanDevice(encoderConfig->enableVideoDecoder ? 
1 : 0, // num decode queues + result = vkDevCtxt.CreateVulkanDevice(0, // num decode queues numEncodeQueues, // num encode queues - videoCodecs, + encoderConfig->codec, false, // createTransferQueue true, // createGraphicsQueue true, // createDisplayQueue @@ -240,26 +217,22 @@ int main(int argc, char** argv) // No display presentation and no decoder - just the encoder result = vkDevCtxt.InitPhysicalDevice(encoderConfig->deviceId, encoderConfig->GetDeviceUUID(), (requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | requestVideoEncodeQueueMask | VK_QUEUE_TRANSFER_BIT), nullptr, - requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR), + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + encoderConfig->codec); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); return -1; } - result = vkDevCtxt.CreateVulkanDevice(encoderConfig->enableVideoDecoder ? 1 : 0, // num decode queues + result = vkDevCtxt.CreateVulkanDevice(0, // num decode queues numEncodeQueues, // num encode queues - videoCodecs, + encoderConfig->codec, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h index c7de5367..e826064a 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h @@ -762,7 +762,6 @@ struct EncoderConfig : public VkVideoRefCountBase { uint32_t verboseMsg : 1; uint32_t enableFramePresent : 1; uint32_t enableFrameDirectModePresent : 1; - uint32_t enableVideoDecoder : 1; uint32_t enableHwLoadBalancing : 1; uint32_t selectVideoWithComputeQueue : 1; uint32_t enablePreprocessComputeFilter : 1; @@ -857,7 +856,6 @@ struct EncoderConfig : public VkVideoRefCountBase { , verboseMsg(false) , enableFramePresent(false) , enableFrameDirectModePresent(false) - , enableVideoDecoder(false) , enableHwLoadBalancing(false) , selectVideoWithComputeQueue(false) , enablePreprocessComputeFilter(true) diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp index d79ed014..e4f71cd8 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp @@ -441,22 +441,36 @@ VkResult VkVideoEncoder::SubmitStagedQpMap(VkSharedBaseObjqpMapCmdBuffer->GetCommandBuffer(); VkSemaphore frameCompleteSemaphore = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore(); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoTransferSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.waitSemaphoreCount = 0; - submitInfo.pWaitSemaphores = nullptr; - submitInfo.pWaitDstStageMask = &videoTransferSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = pCmdBuf; - submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr; - submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
1 : 0; + VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR }; + cmdBufferInfo.commandBuffer = *pCmdBuf; + cmdBufferInfo.deviceMask = 0; + + VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + signalSemaphoreInfo.semaphore = frameCompleteSemaphore; + signalSemaphoreInfo.value = 0; // Binary semaphore + signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; // Signal after transfer operations complete + signalSemaphoreInfo.deviceIndex = 0; + + VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = 0; + submitInfo.pWaitSemaphoreInfos = nullptr; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfo; + submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &signalSemaphoreInfo : nullptr; VkFence queueCompleteFence = encodeFrameInfo->qpMapCmdBuffer->GetFence(); assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence)); + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(((m_vkDevCtx->GetVideoEncodeQueueFlag() & VK_QUEUE_TRANSFER_BIT) != 0) ? 
- VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER, - 0, 1, &submitInfo, - queueCompleteFence); + VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER, + 0, // queueIndex + 1, // submitCount + &submitInfo, queueCompleteFence, + "Encode Staging QpMap", + m_encodeEncodeFrameNum, + m_encodeInputFrameNum); encodeFrameInfo->qpMapCmdBuffer->SetCommandBufferSubmitted(); bool syncCpuAfterStaging = false; @@ -475,15 +489,24 @@ VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObjinputCmdBuffer->GetCommandBuffer(); VkSemaphore frameCompleteSemaphore = encodeFrameInfo->inputCmdBuffer->GetSemaphore(); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoTransferSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.waitSemaphoreCount = 0; - submitInfo.pWaitSemaphores = nullptr; - submitInfo.pWaitDstStageMask = &videoTransferSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = pCmdBuf; - submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr; - submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
1 : 0; + VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR }; + cmdBufferInfo.commandBuffer = *pCmdBuf; + cmdBufferInfo.deviceMask = 0; + + VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + signalSemaphoreInfo.semaphore = frameCompleteSemaphore; + signalSemaphoreInfo.value = 0; // Binary semaphore + signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; // Signal after transfer operations complete + signalSemaphoreInfo.deviceIndex = 0; + + VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = 0; + submitInfo.pWaitSemaphoreInfos = nullptr; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfo; + submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &signalSemaphoreInfo : nullptr; VkFence queueCompleteFence = encodeFrameInfo->inputCmdBuffer->GetFence(); assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence)); @@ -491,9 +514,15 @@ VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObjGetVideoEncodeQueueFlag() & VK_QUEUE_TRANSFER_BIT) != 0) ? 
VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER); + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(submitType, - 0, 1, &submitInfo, - queueCompleteFence); + 0, // queueIndex + 1, // submitCount + &submitInfo, + queueCompleteFence, + "Encode Staging Input", + m_encodeEncodeFrameNum, + m_encodeInputFrameNum); encodeFrameInfo->inputCmdBuffer->SetCommandBufferSubmitted(); bool syncCpuAfterStaging = false; @@ -1306,12 +1335,12 @@ VkResult VkVideoEncoder::CopyLinearToOptimalImage(VkCommandBuffer& commandBuffer copyRegion[0].dstSubresource.layerCount = 1; copyRegion[1].extent.width = copyRegion[0].extent.width; if (mpInfo->planesLayout.secondaryPlaneSubsampledX != 0) { - copyRegion[1].extent.width /= 2; + copyRegion[1].extent.width = (copyRegion[1].extent.width + 1) / 2; } copyRegion[1].extent.height = copyRegion[0].extent.height; if (mpInfo->planesLayout.secondaryPlaneSubsampledY != 0) { - copyRegion[1].extent.height /= 2; + copyRegion[1].extent.height = (copyRegion[1].extent.height + 1) / 2; } copyRegion[1].extent.depth = 1; @@ -1602,38 +1631,74 @@ VkResult VkVideoEncoder::SubmitVideoCodingCmds(VkSharedBaseObjencodeCmdBuffer != nullptr); + const VkCommandBuffer* pCmdBuf = encodeFrameInfo->encodeCmdBuffer->GetCommandBuffer(); + // The encode operation complete semaphore is not needed at this point. + VkSemaphore frameCompleteSemaphore = VK_NULL_HANDLE; // encodeFrameInfo->encodeCmdBuffer->GetSemaphore(); + + // Create command buffer submit info + VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR }; + cmdBufferInfo.commandBuffer = *pCmdBuf; + cmdBufferInfo.deviceMask = 0; + + + + // Create wait semaphore submit infos // If we are processing the input staging, wait for it's semaphore // to be done before processing the input frame with the encoder. 
- VkSemaphore inputWaitSemaphore[2] = { VK_NULL_HANDLE }; + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[2]{}; uint32_t waitSemaphoreCount = 0; if (encodeFrameInfo->inputCmdBuffer) { - inputWaitSemaphore[waitSemaphoreCount++] = encodeFrameInfo->inputCmdBuffer->GetSemaphore(); + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = encodeFrameInfo->inputCmdBuffer->GetSemaphore(); + waitSemaphoreInfos[waitSemaphoreCount].value = 0; // Binary semaphore + // Use transfer bit since these semaphores come from transfer operations + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; + waitSemaphoreCount++; } if (encodeFrameInfo->qpMapCmdBuffer) { - inputWaitSemaphore[waitSemaphoreCount++] = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore(); - } - - const VkCommandBuffer* pCmdBuf = encodeFrameInfo->encodeCmdBuffer->GetCommandBuffer(); - // The encode operation complete semaphore is not needed at this point. - VkSemaphore frameCompleteSemaphore = VK_NULL_HANDLE; // encodeFrameInfo->encodeCmdBuffer->GetSemaphore(); - - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoEncodeSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.pWaitSemaphores = (waitSemaphoreCount > 0) ? inputWaitSemaphore : nullptr; - submitInfo.waitSemaphoreCount = waitSemaphoreCount; - submitInfo.pWaitDstStageMask = &videoEncodeSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = pCmdBuf; - submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr; - submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
1 : 0; + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore(); + waitSemaphoreInfos[waitSemaphoreCount].value = 0; // Binary semaphore + // Use transfer bit since these semaphores come from transfer operations + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; + waitSemaphoreCount++; + } + + // Create signal semaphore submit info if needed + VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + if (frameCompleteSemaphore != VK_NULL_HANDLE) { + signalSemaphoreInfo.semaphore = frameCompleteSemaphore; + signalSemaphoreInfo.value = 0; // Binary semaphore + signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR; + signalSemaphoreInfo.deviceIndex = 0; + } + + // Create submit info + VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = (waitSemaphoreCount > 0) ? waitSemaphoreInfos : nullptr; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfo; + submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
&signalSemaphoreInfo : nullptr; VkFence queueCompleteFence = encodeFrameInfo->encodeCmdBuffer->GetFence(); assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence)); - VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::ENCODE, 0, - 1, &submitInfo, - queueCompleteFence); + + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::ENCODE, + 0, // queueIndex + 1, // submitCount + &submitInfo, + queueCompleteFence, + "Video Encode", + m_encodeEncodeFrameNum, + m_encodeInputFrameNum); encodeFrameInfo->encodeCmdBuffer->SetCommandBufferSubmitted(); bool syncCpuAfterEncoding = false; diff --git a/vk_video_encoder/src/vulkan_video_encoder.cpp b/vk_video_encoder/src/vulkan_video_encoder.cpp index 18831f2a..803d9da6 100644 --- a/vk_video_encoder/src/vulkan_video_encoder.cpp +++ b/vk_video_encoder/src/vulkan_video_encoder.cpp @@ -106,6 +106,7 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; @@ -114,6 +115,7 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, nullptr }; @@ -140,17 +142,8 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - VkQueueFlags requestVideoDecodeQueueMask = 0; - if (m_encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_VIDEO_DECODE_BIT_KHR | - VK_QUEUE_TRANSFER_BIT; - } - if (m_encoderConfig->selectVideoWithComputeQueue) { requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (m_encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask 
|= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -161,17 +154,13 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid // No display presentation and no decoder - just the encoder result = m_vkDevCtxt.InitPhysicalDevice(m_encoderConfig->deviceId, m_encoderConfig->GetDeviceUUID(), ( requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | requestVideoEncodeQueueMask | VK_QUEUE_TRANSFER_BIT), nullptr, - requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR), + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + videoCodecOperation); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); @@ -183,21 +172,9 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid -1 : // all available HW encoders 1; // only one HW encoder instance - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoEncodeCodecs | - (m_encoderConfig->enableVideoDecoder ? videoDecodeCodecs : (uint32_t)VK_VIDEO_CODEC_OPERATION_NONE_KHR); - - - result = m_vkDevCtxt.CreateVulkanDevice(m_encoderConfig->enableVideoDecoder ? 
1 : 0, // num decode queues + result = m_vkDevCtxt.CreateVulkanDevice(0, // num decode queues numEncodeQueues, // num encode queues - videoCodecs, + videoCodecOperation, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations.