diff --git a/common/include/VkVideoCore/DecodeFrameBufferIf.h b/common/include/VkVideoCore/DecodeFrameBufferIf.h
index 60393c38..fc99a4f5 100644
--- a/common/include/VkVideoCore/DecodeFrameBufferIf.h
+++ b/common/include/VkVideoCore/DecodeFrameBufferIf.h
@@ -107,6 +107,16 @@ class DecodeFrameBufferIf
         }
     };
 
+    enum SemSyncTypeIdx : uint64_t  {  SEM_SYNC_TYPE_IDX_DECODE      =  (1ULL << 0), // Flag bit 0: decode operation was signaled
+                                       SEM_SYNC_TYPE_IDX_DISPLAY     =  (1ULL << 0), // Display operation was signaled. NOTE(review): same value as SEM_SYNC_TYPE_IDX_DECODE - confirm this aliasing is intended
+                                       SEM_SYNC_TYPE_IDX_FILTER      =  (1ULL << 1), // Flag bit 1: filter operation was signaled
+                                       SEM_SYNC_TYPE_IDX_SHIFT  = 2,                 // Not a flag: bit count by which GetSemaphoreValue() shifts the order counter left before OR-ing in one of the values above
+                                    };
+
+    static uint64_t GetSemaphoreValue(SemSyncTypeIdx semSyncType, uint64_t semOrder) { // Combines an order counter and a sync-type value into one 64-bit value
+        return (semOrder << SEM_SYNC_TYPE_IDX_SHIFT) | semSyncType; // semOrder moves up by SEM_SYNC_TYPE_IDX_SHIFT bits (its topmost bits are shifted out); semSyncType is OR-ed into the result
+    }
+
 };
 
 #endif /* _VKVIDEOCORE_DECODEFRAMEBUFFERIF_H_ */
diff --git a/common/include/VkVideoCore/VkVideoCoreProfile.h b/common/include/VkVideoCore/VkVideoCoreProfile.h
index 7483d8ce..55ea56e7 100644
--- a/common/include/VkVideoCore/VkVideoCoreProfile.h
+++ b/common/include/VkVideoCore/VkVideoCoreProfile.h
@@ -50,7 +50,8 @@ class VkVideoCoreProfile
     {
         return  (videoCodecOperations & (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
                                          VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
-                                         VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR |
+                                         VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR  |
+                                         VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR  |
                                          VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
                                          VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
                                          VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR));
@@ -100,12 +101,26 @@ class VkVideoCoreProfile
                 m_av1DecodeProfile = *pProfileExt;
             } else {
                 //  Use default ext profile parameters
-                m_av1DecodeProfile.sType      = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR;
+                m_av1DecodeProfile.sType      = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR;
                 m_av1DecodeProfile.stdProfile = STD_VIDEO_AV1_PROFILE_MAIN;
             }
             m_profile.pNext = &m_av1DecodeProfile;
             m_av1DecodeProfile.pNext = NULL;
-
+        } else if (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+            VkVideoDecodeVP9ProfileInfoKHR const * pProfileExt = (VkVideoDecodeVP9ProfileInfoKHR const *)pVideoProfileExt;
+            if (pProfileExt && (pProfileExt->sType != VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR)) {
+                m_profile.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+                return false;
+            }
+            if (pProfileExt) {
+                m_vp9DecodeProfile = *pProfileExt;
+            } else {
+                //  Use default ext profile parameters
+                m_vp9DecodeProfile.sType      = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR;
+                m_vp9DecodeProfile.stdProfile = STD_VIDEO_VP9_PROFILE_0;
+            }
+            m_profile.pNext = &m_vp9DecodeProfile;
+            m_vp9DecodeProfile.pNext = NULL;
         } else if (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR) {
             VkVideoEncodeH264ProfileInfoKHR const * pProfileExt = (VkVideoEncodeH264ProfileInfoKHR const *)pVideoProfileExt;
             if (pProfileExt && (pProfileExt->sType != VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR)) {
@@ -205,6 +220,7 @@ class VkVideoCoreProfile
         VkVideoDecodeH264ProfileInfoKHR decodeH264ProfilesRequest;
         VkVideoDecodeH265ProfileInfoKHR decodeH265ProfilesRequest;
         VkVideoDecodeAV1ProfileInfoKHR  decodeAV1ProfilesRequest;
+        VkVideoDecodeVP9ProfileInfoKHR  decodeVP9ProfilesRequest;
         VkVideoEncodeH264ProfileInfoKHR encodeH264ProfilesRequest;
         VkVideoEncodeH265ProfileInfoKHR encodeH265ProfilesRequest;
         VkVideoEncodeAV1ProfileInfoKHR encodeAV1ProfilesRequest;
@@ -243,6 +259,13 @@ class VkVideoCoreProfile
                                                        STD_VIDEO_H265_PROFILE_IDC_INVALID :
                                                        (StdVideoH265ProfileIdc)videoH26xProfileIdc;
             pVideoProfileExt = (VkBaseInStructure*)&decodeH265ProfilesRequest;
+        } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+            decodeVP9ProfilesRequest.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR;
+            decodeVP9ProfilesRequest.pNext = NULL;
+            decodeVP9ProfilesRequest.stdProfile = (videoH26xProfileIdc == 0) ?
+                                                    STD_VIDEO_VP9_PROFILE_0 :
+                                                    (StdVideoVP9Profile)videoH26xProfileIdc;
+            pVideoProfileExt = (VkBaseInStructure*)&decodeVP9ProfilesRequest;
         } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR) {
             encodeH264ProfilesRequest.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR;
             encodeH264ProfilesRequest.pNext = pEncodeUsageInfo;
@@ -287,7 +310,9 @@ class VkVideoCoreProfile
     bool IsDecodeCodecType() const
     {
         return ((m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) ||
-                (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR));
+                (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) ||
+                (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR)  ||
+                (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR)); // true only when the operation equals exactly one of these four decode bits (equality test, not a mask test)
     }
 
     operator bool() const
@@ -340,6 +365,15 @@ class VkVideoCoreProfile
         }
     }
 
+    const VkVideoDecodeVP9ProfileInfoKHR* GetDecodeVP9Profile() const // Returns a pointer to m_vp9DecodeProfile, or NULL when its sType is not the VP9 decode profile type
+    {
+        if (m_vp9DecodeProfile.sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR) { // presumably only true after the VP9 decode branch of the profile setup filled this member - TODO confirm
+            return &m_vp9DecodeProfile; // address of a member: valid only as long as this object is
+        } else {
+            return NULL;
+        }
+    }
+
     const VkVideoEncodeH264ProfileInfoKHR* GetEncodeH264Profile() const
     {
         if (m_h264EncodeProfile.sType == VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR) {
@@ -605,6 +639,8 @@ class VkVideoCoreProfile
             return "decode h.265";
         case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
             return "decode av1";
+        case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
+            return "decode vp9";
         case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
             return "encode h.264";
         case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
@@ -769,6 +805,7 @@ class VkVideoCoreProfile
         VkVideoDecodeH264ProfileInfoKHR m_h264DecodeProfile;
         VkVideoDecodeH265ProfileInfoKHR m_h265DecodeProfile;
         VkVideoDecodeAV1ProfileInfoKHR  m_av1DecodeProfile;
+        VkVideoDecodeVP9ProfileInfoKHR  m_vp9DecodeProfile;
         VkVideoEncodeH264ProfileInfoKHR m_h264EncodeProfile;
         VkVideoEncodeH265ProfileInfoKHR m_h265EncodeProfile;
         VkVideoEncodeAV1ProfileInfoKHR m_av1EncodeProfile;
diff --git a/common/include/VkVideoCore/VulkanVideoCapabilities.h b/common/include/VkVideoCore/VulkanVideoCapabilities.h
index b703298b..a2cc4af9 100644
--- a/common/include/VkVideoCore/VulkanVideoCapabilities.h
+++ b/common/include/VkVideoCore/VulkanVideoCapabilities.h
@@ -38,6 +38,7 @@ class VulkanVideoCapabilities
         VkVideoDecodeH264CapabilitiesKHR h264Capabilities    = { VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, nullptr };
         VkVideoDecodeH265CapabilitiesKHR h265Capabilities    = { VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, nullptr };
         VkVideoDecodeAV1CapabilitiesKHR  av1Capabilities     = { VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_KHR,  nullptr };
+        VkVideoDecodeVP9CapabilitiesKHR  vp9Capabilities     = { VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR,  nullptr };
 
         if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) {
             videoDecodeCapabilities.pNext = &h264Capabilities;
@@ -45,6 +46,8 @@ class VulkanVideoCapabilities
             videoDecodeCapabilities.pNext = &h265Capabilities;
         } else if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
             videoDecodeCapabilities.pNext = &av1Capabilities;
+        } else if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+            videoDecodeCapabilities.pNext = &vp9Capabilities;
         } else {
             assert(!"Unsupported codec");
             return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR;
@@ -197,6 +200,16 @@ class VulkanVideoCapabilities
             }
         }
             break;
+        case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
+        {
+            assert(pVideoDecodeCapabilities->pNext);
+            const VkVideoDecodeVP9CapabilitiesKHR* pVP9Capabilities = (VkVideoDecodeVP9CapabilitiesKHR*)pVideoDecodeCapabilities->pNext;
+            assert(pVP9Capabilities->sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR);
+            if (pVP9Capabilities->sType != VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR) {
+                return VK_ERROR_INITIALIZATION_FAILED;
+            }
+        }
+            break;
         case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
         {
             assert(pVideoEncodeCapabilities->pNext);
@@ -277,6 +290,26 @@ class VulkanVideoCapabilities
                     assert(!"Unsupported h.265 STD version");
                     return VK_ERROR_INCOMPATIBLE_DRIVER;
                 }
+            } else if (videoProfile.GetCodecType() == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
+                const VkVideoDecodeAV1CapabilitiesKHR* pAV1DecCapabilities = (VkVideoDecodeAV1CapabilitiesKHR*)pVideoDecodeCapabilities->pNext;
+                std::cout << "\t\t\t" << "maxLevelIdc: " << pAV1DecCapabilities->maxLevel << std::endl;
+                if (strncmp(pVideoCapabilities->stdHeaderVersion.extensionName,
+                        VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME,
+                            sizeof (pVideoCapabilities->stdHeaderVersion.extensionName) - 1U) ||
+                    (pVideoCapabilities->stdHeaderVersion.specVersion != VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION)) {
+                    assert(!"Unsupported AV1 STD version");
+                    return VK_ERROR_INCOMPATIBLE_DRIVER;
+                }
+            } else if (videoProfile.GetCodecType() == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+                const VkVideoDecodeVP9CapabilitiesKHR* pVP9DecCapabilities = (VkVideoDecodeVP9CapabilitiesKHR*)pVideoDecodeCapabilities->pNext;
+                std::cout << "\t\t\t" << "maxLevelIdc: " << pVP9DecCapabilities->maxLevel << std::endl;
+                if (strncmp(pVideoCapabilities->stdHeaderVersion.extensionName,
+                        VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME,
+                            sizeof (pVideoCapabilities->stdHeaderVersion.extensionName) - 1U) ||
+                    (pVideoCapabilities->stdHeaderVersion.specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) {
+                    assert(!"Unsupported VP9 STD version");
+                    return VK_ERROR_INCOMPATIBLE_DRIVER;
+                }
             } else {
                 assert(!"Unsupported codec");
             }
@@ -354,8 +387,12 @@ class VulkanVideoCapabilities
                                                             int32_t* pVideoQueueFamily,
             VkQueueFlags queueFlagsRequired = ( VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR),
             VkVideoCodecOperationFlagsKHR videoCodeOperations =
-                                              ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
-                                                VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
+                                              ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
+                                                VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
+                                                VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR  |
+                                                VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR  |
+                                                VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
+                                                VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
                                                 VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR))
     {
         std::vector<VkQueueFamilyProperties2> queues;
@@ -429,6 +466,16 @@ class VulkanVideoCapabilities
                                                                &videoDecodeCapabilities);
     }
 
+    static VkResult GetDecodeVP9Capabilities(const VulkanDeviceContext* vkDevCtx, uint32_t, // Queries video capabilities for videoProfile; vkDevCtx is dereferenced without a null check and the unnamed uint32_t is unused
+                                             const VkVideoProfileInfoKHR& videoProfile,
+                                             VkVideoCapabilitiesKHR &videoDecodeCapabilities) // in/out: this function itself writes only sType before the query; every other field (including pNext) is passed on as the caller set it
+    {
+        videoDecodeCapabilities.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
+        return vkDevCtx->GetPhysicalDeviceVideoCapabilitiesKHR(vkDevCtx->getPhysicalDevice(), // the VkResult of the query is returned unchanged
+                                                               &videoProfile,
+                                                               &videoDecodeCapabilities);
+    }
+
     static VkResult GetEncodeH264Capabilities(const VulkanDeviceContext* vkDevCtx, uint32_t,
                                               const VkVideoProfileInfoKHR& videoProfile,
                                               VkVideoCapabilitiesKHR &videoEncodeCapabilities,
diff --git a/common/libs/VkCodecUtils/DecoderConfig.h b/common/libs/VkCodecUtils/DecoderConfig.h
index 4d06a1d5..b0f14a59 100644
--- a/common/libs/VkCodecUtils/DecoderConfig.h
+++ b/common/libs/VkCodecUtils/DecoderConfig.h
@@ -75,7 +75,6 @@ struct DecoderConfig {
         directMode = false;
         enableHwLoadBalancing = false;
         selectVideoWithComputeQueue = false;
-        enableVideoEncoder = false;
         outputy4m = false;
         outputcrcPerFrame = false;
         outputcrc = false;
@@ -137,6 +136,9 @@ struct DecoderConfig {
                     } else if (strcmp(args[0], "av1") == 0) {
                         forceParserType = VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
                         return true;
+                    } else if ((strcmp(args[0], "vp9") == 0)) {
+                        forceParserType = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR;
+                        return true;
                     } else {
                         std::cerr << "Invalid codec \"" << args[0] << "\"" << std::endl;
                         return false;
@@ -470,7 +472,6 @@ struct DecoderConfig {
     uint32_t noPresent : 1;
     uint32_t enableHwLoadBalancing : 1;
     uint32_t selectVideoWithComputeQueue : 1;
-    uint32_t enableVideoEncoder : 1;
     uint32_t outputy4m : 1;
     uint32_t outputcrc : 1;
     uint32_t outputcrcPerFrame : 1;
diff --git a/common/libs/VkCodecUtils/Helpers.h b/common/libs/VkCodecUtils/Helpers.h
index 4218d36d..e4c70abb 100644
--- a/common/libs/VkCodecUtils/Helpers.h
+++ b/common/libs/VkCodecUtils/Helpers.h
@@ -238,23 +238,21 @@ inline VkResult WaitAndResetFence(const VkInterfaceFunctions* vkIf, VkDevice dev
 
     while (fenceTotalWaitTimeout >= fenceCurrentWaitTimeout) {
 
-        result = vkIf->WaitForFences(device, 1, &fence, true, fenceWaitTimeout);
-        if (result != VK_SUCCESS) {
-            fprintf(stderr, "\t **** WARNING: fence  %s(%llu) is not done after %llu nSec with result 0x%x ****\n",
-                            fenceName, (long long unsigned int)fence, (long long unsigned int)fenceWaitTimeout, result);
-            assert(!"Fence is not signaled yet after more than 100 mSec wait");
-        }
+        fenceCurrentWaitTimeout += fenceWaitTimeout;
 
-        if (result != VK_TIMEOUT) {
-            break;
+        result = vkIf->WaitForFences(device, 1, &fence, true, fenceWaitTimeout);
+        if (result == VK_TIMEOUT) {
+            fprintf(stderr, "\t **** WARNING: fence  %s(%llu) is not done after %llu mSec with result 0x%x ****\n",
+                            fenceName, (long long unsigned int)fence, (long long unsigned int)fenceCurrentWaitTimeout/(1000ULL * 1000ULL), result);
+        } else {
+            break; // either success or an error occured
         }
 
-        fenceCurrentWaitTimeout += fenceWaitTimeout;
     }
 
     if (result != VK_SUCCESS) {
-        fprintf(stderr, "\t **** ERROR: fence  %s(%llu) is not done after %llu nSec with result 0x%x ****\n",
-                        fenceName, (long long unsigned int)fence, (long long unsigned int)fenceTotalWaitTimeout, vkIf->GetFenceStatus(device, fence));
+        fprintf(stderr, "\t **** ERROR: fence  %s(%llu) is not done after %llu mSec with result 0x%x ****\n",
+                        fenceName, (long long unsigned int)fence, (long long unsigned int)fenceTotalWaitTimeout/(1000ULL * 1000ULL), vkIf->GetFenceStatus(device, fence));
         assert(!"Fence is not signaled yet after more than 100 mSec wait");
     }
 
diff --git a/common/libs/VkCodecUtils/VkImageResource.cpp b/common/libs/VkCodecUtils/VkImageResource.cpp
index 302ba8a1..0ea91333 100644
--- a/common/libs/VkCodecUtils/VkImageResource.cpp
+++ b/common/libs/VkCodecUtils/VkImageResource.cpp
@@ -88,6 +88,11 @@ VkImageResource::VkImageResource(const VulkanDeviceContext* vkDevCtx,
     }
 }
 
+VkImageResource::~VkImageResource() // Out-of-line definition of the destructor
+{
+    Destroy(); // the only work done at destruction is the call to Destroy()
+}
+
 VkResult VkImageResource::Create(const VulkanDeviceContext* vkDevCtx,
                                  const VkImageCreateInfo* pImageCreateInfo,
                                  VkMemoryPropertyFlags memoryPropertyFlags,
diff --git a/common/libs/VkCodecUtils/VkImageResource.h b/common/libs/VkCodecUtils/VkImageResource.h
index 314a2f01..0c4c0ac8 100644
--- a/common/libs/VkCodecUtils/VkImageResource.h
+++ b/common/libs/VkCodecUtils/VkImageResource.h
@@ -113,7 +113,7 @@ class VkImageResource : public VkVideoRefCountBase
 
     void Destroy();
 
-    virtual ~VkImageResource() { Destroy(); }
+    virtual ~VkImageResource();
 };
 
 class VkImageResourceView : public VkVideoRefCountBase
diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp
index 26d78757..097439d7 100644
--- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp
+++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp
@@ -328,6 +328,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
         const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize);
         assert(readImagePtr != nullptr);
 
+        int32_t secondaryPlaneWidth = frameWidth;
         int32_t secondaryPlaneHeight = frameHeight;
         int32_t imageHeight = frameHeight;
         bool isUnnormalizedRgba = false;
@@ -335,8 +336,11 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
             isUnnormalizedRgba = true;
         }
 
+        if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) {
+            secondaryPlaneWidth = (secondaryPlaneWidth + 1) / 2;
+        }
         if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) {
-            secondaryPlaneHeight /= 2;
+            secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2;
         }
 
         VkImageSubresource subResource = {};
@@ -381,15 +385,9 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
         yuvPlaneLayouts[0].offset = 0;
         yuvPlaneLayouts[0].rowPitch = frameWidth * bytesPerPixel;
         yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight;
-        yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel;
-        if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) {
-            yuvPlaneLayouts[1].rowPitch /= 2;
-        }
+        yuvPlaneLayouts[1].rowPitch = secondaryPlaneWidth * bytesPerPixel;
         yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight);
-        yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel;
-        if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) {
-            yuvPlaneLayouts[2].rowPitch /= 2;
-        }
+        yuvPlaneLayouts[2].rowPitch = secondaryPlaneWidth * bytesPerPixel;
 
         // Copy the luma plane
         const uint32_t numCompatiblePlanes = 1;
@@ -410,7 +408,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
         for (uint32_t plane = numCompatiblePlanes; plane < numPlanes; plane++) {
             const uint32_t srcPlane = std::min(plane, mpInfo->planesLayout.numberOfExtraPlanes);
             uint8_t* pDst = pOutBuffer + yuvPlaneLayouts[plane].offset;
-            const int32_t planeWidth = mpInfo->planesLayout.secondaryPlaneSubsampledX ? frameWidth / 2 : frameWidth;
+            const int32_t planeWidth = mpInfo->planesLayout.secondaryPlaneSubsampledX ? (frameWidth + 1) / 2 : frameWidth;
 
             for (int32_t height = 0; height < secondaryPlaneHeight; height++) {
                 const uint8_t* pSrc;
diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp
index f13d598e..c85c2814 100644
--- a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp
+++ b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp
@@ -214,11 +214,27 @@ VkResult VulkanDeviceContext::AddReqDeviceExtensions(const char* const* required
             break;
         }
         m_requestedDeviceExtensions.push_back(name);
+        if (verbose) {
+            std::cout << "Added required device extension: " << name << std::endl;
+        }
     }
 
     return VK_SUCCESS;
 }
 
+VkResult VulkanDeviceContext::AddReqDeviceExtension(const char* requiredDeviceExtension, bool verbose) // Appends a single extension name to m_requestedDeviceExtensions; prints it to std::cout when verbose is true
+{
+    if (requiredDeviceExtension) { // a null name is ignored without reporting an error
+        m_requestedDeviceExtensions.push_back(requiredDeviceExtension); // NOTE(review): if the container stores const char*, the caller's string must outlive it - confirm against the member's type
+        if (verbose) {
+            std::cout << "Added required device extension: " << requiredDeviceExtension << std::endl;
+        }
+    }
+
+    return VK_SUCCESS; // always VK_SUCCESS, including when the name was null and nothing was added
+}
+
+
 // optional device extensions
 VkResult VulkanDeviceContext::AddOptDeviceExtensions(const char* const* optionalDeviceExtensions, bool verbose)
 {
@@ -229,6 +245,9 @@ VkResult VulkanDeviceContext::AddOptDeviceExtensions(const char* const* optional
             break;
         }
         m_optDeviceExtensions.push_back(name);
+        if (verbose) {
+            std::cout << "Added optional device extension: " << name << std::endl;
+        }
     }
 
     return VK_SUCCESS;
@@ -712,26 +731,57 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues,
             devInfo.queueCreateInfoCount++;
         }
 
+        VkPhysicalDeviceVideoDecodeVP9FeaturesKHR videoDecodeVP9Feature { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR,
+                                                                          nullptr,
+                                                                          false // videoDecodeVP9
+                                                                        };
+
         VkPhysicalDeviceVideoEncodeAV1FeaturesKHR videoEncodeAV1Feature { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_AV1_FEATURES_KHR,
                                                                           nullptr,
                                                                           false // videoEncodeAV1
-                                                                         };
+                                                                        };
 
+        // Chain only the structures that are requested
+        VkBaseInStructure* pNext = nullptr;
+        if (videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) {
+            videoEncodeAV1Feature.pNext = pNext;
+            pNext = (VkBaseInStructure*)&videoEncodeAV1Feature;
+        }
+        if (videoCodecs & VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+            videoDecodeVP9Feature.pNext = pNext;
+            pNext = (VkBaseInStructure*)&videoDecodeVP9Feature;
+        }
 
+        VkPhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeatures { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
+                                                                              pNext,
+                                                                              VK_FALSE
+        };
 
         VkPhysicalDeviceVideoMaintenance1FeaturesKHR videoMaintenance1Features { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR,
-                                                                                 ((videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) != 0) ?
-                                                                                         &videoEncodeAV1Feature :
-                                                                                         nullptr,
-                                                                                 false};
+                                                                                 &timelineSemaphoreFeatures,
+                                                                                 VK_FALSE
+                                                                               };
 
         VkPhysicalDeviceSynchronization2Features synchronization2Features { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
                                                                             &videoMaintenance1Features,
-                                                                            false
+                                                                            VK_FALSE
                                                                            };
 
         VkPhysicalDeviceFeatures2 deviceFeatures { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, &synchronization2Features};
         GetPhysicalDeviceFeatures2(m_physDevice, &deviceFeatures);
+
+        assert(timelineSemaphoreFeatures.timelineSemaphore);
+        assert(videoMaintenance1Features.videoMaintenance1);
+        assert(synchronization2Features.synchronization2);
+        assert(((videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) != 0) ==
+                (videoEncodeAV1Feature.videoEncodeAV1 != VK_FALSE));
+        assert(((videoCodecs & VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) != 0) ==
+                (videoDecodeVP9Feature.videoDecodeVP9 != VK_FALSE));
+
+        // Validate feature support here.
+        // TODO: Currently this method receives all codec bits, irrespective of the codec that is required to decode/encode the provided input.
+        //       Provide only the required codec and features, and validate their support.
+
         devInfo.pNext = &deviceFeatures;
 
         if ((numDecodeQueues > 0) &&
@@ -987,6 +1037,7 @@ VkResult VulkanDeviceContext::PopulateDeviceExtensions()
 
 VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName,
                                                       VkInstance vkInstance,
+                                                      VkVideoCodecOperationFlagsKHR videoCodecs,
                                                       bool enableWsi,
                                                       bool enableWsiDirectMode,
                                                       bool enableValidation,
@@ -1020,6 +1071,7 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName,
         VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
         VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,
         VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,
+        VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
         nullptr
     };
 
@@ -1039,6 +1091,7 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName,
         VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
         VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME,
         VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
+        VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME,
         nullptr
     };
 
@@ -1070,6 +1123,19 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName,
     /********** End WSI instance extensions support *******************************************/
 #endif // VIDEO_DISPLAY_QUEUE_SUPPORT
 
+    if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) {
+        AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME);
+    }
+    if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) {
+        AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME);
+    }
+    if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
+        AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME);
+    }
+    if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+        AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME);
+    }
+
     VkResult result = InitVulkanDevice(pAppName, vkInstance, enbaleVerboseDump);
     if (result != VK_SUCCESS) {
         printf("Could not initialize the Vulkan device!\n");
diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.h b/common/libs/VkCodecUtils/VulkanDeviceContext.h
index a3cf53a5..6e83e33f 100644
--- a/common/libs/VkCodecUtils/VulkanDeviceContext.h
+++ b/common/libs/VkCodecUtils/VulkanDeviceContext.h
@@ -24,6 +24,7 @@
 #include <vulkan_interfaces.h>
 #include <VkCodecUtils/HelpersDispatchTable.h>
 #include "VkShell/VkWsiDisplay.h"
+#include "VkCodecUtils/VulkanSemaphoreDump.h"
 
 class VulkanDeviceContext : public vk::VkInterfaceFunctions {
 
@@ -50,6 +51,21 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions {
         MAX_QUEUE_FAMILIES = 6, // Gfx, Present, Compute, Transfer, Decode, Encode
     };
 
+    static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_DECODE = // Bitwise OR of the H.264, H.265, AV1 and VP9 decode operation bits
+        VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
+        VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
+        VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR |
+        VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR;
+
+    static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_ENCODE = // Bitwise OR of the H.264, H.265 and AV1 encode operation bits
+        VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
+        VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
+        VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR;
+
+    static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_ALL = // Union of the decode and encode masks; used as the default videoCodecs argument of InitVulkanDecoderDevice()
+        VIDEO_CODEC_OPERATIONS_DECODE |
+        VIDEO_CODEC_OPERATIONS_ENCODE;
+
     VulkanDeviceContext();
 
     VkInstance getInstance() const {
@@ -157,11 +173,22 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions {
     };
 
     VkResult MultiThreadedQueueSubmit(const QueueFamilySubmitType submitType, const int32_t queueIndex,
-                                      uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) const
+                                      uint32_t submitCount, const VkSubmitInfo2KHR* pSubmits, VkFence fence,
+                                      const char* submissionName = nullptr,
+                                      uint64_t decodeEncodeOrder = UINT64_MAX,
+                                      uint64_t displayInputOrder = UINT64_MAX) const
     {
         MtQueueMutex queue(this, submitType, queueIndex);
         if (queue) {
-            return QueueSubmit(queue, submitCount, pSubmits, fence);
+
+            // Dump semaphore info for debugging
+            if (false) {
+                for (uint32_t i = 0; i < submitCount; i++) {
+                    VulkanSemaphoreDump::DumpSemaphoreInfo(pSubmits[i], submissionName, decodeEncodeOrder, displayInputOrder);
+                }
+            }
+
+            return QueueSubmit2KHR(queue, submitCount, pSubmits, fence);
         } else {
             return VK_ERROR_INITIALIZATION_FAILED;
         }
@@ -218,6 +245,7 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions {
 
     VkResult InitVulkanDecoderDevice(const char * pAppName,
                                      VkInstance vkInstance = VK_NULL_HANDLE,
+                                     VkVideoCodecOperationFlagsKHR videoCodecs = VIDEO_CODEC_OPERATIONS_ALL,
                                      bool enableWsi = false,
                                      bool enableWsiDirectMode = false,
                                      bool enableValidation = false,
@@ -231,6 +259,7 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions {
     VkResult AddReqInstanceExtension(const char* requiredInstanceExtension, bool verbose = false);
     VkResult CheckAllInstanceExtensions(bool verbose = false);
     VkResult AddReqDeviceExtensions(const char* const* requiredDeviceExtensions, bool verbose = false);
+    VkResult AddReqDeviceExtension(const char* requiredDeviceExtension, bool verbose = false);
     VkResult AddOptDeviceExtensions(const char* const* optionalDeviceExtensions, bool verbose = false);
     bool HasAllDeviceExtensions(VkPhysicalDevice physDevice, const char* printMissingDeviceExt = nullptr);
 
@@ -248,26 +277,16 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions {
                                 const VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR |
                                                                                  VK_QUEUE_TRANSFER_BIT,
                                 const VkVideoCodecOperationFlagsKHR requestVideoDecodeQueueOperations =
-                                                                  (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
-                                                                   VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
-                                                                   VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR),
+                                                                  VIDEO_CODEC_OPERATIONS_DECODE,
                                 const VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR |
                                                                                  VK_QUEUE_TRANSFER_BIT,
                                 const VkVideoCodecOperationFlagsKHR requestVideoEncodeQueueOperations =
-                                                                  (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
-                                                                   VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
-                                                                   VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR),
+                                                                  VIDEO_CODEC_OPERATIONS_ENCODE,
                                 VkPhysicalDevice vkPhysicalDevice = VK_NULL_HANDLE);
 
     VkResult CreateVulkanDevice(int32_t numDecodeQueues = 1,
                                 int32_t numEncodeQueues = 0,
-                                VkVideoCodecOperationFlagsKHR videoCodecs =
-                                        (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR  |
-                                          VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
-                                          VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) |
-                                        (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR  |
-                                          VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
-                                          VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR),
+                                VkVideoCodecOperationFlagsKHR videoCodecs = VIDEO_CODEC_OPERATIONS_ALL,
                                 bool createTransferQueue = false,
                                 bool createGraphicsQueue = false,
                                 bool createPresentQueue = false,
diff --git a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp
index db05c81a..f78b0de8 100644
--- a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp
+++ b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp
@@ -410,3 +410,8 @@ const uint8_t* VulkanDeviceMemoryImpl::GetReadOnlyDataPtr(VkDeviceSize offset, V
     maxSize = m_memoryRequirements.size - offset;
     return readData;
 }
+
+VulkanDeviceMemoryImpl::~VulkanDeviceMemoryImpl()
+{
+    Deinitialize();
+}
diff --git a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h
index 6b94e38e..f7d1c2d7 100644
--- a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h
+++ b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h
@@ -106,7 +106,7 @@ class VulkanDeviceMemoryImpl : public VkVideoRefCountBase
 
     void Deinitialize();
 
-    virtual ~VulkanDeviceMemoryImpl() { Deinitialize(); }
+    virtual ~VulkanDeviceMemoryImpl();
 
 private:
     std::atomic<int32_t>       m_refCount;
diff --git a/common/libs/VkCodecUtils/VulkanDisplayFrame.h b/common/libs/VkCodecUtils/VulkanDisplayFrame.h
index 246183c9..c86f5ea0 100644
--- a/common/libs/VkCodecUtils/VulkanDisplayFrame.h
+++ b/common/libs/VkCodecUtils/VulkanDisplayFrame.h
@@ -41,7 +41,9 @@ class VulkanDisplayFrame
     VkFence frameCompleteFence; // If valid, the fence is signaled when the decoder or encoder is done decoding / encoding the frame.
     VkFence frameConsumerDoneFence; // If valid, the fence is signaled when the consumer (graphics, compute or display) is done using the frame.
     VkSemaphore frameCompleteSemaphore; // If valid, the semaphore is signaled when the decoder or encoder is done decoding / encoding the frame.
-    VkSemaphore frameConsumerDoneSemaphore; // If valid, the semaphore is signaled when the consumer (graphics, compute or display) is done using the frame.
+    VkSemaphore consumerCompleteSemaphore; // If valid, the semaphore is signaled when the consumer (graphics, compute or display) is done using the frame.
+    uint64_t frameCompleteDoneSemValue; // The semaphore is signaled by the decoder or the decoder's filter when this semaphore value has been reached.
+    uint64_t frameConsumerDoneSemValue; // The semaphore is signaled by the consumer (graphics, compute or display) when this semaphore value has been reached.
     VkQueryPool queryPool;                  // queryPool handle used for the video queries.
     int32_t startQueryId;                   // query Id used for the this frame.
     uint32_t numQueries;                    // usually one query per frame
@@ -64,10 +66,12 @@ class VulkanDisplayFrame
                 imageViews[imageTypeIdx].inUse = false;
             }
         }
-        frameCompleteFence = VkFence();
-        frameConsumerDoneFence = VkFence();
-        frameCompleteSemaphore = VkSemaphore();
-        frameConsumerDoneSemaphore = VkSemaphore();
+        frameCompleteFence = VK_NULL_HANDLE;
+        frameConsumerDoneFence = VK_NULL_HANDLE;
+        frameCompleteSemaphore = VK_NULL_HANDLE;
+        consumerCompleteSemaphore = VK_NULL_HANDLE;
+        frameCompleteDoneSemValue =  (0ULL); // Frame 0 signaled by the decoder and/or filter
+        frameConsumerDoneSemValue =  (0ULL); // Frame 0 signaled by the consumer
         queryPool = VkQueryPool();
         startQueryId = 0;
         numQueries = 0;
@@ -92,7 +96,9 @@ class VulkanDisplayFrame
     , frameCompleteFence()
     , frameConsumerDoneFence()
     , frameCompleteSemaphore()
-    , frameConsumerDoneSemaphore()
+    , consumerCompleteSemaphore()
+    , frameCompleteDoneSemValue(0ULL)
+    , frameConsumerDoneSemValue(0ULL)
     , queryPool()
     , startQueryId()
     , numQueries()
diff --git a/common/libs/VkCodecUtils/VulkanFilter.h b/common/libs/VkCodecUtils/VulkanFilter.h
index 2670304c..bb88799b 100644
--- a/common/libs/VkCodecUtils/VulkanFilter.h
+++ b/common/libs/VkCodecUtils/VulkanFilter.h
@@ -24,6 +24,7 @@
 #include "VkCodecUtils/VulkanShaderCompiler.h"
 #include "VkCodecUtils/VkImageResource.h"
 #include "VkCodecUtils/VulkanCommandBufferPool.h"
+#include "VkCodecUtils/VulkanSemaphoreDump.h"
 
 struct VulkanShaderInput {
     const std::string     shader;
@@ -34,6 +35,9 @@ struct VulkanShaderInput {
 class VulkanFilter : public VulkanCommandBufferPool
 {
 public:
+    // Capacities of the on-stack submit-info arrays built by SubmitCommandBuffer(); the counts passed in are asserted against them.
+    static constexpr uint32_t MAX_SEMAPHORES = 4;
+    static constexpr uint32_t MAX_CMD_BUFFERS = 4;
 
     VulkanFilter(const VulkanDeviceContext* vkDevCtx,
                  uint32_t queueFamilyIndex,
@@ -76,40 +80,146 @@ class VulkanFilter : public VulkanCommandBufferPool
                                          uint32_t bufferIdx) = 0;
 
     virtual VkResult SubmitCommandBuffer(uint32_t commandBufferCount,
-                                         const VkCommandBuffer*  pCommandBuffers,
+                                         const VkCommandBuffer* pCommandBuffers,
                                          uint32_t waitSemaphoreCount,
                                          const VkSemaphore* pWaitSemaphores,
+                                         const VkPipelineStageFlags2KHR* pWaitStageMasks,
                                          uint32_t signalSemaphoreCount,
                                          const VkSemaphore* pSignalSemaphores,
+                                         const VkPipelineStageFlags2KHR* pSignalStageMasks,
                                          VkFence filterCompleteFence) const
     {
-
         assert(m_queue != VK_NULL_HANDLE);
+        assert(commandBufferCount <= MAX_CMD_BUFFERS);
+        assert(waitSemaphoreCount <= MAX_SEMAPHORES);
+        assert(signalSemaphoreCount <= MAX_SEMAPHORES);
+
+        // Prepare command buffer info on stack
+        VkCommandBufferSubmitInfoKHR cmdBufferInfos[MAX_CMD_BUFFERS];
+        for (uint32_t i = 0; i < commandBufferCount; i++) {
+            cmdBufferInfos[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR;
+            cmdBufferInfos[i].pNext = nullptr;
+            cmdBufferInfos[i].commandBuffer = pCommandBuffers[i];
+            cmdBufferInfos[i].deviceMask = 0;
+        }
 
-        // Wait for rendering finished
-        VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+        // Prepare wait semaphore info on stack
+        VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[MAX_SEMAPHORES];
+        for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
+            waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+            waitSemaphoreInfos[i].pNext = nullptr;
+            waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i];
+            waitSemaphoreInfos[i].value = 0; // Binary semaphore
+            waitSemaphoreInfos[i].stageMask = pWaitStageMasks[i];
+            waitSemaphoreInfos[i].deviceIndex = 0;
+        }
 
-        // Submit compute commands
-        VkSubmitInfo submitInfo {};
-        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-        submitInfo.pCommandBuffers = pCommandBuffers;
-        submitInfo.commandBufferCount = commandBufferCount;
-        submitInfo.waitSemaphoreCount = waitSemaphoreCount;
-        submitInfo.pWaitSemaphores = pWaitSemaphores;
-        submitInfo.pWaitDstStageMask = &waitStageMask;
-        submitInfo.signalSemaphoreCount = signalSemaphoreCount;
-        submitInfo.pSignalSemaphores = pSignalSemaphores;
+        // Prepare signal semaphore info on stack
+        VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[MAX_SEMAPHORES];
+        for (uint32_t i = 0; i < signalSemaphoreCount; i++) {
+            signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+            signalSemaphoreInfos[i].pNext = nullptr;
+            signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i];
+            signalSemaphoreInfos[i].value = 0; // Binary semaphore
+            signalSemaphoreInfos[i].stageMask = pSignalStageMasks[i];
+            signalSemaphoreInfos[i].deviceIndex = 0;
+        }
+
+        // Submit info
+        VkSubmitInfo2KHR submitInfo = {};
+        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR;
+        submitInfo.pNext = nullptr;
+        submitInfo.flags = 0;
+        submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount;
+        submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos;
+        submitInfo.commandBufferInfoCount = commandBufferCount;
+        submitInfo.pCommandBufferInfos = cmdBufferInfos;
+        submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount;
+        submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos;
+
+        if (false) {
+            // Dump semaphore info for debugging
+            VulkanSemaphoreDump::DumpSemaphoreInfo(submitInfo, "DECODE FILTER", 0);
+        }
 
         assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, filterCompleteFence));
-        VkResult result = m_vkDevCtx->QueueSubmit(m_queue, 1, &submitInfo, filterCompleteFence);
+        VkResult result = m_vkDevCtx->QueueSubmit2KHR(m_queue, 1, &submitInfo, filterCompleteFence);
+
+        return result;
+    }
+
+    virtual VkResult SubmitCommandBuffer(uint32_t commandBufferCount,
+                                         const VkCommandBuffer* pCommandBuffers,
+                                         uint32_t waitSemaphoreCount,
+                                         const VkSemaphore* pWaitSemaphores,
+                                         const uint64_t* pWaitSemaphoreValues,
+                                         const VkPipelineStageFlags2KHR* pWaitStageMasks,
+                                         uint32_t signalSemaphoreCount,
+                                         const VkSemaphore* pSignalSemaphores,
+                                         const uint64_t* pSignalSemaphoreValues,
+                                         const VkPipelineStageFlags2KHR* pSignalStageMasks,
+                                         VkFence filterCompleteFence) const
+    {
+        assert(m_queue != VK_NULL_HANDLE);
+        assert(commandBufferCount <= MAX_CMD_BUFFERS);
+        assert(waitSemaphoreCount <= MAX_SEMAPHORES);
+        assert(signalSemaphoreCount <= MAX_SEMAPHORES);
+
+        // Prepare command buffer info on stack
+        VkCommandBufferSubmitInfoKHR cmdBufferInfos[MAX_CMD_BUFFERS];
+        for (uint32_t i = 0; i < commandBufferCount; i++) {
+            cmdBufferInfos[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR;
+            cmdBufferInfos[i].pNext = nullptr;
+            cmdBufferInfos[i].commandBuffer = pCommandBuffers[i];
+            cmdBufferInfos[i].deviceMask = 0;
+        }
 
-        if (result != VK_SUCCESS) {
-            return result;
+        // Prepare wait semaphore info on stack
+        VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[MAX_SEMAPHORES];
+        for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
+            waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+            waitSemaphoreInfos[i].pNext = nullptr;
+            waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i];
+            waitSemaphoreInfos[i].value = pWaitSemaphoreValues[i]; // Timeline value
+            waitSemaphoreInfos[i].stageMask = pWaitStageMasks[i];
+            waitSemaphoreInfos[i].deviceIndex = 0;
         }
 
+        // Prepare signal semaphore info on stack
+        VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[MAX_SEMAPHORES];
+        for (uint32_t i = 0; i < signalSemaphoreCount; i++) {
+            signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+            signalSemaphoreInfos[i].pNext = nullptr;
+            signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i];
+            signalSemaphoreInfos[i].value = pSignalSemaphoreValues[i]; // Timeline value
+            signalSemaphoreInfos[i].stageMask = pSignalStageMasks[i];
+            signalSemaphoreInfos[i].deviceIndex = 0;
+        }
+
+        // Submit info
+        VkSubmitInfo2KHR submitInfo = {};
+        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR;
+        submitInfo.pNext = nullptr;
+        submitInfo.flags = 0;
+        submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount;
+        submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos;
+        submitInfo.commandBufferInfoCount = commandBufferCount;
+        submitInfo.pCommandBufferInfos = cmdBufferInfos;
+        submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount;
+        submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos;
+
+        if (false) {
+            // Dump semaphore info for debugging
+            VulkanSemaphoreDump::DumpSemaphoreInfo(submitInfo, "DECODE FILTER", 0);
+        }
+
+        assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, filterCompleteFence));
+        VkResult result = m_vkDevCtx->QueueSubmit2KHR(m_queue, 1, &submitInfo, filterCompleteFence);
+
         return result;
     }
 
+
 protected:
     VulkanShaderCompiler m_vulkanShaderCompiler;
     uint32_t             m_queueFamilyIndex;
diff --git a/common/libs/VkCodecUtils/VulkanFrame.cpp b/common/libs/VkCodecUtils/VulkanFrame.cpp
index 2e87885d..b95bdf96 100644
--- a/common/libs/VkCodecUtils/VulkanFrame.cpp
+++ b/common/libs/VkCodecUtils/VulkanFrame.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <vector>
 #include <iostream>
+#include <thread>  // Added for std::this_thread::sleep_for
 
 #include "VkCodecUtils/Helpers.h"
 #include "VkCodecUtils/VulkanDeviceContext.h"
@@ -25,6 +26,7 @@
 #include "VkCodecUtils/VulkanVideoUtils.h"
 #include "VulkanFrame.h"
 #include "VkVideoCore/DecodeFrameBufferIf.h"
+#include "VkCodecUtils/VulkanSemaphoreDump.h"
 
 template<class FrameDataType>
 VulkanFrame<FrameDataType>::VulkanFrame(const VulkanDeviceContext* vkDevCtx)
@@ -420,6 +422,7 @@ VkResult VulkanFrame<FrameDataType>::DrawFrame( int32_t            renderIndex,
     if (renderIndex < 0) {
         renderIndex = -renderIndex;
     }
+
     vulkanVideoUtils::VulkanPerDrawContext* pPerDrawContext = m_videoRenderer->m_renderInfo.GetDrawContext(renderIndex);
 
     VkSharedBaseObj<VkImageResourceView> imageResourceView;
@@ -583,54 +586,77 @@ VkResult VulkanFrame<FrameDataType>::DrawFrame( int32_t            renderIndex,
         }
     }
 
-    const uint32_t maxWaitSemaphores = 2;
-    uint32_t numWaitSemaphores = 0;
-    VkSemaphore waitSemaphores[maxWaitSemaphores] = {};
+    const uint32_t waitSemaphoreMaxCount = 2;
+    VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[waitSemaphoreMaxCount]{};
+
+    const uint32_t signalSemaphoreMaxCount = 2;
+    VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[signalSemaphoreMaxCount]{};
 
-    assert(waitSemaphoreCount <= 1);
-    if ((waitSemaphoreCount > 0) && (pWaitSemaphores != nullptr)) {
-        waitSemaphores[numWaitSemaphores++] = *pWaitSemaphores;
+    for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
+        waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        waitSemaphoreInfos[i].pNext = nullptr;
+        waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i];
+        waitSemaphoreInfos[i].value = 0; // Binary semaphore
+        waitSemaphoreInfos[i].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+        waitSemaphoreInfos[i].deviceIndex = 0;
     }
 
-    if (inFrame && (inFrame->frameCompleteSemaphore != VkSemaphore())) {
-        waitSemaphores[numWaitSemaphores++] = inFrame->frameCompleteSemaphore;
+    for (uint32_t i = 0; i < signalSemaphoreCount; i++) {
+        signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        signalSemaphoreInfos[i].pNext = nullptr;
+        signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i];
+        signalSemaphoreInfos[i].value = 0; // Binary semaphore
+        signalSemaphoreInfos[i].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+        signalSemaphoreInfos[i].deviceIndex = 0;
     }
-    assert(numWaitSemaphores <= maxWaitSemaphores);
 
-    const uint32_t maxSignalSemaphores = 2;
-    uint32_t numSignalSemaphores = 0;
-    VkSemaphore signalSemaphores[maxSignalSemaphores] = {};
+    if (inFrame && (inFrame->frameCompleteSemaphore != VK_NULL_HANDLE)) {
 
-    assert(signalSemaphoreCount <= 1);
-    if ((signalSemaphoreCount > 0) && (pSignalSemaphores != nullptr)) {
-        signalSemaphores[numSignalSemaphores++] = *pSignalSemaphores;
-    }
+        waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr;
+        waitSemaphoreInfos[waitSemaphoreCount].semaphore = inFrame->frameCompleteSemaphore;
+        waitSemaphoreInfos[waitSemaphoreCount].value =     inFrame->frameCompleteDoneSemValue;
+        waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR |
+                                                           VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR |
+                                                           VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR;
+        waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0;
+        waitSemaphoreCount++;
+
+        signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr;
+        signalSemaphoreInfos[signalSemaphoreCount].semaphore = inFrame->consumerCompleteSemaphore;
+        signalSemaphoreInfos[signalSemaphoreCount].value     = inFrame->frameConsumerDoneSemValue;
+        signalSemaphoreInfos[signalSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT;
+        signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0;
+        signalSemaphoreCount++;
 
-    if (inFrame && (inFrame->frameConsumerDoneSemaphore != VkSemaphore())) {
-        signalSemaphores[numSignalSemaphores++] = inFrame->frameConsumerDoneSemaphore;
         inFrame->hasConsummerSignalSemaphore = true;
     }
-    assert(numSignalSemaphores <= maxSignalSemaphores);
+
+    assert(waitSemaphoreCount <= waitSemaphoreMaxCount);
+    assert(signalSemaphoreCount <= signalSemaphoreMaxCount);
 
     if (frameConsumerDoneFence != VkFence()) {
         inFrame->hasConsummerSignalFence = true;
     }
 
-
-    // Wait for the image to be owned and signal for render completion
-    VkPipelineStageFlags primaryCmdSubmitWaitStages[2] = { VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                                                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT };
-    VkSubmitInfo primaryCmdSubmitInfo = VkSubmitInfo();
-    primaryCmdSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-    primaryCmdSubmitInfo.pWaitDstStageMask = primaryCmdSubmitWaitStages;
-    primaryCmdSubmitInfo.commandBufferCount = 1;
-
-    primaryCmdSubmitInfo.waitSemaphoreCount = numWaitSemaphores;
-    primaryCmdSubmitInfo.pWaitSemaphores = numWaitSemaphores ? waitSemaphores : NULL;
-    primaryCmdSubmitInfo.pCommandBuffers = pPerDrawContext->commandBuffer.GetCommandBuffer();
-
-    primaryCmdSubmitInfo.signalSemaphoreCount = numSignalSemaphores;
-    primaryCmdSubmitInfo.pSignalSemaphores = numSignalSemaphores ? signalSemaphores : NULL;
+    VkCommandBufferSubmitInfoKHR cmdBufferInfos;
+    cmdBufferInfos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR;
+    cmdBufferInfos.pNext = nullptr;
+    cmdBufferInfos.commandBuffer = *pPerDrawContext->commandBuffer.GetCommandBuffer();
+    cmdBufferInfos.deviceMask = 0;
+
+    // Submit info
+    VkSubmitInfo2KHR submitInfo = {};
+    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR;
+    submitInfo.pNext = nullptr;
+    submitInfo.flags = 0;
+    submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount;
+    submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos;
+    submitInfo.commandBufferInfoCount = 1;
+    submitInfo.pCommandBufferInfos = &cmdBufferInfos;
+    submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount;
+    submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos;
 
     // For fence/sync debugging
     if (false && inFrame && inFrame->frameCompleteFence) {
@@ -646,7 +672,14 @@ VkResult VulkanFrame<FrameDataType>::DrawFrame( int32_t            renderIndex,
         }
     }
 
-    result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::GRAPHICS, 0, 1, &primaryCmdSubmitInfo, frameConsumerDoneFence);
+    result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::GRAPHICS,
+                                                  0, // queueIndex
+                                                  1, // submitCount
+                                                  &submitInfo,
+                                                  frameConsumerDoneFence,
+                                                  "Graphics Submit",
+                                                  (inFrame != nullptr) ? inFrame->decodeOrder  : UINT64_MAX,
+                                                  (inFrame != nullptr) ? inFrame->displayOrder : UINT64_MAX);
     if (result != VK_SUCCESS) {
         assert(result == VK_SUCCESS);
         fprintf(stderr, "\nERROR: MultiThreadedQueueSubmit() result: 0x%x\n", result);
@@ -676,6 +709,11 @@ VkResult VulkanFrame<FrameDataType>::DrawFrame( int32_t            renderIndex,
 
     m_frameDataIndex = (m_frameDataIndex + 1) % m_frameData.size();
 
+    if (false) {
+        // Add a 20ms sleep
+        std::this_thread::sleep_for(std::chrono::milliseconds(20));
+    }
+
     return result;
 }
 
diff --git a/common/libs/VkCodecUtils/VulkanSemaphoreDump.h b/common/libs/VkCodecUtils/VulkanSemaphoreDump.h
new file mode 100644
index 00000000..6e1b8913
--- /dev/null
+++ b/common/libs/VkCodecUtils/VulkanSemaphoreDump.h
@@ -0,0 +1,90 @@
+/*
+* Copyright 2024 NVIDIA Corporation.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#pragma once
+
+#include <vulkan/vulkan.h>
+#include <iostream>
+#include <iomanip>
+
+namespace VulkanSemaphoreDump {
+
+/**
+ * @brief Prints the wait and signal semaphores of a VkSubmitInfo2KHR submission to std::cout.
+ *
+ * @param submitInfo The VkSubmitInfo2KHR structure containing semaphore information
+ * @param submissionName Optional name to identify the submission (e.g., "DECODE", "COMPUTE")
+ * @param decodeEncodeOrder Optional decode / encode order number; UINT64_MAX omits it from the output
+ * @param displayInputOrder Optional display / input order number; UINT64_MAX omits it from the output
+ */
+inline void DumpSemaphoreInfo(
+    const VkSubmitInfo2KHR& submitInfo,
+    const char* submissionName = nullptr,
+    uint64_t decodeEncodeOrder = UINT64_MAX,
+    uint64_t displayInputOrder = UINT64_MAX)
+{
+    // Header: separator, optional submission name and optional frame order numbers.
+    std::cout << "----------------------------\n";
+
+    if (submissionName) {
+        std::cout << submissionName << " ";
+    }
+
+    std::cout << "TL Semaphore sync";
+
+    if (decodeEncodeOrder != UINT64_MAX) {
+        std::cout << " (decode / encode = " << decodeEncodeOrder;
+        if (displayInputOrder != UINT64_MAX) {
+            std::cout << ", display / input = " << displayInputOrder;
+        }
+        std::cout << ")";
+    } else if (displayInputOrder != UINT64_MAX) {
+        std::cout << " (display / input = " << displayInputOrder << ")";
+    }
+
+    std::cout << ":\n";
+
+    // Dump wait semaphores
+    for (uint32_t i = 0; i < submitInfo.waitSemaphoreInfoCount; i++) {
+        const VkSemaphoreSubmitInfoKHR& semInfo = submitInfo.pWaitSemaphoreInfos[i];
+        std::cout << "  Wait sem[" << i << "]: " << semInfo.semaphore
+                  << " value = " << semInfo.value
+                  << " stage = 0x" << std::hex << semInfo.stageMask << std::dec;
+        // deviceIndex is only printed when it is non-zero.
+        if (semInfo.deviceIndex > 0) {
+            std::cout << " deviceIndex = " << semInfo.deviceIndex;
+        }
+        std::cout << '\n';
+    }
+
+    // Dump signal semaphores
+    for (uint32_t i = 0; i < submitInfo.signalSemaphoreInfoCount; i++) {
+        const VkSemaphoreSubmitInfoKHR& semInfo = submitInfo.pSignalSemaphoreInfos[i];
+        std::cout << "  Signal sem[" << i << "]: " << semInfo.semaphore
+                  << " value = " << semInfo.value
+                  << " stage = 0x" << std::hex << semInfo.stageMask << std::dec;
+        // deviceIndex is only printed when it is non-zero.
+        if (semInfo.deviceIndex > 0) {
+            std::cout << " deviceIndex = " << semInfo.deviceIndex;
+        }
+        std::cout << '\n';
+    }
+    // Trailing separator; std::endl flushes the whole dump once.
+    std::cout << "----------------------------" << std::endl;
+}
+
+
+} // namespace VulkanSemaphoreDump
diff --git a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp
index 3122b062..97d3906f 100644
--- a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp
+++ b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp
@@ -382,6 +382,7 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt
     const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize);
     assert(readImagePtr != nullptr);
 
+    int32_t secondaryPlaneWidth = frameWidth;
     int32_t secondaryPlaneHeight = frameHeight;
     int32_t imageHeight = frameHeight;
     bool isUnnormalizedRgba = false;
@@ -389,8 +390,11 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt
         isUnnormalizedRgba = true;
     }
 
+    if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) {
+        secondaryPlaneWidth = (secondaryPlaneWidth + 1) / 2;
+    }
     if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) {
-        secondaryPlaneHeight /= 2;
+        secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2;
     }
 
     VkImageSubresource subResource = {};
@@ -439,15 +443,9 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt
     yuvPlaneLayouts[0].offset = 0;
     yuvPlaneLayouts[0].rowPitch = frameWidth * bytesPerPixel;
     yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight;
-    yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel;
-    if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) {
-        yuvPlaneLayouts[1].rowPitch /= 2;
-    }
+    yuvPlaneLayouts[1].rowPitch = secondaryPlaneWidth * bytesPerPixel;
     yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight);
-    yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel;
-    if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) {
-        yuvPlaneLayouts[2].rowPitch /= 2;
-    }
+    yuvPlaneLayouts[2].rowPitch = secondaryPlaneWidth * bytesPerPixel;
 
     // Copy the luma plane, always assume the 422 or 444 formats and src CbCr always is interleaved (shares the same plane).
     uint32_t numCompatiblePlanes = 1;
@@ -642,6 +640,7 @@ VkResult VulkanVideoProcessor::CreateParser(const char*,
     static const VkExtensionProperties h264StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION };
     static const VkExtensionProperties h265StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION };
     static const VkExtensionProperties av1StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION };
+    static const VkExtensionProperties vp9StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION };
 
     const VkExtensionProperties* pStdExtensionVersion = NULL;
     if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) {
@@ -650,6 +649,8 @@ VkResult VulkanVideoProcessor::CreateParser(const char*,
         pStdExtensionVersion = &h265StdExtensionVersion;
     } else if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
         pStdExtensionVersion = &av1StdExtensionVersion;
+    } else if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+        pStdExtensionVersion = &vp9StdExtensionVersion;
     } else {
         assert(!"Unsupported Codec Type");
         return VK_ERROR_FORMAT_NOT_SUPPORTED;
diff --git a/common/libs/VkCodecUtils/VulkanVideoSession.cpp b/common/libs/VkCodecUtils/VulkanVideoSession.cpp
index 021ec538..3a8935d7 100644
--- a/common/libs/VkCodecUtils/VulkanVideoSession.cpp
+++ b/common/libs/VkCodecUtils/VulkanVideoSession.cpp
@@ -39,6 +39,7 @@ VkResult VulkanVideoSession::Create(const VulkanDeviceContext* vkDevCtx,
     static const VkExtensionProperties h264DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION };
     static const VkExtensionProperties h265DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION };
     static const VkExtensionProperties av1DecodeStdExtensionVersion =  { VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION };
+    static const VkExtensionProperties vp9DecodeStdExtensionVersion =  { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION };
     static const VkExtensionProperties h264EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION };
     static const VkExtensionProperties h265EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION };
     static const VkExtensionProperties av1EncodeStdExtensionVersion =  { VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_SPEC_VERSION };
@@ -63,6 +64,9 @@ VkResult VulkanVideoSession::Create(const VulkanDeviceContext* vkDevCtx,
     case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
         createInfo.pStdHeaderVersion = &av1DecodeStdExtensionVersion;
         break;
+    case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
+        createInfo.pStdHeaderVersion = &vp9DecodeStdExtensionVersion;
+        break;
     case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
         createInfo.pStdHeaderVersion = &h264EncodeStdExtensionVersion;
         break;
diff --git a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp
index 39f3b6f1..e2cdf9ce 100644
--- a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp
+++ b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp
@@ -254,7 +254,7 @@ VkResult ImageObject::CopyYuvToVkImage(uint32_t numPlanes, const uint8_t* yuvPla
     }
 
     if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) {
-        cbimageHeight /= 2;
+        cbimageHeight = (cbimageHeight + 1) / 2;
     }
 
     if (mpInfo && !isUnnormalizedRgba) {
diff --git a/vk_video_decoder/demos/vk-video-dec/Main.cpp b/vk_video_decoder/demos/vk-video-dec/Main.cpp
index 8579362e..3e499c32 100644
--- a/vk_video_decoder/demos/vk-video-dec/Main.cpp
+++ b/vk_video_decoder/demos/vk-video-dec/Main.cpp
@@ -28,14 +28,33 @@
 #include "VkShell/Shell.h"
 #include "VkCodecUtils/VkVideoFrameOutput.h"
 
-int main(int argc, const char **argv) {
+int main(int argc, const char **argv)
+{
 
     DecoderConfig decoderConfig(argv[0]);
     decoderConfig.ParseArgs(argc, argv);
 
+    VkSharedBaseObj<VideoStreamDemuxer> videoStreamDemuxer;
+    VkResult result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(),
+                                        decoderConfig.forceParserType,
+                                        decoderConfig.enableStreamDemuxing,
+                                        decoderConfig.initialWidth,
+                                        decoderConfig.initialHeight,
+                                        decoderConfig.initialBitdepth,
+                                        videoStreamDemuxer);
+    if (result != VK_SUCCESS) {
+        assert(!"Can't initialize the VideoStreamDemuxer!");
+        return -1;
+    }
+
+    VkVideoCodecOperationFlagsKHR videoCodecOperation = (decoderConfig.forceParserType != VK_VIDEO_CODEC_OPERATION_NONE_KHR) ?
+                                                                    decoderConfig.forceParserType :
+                                                                    videoStreamDemuxer->GetVideoCodec();
+
     VulkanDeviceContext vkDevCtxt;
-    VkResult result = vkDevCtxt.InitVulkanDecoderDevice(decoderConfig.appName.c_str(),
+    result = vkDevCtxt.InitVulkanDecoderDevice(decoderConfig.appName.c_str(),
                                                         VK_NULL_HANDLE,
+                                                        videoCodecOperation,
                                                         !decoderConfig.noPresent,
                                                         decoderConfig.directMode,
                                                         decoderConfig.validate,
@@ -54,16 +73,8 @@ int main(int argc, const char **argv) {
 
     VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR;
 
-    VkQueueFlags requestVideoEncodeQueueMask = 0;
-    if (decoderConfig.enableVideoEncoder) {
-        requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR;
-    }
-
     if (decoderConfig.selectVideoWithComputeQueue) {
         requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        if (decoderConfig.enableVideoEncoder) {
-            requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        }
     }
 
     VkQueueFlags requestVideoComputeQueueMask = 0;
@@ -71,17 +82,6 @@ int main(int argc, const char **argv) {
         requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT;
     }
 
-    VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs |
-                                        (decoderConfig.enableVideoEncoder ? videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR);
-
     if (!decoderConfig.noPresent) {
 
         VkSharedBaseObj<Shell> displayShell;
@@ -98,17 +98,12 @@ int main(int argc, const char **argv) {
         result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(),
                                               (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT |
                                               requestVideoComputeQueueMask |
-                                              requestVideoDecodeQueueMask |
-                                              requestVideoEncodeQueueMask),
+                                              requestVideoDecodeQueueMask),
                                               displayShell,
                                               requestVideoDecodeQueueMask,
-                                              (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR),
-                                              requestVideoEncodeQueueMask,
-                                              (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR));
+                                              videoCodecOperation,
+                                              0,
+                                              VK_VIDEO_CODEC_OPERATION_NONE_KHR);
         if (result != VK_SUCCESS) {
 
             assert(!"Can't initialize the Vulkan physical device!");
@@ -117,30 +112,15 @@ int main(int argc, const char **argv) {
         assert(displayShell->PhysDeviceCanPresent(vkDevCtxt.getPhysicalDevice(),
                                                   vkDevCtxt.GetPresentQueueFamilyIdx()));
 
-        vkDevCtxt.CreateVulkanDevice(numDecodeQueues,
-                                     decoderConfig.enableVideoEncoder ? 1 : 0, // num encode queues
-                                     videoCodecs,
-                                     false, //  createTransferQueue
-                                     true,  // createGraphicsQueue
-                                     true,  // createDisplayQueue
+        vkDevCtxt.CreateVulkanDevice(numDecodeQueues,           // numDecodeQueues
+                                     0,                         // num encode queues
+                                     videoCodecOperation,       // videoCodecs
+                                     false,                     // createTransferQueue
+                                     true,                      // createGraphicsQueue
+                                     true,                      // createDisplayQueue
                                      requestVideoComputeQueueMask != 0  // createComputeQueue
                                      );
 
-        VkSharedBaseObj<VideoStreamDemuxer> videoStreamDemuxer;
-        result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(),
-                                            decoderConfig.forceParserType,
-                                            decoderConfig.enableStreamDemuxing,
-                                            decoderConfig.initialWidth,
-                                            decoderConfig.initialHeight,
-                                            decoderConfig.initialBitdepth,
-                                            videoStreamDemuxer);
-
-        if (result != VK_SUCCESS) {
-
-            assert(!"Can't initialize the VideoStreamDemuxer!");
-            return result;
-        }
-
         VkSharedBaseObj<VulkanVideoProcessor> vulkanVideoProcessor;
         result = VulkanVideoProcessor::Create(decoderConfig, &vkDevCtxt, vulkanVideoProcessor);
         if (result != VK_SUCCESS) {
@@ -176,8 +156,7 @@ int main(int argc, const char **argv) {
         result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(),
                                               (VK_QUEUE_TRANSFER_BIT        |
                                                requestVideoDecodeQueueMask  |
-                                               requestVideoComputeQueueMask |
-                                               requestVideoEncodeQueueMask),
+                                               requestVideoComputeQueueMask),
                                               nullptr,
                                               requestVideoDecodeQueueMask);
         if (result != VK_SUCCESS) {
@@ -187,9 +166,9 @@ int main(int argc, const char **argv) {
         }
 
 
-        result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues,
-                                              0,     // num encode queues
-                                              videoCodecs,
+        result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues,  // numDecodeQueues
+                                              0,                // num encode queues
+                                              videoCodecOperation,  // videoCodecs
                                               // If no graphics or compute queue is requested, only video queues
                                               // will be created. Not all implementations support transfer on video queues,
                                               // so request a separate transfer queue for such implementations.
@@ -204,21 +183,6 @@ int main(int argc, const char **argv) {
             return -1;
         }
 
-        VkSharedBaseObj<VideoStreamDemuxer> videoStreamDemuxer;
-        result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(),
-                                            decoderConfig.forceParserType,
-                                            decoderConfig.enableStreamDemuxing,
-                                            decoderConfig.initialWidth,
-                                            decoderConfig.initialHeight,
-                                            decoderConfig.initialBitdepth,
-                                            videoStreamDemuxer);
-
-        if (result != VK_SUCCESS) {
-
-            assert(!"Can't initialize the VideoStreamDemuxer!");
-            return result;
-        }
-
         VkSharedBaseObj<VulkanVideoProcessor> vulkanVideoProcessor;
         result = VulkanVideoProcessor::Create(decoderConfig, &vkDevCtxt, vulkanVideoProcessor);
         if (result != VK_SUCCESS) {
diff --git a/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h b/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h
index 21ad0fed..d5141c1b 100644
--- a/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h
+++ b/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h
@@ -269,61 +269,6 @@ typedef struct VkParserHevcPictureData {
 
 } VkParserHevcPictureData;
 
-typedef struct VkParserVp9PictureData {
-    uint32_t width;
-    uint32_t height;
-
-    // Frame Indexes
-    VkPicIf* pLastRef;
-    VkPicIf* pGoldenRef;
-    VkPicIf* pAltRef;
-
-    uint32_t keyFrame;
-    uint32_t version;
-    uint32_t showFrame;
-    uint32_t errorResilient;
-    uint32_t bit_depth_minus8;
-    uint32_t colorSpace;
-    uint32_t subsamplingX;
-    uint32_t subsamplingY;
-    uint32_t activeRefIdx[3];
-    uint32_t intraOnly;
-    uint32_t resetFrameContext;
-    uint32_t frameParallelDecoding;
-    uint32_t refreshFrameFlags;
-    uint8_t refFrameSignBias[4];
-    uint32_t frameContextIdx;
-    uint32_t allow_high_precision_mv;
-    uint32_t mcomp_filter_type;
-    uint32_t loopFilterLevel;
-    uint32_t loopFilterSharpness;
-    uint32_t log2_tile_columns;
-    uint32_t log2_tile_rows;
-    int32_t mbRefLfDelta[4];
-    int32_t mbModeLfDelta[2];
-    int32_t segmentMapTemporalUpdate;
-    uint8_t segmentFeatureEnable[8][4];
-    uint8_t mb_segment_tree_probs[7];
-    uint8_t segment_pred_probs[3];
-    int16_t segmentFeatureData[8][4];
-    uint32_t scaledWidth;
-    uint32_t scaledHeight;
-    uint32_t scalingActive;
-    uint32_t segmentEnabled;
-    uint32_t prevIsKeyFrame;
-    uint32_t PrevShowFrame;
-    uint32_t modeRefLfEnabled;
-    int32_t qpYAc;
-    int32_t qpYDc;
-    int32_t qpChDc;
-    int32_t qpChAc;
-    uint32_t segmentMapUpdate;
-    uint32_t segmentFeatureMode;
-    uint32_t refreshEntropyProbs;
-    uint32_t frameTagSize;
-    uint32_t offsetToDctParts;
-} VkParserVp9PictureData;
-
 struct VkParserAv1PictureData {
     // The picture info structure is mostly pointing at other
     // structures defining the coding tool parameters. Those
@@ -373,6 +318,42 @@ struct VkParserAv1PictureData {
     uint32_t frame_height;
 };
 
+typedef struct VkParserVp9PictureData {
+    // VP9 per-picture parameters: Vulkan Video Std* VP9 structures, then the additional fields grouped below.
+    StdVideoDecodeVP9PictureInfo stdPictureInfo;
+    StdVideoVP9ColorConfig       stdColorConfig;
+    StdVideoVP9LoopFilter        stdLoopFilter;
+    StdVideoVP9Segmentation      stdSegmentation;
+
+    // frame dimensions
+    uint32_t FrameWidth, FrameHeight;
+    uint32_t MiCols, MiRows;            // presumably the VP9 spec's MiCols/MiRows - TODO confirm units
+    uint32_t Sb64Cols, Sb64Rows;        // presumably the VP9 spec's Sb64Cols/Sb64Rows - TODO confirm units
+    uint32_t renderWidth, renderHeight;
+
+    // display details
+    uint8_t  frame_to_show_map_idx;
+    bool     show_existing_frame;
+
+    // references
+    uint8_t  ref_frame_idx[STD_VIDEO_VP9_REFS_PER_FRAME];
+    uint8_t  pic_idx[STD_VIDEO_VP9_NUM_REF_FRAMES];
+    VkPicIf* pLastRef;
+    VkPicIf* pGoldenRef;
+    VkPicIf* pAltRef;
+
+    // other derived parameters
+    bool     FrameIsIntra;
+    uint8_t  ChromaFormat;
+    uint32_t numTiles;
+    uint32_t compressedHeaderSize;
+
+    // bitstream divisions
+    uint32_t uncompressedHeaderOffset;
+    uint32_t compressedHeaderOffset;
+    uint32_t tilesOffset;
+} VkParserVp9PictureData;
+
 typedef struct VkParserPictureData {
     int32_t PicWidthInMbs;            // Coded Frame Size
     int32_t FrameHeightInMbs;         // Coded Frame Height
diff --git a/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h b/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h
index 142f7db8..503f5827 100644
--- a/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h
+++ b/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h
@@ -22,31 +22,36 @@
 
 #include "VulkanVideoDecoder.h"
 
-typedef enum {
-  EIGHTTAP_SMOOTH,
-  EIGHTTAP,
-  EIGHTTAP_SHARP,
-  BILINEAR,
-  SWITCHABLE  /* should be the last one */
-} INTERPOLATIONFILTERTYPE;
-
-typedef enum {
-  //NONE = -1,
-  INTRA_FRAME = 0,
-  LAST_FRAME = 1,
-  GOLDEN_FRAME = 2,
-  ALTREF_FRAME = 3,
-  VP9_MAX_REF_FRAMES = 4
-}MV_REFERENCE_FRAME;
-
-typedef enum {
-  ONLY_4X4            = 0,
-  ALLOW_8X8           = 1,
-  ALLOW_16X16         = 2,
-  ALLOW_32X32         = 3,
-  TX_MODE_SELECT      = 4,
-  NB_TXFM_MODES       = 5,
-} TXFM_MODE;
+#define VP9_FRAME_MARKER 2 // expected value of the 2-bit field read by VP9_CHECK_FRAME_MARKER
+#define VP9_FRAME_SYNC_CODE 0x498342 // expected value of the 24-bit field read by VP9_CHECK_FRAME_SYNC_CODE
+#define VP9_MAX_PRBABILITY 255 // NOTE(review): identifier is misspelled ("PROBABILITY"); left as-is because its users are not visible here
+#define VP9_MIN_TILE_WIDTH_B64 4
+#define VP9_MAX_TILE_WIDTH_B64 64
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) // adds half of 2^n, then shifts right by n; n must be >= 1
+#define ALIGN_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) // rounds value up to a multiple of 2^n (int arithmetic)
+
+#define VP9_BUFFER_POOL_MAX_SIZE 10
+#define VP9_MAX_NUM_SPATIAL_LAYERS 4
+
+#define VP9_CHECK_FRAME_MARKER {    /* Reads 2 bits with u(); anything but VP9_FRAME_MARKER asserts and makes the enclosing function return false. */ \
+  if (u(2) != VP9_FRAME_MARKER) {   \
+    assert(!"Invalid frame marker");\
+    return false;                   \
+  }                                 \
+}
+
+#define VP9_CHECK_ZERO_BIT {    /* Reads 1 bit with u(); a non-zero bit asserts and makes the enclosing function return false. */ \
+  if (u(1) != 0) {              \
+    assert(!"Invalid syntax");  /* '!' stays outside the literal: a bare string literal is always true and never fires */ \
+    return false;               \
+  }                             \
+}
+
+#define VP9_CHECK_FRAME_SYNC_CODE   {   /* Reads 24 bits with u(); asserts when they differ from VP9_FRAME_SYNC_CODE. */ \
+  if (u(24) != VP9_FRAME_SYNC_CODE) {   /* NOTE(review): unlike VP9_CHECK_ZERO_BIT there is no 'return false' here - confirm intended */ \
+    assert(!"Invalid frame sync code"); /* '!' stays outside the literal: a bare string literal is always true and never fires */ \
+  }                                     \
+}
 
 // Segment level features.
 typedef enum {
@@ -57,1492 +62,78 @@ typedef enum {
   SEG_LVL_MAX = 4                  // Number of MB level features supported
 } SEG_LVL_FEATURES;
 
-typedef enum {
-  SINGLE_PREDICTION_ONLY = 0,
-  COMP_PREDICTION_ONLY   = 1,
-  HYBRID_PREDICTION      = 2,
-  NB_PREDICTION_TYPES    = 3,
-} COMPPREDMODE_TYPE;
-
-/* Symbols for coding which components are zero jointly */
-typedef enum {
-  MV_JOINT_ZERO = 0,             /* Zero vector */
-  MV_JOINT_HNZVZ = 1,            /* Vert zero, hor nonzero */
-  MV_JOINT_HZVNZ = 2,            /* Hor zero, vert nonzero */
-  MV_JOINT_HNZVNZ = 3,           /* Both components nonzero */
-} MV_JOINT_TYPE;
-
-/* Symbols for coding magnitude class of nonzero components */
-typedef enum {
-  MV_CLASS_0 = 0,      /* (0, 2]     integer pel */
-  MV_CLASS_1 = 1,      /* (2, 4]     integer pel */
-  MV_CLASS_2 = 2,      /* (4, 8]     integer pel */
-  MV_CLASS_3 = 3,      /* (8, 16]    integer pel */
-  MV_CLASS_4 = 4,      /* (16, 32]   integer pel */
-  MV_CLASS_5 = 5,      /* (32, 64]   integer pel */
-  MV_CLASS_6 = 6,      /* (64, 128]  integer pel */
-  MV_CLASS_7 = 7,      /* (128, 256] integer pel */
-  MV_CLASS_8 = 8,      /* (256, 512] integer pel */
-  MV_CLASS_9 = 9,      /* (512, 1024] integer pel */
-  MV_CLASS_10 = 10,    /* (1024,2048] integer pel */
-} MV_CLASS_TYPE;
-
-typedef enum PARTITION_TYPE {
-  PARTITION_NONE,
-  PARTITION_HORZ,
-  PARTITION_VERT,
-  PARTITION_SPLIT,
-  PARTITION_TYPES
-} PARTITION_TYPE;
-
-
-typedef enum
-{
-  DC_PRED,            /* average of above and left pixels */
-  V_PRED,             /* vertical prediction */
-  H_PRED,             /* horizontal prediction */
-  D45_PRED,           /* Directional 45 deg prediction  [anti-clockwise from 0 deg hor] */
-  D135_PRED,          /* Directional 135 deg prediction [anti-clockwise from 0 deg hor] */
-  D117_PRED,          /* Directional 112 deg prediction [anti-clockwise from 0 deg hor] */
-  D153_PRED,          /* Directional 157 deg prediction [anti-clockwise from 0 deg hor] */
-  D27_PRED,           /* Directional 22 deg prediction  [anti-clockwise from 0 deg hor] */
-  D63_PRED,           /* Directional 67 deg prediction  [anti-clockwise from 0 deg hor] */
-  TM_PRED,            /* Truemotion prediction */
-  NEARESTMV,
-  NEARMV,
-  ZEROMV,
-  NEWMV,
-  SPLITMV,
-  MB_MODE_COUNT
-} MB_PREDICTION_MODE;
-
-typedef enum {
-  KEY_FRAME = 0,
-  INTER_FRAME = 1,
-  NUM_FRAME_TYPES,
-} FRAME_TYPE;
-
-// Segment level features.
-typedef enum {
-  TX_4X4 = 0,                      // 4x4 dct transform
-  TX_8X8 = 1,                      // 8x8 dct transform
-  TX_16X16 = 2,                    // 16x16 dct transform
-  TX_32X32 = 3,                    // 32x32 dct transform
-  TX_SIZE_MAX_SB,                  // Number of transforms available to SBs
-} TX_SIZE;
-
-#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
-
-#define BIG_NUM 0xffff
-#define MIN_TILE_WIDTH_B64 4
-#define MAX_TILE_WIDTH_B64 64
-#define MI_SIZE_LOG2 3
-#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2)
-#define ALIGN_POWER_OF_TWO(value, n) \
-    (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
-#define VP9_MB_LVL_MAX              2
-#define VP9_MAX_MB_SEGMENTS         4
-#define VP9_MB_FEATURE_TREE_PROBS   3
-#define MAX_REF_LF_DELTAS       4
-#define MAX_MODE_LF_DELTAS      2  //for vp8 its 4
-#define ALLOWED_REFS_PER_FRAME  3
-#define NUM_REF_FRAMES 8
-#define NUM_REF_FRAMES_LG2 3
-#define NUM_FRAME_CONTEXTS_LG2 2
-#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6)
-#define MIN_TILE_WIDTH 256
-#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6)
-//#define MAX_TILE_WIDTH 4096
-#define MAX_MB_SEGMENTS 8
-#define MB_SEG_TREE_PROBS  (MAX_MB_SEGMENTS-1)
-#define MAX_PROB 255
-#define PREDICTION_PROBS 3
-#define TX_SIZE_CONTEXTS 2
-#define PARTITION_PLOFFSET   4  // number of probability models per block size
-#define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
-#define BLOCK_SIZE_GROUPS   4
-#define VP9_INTRA_MODES  10/* (TM_PRED + 1) */
-#define COMP_PRED_CONTEXTS   2
-/* Entropy nodes above is divided in two parts, first three probs in part1
- * and the modeled probs in part2. Part1 is padded so that tables align with
- *  32 byte addresses, so there is four bytes for each table. */
-#define ENTROPY_NODES_PART1 4
-#define ENTROPY_NODES_PART2 8
-#define INTER_MODE_CONTEXTS     7
-#define VP9_SWITCHABLE_FILTERS 3 /* number of switchable filters */
-#define COMP_PRED_CONTEXTS   2
-#define INTRA_INTER_CONTEXTS 4
-#define COMP_INTER_CONTEXTS 5
-#define REF_CONTEXTS 5
-#define VP9_BLOCK_TYPES 2
-#define VP9_REF_TYPES 2  // intra=0, inter=1
-#define VP9_COEF_BANDS 6
-#define VP9_PREV_COEF_CONTEXTS       6
-#define MBSKIP_CONTEXTS 3
-#define COEF_UPDATE_PROB 252
-#define VP9_PROB_HALF 128
-#define VP9_NMV_UPDATE_PROB  252
-#define VP9_MV_UPDATE_PRECISION  7
-#define MV_JOINTS     4
-#define MV_CLASSES     11
-#define CLASS0_BITS    1
-#define CLASS0_SIZE    (1 << CLASS0_BITS)
-#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
-/* The first nodes of the entropy probs are unconstrained, the rest are
- * modeled with statistic distribution. */
-#define UNCONSTRAINED_NODES 3
-#define MODEL_NODES                 (VP9_ENTROPY_NODES - UNCONSTRAINED_NODES)
-#define PIVOT_NODE                  2   // which node is pivot
-#define COEFPROB_MODELS             128
-#define END_OF_STREAM 0xFFFFFFFFU
-#define VP9_DEF_UPDATE_PROB 252
-#define MODULUS_PARAM               13
-#define OK   0 //HANTRO_OK
-#define NOK  1 //HANTRO_NOK
-#define CHECK_END_OF_STREAM(s) if((s)==END_OF_STREAM) return (s)
-#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV)
-#define VP9_REF_LIST_SIZE   8
-#define SEGMENT_DELTADATA 0
-#define SEGMENT_ABSDATA 1
-#define MAXQ 255
-#define LOTS_OF_BITS 0x40000000
-#define BD_VALUE_SIZE ((int32_t)sizeof(VP9_BD_VALUE)*CHAR_BIT)
-
-#define VP9_ENTROPY_NODES 11
-#define COEF_COUNT_SAT 24
-#define COEF_MAX_UPDATE_FACTOR 112
-#define COEF_COUNT_SAT_KEY 24
-#define COEF_MAX_UPDATE_FACTOR_KEY 112
-#define COEF_COUNT_SAT_AFTER_KEY 24
-#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
-#define MODE_COUNT_SAT 20
-#define MODE_MAX_UPDATE_FACTOR 128
-#define MAX_PROBS 32
-#define MVREF_COUNT_SAT 20
-#define MVREF_MAX_UPDATE_FACTOR 128
-#define MV_COUNT_SAT 20
-#define MV_MAX_UPDATE_FACTOR 128
-
-/* Coefficient token alphabet */
-
-#define ZERO_TOKEN              0       /* 0         Extra Bits 0+0 */
-#define ONE_TOKEN               1       /* 1         Extra Bits 0+1 */
-#define TWO_TOKEN               2       /* 2         Extra Bits 0+1 */
-#define THREE_TOKEN             3       /* 3         Extra Bits 0+1 */
-#define FOUR_TOKEN              4       /* 4         Extra Bits 0+1 */
-#define DCT_VAL_CATEGORY1       5       /* 5-6       Extra Bits 1+1 */
-#define DCT_VAL_CATEGORY2       6       /* 7-10      Extra Bits 2+1 */
-#define DCT_VAL_CATEGORY3       7       /* 11-18     Extra Bits 3+1 */
-#define DCT_VAL_CATEGORY4       8       /* 19-34     Extra Bits 4+1 */
-#define DCT_VAL_CATEGORY5       9       /* 35-66     Extra Bits 5+1 */
-#define DCT_VAL_CATEGORY6       10      /* 67+       Extra Bits 13+1 */
-#define DCT_EOB_TOKEN           11      /* EOB       Extra Bits 0+0 */
-#define MAX_ENTROPY_TOKENS      12
-#define FRAME_CONTEXTS_LOG2     2
-#define FRAME_CONTEXTS          (1 << FRAME_CONTEXTS_LOG2)
-
-#define DCT_EOB_MODEL_TOKEN 3 /* EOB Extra Bits 0+0 */
-
-typedef signed char vp9_tree_index;
-
-static const int32_t seg_feature_data_signed[SEG_LVL_MAX] = {1, 1, 0, 0};
-static const int32_t seg_feature_data_max[SEG_LVL_MAX] = {MAXQ, 63, 3, 0};
-
-#define NVDEC_VP9HWPAD(x, y) unsigned char x[y]
-
-typedef struct {
-    /* last bytes of address 41 */
-    unsigned char joints[3];
-    unsigned char sign[2];
-    /* address 42 */
-    unsigned char class0[2][1];
-    unsigned char fp[2][3];
-    unsigned char class0_hp[2];
-    unsigned char hp[2];
-    unsigned char classes[2][10];
-    /* address 43 */
-    unsigned char class0_fp[2][2][3];
-    unsigned char bits[2][10];
-
-} nvdec_nmv_context;
-
-/* Adaptive entropy contexts, padding elements are added to have
- * 256 bit aligned tables for HW access.
- * Compile with TRACE_PROB_TABLES to print bases for each table. */
-typedef struct nvdec_vp9AdaptiveEntropyProbs_s
-{
-    /* address 32 */
-    unsigned char inter_mode_prob[7][4];
-    unsigned char intra_inter_prob[4];
-
-    /* address 33 */
-    unsigned char uv_mode_prob[10][8];
-    unsigned char tx8x8_prob[2][1];
-    unsigned char tx16x16_prob[2][2];
-    unsigned char tx32x32_prob[2][3];
-    unsigned char sb_ymode_probB[4][1];
-    unsigned char sb_ymode_prob[4][8];
-
-    /* address 37 */
-    unsigned char partition_prob[2][16][4];
-
-    /* address 41 */
-    unsigned char uv_mode_probB[10][1];
-    unsigned char switchable_interp_prob[4][2];
-    unsigned char comp_inter_prob[5];
-    unsigned char mbskip_probs[3];
-    NVDEC_VP9HWPAD(pad1, 1);
-
-    nvdec_nmv_context nmvc;
-
-    /* address 44 */
-    unsigned char single_ref_prob[5][2];
-    unsigned char comp_ref_prob[5];
-    NVDEC_VP9HWPAD(pad2, 17);
-
-    /* address 45 */
-    unsigned char probCoeffs[2][2][6][6][4];
-    unsigned char probCoeffs8x8[2][2][6][6][4];
-    unsigned char probCoeffs16x16[2][2][6][6][4];
-    unsigned char probCoeffs32x32[2][2][6][6][4];
-
-} nvdec_vp9AdaptiveEntropyProbs_t;
-
-typedef struct nvdec_vp9EntropyProbs_s
-{
-    /* Default keyframe probs */
-    /* Table formatted for 256b memory, probs 0to7 for all tables followed by
-     * probs 8toN for all tables.
-     * Compile with TRACE_PROB_TABLES to print bases for each table. */
-
-    unsigned char kf_bmode_prob[10][10][8];
-
-    /* Address 25 */
-    unsigned char kf_bmode_probB[10][10][1];
-    unsigned char ref_pred_probs[3];
-    unsigned char mb_segment_tree_probs[7];
-    unsigned char segment_pred_probs[3];
-    unsigned char ref_scores[4];
-    unsigned char prob_comppred[2];
-    NVDEC_VP9HWPAD(pad1, 9);
-
-    /* Address 29 */
-    unsigned char kf_uv_mode_prob[10][8];
-    unsigned char kf_uv_mode_probB[10][1];
-    NVDEC_VP9HWPAD(pad2, 6);
-
-    nvdec_vp9AdaptiveEntropyProbs_t a;    /* Probs with backward adaptation */
-
-
-} nvdec_vp9EntropyProbs_t;
-
-typedef struct {
-    unsigned int joints[4];
-    unsigned int sign[2][2];
-    unsigned int classes[2][11];
-    unsigned int class0[2][2];
-    unsigned int bits[2][10][2];
-    unsigned int class0_fp[2][2][4];
-    unsigned int fp[2][4];
-    unsigned int class0_hp[2][2];
-    unsigned int hp[2][2];
-
-} nvdec_nmv_context_counts;
-
-typedef struct nvdec_vp9EntropyCounts_s
-{
-    unsigned int inter_mode_counts[7][3][2];
-    unsigned int sb_ymode_counts[4][10];
-    unsigned int uv_mode_counts[10][10];
-    unsigned int partition_counts[16][4];
-    unsigned int switchable_interp_counts[4][3];
-    unsigned int intra_inter_count[4][2];
-    unsigned int comp_inter_count[5][2];
-    unsigned int single_ref_count[5][2][2];
-    unsigned int comp_ref_count[5][2];
-    unsigned int tx32x32_count[2][4];
-    unsigned int tx16x16_count[2][3];
-    unsigned int tx8x8_count[2][2];
-    unsigned int mbskip_count[3][2];
-
-    nvdec_nmv_context_counts nmvcount;
-
-    unsigned int countCoeffs[2][2][6][6][4];
-    unsigned int countCoeffs8x8[2][2][6][6][4];
-    unsigned int countCoeffs16x16[2][2][6][6][4];
-    unsigned int countCoeffs32x32[2][2][6][6][4];
-
-    unsigned int countEobs[4][2][2][6][6];
-
-} nvdec_vp9EntropyCounts_t;
-
-// Structure required to update Forward and Backward probabilities
-typedef struct _vp9_prob_update_s
-{
-    nvdec_vp9EntropyProbs_t  *pProbTab;
-    nvdec_vp9EntropyCounts_t *pCtxCounters;
-    unsigned char   keyFrame : 1;
-    unsigned char   prevIsKeyFrame : 1;
-    unsigned char   resolutionChange : 1;
-    unsigned char   errorResilient : 1;
-    unsigned char   prevShowFrame : 1;
-    unsigned char   intraOnly : 1;
-    unsigned char   reserved2 : 2;
-    char            lossless;
-    char            transform_mode;
-    char            allow_high_precision_mv;
-    char            mcomp_filter_type;
-    char            comp_pred_mode;
-    unsigned char   FrameParallelDecoding;
-    unsigned char   RefreshEntropyProbs;
-    uint32_t            resetFrameContext;
-    uint32_t            frameContextIdx;
-    uint32_t            offsetToDctParts;
-    uint32_t            allow_comp_inter_inter;
-    uint32_t            probsDecoded;
-} vp9_prob_update_s;
-
-typedef uint32_t VP9_BD_VALUE;
-
-typedef struct {
-    uint32_t buffer_end;
-    uint32_t buffer;
-    int32_t value;
-    int32_t count;
-    uint32_t range;
-    uint32_t pos;
-} vp9_reader;
-
-const vp9_tree_index vp9_coef_tree[ 22] =     /* corresponding _CONTEXT_NODEs */
-{
-  -DCT_EOB_TOKEN, 2,                             /* 0 = EOB */
-  -ZERO_TOKEN, 4,                               /* 1 = ZERO */
-  -ONE_TOKEN, 6,                               /* 2 = ONE */
-  8, 12,                                      /* 3 = LOW_VAL */
-  -TWO_TOKEN, 10,                            /* 4 = TWO */
-  -THREE_TOKEN, -FOUR_TOKEN,                /* 5 = THREE */
-  14, 16,                                    /* 6 = HIGH_LOW */
-  -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2,   /* 7 = CAT_ONE */
-  18, 20,                                   /* 8 = CAT_THREEFOUR */
-  -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4,  /* 9 = CAT_THREE */
-  -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6   /* 10 = CAT_FIVE */
-};
-
-const vp9_tree_index vp9_coefmodel_tree[6] = {
-  -DCT_EOB_MODEL_TOKEN, 2,                      /* 0 = EOB */
-  -ZERO_TOKEN, 4,                               /* 1 = ZERO */
-  -ONE_TOKEN, -TWO_TOKEN,                       /* 2 = ONE */
-};
+typedef struct _vp9_ref_frames_s { // Bundles a picture pointer with its frame type and a segmentation flag. NOTE(review): presumably one entry per VP9 reference slot - confirm against users.
+    VkPicIf* buffer; // Picture object for this entry; ownership/ref-counting is not visible here.
+    StdVideoVP9FrameType frame_type; // Frame type stored alongside the picture.
+    bool segmentation_enabled; // NOTE(review): presumably whether segmentation was enabled for the frame held in `buffer` - confirm.
+} vp9_ref_frames_s;
 
-const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
-  -0, 2,
-  -1, -2
-};
-
-const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = {
-  -MV_JOINT_ZERO, 2,
-  -MV_JOINT_HNZVZ, 4,
-  -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ
-};
-
-const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = {
-  -0, -1,
-};
-
-const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = {
-  -MV_CLASS_0, 2,
-  -MV_CLASS_1, 4,
-  6, 8,
-  -MV_CLASS_2, -MV_CLASS_3,
-  10, 12,
-  -MV_CLASS_4, -MV_CLASS_5,
-  -MV_CLASS_6, 14,
-  16, 18,
-  -MV_CLASS_7, -MV_CLASS_8,
-  -MV_CLASS_9, -MV_CLASS_10,
-};
-
-const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = {
-  -0, 2,
-  -1, 4,
-  -2, -3
-};
-
-static const uint32_t vp9dx_bitreader_norm[256] =
+class VulkanVP9Decoder : public VulkanVideoDecoder
 {
-    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-//*****************************************************************
-//vp9_entropymode.c
-typedef uint8_t vp9_prob;
-//typedef uint8_t vp9_tree_index; // typedef i8 vp9_tree_index
-static const vp9_prob default_kf_uv_probs[VP9_INTRA_MODES]
-                                         [VP9_INTRA_MODES - 1] = {
-  { 144,  11,  54, 157, 195, 130,  46,  58, 108 } /* y = dc */,
-  { 118,  15, 123, 148, 131, 101,  44,  93, 131 } /* y = v */,
-  { 113,  12,  23, 188, 226, 142,  26,  32, 125 } /* y = h */,
-  { 120,  11,  50, 123, 163, 135,  64,  77, 103 } /* y = d45 */,
-  { 113,   9,  36, 155, 111, 157,  32,  44, 161 } /* y = d135 */,
-  { 116,   9,  55, 176,  76,  96,  37,  61, 149 } /* y = d117 */,
-  { 115,   9,  28, 141, 161, 167,  21,  25, 193 } /* y = d153 */,
-  { 120,  12,  32, 145, 195, 142,  32,  38,  86 } /* y = d27 */,
-  { 116,  12,  64, 120, 140, 125,  49, 115, 121 } /* y = d63 */,
-  { 102,  19,  66, 162, 182, 122,  35,  59, 128 } /* y = tm */
-};
-
-static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS]
-                                        [VP9_INTRA_MODES - 1] = {
-  {  65,  32,  18, 144, 162, 194,  41,  51,  98 } /* block_size < 8x8 */,
-  { 132,  68,  18, 165, 217, 196,  45,  40,  78 } /* block_size < 16x16 */,
-  { 173,  80,  19, 176, 240, 193,  64,  35,  46 } /* block_size < 32x32 */,
-  { 221, 135,  38, 194, 248, 121,  96,  85,  29 } /* block_size >= 32x32 */
-};
-
-static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES]
-                                         [VP9_INTRA_MODES - 1] = {
-  { 120,   7,  76, 176, 208, 126,  28,  54, 103 } /* y = dc */,
-  {  48,  12, 154, 155, 139,  90,  34, 117, 119 } /* y = v */,
-  {  67,   6,  25, 204, 243, 158,  13,  21,  96 } /* y = h */,
-  {  97,   5,  44, 131, 176, 139,  48,  68,  97 } /* y = d45 */,
-  {  83,   5,  42, 156, 111, 152,  26,  49, 152 } /* y = d135 */,
-  {  80,   5,  58, 178,  74,  83,  33,  62, 145 } /* y = d117 */,
-  {  86,   5,  32, 154, 192, 168,  14,  22, 163 } /* y = d153 */,
-  {  85,   5,  32, 156, 216, 148,  19,  29,  73 } /* y = d27 */,
-  {  77,   7,  64, 116, 132, 122,  37, 126, 120 } /* y = d63 */,
-  { 101,  21, 107, 181, 192, 103,  19,  67, 125 } /* y = tm */
-};
-
-static const uint8_t vp9_default_inter_mode_prob[INTER_MODE_CONTEXTS][4] = {
-  {2,       173,   34,   0},  // 0 = both zero mv
-  {7,       145,   85,   0},  // 1 = one zero mv + one a predicted mv
-  {7,       166,   63,   0},  // 2 = two predicted mvs
-  {7,       94,    66,   0},  // 3 = one predicted/zero and one new mv
-  {8,       64,    46,   0},  // 4 = two new mvs
-  {17,      81,    31,   0},  // 5 = one intra neighbour + x
-  {25,      29,    30,   0},  // 6 = two intra neighbours
-};
-static const vp9_prob vp9_partition_probs[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS]
-                                  [PARTITION_TYPES] = { /* 1 byte padding */
-  { /* frame_type = keyframe */
-    /* 8x8 -> 4x4 */
-    { 158,  97,  94, 0 } /* a/l both not split */,
-    {  93,  24,  99, 0 } /* a split, l not split */,
-    {  85, 119,  44, 0 } /* l split, a not split */,
-    {  62,  59,  67, 0 } /* a/l both split */,
-    /* 16x16 -> 8x8 */
-    { 149,  53,  53, 0 } /* a/l both not split */,
-    {  94,  20,  48, 0 } /* a split, l not split */,
-    {  83,  53,  24, 0 } /* l split, a not split */,
-    {  52,  18,  18, 0 } /* a/l both split */,
-    /* 32x32 -> 16x16 */
-    { 150,  40,  39, 0 } /* a/l both not split */,
-    {  78,  12,  26, 0 } /* a split, l not split */,
-    {  67,  33,  11, 0 } /* l split, a not split */,
-    {  24,   7,   5, 0 } /* a/l both split */,
-    /* 64x64 -> 32x32 */
-    { 174,  35,  49, 0 } /* a/l both not split */,
-    {  68,  11,  27, 0 } /* a split, l not split */,
-    {  57,  15,   9, 0 } /* l split, a not split */,
-    {  12,   3,   3, 0 } /* a/l both split */
-  }, { /* frame_type = interframe */
-    /* 8x8 -> 4x4 */
-    { 199, 122, 141, 0 } /* a/l both not split */,
-    { 147,  63, 159, 0 } /* a split, l not split */,
-    { 148, 133, 118, 0 } /* l split, a not split */,
-    { 121, 104, 114, 0 } /* a/l both split */,
-    /* 16x16 -> 8x8 */
-    { 174,  73,  87, 0 } /* a/l both not split */,
-    {  92,  41,  83, 0 } /* a split, l not split */,
-    {  82,  99,  50, 0 } /* l split, a not split */,
-    {  53,  39,  39, 0 } /* a/l both split */,
-    /* 32x32 -> 16x16 */
-    { 177,  58,  59, 0 } /* a/l both not split */,
-    {  68,  26,  63, 0 } /* a split, l not split */,
-    {  52,  79,  25, 0 } /* l split, a not split */,
-    {  17,  14,  12, 0 } /* a/l both split */,
-    /* 64x64 -> 32x32 */
-    { 222,  34,  30, 0 } /* a/l both not split */,
-    {  72,  16,  44, 0 } /* a split, l not split */,
-    {  58,  32,  12, 0 } /* l split, a not split */,
-    {  10,   7,   6, 0 } /* a/l both split */
-  }
-};
-static const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = {
-  -DC_PRED, 2,                      // 0 = DC_NODE
-  -TM_PRED, 4,                      // 1 = TM_NODE
-  -V_PRED, 6,                       // 2 = V_NODE
-  8, 12,                            // 3 = COM_NODE
-  -H_PRED, 10,                      // 4 = H_NODE
-  -D135_PRED, -D117_PRED,           // 5 = D135_NODE
-  -D45_PRED, 14,                    // 6 = D45_NODE
-  -D63_PRED, 16,                    // 7 = D63_NODE
-  -D153_PRED, -D27_PRED             // 8 = D153_NODE
-};
-
-static const vp9_tree_index vp9_partition_tree[6] = {
-  -PARTITION_NONE, 2,
-  -PARTITION_HORZ, 4,
-  -PARTITION_VERT, -PARTITION_SPLIT
-};
-
-static const vp9_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = {
-  9, 102, 187, 225
-};
-
-static const vp9_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = {
-  239, 183, 119,  96,  41
-};
-
-static const vp9_prob default_comp_ref_p[REF_CONTEXTS] = {
-  50, 126, 123, 221, 226
-};
-
-static const vp9_prob default_single_ref_p[REF_CONTEXTS][2] = {
-  {  33,  16 },
-  {  77,  74 },
-  { 142, 142 },
-  { 172, 170 },
-  { 238, 247 }
-};
-
-static const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1]
-                                          [VP9_SWITCHABLE_FILTERS-1] = {
-  { 235, 162, },
-  { 36, 255, },
-  { 34, 3, },
-  { 149, 144, },
-};
-static const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_CONTEXTS]
-                                          [TX_SIZE_MAX_SB - 1] = {
-  { 3, 136, 37, },
-  { 5, 52, 13, },
-};
-static const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_CONTEXTS]
-                                          [TX_SIZE_MAX_SB - 2] = {
-  { 20, 152, },
-  { 15, 101, },
-};
-static const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_CONTEXTS]
-                                        [TX_SIZE_MAX_SB - 3] = {
-  { 100, },
-  { 66, },
-};
-static const vp9_prob vp9_default_mbskip_probs[MBSKIP_CONTEXTS] = {  //its C0..shud be f8??
-  192, 128, 64
-};
-
-static const nvdec_nmv_context vp9_default_nmv_context = {
-  {32, 64, 96}, /* joints */
-  {128, 128},   /* sign */
-  {{216},{208}},                                            /* class0 */
-  {{64, 96, 64},{64, 96, 64}},                              /* fp */
-  {160,160},                                                /* class0_hp bit */
-  {128,128},                                                /* hp */
-  {{224, 144, 192, 168, 192, 176, 192, 198, 198, 245},
-   {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}},     /* class */
-  {{{128, 128, 64}, {96, 112, 64}},
-   {{128, 128, 64}, {96, 112, 64}}},                        /* class0_fp */
-  {{136, 140, 148, 160, 176, 192, 224, 234, 234, 240},
-   {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}},     /* bits */
-};
+protected:
+    VkParserVp9PictureData m_PicData;
 
-static const int32_t vp9_seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 };
-static const int32_t vp9_seg_feature_data_max[SEG_LVL_MAX] = { 255, 63, 3, 0 };
-typedef uint8_t vp9_coeff_probs[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES];
+    VkPicIf*      m_pCurrPic;
+    VkPicIf*      m_pOutFrame[VP9_MAX_NUM_SPATIAL_LAYERS];
 
-static const vp9_coeff_probs default_coef_probs_4x4[VP9_BLOCK_TYPES] = {
-  { /* block Type 0 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 195,  29, 183 },
-        {  84,  49, 136 },
-        {   8,  42,  71 }
-      }, { /* Coeff Band 1 */
-        {  31, 107, 169 },
-        {  35,  99, 159 },
-        {  17,  82, 140 },
-        {   8,  66, 114 },
-        {   2,  44,  76 },
-        {   1,  19,  32 }
-      }, { /* Coeff Band 2 */
-        {  40, 132, 201 },
-        {  29, 114, 187 },
-        {  13,  91, 157 },
-        {   7,  75, 127 },
-        {   3,  58,  95 },
-        {   1,  28,  47 }
-      }, { /* Coeff Band 3 */
-        {  69, 142, 221 },
-        {  42, 122, 201 },
-        {  15,  91, 159 },
-        {   6,  67, 121 },
-        {   1,  42,  77 },
-        {   1,  17,  31 }
-      }, { /* Coeff Band 4 */
-        { 102, 148, 228 },
-        {  67, 117, 204 },
-        {  17,  82, 154 },
-        {   6,  59, 114 },
-        {   2,  39,  75 },
-        {   1,  15,  29 }
-      }, { /* Coeff Band 5 */
-        { 156,  57, 233 },
-        { 119,  57, 212 },
-        {  58,  48, 163 },
-        {  29,  40, 124 },
-        {  12,  30,  81 },
-        {   3,  12,  31 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 191, 107, 226 },
-        { 124, 117, 204 },
-        {  25,  99, 155 }
-      }, { /* Coeff Band 1 */
-        {  29, 148, 210 },
-        {  37, 126, 194 },
-        {   8,  93, 157 },
-        {   2,  68, 118 },
-        {   1,  39,  69 },
-        {   1,  17,  33 }
-      }, { /* Coeff Band 2 */
-        {  41, 151, 213 },
-        {  27, 123, 193 },
-        {   3,  82, 144 },
-        {   1,  58, 105 },
-        {   1,  32,  60 },
-        {   1,  13,  26 }
-      }, { /* Coeff Band 3 */
-        {  59, 159, 220 },
-        {  23, 126, 198 },
-        {   4,  88, 151 },
-        {   1,  66, 114 },
-        {   1,  38,  71 },
-        {   1,  18,  34 }
-      }, { /* Coeff Band 4 */
-        { 114, 136, 232 },
-        {  51, 114, 207 },
-        {  11,  83, 155 },
-        {   3,  56, 105 },
-        {   1,  33,  65 },
-        {   1,  17,  34 }
-      }, { /* Coeff Band 5 */
-        { 149,  65, 234 },
-        { 121,  57, 215 },
-        {  61,  49, 166 },
-        {  28,  36, 114 },
-        {  12,  25,  76 },
-        {   3,  16,  42 }
-      }
-    }
-  }, { /* block Type 1 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 214,  49, 220 },
-        { 132,  63, 188 },
-        {  42,  65, 137 }
-      }, { /* Coeff Band 1 */
-        {  85, 137, 221 },
-        { 104, 131, 216 },
-        {  49, 111, 192 },
-        {  21,  87, 155 },
-        {   2,  49,  87 },
-        {   1,  16,  28 }
-      }, { /* Coeff Band 2 */
-        {  89, 163, 230 },
-        {  90, 137, 220 },
-        {  29, 100, 183 },
-        {  10,  70, 135 },
-        {   2,  42,  81 },
-        {   1,  17,  33 }
-      }, { /* Coeff Band 3 */
-        { 108, 167, 237 },
-        {  55, 133, 222 },
-        {  15,  97, 179 },
-        {   4,  72, 135 },
-        {   1,  45,  85 },
-        {   1,  19,  38 }
-      }, { /* Coeff Band 4 */
-        { 124, 146, 240 },
-        {  66, 124, 224 },
-        {  17,  88, 175 },
-        {   4,  58, 122 },
-        {   1,  36,  75 },
-        {   1,  18,  37 }
-      }, { /* Coeff Band 5 */
-        { 141,  79, 241 },
-        { 126,  70, 227 },
-        {  66,  58, 182 },
-        {  30,  44, 136 },
-        {  12,  34,  96 },
-        {   2,  20,  47 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 229,  99, 249 },
-        { 143, 111, 235 },
-        {  46, 109, 192 }
-      }, { /* Coeff Band 1 */
-        {  82, 158, 236 },
-        {  94, 146, 224 },
-        {  25, 117, 191 },
-        {   9,  87, 149 },
-        {   3,  56,  99 },
-        {   1,  33,  57 }
-      }, { /* Coeff Band 2 */
-        {  83, 167, 237 },
-        {  68, 145, 222 },
-        {  10, 103, 177 },
-        {   2,  72, 131 },
-        {   1,  41,  79 },
-        {   1,  20,  39 }
-      }, { /* Coeff Band 3 */
-        {  99, 167, 239 },
-        {  47, 141, 224 },
-        {  10, 104, 178 },
-        {   2,  73, 133 },
-        {   1,  44,  85 },
-        {   1,  22,  47 }
-      }, { /* Coeff Band 4 */
-        { 127, 145, 243 },
-        {  71, 129, 228 },
-        {  17,  93, 177 },
-        {   3,  61, 124 },
-        {   1,  41,  84 },
-        {   1,  21,  52 }
-      }, { /* Coeff Band 5 */
-        { 157,  78, 244 },
-        { 140,  72, 231 },
-        {  69,  58, 184 },
-        {  31,  44, 137 },
-        {  14,  38, 105 },
-        {   8,  23,  61 }
-      }
-    }
-  }
-};
-static const vp9_coeff_probs default_coef_probs_8x8[VP9_BLOCK_TYPES] = {
-  { /* block Type 0 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 125,  34, 187 },
-        {  52,  41, 133 },
-        {   6,  31,  56 }
-      }, { /* Coeff Band 1 */
-        {  37, 109, 153 },
-        {  51, 102, 147 },
-        {  23,  87, 128 },
-        {   8,  67, 101 },
-        {   1,  41,  63 },
-        {   1,  19,  29 }
-      }, { /* Coeff Band 2 */
-        {  31, 154, 185 },
-        {  17, 127, 175 },
-        {   6,  96, 145 },
-        {   2,  73, 114 },
-        {   1,  51,  82 },
-        {   1,  28,  45 }
-      }, { /* Coeff Band 3 */
-        {  23, 163, 200 },
-        {  10, 131, 185 },
-        {   2,  93, 148 },
-        {   1,  67, 111 },
-        {   1,  41,  69 },
-        {   1,  14,  24 }
-      }, { /* Coeff Band 4 */
-        {  29, 176, 217 },
-        {  12, 145, 201 },
-        {   3, 101, 156 },
-        {   1,  69, 111 },
-        {   1,  39,  63 },
-        {   1,  14,  23 }
-      }, { /* Coeff Band 5 */
-        {  57, 192, 233 },
-        {  25, 154, 215 },
-        {   6, 109, 167 },
-        {   3,  78, 118 },
-        {   1,  48,  69 },
-        {   1,  21,  29 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 202, 105, 245 },
-        { 108, 106, 216 },
-        {  18,  90, 144 }
-      }, { /* Coeff Band 1 */
-        {  33, 172, 219 },
-        {  64, 149, 206 },
-        {  14, 117, 177 },
-        {   5,  90, 141 },
-        {   2,  61,  95 },
-        {   1,  37,  57 }
-      }, { /* Coeff Band 2 */
-        {  33, 179, 220 },
-        {  11, 140, 198 },
-        {   1,  89, 148 },
-        {   1,  60, 104 },
-        {   1,  33,  57 },
-        {   1,  12,  21 }
-      }, { /* Coeff Band 3 */
-        {  30, 181, 221 },
-        {   8, 141, 198 },
-        {   1,  87, 145 },
-        {   1,  58, 100 },
-        {   1,  31,  55 },
-        {   1,  12,  20 }
-      }, { /* Coeff Band 4 */
-        {  32, 186, 224 },
-        {   7, 142, 198 },
-        {   1,  86, 143 },
-        {   1,  58, 100 },
-        {   1,  31,  55 },
-        {   1,  12,  22 }
-      }, { /* Coeff Band 5 */
-        {  57, 192, 227 },
-        {  20, 143, 204 },
-        {   3,  96, 154 },
-        {   1,  68, 112 },
-        {   1,  42,  69 },
-        {   1,  19,  32 }
-      }
-    }
-  }, { /* block Type 1 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 212,  35, 215 },
-        { 113,  47, 169 },
-        {  29,  48, 105 }
-      }, { /* Coeff Band 1 */
-        {  74, 129, 203 },
-        { 106, 120, 203 },
-        {  49, 107, 178 },
-        {  19,  84, 144 },
-        {   4,  50,  84 },
-        {   1,  15,  25 }
-      }, { /* Coeff Band 2 */
-        {  71, 172, 217 },
-        {  44, 141, 209 },
-        {  15, 102, 173 },
-        {   6,  76, 133 },
-        {   2,  51,  89 },
-        {   1,  24,  42 }
-      }, { /* Coeff Band 3 */
-        {  64, 185, 231 },
-        {  31, 148, 216 },
-        {   8, 103, 175 },
-        {   3,  74, 131 },
-        {   1,  46,  81 },
-        {   1,  18,  30 }
-      }, { /* Coeff Band 4 */
-        {  65, 196, 235 },
-        {  25, 157, 221 },
-        {   5, 105, 174 },
-        {   1,  67, 120 },
-        {   1,  38,  69 },
-        {   1,  15,  30 }
-      }, { /* Coeff Band 5 */
-        {  65, 204, 238 },
-        {  30, 156, 224 },
-        {   7, 107, 177 },
-        {   2,  70, 124 },
-        {   1,  42,  73 },
-        {   1,  18,  34 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 225,  86, 251 },
-        { 144, 104, 235 },
-        {  42,  99, 181 }
-      }, { /* Coeff Band 1 */
-        {  85, 175, 239 },
-        { 112, 165, 229 },
-        {  29, 136, 200 },
-        {  12, 103, 162 },
-        {   6,  77, 123 },
-        {   2,  53,  84 }
-      }, { /* Coeff Band 2 */
-        {  75, 183, 239 },
-        {  30, 155, 221 },
-        {   3, 106, 171 },
-        {   1,  74, 128 },
-        {   1,  44,  76 },
-        {   1,  17,  28 }
-      }, { /* Coeff Band 3 */
-        {  73, 185, 240 },
-        {  27, 159, 222 },
-        {   2, 107, 172 },
-        {   1,  75, 127 },
-        {   1,  42,  73 },
-        {   1,  17,  29 }
-      }, { /* Coeff Band 4 */
-        {  62, 190, 238 },
-        {  21, 159, 222 },
-        {   2, 107, 172 },
-        {   1,  72, 122 },
-        {   1,  40,  71 },
-        {   1,  18,  32 }
-      }, { /* Coeff Band 5 */
-        {  61, 199, 240 },
-        {  27, 161, 226 },
-        {   4, 113, 180 },
-        {   1,  76, 129 },
-        {   1,  46,  80 },
-        {   1,  23,  41 }
-      }
-    }
-  }
-};
-static const vp9_coeff_probs default_coef_probs_16x16[VP9_BLOCK_TYPES] = {
-  { /* block Type 0 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        {   7,  27, 153 },
-        {   5,  30,  95 },
-        {   1,  16,  30 }
-      }, { /* Coeff Band 1 */
-        {  50,  75, 127 },
-        {  57,  75, 124 },
-        {  27,  67, 108 },
-        {  10,  54,  86 },
-        {   1,  33,  52 },
-        {   1,  12,  18 }
-      }, { /* Coeff Band 2 */
-        {  43, 125, 151 },
-        {  26, 108, 148 },
-        {   7,  83, 122 },
-        {   2,  59,  89 },
-        {   1,  38,  60 },
-        {   1,  17,  27 }
-      }, { /* Coeff Band 3 */
-        {  23, 144, 163 },
-        {  13, 112, 154 },
-        {   2,  75, 117 },
-        {   1,  50,  81 },
-        {   1,  31,  51 },
-        {   1,  14,  23 }
-      }, { /* Coeff Band 4 */
-        {  18, 162, 185 },
-        {   6, 123, 171 },
-        {   1,  78, 125 },
-        {   1,  51,  86 },
-        {   1,  31,  54 },
-        {   1,  14,  23 }
-      }, { /* Coeff Band 5 */
-        {  15, 199, 227 },
-        {   3, 150, 204 },
-        {   1,  91, 146 },
-        {   1,  55,  95 },
-        {   1,  30,  53 },
-        {   1,  11,  20 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        {  19,  55, 240 },
-        {  19,  59, 196 },
-        {   3,  52, 105 }
-      }, { /* Coeff Band 1 */
-        {  41, 166, 207 },
-        { 104, 153, 199 },
-        {  31, 123, 181 },
-        {  14, 101, 152 },
-        {   5,  72, 106 },
-        {   1,  36,  52 }
-      }, { /* Coeff Band 2 */
-        {  35, 176, 211 },
-        {  12, 131, 190 },
-        {   2,  88, 144 },
-        {   1,  60, 101 },
-        {   1,  36,  60 },
-        {   1,  16,  28 }
-      }, { /* Coeff Band 3 */
-        {  28, 183, 213 },
-        {   8, 134, 191 },
-        {   1,  86, 142 },
-        {   1,  56,  96 },
-        {   1,  30,  53 },
-        {   1,  12,  20 }
-      }, { /* Coeff Band 4 */
-        {  20, 190, 215 },
-        {   4, 135, 192 },
-        {   1,  84, 139 },
-        {   1,  53,  91 },
-        {   1,  28,  49 },
-        {   1,  11,  20 }
-      }, { /* Coeff Band 5 */
-        {  13, 196, 216 },
-        {   2, 137, 192 },
-        {   1,  86, 143 },
-        {   1,  57,  99 },
-        {   1,  32,  56 },
-        {   1,  13,  24 }
-      }
-    }
-  }, { /* block Type 1 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 211,  29, 217 },
-        {  96,  47, 156 },
-        {  22,  43,  87 }
-      }, { /* Coeff Band 1 */
-        {  78, 120, 193 },
-        { 111, 116, 186 },
-        {  46, 102, 164 },
-        {  15,  80, 128 },
-        {   2,  49,  76 },
-        {   1,  18,  28 }
-      }, { /* Coeff Band 2 */
-        {  71, 161, 203 },
-        {  42, 132, 192 },
-        {  10,  98, 150 },
-        {   3,  69, 109 },
-        {   1,  44,  70 },
-        {   1,  18,  29 }
-      }, { /* Coeff Band 3 */
-        {  57, 186, 211 },
-        {  30, 140, 196 },
-        {   4,  93, 146 },
-        {   1,  62, 102 },
-        {   1,  38,  65 },
-        {   1,  16,  27 }
-      }, { /* Coeff Band 4 */
-        {  47, 199, 217 },
-        {  14, 145, 196 },
-        {   1,  88, 142 },
-        {   1,  57,  98 },
-        {   1,  36,  62 },
-        {   1,  15,  26 }
-      }, { /* Coeff Band 5 */
-        {  26, 219, 229 },
-        {   5, 155, 207 },
-        {   1,  94, 151 },
-        {   1,  60, 104 },
-        {   1,  36,  62 },
-        {   1,  16,  28 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 233,  29, 248 },
-        { 146,  47, 220 },
-        {  43,  52, 140 }
-      }, { /* Coeff Band 1 */
-        { 100, 163, 232 },
-        { 179, 161, 222 },
-        {  63, 142, 204 },
-        {  37, 113, 174 },
-        {  26,  89, 137 },
-        {  18,  68,  97 }
-      }, { /* Coeff Band 2 */
-        {  85, 181, 230 },
-        {  32, 146, 209 },
-        {   7, 100, 164 },
-        {   3,  71, 121 },
-        {   1,  45,  77 },
-        {   1,  18,  30 }
-      }, { /* Coeff Band 3 */
-        {  65, 187, 230 },
-        {  20, 148, 207 },
-        {   2,  97, 159 },
-        {   1,  68, 116 },
-        {   1,  40,  70 },
-        {   1,  14,  29 }
-      }, { /* Coeff Band 4 */
-        {  40, 194, 227 },
-        {   8, 147, 204 },
-        {   1,  94, 155 },
-        {   1,  65, 112 },
-        {   1,  39,  66 },
-        {   1,  14,  26 }
-      }, { /* Coeff Band 5 */
-        {  16, 208, 228 },
-        {   3, 151, 207 },
-        {   1,  98, 160 },
-        {   1,  67, 117 },
-        {   1,  41,  74 },
-        {   1,  17,  31 }
-      }
-    }
-  }
-};
-static const vp9_coeff_probs default_coef_probs_32x32[VP9_BLOCK_TYPES] = {
-  { /* block Type 0 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        {  17,  38, 140 },
-        {   7,  34,  80 },
-        {   1,  17,  29 }
-      }, { /* Coeff Band 1 */
-        {  37,  75, 128 },
-        {  41,  76, 128 },
-        {  26,  66, 116 },
-        {  12,  52,  94 },
-        {   2,  32,  55 },
-        {   1,  10,  16 }
-      }, { /* Coeff Band 2 */
-        {  50, 127, 154 },
-        {  37, 109, 152 },
-        {  16,  82, 121 },
-        {   5,  59,  85 },
-        {   1,  35,  54 },
-        {   1,  13,  20 }
-      }, { /* Coeff Band 3 */
-        {  40, 142, 167 },
-        {  17, 110, 157 },
-        {   2,  71, 112 },
-        {   1,  44,  72 },
-        {   1,  27,  45 },
-        {   1,  11,  17 }
-      }, { /* Coeff Band 4 */
-        {  30, 175, 188 },
-        {   9, 124, 169 },
-        {   1,  74, 116 },
-        {   1,  48,  78 },
-        {   1,  30,  49 },
-        {   1,  11,  18 }
-      }, { /* Coeff Band 5 */
-        {  10, 222, 223 },
-        {   2, 150, 194 },
-        {   1,  83, 128 },
-        {   1,  48,  79 },
-        {   1,  27,  45 },
-        {   1,  11,  17 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        {  36,  41, 235 },
-        {  29,  36, 193 },
-        {  10,  27, 111 }
-      }, { /* Coeff Band 1 */
-        {  85, 165, 222 },
-        { 177, 162, 215 },
-        { 110, 135, 195 },
-        {  57, 113, 168 },
-        {  23,  83, 120 },
-        {  10,  49,  61 }
-      }, { /* Coeff Band 2 */
-        {  85, 190, 223 },
-        {  36, 139, 200 },
-        {   5,  90, 146 },
-        {   1,  60, 103 },
-        {   1,  38,  65 },
-        {   1,  18,  30 }
-      }, { /* Coeff Band 3 */
-        {  72, 202, 223 },
-        {  23, 141, 199 },
-        {   2,  86, 140 },
-        {   1,  56,  97 },
-        {   1,  36,  61 },
-        {   1,  16,  27 }
-      }, { /* Coeff Band 4 */
-        {  55, 218, 225 },
-        {  13, 145, 200 },
-        {   1,  86, 141 },
-        {   1,  57,  99 },
-        {   1,  35,  61 },
-        {   1,  13,  22 }
-      }, { /* Coeff Band 5 */
-        {  15, 235, 212 },
-        {   1, 132, 184 },
-        {   1,  84, 139 },
-        {   1,  57,  97 },
-        {   1,  34,  56 },
-        {   1,  14,  23 }
-      }
-    }
-  }, { /* block Type 1 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 181,  21, 201 },
-        {  61,  37, 123 },
-        {  10,  38,  71 }
-      }, { /* Coeff Band 1 */
-        {  47, 106, 172 },
-        {  95, 104, 173 },
-        {  42,  93, 159 },
-        {  18,  77, 131 },
-        {   4,  50,  81 },
-        {   1,  17,  23 }
-      }, { /* Coeff Band 2 */
-        {  62, 147, 199 },
-        {  44, 130, 189 },
-        {  28, 102, 154 },
-        {  18,  75, 115 },
-        {   2,  44,  65 },
-        {   1,  12,  19 }
-      }, { /* Coeff Band 3 */
-        {  55, 153, 210 },
-        {  24, 130, 194 },
-        {   3,  93, 146 },
-        {   1,  61,  97 },
-        {   1,  31,  50 },
-        {   1,  10,  16 }
-      }, { /* Coeff Band 4 */
-        {  49, 186, 223 },
-        {  17, 148, 204 },
-        {   1,  96, 142 },
-        {   1,  53,  83 },
-        {   1,  26,  44 },
-        {   1,  11,  17 }
-      }, { /* Coeff Band 5 */
-        {  13, 217, 212 },
-        {   2, 136, 180 },
-        {   1,  78, 124 },
-        {   1,  50,  83 },
-        {   1,  29,  49 },
-        {   1,  14,  23 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 197,  13, 247 },
-        {  82,  17, 222 },
-        {  25,  17, 162 }
-      }, { /* Coeff Band 1 */
-        { 126, 186, 247 },
-        { 234, 191, 243 },
-        { 176, 177, 234 },
-        { 104, 158, 220 },
-        {  66, 128, 186 },
-        {  55,  90, 137 }
-      }, { /* Coeff Band 2 */
-        { 111, 197, 242 },
-        {  46, 158, 219 },
-        {   9, 104, 171 },
-        {   2,  65, 125 },
-        {   1,  44,  80 },
-        {   1,  17,  91 }
-      }, { /* Coeff Band 3 */
-        { 104, 208, 245 },
-        {  39, 168, 224 },
-        {   3, 109, 162 },
-        {   1,  79, 124 },
-        {   1,  50, 102 },
-        {   1,  43, 102 }
-      }, { /* Coeff Band 4 */
-        {  84, 220, 246 },
-        {  31, 177, 231 },
-        {   2, 115, 180 },
-        {   1,  79, 134 },
-        {   1,  55,  77 },
-        {   1,  60,  79 }
-      }, { /* Coeff Band 5 */
-        {  43, 243, 240 },
-        {   8, 180, 217 },
-        {   1, 115, 166 },
-        {   1,  84, 121 },
-        {   1,  51,  67 },
-        {   1,  16,   6 }
-      }
-    }
-  }
-};
+    int           m_frameIdx;
+    int           m_dataSize;
+    int           m_frameSize;
+    bool          m_frameSizeChanged;
 
-static const uint8_t vp9_kf_default_bmode_probs[VP9_INTRA_MODES]
-                                   [VP9_INTRA_MODES]
-                                   [VP9_INTRA_MODES-1] = {
-  { /* above = dc */
-    { 137,  30,  42, 148, 151, 207,  70,  52,  91 } /* left = dc */,
-    {  92,  45, 102, 136, 116, 180,  74,  90, 100 } /* left = v */,
-    {  73,  32,  19, 187, 222, 215,  46,  34, 100 } /* left = h */,
-    {  91,  30,  32, 116, 121, 186,  93,  86,  94 } /* left = d45 */,
-    {  72,  35,  36, 149,  68, 206,  68,  63, 105 } /* left = d135 */,
-    {  73,  31,  28, 138,  57, 124,  55, 122, 151 } /* left = d117 */,
-    {  67,  23,  21, 140, 126, 197,  40,  37, 171 } /* left = d153 */,
-    {  86,  27,  28, 128, 154, 212,  45,  43,  53 } /* left = d27 */,
-    {  74,  32,  27, 107,  86, 160,  63, 134, 102 } /* left = d63 */,
-    {  59,  67,  44, 140, 161, 202,  78,  67, 119 } /* left = tm */
-  }, { /* above = v */
-    {  63,  36, 126, 146, 123, 158,  60,  90,  96 } /* left = dc */,
-    {  43,  46, 168, 134, 107, 128,  69, 142,  92 } /* left = v */,
-    {  44,  29,  68, 159, 201, 177,  50,  57,  77 } /* left = h */,
-    {  58,  38,  76, 114,  97, 172,  78, 133,  92 } /* left = d45 */,
-    {  46,  41,  76, 140,  63, 184,  69, 112,  57 } /* left = d135 */,
-    {  38,  32,  85, 140,  46, 112,  54, 151, 133 } /* left = d117 */,
-    {  39,  27,  61, 131, 110, 175,  44,  75, 136 } /* left = d153 */,
-    {  52,  30,  74, 113, 130, 175,  51,  64,  58 } /* left = d27 */,
-    {  47,  35,  80, 100,  74, 143,  64, 163,  74 } /* left = d63 */,
-    {  36,  61, 116, 114, 128, 162,  80, 125,  82 } /* left = tm */
-  }, { /* above = h */
-    {  82,  26,  26, 171, 208, 204,  44,  32, 105 } /* left = dc */,
-    {  55,  44,  68, 166, 179, 192,  57,  57, 108 } /* left = v */,
-    {  42,  26,  11, 199, 241, 228,  23,  15,  85 } /* left = h */,
-    {  68,  42,  19, 131, 160, 199,  55,  52,  83 } /* left = d45 */,
-    {  58,  50,  25, 139, 115, 232,  39,  52, 118 } /* left = d135 */,
-    {  50,  35,  33, 153, 104, 162,  64,  59, 131 } /* left = d117 */,
-    {  44,  24,  16, 150, 177, 202,  33,  19, 156 } /* left = d153 */,
-    {  55,  27,  12, 153, 203, 218,  26,  27,  49 } /* left = d27 */,
-    {  53,  49,  21, 110, 116, 168,  59,  80,  76 } /* left = d63 */,
-    {  38,  72,  19, 168, 203, 212,  50,  50, 107 } /* left = tm */
-  }, { /* above = d45 */
-    { 103,  26,  36, 129, 132, 201,  83,  80,  93 } /* left = dc */,
-    {  59,  38,  83, 112, 103, 162,  98, 136,  90 } /* left = v */,
-    {  62,  30,  23, 158, 200, 207,  59,  57,  50 } /* left = h */,
-    {  67,  30,  29,  84,  86, 191, 102,  91,  59 } /* left = d45 */,
-    {  60,  32,  33, 112,  71, 220,  64,  89, 104 } /* left = d135 */,
-    {  53,  26,  34, 130,  56, 149,  84, 120, 103 } /* left = d117 */,
-    {  53,  21,  23, 133, 109, 210,  56,  77, 172 } /* left = d153 */,
-    {  77,  19,  29, 112, 142, 228,  55,  66,  36 } /* left = d27 */,
-    {  61,  29,  29,  93,  97, 165,  83, 175, 162 } /* left = d63 */,
-    {  47,  47,  43, 114, 137, 181, 100,  99,  95 } /* left = tm */
-  }, { /* above = d135 */
-    {  69,  23,  29, 128,  83, 199,  46,  44, 101 } /* left = dc */,
-    {  53,  40,  55, 139,  69, 183,  61,  80, 110 } /* left = v */,
-    {  40,  29,  19, 161, 180, 207,  43,  24,  91 } /* left = h */,
-    {  60,  34,  19, 105,  61, 198,  53,  64,  89 } /* left = d45 */,
-    {  52,  31,  22, 158,  40, 209,  58,  62,  89 } /* left = d135 */,
-    {  44,  31,  29, 147,  46, 158,  56, 102, 198 } /* left = d117 */,
-    {  35,  19,  12, 135,  87, 209,  41,  45, 167 } /* left = d153 */,
-    {  55,  25,  21, 118,  95, 215,  38,  39,  66 } /* left = d27 */,
-    {  51,  38,  25, 113,  58, 164,  70,  93,  97 } /* left = d63 */,
-    {  47,  54,  34, 146, 108, 203,  72, 103, 151 } /* left = tm */
-  }, { /* above = d117 */
-    {  64,  19,  37, 156,  66, 138,  49,  95, 133 } /* left = dc */,
-    {  46,  27,  80, 150,  55, 124,  55, 121, 135 } /* left = v */,
-    {  36,  23,  27, 165, 149, 166,  54,  64, 118 } /* left = h */,
-    {  53,  21,  36, 131,  63, 163,  60, 109,  81 } /* left = d45 */,
-    {  40,  26,  35, 154,  40, 185,  51,  97, 123 } /* left = d135 */,
-    {  35,  19,  34, 179,  19,  97,  48, 129, 124 } /* left = d117 */,
-    {  36,  20,  26, 136,  62, 164,  33,  77, 154 } /* left = d153 */,
-    {  45,  18,  32, 130,  90, 157,  40,  79,  91 } /* left = d27 */,
-    {  45,  26,  28, 129,  45, 129,  49, 147, 123 } /* left = d63 */,
-    {  38,  44,  51, 136,  74, 162,  57,  97, 121 } /* left = tm */
-  }, { /* above = d153 */
-    {  75,  17,  22, 136, 138, 185,  32,  34, 166 } /* left = dc */,
-    {  56,  39,  58, 133, 117, 173,  48,  53, 187 } /* left = v */,
-    {  35,  21,  12, 161, 212, 207,  20,  23, 145 } /* left = h */,
-    {  56,  29,  19, 117, 109, 181,  55,  68, 112 } /* left = d45 */,
-    {  47,  29,  17, 153,  64, 220,  59,  51, 114 } /* left = d135 */,
-    {  46,  16,  24, 136,  76, 147,  41,  64, 172 } /* left = d117 */,
-    {  34,  17,  11, 108, 152, 187,  13,  15, 209 } /* left = d153 */,
-    {  51,  24,  14, 115, 133, 209,  32,  26, 104 } /* left = d27 */,
-    {  55,  30,  18, 122,  79, 179,  44,  88, 116 } /* left = d63 */,
-    {  37,  49,  25, 129, 168, 164,  41,  54, 148 } /* left = tm */
-  }, { /* above = d27 */
-    {  82,  22,  32, 127, 143, 213,  39,  41,  70 } /* left = dc */,
-    {  62,  44,  61, 123, 105, 189,  48,  57,  64 } /* left = v */,
-    {  47,  25,  17, 175, 222, 220,  24,  30,  86 } /* left = h */,
-    {  68,  36,  17, 106, 102, 206,  59,  74,  74 } /* left = d45 */,
-    {  57,  39,  23, 151,  68, 216,  55,  63,  58 } /* left = d135 */,
-    {  49,  30,  35, 141,  70, 168,  82,  40, 115 } /* left = d117 */,
-    {  51,  25,  15, 136, 129, 202,  38,  35, 139 } /* left = d153 */,
-    {  68,  26,  16, 111, 141, 215,  29,  28,  28 } /* left = d27 */,
-    {  59,  39,  19, 114,  75, 180,  77, 104,  42 } /* left = d63 */,
-    {  40,  61,  26, 126, 152, 206,  61,  59,  93 } /* left = tm */
-  }, { /* above = d63 */
-    {  78,  23,  39, 111, 117, 170,  74, 124,  94 } /* left = dc */,
-    {  48,  34,  86, 101,  92, 146,  78, 179, 134 } /* left = v */,
-    {  47,  22,  24, 138, 187, 178,  68,  69,  59 } /* left = h */,
-    {  56,  25,  33, 105, 112, 187,  95, 177, 129 } /* left = d45 */,
-    {  48,  31,  27, 114,  63, 183,  82, 116,  56 } /* left = d135 */,
-    {  43,  28,  37, 121,  63, 123,  61, 192, 169 } /* left = d117 */,
-    {  42,  17,  24, 109,  97, 177,  56,  76, 122 } /* left = d153 */,
-    {  58,  18,  28, 105, 139, 182,  70,  92,  63 } /* left = d27 */,
-    {  46,  23,  32,  74,  86, 150,  67, 183,  88 } /* left = d63 */,
-    {  36,  38,  48,  92, 122, 165,  88, 137,  91 } /* left = tm */
-  }, { /* above = tm */
-    {  65,  70,  60, 155, 159, 199,  61,  60,  81 } /* left = dc */,
-    {  44,  78, 115, 132, 119, 173,  71, 112,  93 } /* left = v */,
-    {  39,  38,  21, 184, 227, 206,  42,  32,  64 } /* left = h */,
-    {  58,  47,  36, 124, 137, 193,  80,  82,  78 } /* left = d45 */,
-    {  49,  50,  35, 144,  95, 205,  63,  78,  59 } /* left = d135 */,
-    {  41,  53,  52, 148,  71, 142,  65, 128,  51 } /* left = d117 */,
-    {  40,  36,  28, 143, 143, 202,  40,  55, 137 } /* left = d153 */,
-    {  52,  34,  29, 129, 183, 227,  42,  35,  43 } /* left = d27 */,
-    {  42,  44,  44, 104, 105, 164,  64, 130,  80 } /* left = d63 */,
-    {  43,  81,  53, 140, 169, 204,  68,  84,  72 } /* left = tm */
-  }
-};
+    int           m_rtOrigWidth;
+    int           m_rtOrigHeight;
+    bool          m_pictureStarted;
+    bool          m_bitstreamComplete;
 
-class VulkanVP9Decoder : public VulkanVideoDecoder
-{
-protected:
-    vp9_reader                      reader;
-    nvdec_vp9EntropyProbs_t         m_EntropyLast[FRAME_CONTEXTS];
-    nvdec_vp9AdaptiveEntropyProbs_t m_PrevCtx;
-    const unsigned char*            m_pCompressedHeader;
+    // Parsing state for compute_image_size() side effects
+    int           m_lastFrameWidth;
+    int           m_lastFrameHeight;
+    bool          m_lastShowFrame;
 
-    void vp9_init_mbmode_probs(vp9_prob_update_s *pProbSetup);
-    vp9_prob weighted_prob(int32_t prob1, int32_t prob2, int32_t factor);
-    vp9_prob clip_prob(uint32_t p);
-    vp9_prob get_prob(uint32_t num, uint32_t den);
-    vp9_prob get_binary_prob(uint32_t n0, uint32_t n1);
-    uint32_t convert_distribution(uint32_t i,
-                            const vp9_tree_index * tree,
-                            uint8_t probs[],
-                            uint32_t branch_ct[][2],
-                            const uint32_t num_events[],
-                            uint32_t tok0_offset);
-    void vp9_tree_probs_from_distribution(const vp9_tree_index* tree,
-                                        uint8_t probs          [ /* n-1 */ ],
-                                        uint32_t branch_ct       [ /* n-1 */ ] [2],
-                                        const uint32_t num_events[ /* n */ ],
-                                        uint32_t tok0_offset);
-    void update_coef_probs(uint8_t dst_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1],
-                        uint8_t pre_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1],
-                        uint32_t coef_counts[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES+1],
-                        uint32_t (*eob_counts)[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS],
-                        int32_t count_sat, int32_t update_factor);
-    void adaptCoefProbs(vp9_prob_update_s *pProbSetup);
-    int32_t update_mode_ct(vp9_prob pre_prob, vp9_prob prob, uint32_t branch_ct[2]);
-    int32_t update_mode_ct2(vp9_prob pre_prob, uint32_t branch_ct[2]);
-    void update_mode_probs(int32_t n_modes,
-                            const vp9_tree_index *tree, uint32_t *cnt,
-                            vp9_prob *pre_probs, vp9_prob *pre_probsB,
-                            vp9_prob *dst_probs, vp9_prob *dst_probsB,
-                            uint32_t tok0_offset);
-    void tx_counts_to_branch_counts_32x32(uint32_t *tx_count_32x32p, uint32_t (*ct_32x32p)[2]);
-    void tx_counts_to_branch_counts_16x16(uint32_t *tx_count_16x16p, uint32_t (*ct_16x16p)[2]);
-    void tx_counts_to_branch_counts_8x8(uint32_t *tx_count_8x8p, uint32_t (*ct_8x8p)[2]);
-    void adaptModeProbs(vp9_prob_update_s *pProbSetup);
-    void adaptModeContext(vp9_prob_update_s *pProbSetup);
-    uint32_t adapt_probs(uint32_t i,
-                         const signed char* tree,
-                         vp9_prob this_probs[],
-                         const vp9_prob last_probs[],
-                         const uint32_t num_events[]);
-    void adapt_prob(vp9_prob *dest, vp9_prob prep, uint32_t ct[2]);
-    void adaptNmvProbs(vp9_prob_update_s *pProbSetup);
+    // Last used loop filter parameters
+    int8_t        m_loopFilterRefDeltas[STD_VIDEO_VP9_MAX_REF_FRAMES];
+    int8_t        m_loopFilterModeDeltas[STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS];
 
+    vp9_ref_frames_s m_pBuffers[VP9_BUFFER_POOL_MAX_SIZE];
+ 
 protected:
-    void vp9_reader_fill();
-    int32_t vp9_reader_init (uint32_t size);
-    int32_t vp9_read_bit();
-    int32_t vp9_read(int32_t probability);
-    int32_t vp9_read_literal(int32_t bits);
-    uint32_t ParseCompressedVP9();
-    int32_t get_unsigned_bits(uint32_t num_values);
-    uint32_t swGetBitsUnsignedMax( uint32_t maxValue);
-    vp9_prob vp9hwdReadProbDiffUpdate(uint8_t oldp);
-    int32_t vp9_inv_recenter_nonneg(int32_t v, int32_t m);
-    int32_t inv_remap_prob(int32_t v, int32_t m);
-    int32_t merge_index(int32_t v, int32_t n, int32_t modulus);
-    uint32_t BoolDecodeUniform(uint32_t n);
-    uint32_t vp9hwdDecodeSubExp(uint32_t k, uint32_t num_syms);
-    uint32_t vp9hwdDecodeCoeffUpdate(uint8_t probCoeffs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1]);
-    uint32_t vp9hwdDecodeMvUpdate(vp9_prob_update_s *pProbSetup);
-    void update_nmv(vp9_prob *const p, const vp9_prob upd_p);
+    void UpdateFramePointers(VkPicIf* currentPicture);
+    bool AddBuffertoOutputQueue(VkPicIf* pDispPic);
+    void AddBuffertoDispQueue(VkPicIf* pDispPic);
+    virtual void lEndPicture(VkPicIf* pDispPic);
+    void EndOfStream() override;
 
 public:
     VulkanVP9Decoder(VkVideoCodecOperationFlagBitsKHR std);
-    void ResetProbs(vp9_prob_update_s *pProbSetup);
-    void GetProbs(vp9_prob_update_s *pProbSetup);
-    uint32_t UpdateForwardProbability(vp9_prob_update_s *pProbSetup, const unsigned char* pCompressed_Header);
-    void UpdateBackwardProbability(vp9_prob_update_s *pProbSetup);
+    ~VulkanVP9Decoder();
 
     // TODO: Need to implement these functions.
-    bool                    IsPictureBoundary(int32_t) { return true; };
-    int32_t                 ParseNalUnit() { return NALU_UNKNOWN; };
+    bool                    IsPictureBoundary(int32_t) override { return true; };
+    int32_t                 ParseNalUnit() override { return NALU_UNKNOWN; };
     bool                    DecodePicture(VkParserPictureData *) { return false; };
-    void                    InitParser() {}
-    bool                    BeginPicture(VkParserPictureData *) { return false; }
-    void                    CreatePrivateContext() {}
-    void                    FreeContext() {}
+    void                    InitParser() override;
+    bool                    BeginPicture(VkParserPictureData *) override;
+    void                    CreatePrivateContext() override {}
+    void                    FreeContext() override {}
+
+private:
+    bool                    ParseByteStream(const VkParserBitstreamPacket* pck, size_t* pParsedBytes) override;
+    bool                    ParseFrameHeader(uint32_t framesize);
+    bool                    ParseUncompressedHeader();
+    bool                    ParseColorConfig();
+    void                    ParseFrameAndRenderSize();
+    void                    ParseFrameAndRenderSizeWithRefs();
+    void                    ComputeImageSize();
+    void                    ParseLoopFilterParams();
+    void                    ParseQuantizationParams();
+    int32_t                 ReadDeltaQ();
+    void                    ParseSegmentationParams();
+    uint8_t                 CalcMinLog2TileCols();
+    uint8_t                 CalcMaxLog2TileCols();
+    void                    ParseTileInfo();
+    void                    ParseSuperFrameIndex(const uint8_t* data, uint32_t data_sz, uint32_t sizes[8], uint32_t* count);
+
 };
 
 #endif // _VP9_PROBMANAGER_H_
diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp
index 99452952..701e4c07 100644
--- a/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp
+++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp
@@ -20,1044 +20,889 @@
 
 VulkanVP9Decoder::VulkanVP9Decoder(VkVideoCodecOperationFlagBitsKHR std)
     : VulkanVideoDecoder(std)
-{
-    memset(&m_EntropyLast, 0, sizeof(m_EntropyLast));
-    memset(&m_PrevCtx, 0, sizeof(m_PrevCtx));
-    memset(&reader, 0, sizeof(vp9_reader));
-    m_pCompressedHeader = NULL;
-}
-void VulkanVP9Decoder::vp9_init_mbmode_probs(vp9_prob_update_s *pProbSetup)
-{
-    uint32_t i, j;
-
-    for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
-    {
-        for (j = 0; j < 8; j++)
-            pProbSetup->pProbTab->a.sb_ymode_prob[i][j] = default_if_y_probs[i][j];
-        pProbSetup->pProbTab->a.sb_ymode_probB[i][0] = default_if_y_probs[i][8];
-    }
-
-    for (i = 0; i < VP9_INTRA_MODES; i++)
-    {
-        for (j = 0; j < 8; j++)
-            pProbSetup->pProbTab->kf_uv_mode_prob[i][j] = default_kf_uv_probs[i][j];
-        pProbSetup->pProbTab->kf_uv_mode_probB[i][0] = default_kf_uv_probs[i][8];
-
-        for (j = 0; j < 8; j++)
-            pProbSetup->pProbTab->a.uv_mode_prob[i][j] = default_if_uv_probs[i][j];
-        pProbSetup->pProbTab->a.uv_mode_probB[i][0] = default_if_uv_probs[i][8];
-    }
-
-    memcpy(pProbSetup->pProbTab->a.switchable_interp_prob, vp9_switchable_interp_prob,
-             sizeof(vp9_switchable_interp_prob));
-    memcpy(pProbSetup->pProbTab->a.partition_prob, vp9_partition_probs,
-             sizeof(vp9_partition_probs));
-    memcpy(pProbSetup->pProbTab->a.intra_inter_prob, default_intra_inter_p,
-             sizeof(default_intra_inter_p));
-    memcpy(pProbSetup->pProbTab->a.comp_inter_prob, default_comp_inter_p,
-             sizeof(default_comp_inter_p));
-    memcpy(pProbSetup->pProbTab->a.comp_ref_prob, default_comp_ref_p,
-             sizeof(default_comp_ref_p));
-    memcpy(pProbSetup->pProbTab->a.single_ref_prob, default_single_ref_p,
-             sizeof(default_single_ref_p));
-    memcpy(pProbSetup->pProbTab->a.tx32x32_prob, vp9_default_tx_probs_32x32p,
-             sizeof(vp9_default_tx_probs_32x32p));
-    memcpy(pProbSetup->pProbTab->a.tx16x16_prob, vp9_default_tx_probs_16x16p,
-             sizeof(vp9_default_tx_probs_16x16p));
-    memcpy(pProbSetup->pProbTab->a.tx8x8_prob, vp9_default_tx_probs_8x8p,
-             sizeof(vp9_default_tx_probs_8x8p));
-    memcpy(pProbSetup->pProbTab->a.mbskip_probs, vp9_default_mbskip_probs,
-             sizeof(vp9_default_mbskip_probs));
-
-    for (i = 0; i < VP9_INTRA_MODES; i++)
-    {
-        for (j = 0; j < VP9_INTRA_MODES; j++)
-        {
-            memcpy(pProbSetup->pProbTab->kf_bmode_prob[i][j], vp9_kf_default_bmode_probs[i][j], 8);
-            pProbSetup->pProbTab->kf_bmode_probB[i][j][0] = vp9_kf_default_bmode_probs[i][j][8];
-        }
-    }
+    , m_PicData()
+    , m_pCurrPic()
+    , m_frameIdx(-1)                // -1 until ParseByteStream() sees the first non-empty packet
+    , m_dataSize()
+    , m_frameSize()
+    , m_frameSizeChanged()
+    , m_rtOrigWidth()
+    , m_rtOrigHeight()
+    , m_pictureStarted()
+    , m_bitstreamComplete(true)     // true: the next packet is treated as the start of a new frame
+    , m_lastFrameWidth(0)
+    , m_lastFrameHeight(0)
+    , m_lastShowFrame(false)
+    , m_pBuffers() {
 }
 
-void VulkanVP9Decoder::ResetProbs(vp9_prob_update_s *pProbSetup)
+VulkanVP9Decoder::~VulkanVP9Decoder()
 {
-    //reset segmentMap (buffers going to HWIF_SEGMENT_READ_BASE_LSB and HWIF_SEGMENT_WRITE_BASE_LSB)
-
-    uint32_t i, j, k, l, m;
-
-    memcpy(pProbSetup->pProbTab->a.inter_mode_prob, vp9_default_inter_mode_prob, sizeof(vp9_default_inter_mode_prob));
-    vp9_init_mbmode_probs(pProbSetup);
-    memcpy(&pProbSetup->pProbTab->a.nmvc, &vp9_default_nmv_context, sizeof(nvdec_nmv_context));
-
-    /* Copy the default probs into two separate prob tables: part1 and part2. */
-
-    for( i = 0; i < VP9_BLOCK_TYPES; i++ ) {
-        for ( j = 0; j < VP9_REF_TYPES; j++ ) {
-            for ( k = 0; k < VP9_COEF_BANDS; k++ ) {
-                for ( l = 0; l < VP9_PREV_COEF_CONTEXTS; l++ ) {
-                    if (l >= 3 && k == 0)
-                        continue;
-
-                    for ( m = 0; m < UNCONSTRAINED_NODES; m++ ) {
-                        pProbSetup->pProbTab->a.probCoeffs[i][j][k][l][m] =
-                            default_coef_probs_4x4[i][j][k][l][m];
-                        pProbSetup->pProbTab->a.probCoeffs8x8[i][j][k][l][m] =
-                            default_coef_probs_8x8[i][j][k][l][m];
-                        pProbSetup->pProbTab->a.probCoeffs16x16[i][j][k][l][m] =
-                            default_coef_probs_16x16[i][j][k][l][m];
-                        pProbSetup->pProbTab->a.probCoeffs32x32[i][j][k][l][m] =
-                            default_coef_probs_32x32[i][j][k][l][m];
-                    }
-                }
-            }
-        }
-    }
-
-    /* Store the default probs for all saved contexts */
-    if (pProbSetup->keyFrame || pProbSetup->errorResilient || pProbSetup->resetFrameContext == 3)
-    {
-        for (i = 0; i < FRAME_CONTEXTS; i++)
-            memcpy( &m_EntropyLast[i], pProbSetup->pProbTab, sizeof(nvdec_vp9EntropyProbs_t));
-    }
-    else if (pProbSetup->resetFrameContext == 2)
-        memcpy( &m_EntropyLast[pProbSetup->frameContextIdx], pProbSetup->pProbTab, sizeof(nvdec_vp9EntropyProbs_t));
 }
 
-void VulkanVP9Decoder::GetProbs(vp9_prob_update_s *pProbSetup)
+void VulkanVP9Decoder::InitParser()
 {
-    memcpy(pProbSetup->pProbTab, &m_EntropyLast[pProbSetup->frameContextIdx], sizeof(m_EntropyLast[pProbSetup->frameContextIdx]));
+    m_bNoStartCodes = true;
+    m_bEmulBytesPresent = false;
+    m_bitstreamComplete = true;     // next packet starts a new frame
+    m_pictureStarted = false;
+    // Do not null m_pCurrPic here: EndOfStream() must still see it to Release() it.
+    EndOfStream();                  // releases m_pCurrPic and the m_pBuffers slots, then nulls them
 }
 
-/////////////////////////////////////////////////////////////////////////////////
-
-
-void VulkanVP9Decoder::vp9_reader_fill()
+void VulkanVP9Decoder::EndOfStream()
 {
-    vp9_reader *r = &reader;
-    uint32_t buffer_end = r->buffer_end;
-    uint32_t buffer = r->buffer;
-    VP9_BD_VALUE value = r->value;
-    int32_t count = r->count;
-    int32_t shift = BD_VALUE_SIZE - 8 - (count + 8);
-    int32_t loop_end = 0;
-    const int32_t bits_left = (int32_t)((buffer_end - buffer)*CHAR_BIT);
-    const int32_t x = shift + CHAR_BIT - bits_left;
-    if (x >= 0) {
-        count += LOTS_OF_BITS;
-        loop_end = x;
+    if (m_pCurrPic) {
+        m_pCurrPic->Release();
+        m_pCurrPic = nullptr;
     }
-    if (x < 0 || bits_left)
-    {
-        while (shift >= loop_end)
-        {
-            count += CHAR_BIT;
-            uint8_t temp = m_pCompressedHeader[r->pos++]; //u( 8);
-            value |= (VP9_BD_VALUE)temp << shift;
-            shift -= CHAR_BIT;
-            buffer++;
+    for (auto& ref : m_pBuffers) {  // every slot of the pool (VP9_BUFFER_POOL_MAX_SIZE entries)
+        if (ref.buffer) {
+            ref.buffer->Release();
+            ref.buffer = nullptr;
         }
     }
-    r->buffer = buffer;
-    r->value = value;
-    r->count = count;
-}
-
-int32_t VulkanVP9Decoder::vp9_reader_init(uint32_t size)
-{
-    int32_t marker_bit = 0;
-    vp9_reader *r = &reader;
-    r->buffer_end = 0 + size;
-    r->buffer = 0;
-    r->value = 0;
-    r->count = -8;
-    r->range = 255;
-    r->pos = 0;
-
-    vp9_reader_fill();
-    marker_bit = vp9_read_bit();
-    return marker_bit != 0;
 }
 
-int32_t VulkanVP9Decoder::vp9_read_bit()
+bool VulkanVP9Decoder::ParseByteStream(const VkParserBitstreamPacket* pck, size_t* pParsedBytes)
 {
-    return vp9_read( 128);
-}
+    const uint8_t* pDataIn = (uint8_t*)pck->pByteStream;
+    int dataSize = (int)pck->nDataLength;
 
-int32_t VulkanVP9Decoder::vp9_read(int32_t probability)
-{
-
-    vp9_reader *br = &reader;
-    uint32_t bit = 0;
-    VP9_BD_VALUE value;
-    VP9_BD_VALUE bigsplit;
-    int32_t count;
-    uint32_t range;
-    uint32_t split = 1 + (((br->range - 1) * probability) >> 8);
-    if (br->count < 0)
-        vp9_reader_fill();
-    value = br->value;
-    count = br->count;
-    bigsplit = (VP9_BD_VALUE)split << (BD_VALUE_SIZE - 8);
-
-    range = split;
-    if (value >= bigsplit)
-    {
-        range = br->range - split;
-        value = value - bigsplit;
-        bit = 1;
+    if (pParsedBytes) {
+        *pParsedBytes = 0;
     }
-    uint32_t shift = vp9dx_bitreader_norm[range];
-    range <<= shift;
-    value <<= shift;
-    count -= shift;
-    br->value = value;
-    br->count = count;
-    br->range = range;
-    return bit;
-}
-
-int32_t VulkanVP9Decoder::vp9_read_literal( int32_t bits)
-{
-    int32_t z = 0, bit;
 
-    for (bit = bits - 1; bit >= 0; bit--)
-    {
-        z |= vp9_read_bit() << bit;
+    // Use different bitstreamBuffer than the previous frames bitstreamBuffer
+    // TODO: Make sure that the bitstreamBuffer is not in use.
+    VkSharedBaseObj<VulkanBitstreamBuffer> bitstreamBuffer;
+    assert(m_pClient);
+    m_pClient->GetBitstreamBuffer(m_bitstreamDataLen,
+                                  m_bufferOffsetAlignment, m_bufferSizeAlignment,
+                                  nullptr, 0, bitstreamBuffer);
+    assert(bitstreamBuffer);
+    if (!bitstreamBuffer) {
+        return false;
     }
-    return z;
-}
-////////////////////////////////////////////////////////////////////////////////////
-//Forward Update
-uint32_t VulkanVP9Decoder::UpdateForwardProbability(vp9_prob_update_s *pProbSetup, const unsigned char* pCompressed_Header)
-{
-    nvdec_vp9EntropyProbs_t *fc = pProbSetup->pProbTab; // Frame context
-
-    uint32_t tmp, i, j, k;
+    m_bitstreamDataLen = m_bitstreamData.SetBitstreamBuffer(bitstreamBuffer);
+    m_bitstreamData.ResetStreamMarkers();
 
-    m_pCompressedHeader = pCompressed_Header;
-    m_PrevCtx = pProbSetup->pProbTab->a;
-
-    if (vp9_reader_init(pProbSetup->offsetToDctParts) != 0)
-    {
-        return NOK;
-    }
-
-    if (pProbSetup->lossless)
-        pProbSetup->transform_mode = ONLY_4X4;
-    else
-    {
-        pProbSetup->transform_mode = vp9_read_literal( 2);
-        if (pProbSetup->transform_mode == ALLOW_32X32)
-            pProbSetup->transform_mode += vp9_read_literal( 1);
-        if (pProbSetup->transform_mode == TX_MODE_SELECT)
-        {
-             for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
-             {
-                for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j)
-                {
-                    tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                    if (tmp) {
-                        uint8_t *prob = &fc->a.tx8x8_prob[i][j];
-                        *prob = vp9hwdReadProbDiffUpdate( *prob);
-                    }
-                }
-            }
-            for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
-            {
-                for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) {
-                    tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                    if (tmp) {
-                        uint8_t *prob = &fc->a.tx16x16_prob[i][j];
-                        *prob = vp9hwdReadProbDiffUpdate( *prob);
-                    }
-                }
-            }
-            for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
-            {
-                for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) {
-                    tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                    if (tmp) {
-                        uint8_t *prob = &fc->a.tx32x32_prob[i][j];
-                        *prob = vp9hwdReadProbDiffUpdate( *prob);
-                    }
-                }
-            }
-        }
+    if (m_bitstreamData.GetBitstreamBuffer() == nullptr) {
+        // make sure we're initialized
+        return false;
     }
 
-    // Coefficient probability update
-    tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs);
+    m_nCallbackEventCount = 0;
 
-    if( tmp != OK ) return (tmp);
-    if (pProbSetup->transform_mode > ONLY_4X4) {
-        tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs8x8);
-        if( tmp != OK ) return (tmp);
-    }
-    if (pProbSetup->transform_mode > ALLOW_8X8) {
-        tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs16x16);
-        if( tmp != OK ) return (tmp);
-    }
-    if (pProbSetup->transform_mode > ALLOW_16X16) {
-        tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs32x32);
-        if( tmp != OK ) return (tmp);
+    // Handle discontinuity
+    if (pck->bDiscontinuity) {
+        memset(&m_nalu, 0, sizeof(m_nalu));
+        memset(&m_PTSQueue, 0, sizeof(m_PTSQueue));
+        m_bDiscontinuityReported = true;
+        m_pictureStarted = false;
     }
 
-    pProbSetup->probsDecoded = 1;
+    if (pck->bPTSValid) {
+        m_PTSQueue[m_lPTSPos].bPTSValid = true;
+        m_PTSQueue[m_lPTSPos].llPTS = pck->llPTS;
+        m_PTSQueue[m_lPTSPos].llPTSPos = m_llParsedBytes;
+        m_PTSQueue[m_lPTSPos].bDiscontinuity = m_bDiscontinuityReported;
+        m_bDiscontinuityReported = false;
+        m_lPTSPos = (m_lPTSPos + 1) % MAX_QUEUED_PTS;
+    }
 
-    for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
-        tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-        if (tmp) {
-            fc->a.mbskip_probs[k] = vp9hwdReadProbDiffUpdate( fc->a.mbskip_probs[k]);
-        }
+    if (pck->pByteStream && pck->nDataLength && m_frameIdx == -1) {
+        memset(&m_PicData, 0, sizeof(VkParserVp9PictureData));
+        m_frameIdx++;
     }
 
-    if(!pProbSetup->keyFrame)
-    {
-        for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
-            for (j = 0; j < VP9_INTER_MODES - 1; j++) {
-                tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                if (tmp) {
-                    uint8_t *prob = &fc->a.inter_mode_prob[i][j];
-                    *prob = vp9hwdReadProbDiffUpdate( *prob);
-                }
+    while ((dataSize > 0) || m_pictureStarted) {
+        if (!m_pictureStarted) {
+            if (m_bitstreamComplete) {
+                // fill bitstreambuffer from start
+                //  assuming parser will get bitstream per frame from demuxer
+                m_frameSize = dataSize;
+                m_nalu.start_offset = 0;
+                m_nalu.end_offset = 0;
             }
-        }
-        if (pProbSetup->mcomp_filter_type == SWITCHABLE) {
-            for (j = 0; j < VP9_SWITCHABLE_FILTERS+1; ++j) {
-                for (i = 0; i < VP9_SWITCHABLE_FILTERS-1; ++i) {
-                    tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                    if (tmp) {
-                        uint8_t *prob = &fc->a.switchable_interp_prob[j][i];
-                        *prob = vp9hwdReadProbDiffUpdate( *prob);
-                    }
-                }
+            if (((VkDeviceSize)dataSize > m_bitstreamDataLen) && !resizeBitstreamBuffer(dataSize - m_bitstreamDataLen)) {
+                return false;
             }
-        }
 
-        for (i = 0; i < INTRA_INTER_CONTEXTS; i++) {
-            tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-            if (tmp) {
-                uint8_t *prob = &fc->a.intra_inter_prob[i];
-                *prob = vp9hwdReadProbDiffUpdate( *prob);
+            if (dataSize >= (m_frameSize - m_nalu.end_offset)) {
+                memcpy(m_bitstreamData.GetBitstreamPtr() + m_nalu.end_offset, pDataIn, m_frameSize - m_nalu.end_offset);
+                m_pictureStarted = true;
+                pDataIn += (m_frameSize - (int)m_nalu.end_offset);
+                dataSize -= (m_frameSize - (int)m_nalu.end_offset);
+                m_nalu.end_offset = m_frameSize;
+                m_bitstreamComplete = true;
+            } else {
+                memcpy(m_bitstreamData.GetBitstreamPtr() + m_nalu.end_offset, pDataIn, dataSize);
+                m_nalu.end_offset += dataSize;
+                pDataIn += dataSize;
+                dataSize = 0;
+                m_bitstreamComplete = false;
             }
-        }
-
-        // Compound prediction mode probabilities
-        if (pProbSetup->allow_comp_inter_inter) {
-            tmp = vp9_read_literal( 1);
-            pProbSetup->comp_pred_mode = tmp;
-            if(tmp) {
-                tmp = vp9_read_literal( 1);
-                pProbSetup->comp_pred_mode += tmp;
-                if (pProbSetup->comp_pred_mode == HYBRID_PREDICTION)
-                {
-                    for (i = 0; i < COMP_INTER_CONTEXTS; i++)
-                    {
-                        tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                        if (tmp) {
-                            uint8_t *prob = &fc->a.comp_inter_prob[i];
-                            *prob = vp9hwdReadProbDiffUpdate( *prob);
+        } else {
+            uint32_t frames_processed = 0;
+            uint32_t sizeparsed = 0, framesdone = 0;
+
+            uint32_t frame_size = m_frameSize;
+
+            const uint8_t* data_start = m_bitstreamData.GetBitstreamPtr();
+            const uint8_t* data_end = data_start + m_frameSize;
+            uint32_t data_size = m_frameSize;
+            uint32_t frames_in_superframe, frame_sizes[8];
+
+            ParseSuperFrameIndex(data_start, data_size, frame_sizes, &frames_in_superframe);
+
+            do {
+                // Skip over the superframe index, if present
+                if ((data_size > 0) && ((data_start[0] & 0xe0) == 0xc0)) {
+                    const uint8_t marker = data_start[0];
+                    const uint32_t frames = (marker & 0x7) + 1;
+                    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+                    const uint32_t index_sz = 2 + mag * frames;
+
+                    if ((data_size >= index_sz) && (data_start[index_sz - 1] == marker)) {
+                        data_start += index_sz;
+                        data_size -= index_sz;
+                        if (data_start < data_end) {
+                            continue;
+                        } else {
+                            break;
                         }
                     }
                 }
-            }
-        } else {
-            pProbSetup->comp_pred_mode = SINGLE_PREDICTION_ONLY;
-        }
 
-        if (pProbSetup->comp_pred_mode != COMP_PREDICTION_ONLY) {
-            for (i = 0; i < REF_CONTEXTS; i++) {
-                tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                if (tmp) {
-                    uint8_t *prob = &fc->a.single_ref_prob[i][0];
-                    *prob = vp9hwdReadProbDiffUpdate( *prob);
-                }
-                tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                if (tmp) {
-                    uint8_t *prob = &fc->a.single_ref_prob[i][1];
-                    *prob = vp9hwdReadProbDiffUpdate( *prob);
-                }
-            }
-        }
+                // Use the correct size for this frame, if an index is present
+                if (frames_in_superframe > 0) {
+                    frame_size = frame_sizes[frames_processed];
+                    if (data_size < frame_size) {
+                        // Invalid frame size in index
+                        return false;
+                    }
+                    data_size = frame_size;
+                    m_nalu.start_offset = sizeparsed;
 
-        if (pProbSetup->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
-            for (i = 0; i < REF_CONTEXTS; i++) {
-                tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                if (tmp) {
-                    uint8_t *prob = &fc->a.comp_ref_prob[i];
-                    *prob = vp9hwdReadProbDiffUpdate( *prob);
                 }
-            }
-        }
 
-        // Superblock intra luma pred mode probabilities
-        for(j = 0 ; j < BLOCK_SIZE_GROUPS; ++j)
-        {
-            for( i = 0 ; i < 8; ++i ) {
-                tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                if (tmp) {
-                    fc->a.sb_ymode_prob[j][i] = vp9hwdReadProbDiffUpdate(
-                            fc->a.sb_ymode_prob[j][i]);
-                }
-            }
-            tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-            if (tmp) {
-                fc->a.sb_ymode_probB[j][0] = vp9hwdReadProbDiffUpdate(
-                        fc->a.sb_ymode_probB[j][0]);
-            }
-        }
+                ParseFrameHeader(frame_size);
 
-        for (j = 0; j < NUM_PARTITION_CONTEXTS; j++) {
-            for (i = 0; i < PARTITION_TYPES - 1; i++) {
-                tmp = vp9_read( VP9_DEF_UPDATE_PROB);
-                if (tmp) {
-                    uint8_t *prob = &fc->a.partition_prob[INTER_FRAME][j][i];
-                    *prob = vp9hwdReadProbDiffUpdate( *prob);
+                if (frames_in_superframe > 0) {
+                    sizeparsed += frame_sizes[framesdone];
+                    framesdone++;
                 }
-            }
+                data_start += data_size;
+                while (data_start < data_end && *data_start == 0) {
+                    data_start++;
+                }
+
+                data_size = (int)(data_end - data_start);
+                frames_processed += 1;
+            } while (data_start < data_end);
+
+            m_frameIdx++;
+            m_pictureStarted = false;
         }
 
-        // Motion vector tree update
-        tmp = vp9hwdDecodeMvUpdate(pProbSetup);
-        if( tmp != OK )
-            return (tmp);
     }
 
-    return (OK);
-}
+    if (pck->bEOS) {
+        end_of_stream();
+    }
 
-void VulkanVP9Decoder::update_nmv( vp9_prob *const p, const vp9_prob upd_p)
-{
-    uint32_t tmp = vp9_read( upd_p);
-    if (tmp) {
-#if 1 //def LOW_PRECISION_MV_UPDATE
-        *p = (vp9_read_literal( 7) << 1) | 1;
-#else
-        *p = vp9_read_literal( 8);
-#endif
+    if (pParsedBytes) {
+        *pParsedBytes = pck->nDataLength;
     }
+
+    return true;
 }
 
-uint32_t VulkanVP9Decoder::vp9hwdDecodeMvUpdate(vp9_prob_update_s *pProbSetup)
+
+bool VulkanVP9Decoder::ParseFrameHeader(uint32_t framesize) // Parses one frame of `framesize` bytes, hands it to the client for decoding and queues it for display.
 {
-    uint32_t i, j, k;
-    nvdec_nmv_context *mvctx = &pProbSetup->pProbTab->a.nmvc;
+    m_llNaluStartLocation = m_llParsedBytes; // this frame starts at the running parsed-byte position
+    m_llFrameStartLocation = m_llNaluStartLocation;
+    m_llParsedBytes += framesize; // advance the running position past the whole frame
+    //m_pSliceOffsets[0] = 0;
 
-#if 0
-    tmp = vp9_read_literal( 1);
-    if (!tmp) return HANTRO_OK;
-#endif
+    init_dbits();
+    //parse uncompressed header
+    if(!ParseUncompressedHeader())
+    {
+        assert((!"Error in ParseUncompressedVP9\n"));
+        return 0; // i.e. false: header could not be parsed
+    }
+    if (m_PicData.show_existing_frame == true)  {
+        // display an existing frame
+        VkPicIf* pDispPic = m_pBuffers[m_PicData.frame_to_show_map_idx].buffer;
+        if (pDispPic) {
+            pDispPic->AddRef(); // balanced by the Release() in lEndPicture()
+        }
+
+        AddBuffertoOutputQueue(pDispPic);
 
-    for (j = 0; j < MV_JOINTS - 1; ++j) {
-      update_nmv( &mvctx->joints[j],
-                 VP9_NMV_UPDATE_PROB);
+        return 0; // NOTE(review): evaluates to false although nothing failed on this path - confirm callers ignore the result
     }
-    for (i = 0; i < 2; ++i) {
-      update_nmv( &mvctx->sign[i], VP9_NMV_UPDATE_PROB);
-      for (j = 0; j < MV_CLASSES - 1; ++j) {
-        update_nmv( &mvctx->classes[i][j], VP9_NMV_UPDATE_PROB);
-      }
-      for (j = 0; j < CLASS0_SIZE - 1; ++j) {
-        update_nmv( &mvctx->class0[i][j], VP9_NMV_UPDATE_PROB);
-      }
-      for (j = 0; j < MV_OFFSET_BITS; ++j) {
-        update_nmv( &mvctx->bits[i][j], VP9_NMV_UPDATE_PROB);
-      }
+
+    // handle bitstream start offset alignment (for super frame)
+    uint32_t addOffset = m_nalu.start_offset & (m_bufferOffsetAlignment - 1); // remainder below the alignment boundary (assumes a power-of-two alignment - TODO confirm)
+    m_PicData.uncompressedHeaderOffset += addOffset;
+    m_PicData.compressedHeaderOffset += addOffset;
+    m_PicData.tilesOffset += addOffset;
+
+    *m_pVkPictureData = VkParserPictureData();
+    m_pVkPictureData->CodecSpecific.vp9 = m_PicData;
+    m_pVkPictureData->numSlices = m_PicData.numTiles;
+    m_pVkPictureData->bitstreamDataLen = (framesize + addOffset + m_bufferSizeAlignment - 1) & ~(m_bufferSizeAlignment - 1); // buffer is already aligned so, no issues.
+    m_pVkPictureData->bitstreamData = m_bitstreamData.GetBitstreamBuffer();
+    m_pVkPictureData->bitstreamDataOffset = m_nalu.start_offset & ~((int64_t)m_bufferOffsetAlignment - 1); // start offset rounded down; addOffset above carries the remainder
+
+    if (!BeginPicture(m_pVkPictureData)) {
+        assert(!"BeginPicture failed");
+        return false;
     }
 
-    for (i = 0; i < 2; ++i) {
-      for (j = 0; j < CLASS0_SIZE; ++j) {
-        for (k = 0; k < 3; ++k)
-          update_nmv( &mvctx->class0_fp[i][j][k], VP9_NMV_UPDATE_PROB);
-      }
-      for (j = 0; j < 3; ++j) {
-        update_nmv( &mvctx->fp[i][j], VP9_NMV_UPDATE_PROB);
-      }
+    bool bSkipped = false;
+    if (m_pClient != nullptr) {
+        // Notify client
+        if (!m_pClient->DecodePicture(m_pVkPictureData)) {
+            bSkipped = true;
+            // WARNING: skipped decoding current picture;
+        } else {
+            m_nCallbackEventCount++;
+        }
+    } else {
+        // WARNING: no valid render target for current picture
     }
 
-    if (pProbSetup->allow_high_precision_mv) {
-      for (i = 0; i < 2; ++i) {
-        update_nmv( &mvctx->class0_hp[i], VP9_NMV_UPDATE_PROB);
-        update_nmv( &mvctx->hp[i], VP9_NMV_UPDATE_PROB);
-      }
+    //m_PicData.prevIsKeyFrame = m_PicData.keyFrame;
+    //m_PicData.PrevShowFrame  = m_PicData.showFrame;
+    UpdateFramePointers(m_pCurrPic); // store the picture in every reference slot selected by refresh_frame_flags
+
+    if (m_PicData.stdPictureInfo.flags.show_frame && !bSkipped) {
+        // Call back codec for post-decode event (display the decoded frame)
+        AddBuffertoOutputQueue(m_pCurrPic);
+        m_pCurrPic = nullptr;
+    } else {
+        m_pCurrPic->Release(); // NOTE(review): unguarded dereference; presumably BeginPicture() guarantees m_pCurrPic is non-null - confirm
+        m_pCurrPic = nullptr;
     }
 
-    return (OK);
+    return 1; // i.e. true
 }
 
-uint32_t  VulkanVP9Decoder::vp9hwdDecodeCoeffUpdate(
-        uint8_t probCoeffs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1])
+void VulkanVP9Decoder::UpdateFramePointers(VkPicIf* currentPicture) // Stores currentPicture (may be null) in every reference slot whose bit is set in refresh_frame_flags.
 {
-    uint32_t i, j, k, l, m;
-    uint32_t tmp;
-    tmp = vp9_read_literal( 1);
-    if (!tmp) return OK;
-    for( i = 0; i < VP9_BLOCK_TYPES; i++ )
-    {
-        for ( j = 0; j < VP9_REF_TYPES; j++ )
-        {
-            for ( k = 0; k < VP9_COEF_BANDS; k++ )
-            {
-                for ( l = 0; l < VP9_PREV_COEF_CONTEXTS; l++ )
-                {
-                    if (l >= 3 && k == 0)
-                        continue;
-
-                    for ( m = 0; m < UNCONSTRAINED_NODES; m++ )
-                    {
-                        tmp = vp9_read( 252);
-                        CHECK_END_OF_STREAM(tmp);
-                        if ( tmp )
-                        {
-                            uint8_t old, latest;
-                            old = probCoeffs[i][j][k][l][m];
-                            latest = vp9hwdReadProbDiffUpdate( old);
-                            CHECK_END_OF_STREAM(tmp);
-
-                            probCoeffs[i][j][k][l][m] = latest;
-                        }
-                    }
-                }
+    StdVideoDecodeVP9PictureInfo* const pStdPicInfo = &m_PicData.stdPictureInfo;
+
+    uint32_t mask, ref_index = 0;
+    // Walk the refresh mask from bit 0 upwards; ref_index tracks the bit position being tested.
+    for (mask = pStdPicInfo->refresh_frame_flags; mask; mask >>= 1) {
+        if (mask & 1) {
+            if (m_pBuffers[ref_index].buffer) {
+                m_pBuffers[ref_index].buffer->Release(); // drop the slot's reference to the picture being replaced
+            }
+            m_pBuffers[ref_index].buffer = currentPicture;
+
+            if (m_pBuffers[ref_index].buffer) {
+                m_pBuffers[ref_index].buffer->AddRef(); // each slot holds its own reference
             }
         }
+        ++ref_index;
     }
-    return (OK);
-}
 
-int32_t VulkanVP9Decoder::get_unsigned_bits(uint32_t num_values)
-{
-    int32_t cat = 0;
-    if (num_values <= 1)
-        return 0;
-    num_values--;
-    while(num_values > 0)
-    {
-        cat++;
-        num_values >>= 1;
-    }
-    return cat;
+    // Invalidate these references until the next frame starts.
+    //for (int i = 0; i < ALLOWED_REFS_PER_FRAME; i++) {
+    //    pFrameInfo->activeRefIdx[i] = 0xffff;
+    //}
 }
 
-uint32_t VulkanVP9Decoder::BoolDecodeUniform( uint32_t n)
+bool VulkanVP9Decoder::AddBuffertoOutputQueue(VkPicIf* pDispPic) // Registers pDispPic in the display list, then displays and releases it; always returns true.
 {
-    int32_t value, v;
-    int32_t l = get_unsigned_bits(n);
-    int32_t m = (1 << l) - n;
-    if (!l) return 0;
-    value = vp9_read_literal( l - 1);
-    if (value >= m) {
-        v = vp9_read_literal( 1);
-        value = (value << 1) - m + v;
-    }
-    return value;
+    AddBuffertoDispQueue(pDispPic); // assign a m_DispInfo slot and a PTS
+    lEndPicture(pDispPic); // display_picture() + Release() when pDispPic is non-null
+
+    return true;
 }
 
-uint32_t VulkanVP9Decoder::vp9hwdDecodeSubExp( uint32_t k, uint32_t num_syms)
+void VulkanVP9Decoder::AddBuffertoDispQueue(VkPicIf* pDispPic) // Picks a m_DispInfo slot for pDispPic and stamps it with a presentation timestamp.
 {
-    uint32_t i=0, mk=0, value=0;
-    while (1) {
-        int32_t b = (i ? k + i - 1 : k);
-        uint32_t a = (1 << b);
-        if (num_syms <= mk + 3 * a) {
-            value = BoolDecodeUniform( num_syms - mk) + mk;
+    int lDisp = 0;
+
+    // Find an entry in m_DispInfo
+    for (int i = 0; i < MAX_DELAY; i++) {
+        if (m_DispInfo[i].pPicBuf == pDispPic) { // slot already tracks this picture: reuse it and stop searching
+            lDisp = i;
             break;
-        } else {
-            value = vp9_read_bit();
-            if (value) {
-                i++;
-                mk += a;
-            } else {
-                value = vp9_read_literal( b) + mk;
-                break;
-            }
+        }
+        if ((m_DispInfo[i].pPicBuf == nullptr) // otherwise prefer an empty slot ...
+            || ((m_DispInfo[lDisp].pPicBuf != nullptr) && (m_DispInfo[i].llPTS - m_DispInfo[lDisp].llPTS < 0))) { // ... or, while the candidate is occupied, the slot with the older PTS
+            lDisp = i;
         }
     }
-    return value;
-}
+    m_DispInfo[lDisp].pPicBuf = pDispPic;
+    m_DispInfo[lDisp].bSkipped = false;
+    m_DispInfo[lDisp].lPOC = 0;
+    m_DispInfo[lDisp].lNumFields = 2;
 
-int32_t VulkanVP9Decoder::merge_index(int32_t v, int32_t n, int32_t modulus)
-{
-    int32_t max1 = (n - 1 - modulus / 2) / modulus + 1;
-    if (v < max1) v = v * modulus + modulus / 2;
-    else
-    {
-        int32_t w;
-        v -= max1;
-        w = v;
-        v += (v + modulus - modulus / 2) / modulus;
-        while (v % modulus == modulus / 2 ||
-            w != v - (v + modulus - modulus / 2) / modulus) v++;
+    // Find a PTS in the list
+    unsigned int ndx = m_lPTSPos;
+    m_DispInfo[lDisp].llPTS = m_llExpectedPTS; // Default; overwritten below if a queued PTS matches this frame
+    // Scan the whole queue: every valid entry positioned at or before the frame start (plus slack) is consumed, and the last one consumed wins.
+    for (int k = 0; k < MAX_QUEUED_PTS; k++) {
+        if ((m_PTSQueue[ndx].bPTSValid) && (m_PTSQueue[ndx].llPTSPos - m_llFrameStartLocation <= (m_bNoStartCodes?0:3))) {
+            m_DispInfo[lDisp].bPTSValid = true;
+            m_DispInfo[lDisp].llPTS = m_PTSQueue[ndx].llPTS;
+            m_PTSQueue[ndx].bPTSValid = false; // mark the queue entry as used
+        }
+        ndx = (ndx + 1) % MAX_QUEUED_PTS;
     }
-    return v;
 }
 
-int32_t VulkanVP9Decoder::vp9_inv_recenter_nonneg(int32_t v, int32_t m)
+void VulkanVP9Decoder::lEndPicture(VkPicIf* pDispPic) // Displays pDispPic and drops one reference to it; does nothing for a null picture.
 {
-    if (v > (m << 1)) return v;
-    else if ((v & 1) == 0) return (v >> 1) + m;
-    else return m - ((v + 1) >> 1);
-}
+    if (pDispPic) {
+        display_picture(pDispPic);
+        pDispPic->Release(); // releases the reference the caller handed over with the picture
+    }
 
-int32_t VulkanVP9Decoder::inv_remap_prob(int32_t v, int32_t m)
-{
-    const int32_t n = 255;
-    v = merge_index(v, n - 1, MODULUS_PARAM);
-    m--;
-    if ((m << 1) <= n)
-        return 1 + vp9_inv_recenter_nonneg(v + 1, m);
-    else
-        return n - vp9_inv_recenter_nonneg(v + 1, n - 1 - m);
 }
 
-vp9_prob VulkanVP9Decoder::vp9hwdReadProbDiffUpdate( uint8_t oldp)
+
+bool VulkanVP9Decoder::ParseUncompressedHeader() // Reads the VP9 uncompressed frame header into m_PicData; returns false only on the reserved-bit check below (the VP9_CHECK_* macros may also return - TODO confirm).
 {
-    int32_t p;
-    int32_t delp = vp9hwdDecodeSubExp( 4, 255 );
-    p = (vp9_prob)inv_remap_prob(delp, oldp);
-    return p;
-}
+    VkParserVp9PictureData *pPicData = &m_PicData;
+    StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo;
+    StdVideoVP9ColorConfig* pStdColorConfig = &m_PicData.stdColorConfig;
+    StdVideoVP9LoopFilter* pStdLoopFilter = &m_PicData.stdLoopFilter;
+    m_frameSizeChanged = false; // ComputeImageSize() sets this to true when the dimensions differ from the last frame
 
-//Backward update
+    VP9_CHECK_FRAME_MARKER; // macro defined elsewhere; presumably validates the frame marker bits - TODO confirm
 
+    uint32_t profile = u(1); // low profile bit is read first ...
+    profile |= u(1) << 1; // ... then the high bit
+    pStdPicInfo->profile = (StdVideoVP9Profile)profile;
+    if (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3) {
+        if (u(1) != 0) { // profile 3 carries one extra bit that must be zero
+            assert(!"Invalid syntax");
+            return false;
+        }
+    }
 
-// this function assumes prob1 and prob2 are already within [1,255] range
-vp9_prob VulkanVP9Decoder::weighted_prob(int32_t prob1, int32_t prob2, int32_t factor)
-{
-    return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8);
-}
+    pPicData->show_existing_frame = u(1);
+    if (pPicData->show_existing_frame) {
+        pPicData->frame_to_show_map_idx = u(3);
+        //U32 frame_to_show = vp9parser->m_pBuffers[idx_to_show];
+        //Handle direct show:   CHECK
+        pPicData->uncompressedHeaderOffset = (consumed_bits() + 7) >> 3; // bits consumed so far, rounded up to whole bytes
+        pPicData->compressedHeaderSize = 0;
+        pStdPicInfo->refresh_frame_flags = 0; // no reference slot is updated on this path
+        pStdLoopFilter->loop_filter_level = 0;
+        return true; // nothing else is parsed for a show-existing frame
+    }
 
-vp9_prob VulkanVP9Decoder::clip_prob(uint32_t p)
-{
-    return (vp9_prob)((p > 255) ? 255u : (p < 1) ? 1u : p);
-}
+    pStdPicInfo->frame_type = (StdVideoVP9FrameType)u(1);
+    pStdPicInfo->flags.show_frame = u(1);
+    pStdPicInfo->flags.error_resilient_mode = u(1);
 
-vp9_prob VulkanVP9Decoder::get_prob(uint32_t num, uint32_t den)
-{
-    return (den == 0) ? 128u : clip_prob((num * 256 + (den >> 1)) / den);
-}
+    if (pStdPicInfo->frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY) {
+        VP9_CHECK_FRAME_SYNC_CODE;
+        ParseColorConfig(); // NOTE(review): bool result is ignored here
+        ParseFrameAndRenderSize();
+        pStdPicInfo->refresh_frame_flags = (1 << STD_VIDEO_VP9_NUM_REF_FRAMES) - 1; // key frame: all refresh bits set
+        pPicData->FrameIsIntra = true;
 
-vp9_prob VulkanVP9Decoder::get_binary_prob(uint32_t n0, uint32_t n1)
-{
-    return get_prob(n0, n0 + n1);
-}
+        for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; ++i) {
+            pPicData->ref_frame_idx[i] = 0;
+        }
+    } else { // non key frame
+        pStdPicInfo->flags.intra_only = pStdPicInfo->flags.show_frame ? 0 : u(1); // intra_only is only read for frames that are not shown
+        pPicData->FrameIsIntra = pStdPicInfo->flags.intra_only;
+        pStdPicInfo->reset_frame_context = pStdPicInfo->flags.error_resilient_mode ? 0 : u(2);
+
+        if (pStdPicInfo->flags.intra_only == 1) {
+            VP9_CHECK_FRAME_SYNC_CODE;
+            if (pStdPicInfo->profile > STD_VIDEO_VP9_PROFILE_0) {
+                ParseColorConfig(); // NOTE(review): bool result is ignored here
+            } else { // profile 0 carries no color config here: use fixed 8-bit BT.601 4:2:0 values
+                pStdColorConfig->color_space = STD_VIDEO_VP9_COLOR_SPACE_BT_601;
+                pStdColorConfig->subsampling_x = 1;
+                pStdColorConfig->subsampling_y = 1;
+                pStdColorConfig->BitDepth = 8;
+            }
 
-uint32_t VulkanVP9Decoder::convert_distribution(uint32_t i,
-                            const vp9_tree_index * tree,
-                            vp9_prob probs[],
-                            uint32_t branch_ct[][2],
-                            const uint32_t num_events[],
-                            uint32_t tok0_offset)
-{
-    uint32_t left, right;
+            pStdPicInfo->refresh_frame_flags = u(STD_VIDEO_VP9_NUM_REF_FRAMES); //for non key frame refresh only some
 
-    if (tree[i] <= 0)
-    {
-        left = num_events[-tree[i] - tok0_offset];
+            ParseFrameAndRenderSize();
+        } else { // inter frame
+            pStdPicInfo->refresh_frame_flags = u(STD_VIDEO_VP9_NUM_REF_FRAMES);
+
+            pStdPicInfo->ref_frame_sign_bias_mask = 0;
+            for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) {
+                pPicData->ref_frame_idx[i] = u(3);
+                pStdPicInfo->ref_frame_sign_bias_mask |= (u(1) << (STD_VIDEO_VP9_REFERENCE_NAME_LAST_FRAME + i)); // one sign-bias bit per reference, placed from the LAST_FRAME bit position upwards
+            }
+
+            ParseFrameAndRenderSizeWithRefs(); // frame size may be inherited from one of the references
+
+            pStdPicInfo->flags.allow_high_precision_mv = u(1);
+
+            // interpolation filter
+            bool is_filter_switchable = u(1); //mb_switchable_mcomp_filt
+            if (is_filter_switchable) {
+                pStdPicInfo->interpolation_filter = STD_VIDEO_VP9_INTERPOLATION_FILTER_SWITCHABLE;
+            } else {
+                const StdVideoVP9InterpolationFilter literal_to_filter[] = { // maps the 2-bit literal read below to the enum value
+                                            STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SMOOTH,
+                                            STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP,
+                                            STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SHARP,
+                                            STD_VIDEO_VP9_INTERPOLATION_FILTER_BILINEAR };
+                pStdPicInfo->interpolation_filter = literal_to_filter[u(2)];
+            }
+        }
     }
-    else
-    {
-        left = convert_distribution(tree[i], tree, probs, branch_ct, num_events, tok0_offset);
+
+    if (pStdPicInfo->flags.error_resilient_mode == 0) {
+         /* Refresh entropy probs,
+         * 0 == this frame probs are used only for this frame decoding,
+         * 1 == this frame probs will be stored for future reference */
+        pStdPicInfo->flags.refresh_frame_context = u(1);
+        pStdPicInfo->flags.frame_parallel_decoding_mode = u(1);
+    } else { // error-resilient frames do not read these bits; fixed values are used
+        pStdPicInfo->flags.refresh_frame_context = 0;
+        pStdPicInfo->flags.frame_parallel_decoding_mode = 1;
     }
-    if (tree[i + 1] <= 0)
-    {
-        right = num_events[-tree[i + 1] - tok0_offset];
+
+    pStdPicInfo->frame_context_idx = u(2); // always read; overridden to 0 just below for intra / error-resilient frames
+
+    if ((pPicData->FrameIsIntra == 1) || (pStdPicInfo->flags.error_resilient_mode == 1)) {
+        StdVideoVP9Segmentation* pStdSegment = &pPicData->stdSegmentation;
+        ///* Clear all previous segment data */
+        memset(pStdSegment->FeatureEnabled, 0, sizeof(pStdSegment->FeatureEnabled));
+        memset(pStdSegment->FeatureData, 0, sizeof(pStdSegment->FeatureData));
+        pStdPicInfo->frame_context_idx = 0;
     }
-    else
-    {
-        right = convert_distribution(tree[i + 1], tree, probs, branch_ct, num_events, tok0_offset);
+
+    ParseLoopFilterParams();
+    ParseQuantizationParams();
+    ParseSegmentationParams();
+    ParseTileInfo();
+
+    pPicData->compressedHeaderSize = u(16);
+    // Offsets are relative to the start of this frame; ParseFrameHeader() later adds the alignment remainder.
+    pPicData->uncompressedHeaderOffset = 0;
+    pPicData->compressedHeaderOffset = (consumed_bits() + 7) >> 3; // uncompressed header length rounded up to whole bytes
+    pPicData->tilesOffset = pPicData->compressedHeaderOffset + pPicData->compressedHeaderSize;
+
+    pPicData->ChromaFormat = (pStdColorConfig->subsampling_x == 1) && (pStdColorConfig->subsampling_y == 1) ? 1 : 0; // 1 when both chroma axes are subsampled, else 0
+    assert(pPicData->ChromaFormat); // TODO: support only YUV420
+
+    return true;
+}
+
+bool VulkanVP9Decoder::ParseColorConfig() // Reads bit depth, color space, color range and chroma subsampling into m_PicData.stdColorConfig.
+{
+    StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo;
+    StdVideoVP9ColorConfig* pStdColorConfig = &m_PicData.stdColorConfig;
+
+    if (pStdPicInfo->profile >= STD_VIDEO_VP9_PROFILE_2) {
+        pStdColorConfig->BitDepth = u(1) ? 12 : 10; // profiles 2 and 3: one bit selects 12-bit vs 10-bit
+    } else {
+        pStdColorConfig->BitDepth = 8;
+    }
+
+    pStdColorConfig->color_space = (StdVideoVP9ColorSpace)u(3);
+
+    if (pStdColorConfig->color_space != STD_VIDEO_VP9_COLOR_SPACE_RGB) {
+        pStdColorConfig->flags.color_range = u(1);
+        if ((pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_1) ||
+            (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3)) { // only profiles 1 and 3 read explicit subsampling bits
+            pStdColorConfig->subsampling_x = u(1);
+            pStdColorConfig->subsampling_y = u(1);
+            VP9_CHECK_ZERO_BIT
+        } else {
+            pStdColorConfig->subsampling_x = 1;
+            pStdColorConfig->subsampling_y = 1;
+        }
+    } else { // RGB: color range is fixed to 1
+        pStdColorConfig->flags.color_range = 1;
+        if ((pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_1) ||
+            (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3)) {
+            pStdColorConfig->subsampling_x = 0;
+            pStdColorConfig->subsampling_y = 0;
+            VP9_CHECK_ZERO_BIT
+        } // NOTE(review): for RGB with profile 0 or 2 the subsampling fields are left at their previous values
     }
-    probs[i>>1] = get_binary_prob(left, right);
-    branch_ct[i>>1][0] = left;
-    branch_ct[i>>1][1] = right;
-    return left + right;
+    return true;
 }
 
-void VulkanVP9Decoder::vp9_tree_probs_from_distribution(const vp9_tree_index * tree,
-                                        vp9_prob probs          [ /* n-1 */ ],
-                                        uint32_t branch_ct       [ /* n-1 */ ] [2],
-                                        const uint32_t num_events[ /* n */ ],
-                                        uint32_t tok0_offset)
+void VulkanVP9Decoder::ParseFrameAndRenderSize() // Reads explicit frame dimensions and the optional render dimensions into m_PicData.
 {
-    convert_distribution(0, tree, probs, branch_ct, num_events, tok0_offset);
+    VkParserVp9PictureData *pPicData = &m_PicData;
+
+    pPicData->FrameWidth = u(16) + 1; // dimensions are stored minus one in the bitstream
+    pPicData->FrameHeight = u(16) + 1;
+
+    ComputeImageSize(); // derive MiCols/MiRows/Sb64Cols/Sb64Rows and update the size-change tracking
+
+    if (u(1) == 1) { // render_and_frame_size_different
+        pPicData->renderWidth = u(16) + 1;
+        pPicData->renderHeight = u(16) + 1;
+    } else {
+        pPicData->renderWidth = pPicData->FrameWidth;
+        pPicData->renderHeight = pPicData->FrameHeight;
+    }
 }
 
-void VulkanVP9Decoder::update_coef_probs(uint8_t dst_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1],
-                        uint8_t pre_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1],
-                        uint32_t coef_counts[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES+1],
-                        uint32_t (*eob_counts)[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS],
-                        int32_t count_sat, int32_t update_factor)
+void VulkanVP9Decoder::ParseFrameAndRenderSizeWithRefs() // Takes the frame size from the first reference flagged by found_ref, or parses it explicitly when none is flagged.
 {
-    int32_t t, i, j, k, l, count;
-    uint32_t branch_ct[VP9_ENTROPY_NODES][2];
-    vp9_prob coef_probs[VP9_ENTROPY_NODES];
-    int32_t factor;
+    VkParserVp9PictureData* pPicData = &m_PicData;
 
-    //int32_t brancharr[VP9_BLOCK_TYPES][VP9_REF_TYPES][36][VP9_PREV_COEF_CONTEXTS] = {0};
-    //int32_t coeffprobarr[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS] = {0};
-    //memset(brancharr, 0, sizeof(int32_t)*VP9_BLOCK_TYPES*VP9_REF_TYPES*VP9_COEF_BANDS*VP9_PREV_COEF_CONTEXTS);
-    //memset(coeffprobarr, 0, sizeof(int32_t)*VP9_BLOCK_TYPES*VP9_REF_TYPES*VP9_COEF_BANDS*VP9_PREV_COEF_CONTEXTS);
+    bool found_ref = false;
 
-    for (i = 0; i < VP9_BLOCK_TYPES; ++i)
-    {
-        for (j = 0; j < VP9_REF_TYPES; ++j)
-        {
-            for (k = 0; k < VP9_COEF_BANDS; ++k)
-            {
-                for (l = 0; l < VP9_PREV_COEF_CONTEXTS; ++l)
-                {
-                    if (l >= 3 && k == 0)
-                        continue;
-                    vp9_tree_probs_from_distribution(vp9_coefmodel_tree,
-                                                    coef_probs, branch_ct,
-                                                     coef_counts[i][j][k][l], 0);
-                    branch_ct[0][1] = eob_counts[i][j][k][l] - branch_ct[0][0];
-                    coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]);
-                    //brancharr[i][j][k][l] = branch_ct[0][1];
-                    //coeffprobarr[i][j][k][l] = coef_probs[0];
-                    for (t = 0; t < UNCONSTRAINED_NODES; ++t)
-                    {
-                        count = branch_ct[t][0] + branch_ct[t][1];
-                        count = count > count_sat ? count_sat : count;
-                        factor = (update_factor * count / count_sat);
-                        dst_coef_probs[i][j][k][l][t] = weighted_prob(pre_coef_probs[i][j][k][l][t], coef_probs[t], factor);
-                    }
-                }
+    for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; ++i) {
+        found_ref = u(1); // one flag per reference; parsing stops at the first one that is set
+        if (found_ref) {
+            VkPicIf* pRefPic = m_pBuffers[pPicData->ref_frame_idx[i]].buffer;
+            if (pRefPic != nullptr) { // with an empty slot the previous FrameWidth/FrameHeight are kept and ComputeImageSize() is not run
+                pPicData->FrameWidth = pRefPic->decodeWidth;
+                pPicData->FrameHeight = pRefPic->decodeHeight;
+
+                ComputeImageSize();
+            }
+
+            if (u(1) == 1) { // render_and_frame_size_different
+                pPicData->renderWidth = u(16) + 1;
+                pPicData->renderHeight = u(16) + 1;
+            } else {
+                pPicData->renderWidth = pPicData->FrameWidth;
+                pPicData->renderHeight = pPicData->FrameHeight;
             }
+
+            break;
         }
     }
+    if (!found_ref) { // no reference supplied the size: read explicit frame and render sizes
+        ParseFrameAndRenderSize();
+    }
 }
 
-void VulkanVP9Decoder::adaptCoefProbs(vp9_prob_update_s *pProbSetup)
+void VulkanVP9Decoder::ComputeImageSize() // Derives block-unit dimensions from FrameWidth/FrameHeight and updates the size-change / UsePrevFrameMvs state.
 {
-    int32_t update_factor; /* denominator 256 */
-    int32_t count_sat;
+    VkParserVp9PictureData* pPicData = &m_PicData;
 
-    if(pProbSetup->keyFrame)
-    {
-        update_factor = COEF_MAX_UPDATE_FACTOR_KEY;
-        count_sat = COEF_COUNT_SAT_KEY;
-    }
-    else if (pProbSetup->prevIsKeyFrame)
-    {
-        update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; // adapt quickly
-        count_sat = COEF_COUNT_SAT_AFTER_KEY;
-    }
-    else
-    {
-        update_factor = COEF_MAX_UPDATE_FACTOR;
-        count_sat = COEF_COUNT_SAT;
+    // compute_image_size()
+    pPicData->MiCols = (pPicData->FrameWidth + 7) >> 3; // width in 8-pixel units, rounded up
+    pPicData->MiRows = (pPicData->FrameHeight + 7) >> 3; // height in 8-pixel units, rounded up
+    pPicData->Sb64Cols = (pPicData->MiCols + 7) >> 3; // groups of 8 Mi columns (64 pixels), rounded up
+    pPicData->Sb64Rows = (pPicData->MiRows + 7) >> 3; // groups of 8 Mi rows (64 pixels), rounded up
+
+    // compute_image_size() side effects (7.2.6)
+    if (((uint32_t)m_lastFrameHeight != pPicData->FrameHeight) || ((uint32_t)m_lastFrameWidth != pPicData->FrameWidth)) {
+        m_frameSizeChanged = true;
+        pPicData->stdPictureInfo.flags.UsePrevFrameMvs = false;
+    } else { /* 2.a, 2.b */
+        bool intraOnly = pPicData->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY || pPicData->stdPictureInfo.flags.intra_only; // NOTE(review): tests the current frame's intra state - confirm against spec 7.2.6
+        pPicData->stdPictureInfo.flags.UsePrevFrameMvs = m_lastShowFrame && /* 2.c */
+                                                         pPicData->stdPictureInfo.flags.error_resilient_mode == 0 && /* 2.d */
+                                                         !intraOnly /* 2.e */;
     }
+    m_lastFrameHeight = pPicData->FrameHeight; // remember this frame's values for the comparison on the next call
+    m_lastFrameWidth = pPicData->FrameWidth;
+    m_lastShowFrame = pPicData->stdPictureInfo.flags.show_frame;
 
-    update_coef_probs(pProbSetup->pProbTab->a.probCoeffs,
-                        m_PrevCtx.probCoeffs,
-                        pProbSetup->pCtxCounters->countCoeffs,
-                        pProbSetup->pCtxCounters->countEobs[TX_4X4],
-                        count_sat, update_factor);
-    update_coef_probs(pProbSetup->pProbTab->a.probCoeffs8x8,
-                        m_PrevCtx.probCoeffs8x8,
-                        pProbSetup->pCtxCounters->countCoeffs8x8,
-                        pProbSetup->pCtxCounters->countEobs[TX_8X8],
-                        count_sat, update_factor);
-    update_coef_probs(pProbSetup->pProbTab->a.probCoeffs16x16,
-                        m_PrevCtx.probCoeffs16x16,
-                        pProbSetup->pCtxCounters->countCoeffs16x16,
-                        pProbSetup->pCtxCounters->countEobs[TX_16X16],
-                        count_sat, update_factor);
-    update_coef_probs(pProbSetup->pProbTab->a.probCoeffs32x32,
-                        m_PrevCtx.probCoeffs32x32,
-                        pProbSetup->pCtxCounters->countCoeffs32x32,
-                        pProbSetup->pCtxCounters->countEobs[TX_32X32],
-                        count_sat, update_factor);
 }
 
-int32_t VulkanVP9Decoder::update_mode_ct(vp9_prob pre_prob, vp9_prob prob, uint32_t branch_ct[2])
+void VulkanVP9Decoder::ParseLoopFilterParams() // Reads loop filter level, sharpness and the optional ref/mode delta updates; deltas persist across frames in m_loopFilter*Deltas.
 {
-    int32_t factor, count = branch_ct[0] + branch_ct[1];
-    count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
-    factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
-    return weighted_prob(pre_prob, prob, factor);
-}
+    VkParserVp9PictureData *pPicData = &m_PicData;
+    StdVideoDecodeVP9PictureInfo *pStdPicInfo = &m_PicData.stdPictureInfo;
+    StdVideoVP9LoopFilter* pStdLoopFilter = &m_PicData.stdLoopFilter;
 
-int32_t VulkanVP9Decoder::update_mode_ct2(vp9_prob pre_prob, uint32_t branch_ct[2])
-{
-    return update_mode_ct(pre_prob, get_binary_prob(branch_ct[0], branch_ct[1]), branch_ct);
-}
+    if (pPicData->FrameIsIntra || (pStdPicInfo->flags.error_resilient_mode == 1)) {
+        // setup_past_independence() for loop filter params
+        memset(m_loopFilterRefDeltas, 0, sizeof(m_loopFilterRefDeltas));
+        memset(m_loopFilterModeDeltas, 0, sizeof(m_loopFilterModeDeltas));
+        m_loopFilterRefDeltas[0] = 1; // default ref deltas after a reset: {1, 0, -1, -1}
+        m_loopFilterRefDeltas[1] = 0;
+        m_loopFilterRefDeltas[2] = -1;
+        m_loopFilterRefDeltas[3] = -1;
+    }
 
-void VulkanVP9Decoder::update_mode_probs(int32_t n_modes,
-                        const vp9_tree_index *tree, uint32_t *cnt,
-                        vp9_prob *pre_probs, vp9_prob *pre_probsB,
-                        vp9_prob *dst_probs, vp9_prob *dst_probsB,
-                        uint32_t tok0_offset)
-{
-    vp9_prob probs[MAX_PROBS];
-    uint32_t branch_ct[MAX_PROBS][2];
-    int32_t t, count, factor;
+    pStdLoopFilter->loop_filter_level =  u(6);
+    pStdLoopFilter->loop_filter_sharpness = u(3);
 
-    assert(n_modes - 1 < MAX_PROBS);
-    vp9_tree_probs_from_distribution(tree, probs, branch_ct, cnt, tok0_offset);
-    for (t = 0; t < n_modes - 1; ++t)
-    {
-        count = branch_ct[t][0] + branch_ct[t][1];
-        count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
-        factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
-        if (t < 8 || dst_probsB == NULL)
-            dst_probs[t] = weighted_prob(pre_probs[t], probs[t], factor);
-        else
-            dst_probsB[t-8] = weighted_prob(pre_probsB[t-8], probs[t], factor);
+    pStdLoopFilter->flags.loop_filter_delta_enabled = u(1);
+    if (pStdLoopFilter->flags.loop_filter_delta_enabled) {
+
+        pStdLoopFilter->flags.loop_filter_delta_update = u(1);
+
+        if (pStdLoopFilter->flags.loop_filter_delta_update) {
+            // NOTE(review): update_ref_delta / update_mode_delta are OR-accumulated and not cleared here; presumably m_PicData is reset per frame elsewhere - confirm
+            for (int i = 0; i < STD_VIDEO_VP9_MAX_REF_FRAMES; i++) {
+                uint8_t update_ref_delta = u(1);
+                pStdLoopFilter->update_ref_delta |= update_ref_delta << i;
+                if (update_ref_delta == 1) {
+                    m_loopFilterRefDeltas[i] = u(6); // 6-bit magnitude ...
+                    if (u(1)) { // sign
+                        m_loopFilterRefDeltas[i] = -m_loopFilterRefDeltas[i]; // ... negated when the sign bit is set
+                    }
+                }
+            }
+
+            for (int i = 0; i < STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS; i++) {
+                uint8_t update_mode_delta = u( 1);
+                pStdLoopFilter->update_mode_delta |= update_mode_delta << i;
+                if (update_mode_delta) {
+                    m_loopFilterModeDeltas[i] = u(6);
+                    if(u(1)) { // sign
+                        m_loopFilterModeDeltas[i] = -m_loopFilterModeDeltas[i]; // negate the mode delta itself (previously negated the ref delta by mistake)
+                    }
+                }
+            }
+        }
     }
-}
 
-void VulkanVP9Decoder::tx_counts_to_branch_counts_32x32(uint32_t *tx_count_32x32p,
-                                      uint32_t (*ct_32x32p)[2])
-{
-    ct_32x32p[0][0] = tx_count_32x32p[TX_4X4];
-    ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32];
-    ct_32x32p[1][0] = tx_count_32x32p[TX_8X8];
-    ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32];
-    ct_32x32p[2][0] = tx_count_32x32p[TX_16X16];
-    ct_32x32p[2][1] = tx_count_32x32p[TX_32X32];
+    memcpy(pStdLoopFilter->loop_filter_ref_deltas, m_loopFilterRefDeltas, sizeof(m_loopFilterRefDeltas)); // publish the persistent deltas into the per-picture struct
+    memcpy(pStdLoopFilter->loop_filter_mode_deltas, m_loopFilterModeDeltas, sizeof(m_loopFilterModeDeltas));
 }
 
-void VulkanVP9Decoder::tx_counts_to_branch_counts_16x16(uint32_t *tx_count_16x16p,
-                                      uint32_t (*ct_16x16p)[2])
+void VulkanVP9Decoder::ParseQuantizationParams() // Reads the quantizer fields from the bitstream into m_PicData.stdPictureInfo
 {
-    ct_16x16p[0][0] = tx_count_16x16p[TX_4X4];
-    ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16];
-    ct_16x16p[1][0] = tx_count_16x16p[TX_8X8];
-    ct_16x16p[1][1] = tx_count_16x16p[TX_16X16];
+   VkParserVp9PictureData *pPicData = &m_PicData;
+   StdVideoDecodeVP9PictureInfo* pStdPicInfo = &pPicData->stdPictureInfo;
+
+    pStdPicInfo->base_q_idx = u(8); // 8-bit base quantizer index
+    pStdPicInfo->delta_q_y_dc = ReadDeltaQ(); // the three deltas are read in this fixed order: Y DC, UV DC, UV AC
+    pStdPicInfo->delta_q_uv_dc = ReadDeltaQ();
+    pStdPicInfo->delta_q_uv_ac = ReadDeltaQ();
 }
 
-void VulkanVP9Decoder::tx_counts_to_branch_counts_8x8(uint32_t *tx_count_8x8p,
-                                    uint32_t (*ct_8x8p)[2])
+int32_t VulkanVP9Decoder::ReadDeltaQ() // Reads one optional signed delta; returns 0 when the presence bit is clear
 {
-    ct_8x8p[0][0] =   tx_count_8x8p[TX_4X4];
-    ct_8x8p[0][1] =   tx_count_8x8p[TX_8X8];
+    int32_t delta;
+    if (u(1)) { // presence flag: a delta follows only when this bit is set
+        delta = u(4); // 4-bit magnitude
+        if (u(1)) { // sign bit
+            delta = -delta;
+        }
+        return delta;
+    } else {
+        return 0;
+    }
 }
 
-void VulkanVP9Decoder::adaptModeProbs(vp9_prob_update_s *pProbSetup)
+void VulkanVP9Decoder::ParseSegmentationParams() // Reads the segmentation fields from the bitstream into m_PicData.stdPictureInfo / m_PicData.stdSegmentation
 {
-    uint32_t i, j;
-
-    for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
-        pProbSetup->pProbTab->a.intra_inter_prob[i] = update_mode_ct2(m_PrevCtx.intra_inter_prob[i], pProbSetup->pCtxCounters->intra_inter_count[i]);
-    for (i = 0; i < COMP_INTER_CONTEXTS; i++)
-        pProbSetup->pProbTab->a.comp_inter_prob[i] = update_mode_ct2(m_PrevCtx.comp_inter_prob[i], pProbSetup->pCtxCounters->comp_inter_count[i]);
-    for (i = 0; i < REF_CONTEXTS; i++)
-        pProbSetup->pProbTab->a.comp_ref_prob[i] = update_mode_ct2(m_PrevCtx.comp_ref_prob[i], pProbSetup->pCtxCounters->comp_ref_count[i]);
-    for (i = 0; i < REF_CONTEXTS; i++)
-        for (j = 0; j < 2; j++)
-            pProbSetup->pProbTab->a.single_ref_prob[i][j] = update_mode_ct2(m_PrevCtx.single_ref_prob[i][j], pProbSetup->pCtxCounters->single_ref_count[i][j]);
-
-    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
-    {
-        update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
-                            pProbSetup->pCtxCounters->sb_ymode_counts[i],
-                            m_PrevCtx.sb_ymode_prob[i], m_PrevCtx.sb_ymode_probB[i],
-                            pProbSetup->pProbTab->a.sb_ymode_prob[i], pProbSetup->pProbTab->a.sb_ymode_probB[i], 0);
-    }
-    for (i = 0; i < VP9_INTRA_MODES; ++i)
-    {
-        update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
-                            pProbSetup->pCtxCounters->uv_mode_counts[i],
-                            m_PrevCtx.uv_mode_prob[i],
-                            m_PrevCtx.uv_mode_probB[i],
-                            pProbSetup->pProbTab->a.uv_mode_prob[i],
-                            pProbSetup->pProbTab->a.uv_mode_probB[i], 0);
-    }
-    for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
-        update_mode_probs(PARTITION_TYPES, vp9_partition_tree,
-                            pProbSetup->pCtxCounters->partition_counts[i],
-                            m_PrevCtx.partition_prob[INTER_FRAME][i], NULL,
-                            pProbSetup->pProbTab->a.partition_prob[INTER_FRAME][i], NULL, 0);
+    uint8_t segmentation_feature_bits[STD_VIDEO_VP9_SEG_LVL_MAX] = { 8, 6, 2, 0}; // number of bits read for each feature's value, indexed by feature
+    uint8_t segmentation_feature_signed[STD_VIDEO_VP9_SEG_LVL_MAX] = {1, 1, 0, 0}; // 1 when the feature value is followed by a sign bit
 
-    if (pProbSetup->mcomp_filter_type == SWITCHABLE)
-    {
-        for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i)
-        {
-            update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
-                                pProbSetup->pCtxCounters->switchable_interp_counts[i],
-                                m_PrevCtx.switchable_interp_prob[i], NULL,
-                                pProbSetup->pProbTab->a.switchable_interp_prob[i], NULL, 0);
-        }
+    StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo;
+    StdVideoVP9Segmentation* pSegment = &m_PicData.stdSegmentation;
+
+    pSegment->flags.segmentation_update_map = 0; // both flags are cleared up front so they stay 0 on the early-return path
+    pSegment->flags.segmentation_temporal_update = 0;
+
+    pStdPicInfo->flags.segmentation_enabled = u(1);
+    if (pStdPicInfo->flags.segmentation_enabled == 0) { // nothing else is read when segmentation is disabled
+        return;
     }
 
-    if (pProbSetup->transform_mode == TX_MODE_SELECT)
-    {
-        uint32_t branch_ct_8x8p[TX_SIZE_MAX_SB - 3][2];
-        uint32_t branch_ct_16x16p[TX_SIZE_MAX_SB - 2][2];
-        uint32_t branch_ct_32x32p[TX_SIZE_MAX_SB - 1][2];
-        for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
-        {
-            tx_counts_to_branch_counts_8x8(pProbSetup->pCtxCounters->tx8x8_count[i], branch_ct_8x8p);
-            for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j)
-            {
-                int32_t factor;
-                int32_t count = branch_ct_8x8p[j][0] + branch_ct_8x8p[j][1];
-                vp9_prob prob = get_binary_prob(branch_ct_8x8p[j][0], branch_ct_8x8p[j][1]);
-                count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
-                factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
-                pProbSetup->pProbTab->a.tx8x8_prob[i][j] = weighted_prob(m_PrevCtx.tx8x8_prob[i][j], prob, factor);
-            }
+    pSegment->flags.segmentation_update_map = u(1);
+
+    if (pSegment->flags.segmentation_update_map == 1) {
+
+        for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_TREE_PROBS; i++) {
+            uint8_t prob_coded = u(1); // each tree probability is preceded by a "coded" flag
+            pSegment->segmentation_tree_probs[i] = (prob_coded == 1) ? u(8) : VP9_MAX_PRBABILITY; // uncoded entries take VP9_MAX_PRBABILITY
         }
-        for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
-        {
-            tx_counts_to_branch_counts_16x16(pProbSetup->pCtxCounters->tx16x16_count[i], branch_ct_16x16p);
-            for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j)
-            {
-                int32_t factor;
-                int32_t count = branch_ct_16x16p[j][0] + branch_ct_16x16p[j][1];
-                vp9_prob prob = get_binary_prob(branch_ct_16x16p[j][0], branch_ct_16x16p[j][1]);
-                count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
-                factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
-                pProbSetup->pProbTab->a.tx16x16_prob[i][j] = weighted_prob(m_PrevCtx.tx16x16_prob[i][j], prob, factor);
+
+        pSegment->flags.segmentation_temporal_update = u(1);
+        for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_PRED_PROB; i++) {
+            if (pSegment->flags.segmentation_temporal_update) { // prediction probabilities are only coded with temporal update
+                uint8_t prob_coded = u(1);
+                pSegment->segmentation_pred_prob[i] = (prob_coded == 1) ? u(8) : VP9_MAX_PRBABILITY;
+            } else {
+                pSegment->segmentation_pred_prob[i] = VP9_MAX_PRBABILITY; // no bits are read in this case
             }
         }
-        for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
-        {
-            tx_counts_to_branch_counts_32x32(pProbSetup->pCtxCounters->tx32x32_count[i], branch_ct_32x32p);
-            for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j)
-            {
-                int32_t factor;
-                int32_t count = branch_ct_32x32p[j][0] + branch_ct_32x32p[j][1];
-                vp9_prob prob = get_binary_prob(branch_ct_32x32p[j][0], branch_ct_32x32p[j][1]);
-                count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
-                factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
-                pProbSetup->pProbTab->a.tx32x32_prob[i][j] = weighted_prob(m_PrevCtx.tx32x32_prob[i][j], prob, factor);
+    }
+
+    pSegment->flags.segmentation_update_data = u(1);
+    if (pSegment->flags.segmentation_update_data == 1) {
+        pSegment->flags.segmentation_abs_or_delta_update = u(1);
+
+        /* Clear all previous segment data */
+        memset(pSegment->FeatureEnabled, 0, sizeof(pSegment->FeatureEnabled));
+        memset(pSegment->FeatureData, 0, sizeof(pSegment->FeatureData));
+
+        for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTS; i++) {
+            for (int j = 0; j < STD_VIDEO_VP9_SEG_LVL_MAX; j++) {
+                uint8_t feature_enabled = u(1);
+                pSegment->FeatureEnabled[i] |= (feature_enabled << j); // bit j of FeatureEnabled[i] marks feature j as enabled for segment i
+
+                if (feature_enabled == 1) {
+                    pSegment->FeatureData[i][j] = u(segmentation_feature_bits[j]); // magnitude, using the per-feature bit width
+
+                    if (segmentation_feature_signed[j] == 1) {
+                        if (u(1) == 1) { // sign bit is read only for features marked as signed
+                            pSegment->FeatureData[i][j] = -pSegment->FeatureData[i][j];
+                        }
+                    }
+                }
             }
         }
+
+    } // segmentation_update_data
+}
+
+uint8_t VulkanVP9Decoder::CalcMinLog2TileCols() // Returns the smallest shift s with (VP9_MAX_TILE_WIDTH_B64 << s) >= m_PicData.Sb64Cols
+{
+    VkParserVp9PictureData* pPicData = &m_PicData;
+    uint8_t minLog2 = 0;
+
+    while (((uint32_t)VP9_MAX_TILE_WIDTH_B64 << minLog2) < pPicData->Sb64Cols) { // keep doubling the tile-column count until the shifted max width covers Sb64Cols
+        minLog2++;
     }
-    for (i = 0; i < MBSKIP_CONTEXTS; ++i)
-        pProbSetup->pProbTab->a.mbskip_probs[i] = update_mode_ct2(m_PrevCtx.mbskip_probs[i],pProbSetup->pCtxCounters->mbskip_count[i]);
+
+    return minLog2;
 }
 
-void VulkanVP9Decoder::adaptModeContext(vp9_prob_update_s *pProbSetup)
+uint8_t VulkanVP9Decoder::CalcMaxLog2TileCols() // Returns one less than the first shift (starting at 1) for which (Sb64Cols >> shift) < VP9_MIN_TILE_WIDTH_B64
 {
-    uint32_t i, j;
-    uint32_t (*mode_ct)[VP9_INTER_MODES - 1][2] = pProbSetup->pCtxCounters->inter_mode_counts;
+    VkParserVp9PictureData* pPicData = &m_PicData;
+    uint8_t maxLog2 = 1; // the search starts at 1, so the result is 0 when even one split is too narrow
 
-    for (j = 0; j < INTER_MODE_CONTEXTS; j++)
-    {
-        for (i = 0; i < VP9_INTER_MODES - 1; i++)
-        {
-            int32_t count = mode_ct[j][i][0] + mode_ct[j][i][1], factor;
-            count = count > MVREF_COUNT_SAT ? MVREF_COUNT_SAT : count;
-            factor = (MVREF_MAX_UPDATE_FACTOR * count / MVREF_COUNT_SAT);
-            pProbSetup->pProbTab->a.inter_mode_prob[j][i] = weighted_prob(m_PrevCtx.inter_mode_prob[j][i],
-                                                                        get_binary_prob(mode_ct[j][i][0], mode_ct[j][i][1]),
-                                                                        factor);
-        }
+    while ((pPicData->Sb64Cols >> maxLog2) >= VP9_MIN_TILE_WIDTH_B64) {
+        maxLog2++;
     }
+
+    return maxLog2 - 1; // step back to the last shift that still satisfied the minimum width
 }
 
-uint32_t VulkanVP9Decoder::adapt_probs(uint32_t i,
-                            const signed char* tree,
-                            vp9_prob this_probs[],
-                            const vp9_prob last_probs[],
-                            const uint32_t num_events[])
+void VulkanVP9Decoder::ParseTileInfo() // Reads tile_cols_log2 / tile_rows_log2 from the bitstream and derives m_PicData.numTiles
 {
-    vp9_prob this_prob;
-    uint32_t weight;
+    VkParserVp9PictureData* pPicData = &m_PicData;
+    StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo;
 
-    const uint32_t left = tree[i] <= 0 ? num_events[-tree[i]] : adapt_probs(tree[i], tree, this_probs, last_probs, num_events);
-    const uint32_t right = tree[i + 1] <= 0 ? num_events[-tree[i + 1]] : adapt_probs(tree[i + 1], tree, this_probs, last_probs, num_events);
-    weight = left + right;
-    if (weight)
-    {
-        this_prob = get_binary_prob(left, right);
-        weight = weight > MV_COUNT_SAT ? MV_COUNT_SAT : weight;
-        this_prob = weighted_prob(last_probs[i >> 1], this_prob, MV_MAX_UPDATE_FACTOR * weight / MV_COUNT_SAT);
+    uint8_t minLog2TileCols = CalcMinLog2TileCols();
+    uint8_t maxLog2TileCols = CalcMaxLog2TileCols();
+
+    pStdPicInfo->tile_cols_log2 = minLog2TileCols; // start at the minimum; each set bit below raises it by one
+
+    while (pStdPicInfo->tile_cols_log2 < maxLog2TileCols) { // no increment bits are read once the maximum is reached
+        if (u(1) == 1) { // increment_tile_cols_log2
+            pStdPicInfo->tile_cols_log2++;
+        } else {
+            break;
+        }
     }
-    else
-    {
-        this_prob = last_probs[i >> 1];
+
+    pStdPicInfo->tile_rows_log2 = u(1);
+    if (pStdPicInfo->tile_rows_log2 == 1) { // a second bit is read only when the first is set, giving 0, 1 or 2
+        pStdPicInfo->tile_rows_log2 += u(1);
     }
-    this_probs[i >> 1] = this_prob;
-    return left + right;
+
+    pPicData->numTiles = (1 << pStdPicInfo->tile_rows_log2) * (1 << pStdPicInfo->tile_cols_log2); // tile rows times tile columns
 }
 
-void VulkanVP9Decoder::adapt_prob(vp9_prob *dest, vp9_prob prep, uint32_t ct[2])
+void VulkanVP9Decoder::ParseSuperFrameIndex(const uint8_t* data, uint32_t data_sz, uint32_t frame_sizes[8], uint32_t* frame_count) // Looks for a superframe index at the end of data; fills frame_sizes and sets *frame_count (0 when none is found)
 {
-    const int32_t count = std::min<int32_t>(ct[0] + ct[1], MV_COUNT_SAT);
-    if (count)
-    {
-        const vp9_prob newp = get_binary_prob(ct[0], ct[1]);
-        const int32_t factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT;
-        *dest = weighted_prob(prep, newp, factor);
+    const uint8_t final_byte = ((data != nullptr) && (data_sz > 0)) ? data[data_sz - 1] : 0; // empty input: 0 never matches the marker, so no out-of-bounds read
+    *frame_count = 0;
+
+    if ((final_byte & 0xe0) == 0xc0) { // marker byte: top three bits are 110
+        const uint32_t frames = (final_byte & 0x7) + 1; // 1..8 frames, which fits frame_sizes[8]
+        const uint32_t mag = ((final_byte >> 3) & 0x3) + 1; // bytes per frame size, 1..4
+        const uint32_t index_sz = 2 + mag * frames; // leading marker + sizes + trailing marker
+
+        if (data_sz >= index_sz && data[data_sz - index_sz] == final_byte) { // the same marker byte must open the index
+            // found a valid superframe index
+            const uint8_t* x = data + data_sz - index_sz + 1;
+            for (uint32_t i = 0; i < frames; i++) {
+                uint32_t this_sz = 0;
+                for (uint32_t j = 0; j < mag; j++) {
+                    this_sz |= static_cast<uint32_t>(*x++) << (j * 8); // little-endian; widen first so a shift by 24 stays unsigned
+                }
+                frame_sizes[i] = this_sz;
+            }
+            *frame_count = frames;
+        }
     }
-    else
-        *dest = prep;
 }
 
-void VulkanVP9Decoder::adaptNmvProbs(vp9_prob_update_s *pProbSetup)
-{
-    uint32_t usehp = pProbSetup->allow_high_precision_mv;
-    uint32_t i, j;
-
-    adapt_probs(0, vp9_mv_joint_tree,
-                pProbSetup->pProbTab->a.nmvc.joints,
-                m_PrevCtx.nmvc.joints,
-                pProbSetup->pCtxCounters->nmvcount.joints);
-    for (i = 0; i < 2; ++i)
-    {
-        adapt_prob(&pProbSetup->pProbTab->a.nmvc.sign[i],
-                    m_PrevCtx.nmvc.sign[i],
-                    pProbSetup->pCtxCounters->nmvcount.sign[i]);
-        adapt_probs(0, vp9_mv_class_tree,
-                    pProbSetup->pProbTab->a.nmvc.classes[i],
-                    m_PrevCtx.nmvc.classes[i],
-                    pProbSetup->pCtxCounters->nmvcount.classes[i]);
-        adapt_probs(0, vp9_mv_class0_tree,
-                    pProbSetup->pProbTab->a.nmvc.class0[i],
-                    m_PrevCtx.nmvc.class0[i],
-                    pProbSetup->pCtxCounters->nmvcount.class0[i]);
-        for (j = 0; j < MV_OFFSET_BITS; ++j)
-        {
-            adapt_prob(&pProbSetup->pProbTab->a.nmvc.bits[i][j],
-            m_PrevCtx.nmvc.bits[i][j],
-            pProbSetup->pCtxCounters->nmvcount.bits[i][j]);
+bool VulkanVP9Decoder::BeginPicture(VkParserPictureData* pnvpd) // Builds the sequence info, (re)initializes the sequence, ensures a current picture buffer and fills pnvpd; returns false on failure
+{
+    VkParserVp9PictureData* const pPicDataVP9 = &pnvpd->CodecSpecific.vp9;
+    StdVideoVP9ColorConfig* pStdColorConfig = &pPicDataVP9->stdColorConfig;
+    StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo;
+
+    uint32_t width = pPicDataVP9->FrameWidth;
+    uint32_t height = pPicDataVP9->FrameHeight;
+
+    VkParserSequenceInfo nvsi = m_ExtSeqInfo; // start from the externally supplied sequence info, then override the VP9 fields
+    nvsi.eCodec = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR;
+    nvsi.nChromaFormat = pPicDataVP9->ChromaFormat;
+    nvsi.nMaxWidth = std::max(width, pPicDataVP9->renderWidth); // max size covers both the coded and the render size
+    nvsi.nMaxHeight = std::max(height, pPicDataVP9->renderHeight);
+    nvsi.nCodedWidth = width;
+    nvsi.nCodedHeight = height;
+    nvsi.nDisplayWidth = pPicDataVP9->renderWidth;
+    nvsi.nDisplayHeight = pPicDataVP9->renderHeight;
+    nvsi.lDARWidth = pPicDataVP9->renderWidth;
+    nvsi.lDARHeight = pPicDataVP9->renderHeight;
+    nvsi.bProgSeq = true; // VP9 doesn't have explicit interlaced coding.
+    nvsi.nMinNumDecodeSurfaces = 9;
+    nvsi.uBitDepthLumaMinus8 = pStdColorConfig->BitDepth - 8; // luma and chroma use the same bit depth
+    nvsi.uBitDepthChromaMinus8 = pStdColorConfig->BitDepth - 8;
+    nvsi.codecProfile = pStdPicInfo->profile;
+
+    // Reset decoder only if decode RT orig width is less than required coded width
+    if ((nvsi.nMaxWidth > m_rtOrigWidth) || (nvsi.nMaxHeight > m_rtOrigHeight)) {
+        m_rtOrigWidth = nvsi.nMaxWidth;
+        m_rtOrigHeight = nvsi.nMaxHeight;
+
+        for (int i = 0; i < 8; i++) { // drop every held reference buffer
+            if (m_pBuffers[i].buffer != nullptr) {
+                m_pBuffers[i].buffer->Release();
+                m_pBuffers[i].buffer = nullptr;
+            }
         }
-        for (j = 0; j < CLASS0_SIZE; ++j)
-        {
-        adapt_probs(0, vp9_mv_fp_tree,
-                    pProbSetup->pProbTab->a.nmvc.class0_fp[i][j],
-                    m_PrevCtx.nmvc.class0_fp[i][j],
-                    pProbSetup->pCtxCounters->nmvcount.class0_fp[i][j]);
+        if (m_pCurrPic != nullptr) { // and the current picture, so a new one is allocated below
+            m_pCurrPic->Release();
+            m_pCurrPic = nullptr;
         }
-        adapt_probs(0, vp9_mv_fp_tree,
-                    pProbSetup->pProbTab->a.nmvc.fp[i],
-                    m_PrevCtx.nmvc.fp[i],
-                    pProbSetup->pCtxCounters->nmvcount.fp[i]);
     }
-    if (usehp)
-    {
-        for (i = 0; i < 2; ++i)
-        {
-            adapt_prob(&pProbSetup->pProbTab->a.nmvc.class0_hp[i],
-                        m_PrevCtx.nmvc.class0_hp[i],
-                        pProbSetup->pCtxCounters->nmvcount.class0_hp[i]);
-            adapt_prob(&pProbSetup->pProbTab->a.nmvc.hp[i],
-                        m_PrevCtx.nmvc.hp[i],
-                        pProbSetup->pCtxCounters->nmvcount.hp[i]);
-        }
+
+    if (!init_sequence(&nvsi)) {
+        assert(!"init_sequence failed!");
+        return false;
     }
-}
 
-void VulkanVP9Decoder::UpdateBackwardProbability(vp9_prob_update_s *pProbSetup)
-{
-    if (!pProbSetup->errorResilient && !pProbSetup->FrameParallelDecoding)
-    {
-        adaptCoefProbs(pProbSetup); //vp9_adapt_coef_probs
-        if(!pProbSetup->keyFrame && !pProbSetup->intraOnly)
-        {
-            adaptModeProbs(pProbSetup); //vp9_adapt_mode_probs
-            adaptModeContext(pProbSetup);
-            adaptNmvProbs(pProbSetup); //vp9_adapt_mv_probs
-        }
+    // Allocate a buffer for the current picture
+    if (m_pCurrPic == nullptr) {
+        m_pClient->AllocPictureBuffer(&m_pCurrPic);
+        assert(m_pCurrPic);
+        if (m_pCurrPic == nullptr) { return false; } // asserts compile out in release builds: fail instead of dereferencing null
+        m_pCurrPic->decodeWidth = width;
+        m_pCurrPic->decodeHeight = height;
     }
-    //vp9hwdStoreProbs
-    if (pProbSetup->RefreshEntropyProbs)
-    {
-        memcpy(&m_EntropyLast[pProbSetup->frameContextIdx], pProbSetup->pProbTab, sizeof(m_EntropyLast[pProbSetup->frameContextIdx]));
+
+    pnvpd->PicWidthInMbs = nvsi.nCodedWidth >> 4; // coded size divided by 16
+    pnvpd->FrameHeightInMbs = nvsi.nCodedHeight >> 4;
+    pnvpd->pCurrPic = m_pCurrPic;
+    pnvpd->progressive_frame = 1;
+    pnvpd->ref_pic_flag = 1;
+    pnvpd->intra_pic_flag = pPicDataVP9->FrameIsIntra;
+    pnvpd->chroma_format = pPicDataVP9->ChromaFormat;
+
+    // Reference slots information
+    for (int i = 0; i < STD_VIDEO_VP9_NUM_REF_FRAMES; i++) {
+        vkPicBuffBase* pb = reinterpret_cast<vkPicBuffBase*>(m_pBuffers[i].buffer);
+        pPicDataVP9->pic_idx[i] = pb ? pb->m_picIdx : -1; // -1 marks an empty reference slot
     }
-    //VP9HwdUpdateRefs
+
+    return true;
 }
diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp
index 135af873..83b968ef 100644
--- a/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp
+++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp
@@ -20,9 +20,6 @@
 #include "nvVulkanVideoUtils.h"
 #include "nvVulkanVideoParser.h"
 #include <algorithm>
-#ifdef ENABLE_VP9_DECODER
-#include <VulkanVP9Decoder.h>
-#endif
 
 VulkanVideoDecoder::VulkanVideoDecoder(VkVideoCodecOperationFlagBitsKHR std)
     : m_refCount(0)
@@ -646,6 +643,7 @@ void VulkanVideoDecoder::end_of_stream()
 #include "VulkanH264Decoder.h"
 #include "VulkanH265Decoder.h"
 #include "VulkanAV1Decoder.h"
+#include "VulkanVP9Decoder.h"
 
 static nvParserLogFuncType gParserLogFunc = nullptr;
 static int gLogLevel = 0;
@@ -739,12 +737,17 @@ VkResult CreateVulkanVideoDecodeParser(VkVideoCodecOperationFlagBitsKHR videoCod
         }
         nvVideoDecodeParser =  VkSharedBaseObj<VulkanAV1Decoder>(new VulkanAV1Decoder(videoCodecOperation, pParserPictureData->isAnnexB));
         break;
-#ifdef ENABLE_VP9_DECODER
     case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
-        // TODO: This will not work and is only here as a placeholder to get the compiler to include and link the class.
+        if ((pStdExtensionVersion == nullptr) ||
+                (0 != strcmp(pStdExtensionVersion->extensionName, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME)) ||
+                (pStdExtensionVersion->specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { // name and spec version must both match the VP9 decode std header exactly
+             nvParserErrorLog("The requested decoder VP9 Codec STD version is NOT supported\n");
+             nvParserErrorLog("The supported decoder VP9 Codec STD version is version %d of %s\n",
+                    VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME);
+             return VK_ERROR_INCOMPATIBLE_DRIVER;
+        }
         nvVideoDecodeParser =  VkSharedBaseObj<VulkanVP9Decoder>(new VulkanVP9Decoder(videoCodecOperation));
         break;
-#endif
     default:
         nvParserErrorLog("Unsupported codec type!!!\n");
     }
diff --git a/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp b/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp
index 37888fcc..c93a5141 100644
--- a/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp
+++ b/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp
@@ -140,6 +140,8 @@ class FFmpegDemuxer : public VideoStreamDemuxer {
                 bsf = av_bsf_get_by_name("hevc_mp4toannexb");
             } else if (videoCodec == AV_CODEC_ID_AV1) {
                 bsf = av_bsf_get_by_name("av1_metadata");
+            } else if (videoCodec == AV_CODEC_ID_VP9) {
+                bsf = av_bsf_get_by_name("vp9_metadata");
             }
 
             if (!bsf) {
@@ -286,6 +288,10 @@ class FFmpegDemuxer : public VideoStreamDemuxer {
                 videoCodecId = AV_CODEC_ID_H264;
             } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) {
                 videoCodecId = AV_CODEC_ID_HEVC;
+            } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
+                videoCodecId = AV_CODEC_ID_AV1;
+            } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+                videoCodecId = AV_CODEC_ID_VP9;
             }
         }
 
@@ -307,12 +313,8 @@ class FFmpegDemuxer : public VideoStreamDemuxer {
         case AV_CODEC_ID_H264       : return VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR;
         case AV_CODEC_ID_HEVC       : return VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
         case AV_CODEC_ID_VP8        : assert(false); return VkVideoCodecOperationFlagBitsKHR(0);
-    #ifdef VK_EXT_video_decode_vp9
         case AV_CODEC_ID_VP9        : return VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR;
-    #endif // VK_EXT_video_decode_vp9
-    #ifdef vulkan_video_codec_av1std_decode
         case AV_CODEC_ID_AV1        : return VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
-    #endif
         case AV_CODEC_ID_MJPEG      : assert(false); return VkVideoCodecOperationFlagBitsKHR(0);
         default                     : assert(false); return VkVideoCodecOperationFlagBitsKHR(0);
         }
@@ -365,6 +367,7 @@ class FFmpegDemuxer : public VideoStreamDemuxer {
         case AV_PIX_FMT_YUVJ420P:    ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting color_range
         case AV_PIX_FMT_YUV420P:     ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
         case AV_PIX_FMT_YUV420P10LE: ///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
+        case AV_PIX_FMT_YUV420P12LE: ///< planar YUV 4:2:0, 18bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
         case AV_PIX_FMT_YUV420P16LE: ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
         case AV_PIX_FMT_YUV420P16BE: ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
             return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR;
@@ -391,7 +394,7 @@ class FFmpegDemuxer : public VideoStreamDemuxer {
 
     virtual uint32_t GetProfileIdc() const
     {
-        switch (FFmpegToVkCodecOperation(videoCodec)) {
+        switch ((uint32_t)FFmpegToVkCodecOperation(videoCodec)) {
             case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
             {
                 switch(profile) {
@@ -431,6 +434,19 @@ class FFmpegDemuxer : public VideoStreamDemuxer {
                 }
             }
             break;
+            case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
+            {
+                switch(profile) {
+                    case STD_VIDEO_VP9_PROFILE_0:
+                    case STD_VIDEO_VP9_PROFILE_1:
+                    case STD_VIDEO_VP9_PROFILE_2:
+                    case STD_VIDEO_VP9_PROFILE_3:
+                        break;
+                    default:
+                        std::cerr << "\nInvalid VP9 profile: " << profile << std::endl;
+                }
+            }
+            break;
             default:
                 std::cerr << "\nInvalid codec type: " << FFmpegToVkCodecOperation(videoCodec) << std::endl;
         }
diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp
index dc980474..6bff5ce5 100644
--- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp
+++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp
@@ -39,12 +39,8 @@ const char* VkVideoDecoder::GetVideoCodecString(VkVideoCodecOperationFlagBitsKHR
         { VK_VIDEO_CODEC_OPERATION_NONE_KHR, "None" },
         { VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR, "AVC/H.264" },
         { VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR, "H.265/HEVC" },
-#ifdef VK_EXT_video_decode_vp9
         { VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR, "VP9" },
-#endif // VK_EXT_video_decode_vp9
-#ifdef vulkan_video_codec_av1std
         { VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR, "AV1" },
-#endif // VK_EXT_video_decode_av1
     };
 
     for (unsigned i = 0; i < sizeof(aCodecName) / sizeof(aCodecName[0]); i++) {
@@ -126,6 +122,7 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo
             VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR
             | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR
             | VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR
+            | VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR
     );
     assert(videoCodecs != VK_VIDEO_CODEC_OPERATION_NONE_KHR);
 
@@ -637,12 +634,12 @@ int VkVideoDecoder::CopyOptimalToLinearImage(VkCommandBuffer& commandBuffer,
     copyRegion[0].dstSubresource.layerCount = 1;
     copyRegion[1].extent.width = copyRegion[0].extent.width;
     if (mpInfo->planesLayout.secondaryPlaneSubsampledX != 0) {
-        copyRegion[1].extent.width /= 2;
+        copyRegion[1].extent.width = (copyRegion[1].extent.width + 1) / 2;
     }
 
     copyRegion[1].extent.height = copyRegion[0].extent.height;
     if (mpInfo->planesLayout.secondaryPlaneSubsampledY != 0) {
-        copyRegion[1].extent.height /= 2;
+        copyRegion[1].extent.height = (copyRegion[1].extent.height + 1) / 2;
     }
 
     copyRegion[1].extent.depth = 1;
@@ -706,7 +703,7 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
     assert(pCurrFrameDecParams->bitstreamData->GetMaxSize() >= pCurrFrameDecParams->bitstreamDataLen);
 
     pCurrFrameDecParams->decodeFrameInfo.srcBuffer = pCurrFrameDecParams->bitstreamData->GetBuffer();
-    assert(pCurrFrameDecParams->bitstreamDataOffset == 0);
+    //assert(pCurrFrameDecParams->bitstreamDataOffset == 0);
     assert(pCurrFrameDecParams->firstSliceIndex == 0);
     // TODO: Assert if bitstreamDataOffset is aligned to VkVideoCapabilitiesKHR::minBitstreamBufferOffsetAlignment
     pCurrFrameDecParams->decodeFrameInfo.srcBufferOffset = pCurrFrameDecParams->bitstreamDataOffset;
@@ -774,7 +771,12 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
     }
 
     pCurrFrameDecParams->dpbSetupPictureResource.codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode.
-    pCurrFrameDecParams->dpbSetupPictureResource.codedExtent = m_codedExtent;
+    // Setup picture may have different resolution compared to previous frames in VP9
+    // So, set the codedExtent earlier in VP9 specific code and skip it here.
+    // TODO: Do the same for other codecs
+    if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+        pCurrFrameDecParams->dpbSetupPictureResource.codedExtent = m_codedExtent;
+    }
 
     if (dpbSetupPictureResourceInfo.currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
         imageBarriers[numDpbBarriers] = dpbBarrierTemplates[0];
@@ -816,7 +818,14 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
         }
 
         pOutputPictureResource->codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode.
-        pOutputPictureResource->codedExtent = m_codedExtent;
+        // Setup picture may have different resolution compared to previous frames in VP9
+        // So, set the codedExtent earlier in VP9 specific code and skip it here.
+        // TODO: Do the same for other codecs
+        if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+            pOutputPictureResource->codedExtent = m_codedExtent;
+        } else {
+            pOutputPictureResource->codedExtent = pCurrFrameDecParams->dpbSetupPictureResource.codedExtent;
+        }
 
         // For Output Distinct transition the image to DECODE_DST
         if (pOutputPictureResourceInfo->currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
@@ -909,9 +918,11 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
 
             if (pictureResourcesInfo[resId].image != VK_NULL_HANDLE) {
 
-                // FIXME: m_codedExtent should have already be populated in in the
+                // FIXME: m_codedExtent should have already been populated in the
                 // picture resource above from the FB.
-                pCurrFrameDecParams->pictureResources[resId].codedExtent = m_codedExtent;
+                if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+                    pCurrFrameDecParams->pictureResources[resId].codedExtent = m_codedExtent;
+                }
                 // FIXME: This parameter must to be adjusted based on the interlaced mode.
                 pCurrFrameDecParams->pictureResources[resId].codedOffset = { 0, 0 };
             }
@@ -926,8 +937,12 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
         }
     }
 
-    decodeBeginInfo.referenceSlotCount = pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount;
-    decodeBeginInfo.pReferenceSlots = pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots;
+    // Add setup reference slot details to decodeBeginInfo
+    decodeBeginInfo.referenceSlotCount = pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount +
+                                            (pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot ? 1 : 0);
+    decodeBeginInfo.pReferenceSlots = (pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount > 0) ?
+                                            pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots :
+                                            pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot;
 
     m_imageSpecsIndex.displayOut = ((m_dpbAndOutputCoincide == VK_TRUE) &&
                                     !(pDecodePictureInfo->flags.applyFilmGrain == VK_TRUE)) ?
@@ -951,12 +966,16 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
     VulkanVideoFrameBuffer::FrameSynchronizationInfo frameSynchronizationInfo = VulkanVideoFrameBuffer::FrameSynchronizationInfo();
     frameSynchronizationInfo.hasFrameCompleteSignalFence = true;
     frameSynchronizationInfo.hasFrameCompleteSignalSemaphore = true;
+    frameSynchronizationInfo.hasFilterSignalSemaphore = m_enableDecodeComputeFilter;
+    frameSynchronizationInfo.hasFrameConsumerSignalSemaphore = false;
     frameSynchronizationInfo.syncOnFrameCompleteFence = true;
     frameSynchronizationInfo.syncOnFrameConsumerDoneFence = true;
     frameSynchronizationInfo.imageSpecsIndex = m_imageSpecsIndex;
 
     VkSharedBaseObj<VkVideoRefCountBase> currentVkPictureParameters;
-    if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { // AV1
+    if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+        decodeBeginInfo.videoSessionParameters = VK_NULL_HANDLE;
+    } else if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { // AV1
 
         bool valid = pCurrFrameDecParams->pStdSps->GetClientObject(currentVkPictureParameters);
         assert(valid);
@@ -1039,14 +1058,9 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
     assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, frameSynchronizationInfo.frameCompleteFence));
 
     VkFence frameCompleteFence = frameSynchronizationInfo.frameCompleteFence;
-    VkSemaphore frameCompleteSemaphore = frameSynchronizationInfo.frameCompleteSemaphore;
-    VkSemaphore frameConsumerDoneSemaphore = frameSynchronizationInfo.frameConsumerDoneSemaphore;
-    // By default, the frameCompleteSemaphore is the videoDecodeCompleteSemaphore.
-    // If the video frame filter is enabled, since it is executed after the decoder's queue,
-    // the filter will provide its own semaphore for the video decoder to signal, instead.
-    // Then the frameCompleteSemaphore will be signaled by the filter of its completion.
+    VkSemaphore videoDecodeCompleteSemaphore = frameSynchronizationInfo.frameCompleteSemaphore;
+    VkSemaphore  consumerCompleteSemaphore = frameSynchronizationInfo.consumerCompleteSemaphore;
     VkFence videoDecodeCompleteFence = frameCompleteFence;
-    VkSemaphore videoDecodeCompleteSemaphore = frameCompleteSemaphore;
 
     VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
     beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
@@ -1136,34 +1150,43 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
 
         assert(filterCmdBuffer != nullptr);
 
-        // frameCompleteSemaphore is the semaphore that the filter is going to signal on completion when enabled.
-        // The videoDecodeCompleteSemaphore semaphore will be signaled by the decoder and then used by the filter to wait on.
-
+        // videoDecodeCompleteFence is the fence that the filter is going to signal on completion when enabled.
         videoDecodeCompleteFence     = filterCmdBuffer->GetFence();
-        videoDecodeCompleteSemaphore = filterCmdBuffer->GetSemaphore();
     }
 
     const uint32_t waitSemaphoreMaxCount = 3;
-    VkSemaphore waitSemaphores[waitSemaphoreMaxCount] = { VK_NULL_HANDLE };
+    VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[waitSemaphoreMaxCount]{};
 
     const uint32_t signalSemaphoreMaxCount = 3;
-    VkSemaphore signalSemaphores[signalSemaphoreMaxCount] = { VK_NULL_HANDLE };
+    VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[signalSemaphoreMaxCount]{};
 
     uint32_t waitSemaphoreCount = 0;
-    if (frameConsumerDoneSemaphore != VK_NULL_HANDLE) {
-        waitSemaphores[waitSemaphoreCount] = frameConsumerDoneSemaphore;
+    uint32_t signalSemaphoreCount = 0;
+
+    if (consumerCompleteSemaphore != VK_NULL_HANDLE) {
+
+        waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr;
+        waitSemaphoreInfos[waitSemaphoreCount].semaphore = consumerCompleteSemaphore;
+        waitSemaphoreInfos[waitSemaphoreCount].value = frameSynchronizationInfo.frameConsumerDoneTimelineValue;
+        waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR |
+                                                           VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR |
+                                                           VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT;
+        waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0;
         waitSemaphoreCount++;
     }
 
-    uint32_t signalSemaphoreCount = 0;
     if (videoDecodeCompleteSemaphore != VK_NULL_HANDLE) {
-        signalSemaphores[signalSemaphoreCount] = videoDecodeCompleteSemaphore;
+
+        signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr;
+        signalSemaphoreInfos[signalSemaphoreCount].semaphore = videoDecodeCompleteSemaphore;
+        signalSemaphoreInfos[signalSemaphoreCount].value = frameSynchronizationInfo.decodeCompleteTimelineValue;
+        signalSemaphoreInfos[signalSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR;
+        signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0;
         signalSemaphoreCount++;
     }
 
-    uint64_t waitTlSemaphoresValues[waitSemaphoreMaxCount] = { 0 /* ignored for binary semaphores */ };
-    uint64_t signalTlSemaphoresValues[signalSemaphoreMaxCount] = { 0 /* ignored for binary semaphores */ };
-    VkTimelineSemaphoreSubmitInfo timelineSemaphoreInfos = {};
     if (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) {
 
         if (m_dumpDecodeData) {
@@ -1172,67 +1195,53 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
             std::cout << "\t TL semaphore value: " << currSemValue << ", status: " << semResult << std::endl;
         }
 
-        waitSemaphores[waitSemaphoreCount] = m_hwLoadBalancingTimelineSemaphore;
-        waitTlSemaphoresValues[waitSemaphoreCount] = m_decodePicCount - 1; // wait for the previous value to be signaled
+        waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr;
+        waitSemaphoreInfos[waitSemaphoreCount].semaphore = m_hwLoadBalancingTimelineSemaphore;
+        waitSemaphoreInfos[waitSemaphoreCount].value = m_decodePicCount - 1; // wait for the previous value to be signaled
+        waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR;
+        waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0;
         waitSemaphoreCount++;
 
-        signalSemaphores[signalSemaphoreCount] = m_hwLoadBalancingTimelineSemaphore;
-        signalTlSemaphoresValues[signalSemaphoreCount] = m_decodePicCount; // signal the current m_decodePicCount value
+        signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr;
+        signalSemaphoreInfos[signalSemaphoreCount].semaphore = m_hwLoadBalancingTimelineSemaphore;
+        signalSemaphoreInfos[signalSemaphoreCount].value = m_decodePicCount; // signal the current m_decodePicCount value
+        signalSemaphoreInfos[signalSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR;
+        signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0;
         signalSemaphoreCount++;
 
-        timelineSemaphoreInfos.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
-        timelineSemaphoreInfos.pNext = NULL;
         assert(waitSemaphoreCount < waitSemaphoreMaxCount);
-        timelineSemaphoreInfos.waitSemaphoreValueCount = waitSemaphoreCount;
-        timelineSemaphoreInfos.pWaitSemaphoreValues = waitTlSemaphoresValues;
         assert(signalSemaphoreCount < signalSemaphoreMaxCount);
-        timelineSemaphoreInfos.signalSemaphoreValueCount = signalSemaphoreCount;
-        timelineSemaphoreInfos.pSignalSemaphoreValues = signalTlSemaphoresValues;
-        if (m_dumpDecodeData) {
-            std::cout << "\t Wait for: " << (waitSemaphoreCount ? waitTlSemaphoresValues[waitSemaphoreCount - 1] : 0) <<
-                             ", signal at " << signalTlSemaphoresValues[signalSemaphoreCount - 1] << std::endl;
-        }
     }
 
     assert(waitSemaphoreCount <= waitSemaphoreMaxCount);
     assert(signalSemaphoreCount <= signalSemaphoreMaxCount);
 
-    VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr };
-    const VkPipelineStageFlags videoDecodeSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-    submitInfo.pNext = (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) ? &timelineSemaphoreInfos : nullptr;
-    submitInfo.waitSemaphoreCount = waitSemaphoreCount;
-    submitInfo.pWaitSemaphores = waitSemaphores;
-    submitInfo.pWaitDstStageMask = &videoDecodeSubmitWaitStages;
-    submitInfo.commandBufferCount = 1;
-    submitInfo.pCommandBuffers = &frameDataSlot.commandBuffer;
-    submitInfo.signalSemaphoreCount = signalSemaphoreCount;
-    submitInfo.pSignalSemaphores = signalSemaphores;
-
-    if (m_dumpDecodeData) {
-        if (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) {
-            std::cout << "\t\t waitSemaphoreValueCount: " << timelineSemaphoreInfos.waitSemaphoreValueCount << std::endl;
-            std::cout << "\t pWaitSemaphoreValues: " << timelineSemaphoreInfos.pWaitSemaphoreValues[0] << ", " <<
-                                                timelineSemaphoreInfos.pWaitSemaphoreValues[1] << ", " <<
-                                                timelineSemaphoreInfos.pWaitSemaphoreValues[2] << std::endl;
-            std::cout << "\t\t signalSemaphoreValueCount: " << timelineSemaphoreInfos.signalSemaphoreValueCount << std::endl;
-            std::cout << "\t pSignalSemaphoreValues: " << timelineSemaphoreInfos.pSignalSemaphoreValues[0] << ", " <<
-                                                timelineSemaphoreInfos.pSignalSemaphoreValues[1] << ", " <<
-                                                timelineSemaphoreInfos.pSignalSemaphoreValues[2] << std::endl;
-        }
-
-        std::cout << "\t waitSemaphoreCount: " << submitInfo.waitSemaphoreCount << std::endl;
-        std::cout << "\t\t pWaitSemaphores: " << submitInfo.pWaitSemaphores[0] << ", " <<
-                                                 submitInfo.pWaitSemaphores[1] << ", " <<
-                                                 submitInfo.pWaitSemaphores[2] << std::endl;
-        std::cout << "\t signalSemaphoreCount: " << submitInfo.signalSemaphoreCount << std::endl;
-        std::cout << "\t\t pSignalSemaphores: " << submitInfo.pSignalSemaphores[0] << ", " <<
-                                             submitInfo.pSignalSemaphores[1] << ", " <<
-                                             submitInfo.pSignalSemaphores[2] << std::endl << std::endl;
-    }
+    VkCommandBufferSubmitInfoKHR cmdBufferInfos;
+    cmdBufferInfos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR;
+    cmdBufferInfos.pNext = nullptr;
+    cmdBufferInfos.commandBuffer = frameDataSlot.commandBuffer;
+    cmdBufferInfos.deviceMask = 0;
+
+    // Submit info
+    VkSubmitInfo2KHR submitInfo { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr };
+    submitInfo.flags = 0;
+    submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount;
+    submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos;
+    submitInfo.commandBufferInfoCount = 1;
+    submitInfo.pCommandBufferInfos = &cmdBufferInfos;
+    submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount;
+    submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos;
 
     assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, videoDecodeCompleteFence));
-    VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::DECODE, m_currentVideoQueueIndx,
-                                                           1, &submitInfo, videoDecodeCompleteFence);
+    VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::DECODE,
+                                                           m_currentVideoQueueIndx,
+                                                           1,
+                                                           &submitInfo,
+                                                           videoDecodeCompleteFence,
+                                                           "Video Decode",
+                                                           picNumInDecodeOrder);
     assert(result == VK_SUCCESS);
     if (result != VK_SUCCESS) {
         return -1;
@@ -1368,11 +1377,23 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
         result = filterCmdBuffer->EndCommandBufferRecording(cmdBuf);
         assert(result == VK_SUCCESS);
 
-        if (false) std::cout << currPicIdx << " : OUT view: " << outputImageView->GetImageView() << ", signalSem: " <<  frameCompleteSemaphore << std::endl << std::flush;
-        assert(videoDecodeCompleteSemaphore != frameCompleteSemaphore);
-        result = m_yuvFilter->SubmitCommandBuffer(1, filterCmdBuffer->GetCommandBuffer(),
-                                                  1, &videoDecodeCompleteSemaphore,
-                                                  1, &frameCompleteSemaphore,
+        // Wait for the decoder to complete.
+        const VkPipelineStageFlags2KHR waitDecoderStageMasks = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR;
+
+        // Signal at the compute-shader stage once the filter is done.
+        const uint64_t computeCompleteTimelineValue = frameSynchronizationInfo.filterCompleteTimelineValue;
+        const VkPipelineStageFlags2KHR signalComputeStageMasks = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR;
+
+        result = m_yuvFilter->SubmitCommandBuffer(1, // commandBufferCount
+                                                  filterCmdBuffer->GetCommandBuffer(),
+                                                  1, // waitSemaphoreCount
+                                                  &videoDecodeCompleteSemaphore,
+                                                  &frameSynchronizationInfo.decodeCompleteTimelineValue,
+                                                  &waitDecoderStageMasks,
+                                                  1, // signalSemaphoreCount
+                                                  &videoDecodeCompleteSemaphore,
+                                                  &computeCompleteTimelineValue,
+                                                  &signalComputeStageMasks,
                                                   frameCompleteFence);
         assert(result == VK_SUCCESS);
         filterCmdBuffer->SetCommandBufferSubmitted();
diff --git a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp
index 129ec34a..bbb68e66 100644
--- a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp
+++ b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp
@@ -173,6 +173,30 @@ struct nvVideoAV1PicParameters {
     nvVideoDecodeAV1DpbSlotInfo dpbRefList[nvVideoDecodeAV1DpbSlotInfo::TOTAL_REFS_PER_FRAME + 1];
 };
 
+// Per-reference VP9 DPB slot data; at present it only carries a coded extent.
+struct nvVideoDecodeVP9DpbSlotInfo
+{
+    enum {
+        // Number of reference frame types (including intra type)
+        TOTAL_REFS_PER_FRAME = 8,
+    };
+    VkExtent2D codedExtent{}; // value-initialized, so width and height start at 0
+
+    void Invalidate() { memset(this, 0x00, sizeof(*this)); } // zeroes every byte of this struct
+
+    // Placeholder: VP9 STD per-slot data would be set here (none is defined yet).
+
+};
+// Storage for one VP9 picture's decode parameters; vkPictureInfo is initialized with the address of stdPictureInfo.
+struct nvVideoVP9PicParameters {
+    StdVideoDecodeVP9PictureInfo stdPictureInfo; // its address is stored in vkPictureInfo below
+    StdVideoVP9ColorConfig stdColorConfig;
+    StdVideoVP9Segmentation stdSegment;
+    StdVideoVP9LoopFilter stdLoopFilter;
+    VkVideoDecodeVP9PictureInfoKHR vkPictureInfo{ VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR, nullptr, &stdPictureInfo }; // NOTE(review): holds a pointer to a sibling member, so a copied struct still points at the original's stdPictureInfo
+    nvVideoDecodeVP9DpbSlotInfo dpbRefList[nvVideoDecodeVP9DpbSlotInfo::TOTAL_REFS_PER_FRAME + 1]; // one entry more than TOTAL_REFS_PER_FRAME
+};
+
 static vkPicBuffBase* GetPic(VkPicIf* pPicBuf)
 {
     return (vkPicBuffBase*)pPicBuf;
@@ -550,9 +574,9 @@ class VulkanVideoParser : public VkParserVideoDecodeClient,
             // Vulkan Video parser.cpp -- maintains its own indices.
             // We can use more indices in the parser than the spec. (Ther eis a max of 8 but we can use 16)
             // Reason for single structure for DPB -- the array is passed in the callback (in the proxy of the processor)
-            // It checks which references are in use. 
+            // It checks which references are in use.
             // 2nd Finds which DPB references were assigned before - and reuses indices.
-            // The local array maintains the 
+            // The local array maintains the
             pRefPicInfo->flags.disable_frame_end_update_cdf = ;
             pRefPicInfo->flags.segmentation_enabled = ;
             pRefPicInfo->base_q_idx = ;
@@ -574,6 +598,14 @@ class VulkanVideoParser : public VkParserVideoDecodeClient,
             }
         }
 
+        void setVP9PictureData(nvVideoDecodeVP9DpbSlotInfo* pDpbSlotInfo, // unused by this stub
+            VkVideoReferenceSlotInfoKHR* pReferenceSlots,                 // unused by this stub
+            uint32_t dpbEntryIdx, uint32_t dpbSlotIndex)                  // unused by this stub
+        {
+            // TODO: VP9 dpb management is not implemented; this stub only asserts.
+            assert(0); // compiled out when NDEBUG is defined, leaving an empty function
+        }
+
     } dpbH264Entry;
 
     virtual int32_t AddRef();
@@ -685,6 +717,14 @@ class VulkanVideoParser : public VkParserVideoDecodeClient,
         VkVideoReferenceSlotInfoKHR* pReferenceSlots,
         int8_t* pGopReferenceImagesIndexes,
         int32_t* pCurrAllocatedSlotIndex);
+    uint32_t FillDpbVP9State(const VkParserPictureData* pd,
+        VkParserVp9PictureData* pin,
+        nvVideoDecodeVP9DpbSlotInfo* pDpbSlotInfo,
+        StdVideoDecodeVP9PictureInfo* pStdPictureInfo,
+        uint32_t maxRefPictures,
+        VkVideoReferenceSlotInfoKHR* pReferenceSlots,
+        int8_t* pGopReferenceImagesIndexes,
+        int32_t* pCurrAllocatedSlotIndex);
 
     int8_t AllocateDpbSlotForCurrentH264(
         vkPicBuffBase* pPic, StdVideoDecodeH264PictureInfoFlags currPicFlags,
@@ -693,7 +733,8 @@ class VulkanVideoParser : public VkParserVideoDecodeClient,
                                          int8_t presetDpbSlot);
     int8_t AllocateDpbSlotForCurrentAV1(vkPicBuffBase* pPic, bool isReference,
                                          int8_t presetDpbSlot);
-    
+    int8_t AllocateDpbSlotForCurrentVP9(vkPicBuffBase* pPic, bool isReference,
+                                         int8_t presetDpbSlot);
 
 protected:
     VkSharedBaseObj<VulkanVideoDecodeParser>    m_vkParser;
@@ -944,6 +985,7 @@ VkResult VulkanVideoParser::Initialize(
     static const VkExtensionProperties h264StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION };
     static const VkExtensionProperties h265StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION };
     static const VkExtensionProperties av1StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION };
+    static const VkExtensionProperties vp9StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION };
 
     const VkExtensionProperties* pStdExtensionVersion = NULL;
     if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) {
@@ -952,6 +994,8 @@ VkResult VulkanVideoParser::Initialize(
         pStdExtensionVersion = &h265StdExtensionVersion;
     } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
         pStdExtensionVersion = &av1StdExtensionVersion;
+    } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+        pStdExtensionVersion = &vp9StdExtensionVersion;
     } else {
         assert(!"Unsupported codec type");
         return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR;
@@ -1098,6 +1142,14 @@ int32_t VulkanVideoParser::BeginSequence(const VkParserSequenceInfo* pnvsi)
 
     if (pnvsi->eCodec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
         maxDpbSlots = 9;
+        if ((pnvsi->nCodedWidth <= m_nvsi.nCodedWidth) && (pnvsi->nCodedHeight <= m_nvsi.nCodedHeight)) {
+            return 1;
+        }
+    } else if (pnvsi->eCodec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+        maxDpbSlots = 9;
+        if ((pnvsi->nMaxWidth <= m_nvsi.nMaxWidth) && (pnvsi->nMaxHeight <= m_nvsi.nMaxHeight)) {
+            return 1;
+        }
     }
 
     uint32_t configDpbSlots = (pnvsi->nMinNumDpbSlots > 0) ? pnvsi->nMinNumDpbSlots : maxDpbSlots;
@@ -1120,8 +1172,8 @@ int32_t VulkanVideoParser::BeginSequence(const VkParserSequenceInfo* pnvsi)
     }
 
     m_nvsi = *pnvsi;
-    m_nvsi.nMaxWidth = pnvsi->nCodedWidth;
-    m_nvsi.nMaxHeight = pnvsi->nCodedHeight;
+    m_nvsi.nMaxWidth = pnvsi->nMaxWidth;
+    m_nvsi.nMaxHeight = pnvsi->nMaxHeight;
 
     m_maxNumDecodeSurfaces = pnvsi->nMinNumDecodeSurfaces;
 
@@ -1814,7 +1866,7 @@ uint32_t VulkanVideoParser::FillDpbAV1State(
         uint8_t yellowSquare[] = { 0xf0, 0x9f,  0x9f, 0xa8, 0x00 };
         printf("\nSlotsInUse: ");
         for (int i = 0; i < 9; i++) {
-            printf("%-2s ", (slotsInUse & (1<<i)) ? (i == dpbSlot ? (char*)yellowSquare : (char*)greenSquare) : (char*)redSquare); 
+            printf("%-2s ", (slotsInUse & (1<<i)) ? (i == dpbSlot ? (char*)yellowSquare : (char*)greenSquare) : (char*)redSquare);
         }
         printf("\n");
     }
@@ -1822,6 +1874,160 @@ uint32_t VulkanVideoParser::FillDpbAV1State(
     return referenceIndex;
 }
 
+// Builds the VP9 reference-slot list for the current picture and allocates the current picture's DPB slot.
+uint32_t VulkanVideoParser::FillDpbVP9State(
+        const VkParserPictureData* pd,                  // in: current picture (pCurrPic, current_dpb_id, ref_pic_flag)
+        VkParserVp9PictureData* pin,                    // in: VP9 parser data (ref_frame_idx, pic_idx, stdPictureInfo)
+        nvVideoDecodeVP9DpbSlotInfo* pDpbSlotInfo,      // out: codedExtent for each emitted reference
+        StdVideoDecodeVP9PictureInfo*,                  // unused
+        uint32_t,                                       // unused
+        VkVideoReferenceSlotInfoKHR* pReferenceSlots,   // out: one entry per emitted reference
+        int8_t* pGopReferenceImagesIndexes,             // out: picture index for each emitted reference
+        int32_t* pCurrAllocatedSlotIndex)               // out: DPB slot allocated for the current picture
+{
+    assert(m_maxNumDpbSlots <= STD_VIDEO_VP9_NUM_REF_FRAMES + 1);
+    uint32_t refDpbUsedAndValidMask = 0;
+    uint32_t referenceIndex = 0;
+
+    if (m_dumpParserData) {
+        std::cout << "Ref frames data: " << std::endl;
+    }
+
+    if (m_dumpDpbData) {
+        printf(";;;; ======= VP9 DPB fill begin %d =======\n", m_nCurrentPictureID);
+        printf("ref_frame_idx: ");
+        for (int i = 0 ; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) {
+            printf("%02d ", i);
+        }
+        printf("\nref_frame_idx: ");
+        for (int i = 0 ; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) {
+            printf("%02d ", pin->ref_frame_idx[i]);
+        }
+        printf("\n");
+
+        printf("m_pictureToDpbSlotMap: ");
+        for (int i = 0; i < MAX_FRM_CNT; i++) {
+            printf("%02d ", i);
+        }
+        printf("\nm_pictureToDpbSlotMap: ");
+        for (int i = 0; i < MAX_FRM_CNT; i++) {
+            printf("%02d ", m_pictureToDpbSlotMap[i]);
+        }
+        printf("\n");
+
+        printf("ref_frame_picture: ");
+        for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) {
+            printf("%02d ", inIdx);
+        }
+        printf("\nref_frame_picture: ");
+        for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) {
+            int8_t picIdx = pin->pic_idx[inIdx];
+            printf("%02d ", picIdx);
+        }
+        printf("\n");
+    }
+
+    bool isKeyFrame = (pin->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY);
+
+    // Count how many reference names resolve to each DPB slot; slots with a zero count are skipped below.
+    int8_t activeReferences[32];
+    memset(activeReferences, 0, sizeof(activeReferences));
+    for (size_t refName = 0; refName < STD_VIDEO_VP9_REFS_PER_FRAME; refName++) {
+        int8_t picIdx = isKeyFrame ? -1 : pin->pic_idx[pin->ref_frame_idx[refName]];
+        if (picIdx < 0) {
+            continue; // key frame, or this reference name has no picture
+        }
+        int8_t dpbSlot = GetPicDpbSlot(picIdx);
+        assert(dpbSlot >= 0);
+        if (dpbSlot < 0) {
+            continue; // same guard as the loop below: never index activeReferences with -1
+        }
+        activeReferences[dpbSlot]++;
+    }
+    // Emit one reference slot per distinct picture in pic_idx that has a DPB slot and is actively referenced.
+    for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) {
+        int8_t picIdx = isKeyFrame ? -1 : pin->pic_idx[inIdx];
+        int8_t dpbSlot = -1;
+        if ((picIdx >= 0) && !(refDpbUsedAndValidMask & (1 << picIdx))) {
+            dpbSlot = GetPicDpbSlot(picIdx);
+
+            assert(dpbSlot >= 0); // There is still content hitting this assert.
+            if (dpbSlot < 0) {
+                continue;
+            }
+
+            refDpbUsedAndValidMask |= (1 << picIdx);
+            m_dpb[dpbSlot].MarkInUse(m_nCurrentPictureID);
+            if (activeReferences[dpbSlot] == 0) {
+                continue; // slot stays marked in use but is not referenced by this frame
+            }
+
+            pReferenceSlots[referenceIndex].sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR;
+            pReferenceSlots[referenceIndex].pNext = nullptr;
+            pReferenceSlots[referenceIndex].slotIndex = dpbSlot;
+            pGopReferenceImagesIndexes[referenceIndex] = picIdx;
+
+            VkExtent2D &codedExtent = pDpbSlotInfo[referenceIndex].codedExtent;
+            codedExtent.width = m_dpb[dpbSlot].getPictureResource()->decodeWidth;
+            codedExtent.height = m_dpb[dpbSlot].getPictureResource()->decodeHeight;
+
+            referenceIndex++;
+        }
+    }
+
+    if (m_dumpDpbData) {
+        printf(";;; pReferenceSlots (%d): ", referenceIndex);
+        for (size_t i =0 ;i < referenceIndex; i++) {
+            printf("%02d ", pReferenceSlots[i].slotIndex);
+        }
+        printf("\n");
+    }
+
+    ResetPicDpbSlots(refDpbUsedAndValidMask); // NOTE(review): presumably releases slots of pictures missing from the mask — confirm
+
+    // Take into account the current picture now.
+    int8_t currPicIdx = GetPicIdx(pd->pCurrPic);
+    assert(currPicIdx >= 0);
+    if (currPicIdx >= 0) {
+        refDpbUsedAndValidMask |= (1 << currPicIdx);
+    }
+
+    // NOTE(charlie): Most likely we can consider isReference = refresh_frame_flags != 0;
+    // However, the AMD fw interface appears to always need a setup slot & a destination resource,
+    // so it's not clear what to properly do in that case.
+    int8_t dpbSlot = AllocateDpbSlotForCurrentAV1(GetPic(pd->pCurrPic),
+        true /* isReference */, pd->current_dpb_id);
+    // NOTE(review): the AV1 allocator is used for VP9 here although AllocateDpbSlotForCurrentVP9 is declared — confirm intent.
+    assert(dpbSlot >= 0);
+
+    *pCurrAllocatedSlotIndex = dpbSlot;
+    assert(!(dpbSlot < 0));
+    if (dpbSlot >= 0) {
+        assert(pd->ref_pic_flag);
+    }
+
+    if (m_dumpDpbData) {
+        printf("SlotsInUse: ");
+        uint32_t slotsInUse = m_dpb.getSlotInUseMask();
+        for (int i = 0; i < 9; i++) {
+            printf("%02d ", i);
+        }
+        uint8_t greenSquare[] = { 0xf0, 0x9f,  0x9f, 0xa9, 0x00 };
+        uint8_t redSquare[] = { 0xf0, 0x9f,  0x9f, 0xa5, 0x00 };
+        uint8_t yellowSquare[] = { 0xf0, 0x9f,  0x9f, 0xa8, 0x00 };
+        printf("\nSlotsInUse: ");
+        for (int i = 0; i < 9; i++) {
+            printf("%-2s ", (slotsInUse & (1<<i)) ? (i == dpbSlot ? (char*)yellowSquare : (char*)greenSquare) : (char*)redSquare);
+        }
+        printf("\n");
+    }
+
+    // Number of entries written to pReferenceSlots; the current picture's
+    // slot is reported separately through pCurrAllocatedSlotIndex.
+    return referenceIndex;
+}
+
+
 int8_t VulkanVideoParser::AllocateDpbSlotForCurrentH264(
     vkPicBuffBase* pPic, StdVideoDecodeH264PictureInfoFlags currPicFlags,
     int8_t /*presetDpbSlot*/)
@@ -1876,7 +2082,7 @@ int8_t VulkanVideoParser::AllocateDpbSlotForCurrentAV1(vkPicBuffBase* pPic,
     if (isReference) {
         dpbSlot = GetPicDpbSlot(currPicIdx); // use the associated slot, if not allocate a new slot.
         if (dpbSlot < 0) {
-            dpbSlot = m_dpb.AllocateSlot(); 
+            dpbSlot = m_dpb.AllocateSlot();
             assert(dpbSlot >= 0);
             SetPicDpbSlot(currPicIdx, dpbSlot); // Assign the dpbSlot to the current picture index.
             m_dpb[dpbSlot].setPictureResource(pPic, m_nCurrentPictureID); // m_nCurrentPictureID is our main index.
@@ -1942,6 +2148,7 @@ bool VulkanVideoParser::DecodePicture(
     nvVideoH264PicParameters h264;
     nvVideoH265PicParameters hevc;
     nvVideoAV1PicParameters av1;
+    nvVideoVP9PicParameters vp9;
     // };
 
     if (m_decoderHandler == NULL) {
@@ -2072,11 +2279,6 @@ bool VulkanVideoParser::DecodePicture(
             h264.stdPictureInfo.flags, &setupReferenceSlot.slotIndex);
         // TODO: Remove it is for debugging only. Reserved fields must be set to "0".
         pout->stdPictureInfo.reserved1 = pCurrFrameDecParams->numGopReferenceSlots;
-        assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0));
-        if (setupReferenceSlot.slotIndex >= 0) {
-            setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource;
-            pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot;
-        }
         if (pCurrFrameDecParams->numGopReferenceSlots) {
             assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS);
             for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots;
@@ -2092,6 +2294,15 @@ bool VulkanVideoParser::DecodePicture(
             pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = NULL;
             pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0;
         }
+        assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0));
+        if (setupReferenceSlot.slotIndex >= 0) {
+            setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource;
+            pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot;
+
+            // add the setup slot to the end of referenceSlots
+            assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS);
+            referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot;
+        }
 
     }
     else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) {
@@ -2181,11 +2392,6 @@ bool VulkanVideoParser::DecodePicture(
             referenceSlots, pCurrFrameDecParams->pGopReferenceImagesIndexes,
             &setupReferenceSlot.slotIndex);
 
-        assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0));
-        if (setupReferenceSlot.slotIndex >= 0) {
-            setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource;
-            pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot;
-        }
 
         if (pCurrFrameDecParams->numGopReferenceSlots) {
             assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS);
@@ -2203,6 +2409,16 @@ bool VulkanVideoParser::DecodePicture(
             pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0;
         }
 
+        assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0));
+        if (setupReferenceSlot.slotIndex >= 0) {
+            setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource;
+            pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot;
+
+            // add the setup slot to the end of referenceSlots
+            assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS);
+            referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot;
+        }
+
         if (m_dumpParserData) {
             for (int32_t i = 0; i < HEVC_MAX_DPB_SLOTS; i++) {
                 std::cout << "\tdpbIndex: " << i;
@@ -2245,7 +2461,7 @@ bool VulkanVideoParser::DecodePicture(
         }
 
         nvVideoDecodeAV1DpbSlotInfo* dpbSlotsAv1 = av1.dpbRefList;
-        pCurrFrameDecParams->numGopReferenceSlots = 
+        pCurrFrameDecParams->numGopReferenceSlots =
             FillDpbAV1State(pd,
                             pin,
                             dpbSlotsAv1,
@@ -2255,12 +2471,6 @@ bool VulkanVideoParser::DecodePicture(
                             pCurrFrameDecParams->pGopReferenceImagesIndexes,
                             &setupReferenceSlot.slotIndex);
 
-        assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0));
-        if (setupReferenceSlot.slotIndex >= 0) {
-            setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource;
-            pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot;
-        }
-
         if (pCurrFrameDecParams->numGopReferenceSlots) {
             assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS);
             for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots;
@@ -2276,6 +2486,17 @@ bool VulkanVideoParser::DecodePicture(
             pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0;
         }
 
+
+        assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0));
+        if (setupReferenceSlot.slotIndex >= 0) {
+            setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource;
+            pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot;
+
+            // add the setup slot to the end of referenceSlots
+            assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS);
+            referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot;
+        }
+
         // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface.
         //pDecodePictureInfo->videoFrameType = static_cast<uint16_t>(pin->frame_type);
         pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan?
@@ -2317,10 +2538,102 @@ bool VulkanVideoParser::DecodePicture(
         pin->tileInfo.pMiRowStarts = pin->MiRowStarts;
 
         pDecodePictureInfo->flags.applyFilmGrain = pin->std_info.flags.apply_grain;
+
+    } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+
+        VkParserVp9PictureData* pin = &pd->CodecSpecific.vp9;
+
+        vp9 = nvVideoVP9PicParameters();
+        StdVideoDecodeVP9PictureInfo* pStdPicInfo   = &vp9.stdPictureInfo;
+        VkVideoDecodeVP9PictureInfoKHR* pVkPicInfo = &vp9.vkPictureInfo;
+        nvVideoDecodeVP9DpbSlotInfo* pNvDpbSlotInfo = vp9.dpbRefList;
+
+        // Copy std data and link pointers
+        memcpy(pStdPicInfo, &pin->stdPictureInfo, sizeof(StdVideoDecodeVP9PictureInfo));
+        memcpy(&vp9.stdColorConfig, &pin->stdColorConfig, sizeof(StdVideoVP9ColorConfig));
+        pStdPicInfo->pColorConfig = &vp9.stdColorConfig;
+        if (pStdPicInfo->flags.segmentation_enabled == 1) {
+            memcpy(&vp9.stdSegment, &pin->stdSegmentation, sizeof(StdVideoVP9Segmentation));
+            pStdPicInfo->pSegmentation = &vp9.stdSegment;
+        }
+        memcpy(&vp9.stdLoopFilter, &pin->stdLoopFilter, sizeof(StdVideoVP9LoopFilter));
+        pStdPicInfo->pLoopFilter = &vp9.stdLoopFilter;
+
+        pVkPicInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR;
+        pVkPicInfo->pStdPictureInfo = pStdPicInfo;
+
+        VkVideoDecodeInfoKHR* pKhrDecodeInfo = &pCurrFrameDecParams->decodeFrameInfo;
+        pKhrDecodeInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR;
+        pKhrDecodeInfo->pNext = pVkPicInfo;
+
+        // dpb slots
+        pCurrFrameDecParams->numGopReferenceSlots = FillDpbVP9State(pd,
+                                                                    pin,
+                                                                    pNvDpbSlotInfo,
+                                                                    pStdPicInfo,
+                                                                    9,
+                                                                    referenceSlots,
+                                                                    pCurrFrameDecParams->pGopReferenceImagesIndexes,
+                                                                    &setupReferenceSlot.slotIndex);
+
+        if (pCurrFrameDecParams->numGopReferenceSlots) {
+            assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS);
+            for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots;
+                 dpbEntryIdx++) {
+                pCurrFrameDecParams->pictureResources[dpbEntryIdx].sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR;
+                pCurrFrameDecParams->pictureResources[dpbEntryIdx].codedExtent = pNvDpbSlotInfo[dpbEntryIdx].codedExtent;
+                referenceSlots[dpbEntryIdx].pPictureResource = &pCurrFrameDecParams->pictureResources[dpbEntryIdx];
+            }
+
+            pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = referenceSlots;
+            pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = pCurrFrameDecParams->numGopReferenceSlots;
+        } else {
+            pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = nullptr;
+            pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0;
+        }
+
+        assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0));
+        if (setupReferenceSlot.slotIndex >= 0) {
+            pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.width = pin->FrameWidth;
+            pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.height = pin->FrameHeight;
+            setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource;
+            pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot;
+
+            // add the setup slot to the end of referenceSlots
+            assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS);
+            referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot;
+        }
+
+        // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface.
+        //pDecodePictureInfo->videoFrameType = static_cast<uint16_t>(pin->frame_type);
+        pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan?
+
+        bool isKeyFrame = pin->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY;
+        for (size_t i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) {
+            int8_t picIdx = isKeyFrame ? -1 : pin->pic_idx[pin->ref_frame_idx[i]];
+            if (picIdx < 0) {
+                pVkPicInfo->referenceNameSlotIndices[i] = -1;
+                continue;
+            }
+
+            int8_t dpbSlot = GetPicDpbSlot(picIdx);
+            assert(dpbSlot >= 0);
+            pVkPicInfo->referenceNameSlotIndices[i] = dpbSlot;
+        }
+
+        pVkPicInfo->uncompressedHeaderOffset = pin->uncompressedHeaderOffset;
+        pVkPicInfo->compressedHeaderOffset = pin->compressedHeaderOffset;
+        pVkPicInfo->tilesOffset = pin->tilesOffset;
+
+        // Use the current frame's width and height for display and writing to output
+        pDecodePictureInfo->displayWidth = pin->FrameWidth;
+        pDecodePictureInfo->displayHeight = pin->FrameHeight;
     }
 
-    pDecodePictureInfo->displayWidth  = m_nvsi.nDisplayWidth;
-    pDecodePictureInfo->displayHeight = m_nvsi.nDisplayHeight;
+    if (m_codecType != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+        pDecodePictureInfo->displayWidth  = m_nvsi.nDisplayWidth;
+        pDecodePictureInfo->displayHeight = m_nvsi.nDisplayHeight;
+    }
 
     bRet = (m_decoderHandler->DecodePictureWithParameters(pCurrFrameDecParams, pDecodePictureInfo) >= 0);
 
@@ -2405,6 +2718,11 @@ VkResult vulkanCreateVideoParser(
             assert(!"Decoder AV1 Codec version is NOT supported");
             return VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR;
         }
+    } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
+        if (!pStdExtensionVersion || strcmp(pStdExtensionVersion->extensionName, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME) || (pStdExtensionVersion->specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) {
+            assert(!"Decoder VP9 Codec version is NOT supported");
+            return VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR;
+        }
     } else {
         assert(!"Decoder Codec is NOT supported");
         return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR;
diff --git a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp
index f8e925b0..2550bd7e 100644
--- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp
+++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp
@@ -51,18 +51,17 @@ class NvPerFrameDecodeResources : public vkPicBuffBase {
     NvPerFrameDecodeResources()
         : m_picDispInfo()
         , m_frameCompleteFence()
-        , m_frameCompleteSemaphore()
         , m_frameConsumerDoneFence()
-        , m_frameConsumerDoneSemaphore()
+        , m_frameCompleteTimelineValue()
+        , m_frameConsumerDoneTimelineValue()
         , m_imageSpecsIndex()
         , m_hasFrameCompleteSignalFence(false)
         , m_hasFrameCompleteSignalSemaphore(false)
         , m_hasConsummerSignalFence(false)
-        , m_hasConsummerSignalSemaphore(false)
+        , m_useConsummerSignalSemaphore(false)
         , m_inDecodeQueue(false)
         , m_inDisplayQueue(false)
         , m_ownedByConsummer(false)
-        , m_vkDevCtx()
         , m_imageViewState()
     {
     }
@@ -75,14 +74,14 @@ class NvPerFrameDecodeResources : public vkPicBuffBase {
 
     VkResult init( const VulkanDeviceContext* vkDevCtx);
 
-    void Deinit();
+    void Deinit(const VulkanDeviceContext* vkDevCtx); // the caller supplies the device context; this class no longer stores one
 
     NvPerFrameDecodeResources (const NvPerFrameDecodeResources &srcObj) = delete;
     NvPerFrameDecodeResources (NvPerFrameDecodeResources &&srcObj) = delete;
 
     ~NvPerFrameDecodeResources()
     {
-        Deinit();
+        Deinit(nullptr); // with a null context Deinit() destroys no Vulkan objects; it asserts both fences were already released
     }
 
     VkSharedBaseObj<VkImageResourceView>& GetImageView(uint8_t imageTypeIdx) {
@@ -149,14 +148,14 @@ class NvPerFrameDecodeResources : public vkPicBuffBase {
 
     VkParserDecodePictureInfo m_picDispInfo;
     VkFence m_frameCompleteFence;
-    VkSemaphore m_frameCompleteSemaphore;
     VkFence m_frameConsumerDoneFence;
-    VkSemaphore m_frameConsumerDoneSemaphore;
+    uint64_t m_frameCompleteTimelineValue;
+    uint64_t m_frameConsumerDoneTimelineValue;
     DecodeFrameBufferIf::ImageSpecsIndex m_imageSpecsIndex;
     uint32_t m_hasFrameCompleteSignalFence : 1;
     uint32_t m_hasFrameCompleteSignalSemaphore : 1;
     uint32_t m_hasConsummerSignalFence : 1;
-    uint32_t m_hasConsummerSignalSemaphore : 1;
+    uint32_t m_useConsummerSignalSemaphore : 1;
     uint32_t m_inDecodeQueue : 1;
     uint32_t m_inDisplayQueue : 1;
     uint32_t m_ownedByConsummer : 1;
@@ -171,8 +170,8 @@ class NvPerFrameDecodeResources : public vkPicBuffBase {
 
     // The filter's pool node
     VkSharedBaseObj<VkVideoRefCountBase>  filterPoolNode;
+
 private:
-    const VulkanDeviceContext*  m_vkDevCtx;
     std::array<ImageViewState, DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES> m_imageViewState;
 };
 
@@ -180,7 +179,10 @@ class NvPerFrameDecodeImageSet {
 public:
 
     NvPerFrameDecodeImageSet()
-        : m_queueFamilyIndex((uint32_t)-1)
+        : m_vkDevCtx()
+        , m_queueFamilyIndex((uint32_t)-1)
+        , m_frameCompleteSemaphore()
+        , m_consumerCompleteSemaphore()
         , m_numImages(0)
         , m_maxNumImageTypeIdx(0)
         , m_perFrameDecodeResources(VulkanVideoFrameBuffer::maxImages)
@@ -195,11 +197,12 @@ class NvPerFrameDecodeImageSet {
         const std::array<VulkanVideoFrameBuffer::ImageSpec, DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES>& imageSpecs,
         uint32_t                 queueFamilyIndex);
 
-    void Deinit();
+    void Deinit(const VulkanDeviceContext* vkDevCtx);
 
     ~NvPerFrameDecodeImageSet()
     {
-        Deinit();
+        Deinit(m_vkDevCtx);
+        m_vkDevCtx = nullptr;
     }
 
     NvPerFrameDecodeResources& operator[](unsigned int index)
@@ -258,8 +261,13 @@ class NvPerFrameDecodeImageSet {
     }
 
 private:
+    const VulkanDeviceContext*             m_vkDevCtx; // recorded by init(), cleared by the destructor
     uint32_t                               m_queueFamilyIndex;
     VkVideoCoreProfile                     m_videoProfile;
+public: // the two semaphores are read directly by VkVideoFrameBuffer
+    VkSemaphore                            m_frameCompleteSemaphore; // timeline semaphore created in init(), shared by all frames of the set
+    VkSemaphore                            m_consumerCompleteSemaphore; // timeline semaphore created in init(), shared by all frames of the set
+private:
     uint32_t                               m_numImages;
     uint32_t                               m_maxNumImageTypeIdx;
     std::vector<NvPerFrameDecodeResources> m_perFrameDecodeResources;
@@ -372,7 +380,7 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer {
         m_ownedByDisplayMask = 0;
         m_frameNumInDisplayOrder = 0;
 
-        m_perFrameDecodeImageSet.Deinit();
+        m_perFrameDecodeImageSet.Deinit(m_vkDevCtx);
 
         if (m_queryPool != VkQueryPool()) {
             m_vkDevCtx->DestroyQueryPool(*m_vkDevCtx, m_queryPool, NULL);
@@ -417,10 +425,9 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer {
         }
 
         if ((pFrameSynchronizationInfo->syncOnFrameConsumerDoneFence  == 1) &&
-             ((m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore == 0) ||
-              (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneSemaphore == VK_NULL_HANDLE)) &&
-                (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence == 1) &&
-                (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence != VK_NULL_HANDLE)) {
+             (m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore == 0) &&
+             (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence == 1) &&
+             (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence != VK_NULL_HANDLE)) {
 
             vk::WaitAndResetFence(m_vkDevCtx, *m_vkDevCtx, m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence,
                                   true, "frameConsumerDoneFence");
@@ -456,15 +463,35 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer {
         }
 
         if (pFrameSynchronizationInfo->hasFrameCompleteSignalSemaphore) {
-            pFrameSynchronizationInfo->frameCompleteSemaphore = m_perFrameDecodeImageSet[picId].m_frameCompleteSemaphore;
-            if (pFrameSynchronizationInfo->frameCompleteSemaphore) {
+            pFrameSynchronizationInfo->frameCompleteSemaphore = m_perFrameDecodeImageSet.m_frameCompleteSemaphore;
+            if (pFrameSynchronizationInfo->frameCompleteSemaphore != VK_NULL_HANDLE) {
+
+                pFrameSynchronizationInfo->decodeCompleteTimelineValue = DecodeFrameBufferIf::GetSemaphoreValue(
+                                                                            DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DECODE,
+                                                                            m_perFrameDecodeImageSet[picId].m_decodeOrder);
+
+                if (pFrameSynchronizationInfo->hasFilterSignalSemaphore) {
+                    pFrameSynchronizationInfo->filterCompleteTimelineValue = DecodeFrameBufferIf::GetSemaphoreValue(
+                                                                              DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_FILTER,
+                                                                              m_perFrameDecodeImageSet[picId].m_decodeOrder);
+
+                    m_perFrameDecodeImageSet[picId].m_frameCompleteTimelineValue = pFrameSynchronizationInfo->filterCompleteTimelineValue;
+
+                } else {
+
+                    m_perFrameDecodeImageSet[picId].m_frameCompleteTimelineValue = pFrameSynchronizationInfo->decodeCompleteTimelineValue;
+
+                }
+
                 m_perFrameDecodeImageSet[picId].m_hasFrameCompleteSignalSemaphore = true;
             }
         }
 
-        if (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore) {
-            pFrameSynchronizationInfo->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneSemaphore;
-            m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = false;
+        if (m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore) {
+            pFrameSynchronizationInfo->hasFrameConsumerSignalSemaphore = true;
+            pFrameSynchronizationInfo->consumerCompleteSemaphore = m_perFrameDecodeImageSet.m_consumerCompleteSemaphore;
+            pFrameSynchronizationInfo->frameConsumerDoneTimelineValue = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneTimelineValue;
+            m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore = false;
         }
 
         pFrameSynchronizationInfo->queryPool = m_queryPool;
@@ -529,14 +556,20 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer {
             }
 
             if (m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore) {
-                pDecodedFrame->frameCompleteSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteSemaphore;
+                pDecodedFrame->frameCompleteSemaphore = m_perFrameDecodeImageSet.m_frameCompleteSemaphore;
+                pDecodedFrame->frameCompleteDoneSemValue = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteTimelineValue;
                 m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore = false;
+
+                pDecodedFrame->consumerCompleteSemaphore = m_perFrameDecodeImageSet.m_consumerCompleteSemaphore;
+                pDecodedFrame->frameConsumerDoneSemValue = DecodeFrameBufferIf::GetSemaphoreValue(
+                                                               DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DISPLAY,
+                                                               m_perFrameDecodeImageSet[pictureIndex].m_displayOrder);
+
             } else {
-                pDecodedFrame->frameCompleteSemaphore = VkSemaphore();
+                pDecodedFrame->frameCompleteSemaphore = VK_NULL_HANDLE;
             }
 
             pDecodedFrame->frameConsumerDoneFence = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneFence;
-            pDecodedFrame->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneSemaphore;
 
             pDecodedFrame->timestamp = m_perFrameDecodeImageSet[pictureIndex].m_timestamp;
             pDecodedFrame->decodeOrder = m_perFrameDecodeImageSet[pictureIndex].m_decodeOrder;
@@ -572,7 +605,13 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer {
             m_perFrameDecodeImageSet[picId].Release();
 
             m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence = pDecodedFrameRelease->hasConsummerSignalFence;
-            m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = pDecodedFrameRelease->hasConsummerSignalSemaphore;
+            m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore = pDecodedFrameRelease->hasConsummerSignalSemaphore;
+            if (pDecodedFrameRelease->hasConsummerSignalSemaphore) {
+                m_perFrameDecodeImageSet[picId].m_frameConsumerDoneTimelineValue =
+                        DecodeFrameBufferIf::GetSemaphoreValue(
+                            DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DISPLAY,
+                            pDecodedFrameRelease->displayOrder);
+            }
         }
         return 0;
     }
@@ -648,7 +687,7 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer {
         std::lock_guard<std::mutex> lock(m_displayQueueMutex);
         for (unsigned int resId = 0; resId < numResources; resId++) {
             if ((uint32_t)indexes[resId] < m_perFrameDecodeImageSet.size()) {
-                m_perFrameDecodeImageSet[indexes[resId]].Deinit();
+                m_perFrameDecodeImageSet[indexes[resId]].Deinit(m_vkDevCtx);
             }
         }
         return (int32_t)m_perFrameDecodeImageSet.size();
@@ -785,8 +824,6 @@ VkResult NvPerFrameDecodeResources::CreateImage( const VulkanDeviceContext* vkDe
     }
     if (!ImageExist(pImageSpec->imageTypeIdx) || m_imageViewState[pImageSpec->imageTypeIdx].recreateImage) {
 
-        assert(m_vkDevCtx != nullptr);
-
         m_imageViewState[pImageSpec->imageTypeIdx].currentLayerLayout = pImageSpec->createInfo.initialLayout;
 
         VkSharedBaseObj<VkImageResource> imageResource;
@@ -839,21 +876,13 @@ VkResult NvPerFrameDecodeResources::CreateImage( const VulkanDeviceContext* vkDe
 VkResult NvPerFrameDecodeResources::init(const VulkanDeviceContext* vkDevCtx)
 {
 
-    m_vkDevCtx = vkDevCtx;
-
     // The fence waited on for the first frame should be signaled.
     const VkFenceCreateInfo fenceFrameCompleteInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr,
                                                        VK_FENCE_CREATE_SIGNALED_BIT };
-    VkResult result = m_vkDevCtx->CreateFence(*m_vkDevCtx, &fenceFrameCompleteInfo, nullptr, &m_frameCompleteFence);
+    VkResult result = vkDevCtx->CreateFence(*vkDevCtx, &fenceFrameCompleteInfo, nullptr, &m_frameCompleteFence);
 
     const VkFenceCreateInfo fenceInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr };
-    result = m_vkDevCtx->CreateFence(*m_vkDevCtx, &fenceInfo, nullptr, &m_frameConsumerDoneFence);
-    assert(result == VK_SUCCESS);
-
-    const VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr };
-    result = m_vkDevCtx->CreateSemaphore(*m_vkDevCtx, &semInfo, nullptr, &m_frameCompleteSemaphore);
-    assert(result == VK_SUCCESS);
-    result = m_vkDevCtx->CreateSemaphore(*m_vkDevCtx, &semInfo, nullptr, &m_frameConsumerDoneSemaphore);
+    if (result == VK_SUCCESS) { result = vkDevCtx->CreateFence(*vkDevCtx, &fenceInfo, nullptr, &m_frameConsumerDoneFence); } // keep a first-fence failure instead of overwriting it
     assert(result == VK_SUCCESS);
 
     Reset();
@@ -861,49 +890,35 @@ VkResult NvPerFrameDecodeResources::init(const VulkanDeviceContext* vkDevCtx)
     return result;
 }
 
-void NvPerFrameDecodeResources::Deinit()
+void NvPerFrameDecodeResources::Deinit(const VulkanDeviceContext* vkDevCtx)
 {
     bitstreamData = nullptr;
     stdPps = nullptr;
     stdSps = nullptr;
     stdVps = nullptr;
 
-    if (m_vkDevCtx == nullptr) {
+    if (vkDevCtx == nullptr) {
         assert ((m_frameCompleteFence == VK_NULL_HANDLE) &&
-                (m_frameConsumerDoneFence == VK_NULL_HANDLE) &&
-                (m_frameCompleteSemaphore == VK_NULL_HANDLE) &&
-                (m_frameConsumerDoneSemaphore == VK_NULL_HANDLE));
+                (m_frameConsumerDoneFence == VK_NULL_HANDLE));
         return;
     }
 
     if (m_frameCompleteFence != VkFence()) {
-        m_vkDevCtx->DestroyFence(*m_vkDevCtx, m_frameCompleteFence, nullptr);
+        vkDevCtx->DestroyFence(*vkDevCtx, m_frameCompleteFence, nullptr);
         m_frameCompleteFence = VkFence();
     }
 
     if (m_frameConsumerDoneFence != VkFence()) {
-        m_vkDevCtx->DestroyFence(*m_vkDevCtx, m_frameConsumerDoneFence, nullptr);
+        vkDevCtx->DestroyFence(*vkDevCtx, m_frameConsumerDoneFence, nullptr);
         m_frameConsumerDoneFence = VkFence();
     }
 
-    if (m_frameCompleteSemaphore != VkSemaphore()) {
-        m_vkDevCtx->DestroySemaphore(*m_vkDevCtx, m_frameCompleteSemaphore, nullptr);
-        m_frameCompleteSemaphore = VkSemaphore();
-    }
-
-    if (m_frameConsumerDoneSemaphore != VkSemaphore()) {
-        m_vkDevCtx->DestroySemaphore(*m_vkDevCtx, m_frameConsumerDoneSemaphore, nullptr);
-        m_frameConsumerDoneSemaphore = VkSemaphore();
-    }
-
     for (uint32_t imageTypeIdx = 0; imageTypeIdx < DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES; imageTypeIdx++) {
 
         m_imageViewState[imageTypeIdx].view = nullptr;
         m_imageViewState[imageTypeIdx].singleLevelView = nullptr;
     }
 
-    m_vkDevCtx = nullptr;
-
     Reset();
 }
 
@@ -919,6 +934,8 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx,
         return -1;
     }
 
+    m_vkDevCtx = vkDevCtx;
+
     for (uint32_t imageIndex = m_numImages; imageIndex < numImages; imageIndex++) {
         VkResult result = m_perFrameDecodeResources[imageIndex].init(vkDevCtx);
         assert(result == VK_SUCCESS);
@@ -927,6 +944,20 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx,
         }
     }
 
+    // Create the shared timeline semaphores only once: init() resumes at m_numImages, so it may be called
+    // again on an already-initialized set, and overwriting a valid handle would leak it.
+    VkSemaphoreTypeCreateInfo timelineCreateInfo = {};
+    timelineCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
+    timelineCreateInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
+    timelineCreateInfo.initialValue = 0ULL;
+    const VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &timelineCreateInfo };
+    VkResult result = (m_frameCompleteSemaphore != VK_NULL_HANDLE) ? VK_SUCCESS :
+        vkDevCtx->CreateSemaphore(*vkDevCtx, &semInfo, nullptr, &m_frameCompleteSemaphore);
+    if ((result == VK_SUCCESS) && (m_consumerCompleteSemaphore == VK_NULL_HANDLE)) {
+        result = vkDevCtx->CreateSemaphore(*vkDevCtx, &semInfo, nullptr, &m_consumerCompleteSemaphore);
+    }
+    assert(result == VK_SUCCESS);
+
     m_videoProfile.InitFromProfile(pDecodeProfile);
 
     m_queueFamilyIndex = queueFamilyIndex;
@@ -1048,10 +1079,21 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx,
     return (int32_t)numImages;
 }
 
-void NvPerFrameDecodeImageSet::Deinit()
+void NvPerFrameDecodeImageSet::Deinit(const VulkanDeviceContext* vkDevCtx)
 {
+    // Use the caller-supplied context for both dispatch and device handle, as NvPerFrameDecodeResources::Deinit() does.
+    if (m_frameCompleteSemaphore != VK_NULL_HANDLE) {
+        vkDevCtx->DestroySemaphore(*vkDevCtx, m_frameCompleteSemaphore, nullptr);
+        m_frameCompleteSemaphore = VK_NULL_HANDLE;
+    }
+
+    if (m_consumerCompleteSemaphore != VK_NULL_HANDLE) {
+        vkDevCtx->DestroySemaphore(*vkDevCtx, m_consumerCompleteSemaphore, nullptr);
+        m_consumerCompleteSemaphore = VK_NULL_HANDLE;
+    }
+
     for (size_t ndx = 0; ndx < m_numImages; ndx++) {
-        m_perFrameDecodeResources[ndx].Deinit();
+        m_perFrameDecodeResources[ndx].Deinit(vkDevCtx);
     }
 
     for (uint32_t imageTypeIdx = 0; imageTypeIdx < DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES; imageTypeIdx++) {
diff --git a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h
index 863d3a4f..e622bb7f 100644
--- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h
+++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h
@@ -66,14 +66,20 @@ class VulkanVideoFrameBuffer : public IVulkanVideoFrameBufferParserCb {
     struct FrameSynchronizationInfo {
         VkFence frameCompleteFence;
         VkSemaphore frameCompleteSemaphore;
+        VkSemaphore consumerCompleteSemaphore; // the image set's consumer-complete timeline semaphore; filled in together with hasFrameConsumerSignalSemaphore
         VkFence frameConsumerDoneFence;
-        VkSemaphore frameConsumerDoneSemaphore;
+        uint64_t frameConsumerDoneTimelineValue; // value stored for this picture when it was released with a consumer semaphore signal
+        uint64_t decodeCompleteTimelineValue; // GetSemaphoreValue(SEM_SYNC_TYPE_IDX_DECODE, decode order) of the picture
+        uint64_t filterCompleteTimelineValue; // GetSemaphoreValue(SEM_SYNC_TYPE_IDX_FILTER, decode order); written only when hasFilterSignalSemaphore is set
         VkQueryPool queryPool;
         uint32_t startQueryId;
         uint32_t numQueries;
         DecodeFrameBufferIf::ImageSpecsIndex imageSpecsIndex;
         uint32_t hasFrameCompleteSignalFence : 1;
+        uint32_t hasFrameConsumerSignalSemaphore : 1; // set by the frame buffer when consumerCompleteSemaphore and frameConsumerDoneTimelineValue are filled in
         uint32_t hasFrameCompleteSignalSemaphore : 1;
+        // post processing filter: when set, filterCompleteTimelineValue is produced as well
+        uint32_t hasFilterSignalSemaphore : 1;
         uint32_t syncOnFrameCompleteFence : 1;
         uint32_t syncOnFrameConsumerDoneFence : 1;
     };
diff --git a/vk_video_decoder/src/vulkan_video_decoder.cpp b/vk_video_decoder/src/vulkan_video_decoder.cpp
index e26115cd..a0018a46 100644
--- a/vk_video_decoder/src/vulkan_video_decoder.cpp
+++ b/vk_video_decoder/src/vulkan_video_decoder.cpp
@@ -155,16 +155,8 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance,
 
     VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR;
 
-    VkQueueFlags requestVideoEncodeQueueMask = 0;
-    if (m_decoderConfig.enableVideoEncoder) {
-        requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR;
-    }
-
     if (m_decoderConfig.selectVideoWithComputeQueue) {
         requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        if (m_decoderConfig.enableVideoEncoder) {
-            requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        }
     }
 
     VkQueueFlags requestVideoComputeQueueMask = 0;
@@ -172,16 +164,7 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance,
         requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT;
     }
 
-    VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs |
-                                        (m_decoderConfig.enableVideoEncoder ? videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+    VkVideoCodecOperationFlagsKHR videoCodecOperation = videoStreamDemuxer->GetVideoCodec();
 
     const bool supportsShellPresent = ((!m_decoderConfig.noPresent == false) && (pWsiDisplay != nullptr));
     const bool createGraphicsQueue = supportsShellPresent ? true  : false;
@@ -196,17 +179,12 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance,
                                             ( VK_QUEUE_TRANSFER_BIT |
                                               requestGraphicsQueueMask |
                                               requestVideoComputeQueueMask |
-                                              requestVideoDecodeQueueMask |
-                                              requestVideoEncodeQueueMask),
+                                              requestVideoDecodeQueueMask),
                                             pWsiDisplay,
                                             requestVideoDecodeQueueMask,
-                                            ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
-                                              VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
-                                              VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR),
-                                              requestVideoEncodeQueueMask,
-                                            ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
-                                              VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
-                                              VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR),
+                                            videoCodecOperation,
+                                              0,
+                                            VK_VIDEO_CODEC_OPERATION_NONE_KHR,
                                             vkPhysicalDevice);
 
     if (result != VK_SUCCESS) {
@@ -216,8 +194,8 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance,
     }
 
     m_vkDevCtxt.CreateVulkanDevice(numDecodeQueues,
-                                   m_decoderConfig.enableVideoEncoder ? 1 : 0, // num encode queues
-                                   videoCodecs,
+                                   0, // num encode queues
+                                   videoCodecOperation,
                                    // If no graphics or compute queue is requested, only video queues
                                    // will be created. Not all implementations support transfer on video queues,
                                    // so request a separate transfer queue for such implementations.
@@ -264,6 +242,7 @@ VkResult CreateVulkanVideoDecoder(VkInstance vkInstance, VkPhysicalDevice vkPhys
         case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
         case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
         case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
+        case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
         {
 
         }
diff --git a/vk_video_decoder/test/vulkan-video-dec/Main.cpp b/vk_video_decoder/test/vulkan-video-dec/Main.cpp
index 2c5d4d0a..5a02d1b3 100644
--- a/vk_video_decoder/test/vulkan-video-dec/Main.cpp
+++ b/vk_video_decoder/test/vulkan-video-dec/Main.cpp
@@ -60,6 +60,20 @@ int main(int argc, const char** argv)
         return -1;
     }
 
+
+    VkSharedBaseObj<VideoStreamDemuxer> videoStreamDemuxer;
+    result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(),
+                                        decoderConfig.forceParserType,
+                                        decoderConfig.enableStreamDemuxing,
+                                        decoderConfig.initialWidth,
+                                        decoderConfig.initialHeight,
+                                        decoderConfig.initialBitdepth,
+                                        videoStreamDemuxer);
+    if (result != VK_SUCCESS) {
+        assert(!"Can't initialize the VideoStreamDemuxer!");
+        return result;
+    }
+
     const int32_t numDecodeQueues = ((decoderConfig.queueId != 0) ||
                                      (decoderConfig.enableHwLoadBalancing != 0)) ?
                                      -1 : // all available HW decoders
@@ -67,16 +81,8 @@ int main(int argc, const char** argv)
 
     VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR;
 
-    VkQueueFlags requestVideoEncodeQueueMask = 0;
-    if (decoderConfig.enableVideoEncoder) {
-        requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR;
-    }
-
     if (decoderConfig.selectVideoWithComputeQueue) {
         requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        if (decoderConfig.enableVideoEncoder) {
-            requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        }
     }
 
     VkQueueFlags requestVideoComputeQueueMask = 0;
@@ -84,16 +90,9 @@ int main(int argc, const char** argv)
         requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT;
     }
 
-    VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs |
-                                        (decoderConfig.enableVideoEncoder ? videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+    VkVideoCodecOperationFlagsKHR videoCodec = // a forced parser type overrides the codec detected by the demuxer
+            (decoderConfig.forceParserType != VK_VIDEO_CODEC_OPERATION_NONE_KHR) ? decoderConfig.forceParserType
+                                                                                 : videoStreamDemuxer->GetVideoCodec();
 
     if (!decoderConfig.noPresent) {
 
@@ -111,17 +110,12 @@ int main(int argc, const char** argv)
         result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(),
                                               (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT |
                                               requestVideoComputeQueueMask |
-                                              requestVideoDecodeQueueMask |
-                                              requestVideoEncodeQueueMask),
+                                              requestVideoDecodeQueueMask),
                                               displayShell,
                                               requestVideoDecodeQueueMask,
-                                              (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR),
-                                              requestVideoEncodeQueueMask,
-                                              (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR));
+                                              videoCodec, // single decode codec operation selected above (forced or demuxer-detected)
+                                              0, // takes the place of the former requestVideoEncodeQueueMask: no encode queue is requested
+                                              VK_VIDEO_CODEC_OPERATION_NONE_KHR); // takes the place of the former encode codec list
         if (result != VK_SUCCESS) {
             assert(!"Can't initialize the Vulkan physical device!");
             return -1;
@@ -130,27 +124,14 @@ int main(int argc, const char** argv)
                                                   vkDevCtxt.GetPresentQueueFamilyIdx()));
 
         vkDevCtxt.CreateVulkanDevice(numDecodeQueues,
-                                     decoderConfig.enableVideoEncoder ? 1 : 0, // num encode queues
-                                     videoCodecs,
+                                     0, // num encode queues
+                                     videoCodec,
                                      false, //  createTransferQueue
                                      true,  // createGraphicsQueue
                                      true,  // createDisplayQueue
                                      requestVideoComputeQueueMask != 0  // createComputeQueue
                                      );
 
-        VkSharedBaseObj<VideoStreamDemuxer> videoStreamDemuxer;
-        result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(),
-                                            decoderConfig.forceParserType,
-                                            decoderConfig.enableStreamDemuxing,
-                                            decoderConfig.initialWidth,
-                                            decoderConfig.initialHeight,
-                                            decoderConfig.initialBitdepth,
-                                            videoStreamDemuxer);
-
-        if (result != VK_SUCCESS) {
-            assert(!"Can't initialize the VideoStreamDemuxer!");
-            return result;
-        }
 
         VkSharedBaseObj<VkVideoFrameOutput> frameToFile;
         if (!decoderConfig.outputFileName.empty()) {
@@ -194,8 +175,7 @@ int main(int argc, const char** argv)
         result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(),
                                               (VK_QUEUE_TRANSFER_BIT        |
                                                requestVideoDecodeQueueMask  |
-                                               requestVideoComputeQueueMask |
-                                               requestVideoEncodeQueueMask),
+                                               requestVideoComputeQueueMask),
                                               nullptr,
                                               requestVideoDecodeQueueMask);
         if (result != VK_SUCCESS) {
@@ -205,7 +185,7 @@ int main(int argc, const char** argv)
 
         result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues,
                                               0,     // num encode queues
-                                              videoCodecs,
+                                              videoCodec,
                                               // If no graphics or compute queue is requested, only video queues
                                               // will be created. Not all implementations support transfer on video queues,
                                               // so request a separate transfer queue for such implementations.
@@ -219,20 +199,6 @@ int main(int argc, const char** argv)
             return -1;
         }
 
-        VkSharedBaseObj<VideoStreamDemuxer> videoStreamDemuxer;
-        result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(),
-                                            decoderConfig.forceParserType,
-                                            decoderConfig.enableStreamDemuxing,
-                                            decoderConfig.initialWidth,
-                                            decoderConfig.initialHeight,
-                                            decoderConfig.initialBitdepth,
-                                            videoStreamDemuxer);
-
-        if (result != VK_SUCCESS) {
-            assert(!"Can't initialize the VideoStreamDemuxer!");
-            return result;
-        }
-
         VkSharedBaseObj<VkVideoFrameOutput> frameToFile;
         if (!decoderConfig.outputFileName.empty()) {
             const char* crcOutputFile = decoderConfig.outputcrcPerFrame ? decoderConfig.crcOutputFileName.c_str() : nullptr;
diff --git a/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp b/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp
index 8b4e845b..56bba249 100644
--- a/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp
+++ b/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp
@@ -114,6 +114,8 @@ int main(int argc, const char** argv)
             break;
         case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
             break;
+        case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
+            break;
         default:
             std::cout << "Simple decoder does not support demuxing "
                       << "and the decoder type must be set with --codec <codec type>"
diff --git a/vk_video_encoder/demos/vk-video-enc/Main.cpp b/vk_video_encoder/demos/vk-video-enc/Main.cpp
index 37b046a6..1a589fe0 100644
--- a/vk_video_encoder/demos/vk-video-enc/Main.cpp
+++ b/vk_video_encoder/demos/vk-video-enc/Main.cpp
@@ -52,7 +52,7 @@ int main(int argc, char** argv)
         VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
         VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,
         VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME,
-        VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME,
+        VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
         nullptr
     };
 
@@ -70,6 +70,7 @@ int main(int argc, char** argv)
         VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
         VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME,
         VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
+        VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME,
         nullptr
     };
 
@@ -122,17 +123,9 @@ int main(int argc, char** argv)
 
     VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR;
 
-    VkQueueFlags requestVideoDecodeQueueMask = 0;
-    if (encoderConfig->enableVideoDecoder) {
-        requestVideoDecodeQueueMask |= VK_QUEUE_VIDEO_DECODE_BIT_KHR |
-                                       VK_QUEUE_TRANSFER_BIT;
-    }
 
     if (encoderConfig->selectVideoWithComputeQueue) {
         requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        if (encoderConfig->enableVideoDecoder) {
-            requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        }
     }
 
     VkQueueFlags requestVideoComputeQueueMask = 0;
@@ -157,19 +150,6 @@ int main(int argc, char** argv)
         return -1;
     }
 
-
-    VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoCodecs = videoEncodeCodecs |
-                                        encoderConfig->enableVideoDecoder ? videoDecodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR;
-
-
     VkSharedBaseObj<VkVideoEncoder> encoder; // the encoder's instance
     if (supportsDisplay && encoderConfig->enableFramePresent) {
 
@@ -185,14 +165,11 @@ int main(int argc, char** argv)
 
         result = vkDevCtxt.InitPhysicalDevice(encoderConfig->deviceId, encoderConfig->GetDeviceUUID(),
                                               (VK_QUEUE_GRAPHICS_BIT |
-                                                      requestVideoComputeQueueMask |
-                                                      requestVideoDecodeQueueMask  |
-                                                      requestVideoEncodeQueueMask),
+                                              requestVideoComputeQueueMask |
+                                              requestVideoEncodeQueueMask),
                                               displayShell,
-                                              requestVideoDecodeQueueMask,
-                                              (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR),
+                                              0,
+                                              VK_VIDEO_CODEC_OPERATION_NONE_KHR,
                                               requestVideoEncodeQueueMask,
                                               (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
                                                VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
@@ -205,9 +182,9 @@ int main(int argc, char** argv)
         assert(displayShell->PhysDeviceCanPresent(vkDevCtxt.getPhysicalDevice(),
                                                    vkDevCtxt.GetPresentQueueFamilyIdx()));
 
-        result = vkDevCtxt.CreateVulkanDevice(encoderConfig->enableVideoDecoder ? 1 : 0, // num decode queues
+        result = vkDevCtxt.CreateVulkanDevice(0, // num decode queues
                                               numEncodeQueues,   // num encode queues
-                                              videoCodecs,
+                                              encoderConfig->codec,
                                               false,             // createTransferQueue
                                               true,              // createGraphicsQueue
                                               true,              // createDisplayQueue
@@ -240,26 +217,22 @@ int main(int argc, char** argv)
         // No display presentation and no decoder - just the encoder
         result = vkDevCtxt.InitPhysicalDevice(encoderConfig->deviceId, encoderConfig->GetDeviceUUID(),
                                               (requestVideoComputeQueueMask |
-                                               requestVideoDecodeQueueMask  |
                                                requestVideoEncodeQueueMask  |
                                                VK_QUEUE_TRANSFER_BIT),
                                               nullptr,
-                                              requestVideoDecodeQueueMask,
-                                              (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR),
+                                              0,
+                                              VK_VIDEO_CODEC_OPERATION_NONE_KHR,
                                               requestVideoEncodeQueueMask,
-                                              (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
-                                               VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR));
+                                              encoderConfig->codec);
         if (result != VK_SUCCESS) {
 
             assert(!"Can't initialize the Vulkan physical device!");
             return -1;
         }
 
-        result = vkDevCtxt.CreateVulkanDevice(encoderConfig->enableVideoDecoder ? 1 : 0, // num decode queues
+        result = vkDevCtxt.CreateVulkanDevice(0, // num decode queues
                                               numEncodeQueues,     // num encode queues
-                                              videoCodecs,
+                                              encoderConfig->codec,
                                               // If no graphics or compute queue is requested, only video queues
                                               // will be created. Not all implementations support transfer on video queues,
                                               // so request a separate transfer queue for such implementations.
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h
index c7de5367..e826064a 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h
@@ -762,7 +762,6 @@ struct EncoderConfig : public VkVideoRefCountBase {
     uint32_t verboseMsg : 1;
     uint32_t enableFramePresent : 1;
     uint32_t enableFrameDirectModePresent : 1;
-    uint32_t enableVideoDecoder : 1;
     uint32_t enableHwLoadBalancing : 1;
     uint32_t selectVideoWithComputeQueue : 1;
     uint32_t enablePreprocessComputeFilter : 1;
@@ -857,7 +856,6 @@ struct EncoderConfig : public VkVideoRefCountBase {
     , verboseMsg(false)
     , enableFramePresent(false)
     , enableFrameDirectModePresent(false)
-    , enableVideoDecoder(false)
     , enableHwLoadBalancing(false)
     , selectVideoWithComputeQueue(false)
     , enablePreprocessComputeFilter(true)
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp
index d79ed014..e4f71cd8 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp
@@ -441,22 +441,36 @@ VkResult VkVideoEncoder::SubmitStagedQpMap(VkSharedBaseObj<VkVideoEncodeFrameInf
     const VkCommandBuffer* pCmdBuf = encodeFrameInfo->qpMapCmdBuffer->GetCommandBuffer();
     VkSemaphore frameCompleteSemaphore = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore();
 
-    VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr };
-    const VkPipelineStageFlags videoTransferSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-    submitInfo.waitSemaphoreCount = 0;
-    submitInfo.pWaitSemaphores = nullptr;
-    submitInfo.pWaitDstStageMask = &videoTransferSubmitWaitStages;
-    submitInfo.commandBufferCount = 1;
-    submitInfo.pCommandBuffers = pCmdBuf;
-    submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr;
-    submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0;
+    VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR };
+    cmdBufferInfo.commandBuffer = *pCmdBuf;
+    cmdBufferInfo.deviceMask = 0;
+
+    VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR };
+    signalSemaphoreInfo.semaphore = frameCompleteSemaphore;
+    signalSemaphoreInfo.value = 0; // Binary semaphore
+    signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; // Signal after transfer operations complete
+    signalSemaphoreInfo.deviceIndex = 0;
+
+    VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr };
+    submitInfo.flags = 0;
+    submitInfo.waitSemaphoreInfoCount = 0;
+    submitInfo.pWaitSemaphoreInfos = nullptr;
+    submitInfo.commandBufferInfoCount = 1;
+    submitInfo.pCommandBufferInfos = &cmdBufferInfo;
+    submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0;
+    submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &signalSemaphoreInfo : nullptr;
 
     VkFence queueCompleteFence = encodeFrameInfo->qpMapCmdBuffer->GetFence();
     assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence));
+
     VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(((m_vkDevCtx->GetVideoEncodeQueueFlag() & VK_QUEUE_TRANSFER_BIT) != 0) ?
-                                                               VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER,
-                                                           0, 1, &submitInfo,
-                                                           queueCompleteFence);
+                                                                     VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER,
+                                                             0, // queueIndex
+                                                             1, // submitCount
+                                                             &submitInfo, queueCompleteFence,
+                                                             "Encode Staging QpMap",
+                                                             m_encodeEncodeFrameNum,
+                                                             m_encodeInputFrameNum);
 
     encodeFrameInfo->qpMapCmdBuffer->SetCommandBufferSubmitted();
     bool syncCpuAfterStaging = false;
@@ -475,15 +489,24 @@ VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObj<VkVideoEncodeFra
     const VkCommandBuffer* pCmdBuf = encodeFrameInfo->inputCmdBuffer->GetCommandBuffer();
     VkSemaphore frameCompleteSemaphore = encodeFrameInfo->inputCmdBuffer->GetSemaphore();
 
-    VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr };
-    const VkPipelineStageFlags videoTransferSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-    submitInfo.waitSemaphoreCount = 0;
-    submitInfo.pWaitSemaphores = nullptr;
-    submitInfo.pWaitDstStageMask = &videoTransferSubmitWaitStages;
-    submitInfo.commandBufferCount = 1;
-    submitInfo.pCommandBuffers = pCmdBuf;
-    submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr;
-    submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0;
+    VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR };
+    cmdBufferInfo.commandBuffer = *pCmdBuf;
+    cmdBufferInfo.deviceMask = 0;
+
+    VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; // used only when the semaphore is valid
+    signalSemaphoreInfo.semaphore = frameCompleteSemaphore;
+    signalSemaphoreInfo.value = 0; // Binary semaphore
+    signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR; // Submit may target the COMPUTE queue (input filter): signal after all work, not only transfer
+    signalSemaphoreInfo.deviceIndex = 0;
+
+    VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr };
+    submitInfo.flags = 0;
+    submitInfo.waitSemaphoreInfoCount = 0;
+    submitInfo.pWaitSemaphoreInfos = nullptr;
+    submitInfo.commandBufferInfoCount = 1;
+    submitInfo.pCommandBufferInfos = &cmdBufferInfo;
+    submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0;
+    submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &signalSemaphoreInfo : nullptr;
 
     VkFence queueCompleteFence = encodeFrameInfo->inputCmdBuffer->GetFence();
     assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence));
@@ -491,9 +514,15 @@ VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObj<VkVideoEncodeFra
             (m_inputComputeFilter != nullptr) ? VulkanDeviceContext::COMPUTE :
                     (((m_vkDevCtx->GetVideoEncodeQueueFlag() & VK_QUEUE_TRANSFER_BIT) != 0) ?
                             VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER);
+
     VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(submitType,
-                                                           0, 1, &submitInfo,
-                                                           queueCompleteFence);
+                                                           0, // queueIndex
+                                                           1, // submitCount
+                                                           &submitInfo,
+                                                           queueCompleteFence,
+                                                           "Encode Staging Input",
+                                                           m_encodeEncodeFrameNum,
+                                                           m_encodeInputFrameNum);
 
     encodeFrameInfo->inputCmdBuffer->SetCommandBufferSubmitted();
     bool syncCpuAfterStaging = false;
@@ -1306,12 +1335,12 @@ VkResult VkVideoEncoder::CopyLinearToOptimalImage(VkCommandBuffer& commandBuffer
     copyRegion[0].dstSubresource.layerCount = 1;
     copyRegion[1].extent.width = copyRegion[0].extent.width;
     if (mpInfo->planesLayout.secondaryPlaneSubsampledX != 0) {
-        copyRegion[1].extent.width /= 2;
+        copyRegion[1].extent.width = (copyRegion[1].extent.width + 1) / 2;
     }
 
     copyRegion[1].extent.height = copyRegion[0].extent.height;
     if (mpInfo->planesLayout.secondaryPlaneSubsampledY != 0) {
-        copyRegion[1].extent.height /= 2;
+        copyRegion[1].extent.height = (copyRegion[1].extent.height + 1) / 2;
     }
 
     copyRegion[1].extent.depth = 1;
@@ -1602,38 +1631,74 @@ VkResult VkVideoEncoder::SubmitVideoCodingCmds(VkSharedBaseObj<VkVideoEncodeFram
     }
 
     assert(encodeFrameInfo);
+
     assert(encodeFrameInfo->encodeCmdBuffer != nullptr);
 
+    const VkCommandBuffer* pCmdBuf = encodeFrameInfo->encodeCmdBuffer->GetCommandBuffer();
+    // The encode operation complete semaphore is not needed at this point.
+    VkSemaphore frameCompleteSemaphore = VK_NULL_HANDLE; // encodeFrameInfo->encodeCmdBuffer->GetSemaphore();
+
+    // Create command buffer submit info
+    VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR };
+    cmdBufferInfo.commandBuffer = *pCmdBuf;
+    cmdBufferInfo.deviceMask = 0;
+
+
+
+    // Create wait semaphore submit infos
     // If we are processing the input staging, wait for it's semaphore
     // to be done before processing the input frame with the encoder.
-    VkSemaphore inputWaitSemaphore[2] = { VK_NULL_HANDLE };
+    VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[2]{};
     uint32_t waitSemaphoreCount = 0;
     if (encodeFrameInfo->inputCmdBuffer) {
-        inputWaitSemaphore[waitSemaphoreCount++] = encodeFrameInfo->inputCmdBuffer->GetSemaphore();
+        waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        waitSemaphoreInfos[waitSemaphoreCount].semaphore = encodeFrameInfo->inputCmdBuffer->GetSemaphore();
+        waitSemaphoreInfos[waitSemaphoreCount].value = 0; // Binary semaphore
+        // For a wait, stageMask selects the stages of THIS (encode) submission that block on the semaphore, not the producer's stage
+        waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR;
+        waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0;
+        waitSemaphoreCount++;
     }
     if (encodeFrameInfo->qpMapCmdBuffer) {
-        inputWaitSemaphore[waitSemaphoreCount++] = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore();
-    }
-
-    const VkCommandBuffer* pCmdBuf = encodeFrameInfo->encodeCmdBuffer->GetCommandBuffer();
-    // The encode operation complete semaphore is not needed at this point.
-    VkSemaphore frameCompleteSemaphore = VK_NULL_HANDLE; // encodeFrameInfo->encodeCmdBuffer->GetSemaphore();
-
-    VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr };
-    const VkPipelineStageFlags videoEncodeSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-    submitInfo.pWaitSemaphores = (waitSemaphoreCount > 0) ? inputWaitSemaphore : nullptr;
-    submitInfo.waitSemaphoreCount = waitSemaphoreCount;
-    submitInfo.pWaitDstStageMask = &videoEncodeSubmitWaitStages;
-    submitInfo.commandBufferCount = 1;
-    submitInfo.pCommandBuffers = pCmdBuf;
-    submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr;
-    submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0;
+        waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR;
+        waitSemaphoreInfos[waitSemaphoreCount].semaphore = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore();
+        waitSemaphoreInfos[waitSemaphoreCount].value = 0; // Binary semaphore
+        // For a wait, stageMask selects the stages of THIS (encode) submission that block on the semaphore, not the producer's stage
+        waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR;
+        waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0;
+        waitSemaphoreCount++;
+    }
+
+    // Create signal semaphore submit info if needed
+    VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR };
+    if (frameCompleteSemaphore != VK_NULL_HANDLE) {
+        signalSemaphoreInfo.semaphore = frameCompleteSemaphore;
+        signalSemaphoreInfo.value = 0; // Binary semaphore
+        signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR;
+        signalSemaphoreInfo.deviceIndex = 0;
+    }
+
+    // Create submit info
+    VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr };
+    submitInfo.flags = 0;
+    submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount;
+    submitInfo.pWaitSemaphoreInfos = (waitSemaphoreCount > 0) ? waitSemaphoreInfos : nullptr;
+    submitInfo.commandBufferInfoCount = 1;
+    submitInfo.pCommandBufferInfos = &cmdBufferInfo;
+    submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0;
+    submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &signalSemaphoreInfo : nullptr;
 
     VkFence queueCompleteFence = encodeFrameInfo->encodeCmdBuffer->GetFence();
     assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence));
-    VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::ENCODE, 0,
-                                                           1, &submitInfo,
-                                                           queueCompleteFence);
+
+    VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::ENCODE,
+                                                           0, // queueIndex
+                                                           1, // submitCount
+                                                           &submitInfo,
+                                                           queueCompleteFence,
+                                                           "Video Encode",
+                                                           m_encodeEncodeFrameNum,
+                                                           m_encodeInputFrameNum);
 
     encodeFrameInfo->encodeCmdBuffer->SetCommandBufferSubmitted();
     bool syncCpuAfterEncoding = false;
diff --git a/vk_video_encoder/src/vulkan_video_encoder.cpp b/vk_video_encoder/src/vulkan_video_encoder.cpp
index 18831f2a..803d9da6 100644
--- a/vk_video_encoder/src/vulkan_video_encoder.cpp
+++ b/vk_video_encoder/src/vulkan_video_encoder.cpp
@@ -106,6 +106,7 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid
         VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
         VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,
         VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME,
+        VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
         nullptr
     };
 
@@ -114,6 +115,7 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid
         VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
         VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME,
         VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
+        VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME,
         nullptr
     };
 
@@ -140,17 +142,8 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid
 
     VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR;
 
-    VkQueueFlags requestVideoDecodeQueueMask = 0;
-    if (m_encoderConfig->enableVideoDecoder) {
-        requestVideoDecodeQueueMask |= VK_QUEUE_VIDEO_DECODE_BIT_KHR |
-                                       VK_QUEUE_TRANSFER_BIT;
-    }
-
     if (m_encoderConfig->selectVideoWithComputeQueue) {
         requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        if (m_encoderConfig->enableVideoDecoder) {
-            requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT;
-        }
     }
 
     VkQueueFlags requestVideoComputeQueueMask = 0;
@@ -161,17 +154,13 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid
     // No display presentation and no decoder - just the encoder
     result = m_vkDevCtxt.InitPhysicalDevice(m_encoderConfig->deviceId, m_encoderConfig->GetDeviceUUID(),
                                             ( requestVideoComputeQueueMask |
-                                              requestVideoDecodeQueueMask  |
                                               requestVideoEncodeQueueMask  |
                                               VK_QUEUE_TRANSFER_BIT),
                                             nullptr,
-                                            requestVideoDecodeQueueMask,
-                                            (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
-                                             VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR),
+                                            0,
+                                            VK_VIDEO_CODEC_OPERATION_NONE_KHR,
                                             requestVideoEncodeQueueMask,
-                                            (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
-                                             VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR |
-                                             VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR));
+                                            videoCodecOperation);
     if (result != VK_SUCCESS) {
 
         assert(!"Can't initialize the Vulkan physical device!");
@@ -183,21 +172,9 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid
                                      -1 : // all available HW encoders
                                       1;  // only one HW encoder instance
 
-    VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR  |
-                                                       VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR  |
-                                                        VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR);
-
-    VkVideoCodecOperationFlagsKHR videoCodecs = videoEncodeCodecs |
-                                        (m_encoderConfig->enableVideoDecoder ? videoDecodeCodecs : (uint32_t)VK_VIDEO_CODEC_OPERATION_NONE_KHR);
-
-
-    result = m_vkDevCtxt.CreateVulkanDevice(m_encoderConfig->enableVideoDecoder ? 1 : 0, // num decode queues
+    result = m_vkDevCtxt.CreateVulkanDevice(0, // num decode queues
                                             numEncodeQueues,     // num encode queues
-                                            videoCodecs,
+                                            videoCodecOperation,
                                             // If no graphics or compute queue is requested, only video queues
                                             // will be created. Not all implementations support transfer on video queues,
                                             // so request a separate transfer queue for such implementations.