From d3e8058606a60270878ce564ca6ba8282d34b85d Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Sun, 9 Mar 2025 23:53:42 -0700 Subject: [PATCH 1/7] decode: Round up width/height of the chroma plane For chroma resolution in 420 or 422 format, round width/height to the next integer when calculating from luma width and height. --- common/libs/VkCodecUtils/VkVideoFrameToFile.cpp | 8 ++++---- common/libs/VkCodecUtils/VulkanVideoUtils.cpp | 2 +- vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index 26d78757..31b51c29 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -336,7 +336,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { - secondaryPlaneHeight /= 2; + secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2; } VkImageSubresource subResource = {}; @@ -383,12 +383,12 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight; yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel; if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[1].rowPitch /= 2; + yuvPlaneLayouts[1].rowPitch = (yuvPlaneLayouts[1].rowPitch + 1) / 2; } yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight); yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel; if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[2].rowPitch /= 2; + yuvPlaneLayouts[2].rowPitch = (yuvPlaneLayouts[2].rowPitch + 1) / 2; } // Copy the luma plane @@ -410,7 +410,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { for (uint32_t plane = numCompatiblePlanes; plane < numPlanes; plane++) { const uint32_t srcPlane = 
std::min(plane, mpInfo->planesLayout.numberOfExtraPlanes); uint8_t* pDst = pOutBuffer + yuvPlaneLayouts[plane].offset; - const int32_t planeWidth = mpInfo->planesLayout.secondaryPlaneSubsampledX ? frameWidth / 2 : frameWidth; + const int32_t planeWidth = mpInfo->planesLayout.secondaryPlaneSubsampledX ? (frameWidth + 1) / 2 : frameWidth; for (int32_t height = 0; height < secondaryPlaneHeight; height++) { const uint8_t* pSrc; diff --git a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp index 39f3b6f1..e2cdf9ce 100644 --- a/common/libs/VkCodecUtils/VulkanVideoUtils.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoUtils.cpp @@ -254,7 +254,7 @@ VkResult ImageObject::CopyYuvToVkImage(uint32_t numPlanes, const uint8_t* yuvPla } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { - cbimageHeight /= 2; + cbimageHeight = (cbimageHeight + 1) / 2; } if (mpInfo && !isUnnormalizedRgba) { diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp index d79ed014..29e2a36d 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp @@ -1306,12 +1306,12 @@ VkResult VkVideoEncoder::CopyLinearToOptimalImage(VkCommandBuffer& commandBuffer copyRegion[0].dstSubresource.layerCount = 1; copyRegion[1].extent.width = copyRegion[0].extent.width; if (mpInfo->planesLayout.secondaryPlaneSubsampledX != 0) { - copyRegion[1].extent.width /= 2; + copyRegion[1].extent.width = (copyRegion[1].extent.width + 1) / 2; } copyRegion[1].extent.height = copyRegion[0].extent.height; if (mpInfo->planesLayout.secondaryPlaneSubsampledY != 0) { - copyRegion[1].extent.height /= 2; + copyRegion[1].extent.height = (copyRegion[1].extent.height + 1) / 2; } copyRegion[1].extent.depth = 1; From 49e137444ca0c0ffc878e51f49a86ed76bccca3f Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Tue, 18 Mar 2025 00:14:41 -0700 Subject: [PATCH 
2/7] decode: Remove assert when fence times out Remove assert when fence times out but the timeout value is less than the total wait time. --- common/libs/VkCodecUtils/Helpers.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/common/libs/VkCodecUtils/Helpers.h b/common/libs/VkCodecUtils/Helpers.h index 4218d36d..e4c70abb 100644 --- a/common/libs/VkCodecUtils/Helpers.h +++ b/common/libs/VkCodecUtils/Helpers.h @@ -238,23 +238,21 @@ inline VkResult WaitAndResetFence(const VkInterfaceFunctions* vkIf, VkDevice dev while (fenceTotalWaitTimeout >= fenceCurrentWaitTimeout) { - result = vkIf->WaitForFences(device, 1, &fence, true, fenceWaitTimeout); - if (result != VK_SUCCESS) { - fprintf(stderr, "\t **** WARNING: fence %s(%llu) is not done after %llu nSec with result 0x%x ****\n", - fenceName, (long long unsigned int)fence, (long long unsigned int)fenceWaitTimeout, result); - assert(!"Fence is not signaled yet after more than 100 mSec wait"); - } + fenceCurrentWaitTimeout += fenceWaitTimeout; - if (result != VK_TIMEOUT) { - break; + result = vkIf->WaitForFences(device, 1, &fence, true, fenceWaitTimeout); + if (result == VK_TIMEOUT) { + fprintf(stderr, "\t **** WARNING: fence %s(%llu) is not done after %llu mSec with result 0x%x ****\n", + fenceName, (long long unsigned int)fence, (long long unsigned int)fenceCurrentWaitTimeout/(1000ULL * 1000ULL), result); + } else { + break; // either success or an error occurred } - fenceCurrentWaitTimeout += fenceWaitTimeout; } if (result != VK_SUCCESS) { - fprintf(stderr, "\t **** ERROR: fence %s(%llu) is not done after %llu nSec with result 0x%x ****\n", - fenceName, (long long unsigned int)fence, (long long unsigned int)fenceTotalWaitTimeout, vkIf->GetFenceStatus(device, fence)); + fprintf(stderr, "\t **** ERROR: fence %s(%llu) is not done after %llu mSec with result 0x%x ****\n", + fenceName, (long long unsigned int)fence, (long long unsigned int)fenceTotalWaitTimeout/(1000ULL * 1000ULL), 
vkIf->GetFenceStatus(device, fence)); assert(!"Fence is not signaled yet after more than 100 mSec wait"); } From 5753d4bc8dae37f8475423ebe8714c12e592b03c Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Tue, 18 Mar 2025 03:21:57 -0700 Subject: [PATCH 3/7] common: Provide separate destructor implementation Instead of defining the destructor inline, provide its implementation in a separate source file to make GCC 13 happy in release mode. Also fix other compilation errors in release mode. Signed-off-by: Raju Konda --- common/libs/VkCodecUtils/VkImageResource.cpp | 5 +++++ common/libs/VkCodecUtils/VkImageResource.h | 2 +- common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp | 5 +++++ common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h | 2 +- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/common/libs/VkCodecUtils/VkImageResource.cpp b/common/libs/VkCodecUtils/VkImageResource.cpp index 302ba8a1..0ea91333 100644 --- a/common/libs/VkCodecUtils/VkImageResource.cpp +++ b/common/libs/VkCodecUtils/VkImageResource.cpp @@ -88,6 +88,11 @@ VkImageResource::VkImageResource(const VulkanDeviceContext* vkDevCtx, } } +VkImageResource::~VkImageResource() +{ + Destroy(); +} + VkResult VkImageResource::Create(const VulkanDeviceContext* vkDevCtx, const VkImageCreateInfo* pImageCreateInfo, VkMemoryPropertyFlags memoryPropertyFlags, diff --git a/common/libs/VkCodecUtils/VkImageResource.h b/common/libs/VkCodecUtils/VkImageResource.h index 314a2f01..0c4c0ac8 100644 --- a/common/libs/VkCodecUtils/VkImageResource.h +++ b/common/libs/VkCodecUtils/VkImageResource.h @@ -113,7 +113,7 @@ class VkImageResource : public VkVideoRefCountBase void Destroy(); - virtual ~VkImageResource() { Destroy(); } + virtual ~VkImageResource(); }; class VkImageResourceView : public VkVideoRefCountBase diff --git a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp index db05c81a..f78b0de8 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp 
+++ b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.cpp @@ -410,3 +410,8 @@ const uint8_t* VulkanDeviceMemoryImpl::GetReadOnlyDataPtr(VkDeviceSize offset, V maxSize = m_memoryRequirements.size - offset; return readData; } + +VulkanDeviceMemoryImpl::~VulkanDeviceMemoryImpl() +{ + Deinitialize(); +} diff --git a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h index 6b94e38e..f7d1c2d7 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h +++ b/common/libs/VkCodecUtils/VulkanDeviceMemoryImpl.h @@ -106,7 +106,7 @@ class VulkanDeviceMemoryImpl : public VkVideoRefCountBase void Deinitialize(); - virtual ~VulkanDeviceMemoryImpl() { Deinitialize(); } + virtual ~VulkanDeviceMemoryImpl(); private: std::atomic m_refCount; From 48b501e0bef1780779f0bf8ba0b408361d1fb39e Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Mon, 28 Apr 2025 00:17:59 -0700 Subject: [PATCH 4/7] decode: Calculate chroma plane width separately Calculate the chroma plane width separately, and add the U and V widths to get the final UV plane width in NV12. 
--- .../libs/VkCodecUtils/VkVideoFrameToFile.cpp | 14 ++++++-------- .../VkCodecUtils/VulkanVideoProcessor.cpp | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index 31b51c29..097439d7 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -328,6 +328,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize); assert(readImagePtr != nullptr); + int32_t secondaryPlaneWidth = frameWidth; int32_t secondaryPlaneHeight = frameHeight; int32_t imageHeight = frameHeight; bool isUnnormalizedRgba = false; @@ -335,6 +336,9 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { isUnnormalizedRgba = true; } + if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { + secondaryPlaneWidth = (secondaryPlaneWidth + 1) / 2; + } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2; } @@ -381,15 +385,9 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { yuvPlaneLayouts[0].offset = 0; yuvPlaneLayouts[0].rowPitch = frameWidth * bytesPerPixel; yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight; - yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[1].rowPitch = (yuvPlaneLayouts[1].rowPitch + 1) / 2; - } + yuvPlaneLayouts[1].rowPitch = secondaryPlaneWidth * bytesPerPixel; yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight); - yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[2].rowPitch = (yuvPlaneLayouts[2].rowPitch + 1) / 2; - } + 
yuvPlaneLayouts[2].rowPitch = secondaryPlaneWidth * bytesPerPixel; // Copy the luma plane const uint32_t numCompatiblePlanes = 1; diff --git a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp index 3122b062..97d3906f 100644 --- a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp @@ -382,6 +382,7 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize); assert(readImagePtr != nullptr); + int32_t secondaryPlaneWidth = frameWidth; int32_t secondaryPlaneHeight = frameHeight; int32_t imageHeight = frameHeight; bool isUnnormalizedRgba = false; @@ -389,8 +390,11 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt isUnnormalizedRgba = true; } + if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { + secondaryPlaneWidth = (secondaryPlaneWidth + 1) / 2; + } if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledY) { - secondaryPlaneHeight /= 2; + secondaryPlaneHeight = (secondaryPlaneHeight + 1) / 2; } VkImageSubresource subResource = {}; @@ -439,15 +443,9 @@ size_t ConvertFrameToNv12(const VulkanDeviceContext *vkDevCtx, int32_t frameWidt yuvPlaneLayouts[0].offset = 0; yuvPlaneLayouts[0].rowPitch = frameWidth * bytesPerPixel; yuvPlaneLayouts[1].offset = yuvPlaneLayouts[0].rowPitch * frameHeight; - yuvPlaneLayouts[1].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[1].rowPitch /= 2; - } + yuvPlaneLayouts[1].rowPitch = secondaryPlaneWidth * bytesPerPixel; yuvPlaneLayouts[2].offset = yuvPlaneLayouts[1].offset + (yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight); - yuvPlaneLayouts[2].rowPitch = frameWidth * bytesPerPixel; - if (mpInfo && mpInfo->planesLayout.secondaryPlaneSubsampledX) { - yuvPlaneLayouts[2].rowPitch /= 2; - } + 
yuvPlaneLayouts[2].rowPitch = secondaryPlaneWidth * bytesPerPixel; // Copy the luma plane, always assume the 422 or 444 formats and src CbCr always is interleaved (shares the same plane). uint32_t numCompatiblePlanes = 1; @@ -642,6 +640,7 @@ VkResult VulkanVideoProcessor::CreateParser(const char*, static const VkExtensionProperties h264StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION }; static const VkExtensionProperties h265StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION }; static const VkExtensionProperties av1StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION }; + static const VkExtensionProperties vp9StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION }; const VkExtensionProperties* pStdExtensionVersion = NULL; if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { @@ -650,6 +649,8 @@ VkResult VulkanVideoProcessor::CreateParser(const char*, pStdExtensionVersion = &h265StdExtensionVersion; } else if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { pStdExtensionVersion = &av1StdExtensionVersion; + } else if (vkCodecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pStdExtensionVersion = &vp9StdExtensionVersion; } else { assert(!"Unsupported Codec Type"); return VK_ERROR_FORMAT_NOT_SUPPORTED; From 62fe6707a55d2c7e8d8ee89105c583af8a23003b Mon Sep 17 00:00:00 2001 From: "Tony Zlatinski (NVIDIA Corporation)" Date: Fri, 2 May 2025 18:37:19 +0000 Subject: [PATCH 5/7] common: Switch to VkSubmitInfo2KHR and use TL semaphores Use VkSubmitInfo2KHR and use TL semaphores in both encoder and decoder. 
--- .../include/VkVideoCore/DecodeFrameBufferIf.h | 10 ++ .../libs/VkCodecUtils/VulkanDeviceContext.cpp | 78 ++++++++- .../libs/VkCodecUtils/VulkanDeviceContext.h | 16 +- common/libs/VkCodecUtils/VulkanDisplayFrame.h | 18 +- common/libs/VkCodecUtils/VulkanFilter.h | 144 +++++++++++++-- common/libs/VkCodecUtils/VulkanFrame.cpp | 108 ++++++++---- .../libs/VkCodecUtils/VulkanSemaphoreDump.h | 90 ++++++++++ .../libs/VkVideoDecoder/VkVideoDecoder.cpp | 150 ++++++++-------- .../VulkanVideoFrameBuffer.cpp | 164 +++++++++++------- .../VulkanVideoFrameBuffer.h | 8 +- vk_video_encoder/demos/vk-video-enc/Main.cpp | 1 + .../libs/VkVideoEncoder/VkVideoEncoder.cpp | 153 +++++++++++----- vk_video_encoder/src/vulkan_video_encoder.cpp | 1 + 13 files changed, 696 insertions(+), 245 deletions(-) create mode 100644 common/libs/VkCodecUtils/VulkanSemaphoreDump.h diff --git a/common/include/VkVideoCore/DecodeFrameBufferIf.h b/common/include/VkVideoCore/DecodeFrameBufferIf.h index 60393c38..fc99a4f5 100644 --- a/common/include/VkVideoCore/DecodeFrameBufferIf.h +++ b/common/include/VkVideoCore/DecodeFrameBufferIf.h @@ -107,6 +107,16 @@ class DecodeFrameBufferIf } }; + enum SemSyncTypeIdx : uint64_t { SEM_SYNC_TYPE_IDX_DECODE = (1ULL << 0), // Decode operation was signaled + SEM_SYNC_TYPE_IDX_DISPLAY = (1ULL << 0), // Display operation was signaled + SEM_SYNC_TYPE_IDX_FILTER = (1ULL << 1), // Filter operation was signaled + SEM_SYNC_TYPE_IDX_SHIFT = 2, // Shift semaphore counter value left + }; + + static uint64_t GetSemaphoreValue(SemSyncTypeIdx semSyncType, uint64_t semOrder) { + return (semOrder << SEM_SYNC_TYPE_IDX_SHIFT) | semSyncType; + } + }; #endif /* _VKVIDEOCORE_DECODEFRAMEBUFFERIF_H_ */ diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp index f13d598e..c85c2814 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceContext.cpp +++ b/common/libs/VkCodecUtils/VulkanDeviceContext.cpp @@ -214,11 +214,27 @@ VkResult 
VulkanDeviceContext::AddReqDeviceExtensions(const char* const* required break; } m_requestedDeviceExtensions.push_back(name); + if (verbose) { + std::cout << "Added required device extension: " << name << std::endl; + } } return VK_SUCCESS; } +VkResult VulkanDeviceContext::AddReqDeviceExtension(const char* requiredDeviceExtension, bool verbose) +{ + if (requiredDeviceExtension) { + m_requestedDeviceExtensions.push_back(requiredDeviceExtension); + if (verbose) { + std::cout << "Added required device extension: " << requiredDeviceExtension << std::endl; + } + } + + return VK_SUCCESS; +} + + // optional device extensions VkResult VulkanDeviceContext::AddOptDeviceExtensions(const char* const* optionalDeviceExtensions, bool verbose) { @@ -229,6 +245,9 @@ VkResult VulkanDeviceContext::AddOptDeviceExtensions(const char* const* optional break; } m_optDeviceExtensions.push_back(name); + if (verbose) { + std::cout << "Added optional device extension: " << name << std::endl; + } } return VK_SUCCESS; @@ -712,26 +731,57 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues, devInfo.queueCreateInfoCount++; } + VkPhysicalDeviceVideoDecodeVP9FeaturesKHR videoDecodeVP9Feature { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR, + nullptr, + false // videoDecodeVP9 + }; + VkPhysicalDeviceVideoEncodeAV1FeaturesKHR videoEncodeAV1Feature { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_AV1_FEATURES_KHR, nullptr, false // videoEncodeAV1 - }; + }; + // Chain only the structures that are requested + VkBaseInStructure* pNext = nullptr; + if (videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) { + videoEncodeAV1Feature.pNext = pNext; + pNext = (VkBaseInStructure*)&videoEncodeAV1Feature; + } + if (videoCodecs & VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + videoDecodeVP9Feature.pNext = pNext; + pNext = (VkBaseInStructure*)&videoDecodeVP9Feature; + } + VkPhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeatures { 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, + pNext, + VK_FALSE + }; VkPhysicalDeviceVideoMaintenance1FeaturesKHR videoMaintenance1Features { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR, - ((videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) != 0) ? - &videoEncodeAV1Feature : - nullptr, - false}; + &timelineSemaphoreFeatures, + VK_FALSE + }; VkPhysicalDeviceSynchronization2Features synchronization2Features { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES, &videoMaintenance1Features, - false + VK_FALSE }; VkPhysicalDeviceFeatures2 deviceFeatures { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, &synchronization2Features}; GetPhysicalDeviceFeatures2(m_physDevice, &deviceFeatures); + + assert(timelineSemaphoreFeatures.timelineSemaphore); + assert(videoMaintenance1Features.videoMaintenance1); + assert(synchronization2Features.synchronization2); + assert(((videoCodecs & VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR) != 0) == + (videoEncodeAV1Feature.videoEncodeAV1 != VK_FALSE)); + assert(((videoCodecs & VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) != 0) == + (videoDecodeVP9Feature.videoDecodeVP9 != VK_FALSE)); + + // Validate feature support here. + // TODO: Currntly this method is receiving all codec bits irrespective of the codec that is required to decode/encode provided input. + // Provide only required codec and features and validate the support. 
+ devInfo.pNext = &deviceFeatures; if ((numDecodeQueues > 0) && @@ -987,6 +1037,7 @@ VkResult VulkanDeviceContext::PopulateDeviceExtensions() VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, VkInstance vkInstance, + VkVideoCodecOperationFlagsKHR videoCodecs, bool enableWsi, bool enableWsiDirectMode, bool enableValidation, @@ -1020,6 +1071,7 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; @@ -1039,6 +1091,7 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, nullptr }; @@ -1070,6 +1123,19 @@ VkResult VulkanDeviceContext::InitVulkanDecoderDevice(const char * pAppName, /********** End WSI instance extensions support *******************************************/ #endif // VIDEO_DISPLAY_QUEUE_SUPPORT + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME); + } + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME); + } + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME); + } + if (videoCodecs == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + AddReqDeviceExtension(VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME); + } + VkResult result = InitVulkanDevice(pAppName, vkInstance, enbaleVerboseDump); if (result != VK_SUCCESS) { printf("Could not initialize the Vulkan device!\n"); diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.h b/common/libs/VkCodecUtils/VulkanDeviceContext.h index a3cf53a5..97325f1d 100644 --- 
a/common/libs/VkCodecUtils/VulkanDeviceContext.h +++ b/common/libs/VkCodecUtils/VulkanDeviceContext.h @@ -24,6 +24,7 @@ #include #include #include "VkShell/VkWsiDisplay.h" +#include "VkCodecUtils/VulkanSemaphoreDump.h" class VulkanDeviceContext : public vk::VkInterfaceFunctions { @@ -157,11 +158,22 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { }; VkResult MultiThreadedQueueSubmit(const QueueFamilySubmitType submitType, const int32_t queueIndex, - uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) const + uint32_t submitCount, const VkSubmitInfo2KHR* pSubmits, VkFence fence, + const char* submissionName = nullptr, + uint64_t decodeEncodeOrder = UINT64_MAX, + uint64_t displayInputOrder = UINT64_MAX) const { MtQueueMutex queue(this, submitType, queueIndex); if (queue) { - return QueueSubmit(queue, submitCount, pSubmits, fence); + + // Dump semaphore info for debugging + if (false) { + for (uint32_t i = 0; i < submitCount; i++) { + VulkanSemaphoreDump::DumpSemaphoreInfo(pSubmits[i], submissionName, decodeEncodeOrder, displayInputOrder); + } + } + + return QueueSubmit2KHR(queue, submitCount, pSubmits, fence); } else { return VK_ERROR_INITIALIZATION_FAILED; } diff --git a/common/libs/VkCodecUtils/VulkanDisplayFrame.h b/common/libs/VkCodecUtils/VulkanDisplayFrame.h index 246183c9..c86f5ea0 100644 --- a/common/libs/VkCodecUtils/VulkanDisplayFrame.h +++ b/common/libs/VkCodecUtils/VulkanDisplayFrame.h @@ -41,7 +41,9 @@ class VulkanDisplayFrame VkFence frameCompleteFence; // If valid, the fence is signaled when the decoder or encoder is done decoding / encoding the frame. VkFence frameConsumerDoneFence; // If valid, the fence is signaled when the consumer (graphics, compute or display) is done using the frame. VkSemaphore frameCompleteSemaphore; // If valid, the semaphore is signaled when the decoder or encoder is done decoding / encoding the frame. 
- VkSemaphore frameConsumerDoneSemaphore; // If valid, the semaphore is signaled when the consumer (graphics, compute or display) is done using the frame. + VkSemaphore consumerCompleteSemaphore; // If valid, the semaphore is signaled when the consumer (graphics, compute or display) is done using the frame. + uint64_t frameCompleteDoneSemValue; // The semaphore is signaled by the decoder or the decoder's filter when this semaphore value has been reached. + uint64_t frameConsumerDoneSemValue; // The semaphore is signaled by the consumer (graphics, compute or display) when this semaphore value has been reached. VkQueryPool queryPool; // queryPool handle used for the video queries. int32_t startQueryId; // query Id used for the this frame. uint32_t numQueries; // usually one query per frame @@ -64,10 +66,12 @@ class VulkanDisplayFrame imageViews[imageTypeIdx].inUse = false; } } - frameCompleteFence = VkFence(); - frameConsumerDoneFence = VkFence(); - frameCompleteSemaphore = VkSemaphore(); - frameConsumerDoneSemaphore = VkSemaphore(); + frameCompleteFence = VK_NULL_HANDLE; + frameConsumerDoneFence = VK_NULL_HANDLE; + frameCompleteSemaphore = VK_NULL_HANDLE; + consumerCompleteSemaphore = VK_NULL_HANDLE; + frameCompleteDoneSemValue = (0ULL); // Frame 0 signaled by the decoder and/or filter + frameConsumerDoneSemValue = (0ULL); // Frame 0 signaled by the consumer queryPool = VkQueryPool(); startQueryId = 0; numQueries = 0; @@ -92,7 +96,9 @@ class VulkanDisplayFrame , frameCompleteFence() , frameConsumerDoneFence() , frameCompleteSemaphore() - , frameConsumerDoneSemaphore() + , consumerCompleteSemaphore() + , frameCompleteDoneSemValue(0ULL) + , frameConsumerDoneSemValue(0ULL) , queryPool() , startQueryId() , numQueries() diff --git a/common/libs/VkCodecUtils/VulkanFilter.h b/common/libs/VkCodecUtils/VulkanFilter.h index 2670304c..bb88799b 100644 --- a/common/libs/VkCodecUtils/VulkanFilter.h +++ b/common/libs/VkCodecUtils/VulkanFilter.h @@ -24,6 +24,7 @@ #include 
"VkCodecUtils/VulkanShaderCompiler.h" #include "VkCodecUtils/VkImageResource.h" #include "VkCodecUtils/VulkanCommandBufferPool.h" +#include "VkCodecUtils/VulkanSemaphoreDump.h" struct VulkanShaderInput { const std::string shader; @@ -34,6 +35,9 @@ struct VulkanShaderInput { class VulkanFilter : public VulkanCommandBufferPool { public: + // Constants moved inside the class as static constexpr + static constexpr uint32_t MAX_SEMAPHORES = 4; + static constexpr uint32_t MAX_CMD_BUFFERS = 4; VulkanFilter(const VulkanDeviceContext* vkDevCtx, uint32_t queueFamilyIndex, @@ -76,40 +80,146 @@ class VulkanFilter : public VulkanCommandBufferPool uint32_t bufferIdx) = 0; virtual VkResult SubmitCommandBuffer(uint32_t commandBufferCount, - const VkCommandBuffer* pCommandBuffers, + const VkCommandBuffer* pCommandBuffers, uint32_t waitSemaphoreCount, const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags2KHR* pWaitStageMasks, uint32_t signalSemaphoreCount, const VkSemaphore* pSignalSemaphores, + const VkPipelineStageFlags2KHR* pSignalStageMasks, VkFence filterCompleteFence) const { - assert(m_queue != VK_NULL_HANDLE); + assert(commandBufferCount <= MAX_CMD_BUFFERS); + assert(waitSemaphoreCount <= MAX_SEMAPHORES); + assert(signalSemaphoreCount <= MAX_SEMAPHORES); + + // Prepare command buffer info on stack + VkCommandBufferSubmitInfoKHR cmdBufferInfos[MAX_CMD_BUFFERS]; + for (uint32_t i = 0; i < commandBufferCount; i++) { + cmdBufferInfos[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos[i].pNext = nullptr; + cmdBufferInfos[i].commandBuffer = pCommandBuffers[i]; + cmdBufferInfos[i].deviceMask = 0; + } - // Wait for rendering finished - VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + // Prepare wait semaphore info on stack + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + 
waitSemaphoreInfos[i].pNext = nullptr; + waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i]; + waitSemaphoreInfos[i].value = 0; // Binary semaphore + waitSemaphoreInfos[i].stageMask = pWaitStageMasks[i]; + waitSemaphoreInfos[i].deviceIndex = 0; + } - // Submit compute commands - VkSubmitInfo submitInfo {}; - submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submitInfo.pCommandBuffers = pCommandBuffers; - submitInfo.commandBufferCount = commandBufferCount; - submitInfo.waitSemaphoreCount = waitSemaphoreCount; - submitInfo.pWaitSemaphores = pWaitSemaphores; - submitInfo.pWaitDstStageMask = &waitStageMask; - submitInfo.signalSemaphoreCount = signalSemaphoreCount; - submitInfo.pSignalSemaphores = pSignalSemaphores; + // Prepare signal semaphore info on stack + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[i].pNext = nullptr; + signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i]; + signalSemaphoreInfos[i].value = 0; // Binary semaphore + signalSemaphoreInfos[i].stageMask = pSignalStageMasks[i]; + signalSemaphoreInfos[i].deviceIndex = 0; + } + + // Submit info + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + submitInfo.pNext = nullptr; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = commandBufferCount; + submitInfo.pCommandBufferInfos = cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; + + if (false) { + // Dump semaphore info for debugging + VulkanSemaphoreDump::DumpSemaphoreInfo(submitInfo, "DECODE FILTER", 0); + } assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, filterCompleteFence)); - VkResult result = 
m_vkDevCtx->QueueSubmit(m_queue, 1, &submitInfo, filterCompleteFence); + VkResult result = m_vkDevCtx->QueueSubmit2KHR(m_queue, 1, &submitInfo, filterCompleteFence); + + return result; + } + + virtual VkResult SubmitCommandBuffer(uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers, + uint32_t waitSemaphoreCount, + const VkSemaphore* pWaitSemaphores, + const uint64_t* pWaitSemaphoreValues, + const VkPipelineStageFlags2KHR* pWaitStageMasks, + uint32_t signalSemaphoreCount, + const VkSemaphore* pSignalSemaphores, + const uint64_t* pSignalSemaphoreValues, + const VkPipelineStageFlags2KHR* pSignalStageMasks, + VkFence filterCompleteFence) const + { + assert(m_queue != VK_NULL_HANDLE); + assert(commandBufferCount <= MAX_CMD_BUFFERS); + assert(waitSemaphoreCount <= MAX_SEMAPHORES); + assert(signalSemaphoreCount <= MAX_SEMAPHORES); + + // Prepare command buffer info on stack + VkCommandBufferSubmitInfoKHR cmdBufferInfos[MAX_CMD_BUFFERS]; + for (uint32_t i = 0; i < commandBufferCount; i++) { + cmdBufferInfos[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos[i].pNext = nullptr; + cmdBufferInfos[i].commandBuffer = pCommandBuffers[i]; + cmdBufferInfos[i].deviceMask = 0; + } - if (result != VK_SUCCESS) { - return result; + // Prepare wait semaphore info on stack + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[i].pNext = nullptr; + waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i]; + waitSemaphoreInfos[i].value = pWaitSemaphoreValues[i]; // Timeline value + waitSemaphoreInfos[i].stageMask = pWaitStageMasks[i]; + waitSemaphoreInfos[i].deviceIndex = 0; } + // Prepare signal semaphore info on stack + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[MAX_SEMAPHORES]; + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + signalSemaphoreInfos[i].sType = 
VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[i].pNext = nullptr; + signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i]; + signalSemaphoreInfos[i].value = pSignalSemaphoreValues[i]; // Timeline value + signalSemaphoreInfos[i].stageMask = pSignalStageMasks[i]; + signalSemaphoreInfos[i].deviceIndex = 0; + } + + // Submit info + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + submitInfo.pNext = nullptr; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = commandBufferCount; + submitInfo.pCommandBufferInfos = cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; + + if (false) { + // Dump semaphore info for debugging + VulkanSemaphoreDump::DumpSemaphoreInfo(submitInfo, "DECODE FILTER", 0); + } + + assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, filterCompleteFence)); + VkResult result = m_vkDevCtx->QueueSubmit2KHR(m_queue, 1, &submitInfo, filterCompleteFence); + return result; } + protected: VulkanShaderCompiler m_vulkanShaderCompiler; uint32_t m_queueFamilyIndex; diff --git a/common/libs/VkCodecUtils/VulkanFrame.cpp b/common/libs/VkCodecUtils/VulkanFrame.cpp index 2e87885d..b95bdf96 100644 --- a/common/libs/VkCodecUtils/VulkanFrame.cpp +++ b/common/libs/VkCodecUtils/VulkanFrame.cpp @@ -18,6 +18,7 @@ #include #include #include +#include // Added for std::this_thread::sleep_for #include "VkCodecUtils/Helpers.h" #include "VkCodecUtils/VulkanDeviceContext.h" @@ -25,6 +26,7 @@ #include "VkCodecUtils/VulkanVideoUtils.h" #include "VulkanFrame.h" #include "VkVideoCore/DecodeFrameBufferIf.h" +#include "VkCodecUtils/VulkanSemaphoreDump.h" template VulkanFrame::VulkanFrame(const VulkanDeviceContext* vkDevCtx) @@ -420,6 +422,7 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, if 
(renderIndex < 0) { renderIndex = -renderIndex; } + vulkanVideoUtils::VulkanPerDrawContext* pPerDrawContext = m_videoRenderer->m_renderInfo.GetDrawContext(renderIndex); VkSharedBaseObj imageResourceView; @@ -583,54 +586,77 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, } } - const uint32_t maxWaitSemaphores = 2; - uint32_t numWaitSemaphores = 0; - VkSemaphore waitSemaphores[maxWaitSemaphores] = {}; + const uint32_t waitSemaphoreMaxCount = 2; + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[waitSemaphoreMaxCount]{}; + + const uint32_t signalSemaphoreMaxCount = 2; + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[signalSemaphoreMaxCount]{}; - assert(waitSemaphoreCount <= 1); - if ((waitSemaphoreCount > 0) && (pWaitSemaphores != nullptr)) { - waitSemaphores[numWaitSemaphores++] = *pWaitSemaphores; + for (uint32_t i = 0; i < waitSemaphoreCount; i++) { + waitSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[i].pNext = nullptr; + waitSemaphoreInfos[i].semaphore = pWaitSemaphores[i]; + waitSemaphoreInfos[i].value = 0; // Binary semaphore + waitSemaphoreInfos[i].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + waitSemaphoreInfos[i].deviceIndex = 0; } - if (inFrame && (inFrame->frameCompleteSemaphore != VkSemaphore())) { - waitSemaphores[numWaitSemaphores++] = inFrame->frameCompleteSemaphore; + for (uint32_t i = 0; i < signalSemaphoreCount; i++) { + signalSemaphoreInfos[i].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[i].pNext = nullptr; + signalSemaphoreInfos[i].semaphore = pSignalSemaphores[i]; + signalSemaphoreInfos[i].value = 0; // Binary semaphore + signalSemaphoreInfos[i].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + signalSemaphoreInfos[i].deviceIndex = 0; } - assert(numWaitSemaphores <= maxWaitSemaphores); - const uint32_t maxSignalSemaphores = 2; - uint32_t numSignalSemaphores = 0; - VkSemaphore signalSemaphores[maxSignalSemaphores] = {}; + if (inFrame && 
(inFrame->frameCompleteSemaphore != VK_NULL_HANDLE)) { - assert(signalSemaphoreCount <= 1); - if ((signalSemaphoreCount > 0) && (pSignalSemaphores != nullptr)) { - signalSemaphores[numSignalSemaphores++] = *pSignalSemaphores; - } + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = inFrame->frameCompleteSemaphore; + waitSemaphoreInfos[waitSemaphoreCount].value = inFrame->frameCompleteDoneSemValue; + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR | + VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; + waitSemaphoreCount++; + + signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr; + signalSemaphoreInfos[signalSemaphoreCount].semaphore = inFrame->consumerCompleteSemaphore; + signalSemaphoreInfos[signalSemaphoreCount].value = inFrame->frameConsumerDoneSemValue; + signalSemaphoreInfos[signalSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; + signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0; + signalSemaphoreCount++; - if (inFrame && (inFrame->frameConsumerDoneSemaphore != VkSemaphore())) { - signalSemaphores[numSignalSemaphores++] = inFrame->frameConsumerDoneSemaphore; inFrame->hasConsummerSignalSemaphore = true; } - assert(numSignalSemaphores <= maxSignalSemaphores); + + assert(waitSemaphoreCount <= waitSemaphoreMaxCount); + assert(signalSemaphoreCount <= signalSemaphoreMaxCount); if (frameConsumerDoneFence != VkFence()) { inFrame->hasConsummerSignalFence = true; } - - // Wait for the image to be owned and signal for render completion - VkPipelineStageFlags primaryCmdSubmitWaitStages[2] = { VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - 
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT }; - VkSubmitInfo primaryCmdSubmitInfo = VkSubmitInfo(); - primaryCmdSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - primaryCmdSubmitInfo.pWaitDstStageMask = primaryCmdSubmitWaitStages; - primaryCmdSubmitInfo.commandBufferCount = 1; - - primaryCmdSubmitInfo.waitSemaphoreCount = numWaitSemaphores; - primaryCmdSubmitInfo.pWaitSemaphores = numWaitSemaphores ? waitSemaphores : NULL; - primaryCmdSubmitInfo.pCommandBuffers = pPerDrawContext->commandBuffer.GetCommandBuffer(); - - primaryCmdSubmitInfo.signalSemaphoreCount = numSignalSemaphores; - primaryCmdSubmitInfo.pSignalSemaphores = numSignalSemaphores ? signalSemaphores : NULL; + VkCommandBufferSubmitInfoKHR cmdBufferInfos; + cmdBufferInfos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos.pNext = nullptr; + cmdBufferInfos.commandBuffer = *pPerDrawContext->commandBuffer.GetCommandBuffer(); + cmdBufferInfos.deviceMask = 0; + + // Submit info + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + submitInfo.pNext = nullptr; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; // For fence/sync debugging if (false && inFrame && inFrame->frameCompleteFence) { @@ -646,7 +672,14 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, } } - result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::GRAPHICS, 0, 1, &primaryCmdSubmitInfo, frameConsumerDoneFence); + result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::GRAPHICS, + 0, // queueIndex + 1, // submitCount + &submitInfo, + frameConsumerDoneFence, + "Graphics Submit", + (inFrame != nullptr) ? inFrame->decodeOrder : UINT64_MAX, + (inFrame != nullptr) ? 
inFrame->displayOrder : UINT64_MAX); if (result != VK_SUCCESS) { assert(result == VK_SUCCESS); fprintf(stderr, "\nERROR: MultiThreadedQueueSubmit() result: 0x%x\n", result); @@ -676,6 +709,11 @@ VkResult VulkanFrame::DrawFrame( int32_t renderIndex, m_frameDataIndex = (m_frameDataIndex + 1) % m_frameData.size(); + if (false) { + // Add a 20ms sleep + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + return result; } diff --git a/common/libs/VkCodecUtils/VulkanSemaphoreDump.h b/common/libs/VkCodecUtils/VulkanSemaphoreDump.h new file mode 100644 index 00000000..6e1b8913 --- /dev/null +++ b/common/libs/VkCodecUtils/VulkanSemaphoreDump.h @@ -0,0 +1,90 @@ +/* +* Copyright 2024 NVIDIA Corporation. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#pragma once + +#include +#include +#include + +namespace VulkanSemaphoreDump { + +/** + * @brief Dumps the semaphore information from a VkSubmitInfo2KHR structure + * + * @param submitInfo The VkSubmitInfo2KHR structure containing semaphore information + * @param submissionName Optional name to identify the submission (e.g., "DECODE", "COMPUTE") + * @param decodeOrder Optional decode order number or identifier (uint64_t) + * @param displayOrder Optional display order number or identifier (uint64_t) + */ +inline void DumpSemaphoreInfo( + const VkSubmitInfo2KHR& submitInfo, + const char* submissionName = nullptr, + uint64_t decodeEncodeOrder = UINT64_MAX, + uint64_t displayInputOrder = UINT64_MAX) +{ + + std::cout << "----------------------------\n"; + + if (submissionName) { + std::cout << submissionName << " "; + } + + std::cout << "TL Semaphore sync"; + + if (decodeEncodeOrder != UINT64_MAX) { + std::cout << " (decode / encode = " << decodeEncodeOrder; + if (displayInputOrder != UINT64_MAX) { + std::cout << ", display / input = " << displayInputOrder; + } + std::cout << ")"; + } else if (displayInputOrder != UINT64_MAX) { + std::cout << " (display / input = " << displayInputOrder << ")"; + } + + std::cout << ":\n"; + + // Dump wait semaphores + for (uint32_t i = 0; i < submitInfo.waitSemaphoreInfoCount; i++) { + const VkSemaphoreSubmitInfoKHR& semInfo = submitInfo.pWaitSemaphoreInfos[i]; + std::cout << " Wait sem[" << i << "]: " << semInfo.semaphore + << " value = " << semInfo.value + << " stage = 0x" << std::hex << semInfo.stageMask << std::dec; + + if (semInfo.deviceIndex > 0) { + std::cout << " deviceIndex=" << semInfo.deviceIndex; + } + std::cout << std::endl; + } + + // Dump signal semaphores + for (uint32_t i = 0; i < submitInfo.signalSemaphoreInfoCount; i++) { + const VkSemaphoreSubmitInfoKHR& semInfo = submitInfo.pSignalSemaphoreInfos[i]; + std::cout << " Signal sem[" << i << "]: " << semInfo.semaphore + << " value = " << semInfo.value + << " stage 
= 0x" << std::hex << semInfo.stageMask << std::dec; + + if (semInfo.deviceIndex > 0) { + std::cout << " deviceIndex = " << semInfo.deviceIndex; + } + std::cout << std::endl; + } + + std::cout << "----------------------------" << std::endl; +} + + +} // namespace VulkanSemaphoreDump diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index dc980474..630232e7 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -951,6 +951,8 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters VulkanVideoFrameBuffer::FrameSynchronizationInfo frameSynchronizationInfo = VulkanVideoFrameBuffer::FrameSynchronizationInfo(); frameSynchronizationInfo.hasFrameCompleteSignalFence = true; frameSynchronizationInfo.hasFrameCompleteSignalSemaphore = true; + frameSynchronizationInfo.hasFilterSignalSemaphore = m_enableDecodeComputeFilter; + frameSynchronizationInfo.hasFrameConsumerSignalSemaphore = false; frameSynchronizationInfo.syncOnFrameCompleteFence = true; frameSynchronizationInfo.syncOnFrameConsumerDoneFence = true; frameSynchronizationInfo.imageSpecsIndex = m_imageSpecsIndex; @@ -1039,14 +1041,9 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, frameSynchronizationInfo.frameCompleteFence)); VkFence frameCompleteFence = frameSynchronizationInfo.frameCompleteFence; - VkSemaphore frameCompleteSemaphore = frameSynchronizationInfo.frameCompleteSemaphore; - VkSemaphore frameConsumerDoneSemaphore = frameSynchronizationInfo.frameConsumerDoneSemaphore; - // By default, the frameCompleteSemaphore is the videoDecodeCompleteSemaphore. - // If the video frame filter is enabled, since it is executed after the decoder's queue, - // the filter will provide its own semaphore for the video decoder to signal, instead. 
- // Then the frameCompleteSemaphore will be signaled by the filter of its completion. + VkSemaphore videoDecodeCompleteSemaphore = frameSynchronizationInfo.frameCompleteSemaphore; + VkSemaphore consumerCompleteSemaphore = frameSynchronizationInfo.consumerCompleteSemaphore; VkFence videoDecodeCompleteFence = frameCompleteFence; - VkSemaphore videoDecodeCompleteSemaphore = frameCompleteSemaphore; VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; @@ -1136,34 +1133,43 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(filterCmdBuffer != nullptr); - // frameCompleteSemaphore is the semaphore that the filter is going to signal on completion when enabled. - // The videoDecodeCompleteSemaphore semaphore will be signaled by the decoder and then used by the filter to wait on. - + // videoDecodeCompleteFence is the fence that the filter is going to signal on completion when enabled. 
videoDecodeCompleteFence = filterCmdBuffer->GetFence(); - videoDecodeCompleteSemaphore = filterCmdBuffer->GetSemaphore(); } const uint32_t waitSemaphoreMaxCount = 3; - VkSemaphore waitSemaphores[waitSemaphoreMaxCount] = { VK_NULL_HANDLE }; + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[waitSemaphoreMaxCount]{}; const uint32_t signalSemaphoreMaxCount = 3; - VkSemaphore signalSemaphores[signalSemaphoreMaxCount] = { VK_NULL_HANDLE }; + VkSemaphoreSubmitInfoKHR signalSemaphoreInfos[signalSemaphoreMaxCount]{}; uint32_t waitSemaphoreCount = 0; - if (frameConsumerDoneSemaphore != VK_NULL_HANDLE) { - waitSemaphores[waitSemaphoreCount] = frameConsumerDoneSemaphore; + uint32_t signalSemaphoreCount = 0; + + if (consumerCompleteSemaphore != VK_NULL_HANDLE) { + + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = consumerCompleteSemaphore; + waitSemaphoreInfos[waitSemaphoreCount].value = frameSynchronizationInfo.frameConsumerDoneTimelineValue; + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR | + VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; waitSemaphoreCount++; } - uint32_t signalSemaphoreCount = 0; if (videoDecodeCompleteSemaphore != VK_NULL_HANDLE) { - signalSemaphores[signalSemaphoreCount] = videoDecodeCompleteSemaphore; + + signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr; + signalSemaphoreInfos[signalSemaphoreCount].semaphore = videoDecodeCompleteSemaphore; + signalSemaphoreInfos[signalSemaphoreCount].value = frameSynchronizationInfo.decodeCompleteTimelineValue; + signalSemaphoreInfos[signalSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + 
signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0; signalSemaphoreCount++; } - uint64_t waitTlSemaphoresValues[waitSemaphoreMaxCount] = { 0 /* ignored for binary semaphores */ }; - uint64_t signalTlSemaphoresValues[signalSemaphoreMaxCount] = { 0 /* ignored for binary semaphores */ }; - VkTimelineSemaphoreSubmitInfo timelineSemaphoreInfos = {}; if (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) { if (m_dumpDecodeData) { @@ -1172,67 +1178,53 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters std::cout << "\t TL semaphore value: " << currSemValue << ", status: " << semResult << std::endl; } - waitSemaphores[waitSemaphoreCount] = m_hwLoadBalancingTimelineSemaphore; - waitTlSemaphoresValues[waitSemaphoreCount] = m_decodePicCount - 1; // wait for the previous value to be signaled + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].pNext = nullptr; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = m_hwLoadBalancingTimelineSemaphore; + waitSemaphoreInfos[waitSemaphoreCount].value = m_decodePicCount - 1; // wait for the previous value to be signaled + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; waitSemaphoreCount++; - signalSemaphores[signalSemaphoreCount] = m_hwLoadBalancingTimelineSemaphore; - signalTlSemaphoresValues[signalSemaphoreCount] = m_decodePicCount; // signal the current m_decodePicCount value + signalSemaphoreInfos[signalSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + signalSemaphoreInfos[signalSemaphoreCount].pNext = nullptr; + signalSemaphoreInfos[signalSemaphoreCount].semaphore = m_hwLoadBalancingTimelineSemaphore; + signalSemaphoreInfos[signalSemaphoreCount].value = m_decodePicCount; // signal the current m_decodePicCount value + signalSemaphoreInfos[signalSemaphoreCount].stageMask = 
VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + signalSemaphoreInfos[signalSemaphoreCount].deviceIndex = 0; signalSemaphoreCount++; - timelineSemaphoreInfos.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; - timelineSemaphoreInfos.pNext = NULL; assert(waitSemaphoreCount < waitSemaphoreMaxCount); - timelineSemaphoreInfos.waitSemaphoreValueCount = waitSemaphoreCount; - timelineSemaphoreInfos.pWaitSemaphoreValues = waitTlSemaphoresValues; assert(signalSemaphoreCount < signalSemaphoreMaxCount); - timelineSemaphoreInfos.signalSemaphoreValueCount = signalSemaphoreCount; - timelineSemaphoreInfos.pSignalSemaphoreValues = signalTlSemaphoresValues; - if (m_dumpDecodeData) { - std::cout << "\t Wait for: " << (waitSemaphoreCount ? waitTlSemaphoresValues[waitSemaphoreCount - 1] : 0) << - ", signal at " << signalTlSemaphoresValues[signalSemaphoreCount - 1] << std::endl; - } } assert(waitSemaphoreCount <= waitSemaphoreMaxCount); assert(signalSemaphoreCount <= signalSemaphoreMaxCount); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoDecodeSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.pNext = (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) ? 
&timelineSemaphoreInfos : nullptr; - submitInfo.waitSemaphoreCount = waitSemaphoreCount; - submitInfo.pWaitSemaphores = waitSemaphores; - submitInfo.pWaitDstStageMask = &videoDecodeSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = &frameDataSlot.commandBuffer; - submitInfo.signalSemaphoreCount = signalSemaphoreCount; - submitInfo.pSignalSemaphores = signalSemaphores; - - if (m_dumpDecodeData) { - if (m_hwLoadBalancingTimelineSemaphore != VK_NULL_HANDLE) { - std::cout << "\t\t waitSemaphoreValueCount: " << timelineSemaphoreInfos.waitSemaphoreValueCount << std::endl; - std::cout << "\t pWaitSemaphoreValues: " << timelineSemaphoreInfos.pWaitSemaphoreValues[0] << ", " << - timelineSemaphoreInfos.pWaitSemaphoreValues[1] << ", " << - timelineSemaphoreInfos.pWaitSemaphoreValues[2] << std::endl; - std::cout << "\t\t signalSemaphoreValueCount: " << timelineSemaphoreInfos.signalSemaphoreValueCount << std::endl; - std::cout << "\t pSignalSemaphoreValues: " << timelineSemaphoreInfos.pSignalSemaphoreValues[0] << ", " << - timelineSemaphoreInfos.pSignalSemaphoreValues[1] << ", " << - timelineSemaphoreInfos.pSignalSemaphoreValues[2] << std::endl; - } - - std::cout << "\t waitSemaphoreCount: " << submitInfo.waitSemaphoreCount << std::endl; - std::cout << "\t\t pWaitSemaphores: " << submitInfo.pWaitSemaphores[0] << ", " << - submitInfo.pWaitSemaphores[1] << ", " << - submitInfo.pWaitSemaphores[2] << std::endl; - std::cout << "\t signalSemaphoreCount: " << submitInfo.signalSemaphoreCount << std::endl; - std::cout << "\t\t pSignalSemaphores: " << submitInfo.pSignalSemaphores[0] << ", " << - submitInfo.pSignalSemaphores[1] << ", " << - submitInfo.pSignalSemaphores[2] << std::endl << std::endl; - } + VkCommandBufferSubmitInfoKHR cmdBufferInfos; + cmdBufferInfos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmdBufferInfos.pNext = nullptr; + cmdBufferInfos.commandBuffer = frameDataSlot.commandBuffer; + cmdBufferInfos.deviceMask = 0; + + 
// Submit info + VkSubmitInfo2KHR submitInfo { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = waitSemaphoreInfos; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfos; + submitInfo.signalSemaphoreInfoCount = signalSemaphoreCount; + submitInfo.pSignalSemaphoreInfos = signalSemaphoreInfos; assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, videoDecodeCompleteFence)); - VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::DECODE, m_currentVideoQueueIndx, - 1, &submitInfo, videoDecodeCompleteFence); + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::DECODE, + m_currentVideoQueueIndx, + 1, + &submitInfo, + videoDecodeCompleteFence, + "Video Decode", + picNumInDecodeOrder); assert(result == VK_SUCCESS); if (result != VK_SUCCESS) { return -1; @@ -1368,11 +1360,23 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters result = filterCmdBuffer->EndCommandBufferRecording(cmdBuf); assert(result == VK_SUCCESS); - if (false) std::cout << currPicIdx << " : OUT view: " << outputImageView->GetImageView() << ", signalSem: " << frameCompleteSemaphore << std::endl << std::flush; - assert(videoDecodeCompleteSemaphore != frameCompleteSemaphore); - result = m_yuvFilter->SubmitCommandBuffer(1, filterCmdBuffer->GetCommandBuffer(), - 1, &videoDecodeCompleteSemaphore, - 1, &frameCompleteSemaphore, + // Wait for the decoder to complete. + const VkPipelineStageFlags2KHR waitDecoderStageMasks = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR; + + // Signal the compute stage after done. 
+ const uint64_t computeCompleteTimelineValue = frameSynchronizationInfo.filterCompleteTimelineValue; + const VkPipelineStageFlags2KHR signalComputeStageMasks = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR; + + result = m_yuvFilter->SubmitCommandBuffer(1, // commandBufferCount + filterCmdBuffer->GetCommandBuffer(), + 1, // waitSemaphoreCount + &videoDecodeCompleteSemaphore, + &frameSynchronizationInfo.decodeCompleteTimelineValue, + &waitDecoderStageMasks, + 1, // signalSemaphoreCount + &videoDecodeCompleteSemaphore, + &computeCompleteTimelineValue, + &signalComputeStageMasks, frameCompleteFence); assert(result == VK_SUCCESS); filterCmdBuffer->SetCommandBufferSubmitted(); diff --git a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp index f8e925b0..2550bd7e 100644 --- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp +++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp @@ -51,18 +51,17 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { NvPerFrameDecodeResources() : m_picDispInfo() , m_frameCompleteFence() - , m_frameCompleteSemaphore() , m_frameConsumerDoneFence() - , m_frameConsumerDoneSemaphore() + , m_frameCompleteTimelineValue() + , m_frameConsumerDoneTimelineValue() , m_imageSpecsIndex() , m_hasFrameCompleteSignalFence(false) , m_hasFrameCompleteSignalSemaphore(false) , m_hasConsummerSignalFence(false) - , m_hasConsummerSignalSemaphore(false) + , m_useConsummerSignalSemaphore(false) , m_inDecodeQueue(false) , m_inDisplayQueue(false) , m_ownedByConsummer(false) - , m_vkDevCtx() , m_imageViewState() { } @@ -75,14 +74,14 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { VkResult init( const VulkanDeviceContext* vkDevCtx); - void Deinit(); + void Deinit(const VulkanDeviceContext* vkDevCtx); NvPerFrameDecodeResources (const NvPerFrameDecodeResources &srcObj) = delete; NvPerFrameDecodeResources 
(NvPerFrameDecodeResources &&srcObj) = delete; ~NvPerFrameDecodeResources() { - Deinit(); + Deinit(nullptr); } VkSharedBaseObj& GetImageView(uint8_t imageTypeIdx) { @@ -149,14 +148,14 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { VkParserDecodePictureInfo m_picDispInfo; VkFence m_frameCompleteFence; - VkSemaphore m_frameCompleteSemaphore; VkFence m_frameConsumerDoneFence; - VkSemaphore m_frameConsumerDoneSemaphore; + uint64_t m_frameCompleteTimelineValue; + uint64_t m_frameConsumerDoneTimelineValue; DecodeFrameBufferIf::ImageSpecsIndex m_imageSpecsIndex; uint32_t m_hasFrameCompleteSignalFence : 1; uint32_t m_hasFrameCompleteSignalSemaphore : 1; uint32_t m_hasConsummerSignalFence : 1; - uint32_t m_hasConsummerSignalSemaphore : 1; + uint32_t m_useConsummerSignalSemaphore : 1; uint32_t m_inDecodeQueue : 1; uint32_t m_inDisplayQueue : 1; uint32_t m_ownedByConsummer : 1; @@ -171,8 +170,8 @@ class NvPerFrameDecodeResources : public vkPicBuffBase { // The filter's pool node VkSharedBaseObj filterPoolNode; + private: - const VulkanDeviceContext* m_vkDevCtx; std::array m_imageViewState; }; @@ -180,7 +179,10 @@ class NvPerFrameDecodeImageSet { public: NvPerFrameDecodeImageSet() - : m_queueFamilyIndex((uint32_t)-1) + : m_vkDevCtx() + , m_queueFamilyIndex((uint32_t)-1) + , m_frameCompleteSemaphore() + , m_consumerCompleteSemaphore() , m_numImages(0) , m_maxNumImageTypeIdx(0) , m_perFrameDecodeResources(VulkanVideoFrameBuffer::maxImages) @@ -195,11 +197,12 @@ class NvPerFrameDecodeImageSet { const std::array& imageSpecs, uint32_t queueFamilyIndex); - void Deinit(); + void Deinit(const VulkanDeviceContext* vkDevCtx); ~NvPerFrameDecodeImageSet() { - Deinit(); + Deinit(m_vkDevCtx); + m_vkDevCtx = nullptr; } NvPerFrameDecodeResources& operator[](unsigned int index) @@ -258,8 +261,13 @@ class NvPerFrameDecodeImageSet { } private: + const VulkanDeviceContext* m_vkDevCtx; uint32_t m_queueFamilyIndex; VkVideoCoreProfile m_videoProfile; +public: + VkSemaphore 
m_frameCompleteSemaphore; + VkSemaphore m_consumerCompleteSemaphore; +private: uint32_t m_numImages; uint32_t m_maxNumImageTypeIdx; std::vector m_perFrameDecodeResources; @@ -372,7 +380,7 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { m_ownedByDisplayMask = 0; m_frameNumInDisplayOrder = 0; - m_perFrameDecodeImageSet.Deinit(); + m_perFrameDecodeImageSet.Deinit(m_vkDevCtx); if (m_queryPool != VkQueryPool()) { m_vkDevCtx->DestroyQueryPool(*m_vkDevCtx, m_queryPool, NULL); @@ -417,10 +425,9 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if ((pFrameSynchronizationInfo->syncOnFrameConsumerDoneFence == 1) && - ((m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore == 0) || - (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneSemaphore == VK_NULL_HANDLE)) && - (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence == 1) && - (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence != VK_NULL_HANDLE)) { + (m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore == 0) && + (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence == 1) && + (m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence != VK_NULL_HANDLE)) { vk::WaitAndResetFence(m_vkDevCtx, *m_vkDevCtx, m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence, true, "frameConsumerDoneFence"); @@ -456,15 +463,35 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if (pFrameSynchronizationInfo->hasFrameCompleteSignalSemaphore) { - pFrameSynchronizationInfo->frameCompleteSemaphore = m_perFrameDecodeImageSet[picId].m_frameCompleteSemaphore; - if (pFrameSynchronizationInfo->frameCompleteSemaphore) { + pFrameSynchronizationInfo->frameCompleteSemaphore = m_perFrameDecodeImageSet.m_frameCompleteSemaphore; + if (pFrameSynchronizationInfo->frameCompleteSemaphore != VK_NULL_HANDLE) { + + pFrameSynchronizationInfo->decodeCompleteTimelineValue = DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DECODE, + 
m_perFrameDecodeImageSet[picId].m_decodeOrder); + + if (pFrameSynchronizationInfo->hasFilterSignalSemaphore) { + pFrameSynchronizationInfo->filterCompleteTimelineValue = DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_FILTER, + m_perFrameDecodeImageSet[picId].m_decodeOrder); + + m_perFrameDecodeImageSet[picId].m_frameCompleteTimelineValue = pFrameSynchronizationInfo->filterCompleteTimelineValue; + + } else { + + m_perFrameDecodeImageSet[picId].m_frameCompleteTimelineValue = pFrameSynchronizationInfo->decodeCompleteTimelineValue; + + } + m_perFrameDecodeImageSet[picId].m_hasFrameCompleteSignalSemaphore = true; } } - if (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore) { - pFrameSynchronizationInfo->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneSemaphore; - m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = false; + if (m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore) { + pFrameSynchronizationInfo->hasFrameConsumerSignalSemaphore = true; + pFrameSynchronizationInfo->consumerCompleteSemaphore = m_perFrameDecodeImageSet.m_consumerCompleteSemaphore; + pFrameSynchronizationInfo->frameConsumerDoneTimelineValue = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneTimelineValue; + m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore = false; } pFrameSynchronizationInfo->queryPool = m_queryPool; @@ -529,14 +556,20 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { } if (m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore) { - pDecodedFrame->frameCompleteSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteSemaphore; + pDecodedFrame->frameCompleteSemaphore = m_perFrameDecodeImageSet.m_frameCompleteSemaphore; + pDecodedFrame->frameCompleteDoneSemValue = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteTimelineValue; m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore = false; + + 
pDecodedFrame->consumerCompleteSemaphore = m_perFrameDecodeImageSet.m_consumerCompleteSemaphore; + pDecodedFrame->frameConsumerDoneSemValue = DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DISPLAY, + m_perFrameDecodeImageSet[pictureIndex].m_displayOrder); + } else { - pDecodedFrame->frameCompleteSemaphore = VkSemaphore(); + pDecodedFrame->frameCompleteSemaphore = VK_NULL_HANDLE; } pDecodedFrame->frameConsumerDoneFence = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneFence; - pDecodedFrame->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneSemaphore; pDecodedFrame->timestamp = m_perFrameDecodeImageSet[pictureIndex].m_timestamp; pDecodedFrame->decodeOrder = m_perFrameDecodeImageSet[pictureIndex].m_decodeOrder; @@ -572,7 +605,13 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { m_perFrameDecodeImageSet[picId].Release(); m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence = pDecodedFrameRelease->hasConsummerSignalFence; - m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = pDecodedFrameRelease->hasConsummerSignalSemaphore; + m_perFrameDecodeImageSet[picId].m_useConsummerSignalSemaphore = pDecodedFrameRelease->hasConsummerSignalSemaphore; + if (pDecodedFrameRelease->hasConsummerSignalSemaphore) { + m_perFrameDecodeImageSet[picId].m_frameConsumerDoneTimelineValue = + DecodeFrameBufferIf::GetSemaphoreValue( + DecodeFrameBufferIf::SEM_SYNC_TYPE_IDX_DISPLAY, + pDecodedFrameRelease->displayOrder); + } } return 0; } @@ -648,7 +687,7 @@ class VkVideoFrameBuffer : public VulkanVideoFrameBuffer { std::lock_guard lock(m_displayQueueMutex); for (unsigned int resId = 0; resId < numResources; resId++) { if ((uint32_t)indexes[resId] < m_perFrameDecodeImageSet.size()) { - m_perFrameDecodeImageSet[indexes[resId]].Deinit(); + m_perFrameDecodeImageSet[indexes[resId]].Deinit(m_vkDevCtx); } } return (int32_t)m_perFrameDecodeImageSet.size(); @@ -785,8 +824,6 @@ VkResult 
NvPerFrameDecodeResources::CreateImage( const VulkanDeviceContext* vkDe } if (!ImageExist(pImageSpec->imageTypeIdx) || m_imageViewState[pImageSpec->imageTypeIdx].recreateImage) { - assert(m_vkDevCtx != nullptr); - m_imageViewState[pImageSpec->imageTypeIdx].currentLayerLayout = pImageSpec->createInfo.initialLayout; VkSharedBaseObj imageResource; @@ -839,21 +876,13 @@ VkResult NvPerFrameDecodeResources::CreateImage( const VulkanDeviceContext* vkDe VkResult NvPerFrameDecodeResources::init(const VulkanDeviceContext* vkDevCtx) { - m_vkDevCtx = vkDevCtx; - // The fence waited on for the first frame should be signaled. const VkFenceCreateInfo fenceFrameCompleteInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT }; - VkResult result = m_vkDevCtx->CreateFence(*m_vkDevCtx, &fenceFrameCompleteInfo, nullptr, &m_frameCompleteFence); + VkResult result = vkDevCtx->CreateFence(*vkDevCtx, &fenceFrameCompleteInfo, nullptr, &m_frameCompleteFence); const VkFenceCreateInfo fenceInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr }; - result = m_vkDevCtx->CreateFence(*m_vkDevCtx, &fenceInfo, nullptr, &m_frameConsumerDoneFence); - assert(result == VK_SUCCESS); - - const VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr }; - result = m_vkDevCtx->CreateSemaphore(*m_vkDevCtx, &semInfo, nullptr, &m_frameCompleteSemaphore); - assert(result == VK_SUCCESS); - result = m_vkDevCtx->CreateSemaphore(*m_vkDevCtx, &semInfo, nullptr, &m_frameConsumerDoneSemaphore); + result = vkDevCtx->CreateFence(*vkDevCtx, &fenceInfo, nullptr, &m_frameConsumerDoneFence); assert(result == VK_SUCCESS); Reset(); @@ -861,49 +890,35 @@ VkResult NvPerFrameDecodeResources::init(const VulkanDeviceContext* vkDevCtx) return result; } -void NvPerFrameDecodeResources::Deinit() +void NvPerFrameDecodeResources::Deinit(const VulkanDeviceContext* vkDevCtx) { bitstreamData = nullptr; stdPps = nullptr; stdSps = nullptr; stdVps = nullptr; - if (m_vkDevCtx == nullptr) { 
+ if (vkDevCtx == nullptr) { assert ((m_frameCompleteFence == VK_NULL_HANDLE) && - (m_frameConsumerDoneFence == VK_NULL_HANDLE) && - (m_frameCompleteSemaphore == VK_NULL_HANDLE) && - (m_frameConsumerDoneSemaphore == VK_NULL_HANDLE)); + (m_frameConsumerDoneFence == VK_NULL_HANDLE)); return; } if (m_frameCompleteFence != VkFence()) { - m_vkDevCtx->DestroyFence(*m_vkDevCtx, m_frameCompleteFence, nullptr); + vkDevCtx->DestroyFence(*vkDevCtx, m_frameCompleteFence, nullptr); m_frameCompleteFence = VkFence(); } if (m_frameConsumerDoneFence != VkFence()) { - m_vkDevCtx->DestroyFence(*m_vkDevCtx, m_frameConsumerDoneFence, nullptr); + vkDevCtx->DestroyFence(*vkDevCtx, m_frameConsumerDoneFence, nullptr); m_frameConsumerDoneFence = VkFence(); } - if (m_frameCompleteSemaphore != VkSemaphore()) { - m_vkDevCtx->DestroySemaphore(*m_vkDevCtx, m_frameCompleteSemaphore, nullptr); - m_frameCompleteSemaphore = VkSemaphore(); - } - - if (m_frameConsumerDoneSemaphore != VkSemaphore()) { - m_vkDevCtx->DestroySemaphore(*m_vkDevCtx, m_frameConsumerDoneSemaphore, nullptr); - m_frameConsumerDoneSemaphore = VkSemaphore(); - } - for (uint32_t imageTypeIdx = 0; imageTypeIdx < DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES; imageTypeIdx++) { m_imageViewState[imageTypeIdx].view = nullptr; m_imageViewState[imageTypeIdx].singleLevelView = nullptr; } - m_vkDevCtx = nullptr; - Reset(); } @@ -919,6 +934,8 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, return -1; } + m_vkDevCtx = vkDevCtx; + for (uint32_t imageIndex = m_numImages; imageIndex < numImages; imageIndex++) { VkResult result = m_perFrameDecodeResources[imageIndex].init(vkDevCtx); assert(result == VK_SUCCESS); @@ -927,6 +944,20 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, } } + // Create timeline semaphores instead of binary semaphores + VkSemaphoreTypeCreateInfo timelineCreateInfo = {}; + timelineCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO; + 
timelineCreateInfo.pNext = nullptr; + timelineCreateInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE; + timelineCreateInfo.initialValue = 0ULL; + + VkSemaphoreCreateInfo semInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &timelineCreateInfo }; + VkResult result = vkDevCtx->CreateSemaphore(*vkDevCtx, &semInfo, nullptr, &m_frameCompleteSemaphore); + assert(result == VK_SUCCESS); + + result = vkDevCtx->CreateSemaphore(*vkDevCtx, &semInfo, nullptr, &m_consumerCompleteSemaphore); + assert(result == VK_SUCCESS); + m_videoProfile.InitFromProfile(pDecodeProfile); m_queueFamilyIndex = queueFamilyIndex; @@ -1048,10 +1079,21 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, return (int32_t)numImages; } -void NvPerFrameDecodeImageSet::Deinit() +void NvPerFrameDecodeImageSet::Deinit(const VulkanDeviceContext* vkDevCtx) { + + if (m_frameCompleteSemaphore != VK_NULL_HANDLE) { + m_vkDevCtx->DestroySemaphore(*vkDevCtx, m_frameCompleteSemaphore, nullptr); + m_frameCompleteSemaphore = VK_NULL_HANDLE; + } + + if (m_consumerCompleteSemaphore != VK_NULL_HANDLE) { + m_vkDevCtx->DestroySemaphore(*vkDevCtx, m_consumerCompleteSemaphore, nullptr); + m_consumerCompleteSemaphore = VK_NULL_HANDLE; + } + for (size_t ndx = 0; ndx < m_numImages; ndx++) { - m_perFrameDecodeResources[ndx].Deinit(); + m_perFrameDecodeResources[ndx].Deinit(vkDevCtx); } for (uint32_t imageTypeIdx = 0; imageTypeIdx < DecodeFrameBufferIf::MAX_PER_FRAME_IMAGE_TYPES; imageTypeIdx++) { diff --git a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h index 863d3a4f..e622bb7f 100644 --- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h +++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h @@ -66,14 +66,20 @@ class VulkanVideoFrameBuffer : public IVulkanVideoFrameBufferParserCb { struct FrameSynchronizationInfo { VkFence frameCompleteFence; VkSemaphore 
frameCompleteSemaphore; + VkSemaphore consumerCompleteSemaphore; VkFence frameConsumerDoneFence; - VkSemaphore frameConsumerDoneSemaphore; + uint64_t frameConsumerDoneTimelineValue; + uint64_t decodeCompleteTimelineValue; + uint64_t filterCompleteTimelineValue; VkQueryPool queryPool; uint32_t startQueryId; uint32_t numQueries; DecodeFrameBufferIf::ImageSpecsIndex imageSpecsIndex; uint32_t hasFrameCompleteSignalFence : 1; + uint32_t hasFrameConsumerSignalSemaphore : 1; uint32_t hasFrameCompleteSignalSemaphore : 1; + // post processing filter + uint32_t hasFilterSignalSemaphore : 1; uint32_t syncOnFrameCompleteFence : 1; uint32_t syncOnFrameConsumerDoneFence : 1; }; diff --git a/vk_video_encoder/demos/vk-video-enc/Main.cpp b/vk_video_encoder/demos/vk-video-enc/Main.cpp index 37b046a6..259260f5 100644 --- a/vk_video_encoder/demos/vk-video-enc/Main.cpp +++ b/vk_video_encoder/demos/vk-video-enc/Main.cpp @@ -53,6 +53,7 @@ int main(int argc, char** argv) VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp index 29e2a36d..e4f71cd8 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp @@ -441,22 +441,36 @@ VkResult VkVideoEncoder::SubmitStagedQpMap(VkSharedBaseObjqpMapCmdBuffer->GetCommandBuffer(); VkSemaphore frameCompleteSemaphore = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore(); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoTransferSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.waitSemaphoreCount = 0; - submitInfo.pWaitSemaphores = nullptr; - submitInfo.pWaitDstStageMask = &videoTransferSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers 
= pCmdBuf; - submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr; - submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR }; + cmdBufferInfo.commandBuffer = *pCmdBuf; + cmdBufferInfo.deviceMask = 0; + + VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + signalSemaphoreInfo.semaphore = frameCompleteSemaphore; + signalSemaphoreInfo.value = 0; // Binary semaphore + signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; // Signal after transfer operations complete + signalSemaphoreInfo.deviceIndex = 0; + + VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = 0; + submitInfo.pWaitSemaphoreInfos = nullptr; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfo; + submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &signalSemaphoreInfo : nullptr; VkFence queueCompleteFence = encodeFrameInfo->qpMapCmdBuffer->GetFence(); assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence)); + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(((m_vkDevCtx->GetVideoEncodeQueueFlag() & VK_QUEUE_TRANSFER_BIT) != 0) ? 
- VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER, - 0, 1, &submitInfo, - queueCompleteFence); + VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER, + 0, // queueIndex + 1, // submitCount + &submitInfo, queueCompleteFence, + "Encode Staging QpMap", + m_encodeEncodeFrameNum, + m_encodeInputFrameNum); encodeFrameInfo->qpMapCmdBuffer->SetCommandBufferSubmitted(); bool syncCpuAfterStaging = false; @@ -475,15 +489,24 @@ VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObjinputCmdBuffer->GetCommandBuffer(); VkSemaphore frameCompleteSemaphore = encodeFrameInfo->inputCmdBuffer->GetSemaphore(); - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoTransferSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.waitSemaphoreCount = 0; - submitInfo.pWaitSemaphores = nullptr; - submitInfo.pWaitDstStageMask = &videoTransferSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = pCmdBuf; - submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr; - submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
1 : 0; + VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR }; + cmdBufferInfo.commandBuffer = *pCmdBuf; + cmdBufferInfo.deviceMask = 0; + + VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + signalSemaphoreInfo.semaphore = frameCompleteSemaphore; + signalSemaphoreInfo.value = 0; // Binary semaphore + signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; // Signal after transfer operations complete + signalSemaphoreInfo.deviceIndex = 0; + + VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = 0; + submitInfo.pWaitSemaphoreInfos = nullptr; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfo; + submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &signalSemaphoreInfo : nullptr; VkFence queueCompleteFence = encodeFrameInfo->inputCmdBuffer->GetFence(); assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence)); @@ -491,9 +514,15 @@ VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObjGetVideoEncodeQueueFlag() & VK_QUEUE_TRANSFER_BIT) != 0) ? 
VulkanDeviceContext::ENCODE : VulkanDeviceContext::TRANSFER); + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(submitType, - 0, 1, &submitInfo, - queueCompleteFence); + 0, // queueIndex + 1, // submitCount + &submitInfo, + queueCompleteFence, + "Encode Staging Input", + m_encodeEncodeFrameNum, + m_encodeInputFrameNum); encodeFrameInfo->inputCmdBuffer->SetCommandBufferSubmitted(); bool syncCpuAfterStaging = false; @@ -1602,38 +1631,74 @@ VkResult VkVideoEncoder::SubmitVideoCodingCmds(VkSharedBaseObjencodeCmdBuffer != nullptr); + const VkCommandBuffer* pCmdBuf = encodeFrameInfo->encodeCmdBuffer->GetCommandBuffer(); + // The encode operation complete semaphore is not needed at this point. + VkSemaphore frameCompleteSemaphore = VK_NULL_HANDLE; // encodeFrameInfo->encodeCmdBuffer->GetSemaphore(); + + // Create command buffer submit info + VkCommandBufferSubmitInfoKHR cmdBufferInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR }; + cmdBufferInfo.commandBuffer = *pCmdBuf; + cmdBufferInfo.deviceMask = 0; + + + + // Create wait semaphore submit infos // If we are processing the input staging, wait for it's semaphore // to be done before processing the input frame with the encoder. 
- VkSemaphore inputWaitSemaphore[2] = { VK_NULL_HANDLE }; + VkSemaphoreSubmitInfoKHR waitSemaphoreInfos[2]{}; uint32_t waitSemaphoreCount = 0; if (encodeFrameInfo->inputCmdBuffer) { - inputWaitSemaphore[waitSemaphoreCount++] = encodeFrameInfo->inputCmdBuffer->GetSemaphore(); + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = encodeFrameInfo->inputCmdBuffer->GetSemaphore(); + waitSemaphoreInfos[waitSemaphoreCount].value = 0; // Binary semaphore + // Use transfer bit since these semaphores come from transfer operations + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; + waitSemaphoreCount++; } if (encodeFrameInfo->qpMapCmdBuffer) { - inputWaitSemaphore[waitSemaphoreCount++] = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore(); - } - - const VkCommandBuffer* pCmdBuf = encodeFrameInfo->encodeCmdBuffer->GetCommandBuffer(); - // The encode operation complete semaphore is not needed at this point. - VkSemaphore frameCompleteSemaphore = VK_NULL_HANDLE; // encodeFrameInfo->encodeCmdBuffer->GetSemaphore(); - - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr }; - const VkPipelineStageFlags videoEncodeSubmitWaitStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - submitInfo.pWaitSemaphores = (waitSemaphoreCount > 0) ? inputWaitSemaphore : nullptr; - submitInfo.waitSemaphoreCount = waitSemaphoreCount; - submitInfo.pWaitDstStageMask = &videoEncodeSubmitWaitStages; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = pCmdBuf; - submitInfo.pSignalSemaphores = (frameCompleteSemaphore != VK_NULL_HANDLE) ? &frameCompleteSemaphore : nullptr; - submitInfo.signalSemaphoreCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
1 : 0; + waitSemaphoreInfos[waitSemaphoreCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + waitSemaphoreInfos[waitSemaphoreCount].semaphore = encodeFrameInfo->qpMapCmdBuffer->GetSemaphore(); + waitSemaphoreInfos[waitSemaphoreCount].value = 0; // Binary semaphore + // Use transfer bit since these semaphores come from transfer operations + waitSemaphoreInfos[waitSemaphoreCount].stageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; + waitSemaphoreInfos[waitSemaphoreCount].deviceIndex = 0; + waitSemaphoreCount++; + } + + // Create signal semaphore submit info if needed + VkSemaphoreSubmitInfoKHR signalSemaphoreInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + if (frameCompleteSemaphore != VK_NULL_HANDLE) { + signalSemaphoreInfo.semaphore = frameCompleteSemaphore; + signalSemaphoreInfo.value = 0; // Binary semaphore + signalSemaphoreInfo.stageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR; + signalSemaphoreInfo.deviceIndex = 0; + } + + // Create submit info + VkSubmitInfo2KHR submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, nullptr }; + submitInfo.flags = 0; + submitInfo.waitSemaphoreInfoCount = waitSemaphoreCount; + submitInfo.pWaitSemaphoreInfos = (waitSemaphoreCount > 0) ? waitSemaphoreInfos : nullptr; + submitInfo.commandBufferInfoCount = 1; + submitInfo.pCommandBufferInfos = &cmdBufferInfo; + submitInfo.signalSemaphoreInfoCount = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 1 : 0; + submitInfo.pSignalSemaphoreInfos = (frameCompleteSemaphore != VK_NULL_HANDLE) ? 
&signalSemaphoreInfo : nullptr; VkFence queueCompleteFence = encodeFrameInfo->encodeCmdBuffer->GetFence(); assert(VK_NOT_READY == m_vkDevCtx->GetFenceStatus(*m_vkDevCtx, queueCompleteFence)); - VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::ENCODE, 0, - 1, &submitInfo, - queueCompleteFence); + + VkResult result = m_vkDevCtx->MultiThreadedQueueSubmit(VulkanDeviceContext::ENCODE, + 0, // queueIndex + 1, // submitCount + &submitInfo, + queueCompleteFence, + "Video Encode", + m_encodeEncodeFrameNum, + m_encodeInputFrameNum); encodeFrameInfo->encodeCmdBuffer->SetCommandBufferSubmitted(); bool syncCpuAfterEncoding = false; diff --git a/vk_video_encoder/src/vulkan_video_encoder.cpp b/vk_video_encoder/src/vulkan_video_encoder.cpp index 18831f2a..196caf3a 100644 --- a/vk_video_encoder/src/vulkan_video_encoder.cpp +++ b/vk_video_encoder/src/vulkan_video_encoder.cpp @@ -106,6 +106,7 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; From 9f58f6b26024eb007c38dad9bfc749817328afcf Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Mon, 30 Dec 2024 04:26:15 -0800 Subject: [PATCH 6/7] decode: Add vp9 decoder support Update vp9 decode headers VK_KHR_video_decode_vp9 spec update Fix offset of frames in super frame for parsing and decoding VP9 Decode: Derive UsePrevFrameMvs VP9 Decode: Use previous loop filter values when not coded Fix self assignment of renderHeight when frame and render sizes are different resolution change fixes Handle alignment of bitstream offset for frames in super frame VP9 Decode: Use correct constants for DPB state arrays Fix output DISTINCT case for VP9 After recent changes related to codedExtent in VP9, the output distinct case was broken, since the codedExtent for the dstPictureResource is not set. 
In the COINCIDE case, the dstPictureResource is copied from the reference resource. Fix preparing ref_frame_sign_bias bitmask Display an existing frame when show_existing_frame is set and skip HW decode Fix parsing of profile Fix conversion of 12-bit chroma format from ffmpeg values to vulkan video. Unify display logic for show_frame and show_existing_frame. App runs fine if a frame is displayed once or not displayed. It crashes if a frame is displayed more than once (example clip: vp90-2-10-show-existing-frame.webm). Fix odd resolution frame decoding VulkanVideoDecoder: Fix VP9 include name The VP9 include file uses upper case. VP9 Decoder: use different bitstreamBuffer than the buffer used in previous frame decoding Use override for VulkanVP9Decoder Remove useless av1 includes Remove StdVideoDecodeVP9ReferenceInfo Cleanup ParseFrameHeader interface Set UsePrevFrameMvs to 0 when frame size is changed Parse VP9 10bit profile correctly in header parser Make sure to retrieve the VP9 10bit profile correctly as value 2 in the parser VP9Decode: fill BitDepth in pStdColorConfig Remove BitDepth in VkParserVp9PictureData Use maximum of frame width/height and render width/height for vp9 session VP9Decoder: set codecProfile in nvsi nvsi.codecProfile was always 0 and might be used by a parser client (CTS). Enable requested video codec extension by adding it to required extension list. Enable maintenance1 extension. Add it to optional extension list. Cleanup code to get codec type to choose decode extension list earlier. 
Align source buffer range to the multiple of minBitstreamBufferSizeAlignment Remove the local headers --- .../include/VkVideoCore/VkVideoCoreProfile.h | 45 +- .../VkVideoCore/VulkanVideoCapabilities.h | 51 +- common/libs/VkCodecUtils/DecoderConfig.h | 5 +- .../libs/VkCodecUtils/VulkanDeviceContext.h | 33 +- .../libs/VkCodecUtils/VulkanVideoSession.cpp | 4 + vk_video_decoder/demos/vk-video-dec/Main.cpp | 106 +- .../vkvideo_parser/VulkanVideoParserIf.h | 91 +- .../NvVideoParser/include/VulkanVP9Decoder.h | 1583 +--------------- .../NvVideoParser/src/VulkanVP9Decoder.cpp | 1599 ++++++++--------- .../NvVideoParser/src/VulkanVideoDecoder.cpp | 15 +- .../libs/VkDecoderUtils/FFmpegDemuxer.cpp | 26 +- .../libs/VkVideoDecoder/VkVideoDecoder.cpp | 37 +- .../libs/VkVideoParser/VulkanVideoParser.cpp | 320 +++- vk_video_decoder/src/vulkan_video_decoder.cpp | 37 +- .../test/vulkan-video-dec/Main.cpp | 84 +- .../test/vulkan-video-simple-dec/Main.cpp | 2 + vk_video_encoder/demos/vk-video-enc/Main.cpp | 52 +- .../libs/VkVideoEncoder/VkEncoderConfig.h | 2 - vk_video_encoder/src/vulkan_video_encoder.cpp | 36 +- 19 files changed, 1415 insertions(+), 2713 deletions(-) diff --git a/common/include/VkVideoCore/VkVideoCoreProfile.h b/common/include/VkVideoCore/VkVideoCoreProfile.h index 7483d8ce..55ea56e7 100644 --- a/common/include/VkVideoCore/VkVideoCoreProfile.h +++ b/common/include/VkVideoCore/VkVideoCoreProfile.h @@ -50,7 +50,8 @@ class VkVideoCoreProfile { return (videoCodecOperations & (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); @@ -100,12 +101,26 @@ class VkVideoCoreProfile m_av1DecodeProfile = *pProfileExt; } else { // Use default ext profile 
parameters - m_av1DecodeProfile.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR; + m_av1DecodeProfile.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR; m_av1DecodeProfile.stdProfile = STD_VIDEO_AV1_PROFILE_MAIN; } m_profile.pNext = &m_av1DecodeProfile; m_av1DecodeProfile.pNext = NULL; - + } else if (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + VkVideoDecodeVP9ProfileInfoKHR const * pProfileExt = (VkVideoDecodeVP9ProfileInfoKHR const *)pVideoProfileExt; + if (pProfileExt && (pProfileExt->sType != VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR)) { + m_profile.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + return false; + } + if (pProfileExt) { + m_vp9DecodeProfile = *pProfileExt; + } else { + // Use default ext profile parameters + m_vp9DecodeProfile.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR; + m_vp9DecodeProfile.stdProfile = STD_VIDEO_VP9_PROFILE_0; + } + m_profile.pNext = &m_vp9DecodeProfile; + m_vp9DecodeProfile.pNext = NULL; } else if (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR) { VkVideoEncodeH264ProfileInfoKHR const * pProfileExt = (VkVideoEncodeH264ProfileInfoKHR const *)pVideoProfileExt; if (pProfileExt && (pProfileExt->sType != VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR)) { @@ -205,6 +220,7 @@ class VkVideoCoreProfile VkVideoDecodeH264ProfileInfoKHR decodeH264ProfilesRequest; VkVideoDecodeH265ProfileInfoKHR decodeH265ProfilesRequest; VkVideoDecodeAV1ProfileInfoKHR decodeAV1ProfilesRequest; + VkVideoDecodeVP9ProfileInfoKHR decodeVP9ProfilesRequest; VkVideoEncodeH264ProfileInfoKHR encodeH264ProfilesRequest; VkVideoEncodeH265ProfileInfoKHR encodeH265ProfilesRequest; VkVideoEncodeAV1ProfileInfoKHR encodeAV1ProfilesRequest; @@ -243,6 +259,13 @@ class VkVideoCoreProfile STD_VIDEO_H265_PROFILE_IDC_INVALID : (StdVideoH265ProfileIdc)videoH26xProfileIdc; pVideoProfileExt = (VkBaseInStructure*)&decodeH265ProfilesRequest; + } else if 
(videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + decodeVP9ProfilesRequest.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR; + decodeVP9ProfilesRequest.pNext = NULL; + decodeVP9ProfilesRequest.stdProfile = (videoH26xProfileIdc == 0) ? + STD_VIDEO_VP9_PROFILE_0 : + (StdVideoVP9Profile)videoH26xProfileIdc; + pVideoProfileExt = (VkBaseInStructure*)&decodeVP9ProfilesRequest; } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR) { encodeH264ProfilesRequest.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR; encodeH264ProfilesRequest.pNext = pEncodeUsageInfo; @@ -287,7 +310,9 @@ class VkVideoCoreProfile bool IsDecodeCodecType() const { return ((m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) || - (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR)); + (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) || + (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) || + (m_profile.videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR)); } operator bool() const @@ -340,6 +365,15 @@ class VkVideoCoreProfile } } + const VkVideoDecodeVP9ProfileInfoKHR* GetDecodeVP9Profile() const + { + if (m_vp9DecodeProfile.sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR) { + return &m_vp9DecodeProfile; + } else { + return NULL; + } + } + const VkVideoEncodeH264ProfileInfoKHR* GetEncodeH264Profile() const { if (m_h264EncodeProfile.sType == VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR) { @@ -605,6 +639,8 @@ class VkVideoCoreProfile return "decode h.265"; case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: return "decode av1"; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + return "decode vp9"; case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: return "encode h.264"; case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: @@ -769,6 +805,7 @@ class VkVideoCoreProfile 
VkVideoDecodeH264ProfileInfoKHR m_h264DecodeProfile; VkVideoDecodeH265ProfileInfoKHR m_h265DecodeProfile; VkVideoDecodeAV1ProfileInfoKHR m_av1DecodeProfile; + VkVideoDecodeVP9ProfileInfoKHR m_vp9DecodeProfile; VkVideoEncodeH264ProfileInfoKHR m_h264EncodeProfile; VkVideoEncodeH265ProfileInfoKHR m_h265EncodeProfile; VkVideoEncodeAV1ProfileInfoKHR m_av1EncodeProfile; diff --git a/common/include/VkVideoCore/VulkanVideoCapabilities.h b/common/include/VkVideoCore/VulkanVideoCapabilities.h index b703298b..a2cc4af9 100644 --- a/common/include/VkVideoCore/VulkanVideoCapabilities.h +++ b/common/include/VkVideoCore/VulkanVideoCapabilities.h @@ -38,6 +38,7 @@ class VulkanVideoCapabilities VkVideoDecodeH264CapabilitiesKHR h264Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, nullptr }; VkVideoDecodeH265CapabilitiesKHR h265Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, nullptr }; VkVideoDecodeAV1CapabilitiesKHR av1Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_KHR, nullptr }; + VkVideoDecodeVP9CapabilitiesKHR vp9Capabilities = { VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR, nullptr }; if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { videoDecodeCapabilities.pNext = &h264Capabilities; @@ -45,6 +46,8 @@ class VulkanVideoCapabilities videoDecodeCapabilities.pNext = &h265Capabilities; } else if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { videoDecodeCapabilities.pNext = &av1Capabilities; + } else if (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + videoDecodeCapabilities.pNext = &vp9Capabilities; } else { assert(!"Unsupported codec"); return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR; @@ -197,6 +200,16 @@ class VulkanVideoCapabilities } } break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + { + assert(pVideoDecodeCapabilities->pNext); + const VkVideoDecodeVP9CapabilitiesKHR* pVP9Capabilities = 
(VkVideoDecodeVP9CapabilitiesKHR*)pVideoDecodeCapabilities->pNext; + assert(pVP9Capabilities->sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR); + if (pVP9Capabilities->sType != VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR) { + return VK_ERROR_INITIALIZATION_FAILED; + } + } + break; case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: { assert(pVideoEncodeCapabilities->pNext); @@ -277,6 +290,26 @@ class VulkanVideoCapabilities assert(!"Unsupported h.265 STD version"); return VK_ERROR_INCOMPATIBLE_DRIVER; } + } else if (videoProfile.GetCodecType() == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + const VkVideoDecodeAV1CapabilitiesKHR* pAV1DecCapabilities = (VkVideoDecodeAV1CapabilitiesKHR*)pVideoDecodeCapabilities->pNext; + std::cout << "\t\t\t" << "maxLevelIdc: " << pAV1DecCapabilities->maxLevel << std::endl; + if (strncmp(pVideoCapabilities->stdHeaderVersion.extensionName, + VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, + sizeof (pVideoCapabilities->stdHeaderVersion.extensionName) - 1U) || + (pVideoCapabilities->stdHeaderVersion.specVersion != VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION)) { + assert(!"Unsupported AV1 STD version"); + return VK_ERROR_INCOMPATIBLE_DRIVER; + } + } else if (videoProfile.GetCodecType() == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + const VkVideoDecodeVP9CapabilitiesKHR* pVP9DecCapabilities = (VkVideoDecodeVP9CapabilitiesKHR*)pVideoDecodeCapabilities->pNext; + std::cout << "\t\t\t" << "maxLevelIdc: " << pVP9DecCapabilities->maxLevel << std::endl; + if (strncmp(pVideoCapabilities->stdHeaderVersion.extensionName, + VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, + sizeof (pVideoCapabilities->stdHeaderVersion.extensionName) - 1U) || + (pVideoCapabilities->stdHeaderVersion.specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { + assert(!"Unsupported VP9 STD version"); + return VK_ERROR_INCOMPATIBLE_DRIVER; + } } else { assert(!"Unsupported codec"); } @@ -354,8 +387,12 @@ class 
VulkanVideoCapabilities int32_t* pVideoQueueFamily, VkQueueFlags queueFlagsRequired = ( VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR), VkVideoCodecOperationFlagsKHR videoCodeOperations = - ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | + ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)) { std::vector queues; @@ -429,6 +466,16 @@ class VulkanVideoCapabilities &videoDecodeCapabilities); } + static VkResult GetDecodeVP9Capabilities(const VulkanDeviceContext* vkDevCtx, uint32_t, + const VkVideoProfileInfoKHR& videoProfile, + VkVideoCapabilitiesKHR &videoDecodeCapabilities) + { + videoDecodeCapabilities.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + return vkDevCtx->GetPhysicalDeviceVideoCapabilitiesKHR(vkDevCtx->getPhysicalDevice(), + &videoProfile, + &videoDecodeCapabilities); + } + static VkResult GetEncodeH264Capabilities(const VulkanDeviceContext* vkDevCtx, uint32_t, const VkVideoProfileInfoKHR& videoProfile, VkVideoCapabilitiesKHR &videoEncodeCapabilities, diff --git a/common/libs/VkCodecUtils/DecoderConfig.h b/common/libs/VkCodecUtils/DecoderConfig.h index 4d06a1d5..b0f14a59 100644 --- a/common/libs/VkCodecUtils/DecoderConfig.h +++ b/common/libs/VkCodecUtils/DecoderConfig.h @@ -75,7 +75,6 @@ struct DecoderConfig { directMode = false; enableHwLoadBalancing = false; selectVideoWithComputeQueue = false; - enableVideoEncoder = false; outputy4m = false; outputcrcPerFrame = false; outputcrc = false; @@ -137,6 +136,9 @@ struct DecoderConfig { } else if (strcmp(args[0], "av1") == 0) { forceParserType = 
VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR; return true; + } else if ((strcmp(args[0], "vp9") == 0)) { + forceParserType = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; + return true; } else { std::cerr << "Invalid codec \"" << args[0] << "\"" << std::endl; return false; @@ -470,7 +472,6 @@ struct DecoderConfig { uint32_t noPresent : 1; uint32_t enableHwLoadBalancing : 1; uint32_t selectVideoWithComputeQueue : 1; - uint32_t enableVideoEncoder : 1; uint32_t outputy4m : 1; uint32_t outputcrc : 1; uint32_t outputcrcPerFrame : 1; diff --git a/common/libs/VkCodecUtils/VulkanDeviceContext.h b/common/libs/VkCodecUtils/VulkanDeviceContext.h index 97325f1d..6e83e33f 100644 --- a/common/libs/VkCodecUtils/VulkanDeviceContext.h +++ b/common/libs/VkCodecUtils/VulkanDeviceContext.h @@ -51,6 +51,21 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { MAX_QUEUE_FAMILIES = 6, // Gfx, Present, Compute, Transfer, Decode, Encode }; + static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_DECODE = + VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; + + static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_ENCODE = + VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | + VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR; + + static const VkVideoCodecOperationFlagsKHR VIDEO_CODEC_OPERATIONS_ALL = + VIDEO_CODEC_OPERATIONS_DECODE | + VIDEO_CODEC_OPERATIONS_ENCODE; + VulkanDeviceContext(); VkInstance getInstance() const { @@ -230,6 +245,7 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { VkResult InitVulkanDecoderDevice(const char * pAppName, VkInstance vkInstance = VK_NULL_HANDLE, + VkVideoCodecOperationFlagsKHR videoCodecs = VIDEO_CODEC_OPERATIONS_ALL, bool enableWsi = false, bool enableWsiDirectMode = false, bool enableValidation = false, @@ -243,6 +259,7 @@ class 
VulkanDeviceContext : public vk::VkInterfaceFunctions { VkResult AddReqInstanceExtension(const char* requiredInstanceExtension, bool verbose = false); VkResult CheckAllInstanceExtensions(bool verbose = false); VkResult AddReqDeviceExtensions(const char* const* requiredDeviceExtensions, bool verbose = false); + VkResult AddReqDeviceExtension(const char* requiredDeviceExtension, bool verbose = false); VkResult AddOptDeviceExtensions(const char* const* optionalDeviceExtensions, bool verbose = false); bool HasAllDeviceExtensions(VkPhysicalDevice physDevice, const char* printMissingDeviceExt = nullptr); @@ -260,26 +277,16 @@ class VulkanDeviceContext : public vk::VkInterfaceFunctions { const VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_TRANSFER_BIT, const VkVideoCodecOperationFlagsKHR requestVideoDecodeQueueOperations = - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), + VIDEO_CODEC_OPERATIONS_DECODE, const VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_TRANSFER_BIT, const VkVideoCodecOperationFlagsKHR requestVideoEncodeQueueOperations = - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR), + VIDEO_CODEC_OPERATIONS_ENCODE, VkPhysicalDevice vkPhysicalDevice = VK_NULL_HANDLE); VkResult CreateVulkanDevice(int32_t numDecodeQueues = 1, int32_t numEncodeQueues = 0, - VkVideoCodecOperationFlagsKHR videoCodecs = - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) | - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR), + VkVideoCodecOperationFlagsKHR videoCodecs = VIDEO_CODEC_OPERATIONS_ALL, bool createTransferQueue = false, bool 
createGraphicsQueue = false, bool createPresentQueue = false, diff --git a/common/libs/VkCodecUtils/VulkanVideoSession.cpp b/common/libs/VkCodecUtils/VulkanVideoSession.cpp index 021ec538..3a8935d7 100644 --- a/common/libs/VkCodecUtils/VulkanVideoSession.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoSession.cpp @@ -39,6 +39,7 @@ VkResult VulkanVideoSession::Create(const VulkanDeviceContext* vkDevCtx, static const VkExtensionProperties h264DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION }; static const VkExtensionProperties h265DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION }; static const VkExtensionProperties av1DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION }; + static const VkExtensionProperties vp9DecodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION }; static const VkExtensionProperties h264EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION }; static const VkExtensionProperties h265EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION }; static const VkExtensionProperties av1EncodeStdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_SPEC_VERSION }; @@ -63,6 +64,9 @@ VkResult VulkanVideoSession::Create(const VulkanDeviceContext* vkDevCtx, case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: createInfo.pStdHeaderVersion = &av1DecodeStdExtensionVersion; break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + createInfo.pStdHeaderVersion = &vp9DecodeStdExtensionVersion; + break; case 
VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: createInfo.pStdHeaderVersion = &h264EncodeStdExtensionVersion; break; diff --git a/vk_video_decoder/demos/vk-video-dec/Main.cpp b/vk_video_decoder/demos/vk-video-dec/Main.cpp index 8579362e..3e499c32 100644 --- a/vk_video_decoder/demos/vk-video-dec/Main.cpp +++ b/vk_video_decoder/demos/vk-video-dec/Main.cpp @@ -28,14 +28,33 @@ #include "VkShell/Shell.h" #include "VkCodecUtils/VkVideoFrameOutput.h" -int main(int argc, const char **argv) { +int main(int argc, const char **argv) +{ DecoderConfig decoderConfig(argv[0]); decoderConfig.ParseArgs(argc, argv); + VkSharedBaseObj videoStreamDemuxer; + VkResult result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), + decoderConfig.forceParserType, + decoderConfig.enableStreamDemuxing, + decoderConfig.initialWidth, + decoderConfig.initialHeight, + decoderConfig.initialBitdepth, + videoStreamDemuxer); + if (result != VK_SUCCESS) { + assert(!"Can't initialize the VideoStreamDemuxer!"); + return -1; + } + + VkVideoCodecOperationFlagsKHR videoCodecOperation = (decoderConfig.forceParserType != VK_VIDEO_CODEC_OPERATION_NONE_KHR) ? 
+ decoderConfig.forceParserType : + videoStreamDemuxer->GetVideoCodec(); + VulkanDeviceContext vkDevCtxt; - VkResult result = vkDevCtxt.InitVulkanDecoderDevice(decoderConfig.appName.c_str(), + result = vkDevCtxt.InitVulkanDecoderDevice(decoderConfig.appName.c_str(), VK_NULL_HANDLE, + videoCodecOperation, !decoderConfig.noPresent, decoderConfig.directMode, decoderConfig.validate, @@ -54,16 +73,8 @@ int main(int argc, const char **argv) { VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR; - VkQueueFlags requestVideoEncodeQueueMask = 0; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - } - if (decoderConfig.selectVideoWithComputeQueue) { requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -71,17 +82,6 @@ int main(int argc, const char **argv) { requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT; } - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs | - (decoderConfig.enableVideoEncoder ? 
videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR); - if (!decoderConfig.noPresent) { VkSharedBaseObj displayShell; @@ -98,17 +98,12 @@ int main(int argc, const char **argv) { result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT | requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoDecodeQueueMask), displayShell, requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), - requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + videoCodecOperation, + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); @@ -117,30 +112,15 @@ int main(int argc, const char **argv) { assert(displayShell->PhysDeviceCanPresent(vkDevCtxt.getPhysicalDevice(), vkDevCtxt.GetPresentQueueFamilyIdx())); - vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - decoderConfig.enableVideoEncoder ? 
1 : 0, // num encode queues - videoCodecs, - false, // createTransferQueue - true, // createGraphicsQueue - true, // createDisplayQueue + vkDevCtxt.CreateVulkanDevice(numDecodeQueues, // numDecodeQueues + 0, // num encode queues + videoCodecOperation, // videoCodecs + false, // createTransferQueue + true, // createGraphicsQueue + true, // createDisplayQueue requestVideoComputeQueueMask != 0 // createComputeQueue ); - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } - VkSharedBaseObj vulkanVideoProcessor; result = VulkanVideoProcessor::Create(decoderConfig, &vkDevCtxt, vulkanVideoProcessor); if (result != VK_SUCCESS) { @@ -176,8 +156,7 @@ int main(int argc, const char **argv) { result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_TRANSFER_BIT | requestVideoDecodeQueueMask | - requestVideoComputeQueueMask | - requestVideoEncodeQueueMask), + requestVideoComputeQueueMask), nullptr, requestVideoDecodeQueueMask); if (result != VK_SUCCESS) { @@ -187,9 +166,9 @@ int main(int argc, const char **argv) { } - result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - 0, // num encode queues - videoCodecs, + result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues, // numDecodeQueues + 0, // num encode queues + videoCodecOperation, // videoCodecs // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
@@ -204,21 +183,6 @@ int main(int argc, const char **argv) { return -1; } - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } - VkSharedBaseObj vulkanVideoProcessor; result = VulkanVideoProcessor::Create(decoderConfig, &vkDevCtxt, vulkanVideoProcessor); if (result != VK_SUCCESS) { diff --git a/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h b/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h index 21ad0fed..d5141c1b 100644 --- a/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h +++ b/vk_video_decoder/include/vkvideo_parser/VulkanVideoParserIf.h @@ -269,61 +269,6 @@ typedef struct VkParserHevcPictureData { } VkParserHevcPictureData; -typedef struct VkParserVp9PictureData { - uint32_t width; - uint32_t height; - - // Frame Indexes - VkPicIf* pLastRef; - VkPicIf* pGoldenRef; - VkPicIf* pAltRef; - - uint32_t keyFrame; - uint32_t version; - uint32_t showFrame; - uint32_t errorResilient; - uint32_t bit_depth_minus8; - uint32_t colorSpace; - uint32_t subsamplingX; - uint32_t subsamplingY; - uint32_t activeRefIdx[3]; - uint32_t intraOnly; - uint32_t resetFrameContext; - uint32_t frameParallelDecoding; - uint32_t refreshFrameFlags; - uint8_t refFrameSignBias[4]; - uint32_t frameContextIdx; - uint32_t allow_high_precision_mv; - uint32_t mcomp_filter_type; - uint32_t loopFilterLevel; - uint32_t loopFilterSharpness; - uint32_t log2_tile_columns; - uint32_t log2_tile_rows; - int32_t mbRefLfDelta[4]; - int32_t mbModeLfDelta[2]; - int32_t segmentMapTemporalUpdate; - uint8_t segmentFeatureEnable[8][4]; - uint8_t mb_segment_tree_probs[7]; - uint8_t segment_pred_probs[3]; - int16_t 
segmentFeatureData[8][4]; - uint32_t scaledWidth; - uint32_t scaledHeight; - uint32_t scalingActive; - uint32_t segmentEnabled; - uint32_t prevIsKeyFrame; - uint32_t PrevShowFrame; - uint32_t modeRefLfEnabled; - int32_t qpYAc; - int32_t qpYDc; - int32_t qpChDc; - int32_t qpChAc; - uint32_t segmentMapUpdate; - uint32_t segmentFeatureMode; - uint32_t refreshEntropyProbs; - uint32_t frameTagSize; - uint32_t offsetToDctParts; -} VkParserVp9PictureData; - struct VkParserAv1PictureData { // The picture info structure is mostly pointing at other // structures defining the coding tool parameters. Those @@ -373,6 +318,42 @@ struct VkParserAv1PictureData { uint32_t frame_height; }; +typedef struct VkParserVp9PictureData { + + StdVideoDecodeVP9PictureInfo stdPictureInfo; + StdVideoVP9ColorConfig stdColorConfig; + StdVideoVP9LoopFilter stdLoopFilter; + StdVideoVP9Segmentation stdSegmentation; + + // frame dimensions + uint32_t FrameWidth, FrameHeight; + uint32_t MiCols, MiRows; + uint32_t Sb64Cols, Sb64Rows; + uint32_t renderWidth, renderHeight; + + // display details + uint8_t frame_to_show_map_idx; + bool show_existing_frame; + + // references + uint8_t ref_frame_idx[STD_VIDEO_VP9_REFS_PER_FRAME]; + uint8_t pic_idx[STD_VIDEO_VP9_NUM_REF_FRAMES]; + VkPicIf* pLastRef; + VkPicIf* pGoldenRef; + VkPicIf* pAltRef; + + // other derived parameters + bool FrameIsIntra; + uint8_t ChromaFormat; + uint32_t numTiles; + uint32_t compressedHeaderSize; + + // bitstream divisions + uint32_t uncompressedHeaderOffset; + uint32_t compressedHeaderOffset; + uint32_t tilesOffset; +} VkParserVp9PictureData; + typedef struct VkParserPictureData { int32_t PicWidthInMbs; // Coded Frame Size int32_t FrameHeightInMbs; // Coded Frame Height diff --git a/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h b/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h index 142f7db8..503f5827 100644 --- a/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h +++ 
b/vk_video_decoder/libs/NvVideoParser/include/VulkanVP9Decoder.h @@ -22,31 +22,36 @@ #include "VulkanVideoDecoder.h" -typedef enum { - EIGHTTAP_SMOOTH, - EIGHTTAP, - EIGHTTAP_SHARP, - BILINEAR, - SWITCHABLE /* should be the last one */ -} INTERPOLATIONFILTERTYPE; - -typedef enum { - //NONE = -1, - INTRA_FRAME = 0, - LAST_FRAME = 1, - GOLDEN_FRAME = 2, - ALTREF_FRAME = 3, - VP9_MAX_REF_FRAMES = 4 -}MV_REFERENCE_FRAME; - -typedef enum { - ONLY_4X4 = 0, - ALLOW_8X8 = 1, - ALLOW_16X16 = 2, - ALLOW_32X32 = 3, - TX_MODE_SELECT = 4, - NB_TXFM_MODES = 5, -} TXFM_MODE; +#define VP9_FRAME_MARKER 2 +#define VP9_FRAME_SYNC_CODE 0x498342 +#define VP9_MAX_PRBABILITY 255 +#define VP9_MIN_TILE_WIDTH_B64 4 +#define VP9_MAX_TILE_WIDTH_B64 64 +#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) +#define ALIGN_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) + +#define VP9_BUFFER_POOL_MAX_SIZE 10 +#define VP9_MAX_NUM_SPATIAL_LAYERS 4 + +#define VP9_CHECK_FRAME_MARKER { \ + if (u(2) != VP9_FRAME_MARKER) { \ + assert(!"Invalid frame marker");\ + return false; \ + } \ +} + +#define VP9_CHECK_ZERO_BIT { \ + if (u(1) != 0) { \ + assert(!"Invalid syntax"); \ + return false; \ + } \ +} + +#define VP9_CHECK_FRAME_SYNC_CODE { \ + if (u(24) != VP9_FRAME_SYNC_CODE) { \ + assert(!"Invalid frame sync code"); \ + } \ +} // Segment level features. 
typedef enum { @@ -57,1492 +62,78 @@ typedef enum { SEG_LVL_MAX = 4 // Number of MB level features supported } SEG_LVL_FEATURES; -typedef enum { - SINGLE_PREDICTION_ONLY = 0, - COMP_PREDICTION_ONLY = 1, - HYBRID_PREDICTION = 2, - NB_PREDICTION_TYPES = 3, -} COMPPREDMODE_TYPE; - -/* Symbols for coding which components are zero jointly */ -typedef enum { - MV_JOINT_ZERO = 0, /* Zero vector */ - MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ - MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ - MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ -} MV_JOINT_TYPE; - -/* Symbols for coding magnitude class of nonzero components */ -typedef enum { - MV_CLASS_0 = 0, /* (0, 2] integer pel */ - MV_CLASS_1 = 1, /* (2, 4] integer pel */ - MV_CLASS_2 = 2, /* (4, 8] integer pel */ - MV_CLASS_3 = 3, /* (8, 16] integer pel */ - MV_CLASS_4 = 4, /* (16, 32] integer pel */ - MV_CLASS_5 = 5, /* (32, 64] integer pel */ - MV_CLASS_6 = 6, /* (64, 128] integer pel */ - MV_CLASS_7 = 7, /* (128, 256] integer pel */ - MV_CLASS_8 = 8, /* (256, 512] integer pel */ - MV_CLASS_9 = 9, /* (512, 1024] integer pel */ - MV_CLASS_10 = 10, /* (1024,2048] integer pel */ -} MV_CLASS_TYPE; - -typedef enum PARTITION_TYPE { - PARTITION_NONE, - PARTITION_HORZ, - PARTITION_VERT, - PARTITION_SPLIT, - PARTITION_TYPES -} PARTITION_TYPE; - - -typedef enum -{ - DC_PRED, /* average of above and left pixels */ - V_PRED, /* vertical prediction */ - H_PRED, /* horizontal prediction */ - D45_PRED, /* Directional 45 deg prediction [anti-clockwise from 0 deg hor] */ - D135_PRED, /* Directional 135 deg prediction [anti-clockwise from 0 deg hor] */ - D117_PRED, /* Directional 112 deg prediction [anti-clockwise from 0 deg hor] */ - D153_PRED, /* Directional 157 deg prediction [anti-clockwise from 0 deg hor] */ - D27_PRED, /* Directional 22 deg prediction [anti-clockwise from 0 deg hor] */ - D63_PRED, /* Directional 67 deg prediction [anti-clockwise from 0 deg hor] */ - TM_PRED, /* Truemotion prediction */ - NEARESTMV, - 
NEARMV, - ZEROMV, - NEWMV, - SPLITMV, - MB_MODE_COUNT -} MB_PREDICTION_MODE; - -typedef enum { - KEY_FRAME = 0, - INTER_FRAME = 1, - NUM_FRAME_TYPES, -} FRAME_TYPE; - -// Segment level features. -typedef enum { - TX_4X4 = 0, // 4x4 dct transform - TX_8X8 = 1, // 8x8 dct transform - TX_16X16 = 2, // 16x16 dct transform - TX_32X32 = 3, // 32x32 dct transform - TX_SIZE_MAX_SB, // Number of transforms available to SBs -} TX_SIZE; - -#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) - -#define BIG_NUM 0xffff -#define MIN_TILE_WIDTH_B64 4 -#define MAX_TILE_WIDTH_B64 64 -#define MI_SIZE_LOG2 3 -#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) -#define ALIGN_POWER_OF_TWO(value, n) \ - (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) -#define VP9_MB_LVL_MAX 2 -#define VP9_MAX_MB_SEGMENTS 4 -#define VP9_MB_FEATURE_TREE_PROBS 3 -#define MAX_REF_LF_DELTAS 4 -#define MAX_MODE_LF_DELTAS 2 //for vp8 its 4 -#define ALLOWED_REFS_PER_FRAME 3 -#define NUM_REF_FRAMES 8 -#define NUM_REF_FRAMES_LG2 3 -#define NUM_FRAME_CONTEXTS_LG2 2 -#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6) -#define MIN_TILE_WIDTH 256 -#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6) -//#define MAX_TILE_WIDTH 4096 -#define MAX_MB_SEGMENTS 8 -#define MB_SEG_TREE_PROBS (MAX_MB_SEGMENTS-1) -#define MAX_PROB 255 -#define PREDICTION_PROBS 3 -#define TX_SIZE_CONTEXTS 2 -#define PARTITION_PLOFFSET 4 // number of probability models per block size -#define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) -#define BLOCK_SIZE_GROUPS 4 -#define VP9_INTRA_MODES 10/* (TM_PRED + 1) */ -#define COMP_PRED_CONTEXTS 2 -/* Entropy nodes above is divided in two parts, first three probs in part1 - * and the modeled probs in part2. Part1 is padded so that tables align with - * 32 byte addresses, so there is four bytes for each table. 
*/ -#define ENTROPY_NODES_PART1 4 -#define ENTROPY_NODES_PART2 8 -#define INTER_MODE_CONTEXTS 7 -#define VP9_SWITCHABLE_FILTERS 3 /* number of switchable filters */ -#define COMP_PRED_CONTEXTS 2 -#define INTRA_INTER_CONTEXTS 4 -#define COMP_INTER_CONTEXTS 5 -#define REF_CONTEXTS 5 -#define VP9_BLOCK_TYPES 2 -#define VP9_REF_TYPES 2 // intra=0, inter=1 -#define VP9_COEF_BANDS 6 -#define VP9_PREV_COEF_CONTEXTS 6 -#define MBSKIP_CONTEXTS 3 -#define COEF_UPDATE_PROB 252 -#define VP9_PROB_HALF 128 -#define VP9_NMV_UPDATE_PROB 252 -#define VP9_MV_UPDATE_PRECISION 7 -#define MV_JOINTS 4 -#define MV_CLASSES 11 -#define CLASS0_BITS 1 -#define CLASS0_SIZE (1 << CLASS0_BITS) -#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) -/* The first nodes of the entropy probs are unconstrained, the rest are - * modeled with statistic distribution. */ -#define UNCONSTRAINED_NODES 3 -#define MODEL_NODES (VP9_ENTROPY_NODES - UNCONSTRAINED_NODES) -#define PIVOT_NODE 2 // which node is pivot -#define COEFPROB_MODELS 128 -#define END_OF_STREAM 0xFFFFFFFFU -#define VP9_DEF_UPDATE_PROB 252 -#define MODULUS_PARAM 13 -#define OK 0 //HANTRO_OK -#define NOK 1 //HANTRO_NOK -#define CHECK_END_OF_STREAM(s) if((s)==END_OF_STREAM) return (s) -#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV) -#define VP9_REF_LIST_SIZE 8 -#define SEGMENT_DELTADATA 0 -#define SEGMENT_ABSDATA 1 -#define MAXQ 255 -#define LOTS_OF_BITS 0x40000000 -#define BD_VALUE_SIZE ((int32_t)sizeof(VP9_BD_VALUE)*CHAR_BIT) - -#define VP9_ENTROPY_NODES 11 -#define COEF_COUNT_SAT 24 -#define COEF_MAX_UPDATE_FACTOR 112 -#define COEF_COUNT_SAT_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_KEY 112 -#define COEF_COUNT_SAT_AFTER_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 -#define MODE_COUNT_SAT 20 -#define MODE_MAX_UPDATE_FACTOR 128 -#define MAX_PROBS 32 -#define MVREF_COUNT_SAT 20 -#define MVREF_MAX_UPDATE_FACTOR 128 -#define MV_COUNT_SAT 20 -#define MV_MAX_UPDATE_FACTOR 128 - -/* Coefficient token alphabet */ - -#define ZERO_TOKEN 0 /* 0 
Extra Bits 0+0 */ -#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */ -#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */ -#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */ -#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ -#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ -#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ -#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ -#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ -#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ -#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 13+1 */ -#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ -#define MAX_ENTROPY_TOKENS 12 -#define FRAME_CONTEXTS_LOG2 2 -#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) - -#define DCT_EOB_MODEL_TOKEN 3 /* EOB Extra Bits 0+0 */ - -typedef signed char vp9_tree_index; - -static const int32_t seg_feature_data_signed[SEG_LVL_MAX] = {1, 1, 0, 0}; -static const int32_t seg_feature_data_max[SEG_LVL_MAX] = {MAXQ, 63, 3, 0}; - -#define NVDEC_VP9HWPAD(x, y) unsigned char x[y] - -typedef struct { - /* last bytes of address 41 */ - unsigned char joints[3]; - unsigned char sign[2]; - /* address 42 */ - unsigned char class0[2][1]; - unsigned char fp[2][3]; - unsigned char class0_hp[2]; - unsigned char hp[2]; - unsigned char classes[2][10]; - /* address 43 */ - unsigned char class0_fp[2][2][3]; - unsigned char bits[2][10]; - -} nvdec_nmv_context; - -/* Adaptive entropy contexts, padding elements are added to have - * 256 bit aligned tables for HW access. - * Compile with TRACE_PROB_TABLES to print bases for each table. 
*/ -typedef struct nvdec_vp9AdaptiveEntropyProbs_s -{ - /* address 32 */ - unsigned char inter_mode_prob[7][4]; - unsigned char intra_inter_prob[4]; - - /* address 33 */ - unsigned char uv_mode_prob[10][8]; - unsigned char tx8x8_prob[2][1]; - unsigned char tx16x16_prob[2][2]; - unsigned char tx32x32_prob[2][3]; - unsigned char sb_ymode_probB[4][1]; - unsigned char sb_ymode_prob[4][8]; - - /* address 37 */ - unsigned char partition_prob[2][16][4]; - - /* address 41 */ - unsigned char uv_mode_probB[10][1]; - unsigned char switchable_interp_prob[4][2]; - unsigned char comp_inter_prob[5]; - unsigned char mbskip_probs[3]; - NVDEC_VP9HWPAD(pad1, 1); - - nvdec_nmv_context nmvc; - - /* address 44 */ - unsigned char single_ref_prob[5][2]; - unsigned char comp_ref_prob[5]; - NVDEC_VP9HWPAD(pad2, 17); - - /* address 45 */ - unsigned char probCoeffs[2][2][6][6][4]; - unsigned char probCoeffs8x8[2][2][6][6][4]; - unsigned char probCoeffs16x16[2][2][6][6][4]; - unsigned char probCoeffs32x32[2][2][6][6][4]; - -} nvdec_vp9AdaptiveEntropyProbs_t; - -typedef struct nvdec_vp9EntropyProbs_s -{ - /* Default keyframe probs */ - /* Table formatted for 256b memory, probs 0to7 for all tables followed by - * probs 8toN for all tables. - * Compile with TRACE_PROB_TABLES to print bases for each table. 
*/ - - unsigned char kf_bmode_prob[10][10][8]; - - /* Address 25 */ - unsigned char kf_bmode_probB[10][10][1]; - unsigned char ref_pred_probs[3]; - unsigned char mb_segment_tree_probs[7]; - unsigned char segment_pred_probs[3]; - unsigned char ref_scores[4]; - unsigned char prob_comppred[2]; - NVDEC_VP9HWPAD(pad1, 9); - - /* Address 29 */ - unsigned char kf_uv_mode_prob[10][8]; - unsigned char kf_uv_mode_probB[10][1]; - NVDEC_VP9HWPAD(pad2, 6); - - nvdec_vp9AdaptiveEntropyProbs_t a; /* Probs with backward adaptation */ - - -} nvdec_vp9EntropyProbs_t; - -typedef struct { - unsigned int joints[4]; - unsigned int sign[2][2]; - unsigned int classes[2][11]; - unsigned int class0[2][2]; - unsigned int bits[2][10][2]; - unsigned int class0_fp[2][2][4]; - unsigned int fp[2][4]; - unsigned int class0_hp[2][2]; - unsigned int hp[2][2]; - -} nvdec_nmv_context_counts; - -typedef struct nvdec_vp9EntropyCounts_s -{ - unsigned int inter_mode_counts[7][3][2]; - unsigned int sb_ymode_counts[4][10]; - unsigned int uv_mode_counts[10][10]; - unsigned int partition_counts[16][4]; - unsigned int switchable_interp_counts[4][3]; - unsigned int intra_inter_count[4][2]; - unsigned int comp_inter_count[5][2]; - unsigned int single_ref_count[5][2][2]; - unsigned int comp_ref_count[5][2]; - unsigned int tx32x32_count[2][4]; - unsigned int tx16x16_count[2][3]; - unsigned int tx8x8_count[2][2]; - unsigned int mbskip_count[3][2]; - - nvdec_nmv_context_counts nmvcount; - - unsigned int countCoeffs[2][2][6][6][4]; - unsigned int countCoeffs8x8[2][2][6][6][4]; - unsigned int countCoeffs16x16[2][2][6][6][4]; - unsigned int countCoeffs32x32[2][2][6][6][4]; - - unsigned int countEobs[4][2][2][6][6]; - -} nvdec_vp9EntropyCounts_t; - -// Structure required to update Forward and Backward probabilities -typedef struct _vp9_prob_update_s -{ - nvdec_vp9EntropyProbs_t *pProbTab; - nvdec_vp9EntropyCounts_t *pCtxCounters; - unsigned char keyFrame : 1; - unsigned char prevIsKeyFrame : 1; - unsigned char 
resolutionChange : 1; - unsigned char errorResilient : 1; - unsigned char prevShowFrame : 1; - unsigned char intraOnly : 1; - unsigned char reserved2 : 2; - char lossless; - char transform_mode; - char allow_high_precision_mv; - char mcomp_filter_type; - char comp_pred_mode; - unsigned char FrameParallelDecoding; - unsigned char RefreshEntropyProbs; - uint32_t resetFrameContext; - uint32_t frameContextIdx; - uint32_t offsetToDctParts; - uint32_t allow_comp_inter_inter; - uint32_t probsDecoded; -} vp9_prob_update_s; - -typedef uint32_t VP9_BD_VALUE; - -typedef struct { - uint32_t buffer_end; - uint32_t buffer; - int32_t value; - int32_t count; - uint32_t range; - uint32_t pos; -} vp9_reader; - -const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ -{ - -DCT_EOB_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, 6, /* 2 = ONE */ - 8, 12, /* 3 = LOW_VAL */ - -TWO_TOKEN, 10, /* 4 = TWO */ - -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ - 14, 16, /* 6 = HIGH_LOW */ - -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ - 18, 20, /* 8 = CAT_THREEFOUR */ - -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ - -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ -}; - -const vp9_tree_index vp9_coefmodel_tree[6] = { - -DCT_EOB_MODEL_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, -TWO_TOKEN, /* 2 = ONE */ -}; +typedef struct _vp9_ref_frames_s { + VkPicIf* buffer; + StdVideoVP9FrameType frame_type; + bool segmentation_enabled; +} vp9_ref_frames_s; -const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = { - -0, 2, - -1, -2 -}; - -const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = { - -MV_JOINT_ZERO, 2, - -MV_JOINT_HNZVZ, 4, - -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ -}; - -const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = { - -0, -1, -}; - -const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = { - -MV_CLASS_0, 2, - -MV_CLASS_1, 4, - 6, 8, - 
-MV_CLASS_2, -MV_CLASS_3, - 10, 12, - -MV_CLASS_4, -MV_CLASS_5, - -MV_CLASS_6, 14, - 16, 18, - -MV_CLASS_7, -MV_CLASS_8, - -MV_CLASS_9, -MV_CLASS_10, -}; - -const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = { - -0, 2, - -1, 4, - -2, -3 -}; - -static const uint32_t vp9dx_bitreader_norm[256] = +class VulkanVP9Decoder : public VulkanVideoDecoder { - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -//***************************************************************** -//vp9_entropymode.c -typedef uint8_t vp9_prob; -//typedef uint8_t vp9_tree_index; // typedef i8 vp9_tree_index -static const vp9_prob default_kf_uv_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1] = { - { 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */, - { 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */, - { 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */, - { 120, 11, 50, 123, 163, 135, 64, 77, 103 } /* y = d45 */, - { 113, 9, 36, 155, 111, 157, 32, 44, 161 } /* y = d135 */, - { 116, 9, 55, 176, 76, 96, 37, 61, 149 } /* y = d117 */, - { 115, 9, 28, 141, 161, 167, 21, 25, 193 } /* y = d153 */, - { 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d27 */, - { 116, 12, 64, 120, 140, 125, 49, 115, 121 } /* y = d63 */, - { 102, 19, 66, 162, 182, 122, 35, 59, 128 } /* y 
= tm */ -}; - -static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS] - [VP9_INTRA_MODES - 1] = { - { 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* block_size < 8x8 */, - { 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* block_size < 16x16 */, - { 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* block_size < 32x32 */, - { 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* block_size >= 32x32 */ -}; - -static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES - 1] = { - { 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */, - { 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */, - { 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */, - { 97, 5, 44, 131, 176, 139, 48, 68, 97 } /* y = d45 */, - { 83, 5, 42, 156, 111, 152, 26, 49, 152 } /* y = d135 */, - { 80, 5, 58, 178, 74, 83, 33, 62, 145 } /* y = d117 */, - { 86, 5, 32, 154, 192, 168, 14, 22, 163 } /* y = d153 */, - { 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d27 */, - { 77, 7, 64, 116, 132, 122, 37, 126, 120 } /* y = d63 */, - { 101, 21, 107, 181, 192, 103, 19, 67, 125 } /* y = tm */ -}; - -static const uint8_t vp9_default_inter_mode_prob[INTER_MODE_CONTEXTS][4] = { - {2, 173, 34, 0}, // 0 = both zero mv - {7, 145, 85, 0}, // 1 = one zero mv + one a predicted mv - {7, 166, 63, 0}, // 2 = two predicted mvs - {7, 94, 66, 0}, // 3 = one predicted/zero and one new mv - {8, 64, 46, 0}, // 4 = two new mvs - {17, 81, 31, 0}, // 5 = one intra neighbour + x - {25, 29, 30, 0}, // 6 = two intra neighbours -}; -static const vp9_prob vp9_partition_probs[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS] - [PARTITION_TYPES] = { /* 1 byte padding */ - { /* frame_type = keyframe */ - /* 8x8 -> 4x4 */ - { 158, 97, 94, 0 } /* a/l both not split */, - { 93, 24, 99, 0 } /* a split, l not split */, - { 85, 119, 44, 0 } /* l split, a not split */, - { 62, 59, 67, 0 } /* a/l both split */, - /* 16x16 -> 8x8 */ - { 149, 53, 53, 0 } /* a/l both not split */, - { 94, 20, 48, 0 } /* a split, l not split */, - { 83, 53, 24, 0 } /* l 
split, a not split */, - { 52, 18, 18, 0 } /* a/l both split */, - /* 32x32 -> 16x16 */ - { 150, 40, 39, 0 } /* a/l both not split */, - { 78, 12, 26, 0 } /* a split, l not split */, - { 67, 33, 11, 0 } /* l split, a not split */, - { 24, 7, 5, 0 } /* a/l both split */, - /* 64x64 -> 32x32 */ - { 174, 35, 49, 0 } /* a/l both not split */, - { 68, 11, 27, 0 } /* a split, l not split */, - { 57, 15, 9, 0 } /* l split, a not split */, - { 12, 3, 3, 0 } /* a/l both split */ - }, { /* frame_type = interframe */ - /* 8x8 -> 4x4 */ - { 199, 122, 141, 0 } /* a/l both not split */, - { 147, 63, 159, 0 } /* a split, l not split */, - { 148, 133, 118, 0 } /* l split, a not split */, - { 121, 104, 114, 0 } /* a/l both split */, - /* 16x16 -> 8x8 */ - { 174, 73, 87, 0 } /* a/l both not split */, - { 92, 41, 83, 0 } /* a split, l not split */, - { 82, 99, 50, 0 } /* l split, a not split */, - { 53, 39, 39, 0 } /* a/l both split */, - /* 32x32 -> 16x16 */ - { 177, 58, 59, 0 } /* a/l both not split */, - { 68, 26, 63, 0 } /* a split, l not split */, - { 52, 79, 25, 0 } /* l split, a not split */, - { 17, 14, 12, 0 } /* a/l both split */, - /* 64x64 -> 32x32 */ - { 222, 34, 30, 0 } /* a/l both not split */, - { 72, 16, 44, 0 } /* a split, l not split */, - { 58, 32, 12, 0 } /* l split, a not split */, - { 10, 7, 6, 0 } /* a/l both split */ - } -}; -static const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = { - -DC_PRED, 2, // 0 = DC_NODE - -TM_PRED, 4, // 1 = TM_NODE - -V_PRED, 6, // 2 = V_NODE - 8, 12, // 3 = COM_NODE - -H_PRED, 10, // 4 = H_NODE - -D135_PRED, -D117_PRED, // 5 = D135_NODE - -D45_PRED, 14, // 6 = D45_NODE - -D63_PRED, 16, // 7 = D63_NODE - -D153_PRED, -D27_PRED // 8 = D153_NODE -}; - -static const vp9_tree_index vp9_partition_tree[6] = { - -PARTITION_NONE, 2, - -PARTITION_HORZ, 4, - -PARTITION_VERT, -PARTITION_SPLIT -}; - -static const vp9_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { - 9, 102, 187, 225 -}; - -static const vp9_prob 
default_comp_inter_p[COMP_INTER_CONTEXTS] = { - 239, 183, 119, 96, 41 -}; - -static const vp9_prob default_comp_ref_p[REF_CONTEXTS] = { - 50, 126, 123, 221, 226 -}; - -static const vp9_prob default_single_ref_p[REF_CONTEXTS][2] = { - { 33, 16 }, - { 77, 74 }, - { 142, 142 }, - { 172, 170 }, - { 238, 247 } -}; - -static const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1] - [VP9_SWITCHABLE_FILTERS-1] = { - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 149, 144, }, -}; -static const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_CONTEXTS] - [TX_SIZE_MAX_SB - 1] = { - { 3, 136, 37, }, - { 5, 52, 13, }, -}; -static const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_CONTEXTS] - [TX_SIZE_MAX_SB - 2] = { - { 20, 152, }, - { 15, 101, }, -}; -static const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_CONTEXTS] - [TX_SIZE_MAX_SB - 3] = { - { 100, }, - { 66, }, -}; -static const vp9_prob vp9_default_mbskip_probs[MBSKIP_CONTEXTS] = { //its C0..shud be f8?? - 192, 128, 64 -}; - -static const nvdec_nmv_context vp9_default_nmv_context = { - {32, 64, 96}, /* joints */ - {128, 128}, /* sign */ - {{216},{208}}, /* class0 */ - {{64, 96, 64},{64, 96, 64}}, /* fp */ - {160,160}, /* class0_hp bit */ - {128,128}, /* hp */ - {{224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, - {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}}, /* class */ - {{{128, 128, 64}, {96, 112, 64}}, - {{128, 128, 64}, {96, 112, 64}}}, /* class0_fp */ - {{136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}}, /* bits */ -}; +protected: + VkParserVp9PictureData m_PicData; -static const int32_t vp9_seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 }; -static const int32_t vp9_seg_feature_data_max[SEG_LVL_MAX] = { 255, 63, 3, 0 }; -typedef uint8_t vp9_coeff_probs[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES]; + VkPicIf* m_pCurrPic; + VkPicIf* m_pOutFrame[VP9_MAX_NUM_SPATIAL_LAYERS]; -static const vp9_coeff_probs 
default_coef_probs_4x4[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 195, 29, 183 }, - { 84, 49, 136 }, - { 8, 42, 71 } - }, { /* Coeff Band 1 */ - { 31, 107, 169 }, - { 35, 99, 159 }, - { 17, 82, 140 }, - { 8, 66, 114 }, - { 2, 44, 76 }, - { 1, 19, 32 } - }, { /* Coeff Band 2 */ - { 40, 132, 201 }, - { 29, 114, 187 }, - { 13, 91, 157 }, - { 7, 75, 127 }, - { 3, 58, 95 }, - { 1, 28, 47 } - }, { /* Coeff Band 3 */ - { 69, 142, 221 }, - { 42, 122, 201 }, - { 15, 91, 159 }, - { 6, 67, 121 }, - { 1, 42, 77 }, - { 1, 17, 31 } - }, { /* Coeff Band 4 */ - { 102, 148, 228 }, - { 67, 117, 204 }, - { 17, 82, 154 }, - { 6, 59, 114 }, - { 2, 39, 75 }, - { 1, 15, 29 } - }, { /* Coeff Band 5 */ - { 156, 57, 233 }, - { 119, 57, 212 }, - { 58, 48, 163 }, - { 29, 40, 124 }, - { 12, 30, 81 }, - { 3, 12, 31 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 191, 107, 226 }, - { 124, 117, 204 }, - { 25, 99, 155 } - }, { /* Coeff Band 1 */ - { 29, 148, 210 }, - { 37, 126, 194 }, - { 8, 93, 157 }, - { 2, 68, 118 }, - { 1, 39, 69 }, - { 1, 17, 33 } - }, { /* Coeff Band 2 */ - { 41, 151, 213 }, - { 27, 123, 193 }, - { 3, 82, 144 }, - { 1, 58, 105 }, - { 1, 32, 60 }, - { 1, 13, 26 } - }, { /* Coeff Band 3 */ - { 59, 159, 220 }, - { 23, 126, 198 }, - { 4, 88, 151 }, - { 1, 66, 114 }, - { 1, 38, 71 }, - { 1, 18, 34 } - }, { /* Coeff Band 4 */ - { 114, 136, 232 }, - { 51, 114, 207 }, - { 11, 83, 155 }, - { 3, 56, 105 }, - { 1, 33, 65 }, - { 1, 17, 34 } - }, { /* Coeff Band 5 */ - { 149, 65, 234 }, - { 121, 57, 215 }, - { 61, 49, 166 }, - { 28, 36, 114 }, - { 12, 25, 76 }, - { 3, 16, 42 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 214, 49, 220 }, - { 132, 63, 188 }, - { 42, 65, 137 } - }, { /* Coeff Band 1 */ - { 85, 137, 221 }, - { 104, 131, 216 }, - { 49, 111, 192 }, - { 21, 87, 155 }, - { 2, 49, 87 }, - { 1, 16, 28 } - }, { /* Coeff Band 2 */ - { 89, 163, 230 }, - { 90, 137, 220 }, - { 29, 100, 183 }, - { 10, 70, 
135 }, - { 2, 42, 81 }, - { 1, 17, 33 } - }, { /* Coeff Band 3 */ - { 108, 167, 237 }, - { 55, 133, 222 }, - { 15, 97, 179 }, - { 4, 72, 135 }, - { 1, 45, 85 }, - { 1, 19, 38 } - }, { /* Coeff Band 4 */ - { 124, 146, 240 }, - { 66, 124, 224 }, - { 17, 88, 175 }, - { 4, 58, 122 }, - { 1, 36, 75 }, - { 1, 18, 37 } - }, { /* Coeff Band 5 */ - { 141, 79, 241 }, - { 126, 70, 227 }, - { 66, 58, 182 }, - { 30, 44, 136 }, - { 12, 34, 96 }, - { 2, 20, 47 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 229, 99, 249 }, - { 143, 111, 235 }, - { 46, 109, 192 } - }, { /* Coeff Band 1 */ - { 82, 158, 236 }, - { 94, 146, 224 }, - { 25, 117, 191 }, - { 9, 87, 149 }, - { 3, 56, 99 }, - { 1, 33, 57 } - }, { /* Coeff Band 2 */ - { 83, 167, 237 }, - { 68, 145, 222 }, - { 10, 103, 177 }, - { 2, 72, 131 }, - { 1, 41, 79 }, - { 1, 20, 39 } - }, { /* Coeff Band 3 */ - { 99, 167, 239 }, - { 47, 141, 224 }, - { 10, 104, 178 }, - { 2, 73, 133 }, - { 1, 44, 85 }, - { 1, 22, 47 } - }, { /* Coeff Band 4 */ - { 127, 145, 243 }, - { 71, 129, 228 }, - { 17, 93, 177 }, - { 3, 61, 124 }, - { 1, 41, 84 }, - { 1, 21, 52 } - }, { /* Coeff Band 5 */ - { 157, 78, 244 }, - { 140, 72, 231 }, - { 69, 58, 184 }, - { 31, 44, 137 }, - { 14, 38, 105 }, - { 8, 23, 61 } - } - } - } -}; -static const vp9_coeff_probs default_coef_probs_8x8[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 125, 34, 187 }, - { 52, 41, 133 }, - { 6, 31, 56 } - }, { /* Coeff Band 1 */ - { 37, 109, 153 }, - { 51, 102, 147 }, - { 23, 87, 128 }, - { 8, 67, 101 }, - { 1, 41, 63 }, - { 1, 19, 29 } - }, { /* Coeff Band 2 */ - { 31, 154, 185 }, - { 17, 127, 175 }, - { 6, 96, 145 }, - { 2, 73, 114 }, - { 1, 51, 82 }, - { 1, 28, 45 } - }, { /* Coeff Band 3 */ - { 23, 163, 200 }, - { 10, 131, 185 }, - { 2, 93, 148 }, - { 1, 67, 111 }, - { 1, 41, 69 }, - { 1, 14, 24 } - }, { /* Coeff Band 4 */ - { 29, 176, 217 }, - { 12, 145, 201 }, - { 3, 101, 156 }, - { 1, 69, 111 }, - { 1, 39, 63 }, - { 1, 14, 23 } - }, 
{ /* Coeff Band 5 */ - { 57, 192, 233 }, - { 25, 154, 215 }, - { 6, 109, 167 }, - { 3, 78, 118 }, - { 1, 48, 69 }, - { 1, 21, 29 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 202, 105, 245 }, - { 108, 106, 216 }, - { 18, 90, 144 } - }, { /* Coeff Band 1 */ - { 33, 172, 219 }, - { 64, 149, 206 }, - { 14, 117, 177 }, - { 5, 90, 141 }, - { 2, 61, 95 }, - { 1, 37, 57 } - }, { /* Coeff Band 2 */ - { 33, 179, 220 }, - { 11, 140, 198 }, - { 1, 89, 148 }, - { 1, 60, 104 }, - { 1, 33, 57 }, - { 1, 12, 21 } - }, { /* Coeff Band 3 */ - { 30, 181, 221 }, - { 8, 141, 198 }, - { 1, 87, 145 }, - { 1, 58, 100 }, - { 1, 31, 55 }, - { 1, 12, 20 } - }, { /* Coeff Band 4 */ - { 32, 186, 224 }, - { 7, 142, 198 }, - { 1, 86, 143 }, - { 1, 58, 100 }, - { 1, 31, 55 }, - { 1, 12, 22 } - }, { /* Coeff Band 5 */ - { 57, 192, 227 }, - { 20, 143, 204 }, - { 3, 96, 154 }, - { 1, 68, 112 }, - { 1, 42, 69 }, - { 1, 19, 32 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 212, 35, 215 }, - { 113, 47, 169 }, - { 29, 48, 105 } - }, { /* Coeff Band 1 */ - { 74, 129, 203 }, - { 106, 120, 203 }, - { 49, 107, 178 }, - { 19, 84, 144 }, - { 4, 50, 84 }, - { 1, 15, 25 } - }, { /* Coeff Band 2 */ - { 71, 172, 217 }, - { 44, 141, 209 }, - { 15, 102, 173 }, - { 6, 76, 133 }, - { 2, 51, 89 }, - { 1, 24, 42 } - }, { /* Coeff Band 3 */ - { 64, 185, 231 }, - { 31, 148, 216 }, - { 8, 103, 175 }, - { 3, 74, 131 }, - { 1, 46, 81 }, - { 1, 18, 30 } - }, { /* Coeff Band 4 */ - { 65, 196, 235 }, - { 25, 157, 221 }, - { 5, 105, 174 }, - { 1, 67, 120 }, - { 1, 38, 69 }, - { 1, 15, 30 } - }, { /* Coeff Band 5 */ - { 65, 204, 238 }, - { 30, 156, 224 }, - { 7, 107, 177 }, - { 2, 70, 124 }, - { 1, 42, 73 }, - { 1, 18, 34 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 225, 86, 251 }, - { 144, 104, 235 }, - { 42, 99, 181 } - }, { /* Coeff Band 1 */ - { 85, 175, 239 }, - { 112, 165, 229 }, - { 29, 136, 200 }, - { 12, 103, 162 }, - { 6, 77, 123 }, - { 2, 53, 84 } - }, { /* Coeff Band 2 */ 
- { 75, 183, 239 }, - { 30, 155, 221 }, - { 3, 106, 171 }, - { 1, 74, 128 }, - { 1, 44, 76 }, - { 1, 17, 28 } - }, { /* Coeff Band 3 */ - { 73, 185, 240 }, - { 27, 159, 222 }, - { 2, 107, 172 }, - { 1, 75, 127 }, - { 1, 42, 73 }, - { 1, 17, 29 } - }, { /* Coeff Band 4 */ - { 62, 190, 238 }, - { 21, 159, 222 }, - { 2, 107, 172 }, - { 1, 72, 122 }, - { 1, 40, 71 }, - { 1, 18, 32 } - }, { /* Coeff Band 5 */ - { 61, 199, 240 }, - { 27, 161, 226 }, - { 4, 113, 180 }, - { 1, 76, 129 }, - { 1, 46, 80 }, - { 1, 23, 41 } - } - } - } -}; -static const vp9_coeff_probs default_coef_probs_16x16[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 7, 27, 153 }, - { 5, 30, 95 }, - { 1, 16, 30 } - }, { /* Coeff Band 1 */ - { 50, 75, 127 }, - { 57, 75, 124 }, - { 27, 67, 108 }, - { 10, 54, 86 }, - { 1, 33, 52 }, - { 1, 12, 18 } - }, { /* Coeff Band 2 */ - { 43, 125, 151 }, - { 26, 108, 148 }, - { 7, 83, 122 }, - { 2, 59, 89 }, - { 1, 38, 60 }, - { 1, 17, 27 } - }, { /* Coeff Band 3 */ - { 23, 144, 163 }, - { 13, 112, 154 }, - { 2, 75, 117 }, - { 1, 50, 81 }, - { 1, 31, 51 }, - { 1, 14, 23 } - }, { /* Coeff Band 4 */ - { 18, 162, 185 }, - { 6, 123, 171 }, - { 1, 78, 125 }, - { 1, 51, 86 }, - { 1, 31, 54 }, - { 1, 14, 23 } - }, { /* Coeff Band 5 */ - { 15, 199, 227 }, - { 3, 150, 204 }, - { 1, 91, 146 }, - { 1, 55, 95 }, - { 1, 30, 53 }, - { 1, 11, 20 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 19, 55, 240 }, - { 19, 59, 196 }, - { 3, 52, 105 } - }, { /* Coeff Band 1 */ - { 41, 166, 207 }, - { 104, 153, 199 }, - { 31, 123, 181 }, - { 14, 101, 152 }, - { 5, 72, 106 }, - { 1, 36, 52 } - }, { /* Coeff Band 2 */ - { 35, 176, 211 }, - { 12, 131, 190 }, - { 2, 88, 144 }, - { 1, 60, 101 }, - { 1, 36, 60 }, - { 1, 16, 28 } - }, { /* Coeff Band 3 */ - { 28, 183, 213 }, - { 8, 134, 191 }, - { 1, 86, 142 }, - { 1, 56, 96 }, - { 1, 30, 53 }, - { 1, 12, 20 } - }, { /* Coeff Band 4 */ - { 20, 190, 215 }, - { 4, 135, 192 }, - { 1, 84, 139 }, - { 1, 53, 91 
}, - { 1, 28, 49 }, - { 1, 11, 20 } - }, { /* Coeff Band 5 */ - { 13, 196, 216 }, - { 2, 137, 192 }, - { 1, 86, 143 }, - { 1, 57, 99 }, - { 1, 32, 56 }, - { 1, 13, 24 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 211, 29, 217 }, - { 96, 47, 156 }, - { 22, 43, 87 } - }, { /* Coeff Band 1 */ - { 78, 120, 193 }, - { 111, 116, 186 }, - { 46, 102, 164 }, - { 15, 80, 128 }, - { 2, 49, 76 }, - { 1, 18, 28 } - }, { /* Coeff Band 2 */ - { 71, 161, 203 }, - { 42, 132, 192 }, - { 10, 98, 150 }, - { 3, 69, 109 }, - { 1, 44, 70 }, - { 1, 18, 29 } - }, { /* Coeff Band 3 */ - { 57, 186, 211 }, - { 30, 140, 196 }, - { 4, 93, 146 }, - { 1, 62, 102 }, - { 1, 38, 65 }, - { 1, 16, 27 } - }, { /* Coeff Band 4 */ - { 47, 199, 217 }, - { 14, 145, 196 }, - { 1, 88, 142 }, - { 1, 57, 98 }, - { 1, 36, 62 }, - { 1, 15, 26 } - }, { /* Coeff Band 5 */ - { 26, 219, 229 }, - { 5, 155, 207 }, - { 1, 94, 151 }, - { 1, 60, 104 }, - { 1, 36, 62 }, - { 1, 16, 28 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 233, 29, 248 }, - { 146, 47, 220 }, - { 43, 52, 140 } - }, { /* Coeff Band 1 */ - { 100, 163, 232 }, - { 179, 161, 222 }, - { 63, 142, 204 }, - { 37, 113, 174 }, - { 26, 89, 137 }, - { 18, 68, 97 } - }, { /* Coeff Band 2 */ - { 85, 181, 230 }, - { 32, 146, 209 }, - { 7, 100, 164 }, - { 3, 71, 121 }, - { 1, 45, 77 }, - { 1, 18, 30 } - }, { /* Coeff Band 3 */ - { 65, 187, 230 }, - { 20, 148, 207 }, - { 2, 97, 159 }, - { 1, 68, 116 }, - { 1, 40, 70 }, - { 1, 14, 29 } - }, { /* Coeff Band 4 */ - { 40, 194, 227 }, - { 8, 147, 204 }, - { 1, 94, 155 }, - { 1, 65, 112 }, - { 1, 39, 66 }, - { 1, 14, 26 } - }, { /* Coeff Band 5 */ - { 16, 208, 228 }, - { 3, 151, 207 }, - { 1, 98, 160 }, - { 1, 67, 117 }, - { 1, 41, 74 }, - { 1, 17, 31 } - } - } - } -}; -static const vp9_coeff_probs default_coef_probs_32x32[VP9_BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 17, 38, 140 }, - { 7, 34, 80 }, - { 1, 17, 29 } - }, { /* Coeff Band 1 */ - { 
37, 75, 128 }, - { 41, 76, 128 }, - { 26, 66, 116 }, - { 12, 52, 94 }, - { 2, 32, 55 }, - { 1, 10, 16 } - }, { /* Coeff Band 2 */ - { 50, 127, 154 }, - { 37, 109, 152 }, - { 16, 82, 121 }, - { 5, 59, 85 }, - { 1, 35, 54 }, - { 1, 13, 20 } - }, { /* Coeff Band 3 */ - { 40, 142, 167 }, - { 17, 110, 157 }, - { 2, 71, 112 }, - { 1, 44, 72 }, - { 1, 27, 45 }, - { 1, 11, 17 } - }, { /* Coeff Band 4 */ - { 30, 175, 188 }, - { 9, 124, 169 }, - { 1, 74, 116 }, - { 1, 48, 78 }, - { 1, 30, 49 }, - { 1, 11, 18 } - }, { /* Coeff Band 5 */ - { 10, 222, 223 }, - { 2, 150, 194 }, - { 1, 83, 128 }, - { 1, 48, 79 }, - { 1, 27, 45 }, - { 1, 11, 17 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 36, 41, 235 }, - { 29, 36, 193 }, - { 10, 27, 111 } - }, { /* Coeff Band 1 */ - { 85, 165, 222 }, - { 177, 162, 215 }, - { 110, 135, 195 }, - { 57, 113, 168 }, - { 23, 83, 120 }, - { 10, 49, 61 } - }, { /* Coeff Band 2 */ - { 85, 190, 223 }, - { 36, 139, 200 }, - { 5, 90, 146 }, - { 1, 60, 103 }, - { 1, 38, 65 }, - { 1, 18, 30 } - }, { /* Coeff Band 3 */ - { 72, 202, 223 }, - { 23, 141, 199 }, - { 2, 86, 140 }, - { 1, 56, 97 }, - { 1, 36, 61 }, - { 1, 16, 27 } - }, { /* Coeff Band 4 */ - { 55, 218, 225 }, - { 13, 145, 200 }, - { 1, 86, 141 }, - { 1, 57, 99 }, - { 1, 35, 61 }, - { 1, 13, 22 } - }, { /* Coeff Band 5 */ - { 15, 235, 212 }, - { 1, 132, 184 }, - { 1, 84, 139 }, - { 1, 57, 97 }, - { 1, 34, 56 }, - { 1, 14, 23 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 181, 21, 201 }, - { 61, 37, 123 }, - { 10, 38, 71 } - }, { /* Coeff Band 1 */ - { 47, 106, 172 }, - { 95, 104, 173 }, - { 42, 93, 159 }, - { 18, 77, 131 }, - { 4, 50, 81 }, - { 1, 17, 23 } - }, { /* Coeff Band 2 */ - { 62, 147, 199 }, - { 44, 130, 189 }, - { 28, 102, 154 }, - { 18, 75, 115 }, - { 2, 44, 65 }, - { 1, 12, 19 } - }, { /* Coeff Band 3 */ - { 55, 153, 210 }, - { 24, 130, 194 }, - { 3, 93, 146 }, - { 1, 61, 97 }, - { 1, 31, 50 }, - { 1, 10, 16 } - }, { /* Coeff Band 4 */ - { 49, 186, 
223 }, - { 17, 148, 204 }, - { 1, 96, 142 }, - { 1, 53, 83 }, - { 1, 26, 44 }, - { 1, 11, 17 } - }, { /* Coeff Band 5 */ - { 13, 217, 212 }, - { 2, 136, 180 }, - { 1, 78, 124 }, - { 1, 50, 83 }, - { 1, 29, 49 }, - { 1, 14, 23 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 197, 13, 247 }, - { 82, 17, 222 }, - { 25, 17, 162 } - }, { /* Coeff Band 1 */ - { 126, 186, 247 }, - { 234, 191, 243 }, - { 176, 177, 234 }, - { 104, 158, 220 }, - { 66, 128, 186 }, - { 55, 90, 137 } - }, { /* Coeff Band 2 */ - { 111, 197, 242 }, - { 46, 158, 219 }, - { 9, 104, 171 }, - { 2, 65, 125 }, - { 1, 44, 80 }, - { 1, 17, 91 } - }, { /* Coeff Band 3 */ - { 104, 208, 245 }, - { 39, 168, 224 }, - { 3, 109, 162 }, - { 1, 79, 124 }, - { 1, 50, 102 }, - { 1, 43, 102 } - }, { /* Coeff Band 4 */ - { 84, 220, 246 }, - { 31, 177, 231 }, - { 2, 115, 180 }, - { 1, 79, 134 }, - { 1, 55, 77 }, - { 1, 60, 79 } - }, { /* Coeff Band 5 */ - { 43, 243, 240 }, - { 8, 180, 217 }, - { 1, 115, 166 }, - { 1, 84, 121 }, - { 1, 51, 67 }, - { 1, 16, 6 } - } - } - } -}; + int m_frameIdx; + int m_dataSize; + int m_frameSize; + bool m_frameSizeChanged; -static const uint8_t vp9_kf_default_bmode_probs[VP9_INTRA_MODES] - [VP9_INTRA_MODES] - [VP9_INTRA_MODES-1] = { - { /* above = dc */ - { 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */, - { 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */, - { 73, 32, 19, 187, 222, 215, 46, 34, 100 } /* left = h */, - { 91, 30, 32, 116, 121, 186, 93, 86, 94 } /* left = d45 */, - { 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */, - { 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */, - { 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */, - { 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */, - { 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */, - { 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */ - }, { /* above = v */ - { 63, 36, 126, 146, 123, 158, 60, 90, 96 } /* left = dc */, - { 43, 46, 168, 134, 107, 128, 
69, 142, 92 } /* left = v */, - { 44, 29, 68, 159, 201, 177, 50, 57, 77 } /* left = h */, - { 58, 38, 76, 114, 97, 172, 78, 133, 92 } /* left = d45 */, - { 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */, - { 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */, - { 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */, - { 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */, - { 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */, - { 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */ - }, { /* above = h */ - { 82, 26, 26, 171, 208, 204, 44, 32, 105 } /* left = dc */, - { 55, 44, 68, 166, 179, 192, 57, 57, 108 } /* left = v */, - { 42, 26, 11, 199, 241, 228, 23, 15, 85 } /* left = h */, - { 68, 42, 19, 131, 160, 199, 55, 52, 83 } /* left = d45 */, - { 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */, - { 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */, - { 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */, - { 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */, - { 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 */, - { 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */ - }, { /* above = d45 */ - { 103, 26, 36, 129, 132, 201, 83, 80, 93 } /* left = dc */, - { 59, 38, 83, 112, 103, 162, 98, 136, 90 } /* left = v */, - { 62, 30, 23, 158, 200, 207, 59, 57, 50 } /* left = h */, - { 67, 30, 29, 84, 86, 191, 102, 91, 59 } /* left = d45 */, - { 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */, - { 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */, - { 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */, - { 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */, - { 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */, - { 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */ - }, { /* above = d135 */ - { 69, 23, 29, 128, 83, 199, 46, 44, 101 } /* left = dc */, - { 53, 40, 55, 139, 69, 183, 61, 80, 110 } /* left = v */, - { 40, 29, 19, 161, 180, 207, 
43, 24, 91 } /* left = h */, - { 60, 34, 19, 105, 61, 198, 53, 64, 89 } /* left = d45 */, - { 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */, - { 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */, - { 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */, - { 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */, - { 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */, - { 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */ - }, { /* above = d117 */ - { 64, 19, 37, 156, 66, 138, 49, 95, 133 } /* left = dc */, - { 46, 27, 80, 150, 55, 124, 55, 121, 135 } /* left = v */, - { 36, 23, 27, 165, 149, 166, 54, 64, 118 } /* left = h */, - { 53, 21, 36, 131, 63, 163, 60, 109, 81 } /* left = d45 */, - { 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */, - { 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */, - { 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */, - { 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */, - { 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */, - { 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */ - }, { /* above = d153 */ - { 75, 17, 22, 136, 138, 185, 32, 34, 166 } /* left = dc */, - { 56, 39, 58, 133, 117, 173, 48, 53, 187 } /* left = v */, - { 35, 21, 12, 161, 212, 207, 20, 23, 145 } /* left = h */, - { 56, 29, 19, 117, 109, 181, 55, 68, 112 } /* left = d45 */, - { 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */, - { 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */, - { 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */, - { 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */, - { 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */, - { 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */ - }, { /* above = d27 */ - { 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */, - { 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */, - { 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */, - { 68, 36, 17, 106, 102, 206, 59, 
74, 74 } /* left = d45 */, - { 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */, - { 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */, - { 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */, - { 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */, - { 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */, - { 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */ - }, { /* above = d63 */ - { 78, 23, 39, 111, 117, 170, 74, 124, 94 } /* left = dc */, - { 48, 34, 86, 101, 92, 146, 78, 179, 134 } /* left = v */, - { 47, 22, 24, 138, 187, 178, 68, 69, 59 } /* left = h */, - { 56, 25, 33, 105, 112, 187, 95, 177, 129 } /* left = d45 */, - { 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */, - { 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */, - { 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */, - { 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */, - { 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */, - { 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */ - }, { /* above = tm */ - { 65, 70, 60, 155, 159, 199, 61, 60, 81 } /* left = dc */, - { 44, 78, 115, 132, 119, 173, 71, 112, 93 } /* left = v */, - { 39, 38, 21, 184, 227, 206, 42, 32, 64 } /* left = h */, - { 58, 47, 36, 124, 137, 193, 80, 82, 78 } /* left = d45 */, - { 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */, - { 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */, - { 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */, - { 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */, - { 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */, - { 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */ - } -}; + int m_rtOrigWidth; + int m_rtOrigHeight; + bool m_pictureStarted; + bool m_bitstreamComplete; -class VulkanVP9Decoder : public VulkanVideoDecoder -{ -protected: - vp9_reader reader; - nvdec_vp9EntropyProbs_t m_EntropyLast[FRAME_CONTEXTS]; - nvdec_vp9AdaptiveEntropyProbs_t m_PrevCtx; - const unsigned 
char* m_pCompressedHeader; + // Parsing state for compute_image_size() side effects + int m_lastFrameWidth; + int m_lastFrameHeight; + bool m_lastShowFrame; - void vp9_init_mbmode_probs(vp9_prob_update_s *pProbSetup); - vp9_prob weighted_prob(int32_t prob1, int32_t prob2, int32_t factor); - vp9_prob clip_prob(uint32_t p); - vp9_prob get_prob(uint32_t num, uint32_t den); - vp9_prob get_binary_prob(uint32_t n0, uint32_t n1); - uint32_t convert_distribution(uint32_t i, - const vp9_tree_index * tree, - uint8_t probs[], - uint32_t branch_ct[][2], - const uint32_t num_events[], - uint32_t tok0_offset); - void vp9_tree_probs_from_distribution(const vp9_tree_index* tree, - uint8_t probs [ /* n-1 */ ], - uint32_t branch_ct [ /* n-1 */ ] [2], - const uint32_t num_events[ /* n */ ], - uint32_t tok0_offset); - void update_coef_probs(uint8_t dst_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint8_t pre_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint32_t coef_counts[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES+1], - uint32_t (*eob_counts)[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS], - int32_t count_sat, int32_t update_factor); - void adaptCoefProbs(vp9_prob_update_s *pProbSetup); - int32_t update_mode_ct(vp9_prob pre_prob, vp9_prob prob, uint32_t branch_ct[2]); - int32_t update_mode_ct2(vp9_prob pre_prob, uint32_t branch_ct[2]); - void update_mode_probs(int32_t n_modes, - const vp9_tree_index *tree, uint32_t *cnt, - vp9_prob *pre_probs, vp9_prob *pre_probsB, - vp9_prob *dst_probs, vp9_prob *dst_probsB, - uint32_t tok0_offset); - void tx_counts_to_branch_counts_32x32(uint32_t *tx_count_32x32p, uint32_t (*ct_32x32p)[2]); - void tx_counts_to_branch_counts_16x16(uint32_t *tx_count_16x16p, uint32_t (*ct_16x16p)[2]); - void tx_counts_to_branch_counts_8x8(uint32_t *tx_count_8x8p, uint32_t (*ct_8x8p)[2]); - 
void adaptModeProbs(vp9_prob_update_s *pProbSetup); - void adaptModeContext(vp9_prob_update_s *pProbSetup); - uint32_t adapt_probs(uint32_t i, - const signed char* tree, - vp9_prob this_probs[], - const vp9_prob last_probs[], - const uint32_t num_events[]); - void adapt_prob(vp9_prob *dest, vp9_prob prep, uint32_t ct[2]); - void adaptNmvProbs(vp9_prob_update_s *pProbSetup); + // Last used loop filter parameters + int8_t m_loopFilterRefDeltas[STD_VIDEO_VP9_MAX_REF_FRAMES]; + int8_t m_loopFilterModeDeltas[STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS]; + vp9_ref_frames_s m_pBuffers[VP9_BUFFER_POOL_MAX_SIZE]; + protected: - void vp9_reader_fill(); - int32_t vp9_reader_init (uint32_t size); - int32_t vp9_read_bit(); - int32_t vp9_read(int32_t probability); - int32_t vp9_read_literal(int32_t bits); - uint32_t ParseCompressedVP9(); - int32_t get_unsigned_bits(uint32_t num_values); - uint32_t swGetBitsUnsignedMax( uint32_t maxValue); - vp9_prob vp9hwdReadProbDiffUpdate(uint8_t oldp); - int32_t vp9_inv_recenter_nonneg(int32_t v, int32_t m); - int32_t inv_remap_prob(int32_t v, int32_t m); - int32_t merge_index(int32_t v, int32_t n, int32_t modulus); - uint32_t BoolDecodeUniform(uint32_t n); - uint32_t vp9hwdDecodeSubExp(uint32_t k, uint32_t num_syms); - uint32_t vp9hwdDecodeCoeffUpdate(uint8_t probCoeffs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1]); - uint32_t vp9hwdDecodeMvUpdate(vp9_prob_update_s *pProbSetup); - void update_nmv(vp9_prob *const p, const vp9_prob upd_p); + void UpdateFramePointers(VkPicIf* currentPicture); + bool AddBuffertoOutputQueue(VkPicIf* pDispPic); + void AddBuffertoDispQueue(VkPicIf* pDispPic); + virtual void lEndPicture(VkPicIf* pDispPic); + void EndOfStream() override; public: VulkanVP9Decoder(VkVideoCodecOperationFlagBitsKHR std); - void ResetProbs(vp9_prob_update_s *pProbSetup); - void GetProbs(vp9_prob_update_s *pProbSetup); - uint32_t UpdateForwardProbability(vp9_prob_update_s *pProbSetup, const 
unsigned char* pCompressed_Header); - void UpdateBackwardProbability(vp9_prob_update_s *pProbSetup); + ~VulkanVP9Decoder(); // TODO: Need to implement these functions. - bool IsPictureBoundary(int32_t) { return true; }; - int32_t ParseNalUnit() { return NALU_UNKNOWN; }; + bool IsPictureBoundary(int32_t) override { return true; }; + int32_t ParseNalUnit() override { return NALU_UNKNOWN; }; bool DecodePicture(VkParserPictureData *) { return false; }; - void InitParser() {} - bool BeginPicture(VkParserPictureData *) { return false; } - void CreatePrivateContext() {} - void FreeContext() {} + void InitParser() override; + bool BeginPicture(VkParserPictureData *) override; + void CreatePrivateContext() override {} + void FreeContext() override {} + +private: + bool ParseByteStream(const VkParserBitstreamPacket* pck, size_t* pParsedBtes) override; + bool ParseFrameHeader(uint32_t framesize); + bool ParseUncompressedHeader(); + bool ParseColorConfig(); + void ParseFrameAndRenderSize(); + void ParseFrameAndRenderSizeWithRefs(); + void ComputeImageSize(); + void ParseLoopFilterParams(); + void ParseQuantizationParams(); + int32_t ReadDeltaQ(); + void ParseSegmentationParams(); + uint8_t CalcMinLog2TileCols(); + uint8_t CalcMaxLog2TileCols(); + void ParseTileInfo(); + void ParseSuperFrameIndex(const uint8_t* data, uint32_t data_sz, uint32_t sizes[8], uint32_t* count); + }; #endif // _VP9_PROBMANAGER_H_ diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp index 99452952..701e4c07 100644 --- a/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp +++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanVP9Decoder.cpp @@ -20,1044 +20,889 @@ VulkanVP9Decoder::VulkanVP9Decoder(VkVideoCodecOperationFlagBitsKHR std) : VulkanVideoDecoder(std) -{ - memset(&m_EntropyLast, 0, sizeof(m_EntropyLast)); - memset(&m_PrevCtx, 0, sizeof(m_PrevCtx)); - memset(&reader, 0, sizeof(vp9_reader)); - 
m_pCompressedHeader = NULL; -} -void VulkanVP9Decoder::vp9_init_mbmode_probs(vp9_prob_update_s *pProbSetup) -{ - uint32_t i, j; - - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - { - for (j = 0; j < 8; j++) - pProbSetup->pProbTab->a.sb_ymode_prob[i][j] = default_if_y_probs[i][j]; - pProbSetup->pProbTab->a.sb_ymode_probB[i][0] = default_if_y_probs[i][8]; - } - - for (i = 0; i < VP9_INTRA_MODES; i++) - { - for (j = 0; j < 8; j++) - pProbSetup->pProbTab->kf_uv_mode_prob[i][j] = default_kf_uv_probs[i][j]; - pProbSetup->pProbTab->kf_uv_mode_probB[i][0] = default_kf_uv_probs[i][8]; - - for (j = 0; j < 8; j++) - pProbSetup->pProbTab->a.uv_mode_prob[i][j] = default_if_uv_probs[i][j]; - pProbSetup->pProbTab->a.uv_mode_probB[i][0] = default_if_uv_probs[i][8]; - } - - memcpy(pProbSetup->pProbTab->a.switchable_interp_prob, vp9_switchable_interp_prob, - sizeof(vp9_switchable_interp_prob)); - memcpy(pProbSetup->pProbTab->a.partition_prob, vp9_partition_probs, - sizeof(vp9_partition_probs)); - memcpy(pProbSetup->pProbTab->a.intra_inter_prob, default_intra_inter_p, - sizeof(default_intra_inter_p)); - memcpy(pProbSetup->pProbTab->a.comp_inter_prob, default_comp_inter_p, - sizeof(default_comp_inter_p)); - memcpy(pProbSetup->pProbTab->a.comp_ref_prob, default_comp_ref_p, - sizeof(default_comp_ref_p)); - memcpy(pProbSetup->pProbTab->a.single_ref_prob, default_single_ref_p, - sizeof(default_single_ref_p)); - memcpy(pProbSetup->pProbTab->a.tx32x32_prob, vp9_default_tx_probs_32x32p, - sizeof(vp9_default_tx_probs_32x32p)); - memcpy(pProbSetup->pProbTab->a.tx16x16_prob, vp9_default_tx_probs_16x16p, - sizeof(vp9_default_tx_probs_16x16p)); - memcpy(pProbSetup->pProbTab->a.tx8x8_prob, vp9_default_tx_probs_8x8p, - sizeof(vp9_default_tx_probs_8x8p)); - memcpy(pProbSetup->pProbTab->a.mbskip_probs, vp9_default_mbskip_probs, - sizeof(vp9_default_mbskip_probs)); - - for (i = 0; i < VP9_INTRA_MODES; i++) - { - for (j = 0; j < VP9_INTRA_MODES; j++) - { - memcpy(pProbSetup->pProbTab->kf_bmode_prob[i][j], 
vp9_kf_default_bmode_probs[i][j], 8); - pProbSetup->pProbTab->kf_bmode_probB[i][j][0] = vp9_kf_default_bmode_probs[i][j][8]; - } - } + , m_PicData() + , m_pCurrPic() + , m_frameIdx(-1) + , m_dataSize() + , m_frameSize() + , m_frameSizeChanged() + , m_rtOrigWidth() + , m_rtOrigHeight() + , m_pictureStarted() + , m_bitstreamComplete(true) + , m_lastFrameWidth(0) + , m_lastFrameHeight(0) + , m_lastShowFrame(false) + , m_pBuffers() { } -void VulkanVP9Decoder::ResetProbs(vp9_prob_update_s *pProbSetup) +VulkanVP9Decoder::~VulkanVP9Decoder() { - //reset segmentMap (buffers going to HWIF_SEGMENT_READ_BASE_LSB and HWIF_SEGMENT_WRITE_BASE_LSB) - - uint32_t i, j, k, l, m; - - memcpy(pProbSetup->pProbTab->a.inter_mode_prob, vp9_default_inter_mode_prob, sizeof(vp9_default_inter_mode_prob)); - vp9_init_mbmode_probs(pProbSetup); - memcpy(&pProbSetup->pProbTab->a.nmvc, &vp9_default_nmv_context, sizeof(nvdec_nmv_context)); - - /* Copy the default probs into two separate prob tables: part1 and part2. */ - - for( i = 0; i < VP9_BLOCK_TYPES; i++ ) { - for ( j = 0; j < VP9_REF_TYPES; j++ ) { - for ( k = 0; k < VP9_COEF_BANDS; k++ ) { - for ( l = 0; l < VP9_PREV_COEF_CONTEXTS; l++ ) { - if (l >= 3 && k == 0) - continue; - - for ( m = 0; m < UNCONSTRAINED_NODES; m++ ) { - pProbSetup->pProbTab->a.probCoeffs[i][j][k][l][m] = - default_coef_probs_4x4[i][j][k][l][m]; - pProbSetup->pProbTab->a.probCoeffs8x8[i][j][k][l][m] = - default_coef_probs_8x8[i][j][k][l][m]; - pProbSetup->pProbTab->a.probCoeffs16x16[i][j][k][l][m] = - default_coef_probs_16x16[i][j][k][l][m]; - pProbSetup->pProbTab->a.probCoeffs32x32[i][j][k][l][m] = - default_coef_probs_32x32[i][j][k][l][m]; - } - } - } - } - } - - /* Store the default probs for all saved contexts */ - if (pProbSetup->keyFrame || pProbSetup->errorResilient || pProbSetup->resetFrameContext == 3) - { - for (i = 0; i < FRAME_CONTEXTS; i++) - memcpy( &m_EntropyLast[i], pProbSetup->pProbTab, sizeof(nvdec_vp9EntropyProbs_t)); - } - else if 
(pProbSetup->resetFrameContext == 2) - memcpy( &m_EntropyLast[pProbSetup->frameContextIdx], pProbSetup->pProbTab, sizeof(nvdec_vp9EntropyProbs_t)); } -void VulkanVP9Decoder::GetProbs(vp9_prob_update_s *pProbSetup) +void VulkanVP9Decoder::InitParser() { - memcpy(pProbSetup->pProbTab, &m_EntropyLast[pProbSetup->frameContextIdx], sizeof(m_EntropyLast[pProbSetup->frameContextIdx])); + m_bNoStartCodes = true; + m_bEmulBytesPresent = false; + m_pCurrPic = nullptr; + m_bitstreamComplete = true; + m_pictureStarted = false; + EndOfStream(); } -///////////////////////////////////////////////////////////////////////////////// - - -void VulkanVP9Decoder::vp9_reader_fill() +void VulkanVP9Decoder::EndOfStream() { - vp9_reader *r = &reader; - uint32_t buffer_end = r->buffer_end; - uint32_t buffer = r->buffer; - VP9_BD_VALUE value = r->value; - int32_t count = r->count; - int32_t shift = BD_VALUE_SIZE - 8 - (count + 8); - int32_t loop_end = 0; - const int32_t bits_left = (int32_t)((buffer_end - buffer)*CHAR_BIT); - const int32_t x = shift + CHAR_BIT - bits_left; - if (x >= 0) { - count += LOTS_OF_BITS; - loop_end = x; + if (m_pCurrPic) { + m_pCurrPic->Release(); + m_pCurrPic = nullptr; } - if (x < 0 || bits_left) - { - while (shift >= loop_end) - { - count += CHAR_BIT; - uint8_t temp = m_pCompressedHeader[r->pos++]; //u( 8); - value |= (VP9_BD_VALUE)temp << shift; - shift -= CHAR_BIT; - buffer++; + for (int i = 0; i < 8; i++) { + if (m_pBuffers[i].buffer) { + m_pBuffers[i].buffer->Release(); + m_pBuffers[i].buffer = nullptr; } } - r->buffer = buffer; - r->value = value; - r->count = count; -} - -int32_t VulkanVP9Decoder::vp9_reader_init(uint32_t size) -{ - int32_t marker_bit = 0; - vp9_reader *r = &reader; - r->buffer_end = 0 + size; - r->buffer = 0; - r->value = 0; - r->count = -8; - r->range = 255; - r->pos = 0; - - vp9_reader_fill(); - marker_bit = vp9_read_bit(); - return marker_bit != 0; } -int32_t VulkanVP9Decoder::vp9_read_bit() +bool VulkanVP9Decoder::ParseByteStream(const 
VkParserBitstreamPacket* pck, size_t* pParsedBytes) { - return vp9_read( 128); -} + const uint8_t* pDataIn = (uint8_t*)pck->pByteStream; + int dataSize = (int)pck->nDataLength; -int32_t VulkanVP9Decoder::vp9_read(int32_t probability) -{ - - vp9_reader *br = &reader; - uint32_t bit = 0; - VP9_BD_VALUE value; - VP9_BD_VALUE bigsplit; - int32_t count; - uint32_t range; - uint32_t split = 1 + (((br->range - 1) * probability) >> 8); - if (br->count < 0) - vp9_reader_fill(); - value = br->value; - count = br->count; - bigsplit = (VP9_BD_VALUE)split << (BD_VALUE_SIZE - 8); - - range = split; - if (value >= bigsplit) - { - range = br->range - split; - value = value - bigsplit; - bit = 1; + if (pParsedBytes) { + *pParsedBytes = 0; } - uint32_t shift = vp9dx_bitreader_norm[range]; - range <<= shift; - value <<= shift; - count -= shift; - br->value = value; - br->count = count; - br->range = range; - return bit; -} - -int32_t VulkanVP9Decoder::vp9_read_literal( int32_t bits) -{ - int32_t z = 0, bit; - for (bit = bits - 1; bit >= 0; bit--) - { - z |= vp9_read_bit() << bit; + // Use different bitstreamBuffer than the previous frames bitstreamBuffer + // TODO: Make sure that the bitstreamBuffer is not in use. 
+ VkSharedBaseObj bitstreamBuffer; + assert(m_pClient); + m_pClient->GetBitstreamBuffer(m_bitstreamDataLen, + m_bufferOffsetAlignment, m_bufferSizeAlignment, + nullptr, 0, bitstreamBuffer); + assert(bitstreamBuffer); + if (!bitstreamBuffer) { + return false; } - return z; -} -//////////////////////////////////////////////////////////////////////////////////// -//Forward Update -uint32_t VulkanVP9Decoder::UpdateForwardProbability(vp9_prob_update_s *pProbSetup, const unsigned char* pCompressed_Header) -{ - nvdec_vp9EntropyProbs_t *fc = pProbSetup->pProbTab; // Frame context - - uint32_t tmp, i, j, k; + m_bitstreamDataLen = m_bitstreamData.SetBitstreamBuffer(bitstreamBuffer); + m_bitstreamData.ResetStreamMarkers(); - m_pCompressedHeader = pCompressed_Header; - m_PrevCtx = pProbSetup->pProbTab->a; - - if (vp9_reader_init(pProbSetup->offsetToDctParts) != 0) - { - return NOK; - } - - if (pProbSetup->lossless) - pProbSetup->transform_mode = ONLY_4X4; - else - { - pProbSetup->transform_mode = vp9_read_literal( 2); - if (pProbSetup->transform_mode == ALLOW_32X32) - pProbSetup->transform_mode += vp9_read_literal( 1); - if (pProbSetup->transform_mode == TX_MODE_SELECT) - { - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) - { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.tx8x8_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.tx16x16_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.tx32x32_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } - } + if (m_bitstreamData.GetBitstreamBuffer() == nullptr) { + // make sure we're 
initialized + return false; } - // Coefficient probability update - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs); + m_nCallbackEventCount = 0; - if( tmp != OK ) return (tmp); - if (pProbSetup->transform_mode > ONLY_4X4) { - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs8x8); - if( tmp != OK ) return (tmp); - } - if (pProbSetup->transform_mode > ALLOW_8X8) { - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs16x16); - if( tmp != OK ) return (tmp); - } - if (pProbSetup->transform_mode > ALLOW_16X16) { - tmp = vp9hwdDecodeCoeffUpdate( fc->a.probCoeffs32x32); - if( tmp != OK ) return (tmp); + // Handle discontinuity + if (pck->bDiscontinuity) { + memset(&m_nalu, 0, sizeof(m_nalu)); + memset(&m_PTSQueue, 0, sizeof(m_PTSQueue)); + m_bDiscontinuityReported = true; + m_pictureStarted = false; } - pProbSetup->probsDecoded = 1; + if (pck->bPTSValid) { + m_PTSQueue[m_lPTSPos].bPTSValid = true; + m_PTSQueue[m_lPTSPos].llPTS = pck->llPTS; + m_PTSQueue[m_lPTSPos].llPTSPos = m_llParsedBytes; + m_PTSQueue[m_lPTSPos].bDiscontinuity = m_bDiscontinuityReported; + m_bDiscontinuityReported = false; + m_lPTSPos = (m_lPTSPos + 1) % MAX_QUEUED_PTS; + } - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - fc->a.mbskip_probs[k] = vp9hwdReadProbDiffUpdate( fc->a.mbskip_probs[k]); - } + if (pck->pByteStream && pck->nDataLength && m_frameIdx == -1) { + memset(&m_PicData, 0, sizeof(VkParserVp9PictureData)); + m_frameIdx++; } - if(!pProbSetup->keyFrame) - { - for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - for (j = 0; j < VP9_INTER_MODES - 1; j++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.inter_mode_prob[i][j]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } + while ((dataSize > 0) || m_pictureStarted) { + if (!m_pictureStarted) { + if (m_bitstreamComplete) { + // fill bitstreambuffer from start + // assuming parser will get bitstream per frame from demuxer + m_frameSize = dataSize; + m_nalu.start_offset = 0; + 
m_nalu.end_offset = 0; } - } - if (pProbSetup->mcomp_filter_type == SWITCHABLE) { - for (j = 0; j < VP9_SWITCHABLE_FILTERS+1; ++j) { - for (i = 0; i < VP9_SWITCHABLE_FILTERS-1; ++i) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.switchable_interp_prob[j][i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } + if (((VkDeviceSize)dataSize > m_bitstreamDataLen) && !resizeBitstreamBuffer(dataSize - m_bitstreamDataLen)) { + return false; } - } - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.intra_inter_prob[i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); + if (dataSize >= (m_frameSize - m_nalu.end_offset)) { + memcpy(m_bitstreamData.GetBitstreamPtr() + m_nalu.end_offset, pDataIn, m_frameSize - m_nalu.end_offset); + m_pictureStarted = true; + pDataIn += (m_frameSize - (int)m_nalu.end_offset); + dataSize -= (m_frameSize - (int)m_nalu.end_offset); + m_nalu.end_offset = m_frameSize; + m_bitstreamComplete = true; + } else { + memcpy(m_bitstreamData.GetBitstreamPtr() + m_nalu.end_offset, pDataIn, dataSize); + m_nalu.end_offset += dataSize; + pDataIn += dataSize; + dataSize = 0; + m_bitstreamComplete = false; } - } - - // Compound prediction mode probabilities - if (pProbSetup->allow_comp_inter_inter) { - tmp = vp9_read_literal( 1); - pProbSetup->comp_pred_mode = tmp; - if(tmp) { - tmp = vp9_read_literal( 1); - pProbSetup->comp_pred_mode += tmp; - if (pProbSetup->comp_pred_mode == HYBRID_PREDICTION) - { - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.comp_inter_prob[i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); + } else { + uint32_t frames_processed = 0; + uint32_t sizeparsed = 0, framesdone = 0; + + uint32_t frame_size = m_frameSize; + + const uint8_t* data_start = m_bitstreamData.GetBitstreamPtr(); + const uint8_t* data_end = data_start + m_frameSize; + uint32_t data_size = m_frameSize; 
+ uint32_t frames_in_superframe, frame_sizes[8]; + + ParseSuperFrameIndex(data_start, data_size, frame_sizes, &frames_in_superframe); + + do { + // Skip over the superframe index, if present + if ((data_size > 0) && ((data_start[0] & 0xe0) == 0xc0)) { + const uint8_t marker = data_start[0]; + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const uint32_t index_sz = 2 + mag * frames; + + if ((data_size >= index_sz) && (data_start[index_sz - 1] == marker)) { + data_start += index_sz; + data_size -= index_sz; + if (data_start < data_end) { + continue; + } else { + break; } } } - } - } else { - pProbSetup->comp_pred_mode = SINGLE_PREDICTION_ONLY; - } - if (pProbSetup->comp_pred_mode != COMP_PREDICTION_ONLY) { - for (i = 0; i < REF_CONTEXTS; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.single_ref_prob[i][0]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.single_ref_prob[i][1]; - *prob = vp9hwdReadProbDiffUpdate( *prob); - } - } - } + // Use the correct size for this frame, if an index is present + if (frames_in_superframe > 0) { + frame_size = frame_sizes[frames_processed]; + if (data_size < frame_size) { + // Invalid frame size in index + return false; + } + data_size = frame_size; + m_nalu.start_offset = sizeparsed; - if (pProbSetup->comp_pred_mode != SINGLE_PREDICTION_ONLY) { - for (i = 0; i < REF_CONTEXTS; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.comp_ref_prob[i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); } - } - } - // Superblock intra luma pred mode probabilities - for(j = 0 ; j < BLOCK_SIZE_GROUPS; ++j) - { - for( i = 0 ; i < 8; ++i ) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - fc->a.sb_ymode_prob[j][i] = vp9hwdReadProbDiffUpdate( - fc->a.sb_ymode_prob[j][i]); - } - } - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - 
fc->a.sb_ymode_probB[j][0] = vp9hwdReadProbDiffUpdate( - fc->a.sb_ymode_probB[j][0]); - } - } + ParseFrameHeader(frame_size); - for (j = 0; j < NUM_PARTITION_CONTEXTS; j++) { - for (i = 0; i < PARTITION_TYPES - 1; i++) { - tmp = vp9_read( VP9_DEF_UPDATE_PROB); - if (tmp) { - uint8_t *prob = &fc->a.partition_prob[INTER_FRAME][j][i]; - *prob = vp9hwdReadProbDiffUpdate( *prob); + if (frames_in_superframe > 0) { + sizeparsed += frame_sizes[framesdone]; + framesdone++; } - } + data_start += data_size; + while (data_start < data_end && *data_start == 0) { + data_start++; + } + + data_size = (int)(data_end - data_start); + frames_processed += 1; + } while (data_start < data_end); + + m_frameIdx++; + m_pictureStarted = false; } - // Motion vector tree update - tmp = vp9hwdDecodeMvUpdate(pProbSetup); - if( tmp != OK ) - return (tmp); } - return (OK); -} + if (pck->bEOS) { + end_of_stream(); + } -void VulkanVP9Decoder::update_nmv( vp9_prob *const p, const vp9_prob upd_p) -{ - uint32_t tmp = vp9_read( upd_p); - if (tmp) { -#if 1 //def LOW_PRECISION_MV_UPDATE - *p = (vp9_read_literal( 7) << 1) | 1; -#else - *p = vp9_read_literal( 8); -#endif + if (pParsedBytes) { + *pParsedBytes = pck->nDataLength; } + + return true; } -uint32_t VulkanVP9Decoder::vp9hwdDecodeMvUpdate(vp9_prob_update_s *pProbSetup) + +bool VulkanVP9Decoder::ParseFrameHeader(uint32_t framesize) { - uint32_t i, j, k; - nvdec_nmv_context *mvctx = &pProbSetup->pProbTab->a.nmvc; + m_llNaluStartLocation = m_llParsedBytes; + m_llFrameStartLocation = m_llNaluStartLocation; + m_llParsedBytes += framesize; + //m_pSliceOffsets[0] = 0; -#if 0 - tmp = vp9_read_literal( 1); - if (!tmp) return HANTRO_OK; -#endif + init_dbits(); + //parse uncompressed header + if(!ParseUncompressedHeader()) + { + assert((!"Error in ParseUncompressedVP9\n")); + return 0; + } + if (m_PicData.show_existing_frame == true) { + // display an existing frame + VkPicIf* pDispPic = m_pBuffers[m_PicData.frame_to_show_map_idx].buffer; + if (pDispPic) { + 
pDispPic->AddRef(); + } + + AddBuffertoOutputQueue(pDispPic); - for (j = 0; j < MV_JOINTS - 1; ++j) { - update_nmv( &mvctx->joints[j], - VP9_NMV_UPDATE_PROB); + return 0; } - for (i = 0; i < 2; ++i) { - update_nmv( &mvctx->sign[i], VP9_NMV_UPDATE_PROB); - for (j = 0; j < MV_CLASSES - 1; ++j) { - update_nmv( &mvctx->classes[i][j], VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < CLASS0_SIZE - 1; ++j) { - update_nmv( &mvctx->class0[i][j], VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < MV_OFFSET_BITS; ++j) { - update_nmv( &mvctx->bits[i][j], VP9_NMV_UPDATE_PROB); - } + + // handle bitstream start offset alignment (for super frame) + uint32_t addOffset = m_nalu.start_offset & (m_bufferOffsetAlignment - 1); + m_PicData.uncompressedHeaderOffset += addOffset; + m_PicData.compressedHeaderOffset += addOffset; + m_PicData.tilesOffset += addOffset; + + *m_pVkPictureData = VkParserPictureData(); + m_pVkPictureData->CodecSpecific.vp9 = m_PicData; + m_pVkPictureData->numSlices = m_PicData.numTiles; + m_pVkPictureData->bitstreamDataLen = (framesize + addOffset + m_bufferSizeAlignment - 1) & ~(m_bufferSizeAlignment - 1); // buffer is already aligned so, no issues. 
+ m_pVkPictureData->bitstreamData = m_bitstreamData.GetBitstreamBuffer(); + m_pVkPictureData->bitstreamDataOffset = m_nalu.start_offset & ~((int64_t)m_bufferOffsetAlignment - 1); + + if (!BeginPicture(m_pVkPictureData)) { + assert(!"BeginPicture failed"); + return false; } - for (i = 0; i < 2; ++i) { - for (j = 0; j < CLASS0_SIZE; ++j) { - for (k = 0; k < 3; ++k) - update_nmv( &mvctx->class0_fp[i][j][k], VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < 3; ++j) { - update_nmv( &mvctx->fp[i][j], VP9_NMV_UPDATE_PROB); - } + bool bSkipped = false; + if (m_pClient != nullptr) { + // Notify client + if (!m_pClient->DecodePicture(m_pVkPictureData)) { + bSkipped = true; + // WARNING: skipped decoding current picture; + } else { + m_nCallbackEventCount++; + } + } else { + // WARNING: no valid render target for current picture } - if (pProbSetup->allow_high_precision_mv) { - for (i = 0; i < 2; ++i) { - update_nmv( &mvctx->class0_hp[i], VP9_NMV_UPDATE_PROB); - update_nmv( &mvctx->hp[i], VP9_NMV_UPDATE_PROB); - } + //m_PicData.prevIsKeyFrame = m_PicData.keyFrame; + //m_PicData.PrevShowFrame = m_PicData.showFrame; + UpdateFramePointers(m_pCurrPic); + + if (m_PicData.stdPictureInfo.flags.show_frame && !bSkipped) { + // Call back codec for post-decode event (display the decoded frame) + AddBuffertoOutputQueue(m_pCurrPic); + m_pCurrPic = nullptr; + } else { + m_pCurrPic->Release(); + m_pCurrPic = nullptr; } - return (OK); + return 1; } -uint32_t VulkanVP9Decoder::vp9hwdDecodeCoeffUpdate( - uint8_t probCoeffs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1]) +void VulkanVP9Decoder::UpdateFramePointers(VkPicIf* currentPicture) { - uint32_t i, j, k, l, m; - uint32_t tmp; - tmp = vp9_read_literal( 1); - if (!tmp) return OK; - for( i = 0; i < VP9_BLOCK_TYPES; i++ ) - { - for ( j = 0; j < VP9_REF_TYPES; j++ ) - { - for ( k = 0; k < VP9_COEF_BANDS; k++ ) - { - for ( l = 0; l < VP9_PREV_COEF_CONTEXTS; l++ ) - { - if (l >= 3 && k == 0) - continue; - - 
for ( m = 0; m < UNCONSTRAINED_NODES; m++ ) - { - tmp = vp9_read( 252); - CHECK_END_OF_STREAM(tmp); - if ( tmp ) - { - uint8_t old, latest; - old = probCoeffs[i][j][k][l][m]; - latest = vp9hwdReadProbDiffUpdate( old); - CHECK_END_OF_STREAM(tmp); - - probCoeffs[i][j][k][l][m] = latest; - } - } - } + StdVideoDecodeVP9PictureInfo* const pStdPicInfo = &m_PicData.stdPictureInfo; + + uint32_t mask, ref_index = 0; + + for (mask = pStdPicInfo->refresh_frame_flags; mask; mask >>= 1) { + if (mask & 1) { + if (m_pBuffers[ref_index].buffer) { + m_pBuffers[ref_index].buffer->Release(); + } + m_pBuffers[ref_index].buffer = currentPicture; + + if (m_pBuffers[ref_index].buffer) { + m_pBuffers[ref_index].buffer->AddRef(); } } + ++ref_index; } - return (OK); -} -int32_t VulkanVP9Decoder::get_unsigned_bits(uint32_t num_values) -{ - int32_t cat = 0; - if (num_values <= 1) - return 0; - num_values--; - while(num_values > 0) - { - cat++; - num_values >>= 1; - } - return cat; + // Invalidate these references until the next frame starts. + //for (int i = 0; i < ALLOWED_REFS_PER_FRAME; i++) { + // pFrameInfo->activeRefIdx[i] = 0xffff; + //} } -uint32_t VulkanVP9Decoder::BoolDecodeUniform( uint32_t n) +bool VulkanVP9Decoder::AddBuffertoOutputQueue(VkPicIf* pDispPic) { - int32_t value, v; - int32_t l = get_unsigned_bits(n); - int32_t m = (1 << l) - n; - if (!l) return 0; - value = vp9_read_literal( l - 1); - if (value >= m) { - v = vp9_read_literal( 1); - value = (value << 1) - m + v; - } - return value; + AddBuffertoDispQueue(pDispPic); + lEndPicture(pDispPic); + + return true; } -uint32_t VulkanVP9Decoder::vp9hwdDecodeSubExp( uint32_t k, uint32_t num_syms) +void VulkanVP9Decoder::AddBuffertoDispQueue(VkPicIf* pDispPic) { - uint32_t i=0, mk=0, value=0; - while (1) { - int32_t b = (i ? 
k + i - 1 : k); - uint32_t a = (1 << b); - if (num_syms <= mk + 3 * a) { - value = BoolDecodeUniform( num_syms - mk) + mk; + int lDisp = 0; + + // Find an entry in m_DispInfo + for (int i = 0; i < MAX_DELAY; i++) { + if (m_DispInfo[i].pPicBuf == pDispPic) { + lDisp = i; break; - } else { - value = vp9_read_bit(); - if (value) { - i++; - mk += a; - } else { - value = vp9_read_literal( b) + mk; - break; - } + } + if ((m_DispInfo[i].pPicBuf == nullptr) + || ((m_DispInfo[lDisp].pPicBuf != nullptr) && (m_DispInfo[i].llPTS - m_DispInfo[lDisp].llPTS < 0))) { + lDisp = i; } } - return value; -} + m_DispInfo[lDisp].pPicBuf = pDispPic; + m_DispInfo[lDisp].bSkipped = false; + m_DispInfo[lDisp].lPOC = 0; + m_DispInfo[lDisp].lNumFields = 2; -int32_t VulkanVP9Decoder::merge_index(int32_t v, int32_t n, int32_t modulus) -{ - int32_t max1 = (n - 1 - modulus / 2) / modulus + 1; - if (v < max1) v = v * modulus + modulus / 2; - else - { - int32_t w; - v -= max1; - w = v; - v += (v + modulus - modulus / 2) / modulus; - while (v % modulus == modulus / 2 || - w != v - (v + modulus - modulus / 2) / modulus) v++; + // Find a PTS in the list + unsigned int ndx = m_lPTSPos; + m_DispInfo[lDisp].llPTS = m_llExpectedPTS; // Will be updated later on + + for (int k = 0; k < MAX_QUEUED_PTS; k++) { + if ((m_PTSQueue[ndx].bPTSValid) && (m_PTSQueue[ndx].llPTSPos - m_llFrameStartLocation <= (m_bNoStartCodes?0:3))) { + m_DispInfo[lDisp].bPTSValid = true; + m_DispInfo[lDisp].llPTS = m_PTSQueue[ndx].llPTS; + m_PTSQueue[ndx].bPTSValid = false; + } + ndx = (ndx + 1) % MAX_QUEUED_PTS; } - return v; } -int32_t VulkanVP9Decoder::vp9_inv_recenter_nonneg(int32_t v, int32_t m) +void VulkanVP9Decoder::lEndPicture(VkPicIf* pDispPic) { - if (v > (m << 1)) return v; - else if ((v & 1) == 0) return (v >> 1) + m; - else return m - ((v + 1) >> 1); -} + if (pDispPic) { + display_picture(pDispPic); + pDispPic->Release(); + } -int32_t VulkanVP9Decoder::inv_remap_prob(int32_t v, int32_t m) -{ - const int32_t n = 255; - v = 
merge_index(v, n - 1, MODULUS_PARAM); - m--; - if ((m << 1) <= n) - return 1 + vp9_inv_recenter_nonneg(v + 1, m); - else - return n - vp9_inv_recenter_nonneg(v + 1, n - 1 - m); } -vp9_prob VulkanVP9Decoder::vp9hwdReadProbDiffUpdate( uint8_t oldp) + +bool VulkanVP9Decoder::ParseUncompressedHeader() { - int32_t p; - int32_t delp = vp9hwdDecodeSubExp( 4, 255 ); - p = (vp9_prob)inv_remap_prob(delp, oldp); - return p; -} + VkParserVp9PictureData *pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9ColorConfig* pStdColorConfig = &m_PicData.stdColorConfig; + StdVideoVP9LoopFilter* pStdLoopFilter = &m_PicData.stdLoopFilter; + m_frameSizeChanged = false; -//Backward update + VP9_CHECK_FRAME_MARKER; + uint32_t profile = u(1); + profile |= u(1) << 1; + pStdPicInfo->profile = (StdVideoVP9Profile)profile; + if (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3) { + if (u(1) != 0) { + assert(!"Invalid syntax"); + return false; + } + } -// this function assumes prob1 and prob2 are already within [1,255] range -vp9_prob VulkanVP9Decoder::weighted_prob(int32_t prob1, int32_t prob2, int32_t factor) -{ - return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); -} + pPicData->show_existing_frame = u(1); + if (pPicData->show_existing_frame) { + pPicData->frame_to_show_map_idx = u(3); + //U32 frame_to_show = vp9parser->m_pBuffers[idx_to_show]; + //Handle direct show: CHECK + pPicData->uncompressedHeaderOffset = (consumed_bits() + 7) >> 3; + pPicData->compressedHeaderSize = 0; + pStdPicInfo->refresh_frame_flags = 0; + pStdLoopFilter->loop_filter_level = 0; + return true; + } -vp9_prob VulkanVP9Decoder::clip_prob(uint32_t p) -{ - return (vp9_prob)((p > 255) ? 255u : (p < 1) ? 1u : p); -} + pStdPicInfo->frame_type = (StdVideoVP9FrameType)u(1); + pStdPicInfo->flags.show_frame = u(1); + pStdPicInfo->flags.error_resilient_mode = u(1); -vp9_prob VulkanVP9Decoder::get_prob(uint32_t num, uint32_t den) -{ - return (den == 0) ? 
128u : clip_prob((num * 256 + (den >> 1)) / den); -} + if (pStdPicInfo->frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY) { + VP9_CHECK_FRAME_SYNC_CODE; + ParseColorConfig(); + ParseFrameAndRenderSize(); + pStdPicInfo->refresh_frame_flags = (1 << STD_VIDEO_VP9_NUM_REF_FRAMES) - 1; + pPicData->FrameIsIntra = true; -vp9_prob VulkanVP9Decoder::get_binary_prob(uint32_t n0, uint32_t n1) -{ - return get_prob(n0, n0 + n1); -} + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; ++i) { + pPicData->ref_frame_idx[i] = 0; + } + } else { // non key frame + pStdPicInfo->flags.intra_only = pStdPicInfo->flags.show_frame ? 0 : u(1); + pPicData->FrameIsIntra = pStdPicInfo->flags.intra_only; + pStdPicInfo->reset_frame_context = pStdPicInfo->flags.error_resilient_mode ? 0 : u(2); + + if (pStdPicInfo->flags.intra_only == 1) { + VP9_CHECK_FRAME_SYNC_CODE; + if (pStdPicInfo->profile > STD_VIDEO_VP9_PROFILE_0) { + ParseColorConfig(); + } else { + pStdColorConfig->color_space = STD_VIDEO_VP9_COLOR_SPACE_BT_601; + pStdColorConfig->subsampling_x = 1; + pStdColorConfig->subsampling_y = 1; + pStdColorConfig->BitDepth = 8; + } -uint32_t VulkanVP9Decoder::convert_distribution(uint32_t i, - const vp9_tree_index * tree, - vp9_prob probs[], - uint32_t branch_ct[][2], - const uint32_t num_events[], - uint32_t tok0_offset) -{ - uint32_t left, right; + pStdPicInfo->refresh_frame_flags = u(STD_VIDEO_VP9_NUM_REF_FRAMES); //for non key frame refresh only some - if (tree[i] <= 0) - { - left = num_events[-tree[i] - tok0_offset]; + ParseFrameAndRenderSize(); + } else { // inter frame + pStdPicInfo->refresh_frame_flags = u(STD_VIDEO_VP9_NUM_REF_FRAMES); + + pStdPicInfo->ref_frame_sign_bias_mask = 0; + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { + pPicData->ref_frame_idx[i] = u(3); + pStdPicInfo->ref_frame_sign_bias_mask |= (u(1) << (STD_VIDEO_VP9_REFERENCE_NAME_LAST_FRAME + i)); + } + + ParseFrameAndRenderSizeWithRefs(); + + pStdPicInfo->flags.allow_high_precision_mv = u(1); + + // interpolation 
filter + bool is_filter_switchable = u(1); //mb_switchable_mcomp_filt + if (is_filter_switchable) { + pStdPicInfo->interpolation_filter = STD_VIDEO_VP9_INTERPOLATION_FILTER_SWITCHABLE; + } else { + const StdVideoVP9InterpolationFilter literal_to_filter[] = { + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SMOOTH, + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP, + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SHARP, + STD_VIDEO_VP9_INTERPOLATION_FILTER_BILINEAR }; + pStdPicInfo->interpolation_filter = literal_to_filter[u(2)]; + } + } } - else - { - left = convert_distribution(tree[i], tree, probs, branch_ct, num_events, tok0_offset); + + if (pStdPicInfo->flags.error_resilient_mode == 0) { + /* Refresh entropy probs, + * 0 == this frame probs are used only for this frame decoding, + * 1 == this frame probs will be stored for future reference */ + pStdPicInfo->flags.refresh_frame_context = u(1); + pStdPicInfo->flags.frame_parallel_decoding_mode = u(1); + } else { + pStdPicInfo->flags.refresh_frame_context = 0; + pStdPicInfo->flags.frame_parallel_decoding_mode = 1; } - if (tree[i + 1] <= 0) - { - right = num_events[-tree[i + 1] - tok0_offset]; + + pStdPicInfo->frame_context_idx = u(2); + + if ((pPicData->FrameIsIntra == 1) || (pStdPicInfo->flags.error_resilient_mode == 1)) { + StdVideoVP9Segmentation* pStdSegment = &pPicData->stdSegmentation; + ///* Clear all previous segment data */ + memset(pStdSegment->FeatureEnabled, 0, sizeof(pStdSegment->FeatureEnabled)); + memset(pStdSegment->FeatureData, 0, sizeof(pStdSegment->FeatureData)); + pStdPicInfo->frame_context_idx = 0; } - else - { - right = convert_distribution(tree[i + 1], tree, probs, branch_ct, num_events, tok0_offset); + + ParseLoopFilterParams(); + ParseQuantizationParams(); + ParseSegmentationParams(); + ParseTileInfo(); + + pPicData->compressedHeaderSize = u(16); + + pPicData->uncompressedHeaderOffset = 0; + pPicData->compressedHeaderOffset = (consumed_bits() + 7) >> 3; + pPicData->tilesOffset = 
pPicData->compressedHeaderOffset + pPicData->compressedHeaderSize; + + pPicData->ChromaFormat = (pStdColorConfig->subsampling_x == 1) && (pStdColorConfig->subsampling_y == 1) ? 1 : 0; + assert(pPicData->ChromaFormat); // TODO: support only YUV420 + + return true; +} + +bool VulkanVP9Decoder::ParseColorConfig() +{ + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9ColorConfig* pStdColorConfig = &m_PicData.stdColorConfig; + + if (pStdPicInfo->profile >= STD_VIDEO_VP9_PROFILE_2) { + pStdColorConfig->BitDepth = u(1) ? 12 : 10; + } else { + pStdColorConfig->BitDepth = 8; + } + + pStdColorConfig->color_space = (StdVideoVP9ColorSpace)u(3); + + if (pStdColorConfig->color_space != STD_VIDEO_VP9_COLOR_SPACE_RGB) { + pStdColorConfig->flags.color_range = u(1); + if ((pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_1) || + (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3)) { + pStdColorConfig->subsampling_x = u(1); + pStdColorConfig->subsampling_y = u(1); + VP9_CHECK_ZERO_BIT + } else { + pStdColorConfig->subsampling_x = 1; + pStdColorConfig->subsampling_y = 1; + } + } else { + pStdColorConfig->flags.color_range = 1; + if ((pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_1) || + (pStdPicInfo->profile == STD_VIDEO_VP9_PROFILE_3)) { + pStdColorConfig->subsampling_x = 0; + pStdColorConfig->subsampling_y = 0; + VP9_CHECK_ZERO_BIT + } } - probs[i>>1] = get_binary_prob(left, right); - branch_ct[i>>1][0] = left; - branch_ct[i>>1][1] = right; - return left + right; + return true; } -void VulkanVP9Decoder::vp9_tree_probs_from_distribution(const vp9_tree_index * tree, - vp9_prob probs [ /* n-1 */ ], - uint32_t branch_ct [ /* n-1 */ ] [2], - const uint32_t num_events[ /* n */ ], - uint32_t tok0_offset) +void VulkanVP9Decoder::ParseFrameAndRenderSize() { - convert_distribution(0, tree, probs, branch_ct, num_events, tok0_offset); + VkParserVp9PictureData *pPicData = &m_PicData; + + pPicData->FrameWidth = u(16) + 1; + pPicData->FrameHeight = u(16) + 1; + + 
ComputeImageSize(); + + if (u(1) == 1) { // render_and_frame_size_different + pPicData->renderWidth = u(16) + 1; + pPicData->renderHeight = u(16) + 1; + } else { + pPicData->renderWidth = pPicData->FrameWidth; + pPicData->renderHeight = pPicData->FrameHeight; + } } -void VulkanVP9Decoder::update_coef_probs(uint8_t dst_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint8_t pre_coef_probs[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][ENTROPY_NODES_PART1], - uint32_t coef_counts[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES+1], - uint32_t (*eob_counts)[VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS], - int32_t count_sat, int32_t update_factor) +void VulkanVP9Decoder::ParseFrameAndRenderSizeWithRefs() { - int32_t t, i, j, k, l, count; - uint32_t branch_ct[VP9_ENTROPY_NODES][2]; - vp9_prob coef_probs[VP9_ENTROPY_NODES]; - int32_t factor; + VkParserVp9PictureData* pPicData = &m_PicData; - //int32_t brancharr[VP9_BLOCK_TYPES][VP9_REF_TYPES][36][VP9_PREV_COEF_CONTEXTS] = {0}; - //int32_t coeffprobarr[VP9_BLOCK_TYPES][VP9_REF_TYPES][VP9_COEF_BANDS][VP9_PREV_COEF_CONTEXTS] = {0}; - //memset(brancharr, 0, sizeof(int32_t)*VP9_BLOCK_TYPES*VP9_REF_TYPES*VP9_COEF_BANDS*VP9_PREV_COEF_CONTEXTS); - //memset(coeffprobarr, 0, sizeof(int32_t)*VP9_BLOCK_TYPES*VP9_REF_TYPES*VP9_COEF_BANDS*VP9_PREV_COEF_CONTEXTS); + bool found_ref = false; - for (i = 0; i < VP9_BLOCK_TYPES; ++i) - { - for (j = 0; j < VP9_REF_TYPES; ++j) - { - for (k = 0; k < VP9_COEF_BANDS; ++k) - { - for (l = 0; l < VP9_PREV_COEF_CONTEXTS; ++l) - { - if (l >= 3 && k == 0) - continue; - vp9_tree_probs_from_distribution(vp9_coefmodel_tree, - coef_probs, branch_ct, - coef_counts[i][j][k][l], 0); - branch_ct[0][1] = eob_counts[i][j][k][l] - branch_ct[0][0]; - coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]); - //brancharr[i][j][k][l] = branch_ct[0][1]; - 
//coeffprobarr[i][j][k][l] = coef_probs[0]; - for (t = 0; t < UNCONSTRAINED_NODES; ++t) - { - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - dst_coef_probs[i][j][k][l][t] = weighted_prob(pre_coef_probs[i][j][k][l][t], coef_probs[t], factor); - } - } + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; ++i) { + found_ref = u(1); + if (found_ref) { + VkPicIf* pRefPic = m_pBuffers[pPicData->ref_frame_idx[i]].buffer; + if (pRefPic != nullptr) { + pPicData->FrameWidth = pRefPic->decodeWidth; + pPicData->FrameHeight = pRefPic->decodeHeight; + + ComputeImageSize(); + } + + if (u(1) == 1) { // render_and_frame_size_different + pPicData->renderWidth = u(16) + 1; + pPicData->renderHeight = u(16) + 1; + } else { + pPicData->renderWidth = pPicData->FrameWidth; + pPicData->renderHeight = pPicData->FrameHeight; } + + break; } } + if (!found_ref) { + ParseFrameAndRenderSize(); + } } -void VulkanVP9Decoder::adaptCoefProbs(vp9_prob_update_s *pProbSetup) +void VulkanVP9Decoder::ComputeImageSize() { - int32_t update_factor; /* denominator 256 */ - int32_t count_sat; + VkParserVp9PictureData* pPicData = &m_PicData; - if(pProbSetup->keyFrame) - { - update_factor = COEF_MAX_UPDATE_FACTOR_KEY; - count_sat = COEF_COUNT_SAT_KEY; - } - else if (pProbSetup->prevIsKeyFrame) - { - update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; // adapt quickly - count_sat = COEF_COUNT_SAT_AFTER_KEY; - } - else - { - update_factor = COEF_MAX_UPDATE_FACTOR; - count_sat = COEF_COUNT_SAT; + // compute_image_size() + pPicData->MiCols = (pPicData->FrameWidth + 7) >> 3; + pPicData->MiRows = (pPicData->FrameHeight + 7) >> 3; + pPicData->Sb64Cols = (pPicData->MiCols + 7) >> 3; + pPicData->Sb64Rows = (pPicData->MiRows + 7) >> 3; + + // compute_image_size() side effects (7.2.6) + if (((uint32_t)m_lastFrameHeight != pPicData->FrameHeight) || ((uint32_t)m_lastFrameWidth != pPicData->FrameWidth)) { + m_frameSizeChanged = true; + 
pPicData->stdPictureInfo.flags.UsePrevFrameMvs = false; + } else { /* 2.a, 2.b */ + bool intraOnly = pPicData->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY || pPicData->stdPictureInfo.flags.intra_only; + pPicData->stdPictureInfo.flags.UsePrevFrameMvs = m_lastShowFrame && /* 2.c */ + pPicData->stdPictureInfo.flags.error_resilient_mode == 0 && /* 2.d */ + !intraOnly /* 2.e */; } + m_lastFrameHeight = pPicData->FrameHeight; + m_lastFrameWidth = pPicData->FrameWidth; + m_lastShowFrame = pPicData->stdPictureInfo.flags.show_frame; - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs, - m_PrevCtx.probCoeffs, - pProbSetup->pCtxCounters->countCoeffs, - pProbSetup->pCtxCounters->countEobs[TX_4X4], - count_sat, update_factor); - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs8x8, - m_PrevCtx.probCoeffs8x8, - pProbSetup->pCtxCounters->countCoeffs8x8, - pProbSetup->pCtxCounters->countEobs[TX_8X8], - count_sat, update_factor); - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs16x16, - m_PrevCtx.probCoeffs16x16, - pProbSetup->pCtxCounters->countCoeffs16x16, - pProbSetup->pCtxCounters->countEobs[TX_16X16], - count_sat, update_factor); - update_coef_probs(pProbSetup->pProbTab->a.probCoeffs32x32, - m_PrevCtx.probCoeffs32x32, - pProbSetup->pCtxCounters->countCoeffs32x32, - pProbSetup->pCtxCounters->countEobs[TX_32X32], - count_sat, update_factor); } -int32_t VulkanVP9Decoder::update_mode_ct(vp9_prob pre_prob, vp9_prob prob, uint32_t branch_ct[2]) +void VulkanVP9Decoder::ParseLoopFilterParams() { - int32_t factor, count = branch_ct[0] + branch_ct[1]; - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - return weighted_prob(pre_prob, prob, factor); -} + VkParserVp9PictureData *pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo *pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9LoopFilter* pStdLoopFilter = &m_PicData.stdLoopFilter; -int32_t VulkanVP9Decoder::update_mode_ct2(vp9_prob pre_prob, uint32_t branch_ct[2]) -{ - return update_mode_ct(pre_prob, get_binary_prob(branch_ct[0], branch_ct[1]), branch_ct); -} + if (pPicData->FrameIsIntra || (pStdPicInfo->flags.error_resilient_mode == 1)) { + // setup_past_independence() for loop filter params + memset(m_loopFilterRefDeltas, 0, sizeof(m_loopFilterRefDeltas)); + memset(m_loopFilterModeDeltas, 0, sizeof(m_loopFilterModeDeltas)); + m_loopFilterRefDeltas[0] = 1; + m_loopFilterRefDeltas[1] = 0; + m_loopFilterRefDeltas[2] = -1; + m_loopFilterRefDeltas[3] = -1; + } -void VulkanVP9Decoder::update_mode_probs(int32_t n_modes, - const vp9_tree_index *tree, uint32_t *cnt, - vp9_prob *pre_probs, vp9_prob *pre_probsB, - vp9_prob *dst_probs, vp9_prob *dst_probsB, - uint32_t tok0_offset) -{ - vp9_prob probs[MAX_PROBS]; - uint32_t branch_ct[MAX_PROBS][2]; - int32_t t, count, factor; + pStdLoopFilter->loop_filter_level = u(6); + pStdLoopFilter->loop_filter_sharpness = u(3); - assert(n_modes - 1 < MAX_PROBS); - vp9_tree_probs_from_distribution(tree, probs, branch_ct, cnt, tok0_offset); - for (t = 0; t < n_modes - 1; ++t) - { - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - if (t < 8 || dst_probsB == NULL) - dst_probs[t] = weighted_prob(pre_probs[t], probs[t], factor); - else - dst_probsB[t-8] = weighted_prob(pre_probsB[t-8], probs[t], factor); + pStdLoopFilter->flags.loop_filter_delta_enabled = u(1); + if (pStdLoopFilter->flags.loop_filter_delta_enabled) { + + pStdLoopFilter->flags.loop_filter_delta_update = u(1); + + if (pStdLoopFilter->flags.loop_filter_delta_update) { + + for (int i = 0; i < STD_VIDEO_VP9_MAX_REF_FRAMES; i++) { + uint8_t update_ref_delta = u(1); + pStdLoopFilter->update_ref_delta |= update_ref_delta << i; + if (update_ref_delta == 1) { + m_loopFilterRefDeltas[i] = u(6); + if (u(1)) { // sign + m_loopFilterRefDeltas[i] = -m_loopFilterRefDeltas[i]; + } + } + } + + for (int i = 0; i < STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS; i++) { + uint8_t update_mode_delta = u( 1); + pStdLoopFilter->update_mode_delta |= update_mode_delta << i; + if (update_mode_delta) { + m_loopFilterModeDeltas[i] = u(6); + if(u(1)) { // sign + m_loopFilterModeDeltas[i] = -m_loopFilterRefDeltas[i]; + } + } + } + } } -} -void VulkanVP9Decoder::tx_counts_to_branch_counts_32x32(uint32_t *tx_count_32x32p, - uint32_t (*ct_32x32p)[2]) -{ - ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; - ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32]; - ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; - ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32]; - ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; - ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; + memcpy(pStdLoopFilter->loop_filter_ref_deltas, m_loopFilterRefDeltas, sizeof(m_loopFilterRefDeltas)); + memcpy(pStdLoopFilter->loop_filter_mode_deltas, m_loopFilterModeDeltas, sizeof(m_loopFilterModeDeltas)); } -void VulkanVP9Decoder::tx_counts_to_branch_counts_16x16(uint32_t *tx_count_16x16p, - uint32_t (*ct_16x16p)[2]) +void VulkanVP9Decoder::ParseQuantizationParams() { - 
ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; - ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16]; - ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; - ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; + VkParserVp9PictureData *pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &pPicData->stdPictureInfo; + + pStdPicInfo->base_q_idx = u(8); + pStdPicInfo->delta_q_y_dc = ReadDeltaQ(); + pStdPicInfo->delta_q_uv_dc = ReadDeltaQ(); + pStdPicInfo->delta_q_uv_ac = ReadDeltaQ(); } -void VulkanVP9Decoder::tx_counts_to_branch_counts_8x8(uint32_t *tx_count_8x8p, - uint32_t (*ct_8x8p)[2]) +int32_t VulkanVP9Decoder::ReadDeltaQ() { - ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; - ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; + int32_t delta; + if (u(1)) { + delta = u(4); + if (u(1)) { + delta = -delta; + } + return delta; + } else { + return 0; + } } -void VulkanVP9Decoder::adaptModeProbs(vp9_prob_update_s *pProbSetup) +void VulkanVP9Decoder::ParseSegmentationParams() { - uint32_t i, j; - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - pProbSetup->pProbTab->a.intra_inter_prob[i] = update_mode_ct2(m_PrevCtx.intra_inter_prob[i], pProbSetup->pCtxCounters->intra_inter_count[i]); - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - pProbSetup->pProbTab->a.comp_inter_prob[i] = update_mode_ct2(m_PrevCtx.comp_inter_prob[i], pProbSetup->pCtxCounters->comp_inter_count[i]); - for (i = 0; i < REF_CONTEXTS; i++) - pProbSetup->pProbTab->a.comp_ref_prob[i] = update_mode_ct2(m_PrevCtx.comp_ref_prob[i], pProbSetup->pCtxCounters->comp_ref_count[i]); - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - pProbSetup->pProbTab->a.single_ref_prob[i][j] = update_mode_ct2(m_PrevCtx.single_ref_prob[i][j], pProbSetup->pCtxCounters->single_ref_count[i][j]); - - for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) - { - update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, - pProbSetup->pCtxCounters->sb_ymode_counts[i], - m_PrevCtx.sb_ymode_prob[i], m_PrevCtx.sb_ymode_probB[i], - 
pProbSetup->pProbTab->a.sb_ymode_prob[i], pProbSetup->pProbTab->a.sb_ymode_probB[i], 0); - } - for (i = 0; i < VP9_INTRA_MODES; ++i) - { - update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree, - pProbSetup->pCtxCounters->uv_mode_counts[i], - m_PrevCtx.uv_mode_prob[i], - m_PrevCtx.uv_mode_probB[i], - pProbSetup->pProbTab->a.uv_mode_prob[i], - pProbSetup->pProbTab->a.uv_mode_probB[i], 0); - } - for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) - update_mode_probs(PARTITION_TYPES, vp9_partition_tree, - pProbSetup->pCtxCounters->partition_counts[i], - m_PrevCtx.partition_prob[INTER_FRAME][i], NULL, - pProbSetup->pProbTab->a.partition_prob[INTER_FRAME][i], NULL, 0); + uint8_t segmentation_feature_bits[STD_VIDEO_VP9_SEG_LVL_MAX] = { 8, 6, 2, 0}; + uint8_t segmentation_feature_signed[STD_VIDEO_VP9_SEG_LVL_MAX] = {1, 1, 0, 0}; - if (pProbSetup->mcomp_filter_type == SWITCHABLE) - { - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i) - { - update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree, - pProbSetup->pCtxCounters->switchable_interp_counts[i], - m_PrevCtx.switchable_interp_prob[i], NULL, - pProbSetup->pProbTab->a.switchable_interp_prob[i], NULL, 0); - } + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + StdVideoVP9Segmentation* pSegment = &m_PicData.stdSegmentation; + + pSegment->flags.segmentation_update_map = 0; + pSegment->flags.segmentation_temporal_update = 0; + + pStdPicInfo->flags.segmentation_enabled = u(1); + if (pStdPicInfo->flags.segmentation_enabled == 0) { + return; } - if (pProbSetup->transform_mode == TX_MODE_SELECT) - { - uint32_t branch_ct_8x8p[TX_SIZE_MAX_SB - 3][2]; - uint32_t branch_ct_16x16p[TX_SIZE_MAX_SB - 2][2]; - uint32_t branch_ct_32x32p[TX_SIZE_MAX_SB - 1][2]; - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - tx_counts_to_branch_counts_8x8(pProbSetup->pCtxCounters->tx8x8_count[i], branch_ct_8x8p); - for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) - { - int32_t factor; - int32_t count = branch_ct_8x8p[j][0] + 
branch_ct_8x8p[j][1]; - vp9_prob prob = get_binary_prob(branch_ct_8x8p[j][0], branch_ct_8x8p[j][1]); - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - pProbSetup->pProbTab->a.tx8x8_prob[i][j] = weighted_prob(m_PrevCtx.tx8x8_prob[i][j], prob, factor); - } + pSegment->flags.segmentation_update_map = u(1); + + if (pSegment->flags.segmentation_update_map == 1) { + + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_TREE_PROBS; i++) { + uint8_t prob_coded = u(1); + pSegment->segmentation_tree_probs[i] = (prob_coded == 1) ? u(8) : VP9_MAX_PRBABILITY; } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - tx_counts_to_branch_counts_16x16(pProbSetup->pCtxCounters->tx16x16_count[i], branch_ct_16x16p); - for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) - { - int32_t factor; - int32_t count = branch_ct_16x16p[j][0] + branch_ct_16x16p[j][1]; - vp9_prob prob = get_binary_prob(branch_ct_16x16p[j][0], branch_ct_16x16p[j][1]); - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - pProbSetup->pProbTab->a.tx16x16_prob[i][j] = weighted_prob(m_PrevCtx.tx16x16_prob[i][j], prob, factor); + + pSegment->flags.segmentation_temporal_update = u(1); + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_PRED_PROB; i++) { + if (pSegment->flags.segmentation_temporal_update) { + uint8_t prob_coded = u(1); + pSegment->segmentation_pred_prob[i] = (prob_coded == 1) ? u(8) : VP9_MAX_PRBABILITY; + } else { + pSegment->segmentation_pred_prob[i] = VP9_MAX_PRBABILITY; } } - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - { - tx_counts_to_branch_counts_32x32(pProbSetup->pCtxCounters->tx32x32_count[i], branch_ct_32x32p); - for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) - { - int32_t factor; - int32_t count = branch_ct_32x32p[j][0] + branch_ct_32x32p[j][1]; - vp9_prob prob = get_binary_prob(branch_ct_32x32p[j][0], branch_ct_32x32p[j][1]); - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - pProbSetup->pProbTab->a.tx32x32_prob[i][j] = weighted_prob(m_PrevCtx.tx32x32_prob[i][j], prob, factor); + } + + pSegment->flags.segmentation_update_data = u(1); + if (pSegment->flags.segmentation_update_data == 1) { + pSegment->flags.segmentation_abs_or_delta_update = u(1); + + /* Clear all previous segment data */ + memset(pSegment->FeatureEnabled, 0, sizeof(pSegment->FeatureEnabled)); + memset(pSegment->FeatureData, 0, sizeof(pSegment->FeatureData)); + + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTS; i++) { + for (int j = 0; j < STD_VIDEO_VP9_SEG_LVL_MAX; j++) { + uint8_t feature_enabled = u(1); + pSegment->FeatureEnabled[i] |= (feature_enabled << j); + + if (feature_enabled == 1) { + pSegment->FeatureData[i][j] = u(segmentation_feature_bits[j]); + + if (segmentation_feature_signed[j] == 1) { + if (u(1) == 1) { + pSegment->FeatureData[i][j] = -pSegment->FeatureData[i][j]; + } + } + } } } + + } // segmentation_update_data +} + +uint8_t VulkanVP9Decoder::CalcMinLog2TileCols() +{ + VkParserVp9PictureData* pPicData = &m_PicData; + uint8_t minLog2 = 0; + + while (((uint32_t)VP9_MAX_TILE_WIDTH_B64 << minLog2) < pPicData->Sb64Cols) { + minLog2++; } - for (i = 0; i < MBSKIP_CONTEXTS; ++i) - pProbSetup->pProbTab->a.mbskip_probs[i] = update_mode_ct2(m_PrevCtx.mbskip_probs[i],pProbSetup->pCtxCounters->mbskip_count[i]); + + return minLog2; } -void VulkanVP9Decoder::adaptModeContext(vp9_prob_update_s *pProbSetup) +uint8_t VulkanVP9Decoder::CalcMaxLog2TileCols() { - uint32_t i, j; - uint32_t (*mode_ct)[VP9_INTER_MODES - 1][2] = pProbSetup->pCtxCounters->inter_mode_counts; + VkParserVp9PictureData* pPicData = &m_PicData; + uint8_t maxLog2 = 1; - for (j = 0; j < INTER_MODE_CONTEXTS; j++) - { - for (i = 0; i < VP9_INTER_MODES - 1; i++) - { - int32_t count = mode_ct[j][i][0] + mode_ct[j][i][1], factor; - count = count > MVREF_COUNT_SAT ? 
MVREF_COUNT_SAT : count; - factor = (MVREF_MAX_UPDATE_FACTOR * count / MVREF_COUNT_SAT); - pProbSetup->pProbTab->a.inter_mode_prob[j][i] = weighted_prob(m_PrevCtx.inter_mode_prob[j][i], - get_binary_prob(mode_ct[j][i][0], mode_ct[j][i][1]), - factor); - } + while ((pPicData->Sb64Cols >> maxLog2) >= VP9_MIN_TILE_WIDTH_B64) { + maxLog2++; } + + return maxLog2 - 1; } -uint32_t VulkanVP9Decoder::adapt_probs(uint32_t i, - const signed char* tree, - vp9_prob this_probs[], - const vp9_prob last_probs[], - const uint32_t num_events[]) +void VulkanVP9Decoder::ParseTileInfo() { - vp9_prob this_prob; - uint32_t weight; + VkParserVp9PictureData* pPicData = &m_PicData; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; - const uint32_t left = tree[i] <= 0 ? num_events[-tree[i]] : adapt_probs(tree[i], tree, this_probs, last_probs, num_events); - const uint32_t right = tree[i + 1] <= 0 ? num_events[-tree[i + 1]] : adapt_probs(tree[i + 1], tree, this_probs, last_probs, num_events); - weight = left + right; - if (weight) - { - this_prob = get_binary_prob(left, right); - weight = weight > MV_COUNT_SAT ? 
MV_COUNT_SAT : weight; - this_prob = weighted_prob(last_probs[i >> 1], this_prob, MV_MAX_UPDATE_FACTOR * weight / MV_COUNT_SAT); + uint8_t minLog2TileCols = CalcMinLog2TileCols(); + uint8_t maxLog2TileCols = CalcMaxLog2TileCols(); + + pStdPicInfo->tile_cols_log2 = minLog2TileCols; + + while (pStdPicInfo->tile_cols_log2 < maxLog2TileCols) { + if (u(1) == 1) { // increment_tile_cols_log2 + pStdPicInfo->tile_cols_log2++; + } else { + break; + } } - else - { - this_prob = last_probs[i >> 1]; + + pStdPicInfo->tile_rows_log2 = u(1); + if (pStdPicInfo->tile_rows_log2 == 1) { + pStdPicInfo->tile_rows_log2 += u(1); } - this_probs[i >> 1] = this_prob; - return left + right; + + pPicData->numTiles = (1 << pStdPicInfo->tile_rows_log2) * (1 << pStdPicInfo->tile_cols_log2); } -void VulkanVP9Decoder::adapt_prob(vp9_prob *dest, vp9_prob prep, uint32_t ct[2]) +void VulkanVP9Decoder::ParseSuperFrameIndex(const uint8_t* data, uint32_t data_sz, uint32_t frame_sizes[8], uint32_t* frame_count) { - const int32_t count = std::min(ct[0] + ct[1], MV_COUNT_SAT); - if (count) - { - const vp9_prob newp = get_binary_prob(ct[0], ct[1]); - const int32_t factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT; - *dest = weighted_prob(prep, newp, factor); + uint8_t final_byte = data[data_sz - 1]; + *frame_count = 0; + + if ((final_byte & 0xe0) == 0xc0) { + const uint32_t frames = (final_byte & 0x7) + 1; + const uint32_t mag = ((final_byte >> 3) & 0x3) + 1; + const uint32_t index_sz = 2 + mag * frames; + + if (data_sz >= index_sz && data[data_sz - index_sz] == final_byte) { + // found a valid superframe index + const uint8_t* x = data + data_sz - index_sz + 1; + for (uint32_t i = 0; i < frames; i++) { + uint32_t this_sz = 0; + for (uint32_t j = 0; j < mag; j++) { + this_sz |= (*x++) << (j * 8); + } + frame_sizes[i] = this_sz; + } + *frame_count = frames; + } } - else - *dest = prep; } -void VulkanVP9Decoder::adaptNmvProbs(vp9_prob_update_s *pProbSetup) -{ - uint32_t usehp = 
pProbSetup->allow_high_precision_mv; - uint32_t i, j; - - adapt_probs(0, vp9_mv_joint_tree, - pProbSetup->pProbTab->a.nmvc.joints, - m_PrevCtx.nmvc.joints, - pProbSetup->pCtxCounters->nmvcount.joints); - for (i = 0; i < 2; ++i) - { - adapt_prob(&pProbSetup->pProbTab->a.nmvc.sign[i], - m_PrevCtx.nmvc.sign[i], - pProbSetup->pCtxCounters->nmvcount.sign[i]); - adapt_probs(0, vp9_mv_class_tree, - pProbSetup->pProbTab->a.nmvc.classes[i], - m_PrevCtx.nmvc.classes[i], - pProbSetup->pCtxCounters->nmvcount.classes[i]); - adapt_probs(0, vp9_mv_class0_tree, - pProbSetup->pProbTab->a.nmvc.class0[i], - m_PrevCtx.nmvc.class0[i], - pProbSetup->pCtxCounters->nmvcount.class0[i]); - for (j = 0; j < MV_OFFSET_BITS; ++j) - { - adapt_prob(&pProbSetup->pProbTab->a.nmvc.bits[i][j], - m_PrevCtx.nmvc.bits[i][j], - pProbSetup->pCtxCounters->nmvcount.bits[i][j]); +bool VulkanVP9Decoder::BeginPicture(VkParserPictureData* pnvpd) +{ + VkParserVp9PictureData* const pPicDataVP9 = &pnvpd->CodecSpecific.vp9; + StdVideoVP9ColorConfig* pStdColorConfig = &pPicDataVP9->stdColorConfig; + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &m_PicData.stdPictureInfo; + + uint32_t width = pPicDataVP9->FrameWidth; + uint32_t height = pPicDataVP9->FrameHeight; + + VkParserSequenceInfo nvsi = m_ExtSeqInfo; + nvsi.eCodec = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; + nvsi.nChromaFormat = pPicDataVP9->ChromaFormat; + nvsi.nMaxWidth = std::max(width, pPicDataVP9->renderWidth); + nvsi.nMaxHeight = std::max(height, pPicDataVP9->renderHeight); + nvsi.nCodedWidth = width; + nvsi.nCodedHeight = height; + nvsi.nDisplayWidth = pPicDataVP9->renderWidth; + nvsi.nDisplayHeight = pPicDataVP9->renderHeight; + nvsi.lDARWidth = pPicDataVP9->renderWidth; + nvsi.lDARHeight = pPicDataVP9->renderHeight; + nvsi.bProgSeq = true; // VP9 doesn't have explicit interlaced coding. 
+ nvsi.nMinNumDecodeSurfaces = 9; + nvsi.uBitDepthLumaMinus8 = pStdColorConfig->BitDepth - 8; + nvsi.uBitDepthChromaMinus8 = pStdColorConfig->BitDepth - 8; + nvsi.codecProfile = pStdPicInfo->profile; + + // Reset decoder only if decode RT orig width is less than required coded width + if ((nvsi.nMaxWidth > m_rtOrigWidth) || (nvsi.nMaxHeight > m_rtOrigHeight)) { + m_rtOrigWidth = nvsi.nMaxWidth; + m_rtOrigHeight = nvsi.nMaxHeight; + + for (int i = 0; i < 8; i++) { + if (m_pBuffers[i].buffer != nullptr) { + m_pBuffers[i].buffer->Release(); + m_pBuffers[i].buffer = nullptr; + } } - for (j = 0; j < CLASS0_SIZE; ++j) - { - adapt_probs(0, vp9_mv_fp_tree, - pProbSetup->pProbTab->a.nmvc.class0_fp[i][j], - m_PrevCtx.nmvc.class0_fp[i][j], - pProbSetup->pCtxCounters->nmvcount.class0_fp[i][j]); + if (m_pCurrPic != nullptr) { + m_pCurrPic->Release(); + m_pCurrPic = nullptr; } - adapt_probs(0, vp9_mv_fp_tree, - pProbSetup->pProbTab->a.nmvc.fp[i], - m_PrevCtx.nmvc.fp[i], - pProbSetup->pCtxCounters->nmvcount.fp[i]); } - if (usehp) - { - for (i = 0; i < 2; ++i) - { - adapt_prob(&pProbSetup->pProbTab->a.nmvc.class0_hp[i], - m_PrevCtx.nmvc.class0_hp[i], - pProbSetup->pCtxCounters->nmvcount.class0_hp[i]); - adapt_prob(&pProbSetup->pProbTab->a.nmvc.hp[i], - m_PrevCtx.nmvc.hp[i], - pProbSetup->pCtxCounters->nmvcount.hp[i]); - } + + if (!init_sequence(&nvsi)) { + assert(!"init_sequence failed!"); + return false; } -} -void VulkanVP9Decoder::UpdateBackwardProbability(vp9_prob_update_s *pProbSetup) -{ - if (!pProbSetup->errorResilient && !pProbSetup->FrameParallelDecoding) - { - adaptCoefProbs(pProbSetup); //vp9_adapt_coef_probs - if(!pProbSetup->keyFrame && !pProbSetup->intraOnly) - { - adaptModeProbs(pProbSetup); //vp9_adapt_mode_probs - adaptModeContext(pProbSetup); - adaptNmvProbs(pProbSetup); //vp9_adapt_mv_probs - } + // Allocate a buffer for the current picture + if (m_pCurrPic == nullptr) { + m_pClient->AllocPictureBuffer(&m_pCurrPic); + assert(m_pCurrPic); + + 
m_pCurrPic->decodeWidth = width; + m_pCurrPic->decodeHeight = height; } - //vp9hwdStoreProbs - if (pProbSetup->RefreshEntropyProbs) - { - memcpy(&m_EntropyLast[pProbSetup->frameContextIdx], pProbSetup->pProbTab, sizeof(m_EntropyLast[pProbSetup->frameContextIdx])); + + pnvpd->PicWidthInMbs = nvsi.nCodedWidth >> 4; + pnvpd->FrameHeightInMbs = nvsi.nCodedHeight >> 4; + pnvpd->pCurrPic = m_pCurrPic; + pnvpd->progressive_frame = 1; + pnvpd->ref_pic_flag = 1; + pnvpd->intra_pic_flag = pPicDataVP9->FrameIsIntra; + pnvpd->chroma_format = pPicDataVP9->ChromaFormat; + + // Reference slots information + for (int i = 0; i < STD_VIDEO_VP9_NUM_REF_FRAMES; i++) { + vkPicBuffBase* pb = reinterpret_cast(m_pBuffers[i].buffer); + pPicDataVP9->pic_idx[i] = pb ? pb->m_picIdx : -1; } - //VP9HwdUpdateRefs + + return true; } diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp index 135af873..83b968ef 100644 --- a/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp +++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanVideoDecoder.cpp @@ -20,9 +20,6 @@ #include "nvVulkanVideoUtils.h" #include "nvVulkanVideoParser.h" #include -#ifdef ENABLE_VP9_DECODER -#include -#endif VulkanVideoDecoder::VulkanVideoDecoder(VkVideoCodecOperationFlagBitsKHR std) : m_refCount(0) @@ -646,6 +643,7 @@ void VulkanVideoDecoder::end_of_stream() #include "VulkanH264Decoder.h" #include "VulkanH265Decoder.h" #include "VulkanAV1Decoder.h" +#include "VulkanVP9Decoder.h" static nvParserLogFuncType gParserLogFunc = nullptr; static int gLogLevel = 0; @@ -739,12 +737,17 @@ VkResult CreateVulkanVideoDecodeParser(VkVideoCodecOperationFlagBitsKHR videoCod } nvVideoDecodeParser = VkSharedBaseObj(new VulkanAV1Decoder(videoCodecOperation, pParserPictureData->isAnnexB)); break; -#ifdef ENABLE_VP9_DECODER case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: - // TODO: This will not work and is only here as a placeholder to get the 
compiler to include and link the class. + if ((pStdExtensionVersion == nullptr) || + (0 != strcmp(pStdExtensionVersion->extensionName, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME)) || + (pStdExtensionVersion->specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { + nvParserErrorLog("The requested decoder VP9 Codec STD version is NOT supported\n"); + nvParserErrorLog("The supported decoder VP9 Codec STD version is verion %d of %s\n", + VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME); + return VK_ERROR_INCOMPATIBLE_DRIVER; + } nvVideoDecodeParser = VkSharedBaseObj(new VulkanVP9Decoder(videoCodecOperation)); break; -#endif default: nvParserErrorLog("Unsupported codec type!!!\n"); } diff --git a/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp b/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp index 37888fcc..c93a5141 100644 --- a/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp +++ b/vk_video_decoder/libs/VkDecoderUtils/FFmpegDemuxer.cpp @@ -140,6 +140,8 @@ class FFmpegDemuxer : public VideoStreamDemuxer { bsf = av_bsf_get_by_name("hevc_mp4toannexb"); } else if (videoCodec == AV_CODEC_ID_AV1) { bsf = av_bsf_get_by_name("av1_metadata"); + } else if (videoCodec == AV_CODEC_ID_VP9) { + bsf = av_bsf_get_by_name("vp9_metadata"); } if (!bsf) { @@ -286,6 +288,10 @@ class FFmpegDemuxer : public VideoStreamDemuxer { videoCodecId = AV_CODEC_ID_H264; } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) { videoCodecId = AV_CODEC_ID_HEVC; + } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { + videoCodecId = AV_CODEC_ID_AV1; + } else if (codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + videoCodecId = AV_CODEC_ID_VP9; } } @@ -307,12 +313,8 @@ class FFmpegDemuxer : public VideoStreamDemuxer { case AV_CODEC_ID_H264 : return VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR; case AV_CODEC_ID_HEVC : return 
VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR; case AV_CODEC_ID_VP8 : assert(false); return VkVideoCodecOperationFlagBitsKHR(0); - #ifdef VK_EXT_video_decode_vp9 case AV_CODEC_ID_VP9 : return VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR; - #endif // VK_EXT_video_decode_vp9 - #ifdef vulkan_video_codec_av1std_decode case AV_CODEC_ID_AV1 : return VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR; - #endif case AV_CODEC_ID_MJPEG : assert(false); return VkVideoCodecOperationFlagBitsKHR(0); default : assert(false); return VkVideoCodecOperationFlagBitsKHR(0); } @@ -365,6 +367,7 @@ class FFmpegDemuxer : public VideoStreamDemuxer { case AV_PIX_FMT_YUVJ420P: ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting color_range case AV_PIX_FMT_YUV420P: ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples) case AV_PIX_FMT_YUV420P10LE: ///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian + case AV_PIX_FMT_YUV420P12LE: ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian case AV_PIX_FMT_YUV420P16LE: ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian case AV_PIX_FMT_YUV420P16BE: ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR; @@ -391,7 +394,7 @@ class FFmpegDemuxer : public VideoStreamDemuxer { virtual uint32_t GetProfileIdc() const { - switch (FFmpegToVkCodecOperation(videoCodec)) { + switch ((uint32_t)FFmpegToVkCodecOperation(videoCodec)) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { switch(profile) { @@ -431,6 +434,19 @@ class FFmpegDemuxer : public VideoStreamDemuxer { } } break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + { + switch(profile) { + case STD_VIDEO_VP9_PROFILE_0: + case STD_VIDEO_VP9_PROFILE_1: + case STD_VIDEO_VP9_PROFILE_2: + case STD_VIDEO_VP9_PROFILE_3: + break; + default: + std::cerr << "\nInvalid VP9 profile: " << profile << 
std::endl; + } + } + break; default: std::cerr << "\nInvalid codec type: " << FFmpegToVkCodecOperation(videoCodec) << std::endl; } diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index 630232e7..57a2c367 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -39,12 +39,8 @@ const char* VkVideoDecoder::GetVideoCodecString(VkVideoCodecOperationFlagBitsKHR { VK_VIDEO_CODEC_OPERATION_NONE_KHR, "None" }, { VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR, "AVC/H.264" }, { VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR, "H.265/HEVC" }, -#ifdef VK_EXT_video_decode_vp9 { VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR, "VP9" }, -#endif // VK_EXT_video_decode_vp9 -#ifdef vulkan_video_codec_av1std { VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR, "AV1" }, -#endif // VK_EXT_video_decode_av1 }; for (unsigned i = 0; i < sizeof(aCodecName) / sizeof(aCodecName[0]); i++) { @@ -126,6 +122,7 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR + | VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR ); assert(videoCodecs != VK_VIDEO_CODEC_OPERATION_NONE_KHR); @@ -637,12 +634,12 @@ int VkVideoDecoder::CopyOptimalToLinearImage(VkCommandBuffer& commandBuffer, copyRegion[0].dstSubresource.layerCount = 1; copyRegion[1].extent.width = copyRegion[0].extent.width; if (mpInfo->planesLayout.secondaryPlaneSubsampledX != 0) { - copyRegion[1].extent.width /= 2; + copyRegion[1].extent.width = (copyRegion[1].extent.width + 1) / 2; } copyRegion[1].extent.height = copyRegion[0].extent.height; if (mpInfo->planesLayout.secondaryPlaneSubsampledY != 0) { - copyRegion[1].extent.height /= 2; + copyRegion[1].extent.height = (copyRegion[1].extent.height + 1) / 2; } copyRegion[1].extent.depth = 1; @@ -706,7 
+703,7 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters assert(pCurrFrameDecParams->bitstreamData->GetMaxSize() >= pCurrFrameDecParams->bitstreamDataLen); pCurrFrameDecParams->decodeFrameInfo.srcBuffer = pCurrFrameDecParams->bitstreamData->GetBuffer(); - assert(pCurrFrameDecParams->bitstreamDataOffset == 0); + //assert(pCurrFrameDecParams->bitstreamDataOffset == 0); assert(pCurrFrameDecParams->firstSliceIndex == 0); // TODO: Assert if bitstreamDataOffset is aligned to VkVideoCapabilitiesKHR::minBitstreamBufferOffsetAlignment pCurrFrameDecParams->decodeFrameInfo.srcBufferOffset = pCurrFrameDecParams->bitstreamDataOffset; @@ -774,7 +771,12 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } pCurrFrameDecParams->dpbSetupPictureResource.codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode. - pCurrFrameDecParams->dpbSetupPictureResource.codedExtent = m_codedExtent; + // Setup picture may have different resolution compared to previous frames in VP9 + // So, set the codedExtent earlier in VP9 specific code and skip it here. + // TODO: Do the same for other codedcs + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent = m_codedExtent; + } if (dpbSetupPictureResourceInfo.currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED) { imageBarriers[numDpbBarriers] = dpbBarrierTemplates[0]; @@ -816,7 +818,14 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } pOutputPictureResource->codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode. - pOutputPictureResource->codedExtent = m_codedExtent; + // Setup picture may have different resolution compared to previous frames in VP9 + // So, set the codedExtent earlier in VP9 specific code and skip it here. 
+ // TODO: Do the same for other codedcs + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pOutputPictureResource->codedExtent = m_codedExtent; + } else { + pOutputPictureResource->codedExtent = pCurrFrameDecParams->dpbSetupPictureResource.codedExtent; + } // For Output Distinct transition the image to DECODE_DST if (pOutputPictureResourceInfo->currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED) { @@ -909,9 +918,11 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters if (pictureResourcesInfo[resId].image != VK_NULL_HANDLE) { - // FIXME: m_codedExtent should have already be populated in in the + // FIXME: m_codedExtent should have already be populated in the // picture resource above from the FB. - pCurrFrameDecParams->pictureResources[resId].codedExtent = m_codedExtent; + if (m_videoFormat.codec != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pCurrFrameDecParams->pictureResources[resId].codedExtent = m_codedExtent; + } // FIXME: This parameter must to be adjusted based on the interlaced mode. 
pCurrFrameDecParams->pictureResources[resId].codedOffset = { 0, 0 }; } @@ -958,7 +969,9 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters frameSynchronizationInfo.imageSpecsIndex = m_imageSpecsIndex; VkSharedBaseObj currentVkPictureParameters; - if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { // AV1 + if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + decodeBeginInfo.videoSessionParameters = VK_NULL_HANDLE; + } else if (m_videoFormat.codec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { // AV1 bool valid = pCurrFrameDecParams->pStdSps->GetClientObject(currentVkPictureParameters); assert(valid); diff --git a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp index 129ec34a..d1646934 100644 --- a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp +++ b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp @@ -173,6 +173,30 @@ struct nvVideoAV1PicParameters { nvVideoDecodeAV1DpbSlotInfo dpbRefList[nvVideoDecodeAV1DpbSlotInfo::TOTAL_REFS_PER_FRAME + 1]; }; + +struct nvVideoDecodeVP9DpbSlotInfo +{ + enum { + // Number of reference frame types (including intra type) + TOTAL_REFS_PER_FRAME = 8, + }; + VkExtent2D codedExtent{}; + + void Invalidate() { memset(this, 0x00, sizeof(*this)); } + + // Set the STD data here for VP9. 
+ +}; + +struct nvVideoVP9PicParameters { + StdVideoDecodeVP9PictureInfo stdPictureInfo; + StdVideoVP9ColorConfig stdColorConfig; + StdVideoVP9Segmentation stdSegment; + StdVideoVP9LoopFilter stdLoopFilter; + VkVideoDecodeVP9PictureInfoKHR vkPictureInfo{ VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR, nullptr, &stdPictureInfo }; + nvVideoDecodeVP9DpbSlotInfo dpbRefList[nvVideoDecodeVP9DpbSlotInfo::TOTAL_REFS_PER_FRAME + 1]; +}; + static vkPicBuffBase* GetPic(VkPicIf* pPicBuf) { return (vkPicBuffBase*)pPicBuf; @@ -550,9 +574,9 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, // Vulkan Video parser.cpp -- maintains its own indices. // We can use more indices in the parser than the spec. (There is a max of 8 but we can use 16) - // Reason for single structure for DPB -- the array is passed in the callback (in the proxy of the processor) - // It checks which references are in use. + // It checks which references are in use. // 2nd Finds which DPB references were assigned before - and reuses indices. 
- // The local array maintains the + // The local array maintains the pRefPicInfo->flags.disable_frame_end_update_cdf = ; pRefPicInfo->flags.segmentation_enabled = ; pRefPicInfo->base_q_idx = ; @@ -574,6 +598,14 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, } } + void setVP9PictureData(nvVideoDecodeVP9DpbSlotInfo* pDpbSlotInfo, + VkVideoReferenceSlotInfoKHR* pReferenceSlots, + uint32_t dpbEntryIdx, uint32_t dpbSlotIndex) + { + // TODO: VP9 dpb management + assert(0); + } + } dpbH264Entry; virtual int32_t AddRef(); @@ -685,6 +717,14 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, VkVideoReferenceSlotInfoKHR* pReferenceSlots, int8_t* pGopReferenceImagesIndexes, int32_t* pCurrAllocatedSlotIndex); + uint32_t FillDpbVP9State(const VkParserPictureData* pd, + VkParserVp9PictureData* pin, + nvVideoDecodeVP9DpbSlotInfo* pDpbSlotInfo, + StdVideoDecodeVP9PictureInfo* pStdPictureInfo, + uint32_t maxRefPictures, + VkVideoReferenceSlotInfoKHR* pReferenceSlots, + int8_t* pGopReferenceImagesIndexes, + int32_t* pCurrAllocatedSlotIndex); int8_t AllocateDpbSlotForCurrentH264( vkPicBuffBase* pPic, StdVideoDecodeH264PictureInfoFlags currPicFlags, @@ -693,7 +733,8 @@ class VulkanVideoParser : public VkParserVideoDecodeClient, int8_t presetDpbSlot); int8_t AllocateDpbSlotForCurrentAV1(vkPicBuffBase* pPic, bool isReference, int8_t presetDpbSlot); - + int8_t AllocateDpbSlotForCurrentVP9(vkPicBuffBase* pPic, bool isReference, + int8_t presetDpbSlot); protected: VkSharedBaseObj m_vkParser; @@ -944,6 +985,7 @@ VkResult VulkanVideoParser::Initialize( static const VkExtensionProperties h264StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION }; static const VkExtensionProperties h265StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION }; static const VkExtensionProperties av1StdExtensionVersion = { 
VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION }; + static const VkExtensionProperties vp9StdExtensionVersion = { VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION }; const VkExtensionProperties* pStdExtensionVersion = NULL; if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) { @@ -952,6 +994,8 @@ VkResult VulkanVideoParser::Initialize( pStdExtensionVersion = &h265StdExtensionVersion; } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { pStdExtensionVersion = &av1StdExtensionVersion; + } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pStdExtensionVersion = &vp9StdExtensionVersion; } else { assert(!"Unsupported codec type"); return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR; @@ -1098,6 +1142,14 @@ int32_t VulkanVideoParser::BeginSequence(const VkParserSequenceInfo* pnvsi) if (pnvsi->eCodec == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) { maxDpbSlots = 9; + if ((pnvsi->nCodedWidth <= m_nvsi.nCodedWidth) && (pnvsi->nCodedHeight <= m_nvsi.nCodedHeight)) { + return 1; + } + } else if (pnvsi->eCodec == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + maxDpbSlots = 9; + if ((pnvsi->nMaxWidth <= m_nvsi.nMaxWidth) && (pnvsi->nMaxHeight <= m_nvsi.nMaxHeight)) { + return 1; + } } uint32_t configDpbSlots = (pnvsi->nMinNumDpbSlots > 0) ? 
pnvsi->nMinNumDpbSlots : maxDpbSlots; @@ -1120,8 +1172,8 @@ int32_t VulkanVideoParser::BeginSequence(const VkParserSequenceInfo* pnvsi) } m_nvsi = *pnvsi; - m_nvsi.nMaxWidth = pnvsi->nCodedWidth; - m_nvsi.nMaxHeight = pnvsi->nCodedHeight; + m_nvsi.nMaxWidth = pnvsi->nMaxWidth; + m_nvsi.nMaxHeight = pnvsi->nMaxHeight; m_maxNumDecodeSurfaces = pnvsi->nMinNumDecodeSurfaces; @@ -1814,7 +1866,7 @@ uint32_t VulkanVideoParser::FillDpbAV1State( uint8_t yellowSquare[] = { 0xf0, 0x9f, 0x9f, 0xa8, 0x00 }; printf("\nSlotsInUse: "); for (int i = 0; i < 9; i++) { - printf("%-2s ", (slotsInUse & (1<ref_frame_idx[i]); + } + printf("\n"); + + printf("m_pictureToDpbSlotMap: "); + for (int i = 0; i < MAX_FRM_CNT; i++) { + printf("%02d ", i); + } + printf("\nm_pictureToDpbSlotMap: "); + for (int i = 0; i < MAX_FRM_CNT; i++) { + printf("%02d ", m_pictureToDpbSlotMap[i]); + } + printf("\n"); + + printf("ref_frame_picture: "); + for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) { + printf("%02d ", inIdx); + } + printf("\nref_frame_picture: "); + for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) { + int8_t picIdx = pin->pic_idx[inIdx]; + printf("%02d ", picIdx); + } + printf("\n"); + } + + bool isKeyFrame = (pin->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY); + + // It doesn't look like this tracking is needed. + int8_t activeReferences[32]; + memset(activeReferences, 0, sizeof(activeReferences)); + for (size_t refName = 0; refName < STD_VIDEO_VP9_REFS_PER_FRAME; refName++) { + int8_t picIdx = isKeyFrame ? 
-1 : pin->pic_idx[pin->ref_frame_idx[refName]]; + if (picIdx < 0) { + //pKhr->referenceNameSlotIndices[refName] = -1; + continue; + } + int8_t dpbSlot = GetPicDpbSlot(picIdx); + assert(dpbSlot >= 0); + //pKhr->referenceNameSlotIndices[refName] = dpbSlot; + activeReferences[dpbSlot]++; + //hdr.delta_frame_id_minus_1[dpbSlot] = pin->delta_frame_id_minus_1[pin->ref_frame_idx[i]]; + } + + for (int32_t inIdx = 0; inIdx < STD_VIDEO_VP9_NUM_REF_FRAMES; inIdx++) { + int8_t picIdx = isKeyFrame ? -1 : pin->pic_idx[inIdx]; + int8_t dpbSlot = -1; + if ((picIdx >= 0) && !(refDpbUsedAndValidMask & (1 << picIdx))) { + dpbSlot = GetPicDpbSlot(picIdx); + + assert(dpbSlot >= 0); // There is still content hitting this assert. + if (dpbSlot < 0) { + continue; + } + + refDpbUsedAndValidMask |= (1 << picIdx); + m_dpb[dpbSlot].MarkInUse(m_nCurrentPictureID); + if (activeReferences[dpbSlot] == 0) { + continue; + } + + pReferenceSlots[referenceIndex].sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR; + pReferenceSlots[referenceIndex].pNext = nullptr; + pReferenceSlots[referenceIndex].slotIndex = dpbSlot; + pGopReferenceImagesIndexes[referenceIndex] = picIdx; + + VkExtent2D &codedExtent = pDpbSlotInfo[referenceIndex].codedExtent; + codedExtent.width = m_dpb[dpbSlot].getPictureResource()->decodeWidth; + codedExtent.height = m_dpb[dpbSlot].getPictureResource()->decodeHeight; + + referenceIndex++; + } + } + + if (m_dumpDpbData) { + printf(";;; pReferenceSlots (%d): ", referenceIndex); + for (size_t i =0 ;i < referenceIndex; i++) { + printf("%02d ", pReferenceSlots[i].slotIndex); + } + printf("\n"); + } + + ResetPicDpbSlots(refDpbUsedAndValidMask); + + // Take into account the reference picture now. 
+ int8_t currPicIdx = GetPicIdx(pd->pCurrPic); + assert(currPicIdx >= 0); + if (currPicIdx >= 0) { + refDpbUsedAndValidMask |= (1 << currPicIdx); + } + + // NOTE(charlie): Most likely we can consider isReference = refresh_frame_flags != 0; + // However, the AMD fw interface appears to always need a setup slot & a destination resource, + // so it's not clear what to properly do in that case. + int8_t dpbSlot = AllocateDpbSlotForCurrentAV1(GetPic(pd->pCurrPic), + true /* isReference */, pd->current_dpb_id); + + assert(dpbSlot >= 0); + + *pCurrAllocatedSlotIndex = dpbSlot; + assert(!(dpbSlot < 0)); + if (dpbSlot >= 0) { + assert(pd->ref_pic_flag); + } + + if (m_dumpDpbData) { + printf("SlotsInUse: "); + uint32_t slotsInUse = m_dpb.getSlotInUseMask(); + for (int i = 0; i < 9; i++) { + printf("%02d ", i); + } + uint8_t greenSquare[] = { 0xf0, 0x9f, 0x9f, 0xa9, 0x00 }; + uint8_t redSquare[] = { 0xf0, 0x9f, 0x9f, 0xa5, 0x00 }; + uint8_t yellowSquare[] = { 0xf0, 0x9f, 0x9f, 0xa8, 0x00 }; + printf("\nSlotsInUse: "); + for (int i = 0; i < 9; i++) { + printf("%-2s ", (slotsInUse & (1<= 0); SetPicDpbSlot(currPicIdx, dpbSlot); // Assign the dpbSlot to the current picture index. m_dpb[dpbSlot].setPictureResource(pPic, m_nCurrentPictureID); // m_nCurrentPictureID is our main index. 
@@ -1942,6 +2148,7 @@ bool VulkanVideoParser::DecodePicture( nvVideoH264PicParameters h264; nvVideoH265PicParameters hevc; nvVideoAV1PicParameters av1; + nvVideoVP9PicParameters vp9; // }; if (m_decoderHandler == NULL) { @@ -2245,7 +2452,7 @@ bool VulkanVideoParser::DecodePicture( } nvVideoDecodeAV1DpbSlotInfo* dpbSlotsAv1 = av1.dpbRefList; - pCurrFrameDecParams->numGopReferenceSlots = + pCurrFrameDecParams->numGopReferenceSlots = FillDpbAV1State(pd, pin, dpbSlotsAv1, @@ -2317,10 +2524,98 @@ bool VulkanVideoParser::DecodePicture( pin->tileInfo.pMiRowStarts = pin->MiRowStarts; pDecodePictureInfo->flags.applyFilmGrain = pin->std_info.flags.apply_grain; + + } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + + VkParserVp9PictureData* pin = &pd->CodecSpecific.vp9; + + vp9 = nvVideoVP9PicParameters(); + StdVideoDecodeVP9PictureInfo* pStdPicInfo = &vp9.stdPictureInfo; + VkVideoDecodeVP9PictureInfoKHR* pVkPicInfo = &vp9.vkPictureInfo; + nvVideoDecodeVP9DpbSlotInfo* pNvDpbSlotInfo = vp9.dpbRefList; + + // Copy std data and link pointers + memcpy(pStdPicInfo, &pin->stdPictureInfo, sizeof(StdVideoDecodeVP9PictureInfo)); + memcpy(&vp9.stdColorConfig, &pin->stdColorConfig, sizeof(StdVideoVP9ColorConfig)); + pStdPicInfo->pColorConfig = &vp9.stdColorConfig; + if (pStdPicInfo->flags.segmentation_enabled == 1) { + memcpy(&vp9.stdSegment, &pin->stdSegmentation, sizeof(StdVideoVP9Segmentation)); + pStdPicInfo->pSegmentation = &vp9.stdSegment; + } + memcpy(&vp9.stdLoopFilter, &pin->stdLoopFilter, sizeof(StdVideoVP9LoopFilter)); + pStdPicInfo->pLoopFilter = &vp9.stdLoopFilter; + + pVkPicInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR; + pVkPicInfo->pStdPictureInfo = pStdPicInfo; + + VkVideoDecodeInfoKHR* pKhrDecodeInfo = &pCurrFrameDecParams->decodeFrameInfo; + pKhrDecodeInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR; + pKhrDecodeInfo->pNext = pVkPicInfo; + + // dpb slots + pCurrFrameDecParams->numGopReferenceSlots = FillDpbVP9State(pd, 
+ pin, + pNvDpbSlotInfo, + pStdPicInfo, + 9, + referenceSlots, + pCurrFrameDecParams->pGopReferenceImagesIndexes, + &setupReferenceSlot.slotIndex); + + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.width = pin->FrameWidth; + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.height = pin->FrameHeight; + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + } + + if (pCurrFrameDecParams->numGopReferenceSlots) { + assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); + for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; + dpbEntryIdx++) { + pCurrFrameDecParams->pictureResources[dpbEntryIdx].sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR; + pCurrFrameDecParams->pictureResources[dpbEntryIdx].codedExtent = pNvDpbSlotInfo[dpbEntryIdx].codedExtent; + referenceSlots[dpbEntryIdx].pPictureResource = &pCurrFrameDecParams->pictureResources[dpbEntryIdx]; + } + + pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = referenceSlots; + pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = pCurrFrameDecParams->numGopReferenceSlots; + } else { + pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = nullptr; + pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; + } + + // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface. + //pDecodePictureInfo->videoFrameType = static_cast(pin->frame_type); + pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan? + + bool isKeyFrame = pin->stdPictureInfo.frame_type == STD_VIDEO_VP9_FRAME_TYPE_KEY; + for (size_t i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { + int8_t picIdx = isKeyFrame ? 
-1 : pin->pic_idx[pin->ref_frame_idx[i]]; + if (picIdx < 0) { + pVkPicInfo->referenceNameSlotIndices[i] = -1; + continue; + } + + int8_t dpbSlot = GetPicDpbSlot(picIdx); + assert(dpbSlot >= 0); + pVkPicInfo->referenceNameSlotIndices[i] = dpbSlot; + } + + pVkPicInfo->uncompressedHeaderOffset = pin->uncompressedHeaderOffset; + pVkPicInfo->compressedHeaderOffset = pin->compressedHeaderOffset; + pVkPicInfo->tilesOffset = pin->tilesOffset; + + // Use the current frame's width and height for display and writing to output + pDecodePictureInfo->displayWidth = pin->FrameWidth; + pDecodePictureInfo->displayHeight = pin->FrameHeight; } - pDecodePictureInfo->displayWidth = m_nvsi.nDisplayWidth; - pDecodePictureInfo->displayHeight = m_nvsi.nDisplayHeight; + if (m_codecType != VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + pDecodePictureInfo->displayWidth = m_nvsi.nDisplayWidth; + pDecodePictureInfo->displayHeight = m_nvsi.nDisplayHeight; + } bRet = (m_decoderHandler->DecodePictureWithParameters(pCurrFrameDecParams, pDecodePictureInfo) >= 0); @@ -2405,6 +2700,11 @@ VkResult vulkanCreateVideoParser( assert(!"Decoder AV1 Codec version is NOT supported"); return VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR; } + } else if (videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) { + if (!pStdExtensionVersion || strcmp(pStdExtensionVersion->extensionName, VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME) || (pStdExtensionVersion->specVersion != VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION)) { + assert(!"Decoder VP9 Codec version is NOT supported"); + return VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR; + } } else { assert(!"Decoder Codec is NOT supported"); return VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR; diff --git a/vk_video_decoder/src/vulkan_video_decoder.cpp b/vk_video_decoder/src/vulkan_video_decoder.cpp index e26115cd..a0018a46 100644 --- a/vk_video_decoder/src/vulkan_video_decoder.cpp +++ b/vk_video_decoder/src/vulkan_video_decoder.cpp @@ -155,16 +155,8 
@@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR; - VkQueueFlags requestVideoEncodeQueueMask = 0; - if (m_decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - } - if (m_decoderConfig.selectVideoWithComputeQueue) { requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (m_decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -172,16 +164,7 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT; } - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs | - (m_decoderConfig.enableVideoEncoder ? videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR); + VkVideoCodecOperationFlagsKHR videoCodecOperation = videoStreamDemuxer->GetVideoCodec(); const bool supportsShellPresent = ((!m_decoderConfig.noPresent == false) && (pWsiDisplay != nullptr)); const bool createGraphicsQueue = supportsShellPresent ? 
true : false; @@ -196,17 +179,12 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, ( VK_QUEUE_TRANSFER_BIT | requestGraphicsQueueMask | requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoDecodeQueueMask), pWsiDisplay, requestVideoDecodeQueueMask, - ( VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), - requestVideoEncodeQueueMask, - ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR), + videoCodecOperation, + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, vkPhysicalDevice); if (result != VK_SUCCESS) { @@ -216,8 +194,8 @@ VkResult VulkanVideoDecoderImpl::Initialize(VkInstance vkInstance, } m_vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - m_decoderConfig.enableVideoEncoder ? 1 : 0, // num encode queues - videoCodecs, + 0, // num encode queues + videoCodecOperation, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
@@ -264,6 +242,7 @@ VkResult CreateVulkanVideoDecoder(VkInstance vkInstance, VkPhysicalDevice vkPhys case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: { } diff --git a/vk_video_decoder/test/vulkan-video-dec/Main.cpp b/vk_video_decoder/test/vulkan-video-dec/Main.cpp index 2c5d4d0a..5a02d1b3 100644 --- a/vk_video_decoder/test/vulkan-video-dec/Main.cpp +++ b/vk_video_decoder/test/vulkan-video-dec/Main.cpp @@ -60,6 +60,20 @@ int main(int argc, const char** argv) return -1; } + + VkSharedBaseObj videoStreamDemuxer; + result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), + decoderConfig.forceParserType, + decoderConfig.enableStreamDemuxing, + decoderConfig.initialWidth, + decoderConfig.initialHeight, + decoderConfig.initialBitdepth, + videoStreamDemuxer); + if (result != VK_SUCCESS) { + assert(!"Can't initialize the VideoStreamDemuxer!"); + return result; + } + const int32_t numDecodeQueues = ((decoderConfig.queueId != 0) || (decoderConfig.enableHwLoadBalancing != 0)) ? 
-1 : // all available HW decoders @@ -67,16 +81,8 @@ int main(int argc, const char** argv) VkQueueFlags requestVideoDecodeQueueMask = VK_QUEUE_VIDEO_DECODE_BIT_KHR; - VkQueueFlags requestVideoEncodeQueueMask = 0; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - } - if (decoderConfig.selectVideoWithComputeQueue) { requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (decoderConfig.enableVideoEncoder) { - requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -84,16 +90,9 @@ int main(int argc, const char** argv) requestVideoComputeQueueMask = VK_QUEUE_COMPUTE_BIT; } - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoDecodeCodecs | - (decoderConfig.enableVideoEncoder ? videoEncodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR); + VkVideoCodecOperationFlagsKHR videoCodec = decoderConfig.forceParserType != VK_VIDEO_CODEC_OPERATION_NONE_KHR ? 
+ decoderConfig.forceParserType : + videoStreamDemuxer->GetVideoCodec(); if (!decoderConfig.noPresent) { @@ -111,17 +110,12 @@ int main(int argc, const char** argv) result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT | requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoDecodeQueueMask), displayShell, requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), - requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + videoCodec, + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); return -1; @@ -130,27 +124,14 @@ int main(int argc, const char** argv) vkDevCtxt.GetPresentQueueFamilyIdx())); vkDevCtxt.CreateVulkanDevice(numDecodeQueues, - decoderConfig.enableVideoEncoder ? 
1 : 0, // num encode queues - videoCodecs, + 0, // num encode queues + videoCodec, false, // createTransferQueue true, // createGraphicsQueue true, // createDisplayQueue requestVideoComputeQueueMask != 0 // createComputeQueue ); - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } VkSharedBaseObj frameToFile; if (!decoderConfig.outputFileName.empty()) { @@ -194,8 +175,7 @@ int main(int argc, const char** argv) result = vkDevCtxt.InitPhysicalDevice(decoderConfig.deviceId, decoderConfig.GetDeviceUUID(), (VK_QUEUE_TRANSFER_BIT | requestVideoDecodeQueueMask | - requestVideoComputeQueueMask | - requestVideoEncodeQueueMask), + requestVideoComputeQueueMask), nullptr, requestVideoDecodeQueueMask); if (result != VK_SUCCESS) { @@ -205,7 +185,7 @@ int main(int argc, const char** argv) result = vkDevCtxt.CreateVulkanDevice(numDecodeQueues, 0, // num encode queues - videoCodecs, + videoCodec, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
@@ -219,20 +199,6 @@ int main(int argc, const char** argv) return -1; } - VkSharedBaseObj videoStreamDemuxer; - result = VideoStreamDemuxer::Create(decoderConfig.videoFileName.c_str(), - decoderConfig.forceParserType, - decoderConfig.enableStreamDemuxing, - decoderConfig.initialWidth, - decoderConfig.initialHeight, - decoderConfig.initialBitdepth, - videoStreamDemuxer); - - if (result != VK_SUCCESS) { - assert(!"Can't initialize the VideoStreamDemuxer!"); - return result; - } - VkSharedBaseObj frameToFile; if (!decoderConfig.outputFileName.empty()) { const char* crcOutputFile = decoderConfig.outputcrcPerFrame ? decoderConfig.crcOutputFileName.c_str() : nullptr; diff --git a/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp b/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp index 8b4e845b..56bba249 100644 --- a/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp +++ b/vk_video_decoder/test/vulkan-video-simple-dec/Main.cpp @@ -114,6 +114,8 @@ int main(int argc, const char** argv) break; case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: break; + case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR: + break; default: std::cout << "Simple decoder does not support demuxing " << "and the decoder type must be set with --codec " diff --git a/vk_video_encoder/demos/vk-video-enc/Main.cpp b/vk_video_encoder/demos/vk-video-enc/Main.cpp index 259260f5..1a589fe0 100644 --- a/vk_video_encoder/demos/vk-video-enc/Main.cpp +++ b/vk_video_encoder/demos/vk-video-enc/Main.cpp @@ -52,7 +52,6 @@ int main(int argc, char** argv) VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, - VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, nullptr }; @@ -71,6 +70,7 @@ int main(int argc, char** argv) VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, nullptr }; @@ -123,17 
+123,9 @@ int main(int argc, char** argv) VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - VkQueueFlags requestVideoDecodeQueueMask = 0; - if (encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_VIDEO_DECODE_BIT_KHR | - VK_QUEUE_TRANSFER_BIT; - } if (encoderConfig->selectVideoWithComputeQueue) { requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - } } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -158,19 +150,6 @@ int main(int argc, char** argv) return -1; } - - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoEncodeCodecs | - encoderConfig->enableVideoDecoder ? 
videoDecodeCodecs : (VkVideoCodecOperationFlagsKHR) VK_VIDEO_CODEC_OPERATION_NONE_KHR; - - VkSharedBaseObj encoder; // the encoder's instance if (supportsDisplay && encoderConfig->enableFramePresent) { @@ -186,14 +165,11 @@ int main(int argc, char** argv) result = vkDevCtxt.InitPhysicalDevice(encoderConfig->deviceId, encoderConfig->GetDeviceUUID(), (VK_QUEUE_GRAPHICS_BIT | - requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | - requestVideoEncodeQueueMask), + requestVideoComputeQueueMask | + requestVideoEncodeQueueMask), displayShell, - requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR), + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, requestVideoEncodeQueueMask, (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | @@ -206,9 +182,9 @@ int main(int argc, char** argv) assert(displayShell->PhysDeviceCanPresent(vkDevCtxt.getPhysicalDevice(), vkDevCtxt.GetPresentQueueFamilyIdx())); - result = vkDevCtxt.CreateVulkanDevice(encoderConfig->enableVideoDecoder ? 
1 : 0, // num decode queues + result = vkDevCtxt.CreateVulkanDevice(0, // num decode queues numEncodeQueues, // num encode queues - videoCodecs, + encoderConfig->codec, false, // createTransferQueue true, // createGraphicsQueue true, // createDisplayQueue @@ -241,26 +217,22 @@ int main(int argc, char** argv) // No display presentation and no decoder - just the encoder result = vkDevCtxt.InitPhysicalDevice(encoderConfig->deviceId, encoderConfig->GetDeviceUUID(), (requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | requestVideoEncodeQueueMask | VK_QUEUE_TRANSFER_BIT), nullptr, - requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR), + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + encoderConfig->codec); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); return -1; } - result = vkDevCtxt.CreateVulkanDevice(encoderConfig->enableVideoDecoder ? 1 : 0, // num decode queues + result = vkDevCtxt.CreateVulkanDevice(0, // num decode queues numEncodeQueues, // num encode queues - videoCodecs, + encoderConfig->codec, // If no graphics or compute queue is requested, only video queues // will be created. Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. 
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h index c7de5367..e826064a 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h @@ -762,7 +762,6 @@ struct EncoderConfig : public VkVideoRefCountBase { uint32_t verboseMsg : 1; uint32_t enableFramePresent : 1; uint32_t enableFrameDirectModePresent : 1; - uint32_t enableVideoDecoder : 1; uint32_t enableHwLoadBalancing : 1; uint32_t selectVideoWithComputeQueue : 1; uint32_t enablePreprocessComputeFilter : 1; @@ -857,7 +856,6 @@ struct EncoderConfig : public VkVideoRefCountBase { , verboseMsg(false) , enableFramePresent(false) , enableFrameDirectModePresent(false) - , enableVideoDecoder(false) , enableHwLoadBalancing(false) , selectVideoWithComputeQueue(false) , enablePreprocessComputeFilter(true) diff --git a/vk_video_encoder/src/vulkan_video_encoder.cpp b/vk_video_encoder/src/vulkan_video_encoder.cpp index 196caf3a..803d9da6 100644 --- a/vk_video_encoder/src/vulkan_video_encoder.cpp +++ b/vk_video_encoder/src/vulkan_video_encoder.cpp @@ -115,6 +115,7 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, nullptr }; @@ -141,17 +142,8 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid VkQueueFlags requestVideoEncodeQueueMask = VK_QUEUE_VIDEO_ENCODE_BIT_KHR; - VkQueueFlags requestVideoDecodeQueueMask = 0; - if (m_encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_VIDEO_DECODE_BIT_KHR | - VK_QUEUE_TRANSFER_BIT; - } - if (m_encoderConfig->selectVideoWithComputeQueue) { requestVideoEncodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - if (m_encoderConfig->enableVideoDecoder) { - requestVideoDecodeQueueMask |= VK_QUEUE_COMPUTE_BIT; - 
} } VkQueueFlags requestVideoComputeQueueMask = 0; @@ -162,17 +154,13 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid // No display presentation and no decoder - just the encoder result = m_vkDevCtxt.InitPhysicalDevice(m_encoderConfig->deviceId, m_encoderConfig->GetDeviceUUID(), ( requestVideoComputeQueueMask | - requestVideoDecodeQueueMask | requestVideoEncodeQueueMask | VK_QUEUE_TRANSFER_BIT), nullptr, - requestVideoDecodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR), + 0, + VK_VIDEO_CODEC_OPERATION_NONE_KHR, requestVideoEncodeQueueMask, - (VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR)); + videoCodecOperation); if (result != VK_SUCCESS) { assert(!"Can't initialize the Vulkan physical device!"); @@ -184,21 +172,9 @@ VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR vid -1 : // all available HW encoders 1; // only one HW encoder instance - VkVideoCodecOperationFlagsKHR videoDecodeCodecs = (VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoEncodeCodecs = ( VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR | - VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); - - VkVideoCodecOperationFlagsKHR videoCodecs = videoEncodeCodecs | - (m_encoderConfig->enableVideoDecoder ? videoDecodeCodecs : (uint32_t)VK_VIDEO_CODEC_OPERATION_NONE_KHR); - - - result = m_vkDevCtxt.CreateVulkanDevice(m_encoderConfig->enableVideoDecoder ? 1 : 0, // num decode queues + result = m_vkDevCtxt.CreateVulkanDevice(0, // num decode queues numEncodeQueues, // num encode queues - videoCodecs, + videoCodecOperation, // If no graphics or compute queue is requested, only video queues // will be created. 
Not all implementations support transfer on video queues, // so request a separate transfer queue for such implementations. From 321f336862d1d29beda15d2e7f89f10c1456eda9 Mon Sep 17 00:00:00 2001 From: Raju Konda Date: Mon, 28 Apr 2025 00:00:58 -0700 Subject: [PATCH 7/7] decode: Include pSetupReferenceSlot in the VkVideoBeginCodingInfoKHR Include pSetupReferenceSlot in the VkVideoBeginCodingInfoKHR::pReferenceSlots list when it is not null. --- .../libs/VkVideoDecoder/VkVideoDecoder.cpp | 8 ++- .../libs/VkVideoParser/VulkanVideoParser.cpp | 66 ++++++++++++------- 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index 57a2c367..6bff5ce5 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -937,8 +937,12 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } } - decodeBeginInfo.referenceSlotCount = pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount; - decodeBeginInfo.pReferenceSlots = pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots; + // Add setup reference slot details to decodeBeginInfo + decodeBeginInfo.referenceSlotCount = pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount + + (pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot ? 1 : 0); + decodeBeginInfo.pReferenceSlots = (pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount > 0) ? + pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots : + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot; m_imageSpecsIndex.displayOut = ((m_dpbAndOutputCoincide == VK_TRUE) && !(pDecodePictureInfo->flags.applyFilmGrain == VK_TRUE)) ? 
diff --git a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp index d1646934..bbb68e66 100644 --- a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp +++ b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp @@ -2279,11 +2279,6 @@ bool VulkanVideoParser::DecodePicture( h264.stdPictureInfo.flags, &setupReferenceSlot.slotIndex); // TODO: Remove it is for debugging only. Reserved fields must be set to "0". pout->stdPictureInfo.reserved1 = pCurrFrameDecParams->numGopReferenceSlots; - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; @@ -2299,6 +2294,15 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.pReferenceSlots = NULL; pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } } else if (m_codecType == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) { @@ -2388,11 +2392,6 @@ bool VulkanVideoParser::DecodePicture( referenceSlots, pCurrFrameDecParams->pGopReferenceImagesIndexes, 
&setupReferenceSlot.slotIndex); - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); @@ -2410,6 +2409,16 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } + if (m_dumpParserData) { for (int32_t i = 0; i < HEVC_MAX_DPB_SLOTS; i++) { std::cout << "\tdpbIndex: " << i; @@ -2462,12 +2471,6 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->pGopReferenceImagesIndexes, &setupReferenceSlot.slotIndex); - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } - if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; @@ -2483,6 +2486,17 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + + 
assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } + // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface. //pDecodePictureInfo->videoFrameType = static_cast(pin->frame_type); pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan? @@ -2562,14 +2576,6 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->pGopReferenceImagesIndexes, &setupReferenceSlot.slotIndex); - assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); - if (setupReferenceSlot.slotIndex >= 0) { - pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.width = pin->FrameWidth; - pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.height = pin->FrameHeight; - setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; - pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; - } - if (pCurrFrameDecParams->numGopReferenceSlots) { assert(pCurrFrameDecParams->numGopReferenceSlots <= (int32_t)MAX_DPB_REF_SLOTS); for (uint32_t dpbEntryIdx = 0; dpbEntryIdx < (uint32_t)pCurrFrameDecParams->numGopReferenceSlots; @@ -2586,6 +2592,18 @@ bool VulkanVideoParser::DecodePicture( pCurrFrameDecParams->decodeFrameInfo.referenceSlotCount = 0; } + assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + if (setupReferenceSlot.slotIndex >= 0) { + pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.width = pin->FrameWidth; + 
pCurrFrameDecParams->dpbSetupPictureResource.codedExtent.height = pin->FrameHeight; + setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; + + // add the setup slot to the end of referenceSlots + assert((uint32_t)pCurrFrameDecParams->numGopReferenceSlots < MAX_DPB_REF_AND_SETUP_SLOTS); + referenceSlots[pCurrFrameDecParams->numGopReferenceSlots] = setupReferenceSlot; + } + // @review: this field seems only useful for debug display, but since AV1 needs a dword, should probably change the interface. //pDecodePictureInfo->videoFrameType = static_cast(pin->frame_type); pDecodePictureInfo->viewId = 0; // @review: Doesn't seem to be used in Vulkan?