diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index dfae921c..f56d3f6c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@v5 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: '~1.24' diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index 3b4fec3f..42354dfe 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -1,5 +1,8 @@ name: E2E Tests +permissions: + contents: read + on: workflow_dispatch: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1528d13e..b4be4381 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,13 +28,13 @@ jobs: strategy: matrix: # from https://github.com/kubernetes-sigs/controller-tools/blob/main/envtest-releases.yaml - envtest_k8s_version: [1.23.5, 1.33.0] + envtest_k8s_version: [1.23.5, 1.34.0] steps: - name: Clone the code uses: actions/checkout@v5 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: '~1.24' diff --git a/.vscode/launch.json b/.vscode/launch.json index bce7b733..ef1ab245 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -61,7 +61,8 @@ "KUBECONFIG": "~/.kube/config-local-studio", "ENABLE_WEBHOOKS": "false", "ENABLE_SCHEDULER": "true", - "ENABLE_CR_CONTROLLER": "true" + "ENABLE_CR_CONTROLLER": "true", + "NVIDIA_OPERATOR_PROGRESSIVE_MIGRATION": "true" }, "args": [ "--metrics-path", "${workspaceFolder}/logs/metrics.log", diff --git a/.vscode/settings.json b/.vscode/settings.json index 1285d84e..a5da5620 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -25,6 +25,7 @@ "clientcmdapi", "clientgoscheme", "clientset", + "clientsetfake", "cloudnative", "cloudprovider", "clusterissuers", @@ -46,6 +47,8 @@ "envtest", "essd", "Eventf", + "evictable", + "featuregate", "finalizer", "Finalizers", "frameworkruntime", @@ -78,6 +81,8 @@ "iface", "imageutils", "influxdata", + "internalcache", + "internalqueue", "jsonpatch", "karpenter", "karpv", @@ -129,6 +134,7 @@ "schedulingconfigtemplate", "schedulingconfigtemplates", "schedulingcorev", + "schedv", "serviceaccount", "shirou", "shortuuid", diff --git a/api/v1/gpupool_types.go b/api/v1/gpupool_types.go index 08d139b5..ca9224c4 100644 --- a/api/v1/gpupool_types.go +++ b/api/v1/gpupool_types.go @@ -238,6 +238,12 @@ type QosConfig struct { Definitions []QosDefinition `json:"definitions,omitempty"` DefaultQoS QoSLevel `json:"defaultQoS,omitempty"` Pricing []QosPricing `json:"pricing,omitempty"` + + // Eviction protection price ratio applied to cost calculation during protection period + // This multiplier increases pricing for protected workloads to discourage preemption + // +optional + // +kubebuilder:default="1.2" + EvictionProtectionPriceRatio string `json:"evictionProtectionPriceRatio,omitempty"` } type QosDefinition struct { diff --git a/api/v1/gpuresourcequota_types.go b/api/v1/gpuresourcequota_types.go index 46e068b7..2b41b512 100644 --- a/api/v1/gpuresourcequota_types.go +++ b/api/v1/gpuresourcequota_types.go @@ -19,7 +19,7 @@ package v1 import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/kubernetes/pkg/scheduler/framework" + fwk "k8s.io/kube-scheduler/framework" ) // GPUResourceQuotaSpec defines the desired state of GPUResourceQuota @@ -192,6 +192,12 @@ type AllocRequest struct { // cel filter expression CELFilterExpression string + + QoS QoSLevel +} + +func (p 
*AllocRequest) Clone() fwk.StateData { + return p } type GPUAllocationInfo struct { @@ -209,7 +215,7 @@ type AdjustRequest struct { NewLimit Resource } -func (ar *AllocRequest) Clone() framework.StateData { +func (ar *AdjustRequest) Clone() fwk.StateData { return ar } diff --git a/api/v1/schedulingconfigtemplate_types.go b/api/v1/schedulingconfigtemplate_types.go index 44f07bef..8611ed99 100644 --- a/api/v1/schedulingconfigtemplate_types.go +++ b/api/v1/schedulingconfigtemplate_types.go @@ -39,6 +39,10 @@ type SchedulingConfigTemplateSpec struct { // single GPU device multi-process queuing and fair scheduling with QoS constraint // +optional Hypervisor *HypervisorScheduling `json:"hypervisor,omitempty"` + + // enable Dynamic Resource Allocation (DRA) for GPU resource management + // +optional + DRA *DRAConfig `json:"dra,omitempty"` } type PlacementConfig struct { @@ -206,6 +210,17 @@ type MultiProcessQueuing struct { QueueLevelTimeSlices []string `json:"queueLevelTimeSlices,omitempty"` } +// DRAConfig configures Dynamic Resource Allocation support +type DRAConfig struct { + // Enable DRA mode for all workloads in this configuration template + // +optional + Enable *bool `json:"enable,omitempty"` + + // ResourceClaimTemplateName specifies the ResourceClaim template name to use + // +optional + ResourceClaimTemplateName string `json:"resourceClaimTemplateName,omitempty"` +} + // SchedulingConfigTemplateStatus defines the observed state of SchedulingConfigTemplate. type SchedulingConfigTemplateStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 9be4f47c..5699677a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -332,6 +332,26 @@ func (in *ComputingVendorParams) DeepCopy() *ComputingVendorParams { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DRAConfig) DeepCopyInto(out *DRAConfig) { + *out = *in + if in.Enable != nil { + in, out := &in.Enable, &out.Enable + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DRAConfig. +func (in *DRAConfig) DeepCopy() *DRAConfig { + if in == nil { + return nil + } + out := new(DRAConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GPU) DeepCopyInto(out *GPU) { *out = *in @@ -1963,6 +1983,11 @@ func (in *SchedulingConfigTemplateSpec) DeepCopyInto(out *SchedulingConfigTempla *out = new(HypervisorScheduling) (*in).DeepCopyInto(*out) } + if in.DRA != nil { + in, out := &in.DRA, &out.DRA + *out = new(DRAConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplateSpec. diff --git a/charts/tensor-fusion/Chart.yaml b/charts/tensor-fusion/Chart.yaml index 59de69d1..042d05c2 100644 --- a/charts/tensor-fusion/Chart.yaml +++ b/charts/tensor-fusion/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.5.5 +version: 1.5.9 # This is the version number of the application being deployed. 
This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml index 8bc65e66..2158529c 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml @@ -562,6 +562,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml index 91a01eae..7c0c281b 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -143,6 +143,20 @@ spec: type: string type: object type: object + dra: + description: enable Dynamic Resource Allocation (DRA) for GPU resource + management + properties: + enable: + description: Enable DRA mode for all workloads in this configuration + template + type: boolean + resourceClass: + default: tensorfusion.ai/gpu + description: ResourceClass specifies the DRA resource class name + to use + type: string + type: object hypervisor: description: single GPU device multi-process queuing and fair scheduling with QoS constraint diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml index 45bc9a47..496541bc 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml @@ -629,6 +629,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml b/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml index 7fcdda1a..242d17e0 100644 --- a/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml +++ b/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml @@ -11,7 +11,7 @@ webhooks: namespace: {{ include "tensor-fusion.namespace" . }} path: /mutate-v1-pod failurePolicy: {{ .Values.controller.admissionWebhooks.failurePolicy }} - name: mpod-v1.kb.io + name: mpod.tensor-fusion.ai rules: - apiGroups: - "" diff --git a/charts/tensor-fusion/templates/controller-deployment.yaml b/charts/tensor-fusion/templates/controller-deployment.yaml index ca09a6a1..c16c4aab 100644 --- a/charts/tensor-fusion/templates/controller-deployment.yaml +++ b/charts/tensor-fusion/templates/controller-deployment.yaml @@ -32,6 +32,7 @@ spec: {{- end }} serviceAccountName: {{ include "tensor-fusion.serviceAccountName" . 
}} enableServiceLinks: false + priorityClassName: "system-cluster-critical" containers: - name: controller image: "{{ .Values.controller.image.repository }}:{{ .Values.controller.image.tag | default .Chart.AppVersion }}" diff --git a/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml b/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml index d473fcfa..2c88583b 100644 --- a/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml +++ b/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml @@ -45,6 +45,18 @@ data: costPerHour: 1.64 fp16TFlops: 312 + - model: A100_PCIe_40GB + fullModelName: "NVIDIA A100-PCIE-40GB" + vendor: NVIDIA + costPerHour: 1.64 + fp16TFlops: 312 + + - model: A100_PCIe_80GB + fullModelName: "NVIDIA A100-PCIE-80GB" + vendor: NVIDIA + costPerHour: 1.64 + fp16TFlops: 312 + - model: A100_SXM_40G fullModelName: "NVIDIA A100-SXM4-40GB" vendor: NVIDIA @@ -70,13 +82,13 @@ data: fp16TFlops: 312 - model: A800_PCIe_80G - fullModelName: "NVIDIA A800 80GB PCIe" + fullModelName: "NVIDIA A800-PCIE-80GB" vendor: NVIDIA costPerHour: 1.64 fp16TFlops: 312 - model: A800_PCIe_40G - fullModelName: "NVIDIA A800 40GB PCIe" + fullModelName: "NVIDIA A800-PCIE-40GB" vendor: NVIDIA costPerHour: 1.64 fp16TFlops: 312 @@ -95,7 +107,7 @@ data: fp16TFlops: 125 - model: A40 - fullModelName: "NVIDIA A40 48GB PCIe" + fullModelName: "NVIDIA A40-PCIE-48GB" vendor: NVIDIA costPerHour: 0.4 fp16TFlops: 149.7 diff --git a/charts/tensor-fusion/templates/priorityclass.yaml b/charts/tensor-fusion/templates/priorityclass.yaml new file mode 100644 index 00000000..e1f493b8 --- /dev/null +++ b/charts/tensor-fusion/templates/priorityclass.yaml @@ -0,0 +1,23 @@ +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: tensor-fusion-critical +value: 100000 +globalDefault: false +description: "TensorFusion critical priority" +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: tensor-fusion-high +value: 10000 +globalDefault: false +description: "TensorFusion high priority" +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: tensor-fusion-medium +value: 0 +globalDefault: false +description: "TensorFusion medium priority" diff --git a/charts/tensor-fusion/values.yaml b/charts/tensor-fusion/values.yaml index cf4865f4..2c06aba6 100644 --- a/charts/tensor-fusion/values.yaml +++ b/charts/tensor-fusion/values.yaml @@ -31,7 +31,7 @@ controller: image: repository: tensorfusion/tensor-fusion-operator # Overrides the image tag whose default is the chart appVersion. - tag: "latest" + tag: "1.43.4" # This is for setting Kubernetes Annotations to a Pod. 
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ @@ -120,7 +120,7 @@ agent: image: repository: tensorfusion/tensor-fusion-agent - tag: "latest" + tag: "1.0.0" resources: requests: @@ -169,8 +169,8 @@ schedulerConfig: kind: KubeSchedulerConfiguration clientConnection: kubeconfig: "" - qps: 50 - burst: 100 + qps: 1000 + burst: 2000 profiles: # Refer: https://kubernetes.io/docs/reference/scheduling/config/ - schedulerName: tensor-fusion-scheduler diff --git a/cmd/main.go b/cmd/main.go index 92021131..b0ec36e7 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -33,6 +33,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/klog/v2" + resourcev1beta2 "k8s.io/api/resource/v1beta2" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -55,6 +56,7 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/controller" + "github.com/NexusGPU/tensor-fusion/internal/controller/dra" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/metrics" "github.com/NexusGPU/tensor-fusion/internal/portallocator" @@ -65,6 +67,8 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/NexusGPU/tensor-fusion/internal/version" webhookcorev1 "github.com/NexusGPU/tensor-fusion/internal/webhook/v1" + k8sVer "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apiserver/pkg/util/feature" // +kubebuilder:scaffold:imports ) @@ -99,6 +103,7 @@ var alertEvaluatorReady chan struct{} func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(tfv1.AddToScheme(scheme)) + utilruntime.Must(resourcev1beta2.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } @@ -204,6 +209,14 @@ func main() { _ = os.Setenv(constants.KubeApiVersionMajorEnv, version.Major) _ = os.Setenv(constants.KubeApiVersionMinorEnv, version.Minor) + // TODO: there will still be risk after FeatureGate removed when the feature is stable for a long time + // To be compatible with long-term k8s version, need to patch Kubernetes source code + k8sVersion := k8sVer.MustParseSemantic(version.String()) + err = feature.DefaultMutableFeatureGate.SetEmulationVersion(k8sVersion) + if err != nil { + setupLog.Error(err, "unable to set k8s version for feature gating") + } + alertEvaluatorReady = make(chan struct{}) setupTimeSeriesAndWatchGlobalConfigChanges(ctx, mgr) @@ -217,9 +230,11 @@ func main() { // Initialize GPU allocator and set up watches allocator, portAllocator := startTensorFusionAllocators(ctx, mgr) - startWebhook(mgr, portAllocator) + // Create pricing provider for webhook + pricingProvider := pricing.NewStaticPricingProvider() + startWebhook(mgr, portAllocator, pricingProvider) - scheduler := startScheduler(ctx, allocator, mgr) + scheduler := startScheduler(ctx, allocator, mgr, k8sVersion) startCustomResourceController(ctx, mgr, metricsRecorder, allocator, portAllocator) @@ -356,9 +371,10 @@ func startCustomResourceController( } if err = (&controller.GPUNodeReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("GPUNode"), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("GPUNode"), + Allocator: allocator, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "GPUNode") os.Exit(1) @@ -395,6 +411,23 @@ 
func startCustomResourceController( setupLog.Error(err, "unable to create controller", "controller", "Pod") os.Exit(1) } + + // Setup ResourceClaim controller for DRA Phase 2 + if err = (&dra.ResourceClaimReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ResourceClaim") + os.Exit(1) + } + // Setup ResourceSlice controller for DRA Phase 2 + if err = (&dra.ResourceSliceReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ResourceSlice") + os.Exit(1) + } if err = (&controller.NodeReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), @@ -441,11 +474,15 @@ func startCustomResourceController( } } -func startWebhook(mgr manager.Manager, portAllocator *portallocator.PortAllocator) { +func startWebhook( + mgr manager.Manager, + portAllocator *portallocator.PortAllocator, + pricingProvider pricing.PricingProvider, +) { if os.Getenv(constants.EnableWebhookEnv) == constants.FalseStringValue { return } - if err := webhookcorev1.SetupPodWebhookWithManager(mgr, portAllocator); err != nil { + if err := webhookcorev1.SetupPodWebhookWithManager(mgr, portAllocator, pricingProvider); err != nil { setupLog.Error(err, "unable to create webhook", "webhook", "Pod") os.Exit(1) } @@ -455,6 +492,7 @@ func startScheduler( ctx context.Context, allocator *gpuallocator.GpuAllocator, mgr manager.Manager, + k8sVersion *k8sVer.Version, ) *scheduler.Scheduler { if os.Getenv(constants.EnableSchedulerEnv) == constants.FalseStringValue { return nil @@ -473,7 +511,9 @@ func startScheduler( gpuTopoPlugin.NewWithDeps(allocator, mgr.GetClient()), ) - cc, scheduler, err := sched.SetupScheduler(ctx, mgr, schedulerConfigPath, false, gpuResourceFitOpt, gpuTopoOpt) + cc, scheduler, err := sched.SetupScheduler( + ctx, mgr, schedulerConfigPath, false, k8sVersion, gpuResourceFitOpt, gpuTopoOpt, + ) if err != nil { setupLog.Error(err, "unable to create tensor fusion scheduler") os.Exit(1) @@ -570,7 +610,7 @@ func startMetricsRecorder( // Worker level map will be updated by cluster reconcile // Key is poolName, second level key is QoS level - WorkerUnitPriceMap: make(map[string]map[string]metrics.RawBillingPricing), + WorkerUnitPriceMap: make(map[string]map[string]metrics.RawBillingPricing, 8), } if enableLeaderElection { go func() { diff --git a/cmd/sched/setup.go b/cmd/sched/setup.go index 2818fba2..20b28f96 100644 --- a/cmd/sched/setup.go +++ b/cmd/sched/setup.go @@ -22,6 +22,8 @@ import ( "strings" utilerrors "k8s.io/apimachinery/pkg/util/errors" + k8sVer "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/events" "k8s.io/component-base/configz" "k8s.io/klog/v2" @@ -50,6 +52,7 @@ func SetupScheduler( mgr manager.Manager, schedulerConfigPath string, disableHttpEndpoint bool, + k8sVersion *k8sVer.Version, outOfTreeRegistryOptions ...app.Option, ) (*schedulerserverconfig.CompletedConfig, *scheduler.Scheduler, error) { opts := options.NewOptions() @@ -73,6 +76,12 @@ func SetupScheduler( return nil, nil, err } + // Setup enumerationVersion again since it's overridden by the config + err = feature.DefaultMutableFeatureGate.SetEmulationVersion(k8sVersion) + if err != nil { + return nil, nil, err + } + if cfg, err := latest.Default(); err != nil { return nil, nil, err } else { diff --git a/config/crd/bases/tensor-fusion.ai_gpupools.yaml 
b/config/crd/bases/tensor-fusion.ai_gpupools.yaml index 8bc65e66..2158529c 100644 --- a/config/crd/bases/tensor-fusion.ai_gpupools.yaml +++ b/config/crd/bases/tensor-fusion.ai_gpupools.yaml @@ -562,6 +562,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml index 91a01eae..7c0c281b 100644 --- a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -143,6 +143,20 @@ spec: type: string type: object type: object + dra: + description: enable Dynamic Resource Allocation (DRA) for GPU resource + management + properties: + enable: + description: Enable DRA mode for all workloads in this configuration + template + type: boolean + resourceClass: + default: tensorfusion.ai/gpu + description: ResourceClass specifies the DRA resource class name + to use + type: string + type: object hypervisor: description: single GPU device multi-process queuing and fair scheduling with QoS constraint diff --git a/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml b/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml index 45bc9a47..496541bc 100644 --- a/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml +++ b/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml @@ -629,6 +629,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/config/samples/dynamic-config.yaml b/config/samples/dynamic-config.yaml index c3102f3b..ae9350a3 100644 --- a/config/samples/dynamic-config.yaml +++ b/config/samples/dynamic-config.yaml @@ -1,23 +1,260 @@ metricsTTL: 30d # default to 'influx', influx v2 line protocol -metricsFormat: json +metricsFormat: influx -alertRules: -- name: GPUTFlopsFull - query: | - SELECT - node, - pool, - uuid, - avg(compute_percentage) AS compute_used - FROM tf_gpu_usage - WHERE compute_percentage > {{ .Threshold }} AND {{ .Conditions }} - GROUP BY node, pool, uuid - threshold: 97 - evaluationInterval: 30s - consecutiveCount: 4 - severity: P1 - summary: "GPU TFlops Full, used {{ .compute_used }}% on {{ .node }} {{ .uuid }}" - alertTargetInstance: "{{ .uuid }}" - description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has TFlops usage above {{ .Threshold }}% for 4 consecutive 30s, average usage: {{ .compute_used }}%" \ No newline at end of file +alertRules: + # Worker TFlops throttled alert + - name: WorkerTFlopsThrottled + query: | + SELECT workload, worker, uuid, node, MAX(compute_throttled_cnt)-MIN(compute_throttled_cnt) as throttled_increase + FROM tf_worker_usage + WHERE {{ .Conditions }} + GROUP BY workload, worker, uuid, node + HAVING throttled_increase > {{ .Threshold }} + threshold: 0 + evaluationInterval: 15s + consecutiveCount: 3 + severity: P1 + summary: "Worker TFlops Throttled" + description: "Worker {{ .worker }} from Node {{ .node }} is using more than {{ .Threshold 
}}% of its TFlops limit" + alertTargetInstance: "{{ .worker }}-{{ .uuid }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # Worker VRAM switching too frequent alert + - name: WorkerVRAMSwitchCountIncreasing + query: | + SELECT workload, worker, uuid, node, MAX(vram_resumed_cnt)-MIN(vram_resumed_cnt) as switch_increase + FROM tf_worker_usage + WHERE {{ .Conditions }} + GROUP BY workload, worker, uuid, node + HAVING switch_increase > {{ .Threshold }} + threshold: 0 + evaluationInterval: 2m + consecutiveCount: 1 + severity: P1 + summary: "Worker VRAM Switch Count Increasing" + description: "Worker {{ .worker }} from Node {{ .node }} has switched VRAM {{ .switch_increase }} times in last 2 minutes, GPU may be too hot" + alertTargetInstance: "{{ .worker }}-{{ .uuid }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # Worker can not scale up/scheduled alert + - name: WorkerAllocationFailed + query: | + SELECT pool, (MAX(total_allocation_fail_cnt) - MIN(total_allocation_fail_cnt)) as failure_increase, + FROM tf_system_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING failure_increase > {{ .Threshold }} + threshold: 0 + evaluationInterval: 30s + consecutiveCount: 1 + severity: P1 + summary: "Worker allocation failed for GPU Pool {{ .pool }}" + description: "Worker allocation failed, {{ .failure_increase }} times in last 30 seconds for GPU Pool {{ .pool }}" + alertTargetInstance: "{{ .pool }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # Single GPU Alerts + + # GPU VRAM Full Alert + - name: GPUVRAMFull + query: | + SELECT + node, + pool, + uuid, + avg(memory_percentage) AS memory_used + FROM tf_gpu_usage + WHERE memory_percentage > {{ .Threshold }} AND {{ .Conditions }} + GROUP BY node, pool, uuid + threshold: 97 + evaluationInterval: 30s + consecutiveCount: 2 + severity: P1 + summary: "GPU VRAM Full, used {{ .memory_used }}% on {{ .node }} {{ .uuid }}" + alertTargetInstance: "{{ .uuid }}" + description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has VRAM usage above {{ .Threshold }}% for 2 consecutive 30s, average usage: {{ .memory_used }}%" + + # GPU TFlops Full Alert + - name: GPUTFlopsFull + query: | + SELECT + node, + pool, + uuid, + avg(compute_percentage) AS compute_used + FROM tf_gpu_usage + WHERE compute_percentage > {{ .Threshold }} AND {{ .Conditions }} + GROUP BY node, pool, uuid + threshold: 97 + evaluationInterval: 30s + consecutiveCount: 4 + severity: P1 + summary: "GPU TFlops Full, used {{ .compute_used }}% on {{ .node }} {{ .uuid }}" + alertTargetInstance: "{{ .uuid }}" + description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has TFlops usage above {{ .Threshold }}% for 4 consecutive 30s, average usage: {{ .compute_used }}%" + + # GPU Temperature alert + - name: GPUTemperatureHigh + query: | + SELECT + node, + pool, + uuid, + avg(temperature) AS avg_temperature + FROM tf_gpu_usage + WHERE temperature > {{ .Threshold }} AND {{ .Conditions }} + GROUP BY node, pool, uuid + threshold: 90 + evaluationInterval: 30s + consecutiveCount: 3 + severity: P1 + summary: "GPU Temperature High, {{ .avg_temperature }}°C on {{ .node }} {{ .uuid }}" + alertTargetInstance: "{{ .uuid }}" + description: "GPU {{ .uuid }} from Node {{ .node }} has temperature above {{ .Threshold }}°C, Average temperature: {{ .avg_temperature }}, GPU Pool: {{ .pool }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # GPU Pool Alerts + + # Node TFlops allocation alert 
+ - name: NodeTFlopsAllocationCritical + query: | + SELECT node, pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING tflops_available < {{ .Threshold }} + threshold: 5 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P0 + summary: "Available TFlops below threshold, remaining {{ .tflops_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + - name: NodeTFlopsAllocationWarning + query: | + SELECT node, pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING tflops_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Node available TFlops below threshold, remaining {{ .tflops_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + # Pool TFlops allocation alert - Total + - name: PoolTotalTFlopsAllocationCritical + query: | + SELECT pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING tflops_available < {{ .Threshold }} + threshold: 5 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P0 + summary: "Pool available TFlops below threshold, remaining {{ .tflops_available }}%" + description: "Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .pool }}" + + - name: PoolTotalTFlopsAllocationWarning + query: | + SELECT pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING tflops_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Pool available TFlops below threshold, remaining {{ .tflops_available }}%" + description: "Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .pool }}" + + # Node VRAM allocation alert + - name: NodeVRAMAllocationCritical + query: | + SELECT node, pool, (100 - avg(allocated_vram_percent)) as vram_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING vram_available < {{ .Threshold }} + threshold: 5 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Node available VRAM below threshold, remaining {{ .vram_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available VRAM below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + - name: NodeVRAMAllocationWarning + query: | + SELECT node, pool, (100 - avg(allocated_vram_percent)) as vram_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING vram_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Node available VRAM below threshold, remaining {{ .vram_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available VRAM below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + # Pool VRAM allocation alert + - name: PoolVRAMAllocationWarning + query: | + SELECT pool, (100 - avg(allocated_vram_percent)) as vram_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING 
vram_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Pool available VRAM below threshold, remaining {{ .vram_available }}% for {{ .pool }}" + description: "Pool {{ .pool }} has available VRAM below {{ .Threshold }}%" + alertTargetInstance: "{{ .pool }}" + + # Empty or Idle GPU Alert + - name: EmptyGPU + query: | + SELECT DISTINCT node + FROM tf_node_metrics + WHERE {{ .Conditions }} AND node NOT IN ( + SELECT DISTINCT node + FROM tf_worker_usage + WHERE {{ .Conditions }} + ) + threshold: 0 + evaluationInterval: 5m + consecutiveCount: 2 + severity: P2 + summary: "Empty GPU without any workload, Node {{ .node }}" + description: "GPU Node {{ .node }} has no workload running, should be decommissioned" + alertTargetInstance: "{{ .node }}" + + - name: IdleGPU + query: | + SELECT node, pool, uuid, avg(compute_percentage) as compute, avg(memory_percentage) vram + FROM tf_gpu_usage + WHERE {{ .Conditions }} + GROUP BY node, pool, uuid + HAVING compute < 1 and vram < {{ .Threshold }}; + threshold: 5 + evaluationInterval: 10m + consecutiveCount: 3 + severity: P2 + summary: "Idle GPU found: {{ .uuid }} on Node {{ .node }}" + description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has been idle for 3 consecutive 10m, compute: {{ .compute }}, vram: {{ .vram }}" + alertTargetInstance: "{{ .uuid }}" diff --git a/go.mod b/go.mod index 63af41a8..0a8a5ece 100644 --- a/go.mod +++ b/go.mod @@ -6,171 +6,185 @@ require ( github.com/DATA-DOG/go-sqlmock v1.5.2 github.com/NVIDIA/go-nvml v0.13.0-1 github.com/aliyun/alibaba-cloud-sdk-go v1.63.107 - github.com/aws/aws-sdk-go-v2 v1.38.1 - github.com/aws/aws-sdk-go-v2/service/ec2 v1.241.0 - github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5 + github.com/aws/aws-sdk-go-v2 v1.39.0 + github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0 + github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5 github.com/gin-contrib/gzip v1.2.3 github.com/gin-gonic/gin v1.10.1 + github.com/go-sql-driver/mysql v1.9.3 github.com/google/cel-go v0.23.2 github.com/influxdata/line-protocol/v2 v2.2.1 github.com/lithammer/shortuuid/v4 v4.2.0 github.com/mitchellh/mapstructure v1.5.0 - github.com/onsi/ginkgo/v2 v2.23.4 - github.com/onsi/gomega v1.38.0 + github.com/onsi/ginkgo/v2 v2.25.3 + github.com/onsi/gomega v1.38.2 github.com/pkg/errors v0.9.1 github.com/samber/lo v1.51.0 github.com/shirou/gopsutil v3.21.11+incompatible - github.com/stretchr/testify v1.11.0 - go.opentelemetry.io/otel v1.37.0 - golang.org/x/time v0.12.0 + github.com/stretchr/testify v1.11.1 + go.opentelemetry.io/otel v1.38.0 + go.uber.org/zap v1.27.0 + golang.org/x/time v0.13.0 gomodules.xyz/jsonpatch/v2 v2.5.0 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gorm.io/driver/mysql v1.6.0 - gorm.io/gorm v1.30.1 - k8s.io/api v0.33.3 - k8s.io/apimachinery v0.33.3 - k8s.io/client-go v0.33.3 - k8s.io/component-base v0.33.3 - k8s.io/component-helpers v0.33.3 + gorm.io/gorm v1.31.0 + k8s.io/api v0.34.1 + k8s.io/apimachinery v0.34.1 + k8s.io/apiserver v0.34.0 + k8s.io/client-go v0.34.1 + k8s.io/component-base v0.34.0 + k8s.io/component-helpers v0.34.1 k8s.io/klog/v2 v2.130.1 - k8s.io/kubernetes v1.33.3 - k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 - sigs.k8s.io/controller-runtime v0.21.0 - sigs.k8s.io/karpenter v1.6.1 - sigs.k8s.io/scheduler-plugins v0.32.7 + k8s.io/kube-scheduler v0.34.0 + k8s.io/kubernetes v1.34.0 + k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d + sigs.k8s.io/controller-runtime v0.22.1 + sigs.k8s.io/karpenter v1.6.2 
sigs.k8s.io/yaml v1.6.0 ) require ( - cel.dev/expr v0.23.1 // indirect + cel.dev/expr v0.24.0 // indirect filippo.io/edwards25519 v1.1.0 // indirect - github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/Masterminds/semver/v3 v3.4.0 // indirect github.com/NYTimes/gziphandler v1.1.1 // indirect github.com/antlr4-go/antlr/v4 v4.13.1 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.2 // indirect - github.com/aws/smithy-go v1.22.5 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6 // indirect + github.com/aws/smithy-go v1.23.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect - github.com/bytedance/sonic v1.13.2 // indirect - github.com/bytedance/sonic/loader v0.2.4 // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/bytedance/gopkg v0.1.3 // indirect + github.com/bytedance/sonic v1.14.1 // indirect + github.com/bytedance/sonic/loader v0.3.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/cloudwego/base64x v0.1.5 // indirect + github.com/cloudwego/base64x v0.1.6 // indirect github.com/coreos/go-semver v0.3.1 // indirect - github.com/coreos/go-systemd/v22 v22.5.0 // indirect + github.com/coreos/go-systemd/v22 v22.6.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/distribution/reference v0.6.0 // indirect - github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/fsnotify/fsnotify v1.8.0 // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect - github.com/gabriel-vasile/mimetype v1.4.8 // indirect - github.com/gin-contrib/sse v1.0.0 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.10 // indirect + github.com/gin-contrib/sse v1.1.0 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect - github.com/go-ole/go-ole v1.2.6 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.21.0 // indirect - github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-ole/go-ole v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.22.0 // indirect + github.com/go-openapi/jsonreference v0.21.1 // indirect + github.com/go-openapi/swag v0.24.1 // indirect + github.com/go-openapi/swag/cmdutils v0.24.0 // indirect + github.com/go-openapi/swag/conv v0.24.0 // indirect + github.com/go-openapi/swag/fileutils v0.24.0 // indirect + github.com/go-openapi/swag/jsonname v0.24.0 // indirect + github.com/go-openapi/swag/jsonutils v0.24.0 // indirect + github.com/go-openapi/swag/loading 
v0.24.0 // indirect + github.com/go-openapi/swag/mangling v0.24.0 // indirect + github.com/go-openapi/swag/netutils v0.24.0 // indirect + github.com/go-openapi/swag/stringutils v0.24.0 // indirect + github.com/go-openapi/swag/typeutils v0.24.0 // indirect + github.com/go-openapi/swag/yamlutils v0.24.0 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-playground/validator/v10 v10.26.0 // indirect - github.com/go-sql-driver/mysql v1.8.1 // indirect + github.com/go-playground/validator/v10 v10.27.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/goccy/go-json v0.10.5 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/btree v1.1.3 // indirect - github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/cel-go v0.26.1 // indirect + github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect + github.com/google/pprof v0.0.0-20250903194437-c28834ac2320 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2 // indirect - github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/klauspost/cpuid/v2 v2.3.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect - github.com/moby/term v0.5.0 // indirect + github.com/moby/term v0.5.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect - github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.22.0 // indirect + github.com/prometheus/client_golang v1.23.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.62.0 // indirect - github.com/prometheus/procfs v0.15.1 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.17.0 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect - github.com/spf13/cobra v1.8.1 // indirect - github.com/spf13/pflag v1.0.6 // indirect - github.com/stoewer/go-strcase v1.3.0 // indirect + github.com/spf13/cobra v1.10.1 // indirect + github.com/spf13/pflag v1.0.10 // indirect + github.com/stoewer/go-strcase v1.3.1 // indirect 
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/ugorji/go/codec v1.2.12 // indirect + github.com/ugorji/go/codec v1.3.0 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - go.etcd.io/etcd/api/v3 v3.5.21 // indirect - go.etcd.io/etcd/client/pkg/v3 v3.5.21 // indirect - go.etcd.io/etcd/client/v3 v3.5.21 // indirect + go.etcd.io/etcd/api/v3 v3.6.4 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.6.4 // indirect + go.etcd.io/etcd/client/v3 v3.6.4 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 // indirect - go.opentelemetry.io/otel/metric v1.37.0 // indirect - go.opentelemetry.io/otel/sdk v1.33.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect - go.opentelemetry.io/proto/otlp v1.4.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/sdk v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect + go.opentelemetry.io/proto/otlp v1.8.0 // indirect go.uber.org/automaxprocs v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect - go.uber.org/zap v1.27.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect - golang.org/x/arch v0.15.0 // indirect - golang.org/x/crypto v0.39.0 // indirect - golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect - golang.org/x/net v0.41.0 // indirect - golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sync v0.15.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/term v0.32.0 // indirect - golang.org/x/text v0.26.0 // indirect - golang.org/x/tools v0.33.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20241223144023-3abc09e42ca8 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d // indirect - google.golang.org/grpc v1.69.4 // indirect - google.golang.org/protobuf v1.36.6 // indirect - gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/arch v0.21.0 // indirect + golang.org/x/crypto v0.41.0 // indirect + golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/oauth2 v0.31.0 // indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect + golang.org/x/tools v0.36.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250826171959-ef028d996bc1 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 // indirect + google.golang.org/grpc v1.75.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - 
k8s.io/apiextensions-apiserver v0.33.2 // indirect - k8s.io/apiserver v0.33.2 // indirect - k8s.io/cloud-provider v0.33.2 // indirect - k8s.io/controller-manager v0.33.2 // indirect - k8s.io/csi-translation-lib v0.33.2 // indirect - k8s.io/dynamic-resource-allocation v0.33.1 // indirect - k8s.io/kms v0.33.2 // indirect - k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a // indirect - k8s.io/kube-scheduler v0.32.7 // indirect - k8s.io/kubelet v0.33.1 // indirect - sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect - sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + k8s.io/apiextensions-apiserver v0.34.0 // indirect + k8s.io/cloud-provider v0.34.0 // indirect + k8s.io/controller-manager v0.34.0 // indirect + k8s.io/csi-translation-lib v0.34.0 // indirect + k8s.io/dynamic-resource-allocation v0.34.0 // indirect + k8s.io/kms v0.34.0 // indirect + k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611 // indirect + k8s.io/kubelet v0.34.0 // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect ) diff --git a/go.sum b/go.sum index e98c785d..0130fbbf 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,16 @@ -cel.dev/expr v0.23.1 h1:K4KOtPCJQjVggkARsjG9RWXP6O4R73aHeJMa/dmCQQg= -cel.dev/expr v0.23.1/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= +cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY= +cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= +github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= @@ -22,43 +24,43 @@ github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYW github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= github.com/avast/retry-go v3.0.0+incompatible 
h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= -github.com/aws/aws-sdk-go-v2 v1.38.1 h1:j7sc33amE74Rz0M/PoCpsZQ6OunLqys/m5antM0J+Z8= -github.com/aws/aws-sdk-go-v2 v1.38.1/go.mod h1:9Q0OoGQoboYIAJyslFyF1f5K1Ryddop8gqMhWx/n4Wg= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2 h1:sPiRHLVUIIQcoVZTNwqQcdtjkqkPopyYmIX0M5ElRf4= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2/go.mod h1:ik86P3sgV+Bk7c1tBFCwI3VxMoSEwl4YkRB9xn1s340= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2 h1:ZdzDAg075H6stMZtbD2o+PyB933M/f20e9WmCBC17wA= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2/go.mod h1:eE1IIzXG9sdZCB0pNNpMpsYTLl4YdOQD3njiVN1e/E4= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.241.0 h1:twGX//bv1QH/9pyJaqynNSo0eXGkDEdDTFy8GNPsz5M= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.241.0/go.mod h1:HDxGArx3/bUnkoFsuvTNIxEj/cR3f+IgsVh1B7Pvay8= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 h1:6+lZi2JeGKtCraAj1rpoZfKqnQ9SptseRZioejfUOLM= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0/go.mod h1:eb3gfbVIxIoGgJsi9pGne19dhCBpK6opTYpQqAmdy44= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.2 h1:oxmDEO14NBZJbK/M8y3brhMFEIGN4j8a6Aq8eY0sqlo= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.2/go.mod h1:4hH+8QCrk1uRWDPsVfsNDUup3taAjO8Dnx63au7smAU= -github.com/aws/smithy-go v1.22.5 h1:P9ATCXPMb2mPjYBgueqJNCA5S9UfktsW0tTxi+a7eqw= -github.com/aws/smithy-go v1.22.5/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= -github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5 h1:m/qynRSKYe4RKSroVqRRgMlp/cUXO54SY2upSUqfcqw= -github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5/go.mod h1:3Lf3VaiJyr3IP0gH53sZp16Tu5CmoaDSUv4KQwFQO/I= +github.com/aws/aws-sdk-go-v2 v1.39.0 h1:xm5WV/2L4emMRmMjHFykqiA4M/ra0DJVSWUkDyBjbg4= +github.com/aws/aws-sdk-go-v2 v1.39.0/go.mod h1:sDioUELIUO9Znk23YVmIk86/9DOpkbyyVb1i/gUNFXY= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6 h1:uF68eJA6+S9iVr9WgX1NaRGyQ/6MdIyc4JNUo6TN1FA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6/go.mod h1:qlPeVZCGPiobx8wb1ft0GHT5l+dc6ldnwInDFaMvC7Y= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6 h1:pa1DEC6JoI0zduhZePp3zmhWvk/xxm4NB8Hy/Tlsgos= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6/go.mod h1:gxEjPebnhWGJoaDdtDkA0JX46VRg1wcTHYe63OfX5pE= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0 h1:hGHSNZDTFnhLGUpRkQORM8uBY9R/FOkxCkuUUJBEOQ4= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0/go.mod h1:SmMqzfS4HVsOD58lwLZ79oxF58f8zVe5YdK3o+/o1Ck= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 h1:oegbebPEMA/1Jny7kvwejowCaHz1FWZAQ94WXFNCyTM= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1/go.mod h1:kemo5Myr9ac0U9JfSjMo9yHLtw+pECEHsFtJ9tqCEI8= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6 h1:LHS1YAIJXJ4K9zS+1d/xa9JAA9sL2QyXIQCQFQW/X08= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6/go.mod h1:c9PCiTEuh0wQID5/KqA32J+HAgZxN9tOGXKCiYJjTZI= +github.com/aws/smithy-go v1.23.0 h1:8n6I3gXzWJB2DxBDnfxgBaSX6oe0d/t10qGz7OKqMCE= +github.com/aws/smithy-go v1.23.0/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= +github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5 h1:MM4Y7+YqhWLZiRuZfWrAXD2rZ0maVePbzARP3adeJ+g= +github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5/go.mod 
h1:OCT5DIzVB2740qVgfRz0zQe/dDdvnsnFarzy6VdYNoA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= -github.com/bytedance/sonic v1.13.2 h1:8/H1FempDZqC4VqjptGo14QQlJx8VdZJegxs6wwfqpQ= -github.com/bytedance/sonic v1.13.2/go.mod h1:o68xyaF9u2gvVBuGHPlUVCy+ZfmNNO5ETf1+KgkJhz4= -github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= -github.com/bytedance/sonic/loader v0.2.4 h1:ZWCw4stuXUsn1/+zQDqeE7JKP+QO47tz7QCNan80NzY= -github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= -github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= -github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M= +github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM= +github.com/bytedance/sonic v1.14.1 h1:FBMC0zVz5XUmE4z9wF4Jey0An5FueFvOsTKKKtwIl7w= +github.com/bytedance/sonic v1.14.1/go.mod h1:gi6uhQLMbTdeP0muCnrjHLeCUPyb70ujhnNlhOylAFc= +github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA= +github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cloudwego/base64x v0.1.5 h1:XPciSp1xaq2VCSt6lF0phncD4koWyULpl5bUxbfCyP4= -github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= -github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M= +github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU= github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= -github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo= +github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= @@ -70,8 +72,8 @@ github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5Qvfr github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= 
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= -github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= @@ -83,16 +85,16 @@ github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk= github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= -github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= -github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= -github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= -github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0= +github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= github.com/gin-contrib/gzip v1.2.3 h1:dAhT722RuEG330ce2agAs75z7yB+NKvX/ZM1r8w0u2U= github.com/gin-contrib/gzip v1.2.3/go.mod h1:ad72i4Bzmaypk8M762gNXa2wkxxjbz0icRNnuLJ9a/c= -github.com/gin-contrib/sse v1.0.0 h1:y3bT1mUWUxDpW4JLQg/HnTqV4rozuW4tC9eFKTxYI9E= -github.com/gin-contrib/sse v1.0.0/go.mod h1:zNuFdwarAygJBht0NTKiSi3jRf6RbqeILZ9Sp6Slhe0= +github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w= +github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM= github.com/gin-gonic/gin v1.10.1 h1:T0ujvqyCSqRopADpgPgiTT63DUQVSfojyME59Ei63pQ= github.com/gin-gonic/gin v1.10.1/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= @@ -103,66 +105,87 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= -github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= 
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= -github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= -github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= -github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= -github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= +github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/go-openapi/jsonpointer v0.22.0 h1:TmMhghgNef9YXxTu1tOopo+0BGEytxA+okbry0HjZsM= +github.com/go-openapi/jsonpointer v0.22.0/go.mod h1:xt3jV88UtExdIkkL7NloURjRQjbeUgcxFblMjq2iaiU= +github.com/go-openapi/jsonreference v0.21.1 h1:bSKrcl8819zKiOgxkbVNRUBIr6Wwj9KYrDbMjRs0cDA= +github.com/go-openapi/jsonreference v0.21.1/go.mod h1:PWs8rO4xxTUqKGu+lEvvCxD5k2X7QYkKAepJyCmSTT8= +github.com/go-openapi/swag v0.24.1 h1:DPdYTZKo6AQCRqzwr/kGkxJzHhpKxZ9i/oX0zag+MF8= +github.com/go-openapi/swag v0.24.1/go.mod h1:sm8I3lCPlspsBBwUm1t5oZeWZS0s7m/A+Psg0ooRU0A= +github.com/go-openapi/swag/cmdutils v0.24.0 h1:KlRCffHwXFI6E5MV9n8o8zBRElpY4uK4yWyAMWETo9I= +github.com/go-openapi/swag/cmdutils v0.24.0/go.mod h1:uxib2FAeQMByyHomTlsP8h1TtPd54Msu2ZDU/H5Vuf8= +github.com/go-openapi/swag/conv v0.24.0 h1:ejB9+7yogkWly6pnruRX45D1/6J+ZxRu92YFivx54ik= +github.com/go-openapi/swag/conv v0.24.0/go.mod h1:jbn140mZd7EW2g8a8Y5bwm8/Wy1slLySQQ0ND6DPc2c= +github.com/go-openapi/swag/fileutils v0.24.0 h1:U9pCpqp4RUytnD689Ek/N1d2N/a//XCeqoH508H5oak= +github.com/go-openapi/swag/fileutils v0.24.0/go.mod h1:3SCrCSBHyP1/N+3oErQ1gP+OX1GV2QYFSnrTbzwli90= +github.com/go-openapi/swag/jsonname v0.24.0 h1:2wKS9bgRV/xB8c62Qg16w4AUiIrqqiniJFtZGi3dg5k= +github.com/go-openapi/swag/jsonname v0.24.0/go.mod h1:GXqrPzGJe611P7LG4QB9JKPtUZ7flE4DOVechNaDd7Q= +github.com/go-openapi/swag/jsonutils v0.24.0 h1:F1vE1q4pg1xtO3HTyJYRmEuJ4jmIp2iZ30bzW5XgZts= +github.com/go-openapi/swag/jsonutils v0.24.0/go.mod h1:vBowZtF5Z4DDApIoxcIVfR8v0l9oq5PpYRUuteVu6f0= +github.com/go-openapi/swag/loading v0.24.0 h1:ln/fWTwJp2Zkj5DdaX4JPiddFC5CHQpvaBKycOlceYc= +github.com/go-openapi/swag/loading v0.24.0/go.mod h1:gShCN4woKZYIxPxbfbyHgjXAhO61m88tmjy0lp/LkJk= +github.com/go-openapi/swag/mangling v0.24.0 h1:PGOQpViCOUroIeak/Uj/sjGAq9LADS3mOyjznmHy2pk= +github.com/go-openapi/swag/mangling v0.24.0/go.mod h1:Jm5Go9LHkycsz0wfoaBDkdc4CkpuSnIEf62brzyCbhc= +github.com/go-openapi/swag/netutils v0.24.0 h1:Bz02HRjYv8046Ycg/w80q3g9QCWeIqTvlyOjQPDjD8w= +github.com/go-openapi/swag/netutils v0.24.0/go.mod h1:WRgiHcYTnx+IqfMCtu0hy9oOaPR0HnPbmArSRN1SkZM= +github.com/go-openapi/swag/stringutils v0.24.0 h1:i4Z/Jawf9EvXOLUbT97O0HbPUja18VdBxeadyAqS1FM= +github.com/go-openapi/swag/stringutils v0.24.0/go.mod h1:5nUXB4xA0kw2df5PRipZDslPJgJut+NjL7D25zPZ/4w= +github.com/go-openapi/swag/typeutils v0.24.0 h1:d3szEGzGDf4L2y1gYOSSLeK6h46F+zibnEas2Jm/wIw= +github.com/go-openapi/swag/typeutils v0.24.0/go.mod h1:q8C3Kmk/vh2VhpCLaoR2MVWOGP8y7Jc8l82qCTd1DYI= +github.com/go-openapi/swag/yamlutils v0.24.0 h1:bhw4894A7Iw6ne+639hsBNRHg9iZg/ISrOVr+sJGp4c= +github.com/go-openapi/swag/yamlutils v0.24.0/go.mod h1:DpKv5aYuaGm/sULePoeiG8uwMpZSfReo1HR3Ik0yaG8= github.com/go-playground/assert/v2 v2.2.0 
h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.26.0 h1:SP05Nqhjcvz81uJaRfEV0YBSSSGMc/iMaVtFbr3Sw2k= -github.com/go-playground/validator/v10 v10.26.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= -github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= -github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= +github.com/go-playground/validator/v10 v10.27.0 h1:w8+XrWVMhGkxOaaowyKH35gFydVHOvC0/uWoy2Fzwn4= +github.com/go-playground/validator/v10 v10.27.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= +github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo= +github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A= -github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI= -github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= +github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= +github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/cel-go v0.23.2 h1:UdEe3CvQh3Nv+E/j9r1Y//WO0K0cSyD7/y0bzyLIMI4= -github.com/google/cel-go v0.23.2/go.mod h1:52Pb6QsDbC5kvgxvZhiL9QX1oZEkcUF/ZqaPx1J5Wwo= -github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= -github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= +github.com/google/cel-go v0.26.1 h1:iPbVVEdkhTX++hpe3lzSk7D3G3QSYqLGoHOcEio+UXQ= +github.com/google/cel-go v0.26.1/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= 
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/pprof v0.0.0-20250903194437-c28834ac2320 h1:c7ayAhbRP9HnEl/hg/WQOM9s0snWztfW6feWXZbGHw0= +github.com/google/pprof v0.0.0-20250903194437-c28834ac2320/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= -github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= -github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA= +github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.0 h1:FbSCl+KggFl+Ocym490i/EyXF4lPgLoUtcSWquBM0Rs= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.0/go.mod h1:qOchhhIlmRcqk/O9uCo/puJlyo07YINaIqdZfZG3Jkc= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 h1:VNqngBF40hVlDloBruUehVYC3ArSgIyScOAyMRqBxRg= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1/go.mod h1:RBRO7fro65R6tjKzYgLAFo0t1QEXY1Dp+i/bvpRiqiQ= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -182,24 +205,20 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod 
h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= -github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= +github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I= +github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2 h1:uAwqOtyrFYggq3pVf3hs1XKkBxrQ8dkgjWz3LCLJsiY= -github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2/go.mod h1:LBzS4n6GX1C69tzSd5EibZ9cGOXFuHP7GxEMDYVe1sM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= -github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= -github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= +github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -221,28 +240,29 @@ github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4 github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= -github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= +github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= +github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= -github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= -github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY= -github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o= +github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw= +github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE= +github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= +github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= -github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= -github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= +github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -250,14 +270,14 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= -github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang 
v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= -github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= +github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= @@ -271,13 +291,13 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= -github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= -github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.1 h1:iS0MdW+kVTxgMoE1LAZyMiYJFKlOzLooE4MxjirtkAs= +github.com/stoewer/go-strcase v1.3.1/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -288,10 +308,8 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.10.0/go.mod 
h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/stretchr/testify v1.11.0 h1:ib4sjIrwZKxE5u/Japgo/7SJV3PvgjGiRNAvTVGqQl8= -github.com/stretchr/testify v1.11.0/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= @@ -300,8 +318,8 @@ github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaO github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= -github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= -github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA= +github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk= @@ -310,44 +328,42 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= -go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= -go.etcd.io/etcd/api/v3 v3.5.21 h1:A6O2/JDb3tvHhiIz3xf9nJ7REHvtEFJJ3veW3FbCnS8= -go.etcd.io/etcd/api/v3 v3.5.21/go.mod h1:c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY= -go.etcd.io/etcd/client/pkg/v3 v3.5.21 h1:lPBu71Y7osQmzlflM9OfeIV2JlmpBjqBNlLtcoBqUTc= -go.etcd.io/etcd/client/pkg/v3 v3.5.21/go.mod h1:BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs= -go.etcd.io/etcd/client/v2 v2.305.21 h1:eLiFfexc2mE+pTLz9WwnoEsX5JTTpLCYVivKkmVXIRA= -go.etcd.io/etcd/client/v2 v2.305.21/go.mod h1:OKkn4hlYNf43hpjEM3Ke3aRdUkhSl8xjKjSf8eCq2J8= -go.etcd.io/etcd/client/v3 v3.5.21 h1:T6b1Ow6fNjOLOtM0xSoKNQt1ASPCLWrF9XMHcH9pEyY= -go.etcd.io/etcd/client/v3 v3.5.21/go.mod h1:mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU= -go.etcd.io/etcd/pkg/v3 v3.5.21 h1:jUItxeKyrDuVuWhdh0HtjUANwyuzcb7/FAeUfABmQsk= -go.etcd.io/etcd/pkg/v3 v3.5.21/go.mod h1:wpZx8Egv1g4y+N7JAsqi2zoUiBIUWznLjqJbylDjWgU= -go.etcd.io/etcd/raft/v3 v3.5.21 h1:dOmE0mT55dIUsX77TKBLq+RgyumsQuYeiRQnW/ylugk= -go.etcd.io/etcd/raft/v3 v3.5.21/go.mod h1:fmcuY5R2SNkklU4+fKVBQi2biVp5vafMrWUEj4TJ4Cs= -go.etcd.io/etcd/server/v3 v3.5.21 h1:9w0/k12majtgarGmlMVuhwXRI2ob3/d1Ik3X5TKo0yU= -go.etcd.io/etcd/server/v3 v3.5.21/go.mod h1:G1mOzdwuzKT1VRL7SqRchli/qcFrtLBTAQ4lV20sXXo= +go.etcd.io/bbolt v1.4.2 
h1:IrUHp260R8c+zYx/Tm8QZr04CX+qWS5PGfPdevhdm1I= +go.etcd.io/bbolt v1.4.2/go.mod h1:Is8rSHO/b4f3XigBC0lL0+4FwAQv3HXEEIgFMuKHceM= +go.etcd.io/etcd/api/v3 v3.6.4 h1:7F6N7toCKcV72QmoUKa23yYLiiljMrT4xCeBL9BmXdo= +go.etcd.io/etcd/api/v3 v3.6.4/go.mod h1:eFhhvfR8Px1P6SEuLT600v+vrhdDTdcfMzmnxVXXSbk= +go.etcd.io/etcd/client/pkg/v3 v3.6.4 h1:9HBYrjppeOfFjBjaMTRxT3R7xT0GLK8EJMVC4xg6ok0= +go.etcd.io/etcd/client/pkg/v3 v3.6.4/go.mod h1:sbdzr2cl3HzVmxNw//PH7aLGVtY4QySjQFuaCgcRFAI= +go.etcd.io/etcd/client/v3 v3.6.4 h1:YOMrCfMhRzY8NgtzUsHl8hC2EBSnuqbR3dh84Uryl7A= +go.etcd.io/etcd/client/v3 v3.6.4/go.mod h1:jaNNHCyg2FdALyKWnd7hxZXZxZANb0+KGY+YQaEMISo= +go.etcd.io/etcd/pkg/v3 v3.6.4 h1:fy8bmXIec1Q35/jRZ0KOes8vuFxbvdN0aAFqmEfJZWA= +go.etcd.io/etcd/pkg/v3 v3.6.4/go.mod h1:kKcYWP8gHuBRcteyv6MXWSN0+bVMnfgqiHueIZnKMtE= +go.etcd.io/etcd/server/v3 v3.6.4 h1:LsCA7CzjVt+8WGrdsnh6RhC0XqCsLkBly3ve5rTxMAU= +go.etcd.io/etcd/server/v3 v3.6.4/go.mod h1:aYCL/h43yiONOv0QIR82kH/2xZ7m+IWYjzRmyQfnCAg= +go.etcd.io/raft/v3 v3.6.0 h1:5NtvbDVYpnfZWcIHgGRk9DyzkBIXOi8j+DDp1IcnUWQ= +go.etcd.io/raft/v3 v3.6.0/go.mod h1:nLvLevg6+xrVtHUmVaTcTz603gQPHfh7kUAwV6YpfGo= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 h1:PS8wXpbyaDJQ2VDHHncMe9Vct0Zn1fEjpsjrLxGJoSc= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0/go.mod h1:HDBUsEjOuRC0EzKZ1bSaRGZWUBAzo+MhAcUUORSr4D0= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.33.0 h1:iax7M131HuAm9QkZotNHEfstof92xM+N8sr3uHXc2IM= -go.opentelemetry.io/otel/sdk v1.33.0/go.mod h1:A1Q5oi7/9XaMlIWzPSxLRWOI8nG3FnzHJNbiENQuihM= -go.opentelemetry.io/otel/sdk/metric v1.31.0 h1:i9hxxLJF/9kkvfHppyLL55aW7iIJz4JjxTeYusH7zMc= -go.opentelemetry.io/otel/sdk/metric v1.31.0/go.mod h1:CRInTMVvNhUKgSAMbKyTMxqOBC0zgyxzW55lZzX43Y8= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= -go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg= -go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= 
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.8.0 h1:fRAZQDcAFHySxpJ1TwlA1cJ4tvcrw7nXl9xWWC8N5CE= +go.opentelemetry.io/proto/otlp v1.8.0/go.mod h1:tIeYOeNBU4cvmPqpaji1P+KbB4Oloai8wN4rWzRrFF0= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= @@ -360,23 +376,23 @@ go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= -go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE= -go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI= -golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw= -golang.org/x/arch v0.15.0/go.mod h1:JmwW7aLIoRUKgaTzhkiEFxvcEiQGyOg9BMonBJUS7EE= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/arch v0.21.0 h1:iTC9o7+wP6cPWpDWkivCvQFGAHDQ59SrSxsLPcnkArw= +golang.org/x/arch v0.21.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= -golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= +golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= +golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 h1:y5zboxd6LQAqYIhHnB48p0ByQ/GnQx2BE33L8BOHQkI= -golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= +golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b h1:DXr+pvt3nC887026GRP39Ej11UATqWDmWuS99x26cD0= +golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -388,32 +404,33 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= -golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= -golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo= +golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= -golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= -golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= -golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= -golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI= +golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -421,8 +438,8 @@ golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= -golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -431,24 +448,24 @@ gomodules.xyz/jsonpatch/v2 v2.5.0 
h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0 gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= -google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 h1:KAeGQVN3M9nD0/bQXnr/ClcEMJ968gUXJQ9pwfSynuQ= -google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80/go.mod h1:cc8bqMqtv9gMOr0zHg2Vzff5ULhhL2IXP4sbcn32Dro= -google.golang.org/genproto/googleapis/api v0.0.0-20241223144023-3abc09e42ca8 h1:st3LcW/BPi75W4q1jJTEor/QWwbNlPlDG0JTn6XhZu0= -google.golang.org/genproto/googleapis/api v0.0.0-20241223144023-3abc09e42ca8/go.mod h1:klhJGKFyG8Tn50enBn7gizg4nXGXJ+jqEREdCWaPcV4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d h1:xJJRGY7TJcvIlpSrN3K6LAWgNFUILlO+OMAqtg9aqnw= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d/go.mod h1:3ENsm/5D1mzDyhpzeRi1NR784I0BcofWBoSc5QqqMK4= -google.golang.org/grpc v1.69.4 h1:MF5TftSMkd8GLw/m0KM6V8CMOCY6NZ1NQDPGFgbTt4A= -google.golang.org/grpc v1.69.4/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/genproto/googleapis/api v0.0.0-20250826171959-ef028d996bc1 h1:APHvLLYBhtZvsbnpkfknDZ7NyH4z5+ub/I0u8L3Oz6g= +google.golang.org/genproto/googleapis/api v0.0.0-20250826171959-ef028d996bc1/go.mod h1:xUjFWUnWDpZ/C0Gu0qloASKFb6f8/QXiiXhSPFsD668= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 h1:pmJpJEvT846VzausCQ5d7KreSROcDqmO388w5YbnltA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1/go.mod h1:GmFNa4BdJZ2a8G+wCe9Bg3wwThLrJun751XstdJt5Og= +google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= +google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= -gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod 
h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= @@ -465,61 +482,56 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gorm.io/driver/mysql v1.6.0 h1:eNbLmNTpPpTOVZi8MMxCi2aaIm0ZpInbORNXDwyLGvg= gorm.io/driver/mysql v1.6.0/go.mod h1:D/oCC2GWK3M/dqoLxnOlaNKmXz8WNTfcS9y5ovaSqKo= -gorm.io/gorm v1.30.1 h1:lSHg33jJTBxs2mgJRfRZeLDG+WZaHYCk3Wtfl6Ngzo4= -gorm.io/gorm v1.30.1/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE= -k8s.io/api v0.33.3 h1:SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8= -k8s.io/api v0.33.3/go.mod h1:01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE= -k8s.io/apiextensions-apiserver v0.33.2 h1:6gnkIbngnaUflR3XwE1mCefN3YS8yTD631JXQhsU6M8= -k8s.io/apiextensions-apiserver v0.33.2/go.mod h1:IvVanieYsEHJImTKXGP6XCOjTwv2LUMos0YWc9O+QP8= -k8s.io/apimachinery v0.33.3 h1:4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA= -k8s.io/apimachinery v0.33.3/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= -k8s.io/apiserver v0.33.2 h1:KGTRbxn2wJagJowo29kKBp4TchpO1DRO3g+dB/KOJN4= -k8s.io/apiserver v0.33.2/go.mod h1:9qday04wEAMLPWWo9AwqCZSiIn3OYSZacDyu/AcoM/M= -k8s.io/client-go v0.33.3 h1:M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA= -k8s.io/client-go v0.33.3/go.mod h1:luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg= -k8s.io/cloud-provider v0.33.2 h1:tP/18SbhytAapqg2/tGD5PFUR6VLYra+QfJ7Qn3FN34= -k8s.io/cloud-provider v0.33.2/go.mod h1:yS8ArLLLZV1+Tv6hkSYrZuYEVz+wQgiekUtaqe9Wxao= -k8s.io/component-base v0.33.3 h1:mlAuyJqyPlKZM7FyaoM/LcunZaaY353RXiOd2+B5tGA= -k8s.io/component-base v0.33.3/go.mod h1:ktBVsBzkI3imDuxYXmVxZ2zxJnYTZ4HAsVj9iF09qp4= -k8s.io/component-helpers v0.33.3 h1:fjWVORSQfI0WKzPeIFSju/gMD9sybwXBJ7oPbqQu6eM= -k8s.io/component-helpers v0.33.3/go.mod h1:7iwv+Y9Guw6X4RrnNQOyQlXcvJrVjPveHVqUA5dm31c= -k8s.io/controller-manager v0.33.2 h1:HIs8PbdTOaY6wTOvKKLwoAHSO6GeDjmYS0Gjnd6rF+c= -k8s.io/controller-manager v0.33.2/go.mod h1:n8maAdN06E3cD0h5N0wuYBv9Qi9FePl7y6Iz3pfc9PY= -k8s.io/csi-translation-lib v0.33.2 h1:QyWkVcf0rbNjc53uAqCyl9kmHCRn1O0Z4QT69y/jwHQ= -k8s.io/csi-translation-lib v0.33.2/go.mod h1:nFPX6BA20EDdIQpitb6p2wVtvLBuXsmm6D1Cwi3rDnE= -k8s.io/dynamic-resource-allocation v0.33.1 h1:xnEWV764LIsRQDTQ0tLFQMz1lY34Ep7D+/NNbrODfm4= -k8s.io/dynamic-resource-allocation v0.33.1/go.mod h1:AgBLCrIi+//A4VKljjJ7YPpJ+LeyDyTvUk7v8+Qf3pI= +gorm.io/gorm v1.31.0 h1:0VlycGreVhK7RF/Bwt51Fk8v0xLiiiFdbGDPIZQ7mJY= +gorm.io/gorm v1.31.0/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs= +k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= +k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= +k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc= +k8s.io/apiextensions-apiserver v0.34.0/go.mod h1:hLI4GxE1BDBy9adJKxUxCEHBGZtGfIg98Q+JmTD7+g0= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= +k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apiserver v0.34.0 h1:Z51fw1iGMqN7uJ1kEaynf2Aec1Y774PqU+FVWCFV3Jg= +k8s.io/apiserver v0.34.0/go.mod h1:52ti5YhxAvewmmpVRqlASvaqxt0gKJxvCeW7ZrwgazQ= +k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= +k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= +k8s.io/cloud-provider v0.34.0 h1:OgrNE+WSgfvDBQf6WS9qFM7Xr37bc0Og5kkL4hyWDmU= +k8s.io/cloud-provider v0.34.0/go.mod h1:JbMa0t6JIGDMLI7Py6bdp9TN6cfuHrWGq+E/X+Ljkmo= 
+k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8= +k8s.io/component-base v0.34.0/go.mod h1:RSCqUdvIjjrEm81epPcjQ/DS+49fADvGSCkIP3IC6vg= +k8s.io/component-helpers v0.34.1 h1:gWhH3CCdwAx5P3oJqZKb4Lg5FYZTWVbdWtOI8n9U4XY= +k8s.io/component-helpers v0.34.1/go.mod h1:4VgnUH7UA/shuBur+OWoQC0xfb69sy/93ss0ybZqm3c= +k8s.io/controller-manager v0.34.0 h1:oCHoqS8dcFp7zDSu7HUvTpakq3isSxil3GprGGlJMsE= +k8s.io/controller-manager v0.34.0/go.mod h1:XFto21U+Mm9BT8r/Jd5E4tHCGtwjKAUFOuDcqaj2VK0= +k8s.io/csi-translation-lib v0.34.0 h1:WhCkq35XATZ+x6NKqI4u7XSYtmucuCN7jDk+mmm9XUU= +k8s.io/csi-translation-lib v0.34.0/go.mod h1:lZ+vpT3/6hx7GxXcI1mcoHxZSONvxgl2NwawzFnJP4Y= +k8s.io/dynamic-resource-allocation v0.34.0 h1:RrFNZXb2s5cvvf+KKdO92ss/e+zjGFFaDKAIpzA+Pu8= +k8s.io/dynamic-resource-allocation v0.34.0/go.mod h1:aqmoDIvXjQRhSgxQkFLl6+Ndg6MfdEOI+TQsj1j9V+g= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kms v0.33.2 h1:GFwNXX4CZGQCg9DPOaJi1/+iKidCtB9/OIAGdzRo8FI= -k8s.io/kms v0.33.2/go.mod h1:C1I8mjFFBNzfUZXYt9FZVJ8MJl7ynFbGgZFbBzkBJ3E= -k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a h1:ZV3Zr+/7s7aVbjNGICQt+ppKWsF1tehxggNfbM7XnG8= -k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= -k8s.io/kube-scheduler v0.32.7 h1:QOvu/fNEYGg1gzzpowWHFCI8SD3vJs5Iz0qebEQADd4= -k8s.io/kube-scheduler v0.32.7/go.mod h1:ez/2BnvZv2Bq1K9LpBsDgRsTvwJLAzkcpRMfY7rhLMA= -k8s.io/kubelet v0.33.1 h1:x4LCw1/iZVWOKA4RoITnuB8gMHnw31HPB3S0EF0EexE= -k8s.io/kubelet v0.33.1/go.mod h1:8WpdC9M95VmsqIdGSQrajXooTfT5otEj8pGWOm+KKfQ= -k8s.io/kubernetes v1.33.3 h1:dBx5Z2ZhR8kNzAwCoCz4j1niUbUrNUDVxeSj4/Ienu0= -k8s.io/kubernetes v1.33.3/go.mod h1:nrt8sldmckKz2fCZhgRX3SKfS2e+CzXATPv6ITNkU00= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= +k8s.io/kms v0.34.0 h1:u+/rcxQ3Jr7gC9AY5nXuEnBcGEB7ZOIJ9cdLdyHyEjQ= +k8s.io/kms v0.34.0/go.mod h1:s1CFkLG7w9eaTYvctOxosx88fl4spqmixnNpys0JAtM= +k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611 h1:o4oKOsvSymDkZRsMAPZU7bRdwL+lPOK5VS10Dr1D6eg= +k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/kube-scheduler v0.34.0 h1:iUT5spyg0RlZ9W5dImrxSxv0yTqbsI+/J72/Iuv9ed8= +k8s.io/kube-scheduler v0.34.0/go.mod h1:7pt2HDb32lZOihbt/aamuMBvSe1o+rrd2rQC8aJyfP0= +k8s.io/kubelet v0.34.0 h1:1nZt1Q6Kfx7xCaTS9vnqR9sjZDxf3cRSQkAFCczULmc= +k8s.io/kubelet v0.34.0/go.mod h1:NqbF8ViVettlZbf9hw9DJhubaWn7rGvDDTcLMDm6tQ0= +k8s.io/kubernetes v1.34.0 h1:NvUrwPAVB4W3mSOpJ/RtNGHWWYyUP/xPaX5rUSpzA0w= +k8s.io/kubernetes v1.34.0/go.mod h1:iu+FhII+Oc/1gGWLJcer6wpyih441aNFHl7Pvm8yPto= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d h1:wAhiDyZ4Tdtt7e46e9M5ZSAJ/MnPGPs+Ki1gHw4w1R0= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= -sigs.k8s.io/controller-runtime v0.21.0 
h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8= -sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/karpenter v1.6.1 h1:ZAC802Prk/GyKoGUu0LuzEn9fFmJLfUtMfo64derQgw= -sigs.k8s.io/karpenter v1.6.1/go.mod h1:AxCaeRjv1Pgw/Ff7vT4aqyXcg8v1UdBcfzWMCaKSVjA= -sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 h1:qPrZsv1cwQiFeieFlRqT627fVZ+tyfou/+S5S0H5ua0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= +sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg= +sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/karpenter v1.6.2 h1:WFayZ49CSOaDMku1iYBTsD3A9hOB2yU/U95VcSAJ8KM= +sigs.k8s.io/karpenter v1.6.2/go.mod h1:AxCaeRjv1Pgw/Ff7vT4aqyXcg8v1UdBcfzWMCaKSVjA= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/scheduler-plugins v0.32.7 h1:fGr4JKraaTe6it4PIqUlXStfctFKYxJgYkDsiU6699o= -sigs.k8s.io/scheduler-plugins v0.32.7/go.mod h1:Oem5rktj6wgFr2SUqcaInUTIBX8tlY8c4qid5vp2lBw= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/internal/cloudprovider/common/utils.go b/internal/cloudprovider/common/utils.go index fbe882ab..788ba96d 100644 --- a/internal/cloudprovider/common/utils.go +++ b/internal/cloudprovider/common/utils.go @@ -131,6 +131,16 @@ func CalculateLeastCostGPUNodes(ctx context.Context, provider types.GPUNodeProvi nodes := make([]tfv1.GPUNodeClaimSpec, 0, bestNumInstances) for i := int64(0); i < bestNumInstances; i++ { + + tflopsQuantity, err := resource.ParseQuantity(fmt.Sprintf("%f", bestInstance.FP16TFlopsPerGPU*float64(bestInstance.GPUCount))) + if err != nil { + return nil, fmt.Errorf("failed to parse GPUDeviceOffered: %v", err) + } + + vramQuantity, err := resource.ParseQuantity(fmt.Sprintf("%dGi", bestInstance.VRAMGigabytesPerGPU*bestInstance.GPUCount)) + if err != nil { + return nil, fmt.Errorf("failed to parse VRAMOffered: %v", err) + } nodes = append(nodes, tfv1.GPUNodeClaimSpec{ NodeName: fmt.Sprintf("%s-%s", pool.Name, generateRandomString(8)), InstanceType: bestInstance.InstanceType, @@ -139,8 +149,8 @@ func CalculateLeastCostGPUNodes(ctx context.Context, provider types.GPUNodeProvi Zone: zone, CapacityType: preferredCapacityType, - TFlopsOffered: 
resource.MustParse(fmt.Sprintf("%f", bestInstance.FP16TFlopsPerGPU*float64(bestInstance.GPUCount))), - VRAMOffered: resource.MustParse(fmt.Sprintf("%dGi", bestInstance.VRAMGigabytesPerGPU*bestInstance.GPUCount)), + TFlopsOffered: tflopsQuantity, + VRAMOffered: vramQuantity, GPUDeviceOffered: bestInstance.GPUCount, ExtraParams: cluster.Spec.ComputingVendor.Params.ExtraParams, diff --git a/internal/cloudprovider/karpenter/nodeclaim.go b/internal/cloudprovider/karpenter/nodeclaim.go index 2877e80d..15c8dcc0 100644 --- a/internal/cloudprovider/karpenter/nodeclaim.go +++ b/internal/cloudprovider/karpenter/nodeclaim.go @@ -318,7 +318,11 @@ func (p KarpenterGPUNodeProvider) buildNodeClaim(ctx context.Context, param *tfv // Add GPU resources if specified (Karpenter supports nvidia.com/gpu) if param.GPUDeviceOffered > 0 { - resourceRequests[karpenterConfig.GPUResourceName] = resource.MustParse(fmt.Sprintf("%d", param.GPUDeviceOffered)) + quantity, err := resource.ParseQuantity(fmt.Sprintf("%d", param.GPUDeviceOffered)) + if err != nil { + return nil, fmt.Errorf("failed to parse GPUDeviceOffered: %v", err) + } + resourceRequests[karpenterConfig.GPUResourceName] = quantity } // query nodeClass and build NodeClassRef diff --git a/internal/cloudprovider/pricing/pricing.go b/internal/cloudprovider/pricing/pricing.go index 33ee529f..45dd09bb 100644 --- a/internal/cloudprovider/pricing/pricing.go +++ b/internal/cloudprovider/pricing/pricing.go @@ -31,6 +31,7 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/types" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" + "k8s.io/apimachinery/pkg/api/resource" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -39,11 +40,17 @@ const ( providerAzure = "azure" ) +// CompleteGPUInfo combines GpuInfo with VRAM information from instance data +type CompleteGPUInfo struct { + *config.GpuInfo + VRAMGigabytes int32 +} + // Global data initialized at package load time var ( globalAWSGPUInstanceData map[string]GPUNodeInstanceInfoAndPrice globalAzureGPUInstanceData map[string]GPUNodeInstanceInfoAndPrice - tflopsMap map[string]*config.GpuInfo + tflopsMap map[string]*CompleteGPUInfo ) var readyCh = make(chan struct{}) @@ -51,8 +58,9 @@ var initOnce sync.Once // PricingProvider provides pricing information and calculations for instance types type PricingProvider interface { - GetPricing(instanceType, capacityType tfv1.CapacityTypeEnum) (float64, bool) - GetGPUNodeInstanceTypeInfo(region string) ([]string, bool) + GetPricing(instanceType string, capacityType tfv1.CapacityTypeEnum, region string) (float64, bool) + GetRegionalGPUNodeInstanceTypes(region string) ([]types.GPUNodeInstanceInfo, bool) + GetGPUCapacityByModel(gpuModel string) (resource.Quantity, resource.Quantity, bool) } type GPUNodeInstanceInfoAndPrice struct { @@ -77,7 +85,7 @@ var awsCSV string var azureCSV string func init() { - tflopsMap = make(map[string]*config.GpuInfo, 100) + tflopsMap = make(map[string]*CompleteGPUInfo, 100) } func SetTflopsMapAndInitGPUPricingInfo(ctx context.Context, gpuInfos *[]config.GpuInfo) { @@ -86,8 +94,14 @@ func SetTflopsMapAndInitGPUPricingInfo(ctx context.Context, gpuInfos *[]config.G return } for _, gpuInfo := range *gpuInfos { - tflopsMap[gpuInfo.FullModelName] = &gpuInfo - tflopsMap[gpuInfo.Model] = &gpuInfo + if tflopsMap[gpuInfo.FullModelName] != nil { + continue + } + completeInfo := &CompleteGPUInfo{ + GpuInfo: &gpuInfo, + } + tflopsMap[gpuInfo.FullModelName] = completeInfo + tflopsMap[gpuInfo.Model] = 
completeInfo } initOnce.Do(func() { @@ -151,6 +165,11 @@ func loadCSVInstanceDataFromPath(ctx context.Context, data []byte, provider stri } instanceInfo.FP16TFlopsPerGPU = gpuInfo.Fp16TFlops.AsApproximateFloat64() + // Fill VRAM information if not already set + if gpuInfo.VRAMGigabytes == 0 { + gpuInfo.VRAMGigabytes = instanceInfo.VRAMGigabytesPerGPU + } + instanceInfoAndPrice := GPUNodeInstanceInfoAndPrice{ GPUNodeInstanceInfo: instanceInfo, onDemandPrice: prices[0], @@ -416,3 +435,19 @@ func (p *StaticPricingProvider) GetRegionalGPUNodeInstanceTypes(region string) ( return instanceTypes, len(instanceTypes) > 0 } + +// GetGPUCapacityByModel gets the full capacity (TFlops and VRAM) for a GPU model +// Returns TFlops, VRAM, and whether found +func (p *StaticPricingProvider) GetGPUCapacityByModel(gpuModel string) (resource.Quantity, resource.Quantity, bool) { + <-readyCh + + gpuInfo, exists := tflopsMap[gpuModel] + if !exists { + return resource.Quantity{}, resource.Quantity{}, false + } + + tflops := gpuInfo.Fp16TFlops + vram := *resource.NewQuantity(int64(gpuInfo.VRAMGigabytes)*constants.GiBToBytes, resource.BinarySI) + + return tflops, vram, true +} diff --git a/internal/config/rules.go b/internal/config/rules.go index dd3713bd..8bbfb556 100644 --- a/internal/config/rules.go +++ b/internal/config/rules.go @@ -132,7 +132,7 @@ func (r *AlertRule) toPostableAlert(alertQueryResult map[string]interface{}, sta labels := LabelSet{ "alertname": r.Name, "severity": r.Severity, - "job": constants.AlertJobName, + "job": constants.TensorFusionSystemName, "instance": instance, } annotations := LabelSet{ diff --git a/internal/constants/constants.go b/internal/constants/constants.go index b1aa6b64..77648769 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -30,6 +30,7 @@ const ( LabelKeyClusterOwner = Domain + "/cluster" LabelKeyNodeClass = Domain + "/node-class" LabelKeyPodTemplateHash = Domain + "/pod-template-hash" + LabelNodeSelectorHash = Domain + "/node-selector-hash" LabelComponent = Domain + "/component" // used by TF connection, for matching the related connections when worker Pod state changed LabelWorkerName = Domain + "/worker-name" @@ -69,13 +70,12 @@ const ( GPUModelAnnotation = Domain + "/gpu-model" // GPU ID list is assigned by scheduler, should not specified by user GPUDeviceIDsAnnotation = Domain + "/gpu-ids" + DedicatedGPUAnnotation = Domain + "/dedicated-gpu" SetPendingOwnedWorkloadAnnotation = Domain + "/pending-owned-workload" PricingAnnotation = Domain + "/hourly-pricing" // In remote vGPU mode, selected workload is set by user with /workload annotation or generated by system SelectedWorkloadAnnotation = Domain + "/selected-workload" - CELFilterExpressionAnnotation = Domain + "/cel-filter-expression" - WorkloadModeAnnotation = Domain + "/workload-mode" WorkloadModeDynamic = "dynamic" WorkloadModeFixed = "fixed" @@ -108,6 +108,8 @@ const ( // For grey release TensorFusionEnabledReplicasAnnotation = Domain + "/enabled-replicas" TensorFusionDefaultPoolKeyAnnotation = Domain + "/is-default-pool" + // Eviction protection annotation for controlling pod eviction timing + EvictionProtectionAnnotation = Domain + "/eviction-protection" NamespaceDefaultVal = "tensor-fusion-sys" @@ -121,6 +123,27 @@ const ( QoSLevelMedium = "medium" QoSLevelHigh = "high" QoSLevelCritical = "critical" + + // DRA support + // annotation for pod to indicate if DRA is enabled + DRAEnabledAnnotation = Domain + "/dra-enabled" + DRACelExpressionAnnotation = Domain + 
"/dra-cel-expression" + + DRADriverName = Domain + ".dra-driver" + DRAResourceClaimName = "tensor-fusion-resource-claim-%s-%s" + // resource claim name for request + DRAResourceClaimRequestName = "tensor-fusion-resource-claim-request-%s" + + DRAClaimDefineName = "tensor-fusion-gpu-claim" + + TensorFusionResourceClaimTemplateLabel = Domain + "/resource-claim-template" + + // ResourceClaimTemplate related constants + DRAResourceClaimTemplateName = "tensor-fusion-gpu-template" + + // ResourceSlice related constants + DRAResourceSliceName = "tensor-fusion-resource-slice-%s" + DRAResourceSlicePool = "tensor-fusion-resource-slice-pool-%s" ) // for avoid golang lint issues @@ -177,7 +200,7 @@ const TFDataPath = "/run/tensor-fusion" const TFDataPathWorkerExpr = "shm/$(POD_NAMESPACE)/$(POD_NAME)" const DataVolumeName = "tf-data" const TensorFusionPoolManualCompaction = Domain + "/manual-compaction" -const AlertJobName = "tensor-fusion" +const TensorFusionSystemName = "tensor-fusion" const ( LeaderInfoConfigMapName = "tensor-fusion-operator-leader-info" @@ -203,3 +226,5 @@ const ExtraVerificationInfoPodIDKey = "authentication.kubernetes.io/pod-uid" const SchedulerSimulationKey = "simulate-schedule" const MobileGpuClockSpeedMultiplier = 0.75 +const DefaultEvictionProtectionPriceRatio = 1.2 +const NodeCriticalPriorityClassName = "system-node-critical" diff --git a/internal/constants/env.go b/internal/constants/env.go index 1e26a392..06212f20 100644 --- a/internal/constants/env.go +++ b/internal/constants/env.go @@ -73,9 +73,10 @@ const ( LdPreloadFileName = "ld.so.preload" LdPreloadFile = "/etc/ld.so.preload" - TFLibsVolumeName = "tf-libs" - TFLibsVolumeMountPath = "/tensor-fusion" - TFConnectionNamePrefix = "-tf-vgpu-" + TFLibsVolumeName = "tf-libs" + TFLibsVolumeMountPath = "/tensor-fusion" + TFConnectionNamePrefix = "-tf-vgpu-" + TFConnectionNameNoPrefix = "tf-vgpu-" HostIPFieldRef = "status.hostIP" NodeNameFieldRef = "spec.nodeName" @@ -98,8 +99,7 @@ const ( LdPreloadEnv = "LD_PRELOAD" LdPreloadLimiter = "/home/app/libcuda_limiter.so" - SharedMemDeviceName = "/dev/shm" - SharedMemMountSubPath = "shm" + SharedMemMountSubPath = "/shm" // disable GPU limiter, for emergency use DisableGpuLimiterEnv = "DISABLE_GPU_LIMITER" diff --git a/internal/controller/dra/resourceclaim_controller.go b/internal/controller/dra/resourceclaim_controller.go new file mode 100644 index 00000000..6d18b234 --- /dev/null +++ b/internal/controller/dra/resourceclaim_controller.go @@ -0,0 +1,214 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package dra + +import ( + "context" + "fmt" + + "github.com/NexusGPU/tensor-fusion/internal/utils" + corev1 "k8s.io/api/core/v1" + resourcev1beta2 "k8s.io/api/resource/v1beta2" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +// ResourceClaimReconciler reconciles ResourceClaim objects +type ResourceClaimReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceclaims,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +func (r *ResourceClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + + // Fetch the ResourceClaim instance + resourceClaim := &resourcev1beta2.ResourceClaim{} + if err := r.Get(ctx, req.NamespacedName, resourceClaim); err != nil { + if errors.IsNotFound(err) { + // Request object not found, could have been deleted after reconcile request. + // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers. + // Return and don't requeue + log.Info("ResourceClaim resource not found. Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. + log.Error(err, "Failed to get ResourceClaim") + return ctrl.Result{}, err + } + + // Check if this ResourceClaim is created from our ResourceClaimTemplate + if resourceClaim.Labels == nil { + // No labels, not our ResourceClaim + return ctrl.Result{}, nil + } + + labelValue, exists := resourceClaim.Labels[constants.TensorFusionResourceClaimTemplateLabel] + if !exists || labelValue != constants.TrueStringValue { + // Not our ResourceClaim, ignore + return ctrl.Result{}, nil + } + + log.Info("Processing TensorFusion ResourceClaim", "name", resourceClaim.Name, "namespace", resourceClaim.Namespace) + + // Find the owner Pod to get the CEL expression annotation + ownerPod, err := r.findOwnerPod(ctx, resourceClaim) + if err != nil { + log.Error(err, "Failed to find owner Pod") + return ctrl.Result{}, err + } + + if ownerPod == nil { + log.Info("Owner Pod not found, ResourceClaim may not have OwnerReference yet") + return ctrl.Result{RequeueAfter: constants.PendingRequeueDuration}, nil + } + + // Update ResourceClaim with CEL expression + if err := r.updateResourceClaimCEL(resourceClaim, ownerPod); err != nil { + log.Error(err, "Failed to update ResourceClaim CEL expression") + return ctrl.Result{}, err + } + // Update ResourceClaim with capacity request + if err := r.updateCapacityRequest(resourceClaim, ownerPod); err != nil { + log.Error(err, "Failed to update ResourceClaim capacity request") + return ctrl.Result{}, err + } + + if err := r.Update(ctx, resourceClaim); err != nil { + log.Error(err, "Failed to update ResourceClaim") + return ctrl.Result{}, err + } + + log.Info("Successfully updated ResourceClaim") + return ctrl.Result{}, nil +} + +// findOwnerPod finds the Pod that owns this ResourceClaim +func (r *ResourceClaimReconciler) findOwnerPod(ctx context.Context, resourceClaim 
*resourcev1beta2.ResourceClaim) (*corev1.Pod, error) {
+	// Find the Pod OwnerReference (there should be exactly one)
+	var podOwnerRef *metav1.OwnerReference
+	for i, ownerRef := range resourceClaim.OwnerReferences {
+		if ownerRef.Kind == "Pod" && ownerRef.APIVersion == "v1" {
+			podOwnerRef = &resourceClaim.OwnerReferences[i]
+			break
+		}
+	}
+
+	if podOwnerRef == nil {
+		return nil, nil // No Pod owner found
+	}
+
+	// Get the Pod by name and namespace (the owner UID is verified below)
+	pod := &corev1.Pod{}
+	err := r.Get(ctx, types.NamespacedName{
+		Name:      podOwnerRef.Name,
+		Namespace: resourceClaim.Namespace,
+	}, pod)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			return nil, nil // Pod was deleted
+		}
+		return nil, fmt.Errorf("failed to get owner Pod %s/%s: %w", resourceClaim.Namespace, podOwnerRef.Name, err)
+	}
+
+	// Verify the UID matches (additional safety check)
+	if pod.UID != podOwnerRef.UID {
+		return nil, fmt.Errorf("pod UID mismatch: expected %s, got %s", podOwnerRef.UID, pod.UID)
+	}
+
+	return pod, nil
+}
+
+// updateResourceClaimCEL updates the ResourceClaim's CEL selector expression
+func (r *ResourceClaimReconciler) updateResourceClaimCEL(resourceClaim *resourcev1beta2.ResourceClaim, pod *corev1.Pod) error {
+	// Check if we need to update
+	if len(resourceClaim.Spec.Devices.Requests) == 0 {
+		return fmt.Errorf("no device requests found in ResourceClaim")
+	}
+
+	deviceReq := &resourceClaim.Spec.Devices.Requests[0]
+	if deviceReq.Exactly == nil {
+		return fmt.Errorf("no ExactDeviceRequest found")
+	}
+
+	// Get CEL expression from Pod annotation (DRACelExpressionAnnotation),
+	// e.g. device.attributes["tflops"].quantity >= quantity("10")
+	celExpression := pod.Annotations[constants.DRACelExpressionAnnotation]
+
+	if celExpression == "" {
+		return nil
+	}
+
+	// Skip the update if the CEL expression is already set correctly
+	if len(deviceReq.Exactly.Selectors) > 0 &&
+		deviceReq.Exactly.Selectors[0].CEL != nil &&
+		deviceReq.Exactly.Selectors[0].CEL.Expression == celExpression {
+		return nil
+	}
+
+	// Update the CEL expression
+	if len(deviceReq.Exactly.Selectors) == 0 {
+		deviceReq.Exactly.Selectors = []resourcev1beta2.DeviceSelector{{}}
+	}
+
+	if deviceReq.Exactly.Selectors[0].CEL == nil {
+		deviceReq.Exactly.Selectors[0].CEL = &resourcev1beta2.CELDeviceSelector{}
+	}
+
+	deviceReq.Exactly.Selectors[0].CEL.Expression = celExpression
+
+	return nil
+}
+
+// updateCapacityRequest copies the Pod's requested TFlops and VRAM into the claim's capacity requests
+func (r *ResourceClaimReconciler) updateCapacityRequest(resourceClaim *resourcev1beta2.ResourceClaim, pod *corev1.Pod) error {
+	if len(resourceClaim.Spec.Devices.Requests) == 0 {
+		return fmt.Errorf("no device requests found in ResourceClaim")
+	}
+
+	deviceReq := &resourceClaim.Spec.Devices.Requests[0]
+	if deviceReq.Exactly == nil {
+		return fmt.Errorf("no ExactDeviceRequest found")
+	}
+	// Guard against a missing capacity block to avoid writing into a nil map
+	if deviceReq.Exactly.Capacity == nil || deviceReq.Exactly.Capacity.Requests == nil {
+		return fmt.Errorf("no capacity requests defined in ResourceClaim device request")
+	}
+	gpuRequestResource, err := utils.GetGPUResource(pod, true)
+	if err != nil {
+		return fmt.Errorf("failed to get GPU resource: %w", err)
+	}
+	//TODO extract to constants
+	deviceReq.Exactly.Capacity.Requests["tflops"] = gpuRequestResource.Tflops
+	deviceReq.Exactly.Capacity.Requests["vram"] = gpuRequestResource.Vram
+
+	return nil
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *ResourceClaimReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&resourcev1beta2.ResourceClaim{}).
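+		// Note: all ResourceClaims in the cluster are watched here; Reconcile filters on the
+		// TensorFusion resource-claim-template label before mutating anything.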
+ Complete(r) +} diff --git a/internal/controller/dra/resourceclaim_controller_test.go b/internal/controller/dra/resourceclaim_controller_test.go new file mode 100644 index 00000000..aeebbda7 --- /dev/null +++ b/internal/controller/dra/resourceclaim_controller_test.go @@ -0,0 +1,564 @@ +package dra + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + resourcev1beta2 "k8s.io/api/resource/v1beta2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +func TestResourceClaimReconciler_Reconcile(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + tests := []struct { + name string + resourceClaim *resourcev1beta2.ResourceClaim + pod *corev1.Pod + expectedResult ctrl.Result + expectError bool + expectUpdate bool + }{ + { + name: "ResourceClaim not found", + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "ResourceClaim without TensorFusion label", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "ResourceClaim with wrong label value", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: "false", + }, + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "ResourceClaim without owner Pod", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: constants.TrueStringValue, + }, + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + expectedResult: ctrl.Result{RequeueAfter: constants.PendingRequeueDuration}, + expectError: false, + }, + { + name: "Owner Pod without CEL annotation", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: constants.TrueStringValue, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "Successful CEL expression update", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + 
constants.TensorFusionResourceClaimTemplateLabel: constants.TrueStringValue, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + Annotations: map[string]string{ + constants.DRACelExpressionAnnotation: `device.attributes["tflops"].quantity >= quantity("10")`, + }, + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + expectUpdate: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var objects []runtime.Object + if tt.resourceClaim != nil { + objects = append(objects, tt.resourceClaim) + } + if tt.pod != nil { + objects = append(objects, tt.pod) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(objects...). + Build() + + reconciler := &ResourceClaimReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-claim", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(context.Background(), req) + + if tt.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + assert.Equal(t, tt.expectedResult, result) + + // Check if ResourceClaim was updated with CEL expression + if tt.expectUpdate && tt.resourceClaim != nil { + updatedClaim := &resourcev1beta2.ResourceClaim{} + err := fakeClient.Get(context.Background(), types.NamespacedName{ + Name: tt.resourceClaim.Name, + Namespace: tt.resourceClaim.Namespace, + }, updatedClaim) + require.NoError(t, err) + + require.Len(t, updatedClaim.Spec.Devices.Requests, 1) + deviceReq := updatedClaim.Spec.Devices.Requests[0] + require.NotNil(t, deviceReq.Exactly) + require.Len(t, deviceReq.Exactly.Selectors, 1) + require.NotNil(t, deviceReq.Exactly.Selectors[0].CEL) + assert.Equal(t, `device.attributes["tflops"].quantity >= quantity("10")`, deviceReq.Exactly.Selectors[0].CEL.Expression) + } + }) + } +} + +func TestResourceClaimReconciler_findOwnerPod(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + + tests := []struct { + name string + resourceClaim *resourcev1beta2.ResourceClaim + pod *corev1.Pod + expectedPod *corev1.Pod + expectError bool + }{ + { + name: "No owner references", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + }, + expectedPod: nil, + expectError: false, + }, + { + name: "No Pod owner reference", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "test-deployment", + UID: "deployment-uid-123", + }, + }, + }, + }, + expectedPod: nil, + expectError: false, + }, + { + name: "Pod owner not found", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: 
"nonexistent-pod", + UID: "pod-uid-123", + }, + }, + }, + }, + expectedPod: nil, + expectError: false, + }, + { + name: "Pod UID mismatch", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "different-uid", + }, + }, + expectedPod: nil, + expectError: true, + }, + { + name: "Successful Pod lookup", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + }, + }, + expectedPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + }, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var objects []runtime.Object + if tt.pod != nil { + objects = append(objects, tt.pod) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(objects...). + Build() + + reconciler := &ResourceClaimReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + pod, err := reconciler.findOwnerPod(context.Background(), tt.resourceClaim) + + if tt.expectError { + require.Error(t, err) + assert.Nil(t, pod) + } else { + require.NoError(t, err) + if tt.expectedPod == nil { + assert.Nil(t, pod) + } else { + require.NotNil(t, pod) + assert.Equal(t, tt.expectedPod.Name, pod.Name) + assert.Equal(t, tt.expectedPod.Namespace, pod.Namespace) + assert.Equal(t, tt.expectedPod.UID, pod.UID) + } + } + }) + } +} + +func TestResourceClaimReconciler_updateResourceClaimCEL(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + + tests := []struct { + name string + resourceClaim *resourcev1beta2.ResourceClaim + celExpression string + expectError bool + expectUpdate bool + }{ + { + name: "No device requests", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{}, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: true, + }, + { + name: "No ExactDeviceRequest", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + // Exactly is nil + }, + }, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: true, + }, + { + name: "CEL expression already set correctly", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: 
&resourcev1beta2.ExactDeviceRequest{ + Count: 1, + Selectors: []resourcev1beta2.DeviceSelector{ + { + CEL: &resourcev1beta2.CELDeviceSelector{ + Expression: `device.attributes["tflops"].quantity >= quantity("10")`, + }, + }, + }, + }, + }, + }, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: false, + expectUpdate: false, // No update needed + }, + { + name: "Successful CEL expression update - empty selectors", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: false, + expectUpdate: true, + }, + { + name: "Successful CEL expression update - nil CEL", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + Selectors: []resourcev1beta2.DeviceSelector{ + { + // CEL is nil + }, + }, + }, + }, + }, + }, + }, + }, + celExpression: `device.attributes["vram"].quantity >= quantity("8Gi")`, + expectError: false, + expectUpdate: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(tt.resourceClaim). + Build() + + reconciler := &ResourceClaimReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + mockPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + constants.DRACelExpressionAnnotation: tt.celExpression, + }, + }, + } + err := reconciler.updateResourceClaimCEL(tt.resourceClaim, mockPod) + + if tt.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + + if tt.expectUpdate { + // Verify the CEL expression was set correctly + require.Len(t, tt.resourceClaim.Spec.Devices.Requests, 1) + deviceReq := tt.resourceClaim.Spec.Devices.Requests[0] + require.NotNil(t, deviceReq.Exactly) + require.Len(t, deviceReq.Exactly.Selectors, 1) + require.NotNil(t, deviceReq.Exactly.Selectors[0].CEL) + assert.Equal(t, tt.celExpression, deviceReq.Exactly.Selectors[0].CEL.Expression) + } + } + }) + } +} diff --git a/internal/controller/dra/resourceslice_controller.go b/internal/controller/dra/resourceslice_controller.go new file mode 100644 index 00000000..fbd03f6f --- /dev/null +++ b/internal/controller/dra/resourceslice_controller.go @@ -0,0 +1,218 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package dra + +import ( + "context" + "fmt" + + resourcev1beta2 "k8s.io/api/resource/v1beta2" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +// ResourceSliceReconciler reconciles ResourceSlice objects based on GPUNode and GPU changes +type ResourceSliceReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceslices,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpunodes,verbs=get;list;watch +//+kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpus,verbs=get;list;watch +//+kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools,verbs=get;list;watch + +// Reconcile processes GPUNode changes and generates/updates corresponding ResourceSlices +func (r *ResourceSliceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + log.Info("Reconciling ResourceSlice for GPUNode", "name", req.Name) + + // Fetch the GPUNode + gpuNode := &tfv1.GPUNode{} + if err := r.Get(ctx, req.NamespacedName, gpuNode); err != nil { + if errors.IsNotFound(err) { + // GPUNode was deleted, clean up associated ResourceSlice + return r.cleanupResourceSlice(ctx, req.Name) + } + log.Error(err, "Failed to get GPUNode") + return ctrl.Result{}, err + } + + // If GPUNode is being deleted, clean up ResourceSlice + if !gpuNode.DeletionTimestamp.IsZero() { + return r.cleanupResourceSlice(ctx, gpuNode.Name) + } + // Get all GPUs owned by this node + gpuList := &tfv1.GPUList{} + if err := r.List(ctx, gpuList, client.MatchingLabels{constants.LabelKeyOwner: gpuNode.Name}); err != nil { + log.Error(err, "Failed to list GPUs for node") + return ctrl.Result{}, err + } + + // Skip if no GPUs discovered yet + if len(gpuList.Items) == 0 { + log.Info("No GPUs discovered for node yet, skipping ResourceSlice generation") + return ctrl.Result{}, nil + } + + // Generate/update ResourceSlice for this node + if err := r.reconcileResourceSlice(ctx, gpuNode, gpuList.Items); err != nil { + log.Error(err, "Failed to reconcile ResourceSlice") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +// reconcileResourceSlice creates or updates the ResourceSlice for a GPUNode +func (r *ResourceSliceReconciler) reconcileResourceSlice(ctx context.Context, gpuNode *tfv1.GPUNode, gpus []tfv1.GPU) error { + log := log.FromContext(ctx) + + resourceSliceName := fmt.Sprintf(constants.DRAResourceSliceName, gpuNode.Name) + resourceSlice := &resourcev1beta2.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceSliceName, + }, + } + + _, err := controllerutil.CreateOrUpdate(ctx, r.Client, resourceSlice, func() error { + // Set basic spec fields + resourceSlice.Spec.Driver = constants.DRADriverName + resourceSlice.Spec.NodeName = &gpuNode.Name + resourceSlice.Spec.Pool = resourcev1beta2.ResourcePool{ + Name: gpuNode.Labels[constants.GpuPoolKey], + Generation: gpuNode.Generation, + ResourceSliceCount: 1, + } + + // Generate devices list + devices, 
err := r.generateDevices(ctx, gpus) + if err != nil { + return fmt.Errorf("failed to generate devices: %w", err) + } + resourceSlice.Spec.Devices = devices + + // Set labels for easy identification + if resourceSlice.Labels == nil { + resourceSlice.Labels = make(map[string]string) + } + resourceSlice.Labels[constants.LabelKeyOwner] = gpuNode.Name + resourceSlice.Labels[constants.KubernetesHostNameLabel] = gpuNode.Name + return nil + }) + + if err != nil { + return fmt.Errorf("failed to create or update ResourceSlice: %w", err) + } + + log.Info("Successfully reconciled ResourceSlice", "resourceSlice", resourceSliceName) + return nil +} + +// generateDevices creates the device list for ResourceSlice based on physical GPUs +func (r *ResourceSliceReconciler) generateDevices(_ context.Context, gpus []tfv1.GPU) ([]resourcev1beta2.Device, error) { + devices := make([]resourcev1beta2.Device, 0, len(gpus)) + + // Calculate virtual capacities for proportional allocation + + for _, gpu := range gpus { + if gpu.Status.Capacity == nil { + continue + } + //TODO extract to constants + //TODO quota support + poolName := gpu.Labels[constants.GpuPoolKey] + device := resourcev1beta2.Device{ + Name: gpu.Status.UUID, + Attributes: map[resourcev1beta2.QualifiedName]resourcev1beta2.DeviceAttribute{ + "model": { + StringValue: &gpu.Status.GPUModel, + }, + "pool_name": { + StringValue: &poolName, + }, + "pod_namespace": { + StringValue: &gpu.Namespace, + }, + }, + Capacity: map[resourcev1beta2.QualifiedName]resourcev1beta2.DeviceCapacity{ + "tflops": { + Value: gpu.Status.Capacity.Tflops, + }, + "vram": { + Value: gpu.Status.Capacity.Vram, + }, + }, + AllowMultipleAllocations: func() *bool { b := true; return &b }(), + } + + devices = append(devices, device) + } + + return devices, nil +} + +// cleanupResourceSlice removes the ResourceSlice associated with a deleted GPUNode +func (r *ResourceSliceReconciler) cleanupResourceSlice(ctx context.Context, nodeName string) (ctrl.Result, error) { + log := log.FromContext(ctx) + + resourceSliceName := fmt.Sprintf(constants.DRAResourceSliceName, nodeName) + resourceSlice := &resourcev1beta2.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceSliceName, + }, + } + + err := r.Delete(ctx, resourceSlice) + if err != nil && !errors.IsNotFound(err) { + log.Error(err, "Failed to delete ResourceSlice", "name", resourceSliceName) + return ctrl.Result{}, err + } + + log.Info("Successfully cleaned up ResourceSlice", "name", resourceSliceName) + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager +func (r *ResourceSliceReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&tfv1.GPUNode{}). + Watches(&tfv1.GPU{}, handler.EnqueueRequestsFromMapFunc( + func(ctx context.Context, obj client.Object) []reconcile.Request { + // Get the owner GPUNode name from GPU labels + if labels := obj.GetLabels(); labels != nil { + if nodeName, ok := labels[constants.LabelKeyOwner]; ok { + return []reconcile.Request{ + {NamespacedName: types.NamespacedName{Name: nodeName}}, + } + } + } + return nil + })). 
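+		// GPU changes are mapped back to the owning GPUNode via the owner label above,
+		// so device additions and removals refresh that node's ResourceSlice.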
+ Complete(r) +} diff --git a/internal/controller/gpunode_controller.go b/internal/controller/gpunode_controller.go index 9035ecdd..ae503f28 100644 --- a/internal/controller/gpunode_controller.go +++ b/internal/controller/gpunode_controller.go @@ -47,8 +47,9 @@ import ( // GPUNodeReconciler reconciles a GPUNode object type GPUNodeReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder + Scheme *runtime.Scheme + Recorder record.EventRecorder + Allocator *gpuallocator.GpuAllocator } // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpunodes,verbs=get;list;watch;create;update;patch;delete @@ -140,7 +141,7 @@ func (r *GPUNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } - hypervisorName, err := r.reconcileHypervisorPod(ctx, node, poolObj) + hypervisorName, err := r.reconcileHypervisorPod(ctx, node, poolObj, coreNode) if err != nil { return ctrl.Result{}, err } @@ -158,7 +159,9 @@ func (r *GPUNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, err } -func (r *GPUNodeReconciler) checkStatusAndUpdateVirtualCapacity(ctx context.Context, hypervisorName string, node *tfv1.GPUNode, poolObj *tfv1.GPUPool) error { +func (r *GPUNodeReconciler) checkStatusAndUpdateVirtualCapacity( + ctx context.Context, hypervisorName string, node *tfv1.GPUNode, poolObj *tfv1.GPUPool, +) error { pod := &corev1.Pod{} fetchErr := r.Get(ctx, client.ObjectKey{Name: hypervisorName, Namespace: utils.CurrentNamespace()}, pod) if fetchErr != nil { @@ -183,7 +186,7 @@ func (r *GPUNodeReconciler) checkStatusAndUpdateVirtualCapacity(ctx context.Cont return nil } else { - gpuModels, err := gpuallocator.RefreshGPUNodeCapacity(ctx, r.Client, node, poolObj) + gpuModels, err := gpuallocator.RefreshGPUNodeCapacity(ctx, r.Client, node, poolObj, r.Allocator) if err != nil { return err } @@ -319,7 +322,12 @@ func (r *GPUNodeReconciler) reconcileNodeDiscoveryJob( return nil } -func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx context.Context, node *tfv1.GPUNode, pool *tfv1.GPUPool) (string, error) { +func (r *GPUNodeReconciler) reconcileHypervisorPod( + ctx context.Context, + node *tfv1.GPUNode, + pool *tfv1.GPUPool, + k8sNode *corev1.Node, +) (string, error) { log := log.FromContext(ctx) if pool.Spec.ComponentConfig == nil || pool.Spec.ComponentConfig.Hypervisor == nil { @@ -361,7 +369,7 @@ func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx context.Context, node *tf } log.Info("hypervisor pod not found, creating new one", "node", node.Name) - if err := r.createHypervisorPod(ctx, key, node, pool); err != nil { + if err := r.createHypervisorPod(ctx, key, node, pool, k8sNode); err != nil { if errors.IsAlreadyExists(err) { log.Info("hypervisor pod already exists, skip creation", "node", node.Name) return "", nil @@ -372,7 +380,13 @@ func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx context.Context, node *tf return key.Name, nil } -func (r *GPUNodeReconciler) createHypervisorPod(ctx context.Context, key client.ObjectKey, node *tfv1.GPUNode, pool *tfv1.GPUPool) error { +func (r *GPUNodeReconciler) createHypervisorPod( + ctx context.Context, + key client.ObjectKey, + node *tfv1.GPUNode, + pool *tfv1.GPUPool, + k8sNode *corev1.Node, +) error { log := log.FromContext(ctx) podTmpl := &corev1.PodTemplate{} @@ -447,7 +461,11 @@ func (r *GPUNodeReconciler) createHypervisorPod(ctx context.Context, key client. 
}) err = controllerutil.SetControllerReference(node, newPod, r.Scheme) if err != nil { - return fmt.Errorf("failed to set controller reference: %w", err) + return fmt.Errorf("failed to set controller reference for hypervisor: %w", err) + } + // also set node owned by k8s node to allow Karpenter to delete the node while hypervisor exists + if err := controllerutil.SetOwnerReference(k8sNode, newPod, r.Scheme); err != nil { + return fmt.Errorf("failed to set owner reference for hypervisor: %w", err) } // create hypervisor pod diff --git a/internal/controller/gpupool_controller.go b/internal/controller/gpupool_controller.go index 987eb81b..da8c63aa 100644 --- a/internal/controller/gpupool_controller.go +++ b/internal/controller/gpupool_controller.go @@ -30,13 +30,16 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/metrics" utils "github.com/NexusGPU/tensor-fusion/internal/utils" "golang.org/x/time/rate" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/retry" "k8s.io/client-go/util/workqueue" + schedulingcorev1 "k8s.io/component-helpers/scheduling/corev1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -83,6 +86,9 @@ type GPUPoolReconciler struct { // and requeue until current time after that, start provisioning loop var provisioningInitializationMinTime = map[string]time.Time{} +// When GPU nodeSelector changed, trigger all node update +var poolSelectorChangeMap = map[string]string{} + // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools/status,verbs=get;update;patch // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools/finalizers,verbs=update @@ -116,6 +122,10 @@ func (r *GPUPoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } + if err := r.reconcilePoolSelectorChange(ctx, pool); err != nil { + return ctrl.Result{}, err + } + if err := r.reconcilePoolCurrentCapacityAndReadiness(ctx, pool); err != nil { return ctrl.Result{}, err } @@ -404,6 +414,59 @@ func (r *GPUPoolReconciler) reconcilePoolComponents(ctx context.Context, pool *t return ctrlResult, utilerrors.NewAggregate(errs) } +func (r *GPUPoolReconciler) reconcilePoolSelectorChange(ctx context.Context, pool *tfv1.GPUPool) error { + if pool.Spec.NodeManagerConfig != nil && pool.Spec.NodeManagerConfig.NodeSelector != nil { + hash := utils.GetObjectHash(pool.Spec.NodeManagerConfig.NodeSelector) + if poolSelectorChangeMap[pool.Name] == hash { + return nil + } + + // hash has changed, or first reconcile, should check all k8s nodes + nodes := &corev1.NodeList{} + selectors := utils.GetInitialGPUNodeSelector() + if err := r.List(ctx, nodes, client.MatchingLabels{selectors[0]: selectors[1]}); err != nil { + return err + } + for _, node := range nodes.Items { + // skip no label or deleting nodes + if node.Labels == nil || !node.DeletionTimestamp.IsZero() { + continue + } + matches, err := schedulingcorev1.MatchNodeSelectorTerms(&node, pool.Spec.NodeManagerConfig.NodeSelector) + if err != nil { + return err + } + if matches { + if err := UpdateK8SNodeSelectorHash(ctx, r.Client, &node, hash); err != nil { + return err + } + } + } + 
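+		// Record the propagated selector hash so later reconciles with an unchanged
+		// nodeSelector can return early without re-listing nodes.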
poolSelectorChangeMap[pool.Name] = hash + return nil + } + return nil +} + +func UpdateK8SNodeSelectorHash(ctx context.Context, k8sClient client.Client, node *corev1.Node, hash string) error { + // skip nodes that already injected the hash + if node.Labels[constants.LabelNodeSelectorHash] == hash { + return nil + } + // update label to trigger the GPUNode reconcile + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + latest := &corev1.Node{} + if err := k8sClient.Get(ctx, client.ObjectKey{Name: node.Name}, latest); err != nil { + return err + } + latest.Labels[constants.LabelNodeSelectorHash] = hash + return k8sClient.Update(ctx, latest) + }); err != nil { + return err + } + return nil +} + func (r *GPUPoolReconciler) cleanUpPool(ctx context.Context, pool *tfv1.GPUPool) (bool, error) { log := log.FromContext(ctx) log.Info("TensorFusionGPUPool is being deleted", "name", pool.Name) diff --git a/internal/controller/gpupool_controller_test.go b/internal/controller/gpupool_controller_test.go index 50b033cd..e3be7a99 100644 --- a/internal/controller/gpupool_controller_test.go +++ b/internal/controller/gpupool_controller_test.go @@ -42,6 +42,14 @@ var _ = Describe("GPUPool Controller", func() { pool := tfEnv.GetGPUPool(0) g.Expect(pool.Status.Phase).Should(Equal(tfv1.TensorFusionPoolPhaseRunning)) }).Should(Succeed()) + Eventually(func(g Gomega) { + nodeList := tfEnv.GetGPUNodeList(0) + for _, gpuNode := range nodeList.Items { + node := &corev1.Node{} + g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: gpuNode.Name}, node)).Should(Succeed()) + g.Expect(node.Labels).To(HaveKey(constants.LabelNodeSelectorHash)) + } + }).Should(Succeed()) tfEnv.Cleanup() }) }) diff --git a/internal/controller/node_controller.go b/internal/controller/node_controller.go index 3a9c652d..d8908847 100644 --- a/internal/controller/node_controller.go +++ b/internal/controller/node_controller.go @@ -19,8 +19,6 @@ package controller import ( "context" "fmt" - "os" - "strings" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" @@ -55,6 +53,8 @@ type NodeReconciler struct { // +kubebuilder:rbac:groups=core,resources=nodes/finalizers,verbs=create;get;patch;update // Reconcile k8s nodes to create and update GPUNode +// +//nolint:gocyclo func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := log.FromContext(ctx) node := &corev1.Node{} @@ -86,12 +86,15 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. return ctrl.Result{}, err } if !matched { - // delete gpunode if no matched pool - if err := r.Delete(ctx, &tfv1.GPUNode{ - ObjectMeta: metav1.ObjectMeta{ - Name: node.Name, - }, - }); err != nil { + existingGPUNode := &tfv1.GPUNode{} + if err := r.Get(ctx, client.ObjectKey{Name: node.Name}, existingGPUNode); err != nil { + if errors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("can not get gpuNode(%s) : %w", node.Name, err) + } + // delete existing gpunode if no matched pool + if err := r.Delete(ctx, existingGPUNode); err != nil { // requeue if the gpunode is not generated if errors.IsNotFound(err) { return ctrl.Result{}, nil @@ -121,6 +124,14 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
return ctrl.Result{}, nil } + // update k8s node hash + hash := utils.GetObjectHash(pool.Spec.NodeManagerConfig.NodeSelector) + if node.Labels[constants.LabelNodeSelectorHash] != hash { + if err := UpdateK8SNodeSelectorHash(ctx, r.Client, node, hash); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to update k8s node hash: %w", err) + } + } + provisioningMode := pool.Spec.NodeManagerConfig.ProvisioningMode isDirectManagedMode := provisioningMode == tfv1.ProvisioningModeProvisioned isManagedNode := isDirectManagedMode || provisioningMode == tfv1.ProvisioningModeKarpenter @@ -199,11 +210,7 @@ func (r *NodeReconciler) generateGPUNode(node *corev1.Node, pool *tfv1.GPUPool, // SetupWithManager sets up the controller with the Manager. func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error { // must choose an initial label selector to avoid performance impact in large Kubernetes clusters - selector := os.Getenv("INITIAL_GPU_NODE_LABEL_SELECTOR") - if selector == "" { - selector = constants.InitialGPUNodeSelector - } - selectors := strings.Split(selector, "=") + selectors := utils.GetInitialGPUNodeSelector() p, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{ MatchLabels: map[string]string{ selectors[0]: selectors[1], diff --git a/internal/controller/pod_controller.go b/internal/controller/pod_controller.go index ab335948..a7bf7c2f 100644 --- a/internal/controller/pod_controller.go +++ b/internal/controller/pod_controller.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "strconv" + "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" @@ -66,6 +67,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R if err := r.Get(ctx, req.NamespacedName, pod); err != nil { if errors.IsNotFound(err) { r.Allocator.DeallocByPodIdentifier(ctx, req.NamespacedName) + metrics.RemoveWorkerMetrics(req.Name, time.Now()) log.Info("Released GPU resources when pod deleted", "pod", req.NamespacedName) return ctrl.Result{}, nil } @@ -106,8 +108,9 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R } if pod.Labels[constants.LabelComponent] == constants.ComponentWorker { - metrics.SetWorkerMetricsByWorkload(pod) - + if pod.DeletionTimestamp.IsZero() { + metrics.SetWorkerMetricsByWorkload(pod) + } shouldReturn, err := r.handleWorkerPodFinalizer(ctx, pod) if err != nil { return ctrl.Result{}, err @@ -148,7 +151,8 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R func (r *PodReconciler) handleWorkerPodFinalizer(ctx context.Context, pod *corev1.Pod) (bool, error) { // Handle our GPU resource cleanup finalizer shouldReturn, err := utils.HandleFinalizer(ctx, pod, r.Client, func(ctx context.Context, obj *corev1.Pod) (bool, error) { - metrics.RemoveWorkerMetrics(pod.Name, pod.DeletionTimestamp.Time) + // if the Pod keep terminating, should update deletion timestamp for raw cost calculation + metrics.RemoveWorkerMetrics(pod.Name, time.Now()) counter := &v1.TensorFusionPodCounter{Client: r.Client} if err := counter.Decrease(ctx, pod); err != nil { return false, err diff --git a/internal/controller/pod_controller_test.go b/internal/controller/pod_controller_test.go index b36f140f..cf53d119 100644 --- a/internal/controller/pod_controller_test.go +++ b/internal/controller/pod_controller_test.go @@ -230,9 +230,6 @@ var _ = Describe("Pod Controller", func() { }, } _ = k8sClient.Delete(ctx, connection) - Eventually(func() error { - return k8sClient.Get(ctx, 
client.ObjectKeyFromObject(connection), connection) - }).Should(Satisfy(errors.IsNotFound)) }) It("should successfully create TensorFusion connection for client pod", func() { diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 388b938f..0ba3228a 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -180,9 +180,10 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) err = (&GPUNodeReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("GPUNode"), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("GPUNode"), + Allocator: allocator, }).SetupWithManager(mgr) Expect(err).ToNot(HaveOccurred()) diff --git a/internal/controller/tensorfusioncluster_controller.go b/internal/controller/tensorfusioncluster_controller.go index a2f8ba12..3c64429e 100644 --- a/internal/controller/tensorfusioncluster_controller.go +++ b/internal/controller/tensorfusioncluster_controller.go @@ -20,7 +20,6 @@ import ( "context" "fmt" "strconv" - "strings" "sync" "golang.org/x/time/rate" @@ -43,7 +42,6 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/metrics" utils "github.com/NexusGPU/tensor-fusion/internal/utils" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -305,7 +303,7 @@ func (r *TensorFusionClusterReconciler) reconcileGPUPool(ctx context.Context, tf } err = r.Create(ctx, gpupool) anyPoolChanged = true - r.updateMetricsRecorder(ctx, gpupool) + r.MetricsRecorder.UpdateMetricsRecorder(gpupool, true) if err != nil { errors = append(errors, fmt.Errorf("failed to create GPUPool %s: %w", key, err)) continue @@ -328,7 +326,7 @@ func (r *TensorFusionClusterReconciler) reconcileGPUPool(ctx context.Context, tf } anyPoolChanged = true } - r.updateMetricsRecorder(ctx, existingPool) + r.MetricsRecorder.UpdateMetricsRecorder(existingPool, specChanged) } } @@ -382,7 +380,6 @@ func (r *TensorFusionClusterReconciler) checkTFClusterComponentsReady(ctx contex constants.LabelKeyOwner: tfc.GetName(), })) if err != nil { - r.Recorder.Eventf(tfc, corev1.EventTypeWarning, "CheckComponentStatusError", err.Error()) return false, nil, fmt.Errorf("failed to list GPUPools: %w", err) } if len(pools.Items) != len(tfc.Spec.GPUPools) { @@ -411,7 +408,6 @@ func (r *TensorFusionClusterReconciler) updateTFClusterStatus(ctx context.Contex } } if err := r.Status().Update(ctx, tfc); err != nil { - r.Recorder.Eventf(tfc, corev1.EventTypeWarning, "UpdateClusterStatusError", err.Error()) return err } return nil @@ -443,34 +439,3 @@ func (r *TensorFusionClusterReconciler) SetupWithManager(mgr ctrl.Manager, addLi Owns(&tfv1.GPUPool{}). 
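+		// Changes to owned GPUPools requeue the owning cluster, so status aggregation and
+		// pricing updates (now handled by MetricsRecorder.UpdateMetricsRecorder) stay current.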
Complete(r) } - -// Update metrics recorder's raw billing map -func (r *TensorFusionClusterReconciler) updateMetricsRecorder(ctx context.Context, pool *tfv1.GPUPool) { - const dollarSign = "$" - log := log.FromContext(ctx) - if pool.Spec.QosConfig == nil { - log.Info("QosConfig is nil, skip updating metrics recorder", "pool", pool.Name) - return - } - - qosConfig := pool.Spec.QosConfig - if _, ok := r.MetricsRecorder.WorkerUnitPriceMap[pool.Name]; !ok { - r.MetricsRecorder.WorkerUnitPriceMap[pool.Name] = make(map[string]metrics.RawBillingPricing) - } - pricingDetail := r.MetricsRecorder.WorkerUnitPriceMap[pool.Name] - for _, pricing := range qosConfig.Pricing { - tflopsPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerFP16TFlopsPerHour, dollarSign), 64) - vramPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerGBOfVRAMPerHour, dollarSign), 64) - limitOverRequestChargingRatio, _ := strconv.ParseFloat(pricing.LimitsOverRequestsChargingRatio, 64) - - pricingDetail[string(pricing.Qos)] = metrics.RawBillingPricing{ - TflopsPerSecond: tflopsPerHour / float64(3600), - VramPerSecond: vramPerHour / float64(3600), - - TflopsOverRequestPerSecond: tflopsPerHour / float64(3600) * limitOverRequestChargingRatio, - VramOverRequestPerSecond: vramPerHour / float64(3600) * limitOverRequestChargingRatio, - } - } - - log.V(5).Info("Updated metrics recorder", "pool", pool.Name, "pricing", pricingDetail) -} diff --git a/internal/controller/tensorfusionworkload_controller.go b/internal/controller/tensorfusionworkload_controller.go index 1ec0d722..bc8ced78 100644 --- a/internal/controller/tensorfusionworkload_controller.go +++ b/internal/controller/tensorfusionworkload_controller.go @@ -347,7 +347,6 @@ func (r *TensorFusionWorkloadReconciler) updateStatus( readyCondition.Status = metav1.ConditionFalse readyCondition.Reason = "WorkerFailed" readyCondition.Message = fmt.Sprintf("Failed workers num: %d", failedWorkers) - r.Recorder.Eventf(workload, corev1.EventTypeWarning, "WorkerFailed", "Failed workers num: %d", failedWorkers) } else if workload.Spec.IsDynamicReplica() { // for dynamic replicas, if no worker failed, indicate workload is running phase = tfv1.TensorFusionWorkloadPhaseRunning diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter.go b/internal/gpuallocator/filter/cel_filter/cel_filter.go index a9369535..18a0d176 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter.go @@ -3,29 +3,533 @@ package cel_filter import ( "context" "fmt" - "time" + "reflect" + "regexp" + "runtime" + "strings" + "sync" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/google/cel-go/cel" "github.com/google/cel-go/common/types" + "github.com/google/cel-go/common/types/ref" + "github.com/google/cel-go/common/types/traits" + "github.com/google/cel-go/interpreter" + "github.com/samber/lo" "sigs.k8s.io/controller-runtime/pkg/log" ) +// Parallel processing constants +const ( + // Threshold for enabling parallel processing + ParallelThreshold = 2000 + // Default number of worker goroutines + DefaultWorkerCount = 4 +) + +// Global string pool for GPU Phase values to reduce allocations +var ( + gpuPhaseStringPool = sync.OnceValue(func() map[string]types.String { + return map[string]types.String{ + constants.PhaseUnknown: types.String(constants.PhaseUnknown), + constants.PhasePending: 
types.String(constants.PhasePending), + constants.PhaseUpdating: types.String(constants.PhaseUpdating), + constants.PhaseRunning: types.String(constants.PhaseRunning), + constants.PhaseMigrating: types.String(constants.PhaseMigrating), + constants.PhaseDestroying: types.String(constants.PhaseDestroying), + } + }) +) + +// getPooledPhaseString returns a pooled CEL String for the given phase +func getPooledPhaseString(phase string) ref.Val { + pool := gpuPhaseStringPool() + if pooled, exists := pool[phase]; exists { + return pooled + } + // Return error for unexpected phase values + return types.NewErr("unknown GPU phase: %s", phase) +} + +// fieldUsage tracks which GPU fields are used in the expression +type fieldUsage struct { + labels bool + annotations bool + available bool + nodeSelector bool + runningApps bool +} + +// FastPathPredicate represents a compiled fast-path predicate function +type FastPathPredicate func(gpu *tfv1.GPU) bool + +// ExpressionPattern represents a recognized expression pattern for fast path +type ExpressionPattern struct { + Pattern *regexp.Regexp + Generator func(matches []string) FastPathPredicate +} + +// ZeroAllocActivation provides zero-allocation variable resolution for CEL +// This eliminates the need to create map[string]interface{} for each GPU +type ZeroAllocActivation struct { + gpuVal gpuVal + workerPodKey workerPodKeyVal + usage fieldUsage +} + +func (a *ZeroAllocActivation) init(g *tfv1.GPU, k tfv1.NameNamespace, usage fieldUsage) { + a.gpuVal.GPU = g + a.gpuVal.labels = nil + a.gpuVal.annotations = nil + a.gpuVal.nodeSelector = nil + a.gpuVal.available = nil + a.gpuVal.runningApps = nil + a.workerPodKey.name = k.Name + a.workerPodKey.namespace = k.Namespace + a.usage = usage +} + +// ResolveName implements interpreter.Activation interface +func (a *ZeroAllocActivation) ResolveName(name string) (interface{}, bool) { + switch name { + case CELVarGPU: + return &a.gpuVal, true + case CELVarWorkerPodKey: + return &a.workerPodKey, true + default: + return nil, false + } +} + +// Parent implements interpreter.Activation interface +func (a *ZeroAllocActivation) Parent() interpreter.Activation { + return nil +} + +type workerPodKeyVal struct { + name string + namespace string +} + +func (w *workerPodKeyVal) Type() ref.Type { return types.MapType } +func (w *workerPodKeyVal) Value() interface{} { + return map[string]string{"name": w.name, "namespace": w.namespace} +} +func (w *workerPodKeyVal) Equal(other ref.Val) ref.Val { return types.False } +func (w *workerPodKeyVal) ConvertToNative(t reflect.Type) (interface{}, error) { + return map[string]string{"name": w.name, "namespace": w.namespace}, nil +} +func (w *workerPodKeyVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (w *workerPodKeyVal) Get(index ref.Val) ref.Val { + key, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + switch key { + case GPUFieldName: + return types.String(w.name) + case GPUFieldNamespace: + return types.String(w.namespace) + default: + return types.String("") + } +} +func (w *workerPodKeyVal) HasField(field string) bool { + return field == GPUFieldName || field == GPUFieldNamespace +} + +type appVal struct { + name string + namespace string + count int64 +} + +func (a *appVal) Type() ref.Type { return types.MapType } +func (a *appVal) Value() interface{} { return nil } +func (a *appVal) Equal(other ref.Val) ref.Val { return types.False } +func (a *appVal) ConvertToNative(t 
reflect.Type) (interface{}, error) { + return map[string]interface{}{ + "name": a.name, + "namespace": a.namespace, + "count": a.count, + }, nil +} +func (a *appVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (a *appVal) Get(index ref.Val) ref.Val { + key, _ := index.Value().(string) + switch key { + case "name": + return types.String(a.name) + case "namespace": + return types.String(a.namespace) + case "count": + return types.Int(a.count) + default: + return types.String("") + } +} +func (a *appVal) HasField(field string) bool { + return field == "name" || field == "namespace" || field == "count" +} + +type runningAppsVal struct { + apps []tfv1.RunningAppDetail +} + +func (r *runningAppsVal) Type() ref.Type { return types.ListType } +func (r *runningAppsVal) Value() interface{} { return r.apps } +func (r *runningAppsVal) Equal(other ref.Val) ref.Val { return types.False } +func (r *runningAppsVal) ConvertToNative(t reflect.Type) (interface{}, error) { + if t.Kind() == reflect.Slice { + out := make([]map[string]interface{}, len(r.apps)) + for i, a := range r.apps { + out[i] = map[string]interface{}{ + "name": a.Name, + "namespace": a.Namespace, + "count": a.Count, + } + } + return out, nil + } + return r.apps, nil +} +func (r *runningAppsVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (r *runningAppsVal) Get(index ref.Val) ref.Val { + i, ok := index.Value().(int) + if !ok { + if i64, ok2 := index.Value().(int64); ok2 { + i = int(i64) + ok = true + } + } + if !ok || i < 0 || i >= len(r.apps) { + return types.NewErr("index out of range") + } + app := r.apps[i] + return &appVal{name: app.Name, namespace: app.Namespace, count: int64(app.Count)} +} + +func (r *runningAppsVal) Size() ref.Val { return types.Int(len(r.apps)) } + +func (r *runningAppsVal) Contains(elem ref.Val) ref.Val { + av, ok := elem.(*appVal) + if !ok { + return types.False + } + for _, a := range r.apps { + if a.Name == av.name && a.Namespace == av.namespace && int64(a.Count) == av.count { + return types.True + } + } + return types.False +} +func (r *runningAppsVal) Iterator() traits.Iterator { + return &runningAppsIterator{apps: r.apps} +} +func (r *runningAppsVal) Add(elem ref.Val) ref.Val { + return types.NewErr("runningApps list is read-only") +} + +type runningAppsIterator struct { + apps []tfv1.RunningAppDetail + i int +} + +func (it *runningAppsIterator) Type() ref.Type { return types.IteratorType } +func (it *runningAppsIterator) Value() interface{} { return nil } +func (it *runningAppsIterator) Equal(other ref.Val) ref.Val { return types.False } +func (it *runningAppsIterator) ConvertToNative(t reflect.Type) (interface{}, error) { + return nil, fmt.Errorf("iterator cannot convert to native") +} +func (it *runningAppsIterator) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (it *runningAppsIterator) HasNext() ref.Val { + return types.Bool(it.i < len(it.apps)) +} +func (it *runningAppsIterator) Next() ref.Val { + if it.i >= len(it.apps) { + return types.NewErr("iterator past end") + } + a := it.apps[it.i] + it.i++ + return &appVal{name: a.Name, namespace: a.Namespace, count: int64(a.Count)} +} + +var _ traits.Lister = (*runningAppsVal)(nil) +var _ traits.Iterator = (*runningAppsIterator)(nil) + +// gpuVal implements CEL value interface for GPU objects to eliminate map allocations +type gpuVal struct { + *tfv1.GPU + // Cached 
sub-values to avoid repeated allocations + labels ref.Val + annotations ref.Val + nodeSelector ref.Val + available ref.Val + runningApps ref.Val +} + +// Type implements ref.Val interface +func (v *gpuVal) Type() ref.Type { + return types.MapType +} + +// Value implements ref.Val interface +func (v *gpuVal) Value() interface{} { + return v.GPU +} + +// Equal implements ref.Val interface +func (v *gpuVal) Equal(other ref.Val) ref.Val { + if otherGPU, ok := other.(*gpuVal); ok { + return types.Bool(v.UID == otherGPU.UID) + } + return types.False +} + +// ConvertToNative implements ref.Val interface +func (v *gpuVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { + return v.GPU, nil +} + +// ConvertToType implements ref.Val interface +func (v *gpuVal) ConvertToType(typeValue ref.Type) ref.Val { + switch typeValue { + case types.TypeType: + return types.MapType + default: + return types.NewErr("type conversion error") + } +} + +// HasField implements traits.FieldTester interface +func (v *gpuVal) HasField(field string) bool { + switch field { + case GPUFieldName, GPUFieldNamespace, GPUFieldGPUModel, GPUFieldUUID, + GPUFieldPhase, GPUFieldUsedBy, GPUFieldMessage, GPUFieldLabels, + GPUFieldAnnotations, GPUFieldAvailable, GPUFieldNodeSelector, GPUFieldRunningApps: + return true + default: + return false + } +} + +// Get implements traits.Indexer interface for field access with lazy caching +func (v *gpuVal) Get(index ref.Val) ref.Val { + field, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + + switch field { + case GPUFieldName: + return types.String(v.Name) + case GPUFieldNamespace: + return types.String(v.Namespace) + case GPUFieldGPUModel: + return types.String(v.Status.GPUModel) + case GPUFieldUUID: + return types.String(v.Status.UUID) + case GPUFieldPhase: + return getPooledPhaseString(string(v.Status.Phase)) + case GPUFieldUsedBy: + return types.String(string(v.Status.UsedBy)) + case GPUFieldMessage: + return types.String(v.Status.Message) + case GPUFieldLabels: + // Lazy initialization with caching + if v.labels == nil { + v.labels = &labelsVal{labels: v.Labels} + } + return v.labels + case GPUFieldAnnotations: + // Lazy initialization with caching + if v.annotations == nil { + v.annotations = &labelsVal{labels: v.Annotations} + } + return v.annotations + case GPUFieldAvailable: + // Lazy initialization with caching + if v.available == nil { + v.available = &availableVal{available: v.Status.Available} + } + return v.available + case GPUFieldNodeSelector: + // Lazy initialization with caching + if v.nodeSelector == nil { + v.nodeSelector = &labelsVal{labels: v.Status.NodeSelector} + } + return v.nodeSelector + case GPUFieldRunningApps: + // For now, keep simple implementation - can optimize later if needed + if v.runningApps == nil { + apps := make([]tfv1.RunningAppDetail, len(v.Status.RunningApps)) + for i, app := range v.Status.RunningApps { + apps[i] = *app + } + v.runningApps = &runningAppsVal{apps: apps} + } + return v.runningApps + default: + return types.NewErr("no such field: %s", field) + } +} + +// availableVal provides direct access to GPU available resources without maps +type availableVal struct { + available *tfv1.Resource +} + +// Type implements ref.Val interface +func (v *availableVal) Type() ref.Type { + return types.MapType +} + +// Value implements ref.Val interface +func (v *availableVal) Value() interface{} { + return v.available +} + +// Equal implements ref.Val interface +func (v *availableVal) Equal(other 
ref.Val) ref.Val { + return types.False // Not used in comparisons +} + +// ConvertToNative implements ref.Val interface +func (v *availableVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { + return v.available, nil +} + +// ConvertToType implements ref.Val interface +func (v *availableVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} + +// Get implements field access for available resources +func (v *availableVal) Get(index ref.Val) ref.Val { + field, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + + if v.available == nil { + switch field { + case ResourceFieldTFlops: + return types.Double(0.0) + case ResourceFieldVRAM: + return types.Double(0.0) + default: + return types.NewErr("no such field: %s", field) + } + } + + switch field { + case ResourceFieldTFlops: + return types.Double(v.available.Tflops.AsApproximateFloat64()) + case ResourceFieldVRAM: + return types.Double(float64(v.available.Vram.Value())) + default: + return types.NewErr("no such field: %s", field) + } +} + +// HasField implements field testing +func (v *availableVal) HasField(field string) bool { + return field == ResourceFieldTFlops || field == ResourceFieldVRAM +} + +// labelsVal provides direct access to GPU labels without copying +type labelsVal struct { + labels map[string]string +} + +// Type implements ref.Val interface +func (v *labelsVal) Type() ref.Type { + return types.MapType +} + +// Value implements ref.Val interface +func (v *labelsVal) Value() interface{} { + return v.labels +} + +// Equal implements ref.Val interface +func (v *labelsVal) Equal(other ref.Val) ref.Val { + return types.False // Not used in comparisons +} + +// ConvertToNative implements ref.Val interface +func (v *labelsVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { + return v.labels, nil +} + +// ConvertToType implements ref.Val interface +func (v *labelsVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} + +// Get implements map access for labels +func (v *labelsVal) Get(index ref.Val) ref.Val { + key, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + + if v.labels == nil { + return types.String("") + } + + value, exists := v.labels[key] + if !exists { + return types.String("") + } + return types.String(value) +} + // AllocRequestCELFilter converts AllocRequest to CEL filter and executes it type CELFilter struct { - cache *ExpressionCache - expression string - name string + cache *ExpressionCache + name string + // Store early filtering criteria for optimization + requiredPhases []tfv1.TensorFusionGPUPhase + requiredGPUModel string + userExpression string + // Track which fields are actually used + usage fieldUsage + // Display expression for logging (read-only) + displayExpression string } // NewAllocRequestCELFilter creates a new CEL filter from allocation request func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, error) { - // Convert AllocRequest to CEL expression - expression, err := convertAllocRequestToCEL(req) - if err != nil { - return nil, fmt.Errorf("failed to convert AllocRequest to CEL: %w", err) + // Extract early filtering criteria + var requiredPhases []tfv1.TensorFusionGPUPhase + var requiredGPUModel, userExpression, displayExpression string + + if req != nil { + requiredPhases = []tfv1.TensorFusionGPUPhase{ + tfv1.TensorFusionGPUPhaseRunning, + 
tfv1.TensorFusionGPUPhasePending, + } + requiredGPUModel = req.GPUModel + userExpression = req.CELFilterExpression + + // Build display expression for logging (not used for execution) + displayExpression = buildDisplayExpression(req) } + // Analyze field usage in user expression only + usage := analyzeFieldUsage(userExpression) + // Handle nil request case name := "AllocRequest-unknown" if req != nil { @@ -33,9 +537,13 @@ func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, e } return &CELFilter{ - cache: cache, - expression: expression, - name: name, + cache: cache, + name: name, + requiredPhases: requiredPhases, + requiredGPUModel: requiredGPUModel, + userExpression: userExpression, + usage: usage, + displayExpression: displayExpression, }, nil } @@ -51,84 +559,76 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, return gpus, nil } - if f.expression == "" { - // If no expression, return all GPUs (no filtering needed) - return gpus, nil - } - - // Get compiled program from cache - program, err := f.cache.GetOrCompileProgram(f.expression) - if err != nil { - return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.expression, err) - } - + // Pre-allocate result slice with estimated capacity for early filtering var filteredGPUs []*tfv1.GPU + // Early filtering phase: apply basic filters first to reduce CEL evaluation overhead + earlyFilteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) for _, gpu := range gpus { - // Create timeout context for CEL evaluation - evalCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond) - - // Create variables for CEL evaluation - vars := createCELVariables(*gpu, workerPodKey) - - // Evaluate with timeout - resultChan := make(chan evalResult, 1) - go func() { - result, _, evalErr := program.Eval(vars) - resultChan <- evalResult{result: result, err: evalErr} - }() - - select { - case evalRes := <-resultChan: - cancel() - if evalRes.err != nil { - log.Error(evalRes.err, "CEL expression evaluation failed", - "expression", f.expression, - "gpu", gpu.Name, - "workerPodKey", workerPodKey) - // On error, exclude the GPU (fail-safe) - continue - } + // when running progressive migration mode, only return GPUs used by tensor-fusion + if utils.IsProgressiveMigration() && gpu.Status.UsedBy != tfv1.UsedByTensorFusion { + continue + } + // Fast path: check phase first (most common filter) + if f.requiredPhases != nil && !lo.Contains(f.requiredPhases, gpu.Status.Phase) { + continue + } - // Convert result to boolean - if boolResult, ok := evalRes.result.(types.Bool); ok { - if bool(boolResult) { - filteredGPUs = append(filteredGPUs, gpu) - } - } else { - log.Error(nil, "CEL expression did not return boolean", - "expression", f.expression, - "result", evalRes.result, - "gpu", gpu.Name) - // On non-boolean result, exclude the GPU (fail-safe) - continue - } - case <-evalCtx.Done(): - cancel() - // Timeout - skip this GPU (fail-safe behavior) - log.V(1).Info("CEL evaluation timeout", "gpu", gpu.Name, "expression", f.expression) + // Fast path: check GPU model (second most common filter) + if f.requiredGPUModel != "" && gpu.Status.GPUModel != f.requiredGPUModel { continue } + + earlyFilteredGPUs = append(earlyFilteredGPUs, gpu) + } + + // If no user expression, return early filtered results + if f.userExpression == "" { + log.V(1).Info("CEL filter applied (early filtering only)", + "filter", f.name, + "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), + "outputGPUs", 
len(earlyFilteredGPUs)) + return earlyFilteredGPUs, nil + } + + // If no GPUs passed early filtering, return empty result + if len(earlyFilteredGPUs) == 0 { + return earlyFilteredGPUs, nil + } + + // Get compiled program from cache for user expression + program, err := f.cache.GetOrCompileProgram(f.userExpression) + if err != nil { + return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.userExpression, err) + } + + // Use fast path if available, otherwise fall back to CEL + + // Fallback to CEL evaluation for complex expressions + if len(earlyFilteredGPUs) >= ParallelThreshold { + // Use parallel evaluation for large GPU sets + filteredGPUs = f.filterFallbackParallel(program, earlyFilteredGPUs, workerPodKey) + } else { + // Sequential evaluation for smaller sets + filteredGPUs = f.filterFallbackSequential(ctx, program, earlyFilteredGPUs, workerPodKey) } - log.V(1).Info("AllocRequest CEL filter applied", + log.V(1).Info("CEL filter applied (CEL evaluation)", "filter", f.name, - "expression", f.expression, + "displayExpression", f.displayExpression, + "userExpression", f.userExpression, "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), "outputGPUs", len(filteredGPUs)) return filteredGPUs, nil } -type evalResult struct { - result interface{} - err error -} - -// convertAllocRequestToCEL converts an allocation request to a CEL expression -func convertAllocRequestToCEL(req *tfv1.AllocRequest) (string, error) { +// buildDisplayExpression creates a readable expression string for logging purposes only +func buildDisplayExpression(req *tfv1.AllocRequest) string { if req == nil { - return "", nil + return "" } var conditions []string @@ -138,30 +638,24 @@ func convertAllocRequestToCEL(req *tfv1.AllocRequest) (string, error) { conditions = append(conditions, req.CELFilterExpression) } - // Add GPU phase condition (must be Ready) - conditions = append(conditions, "gpu.phase == 'Ready'") - - // Add GPU model filter if specified - if req.GPUModel != "" { - conditions = append(conditions, fmt.Sprintf("gpu.gpuModel == '%s'", req.GPUModel)) - } - - // If no conditions, return empty expression (no filtering) + // If no conditions, return empty expression if len(conditions) == 0 { - return "", nil + return "" } - // Combine all conditions with AND + // Combine all conditions with AND using strings.Builder for efficiency if len(conditions) == 1 { - return conditions[0], nil + return conditions[0] } - expression := conditions[0] + var builder strings.Builder + builder.WriteString(conditions[0]) for i := 1; i < len(conditions); i++ { - expression += " && " + conditions[i] + builder.WriteString(" && ") + builder.WriteString(conditions[i]) } - return expression, nil + return builder.String() } // createCELEnvironment creates a CEL environment with GPU-related variables and functions @@ -171,58 +665,135 @@ func createCELEnvironment() (*cel.Env, error) { cel.Variable(CELVarGPU, cel.MapType(cel.StringType, cel.DynType)), // Define worker pod key cel.Variable(CELVarWorkerPodKey, cel.MapType(cel.StringType, cel.StringType)), - // Define request object structure - cel.Variable(CELVarRequest, cel.MapType(cel.StringType, cel.DynType)), ) } -// createCELVariables creates variables for CEL evaluation from GPU and request information -func createCELVariables(gpu tfv1.GPU, workerPodKey tfv1.NameNamespace) map[string]interface{} { - // Convert GPU to a map for CEL evaluation - gpuMap := map[string]interface{}{ - GPUFieldName: gpu.Name, - GPUFieldNamespace: gpu.Namespace, - 
GPUFieldGPUModel: gpu.Status.GPUModel, - GPUFieldUUID: gpu.Status.UUID, - GPUFieldPhase: string(gpu.Status.Phase), - GPUFieldUsedBy: string(gpu.Status.UsedBy), - GPUFieldMessage: gpu.Status.Message, - GPUFieldLabels: gpu.Labels, - GPUFieldAnnotations: gpu.Annotations, - } +// filterFallbackSequential performs sequential CEL evaluation for smaller GPU sets +func (f *CELFilter) filterFallbackSequential(ctx context.Context, program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) []*tfv1.GPU { + filteredGPUs := make([]*tfv1.GPU, 0, len(gpus)/2) + log := log.FromContext(ctx) + var activation ZeroAllocActivation + for i, gpu := range gpus { + // Periodic context check every 64 GPUs for very large sets + if i&63 == 0 { + select { + case <-ctx.Done(): + log.V(1).Info("CEL evaluation cancelled", "processedGPUs", len(filteredGPUs), "totalGPUs", len(gpus)) + return filteredGPUs + default: + } + } - // Add available information if available - if gpu.Status.Available != nil { - gpuMap[GPUFieldAvailable] = map[string]interface{}{ - ResourceFieldTFlops: gpu.Status.Available.Tflops.AsApproximateFloat64(), - ResourceFieldVRAM: gpu.Status.Available.Vram.AsApproximateFloat64(), + // Use zero-allocation activation instead of maps + activation.init(gpu, workerPodKey, f.usage) + + // Direct synchronous evaluation with custom activation + result, _, evalErr := program.Eval(&activation) + + if evalErr != nil { + log.Error(evalErr, "CEL expression evaluation failed", + "expression", f.userExpression, + "gpu", gpu.Name, + "workerPodKey", workerPodKey) + // On error, exclude the GPU (fail-safe) + continue + } + + // Convert result to boolean + if boolResult, ok := result.(types.Bool); ok && bool(boolResult) { + filteredGPUs = append(filteredGPUs, gpu) + } else { + log.Error(nil, "CEL expression did not return boolean", + "expression", f.userExpression, + "result", result, + "gpu", gpu.Name) + // On non-boolean result, exclude the GPU (fail-safe) + continue } } - // Add node selector information - if gpu.Status.NodeSelector != nil { - gpuMap[GPUFieldNodeSelector] = gpu.Status.NodeSelector + return filteredGPUs +} + +// filterFallbackParallel performs parallel CEL evaluation for large GPU sets +func (f *CELFilter) filterFallbackParallel(program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) []*tfv1.GPU { + numGPUs := len(gpus) + numWorkers := runtime.NumCPU() + if numWorkers > DefaultWorkerCount { + numWorkers = DefaultWorkerCount } - // Add running apps information (always set, even if empty) - runningApps := make([]map[string]interface{}, len(gpu.Status.RunningApps)) - for i, app := range gpu.Status.RunningApps { - runningApps[i] = map[string]interface{}{ - AppFieldName: app.Name, - AppFieldNamespace: app.Namespace, - AppFieldCount: app.Count, + chunkSize := (numGPUs + numWorkers - 1) / numWorkers + resultChannels := make([]<-chan []*tfv1.GPU, numWorkers) + var activation ZeroAllocActivation + // Create workers + for i := 0; i < numWorkers; i++ { + start := i * chunkSize + end := start + chunkSize + if end > numGPUs { + end = numGPUs + } + + if start >= end { + // No work for this worker + ch := make(chan []*tfv1.GPU, 1) + ch <- []*tfv1.GPU{} + close(ch) + resultChannels[i] = ch + continue } + + chunk := gpus[start:end] + resultCh := make(chan []*tfv1.GPU, 1) + resultChannels[i] = resultCh + + // Start worker goroutine + go func(gpuChunk []*tfv1.GPU, resultCh chan<- []*tfv1.GPU) { + defer close(resultCh) + + filtered := make([]*tfv1.GPU, 0, len(gpuChunk)/2) // Estimate 50% pass rate 
+ activation := activation // shadow with a per-goroutine copy: sharing one ZeroAllocActivation across workers is a data race + + for _, gpu := range gpuChunk { + // Use zero-allocation activation + activation.init(gpu, workerPodKey, f.usage) + + // Direct synchronous evaluation + result, _, evalErr := program.Eval(&activation) + if evalErr != nil { + // On error, exclude the GPU (fail-safe) + continue + } + + // Convert result to boolean + if boolResult, ok := result.(types.Bool); ok && bool(boolResult) { + filtered = append(filtered, gpu) + } + // On non-boolean result, exclude the GPU (fail-safe) + } + resultCh <- filtered + }(chunk, resultCh) } - gpuMap[GPUFieldRunningApps] = runningApps - // Worker pod key information - workerPodKeyMap := map[string]string{ - PodKeyFieldName: workerPodKey.Name, - PodKeyFieldNamespace: workerPodKey.Namespace, + // Collect results + var totalFiltered []*tfv1.GPU + for _, ch := range resultChannels { + chunkResults := <-ch + totalFiltered = append(totalFiltered, chunkResults...) } - return map[string]interface{}{ - CELVarGPU: gpuMap, - CELVarWorkerPodKey: workerPodKeyMap, + return totalFiltered +} + +// analyzeFieldUsage performs simple heuristic analysis of which fields are used in the expression +func analyzeFieldUsage(expression string) fieldUsage { + if expression == "" { + return fieldUsage{} + } + return fieldUsage{ + labels: strings.Contains(expression, "labels"), + annotations: strings.Contains(expression, "annotations"), + available: strings.Contains(expression, "available") || strings.Contains(expression, "tflops") || strings.Contains(expression, "vram"), + nodeSelector: strings.Contains(expression, "nodeSelector"), + runningApps: strings.Contains(expression, "runningApps"), } } diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go index 8894db07..254baf7c 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go @@ -7,31 +7,41 @@ import ( "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" + dracel "k8s.io/dynamic-resource-allocation/cel" +) + +// Test constants for repeated strings +const ( + testEnvironmentProduction = "production" + testTierHighPerformance = "high-performance" + testPriorityCritical = "critical" ) // Benchmark performance of the CEL filter compared to the original filter func BenchmarkFilterPerformance(b *testing.B) { // Create test data - const numGPUs = 1000 + const numGPUs = 1000000 gpus := make([]*tfv1.GPU, numGPUs) for i := 0; i < numGPUs; i++ { gpuModel := "A100" - if i%3 == 0 { + switch i % 3 { + case 0: gpuModel = "V100" - } else if i%3 == 1 { + case 1: gpuModel = "H100" } - phase := "Ready" + phase := constants.PhaseRunning if i%10 == 0 { - phase = "Pending" + phase = constants.PhasePending } - gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", gpuModel, phase, 150.0, 40.0) - gpu.Labels["environment"] = "production" + gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), gpuModel, phase, 150.0, 40.0) + gpu.Labels["environment"] = testEnvironmentProduction if i%2 == 0 { - gpu.Labels["tier"] = "high-performance" + gpu.Labels["tier"] = testTierHighPerformance } gpus[i] = gpu } @@ -43,7 +53,7 @@ func BenchmarkFilterPerformance(b *testing.B) { b.Run("OriginalFilters", func(b *testing.B) { // Import the original filter package registry := filter.NewFilterRegistry().With( - filter.NewPhaseFilter("Ready"), +
filter.NewPhaseFilter(constants.PhaseRunning), filter.NewGPUModelFilter("A100"), ) @@ -59,7 +69,7 @@ func BenchmarkFilterPerformance(b *testing.B) { // Benchmark CEL filter - basic filtering b.Run("CELFilter_Basic", func(b *testing.B) { - request := createTestAllocRequest("default", "test-workload", "A100", "") + request := createTestAllocRequest("A100", "") cache, err := NewExpressionCache(100, 5*time.Minute) if err != nil { b.Fatal(err) @@ -82,7 +92,7 @@ func BenchmarkFilterPerformance(b *testing.B) { // Benchmark CEL filter - complex expression b.Run("CELFilter_Complex", func(b *testing.B) { - request := createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'") + request := createTestAllocRequest("A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == '"+testEnvironmentProduction+"'") cache, err := NewExpressionCache(100, 5*time.Minute) if err != nil { b.Fatal(err) @@ -114,15 +124,16 @@ func BenchmarkFilterPerformance(b *testing.B) { "gpu.gpuModel == 'A100' && gpu.available.tflops > 100.0", "gpu.gpuModel == 'V100' && gpu.available.tflops > 80.0", "gpu.gpuModel == 'H100' && gpu.available.tflops > 180.0", - "gpu.labels['environment'] == 'production'", - "gpu.labels['tier'] == 'high-performance'", + + "gpu.labels['environment'] == '" + testEnvironmentProduction + "'", + "gpu.labels['tier'] == '" + testTierHighPerformance + "'", "gpu.available.vram > 30000000000", } b.ResetTimer() for i := 0; i < b.N; i++ { expression := expressions[i%len(expressions)] - request := createTestAllocRequest("default", "test-workload", "", expression) + request := createTestAllocRequest("", expression) celFilter, err := NewCELFilter(request, cache) if err != nil { @@ -137,6 +148,46 @@ func BenchmarkFilterPerformance(b *testing.B) { } }) + // Benchmark DRA CEL filter - basic filtering + b.Run("DRACELFilter_Basic", func(b *testing.B) { + request := createTestAllocRequest("A100", "") + cache := dracel.NewCache(100, dracel.Features{}) + + draFilter, err := NewDRACELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filteredGPUs, err := draFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + + // Benchmark DRA CEL filter - complex expression + b.Run("DRACELFilter_Complex", func(b *testing.B) { + request := createTestAllocRequest("", "device.attributes['model'].string == 'A100' && device.attributes['label.environment'].string == '"+testEnvironmentProduction+"'") + cache := dracel.NewCache(100, dracel.Features{}) + + draFilter, err := NewDRACELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filteredGPUs, err := draFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + // Print performance comparison report after benchmarks printPerformanceComparison(b) } @@ -148,7 +199,7 @@ func BenchmarkCachePerformance(b *testing.B) { b.Fatal(err) } - expression := "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0" + expression := "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0" b.Run("CacheHit", func(b *testing.B) { // Pre-warm cache @@ -169,7 +220,7 @@ func BenchmarkCachePerformance(b *testing.B) { b.Run("CacheMiss", func(b *testing.B) { expressions := make([]string, b.N) for i := 0; i < b.N; i++ { - expressions[i] = fmt.Sprintf("gpu.phase == 'Ready' && 
gpu.gpuModel == 'A100' && gpu.available.tflops >= %d.0", i%200+50) + expressions[i] = fmt.Sprintf("gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= %d.0", i%200+50) } b.ResetTimer() @@ -187,10 +238,10 @@ func BenchmarkExpressionComplexity(b *testing.B) { const numGPUs = 100 gpus := make([]*tfv1.GPU, numGPUs) for i := 0; i < numGPUs; i++ { - gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", "A100", "Ready", 150.0, 40.0) - gpu.Labels["environment"] = "production" - gpu.Labels["tier"] = "high-performance" - gpu.Annotations["priority"] = "critical" + gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Labels["environment"] = testEnvironmentProduction + gpu.Labels["tier"] = testTierHighPerformance + gpu.Annotations["priority"] = testPriorityCritical gpus[i] = gpu } @@ -203,23 +254,23 @@ func BenchmarkExpressionComplexity(b *testing.B) { }{ { name: "Simple", - expression: "gpu.phase == 'Ready'", + expression: "gpu.phase == 'Running'", }, { name: "Medium", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100'", }, { name: "Complex", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0", }, { name: "VeryComplex", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == '" + testEnvironmentProduction + "'", }, { name: "UltraComplex", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production' && gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == '" + testEnvironmentProduction + "' && gpu.labels['tier'] == '" + testTierHighPerformance + "' && gpu.annotations['priority'] == '" + testPriorityCritical + "'", }, } @@ -230,7 +281,7 @@ func BenchmarkExpressionComplexity(b *testing.B) { b.Fatal(err) } - request := createTestAllocRequest("default", "test-workload", "", tc.expression) + request := createTestAllocRequest("", tc.expression) celFilter, err := NewCELFilter(request, cache) if err != nil { b.Fatal(err) @@ -254,7 +305,7 @@ func printPerformanceComparison(b *testing.B) { === GPU Filter Performance Comparison === Test Environment: -- Number of GPUs: 1000 +- Number of GPUs: 1000000 - GPU Models: A100 (33%%), V100 (33%%), H100 (33%%) - GPU Phases: Ready (90%%), Pending (10%%) diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go index c21e4ee8..72481ee9 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go @@ -6,18 +6,25 @@ import ( "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +// Test constants for repeated strings used only in cel_filter_test.go +const ( + testEnvProduction = 
"production" + testPriorCritical = "critical" +) + // Helper functions for creating test data -func createTestGPU(name, namespace, gpuModel, phase string, tflops, vram float64) *tfv1.GPU { +func createTestGPU(name, gpuModel, phase string, tflops, vram float64) *tfv1.GPU { gpu := &tfv1.GPU{ ObjectMeta: metav1.ObjectMeta{ Name: name, - Namespace: namespace, + Namespace: "default", Labels: make(map[string]string), Annotations: make(map[string]string), }, @@ -40,11 +47,11 @@ func createTestGPU(name, namespace, gpuModel, phase string, tflops, vram float64 return gpu } -func createTestAllocRequest(namespace, name, gpuModel, celExpression string) *tfv1.AllocRequest { +func createTestAllocRequest(gpuModel, celExpression string) *tfv1.AllocRequest { return &tfv1.AllocRequest{ WorkloadNameNamespace: tfv1.NameNamespace{ - Name: name, - Namespace: namespace, + Name: "test-workload", + Namespace: "default", }, GPUModel: gpuModel, CELFilterExpression: celExpression, @@ -65,71 +72,71 @@ func TestCELFilter_NormalCases(t *testing.T) { }{ { name: "filter by GPU model", - request: createTestAllocRequest("default", "test-workload", "A100", ""), + request: createTestAllocRequest("A100", ""), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), - createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), }, expectedCount: 2, description: "Should filter GPUs matching the specified model A100", }, { name: "filter by GPU phase only", - request: createTestAllocRequest("default", "test-workload", "", ""), + request: createTestAllocRequest("", ""), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "A100", "Pending", 150.0, 40.0), - createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-4", "default", "A100", "Failed", 150.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "A100", constants.PhasePending, 150.0, 40.0), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-4", "A100", constants.PhaseFailed, 150.0, 40.0), }, - expectedCount: 2, - description: "Should only return GPUs in Ready phase", + expectedCount: 3, + description: "Should return GPUs in Running and Pending phases", }, { name: "custom CEL expression - filter by available TFLOPS", - request: createTestAllocRequest("default", "test-workload", "", "gpu.available.tflops > 120.0"), + request: createTestAllocRequest("", "gpu.available.tflops > 120.0"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), - createTestGPU("gpu-3", "default", "H100", "Ready", 200.0, 80.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), + createTestGPU("gpu-3", "H100", constants.PhaseRunning, 200.0, 80.0), }, expectedCount: 2, - description: "Should filter GPUs with TFLOPS > 120 and Ready phase", + description: "Should filter GPUs with TFLOPS > 120 and Running/Pending phase", }, { name: "custom CEL expression - filter by available VRAM", - request: createTestAllocRequest("default", 
"test-workload", "", "gpu.available.vram > 35000000000"), // > 35GB in bytes + request: createTestAllocRequest("", "gpu.available.vram > 35000000000"), // > 35GB in bytes gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), // 40GB - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), // 32GB - createTestGPU("gpu-3", "default", "H100", "Ready", 200.0, 80.0), // 80GB + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), // 40GB + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), // 32GB + createTestGPU("gpu-3", "H100", constants.PhaseRunning, 200.0, 80.0), // 80GB }, expectedCount: 2, - description: "Should filter GPUs with VRAM > 35GB and Ready phase", + description: "Should filter GPUs with VRAM > 35GB and Running/Pending phase", }, { name: "combined model and custom CEL expression", - request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0"), + request: createTestAllocRequest("A100", "gpu.available.tflops >= 150.0"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "A100", "Ready", 120.0, 40.0), - createTestGPU("gpu-3", "default", "V100", "Ready", 160.0, 32.0), - createTestGPU("gpu-4", "default", "A100", "Ready", 180.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "A100", constants.PhaseRunning, 120.0, 40.0), + createTestGPU("gpu-3", "V100", constants.PhaseRunning, 160.0, 32.0), + createTestGPU("gpu-4", "A100", constants.PhaseRunning, 180.0, 40.0), }, expectedCount: 2, - description: "Should filter A100 GPUs with TFLOPS >= 150 and Ready phase", + description: "Should filter A100 GPUs with TFLOPS >= 150 and Running/Pending phase", }, { name: "filter by labels", - request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['environment'] == 'production'"), + request: createTestAllocRequest("", "gpu.labels['environment'] == '"+testEnvProduction+"'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) - gpu1.Labels["environment"] = "production" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1.Labels["environment"] = testEnvProduction + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Labels["environment"] = "development" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) - gpu3.Labels["environment"] = "production" + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu3.Labels["environment"] = testEnvProduction return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), expectedCount: 2, @@ -137,14 +144,14 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "filter by annotations", - request: createTestAllocRequest("default", "test-workload", "", "gpu.annotations['priority'] == 'critical'"), + request: createTestAllocRequest("", "gpu.annotations['priority'] == '"+testPriorCritical+"'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) - gpu1.Annotations["priority"] = "critical" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1.Annotations["priority"] = testPriorCritical + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 
150.0, 40.0) gpu2.Annotations["priority"] = "low" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) - gpu3.Annotations["priority"] = "critical" + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu3.Annotations["priority"] = testPriorCritical return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), expectedCount: 2, @@ -152,15 +159,15 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "combined labels and annotations filter", - request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'"), + request: createTestAllocRequest("", "gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) gpu1.Labels["tier"] = "high-performance" - gpu1.Annotations["priority"] = "critical" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu1.Annotations["priority"] = testPriorCritical + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Labels["tier"] = "standard" gpu2.Annotations["priority"] = "critical" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) gpu3.Labels["tier"] = "high-performance" gpu3.Annotations["priority"] = "low" return []*tfv1.GPU{gpu1, gpu2, gpu3} @@ -170,25 +177,139 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "empty GPU list", - request: createTestAllocRequest("default", "test-workload", "A100", ""), + request: createTestAllocRequest("A100", ""), gpus: []*tfv1.GPU{}, expectedCount: 0, description: "Should handle empty GPU list gracefully", }, { name: "complex combined expression with model, resources, and metadata", - request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'"), + request: createTestAllocRequest("A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 180.0, 40.0) - gpu1.Labels["environment"] = "production" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 120.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 180.0, 40.0) + gpu1.Labels["environment"] = testEnvProduction + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 120.0, 40.0) gpu2.Labels["environment"] = "production" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 200.0, 40.0) + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 200.0, 40.0) gpu3.Labels["environment"] = "development" return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), expectedCount: 1, - description: "Should filter A100 GPUs with TFLOPS >= 150, production environment, and Ready phase", + description: "Should filter A100 GPUs with TFLOPS >= 150, production environment, and Running/Pending phase", + }, + { + name: "filter by running apps - no running apps", + request: createTestAllocRequest("", "size(gpu.runningApps) == 0"), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: 
"app1", Namespace: "default", Count: 1}, + } + return gpu + }(), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), + }, + expectedCount: 2, + description: "Should return GPUs with no running apps", + }, + { + name: "filter by running apps - has specific app", + request: createTestAllocRequest("", "gpu.runningApps.exists(app, app.name == 'training-job' && app.namespace == 'ml-team')"), + gpus: []*tfv1.GPU{ + func() *tfv1.GPU { + gpu := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "training-job", Namespace: "ml-team", Count: 2}, + {Name: "other-job", Namespace: "default", Count: 1}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "other-job", Namespace: "ml-team", Count: 1}, + } + return gpu + }(), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), + }, + expectedCount: 1, + description: "Should return GPUs running specific training job", + }, + { + name: "filter by running apps - count threshold", + request: createTestAllocRequest("", "gpu.runningApps.all(app, app.count <= 2) && size(gpu.runningApps) > 0"), + gpus: []*tfv1.GPU{ + func() *tfv1.GPU { + gpu := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "default", Count: 1}, + {Name: "job2", Namespace: "default", Count: 2}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "default", Count: 5}, // Count > 2 + } + return gpu + }(), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + }, + expectedCount: 1, + description: "Should return GPUs where all running apps have count <= 2", + }, + { + name: "filter by running apps - complex condition", + request: createTestAllocRequest("A100", "gpu.available.tflops >= 150.0 && (size(gpu.runningApps) == 0 || gpu.runningApps.all(app, app.namespace != 'restricted'))"), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "allowed", Count: 1}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "restricted", Count: 1}, // Restricted namespace + } + return gpu + }(), + createTestGPU("gpu-4", "V100", constants.PhaseRunning, 150.0, 40.0), // Wrong model + }, + expectedCount: 2, + description: "Should return A100 GPUs with sufficient resources and no restricted apps", + }, + { + name: "filter by running apps - namespace isolation", + request: createTestAllocRequest("", "!gpu.runningApps.exists(app, app.namespace == 'tenant-a')"), + gpus: []*tfv1.GPU{ + func() *tfv1.GPU { + gpu := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "tenant-b", Count: 1}, + {Name: "job2", Namespace: "shared", Count: 1}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", 
constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "tenant-a", Count: 1}, // Should be excluded + {Name: "job2", Namespace: "tenant-b", Count: 1}, + } + return gpu + }(), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + }, + expectedCount: 2, + description: "Should return GPUs not running apps from tenant-a", }, } @@ -207,6 +328,8 @@ func TestCELFilter_NormalCases(t *testing.T) { // Verify results require.NoError(t, err, "Filter execution should not fail") + + // Debug output for complex condition test assert.Len(t, filteredGPUs, tt.expectedCount, tt.description) // Verify filter name @@ -224,11 +347,11 @@ func TestCELFilter_EdgeAndExceptionCases(t *testing.T) { t.Run("CEL expressions edge cases", func(t *testing.T) { // Test GPUs for execution testGPUs := []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), } // Add GPU with nil resources - gpuWithNilResources := createTestGPU("gpu-nil", "default", "A100", "Ready", 0, 0) + gpuWithNilResources := createTestGPU("gpu-nil", "A100", constants.PhaseRunning, 0, 0) gpuWithNilResources.Status.Available = nil testGPUs = append(testGPUs, gpuWithNilResources) @@ -338,7 +461,7 @@ func TestCELFilter_EdgeAndExceptionCases(t *testing.T) { cache, err := NewExpressionCache(10, 5*time.Minute) require.NoError(t, err) - request := createTestAllocRequest("default", "test-workload", "", tt.expression) + request := createTestAllocRequest("", tt.expression) celFilter, err := NewCELFilter(request, cache) if tt.shouldFail { diff --git a/internal/gpuallocator/filter/cel_filter/dra_cel_filter.go b/internal/gpuallocator/filter/cel_filter/dra_cel_filter.go new file mode 100644 index 00000000..83b73c93 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/dra_cel_filter.go @@ -0,0 +1,216 @@ +package cel_filter + +import ( + "context" + "encoding/json" + "fmt" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" + "github.com/samber/lo" + resourceapi "k8s.io/api/resource/v1" + dracel "k8s.io/dynamic-resource-allocation/cel" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// DRACELFilter implements CEL filtering using k8s.io/dynamic-resource-allocation/cel +type DRACELFilter struct { + name string + requiredPhases []tfv1.TensorFusionGPUPhase + userExpression string + cache *dracel.Cache + displayExpression string +} + +// NewDRACELFilter creates a new DRA-based CEL filter from allocation request +func NewDRACELFilter(req *tfv1.AllocRequest, cache *dracel.Cache) (*DRACELFilter, error) { + // Extract early filtering criteria + var requiredPhases []tfv1.TensorFusionGPUPhase + var userExpression, displayExpression string + + if req != nil { + requiredPhases = []tfv1.TensorFusionGPUPhase{ + tfv1.TensorFusionGPUPhaseRunning, + tfv1.TensorFusionGPUPhasePending, + } + userExpression = req.CELFilterExpression + displayExpression = buildDisplayExpression(req) + } + + // Handle nil request case + name := "AllocRequest-unknown" + if req != nil { + name = fmt.Sprintf("AllocRequest-%s", req.WorkloadNameNamespace.String()) + } + + // Validate expression if provided + if userExpression != "" && cache != nil { + result := 
cache.Check(userExpression) + if result.Error != nil { + return nil, fmt.Errorf("failed to compile CEL expression %q: %w", userExpression, result.Error) + } + } + + return &DRACELFilter{ + name: name, + requiredPhases: requiredPhases, + userExpression: userExpression, + cache: cache, + displayExpression: displayExpression, + }, nil +} + +// Name returns the filter name +func (f *DRACELFilter) Name() string { + return f.name +} + +// Filter applies the CEL expression to filter GPUs +func (f *DRACELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []*tfv1.GPU) ([]*tfv1.GPU, error) { + log := log.FromContext(ctx) + if len(gpus) == 0 { + return gpus, nil + } + + // Early filtering phase: apply basic filters first + earlyFilteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) + for _, gpu := range gpus { + // Progressive migration mode check + if utils.IsProgressiveMigration() && gpu.Status.UsedBy != tfv1.UsedByTensorFusion { + continue + } + + // Fast path: check phase first (most common filter) + if f.requiredPhases != nil && !lo.Contains(f.requiredPhases, gpu.Status.Phase) { + continue + } + + earlyFilteredGPUs = append(earlyFilteredGPUs, gpu) + } + + // If no user expression, return early filtered results + if f.userExpression == "" { + log.V(1).Info("DRA CEL filter applied (early filtering only)", + "filter", f.name, + "inputGPUs", len(gpus), + "outputGPUs", len(earlyFilteredGPUs)) + return earlyFilteredGPUs, nil + } + + // If no GPUs passed early filtering, return empty result + if len(earlyFilteredGPUs) == 0 { + return earlyFilteredGPUs, nil + } + + // Get compiled expression from cache + compiledExpr := f.cache.GetOrCompile(f.userExpression) + if compiledExpr.Error != nil { + return nil, fmt.Errorf("failed to compile CEL expression %q: %w", f.userExpression, compiledExpr.Error) + } + + // Apply CEL filtering using DRA + filteredGPUs := make([]*tfv1.GPU, 0, len(earlyFilteredGPUs)) + for _, gpu := range earlyFilteredGPUs { + // Convert GPU to DRA Device + device, err := convertGPUToDevice(gpu) + if err != nil { + log.Error(err, "Failed to convert GPU to Device", "gpu", gpu.Name) + continue + } + + // Evaluate CEL expression + matches, details, err := compiledExpr.DeviceMatches(ctx, device) + if err != nil { + log.Error(err, "CEL expression evaluation failed", + "expression", f.userExpression, + "gpu", gpu.Name, + "details", details) + // On error, exclude the GPU (fail-safe) + continue + } + + if matches { + filteredGPUs = append(filteredGPUs, gpu) + } + } + + log.V(1).Info("DRA CEL filter applied", + "filter", f.name, + "displayExpression", f.displayExpression, + "userExpression", f.userExpression, + "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), + "outputGPUs", len(filteredGPUs)) + + return filteredGPUs, nil +} + +// convertGPUToDevice converts tfv1.GPU to dracel.Device +func convertGPUToDevice(gpu *tfv1.GPU) (dracel.Device, error) { + if gpu == nil { + return dracel.Device{}, fmt.Errorf("GPU is nil") + } + + allowMultiple := true + device := dracel.Device{ + Driver: constants.DRADriverName, + AllowMultipleAllocations: &allowMultiple, + Attributes: make(map[resourceapi.QualifiedName]resourceapi.DeviceAttribute), + Capacity: make(map[resourceapi.QualifiedName]resourceapi.DeviceCapacity), + } + + // Map basic attributes + device.Attributes[GPUFieldName] = resourceapi.DeviceAttribute{StringValue: &gpu.Name} + device.Attributes[GPUFieldNamespace] = resourceapi.DeviceAttribute{StringValue: &gpu.Namespace} + model := gpu.Status.GPUModel + 
device.Attributes[GPUFieldGPUModel] = resourceapi.DeviceAttribute{StringValue: &model} + uuid := gpu.Status.UUID + device.Attributes[GPUFieldUUID] = resourceapi.DeviceAttribute{StringValue: &uuid} + usedBy := string(gpu.Status.UsedBy) + device.Attributes[GPUFieldUsedBy] = resourceapi.DeviceAttribute{StringValue: &usedBy} + message := gpu.Status.Message + device.Attributes[GPUFieldMessage] = resourceapi.DeviceAttribute{StringValue: &message} + + // Map labels with prefix + if len(gpu.Labels) > 0 { + for k, v := range gpu.Labels { + labelValue := v + device.Attributes[resourceapi.QualifiedName(fmt.Sprintf("%s.%s", GPUFieldLabels, k))] = resourceapi.DeviceAttribute{StringValue: &labelValue} + } + } + + // Map annotations with prefix + if len(gpu.Annotations) > 0 { + for k, v := range gpu.Annotations { + annotationValue := v + device.Attributes[resourceapi.QualifiedName(fmt.Sprintf("%s.%s", GPUFieldAnnotations, k))] = resourceapi.DeviceAttribute{StringValue: &annotationValue} + } + } + + // Map nodeSelector with prefix + if len(gpu.Status.NodeSelector) > 0 { + for k, v := range gpu.Status.NodeSelector { + selectorValue := v + device.Attributes[resourceapi.QualifiedName(fmt.Sprintf("%s.%s", GPUFieldNodeSelector, k))] = resourceapi.DeviceAttribute{StringValue: &selectorValue} + } + } + + // Map runningApps as JSON string + if len(gpu.Status.RunningApps) > 0 { + appsJSON, err := json.Marshal(gpu.Status.RunningApps) + if err != nil { + return dracel.Device{}, fmt.Errorf("failed to marshal runningApps: %w", err) + } + appsStr := string(appsJSON) + device.Attributes[GPUFieldRunningApps] = resourceapi.DeviceAttribute{StringValue: &appsStr} + } + + // Map capacity (tflops and vram) - DRA experimental version maintains capacity state + if gpu.Status.Capacity != nil { + device.Capacity[ResourceFieldTFlops] = resourceapi.DeviceCapacity{Value: gpu.Status.Capacity.Tflops} + device.Capacity[ResourceFieldVRAM] = resourceapi.DeviceCapacity{Value: gpu.Status.Capacity.Vram} + } + + return device, nil +} diff --git a/internal/gpuallocator/filter/cel_filter/expression_cache.go b/internal/gpuallocator/filter/cel_filter/expression_cache.go index 4065c3b9..f98fb1d1 100644 --- a/internal/gpuallocator/filter/cel_filter/expression_cache.go +++ b/internal/gpuallocator/filter/cel_filter/expression_cache.go @@ -88,6 +88,10 @@ func (c *ExpressionCache) GetOrCompileProgram(expression string) (cel.Program, e return nil, fmt.Errorf("failed to compile CEL expression %q: %w", expression, issues.Err()) } + // Validate result type - must return boolean + // Note: Skip type validation for now as CEL type system is complex + // Runtime validation in Filter method is sufficient + program, err := c.env.Program(ast) if err != nil { c.misses++ @@ -121,7 +125,7 @@ func (c *ExpressionCache) hashExpression(expression string) string { // evictLRU removes the least recently used entry from cache func (c *ExpressionCache) evictLRU() { var oldestKey string - var oldestTime time.Time = time.Now() + var oldestTime = time.Now() for key, cached := range c.cache { if cached.AccessedAt.Before(oldestTime) { diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index 44deb3c4..21b0c5a8 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -31,7 +31,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/retry" - "k8s.io/kubernetes/pkg/scheduler/framework" + fwk "k8s.io/kube-scheduler/framework" 
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" @@ -41,6 +41,8 @@ import ( const MaxGPUCounterPerAllocation = 128 const CleanUpCheckInterval = 3 * time.Minute +var GPUCapacityMap = map[string]tfv1.Resource{} + type Strategy interface { Score(gpu *tfv1.GPU) int @@ -52,7 +54,7 @@ type SimulateSchedulingFilterDetail struct { FilterStageDetails []filter.FilterDetail } -func (p *SimulateSchedulingFilterDetail) Clone() framework.StateData { +func (p *SimulateSchedulingFilterDetail) Clone() fwk.StateData { return p } @@ -250,6 +252,37 @@ func (s *GpuAllocator) applyLegacyFilters( return filteredGPUs, filterDetails, nil } +func (s *GpuAllocator) FilterWithPreempt( + req *tfv1.AllocRequest, + preemptAllocRequests []*tfv1.AllocRequest, +) ([]*tfv1.GPU, []filter.FilterDetail, error) { + toFilterGPUs := []*tfv1.GPU{} + for _, preemptAllocRequest := range preemptAllocRequests { + for _, gpuName := range preemptAllocRequest.GPUNames { + gpu := s.gpuStore[types.NamespacedName{Name: gpuName}] + if gpu == nil { + return nil, nil, fmt.Errorf("gpu %s not found", gpuName) + } + gpuCopy := gpu.DeepCopy() + gpuCopy.Status.Available.Tflops.Add(preemptAllocRequest.Request.Tflops) + gpuCopy.Status.Available.Vram.Add(preemptAllocRequest.Request.Vram) + toFilterGPUs = append(toFilterGPUs, gpuCopy) + } + } + + filterRegistry := s.filterRegistry.With(filter.NewResourceFilter(req.Request)) + // Add GPU model filter if specified + if req.GPUModel != "" { + filterRegistry = filterRegistry.With(filter.NewGPUModelFilter(req.GPUModel)) + } + // No need to check count and other filters since it's always in the same node during each preempt trial + filteredGPUs, filterDetails, err := filterRegistry.Apply(s.ctx, req.WorkloadNameNamespace, toFilterGPUs, false) + if err != nil { + return nil, nil, fmt.Errorf("apply filters: %w", err) + } + return filteredGPUs, filterDetails, nil +} + func (s *GpuAllocator) Select(req *tfv1.AllocRequest, filteredGPUs []*tfv1.GPU) ([]*tfv1.GPU, error) { pool := &tfv1.GPUPool{} if err := s.Get(s.ctx, client.ObjectKey{Name: req.PoolName}, pool); err != nil { @@ -312,6 +345,19 @@ func (s *GpuAllocator) Bind( gpuNodeName = gpu.Status.NodeSelector[constants.KubernetesHostNameLabel] } + // Double-check resource availability to prevent over-allocation + if gpu.Status.Available == nil { + return nil, fmt.Errorf("GPU %s has nil available resources", selectedGPU) + } + if gpu.Status.Available.Tflops.Cmp(req.Request.Tflops) < 0 { + return nil, fmt.Errorf("GPU %s insufficient TFLOPs: available %s, requested %s", + selectedGPU, gpu.Status.Available.Tflops.String(), req.Request.Tflops.String()) + } + if gpu.Status.Available.Vram.Cmp(req.Request.Vram) < 0 { + return nil, fmt.Errorf("GPU %s insufficient VRAM: available %s, requested %s", + selectedGPU, gpu.Status.Available.Vram.String(), req.Request.Vram.String()) + } + // reduce available resource on the GPU status gpu.Status.Available.Tflops.Sub(req.Request.Tflops) gpu.Status.Available.Vram.Sub(req.Request.Vram) @@ -372,9 +418,8 @@ func (s *GpuAllocator) Alloc(req *tfv1.AllocRequest) ([]*tfv1.GPU, error) { func (s *GpuAllocator) CheckQuotaAndFilter(ctx context.Context, req *tfv1.AllocRequest, isSimulateSchedule bool) ([]*tfv1.GPU, []filter.FilterDetail, error) { <-s.initializedCh - // Fast quota check (fail fast if quota insufficient) if err := s.quotaStore.CheckQuotaAvailable(req.WorkloadNameNamespace.Namespace, req); err != nil { - return nil, nil, 
fmt.Errorf("quota check failed: %w", err) + return nil, nil, err } // Get GPUs from the pool using the in-memory store @@ -604,12 +649,13 @@ func (s *GpuAllocator) AdjustAllocation(ctx context.Context, adjustRequest tfv1. } func (s *GpuAllocator) ListNonUsingNodes() sets.Set[string] { + <-s.initializedCh set := sets.New[string]() - for nodeName, gpuNames := range s.nodeWorkerStore { + for nodeName, podNames := range s.nodeWorkerStore { // If using by TF, the node can not be used by original scheduler // If using by other scheduler, won't record as TF worker, thus the map is empty // Return non using nodes can ensure original scheduler not conflict with TF - if len(gpuNames) == 0 { + if len(podNames) == 0 { set.Insert(nodeName) } } @@ -623,6 +669,20 @@ func (s *GpuAllocator) DeallocByPodIdentifier(ctx context.Context, podIdentifier } } +func (s *GpuAllocator) GetAllocationReqByNodeName(nodeName string) []*tfv1.AllocRequest { + allocRequests := make([]*tfv1.AllocRequest, 0, 8) + for workerName := range s.nodeWorkerStore[nodeName] { + podUID := s.podNamespaceNsToPodUID[workerName.String()] + if podUID == "" { + continue + } + if request, exists := s.uniqueAllocation[podUID]; exists { + allocRequests = append(allocRequests, request) + } + } + return allocRequests +} + func (s *GpuAllocator) checkGPUCapacityAndQuota(gpu *tfv1.GPU, oldRes, newRes tfv1.Resource) (tfv1.Resource, error) { if gpu.Status.Available == nil { return tfv1.Resource{}, fmt.Errorf("GPU available is nil, skip check") @@ -909,6 +969,11 @@ func (s *GpuAllocator) handleGPUCreate(ctx context.Context, gpu *tfv1.GPU) { defer s.storeMutex.Unlock() if s.gpuStore[key] != nil { + if gpu.Status.GPUModel != "" { + if _, exists := GPUCapacityMap[gpu.Status.GPUModel]; !exists { + GPUCapacityMap[gpu.Status.GPUModel] = *gpu.Status.Capacity + } + } syncGPUMetadataAndStatusFromCluster(s.gpuStore[key], gpu) log.V(6).Info("GPU already exists in store", "name", key.Name) return @@ -924,25 +989,7 @@ func (s *GpuAllocator) handleGPUCreate(ctx context.Context, gpu *tfv1.GPU) { } s.gpuStore[key] = gpuInMem - if gpuInMem.Status.NodeSelector != nil { - gpuNodeName := gpuInMem.Status.NodeSelector[constants.KubernetesHostNameLabel] - if gpuNodeName != "" { - if _, exists := s.nodeGpuStore[gpuNodeName]; !exists { - s.nodeGpuStore[gpuNodeName] = make(map[string]*tfv1.GPU, 4) - } - s.nodeGpuStore[gpuNodeName][gpuInMem.Name] = gpuInMem - } - } - - if gpuInMem.Labels != nil { - pool := gpuInMem.Labels[constants.GpuPoolKey] - if pool != "" { - if _, exists := s.poolGpuStore[pool]; !exists { - s.poolGpuStore[pool] = make(map[string]*tfv1.GPU, 128) - } - s.poolGpuStore[pool][gpuInMem.Name] = gpuInMem - } - } + s.addOrUpdateGPUMaps(gpuInMem) log.Info("Added GPU to store", "name", key.Name, "phase", gpu.Status.Phase) } @@ -991,6 +1038,38 @@ func (s *GpuAllocator) handleGPUUpdate(ctx context.Context, gpu *tfv1.GPU) { s.gpuStore[key] = gpu.DeepCopy() log.V(6).Info("Updated GPU in store (new entry)", "name", key.Name, "phase", gpu.Status.Phase) } + + s.addOrUpdateGPUMaps(s.gpuStore[key]) +} + +func (s *GpuAllocator) addOrUpdateGPUMaps(gpuInMem *tfv1.GPU) { + if gpuInMem.Status.NodeSelector != nil { + gpuNodeName := gpuInMem.Status.NodeSelector[constants.KubernetesHostNameLabel] + if gpuNodeName != "" { + if _, exists := s.nodeGpuStore[gpuNodeName]; !exists { + s.nodeGpuStore[gpuNodeName] = make(map[string]*tfv1.GPU, 4) + } + s.nodeGpuStore[gpuNodeName][gpuInMem.Name] = gpuInMem + if _, exists := s.nodeWorkerStore[gpuNodeName]; !exists { + 
s.nodeWorkerStore[gpuNodeName] = make(map[types.NamespacedName]struct{}, 4) + } + } + + } + + if gpuInMem.Labels != nil { + pool := gpuInMem.Labels[constants.GpuPoolKey] + if pool != "" { + if _, exists := s.poolGpuStore[pool]; !exists { + s.poolGpuStore[pool] = make(map[string]*tfv1.GPU, 128) + } + s.poolGpuStore[pool][gpuInMem.Name] = gpuInMem + } + } + + if gpuInMem.Status.GPUModel != "" { + GPUCapacityMap[gpuInMem.Status.GPUModel] = *gpuInMem.Status.Capacity + } } func syncGPUMetadataAndStatusFromCluster(old *tfv1.GPU, gpu *tfv1.GPU) { @@ -1163,6 +1242,68 @@ func (s *GpuAllocator) ReconcileAllocationState() { }) } +func (s *GpuAllocator) ReconcileAllocationStateForTesting() { + s.reconcileAllocationState() +} + +func (s *GpuAllocator) CheckQuotaAndFilterSingleNodePreempt( + nodeName string, allocReq *tfv1.AllocRequest, toPreemptPods sets.Set[types.NamespacedName], +) error { + <-s.initializedCh + // Only need to check total quotas when preempting + toPreemptUsage := &tfv1.GPUResourceUsage{ + Requests: tfv1.Resource{ + Tflops: resource.Quantity{}, + Vram: resource.Quantity{}, + }, + Limits: tfv1.Resource{ + Tflops: resource.Quantity{}, + Vram: resource.Quantity{}, + }, + } + workers := s.nodeWorkerStore[nodeName] + preemptAllocRequests := make([]*tfv1.AllocRequest, 0, len(workers)) + for workerName := range workers { + if !toPreemptPods.Has(workerName) { + continue + } + podUID := s.podNamespaceNsToPodUID[workerName.String()] + if podUID == "" { + continue + } + existingAllocation := s.uniqueAllocation[podUID] + if existingAllocation == nil { + continue + } + toPreemptUsage.Requests.Tflops.Add(existingAllocation.Request.Tflops) + toPreemptUsage.Requests.Vram.Add(existingAllocation.Request.Vram) + toPreemptUsage.Limits.Tflops.Add(existingAllocation.Limit.Tflops) + toPreemptUsage.Limits.Vram.Add(existingAllocation.Limit.Vram) + preemptAllocRequests = append(preemptAllocRequests, existingAllocation) + } + + if log.FromContext(s.ctx).V(5).Enabled() { + log.FromContext(s.ctx).V(5).Info("Preempting node and check quotas", "nodeName", nodeName, "toPreemptUsage", toPreemptUsage) + } + + if err := s.quotaStore.CheckTotalQuotaRelaxed(allocReq, toPreemptUsage); err != nil { + return fmt.Errorf("quota check failed during preempt: %w", err) + } + + // Get GPUs from the pool using the in-memory store + if allocReq.PoolName == "" { + return fmt.Errorf("GPU Pool name is empty, can not find GPUs during preempt") + } + filteredGPUs, _, err := s.FilterWithPreempt(allocReq, preemptAllocRequests) + if err != nil { + return err + } + if len(filteredGPUs) < int(allocReq.Count) { + return fmt.Errorf("no gpus available or valid in pool %s after filtering during preempt", allocReq.PoolName) + } + return nil +} + func (s *GpuAllocator) reconcileAllocationState() { ctx := s.ctx logger := log.FromContext(ctx) @@ -1210,7 +1351,7 @@ func (s *GpuAllocator) reconcileAllocationState() { // No workers, but node contains GPU, need include into nodeWorkerStore with empty map gpuNodeName := gpu.Status.NodeSelector[constants.KubernetesHostNameLabel] if _, exists := s.nodeWorkerStore[gpuNodeName]; !exists { - s.nodeWorkerStore[gpuNodeName] = map[types.NamespacedName]struct{}{} + s.nodeWorkerStore[gpuNodeName] = make(map[types.NamespacedName]struct{}, 4) } } @@ -1339,6 +1480,11 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest return &tfv1.AllocRequest{}, "gpu count annotation is too large", nil } + qosLevel := tfv1.QoSLevel(pod.Annotations[constants.QoSLevelAnnotation]) + if qosLevel == "" { 
+ qosLevel = tfv1.QoSMedium + } + disableCELFilter := false if disabledFeatures, exists := pod.Annotations[constants.DisableFeaturesAnnotation]; exists { disabledFeaturesList := strings.Split(disabledFeatures, ",") @@ -1355,7 +1501,7 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest Limit: gpuLimitResource, DisableCELFilter: disableCELFilter, - CELFilterExpression: pod.Annotations[constants.CELFilterExpressionAnnotation], + CELFilterExpression: pod.Annotations[constants.DRACelExpressionAnnotation], Count: uint(count), GPUModel: pod.Annotations[constants.GPUModelAnnotation], @@ -1364,6 +1510,7 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest Namespace: pod.Namespace, }, PodMeta: pod.ObjectMeta, + QoS: qosLevel, } // for already allocated workers, set the GPU device IDs for further scaling and retrieval diff --git a/internal/gpuallocator/gpuallocator_test.go b/internal/gpuallocator/gpuallocator_test.go index 08d78130..bb3a494d 100644 --- a/internal/gpuallocator/gpuallocator_test.go +++ b/internal/gpuallocator/gpuallocator_test.go @@ -97,7 +97,7 @@ var _ = Describe("GPU Allocator", func() { if err := k8sClient.Get(ctx, types.NamespacedName{Name: "test-pool"}, pool); err != nil { Expect(err).NotTo(HaveOccurred()) } - _, _ = RefreshGPUNodeCapacity(ctx, k8sClient, gpuNode, pool) + _, _ = RefreshGPUNodeCapacity(ctx, k8sClient, gpuNode, pool, allocator) // Verify resources were reduced on the allocated GPU gpu := getGPU(gpus[0].Name) @@ -107,8 +107,14 @@ var _ = Describe("GPU Allocator", func() { node := getGPUNode(gpu) diffTflops := node.Status.TotalTFlops.Value() - node.Status.AvailableTFlops.Value() diffVRAM := node.Status.TotalVRAM.Value() - node.Status.AvailableVRAM.Value() + + diffVirtualTflops := node.Status.VirtualTFlops.Value() - node.Status.VirtualAvailableTFlops.Value() + diffVirtualVRAM := node.Status.VirtualVRAM.Value() - node.Status.VirtualAvailableVRAM.Value() Expect(diffTflops).To(BeEquivalentTo(50)) Expect(diffVRAM).To(BeEquivalentTo(8 * 1024 * 1024 * 1024)) + + Expect(diffVirtualTflops).To(BeEquivalentTo(50)) + Expect(diffVirtualVRAM).To(BeEquivalentTo(8 * 1024 * 1024 * 1024)) }) It("should allocate multiple GPUs from the same node", func() { diff --git a/internal/gpuallocator/node_capacity.go b/internal/gpuallocator/node_capacity.go index dc7488f6..43cce870 100644 --- a/internal/gpuallocator/node_capacity.go +++ b/internal/gpuallocator/node_capacity.go @@ -11,7 +11,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -func RefreshGPUNodeCapacity(ctx context.Context, k8sClient client.Client, node *tfv1.GPUNode, pool *tfv1.GPUPool) ([]string, error) { +func RefreshGPUNodeCapacity( + ctx context.Context, k8sClient client.Client, + node *tfv1.GPUNode, pool *tfv1.GPUPool, + allocator *GpuAllocator, +) ([]string, error) { gpuList := &tfv1.GPUList{} if err := k8sClient.List(ctx, gpuList, client.MatchingLabels{constants.LabelKeyOwner: node.Name}); err != nil { return nil, fmt.Errorf("failed to list GPUs: %w", err) @@ -54,6 +58,17 @@ func RefreshGPUNodeCapacity(ctx context.Context, k8sClient client.Client, node * node.Status.VirtualTFlops = virtualTFlops node.Status.VirtualVRAM = virtualVRAM + vramAvailable := virtualVRAM.DeepCopy() + tflopsAvailable := virtualTFlops.DeepCopy() + + allocRequests := allocator.GetAllocationReqByNodeName(node.Name) + for _, allocRequest := range allocRequests { + vramAvailable.Sub(allocRequest.Limit.Vram) + tflopsAvailable.Sub(allocRequest.Limit.Tflops) + } + 
node.Status.VirtualAvailableVRAM = &vramAvailable + node.Status.VirtualAvailableTFlops = &tflopsAvailable + node.Status.Phase = tfv1.TensorFusionGPUNodePhaseRunning if !equality.Semantic.DeepEqual(node.Status, statusCopy) { diff --git a/internal/metrics/encoders/influx.go b/internal/metrics/encoders/influx.go index a459c7ee..4d089759 100644 --- a/internal/metrics/encoders/influx.go +++ b/internal/metrics/encoders/influx.go @@ -4,6 +4,7 @@ import ( "time" metricsProto "github.com/influxdata/line-protocol/v2/lineprotocol" + "k8s.io/klog/v2" ) // InfluxStrategy implements InfluxDB line protocol encoding @@ -28,7 +29,12 @@ func (s *InfluxStrategy) AddTag(key, value string) { } func (s *InfluxStrategy) AddField(key string, value any) { - s.enc.AddField(key, metricsProto.MustNewValue(value)) + v, parsed := metricsProto.NewValue(value) + if !parsed { + klog.Error("metrics influx encoder failed to parse value: ", key, value) + return + } + s.enc.AddField(key, v) } func (s *InfluxStrategy) EndLine(timestamp time.Time) { diff --git a/internal/metrics/recorder.go b/internal/metrics/recorder.go index 9050df00..7f47bab6 100644 --- a/internal/metrics/recorder.go +++ b/internal/metrics/recorder.go @@ -4,6 +4,7 @@ import ( "io" "math" "strconv" + "strings" "sync" "time" @@ -19,15 +20,17 @@ import ( // Worker level metrics, include worker resources/costs status // map updated in one reconcile loop in single goroutine, thus no RW lock needed var workerMetricsLock sync.RWMutex -var workerMetricsMap = map[string]*WorkerResourceMetrics{} +var workerMetricsMap = make(map[string]*WorkerResourceMetrics, 200) // Node level metrics, include node allocation/costs status var nodeMetricsLock sync.RWMutex -var nodeMetricsMap = map[string]*NodeResourceMetrics{} +var nodeMetricsMap = make(map[string]*NodeResourceMetrics, 100) // Pool level metrics, include pool allocation/costs status var poolMetricsLock sync.RWMutex -var poolMetricsMap = map[string]*PoolResourceMetrics{} +var poolMetricsMap = make(map[string]*PoolResourceMetrics, 4) + +var settingLock sync.RWMutex var log = ctrl.Log.WithName("metrics-recorder") @@ -37,6 +40,9 @@ type MetricsRecorder struct { // Raw billing result for node and workers HourlyUnitPriceMap map[string]float64 + // Pool level eviction protection price ratio map, key is pool name + PoolEvictionProtectionPriceRatioMap map[string]string + // Worker level unit price map, key is pool name, second level key is QoS level WorkerUnitPriceMap map[string]map[string]RawBillingPricing } @@ -80,14 +86,16 @@ func SetWorkerMetricsByWorkload(pod *corev1.Pod) { // Initialize metrics if _, ok := workerMetricsMap[pod.Name]; !ok { workerMetricsMap[pod.Name] = &WorkerResourceMetrics{ - WorkerName: pod.Name, - WorkloadName: pod.Labels[constants.WorkloadKey], - PoolName: pod.Annotations[constants.GpuPoolKey], - Namespace: pod.Namespace, - QoS: pod.Annotations[constants.QoSLevelAnnotation], - podLabels: pod.Labels, - RawCost: 0, - LastRecordTime: time.Now(), + WorkerName: pod.Name, + WorkloadName: pod.Labels[constants.WorkloadKey], + PoolName: pod.Annotations[constants.GpuPoolKey], + Namespace: pod.Namespace, + QoS: pod.Annotations[constants.QoSLevelAnnotation], + podLabels: pod.Labels, + RawCost: 0, + LastRecordTime: time.Now(), + creationTime: pod.CreationTimestamp.Time, + evictionProtection: pod.Annotations[constants.EvictionProtectionAnnotation], } } @@ -187,19 +195,37 @@ func SetPoolMetrics(poolObj *tfv1.GPUPool) { } if poolObj.Status.VirtualAvailableTFlops != nil && poolObj.Status.VirtualAvailableVRAM != nil { 
- poolMetricsMap[poolObj.Name].AllocatedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedVramBytes / - poolObj.Status.VirtualVRAM.AsApproximateFloat64() * 100 + virtualVRAM := poolObj.Status.VirtualVRAM.AsApproximateFloat64() + virtualTFlops := poolObj.Status.VirtualTFlops.AsApproximateFloat64() + + if virtualVRAM > 0 { + poolMetricsMap[poolObj.Name].AllocatedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedVramBytes / virtualVRAM * 100 + } else { + poolMetricsMap[poolObj.Name].AllocatedVramPercentToVirtualCap = 0 + } - poolMetricsMap[poolObj.Name].AllocatedTflopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedTflops / - poolObj.Status.VirtualTFlops.AsApproximateFloat64() * 100 - poolMetricsMap[poolObj.Name].AssignedLimitedTFlops = poolObj.Status.VirtualTFlops.AsApproximateFloat64() - + if virtualTFlops > 0 { + poolMetricsMap[poolObj.Name].AllocatedTflopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedTflops / virtualTFlops * 100 + } else { + poolMetricsMap[poolObj.Name].AllocatedTflopsPercentToVirtualCap = 0 + } + + poolMetricsMap[poolObj.Name].AssignedLimitedTFlops = virtualTFlops - poolObj.Status.VirtualAvailableTFlops.AsApproximateFloat64() - poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes = poolObj.Status.VirtualVRAM.AsApproximateFloat64() - + poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes = virtualVRAM - poolObj.Status.VirtualAvailableVRAM.AsApproximateFloat64() - poolMetricsMap[poolObj.Name].AssignedLimitedTFlopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedTFlops / - poolObj.Status.VirtualTFlops.AsApproximateFloat64() * 100 - poolMetricsMap[poolObj.Name].AssignedLimitedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes / - poolObj.Status.VirtualVRAM.AsApproximateFloat64() * 100 + + if virtualTFlops > 0 { + poolMetricsMap[poolObj.Name].AssignedLimitedTFlopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedTFlops / virtualTFlops * 100 + } else { + poolMetricsMap[poolObj.Name].AssignedLimitedTFlopsPercentToVirtualCap = 0 + } + + if virtualVRAM > 0 { + poolMetricsMap[poolObj.Name].AssignedLimitedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes / virtualVRAM * 100 + } else { + poolMetricsMap[poolObj.Name].AssignedLimitedVramPercentToVirtualCap = 0 + } } poolMetricsMap[poolObj.Name].GPUCount = int(poolObj.Status.TotalGPUs) } @@ -269,13 +295,17 @@ func (mr *MetricsRecorder) Start() { // Clean up worker metrics that have been deleted go func() { for { - time.Sleep(5 * time.Minute) + time.Sleep(1 * time.Minute) workerMetricsLock.Lock() - for _, metrics := range workerMetricsMap { + var keysToDelete []string + for key, metrics := range workerMetricsMap { if metrics.deletionTimestamp != nil && !metrics.deletionTimestamp.IsZero() { - delete(workerMetricsMap, metrics.WorkerName) + keysToDelete = append(keysToDelete, key) } } + for _, key := range keysToDelete { + delete(workerMetricsMap, key) + } workerMetricsLock.Unlock() } }() @@ -288,13 +318,12 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { now := time.Now() enc := NewEncoder(config.GetGlobalConfig().MetricsFormat) - workerMetricsLock.RLock() + workerMetricsLock.Lock() activeWorkerCnt := 0 activeWorkerAndNodeByPool := map[string]*ActiveNodeAndWorker{} for _, metrics := range workerMetricsMap { - if metrics.deletionTimestamp != nil && !metrics.deletionTimestamp.IsZero() { metrics.RawCost = mr.getWorkerRawCost(metrics, 
metrics.deletionTimestamp.Sub(metrics.LastRecordTime)) } else { @@ -315,7 +344,9 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { nodeCnt: 0, } } - activeWorkerAndNodeByPool[metrics.PoolName].workerCnt++ + if metrics.deletionTimestamp == nil || metrics.deletionTimestamp.IsZero() { + activeWorkerAndNodeByPool[metrics.PoolName].workerCnt++ + } enc.StartLine("tf_worker_resources") enc.AddTag("namespace", metrics.Namespace) @@ -344,7 +375,7 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { enc.EndLine(now) } - workerMetricsLock.RUnlock() + workerMetricsLock.Unlock() nodeMetricsLock.RLock() @@ -395,16 +426,16 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { enc.StartLine("tf_pool_metrics") enc.AddTag("pool", metrics.PoolName) enc.AddTag("phase", metrics.Phase) - enc.AddField("allocatedTflops", metrics.AllocatedTflops) - enc.AddField("allocatedTflopsPercent", metrics.AllocatedTflopsPercent) - enc.AddField("allocatedTflopsPercentVirtual", metrics.AllocatedTflopsPercentToVirtualCap) - enc.AddField("allocatedVramBytes", metrics.AllocatedVramBytes) - enc.AddField("allocatedVramPercent", metrics.AllocatedVramPercent) - enc.AddField("allocatedVramPercentVirtual", metrics.AllocatedVramPercentToVirtualCap) - enc.AddField("assignedLimitedTFlops", metrics.AssignedLimitedTFlops) - enc.AddField("assignedLimitedVramBytes", metrics.AssignedLimitedVramBytes) - enc.AddField("assignedLimitedTFlopsPercentVirtual", metrics.AssignedLimitedTFlopsPercentToVirtualCap) - enc.AddField("assignedLimitedVramPercentVirtual", metrics.AssignedLimitedVramPercentToVirtualCap) + enc.AddField("allocated_tflops", metrics.AllocatedTflops) + enc.AddField("allocated_tflops_percent", metrics.AllocatedTflopsPercent) + enc.AddField("allocated_tflops_percent_virtual", metrics.AllocatedTflopsPercentToVirtualCap) + enc.AddField("allocated_vram_bytes", metrics.AllocatedVramBytes) + enc.AddField("allocated_vram_percent", metrics.AllocatedVramPercent) + enc.AddField("allocated_vram_percent_virtual", metrics.AllocatedVramPercentToVirtualCap) + enc.AddField("limited_tflops", metrics.AssignedLimitedTFlops) + enc.AddField("limited_vram_bytes", metrics.AssignedLimitedVramBytes) + enc.AddField("limited_tflops_percent_virtual", metrics.AssignedLimitedTFlopsPercentToVirtualCap) + enc.AddField("limited_vram_percent_virtual", metrics.AssignedLimitedVramPercentToVirtualCap) enc.AddField("gpu_count", int64(metrics.GPUCount)) enc.EndLine(now) } @@ -421,7 +452,51 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { log.Info("metrics and raw billing recorded:", "workerCount", activeWorkerCnt, "nodeCount", len(nodeMetricsMap)) } +// Update metrics recorder's raw billing map +func (r *MetricsRecorder) UpdateMetricsRecorder(pool *tfv1.GPUPool, specChanged bool) { + const dollarSign = "$" + settingLock.Lock() + defer settingLock.Unlock() + if pool.Spec.QosConfig == nil { + log.Info("QosConfig is nil, skip updating metrics recorder", "pool", pool.Name) + return + } + + qosConfig := pool.Spec.QosConfig + if _, ok := r.WorkerUnitPriceMap[pool.Name]; !ok { + r.WorkerUnitPriceMap[pool.Name] = make(map[string]RawBillingPricing) + } + + if r.PoolEvictionProtectionPriceRatioMap == nil { + r.PoolEvictionProtectionPriceRatioMap = make(map[string]string, 4) + } + r.PoolEvictionProtectionPriceRatioMap[pool.Name] = qosConfig.EvictionProtectionPriceRatio + + pricingDetail := r.WorkerUnitPriceMap[pool.Name] + if !specChanged && len(pricingDetail) == 0 { + return + } + // Pricing potentially changed + for _, pricing := 
range qosConfig.Pricing { + tflopsPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerFP16TFlopsPerHour, dollarSign), 64) + vramPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerGBOfVRAMPerHour, dollarSign), 64) + limitOverRequestChargingRatio, _ := strconv.ParseFloat(pricing.LimitsOverRequestsChargingRatio, 64) + + pricingDetail[string(pricing.Qos)] = RawBillingPricing{ + TflopsPerSecond: tflopsPerHour / float64(3600), + VramPerSecond: vramPerHour / float64(3600), + + TflopsOverRequestPerSecond: tflopsPerHour / float64(3600) * limitOverRequestChargingRatio, + VramOverRequestPerSecond: vramPerHour / float64(3600) * limitOverRequestChargingRatio, + } + } + + log.V(5).Info("Updated metrics recorder", "pool", pool.Name, "pricing", pricingDetail) +} + func (mr *MetricsRecorder) getWorkerRawCost(metrics *WorkerResourceMetrics, duration time.Duration) float64 { + settingLock.RLock() + defer settingLock.RUnlock() qosPricing, ok := mr.WorkerUnitPriceMap[metrics.PoolName] // The qos pricing for this pool not set if !ok { @@ -446,7 +521,37 @@ func (mr *MetricsRecorder) getWorkerRawCost(metrics *WorkerResourceMetrics, dura rawCostVRAMLimitOverRequest := (metrics.VramBytesLimit - metrics.VramBytesRequest) * pricing.VramOverRequestPerSecond / constants.GiBToBytes rawCostPerVRAM := pricing.VramPerSecond * metrics.VramBytesRequest / constants.GiBToBytes - return (rawCostPerTflops + rawCostPerVRAM + rawCostTflopsLimitOverRequest + rawCostVRAMLimitOverRequest) * duration.Seconds() * float64(metrics.GPUCount) + baseCost := (rawCostPerTflops + rawCostPerVRAM + rawCostTflopsLimitOverRequest + rawCostVRAMLimitOverRequest) * duration.Seconds() * float64(metrics.GPUCount) + + // Apply eviction protection price ratio if the pod is under protection and QoS is not critical + if metrics.evictionProtection != "" && qosLevel != constants.QoSLevelCritical { + if isUnderProtection := mr.isUnderEvictionProtection(metrics); isUnderProtection { + protectionPriceRatio := mr.PoolEvictionProtectionPriceRatioMap[metrics.PoolName] + protectionPriceRatioFloat, _ := strconv.ParseFloat(protectionPriceRatio, 64) + if protectionPriceRatioFloat < 1 { + protectionPriceRatioFloat = constants.DefaultEvictionProtectionPriceRatio + } + baseCost *= protectionPriceRatioFloat + } + } + + return baseCost +} + +// isUnderEvictionProtection checks if a worker is under eviction protection +func (mr *MetricsRecorder) isUnderEvictionProtection(metrics *WorkerResourceMetrics) bool { + if metrics.evictionProtection == "" { + return false + } + + // Parse protection duration (1h, 5h, 24h, etc.) + duration, err := time.ParseDuration(metrics.evictionProtection) + if err != nil { + return false + } + + protectionEndTime := metrics.creationTime.Add(duration) + return time.Now().Before(protectionEndTime) } // unit price data comes from global config map, and multi-GPU instance should normalized with per GPU pricing, e.g. 
8xA100 p4d.24xlarge price should divide by 8 diff --git a/internal/metrics/types.go b/internal/metrics/types.go index ff3449cb..df06f169 100644 --- a/internal/metrics/types.go +++ b/internal/metrics/types.go @@ -51,6 +51,10 @@ type WorkerResourceMetrics struct { // For more accurate metrics, should record the deletion timestamp to calculate duration for the last metrics deletionTimestamp *time.Time + // Fields for eviction protection tracking - private, not stored in TSDB + creationTime time.Time + evictionProtection string + podLabels map[string]string } diff --git a/internal/quota/quota_store.go b/internal/quota/quota_store.go index 4edc7445..d9450236 100644 --- a/internal/quota/quota_store.go +++ b/internal/quota/quota_store.go @@ -79,7 +79,16 @@ func (qs *QuotaStore) CheckQuotaAvailable(namespace string, req *tfv1.AllocReque if err := qs.checkSingleQuotas(entry, req); err != nil { return err } - return qs.checkTotalQuotas(entry, req) + return qs.checkTotalQuotas(entry, req, nil) +} + +func (qs *QuotaStore) CheckTotalQuotaRelaxed(req *tfv1.AllocRequest, toReleaseResource *tfv1.GPUResourceUsage) error { + entry, exists := qs.QuotaStore[req.WorkloadNameNamespace.Namespace] + if !exists { + // No quota defined for this namespace, allow allocation + return nil + } + return qs.checkTotalQuotas(entry, req, toReleaseResource) } func (qs *QuotaStore) AdjustQuota(namespace string, reqDelta tfv1.Resource, limitDelta tfv1.Resource) { @@ -103,41 +112,51 @@ func (qs *QuotaStore) checkSingleQuotas(entry *QuotaStoreEntry, req *tfv1.AllocR if single.MaxLimits != nil { if !single.MaxLimits.Tflops.IsZero() && req.Limit.Tflops.Cmp(single.MaxLimits.Tflops) > 0 { return &QuotaExceededError{ - Namespace: entry.Quota.Namespace, - Resource: MaxTFlopsLimitResource, - Requested: req.Limit.Tflops, - Limit: single.MaxLimits.Tflops, + Namespace: entry.Quota.Namespace, + Resource: MaxTFlopsLimitResource, + Requested: req.Limit.Tflops, + Limit: single.MaxLimits.Tflops, + Unresolvable: true, } } // Check single VRAM limit (per GPU) if !single.MaxLimits.Vram.IsZero() && req.Request.Vram.Cmp(single.MaxLimits.Vram) > 0 { return &QuotaExceededError{ - Namespace: entry.Quota.Namespace, - Resource: MaxVRAMLimitResource, - Requested: req.Request.Vram, - Limit: single.MaxLimits.Vram, + Namespace: entry.Quota.Namespace, + Resource: MaxVRAMLimitResource, + Requested: req.Request.Vram, + Limit: single.MaxLimits.Vram, + Unresolvable: true, } } // Check single GPU count limit (per worker) if single.MaxGPUCount != nil && int32(req.Count) > *single.MaxGPUCount { return &QuotaExceededError{ - Namespace: entry.Quota.Namespace, - Resource: MaxGPULimitResource, - Requested: *resource.NewQuantity(int64(req.Count), resource.DecimalSI), - Limit: *resource.NewQuantity(int64(*single.MaxGPUCount), resource.DecimalSI), + Namespace: entry.Quota.Namespace, + Resource: MaxGPULimitResource, + Requested: *resource.NewQuantity(int64(req.Count), resource.DecimalSI), + Limit: *resource.NewQuantity(int64(*single.MaxGPUCount), resource.DecimalSI), + Unresolvable: true, } } } return nil } -func (qs *QuotaStore) checkTotalQuotas(entry *QuotaStoreEntry, req *tfv1.AllocRequest) error { +func (qs *QuotaStore) checkTotalQuotas(entry *QuotaStoreEntry, req *tfv1.AllocRequest, toReleaseResource *tfv1.GPUResourceUsage) error { quotaNs := entry.Quota.Namespace + + // Check total requests if entry.Quota.Spec.Total.Requests != nil { total := entry.Quota.Spec.Total.Requests - current := entry.CurrentUsage.Requests + current := *entry.CurrentUsage.Requests.DeepCopy() 
+ + if toReleaseResource != nil { + current.Tflops.Sub(toReleaseResource.Requests.Tflops) + current.Vram.Sub(toReleaseResource.Requests.Vram) + } err := checkTotalExceeded(req, total, current, quotaNs, true) if err != nil { return err @@ -147,13 +166,24 @@ func (qs *QuotaStore) checkTotalQuotas(entry *QuotaStoreEntry, req *tfv1.AllocRe // Check total limits if entry.Quota.Spec.Total.Limits != nil { total := entry.Quota.Spec.Total.Limits - usage := entry.CurrentUsage.Limits + usage := *entry.CurrentUsage.Limits.DeepCopy() + + if toReleaseResource != nil { + usage.Tflops.Sub(toReleaseResource.Limits.Tflops) + usage.Vram.Sub(toReleaseResource.Limits.Vram) + } err := checkTotalExceeded(req, total, usage, quotaNs, false) if err != nil { return err } } + // If it's preempt case, skip checking total workers since it's + // replacing existing workers rather than creating new ones + if toReleaseResource != nil { + return nil + } + // Check total workers, each allocation will create one worker instance if entry.Quota.Spec.Total.MaxWorkers != nil { if entry.CurrentUsage.Workers >= *entry.Quota.Spec.Total.MaxWorkers { @@ -451,10 +481,11 @@ func (qs *QuotaStore) SyncQuotasToK8s(ctx context.Context) { // QuotaExceededError represents a quota exceeded error with detailed information type QuotaExceededError struct { - Namespace string - Resource string - Requested resource.Quantity - Limit resource.Quantity + Namespace string + Resource string + Requested resource.Quantity + Limit resource.Quantity + Unresolvable bool } func (e *QuotaExceededError) Error() string { diff --git a/internal/scheduler/gpuresources/gpuresources.go b/internal/scheduler/gpuresources/gpuresources.go index 16dd1c61..bc893087 100644 --- a/internal/scheduler/gpuresources/gpuresources.go +++ b/internal/scheduler/gpuresources/gpuresources.go @@ -6,12 +6,14 @@ import ( "sort" "strconv" "strings" + "sync" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/metrics" + "github.com/NexusGPU/tensor-fusion/internal/quota" "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/samber/lo" v1 "k8s.io/api/core/v1" @@ -19,6 +21,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" + fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -26,6 +29,7 @@ import ( const Name = "GPUResourcesFit" const CycleStateAllocateRequest = "allocateRequest" const CycleStateGPUSchedulingResult = "gpuSchedulingResult" + const SchedulerSimulationKey = "schedulerSimulation" var _ framework.PreFilterPlugin = &GPUFit{} @@ -56,9 +60,15 @@ type GPUSchedulingStateData struct { // In Reserve stage, bind GPUs to pod, update allocator cache // In PostBind stage, fetch final GPUs call Pod patch API to update annotation FinalGPUs []string + + // Preempt pods + PreemptPods sync.Map + + // IsPreemption + IsPreemption bool } -func (p *GPUSchedulingStateData) Clone() framework.StateData { +func (p *GPUSchedulingStateData) Clone() fwk.StateData { return p } @@ -93,7 +103,7 @@ func (s *GPUFit) Name() string { return Name } -func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) { +func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, _ 
[]fwk.NodeInfo) (*framework.PreFilterResult, *fwk.Status) { // Handle progressive migration case if utils.IsProgressiveMigration() && utils.HasGPUResourceRequest(pod) { nodeNames := s.allocator.ListNonUsingNodes() @@ -102,19 +112,24 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod "use native GPU resources, available native GPU nodes: "+strconv.Itoa(len(nodeNames))) return &framework.PreFilterResult{ NodeNames: nodeNames, - }, framework.NewStatus(framework.Success, "progressive migration for native resources claim") + }, fwk.NewStatus(fwk.Success, "progressive migration for native resources claim") + } + + // Check if DRA mode is enabled for this pod + if isDRAEnabled(pod) && hasDRAClaim(pod) { + return nil, fwk.NewStatus(fwk.Skip, "DRA mode enabled, skipping custom GPU prefilter") } // Skip non tensor-fusion mode if !utils.IsTensorFusionWorker(pod) { - return nil, framework.NewStatus(framework.Skip, "skip for non tensor-fusion mode") + return nil, fwk.NewStatus(fwk.Skip, "skip for non tensor-fusion mode") } // Handle tensor-fusion mode scheduling s.logger.Info("checking GPU node resources for pod", "pod", pod.Name) allocRequest, reason, err := s.allocator.ComposeAllocationRequest(pod) if err != nil { - return nil, framework.NewStatus(framework.Error, reason) + return nil, fwk.NewStatus(fwk.Error, reason) } state.Write(CycleStateAllocateRequest, allocRequest) @@ -134,7 +149,16 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod s.fh.EventRecorder().Eventf(pod, pod, v1.EventTypeWarning, "GPUQuotaOrCapacityNotEnough", "check quota and filter", "TensorFusion schedule failed, no enough resource or quotas: "+err.Error()) s.logger.Error(err, "failed to check quota and filter", "pod", pod.Name) - return nil, framework.NewStatus(framework.Unschedulable, err.Error()) + + if quotaErr, ok := err.(*quota.QuotaExceededError); ok { + if quotaErr.Unresolvable { + return nil, fwk.NewStatus(fwk.UnschedulableAndUnresolvable, quotaErr.Error()) + } else { + return nil, fwk.NewStatus(fwk.Unschedulable, err.Error()) + } + } else { + return nil, fwk.NewStatus(fwk.Unschedulable, err.Error()) + } } validNodesValidGPUs := lo.GroupBy(filteredGPUs, func(gpu *tfv1.GPU) string { @@ -142,10 +166,14 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod }) validNodeNonMatchingGPUs := make(map[string][]*tfv1.GPU, len(validNodesValidGPUs)) - nodeNames := sets.New[string]() + cnt := 0 + allGPUNodeNames := sets.New[string]() nodeGPUs := s.allocator.GetNodeGpuStore() + for k := range nodeGPUs { + allGPUNodeNames.Insert(k) + } for k, matchedGPUs := range validNodesValidGPUs { - nodeNames.Insert(k) + cnt++ // get all GPUs on this node allGPUs := nodeGPUs[k] @@ -157,11 +185,17 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod continue } + preAllocSize := total - matched + if preAllocSize <= 0 { + s.logger.Error(nil, "Filtering GPU error, unexpected less than 0", "pod", + pod.Name, "node", k, "totalGPU count", total, "matchedGPU count", matched) + preAllocSize = 2 + } // range if it's not in validNodesValidGPUs, add to validNodeNonMatchingGPUs - validNodeNonMatchingGPUs[k] = make([]*tfv1.GPU, 0, total-matched) + validNodeNonMatchingGPUs[k] = make([]*tfv1.GPU, 0, preAllocSize) for gpuName, gpu := range allGPUs { seen := false - // just loop because the number always <= 8 + // just loop because the number always <= 8/16 for _, matchedGPU := range matchedGPUs { if gpuName == matchedGPU.Name { seen = true 
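The PreFilter hunk above maps a `quota.QuotaExceededError` with `Unresolvable` set to `UnschedulableAndUnresolvable` via a direct type assertion, which only works because `CheckQuotaAndFilter` now returns the quota error unwrapped instead of wrapping it. Below is a minimal standalone sketch of that interaction; `QuotaExceededError` is a stand-in for the real type and the status strings are placeholders for the framework codes, not the actual API.

```go
package main

import (
	"errors"
	"fmt"
)

// QuotaExceededError is a stand-in for quota.QuotaExceededError with its new
// Unresolvable flag; the real type also carries namespace and quantities.
type QuotaExceededError struct {
	Resource     string
	Unresolvable bool
}

func (e *QuotaExceededError) Error() string { return "quota exceeded: " + e.Resource }

// statusFor mimics the PreFilter branching: unresolvable quota violations are
// terminal, everything else stays merely unschedulable (placeholder strings,
// not the real scheduler framework codes).
func statusFor(err error) string {
	if qe, ok := err.(*QuotaExceededError); ok && qe.Unresolvable {
		return "UnschedulableAndUnresolvable"
	}
	return "Unschedulable"
}

func main() {
	base := &QuotaExceededError{Resource: "tflops", Unresolvable: true}

	// A direct type assertion only matches the unwrapped error value, which is
	// why CheckQuotaAndFilter now returns the quota error as-is.
	wrapped := fmt.Errorf("quota check failed: %w", base)
	_, ok := wrapped.(*QuotaExceededError)
	fmt.Println(ok) // false: the assertion misses the wrapped error
	var qe *QuotaExceededError
	fmt.Println(errors.As(wrapped, &qe)) // true: errors.As would still find it

	fmt.Println(statusFor(base))             // UnschedulableAndUnresolvable
	fmt.Println(statusFor(errors.New("no"))) // Unschedulable
}
```

If the allocator kept wrapping the error with `fmt.Errorf("...: %w", err)`, the assertion in PreFilter would miss it and every quota violation would likely degrade to plain `Unschedulable`; switching to `errors.As` would be the alternative way to keep wrapping while preserving the check.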
@@ -173,7 +207,7 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod } } } - s.logger.Info("filtered valid node GPUs", "nodes count", nodeNames.Len(), "pod", pod.Name) + s.logger.Info("filtered valid node GPUs", "nodes count", cnt, "pod", pod.Name) // assign score based on different strategies score := s.allocator.Score(ctx, s.cfg, allocRequest, validNodesValidGPUs) @@ -182,7 +216,7 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod notMatchingGPUScore := s.allocator.Score(ctx, s.cfg, allocRequest, validNodeNonMatchingGPUs) s.fh.EventRecorder().Eventf(pod, pod, v1.EventTypeNormal, "PreScheduleDone", "pre filter for TensorFusion workload", - "TensorFusion pre schedule done, valid GPU node count: "+strconv.Itoa(nodeNames.Len())) + "TensorFusion pre schedule done, valid GPU node count: "+strconv.Itoa(cnt)) if s.logger.V(6).Enabled() { jsonStr, _ := json.Marshal(validNodesValidGPUs) @@ -195,55 +229,133 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod ValidNodeGPUScore: score, ValidNodeNotMatchingGPUScore: notMatchingGPUScore, FinalGPUs: []string{}, + PreemptPods: sync.Map{}, + IsPreemption: false, }) return &framework.PreFilterResult{ - NodeNames: nodeNames, - }, framework.NewStatus(framework.Success) + NodeNames: allGPUNodeNames, + }, fwk.NewStatus(fwk.Success) } func (s *GPUFit) PreFilterExtensions() framework.PreFilterExtensions { - return nil + return s +} + +func (s *GPUFit) AddPod(ctx context.Context, state fwk.CycleState, pod *v1.Pod, podInfoToAdd fwk.PodInfo, nodeInfo fwk.NodeInfo) *fwk.Status { + stateData, err := state.Read(CycleStateGPUSchedulingResult) + if err != nil { + return fwk.NewStatus(fwk.Error, err.Error()) + } + stateDataParsed := stateData.(*GPUSchedulingStateData) + if pods, ok := stateDataParsed.PreemptPods.Load(nodeInfo.Node().Name); ok { + podsParsed := pods.(sets.Set[types.NamespacedName]) + + nameNs := types.NamespacedName{ + Namespace: podInfoToAdd.GetPod().Namespace, + Name: podInfoToAdd.GetPod().Name, + } + if podsParsed.Has(nameNs) { + podsParsed.Delete(nameNs) + } + } + return fwk.NewStatus(fwk.Success, "") +} + +func (s *GPUFit) RemovePod(ctx context.Context, state fwk.CycleState, pod *v1.Pod, podInfoToRemove fwk.PodInfo, nodeInfo fwk.NodeInfo) *fwk.Status { + stateData, err := state.Read(CycleStateGPUSchedulingResult) + if err != nil { + if fwk.ErrNotFound == err { + stateData = &GPUSchedulingStateData{ + PreemptPods: sync.Map{}, + } + state.Write(CycleStateGPUSchedulingResult, stateData) + } else { + return fwk.NewStatus(fwk.Error, err.Error()) + } + } + stateDataParsed := stateData.(*GPUSchedulingStateData) + stateDataParsed.IsPreemption = true + if pods, ok := stateDataParsed.PreemptPods.Load(nodeInfo.Node().Name); ok { + parsedPods := pods.(sets.Set[types.NamespacedName]) + parsedPods.Insert(types.NamespacedName{ + Namespace: podInfoToRemove.GetPod().Namespace, + Name: podInfoToRemove.GetPod().Name, + }) + } else { + stateDataParsed.PreemptPods.Store(nodeInfo.Node().Name, sets.New(types.NamespacedName{ + Namespace: podInfoToRemove.GetPod().Namespace, + Name: podInfoToRemove.GetPod().Name, + })) + } + return fwk.NewStatus(fwk.Success, "") } -func (s *GPUFit) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { +func (s *GPUFit) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status { + // Check if DRA mode is enabled for this pod + if 
isDRAEnabled(pod) && hasDRAClaim(pod) { + return fwk.NewStatus(fwk.Skip, "DRA mode enabled, skipping custom GPU filter") + } + if !utils.IsTensorFusionWorker(pod) { - return framework.NewStatus(framework.Success, "skip for non tensor-fusion mode") + return fwk.NewStatus(fwk.Success, "skip for non tensor-fusion mode") } filterResult, err := state.Read(CycleStateGPUSchedulingResult) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) + } + + // k8s will RemoveAll Pods, and run Filter for high priority pod, + // then Scheduler framework will reprieve victims one by one until filter returns unschedulable + if filterResult.(*GPUSchedulingStateData).IsPreemption { + allocRequest, err := state.Read(CycleStateAllocateRequest) + allocRequestParsed := allocRequest.(*tfv1.AllocRequest) + if err != nil { + return fwk.NewStatus(fwk.Error, err.Error()) + } + podsToPreempt, ok := filterResult.(*GPUSchedulingStateData).PreemptPods.Load(nodeInfo.Node().Name) + if !ok { + return fwk.NewStatus(fwk.Unschedulable, "no pods to preempt") + } + podsToPreemptParsed := podsToPreempt.(sets.Set[types.NamespacedName]) + err = s.allocator.CheckQuotaAndFilterSingleNodePreempt( + nodeInfo.Node().Name, allocRequestParsed, podsToPreemptParsed) + if err != nil { + return fwk.NewStatus(fwk.Unschedulable, err.Error()) + } + return fwk.NewStatus(fwk.Success, "") } - nodeName := nodeInfo.GetName() + + nodeName := nodeInfo.Node().Name if _, ok := filterResult.(*GPUSchedulingStateData).NodeGPUs[nodeName]; !ok { - return framework.NewStatus(framework.Unschedulable, "no valid node found, gpu capacity not enough") + return fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") } - return framework.NewStatus(framework.Success, "") + return fwk.NewStatus(fwk.Success, "") } func (s *GPUFit) Score( ctx context.Context, - state *framework.CycleState, + state fwk.CycleState, pod *v1.Pod, - nodeInfo *framework.NodeInfo, -) (int64, *framework.Status) { + nodeInfo fwk.NodeInfo, +) (int64, *fwk.Status) { // Skip non tensor-fusion mode scheduling if !utils.IsTensorFusionWorker(pod) { - return 0, framework.NewStatus(framework.Success, "") + return 0, fwk.NewStatus(fwk.Success, "") } if state == nil { - return 0, framework.NewStatus(framework.Error, "no valid node found, gpu capacity not enough") + return 0, fwk.NewStatus(fwk.Error, "no valid node found, gpu capacity not enough") } filterResult, err := state.Read(CycleStateGPUSchedulingResult) if err != nil { - return 0, framework.NewStatus(framework.Error, err.Error()) + return 0, fwk.NewStatus(fwk.Error, err.Error()) } scheduledState := filterResult.(*GPUSchedulingStateData) - gpuScoreMap, ok := scheduledState.ValidNodeGPUScore[nodeInfo.GetName()] + gpuScoreMap, ok := scheduledState.ValidNodeGPUScore[nodeInfo.Node().Name] if !ok { - return 0, framework.NewStatus(framework.Unschedulable, "no valid node found, gpu capacity not enough") + return 0, fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") } // normalize to 0-100, when node has more GPUs but filtered out, // should consider it as 100 when strategy is compact_first, and consider as 0 when is low_load_first @@ -252,7 +364,7 @@ func (s *GPUFit) Score( sum += score } - notMatchingGPUScoreMap, ok := scheduledState.ValidNodeNotMatchingGPUScore[nodeInfo.GetName()] + notMatchingGPUScoreMap, ok := scheduledState.ValidNodeNotMatchingGPUScore[nodeInfo.Node().Name] if ok { for _, score := range notMatchingGPUScoreMap { sum += 
score @@ -265,27 +377,27 @@ func (s *GPUFit) ScoreExtensions() framework.ScoreExtensions { return nil } -func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { +func (s *GPUFit) Reserve(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) *fwk.Status { if !utils.IsTensorFusionWorker(pod) { - return framework.NewStatus(framework.Success, "skip for non tensor-fusion mode") + return fwk.NewStatus(fwk.Success, "skip for non tensor-fusion mode") } s.logger.Info("Reserving pod for GPU resources", "pod", pod.Name, "node", nodeName) allocRequest, err := state.Read(CycleStateAllocateRequest) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } schedulingResultRaw, err := state.Read(CycleStateGPUSchedulingResult) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } // set final GPUs and try update GPU allocator cache schedulingResult := schedulingResultRaw.(*GPUSchedulingStateData) gpuScoreMap, ok := schedulingResult.ValidNodeGPUScore[nodeName] if !ok { - return framework.NewStatus(framework.Unschedulable, "no valid node found, gpu capacity not enough") + return fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") } // find top N score GPUs in this node @@ -306,12 +418,12 @@ func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod * allocRequest.(*tfv1.AllocRequest), ) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } - return framework.NewStatus(framework.Success, "") + return fwk.NewStatus(fwk.Success, "") } -func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { +func (s *GPUFit) Unreserve(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) { if !utils.IsTensorFusionWorker(pod) { return } @@ -330,7 +442,7 @@ func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod }, schedulingResult.FinalGPUs, pod.ObjectMeta) } -func (s *GPUFit) PostBind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { +func (s *GPUFit) PostBind(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) { if !utils.IsTensorFusionWorker(pod) { return } @@ -359,3 +471,17 @@ func (s *GPUFit) PostBind(ctx context.Context, state *framework.CycleState, pod "Attach GPU device ID info", "Attach TensorFusion GPU device IDs to Pod: "+gpuIDs) } } + +// isDRAEnabled checks if DRA is enabled for a pod +func isDRAEnabled(pod *v1.Pod) bool { + if pod.Annotations == nil { + return false + } + val, ok := pod.Annotations[constants.DRAEnabledAnnotation] + return ok && val == constants.TrueStringValue +} + +// hasDRAClaim checks if a pod has DRA ResourceClaim references +func hasDRAClaim(pod *v1.Pod) bool { + return len(pod.Spec.ResourceClaims) > 0 +} diff --git a/internal/scheduler/gpuresources/gpuresources_dra_test.go b/internal/scheduler/gpuresources/gpuresources_dra_test.go new file mode 100644 index 00000000..021be137 --- /dev/null +++ b/internal/scheduler/gpuresources/gpuresources_dra_test.go @@ -0,0 +1,237 @@ +package gpuresources + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +func 
TestIsDRAEnabled(t *testing.T) { + tests := []struct { + name string + annotations map[string]string + expected bool + }{ + { + name: "DRA enabled annotation", + annotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.TrueStringValue, + }, + expected: true, + }, + { + name: "DRA disabled annotation", + annotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.FalseStringValue, + }, + expected: false, + }, + { + name: "no annotation", + expected: false, + }, + { + name: "other annotations", + annotations: map[string]string{ + "other.annotation": "value", + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: tt.annotations, + }, + } + + result := isDRAEnabled(pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestHasDRAClaimScheduler(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + expected bool + }{ + { + name: "pod with resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{ + {Name: "gpu-claim"}, + }, + }, + }, + expected: true, + }, + { + name: "pod with multiple resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{ + {Name: "gpu-claim"}, + {Name: "other-claim"}, + }, + }, + }, + expected: true, + }, + { + name: "pod without resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{}, + }, + expected: false, + }, + { + name: "pod with empty resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{}, + }, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := hasDRAClaim(tt.pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +// Integration test for DRA detection logic +func TestDRADetectionIntegration(t *testing.T) { + tests := []struct { + name string + draAnnotation string + hasResourceClaims bool + expectedDRA bool + expectedClaim bool + }{ + { + name: "DRA enabled with claims", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: true, + expectedDRA: true, + expectedClaim: true, + }, + { + name: "DRA enabled without claims", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: false, + expectedDRA: true, + expectedClaim: false, + }, + { + name: "DRA disabled with claims", + draAnnotation: constants.FalseStringValue, + hasResourceClaims: true, + expectedDRA: false, + expectedClaim: true, + }, + { + name: "no DRA annotation, no claims", + hasResourceClaims: false, + expectedDRA: false, + expectedClaim: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: make(map[string]string), + }, + Spec: corev1.PodSpec{}, + } + + if tt.draAnnotation != "" { + pod.Annotations[constants.DRAEnabledAnnotation] = tt.draAnnotation + } + + if tt.hasResourceClaims { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{ + {Name: "test-claim"}, + } + } + + draEnabled := isDRAEnabled(pod) + hasClaim := hasDRAClaim(pod) + + assert.Equal(t, tt.expectedDRA, draEnabled, "DRA enabled detection mismatch") + assert.Equal(t, tt.expectedClaim, hasClaim, "Resource claim detection mismatch") + }) + } +} + +// Test the combination logic that scheduler uses +func TestSchedulerDRALogic(t *testing.T) { + tests := []struct { + name string + draAnnotation string + hasResourceClaims bool + 
shouldSkipScheduler bool + }{ + { + name: "DRA enabled with claims - should skip", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: true, + shouldSkipScheduler: true, + }, + { + name: "DRA enabled without claims - should not skip", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: false, + shouldSkipScheduler: false, + }, + { + name: "DRA disabled with claims - should not skip", + draAnnotation: constants.FalseStringValue, + hasResourceClaims: true, + shouldSkipScheduler: false, + }, + { + name: "no DRA, no claims - should not skip", + shouldSkipScheduler: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: make(map[string]string), + }, + Spec: corev1.PodSpec{}, + } + + if tt.draAnnotation != "" { + pod.Annotations[constants.DRAEnabledAnnotation] = tt.draAnnotation + } + + if tt.hasResourceClaims { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{ + {Name: "test-claim"}, + } + } + + // This is the actual logic used in the scheduler + shouldSkip := isDRAEnabled(pod) && hasDRAClaim(pod) + assert.Equal(t, tt.shouldSkipScheduler, shouldSkip) + }) + } +} diff --git a/internal/scheduler/gpuresources/gpuresources_test.go b/internal/scheduler/gpuresources/gpuresources_test.go index fb7e45b5..5fa25150 100644 --- a/internal/scheduler/gpuresources/gpuresources_test.go +++ b/internal/scheduler/gpuresources/gpuresources_test.go @@ -7,6 +7,7 @@ import ( "testing" "time" + "github.com/samber/lo" "github.com/stretchr/testify/suite" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -14,23 +15,28 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/informers" + clientsetfake "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/events" - "k8s.io/kubernetes/pkg/scheduler/framework" + fwk "k8s.io/kube-scheduler/framework" + framework "k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort" frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" - testutil "sigs.k8s.io/scheduler-plugins/test/util" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/utils" + internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache" + internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue" ) type GPUResourcesSuite struct { @@ -201,6 +207,7 @@ func (s *GPUResourcesSuite) SetupTest() { }, }, } + s.client = fake.NewClientBuilder().WithScheme(scheme.Scheme). WithRuntimeObjects(objList...). WithStatusSubresource( @@ -213,9 +220,11 @@ func (s *GPUResourcesSuite) SetupTest() { ). 
Build() + k8sObjs := make([]runtime.Object, 0, len(pods)+len(nodes)) for _, pod := range pods { err := s.client.Create(s.ctx, pod) s.NoError(err) + k8sObjs = append(k8sObjs, pod) } for _, gpu := range gpus { err := s.client.Create(s.ctx, gpu) @@ -224,6 +233,7 @@ func (s *GPUResourcesSuite) SetupTest() { for _, node := range nodes { err := s.client.Create(s.ctx, node) s.NoError(err) + k8sObjs = append(k8sObjs, node) } var registerPlugins []tf.RegisterPluginFunc @@ -233,11 +243,16 @@ func (s *GPUResourcesSuite) SetupTest() { tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), ) + fakeClientSet := clientsetfake.NewSimpleClientset(k8sObjs...) + informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0) + metrics.Register() + metricsRecorder := metrics.NewMetricsAsyncRecorder(1000, time.Second, s.ctx.Done()) fwk, err := tf.NewFramework( s.ctx, registeredPlugins, "", - frameworkruntime.WithPodNominator(testutil.NewPodNominator(nil)), - frameworkruntime.WithSnapshotSharedLister(testutil.NewFakeSharedLister(pods, nodes)), + frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)), + frameworkruntime.WithSnapshotSharedLister(internalcache.NewEmptySnapshot()), frameworkruntime.WithEventRecorder(&events.FakeRecorder{}), + frameworkruntime.WithMetricsRecorder(metricsRecorder), ) s.NoError(err) s.fwk = fwk @@ -271,7 +286,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { tests := []struct { name string pod *v1.Pod - expectedStatus framework.Code + expectedStatus fwk.Code expectedNodes string }{ { @@ -282,7 +297,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "100", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-a node-b", }, { @@ -293,7 +308,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "2000", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b", }, { @@ -304,7 +319,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "100", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b", }, { @@ -315,7 +330,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "2000", constants.VRAMRequestAnnotation: "80Gi", }), - expectedStatus: framework.Unschedulable, + expectedStatus: fwk.Unschedulable, expectedNodes: "", }, { @@ -326,7 +341,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "100", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Unschedulable, + expectedStatus: fwk.Unschedulable, expectedNodes: "", }, } @@ -334,11 +349,11 @@ func (s *GPUResourcesSuite) TestPreFilter() { for _, tt := range tests { s.Run(tt.name, func() { state := framework.NewCycleState() - res, status := s.plugin.PreFilter(s.ctx, state, tt.pod) + res, status := s.plugin.PreFilter(s.ctx, state, tt.pod, []fwk.NodeInfo{}) s.Equal(tt.expectedStatus, status.Code(), status.Message()) - if tt.expectedStatus == framework.Success { + if tt.expectedStatus == fwk.Success { s.Require().NotNil(res) - nodes := sort.StringSlice(res.NodeNames.UnsortedList()) + nodes := sort.StringSlice(getPreFilterResult(state)) nodes.Sort() s.Equal(tt.expectedNodes, strings.Join(nodes, " ")) } @@ -351,19 +366,19 @@ func (s *GPUResourcesSuite) 
TestPreFilterForNonTensorFusionPod() { tests := []struct { name string pod *v1.Pod - expectedStatus framework.Code + expectedStatus fwk.Code expectedNodes string }{ { name: "pod requires 1 GPU, enough capacity", pod: s.makeNonTensorFusionPod("p1", 1), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b node-c", }, { name: "pod requires 2 GPU, enough capacity", pod: s.makeNonTensorFusionPod("p1", 2), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b node-c", }, } @@ -371,9 +386,9 @@ func (s *GPUResourcesSuite) TestPreFilterForNonTensorFusionPod() { for _, tt := range tests { s.Run(tt.name, func() { state := framework.NewCycleState() - res, status := s.plugin.PreFilter(s.ctx, state, tt.pod) + res, status := s.plugin.PreFilter(s.ctx, state, tt.pod, []fwk.NodeInfo{}) s.Equal(tt.expectedStatus, status.Code(), status.Message()) - if tt.expectedStatus == framework.Success { + if tt.expectedStatus == fwk.Success { s.Require().NotNil(res) nodes := sort.StringSlice(res.NodeNames.UnsortedList()) nodes.Sort() @@ -394,23 +409,23 @@ func (s *GPUResourcesSuite) TestFilter() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) tests := []struct { name string nodeName string - expectedStatus framework.Code + expectedStatus fwk.Code }{ { name: "node with available GPU", nodeName: "node-a", - expectedStatus: framework.Success, + expectedStatus: fwk.Success, }, { name: "node without available GPU", nodeName: "node-c", - expectedStatus: framework.Unschedulable, + expectedStatus: fwk.Unschedulable, }, } @@ -435,7 +450,7 @@ func (s *GPUResourcesSuite) TestScore() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) // node a as one worker consumed 10% GPU resources @@ -466,7 +481,7 @@ func (s *GPUResourcesSuite) TestReserveAndUnreserve() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) // Reserve on node-a @@ -507,7 +522,7 @@ func (s *GPUResourcesSuite) TestPostBind() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) reserveStatus := s.plugin.Reserve(s.ctx, state, pod, "node-a") @@ -609,7 +624,7 @@ func (s *GPUResourcesSuite) TestScoreExtensions() { func (s *GPUResourcesSuite) TestPreFilterExtensions() { log.FromContext(s.ctx).Info("Running TestPreFilterExtensions") - s.Nil(s.plugin.PreFilterExtensions()) + s.NotNil(s.plugin.PreFilterExtensions()) } func (s *GPUResourcesSuite) TestName() { @@ -629,13 +644,13 @@ func (s *GPUResourcesSuite) TestReserve_ErrorHandling() { // No pre-filter call, so state is empty status := s.plugin.Reserve(s.ctx, state, pod, "node-a") s.Error(status.AsError()) - s.Equal(framework.Error, status.Code()) + s.Equal(fwk.Error, status.Code()) // 
Pre-filter, but for a different node - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) status = s.plugin.Reserve(s.ctx, state, pod, "node-c-non-existent") - s.Equal(framework.Unschedulable, status.Code()) + s.Equal(fwk.Unschedulable, status.Code()) } func (s *GPUResourcesSuite) TestUnreserve_ErrorHandling() { @@ -668,7 +683,7 @@ func (s *GPUResourcesSuite) TestPostBind_ErrorHandling() { s.plugin.PostBind(s.ctx, state, pod, "node-a") // Test with a pod that doesn't exist in the client - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) reserveStatus := s.plugin.Reserve(s.ctx, state, pod, "node-a") s.Require().True(reserveStatus.IsSuccess()) @@ -688,7 +703,7 @@ func (s *GPUResourcesSuite) TestFilter_ErrorHandling() { // No pre-filter call, so state is empty status := s.plugin.Filter(s.ctx, state, pod, nodeInfo) s.Error(status.AsError()) - s.Equal(framework.Error, status.Code()) + s.Equal(fwk.Error, status.Code()) } func (s *GPUResourcesSuite) TestScore_ErrorHandling() { @@ -704,13 +719,21 @@ func (s *GPUResourcesSuite) TestScore_ErrorHandling() { nodeInfo.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-a"}}) _, status := s.plugin.Score(s.ctx, state, pod, nodeInfo) s.Error(status.AsError()) - s.Equal(framework.Error, status.Code()) + s.Equal(fwk.Error, status.Code()) // Pre-filter, but for a different node nodeInfo = &framework.NodeInfo{} nodeInfo.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-c-non-existent"}}) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) _, status = s.plugin.Score(s.ctx, state, pod, nodeInfo) - s.Equal(framework.Unschedulable, status.Code()) + s.Equal(fwk.Unschedulable, status.Code()) +} + +func getPreFilterResult(state *framework.CycleState) []string { + data, err := state.Read(CycleStateGPUSchedulingResult) + if err != nil { + return nil + } + return lo.Keys(data.(*GPUSchedulingStateData).NodeGPUs) } diff --git a/internal/scheduler/gputopo/gpu_network_topo.go b/internal/scheduler/gputopo/gpu_network_topo.go index f481ea8c..197e3995 100644 --- a/internal/scheduler/gputopo/gpu_network_topo.go +++ b/internal/scheduler/gputopo/gpu_network_topo.go @@ -9,6 +9,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/klog/v2" + fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -53,6 +54,6 @@ func (s *GPUNetworkTopologyAware) Name() string { return Name } -func (s *GPUNetworkTopologyAware) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { - return framework.NewStatus(framework.Success, "") +func (s *GPUNetworkTopologyAware) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status { + return fwk.NewStatus(fwk.Success, "") } diff --git a/internal/server/router/allocator_info.go b/internal/server/router/allocator_info.go index 7c8c4f78..58a949cf 100644 --- a/internal/server/router/allocator_info.go +++ b/internal/server/router/allocator_info.go @@ -17,6 +17,7 @@ import ( tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" v1 "k8s.io/api/core/v1" 
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/log" @@ -104,20 +105,20 @@ func (r *AllocatorInfoRouter) SimulateScheduleOnePod(ctx *gin.Context) { state.SetRecordPluginMetrics(false) podsToActivate := framework.NewPodsToActivate() state.Write(framework.PodsToActivateKey, podsToActivate) - state.Write(framework.StateKey(constants.SchedulerSimulationKey), &gpuallocator.SimulateSchedulingFilterDetail{ + state.Write(fwk.StateKey(constants.SchedulerSimulationKey), &gpuallocator.SimulateSchedulingFilterDetail{ FilterStageDetails: []filter.FilterDetail{}, }) // simulate schedulingCycle non side effect part - fwk := r.scheduler.Profiles[pod.Spec.SchedulerName] - if fwk == nil { + fwkInstance := r.scheduler.Profiles[pod.Spec.SchedulerName] + if fwkInstance == nil { log.FromContext(ctx).Error(nil, "scheduler framework not found", "pod", pod.Name, "namespace", pod.Namespace) ctx.JSON(http.StatusInternalServerError, gin.H{"error": "scheduler framework not found"}) return } - scheduleResult, err := r.scheduler.SchedulePod(ctx, fwk, state, pod) + scheduleResult, err := r.scheduler.SchedulePod(ctx, fwkInstance, state, pod) gpuCycleState, _ := state.Read(gpuresources.CycleStateGPUSchedulingResult) - simulateSchedulingFilterDetail, _ := state.Read(framework.StateKey(constants.SchedulerSimulationKey)) + simulateSchedulingFilterDetail, _ := state.Read(fwk.StateKey(constants.SchedulerSimulationKey)) if err != nil { if fitError, ok := err.(*framework.FitError); ok { ctx.JSON(http.StatusOK, gin.H{ diff --git a/internal/utils/compose.go b/internal/utils/compose.go index e7170881..22752f45 100644 --- a/internal/utils/compose.go +++ b/internal/utils/compose.go @@ -16,6 +16,10 @@ import ( "k8s.io/utils/ptr" ) +var injectLibResource v1.ResourceList = v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("20m"), + v1.ResourceMemory: resource.MustParse("64Mi"), +} var nodeDiscoveryDefaultRequests v1.ResourceList = v1.ResourceList{ v1.ResourceCPU: resource.MustParse("20m"), v1.ResourceMemory: resource.MustParse("64Mi"), @@ -79,6 +83,8 @@ type TensorFusionInfo struct { // Pod mutating webhook can not get Pod UID sometimes, // thus need pod controller to set the owner reference PendingSetPodAsOwner bool + // DRA support + DRAEnabled bool } func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo) { @@ -113,6 +119,10 @@ func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo Tens pod.Annotations[constants.IsLocalGPUAnnotation] = strconv.FormatBool(tfInfo.Profile.IsLocalGPU) // add inject container annotation for client Pod, in case user doesn't specify it pod.Annotations[constants.InjectContainerAnnotation] = strings.Join(tfInfo.ContainerNames, ",") + // add DRA enabled annotation + if tfInfo.DRAEnabled { + pod.Annotations[constants.DRAEnabledAnnotation] = constants.TrueStringValue + } } func AppendTFWorkerLabelsAndAnnotationsAfterTemplate( @@ -175,6 +185,11 @@ func AddTFDefaultClientConfBeforePatch( MountPath: constants.TFLibsVolumeMountPath, }, }, + Resources: v1.ResourceRequirements{ + Requests: injectLibResource, + Limits: injectLibResource, + }, + Env: convertDisabledFeatures4InjectLib(pod.Annotations[constants.DisableFeaturesAnnotation]), }) pod.Spec.Volumes = append(pod.Spec.Volumes, v1.Volume{ Name: constants.TFLibsVolumeName, @@ -220,10 +235,9 @@ func AddTFDefaultClientConfBeforePatch( 
pod.Spec.Containers[injectContainerIndex].VolumeMounts = append( pod.Spec.Containers[injectContainerIndex].VolumeMounts, v1.VolumeMount{ - Name: constants.DataVolumeName, - MountPath: constants.SharedMemDeviceName, - SubPath: constants.SharedMemMountSubPath, - // + constants.TFLibsVolumeMountPath, SubPathExpr: constants.TFDataPathWorkerExpr, + Name: constants.DataVolumeName, + MountPath: constants.TFDataPath + constants.SharedMemMountSubPath, + SubPathExpr: constants.TFDataPathWorkerExpr, MountPropagation: ptr.To(v1.MountPropagationHostToContainer), }) @@ -302,22 +316,47 @@ func AddTFDefaultClientConfBeforePatch( } func convertDisabledFeaturesToEnvs(disabledFeatures string, envList []v1.EnvVar) []v1.EnvVar { - disabledFeaturesList := strings.Split(disabledFeatures, ",") - for _, feature := range disabledFeaturesList { + disabledFeaturesList := strings.SplitSeq(disabledFeatures, ",") + for feature := range disabledFeaturesList { if feat, ok := featureShortcutMap[feature]; ok { - envList = append(envList, v1.EnvVar{ - Name: feat.EnvName, - Value: feat.EnvValue, - }) + if !lo.ContainsBy(envList, func(item v1.EnvVar) bool { + return item.Name == feat.EnvName + }) { + envList = append(envList, v1.EnvVar{ + Name: feat.EnvName, + Value: feat.EnvValue, + }) + } } } return envList } +func convertDisabledFeatures4InjectLib(disabledFeatures string) []v1.EnvVar { + if disabledFeatures == "" { + return []v1.EnvVar{} + } + disabledFeaturesList := strings.SplitSeq(disabledFeatures, ",") + + // GPU limiter by-pass take effect in bootstrap stage, add special handling here + for feature := range disabledFeaturesList { + if feature == constants.BuiltInFeaturesGpuLimiter { + return []v1.EnvVar{ + { + Name: featureShortcutMap[feature].EnvName, + Value: featureShortcutMap[feature].EnvValue, + }, + } + } + } + return []v1.EnvVar{} +} + func AddTFHypervisorConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, pool *tfv1.GPUPool) { // Hypervisor needs to read /proc to map pod with processID spec.HostPID = true spec.TerminationGracePeriodSeconds = constants.GracefulPeriodSeconds + spec.PriorityClassName = constants.NodeCriticalPriorityClassName enableVector := pool.Spec.ComponentConfig.Hypervisor != nil && pool.Spec.ComponentConfig.Hypervisor.EnableVector @@ -430,8 +469,7 @@ func composeHypervisorContainer(spec *v1.PodSpec, pool *tfv1.GPUPool, enableVect spec.Containers[0].VolumeMounts = append(spec.Containers[0].VolumeMounts, v1.VolumeMount{ Name: constants.DataVolumeName, ReadOnly: false, - MountPath: constants.SharedMemDeviceName, - SubPath: constants.SharedMemMountSubPath, + MountPath: constants.TFDataPath, }, v1.VolumeMount{ Name: constants.TensorFusionGPUInfoConfigVolumeName, MountPath: constants.TensorFusionGPUInfoConfigMountPath, @@ -649,12 +687,9 @@ func AddWorkerConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, workerCon spec.Containers[0].VolumeMounts = append( spec.Containers[0].VolumeMounts, v1.VolumeMount{ - Name: constants.DataVolumeName, - MountPath: constants.SharedMemDeviceName, - // TODO not working. 
- // + constants.TFLibsVolumeMountPath - // SubPathExpr: constants.TFDataPathWorkerExpr, - SubPath: constants.SharedMemMountSubPath, + Name: constants.DataVolumeName, + MountPath: constants.TFDataPath + constants.SharedMemMountSubPath, + SubPathExpr: constants.TFDataPathWorkerExpr, MountPropagation: ptr.To(v1.MountPropagationHostToContainer), }) spec.Containers[0].Env = append(spec.Containers[0].Env, v1.EnvVar{ diff --git a/internal/utils/reconcile.go b/internal/utils/reconcile.go index ebc091ac..23026cf7 100644 --- a/internal/utils/reconcile.go +++ b/internal/utils/reconcile.go @@ -214,6 +214,15 @@ func IsTensorFusionWorker(pod *corev1.Pod) bool { return pod.Labels[constants.LabelComponent] == constants.ComponentWorker } +func GetInitialGPUNodeSelector() []string { + selector := os.Getenv("INITIAL_GPU_NODE_LABEL_SELECTOR") + if selector == "" { + selector = constants.InitialGPUNodeSelector + } + selectors := strings.Split(selector, "=") + return selectors +} + var GPUResourceNames = []corev1.ResourceName{ "nvidia.com/gpu", "amd.com/gpu", diff --git a/internal/webhook/v1/pod_dra.go b/internal/webhook/v1/pod_dra.go new file mode 100644 index 00000000..6a55fc4f --- /dev/null +++ b/internal/webhook/v1/pod_dra.go @@ -0,0 +1,197 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1 + +import ( + "context" + "fmt" + "strings" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" +) + +// DRAProcessor handles all DRA-related operations for pod admission +type DRAProcessor struct { + client.Client + enableDRA bool + resourceClaimTemplateName string // cached ResourceClaimTemplate name + configLoaded bool // tracks if configuration has been loaded +} + +// NewDRAProcessor creates a new DRA processor +func NewDRAProcessor(client client.Client) *DRAProcessor { + return &DRAProcessor{ + Client: client, + enableDRA: false, + } +} + +// InitializeDRAConfig is kept for backward compatibility but now does nothing +// Configuration is loaded lazily on first use +func (p *DRAProcessor) InitializeDRAConfig(ctx context.Context) error { + // No-op - configuration is now loaded lazily + if p.configLoaded { + return nil + } + + // Set defaults first + p.enableDRA = false + p.resourceClaimTemplateName = constants.DRAResourceClaimTemplateName + + templateList := &tfv1.SchedulingConfigTemplateList{} + // Use the provided context to respect cancellation + err := p.List(ctx, templateList) + if err != nil { + // Log error but don't fail - fall back to defaults + // This allows webhook to work even if templates are unavailable + p.configLoaded = true + return nil + } + + // Check if any template has DRA enabled and cache the ResourceClaimTemplateName + for _, template := range templateList.Items { + if template.Spec.DRA != nil { + if template.Spec.DRA.Enable != nil && *template.Spec.DRA.Enable { + p.enableDRA = true + } + // Cache the ResourceClaimTemplateName from the template + if template.Spec.DRA.ResourceClaimTemplateName != "" { + p.resourceClaimTemplateName = template.Spec.DRA.ResourceClaimTemplateName + } + } + } + + p.configLoaded = true + return nil +} + +// IsDRAEnabled checks if DRA is enabled for a specific pod +func (p *DRAProcessor) IsDRAEnabled(ctx context.Context, pod *corev1.Pod) bool { + // Load configuration if not yet loaded (lazy loading) + if !p.configLoaded { + _ = p.InitializeDRAConfig(ctx) // Ignore error to maintain backward compatibility + } + + // Check pod-level annotation first (explicit override) + if val, ok := pod.Annotations[constants.DRAEnabledAnnotation]; ok && val == constants.TrueStringValue { + return true + } + + // Check pod-level annotation for explicit disable + if val, ok := pod.Annotations[constants.DRAEnabledAnnotation]; ok && val == constants.FalseStringValue { + return false + } + + // Fall back to global configuration + return p.enableDRA +} + +// HasDRAClaim checks if a pod has DRA ResourceClaim references +func HasDRAClaim(pod *corev1.Pod) bool { + return len(pod.Spec.ResourceClaims) > 0 +} + +// HandleDRAAdmission handles the complete DRA admission process +func (p *DRAProcessor) HandleDRAAdmission(ctx context.Context, pod *corev1.Pod, tfInfo *utils.TensorFusionInfo, containerIndices []int) error { + // Load DRA configuration if needed + if err := p.InitializeDRAConfig(ctx); err != nil { + return fmt.Errorf("failed to load DRA config: %w", err) + } + + // Convert GPU resources to ResourceClaimTemplate reference and store CEL in annotation + celSelector, err := BuildCELSelector(pod, tfInfo) + if err != nil { + return fmt.Errorf("failed to build CEL selector: %w", err) + } + + // Inject ResourceClaimTemplate reference to Pod + 
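// --- Editorial sketch (not part of the patch) ---------------------------------
// A minimal, standalone illustration of the enablement precedence implemented by
// DRAProcessor.IsDRAEnabled above: an explicit pod annotation always wins over the
// cluster-wide default derived from SchedulingConfigTemplate. The literal
// annotation key below is only a stand-in for constants.DRAEnabledAnnotation.
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// draEnabledFor mirrors the three-way decision: "true" forces DRA on, "false"
// forces it off, and anything else falls back to the global default.
func draEnabledFor(pod *corev1.Pod, globalDefault bool) bool {
	switch pod.Annotations["tensor-fusion.ai/dra-enabled"] { // assumed key, see constants.DRAEnabledAnnotation
	case "true":
		return true
	case "false":
		return false
	default:
		return globalDefault
	}
}

func main() {
	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
		Annotations: map[string]string{"tensor-fusion.ai/dra-enabled": "false"},
	}}
	// Pod-level opt-out beats a cluster-wide default of "enabled".
	fmt.Println(draEnabledFor(pod, true)) // false
}
// --- End of sketch -------------------------------------------------------------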
p.injectResourceClaimTemplateRef(pod) + + // Mark pod with DRA enabled annotation + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) + } + pod.Annotations[constants.DRAEnabledAnnotation] = constants.TrueStringValue + pod.Annotations[constants.DRACelExpressionAnnotation] = celSelector + + return nil +} + +// BuildCELSelector constructs a CEL expression for DRA device selection based on TensorFusion requirements +func BuildCELSelector(pod *corev1.Pod, tfInfo *utils.TensorFusionInfo) (string, error) { + var conditions []string + + // 1. GPU model filter (if specified - basic attribute that should be widely supported) + if tfInfo.Profile.GPUModel != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["model"] == "%s"`, tfInfo.Profile.GPUModel)) + } + + // 2. GPU count requirement (important for multi-GPU workloads) + if tfInfo.Profile.GPUCount > 0 { + conditions = append(conditions, fmt.Sprintf(`size(devices) >= %d`, tfInfo.Profile.GPUCount)) + } + + // 3. Pool name filter (for resource isolation and scheduling preferences) + if tfInfo.Profile.PoolName != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["pool_name"] == "%s"`, tfInfo.Profile.PoolName)) + } + + // 4. Pod namespace filter (for namespace-based device isolation) + if pod.Namespace != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["pod_namespace"] == "%s"`, pod.Namespace)) + } + + // Return a basic condition if no specific requirements + if len(conditions) == 0 { + // Simple condition that should work with most DRA drivers + return `device.attributes.exists("type")`, nil + } + + return strings.Join(conditions, " && "), nil +} + +// injectResourceClaimTemplateRef adds ResourceClaimTemplate reference to Pod spec +func (p *DRAProcessor) injectResourceClaimTemplateRef(pod *corev1.Pod) { + // Add ResourceClaimTemplate reference to pod.Spec.ResourceClaims + if pod.Spec.ResourceClaims == nil { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{} + } + + // Use ResourceClaimTemplate instead of direct ResourceClaim + claimRef := corev1.PodResourceClaim{ + Name: constants.DRAClaimDefineName, + ResourceClaimTemplateName: &p.resourceClaimTemplateName, + } + + // Check if the claim reference already exists to maintain idempotency + claimExists := false + for _, existingClaim := range pod.Spec.ResourceClaims { + if existingClaim.Name == constants.DRAClaimDefineName { + claimExists = true + break + } + } + + if !claimExists { + pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims, claimRef) + } +} diff --git a/internal/webhook/v1/pod_webhook.go b/internal/webhook/v1/pod_webhook.go index 53610ffe..a7773ece 100644 --- a/internal/webhook/v1/pod_webhook.go +++ b/internal/webhook/v1/pod_webhook.go @@ -37,6 +37,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/portallocator" "github.com/NexusGPU/tensor-fusion/internal/utils" @@ -46,17 +47,24 @@ import ( var httpClient = &http.Client{Timeout: 10 * time.Second} // SetupPodWebhookWithManager registers the webhook for Pod in the manager. 
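// --- Editorial sketch (not part of the patch) ---------------------------------
// The CEL selector built by BuildCELSelector above is a plain "&&"-joined string.
// For a profile requesting two H100s in pool "prod" from namespace "ml", the
// expression would take the shape assembled below. This only illustrates the
// output format; the attribute names ("model", "pool_name", "pod_namespace") are
// the ones used in BuildCELSelector and must match what the DRA driver publishes.
package main

import (
	"fmt"
	"strings"
)

func main() {
	conditions := []string{
		`device.attributes["model"] == "H100"`,
		`size(devices) >= 2`,
		`device.attributes["pool_name"] == "prod"`,
		`device.attributes["pod_namespace"] == "ml"`,
	}
	// Conditions are joined with a logical AND, exactly as in BuildCELSelector.
	fmt.Println(strings.Join(conditions, " && "))
}
// --- End of sketch -------------------------------------------------------------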
-func SetupPodWebhookWithManager(mgr ctrl.Manager, portAllocator *portallocator.PortAllocator) error { +func SetupPodWebhookWithManager(mgr ctrl.Manager, portAllocator *portallocator.PortAllocator, pricingProvider pricing.PricingProvider) error { webhookServer := mgr.GetWebhookServer() - webhookServer.Register("/mutate-v1-pod", - &admission.Webhook{ - Handler: &TensorFusionPodMutator{ - decoder: admission.NewDecoder(runtime.NewScheme()), - Client: mgr.GetClient(), - portAllocator: portAllocator, - }, - }) + // Initialize DRA processor + draProcessor := NewDRAProcessor(mgr.GetClient()) + if err := draProcessor.InitializeDRAConfig(context.Background()); err != nil { + return fmt.Errorf("failed to initialize DRA config: %w", err) + } + + // Initialize DRA setting from global configuration + mutator := &TensorFusionPodMutator{ + decoder: admission.NewDecoder(runtime.NewScheme()), + Client: mgr.GetClient(), + portAllocator: portAllocator, + draProcessor: draProcessor, + } + + webhookServer.Register("/mutate-v1-pod", &admission.Webhook{Handler: mutator}) return nil } @@ -64,6 +72,7 @@ type TensorFusionPodMutator struct { Client client.Client decoder admission.Decoder portAllocator *portallocator.PortAllocator + draProcessor *DRAProcessor } // Handle implements admission.Handler interface. @@ -100,7 +109,7 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque return admission.Errored(http.StatusBadRequest, fmt.Errorf("failed to marshal current pod: %w", err)) } - tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, pod) + tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, m.draProcessor, pod) if err != nil { return admission.Errored(http.StatusInternalServerError, fmt.Errorf("parse tf resources: %w", err)) } @@ -159,16 +168,34 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque return admission.Allowed("no valid container to inject tensor-fusion, skipped") } - // Add defaults and tensor-fusion injection logic + // Handle DRA-specific processing if enabled + if tfInfo.DRAEnabled { + // Process DRA workload + if err := m.draProcessor.HandleDRAAdmission(ctx, pod, &tfInfo, containerIndices); err != nil { + return admission.Errored(http.StatusInternalServerError, fmt.Errorf("failed to handle DRA admission: %w", err)) + } + } + + // Common processing for both DRA and regular modes utils.AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod, tfInfo) utils.AddTFDefaultClientConfBeforePatch(ctx, pod, pool, tfInfo, containerIndices) + // Add priorityClass if contains higher QoS level and Pod priority class not specified + if pod.Spec.PriorityClassName == "" && + (tfInfo.Profile.Qos == tfv1.QoSHigh || tfInfo.Profile.Qos == tfv1.QoSCritical) { + pod.Spec.PriorityClassName = constants.TensorFusionSystemName + string(tfInfo.Profile.Qos) + } + // Inject initContainer and env variables patches, err := m.patchTFClient( - pod, pool, tfInfo.Profile.IsLocalGPU, currentBytes, containerIndices, + ctx, pod, pool, tfInfo.Profile.IsLocalGPU, currentBytes, containerIndices, ) if err != nil { - log.Error(err, "failed to patch tf client", "pod", req.Name, "namespace", req.Namespace) + mode := "regular" + if tfInfo.DRAEnabled { + mode = "DRA" + } + log.Error(err, "failed to patch tf client", "mode", mode, "pod", req.Name, "namespace", req.Namespace) return admission.Errored(http.StatusInternalServerError, err) } @@ -266,6 +293,7 @@ func (m *TensorFusionPodMutator) createOrUpdateWorkload(ctx context.Context, pod } func (m *TensorFusionPodMutator) patchTFClient( + 
ctx context.Context, pod *corev1.Pod, pool *tfv1.GPUPool, isLocalGPU bool, @@ -392,7 +420,7 @@ func addConnectionForRemoteFixedReplicaVirtualGPU(pod *corev1.Pod, container *co if pod.GenerateName == "" && pod.Name != "" { prefix = pod.Name + constants.TFConnectionNamePrefix } else { - prefix = pod.GenerateName + constants.TFConnectionNamePrefix + prefix = pod.GenerateName + constants.TFConnectionNameNoPrefix } connectionName := fmt.Sprintf("%s%s", prefix, utils.NewShortID(10)) connectionNamespace := pod.Namespace @@ -516,16 +544,17 @@ func (m *TensorFusionPodMutator) assignClusterHostPortFromLeader(pod *corev1.Pod } func calculateQoSLevel(profile *tfv1.WorkloadProfileSpec, pool *tfv1.GPUPool) tfv1.QoSLevel { - sameReqLimits := profile.Resources.Limits.Tflops.Cmp(profile.Resources.Requests.Tflops) == 0 && - profile.Resources.Limits.Vram.Cmp(profile.Resources.Requests.Vram) == 0 - - // set to critical if req == limits, same logic as Kubernetes QoS - if sameReqLimits { - return constants.QoSLevelCritical - } - // when not set, assign default QoS if profile.Qos == "" { + sameReqLimits := profile.Resources.Limits.Tflops.Cmp(profile.Resources.Requests.Tflops) == 0 && + profile.Resources.Limits.Vram.Cmp(profile.Resources.Requests.Vram) == 0 + + // set to high if req == limits, same logic as Kubernetes QoS + // critical QoS can preempt other pods, have to be set manually + if sameReqLimits { + return constants.QoSLevelHigh + } + if pool.Spec.QosConfig == nil || pool.Spec.QosConfig.DefaultQoS == "" { return constants.QoSLevelMedium } diff --git a/internal/webhook/v1/pod_webhook_dra_test.go b/internal/webhook/v1/pod_webhook_dra_test.go new file mode 100644 index 00000000..e6fce827 --- /dev/null +++ b/internal/webhook/v1/pod_webhook_dra_test.go @@ -0,0 +1,393 @@ +package v1 + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" +) + +func TestDRAProcessor_IsDRAEnabled(t *testing.T) { + tests := []struct { + name string + processorDRA bool + podAnnotations map[string]string + expected bool + }{ + { + name: "global DRA enabled, no pod annotation", + processorDRA: true, + expected: true, + }, + { + name: "global DRA disabled, no pod annotation", + processorDRA: false, + expected: false, + }, + { + name: "global DRA disabled, pod annotation enabled", + processorDRA: false, + podAnnotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.TrueStringValue, + }, + expected: true, + }, + { + name: "global DRA enabled, pod annotation disabled", + processorDRA: true, + podAnnotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.FalseStringValue, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + processor := &DRAProcessor{ + enableDRA: tt.processorDRA, + configLoaded: true, // Skip config loading in tests + } + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: tt.podAnnotations, + }, + } + + result := processor.IsDRAEnabled(context.Background(), pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +func 
TestDRAProcessor_HandleDRAAdmission(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, tfv1.AddToScheme(scheme)) + + // Create a SchedulingConfigTemplate with DRA config + template := &tfv1.SchedulingConfigTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-template", + }, + Spec: tfv1.SchedulingConfigTemplateSpec{ + DRA: &tfv1.DRAConfig{ + Enable: &[]bool{true}[0], + ResourceClaimTemplateName: "custom-gpu-template", + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(template). + Build() + + processor := &DRAProcessor{ + Client: fakeClient, + } + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "test-container"}, + }, + }, + } + + tfInfo := &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 1, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("10"), + Vram: resource.MustParse("8Gi"), + }, + }, + }, + } + + containerIndices := []int{0} + + // Test HandleDRAAdmission + err := processor.HandleDRAAdmission(context.Background(), pod, tfInfo, containerIndices) + require.NoError(t, err) + + // Verify CEL expression is stored in Pod annotation + celExpression := pod.Annotations[constants.DRACelExpressionAnnotation] + require.NotEmpty(t, celExpression) + assert.Contains(t, celExpression, `device.attributes["tflops"].quantity >= quantity("10")`) + assert.Contains(t, celExpression, `device.attributes["vram"].quantity >= quantity("8Gi")`) + + // Verify DRA enabled annotation is set + assert.Equal(t, constants.TrueStringValue, pod.Annotations[constants.DRAEnabledAnnotation]) + + // Verify ResourceClaimTemplate reference is added to Pod + require.Len(t, pod.Spec.ResourceClaims, 1) + podClaim := pod.Spec.ResourceClaims[0] + assert.Equal(t, constants.DRAClaimDefineName, podClaim.Name) + require.NotNil(t, podClaim.ResourceClaimTemplateName) + assert.Equal(t, "custom-gpu-template", *podClaim.ResourceClaimTemplateName) + + // Verify processor has cached the ResourceClaimTemplateName + assert.Equal(t, "custom-gpu-template", processor.resourceClaimTemplateName) +} + +func TestBuildCELSelector(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + tfInfo *utils.TensorFusionInfo + expectedConditions []string + unexpectedConditions []string + }{ + { + name: "Basic resource filters", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 2, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("20"), + Vram: resource.MustParse("16Gi"), + }, + }, + GPUModel: "H100", + }, + }, + expectedConditions: []string{ + `device.attributes["tflops"].quantity >= quantity("20")`, + `device.attributes["vram"].quantity >= quantity("16Gi")`, + `device.attributes["model"] == "H100"`, + `int(device.attributes["gpu_count"]) >= 2`, + `device.attributes["pod_namespace"] == "test-namespace"`, + }, + }, + { + name: "All filters including pool and workload", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "production", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 1, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("10"), + Vram: resource.MustParse("8Gi"), + }, + }, + GPUModel: 
"A100", + PoolName: "high-priority", + }, + WorkloadName: "ml-training-job", + }, + expectedConditions: []string{ + `device.attributes["tflops"].quantity >= quantity("10")`, + `device.attributes["vram"].quantity >= quantity("8Gi")`, + `device.attributes["model"] == "A100"`, + `int(device.attributes["gpu_count"]) >= 1`, + `device.attributes["pool_name"] == "high-priority"`, + `device.attributes["workload_name"] == "ml-training-job"`, + `device.attributes["workload_namespace"] == "production"`, + `device.attributes["pod_namespace"] == "production"`, + }, + }, + { + name: "Zero resources fallback to default condition", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 0, // Zero count should not add condition + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + // Zero resources + }, + }, + }, + }, + expectedConditions: []string{ + `device.attributes["pod_namespace"] == "default"`, + }, + }, + { + name: "Empty resources fallback to basic condition", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + // All empty/zero values + }, + }, + expectedConditions: []string{ + `device.attributes.exists("type")`, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + celExpression, err := BuildCELSelector(tt.pod, tt.tfInfo) + require.NoError(t, err) + require.NotEmpty(t, celExpression) + + // Verify expected conditions are present + for _, condition := range tt.expectedConditions { + assert.Contains(t, celExpression, condition, "Expected condition not found: %s", condition) + } + + // Verify unexpected conditions are not present + for _, condition := range tt.unexpectedConditions { + assert.NotContains(t, celExpression, condition, "Unexpected condition found: %s", condition) + } + + // Verify proper AND joining (unless it's the fallback condition) + if len(tt.expectedConditions) > 1 { + assert.Contains(t, celExpression, " && ", "Conditions should be joined with &&") + } + }) + } +} + +func TestHasDRAClaim(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + expected bool + }{ + { + name: "pod with resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{ + {Name: "gpu-claim"}, + }, + }, + }, + expected: true, + }, + { + name: "pod without resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{}, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := HasDRAClaim(tt.pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestDRAProcessor_LazyConfigLoading(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, tfv1.AddToScheme(scheme)) + + tests := []struct { + name string + templates []tfv1.SchedulingConfigTemplate + expected bool + }{ + { + name: "DRA enabled in template", + templates: []tfv1.SchedulingConfigTemplate{ + { + ObjectMeta: metav1.ObjectMeta{Name: "template1"}, + Spec: tfv1.SchedulingConfigTemplateSpec{ + DRA: &tfv1.DRAConfig{ + Enable: &[]bool{true}[0], + ResourceClaimTemplateName: "test-gpu-template", + }, + }, + }, + }, + expected: true, + }, + { + name: "DRA disabled in template", + templates: []tfv1.SchedulingConfigTemplate{ + { + ObjectMeta: metav1.ObjectMeta{Name: "template1"}, + Spec: tfv1.SchedulingConfigTemplateSpec{ + DRA: 
&tfv1.DRAConfig{ + Enable: &[]bool{false}[0], + }, + }, + }, + }, + expected: false, + }, + { + name: "no templates", + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := make([]client.Object, len(tt.templates)) + for i, template := range tt.templates { + objects[i] = &template + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + Build() + + processor := &DRAProcessor{ + Client: fakeClient, + } + + // Test lazy loading by calling a method that triggers config loading + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{}, + }, + } + + result := processor.IsDRAEnabled(context.Background(), pod) + assert.Equal(t, tt.expected, result) + + // Verify config was loaded + assert.True(t, processor.configLoaded) + }) + } +} diff --git a/internal/webhook/v1/pod_webhook_test.go b/internal/webhook/v1/pod_webhook_test.go index 55f29233..6f01b3ed 100644 --- a/internal/webhook/v1/pod_webhook_test.go +++ b/internal/webhook/v1/pod_webhook_test.go @@ -25,6 +25,7 @@ import ( tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/samber/lo" @@ -57,8 +58,9 @@ var _ = Describe("TensorFusionPodMutator", func() { decoder = admission.NewDecoder(scheme) mutator = &TensorFusionPodMutator{ - Client: k8sClient, - decoder: decoder, + Client: k8sClient, + decoder: decoder, + draProcessor: NewDRAProcessor(k8sClient), } }) @@ -240,6 +242,56 @@ var _ = Describe("TensorFusionPodMutator", func() { Expect(resp.Patches).To(BeEmpty()) }) + It("should handle dedicated GPU", func() { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-local-gpu", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionEnabledLabelKey: "true", + }, + Annotations: map[string]string{ + constants.DedicatedGPUAnnotation: constants.TrueStringValue, + constants.GPUModelAnnotation: "A100", + constants.GpuPoolKey: "mock", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "main", + Image: "test-image", + }, + }, + }, + } + podBytes, err := json.Marshal(pod) + Expect(err).NotTo(HaveOccurred()) + req := admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Object: runtime.RawExtension{ + Raw: podBytes, + }, + Operation: admissionv1.Create, + Namespace: "default", + }, + } + + gpuallocator.GPUCapacityMap["A100"] = tfv1.Resource{ + Tflops: resource.MustParse("312"), + Vram: resource.MustParse("40Gi"), + } + resp := mutator.Handle(ctx, req) + Expect(resp.Allowed).To(BeTrue()) + + op, found := lo.Find(resp.Patches, func(patch jsonpatch.JsonPatchOperation) bool { + return patch.Operation == "add" && + patch.Path == "/metadata/annotations/tensor-fusion.ai~1tflops-request" + }) + Expect(found).To(BeTrue()) + Expect(op.Value).To(Equal("312")) + }) + It("should handle invalid pod specification", func() { req := admission.Request{ AdmissionRequest: admissionv1.AdmissionRequest{ @@ -532,7 +584,7 @@ var _ = Describe("TensorFusionPodMutator", func() { }, }, } - tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, pod) + tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, mutator.draProcessor, pod) Expect(err).NotTo(HaveOccurred()) Expect(tfInfo.ContainerNames).To(HaveLen(1)) 
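// --- Editorial sketch (not part of the patch) ---------------------------------
// What the dedicated-GPU path in the test above boils down to: look up the full
// capacity of the requested model and pin requests and limits to it, so the
// workload owns the whole card. The capacity table here is a local stand-in for
// gpuallocator.GPUCapacityMap; the A100 numbers match the test values.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

type capacity struct {
	Tflops resource.Quantity
	Vram   resource.Quantity
}

var capacityByModel = map[string]capacity{
	"A100": {Tflops: resource.MustParse("312"), Vram: resource.MustParse("40Gi")},
}

// fullCapacityFor returns the request/limit pair for a dedicated GPU of the
// given model, or an error when the model is unknown.
func fullCapacityFor(model string) (capacity, error) {
	c, ok := capacityByModel[model]
	if !ok {
		return capacity{}, fmt.Errorf("no capacity information for GPU model %q", model)
	}
	return c, nil
}

func main() {
	c, err := fullCapacityFor("A100")
	if err != nil {
		panic(err)
	}
	// Requests and limits end up identical: 312 TFLOPS and 40Gi VRAM.
	fmt.Println(c.Tflops.String(), c.Vram.String())
}
// --- End of sketch -------------------------------------------------------------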
Expect(tfInfo.ContainerNames[0]).To(Equal("test-container")) @@ -564,7 +616,7 @@ var _ = Describe("TensorFusionPodMutator", func() { currentBytes, err := json.Marshal(pod) Expect(err).NotTo(HaveOccurred()) - patch, err := mutator.patchTFClient(pod, pool, false, currentBytes, []int{0}) + patch, err := mutator.patchTFClient(context.Background(), pod, pool, false, currentBytes, []int{0}) Expect(err).NotTo(HaveOccurred()) Expect(patch).NotTo(BeEmpty()) // There should be at least 2 patches (initContainers and the container env patches) diff --git a/internal/webhook/v1/tf_parser.go b/internal/webhook/v1/tf_parser.go index bf805b76..14f1ad3a 100644 --- a/internal/webhook/v1/tf_parser.go +++ b/internal/webhook/v1/tf_parser.go @@ -8,6 +8,7 @@ import ( tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/utils" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -28,6 +29,7 @@ type TFResource struct { func ParseTensorFusionInfo( ctx context.Context, k8sClient client.Client, + draProcessor *DRAProcessor, pod *corev1.Pod, ) (utils.TensorFusionInfo, error) { var info utils.TensorFusionInfo @@ -115,6 +117,16 @@ func ParseTensorFusionInfo( workloadProfile.Spec.GPUModel = gpuModel } + // Parse DRA enabled annotation + if draProcessor.IsDRAEnabled(ctx, pod) { + info.DRAEnabled = true + } + // Handle dedicated GPU logic + err = handleDedicatedGPU(pod, workloadProfile) + if err != nil { + return info, fmt.Errorf("handle dedicated GPU: %w", err) + } + info.Profile = &workloadProfile.Spec info.ContainerNames = containerNames return info, nil @@ -227,3 +239,29 @@ func setDefaultQuotasIfExists(workloadProfile *tfv1.WorkloadProfile, single tfv1 } } } + +// handleDedicatedGPU handles dedicated GPU annotation by setting full GPU capacity +func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile) error { + dedicatedGPU, ok := pod.Annotations[constants.DedicatedGPUAnnotation] + if !ok || dedicatedGPU != constants.TrueStringValue { + return nil // Not a dedicated GPU request + } + + // Must have GPU model specified for dedicated GPU + if workloadProfile.Spec.GPUModel == "" { + return fmt.Errorf("dedicated GPU requires gpu-model annotation to be specified") + } + + // Get full GPU capacity from pricing provider + resource, found := gpuallocator.GPUCapacityMap[workloadProfile.Spec.GPUModel] + if !found { + return fmt.Errorf("could not find capacity information for GPU model: %s", workloadProfile.Spec.GPUModel) + } + + // Set full capacity for both requests and limits + workloadProfile.Spec.Resources.Requests.Tflops = resource.Tflops + workloadProfile.Spec.Resources.Requests.Vram = resource.Vram + workloadProfile.Spec.Resources.Limits.Tflops = resource.Tflops + workloadProfile.Spec.Resources.Limits.Vram = resource.Vram + return nil +} diff --git a/internal/webhook/v1/webhook_suite_test.go b/internal/webhook/v1/webhook_suite_test.go index 4e5d369b..26a6685d 100644 --- a/internal/webhook/v1/webhook_suite_test.go +++ b/internal/webhook/v1/webhook_suite_test.go @@ -27,6 +27,7 @@ import ( "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/portallocator" . 
"github.com/onsi/ginkgo/v2" @@ -134,11 +135,13 @@ var _ = BeforeSuite(func() { }) Expect(err).NotTo(HaveOccurred()) + // Create a mock pricing provider for testing + mockPricingProvider := &pricing.StaticPricingProvider{} err = SetupPodWebhookWithManager(mgr, &portallocator.PortAllocator{ PortRangeStartCluster: 42000, PortRangeEndCluster: 62000, BitmapCluster: make([]uint64, (62000-42000)/64+1), - }) + }, mockPricingProvider) Expect(err).NotTo(HaveOccurred()) // +kubebuilder:scaffold:webhook diff --git a/patches/scheduler-csi-capacity-3.patch b/patches/scheduler-csi-capacity-3.patch index 29a21ae8..c5841d08 100644 --- a/patches/scheduler-csi-capacity-3.patch +++ b/patches/scheduler-csi-capacity-3.patch @@ -9,11 +9,11 @@ "strings" "time" -@@ -514,6 +516,14 @@ +@@ -543,6 +545,14 @@ } handlers = append(handlers, handlerRegistration) - case framework.CSIStorageCapacity: -+ // FIX kubernetes 1.24 and lower version API missing issue + case fwk.CSIStorageCapacity: ++ // FIX kubernetes 1.23 and lower version API missing issue + minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") + if minorVersionStr != "" { + minorVersion, err := strconv.Atoi(minorVersionStr) @@ -22,5 +22,50 @@ + } + } if handlerRegistration, err = informerFactory.Storage().V1().CSIStorageCapacities().Informer().AddEventHandler( - buildEvtResHandler(at, framework.CSIStorageCapacity), + buildEvtResHandler(at, fwk.CSIStorageCapacity), ); err != nil { +@@ -578,6 +588,14 @@ + } + handlers = append(handlers, handlerRegistration) + case fwk.ResourceClaim: ++ // FIX kubernetes lower version API missing issue ++ minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") ++ if minorVersionStr != "" { ++ minorVersion, err := strconv.Atoi(minorVersionStr) ++ if err != nil || minorVersion < 34 { ++ continue ++ } ++ } + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + handlerRegistration = resourceClaimCache.AddEventHandler( + buildEvtResHandler(at, fwk.ResourceClaim), +@@ -585,6 +603,14 @@ + handlers = append(handlers, handlerRegistration) + } + case fwk.ResourceSlice: ++ // FIX kubernetes lower version API missing issue ++ minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") ++ if minorVersionStr != "" { ++ minorVersion, err := strconv.Atoi(minorVersionStr) ++ if err != nil || minorVersion < 34 { ++ continue ++ } ++ } + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + if handlerRegistration, err = resourceSliceTracker.AddEventHandler( + buildEvtResHandler(at, fwk.ResourceSlice), +@@ -594,6 +620,14 @@ + handlers = append(handlers, handlerRegistration) + } + case fwk.DeviceClass: ++ // FIX kubernetes lower version API missing issue ++ minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") ++ if minorVersionStr != "" { ++ minorVersion, err := strconv.Atoi(minorVersionStr) ++ if err != nil || minorVersion < 34 { ++ continue ++ } ++ } + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + if handlerRegistration, err = informerFactory.Resource().V1().DeviceClasses().Informer().AddEventHandler( + buildEvtResHandler(at, fwk.DeviceClass), diff --git a/patches/scheduler-pdb-1.patch b/patches/scheduler-pdb-1.patch index ae9b966e..3a35e841 100644 --- a/patches/scheduler-pdb-1.patch +++ b/patches/scheduler-pdb-1.patch @@ -1,16 +1,38 @@ ---- ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-08-06 17:45:27 -+++ 
../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-08-06 17:45:19 -@@ -20,7 +20,9 @@ +--- ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-09-15 17:45:27 ++++ ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-09-15 17:45:19 +@@ -20,7 +20,10 @@ "context" "fmt" "math/rand" + "os" "sort" + "strconv" ++ "time" v1 "k8s.io/api/core/v1" policy "k8s.io/api/policy/v1" -@@ -364,5 +366,13 @@ +@@ -119,6 +122,20 @@ + // Default behavior: No additional filtering, beyond the internal requirement that the victim pod + // have lower priority than the preemptor pod. + pl.IsEligiblePod = func(nodeInfo fwk.NodeInfo, victim fwk.PodInfo, preemptor *v1.Pod) bool { ++ victimAnnotation := victim.GetPod().Annotations ++ if victimAnnotation == nil { ++ return true ++ } ++ if protectionPeriod, ok := victimAnnotation["tensor-fusion.ai/eviction-protection"]; ok { ++ duration, err := time.ParseDuration(protectionPeriod) ++ if err != nil { ++ return true ++ } ++ // Still in protection period, not allow to preempt ++ if time.Now().Before(victim.GetPod().CreationTimestamp.Add(duration)) { ++ return false ++ } ++ } + return true + } + +@@ -430,5 +447,13 @@ } func getPDBLister(informerFactory informers.SharedInformerFactory) policylisters.PodDisruptionBudgetLister { @@ -24,3 +46,4 @@ + } return informerFactory.Policy().V1().PodDisruptionBudgets().Lister() } + \ No newline at end of file diff --git a/patches/scheduler-pdb-2.patch b/patches/scheduler-pdb-2.patch index 810bb0c6..12af371e 100644 --- a/patches/scheduler-pdb-2.patch +++ b/patches/scheduler-pdb-2.patch @@ -9,22 +9,14 @@ "sync" "sync/atomic" "time" -@@ -34,6 +36,7 @@ - "k8s.io/apimachinery/pkg/util/sets" - corelisters "k8s.io/client-go/listers/core/v1" - policylisters "k8s.io/client-go/listers/policy/v1" -+ policyv1 "k8s.io/client-go/listers/policy/v1" - corev1helpers "k8s.io/component-helpers/scheduling/corev1" - "k8s.io/klog/v2" - extenderv1 "k8s.io/kube-scheduler/extender/v1" -@@ -145,7 +148,16 @@ +@@ -148,8 +150,17 @@ func NewEvaluator(pluginName string, fh framework.Handle, i Interface, enableAsyncPreemption bool) *Evaluator { podLister := fh.SharedInformerFactory().Core().V1().Pods().Lister() - pdbLister := fh.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister() -+ + + // FIX kubernetes 1.21 and lower version API missing issue -+ var pdbLister policyv1.PodDisruptionBudgetLister ++ var pdbLister policylisters.PodDisruptionBudgetLister + minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") + if minorVersionStr != "" { + minorVersion, err := strconv.Atoi(minorVersionStr) @@ -32,6 +24,7 @@ + pdbLister = fh.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister() + } + } - ++ ev := &Evaluator{ PluginName: pluginName, + Handler: fh, diff --git a/scripts/patch-scheduler.sh b/scripts/patch-scheduler.sh index 9afbc43b..23c2837d 100755 --- a/scripts/patch-scheduler.sh +++ b/scripts/patch-scheduler.sh @@ -1,6 +1,10 @@ #!/bin/bash git apply ./patches/scheduler-csi-capacity-1.patch git apply ./patches/scheduler-csi-capacity-2.patch + +# diff -u eventhandlers.go eventhandlers-new.go > changes.patch git apply ./patches/scheduler-csi-capacity-3.patch git apply ./patches/scheduler-pdb-1.patch + +# diff -u original_file.go modified_file.go > changes.patch git apply ./patches/scheduler-pdb-2.patch \ No newline at end of file diff --git a/test/sched/gpufit_bench_test.go 
b/test/sched/gpufit_bench_test.go index 20be047e..147d31e8 100644 --- a/test/sched/gpufit_bench_test.go +++ b/test/sched/gpufit_bench_test.go @@ -20,7 +20,6 @@ func BenchmarkGPUFitPlugin(b *testing.B) { NumNodes: 500, NumGPUs: 3000, NumPods: 10000, - BatchSize: 1, PoolName: "test-pool", Namespace: "test-ns", Timeout: 5 * time.Minute, @@ -42,7 +41,7 @@ func BenchmarkGPUFitPlugin(b *testing.B) { break } testPod := fixture.pods[i] - fixture.plugin.PreFilter(fixture.ctx, state, testPod) + fixture.plugin.PreFilter(fixture.ctx, state, testPod, nil) filterResult, err := state.Read(gpuResourceFitPlugin.CycleStateGPUSchedulingResult) if err != nil { b.Fatal(err) @@ -82,7 +81,7 @@ func BenchmarkGPUFitPlugin(b *testing.B) { b.Run("Filter", func(b *testing.B) { state := framework.NewCycleState() - fixture.plugin.PreFilter(fixture.ctx, state, testPod) + fixture.plugin.PreFilter(fixture.ctx, state, testPod, nil) nodeInfo := &framework.NodeInfo{} b.ResetTimer() @@ -94,7 +93,7 @@ func BenchmarkGPUFitPlugin(b *testing.B) { b.Run("Score", func(b *testing.B) { state := framework.NewCycleState() - fixture.plugin.PreFilter(fixture.ctx, state, testPod) + fixture.plugin.PreFilter(fixture.ctx, state, testPod, nil) nodeInfo := &framework.NodeInfo{} b.ResetTimer() diff --git a/test/sched/preemption_test.go b/test/sched/preemption_test.go new file mode 100644 index 00000000..1715d61b --- /dev/null +++ b/test/sched/preemption_test.go @@ -0,0 +1,299 @@ +package sched + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + "github.com/NexusGPU/tensor-fusion/cmd/sched" + "github.com/NexusGPU/tensor-fusion/internal/constants" + gpuResourceFitPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gpuresources" + gpuTopoPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gputopo" + "github.com/NexusGPU/tensor-fusion/internal/utils" + "github.com/stretchr/testify/require" + "go.uber.org/zap/zapcore" + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2" + "k8s.io/kubernetes/cmd/kube-scheduler/app" + "k8s.io/kubernetes/pkg/scheduler" + st "k8s.io/kubernetes/pkg/scheduler/testing" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/log/zap" +) + +// PreemptionTestSuite holds common test setup for preemption tests +type PreemptionTestSuite struct { + ctx context.Context + cancel context.CancelFunc + k8sClient client.Client + scheduler *scheduler.Scheduler + fixture *BenchmarkFixture + testEnv *envtest.Environment + kubeconfigPath string +} + +// SetupSuite initializes the test environment for preemption tests +func (pts *PreemptionTestSuite) SetupSuite(t *testing.T) { + klog.SetLogger(zap.New(zap.WriteTo(discardWriter{}), zap.UseDevMode(false), zap.Level(zapcore.InfoLevel))) + + // Setup test environment + ver, cfg, err := setupKubernetes() + require.NoError(t, err) + pts.testEnv = testEnv + + kubeconfigPath, err := writeKubeconfigToTempFileAndSetEnv(cfg) + require.NoError(t, err) + pts.kubeconfigPath = kubeconfigPath + + k8sClient, err := client.New(cfg, client.Options{Scheme: scheme.Scheme}) + require.NoError(t, err) + pts.k8sClient = k8sClient + + // Configure test with limited resources for preemption scenarios + benchConfig := BenchmarkConfig{ + NumNodes: 2, + NumGPUs: 4, + PoolName: "preemption-test-pool", + Namespace: "preemption-test-ns", + Timeout: 1 * time.Minute, + } + + mockBench := &testing.B{} + fixture := NewBenchmarkFixture(mockBench, benchConfig, k8sClient, true) + pts.fixture 
= fixture + + utils.SetProgressiveMigration(false) + + gpuResourceFitOpt := app.WithPlugin( + gpuResourceFitPlugin.Name, + gpuResourceFitPlugin.NewWithDeps(fixture.allocator, fixture.client), + ) + gpuTopoOpt := app.WithPlugin( + gpuTopoPlugin.Name, + gpuTopoPlugin.NewWithDeps(fixture.allocator, fixture.client), + ) + + ctx, cancel := context.WithCancel(context.Background()) + pts.ctx = ctx + pts.cancel = cancel + + cc, scheduler, err := sched.SetupScheduler(ctx, nil, + "../../config/samples/scheduler-config.yaml", true, ver, gpuResourceFitOpt, gpuTopoOpt) + require.NoError(t, err) + pts.scheduler = scheduler + scheduler.SchedulingQueue.Run(klog.FromContext(ctx)) + + // Start scheduler components + cc.EventBroadcaster.StartRecordingToSink(ctx.Done()) + cc.InformerFactory.Start(ctx.Done()) + cc.InformerFactory.WaitForCacheSync(ctx.Done()) + require.NoError(t, scheduler.WaitForHandlersSync(ctx)) +} + +// TearDownSuite cleans up the test environment +func (pts *PreemptionTestSuite) TearDownSuite(t *testing.T) { + if pts.cancel != nil { + pts.cancel() + } + if pts.fixture != nil { + pts.fixture.Close() + } + if pts.kubeconfigPath != "" { + require.NoError(t, cleanupKubeconfigTempFile(pts.kubeconfigPath)) + } + if pts.testEnv != nil { + require.NoError(t, pts.testEnv.Stop()) + } +} + +// discardWriter implements io.Writer to discard log output during tests +type discardWriter struct{} + +func (discardWriter) Write(p []byte) (n int, err error) { + return len(p), nil +} + +// TestPreemption tests comprehensive preemption scenarios +func TestPreemption(t *testing.T) { + suite := &PreemptionTestSuite{} + suite.SetupSuite(t) + defer suite.TearDownSuite(t) + testGPUResourcePreemption(t, suite) +} + +// TestPreemptionEvictProtection tests comprehensive preemption scenarios +func TestPreemptionEvictProtection(t *testing.T) { + suite := &PreemptionTestSuite{} + suite.SetupSuite(t) + defer suite.TearDownSuite(t) + testGPUResourceEvictProtection(t, suite) +} + +// testGPUResourcePreemption tests GPU shortage detection logic +func testGPUResourcePreemption(t *testing.T, suite *PreemptionTestSuite) { + // Mock cluster resources + // {"2250", "141Gi"}, // Simulate B200 + // {"989", "80Gi"}, // Simulate H100 + // {"450", "48Gi"}, // Simulate L40s + // {"312", "40Gi"}, // Simulate A100 + + // Create pods that will exhaust resources + toBeVictimPods := createPreemptionTestPodsWithQoS("victim", constants.QoSLevelMedium, 7+3+1+1, "300", "1Gi") + + for _, pod := range toBeVictimPods { + require.NoError(t, suite.k8sClient.Create(suite.ctx, pod)) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, pod) + }() + } + + // Try scheduling all pending pods + for range 12 { + suite.scheduler.ScheduleOne(suite.ctx) + } + + // schedule high priority pod + highPriorityPod := createPreemptionTestPodsWithQoS("high-priority", constants.QoSLevelHigh, 1, "300", "1Gi")[0] + require.NoError(t, suite.k8sClient.Create(suite.ctx, highPriorityPod)) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, highPriorityPod) + }() + + suite.scheduler.ScheduleOne(suite.ctx) + + // schedule critical priority pod + criticalPriorityPod := createPreemptionTestPodsWithQoS( + "critical-priority", constants.QoSLevelCritical, 1, "300", "1Gi")[0] + require.NoError(t, suite.k8sClient.Create(suite.ctx, criticalPriorityPod)) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, criticalPriorityPod) + }() + suite.scheduler.ScheduleOne(suite.ctx) + + // Preemption should be triggered and victims deleted, wait informer sync + time.Sleep(1 * 
time.Second) + + podList := &v1.PodList{} + err := suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + scheduledNodeMap := make(map[string]string) + for _, pod := range podList.Items { + scheduledNodeMap[pod.Name] = pod.Spec.NodeName + } + // 2 Pods deleted, 14 - 2 = 12 + require.Equal(t, 12, len(podList.Items)) + + // without Pod Controller, directly reconcile all state to simulate the Pod deletion + suite.fixture.allocator.ReconcileAllocationStateForTesting() + + // Trigger next 2 scheduling cycle, make sure the two higher priority pods are scheduled + suite.scheduler.ScheduleOne(suite.ctx) + suite.scheduler.ScheduleOne(suite.ctx) + + time.Sleep(1 * time.Second) + + err = suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + for _, pod := range podList.Items { + if strings.Contains(pod.Name, "victim") { + continue + } + scheduledNodeMap[pod.Name] = pod.Spec.NodeName + } + // not empty indicates the high priority pod is scheduled + require.NotEmpty(t, scheduledNodeMap["high-priority-0"]) + require.NotEmpty(t, scheduledNodeMap["critical-priority-0"]) +} + +func testGPUResourceEvictProtection(t *testing.T, suite *PreemptionTestSuite) { + toBeVictimPods := createPreemptionTestPodsWithQoS("victim", constants.QoSLevelMedium, 1, "2000", "2Gi") + toBeVictimPods[0].Annotations[constants.EvictionProtectionAnnotation] = "2s" + require.NoError(t, suite.k8sClient.Create(suite.ctx, toBeVictimPods[0])) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, toBeVictimPods[0]) + }() + + suite.scheduler.ScheduleOne(suite.ctx) + + toBeVictimPods = createPreemptionTestPodsWithQoS("high-priority", constants.QoSLevelHigh, 1, "2000", "2Gi") + require.NoError(t, suite.k8sClient.Create(suite.ctx, toBeVictimPods[0])) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, toBeVictimPods[0]) + }() + + // should not evict since it's inside protection period + suite.scheduler.ScheduleOne(suite.ctx) + + podList := &v1.PodList{} + err := suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + require.Equal(t, 2, len(podList.Items)) + + // should evict since protection period over + time.Sleep(2 * time.Second) + suite.scheduler.ScheduleOne(suite.ctx) + + suite.fixture.allocator.ReconcileAllocationStateForTesting() + + // Should schedule the new high priority pod + suite.scheduler.ScheduleOne(suite.ctx) + // waiting for binding cycle take effect + time.Sleep(300 * time.Millisecond) + + podList = &v1.PodList{} + err = suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + require.Equal(t, 1, len(podList.Items)) + require.Equal(t, "high-priority-0", podList.Items[0].Name) + require.Equal(t, "node-0", podList.Items[0].Spec.NodeName) +} + +// Helper functions +func createPreemptionTestPodsWithQoS(baseName, qosLevel string, count int, tflops, vram string) []*v1.Pod { + pods := make([]*v1.Pod, count) + for i := 0; i < count; i++ { + pod := st.MakePod(). + Namespace("preemption-test-ns"). + Name(fmt.Sprintf("%s-%d", baseName, i)). + UID(fmt.Sprintf("%s-%d", baseName, i)). + SchedulerName("tensor-fusion-scheduler"). + Res(map[v1.ResourceName]string{ + v1.ResourceCPU: "100m", + v1.ResourceMemory: "256Mi", + }). + Toleration("node.kubernetes.io/not-ready"). 
+ ZeroTerminationGracePeriod().Obj() + + pod.Labels = map[string]string{ + constants.LabelComponent: constants.ComponentWorker, + constants.WorkloadKey: "test-workload", + } + + pod.Annotations = map[string]string{ + constants.GpuPoolKey: "preemption-test-pool", + constants.QoSLevelAnnotation: qosLevel, + constants.TFLOPSRequestAnnotation: tflops, + constants.VRAMRequestAnnotation: vram, + constants.TFLOPSLimitAnnotation: tflops, + constants.VRAMLimitAnnotation: vram, + constants.GpuCountAnnotation: "1", + } + pod.Spec.PriorityClassName = "tensor-fusion-" + qosLevel + + pods[i] = pod + } + return pods +} + +// func createPreemptionTestPodsWithEvictionProtection( +// namespace, baseName, qosLevel, protectionDuration string, count int, tflops, vram string) []*v1.Pod { +// pods := createPreemptionTestPodsWithQoS(namespace, baseName, qosLevel, count, tflops, vram) +// for _, pod := range pods { +// pod.Annotations[constants.EvictionProtectionAnnotation] = protectionDuration +// } +// return pods +// } diff --git a/test/sched/scheduler_bench_test.go b/test/sched/scheduler_bench_test.go index 65f43a13..bbed548f 100644 --- a/test/sched/scheduler_bench_test.go +++ b/test/sched/scheduler_bench_test.go @@ -6,14 +6,18 @@ import ( "os" "path/filepath" "runtime" + "strings" "testing" "time" "github.com/NexusGPU/tensor-fusion/cmd/sched" + "github.com/NexusGPU/tensor-fusion/internal/constants" gpuResourceFitPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gpuresources" gpuTopoPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gputopo" "github.com/NexusGPU/tensor-fusion/internal/utils" "go.uber.org/zap/zapcore" + "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -32,16 +36,27 @@ func defaultBenchmarkConfig() BenchmarkConfig { NumNodes: 1000, NumGPUs: 4000, NumPods: 10000, - BatchSize: 100, PoolName: "benchmark-pool", Namespace: "benchmark-ns", - Timeout: 10 * time.Minute, + Timeout: 5 * time.Minute, } } var testEnv *envtest.Environment -func setupKubernetes() (*rest.Config, error) { +func setupKubernetes() (*version.Version, *rest.Config, error) { + // export ENVTEST_K8S_VERSION=1.34.0 + // Run `./bin/setup-envtest use ${ENVTEST_K8S_VERSION} --bin-dir ./bin` before running the test + k8sVersion := os.Getenv("ENVTEST_K8S_VERSION") + if k8sVersion == "" { + k8sVersion = "1.31.0" + } + majorVersion := k8sVersion[:strings.Index(k8sVersion, ".")] + minorVersion := k8sVersion[strings.Index(k8sVersion, ".")+1 : strings.LastIndex(k8sVersion, ".")] + _ = os.Setenv(constants.KubeApiVersionMajorEnv, majorVersion) + _ = os.Setenv(constants.KubeApiVersionMinorEnv, minorVersion) + ver := version.MustParse(k8sVersion) + _ = feature.DefaultMutableFeatureGate.SetEmulationVersion(ver) testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{ filepath.Join("..", "..", "config", "crd", "bases"), @@ -49,15 +64,14 @@ func setupKubernetes() (*rest.Config, error) { }, ErrorIfCRDPathMissing: true, - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. 
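// --- Editorial sketch (not part of the patch) ---------------------------------
// A condensed version of the eviction-protection check patched into
// default_preemption.go earlier in this diff and exercised by the test above:
// a victim carrying the "tensor-fusion.ai/eviction-protection" annotation may
// only be preempted once the duration, counted from its creation timestamp, has
// elapsed. Unparseable values fall through to "eligible", matching the patch.
package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// eligibleForPreemption reports whether the pod is outside its protection window.
func eligibleForPreemption(pod *corev1.Pod, now time.Time) bool {
	protection, ok := pod.Annotations["tensor-fusion.ai/eviction-protection"]
	if !ok {
		return true
	}
	d, err := time.ParseDuration(protection)
	if err != nil {
		return true // malformed annotation: do not block preemption
	}
	return !now.Before(pod.CreationTimestamp.Add(d))
}

func main() {
	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
		CreationTimestamp: metav1.Now(),
		Annotations:       map[string]string{"tensor-fusion.ai/eviction-protection": "2s"},
	}}
	fmt.Println(eligibleForPreemption(pod, time.Now()))                    // false: still protected
	fmt.Println(eligibleForPreemption(pod, time.Now().Add(5*time.Second))) // true: window elapsed
}
// --- End of sketch -------------------------------------------------------------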
BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", - fmt.Sprintf("1.31.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + fmt.Sprintf("%s-%s-%s", k8sVersion, runtime.GOOS, runtime.GOARCH)), + } + cfg, err := testEnv.Start() + if err != nil { + return nil, nil, err } - return testEnv.Start() + return ver, cfg, nil } // Estimated Performance: 400-500 pods/second for 1K nodes, 10K Pods cluster on Mac M4 Pro @@ -65,7 +79,7 @@ func setupKubernetes() (*rest.Config, error) { func BenchmarkScheduler(b *testing.B) { klog.SetLogger(zap.New(zap.WriteTo(os.Stderr), zap.UseDevMode(false), zap.Level(zapcore.ErrorLevel))) // Setup phase - runs once before all benchmark iterations - cfg, err := setupKubernetes() + ver, cfg, err := setupKubernetes() if err != nil { b.Fatal(err) } @@ -99,7 +113,7 @@ func BenchmarkScheduler(b *testing.B) { testCtx := ctx cc, scheduler, err := sched.SetupScheduler(testCtx, nil, - "../../config/samples/scheduler-config.yaml", true, gpuResourceFitOpt, gpuTopoOpt) + "../../config/samples/scheduler-config.yaml", true, ver, gpuResourceFitOpt, gpuTopoOpt) if err != nil { b.Fatal(err) } diff --git a/test/sched/setup.go b/test/sched/setup.go index 03e40bfa..5dc80e32 100644 --- a/test/sched/setup.go +++ b/test/sched/setup.go @@ -14,21 +14,27 @@ import ( gpuResourceFitPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gpuresources" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" + schedv1 "k8s.io/api/scheduling/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + informers "k8s.io/client-go/informers" + clientsetfake "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/events" "k8s.io/klog/v2" + internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache" + internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue" "k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort" frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - testutil "sigs.k8s.io/scheduler-plugins/test/util" ) // BenchmarkConfig holds benchmark configuration @@ -36,7 +42,6 @@ type BenchmarkConfig struct { NumNodes int NumGPUs int NumPods int - BatchSize int PoolName string Namespace string Timeout time.Duration @@ -85,23 +90,35 @@ func NewBenchmarkFixture( b.Logf("%d Pods created, Needed TFLOPS: %f, Needed VRAM: %f", len(pods), neededTflops, neededVRAM) // Batch create resources for better performance - batchCreateResources(b, ctx, client, nodes, gpus, pods, realAPIServer) + k8sNativeObjects := batchCreateResources(b, ctx, client, config.Namespace, nodes, gpus, pods, realAPIServer) // Setup allocator allocator := setupAllocator(b, ctx, client) // Setup framework and plugin - fwk, plugin := setupFrameworkAndPlugin(b, ctx, client, allocator, pods, nodes) - - return &BenchmarkFixture{ - ctx: ctx, - cancel: cancel, - plugin: plugin, - nodes: nodes, - pods: pods, - allocator: allocator, - client: client, - fwk: fwk, + if !realAPIServer { + fwk, plugin := setupFrameworkAndPlugin(b, ctx, client, allocator, k8sNativeObjects) + return &BenchmarkFixture{ + ctx: ctx, + cancel: cancel, + plugin: plugin, 
+ nodes: nodes, + pods: pods, + allocator: allocator, + client: client, + fwk: fwk, + } + } else { + return &BenchmarkFixture{ + ctx: ctx, + cancel: cancel, + plugin: nil, + nodes: nodes, + pods: pods, + allocator: allocator, + client: client, + fwk: nil, + } } } @@ -162,10 +179,10 @@ func generateGPUs(totalGPUs int, nodes []*v1.Node, poolName string) ([]*tfv1.GPU // Pre-define GPU specs to avoid repeated allocations gpuSpecs := []struct{ tflops, vram string }{ - {"2250", "141Gi"}, // High-end - {"989", "80Gi"}, // Mid-range - {"450", "48Gi"}, // Entry-level - {"312", "40Gi"}, // Budget + {"2250", "141Gi"}, // Simulate B200 + {"989", "80Gi"}, // Simulate H100 + {"450", "48Gi"}, // Simulate L40s + {"312", "40Gi"}, // Simulate A100 } gpuIndex := 0 @@ -271,11 +288,27 @@ func generatePods(count int, namespace, poolName string) ([]*v1.Pod, float64, fl // Helper functions for setup func batchCreateResources( - b *testing.B, ctx context.Context, client client.Client, + b *testing.B, ctx context.Context, client client.Client, namespace string, nodes []*v1.Node, gpus []*tfv1.GPU, pods []*v1.Pod, realAPIServer bool, -) { +) []runtime.Object { + // Create priority classes + require.NoError(b, client.Create(ctx, &schedv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "tensor-fusion-" + constants.QoSLevelCritical}, + Value: 100000, + })) + require.NoError(b, client.Create(ctx, &schedv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "tensor-fusion-" + constants.QoSLevelHigh}, + Value: 10000, + })) + require.NoError(b, client.Create(ctx, &schedv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "tensor-fusion-" + constants.QoSLevelMedium}, + Value: 100, + PreemptionPolicy: ptr.To(v1.PreemptNever), + })) + + k8sObjs := []runtime.Object{} require.NoError(b, client.Create(ctx, &v1.Namespace{ - ObjectMeta: metav1.ObjectMeta{Name: "benchmark-ns"}, + ObjectMeta: metav1.ObjectMeta{Name: namespace}, })) timer := time.Now() @@ -283,6 +316,7 @@ func batchCreateResources( for _, node := range nodes { nodeCopy := node.DeepCopy() require.NoError(b, client.Create(ctx, nodeCopy)) + k8sObjs = append(k8sObjs, nodeCopy) if realAPIServer { node.ResourceVersion = nodeCopy.ResourceVersion @@ -310,13 +344,15 @@ func batchCreateResources( b.Logf("Creating %d pods", len(pods)) for _, pod := range pods { require.NoError(b, client.Create(ctx, pod)) + k8sObjs = append(k8sObjs, pod) } b.Logf("%d pods created, duration: %v", len(pods), time.Since(timer)) + return k8sObjs } func setupFrameworkAndPlugin( b *testing.B, ctx context.Context, client client.Client, - allocator *gpuallocator.GpuAllocator, pods []*v1.Pod, nodes []*v1.Node, + allocator *gpuallocator.GpuAllocator, k8sObjs []runtime.Object, ) (framework.Framework, *gpuResourceFitPlugin.GPUFit) { // Register plugins including our GPU plugin registeredPlugins := []tf.RegisterPluginFunc{ @@ -324,11 +360,16 @@ func setupFrameworkAndPlugin( tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), } - // Create framework - fwk, err := tf.NewFramework(ctx, registeredPlugins, "", - frameworkruntime.WithPodNominator(testutil.NewPodNominator(nil)), - frameworkruntime.WithSnapshotSharedLister(testutil.NewFakeSharedLister(pods, nodes)), + fakeClientSet := clientsetfake.NewSimpleClientset(k8sObjs...) 
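+	// the fake clientset is seeded with the nodes and pods created above, so the
+	// informer-backed scheduling queue below can act as the pod nominator without
+	// a real API server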
+ informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0) + metrics.Register() + metricsRecorder := metrics.NewMetricsAsyncRecorder(1000, time.Second, ctx.Done()) + fwk, err := tf.NewFramework( + ctx, registeredPlugins, "", + frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)), + frameworkruntime.WithSnapshotSharedLister(internalcache.NewEmptySnapshot()), frameworkruntime.WithEventRecorder(&events.FakeRecorder{}), + frameworkruntime.WithMetricsRecorder(metricsRecorder), ) require.NoError(b, err)
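+	// NOTE: this framework uses an empty snapshot and the real scheduling queue as
+	// the pod nominator, so the GPU plugins are exercised without running a full
+	// scheduler or API server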