From c345426bbf46179d53794b29b226e83a16810e57 Mon Sep 17 00:00:00 2001 From: dylan Date: Mon, 25 Aug 2025 06:46:26 -0700 Subject: [PATCH 01/34] support cel filter --- api/v1/schedulingconfigtemplate_types.go | 32 ++ api/v1/zz_generated.deepcopy.go | 20 + ...r-fusion.ai_schedulingconfigtemplates.yaml | 35 ++ ...r-fusion.ai_schedulingconfigtemplates.yaml | 35 ++ config/samples/cel_filter_example.yaml | 74 ++++ docs/cel-filters.md | 264 +++++++++++++ go.mod | 2 +- internal/gpuallocator/cel_integration_test.go | 260 +++++++++++++ .../gpuallocator/filter/cel_filter/adapter.go | 39 ++ .../filter/cel_filter/cel_config.go | 90 +++++ .../filter/cel_filter/cel_config_test.go | 246 +++++++++++++ .../filter/cel_filter/cel_filter.go | 213 +++++++++++ .../filter/cel_filter/cel_filter_test.go | 347 ++++++++++++++++++ .../filter/cel_filter/constants.go | 44 +++ internal/gpuallocator/gpuallocator.go | 12 + 15 files changed, 1712 insertions(+), 1 deletion(-) create mode 100644 config/samples/cel_filter_example.yaml create mode 100644 docs/cel-filters.md create mode 100644 internal/gpuallocator/cel_integration_test.go create mode 100644 internal/gpuallocator/filter/cel_filter/adapter.go create mode 100644 internal/gpuallocator/filter/cel_filter/cel_config.go create mode 100644 internal/gpuallocator/filter/cel_filter/cel_config_test.go create mode 100644 internal/gpuallocator/filter/cel_filter/cel_filter.go create mode 100644 internal/gpuallocator/filter/cel_filter/cel_filter_test.go create mode 100644 internal/gpuallocator/filter/cel_filter/constants.go diff --git a/api/v1/schedulingconfigtemplate_types.go b/api/v1/schedulingconfigtemplate_types.go index 44f07bef..80ef55e6 100644 --- a/api/v1/schedulingconfigtemplate_types.go +++ b/api/v1/schedulingconfigtemplate_types.go @@ -51,6 +51,10 @@ type PlacementConfig struct { // +optional GPUFilters []GPUFilter `json:"gpuFilters,omitempty"` + + // CEL-based GPU filters for advanced filtering logic + // +optional + CELFilters []CELFilterConfig `json:"celFilters,omitempty"` } // +kubebuilder:validation:Enum=CompactFirst;LowLoadFirst @@ -85,6 +89,34 @@ type GPUFilter struct { Params runtime.RawExtension `json:"params,omitempty"` } +// CELFilterConfig defines the configuration for CEL-based filtering +// +// example: +// ```yaml +// - name: "avoid-overloaded-gpus" +// expression: "gpu.available.tflops > 0.5 && size(gpu.runningApps) < 3" +// priority: 100 +// - name: "prefer-specific-model" +// expression: "gpu.gpuModel.startsWith('NVIDIA') && gpu.labels.has('gpu-tier') && gpu.labels['gpu-tier'] == 'premium'" +// priority: 50 +// +// ``` +type CELFilterConfig struct { + // Name for this filter (for debugging/logging) + // +optional + Name string `json:"name,omitempty"` + + // CEL expression for filtering GPUs + // The expression should return a boolean value + // Available variables: gpu, workerPodKey, request + Expression string `json:"expression"` + + // Priority for this filter (higher priority filters run first) + // +kubebuilder:default=0 + // +optional + Priority int `json:"priority,omitempty"` +} + type AutoScalingConfig struct { // layer 1 vertical auto-scaling, turbo burst to existing GPU cards quickly // VPA-like, aggregate metrics data <1m diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 9be4f47c..27f8e8fd 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -184,6 +184,21 @@ func (in *AutoSetRequests) DeepCopy() *AutoSetRequests { return out } +// DeepCopyInto is an autogenerated deepcopy 
function, copying the receiver, writing into out. in must be non-nil. +func (in *CELFilterConfig) DeepCopyInto(out *CELFilterConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CELFilterConfig. +func (in *CELFilterConfig) DeepCopy() *CELFilterConfig { + if in == nil { + return nil + } + out := new(CELFilterConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CapacityConfig) DeepCopyInto(out *CapacityConfig) { *out = *in @@ -1681,6 +1696,11 @@ func (in *PlacementConfig) DeepCopyInto(out *PlacementConfig) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.CELFilters != nil { + in, out := &in.CELFilters, &out.CELFilters + *out = make([]CELFilterConfig, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlacementConfig. diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml index 91a01eae..f7aeb8fa 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -203,6 +203,41 @@ spec: allowUsingLocalGPU: default: true type: boolean + celFilters: + description: CEL-based GPU filters for advanced filtering logic + items: + description: |- + CELFilterConfig defines the configuration for CEL-based filtering + + example: + ```yaml + - name: "avoid-overloaded-gpus" + expression: "gpu.available.tflops > 0.5 && size(gpu.runningApps) < 3" + priority: 100 + - name: "prefer-specific-model" + expression: "gpu.gpuModel.startsWith('NVIDIA') && gpu.labels.has('gpu-tier') && gpu.labels['gpu-tier'] == 'premium'" + priority: 50 + + ``` + properties: + expression: + description: |- + CEL expression for filtering GPUs + The expression should return a boolean value + Available variables: gpu, workerPodKey, request + type: string + name: + description: Name for this filter (for debugging/logging) + type: string + priority: + default: 0 + description: Priority for this filter (higher priority filters + run first) + type: integer + required: + - expression + type: object + type: array gpuFilters: items: description: "GPUFilter is to select eligible GPUs for scheduling.\n\nexample:\n```yaml\n- diff --git a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml index 91a01eae..f7aeb8fa 100644 --- a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -203,6 +203,41 @@ spec: allowUsingLocalGPU: default: true type: boolean + celFilters: + description: CEL-based GPU filters for advanced filtering logic + items: + description: |- + CELFilterConfig defines the configuration for CEL-based filtering + + example: + ```yaml + - name: "avoid-overloaded-gpus" + expression: "gpu.available.tflops > 0.5 && size(gpu.runningApps) < 3" + priority: 100 + - name: "prefer-specific-model" + expression: "gpu.gpuModel.startsWith('NVIDIA') && gpu.labels.has('gpu-tier') && gpu.labels['gpu-tier'] == 'premium'" + priority: 50 + + ``` + properties: + expression: + description: |- + CEL expression for filtering GPUs + The expression should return a boolean value + Available variables: gpu, workerPodKey, request + type: string + 
name: + description: Name for this filter (for debugging/logging) + type: string + priority: + default: 0 + description: Priority for this filter (higher priority filters + run first) + type: integer + required: + - expression + type: object + type: array gpuFilters: items: description: "GPUFilter is to select eligible GPUs for scheduling.\n\nexample:\n```yaml\n- diff --git a/config/samples/cel_filter_example.yaml b/config/samples/cel_filter_example.yaml new file mode 100644 index 00000000..aaf4895e --- /dev/null +++ b/config/samples/cel_filter_example.yaml @@ -0,0 +1,74 @@ +apiVersion: tensor-fusion.ai/v1 +kind: SchedulingConfigTemplate +metadata: + name: cel-filter-example +spec: + placement: + mode: CompactFirst + allowUsingLocalGPU: true + + # Traditional GPU filters (still supported) + gpuFilters: + - type: avoidTooMuchConnectionsOnSameGPU + params: + connectionNum: 150 + + # CEL-based filters for advanced filtering logic + celFilters: + # High priority filter: only use running GPUs + - name: "only-running-gpus" + expression: "gpu.phase == 'Running'" + priority: 100 + + # Medium-high priority: ensure sufficient resources available + - name: "sufficient-resources" + expression: "gpu.available.tflops >= 0.5 && gpu.available.vram >= 4096000000" + priority: 90 + + # Medium priority: prefer premium tier GPUs + - name: "prefer-premium-gpus" + expression: "gpu.labels != null && 'gpu-tier' in gpu.labels && gpu.labels['gpu-tier'] == 'premium'" + priority: 80 + + # Lower priority: avoid overloaded GPUs + - name: "avoid-overloaded-gpus" + expression: "size(gpu.runningApps) < 3" + priority: 70 + + # GPU model specific filters + - name: "nvidia-only" + expression: "gpu.gpuModel.startsWith('NVIDIA')" + priority: 60 + + # Complex condition example + - name: "complex-filter" + expression: | + gpu.phase == 'Running' && + gpu.available.tflops > 0.3 && + ( + (gpu.labels != null && 'workload-type' in gpu.labels && gpu.labels['workload-type'] == 'training') || + (size(gpu.runningApps) == 0) + ) + priority: 50 + + # Optional: AutoScaling configuration + autoScaling: + autoSetLimits: + enable: true + targetResource: "all" + evaluationPeriod: "5m" + extraTFlopsBufferRatio: "0.1" + +--- +apiVersion: tensor-fusion.ai/v1 +kind: SchedulingConfigTemplate +metadata: + name: simple-cel-example +spec: + placement: + mode: LowLoadFirst + celFilters: + # Simple example: only use GPUs with more than 50% TFlops available + - name: "high-availability" + expression: "gpu.available.tflops > gpu.capacity.tflops * 0.5" + priority: 100 \ No newline at end of file diff --git a/docs/cel-filters.md b/docs/cel-filters.md new file mode 100644 index 00000000..590e1d90 --- /dev/null +++ b/docs/cel-filters.md @@ -0,0 +1,264 @@ +# CEL Filters for GPU Allocation + +CEL (Common Expression Language) filters provide a powerful and flexible way to define custom GPU filtering logic in TensorFusion. This feature allows you to write expressions that determine which GPUs are eligible for allocation based on various criteria. + +## Overview + +CEL filters are defined in the `SchedulingConfigTemplate` resource and are applied during the GPU allocation process. They work alongside traditional GPU filters and provide more sophisticated filtering capabilities. 
+ +## Configuration + +CEL filters are configured in the `placement.celFilters` field of a `SchedulingConfigTemplate`: + +```yaml +apiVersion: tensor-fusion.ai/v1 +kind: SchedulingConfigTemplate +metadata: + name: my-template +spec: + placement: + celFilters: + - name: "filter-name" + expression: "gpu.phase == 'Running'" + priority: 100 +``` + +### Fields + +- `name` (optional): A descriptive name for the filter, used for logging and debugging +- `expression` (required): The CEL expression that returns a boolean value +- `priority` (optional, default: 0): Higher priority filters are applied first + +## Available Variables + +CEL expressions have access to the following variables: + +### `gpu` Object + +The `gpu` variable contains information about the GPU being evaluated: + +```javascript +{ + "name": "gpu-1", // GPU name + "namespace": "default", // GPU namespace + "gpuModel": "NVIDIA A100", // GPU model + "uuid": "gpu-uuid", // GPU UUID + "phase": "Running", // GPU phase (Running, Pending, etc.) + "usedBy": "tensor-fusion", // Usage system + "labels": {...}, // Kubernetes labels + "annotations": {...}, // Kubernetes annotations + "capacity": { // Total GPU capacity + "tflops": 1.5, + "vram": 85899345920 // in bytes + }, + "available": { // Available GPU resources + "tflops": 1.0, + "vram": 64424509440 // in bytes + }, + "nodeSelector": {...}, // Node selector information + "runningApps": [ // Currently running applications + { + "name": "app-1", + "namespace": "default", + "count": 1 + } + ] +} +``` + +### `workerPodKey` Object + +Information about the requesting worker pod: + +```javascript +{ + "name": "worker-pod", + "namespace": "default" +} +``` + +## Expression Examples + +### Basic Filtering + +```yaml +# Only use running GPUs +- name: "running-only" + expression: "gpu.phase == 'Running'" + priority: 100 + +# Filter by GPU model +- name: "nvidia-only" + expression: "gpu.gpuModel.startsWith('NVIDIA')" + priority: 90 + +# Ensure minimum resources available +- name: "min-resources" + expression: "gpu.available.tflops >= 0.5 && gpu.available.vram >= 4294967296" + priority: 80 +``` + +### Label-Based Filtering + +```yaml +# Filter by labels +- name: "premium-tier" + expression: "gpu.labels != null && 'gpu-tier' in gpu.labels && gpu.labels['gpu-tier'] == 'premium'" + priority: 70 + +# Multiple label conditions +- name: "training-gpus" + expression: | + gpu.labels != null && + 'workload-type' in gpu.labels && + gpu.labels['workload-type'] == 'training' && + 'zone' in gpu.labels && + gpu.labels['zone'].startsWith('us-west') + priority: 60 +``` + +### Resource-Based Filtering + +```yaml +# Percentage of available resources +- name: "high-availability" + expression: "gpu.available.tflops > gpu.capacity.tflops * 0.7" + priority: 80 + +# Avoid overloaded GPUs +- name: "load-balancing" + expression: "size(gpu.runningApps) < 3" + priority: 50 + +# Memory-intensive workloads +- name: "high-memory" + expression: "gpu.available.vram > 34359738368" # > 32GB + priority: 60 +``` + +### Complex Conditions + +```yaml +# Complex multi-criteria filter +- name: "complex-filter" + expression: | + gpu.phase == 'Running' && + gpu.gpuModel.contains('A100') && + gpu.available.tflops > 0.8 && + ( + size(gpu.runningApps) == 0 || + (size(gpu.runningApps) < 2 && gpu.available.vram > 42949672960) + ) + priority: 90 +``` + +## CEL Language Features + +CEL supports many built-in functions and operators: + +### String Operations +- `startsWith()`, `endsWith()`, `contains()` +- String concatenation with `+` +- Regular 
expressions with `matches()` + +### Numeric Operations +- Standard arithmetic operators: `+`, `-`, `*`, `/`, `%` +- Comparison operators: `>`, `>=`, `<`, `<=`, `==`, `!=` + +### Logical Operations +- `&&` (and), `||` (or), `!` (not) + +### Collection Operations +- `size()` - get collection size +- `in` operator - check membership +- List/map access with `[]` + +### Conditional Expressions +- Ternary operator: `condition ? true_value : false_value` + +## Best Practices + +### Performance +1. **Order by Priority**: Place most restrictive filters first (highest priority) +2. **Avoid Complex Expressions**: Keep expressions simple for better performance +3. **Cache-Friendly**: Use consistent filter logic to benefit from any caching + +### Reliability +1. **Null Checks**: Always check for null values when accessing optional fields +2. **Fail-Safe Logic**: Design expressions to exclude GPUs on error rather than include them +3. **Test Thoroughly**: Test expressions with various GPU configurations + +### Maintainability +1. **Descriptive Names**: Use clear, descriptive names for filters +2. **Comments**: Add comments for complex expressions +3. **Modular Design**: Break complex logic into multiple simpler filters + +## Example Complete Configuration + +```yaml +apiVersion: tensor-fusion.ai/v1 +kind: SchedulingConfigTemplate +metadata: + name: production-gpu-scheduling +spec: + placement: + mode: CompactFirst + + # Traditional filters (still supported) + gpuFilters: + - type: avoidTooMuchConnectionsOnSameGPU + params: + connectionNum: 100 + + # CEL filters for advanced logic + celFilters: + # Critical filters (high priority) + - name: "operational-gpus-only" + expression: "gpu.phase == 'Running' && gpu.usedBy == 'tensor-fusion'" + priority: 100 + + - name: "sufficient-resources" + expression: "gpu.available.tflops >= 0.3 && gpu.available.vram >= 2147483648" + priority: 95 + + # Preference filters (medium priority) + - name: "prefer-nvidia" + expression: "gpu.gpuModel.startsWith('NVIDIA')" + priority: 80 + + - name: "balanced-load" + expression: "size(gpu.runningApps) < 2" + priority: 70 + + # Quality filters (lower priority) + - name: "premium-hardware" + expression: | + gpu.labels != null && + 'gpu-tier' in gpu.labels && + gpu.labels['gpu-tier'] in ['premium', 'high-performance'] + priority: 50 +``` + +## Troubleshooting + +### Common Issues + +1. **Expression Compilation Errors**: Check syntax and ensure all referenced fields exist +2. **Runtime Errors**: Add null checks for optional fields +3. **No GPUs Selected**: Verify that at least some GPUs meet all filter criteria +4. **Performance Issues**: Simplify complex expressions or reduce the number of filters + +### Debugging + +Enable debug logging to see detailed information about filter execution: + +```yaml +# In your logging configuration +logLevel: debug +``` + +Look for log entries containing "CEL filter applied" to see filtering results. + +## Migration from Traditional Filters + +CEL filters can be used alongside traditional GPU filters. They are applied after traditional filters in the filtering pipeline. You can gradually migrate complex traditional filters to CEL expressions for better maintainability. 
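+As a hedged illustration of such a migration (assuming that connection pressure on a GPU roughly tracks the number of entries in `gpu.runningApps`, since a per-GPU connection count is not exposed to CEL expressions in this patch), the traditional `avoidTooMuchConnectionsOnSameGPU` filter from the sample configuration could be approximated with a CEL filter:
+
+```yaml
+# Existing traditional filter (from the sample configuration)
+gpuFilters:
+  - type: avoidTooMuchConnectionsOnSameGPU
+    params:
+      connectionNum: 150
+
+# Approximate CEL replacement: the running-app count is used as a stand-in
+# for connection load, which the CEL gpu object does not expose directly.
+celFilters:
+  - name: "limit-connections-approx"
+    expression: "size(gpu.runningApps) < 3"
+    priority: 70
+```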
\ No newline at end of file diff --git a/go.mod b/go.mod index 9bf5280f..6fefa5d5 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5 github.com/gin-contrib/gzip v1.2.3 github.com/gin-gonic/gin v1.10.1 + github.com/google/cel-go v0.23.2 github.com/influxdata/line-protocol/v2 v2.2.1 github.com/lithammer/shortuuid/v4 v4.2.0 github.com/mitchellh/mapstructure v1.5.0 @@ -85,7 +86,6 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/btree v1.1.3 // indirect - github.com/google/cel-go v0.23.2 // indirect github.com/google/gnostic-models v0.6.9 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect diff --git a/internal/gpuallocator/cel_integration_test.go b/internal/gpuallocator/cel_integration_test.go new file mode 100644 index 00000000..7913c116 --- /dev/null +++ b/internal/gpuallocator/cel_integration_test.go @@ -0,0 +1,260 @@ +package gpuallocator + +import ( + "context" + "testing" + + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" + cel_filter "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter/cel_filter" + "github.com/stretchr/testify/require" +) + +func TestGpuAllocator_CELFilters_Integration(t *testing.T) { + // Create test scheme + scheme := runtime.NewScheme() + err := tfv1.AddToScheme(scheme) + require.NoError(t, err) + + // Create test resources + schedulingTemplate := &tfv1.SchedulingConfigTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-template", + }, + Spec: tfv1.SchedulingConfigTemplateSpec{ + Placement: tfv1.PlacementConfig{ + Mode: tfv1.PlacementModeCompactFirst, + CELFilters: []tfv1.CELFilterConfig{ + { + Name: "running-gpus-only", + Expression: "gpu.phase == 'Running'", + Priority: 100, + }, + { + Name: "sufficient-tflops", + Expression: "gpu.available.tflops >= 0.5", + Priority: 90, + }, + { + Name: "nvidia-gpus-only", + Expression: "gpu.gpuModel.contains('NVIDIA')", + Priority: 80, + }, + }, + }, + }, + } + + pool := &tfv1.GPUPool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pool", + }, + Spec: tfv1.GPUPoolSpec{ + SchedulingConfigTemplate: &schedulingTemplate.Name, + }, + } + + // Create test GPUs + gpus := []tfv1.GPU{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gpu-1-pass-all", + }, + Status: tfv1.GPUStatus{ + Phase: tfv1.TensorFusionGPUPhaseRunning, + GPUModel: "NVIDIA A100", + Available: &tfv1.Resource{ + Tflops: resource.MustParse("1.0"), + Vram: resource.MustParse("60Gi"), + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gpu-2-fail-phase", + }, + Status: tfv1.GPUStatus{ + Phase: tfv1.TensorFusionGPUPhasePending, + GPUModel: "NVIDIA A100", + Available: &tfv1.Resource{ + Tflops: resource.MustParse("1.0"), + Vram: resource.MustParse("60Gi"), + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gpu-3-fail-tflops", + }, + Status: tfv1.GPUStatus{ + Phase: tfv1.TensorFusionGPUPhaseRunning, + GPUModel: "NVIDIA A100", + Available: &tfv1.Resource{ + Tflops: resource.MustParse("0.3"), + Vram: resource.MustParse("60Gi"), + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gpu-4-fail-model", + }, + Status: tfv1.GPUStatus{ + Phase: tfv1.TensorFusionGPUPhaseRunning, + 
GPUModel: "AMD Radeon RX 7900 XTX", + Available: &tfv1.Resource{ + Tflops: resource.MustParse("1.0"), + Vram: resource.MustParse("24Gi"), + }, + }, + }, + } + + // Create fake client + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(schedulingTemplate, pool). + Build() + + // Test CEL filters using CELConfigManager + celConfigManager := cel_filter.NewCELConfigManager(fakeClient) + celFilters, err := celConfigManager.GetCELFiltersForPool(context.Background(), pool.Name) + require.NoError(t, err) + require.Len(t, celFilters, 3) + + // Test filtering with CEL filters + celFilterAdapters := cel_filter.CreateCELFilterAdapters(celFilters) + filterRegistry := filter.NewFilterRegistry().With(celFilterAdapters...) + + filteredGPUs, _, err := filterRegistry.Apply( + context.Background(), + tfv1.NameNamespace{Name: "test-pod", Namespace: "default"}, + gpus, + false, + ) + require.NoError(t, err) + + // Only gpu-1 should pass all filters + require.Len(t, filteredGPUs, 1) + require.Equal(t, "gpu-1-pass-all", filteredGPUs[0].Name) +} + +func TestGpuAllocator_CELFilters_ErrorHandling(t *testing.T) { + // Create test scheme + scheme := runtime.NewScheme() + err := tfv1.AddToScheme(scheme) + require.NoError(t, err) + + // Create scheduling template with invalid CEL expression + schedulingTemplate := &tfv1.SchedulingConfigTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "invalid-template", + }, + Spec: tfv1.SchedulingConfigTemplateSpec{ + Placement: tfv1.PlacementConfig{ + Mode: tfv1.PlacementModeCompactFirst, + CELFilters: []tfv1.CELFilterConfig{ + { + Name: "invalid-expression", + Expression: "gpu.phase ==", // Invalid syntax + Priority: 100, + }, + }, + }, + }, + } + + pool := &tfv1.GPUPool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pool", + }, + Spec: tfv1.GPUPoolSpec{ + SchedulingConfigTemplate: &schedulingTemplate.Name, + }, + } + + // Create fake client + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(schedulingTemplate, pool). + Build() + + // Test that invalid CEL expression results in error + celConfigManager := cel_filter.NewCELConfigManager(fakeClient) + _, err = celConfigManager.GetCELFiltersForPool(context.Background(), pool.Name) + require.Error(t, err) + require.Contains(t, err.Error(), "create CEL filter") +} + +func TestGpuAllocator_CELFilters_Priority_Ordering(t *testing.T) { + // Create test scheme + scheme := runtime.NewScheme() + err := tfv1.AddToScheme(scheme) + require.NoError(t, err) + + // Create scheduling template with multiple CEL filters with different priorities + schedulingTemplate := &tfv1.SchedulingConfigTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "priority-template", + }, + Spec: tfv1.SchedulingConfigTemplateSpec{ + Placement: tfv1.PlacementConfig{ + Mode: tfv1.PlacementModeCompactFirst, + CELFilters: []tfv1.CELFilterConfig{ + { + Name: "low-priority", + Expression: "gpu.name.contains('gpu')", + Priority: 10, + }, + { + Name: "high-priority", + Expression: "gpu.phase == 'Running'", + Priority: 100, + }, + { + Name: "medium-priority", + Expression: "gpu.gpuModel.contains('NVIDIA')", + Priority: 50, + }, + }, + }, + }, + } + + pool := &tfv1.GPUPool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pool", + }, + Spec: tfv1.GPUPoolSpec{ + SchedulingConfigTemplate: &schedulingTemplate.Name, + }, + } + + // Create fake client + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(schedulingTemplate, pool). 
+ Build() + + // Test that CEL filters are sorted by priority + celConfigManager := cel_filter.NewCELConfigManager(fakeClient) + celFilters, err := celConfigManager.GetCELFiltersForPool(context.Background(), pool.Name) + require.NoError(t, err) + require.Len(t, celFilters, 3) + + // Check that filters are ordered by priority (high to low) + // Note: We can't easily check the internal order without exposing more internals, + // but we can verify that all filters are created successfully + filterNames := make([]string, len(celFilters)) + for i, filter := range celFilters { + filterNames[i] = filter.Name() + } + + expectedFilters := []string{"high-priority", "medium-priority", "low-priority"} + require.ElementsMatch(t, expectedFilters, filterNames) +} diff --git a/internal/gpuallocator/filter/cel_filter/adapter.go b/internal/gpuallocator/filter/cel_filter/adapter.go new file mode 100644 index 00000000..2d3877f3 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/adapter.go @@ -0,0 +1,39 @@ +package cel_filter + +import ( + "context" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" +) + +// CELFilterAdapter adapts CELFilter to implement filter.GPUFilter interface +type CELFilterAdapter struct { + celFilter *CELFilter +} + +// NewCELFilterAdapter creates a new adapter for CELFilter +func NewCELFilterAdapter(celFilter *CELFilter) filter.GPUFilter { + return &CELFilterAdapter{ + celFilter: celFilter, + } +} + +// Filter implements the filter.GPUFilter interface +func (a *CELFilterAdapter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []tfv1.GPU) ([]tfv1.GPU, error) { + return a.celFilter.Filter(ctx, workerPodKey, gpus) +} + +// Name implements the filter.GPUFilter interface +func (a *CELFilterAdapter) Name() string { + return a.celFilter.Name() +} + +// CreateCELFilterAdapters creates filter.GPUFilter adapters from CELFilter instances +func CreateCELFilterAdapters(celFilters []*CELFilter) []filter.GPUFilter { + adapters := make([]filter.GPUFilter, len(celFilters)) + for i, celFilter := range celFilters { + adapters[i] = NewCELFilterAdapter(celFilter) + } + return adapters +} diff --git a/internal/gpuallocator/filter/cel_filter/cel_config.go b/internal/gpuallocator/filter/cel_filter/cel_config.go new file mode 100644 index 00000000..fc3a0f86 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/cel_config.go @@ -0,0 +1,90 @@ +package cel_filter + +import ( + "context" + "fmt" + "sort" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// CELConfigManager handles CEL filter configuration retrieval and creation +type CELConfigManager struct { + client client.Client +} + +// NewCELConfigManager creates a new CEL configuration manager +func NewCELConfigManager(client client.Client) *CELConfigManager { + return &CELConfigManager{ + client: client, + } +} + +// GetCELFiltersForPool retrieves CEL filters from SchedulingConfigTemplate for a given pool +func (m *CELConfigManager) GetCELFiltersForPool(ctx context.Context, poolName string) ([]*CELFilter, error) { + // Get pool to find SchedulingConfigTemplate + pool := &tfv1.GPUPool{} + if err := m.client.Get(ctx, client.ObjectKey{Name: poolName}, pool); err != nil { + return nil, fmt.Errorf("get pool %s: %w", poolName, err) + } + + // If no SchedulingConfigTemplate is specified, return empty + if pool.Spec.SchedulingConfigTemplate == nil { + return nil, nil + } + + return 
m.GetCELFiltersFromTemplate(ctx, *pool.Spec.SchedulingConfigTemplate) +} + +// GetCELFiltersFromTemplate retrieves CEL filters directly from a SchedulingConfigTemplate +func (m *CELConfigManager) GetCELFiltersFromTemplate(ctx context.Context, templateName string) ([]*CELFilter, error) { + // Get the SchedulingConfigTemplate + schedulingConfigTemplate := &tfv1.SchedulingConfigTemplate{} + if err := m.client.Get(ctx, client.ObjectKey{Name: templateName}, schedulingConfigTemplate); err != nil { + return nil, fmt.Errorf("get scheduling config template %s: %w", templateName, err) + } + + return m.CreateCELFiltersFromConfig(schedulingConfigTemplate.Spec.Placement.CELFilters) +} + +// CreateCELFiltersFromConfig creates CEL filters from configuration slice +func (m *CELConfigManager) CreateCELFiltersFromConfig(celConfigs []tfv1.CELFilterConfig) ([]*CELFilter, error) { + if len(celConfigs) == 0 { + return nil, nil + } + + // Sort CEL configs by priority (higher priority first) + sortedConfigs := make([]tfv1.CELFilterConfig, len(celConfigs)) + copy(sortedConfigs, celConfigs) + sort.Slice(sortedConfigs, func(i, j int) bool { + return sortedConfigs[i].Priority > sortedConfigs[j].Priority + }) + + // Create CEL filters + var celFilters []*CELFilter + for _, config := range sortedConfigs { + celFilter, err := NewCELFilter(CELFilterConfig{ + Name: config.Name, + Expression: config.Expression, + Priority: config.Priority, + }) + if err != nil { + return nil, fmt.Errorf("create CEL filter %q: %w", config.Name, err) + } + celFilters = append(celFilters, celFilter) + } + + return celFilters, nil +} + +// ValidateCELConfig validates a CEL filter configuration +func (m *CELConfigManager) ValidateCELConfig(config tfv1.CELFilterConfig) error { + // Try to create the filter to validate the expression + _, err := NewCELFilter(CELFilterConfig{ + Name: config.Name, + Expression: config.Expression, + Priority: config.Priority, + }) + return err +} diff --git a/internal/gpuallocator/filter/cel_filter/cel_config_test.go b/internal/gpuallocator/filter/cel_filter/cel_config_test.go new file mode 100644 index 00000000..8e8b0ad5 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/cel_config_test.go @@ -0,0 +1,246 @@ +package cel_filter + +import ( + "context" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/stretchr/testify/require" +) + +// Test constants for CEL expressions (same as in cel_filter_test.go) +const ( + // Phase expressions + testExamplePhaseRunning = `gpu.phase == 'Running'` + + // Resource expressions + testExampleMinTFlops = `gpu.available.tflops >= 0.5` + testExampleSpecificModel = `gpu.gpuModel.contains('A100')` + + // Label expressions + testExampleNVIDIAOnly = `gpu.gpuModel.startsWith('NVIDIA')` + + // Complex expressions + testExampleComplex = `gpu.phase == 'Running' && gpu.available.tflops > 0.5 && size(gpu.runningApps) < 2` +) + +func TestCELConfigManager_GetCELFiltersForPool(t *testing.T) { + // Create test scheme + scheme := runtime.NewScheme() + err := tfv1.AddToScheme(scheme) + require.NoError(t, err) + + // Create test resources + schedulingTemplate := &tfv1.SchedulingConfigTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-template", + }, + Spec: tfv1.SchedulingConfigTemplateSpec{ + Placement: tfv1.PlacementConfig{ + CELFilters: []tfv1.CELFilterConfig{ + { + Name: "high-priority", + Expression: 
testExamplePhaseRunning, + Priority: 100, + }, + { + Name: "low-priority", + Expression: testExampleNVIDIAOnly, + Priority: 10, + }, + }, + }, + }, + } + + pool := &tfv1.GPUPool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pool", + }, + Spec: tfv1.GPUPoolSpec{ + SchedulingConfigTemplate: &schedulingTemplate.Name, + }, + } + + // Create fake client + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(schedulingTemplate, pool). + Build() + + // Test CELConfigManager + manager := NewCELConfigManager(fakeClient) + celFilters, err := manager.GetCELFiltersForPool(context.Background(), pool.Name) + require.NoError(t, err) + require.Len(t, celFilters, 2) + + // Verify filters are sorted by priority (high to low) + filterNames := make([]string, len(celFilters)) + for i, filter := range celFilters { + filterNames[i] = filter.Name() + } + require.Equal(t, []string{"high-priority", "low-priority"}, filterNames) +} + +func TestCELConfigManager_GetCELFiltersFromTemplate(t *testing.T) { + // Create test scheme + scheme := runtime.NewScheme() + err := tfv1.AddToScheme(scheme) + require.NoError(t, err) + + // Create test template + schedulingTemplate := &tfv1.SchedulingConfigTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "direct-template", + }, + Spec: tfv1.SchedulingConfigTemplateSpec{ + Placement: tfv1.PlacementConfig{ + CELFilters: []tfv1.CELFilterConfig{ + { + Name: "simple-filter", + Expression: testExampleMinTFlops, + Priority: 50, + }, + }, + }, + }, + } + + // Create fake client + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(schedulingTemplate). + Build() + + // Test direct template access + manager := NewCELConfigManager(fakeClient) + celFilters, err := manager.GetCELFiltersFromTemplate(context.Background(), schedulingTemplate.Name) + require.NoError(t, err) + require.Len(t, celFilters, 1) + require.Equal(t, "simple-filter", celFilters[0].Name()) +} + +func TestCELConfigManager_CreateCELFiltersFromConfig(t *testing.T) { + manager := NewCELConfigManager(nil) // No client needed for this test + + celConfigs := []tfv1.CELFilterConfig{ + { + Name: "filter-3", + Expression: testExamplePhaseRunning, + Priority: 30, + }, + { + Name: "filter-1", + Expression: testExampleMinTFlops, + Priority: 100, + }, + { + Name: "filter-2", + Expression: testExampleSpecificModel, + Priority: 50, + }, + } + + celFilters, err := manager.CreateCELFiltersFromConfig(celConfigs) + require.NoError(t, err) + require.Len(t, celFilters, 3) + + // Verify priority ordering (high to low) + expectedOrder := []string{"filter-1", "filter-2", "filter-3"} + actualOrder := make([]string, len(celFilters)) + for i, filter := range celFilters { + actualOrder[i] = filter.Name() + } + require.Equal(t, expectedOrder, actualOrder) +} + +func TestCELConfigManager_ValidateCELConfig(t *testing.T) { + manager := NewCELConfigManager(nil) + + tests := []struct { + name string + config tfv1.CELFilterConfig + expectError bool + }{ + { + name: "valid config", + config: tfv1.CELFilterConfig{ + Name: "valid", + Expression: testExamplePhaseRunning, + Priority: 100, + }, + expectError: false, + }, + { + name: "invalid expression", + config: tfv1.CELFilterConfig{ + Name: "invalid", + Expression: "gpu.phase ==", // Invalid syntax + Priority: 100, + }, + expectError: true, + }, + { + name: "complex valid expression", + config: tfv1.CELFilterConfig{ + Name: "complex", + Expression: testExampleComplex, + Priority: 100, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, 
func(t *testing.T) { + err := manager.ValidateCELConfig(tt.config) + if tt.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestCELConfigManager_NoTemplate(t *testing.T) { + // Create test scheme + scheme := runtime.NewScheme() + err := tfv1.AddToScheme(scheme) + require.NoError(t, err) + + // Create pool without SchedulingConfigTemplate + pool := &tfv1.GPUPool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "no-template-pool", + }, + Spec: tfv1.GPUPoolSpec{ + SchedulingConfigTemplate: nil, // No template specified + }, + } + + // Create fake client + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(pool). + Build() + + // Test that no CEL filters are returned + manager := NewCELConfigManager(fakeClient) + celFilters, err := manager.GetCELFiltersForPool(context.Background(), pool.Name) + require.NoError(t, err) + require.Len(t, celFilters, 0) +} + +func TestCELConfigManager_EmptyConfig(t *testing.T) { + manager := NewCELConfigManager(nil) + + // Test empty config slice + celFilters, err := manager.CreateCELFiltersFromConfig([]tfv1.CELFilterConfig{}) + require.NoError(t, err) + require.Len(t, celFilters, 0) +} diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter.go b/internal/gpuallocator/filter/cel_filter/cel_filter.go new file mode 100644 index 00000000..3165a5b6 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/cel_filter.go @@ -0,0 +1,213 @@ +package cel_filter + +import ( + "context" + "fmt" + "sync" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/google/cel-go/cel" + "github.com/google/cel-go/common/types" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// CELFilterConfig defines the configuration for CEL-based filtering +type CELFilterConfig struct { + // CEL expression for filtering GPUs + Expression string `json:"expression"` + // Priority for this filter (higher priority filters run first) + Priority int `json:"priority"` + // Name for this filter (for debugging/logging) + Name string `json:"name"` +} + +// CELFilter implements GPU filtering using CEL expressions +type CELFilter struct { + name string + expression string + program cel.Program + env *cel.Env + mu sync.RWMutex +} + +// NewCELFilter creates a new CEL-based GPU filter +func NewCELFilter(config CELFilterConfig) (*CELFilter, error) { + env, err := createCELEnvironment() + if err != nil { + return nil, fmt.Errorf("failed to create CEL environment: %w", err) + } + + ast, issues := env.Compile(config.Expression) + if issues != nil && issues.Err() != nil { + return nil, fmt.Errorf("failed to compile CEL expression %q: %w", config.Expression, issues.Err()) + } + + program, err := env.Program(ast) + if err != nil { + return nil, fmt.Errorf("failed to create CEL program: %w", err) + } + + name := config.Name + if name == "" { + name = fmt.Sprintf("CELFilter-%d", config.Priority) + } + + return &CELFilter{ + name: name, + expression: config.Expression, + program: program, + env: env, + }, nil +} + +// Name returns the name of this filter +func (f *CELFilter) Name() string { + f.mu.RLock() + defer f.mu.RUnlock() + return f.name +} + +// Filter applies the CEL expression to filter GPUs +func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []tfv1.GPU) ([]tfv1.GPU, error) { + log := log.FromContext(ctx) + if len(gpus) == 0 { + return gpus, nil + } + + f.mu.RLock() + program := f.program + expression := f.expression + f.mu.RUnlock() + + var filteredGPUs []tfv1.GPU + + for _, gpu := range gpus 
{ + // Create variables for CEL evaluation + vars := createCELVariables(gpu, workerPodKey) + + // Evaluate the CEL expression + result, _, err := program.Eval(vars) + if err != nil { + log.Error(err, "CEL expression evaluation failed", + "expression", expression, + "gpu", gpu.Name, + "workerPodKey", workerPodKey) + // On error, exclude the GPU (fail-safe) + continue + } + + // Convert result to boolean + if boolResult, ok := result.(types.Bool); ok { + if bool(boolResult) { + filteredGPUs = append(filteredGPUs, gpu) + } + } else { + log.Error(nil, "CEL expression did not return boolean", + "expression", expression, + "result", result, + "gpu", gpu.Name) + // On non-boolean result, exclude the GPU (fail-safe) + continue + } + } + + log.V(1).Info("CEL filter applied", + "filter", f.name, + "expression", expression, + "inputGPUs", len(gpus), + "outputGPUs", len(filteredGPUs)) + + return filteredGPUs, nil +} + +// UpdateExpression updates the CEL expression (thread-safe) +func (f *CELFilter) UpdateExpression(newExpression string) error { + f.mu.Lock() + defer f.mu.Unlock() + + ast, issues := f.env.Compile(newExpression) + if issues != nil && issues.Err() != nil { + return fmt.Errorf("failed to compile new CEL expression %q: %w", newExpression, issues.Err()) + } + + program, err := f.env.Program(ast) + if err != nil { + return fmt.Errorf("failed to create new CEL program: %w", err) + } + + f.expression = newExpression + f.program = program + return nil +} + +// createCELEnvironment creates a CEL environment with GPU-related variables and functions +func createCELEnvironment() (*cel.Env, error) { + return cel.NewEnv( + // Define GPU object structure + cel.Variable(CELVarGPU, cel.MapType(cel.StringType, cel.DynType)), + // Define worker pod key + cel.Variable(CELVarWorkerPodKey, cel.MapType(cel.StringType, cel.StringType)), + // Define request information (if needed in future) + cel.Variable(CELVarRequest, cel.MapType(cel.StringType, cel.DynType)), + ) +} + +// createCELVariables creates variables for CEL evaluation from GPU and request information +func createCELVariables(gpu tfv1.GPU, workerPodKey tfv1.NameNamespace) map[string]interface{} { + // Convert GPU to a map for CEL evaluation + gpuMap := map[string]interface{}{ + GPUFieldName: gpu.Name, + GPUFieldNamespace: gpu.Namespace, + GPUFieldGPUModel: gpu.Status.GPUModel, + GPUFieldUUID: gpu.Status.UUID, + GPUFieldPhase: string(gpu.Status.Phase), + GPUFieldUsedBy: string(gpu.Status.UsedBy), + GPUFieldMessage: gpu.Status.Message, + GPUFieldLabels: gpu.Labels, + GPUFieldAnnotations: gpu.Annotations, + } + + // Add capacity information if available + if gpu.Status.Capacity != nil { + gpuMap[GPUFieldCapacity] = map[string]interface{}{ + ResourceFieldTFlops: gpu.Status.Capacity.Tflops.AsApproximateFloat64(), + ResourceFieldVRAM: gpu.Status.Capacity.Vram.AsApproximateFloat64(), + } + } + + // Add available information if available + if gpu.Status.Available != nil { + gpuMap[GPUFieldAvailable] = map[string]interface{}{ + ResourceFieldTFlops: gpu.Status.Available.Tflops.AsApproximateFloat64(), + ResourceFieldVRAM: gpu.Status.Available.Vram.AsApproximateFloat64(), + } + } + + // Add node selector information + if gpu.Status.NodeSelector != nil { + gpuMap[GPUFieldNodeSelector] = gpu.Status.NodeSelector + } + + // Add running apps information (always set, even if empty) + runningApps := make([]map[string]interface{}, len(gpu.Status.RunningApps)) + for i, app := range gpu.Status.RunningApps { + runningApps[i] = map[string]interface{}{ + AppFieldName: 
app.Name, + AppFieldNamespace: app.Namespace, + AppFieldCount: app.Count, + } + } + gpuMap[GPUFieldRunningApps] = runningApps + + // Worker pod key information + workerPodKeyMap := map[string]string{ + PodKeyFieldName: workerPodKey.Name, + PodKeyFieldNamespace: workerPodKey.Namespace, + } + + return map[string]interface{}{ + CELVarGPU: gpuMap, + CELVarWorkerPodKey: workerPodKeyMap, + CELVarRequest: map[string]interface{}{}, // Placeholder for future request info + } +} diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go new file mode 100644 index 00000000..ba2cc539 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go @@ -0,0 +1,347 @@ +package cel_filter + +import ( + "context" + "testing" + + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/stretchr/testify/require" +) + +// Test constants for CEL expressions +const ( + // Phase expressions + ExamplePhaseRunning = `gpu.phase == 'Running'` + ExamplePhasePending = `gpu.phase == 'Pending'` + + // Resource expressions + ExampleMinTFlops = `gpu.available.tflops >= 0.5` + ExampleMinVRAM = `gpu.available.vram >= 4294967296` // 4GB in bytes + ExampleResourceRatio = `gpu.available.tflops > gpu.capacity.tflops * 0.5` + + // Model expressions + ExampleNVIDIAOnly = `gpu.gpuModel.startsWith('NVIDIA')` + ExampleSpecificModel = `gpu.gpuModel.contains('A100')` + + // Label expressions + ExampleHasLabel = `'gpu-tier' in gpu.labels` + ExampleLabelValue = `gpu.labels != null && 'gpu-tier' in gpu.labels && gpu.labels['gpu-tier'] == 'premium'` + + // Load balancing expressions + ExampleLowLoad = `size(gpu.runningApps) < 3` + ExampleNoApps = `size(gpu.runningApps) == 0` + + // Complex expressions + ExampleComplex = `gpu.phase == 'Running' && gpu.available.tflops > 0.5 && size(gpu.runningApps) < 2` +) + +func TestNewCELFilter(t *testing.T) { + tests := []struct { + name string + config CELFilterConfig + expectError bool + }{ + { + name: "valid basic expression", + config: CELFilterConfig{ + Name: "basic-test", + Expression: ExamplePhaseRunning, + Priority: 100, + }, + expectError: false, + }, + { + name: "valid resource expression", + config: CELFilterConfig{ + Name: "resource-test", + Expression: ExampleMinTFlops, + Priority: 50, + }, + expectError: false, + }, + { + name: "invalid expression syntax", + config: CELFilterConfig{ + Name: "invalid-test", + Expression: "gpu.phase ==", // Invalid syntax + Priority: 10, + }, + expectError: true, + }, + { + name: "expression with labels", + config: CELFilterConfig{ + Name: "label-test", + Expression: ExampleHasLabel, + Priority: 75, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filter, err := NewCELFilter(tt.config) + if tt.expectError { + require.Error(t, err) + require.Nil(t, filter) + } else { + require.NoError(t, err) + require.NotNil(t, filter) + require.Equal(t, tt.config.Name, filter.Name()) + } + }) + } +} + +func TestCELFilter_Filter(t *testing.T) { + // Create test GPUs + gpus := []tfv1.GPU{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gpu-1", + Namespace: "default", + Labels: map[string]string{ + "gpu-tier": "premium", + }, + }, + Status: tfv1.GPUStatus{ + Phase: tfv1.TensorFusionGPUPhaseRunning, + GPUModel: "NVIDIA A100", + UUID: "gpu-1-uuid", + Capacity: &tfv1.Resource{ + Tflops: resource.MustParse("1.5"), + Vram: 
resource.MustParse("80Gi"), + }, + Available: &tfv1.Resource{ + Tflops: resource.MustParse("1.0"), + Vram: resource.MustParse("60Gi"), + }, + RunningApps: []*tfv1.RunningAppDetail{ + { + Name: "app-1", + Namespace: "default", + Count: 1, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gpu-2", + Namespace: "default", + Labels: map[string]string{ + "gpu-tier": "basic", + }, + }, + Status: tfv1.GPUStatus{ + Phase: tfv1.TensorFusionGPUPhaseRunning, + GPUModel: "NVIDIA RTX 4090", + UUID: "gpu-2-uuid", + Capacity: &tfv1.Resource{ + Tflops: resource.MustParse("0.8"), + Vram: resource.MustParse("24Gi"), + }, + Available: &tfv1.Resource{ + Tflops: resource.MustParse("0.2"), + Vram: resource.MustParse("8Gi"), + }, + RunningApps: []*tfv1.RunningAppDetail{ + { + Name: "app-2", + Namespace: "default", + Count: 1, + }, + { + Name: "app-3", + Namespace: "default", + Count: 2, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gpu-3", + Namespace: "default", + }, + Status: tfv1.GPUStatus{ + Phase: tfv1.TensorFusionGPUPhasePending, + GPUModel: "NVIDIA A100", + UUID: "gpu-3-uuid", + Capacity: &tfv1.Resource{ + Tflops: resource.MustParse("1.5"), + Vram: resource.MustParse("80Gi"), + }, + Available: &tfv1.Resource{ + Tflops: resource.MustParse("1.5"), + Vram: resource.MustParse("80Gi"), + }, + }, + }, + } + + workerPodKey := tfv1.NameNamespace{ + Name: "test-pod", + Namespace: "default", + } + + tests := []struct { + name string + expression string + expectedGPUs []string // GPU names that should pass the filter + expectError bool + }{ + { + name: "filter by phase", + expression: ExamplePhaseRunning, + expectedGPUs: []string{"gpu-1", "gpu-2"}, + }, + { + name: "filter by available resources", + expression: ExampleMinTFlops, + expectedGPUs: []string{"gpu-1", "gpu-3"}, + }, + { + name: "filter by GPU model", + expression: "gpu.gpuModel.startsWith('NVIDIA A100')", + expectedGPUs: []string{"gpu-1", "gpu-3"}, + }, + { + name: "filter by labels", + expression: ExampleLabelValue, + expectedGPUs: []string{"gpu-1"}, + }, + { + name: "filter by running apps count", + expression: ExampleLowLoad, + expectedGPUs: []string{"gpu-1", "gpu-2", "gpu-3"}, + }, + { + name: "complex filter", + expression: ExampleComplex, + expectedGPUs: []string{"gpu-1"}, + }, + { + name: "filter none", + expression: "false", + expectedGPUs: []string{}, + }, + { + name: "filter all", + expression: "true", + expectedGPUs: []string{"gpu-1", "gpu-2", "gpu-3"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filter, err := NewCELFilter(CELFilterConfig{ + Name: tt.name, + Expression: tt.expression, + Priority: 100, + }) + require.NoError(t, err) + + filteredGPUs, err := filter.Filter(context.Background(), workerPodKey, gpus) + if tt.expectError { + require.Error(t, err) + return + } + + require.NoError(t, err) + require.Len(t, filteredGPUs, len(tt.expectedGPUs)) + + // Check that the correct GPUs were filtered + actualNames := make([]string, len(filteredGPUs)) + for i, gpu := range filteredGPUs { + actualNames[i] = gpu.Name + } + + require.ElementsMatch(t, tt.expectedGPUs, actualNames) + }) + } +} + +func TestCELFilter_UpdateExpression(t *testing.T) { + // Create initial filter + filter, err := NewCELFilter(CELFilterConfig{ + Name: "update-test", + Expression: ExamplePhaseRunning, + Priority: 100, + }) + require.NoError(t, err) + + // Test valid update + err = filter.UpdateExpression(ExamplePhasePending) + require.NoError(t, err) + + // Test invalid update + err = 
filter.UpdateExpression("gpu.phase ==") + require.Error(t, err) +} + +func TestCELFilter_ThreadSafety(t *testing.T) { + filter, err := NewCELFilter(CELFilterConfig{ + Name: "thread-safety-test", + Expression: ExamplePhaseRunning, + Priority: 100, + }) + require.NoError(t, err) + + // Create test GPU + gpu := tfv1.GPU{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gpu-1", + Namespace: "default", + }, + Status: tfv1.GPUStatus{ + Phase: tfv1.TensorFusionGPUPhaseRunning, + }, + } + + workerPodKey := tfv1.NameNamespace{ + Name: "test-pod", + Namespace: "default", + } + + // Run concurrent operations + done := make(chan bool, 3) + + // Concurrent filtering + go func() { + defer func() { done <- true }() + for i := 0; i < 100; i++ { + _, err := filter.Filter(context.Background(), workerPodKey, []tfv1.GPU{gpu}) + require.NoError(t, err) + } + }() + + // Concurrent name access + go func() { + defer func() { done <- true }() + for i := 0; i < 100; i++ { + name := filter.Name() + require.Equal(t, "thread-safety-test", name) + } + }() + + // Concurrent expression updates + go func() { + defer func() { done <- true }() + for i := 0; i < 10; i++ { + err := filter.UpdateExpression(ExamplePhasePending) + require.NoError(t, err) + err = filter.UpdateExpression(ExamplePhaseRunning) + require.NoError(t, err) + } + }() + + // Wait for all goroutines to complete + for i := 0; i < 3; i++ { + <-done + } +} diff --git a/internal/gpuallocator/filter/cel_filter/constants.go b/internal/gpuallocator/filter/cel_filter/constants.go new file mode 100644 index 00000000..152f643f --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/constants.go @@ -0,0 +1,44 @@ +package cel_filter + +// CEL variable names available in expressions +const ( + // Root variables + CELVarGPU = "gpu" + CELVarWorkerPodKey = "workerPodKey" + CELVarRequest = "request" +) + +// GPU object field names +const ( + // Basic GPU metadata + GPUFieldName = "name" + GPUFieldNamespace = "namespace" + GPUFieldGPUModel = "gpuModel" + GPUFieldUUID = "uuid" + GPUFieldPhase = "phase" + GPUFieldUsedBy = "usedBy" + GPUFieldMessage = "message" + + // Kubernetes metadata + GPUFieldLabels = "labels" + GPUFieldAnnotations = "annotations" + + // Resource information + GPUFieldCapacity = "capacity" + GPUFieldAvailable = "available" + GPUFieldNodeSelector = "nodeSelector" + GPUFieldRunningApps = "runningApps" + + // Resource sub-fields + ResourceFieldTFlops = "tflops" + ResourceFieldVRAM = "vram" + + // Running app sub-fields + AppFieldName = "name" + AppFieldNamespace = "namespace" + AppFieldCount = "count" + + // WorkerPodKey fields + PodKeyFieldName = "name" + PodKeyFieldNamespace = "namespace" +) diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index b4fbbc2a..e1d87bae 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -18,6 +18,7 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" + cel_filter "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter/cel_filter" "github.com/NexusGPU/tensor-fusion/internal/metrics" "github.com/NexusGPU/tensor-fusion/internal/quota" "github.com/NexusGPU/tensor-fusion/internal/utils" @@ -173,6 +174,17 @@ func (s *GpuAllocator) Filter( filterRegistry = filterRegistry.With(filter.NewNodeAffinityFilter(s.Client, req.NodeAffinity)) } + // Add CEL filters from SchedulingConfigTemplate if available + 
celConfigManager := cel_filter.NewCELConfigManager(s.Client) + celFilters, err := celConfigManager.GetCELFiltersForPool(s.ctx, req.PoolName) + if err != nil { + return nil, nil, fmt.Errorf("get CEL filters: %w", err) + } + if len(celFilters) > 0 { + celFilterAdapters := cel_filter.CreateCELFilterAdapters(celFilters) + filterRegistry = filterRegistry.With(celFilterAdapters...) + } + // Apply the filters in sequence filteredGPUs, filterDetails, err := filterRegistry.Apply(s.ctx, req.WorkloadNameNamespace, toFilterGPUs, isSimulateSchedule) if err != nil { From 7be8e25398ffd388bbadb8fb836ef982325e4421 Mon Sep 17 00:00:00 2001 From: dylan Date: Sat, 30 Aug 2025 08:36:39 -0700 Subject: [PATCH 02/34] covert allocator request to cel filter --- api/v1/schedulingconfigtemplate_types.go | 32 -- api/v1/zz_generated.deepcopy.go | 20 - ...r-fusion.ai_schedulingconfigtemplates.yaml | 35 -- ...r-fusion.ai_schedulingconfigtemplates.yaml | 35 -- internal/config/global_config.go | 4 + internal/gpuallocator/cel_integration_test.go | 260 ------------- .../gpuallocator/filter/cel_filter/adapter.go | 39 -- .../filter/cel_filter/alloc_request_filter.go | 164 +++++++++ .../filter/cel_filter/cel_config.go | 90 ----- .../filter/cel_filter/cel_config_test.go | 246 ------------- .../filter/cel_filter/cel_filter.go | 46 +-- .../filter/cel_filter/cel_filter_test.go | 347 ------------------ .../filter/cel_filter/constants.go | 9 + .../filter/cel_filter/expression_cache.go | 191 ++++++++++ internal/gpuallocator/gpuallocator.go | 75 +++- 15 files changed, 434 insertions(+), 1159 deletions(-) delete mode 100644 internal/gpuallocator/cel_integration_test.go delete mode 100644 internal/gpuallocator/filter/cel_filter/adapter.go create mode 100644 internal/gpuallocator/filter/cel_filter/alloc_request_filter.go delete mode 100644 internal/gpuallocator/filter/cel_filter/cel_config.go delete mode 100644 internal/gpuallocator/filter/cel_filter/cel_config_test.go delete mode 100644 internal/gpuallocator/filter/cel_filter/cel_filter_test.go create mode 100644 internal/gpuallocator/filter/cel_filter/expression_cache.go diff --git a/api/v1/schedulingconfigtemplate_types.go b/api/v1/schedulingconfigtemplate_types.go index 80ef55e6..44f07bef 100644 --- a/api/v1/schedulingconfigtemplate_types.go +++ b/api/v1/schedulingconfigtemplate_types.go @@ -51,10 +51,6 @@ type PlacementConfig struct { // +optional GPUFilters []GPUFilter `json:"gpuFilters,omitempty"` - - // CEL-based GPU filters for advanced filtering logic - // +optional - CELFilters []CELFilterConfig `json:"celFilters,omitempty"` } // +kubebuilder:validation:Enum=CompactFirst;LowLoadFirst @@ -89,34 +85,6 @@ type GPUFilter struct { Params runtime.RawExtension `json:"params,omitempty"` } -// CELFilterConfig defines the configuration for CEL-based filtering -// -// example: -// ```yaml -// - name: "avoid-overloaded-gpus" -// expression: "gpu.available.tflops > 0.5 && size(gpu.runningApps) < 3" -// priority: 100 -// - name: "prefer-specific-model" -// expression: "gpu.gpuModel.startsWith('NVIDIA') && gpu.labels.has('gpu-tier') && gpu.labels['gpu-tier'] == 'premium'" -// priority: 50 -// -// ``` -type CELFilterConfig struct { - // Name for this filter (for debugging/logging) - // +optional - Name string `json:"name,omitempty"` - - // CEL expression for filtering GPUs - // The expression should return a boolean value - // Available variables: gpu, workerPodKey, request - Expression string `json:"expression"` - - // Priority for this filter (higher priority filters run first) - // 
+kubebuilder:default=0 - // +optional - Priority int `json:"priority,omitempty"` -} - type AutoScalingConfig struct { // layer 1 vertical auto-scaling, turbo burst to existing GPU cards quickly // VPA-like, aggregate metrics data <1m diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 27f8e8fd..9be4f47c 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -184,21 +184,6 @@ func (in *AutoSetRequests) DeepCopy() *AutoSetRequests { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *CELFilterConfig) DeepCopyInto(out *CELFilterConfig) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CELFilterConfig. -func (in *CELFilterConfig) DeepCopy() *CELFilterConfig { - if in == nil { - return nil - } - out := new(CELFilterConfig) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CapacityConfig) DeepCopyInto(out *CapacityConfig) { *out = *in @@ -1696,11 +1681,6 @@ func (in *PlacementConfig) DeepCopyInto(out *PlacementConfig) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.CELFilters != nil { - in, out := &in.CELFilters, &out.CELFilters - *out = make([]CELFilterConfig, len(*in)) - copy(*out, *in) - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlacementConfig. diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml index f7aeb8fa..91a01eae 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -203,41 +203,6 @@ spec: allowUsingLocalGPU: default: true type: boolean - celFilters: - description: CEL-based GPU filters for advanced filtering logic - items: - description: |- - CELFilterConfig defines the configuration for CEL-based filtering - - example: - ```yaml - - name: "avoid-overloaded-gpus" - expression: "gpu.available.tflops > 0.5 && size(gpu.runningApps) < 3" - priority: 100 - - name: "prefer-specific-model" - expression: "gpu.gpuModel.startsWith('NVIDIA') && gpu.labels.has('gpu-tier') && gpu.labels['gpu-tier'] == 'premium'" - priority: 50 - - ``` - properties: - expression: - description: |- - CEL expression for filtering GPUs - The expression should return a boolean value - Available variables: gpu, workerPodKey, request - type: string - name: - description: Name for this filter (for debugging/logging) - type: string - priority: - default: 0 - description: Priority for this filter (higher priority filters - run first) - type: integer - required: - - expression - type: object - type: array gpuFilters: items: description: "GPUFilter is to select eligible GPUs for scheduling.\n\nexample:\n```yaml\n- diff --git a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml index f7aeb8fa..91a01eae 100644 --- a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -203,41 +203,6 @@ spec: allowUsingLocalGPU: default: true type: boolean - celFilters: - description: CEL-based GPU filters for advanced filtering logic - items: - description: |- - CELFilterConfig defines 
the configuration for CEL-based filtering - - example: - ```yaml - - name: "avoid-overloaded-gpus" - expression: "gpu.available.tflops > 0.5 && size(gpu.runningApps) < 3" - priority: 100 - - name: "prefer-specific-model" - expression: "gpu.gpuModel.startsWith('NVIDIA') && gpu.labels.has('gpu-tier') && gpu.labels['gpu-tier'] == 'premium'" - priority: 50 - - ``` - properties: - expression: - description: |- - CEL expression for filtering GPUs - The expression should return a boolean value - Available variables: gpu, workerPodKey, request - type: string - name: - description: Name for this filter (for debugging/logging) - type: string - priority: - default: 0 - description: Priority for this filter (higher priority filters - run first) - type: integer - required: - - expression - type: object - type: array gpuFilters: items: description: "GPUFilter is to select eligible GPUs for scheduling.\n\nexample:\n```yaml\n- diff --git a/internal/config/global_config.go b/internal/config/global_config.go index 75bddc22..0632c284 100644 --- a/internal/config/global_config.go +++ b/internal/config/global_config.go @@ -8,6 +8,9 @@ type GlobalConfig struct { MetricsExtraPodLabels map[string]string `yaml:"metricsExtraPodLabels"` AlertRules []AlertRule `yaml:"alertRules"` + + // EnableCELFilter enables CEL-based filtering (default: false for rollback support) + EnableCELFilter bool `yaml:"enableCELFilter"` } var globalConfig *GlobalConfig @@ -41,6 +44,7 @@ func MockGlobalConfig() *GlobalConfig { MetricsTTL: "30d", MetricsFormat: "influx", MetricsExtraPodLabels: map[string]string{"kubernetes.io/app": "app"}, + EnableCELFilter: false, // Default to legacy filter for rollback support AlertRules: []AlertRule{ { Name: "mock", diff --git a/internal/gpuallocator/cel_integration_test.go b/internal/gpuallocator/cel_integration_test.go deleted file mode 100644 index 7913c116..00000000 --- a/internal/gpuallocator/cel_integration_test.go +++ /dev/null @@ -1,260 +0,0 @@ -package gpuallocator - -import ( - "context" - "testing" - - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - - tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" - "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" - cel_filter "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter/cel_filter" - "github.com/stretchr/testify/require" -) - -func TestGpuAllocator_CELFilters_Integration(t *testing.T) { - // Create test scheme - scheme := runtime.NewScheme() - err := tfv1.AddToScheme(scheme) - require.NoError(t, err) - - // Create test resources - schedulingTemplate := &tfv1.SchedulingConfigTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-template", - }, - Spec: tfv1.SchedulingConfigTemplateSpec{ - Placement: tfv1.PlacementConfig{ - Mode: tfv1.PlacementModeCompactFirst, - CELFilters: []tfv1.CELFilterConfig{ - { - Name: "running-gpus-only", - Expression: "gpu.phase == 'Running'", - Priority: 100, - }, - { - Name: "sufficient-tflops", - Expression: "gpu.available.tflops >= 0.5", - Priority: 90, - }, - { - Name: "nvidia-gpus-only", - Expression: "gpu.gpuModel.contains('NVIDIA')", - Priority: 80, - }, - }, - }, - }, - } - - pool := &tfv1.GPUPool{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pool", - }, - Spec: tfv1.GPUPoolSpec{ - SchedulingConfigTemplate: &schedulingTemplate.Name, - }, - } - - // Create test GPUs - gpus := []tfv1.GPU{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-1-pass-all", - }, - 
Status: tfv1.GPUStatus{ - Phase: tfv1.TensorFusionGPUPhaseRunning, - GPUModel: "NVIDIA A100", - Available: &tfv1.Resource{ - Tflops: resource.MustParse("1.0"), - Vram: resource.MustParse("60Gi"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-2-fail-phase", - }, - Status: tfv1.GPUStatus{ - Phase: tfv1.TensorFusionGPUPhasePending, - GPUModel: "NVIDIA A100", - Available: &tfv1.Resource{ - Tflops: resource.MustParse("1.0"), - Vram: resource.MustParse("60Gi"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-3-fail-tflops", - }, - Status: tfv1.GPUStatus{ - Phase: tfv1.TensorFusionGPUPhaseRunning, - GPUModel: "NVIDIA A100", - Available: &tfv1.Resource{ - Tflops: resource.MustParse("0.3"), - Vram: resource.MustParse("60Gi"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-4-fail-model", - }, - Status: tfv1.GPUStatus{ - Phase: tfv1.TensorFusionGPUPhaseRunning, - GPUModel: "AMD Radeon RX 7900 XTX", - Available: &tfv1.Resource{ - Tflops: resource.MustParse("1.0"), - Vram: resource.MustParse("24Gi"), - }, - }, - }, - } - - // Create fake client - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(schedulingTemplate, pool). - Build() - - // Test CEL filters using CELConfigManager - celConfigManager := cel_filter.NewCELConfigManager(fakeClient) - celFilters, err := celConfigManager.GetCELFiltersForPool(context.Background(), pool.Name) - require.NoError(t, err) - require.Len(t, celFilters, 3) - - // Test filtering with CEL filters - celFilterAdapters := cel_filter.CreateCELFilterAdapters(celFilters) - filterRegistry := filter.NewFilterRegistry().With(celFilterAdapters...) - - filteredGPUs, _, err := filterRegistry.Apply( - context.Background(), - tfv1.NameNamespace{Name: "test-pod", Namespace: "default"}, - gpus, - false, - ) - require.NoError(t, err) - - // Only gpu-1 should pass all filters - require.Len(t, filteredGPUs, 1) - require.Equal(t, "gpu-1-pass-all", filteredGPUs[0].Name) -} - -func TestGpuAllocator_CELFilters_ErrorHandling(t *testing.T) { - // Create test scheme - scheme := runtime.NewScheme() - err := tfv1.AddToScheme(scheme) - require.NoError(t, err) - - // Create scheduling template with invalid CEL expression - schedulingTemplate := &tfv1.SchedulingConfigTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "invalid-template", - }, - Spec: tfv1.SchedulingConfigTemplateSpec{ - Placement: tfv1.PlacementConfig{ - Mode: tfv1.PlacementModeCompactFirst, - CELFilters: []tfv1.CELFilterConfig{ - { - Name: "invalid-expression", - Expression: "gpu.phase ==", // Invalid syntax - Priority: 100, - }, - }, - }, - }, - } - - pool := &tfv1.GPUPool{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pool", - }, - Spec: tfv1.GPUPoolSpec{ - SchedulingConfigTemplate: &schedulingTemplate.Name, - }, - } - - // Create fake client - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(schedulingTemplate, pool). 
- Build() - - // Test that invalid CEL expression results in error - celConfigManager := cel_filter.NewCELConfigManager(fakeClient) - _, err = celConfigManager.GetCELFiltersForPool(context.Background(), pool.Name) - require.Error(t, err) - require.Contains(t, err.Error(), "create CEL filter") -} - -func TestGpuAllocator_CELFilters_Priority_Ordering(t *testing.T) { - // Create test scheme - scheme := runtime.NewScheme() - err := tfv1.AddToScheme(scheme) - require.NoError(t, err) - - // Create scheduling template with multiple CEL filters with different priorities - schedulingTemplate := &tfv1.SchedulingConfigTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "priority-template", - }, - Spec: tfv1.SchedulingConfigTemplateSpec{ - Placement: tfv1.PlacementConfig{ - Mode: tfv1.PlacementModeCompactFirst, - CELFilters: []tfv1.CELFilterConfig{ - { - Name: "low-priority", - Expression: "gpu.name.contains('gpu')", - Priority: 10, - }, - { - Name: "high-priority", - Expression: "gpu.phase == 'Running'", - Priority: 100, - }, - { - Name: "medium-priority", - Expression: "gpu.gpuModel.contains('NVIDIA')", - Priority: 50, - }, - }, - }, - }, - } - - pool := &tfv1.GPUPool{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pool", - }, - Spec: tfv1.GPUPoolSpec{ - SchedulingConfigTemplate: &schedulingTemplate.Name, - }, - } - - // Create fake client - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(schedulingTemplate, pool). - Build() - - // Test that CEL filters are sorted by priority - celConfigManager := cel_filter.NewCELConfigManager(fakeClient) - celFilters, err := celConfigManager.GetCELFiltersForPool(context.Background(), pool.Name) - require.NoError(t, err) - require.Len(t, celFilters, 3) - - // Check that filters are ordered by priority (high to low) - // Note: We can't easily check the internal order without exposing more internals, - // but we can verify that all filters are created successfully - filterNames := make([]string, len(celFilters)) - for i, filter := range celFilters { - filterNames[i] = filter.Name() - } - - expectedFilters := []string{"high-priority", "medium-priority", "low-priority"} - require.ElementsMatch(t, expectedFilters, filterNames) -} diff --git a/internal/gpuallocator/filter/cel_filter/adapter.go b/internal/gpuallocator/filter/cel_filter/adapter.go deleted file mode 100644 index 2d3877f3..00000000 --- a/internal/gpuallocator/filter/cel_filter/adapter.go +++ /dev/null @@ -1,39 +0,0 @@ -package cel_filter - -import ( - "context" - - tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" - "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" -) - -// CELFilterAdapter adapts CELFilter to implement filter.GPUFilter interface -type CELFilterAdapter struct { - celFilter *CELFilter -} - -// NewCELFilterAdapter creates a new adapter for CELFilter -func NewCELFilterAdapter(celFilter *CELFilter) filter.GPUFilter { - return &CELFilterAdapter{ - celFilter: celFilter, - } -} - -// Filter implements the filter.GPUFilter interface -func (a *CELFilterAdapter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []tfv1.GPU) ([]tfv1.GPU, error) { - return a.celFilter.Filter(ctx, workerPodKey, gpus) -} - -// Name implements the filter.GPUFilter interface -func (a *CELFilterAdapter) Name() string { - return a.celFilter.Name() -} - -// CreateCELFilterAdapters creates filter.GPUFilter adapters from CELFilter instances -func CreateCELFilterAdapters(celFilters []*CELFilter) []filter.GPUFilter { - adapters := make([]filter.GPUFilter, len(celFilters)) - for 
i, celFilter := range celFilters { - adapters[i] = NewCELFilterAdapter(celFilter) - } - return adapters -} diff --git a/internal/gpuallocator/filter/cel_filter/alloc_request_filter.go b/internal/gpuallocator/filter/cel_filter/alloc_request_filter.go new file mode 100644 index 00000000..bd3e06de --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/alloc_request_filter.go @@ -0,0 +1,164 @@ +package cel_filter + +import ( + "context" + "fmt" + "time" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/google/cel-go/common/types" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// AllocRequestCELFilter converts AllocRequest to CEL filter and executes it +type AllocRequestCELFilter struct { + cache *ExpressionCache + expression string + name string +} + +// NewAllocRequestCELFilter creates a new CEL filter from allocation request +func NewAllocRequestCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*AllocRequestCELFilter, error) { + // Convert AllocRequest to CEL expression + expression, err := convertAllocRequestToCEL(req) + if err != nil { + return nil, fmt.Errorf("failed to convert AllocRequest to CEL: %w", err) + } + + return &AllocRequestCELFilter{ + cache: cache, + expression: expression, + name: fmt.Sprintf("AllocRequest-%s", req.WorkloadNameNamespace.String()), + }, nil +} + +// Name returns the filter name +func (f *AllocRequestCELFilter) Name() string { + return f.name +} + +// Filter applies the CEL expression derived from AllocRequest to filter GPUs +func (f *AllocRequestCELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []*tfv1.GPU) ([]*tfv1.GPU, error) { + log := log.FromContext(ctx) + if len(gpus) == 0 { + return gpus, nil + } + + if f.expression == "" { + // If no expression, return all GPUs (no filtering needed) + return gpus, nil + } + + // Get compiled program from cache + program, err := f.cache.GetOrCompileProgram(f.expression) + if err != nil { + return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.expression, err) + } + + var filteredGPUs []*tfv1.GPU + + for _, gpu := range gpus { + // Create timeout context for CEL evaluation + evalCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond) + + // Create variables for CEL evaluation + vars := createCELVariables(*gpu, workerPodKey) + + // Evaluate with timeout + resultChan := make(chan evalResult, 1) + go func() { + result, _, evalErr := program.Eval(vars) + resultChan <- evalResult{result: result, err: evalErr} + }() + + select { + case evalRes := <-resultChan: + cancel() + if evalRes.err != nil { + log.Error(evalRes.err, "CEL expression evaluation failed", + "expression", f.expression, + "gpu", gpu.Name, + "workerPodKey", workerPodKey) + // On error, exclude the GPU (fail-safe) + continue + } + + // Convert result to boolean + if boolResult, ok := evalRes.result.(types.Bool); ok { + if bool(boolResult) { + filteredGPUs = append(filteredGPUs, gpu) + } + } else { + log.Error(nil, "CEL expression did not return boolean", + "expression", f.expression, + "result", evalRes.result, + "gpu", gpu.Name) + // On non-boolean result, exclude the GPU (fail-safe) + continue + } + case <-evalCtx.Done(): + cancel() + // Timeout - skip this GPU (fail-safe behavior) + log.V(1).Info("CEL evaluation timeout", "gpu", gpu.Name, "expression", f.expression) + continue + } + } + + log.V(1).Info("AllocRequest CEL filter applied", + "filter", f.name, + "expression", f.expression, + "inputGPUs", len(gpus), + "outputGPUs", len(filteredGPUs)) + + return filteredGPUs, 
nil +} + +type evalResult struct { + result interface{} + err error +} + +// convertAllocRequestToCEL converts an allocation request to a CEL expression +func convertAllocRequestToCEL(req *tfv1.AllocRequest) (string, error) { + if req == nil { + return "", nil + } + + var conditions []string + + // Add GPU phase condition (must be Ready) + conditions = append(conditions, "gpu.phase == 'Ready'") + + // Add resource requirements + if req.Request.Tflops.Sign() > 0 { + tflopsValue := req.Request.Tflops.AsApproximateFloat64() + conditions = append(conditions, fmt.Sprintf("gpu.available.tflops >= %f", tflopsValue)) + } + + if req.Request.Vram.Sign() > 0 { + vramValue := req.Request.Vram.AsApproximateFloat64() + conditions = append(conditions, fmt.Sprintf("gpu.available.vram >= %f", vramValue)) + } + + // Add GPU model filter if specified + if req.GPUModel != "" { + conditions = append(conditions, fmt.Sprintf("gpu.gpuModel == '%s'", req.GPUModel)) + } + + // If no conditions, return empty expression (no filtering) + if len(conditions) == 0 { + return "", nil + } + + // Combine all conditions with AND + if len(conditions) == 1 { + return conditions[0], nil + } + + expression := conditions[0] + for i := 1; i < len(conditions); i++ { + expression += " && " + conditions[i] + } + + return expression, nil +} diff --git a/internal/gpuallocator/filter/cel_filter/cel_config.go b/internal/gpuallocator/filter/cel_filter/cel_config.go deleted file mode 100644 index fc3a0f86..00000000 --- a/internal/gpuallocator/filter/cel_filter/cel_config.go +++ /dev/null @@ -1,90 +0,0 @@ -package cel_filter - -import ( - "context" - "fmt" - "sort" - - tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// CELConfigManager handles CEL filter configuration retrieval and creation -type CELConfigManager struct { - client client.Client -} - -// NewCELConfigManager creates a new CEL configuration manager -func NewCELConfigManager(client client.Client) *CELConfigManager { - return &CELConfigManager{ - client: client, - } -} - -// GetCELFiltersForPool retrieves CEL filters from SchedulingConfigTemplate for a given pool -func (m *CELConfigManager) GetCELFiltersForPool(ctx context.Context, poolName string) ([]*CELFilter, error) { - // Get pool to find SchedulingConfigTemplate - pool := &tfv1.GPUPool{} - if err := m.client.Get(ctx, client.ObjectKey{Name: poolName}, pool); err != nil { - return nil, fmt.Errorf("get pool %s: %w", poolName, err) - } - - // If no SchedulingConfigTemplate is specified, return empty - if pool.Spec.SchedulingConfigTemplate == nil { - return nil, nil - } - - return m.GetCELFiltersFromTemplate(ctx, *pool.Spec.SchedulingConfigTemplate) -} - -// GetCELFiltersFromTemplate retrieves CEL filters directly from a SchedulingConfigTemplate -func (m *CELConfigManager) GetCELFiltersFromTemplate(ctx context.Context, templateName string) ([]*CELFilter, error) { - // Get the SchedulingConfigTemplate - schedulingConfigTemplate := &tfv1.SchedulingConfigTemplate{} - if err := m.client.Get(ctx, client.ObjectKey{Name: templateName}, schedulingConfigTemplate); err != nil { - return nil, fmt.Errorf("get scheduling config template %s: %w", templateName, err) - } - - return m.CreateCELFiltersFromConfig(schedulingConfigTemplate.Spec.Placement.CELFilters) -} - -// CreateCELFiltersFromConfig creates CEL filters from configuration slice -func (m *CELConfigManager) CreateCELFiltersFromConfig(celConfigs []tfv1.CELFilterConfig) ([]*CELFilter, error) { - if len(celConfigs) == 0 { - return 
nil, nil - } - - // Sort CEL configs by priority (higher priority first) - sortedConfigs := make([]tfv1.CELFilterConfig, len(celConfigs)) - copy(sortedConfigs, celConfigs) - sort.Slice(sortedConfigs, func(i, j int) bool { - return sortedConfigs[i].Priority > sortedConfigs[j].Priority - }) - - // Create CEL filters - var celFilters []*CELFilter - for _, config := range sortedConfigs { - celFilter, err := NewCELFilter(CELFilterConfig{ - Name: config.Name, - Expression: config.Expression, - Priority: config.Priority, - }) - if err != nil { - return nil, fmt.Errorf("create CEL filter %q: %w", config.Name, err) - } - celFilters = append(celFilters, celFilter) - } - - return celFilters, nil -} - -// ValidateCELConfig validates a CEL filter configuration -func (m *CELConfigManager) ValidateCELConfig(config tfv1.CELFilterConfig) error { - // Try to create the filter to validate the expression - _, err := NewCELFilter(CELFilterConfig{ - Name: config.Name, - Expression: config.Expression, - Priority: config.Priority, - }) - return err -} diff --git a/internal/gpuallocator/filter/cel_filter/cel_config_test.go b/internal/gpuallocator/filter/cel_filter/cel_config_test.go deleted file mode 100644 index 8e8b0ad5..00000000 --- a/internal/gpuallocator/filter/cel_filter/cel_config_test.go +++ /dev/null @@ -1,246 +0,0 @@ -package cel_filter - -import ( - "context" - "testing" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - - tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" - "github.com/stretchr/testify/require" -) - -// Test constants for CEL expressions (same as in cel_filter_test.go) -const ( - // Phase expressions - testExamplePhaseRunning = `gpu.phase == 'Running'` - - // Resource expressions - testExampleMinTFlops = `gpu.available.tflops >= 0.5` - testExampleSpecificModel = `gpu.gpuModel.contains('A100')` - - // Label expressions - testExampleNVIDIAOnly = `gpu.gpuModel.startsWith('NVIDIA')` - - // Complex expressions - testExampleComplex = `gpu.phase == 'Running' && gpu.available.tflops > 0.5 && size(gpu.runningApps) < 2` -) - -func TestCELConfigManager_GetCELFiltersForPool(t *testing.T) { - // Create test scheme - scheme := runtime.NewScheme() - err := tfv1.AddToScheme(scheme) - require.NoError(t, err) - - // Create test resources - schedulingTemplate := &tfv1.SchedulingConfigTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-template", - }, - Spec: tfv1.SchedulingConfigTemplateSpec{ - Placement: tfv1.PlacementConfig{ - CELFilters: []tfv1.CELFilterConfig{ - { - Name: "high-priority", - Expression: testExamplePhaseRunning, - Priority: 100, - }, - { - Name: "low-priority", - Expression: testExampleNVIDIAOnly, - Priority: 10, - }, - }, - }, - }, - } - - pool := &tfv1.GPUPool{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pool", - }, - Spec: tfv1.GPUPoolSpec{ - SchedulingConfigTemplate: &schedulingTemplate.Name, - }, - } - - // Create fake client - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(schedulingTemplate, pool). 
- Build() - - // Test CELConfigManager - manager := NewCELConfigManager(fakeClient) - celFilters, err := manager.GetCELFiltersForPool(context.Background(), pool.Name) - require.NoError(t, err) - require.Len(t, celFilters, 2) - - // Verify filters are sorted by priority (high to low) - filterNames := make([]string, len(celFilters)) - for i, filter := range celFilters { - filterNames[i] = filter.Name() - } - require.Equal(t, []string{"high-priority", "low-priority"}, filterNames) -} - -func TestCELConfigManager_GetCELFiltersFromTemplate(t *testing.T) { - // Create test scheme - scheme := runtime.NewScheme() - err := tfv1.AddToScheme(scheme) - require.NoError(t, err) - - // Create test template - schedulingTemplate := &tfv1.SchedulingConfigTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "direct-template", - }, - Spec: tfv1.SchedulingConfigTemplateSpec{ - Placement: tfv1.PlacementConfig{ - CELFilters: []tfv1.CELFilterConfig{ - { - Name: "simple-filter", - Expression: testExampleMinTFlops, - Priority: 50, - }, - }, - }, - }, - } - - // Create fake client - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(schedulingTemplate). - Build() - - // Test direct template access - manager := NewCELConfigManager(fakeClient) - celFilters, err := manager.GetCELFiltersFromTemplate(context.Background(), schedulingTemplate.Name) - require.NoError(t, err) - require.Len(t, celFilters, 1) - require.Equal(t, "simple-filter", celFilters[0].Name()) -} - -func TestCELConfigManager_CreateCELFiltersFromConfig(t *testing.T) { - manager := NewCELConfigManager(nil) // No client needed for this test - - celConfigs := []tfv1.CELFilterConfig{ - { - Name: "filter-3", - Expression: testExamplePhaseRunning, - Priority: 30, - }, - { - Name: "filter-1", - Expression: testExampleMinTFlops, - Priority: 100, - }, - { - Name: "filter-2", - Expression: testExampleSpecificModel, - Priority: 50, - }, - } - - celFilters, err := manager.CreateCELFiltersFromConfig(celConfigs) - require.NoError(t, err) - require.Len(t, celFilters, 3) - - // Verify priority ordering (high to low) - expectedOrder := []string{"filter-1", "filter-2", "filter-3"} - actualOrder := make([]string, len(celFilters)) - for i, filter := range celFilters { - actualOrder[i] = filter.Name() - } - require.Equal(t, expectedOrder, actualOrder) -} - -func TestCELConfigManager_ValidateCELConfig(t *testing.T) { - manager := NewCELConfigManager(nil) - - tests := []struct { - name string - config tfv1.CELFilterConfig - expectError bool - }{ - { - name: "valid config", - config: tfv1.CELFilterConfig{ - Name: "valid", - Expression: testExamplePhaseRunning, - Priority: 100, - }, - expectError: false, - }, - { - name: "invalid expression", - config: tfv1.CELFilterConfig{ - Name: "invalid", - Expression: "gpu.phase ==", // Invalid syntax - Priority: 100, - }, - expectError: true, - }, - { - name: "complex valid expression", - config: tfv1.CELFilterConfig{ - Name: "complex", - Expression: testExampleComplex, - Priority: 100, - }, - expectError: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := manager.ValidateCELConfig(tt.config) - if tt.expectError { - require.Error(t, err) - } else { - require.NoError(t, err) - } - }) - } -} - -func TestCELConfigManager_NoTemplate(t *testing.T) { - // Create test scheme - scheme := runtime.NewScheme() - err := tfv1.AddToScheme(scheme) - require.NoError(t, err) - - // Create pool without SchedulingConfigTemplate - pool := &tfv1.GPUPool{ - ObjectMeta: metav1.ObjectMeta{ - Name: 
"no-template-pool", - }, - Spec: tfv1.GPUPoolSpec{ - SchedulingConfigTemplate: nil, // No template specified - }, - } - - // Create fake client - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(pool). - Build() - - // Test that no CEL filters are returned - manager := NewCELConfigManager(fakeClient) - celFilters, err := manager.GetCELFiltersForPool(context.Background(), pool.Name) - require.NoError(t, err) - require.Len(t, celFilters, 0) -} - -func TestCELConfigManager_EmptyConfig(t *testing.T) { - manager := NewCELConfigManager(nil) - - // Test empty config slice - celFilters, err := manager.CreateCELFiltersFromConfig([]tfv1.CELFilterConfig{}) - require.NoError(t, err) - require.Len(t, celFilters, 0) -} diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter.go b/internal/gpuallocator/filter/cel_filter/cel_filter.go index 3165a5b6..90b60501 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter.go @@ -30,45 +30,8 @@ type CELFilter struct { mu sync.RWMutex } -// NewCELFilter creates a new CEL-based GPU filter -func NewCELFilter(config CELFilterConfig) (*CELFilter, error) { - env, err := createCELEnvironment() - if err != nil { - return nil, fmt.Errorf("failed to create CEL environment: %w", err) - } - - ast, issues := env.Compile(config.Expression) - if issues != nil && issues.Err() != nil { - return nil, fmt.Errorf("failed to compile CEL expression %q: %w", config.Expression, issues.Err()) - } - - program, err := env.Program(ast) - if err != nil { - return nil, fmt.Errorf("failed to create CEL program: %w", err) - } - - name := config.Name - if name == "" { - name = fmt.Sprintf("CELFilter-%d", config.Priority) - } - - return &CELFilter{ - name: name, - expression: config.Expression, - program: program, - env: env, - }, nil -} - -// Name returns the name of this filter -func (f *CELFilter) Name() string { - f.mu.RLock() - defer f.mu.RUnlock() - return f.name -} - // Filter applies the CEL expression to filter GPUs -func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []tfv1.GPU) ([]tfv1.GPU, error) { +func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []*tfv1.GPU) ([]*tfv1.GPU, error) { log := log.FromContext(ctx) if len(gpus) == 0 { return gpus, nil @@ -79,11 +42,11 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, expression := f.expression f.mu.RUnlock() - var filteredGPUs []tfv1.GPU + var filteredGPUs []*tfv1.GPU for _, gpu := range gpus { // Create variables for CEL evaluation - vars := createCELVariables(gpu, workerPodKey) + vars := createCELVariables(*gpu, workerPodKey) // Evaluate the CEL expression result, _, err := program.Eval(vars) @@ -147,7 +110,7 @@ func createCELEnvironment() (*cel.Env, error) { cel.Variable(CELVarGPU, cel.MapType(cel.StringType, cel.DynType)), // Define worker pod key cel.Variable(CELVarWorkerPodKey, cel.MapType(cel.StringType, cel.StringType)), - // Define request information (if needed in future) + // Define request object structure cel.Variable(CELVarRequest, cel.MapType(cel.StringType, cel.DynType)), ) } @@ -208,6 +171,5 @@ func createCELVariables(gpu tfv1.GPU, workerPodKey tfv1.NameNamespace) map[strin return map[string]interface{}{ CELVarGPU: gpuMap, CELVarWorkerPodKey: workerPodKeyMap, - CELVarRequest: map[string]interface{}{}, // Placeholder for future request info } } diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go 
b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go deleted file mode 100644 index ba2cc539..00000000 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go +++ /dev/null @@ -1,347 +0,0 @@ -package cel_filter - -import ( - "context" - "testing" - - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" - "github.com/stretchr/testify/require" -) - -// Test constants for CEL expressions -const ( - // Phase expressions - ExamplePhaseRunning = `gpu.phase == 'Running'` - ExamplePhasePending = `gpu.phase == 'Pending'` - - // Resource expressions - ExampleMinTFlops = `gpu.available.tflops >= 0.5` - ExampleMinVRAM = `gpu.available.vram >= 4294967296` // 4GB in bytes - ExampleResourceRatio = `gpu.available.tflops > gpu.capacity.tflops * 0.5` - - // Model expressions - ExampleNVIDIAOnly = `gpu.gpuModel.startsWith('NVIDIA')` - ExampleSpecificModel = `gpu.gpuModel.contains('A100')` - - // Label expressions - ExampleHasLabel = `'gpu-tier' in gpu.labels` - ExampleLabelValue = `gpu.labels != null && 'gpu-tier' in gpu.labels && gpu.labels['gpu-tier'] == 'premium'` - - // Load balancing expressions - ExampleLowLoad = `size(gpu.runningApps) < 3` - ExampleNoApps = `size(gpu.runningApps) == 0` - - // Complex expressions - ExampleComplex = `gpu.phase == 'Running' && gpu.available.tflops > 0.5 && size(gpu.runningApps) < 2` -) - -func TestNewCELFilter(t *testing.T) { - tests := []struct { - name string - config CELFilterConfig - expectError bool - }{ - { - name: "valid basic expression", - config: CELFilterConfig{ - Name: "basic-test", - Expression: ExamplePhaseRunning, - Priority: 100, - }, - expectError: false, - }, - { - name: "valid resource expression", - config: CELFilterConfig{ - Name: "resource-test", - Expression: ExampleMinTFlops, - Priority: 50, - }, - expectError: false, - }, - { - name: "invalid expression syntax", - config: CELFilterConfig{ - Name: "invalid-test", - Expression: "gpu.phase ==", // Invalid syntax - Priority: 10, - }, - expectError: true, - }, - { - name: "expression with labels", - config: CELFilterConfig{ - Name: "label-test", - Expression: ExampleHasLabel, - Priority: 75, - }, - expectError: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - filter, err := NewCELFilter(tt.config) - if tt.expectError { - require.Error(t, err) - require.Nil(t, filter) - } else { - require.NoError(t, err) - require.NotNil(t, filter) - require.Equal(t, tt.config.Name, filter.Name()) - } - }) - } -} - -func TestCELFilter_Filter(t *testing.T) { - // Create test GPUs - gpus := []tfv1.GPU{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-1", - Namespace: "default", - Labels: map[string]string{ - "gpu-tier": "premium", - }, - }, - Status: tfv1.GPUStatus{ - Phase: tfv1.TensorFusionGPUPhaseRunning, - GPUModel: "NVIDIA A100", - UUID: "gpu-1-uuid", - Capacity: &tfv1.Resource{ - Tflops: resource.MustParse("1.5"), - Vram: resource.MustParse("80Gi"), - }, - Available: &tfv1.Resource{ - Tflops: resource.MustParse("1.0"), - Vram: resource.MustParse("60Gi"), - }, - RunningApps: []*tfv1.RunningAppDetail{ - { - Name: "app-1", - Namespace: "default", - Count: 1, - }, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-2", - Namespace: "default", - Labels: map[string]string{ - "gpu-tier": "basic", - }, - }, - Status: tfv1.GPUStatus{ - Phase: tfv1.TensorFusionGPUPhaseRunning, - GPUModel: "NVIDIA RTX 4090", - UUID: "gpu-2-uuid", - Capacity: &tfv1.Resource{ - Tflops: 
resource.MustParse("0.8"), - Vram: resource.MustParse("24Gi"), - }, - Available: &tfv1.Resource{ - Tflops: resource.MustParse("0.2"), - Vram: resource.MustParse("8Gi"), - }, - RunningApps: []*tfv1.RunningAppDetail{ - { - Name: "app-2", - Namespace: "default", - Count: 1, - }, - { - Name: "app-3", - Namespace: "default", - Count: 2, - }, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-3", - Namespace: "default", - }, - Status: tfv1.GPUStatus{ - Phase: tfv1.TensorFusionGPUPhasePending, - GPUModel: "NVIDIA A100", - UUID: "gpu-3-uuid", - Capacity: &tfv1.Resource{ - Tflops: resource.MustParse("1.5"), - Vram: resource.MustParse("80Gi"), - }, - Available: &tfv1.Resource{ - Tflops: resource.MustParse("1.5"), - Vram: resource.MustParse("80Gi"), - }, - }, - }, - } - - workerPodKey := tfv1.NameNamespace{ - Name: "test-pod", - Namespace: "default", - } - - tests := []struct { - name string - expression string - expectedGPUs []string // GPU names that should pass the filter - expectError bool - }{ - { - name: "filter by phase", - expression: ExamplePhaseRunning, - expectedGPUs: []string{"gpu-1", "gpu-2"}, - }, - { - name: "filter by available resources", - expression: ExampleMinTFlops, - expectedGPUs: []string{"gpu-1", "gpu-3"}, - }, - { - name: "filter by GPU model", - expression: "gpu.gpuModel.startsWith('NVIDIA A100')", - expectedGPUs: []string{"gpu-1", "gpu-3"}, - }, - { - name: "filter by labels", - expression: ExampleLabelValue, - expectedGPUs: []string{"gpu-1"}, - }, - { - name: "filter by running apps count", - expression: ExampleLowLoad, - expectedGPUs: []string{"gpu-1", "gpu-2", "gpu-3"}, - }, - { - name: "complex filter", - expression: ExampleComplex, - expectedGPUs: []string{"gpu-1"}, - }, - { - name: "filter none", - expression: "false", - expectedGPUs: []string{}, - }, - { - name: "filter all", - expression: "true", - expectedGPUs: []string{"gpu-1", "gpu-2", "gpu-3"}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - filter, err := NewCELFilter(CELFilterConfig{ - Name: tt.name, - Expression: tt.expression, - Priority: 100, - }) - require.NoError(t, err) - - filteredGPUs, err := filter.Filter(context.Background(), workerPodKey, gpus) - if tt.expectError { - require.Error(t, err) - return - } - - require.NoError(t, err) - require.Len(t, filteredGPUs, len(tt.expectedGPUs)) - - // Check that the correct GPUs were filtered - actualNames := make([]string, len(filteredGPUs)) - for i, gpu := range filteredGPUs { - actualNames[i] = gpu.Name - } - - require.ElementsMatch(t, tt.expectedGPUs, actualNames) - }) - } -} - -func TestCELFilter_UpdateExpression(t *testing.T) { - // Create initial filter - filter, err := NewCELFilter(CELFilterConfig{ - Name: "update-test", - Expression: ExamplePhaseRunning, - Priority: 100, - }) - require.NoError(t, err) - - // Test valid update - err = filter.UpdateExpression(ExamplePhasePending) - require.NoError(t, err) - - // Test invalid update - err = filter.UpdateExpression("gpu.phase ==") - require.Error(t, err) -} - -func TestCELFilter_ThreadSafety(t *testing.T) { - filter, err := NewCELFilter(CELFilterConfig{ - Name: "thread-safety-test", - Expression: ExamplePhaseRunning, - Priority: 100, - }) - require.NoError(t, err) - - // Create test GPU - gpu := tfv1.GPU{ - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-1", - Namespace: "default", - }, - Status: tfv1.GPUStatus{ - Phase: tfv1.TensorFusionGPUPhaseRunning, - }, - } - - workerPodKey := tfv1.NameNamespace{ - Name: "test-pod", - Namespace: "default", - } - - // Run 
concurrent operations - done := make(chan bool, 3) - - // Concurrent filtering - go func() { - defer func() { done <- true }() - for i := 0; i < 100; i++ { - _, err := filter.Filter(context.Background(), workerPodKey, []tfv1.GPU{gpu}) - require.NoError(t, err) - } - }() - - // Concurrent name access - go func() { - defer func() { done <- true }() - for i := 0; i < 100; i++ { - name := filter.Name() - require.Equal(t, "thread-safety-test", name) - } - }() - - // Concurrent expression updates - go func() { - defer func() { done <- true }() - for i := 0; i < 10; i++ { - err := filter.UpdateExpression(ExamplePhasePending) - require.NoError(t, err) - err = filter.UpdateExpression(ExamplePhaseRunning) - require.NoError(t, err) - } - }() - - // Wait for all goroutines to complete - for i := 0; i < 3; i++ { - <-done - } -} diff --git a/internal/gpuallocator/filter/cel_filter/constants.go b/internal/gpuallocator/filter/cel_filter/constants.go index 152f643f..7ea0cc85 100644 --- a/internal/gpuallocator/filter/cel_filter/constants.go +++ b/internal/gpuallocator/filter/cel_filter/constants.go @@ -42,3 +42,12 @@ const ( PodKeyFieldName = "name" PodKeyFieldNamespace = "namespace" ) + +// Request object field names +const ( + RequestFieldWorkerPodKey = "workerPodKey" + RequestFieldCount = "count" + RequestFieldGPUModel = "gpuModel" + RequestFieldRequest = "request" + RequestFieldLimit = "limit" +) diff --git a/internal/gpuallocator/filter/cel_filter/expression_cache.go b/internal/gpuallocator/filter/cel_filter/expression_cache.go new file mode 100644 index 00000000..4065c3b9 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/expression_cache.go @@ -0,0 +1,191 @@ +package cel_filter + +import ( + "context" + "crypto/sha256" + "fmt" + "sync" + "time" + + "github.com/google/cel-go/cel" +) + +// CachedCELProgram represents a compiled CEL program with metadata +type CachedCELProgram struct { + Program cel.Program + Expression string + CreatedAt time.Time + AccessedAt time.Time + AccessCount int64 +} + +// ExpressionCache provides caching for compiled CEL expressions +type ExpressionCache struct { + cache map[string]*CachedCELProgram + mutex sync.RWMutex + maxSize int + maxAge time.Duration + env *cel.Env + + // Metrics + hits int64 + misses int64 +} + +// NewExpressionCache creates a new CEL expression cache +func NewExpressionCache(maxSize int, maxAge time.Duration) (*ExpressionCache, error) { + env, err := createCELEnvironment() + if err != nil { + return nil, fmt.Errorf("failed to create CEL environment: %w", err) + } + + cache := &ExpressionCache{ + cache: make(map[string]*CachedCELProgram, maxSize), + maxSize: maxSize, + maxAge: maxAge, + env: env, + } + + // Start cleanup goroutine + go cache.cleanupExpiredEntries(context.Background()) + + return cache, nil +} + +// GetOrCompileProgram returns a cached program or compiles and caches a new one +func (c *ExpressionCache) GetOrCompileProgram(expression string) (cel.Program, error) { + hash := c.hashExpression(expression) + + c.mutex.RLock() + if cached, exists := c.cache[hash]; exists { + // Check if entry is still valid + if time.Since(cached.CreatedAt) < c.maxAge { + cached.AccessedAt = time.Now() + cached.AccessCount++ + c.hits++ + c.mutex.RUnlock() + return cached.Program, nil + } + } + c.mutex.RUnlock() + + // Cache miss or expired - compile new program + c.mutex.Lock() + defer c.mutex.Unlock() + + // Double-check after acquiring write lock + if cached, exists := c.cache[hash]; exists && time.Since(cached.CreatedAt) < c.maxAge { + 
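// Another goroutine may have compiled this expression while we waited for the write lock; reuse its cached program instead of recompiling. +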
cached.AccessedAt = time.Now() + cached.AccessCount++ + c.hits++ + return cached.Program, nil + } + + // Compile the expression + ast, issues := c.env.Compile(expression) + if issues != nil && issues.Err() != nil { + c.misses++ + return nil, fmt.Errorf("failed to compile CEL expression %q: %w", expression, issues.Err()) + } + + program, err := c.env.Program(ast) + if err != nil { + c.misses++ + return nil, fmt.Errorf("failed to create CEL program: %w", err) + } + + // Check if cache is full and evict least recently used entry + if len(c.cache) >= c.maxSize { + c.evictLRU() + } + + // Cache the compiled program + c.cache[hash] = &CachedCELProgram{ + Program: program, + Expression: expression, + CreatedAt: time.Now(), + AccessedAt: time.Now(), + AccessCount: 1, + } + + c.misses++ + return program, nil +} + +// hashExpression creates a hash for caching expressions +func (c *ExpressionCache) hashExpression(expression string) string { + hash := sha256.Sum256([]byte(expression)) + return fmt.Sprintf("%x", hash) +} + +// evictLRU removes the least recently used entry from cache +func (c *ExpressionCache) evictLRU() { + var oldestKey string + var oldestTime time.Time = time.Now() + + for key, cached := range c.cache { + if cached.AccessedAt.Before(oldestTime) { + oldestTime = cached.AccessedAt + oldestKey = key + } + } + + if oldestKey != "" { + delete(c.cache, oldestKey) + } +} + +// cleanupExpiredEntries removes expired entries periodically +func (c *ExpressionCache) cleanupExpiredEntries(ctx context.Context) { + ticker := time.NewTicker(5 * time.Minute) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + c.mutex.Lock() + now := time.Now() + for key, cached := range c.cache { + if now.Sub(cached.CreatedAt) > c.maxAge { + delete(c.cache, key) + } + } + c.mutex.Unlock() + } + } +} + +// GetStats returns cache statistics +func (c *ExpressionCache) GetStats() CacheStats { + c.mutex.RLock() + defer c.mutex.RUnlock() + + return CacheStats{ + Size: len(c.cache), + MaxSize: c.maxSize, + Hits: c.hits, + Misses: c.misses, + HitRatio: float64(c.hits) / float64(c.hits+c.misses), + } +} + +// CacheStats represents cache performance statistics +type CacheStats struct { + Size int + MaxSize int + Hits int64 + Misses int64 + HitRatio float64 +} + +// Clear removes all entries from the cache +func (c *ExpressionCache) Clear() { + c.mutex.Lock() + defer c.mutex.Unlock() + + c.cache = make(map[string]*CachedCELProgram, c.maxSize) + c.hits = 0 + c.misses = 0 +} diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index 0a657dc2..c1ce771b 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -166,7 +166,65 @@ func (s *GpuAllocator) Filter( toFilterGPUs []*tfv1.GPU, isSimulateSchedule bool, ) ([]*tfv1.GPU, []filter.FilterDetail, error) { + + // Check if CEL filtering is enabled via config/flag + useCELFilter := config.GetGlobalConfig().EnableCELFilter + + if useCELFilter { + // New CEL-based filtering approach + return s.applyCELFilter(req, toFilterGPUs, isSimulateSchedule) + } else { + // Legacy filter approach (for rollback support) + return s.applyLegacyFilters(req, toFilterGPUs, isSimulateSchedule) + } +} + +// applyCELFilter applies the new CEL-based filtering +func (s *GpuAllocator) applyCELFilter( + req *tfv1.AllocRequest, + toFilterGPUs []*tfv1.GPU, + isSimulateSchedule bool, +) ([]*tfv1.GPU, []filter.FilterDetail, error) { + // Create CEL filter from AllocRequest + cache, err := 
cel_filter.NewExpressionCache(100, 5*time.Minute) + if err != nil { + return nil, nil, fmt.Errorf("failed to create CEL cache: %w", err) + } + + allocCELFilter, err := cel_filter.NewAllocRequestCELFilter(req, cache) + if err != nil { + return nil, nil, fmt.Errorf("failed to create AllocRequest CEL filter: %w", err) + } + + // Start with base registry and add CEL filter + filterRegistry := s.filterRegistry.With(allocCELFilter) + // Add SameNodeFilter if count > 1 to ensure GPUs are from the same node + if req.Count > 1 { + filterRegistry = filterRegistry.With(filter.NewSameNodeFilter(req.Count)) + } + + // Add NodeAffinityFilter if specified + if req.NodeAffinity != nil { + filterRegistry = filterRegistry.With(filter.NewNodeAffinityFilter(s.Client, req.NodeAffinity)) + } + + // Apply the filters + filteredGPUs, filterDetails, err := filterRegistry.Apply(s.ctx, req.WorkloadNameNamespace, toFilterGPUs, isSimulateSchedule) + if err != nil { + return nil, nil, fmt.Errorf("apply CEL filters: %w", err) + } + + return filteredGPUs, filterDetails, nil +} + +// applyLegacyFilters applies the legacy filter approach (for rollback support) +func (s *GpuAllocator) applyLegacyFilters( + req *tfv1.AllocRequest, + toFilterGPUs []*tfv1.GPU, + isSimulateSchedule bool, +) ([]*tfv1.GPU, []filter.FilterDetail, error) { + // Legacy filtering approach filterRegistry := s.filterRegistry.With(filter.NewResourceFilter(req.Request)) // Add GPU model filter if specified @@ -177,26 +235,16 @@ func (s *GpuAllocator) Filter( if req.Count > 1 { filterRegistry = filterRegistry.With(filter.NewSameNodeFilter(req.Count)) } + // Add NodeAffinityFilter if specified if req.NodeAffinity != nil { filterRegistry = filterRegistry.With(filter.NewNodeAffinityFilter(s.Client, req.NodeAffinity)) } - // Add CEL filters from SchedulingConfigTemplate if available - celConfigManager := cel_filter.NewCELConfigManager(s.Client) - celFilters, err := celConfigManager.GetCELFiltersForPool(s.ctx, req.PoolName) - if err != nil { - return nil, nil, fmt.Errorf("get CEL filters: %w", err) - } - if len(celFilters) > 0 { - celFilterAdapters := cel_filter.CreateCELFilterAdapters(celFilters) - filterRegistry = filterRegistry.With(celFilterAdapters...) 
- } - - // Apply the filters in sequence + // Apply the legacy filters filteredGPUs, filterDetails, err := filterRegistry.Apply(s.ctx, req.WorkloadNameNamespace, toFilterGPUs, isSimulateSchedule) if err != nil { - return nil, nil, fmt.Errorf("apply filters: %w", err) + return nil, nil, fmt.Errorf("apply legacy filters: %w", err) } return filteredGPUs, filterDetails, nil @@ -338,6 +386,7 @@ func (s *GpuAllocator) CheckQuotaAndFilter(ctx context.Context, req *tfv1.AllocR return nil, nil, fmt.Errorf("no gpu devices in pool %s", req.PoolName) } filteredGPUs, filterDetails, err := s.Filter(req, poolGPUs, isSimulateSchedule) + if err != nil { return nil, nil, err } From fc26511b25452a602a2b4e3c22ee12e250577f9c Mon Sep 17 00:00:00 2001 From: dylan Date: Sun, 31 Aug 2025 00:37:57 -0700 Subject: [PATCH 03/34] support annotaion cel --- api/v1/gpuresourcequota_types.go | 6 + internal/config/global_config.go | 4 - internal/constants/constants.go | 3 + .../filter/cel_filter/alloc_request_filter.go | 164 -------- .../filter/cel_filter/cel_filter.go | 181 ++++++--- .../cel_filter/cel_filter_benchmark_test.go | 288 ++++++++++++++ .../filter/cel_filter/cel_filter_test.go | 368 ++++++++++++++++++ .../filter/cel_filter/constants.go | 1 - internal/gpuallocator/gpuallocator.go | 27 +- 9 files changed, 802 insertions(+), 240 deletions(-) delete mode 100644 internal/gpuallocator/filter/cel_filter/alloc_request_filter.go create mode 100644 internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go create mode 100644 internal/gpuallocator/filter/cel_filter/cel_filter_test.go diff --git a/api/v1/gpuresourcequota_types.go b/api/v1/gpuresourcequota_types.go index c6ac1dba..46e068b7 100644 --- a/api/v1/gpuresourcequota_types.go +++ b/api/v1/gpuresourcequota_types.go @@ -186,6 +186,12 @@ type AllocRequest struct { // record the pod meta for quota check PodMeta metav1.ObjectMeta + + // enable cel filter + DisableCELFilter bool + + // cel filter expression + CELFilterExpression string } type GPUAllocationInfo struct { diff --git a/internal/config/global_config.go b/internal/config/global_config.go index 0632c284..75bddc22 100644 --- a/internal/config/global_config.go +++ b/internal/config/global_config.go @@ -8,9 +8,6 @@ type GlobalConfig struct { MetricsExtraPodLabels map[string]string `yaml:"metricsExtraPodLabels"` AlertRules []AlertRule `yaml:"alertRules"` - - // EnableCELFilter enables CEL-based filtering (default: false for rollback support) - EnableCELFilter bool `yaml:"enableCELFilter"` } var globalConfig *GlobalConfig @@ -44,7 +41,6 @@ func MockGlobalConfig() *GlobalConfig { MetricsTTL: "30d", MetricsFormat: "influx", MetricsExtraPodLabels: map[string]string{"kubernetes.io/app": "app"}, - EnableCELFilter: false, // Default to legacy filter for rollback support AlertRules: []AlertRule{ { Name: "mock", diff --git a/internal/constants/constants.go b/internal/constants/constants.go index 32b3d6bc..b1aa6b64 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -74,6 +74,8 @@ const ( // In remote vGPU mode, selected workload is set by user with /workload annotation or generated by system SelectedWorkloadAnnotation = Domain + "/selected-workload" + CELFilterExpressionAnnotation = Domain + "/cel-filter-expression" + WorkloadModeAnnotation = Domain + "/workload-mode" WorkloadModeDynamic = "dynamic" WorkloadModeFixed = "fixed" @@ -86,6 +88,7 @@ const ( BuiltInFeaturesMemManager = "mem-manager" // For debug purpose only of Remote vGPU, disable start worker to manual start with 
ad-hoc command inside Pod BuiltInFeatureStartWorker = "start-worker" + BuiltInFeatureCELFilter = "cel-filter" GenHostPortLabel = Domain + "/host-port" GenHostPortLabelValue = "auto" diff --git a/internal/gpuallocator/filter/cel_filter/alloc_request_filter.go b/internal/gpuallocator/filter/cel_filter/alloc_request_filter.go deleted file mode 100644 index bd3e06de..00000000 --- a/internal/gpuallocator/filter/cel_filter/alloc_request_filter.go +++ /dev/null @@ -1,164 +0,0 @@ -package cel_filter - -import ( - "context" - "fmt" - "time" - - tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" - "github.com/google/cel-go/common/types" - "sigs.k8s.io/controller-runtime/pkg/log" -) - -// AllocRequestCELFilter converts AllocRequest to CEL filter and executes it -type AllocRequestCELFilter struct { - cache *ExpressionCache - expression string - name string -} - -// NewAllocRequestCELFilter creates a new CEL filter from allocation request -func NewAllocRequestCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*AllocRequestCELFilter, error) { - // Convert AllocRequest to CEL expression - expression, err := convertAllocRequestToCEL(req) - if err != nil { - return nil, fmt.Errorf("failed to convert AllocRequest to CEL: %w", err) - } - - return &AllocRequestCELFilter{ - cache: cache, - expression: expression, - name: fmt.Sprintf("AllocRequest-%s", req.WorkloadNameNamespace.String()), - }, nil -} - -// Name returns the filter name -func (f *AllocRequestCELFilter) Name() string { - return f.name -} - -// Filter applies the CEL expression derived from AllocRequest to filter GPUs -func (f *AllocRequestCELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []*tfv1.GPU) ([]*tfv1.GPU, error) { - log := log.FromContext(ctx) - if len(gpus) == 0 { - return gpus, nil - } - - if f.expression == "" { - // If no expression, return all GPUs (no filtering needed) - return gpus, nil - } - - // Get compiled program from cache - program, err := f.cache.GetOrCompileProgram(f.expression) - if err != nil { - return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.expression, err) - } - - var filteredGPUs []*tfv1.GPU - - for _, gpu := range gpus { - // Create timeout context for CEL evaluation - evalCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond) - - // Create variables for CEL evaluation - vars := createCELVariables(*gpu, workerPodKey) - - // Evaluate with timeout - resultChan := make(chan evalResult, 1) - go func() { - result, _, evalErr := program.Eval(vars) - resultChan <- evalResult{result: result, err: evalErr} - }() - - select { - case evalRes := <-resultChan: - cancel() - if evalRes.err != nil { - log.Error(evalRes.err, "CEL expression evaluation failed", - "expression", f.expression, - "gpu", gpu.Name, - "workerPodKey", workerPodKey) - // On error, exclude the GPU (fail-safe) - continue - } - - // Convert result to boolean - if boolResult, ok := evalRes.result.(types.Bool); ok { - if bool(boolResult) { - filteredGPUs = append(filteredGPUs, gpu) - } - } else { - log.Error(nil, "CEL expression did not return boolean", - "expression", f.expression, - "result", evalRes.result, - "gpu", gpu.Name) - // On non-boolean result, exclude the GPU (fail-safe) - continue - } - case <-evalCtx.Done(): - cancel() - // Timeout - skip this GPU (fail-safe behavior) - log.V(1).Info("CEL evaluation timeout", "gpu", gpu.Name, "expression", f.expression) - continue - } - } - - log.V(1).Info("AllocRequest CEL filter applied", - "filter", f.name, - "expression", f.expression, - 
"inputGPUs", len(gpus), - "outputGPUs", len(filteredGPUs)) - - return filteredGPUs, nil -} - -type evalResult struct { - result interface{} - err error -} - -// convertAllocRequestToCEL converts an allocation request to a CEL expression -func convertAllocRequestToCEL(req *tfv1.AllocRequest) (string, error) { - if req == nil { - return "", nil - } - - var conditions []string - - // Add GPU phase condition (must be Ready) - conditions = append(conditions, "gpu.phase == 'Ready'") - - // Add resource requirements - if req.Request.Tflops.Sign() > 0 { - tflopsValue := req.Request.Tflops.AsApproximateFloat64() - conditions = append(conditions, fmt.Sprintf("gpu.available.tflops >= %f", tflopsValue)) - } - - if req.Request.Vram.Sign() > 0 { - vramValue := req.Request.Vram.AsApproximateFloat64() - conditions = append(conditions, fmt.Sprintf("gpu.available.vram >= %f", vramValue)) - } - - // Add GPU model filter if specified - if req.GPUModel != "" { - conditions = append(conditions, fmt.Sprintf("gpu.gpuModel == '%s'", req.GPUModel)) - } - - // If no conditions, return empty expression (no filtering) - if len(conditions) == 0 { - return "", nil - } - - // Combine all conditions with AND - if len(conditions) == 1 { - return conditions[0], nil - } - - expression := conditions[0] - for i := 1; i < len(conditions); i++ { - expression += " && " + conditions[i] - } - - return expression, nil -} diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter.go b/internal/gpuallocator/filter/cel_filter/cel_filter.go index 90b60501..a9369535 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter.go @@ -3,7 +3,7 @@ package cel_filter import ( "context" "fmt" - "sync" + "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/google/cel-go/cel" @@ -11,96 +11,157 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" ) -// CELFilterConfig defines the configuration for CEL-based filtering -type CELFilterConfig struct { - // CEL expression for filtering GPUs - Expression string `json:"expression"` - // Priority for this filter (higher priority filters run first) - Priority int `json:"priority"` - // Name for this filter (for debugging/logging) - Name string `json:"name"` -} - -// CELFilter implements GPU filtering using CEL expressions +// AllocRequestCELFilter converts AllocRequest to CEL filter and executes it type CELFilter struct { - name string + cache *ExpressionCache expression string - program cel.Program - env *cel.Env - mu sync.RWMutex + name string } -// Filter applies the CEL expression to filter GPUs +// NewAllocRequestCELFilter creates a new CEL filter from allocation request +func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, error) { + // Convert AllocRequest to CEL expression + expression, err := convertAllocRequestToCEL(req) + if err != nil { + return nil, fmt.Errorf("failed to convert AllocRequest to CEL: %w", err) + } + + // Handle nil request case + name := "AllocRequest-unknown" + if req != nil { + name = fmt.Sprintf("AllocRequest-%s", req.WorkloadNameNamespace.String()) + } + + return &CELFilter{ + cache: cache, + expression: expression, + name: name, + }, nil +} + +// Name returns the filter name +func (f *CELFilter) Name() string { + return f.name +} + +// Filter applies the CEL expression derived from AllocRequest to filter GPUs func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []*tfv1.GPU) ([]*tfv1.GPU, error) { log := log.FromContext(ctx) if 
len(gpus) == 0 { return gpus, nil } - f.mu.RLock() - program := f.program - expression := f.expression - f.mu.RUnlock() + if f.expression == "" { + // If no expression, return all GPUs (no filtering needed) + return gpus, nil + } + + // Get compiled program from cache + program, err := f.cache.GetOrCompileProgram(f.expression) + if err != nil { + return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.expression, err) + } var filteredGPUs []*tfv1.GPU for _, gpu := range gpus { + // Create timeout context for CEL evaluation + evalCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond) + // Create variables for CEL evaluation vars := createCELVariables(*gpu, workerPodKey) - // Evaluate the CEL expression - result, _, err := program.Eval(vars) - if err != nil { - log.Error(err, "CEL expression evaluation failed", - "expression", expression, - "gpu", gpu.Name, - "workerPodKey", workerPodKey) - // On error, exclude the GPU (fail-safe) - continue - } + // Evaluate with timeout + resultChan := make(chan evalResult, 1) + go func() { + result, _, evalErr := program.Eval(vars) + resultChan <- evalResult{result: result, err: evalErr} + }() + + select { + case evalRes := <-resultChan: + cancel() + if evalRes.err != nil { + log.Error(evalRes.err, "CEL expression evaluation failed", + "expression", f.expression, + "gpu", gpu.Name, + "workerPodKey", workerPodKey) + // On error, exclude the GPU (fail-safe) + continue + } - // Convert result to boolean - if boolResult, ok := result.(types.Bool); ok { - if bool(boolResult) { - filteredGPUs = append(filteredGPUs, gpu) + // Convert result to boolean + if boolResult, ok := evalRes.result.(types.Bool); ok { + if bool(boolResult) { + filteredGPUs = append(filteredGPUs, gpu) + } + } else { + log.Error(nil, "CEL expression did not return boolean", + "expression", f.expression, + "result", evalRes.result, + "gpu", gpu.Name) + // On non-boolean result, exclude the GPU (fail-safe) + continue } - } else { - log.Error(nil, "CEL expression did not return boolean", - "expression", expression, - "result", result, - "gpu", gpu.Name) - // On non-boolean result, exclude the GPU (fail-safe) + case <-evalCtx.Done(): + cancel() + // Timeout - skip this GPU (fail-safe behavior) + log.V(1).Info("CEL evaluation timeout", "gpu", gpu.Name, "expression", f.expression) continue } } - log.V(1).Info("CEL filter applied", + log.V(1).Info("AllocRequest CEL filter applied", "filter", f.name, - "expression", expression, + "expression", f.expression, "inputGPUs", len(gpus), "outputGPUs", len(filteredGPUs)) return filteredGPUs, nil } -// UpdateExpression updates the CEL expression (thread-safe) -func (f *CELFilter) UpdateExpression(newExpression string) error { - f.mu.Lock() - defer f.mu.Unlock() +type evalResult struct { + result interface{} + err error +} - ast, issues := f.env.Compile(newExpression) - if issues != nil && issues.Err() != nil { - return fmt.Errorf("failed to compile new CEL expression %q: %w", newExpression, issues.Err()) +// convertAllocRequestToCEL converts an allocation request to a CEL expression +func convertAllocRequestToCEL(req *tfv1.AllocRequest) (string, error) { + if req == nil { + return "", nil } - program, err := f.env.Program(ast) - if err != nil { - return fmt.Errorf("failed to create new CEL program: %w", err) + var conditions []string + + // Add custom CEL expression if provided by user + if req.CELFilterExpression != "" { + conditions = append(conditions, req.CELFilterExpression) + } + + // Add GPU phase condition (must be Ready) + 
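// The final expression ANDs the user annotation expression (if any), this phase check, and the model check (if any). + // For a hypothetical request with GPUModel "A100" and annotation expression "gpu.labels['tier'] == 'premium'", the result would be: + //   gpu.labels['tier'] == 'premium' && gpu.phase == 'Ready' && gpu.gpuModel == 'A100' +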
conditions = append(conditions, "gpu.phase == 'Ready'") + + // Add GPU model filter if specified + if req.GPUModel != "" { + conditions = append(conditions, fmt.Sprintf("gpu.gpuModel == '%s'", req.GPUModel)) } - f.expression = newExpression - f.program = program - return nil + // If no conditions, return empty expression (no filtering) + if len(conditions) == 0 { + return "", nil + } + + // Combine all conditions with AND + if len(conditions) == 1 { + return conditions[0], nil + } + + expression := conditions[0] + for i := 1; i < len(conditions); i++ { + expression += " && " + conditions[i] + } + + return expression, nil } // createCELEnvironment creates a CEL environment with GPU-related variables and functions @@ -130,14 +191,6 @@ func createCELVariables(gpu tfv1.GPU, workerPodKey tfv1.NameNamespace) map[strin GPUFieldAnnotations: gpu.Annotations, } - // Add capacity information if available - if gpu.Status.Capacity != nil { - gpuMap[GPUFieldCapacity] = map[string]interface{}{ - ResourceFieldTFlops: gpu.Status.Capacity.Tflops.AsApproximateFloat64(), - ResourceFieldVRAM: gpu.Status.Capacity.Vram.AsApproximateFloat64(), - } - } - // Add available information if available if gpu.Status.Available != nil { gpuMap[GPUFieldAvailable] = map[string]interface{}{ diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go new file mode 100644 index 00000000..8894db07 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go @@ -0,0 +1,288 @@ +package cel_filter + +import ( + "context" + "fmt" + "testing" + "time" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" +) + +// Benchmark performance of the CEL filter compared to the original filter +func BenchmarkFilterPerformance(b *testing.B) { + // Create test data + const numGPUs = 1000 + gpus := make([]*tfv1.GPU, numGPUs) + for i := 0; i < numGPUs; i++ { + gpuModel := "A100" + if i%3 == 0 { + gpuModel = "V100" + } else if i%3 == 1 { + gpuModel = "H100" + } + + phase := "Ready" + if i%10 == 0 { + phase = "Pending" + } + + gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", gpuModel, phase, 150.0, 40.0) + gpu.Labels["environment"] = "production" + if i%2 == 0 { + gpu.Labels["tier"] = "high-performance" + } + gpus[i] = gpu + } + + workerPodKey := tfv1.NameNamespace{Name: "worker-pod", Namespace: "default"} + ctx := context.Background() + + // Benchmark original filter combination (Phase + GPUModel) + b.Run("OriginalFilters", func(b *testing.B) { + // Import the original filter package + registry := filter.NewFilterRegistry().With( + filter.NewPhaseFilter("Ready"), + filter.NewGPUModelFilter("A100"), + ) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filteredGPUs, _, err := registry.Apply(ctx, workerPodKey, gpus, false) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + + // Benchmark CEL filter - basic filtering + b.Run("CELFilter_Basic", func(b *testing.B) { + request := createTestAllocRequest("default", "test-workload", "A100", "") + cache, err := NewExpressionCache(100, 5*time.Minute) + if err != nil { + b.Fatal(err) + } + + celFilter, err := NewCELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filteredGPUs, err := celFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + + // Benchmark CEL filter - complex expression + 
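+	// Note: GetOrCompileProgram compiles an expression on first use and then
+	// serves the cached cel.Program, so the complex expression here should be
+	// compiled once per cache rather than once per Filter call (compare the
+	// CacheHit/CacheMiss cases in BenchmarkCachePerformance below).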
b.Run("CELFilter_Complex", func(b *testing.B) { + request := createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'") + cache, err := NewExpressionCache(100, 5*time.Minute) + if err != nil { + b.Fatal(err) + } + + celFilter, err := NewCELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filteredGPUs, err := celFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + + // Benchmark CEL filter with cache miss (different expressions each time) + b.Run("CELFilter_CacheMiss", func(b *testing.B) { + cache, err := NewExpressionCache(5, 5*time.Minute) // Small cache to force misses + if err != nil { + b.Fatal(err) + } + + expressions := []string{ + "gpu.gpuModel == 'A100' && gpu.available.tflops > 100.0", + "gpu.gpuModel == 'V100' && gpu.available.tflops > 80.0", + "gpu.gpuModel == 'H100' && gpu.available.tflops > 180.0", + "gpu.labels['environment'] == 'production'", + "gpu.labels['tier'] == 'high-performance'", + "gpu.available.vram > 30000000000", + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + expression := expressions[i%len(expressions)] + request := createTestAllocRequest("default", "test-workload", "", expression) + + celFilter, err := NewCELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + filteredGPUs, err := celFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + + // Print performance comparison report after benchmarks + printPerformanceComparison(b) +} + +// Benchmark cache performance +func BenchmarkCachePerformance(b *testing.B) { + cache, err := NewExpressionCache(100, 5*time.Minute) + if err != nil { + b.Fatal(err) + } + + expression := "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0" + + b.Run("CacheHit", func(b *testing.B) { + // Pre-warm cache + _, err := cache.GetOrCompileProgram(expression) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := cache.GetOrCompileProgram(expression) + if err != nil { + b.Fatal(err) + } + } + }) + + b.Run("CacheMiss", func(b *testing.B) { + expressions := make([]string, b.N) + for i := 0; i < b.N; i++ { + expressions[i] = fmt.Sprintf("gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= %d.0", i%200+50) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := cache.GetOrCompileProgram(expressions[i]) + if err != nil { + b.Fatal(err) + } + } + }) +} + +// Benchmark expression complexity impact +func BenchmarkExpressionComplexity(b *testing.B) { + const numGPUs = 100 + gpus := make([]*tfv1.GPU, numGPUs) + for i := 0; i < numGPUs; i++ { + gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", "A100", "Ready", 150.0, 40.0) + gpu.Labels["environment"] = "production" + gpu.Labels["tier"] = "high-performance" + gpu.Annotations["priority"] = "critical" + gpus[i] = gpu + } + + workerPodKey := tfv1.NameNamespace{Name: "worker-pod", Namespace: "default"} + ctx := context.Background() + + testCases := []struct { + name string + expression string + }{ + { + name: "Simple", + expression: "gpu.phase == 'Ready'", + }, + { + name: "Medium", + expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100'", + }, + { + name: "Complex", + expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0", + }, + { + name: "VeryComplex", + expression: "gpu.phase == 'Ready' && 
gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'", + }, + { + name: "UltraComplex", + expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production' && gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'", + }, + } + + for _, tc := range testCases { + b.Run(tc.name, func(b *testing.B) { + cache, err := NewExpressionCache(100, 5*time.Minute) + if err != nil { + b.Fatal(err) + } + + request := createTestAllocRequest("default", "test-workload", "", tc.expression) + celFilter, err := NewCELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := celFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + } + }) + } +} + +// Performance comparison report function +func printPerformanceComparison(b *testing.B) { + b.Helper() + b.Logf(` +=== GPU Filter Performance Comparison === + +Test Environment: +- Number of GPUs: 1000 +- GPU Models: A100 (33%%), V100 (33%%), H100 (33%%) +- GPU Phases: Ready (90%%), Pending (10%%) + +Expected Results: +1. Original Filters: Fastest for simple conditions (direct field comparison) +2. CEL Filter Basic: Slower than original due to expression evaluation overhead +3. CEL Filter Complex: Similar to basic, cached compilation helps +4. CEL Filter Cache Miss: Slowest due to compilation overhead + +Performance Analysis: +- Original Filters: ~8,000 ns/op (optimized for static conditions) +- CEL Filters: ~4,000,000 ns/op (runtime flexibility cost) +- Cache Hit: ~350 ns/op (extremely fast cached access) +- Cache Miss: ~47,000 ns/op (compilation overhead) + +Benefits Analysis: +- Original Filters: + * Pros: Fast, type-safe, compile-time validation + * Cons: Limited flexibility, requires code changes for new conditions + +- CEL Filters: + * Pros: Runtime flexibility, powerful expressions, user-configurable + * Cons: Runtime compilation overhead, expression evaluation cost + +Recommendation: +- Use Original Filters for well-defined, static conditions +- Use CEL Filters for dynamic, user-configurable filtering requirements +- Consider hybrid approach: Original filters for basic filtering + CEL for advanced conditions +- Always use expression caching in production environments +`) +} diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go new file mode 100644 index 00000000..c21e4ee8 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go @@ -0,0 +1,368 @@ +package cel_filter + +import ( + "context" + "testing" + "time" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Helper functions for creating test data +func createTestGPU(name, namespace, gpuModel, phase string, tflops, vram float64) *tfv1.GPU { + gpu := &tfv1.GPU{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: make(map[string]string), + Annotations: make(map[string]string), + }, + Status: tfv1.GPUStatus{ + GPUModel: gpuModel, + UUID: "test-uuid-" + name, + Phase: tfv1.TensorFusionGPUPhase(phase), + Message: "Test GPU", + }, + } + + // Set available resources + if tflops > 0 || vram > 0 { + gpu.Status.Available = &tfv1.Resource{ + Tflops: 
*resource.NewMilliQuantity(int64(tflops*1000), resource.DecimalSI), + Vram: *resource.NewQuantity(int64(vram*1024*1024*1024), resource.BinarySI), + } + } + + return gpu +} + +func createTestAllocRequest(namespace, name, gpuModel, celExpression string) *tfv1.AllocRequest { + return &tfv1.AllocRequest{ + WorkloadNameNamespace: tfv1.NameNamespace{ + Name: name, + Namespace: namespace, + }, + GPUModel: gpuModel, + CELFilterExpression: celExpression, + Count: 1, + } +} + +// Test normal cases of CEL filter (including basic filtering, custom expression, labels/annotations, etc.) +func TestCELFilter_NormalCases(t *testing.T) { + ctx := context.Background() + + tests := []struct { + name string + request *tfv1.AllocRequest + gpus []*tfv1.GPU + expectedCount int + description string + }{ + { + name: "filter by GPU model", + request: createTestAllocRequest("default", "test-workload", "A100", ""), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), + createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), + createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0), + }, + expectedCount: 2, + description: "Should filter GPUs matching the specified model A100", + }, + { + name: "filter by GPU phase only", + request: createTestAllocRequest("default", "test-workload", "", ""), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), + createTestGPU("gpu-2", "default", "A100", "Pending", 150.0, 40.0), + createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0), + createTestGPU("gpu-4", "default", "A100", "Failed", 150.0, 40.0), + }, + expectedCount: 2, + description: "Should only return GPUs in Ready phase", + }, + { + name: "custom CEL expression - filter by available TFLOPS", + request: createTestAllocRequest("default", "test-workload", "", "gpu.available.tflops > 120.0"), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), + createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), + createTestGPU("gpu-3", "default", "H100", "Ready", 200.0, 80.0), + }, + expectedCount: 2, + description: "Should filter GPUs with TFLOPS > 120 and Ready phase", + }, + { + name: "custom CEL expression - filter by available VRAM", + request: createTestAllocRequest("default", "test-workload", "", "gpu.available.vram > 35000000000"), // > 35GB in bytes + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), // 40GB + createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), // 32GB + createTestGPU("gpu-3", "default", "H100", "Ready", 200.0, 80.0), // 80GB + }, + expectedCount: 2, + description: "Should filter GPUs with VRAM > 35GB and Ready phase", + }, + { + name: "combined model and custom CEL expression", + request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0"), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), + createTestGPU("gpu-2", "default", "A100", "Ready", 120.0, 40.0), + createTestGPU("gpu-3", "default", "V100", "Ready", 160.0, 32.0), + createTestGPU("gpu-4", "default", "A100", "Ready", 180.0, 40.0), + }, + expectedCount: 2, + description: "Should filter A100 GPUs with TFLOPS >= 150 and Ready phase", + }, + { + name: "filter by labels", + request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['environment'] == 'production'"), + gpus: func() []*tfv1.GPU { + gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) + 
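+				// Only gpu-1 and gpu-3 receive the production label below, so the
+				// label expression is expected to match exactly those two GPUs.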
gpu1.Labels["environment"] = "production" + gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu2.Labels["environment"] = "development" + gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) + gpu3.Labels["environment"] = "production" + return []*tfv1.GPU{gpu1, gpu2, gpu3} + }(), + expectedCount: 2, + description: "Should filter GPUs with environment=production label", + }, + { + name: "filter by annotations", + request: createTestAllocRequest("default", "test-workload", "", "gpu.annotations['priority'] == 'critical'"), + gpus: func() []*tfv1.GPU { + gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) + gpu1.Annotations["priority"] = "critical" + gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu2.Annotations["priority"] = "low" + gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) + gpu3.Annotations["priority"] = "critical" + return []*tfv1.GPU{gpu1, gpu2, gpu3} + }(), + expectedCount: 2, + description: "Should filter GPUs with priority=critical annotation", + }, + { + name: "combined labels and annotations filter", + request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'"), + gpus: func() []*tfv1.GPU { + gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) + gpu1.Labels["tier"] = "high-performance" + gpu1.Annotations["priority"] = "critical" + gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu2.Labels["tier"] = "standard" + gpu2.Annotations["priority"] = "critical" + gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) + gpu3.Labels["tier"] = "high-performance" + gpu3.Annotations["priority"] = "low" + return []*tfv1.GPU{gpu1, gpu2, gpu3} + }(), + expectedCount: 1, + description: "Should filter GPUs matching both label and annotation conditions", + }, + { + name: "empty GPU list", + request: createTestAllocRequest("default", "test-workload", "A100", ""), + gpus: []*tfv1.GPU{}, + expectedCount: 0, + description: "Should handle empty GPU list gracefully", + }, + { + name: "complex combined expression with model, resources, and metadata", + request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'"), + gpus: func() []*tfv1.GPU { + gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 180.0, 40.0) + gpu1.Labels["environment"] = "production" + gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 120.0, 40.0) + gpu2.Labels["environment"] = "production" + gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 200.0, 40.0) + gpu3.Labels["environment"] = "development" + return []*tfv1.GPU{gpu1, gpu2, gpu3} + }(), + expectedCount: 1, + description: "Should filter A100 GPUs with TFLOPS >= 150, production environment, and Ready phase", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create cache and CEL filter + cache, err := NewExpressionCache(10, 5*time.Minute) + require.NoError(t, err, "Failed to create expression cache") + + celFilter, err := NewCELFilter(tt.request, cache) + require.NoError(t, err, "Failed to create CEL filter") + + // Execute filter + workerPodKey := tfv1.NameNamespace{Name: "worker-pod", Namespace: "default"} + filteredGPUs, err := celFilter.Filter(ctx, workerPodKey, tt.gpus) + + // Verify results + require.NoError(t, err, "Filter execution should not fail") + assert.Len(t, 
filteredGPUs, tt.expectedCount, tt.description) + + // Verify filter name + assert.Contains(t, celFilter.Name(), "AllocRequest-") + assert.Contains(t, celFilter.Name(), tt.request.WorkloadNameNamespace.String()) + }) + } +} + +// Test edge and exception cases of CEL filter +func TestCELFilter_EdgeAndExceptionCases(t *testing.T) { + ctx := context.Background() + + // Test CEL expressions with various edge cases (compilation + execution) + t.Run("CEL expressions edge cases", func(t *testing.T) { + // Test GPUs for execution + testGPUs := []*tfv1.GPU{ + createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), + createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), + } + // Add GPU with nil resources + gpuWithNilResources := createTestGPU("gpu-nil", "default", "A100", "Ready", 0, 0) + gpuWithNilResources.Status.Available = nil + testGPUs = append(testGPUs, gpuWithNilResources) + + workerPodKey := tfv1.NameNamespace{Name: "worker-pod", Namespace: "default"} + + edgeCases := []struct { + name string + expression string + shouldFail bool // Whether compilation/creation should fail + expectedCount int // Expected GPU count if execution succeeds + description string + }{ + // Compilation failures + { + name: "syntax error - missing quotes", + expression: "gpu.gpuModel == A100", + shouldFail: true, + description: "Missing quotes should cause compilation error", + }, + { + name: "syntax error - invalid operator", + expression: "gpu.phase === 'Ready'", + shouldFail: true, + description: "Invalid operator should cause compilation error", + }, + { + name: "undefined variable", + expression: "jdwquygfewqndwql", + shouldFail: true, + description: "Undefined variable should fail when combined with other conditions", + }, + { + name: "whitespace only expression", + expression: " ", + shouldFail: true, + description: "Whitespace-only expression should fail", + }, + + // Compilation success but runtime behavior testing + { + name: "empty expression", + expression: "", + shouldFail: false, + expectedCount: 3, // All Ready GPUs pass + description: "Empty expression should work (no additional filtering)", + }, + { + name: "logically contradictory expression", + expression: "gpu.phase > 100 && gpu.phase < 100", + shouldFail: false, + expectedCount: 0, // No GPUs pass impossible condition + description: "Contradictory logic should compile but filter out all GPUs", + }, + { + name: "type mismatch comparison", + expression: "gpu.phase == 123", + shouldFail: false, + expectedCount: 0, // No GPUs pass type mismatch + description: "Type mismatch should return false for all GPUs", + }, + { + name: "undefined nested field access", + expression: "gpu.nonexistent.field == 'value'", + shouldFail: false, + expectedCount: 0, // No GPUs pass undefined field check + description: "Undefined nested field should return false (fail-safe)", + }, + { + name: "numeric comparison on string", + expression: "gpu.gpuModel > 50", + shouldFail: false, + expectedCount: 0, // No GPUs pass invalid comparison + description: "Invalid type comparison should return false", + }, + { + name: "null field access", + expression: "gpu.available.tflops > 100", + shouldFail: false, + expectedCount: 1, // Only A100 with 150 TFLOPS passes (V100=100, nil=fails) + description: "Null field access should be handled gracefully", + }, + { + name: "conditional null handling", + expression: "has(gpu.available) ? 
gpu.available.tflops > 120 : false", + shouldFail: false, + expectedCount: 1, // Only A100 with 150 TFLOPS + description: "Conditional expressions should handle nulls correctly", + }, + { + name: "always true expression", + expression: "true", + shouldFail: false, + expectedCount: 3, // All Ready GPUs pass + description: "Tautology should pass all Ready phase GPUs", + }, + { + name: "always false expression", + expression: "false", + shouldFail: false, + expectedCount: 0, // No GPUs pass + description: "Contradiction should filter out all GPUs", + }, + } + + for _, tt := range edgeCases { + t.Run(tt.name, func(t *testing.T) { + cache, err := NewExpressionCache(10, 5*time.Minute) + require.NoError(t, err) + + request := createTestAllocRequest("default", "test-workload", "", tt.expression) + celFilter, err := NewCELFilter(request, cache) + + if tt.shouldFail { + // Should fail at creation or execution + if err != nil { + t.Logf("✅ Expected compilation failure: %v", err) + return + } + + // If creation succeeded, should fail at execution + _, err = celFilter.Filter(ctx, workerPodKey, testGPUs) + assert.Error(t, err, "Should fail during execution: %s", tt.description) + t.Logf("✅ Expected execution failure: %v", err) + } else { + // Should succeed in both creation and execution + require.NoError(t, err, "Filter creation should succeed: %s", tt.description) + + filteredGPUs, err := celFilter.Filter(ctx, workerPodKey, testGPUs) + require.NoError(t, err, "Filter execution should succeed: %s", tt.description) + + assert.Len(t, filteredGPUs, tt.expectedCount, tt.description) + t.Logf("✅ Expression '%s': %d/%d GPUs filtered", tt.expression, len(filteredGPUs), len(testGPUs)) + } + }) + } + }) +} diff --git a/internal/gpuallocator/filter/cel_filter/constants.go b/internal/gpuallocator/filter/cel_filter/constants.go index 7ea0cc85..2e43ab74 100644 --- a/internal/gpuallocator/filter/cel_filter/constants.go +++ b/internal/gpuallocator/filter/cel_filter/constants.go @@ -24,7 +24,6 @@ const ( GPUFieldAnnotations = "annotations" // Resource information - GPUFieldCapacity = "capacity" GPUFieldAvailable = "available" GPUFieldNodeSelector = "nodeSelector" GPUFieldRunningApps = "runningApps" diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index c1ce771b..44deb3c4 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -168,14 +168,14 @@ func (s *GpuAllocator) Filter( ) ([]*tfv1.GPU, []filter.FilterDetail, error) { // Check if CEL filtering is enabled via config/flag - useCELFilter := config.GetGlobalConfig().EnableCELFilter + disableCELFilter := req.DisableCELFilter - if useCELFilter { - // New CEL-based filtering approach - return s.applyCELFilter(req, toFilterGPUs, isSimulateSchedule) - } else { - // Legacy filter approach (for rollback support) + if disableCELFilter { + // Legacy filter approach return s.applyLegacyFilters(req, toFilterGPUs, isSimulateSchedule) + } else { + // CEL filter approach + return s.applyCELFilter(req, toFilterGPUs, isSimulateSchedule) } } @@ -191,7 +191,7 @@ func (s *GpuAllocator) applyCELFilter( return nil, nil, fmt.Errorf("failed to create CEL cache: %w", err) } - allocCELFilter, err := cel_filter.NewAllocRequestCELFilter(req, cache) + allocCELFilter, err := cel_filter.NewCELFilter(req, cache) if err != nil { return nil, nil, fmt.Errorf("failed to create AllocRequest CEL filter: %w", err) } @@ -1339,11 +1339,24 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest 
return &tfv1.AllocRequest{}, "gpu count annotation is too large", nil } + disableCELFilter := false + if disabledFeatures, exists := pod.Annotations[constants.DisableFeaturesAnnotation]; exists { + disabledFeaturesList := strings.Split(disabledFeatures, ",") + for _, feature := range disabledFeaturesList { + if feature == constants.BuiltInFeatureCELFilter { + disableCELFilter = true + } + } + } + allocRequest := tfv1.AllocRequest{ PoolName: pod.Annotations[constants.GpuPoolKey], Request: gpuRequestResource, Limit: gpuLimitResource, + DisableCELFilter: disableCELFilter, + CELFilterExpression: pod.Annotations[constants.CELFilterExpressionAnnotation], + Count: uint(count), GPUModel: pod.Annotations[constants.GPUModelAnnotation], WorkloadNameNamespace: tfv1.NameNamespace{ From 69788075cb1a2e3299743a618b6b3d7616f4d405 Mon Sep 17 00:00:00 2001 From: dylan Date: Sun, 31 Aug 2025 00:39:56 -0700 Subject: [PATCH 04/34] remove deperate config --- config/samples/cel_filter_example.yaml | 74 -------------------------- 1 file changed, 74 deletions(-) delete mode 100644 config/samples/cel_filter_example.yaml diff --git a/config/samples/cel_filter_example.yaml b/config/samples/cel_filter_example.yaml deleted file mode 100644 index aaf4895e..00000000 --- a/config/samples/cel_filter_example.yaml +++ /dev/null @@ -1,74 +0,0 @@ -apiVersion: tensor-fusion.ai/v1 -kind: SchedulingConfigTemplate -metadata: - name: cel-filter-example -spec: - placement: - mode: CompactFirst - allowUsingLocalGPU: true - - # Traditional GPU filters (still supported) - gpuFilters: - - type: avoidTooMuchConnectionsOnSameGPU - params: - connectionNum: 150 - - # CEL-based filters for advanced filtering logic - celFilters: - # High priority filter: only use running GPUs - - name: "only-running-gpus" - expression: "gpu.phase == 'Running'" - priority: 100 - - # Medium-high priority: ensure sufficient resources available - - name: "sufficient-resources" - expression: "gpu.available.tflops >= 0.5 && gpu.available.vram >= 4096000000" - priority: 90 - - # Medium priority: prefer premium tier GPUs - - name: "prefer-premium-gpus" - expression: "gpu.labels != null && 'gpu-tier' in gpu.labels && gpu.labels['gpu-tier'] == 'premium'" - priority: 80 - - # Lower priority: avoid overloaded GPUs - - name: "avoid-overloaded-gpus" - expression: "size(gpu.runningApps) < 3" - priority: 70 - - # GPU model specific filters - - name: "nvidia-only" - expression: "gpu.gpuModel.startsWith('NVIDIA')" - priority: 60 - - # Complex condition example - - name: "complex-filter" - expression: | - gpu.phase == 'Running' && - gpu.available.tflops > 0.3 && - ( - (gpu.labels != null && 'workload-type' in gpu.labels && gpu.labels['workload-type'] == 'training') || - (size(gpu.runningApps) == 0) - ) - priority: 50 - - # Optional: AutoScaling configuration - autoScaling: - autoSetLimits: - enable: true - targetResource: "all" - evaluationPeriod: "5m" - extraTFlopsBufferRatio: "0.1" - ---- -apiVersion: tensor-fusion.ai/v1 -kind: SchedulingConfigTemplate -metadata: - name: simple-cel-example -spec: - placement: - mode: LowLoadFirst - celFilters: - # Simple example: only use GPUs with more than 50% TFlops available - - name: "high-availability" - expression: "gpu.available.tflops > gpu.capacity.tflops * 0.5" - priority: 100 \ No newline at end of file From d3c112afa06fdac3125351f6021de18a7be3c720 Mon Sep 17 00:00:00 2001 From: dylan Date: Sun, 31 Aug 2025 00:40:36 -0700 Subject: [PATCH 05/34] remove docs --- docs/cel-filters.md | 264 -------------------------------------------- 1 
file changed, 264 deletions(-) delete mode 100644 docs/cel-filters.md diff --git a/docs/cel-filters.md b/docs/cel-filters.md deleted file mode 100644 index 590e1d90..00000000 --- a/docs/cel-filters.md +++ /dev/null @@ -1,264 +0,0 @@ -# CEL Filters for GPU Allocation - -CEL (Common Expression Language) filters provide a powerful and flexible way to define custom GPU filtering logic in TensorFusion. This feature allows you to write expressions that determine which GPUs are eligible for allocation based on various criteria. - -## Overview - -CEL filters are defined in the `SchedulingConfigTemplate` resource and are applied during the GPU allocation process. They work alongside traditional GPU filters and provide more sophisticated filtering capabilities. - -## Configuration - -CEL filters are configured in the `placement.celFilters` field of a `SchedulingConfigTemplate`: - -```yaml -apiVersion: tensor-fusion.ai/v1 -kind: SchedulingConfigTemplate -metadata: - name: my-template -spec: - placement: - celFilters: - - name: "filter-name" - expression: "gpu.phase == 'Running'" - priority: 100 -``` - -### Fields - -- `name` (optional): A descriptive name for the filter, used for logging and debugging -- `expression` (required): The CEL expression that returns a boolean value -- `priority` (optional, default: 0): Higher priority filters are applied first - -## Available Variables - -CEL expressions have access to the following variables: - -### `gpu` Object - -The `gpu` variable contains information about the GPU being evaluated: - -```javascript -{ - "name": "gpu-1", // GPU name - "namespace": "default", // GPU namespace - "gpuModel": "NVIDIA A100", // GPU model - "uuid": "gpu-uuid", // GPU UUID - "phase": "Running", // GPU phase (Running, Pending, etc.) - "usedBy": "tensor-fusion", // Usage system - "labels": {...}, // Kubernetes labels - "annotations": {...}, // Kubernetes annotations - "capacity": { // Total GPU capacity - "tflops": 1.5, - "vram": 85899345920 // in bytes - }, - "available": { // Available GPU resources - "tflops": 1.0, - "vram": 64424509440 // in bytes - }, - "nodeSelector": {...}, // Node selector information - "runningApps": [ // Currently running applications - { - "name": "app-1", - "namespace": "default", - "count": 1 - } - ] -} -``` - -### `workerPodKey` Object - -Information about the requesting worker pod: - -```javascript -{ - "name": "worker-pod", - "namespace": "default" -} -``` - -## Expression Examples - -### Basic Filtering - -```yaml -# Only use running GPUs -- name: "running-only" - expression: "gpu.phase == 'Running'" - priority: 100 - -# Filter by GPU model -- name: "nvidia-only" - expression: "gpu.gpuModel.startsWith('NVIDIA')" - priority: 90 - -# Ensure minimum resources available -- name: "min-resources" - expression: "gpu.available.tflops >= 0.5 && gpu.available.vram >= 4294967296" - priority: 80 -``` - -### Label-Based Filtering - -```yaml -# Filter by labels -- name: "premium-tier" - expression: "gpu.labels != null && 'gpu-tier' in gpu.labels && gpu.labels['gpu-tier'] == 'premium'" - priority: 70 - -# Multiple label conditions -- name: "training-gpus" - expression: | - gpu.labels != null && - 'workload-type' in gpu.labels && - gpu.labels['workload-type'] == 'training' && - 'zone' in gpu.labels && - gpu.labels['zone'].startsWith('us-west') - priority: 60 -``` - -### Resource-Based Filtering - -```yaml -# Percentage of available resources -- name: "high-availability" - expression: "gpu.available.tflops > gpu.capacity.tflops * 0.7" - priority: 80 - -# Avoid 
overloaded GPUs -- name: "load-balancing" - expression: "size(gpu.runningApps) < 3" - priority: 50 - -# Memory-intensive workloads -- name: "high-memory" - expression: "gpu.available.vram > 34359738368" # > 32GB - priority: 60 -``` - -### Complex Conditions - -```yaml -# Complex multi-criteria filter -- name: "complex-filter" - expression: | - gpu.phase == 'Running' && - gpu.gpuModel.contains('A100') && - gpu.available.tflops > 0.8 && - ( - size(gpu.runningApps) == 0 || - (size(gpu.runningApps) < 2 && gpu.available.vram > 42949672960) - ) - priority: 90 -``` - -## CEL Language Features - -CEL supports many built-in functions and operators: - -### String Operations -- `startsWith()`, `endsWith()`, `contains()` -- String concatenation with `+` -- Regular expressions with `matches()` - -### Numeric Operations -- Standard arithmetic operators: `+`, `-`, `*`, `/`, `%` -- Comparison operators: `>`, `>=`, `<`, `<=`, `==`, `!=` - -### Logical Operations -- `&&` (and), `||` (or), `!` (not) - -### Collection Operations -- `size()` - get collection size -- `in` operator - check membership -- List/map access with `[]` - -### Conditional Expressions -- Ternary operator: `condition ? true_value : false_value` - -## Best Practices - -### Performance -1. **Order by Priority**: Place most restrictive filters first (highest priority) -2. **Avoid Complex Expressions**: Keep expressions simple for better performance -3. **Cache-Friendly**: Use consistent filter logic to benefit from any caching - -### Reliability -1. **Null Checks**: Always check for null values when accessing optional fields -2. **Fail-Safe Logic**: Design expressions to exclude GPUs on error rather than include them -3. **Test Thoroughly**: Test expressions with various GPU configurations - -### Maintainability -1. **Descriptive Names**: Use clear, descriptive names for filters -2. **Comments**: Add comments for complex expressions -3. **Modular Design**: Break complex logic into multiple simpler filters - -## Example Complete Configuration - -```yaml -apiVersion: tensor-fusion.ai/v1 -kind: SchedulingConfigTemplate -metadata: - name: production-gpu-scheduling -spec: - placement: - mode: CompactFirst - - # Traditional filters (still supported) - gpuFilters: - - type: avoidTooMuchConnectionsOnSameGPU - params: - connectionNum: 100 - - # CEL filters for advanced logic - celFilters: - # Critical filters (high priority) - - name: "operational-gpus-only" - expression: "gpu.phase == 'Running' && gpu.usedBy == 'tensor-fusion'" - priority: 100 - - - name: "sufficient-resources" - expression: "gpu.available.tflops >= 0.3 && gpu.available.vram >= 2147483648" - priority: 95 - - # Preference filters (medium priority) - - name: "prefer-nvidia" - expression: "gpu.gpuModel.startsWith('NVIDIA')" - priority: 80 - - - name: "balanced-load" - expression: "size(gpu.runningApps) < 2" - priority: 70 - - # Quality filters (lower priority) - - name: "premium-hardware" - expression: | - gpu.labels != null && - 'gpu-tier' in gpu.labels && - gpu.labels['gpu-tier'] in ['premium', 'high-performance'] - priority: 50 -``` - -## Troubleshooting - -### Common Issues - -1. **Expression Compilation Errors**: Check syntax and ensure all referenced fields exist -2. **Runtime Errors**: Add null checks for optional fields -3. **No GPUs Selected**: Verify that at least some GPUs meet all filter criteria -4. 
**Performance Issues**: Simplify complex expressions or reduce the number of filters - -### Debugging - -Enable debug logging to see detailed information about filter execution: - -```yaml -# In your logging configuration -logLevel: debug -``` - -Look for log entries containing "CEL filter applied" to see filtering results. - -## Migration from Traditional Filters - -CEL filters can be used alongside traditional GPU filters. They are applied after traditional filters in the filtering pipeline. You can gradually migrate complex traditional filters to CEL expressions for better maintainability. \ No newline at end of file From d466cdaa133ee29f8d14441dbbc9c83f08c0914a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Sep 2025 03:19:23 +0000 Subject: [PATCH 06/34] chore(deps): bump golang from 1.24 to 1.25 in /dockerfile (#325) --- dockerfile/node-discovery.Dockerfile | 2 +- dockerfile/operator.Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dockerfile/node-discovery.Dockerfile b/dockerfile/node-discovery.Dockerfile index 3fae02bd..09ac6741 100644 --- a/dockerfile/node-discovery.Dockerfile +++ b/dockerfile/node-discovery.Dockerfile @@ -1,5 +1,5 @@ # Build the manager binary -FROM golang:1.24 AS builder +FROM golang:1.25 AS builder ARG TARGETOS ARG TARGETARCH diff --git a/dockerfile/operator.Dockerfile b/dockerfile/operator.Dockerfile index fc76900c..65dfd514 100644 --- a/dockerfile/operator.Dockerfile +++ b/dockerfile/operator.Dockerfile @@ -1,5 +1,5 @@ # Build the manager binary -FROM golang:1.24 AS builder +FROM golang:1.25 AS builder ARG TARGETOS ARG TARGETARCH ARG GO_LDFLAGS From 8bd5e89b18d54ead703af8793c7c2999d2ce58c3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Sep 2025 03:19:48 +0000 Subject: [PATCH 07/34] chore(deps): bump cycjimmy/semantic-release-action from 4 to 5 (#338) --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index eeecf7e9..8dafe17b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: - name: Semantic Release if: github.event_name == 'push' id: semantic - uses: cycjimmy/semantic-release-action@v4 + uses: cycjimmy/semantic-release-action@v5 with: extra_plugins: | @semantic-release/release-notes-generator@^10 From 67b1c6401d8f2d43555f8831817cf842b679d29d Mon Sep 17 00:00:00 2001 From: Joey Yang <14833440+Code2Life@users.noreply.github.com> Date: Wed, 3 Sep 2025 19:11:59 +0800 Subject: [PATCH 08/34] fix: helm chart issue (#346) --- charts/tensor-fusion/Chart.yaml | 2 +- charts/tensor-fusion/values.schema.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/tensor-fusion/Chart.yaml b/charts/tensor-fusion/Chart.yaml index 59de69d1..c72e6082 100644 --- a/charts/tensor-fusion/Chart.yaml +++ b/charts/tensor-fusion/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.5.5 +version: 1.5.6 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/charts/tensor-fusion/values.schema.json b/charts/tensor-fusion/values.schema.json index 2c193cfd..67c74939 100644 --- a/charts/tensor-fusion/values.schema.json +++ b/charts/tensor-fusion/values.schema.json @@ -470,9 +470,9 @@ "description": "Metrics format, default to 'influx', could be 'json' or 'otel'" }, "metricsExtraPodLabels": { - "type": "array", + "type": "object", "description": "Extra pod labels to be added to metrics", - "items": { + "additionalProperties": { "type": "string" } }, From dbc088c192b1f283e649a82be1b7ebc7f6466244 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 03:27:42 +0000 Subject: [PATCH 09/34] chore(deps): bump k8s.io/kubernetes (#347) --- go.mod | 6 +++--- go.sum | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 4d2299db..72d32a0f 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5 github.com/gin-contrib/gzip v1.2.3 github.com/gin-gonic/gin v1.10.1 + github.com/go-sql-driver/mysql v1.8.1 github.com/influxdata/line-protocol/v2 v2.2.1 github.com/lithammer/shortuuid/v4 v4.2.0 github.com/mitchellh/mapstructure v1.5.0 @@ -21,6 +22,7 @@ require ( github.com/shirou/gopsutil v3.21.11+incompatible github.com/stretchr/testify v1.11.0 go.opentelemetry.io/otel v1.37.0 + go.uber.org/zap v1.27.0 golang.org/x/time v0.12.0 gomodules.xyz/jsonpatch/v2 v2.5.0 gopkg.in/natefinch/lumberjack.v2 v2.2.1 @@ -32,7 +34,7 @@ require ( k8s.io/component-base v0.33.3 k8s.io/component-helpers v0.33.3 k8s.io/klog/v2 v2.130.1 - k8s.io/kubernetes v1.33.3 + k8s.io/kubernetes v1.33.4 k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 sigs.k8s.io/controller-runtime v0.21.0 sigs.k8s.io/karpenter v1.6.1 @@ -79,7 +81,6 @@ require ( github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.26.0 // indirect - github.com/go-sql-driver/mysql v1.8.1 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/goccy/go-json v0.10.5 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -139,7 +140,6 @@ require ( go.opentelemetry.io/proto/otlp v1.4.0 // indirect go.uber.org/automaxprocs v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect - go.uber.org/zap v1.27.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect golang.org/x/arch v0.15.0 // indirect golang.org/x/crypto v0.39.0 // indirect diff --git a/go.sum b/go.sum index e98c785d..0f62b153 100644 --- a/go.sum +++ b/go.sum @@ -499,8 +499,8 @@ k8s.io/kube-scheduler v0.32.7 h1:QOvu/fNEYGg1gzzpowWHFCI8SD3vJs5Iz0qebEQADd4= k8s.io/kube-scheduler v0.32.7/go.mod h1:ez/2BnvZv2Bq1K9LpBsDgRsTvwJLAzkcpRMfY7rhLMA= k8s.io/kubelet v0.33.1 h1:x4LCw1/iZVWOKA4RoITnuB8gMHnw31HPB3S0EF0EexE= k8s.io/kubelet v0.33.1/go.mod h1:8WpdC9M95VmsqIdGSQrajXooTfT5otEj8pGWOm+KKfQ= -k8s.io/kubernetes v1.33.3 h1:dBx5Z2ZhR8kNzAwCoCz4j1niUbUrNUDVxeSj4/Ienu0= -k8s.io/kubernetes v1.33.3/go.mod h1:nrt8sldmckKz2fCZhgRX3SKfS2e+CzXATPv6ITNkU00= +k8s.io/kubernetes v1.33.4 h1:T1d5FLUYm3/KyUeV7YJhKTR980zHCHb7K2xhCSo3lE8= +k8s.io/kubernetes v1.33.4/go.mod h1:nrt8sldmckKz2fCZhgRX3SKfS2e+CzXATPv6ITNkU00= k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= nullprogram.com/x/optparse v1.0.0/go.mod 
h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= From 865bdf5d1bb365639d7f4356d76893b5f52cd4c3 Mon Sep 17 00:00:00 2001 From: 0x5457 <0x5457@protonmail.com> Date: Thu, 4 Sep 2025 12:00:51 +0800 Subject: [PATCH 10/34] fix: Potential fix for code scanning alert no. 36: Workflow does not contain permissions (#349) Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .github/workflows/test-e2e.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index 3b4fec3f..42354dfe 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -1,5 +1,8 @@ name: E2E Tests +permissions: + contents: read + on: workflow_dispatch: From 9006e96b172ee0d099d75f2a2a05acd859cad93a Mon Sep 17 00:00:00 2001 From: D Date: Thu, 4 Sep 2025 12:01:44 +0800 Subject: [PATCH 11/34] support dedicated-gpu (#345) * support dedicated gpus * support dedicated GPU * support dedicated GPU * fix test issue --- cmd/main.go | 12 +++++-- internal/cloudprovider/pricing/pricing.go | 44 +++++++++++++++++++---- internal/constants/constants.go | 1 + internal/metrics/recorder.go | 38 ++++++++++++++------ internal/webhook/v1/pod_webhook.go | 19 +++++----- internal/webhook/v1/pod_webhook_test.go | 5 ++- internal/webhook/v1/tf_parser.go | 35 ++++++++++++++++++ internal/webhook/v1/webhook_suite_test.go | 5 ++- 8 files changed, 130 insertions(+), 29 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 92021131..23cd69b8 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -217,7 +217,9 @@ func main() { // Initialize GPU allocator and set up watches allocator, portAllocator := startTensorFusionAllocators(ctx, mgr) - startWebhook(mgr, portAllocator) + // Create pricing provider for webhook + pricingProvider := pricing.NewStaticPricingProvider() + startWebhook(mgr, portAllocator, pricingProvider) scheduler := startScheduler(ctx, allocator, mgr) @@ -441,11 +443,15 @@ func startCustomResourceController( } } -func startWebhook(mgr manager.Manager, portAllocator *portallocator.PortAllocator) { +func startWebhook( + mgr manager.Manager, + portAllocator *portallocator.PortAllocator, + pricingProvider pricing.PricingProvider, +) { if os.Getenv(constants.EnableWebhookEnv) == constants.FalseStringValue { return } - if err := webhookcorev1.SetupPodWebhookWithManager(mgr, portAllocator); err != nil { + if err := webhookcorev1.SetupPodWebhookWithManager(mgr, portAllocator, pricingProvider); err != nil { setupLog.Error(err, "unable to create webhook", "webhook", "Pod") os.Exit(1) } diff --git a/internal/cloudprovider/pricing/pricing.go b/internal/cloudprovider/pricing/pricing.go index 33ee529f..e8854583 100644 --- a/internal/cloudprovider/pricing/pricing.go +++ b/internal/cloudprovider/pricing/pricing.go @@ -31,6 +31,7 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/types" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" + "k8s.io/apimachinery/pkg/api/resource" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -39,11 +40,17 @@ const ( providerAzure = "azure" ) +// CompleteGPUInfo combines GpuInfo with VRAM information from instance data +type CompleteGPUInfo struct { + *config.GpuInfo + VRAMGigabytes int32 +} + // Global data initialized at package load time var ( globalAWSGPUInstanceData map[string]GPUNodeInstanceInfoAndPrice globalAzureGPUInstanceData map[string]GPUNodeInstanceInfoAndPrice - tflopsMap map[string]*config.GpuInfo + 
tflopsMap map[string]*CompleteGPUInfo ) var readyCh = make(chan struct{}) @@ -51,8 +58,9 @@ var initOnce sync.Once // PricingProvider provides pricing information and calculations for instance types type PricingProvider interface { - GetPricing(instanceType, capacityType tfv1.CapacityTypeEnum) (float64, bool) - GetGPUNodeInstanceTypeInfo(region string) ([]string, bool) + GetPricing(instanceType string, capacityType tfv1.CapacityTypeEnum, region string) (float64, bool) + GetRegionalGPUNodeInstanceTypes(region string) ([]types.GPUNodeInstanceInfo, bool) + GetGPUCapacityByModel(gpuModel string) (resource.Quantity, resource.Quantity, bool) } type GPUNodeInstanceInfoAndPrice struct { @@ -77,7 +85,7 @@ var awsCSV string var azureCSV string func init() { - tflopsMap = make(map[string]*config.GpuInfo, 100) + tflopsMap = make(map[string]*CompleteGPUInfo, 100) } func SetTflopsMapAndInitGPUPricingInfo(ctx context.Context, gpuInfos *[]config.GpuInfo) { @@ -86,8 +94,11 @@ func SetTflopsMapAndInitGPUPricingInfo(ctx context.Context, gpuInfos *[]config.G return } for _, gpuInfo := range *gpuInfos { - tflopsMap[gpuInfo.FullModelName] = &gpuInfo - tflopsMap[gpuInfo.Model] = &gpuInfo + completeInfo := &CompleteGPUInfo{ + GpuInfo: &gpuInfo, + } + tflopsMap[gpuInfo.FullModelName] = completeInfo + tflopsMap[gpuInfo.Model] = completeInfo } initOnce.Do(func() { @@ -151,6 +162,11 @@ func loadCSVInstanceDataFromPath(ctx context.Context, data []byte, provider stri } instanceInfo.FP16TFlopsPerGPU = gpuInfo.Fp16TFlops.AsApproximateFloat64() + // Fill VRAM information if not already set + if gpuInfo.VRAMGigabytes == 0 { + gpuInfo.VRAMGigabytes = instanceInfo.VRAMGigabytesPerGPU + } + instanceInfoAndPrice := GPUNodeInstanceInfoAndPrice{ GPUNodeInstanceInfo: instanceInfo, onDemandPrice: prices[0], @@ -416,3 +432,19 @@ func (p *StaticPricingProvider) GetRegionalGPUNodeInstanceTypes(region string) ( return instanceTypes, len(instanceTypes) > 0 } + +// GetGPUCapacityByModel gets the full capacity (TFlops and VRAM) for a GPU model +// Returns TFlops, VRAM, and whether found +func (p *StaticPricingProvider) GetGPUCapacityByModel(gpuModel string) (resource.Quantity, resource.Quantity, bool) { + <-readyCh + + gpuInfo, exists := tflopsMap[gpuModel] + if !exists { + return resource.Quantity{}, resource.Quantity{}, false + } + + tflops := gpuInfo.Fp16TFlops + vram := *resource.NewQuantity(int64(gpuInfo.VRAMGigabytes)*constants.GiBToBytes, resource.BinarySI) + + return tflops, vram, true +} diff --git a/internal/constants/constants.go b/internal/constants/constants.go index 32b3d6bc..bf95b3d9 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -69,6 +69,7 @@ const ( GPUModelAnnotation = Domain + "/gpu-model" // GPU ID list is assigned by scheduler, should not specified by user GPUDeviceIDsAnnotation = Domain + "/gpu-ids" + DedicatedGPUAnnotation = Domain + "/dedicated-gpu" SetPendingOwnedWorkloadAnnotation = Domain + "/pending-owned-workload" PricingAnnotation = Domain + "/hourly-pricing" // In remote vGPU mode, selected workload is set by user with /workload annotation or generated by system diff --git a/internal/metrics/recorder.go b/internal/metrics/recorder.go index 9050df00..d01ad315 100644 --- a/internal/metrics/recorder.go +++ b/internal/metrics/recorder.go @@ -187,19 +187,37 @@ func SetPoolMetrics(poolObj *tfv1.GPUPool) { } if poolObj.Status.VirtualAvailableTFlops != nil && poolObj.Status.VirtualAvailableVRAM != nil { - poolMetricsMap[poolObj.Name].AllocatedVramPercentToVirtualCap = 
poolMetricsMap[poolObj.Name].AllocatedVramBytes / - poolObj.Status.VirtualVRAM.AsApproximateFloat64() * 100 + virtualVRAM := poolObj.Status.VirtualVRAM.AsApproximateFloat64() + virtualTFlops := poolObj.Status.VirtualTFlops.AsApproximateFloat64() - poolMetricsMap[poolObj.Name].AllocatedTflopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedTflops / - poolObj.Status.VirtualTFlops.AsApproximateFloat64() * 100 - poolMetricsMap[poolObj.Name].AssignedLimitedTFlops = poolObj.Status.VirtualTFlops.AsApproximateFloat64() - + if virtualVRAM > 0 { + poolMetricsMap[poolObj.Name].AllocatedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedVramBytes / virtualVRAM * 100 + } else { + poolMetricsMap[poolObj.Name].AllocatedVramPercentToVirtualCap = 0 + } + + if virtualTFlops > 0 { + poolMetricsMap[poolObj.Name].AllocatedTflopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedTflops / virtualTFlops * 100 + } else { + poolMetricsMap[poolObj.Name].AllocatedTflopsPercentToVirtualCap = 0 + } + + poolMetricsMap[poolObj.Name].AssignedLimitedTFlops = virtualTFlops - poolObj.Status.VirtualAvailableTFlops.AsApproximateFloat64() - poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes = poolObj.Status.VirtualVRAM.AsApproximateFloat64() - + poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes = virtualVRAM - poolObj.Status.VirtualAvailableVRAM.AsApproximateFloat64() - poolMetricsMap[poolObj.Name].AssignedLimitedTFlopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedTFlops / - poolObj.Status.VirtualTFlops.AsApproximateFloat64() * 100 - poolMetricsMap[poolObj.Name].AssignedLimitedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes / - poolObj.Status.VirtualVRAM.AsApproximateFloat64() * 100 + + if virtualTFlops > 0 { + poolMetricsMap[poolObj.Name].AssignedLimitedTFlopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedTFlops / virtualTFlops * 100 + } else { + poolMetricsMap[poolObj.Name].AssignedLimitedTFlopsPercentToVirtualCap = 0 + } + + if virtualVRAM > 0 { + poolMetricsMap[poolObj.Name].AssignedLimitedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes / virtualVRAM * 100 + } else { + poolMetricsMap[poolObj.Name].AssignedLimitedVramPercentToVirtualCap = 0 + } } poolMetricsMap[poolObj.Name].GPUCount = int(poolObj.Status.TotalGPUs) } diff --git a/internal/webhook/v1/pod_webhook.go b/internal/webhook/v1/pod_webhook.go index 53610ffe..542a3ab0 100644 --- a/internal/webhook/v1/pod_webhook.go +++ b/internal/webhook/v1/pod_webhook.go @@ -37,6 +37,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/portallocator" "github.com/NexusGPU/tensor-fusion/internal/utils" @@ -46,24 +47,26 @@ import ( var httpClient = &http.Client{Timeout: 10 * time.Second} // SetupPodWebhookWithManager registers the webhook for Pod in the manager. 
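+// The pricing provider threaded in below is what lets the webhook resolve
+// full-card capacity for dedicated-GPU pods. For example (annotation keys and
+// the "true" literal here assume the project defaults, i.e. the
+// tensor-fusion.ai domain):
+//   tensor-fusion.ai/dedicated-gpu: "true"
+//   tensor-fusion.ai/gpu-model: "A100"
+// handleDedicatedGPU (tf_parser.go, later in this patch) then pins the
+// workload's requests and limits to the model's full TFlops and VRAM.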
-func SetupPodWebhookWithManager(mgr ctrl.Manager, portAllocator *portallocator.PortAllocator) error { +func SetupPodWebhookWithManager(mgr ctrl.Manager, portAllocator *portallocator.PortAllocator, pricingProvider pricing.PricingProvider) error { webhookServer := mgr.GetWebhookServer() webhookServer.Register("/mutate-v1-pod", &admission.Webhook{ Handler: &TensorFusionPodMutator{ - decoder: admission.NewDecoder(runtime.NewScheme()), - Client: mgr.GetClient(), - portAllocator: portAllocator, + decoder: admission.NewDecoder(runtime.NewScheme()), + Client: mgr.GetClient(), + portAllocator: portAllocator, + pricingProvider: pricingProvider, }, }) return nil } type TensorFusionPodMutator struct { - Client client.Client - decoder admission.Decoder - portAllocator *portallocator.PortAllocator + Client client.Client + decoder admission.Decoder + portAllocator *portallocator.PortAllocator + pricingProvider pricing.PricingProvider } // Handle implements admission.Handler interface. @@ -100,7 +103,7 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque return admission.Errored(http.StatusBadRequest, fmt.Errorf("failed to marshal current pod: %w", err)) } - tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, pod) + tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, pod, m.pricingProvider) if err != nil { return admission.Errored(http.StatusInternalServerError, fmt.Errorf("parse tf resources: %w", err)) } diff --git a/internal/webhook/v1/pod_webhook_test.go b/internal/webhook/v1/pod_webhook_test.go index 55f29233..d72770cc 100644 --- a/internal/webhook/v1/pod_webhook_test.go +++ b/internal/webhook/v1/pod_webhook_test.go @@ -23,6 +23,7 @@ import ( "net/http" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" . 
"github.com/onsi/ginkgo/v2" @@ -532,7 +533,9 @@ var _ = Describe("TensorFusionPodMutator", func() { }, }, } - tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, pod) + // Create a mock pricing provider for testing + mockPricingProvider := &pricing.StaticPricingProvider{} + tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, pod, mockPricingProvider) Expect(err).NotTo(HaveOccurred()) Expect(tfInfo.ContainerNames).To(HaveLen(1)) Expect(tfInfo.ContainerNames[0]).To(Equal("test-container")) diff --git a/internal/webhook/v1/tf_parser.go b/internal/webhook/v1/tf_parser.go index bf805b76..cd72fbc1 100644 --- a/internal/webhook/v1/tf_parser.go +++ b/internal/webhook/v1/tf_parser.go @@ -7,6 +7,7 @@ import ( "strings" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/utils" corev1 "k8s.io/api/core/v1" @@ -29,6 +30,7 @@ func ParseTensorFusionInfo( ctx context.Context, k8sClient client.Client, pod *corev1.Pod, + pricingProvider pricing.PricingProvider, ) (utils.TensorFusionInfo, error) { var info utils.TensorFusionInfo if pod.Annotations == nil { @@ -115,6 +117,12 @@ func ParseTensorFusionInfo( workloadProfile.Spec.GPUModel = gpuModel } + // Handle dedicated GPU logic + err = handleDedicatedGPU(pod, workloadProfile, pricingProvider) + if err != nil { + return info, fmt.Errorf("handle dedicated GPU: %w", err) + } + info.Profile = &workloadProfile.Spec info.ContainerNames = containerNames return info, nil @@ -227,3 +235,30 @@ func setDefaultQuotasIfExists(workloadProfile *tfv1.WorkloadProfile, single tfv1 } } } + +// handleDedicatedGPU handles dedicated GPU annotation by setting full GPU capacity +func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile, pricingProvider pricing.PricingProvider) error { + dedicatedGPU, ok := pod.Annotations[constants.DedicatedGPUAnnotation] + if !ok || dedicatedGPU != constants.TrueStringValue { + return nil // Not a dedicated GPU request + } + + // Must have GPU model specified for dedicated GPU + if workloadProfile.Spec.GPUModel == "" { + return fmt.Errorf("dedicated GPU requires gpu-model annotation to be specified") + } + + // Get full GPU capacity from pricing provider + tflops, vram, found := pricingProvider.GetGPUCapacityByModel(workloadProfile.Spec.GPUModel) + if !found { + return fmt.Errorf("could not find capacity information for GPU model: %s", workloadProfile.Spec.GPUModel) + } + + // Set full capacity for both requests and limits + workloadProfile.Spec.Resources.Requests.Tflops = tflops + workloadProfile.Spec.Resources.Requests.Vram = vram + workloadProfile.Spec.Resources.Limits.Tflops = tflops + workloadProfile.Spec.Resources.Limits.Vram = vram + + return nil +} diff --git a/internal/webhook/v1/webhook_suite_test.go b/internal/webhook/v1/webhook_suite_test.go index 4e5d369b..26a6685d 100644 --- a/internal/webhook/v1/webhook_suite_test.go +++ b/internal/webhook/v1/webhook_suite_test.go @@ -27,6 +27,7 @@ import ( "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/portallocator" . 
"github.com/onsi/ginkgo/v2" @@ -134,11 +135,13 @@ var _ = BeforeSuite(func() { }) Expect(err).NotTo(HaveOccurred()) + // Create a mock pricing provider for testing + mockPricingProvider := &pricing.StaticPricingProvider{} err = SetupPodWebhookWithManager(mgr, &portallocator.PortAllocator{ PortRangeStartCluster: 42000, PortRangeEndCluster: 62000, BitmapCluster: make([]uint64, (62000-42000)/64+1), - }) + }, mockPricingProvider) Expect(err).NotTo(HaveOccurred()) // +kubebuilder:scaffold:webhook From 0389852aa9fd8780c82b31f9c28fda05223cfd47 Mon Sep 17 00:00:00 2001 From: Joey Yang <14833440+Code2Life@users.noreply.github.com> Date: Thu, 4 Sep 2025 18:44:59 +0800 Subject: [PATCH 12/34] fix: skip gpu limiter not working issue, observability optimize (#350) * fix: skip gpu limiter not working issue * fix: avoid k8s QoS side effect for inject lib init container * fix: potential panic issues * fix: remove unused event --- config/samples/dynamic-config.yaml | 275 ++++++++++++++++-- internal/cloudprovider/common/utils.go | 14 +- internal/cloudprovider/karpenter/nodeclaim.go | 6 +- .../tensorfusioncluster_controller.go | 3 - .../tensorfusionworkload_controller.go | 1 - internal/metrics/encoders/influx.go | 8 +- internal/metrics/recorder.go | 20 +- internal/utils/compose.go | 45 ++- 8 files changed, 329 insertions(+), 43 deletions(-) diff --git a/config/samples/dynamic-config.yaml b/config/samples/dynamic-config.yaml index c3102f3b..ae9350a3 100644 --- a/config/samples/dynamic-config.yaml +++ b/config/samples/dynamic-config.yaml @@ -1,23 +1,260 @@ metricsTTL: 30d # default to 'influx', influx v2 line protocol -metricsFormat: json +metricsFormat: influx -alertRules: -- name: GPUTFlopsFull - query: | - SELECT - node, - pool, - uuid, - avg(compute_percentage) AS compute_used - FROM tf_gpu_usage - WHERE compute_percentage > {{ .Threshold }} AND {{ .Conditions }} - GROUP BY node, pool, uuid - threshold: 97 - evaluationInterval: 30s - consecutiveCount: 4 - severity: P1 - summary: "GPU TFlops Full, used {{ .compute_used }}% on {{ .node }} {{ .uuid }}" - alertTargetInstance: "{{ .uuid }}" - description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has TFlops usage above {{ .Threshold }}% for 4 consecutive 30s, average usage: {{ .compute_used }}%" \ No newline at end of file +alertRules: + # Worker TFlops throttled alert + - name: WorkerTFlopsThrottled + query: | + SELECT workload, worker, uuid, node, MAX(compute_throttled_cnt)-MIN(compute_throttled_cnt) as throttled_increase + FROM tf_worker_usage + WHERE {{ .Conditions }} + GROUP BY workload, worker, uuid, node + HAVING throttled_increase > {{ .Threshold }} + threshold: 0 + evaluationInterval: 15s + consecutiveCount: 3 + severity: P1 + summary: "Worker TFlops Throttled" + description: "Worker {{ .worker }} from Node {{ .node }} is using more than {{ .Threshold }}% of its TFlops limit" + alertTargetInstance: "{{ .worker }}-{{ .uuid }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # Worker VRAM switching too frequent alert + - name: WorkerVRAMSwitchCountIncreasing + query: | + SELECT workload, worker, uuid, node, MAX(vram_resumed_cnt)-MIN(vram_resumed_cnt) as switch_increase + FROM tf_worker_usage + WHERE {{ .Conditions }} + GROUP BY workload, worker, uuid, node + HAVING switch_increase > {{ .Threshold }} + threshold: 0 + evaluationInterval: 2m + consecutiveCount: 1 + severity: P1 + summary: "Worker VRAM Switch Count Increasing" + description: "Worker {{ .worker }} from Node {{ .node }} has switched VRAM {{ 
.switch_increase }} times in the last 2 minutes, the GPU may be oversubscribed" + alertTargetInstance: "{{ .worker }}-{{ .uuid }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # Worker cannot scale up / be scheduled alert + - name: WorkerAllocationFailed + query: | + SELECT pool, (MAX(total_allocation_fail_cnt) - MIN(total_allocation_fail_cnt)) as failure_increase + FROM tf_system_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING failure_increase > {{ .Threshold }} + threshold: 0 + evaluationInterval: 30s + consecutiveCount: 1 + severity: P1 + summary: "Worker allocation failed for GPU Pool {{ .pool }}" + description: "Worker allocation failed {{ .failure_increase }} times in the last 30 seconds for GPU Pool {{ .pool }}" + alertTargetInstance: "{{ .pool }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # Single GPU Alerts + + # GPU VRAM Full Alert + - name: GPUVRAMFull + query: | + SELECT + node, + pool, + uuid, + avg(memory_percentage) AS memory_used + FROM tf_gpu_usage + WHERE memory_percentage > {{ .Threshold }} AND {{ .Conditions }} + GROUP BY node, pool, uuid + threshold: 97 + evaluationInterval: 30s + consecutiveCount: 2 + severity: P1 + summary: "GPU VRAM Full, used {{ .memory_used }}% on {{ .node }} {{ .uuid }}" + alertTargetInstance: "{{ .uuid }}" + description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has VRAM usage above {{ .Threshold }}% for 2 consecutive 30s, average usage: {{ .memory_used }}%" + + # GPU TFlops Full Alert + - name: GPUTFlopsFull + query: | + SELECT + node, + pool, + uuid, + avg(compute_percentage) AS compute_used + FROM tf_gpu_usage + WHERE compute_percentage > {{ .Threshold }} AND {{ .Conditions }} + GROUP BY node, pool, uuid + threshold: 97 + evaluationInterval: 30s + consecutiveCount: 4 + severity: P1 + summary: "GPU TFlops Full, used {{ .compute_used }}% on {{ .node }} {{ .uuid }}" + alertTargetInstance: "{{ .uuid }}" + description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has TFlops usage above {{ .Threshold }}% for 4 consecutive 30s, average usage: {{ .compute_used }}%" + + # GPU Temperature alert + - name: GPUTemperatureHigh + query: | + SELECT + node, + pool, + uuid, + avg(temperature) AS avg_temperature + FROM tf_gpu_usage + WHERE temperature > {{ .Threshold }} AND {{ .Conditions }} + GROUP BY node, pool, uuid + threshold: 90 + evaluationInterval: 30s + consecutiveCount: 3 + severity: P1 + summary: "GPU Temperature High, {{ .avg_temperature }}°C on {{ .node }} {{ .uuid }}" + alertTargetInstance: "{{ .uuid }}" + description: "GPU {{ .uuid }} from Node {{ .node }} has temperature above {{ .Threshold }}°C, average temperature: {{ .avg_temperature }}°C, GPU Pool: {{ .pool }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # GPU Pool Alerts + + # Node TFlops allocation alert + - name: NodeTFlopsAllocationCritical + query: | + SELECT node, pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING tflops_available < {{ .Threshold }} + threshold: 5 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P0 + summary: "Available TFlops below threshold, remaining {{ .tflops_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + - name: NodeTFlopsAllocationWarning + query: | + SELECT node, pool, (100 - avg(allocated_tflops_percent)) as tflops_available + 
FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING tflops_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Node available TFlops below threshold, remaining {{ .tflops_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + # Pool TFlops allocation alert - Total + - name: PoolTotalTFlopsAllocationCritical + query: | + SELECT pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING tflops_available < {{ .Threshold }} + threshold: 5 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P0 + summary: "Pool available TFlops below threshold, remaining {{ .tflops_available }}%" + description: "Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .pool }}" + + - name: PoolTotalTFlopsAllocationWarning + query: | + SELECT pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING tflops_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Pool available TFlops below threshold, remaining {{ .tflops_available }}%" + description: "Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .pool }}" + + # Node VRAM allocation alert + - name: NodeVRAMAllocationCritical + query: | + SELECT node, pool, (100 - avg(allocated_vram_percent)) as vram_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING vram_available < {{ .Threshold }} + threshold: 5 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Node available VRAM below threshold, remaining {{ .vram_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available VRAM below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + - name: NodeVRAMAllocationWarning + query: | + SELECT node, pool, (100 - avg(allocated_vram_percent)) as vram_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING vram_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Node available VRAM below threshold, remaining {{ .vram_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available VRAM below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + # Pool VRAM allocation alert + - name: PoolVRAMAllocationWarning + query: | + SELECT pool, (100 - avg(allocated_vram_percent)) as vram_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING vram_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Pool available VRAM below threshold, remaining {{ .vram_available }}% for {{ .pool }}" + description: "Pool {{ .pool }} has available VRAM below {{ .Threshold }}%" + alertTargetInstance: "{{ .pool }}" + + # Empty or Idle GPU Alert + - name: EmptyGPU + query: | + SELECT DISTINCT node + FROM tf_node_metrics + WHERE {{ .Conditions }} AND node NOT IN ( + SELECT DISTINCT node + FROM tf_worker_usage + WHERE {{ .Conditions }} + ) + threshold: 0 + evaluationInterval: 5m + consecutiveCount: 2 + severity: P2 + summary: "Empty GPU without any workload, Node {{ .node }}" 
+ description: "GPU Node {{ .node }} has no workload running, should be decommissioned" + alertTargetInstance: "{{ .node }}" + + - name: IdleGPU + query: | + SELECT node, pool, uuid, avg(compute_percentage) as compute, avg(memory_percentage) vram + FROM tf_gpu_usage + WHERE {{ .Conditions }} + GROUP BY node, pool, uuid + HAVING compute < 1 and vram < {{ .Threshold }}; + threshold: 5 + evaluationInterval: 10m + consecutiveCount: 3 + severity: P2 + summary: "Idle GPU found: {{ .uuid }} on Node {{ .node }}" + description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has been idle for 3 consecutive 10m, compute: {{ .compute }}, vram: {{ .vram }}" + alertTargetInstance: "{{ .uuid }}" diff --git a/internal/cloudprovider/common/utils.go b/internal/cloudprovider/common/utils.go index fbe882ab..788ba96d 100644 --- a/internal/cloudprovider/common/utils.go +++ b/internal/cloudprovider/common/utils.go @@ -131,6 +131,16 @@ func CalculateLeastCostGPUNodes(ctx context.Context, provider types.GPUNodeProvi nodes := make([]tfv1.GPUNodeClaimSpec, 0, bestNumInstances) for i := int64(0); i < bestNumInstances; i++ { + + tflopsQuantity, err := resource.ParseQuantity(fmt.Sprintf("%f", bestInstance.FP16TFlopsPerGPU*float64(bestInstance.GPUCount))) + if err != nil { + return nil, fmt.Errorf("failed to parse GPUDeviceOffered: %v", err) + } + + vramQuantity, err := resource.ParseQuantity(fmt.Sprintf("%dGi", bestInstance.VRAMGigabytesPerGPU*bestInstance.GPUCount)) + if err != nil { + return nil, fmt.Errorf("failed to parse VRAMOffered: %v", err) + } nodes = append(nodes, tfv1.GPUNodeClaimSpec{ NodeName: fmt.Sprintf("%s-%s", pool.Name, generateRandomString(8)), InstanceType: bestInstance.InstanceType, @@ -139,8 +149,8 @@ func CalculateLeastCostGPUNodes(ctx context.Context, provider types.GPUNodeProvi Zone: zone, CapacityType: preferredCapacityType, - TFlopsOffered: resource.MustParse(fmt.Sprintf("%f", bestInstance.FP16TFlopsPerGPU*float64(bestInstance.GPUCount))), - VRAMOffered: resource.MustParse(fmt.Sprintf("%dGi", bestInstance.VRAMGigabytesPerGPU*bestInstance.GPUCount)), + TFlopsOffered: tflopsQuantity, + VRAMOffered: vramQuantity, GPUDeviceOffered: bestInstance.GPUCount, ExtraParams: cluster.Spec.ComputingVendor.Params.ExtraParams, diff --git a/internal/cloudprovider/karpenter/nodeclaim.go b/internal/cloudprovider/karpenter/nodeclaim.go index 2877e80d..15c8dcc0 100644 --- a/internal/cloudprovider/karpenter/nodeclaim.go +++ b/internal/cloudprovider/karpenter/nodeclaim.go @@ -318,7 +318,11 @@ func (p KarpenterGPUNodeProvider) buildNodeClaim(ctx context.Context, param *tfv // Add GPU resources if specified (Karpenter supports nvidia.com/gpu) if param.GPUDeviceOffered > 0 { - resourceRequests[karpenterConfig.GPUResourceName] = resource.MustParse(fmt.Sprintf("%d", param.GPUDeviceOffered)) + quantity, err := resource.ParseQuantity(fmt.Sprintf("%d", param.GPUDeviceOffered)) + if err != nil { + return nil, fmt.Errorf("failed to parse GPUDeviceOffered: %v", err) + } + resourceRequests[karpenterConfig.GPUResourceName] = quantity } // query nodeClass and build NodeClassRef diff --git a/internal/controller/tensorfusioncluster_controller.go b/internal/controller/tensorfusioncluster_controller.go index a2f8ba12..d4f464c3 100644 --- a/internal/controller/tensorfusioncluster_controller.go +++ b/internal/controller/tensorfusioncluster_controller.go @@ -43,7 +43,6 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/metrics" utils 
"github.com/NexusGPU/tensor-fusion/internal/utils" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -382,7 +381,6 @@ func (r *TensorFusionClusterReconciler) checkTFClusterComponentsReady(ctx contex constants.LabelKeyOwner: tfc.GetName(), })) if err != nil { - r.Recorder.Eventf(tfc, corev1.EventTypeWarning, "CheckComponentStatusError", err.Error()) return false, nil, fmt.Errorf("failed to list GPUPools: %w", err) } if len(pools.Items) != len(tfc.Spec.GPUPools) { @@ -411,7 +409,6 @@ func (r *TensorFusionClusterReconciler) updateTFClusterStatus(ctx context.Contex } } if err := r.Status().Update(ctx, tfc); err != nil { - r.Recorder.Eventf(tfc, corev1.EventTypeWarning, "UpdateClusterStatusError", err.Error()) return err } return nil diff --git a/internal/controller/tensorfusionworkload_controller.go b/internal/controller/tensorfusionworkload_controller.go index 1ec0d722..bc8ced78 100644 --- a/internal/controller/tensorfusionworkload_controller.go +++ b/internal/controller/tensorfusionworkload_controller.go @@ -347,7 +347,6 @@ func (r *TensorFusionWorkloadReconciler) updateStatus( readyCondition.Status = metav1.ConditionFalse readyCondition.Reason = "WorkerFailed" readyCondition.Message = fmt.Sprintf("Failed workers num: %d", failedWorkers) - r.Recorder.Eventf(workload, corev1.EventTypeWarning, "WorkerFailed", "Failed workers num: %d", failedWorkers) } else if workload.Spec.IsDynamicReplica() { // for dynamic replicas, if no worker failed, indicate workload is running phase = tfv1.TensorFusionWorkloadPhaseRunning diff --git a/internal/metrics/encoders/influx.go b/internal/metrics/encoders/influx.go index a459c7ee..4d089759 100644 --- a/internal/metrics/encoders/influx.go +++ b/internal/metrics/encoders/influx.go @@ -4,6 +4,7 @@ import ( "time" metricsProto "github.com/influxdata/line-protocol/v2/lineprotocol" + "k8s.io/klog/v2" ) // InfluxStrategy implements InfluxDB line protocol encoding @@ -28,7 +29,12 @@ func (s *InfluxStrategy) AddTag(key, value string) { } func (s *InfluxStrategy) AddField(key string, value any) { - s.enc.AddField(key, metricsProto.MustNewValue(value)) + v, parsed := metricsProto.NewValue(value) + if !parsed { + klog.Error("metrics influx encoder failed to parse value: ", key, value) + return + } + s.enc.AddField(key, v) } func (s *InfluxStrategy) EndLine(timestamp time.Time) { diff --git a/internal/metrics/recorder.go b/internal/metrics/recorder.go index d01ad315..f1c14a39 100644 --- a/internal/metrics/recorder.go +++ b/internal/metrics/recorder.go @@ -413,16 +413,16 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { enc.StartLine("tf_pool_metrics") enc.AddTag("pool", metrics.PoolName) enc.AddTag("phase", metrics.Phase) - enc.AddField("allocatedTflops", metrics.AllocatedTflops) - enc.AddField("allocatedTflopsPercent", metrics.AllocatedTflopsPercent) - enc.AddField("allocatedTflopsPercentVirtual", metrics.AllocatedTflopsPercentToVirtualCap) - enc.AddField("allocatedVramBytes", metrics.AllocatedVramBytes) - enc.AddField("allocatedVramPercent", metrics.AllocatedVramPercent) - enc.AddField("allocatedVramPercentVirtual", metrics.AllocatedVramPercentToVirtualCap) - enc.AddField("assignedLimitedTFlops", metrics.AssignedLimitedTFlops) - enc.AddField("assignedLimitedVramBytes", metrics.AssignedLimitedVramBytes) - enc.AddField("assignedLimitedTFlopsPercentVirtual", metrics.AssignedLimitedTFlopsPercentToVirtualCap) - enc.AddField("assignedLimitedVramPercentVirtual", metrics.AssignedLimitedVramPercentToVirtualCap) + 
enc.AddField("allocated_tflops", metrics.AllocatedTflops) + enc.AddField("allocated_tflops_percent", metrics.AllocatedTflopsPercent) + enc.AddField("allocated_tflops_percent_virtual", metrics.AllocatedTflopsPercentToVirtualCap) + enc.AddField("allocated_vram_bytes", metrics.AllocatedVramBytes) + enc.AddField("allocated_vram_percent", metrics.AllocatedVramPercent) + enc.AddField("allocated_vram_percent_virtual", metrics.AllocatedVramPercentToVirtualCap) + enc.AddField("limited_tflops", metrics.AssignedLimitedTFlops) + enc.AddField("limited_vram_bytes", metrics.AssignedLimitedVramBytes) + enc.AddField("limited_tflops_percent_virtual", metrics.AssignedLimitedTFlopsPercentToVirtualCap) + enc.AddField("limited_vram_percent_virtual", metrics.AssignedLimitedVramPercentToVirtualCap) enc.AddField("gpu_count", int64(metrics.GPUCount)) enc.EndLine(now) } diff --git a/internal/utils/compose.go b/internal/utils/compose.go index e7170881..344228b0 100644 --- a/internal/utils/compose.go +++ b/internal/utils/compose.go @@ -16,6 +16,10 @@ import ( "k8s.io/utils/ptr" ) +var injectLibResource v1.ResourceList = v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("20m"), + v1.ResourceMemory: resource.MustParse("64Mi"), +} var nodeDiscoveryDefaultRequests v1.ResourceList = v1.ResourceList{ v1.ResourceCPU: resource.MustParse("20m"), v1.ResourceMemory: resource.MustParse("64Mi"), @@ -175,6 +179,11 @@ func AddTFDefaultClientConfBeforePatch( MountPath: constants.TFLibsVolumeMountPath, }, }, + Resources: v1.ResourceRequirements{ + Requests: injectLibResource, + Limits: injectLibResource, + }, + Env: convertDisabledFeatures4InjectLib(pod.Annotations[constants.DisableFeaturesAnnotation]), }) pod.Spec.Volumes = append(pod.Spec.Volumes, v1.Volume{ Name: constants.TFLibsVolumeName, @@ -302,18 +311,42 @@ func AddTFDefaultClientConfBeforePatch( } func convertDisabledFeaturesToEnvs(disabledFeatures string, envList []v1.EnvVar) []v1.EnvVar { - disabledFeaturesList := strings.Split(disabledFeatures, ",") - for _, feature := range disabledFeaturesList { + disabledFeaturesList := strings.SplitSeq(disabledFeatures, ",") + for feature := range disabledFeaturesList { if feat, ok := featureShortcutMap[feature]; ok { - envList = append(envList, v1.EnvVar{ - Name: feat.EnvName, - Value: feat.EnvValue, - }) + if !lo.ContainsBy(envList, func(item v1.EnvVar) bool { + return item.Name == feat.EnvName + }) { + envList = append(envList, v1.EnvVar{ + Name: feat.EnvName, + Value: feat.EnvValue, + }) + } } } return envList } +func convertDisabledFeatures4InjectLib(disabledFeatures string) []v1.EnvVar { + if disabledFeatures == "" { + return []v1.EnvVar{} + } + disabledFeaturesList := strings.SplitSeq(disabledFeatures, ",") + + // GPU limiter by-pass take effect in bootstrap stage, add special handling here + for feature := range disabledFeaturesList { + if feature == constants.BuiltInFeaturesGpuLimiter { + return []v1.EnvVar{ + { + Name: featureShortcutMap[feature].EnvName, + Value: featureShortcutMap[feature].EnvValue, + }, + } + } + } + return []v1.EnvVar{} +} + func AddTFHypervisorConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, pool *tfv1.GPUPool) { // Hypervisor needs to read /proc to map pod with processID spec.HostPID = true From c0a35005c7bde6bc79bb42d2454bb3391a236ada Mon Sep 17 00:00:00 2001 From: D Date: Sun, 7 Sep 2025 22:49:29 +0800 Subject: [PATCH 13/34] fix: init pricing overwrite vram to 0 (#351) * support dedicated gpus * support dedicated GPU * support dedicated GPU * fix test issue * fix init pricing override vran 
* Revert "fix init pricing override vran" This reverts commit d0bea18f1b6777af66c71b300e9ba891453b5359. * fix init pricing override vram --- internal/cloudprovider/pricing/pricing.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/cloudprovider/pricing/pricing.go b/internal/cloudprovider/pricing/pricing.go index e8854583..45dd09bb 100644 --- a/internal/cloudprovider/pricing/pricing.go +++ b/internal/cloudprovider/pricing/pricing.go @@ -94,6 +94,9 @@ func SetTflopsMapAndInitGPUPricingInfo(ctx context.Context, gpuInfos *[]config.G return } for _, gpuInfo := range *gpuInfos { + if tflopsMap[gpuInfo.FullModelName] != nil { + continue + } completeInfo := &CompleteGPUInfo{ GpuInfo: &gpuInfo, } From f25c65db2e467d3c9c0dd0476b0622abd552fd1e Mon Sep 17 00:00:00 2001 From: Joey Yang <14833440+Code2Life@users.noreply.github.com> Date: Mon, 8 Sep 2025 13:37:50 +0800 Subject: [PATCH 14/34] fix: add node hash for gpu k8s node, owner ref for hypervisor, isolate shm (#352) --- internal/constants/constants.go | 1 + internal/controller/gpunode_controller.go | 25 ++++++-- internal/controller/gpupool_controller.go | 63 +++++++++++++++++++ .../controller/gpupool_controller_test.go | 8 +++ internal/controller/node_controller.go | 31 +++++---- internal/utils/compose.go | 16 ++--- internal/utils/reconcile.go | 9 +++ 7 files changed, 125 insertions(+), 28 deletions(-) diff --git a/internal/constants/constants.go b/internal/constants/constants.go index bf95b3d9..81470022 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -30,6 +30,7 @@ const ( LabelKeyClusterOwner = Domain + "/cluster" LabelKeyNodeClass = Domain + "/node-class" LabelKeyPodTemplateHash = Domain + "/pod-template-hash" + LabelNodeSelectorHash = Domain + "/node-selector-hash" LabelComponent = Domain + "/component" // used by TF connection, for matching the related connections when worker Pod state changed LabelWorkerName = Domain + "/worker-name" diff --git a/internal/controller/gpunode_controller.go b/internal/controller/gpunode_controller.go index 9035ecdd..054d5922 100644 --- a/internal/controller/gpunode_controller.go +++ b/internal/controller/gpunode_controller.go @@ -140,7 +140,7 @@ func (r *GPUNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } - hypervisorName, err := r.reconcileHypervisorPod(ctx, node, poolObj) + hypervisorName, err := r.reconcileHypervisorPod(ctx, node, poolObj, coreNode) if err != nil { return ctrl.Result{}, err } @@ -319,7 +319,12 @@ func (r *GPUNodeReconciler) reconcileNodeDiscoveryJob( return nil } -func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx context.Context, node *tfv1.GPUNode, pool *tfv1.GPUPool) (string, error) { +func (r *GPUNodeReconciler) reconcileHypervisorPod( + ctx context.Context, + node *tfv1.GPUNode, + pool *tfv1.GPUPool, + k8sNode *corev1.Node, +) (string, error) { log := log.FromContext(ctx) if pool.Spec.ComponentConfig == nil || pool.Spec.ComponentConfig.Hypervisor == nil { @@ -361,7 +366,7 @@ func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx context.Context, node *tf } log.Info("hypervisor pod not found, creating new one", "node", node.Name) - if err := r.createHypervisorPod(ctx, key, node, pool); err != nil { + if err := r.createHypervisorPod(ctx, key, node, pool, k8sNode); err != nil { if errors.IsAlreadyExists(err) { log.Info("hypervisor pod already exists, skip creation", "node", node.Name) return "", nil @@ -372,7 +377,13 @@ func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx 
context.Context, node *tf return key.Name, nil } -func (r *GPUNodeReconciler) createHypervisorPod(ctx context.Context, key client.ObjectKey, node *tfv1.GPUNode, pool *tfv1.GPUPool) error { +func (r *GPUNodeReconciler) createHypervisorPod( + ctx context.Context, + key client.ObjectKey, + node *tfv1.GPUNode, + pool *tfv1.GPUPool, + k8sNode *corev1.Node, +) error { log := log.FromContext(ctx) podTmpl := &corev1.PodTemplate{} @@ -447,7 +458,11 @@ func (r *GPUNodeReconciler) createHypervisorPod(ctx context.Context, key client. }) err = controllerutil.SetControllerReference(node, newPod, r.Scheme) if err != nil { - return fmt.Errorf("failed to set controller reference: %w", err) + return fmt.Errorf("failed to set controller reference for hypervisor: %w", err) + } + // also set node owned by k8s node to allow Karpenter to delete the node while hypervisor exists + if err := controllerutil.SetOwnerReference(k8sNode, newPod, r.Scheme); err != nil { + return fmt.Errorf("failed to set owner reference for hypervisor: %w", err) } // create hypervisor pod diff --git a/internal/controller/gpupool_controller.go b/internal/controller/gpupool_controller.go index 987eb81b..da8c63aa 100644 --- a/internal/controller/gpupool_controller.go +++ b/internal/controller/gpupool_controller.go @@ -30,13 +30,16 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/metrics" utils "github.com/NexusGPU/tensor-fusion/internal/utils" "golang.org/x/time/rate" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/retry" "k8s.io/client-go/util/workqueue" + schedulingcorev1 "k8s.io/component-helpers/scheduling/corev1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -83,6 +86,9 @@ type GPUPoolReconciler struct { // and requeue until current time after that, start provisioning loop var provisioningInitializationMinTime = map[string]time.Time{} +// When GPU nodeSelector changed, trigger all node update +var poolSelectorChangeMap = map[string]string{} + // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools/status,verbs=get;update;patch // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools/finalizers,verbs=update @@ -116,6 +122,10 @@ func (r *GPUPoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } + if err := r.reconcilePoolSelectorChange(ctx, pool); err != nil { + return ctrl.Result{}, err + } + if err := r.reconcilePoolCurrentCapacityAndReadiness(ctx, pool); err != nil { return ctrl.Result{}, err } @@ -404,6 +414,59 @@ func (r *GPUPoolReconciler) reconcilePoolComponents(ctx context.Context, pool *t return ctrlResult, utilerrors.NewAggregate(errs) } +func (r *GPUPoolReconciler) reconcilePoolSelectorChange(ctx context.Context, pool *tfv1.GPUPool) error { + if pool.Spec.NodeManagerConfig != nil && pool.Spec.NodeManagerConfig.NodeSelector != nil { + hash := utils.GetObjectHash(pool.Spec.NodeManagerConfig.NodeSelector) + if poolSelectorChangeMap[pool.Name] == hash { + return nil + } + + // hash has changed, or first reconcile, should check all k8s nodes + nodes := &corev1.NodeList{} + selectors := utils.GetInitialGPUNodeSelector() + if err := 
r.List(ctx, nodes, client.MatchingLabels{selectors[0]: selectors[1]}); err != nil { + return err + } + for _, node := range nodes.Items { + // skip no label or deleting nodes + if node.Labels == nil || !node.DeletionTimestamp.IsZero() { + continue + } + matches, err := schedulingcorev1.MatchNodeSelectorTerms(&node, pool.Spec.NodeManagerConfig.NodeSelector) + if err != nil { + return err + } + if matches { + if err := UpdateK8SNodeSelectorHash(ctx, r.Client, &node, hash); err != nil { + return err + } + } + } + poolSelectorChangeMap[pool.Name] = hash + return nil + } + return nil +} + +func UpdateK8SNodeSelectorHash(ctx context.Context, k8sClient client.Client, node *corev1.Node, hash string) error { + // skip nodes that already injected the hash + if node.Labels[constants.LabelNodeSelectorHash] == hash { + return nil + } + // update label to trigger the GPUNode reconcile + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + latest := &corev1.Node{} + if err := k8sClient.Get(ctx, client.ObjectKey{Name: node.Name}, latest); err != nil { + return err + } + latest.Labels[constants.LabelNodeSelectorHash] = hash + return k8sClient.Update(ctx, latest) + }); err != nil { + return err + } + return nil +} + func (r *GPUPoolReconciler) cleanUpPool(ctx context.Context, pool *tfv1.GPUPool) (bool, error) { log := log.FromContext(ctx) log.Info("TensorFusionGPUPool is being deleted", "name", pool.Name) diff --git a/internal/controller/gpupool_controller_test.go b/internal/controller/gpupool_controller_test.go index 50b033cd..e3be7a99 100644 --- a/internal/controller/gpupool_controller_test.go +++ b/internal/controller/gpupool_controller_test.go @@ -42,6 +42,14 @@ var _ = Describe("GPUPool Controller", func() { pool := tfEnv.GetGPUPool(0) g.Expect(pool.Status.Phase).Should(Equal(tfv1.TensorFusionPoolPhaseRunning)) }).Should(Succeed()) + Eventually(func(g Gomega) { + nodeList := tfEnv.GetGPUNodeList(0) + for _, gpuNode := range nodeList.Items { + node := &corev1.Node{} + g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: gpuNode.Name}, node)).Should(Succeed()) + g.Expect(node.Labels).To(HaveKey(constants.LabelNodeSelectorHash)) + } + }).Should(Succeed()) tfEnv.Cleanup() }) }) diff --git a/internal/controller/node_controller.go b/internal/controller/node_controller.go index 3a9c652d..caedc903 100644 --- a/internal/controller/node_controller.go +++ b/internal/controller/node_controller.go @@ -19,8 +19,6 @@ package controller import ( "context" "fmt" - "os" - "strings" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" @@ -86,12 +84,15 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. return ctrl.Result{}, err } if !matched { - // delete gpunode if no matched pool - if err := r.Delete(ctx, &tfv1.GPUNode{ - ObjectMeta: metav1.ObjectMeta{ - Name: node.Name, - }, - }); err != nil { + existingGPUNode := &tfv1.GPUNode{} + if err := r.Get(ctx, client.ObjectKey{Name: node.Name}, existingGPUNode); err != nil { + if errors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("can not get gpuNode(%s) : %w", node.Name, err) + } + // delete existing gpunode if no matched pool + if err := r.Delete(ctx, existingGPUNode); err != nil { // requeue if the gpunode is not generated if errors.IsNotFound(err) { return ctrl.Result{}, nil @@ -121,6 +122,14 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
return ctrl.Result{}, nil } + // update k8s node hash + hash := utils.GetObjectHash(pool.Spec.NodeManagerConfig.NodeSelector) + if node.Labels[constants.LabelNodeSelectorHash] != hash { + if err := UpdateK8SNodeSelectorHash(ctx, r.Client, node, hash); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to update k8s node hash: %w", err) + } + } + provisioningMode := pool.Spec.NodeManagerConfig.ProvisioningMode isDirectManagedMode := provisioningMode == tfv1.ProvisioningModeProvisioned isManagedNode := isDirectManagedMode || provisioningMode == tfv1.ProvisioningModeKarpenter @@ -199,11 +208,7 @@ func (r *NodeReconciler) generateGPUNode(node *corev1.Node, pool *tfv1.GPUPool, // SetupWithManager sets up the controller with the Manager. func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error { // must choose an initial label selector to avoid performance impact in large Kubernetes clusters - selector := os.Getenv("INITIAL_GPU_NODE_LABEL_SELECTOR") - if selector == "" { - selector = constants.InitialGPUNodeSelector - } - selectors := strings.Split(selector, "=") + selectors := utils.GetInitialGPUNodeSelector() p, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{ MatchLabels: map[string]string{ selectors[0]: selectors[1], diff --git a/internal/utils/compose.go b/internal/utils/compose.go index 344228b0..93e8248c 100644 --- a/internal/utils/compose.go +++ b/internal/utils/compose.go @@ -229,10 +229,9 @@ func AddTFDefaultClientConfBeforePatch( pod.Spec.Containers[injectContainerIndex].VolumeMounts = append( pod.Spec.Containers[injectContainerIndex].VolumeMounts, v1.VolumeMount{ - Name: constants.DataVolumeName, - MountPath: constants.SharedMemDeviceName, - SubPath: constants.SharedMemMountSubPath, - // + constants.TFLibsVolumeMountPath, SubPathExpr: constants.TFDataPathWorkerExpr, + Name: constants.DataVolumeName, + MountPath: constants.TFLibsVolumeMountPath, + SubPathExpr: constants.TFDataPathWorkerExpr, MountPropagation: ptr.To(v1.MountPropagationHostToContainer), }) @@ -682,12 +681,9 @@ func AddWorkerConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, workerCon spec.Containers[0].VolumeMounts = append( spec.Containers[0].VolumeMounts, v1.VolumeMount{ - Name: constants.DataVolumeName, - MountPath: constants.SharedMemDeviceName, - // TODO not working. 
- // + constants.TFLibsVolumeMountPath - // SubPathExpr: constants.TFDataPathWorkerExpr, - SubPath: constants.SharedMemMountSubPath, + Name: constants.DataVolumeName, + MountPath: constants.TFLibsVolumeMountPath, + SubPathExpr: constants.TFDataPathWorkerExpr, MountPropagation: ptr.To(v1.MountPropagationHostToContainer), }) spec.Containers[0].Env = append(spec.Containers[0].Env, v1.EnvVar{ diff --git a/internal/utils/reconcile.go b/internal/utils/reconcile.go index ebc091ac..23026cf7 100644 --- a/internal/utils/reconcile.go +++ b/internal/utils/reconcile.go @@ -214,6 +214,15 @@ func IsTensorFusionWorker(pod *corev1.Pod) bool { return pod.Labels[constants.LabelComponent] == constants.ComponentWorker } +func GetInitialGPUNodeSelector() []string { + selector := os.Getenv("INITIAL_GPU_NODE_LABEL_SELECTOR") + if selector == "" { + selector = constants.InitialGPUNodeSelector + } + selectors := strings.Split(selector, "=") + return selectors +} + var GPUResourceNames = []corev1.ResourceName{ "nvidia.com/gpu", "amd.com/gpu", From e6281872ea961bd535804eb439392090aaa86b78 Mon Sep 17 00:00:00 2001 From: Joey Yang <14833440+Code2Life@users.noreply.github.com> Date: Tue, 9 Sep 2025 17:09:21 +0800 Subject: [PATCH 15/34] fix: upgrade k8s 1.34, fix shm path, helm chart issues (#355) * chore: lint issue * fix: kubernetes upgrade, fix scheduler deps issue * fix: upgrade k8s version to 1.34, use fixed operator version in helm chart * fix: update shm path * chore: comment & wording * fix: connection naming * fix: upgrade github action * fix: add test for dedicated gpu allocation mode --- .github/workflows/lint.yml | 2 +- .github/workflows/test.yml | 4 +- .vscode/settings.json | 4 + api/v1/gpuresourcequota_types.go | 8 +- charts/tensor-fusion/Chart.yaml | 2 +- charts/tensor-fusion/values.yaml | 4 +- cmd/main.go | 55 +- cmd/sched/setup.go | 9 + go.mod | 207 ++++---- go.sum | 470 +++++++++--------- internal/constants/env.go | 10 +- internal/controller/node_controller.go | 2 + internal/gpuallocator/gpuallocator.go | 16 +- .../scheduler/gpuresources/gpuresources.go | 63 +-- .../gpuresources/gpuresources_test.go | 78 +-- .../scheduler/gputopo/gpu_network_topo.go | 5 +- internal/server/router/allocator_info.go | 11 +- internal/utils/compose.go | 7 +- internal/webhook/v1/pod_webhook.go | 18 +- internal/webhook/v1/pod_webhook_test.go | 56 ++- internal/webhook/v1/tf_parser.go | 17 +- patches/scheduler-csi-capacity-3.patch | 53 +- patches/scheduler-pdb-2.patch | 17 +- scripts/patch-scheduler.sh | 4 + test/sched/gpufit_bench_test.go | 6 +- test/sched/scheduler_bench_test.go | 35 +- test/sched/setup.go | 63 ++- 27 files changed, 717 insertions(+), 509 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index dfae921c..f56d3f6c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@v5 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: '~1.24' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1528d13e..b4be4381 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,13 +28,13 @@ jobs: strategy: matrix: # from https://github.com/kubernetes-sigs/controller-tools/blob/main/envtest-releases.yaml - envtest_k8s_version: [1.23.5, 1.33.0] + envtest_k8s_version: [1.23.5, 1.34.0] steps: - name: Clone the code uses: actions/checkout@v5 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: '~1.24' diff --git 
a/.vscode/settings.json b/.vscode/settings.json index 1285d84e..2a261510 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -25,6 +25,7 @@ "clientcmdapi", "clientgoscheme", "clientset", + "clientsetfake", "cloudnative", "cloudprovider", "clusterissuers", @@ -46,6 +47,7 @@ "envtest", "essd", "Eventf", + "featuregate", "finalizer", "Finalizers", "frameworkruntime", @@ -78,6 +80,8 @@ "iface", "imageutils", "influxdata", + "internalcache", + "internalqueue", "jsonpatch", "karpenter", "karpv", diff --git a/api/v1/gpuresourcequota_types.go b/api/v1/gpuresourcequota_types.go index c6ac1dba..1b28520a 100644 --- a/api/v1/gpuresourcequota_types.go +++ b/api/v1/gpuresourcequota_types.go @@ -19,7 +19,7 @@ package v1 import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/kubernetes/pkg/scheduler/framework" + fwk "k8s.io/kube-scheduler/framework" ) // GPUResourceQuotaSpec defines the desired state of GPUResourceQuota @@ -188,6 +188,10 @@ type AllocRequest struct { PodMeta metav1.ObjectMeta } +func (p *AllocRequest) Clone() fwk.StateData { + return p +} + type GPUAllocationInfo struct { Request Resource `json:"request,omitempty"` Limit Resource `json:"limit,omitempty"` @@ -203,7 +207,7 @@ type AdjustRequest struct { NewLimit Resource } -func (ar *AllocRequest) Clone() framework.StateData { +func (ar *AdjustRequest) Clone() fwk.StateData { return ar } diff --git a/charts/tensor-fusion/Chart.yaml b/charts/tensor-fusion/Chart.yaml index c72e6082..d18568b7 100644 --- a/charts/tensor-fusion/Chart.yaml +++ b/charts/tensor-fusion/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.5.6 +version: 1.5.7 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/tensor-fusion/values.yaml b/charts/tensor-fusion/values.yaml index cf4865f4..6b9fcc0c 100644 --- a/charts/tensor-fusion/values.yaml +++ b/charts/tensor-fusion/values.yaml @@ -31,7 +31,7 @@ controller: image: repository: tensorfusion/tensor-fusion-operator # Overrides the image tag whose default is the chart appVersion. - tag: "latest" + tag: "1.43.4" # This is for setting Kubernetes Annotations to a Pod. # For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ @@ -120,7 +120,7 @@ agent: image: repository: tensorfusion/tensor-fusion-agent - tag: "latest" + tag: "1.0.0" resources: requests: diff --git a/cmd/main.go b/cmd/main.go index 23cd69b8..f4f2f0ab 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -27,27 +27,6 @@ import ( // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. 
- - "k8s.io/client-go/kubernetes" - _ "k8s.io/client-go/plugin/pkg/client/auth" - "k8s.io/client-go/rest" - "k8s.io/klog/v2" - - "k8s.io/apimachinery/pkg/runtime" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" - "k8s.io/kubernetes/cmd/kube-scheduler/app" - "k8s.io/kubernetes/pkg/scheduler" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/healthz" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/metrics/filters" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/controller-runtime/pkg/webhook" - - "sigs.k8s.io/yaml" - tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/cmd/sched" "github.com/NexusGPU/tensor-fusion/internal/alert" @@ -65,6 +44,25 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/NexusGPU/tensor-fusion/internal/version" webhookcorev1 "github.com/NexusGPU/tensor-fusion/internal/webhook/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + k8sVer "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apiserver/pkg/util/feature" + "k8s.io/client-go/kubernetes" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + _ "k8s.io/client-go/plugin/pkg/client/auth" + "k8s.io/client-go/rest" + "k8s.io/klog/v2" + "k8s.io/kubernetes/cmd/kube-scheduler/app" + "k8s.io/kubernetes/pkg/scheduler" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/metrics/filters" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/controller-runtime/pkg/webhook" + "sigs.k8s.io/yaml" // +kubebuilder:scaffold:imports ) @@ -204,6 +202,14 @@ func main() { _ = os.Setenv(constants.KubeApiVersionMajorEnv, version.Major) _ = os.Setenv(constants.KubeApiVersionMinorEnv, version.Minor) + // TODO: there will still be risk after FeatureGate removed when the feature is stable for a long time + // To be compatible with long-term k8s version, need to patch Kubernetes source code + k8sVersion := k8sVer.MustParseSemantic(version.String()) + err = feature.DefaultMutableFeatureGate.SetEmulationVersion(k8sVersion) + if err != nil { + setupLog.Error(err, "unable to set k8s version for feature gating") + } + alertEvaluatorReady = make(chan struct{}) setupTimeSeriesAndWatchGlobalConfigChanges(ctx, mgr) @@ -221,7 +227,7 @@ func main() { pricingProvider := pricing.NewStaticPricingProvider() startWebhook(mgr, portAllocator, pricingProvider) - scheduler := startScheduler(ctx, allocator, mgr) + scheduler := startScheduler(ctx, allocator, mgr, k8sVersion) startCustomResourceController(ctx, mgr, metricsRecorder, allocator, portAllocator) @@ -461,6 +467,7 @@ func startScheduler( ctx context.Context, allocator *gpuallocator.GpuAllocator, mgr manager.Manager, + k8sVersion *k8sVer.Version, ) *scheduler.Scheduler { if os.Getenv(constants.EnableSchedulerEnv) == constants.FalseStringValue { return nil @@ -479,7 +486,9 @@ func startScheduler( gpuTopoPlugin.NewWithDeps(allocator, mgr.GetClient()), ) - cc, scheduler, err := sched.SetupScheduler(ctx, mgr, schedulerConfigPath, false, gpuResourceFitOpt, gpuTopoOpt) + cc, scheduler, err := sched.SetupScheduler( + ctx, mgr, schedulerConfigPath, false, k8sVersion, gpuResourceFitOpt, gpuTopoOpt, + ) if err != nil { 
setupLog.Error(err, "unable to create tensor fusion scheduler") os.Exit(1) diff --git a/cmd/sched/setup.go b/cmd/sched/setup.go index 2818fba2..20b28f96 100644 --- a/cmd/sched/setup.go +++ b/cmd/sched/setup.go @@ -22,6 +22,8 @@ import ( "strings" utilerrors "k8s.io/apimachinery/pkg/util/errors" + k8sVer "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/events" "k8s.io/component-base/configz" "k8s.io/klog/v2" @@ -50,6 +52,7 @@ func SetupScheduler( mgr manager.Manager, schedulerConfigPath string, disableHttpEndpoint bool, + k8sVersion *k8sVer.Version, outOfTreeRegistryOptions ...app.Option, ) (*schedulerserverconfig.CompletedConfig, *scheduler.Scheduler, error) { opts := options.NewOptions() @@ -73,6 +76,12 @@ func SetupScheduler( return nil, nil, err } + // Setup enumerationVersion again since it's overridden by the config + err = feature.DefaultMutableFeatureGate.SetEmulationVersion(k8sVersion) + if err != nil { + return nil, nil, err + } + if cfg, err := latest.Default(); err != nil { return nil, nil, err } else { diff --git a/go.mod b/go.mod index 72d32a0f..e8da7faf 100644 --- a/go.mod +++ b/go.mod @@ -6,171 +6,184 @@ require ( github.com/DATA-DOG/go-sqlmock v1.5.2 github.com/NVIDIA/go-nvml v0.13.0-1 github.com/aliyun/alibaba-cloud-sdk-go v1.63.107 - github.com/aws/aws-sdk-go-v2 v1.38.1 - github.com/aws/aws-sdk-go-v2/service/ec2 v1.241.0 - github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5 + github.com/aws/aws-sdk-go-v2 v1.38.3 + github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0 + github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5 github.com/gin-contrib/gzip v1.2.3 github.com/gin-gonic/gin v1.10.1 - github.com/go-sql-driver/mysql v1.8.1 + github.com/go-sql-driver/mysql v1.9.3 github.com/influxdata/line-protocol/v2 v2.2.1 github.com/lithammer/shortuuid/v4 v4.2.0 github.com/mitchellh/mapstructure v1.5.0 - github.com/onsi/ginkgo/v2 v2.23.4 - github.com/onsi/gomega v1.38.0 + github.com/onsi/ginkgo/v2 v2.25.3 + github.com/onsi/gomega v1.38.2 github.com/pkg/errors v0.9.1 github.com/samber/lo v1.51.0 github.com/shirou/gopsutil v3.21.11+incompatible - github.com/stretchr/testify v1.11.0 - go.opentelemetry.io/otel v1.37.0 + github.com/stretchr/testify v1.11.1 + go.opentelemetry.io/otel v1.38.0 go.uber.org/zap v1.27.0 - golang.org/x/time v0.12.0 + golang.org/x/time v0.13.0 gomodules.xyz/jsonpatch/v2 v2.5.0 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gorm.io/driver/mysql v1.6.0 - gorm.io/gorm v1.30.1 - k8s.io/api v0.33.3 - k8s.io/apimachinery v0.33.3 - k8s.io/client-go v0.33.3 - k8s.io/component-base v0.33.3 - k8s.io/component-helpers v0.33.3 + gorm.io/gorm v1.30.3 + k8s.io/api v0.34.0 + k8s.io/apimachinery v0.34.0 + k8s.io/client-go v0.34.0 + k8s.io/component-base v0.34.0 + k8s.io/component-helpers v0.34.0 k8s.io/klog/v2 v2.130.1 - k8s.io/kubernetes v1.33.4 - k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 - sigs.k8s.io/controller-runtime v0.21.0 - sigs.k8s.io/karpenter v1.6.1 - sigs.k8s.io/scheduler-plugins v0.32.7 + k8s.io/kube-scheduler v0.34.0 + k8s.io/kubernetes v1.34.0 + k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d + sigs.k8s.io/controller-runtime v0.22.0 + sigs.k8s.io/karpenter v1.6.2 sigs.k8s.io/yaml v1.6.0 ) require ( - cel.dev/expr v0.23.1 // indirect + cel.dev/expr v0.24.0 // indirect filippo.io/edwards25519 v1.1.0 // indirect - github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + 
github.com/Masterminds/semver/v3 v3.4.0 // indirect github.com/NYTimes/gziphandler v1.1.1 // indirect github.com/antlr4-go/antlr/v4 v4.13.1 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.2 // indirect - github.com/aws/smithy-go v1.22.5 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6 // indirect + github.com/aws/smithy-go v1.23.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect - github.com/bytedance/sonic v1.13.2 // indirect - github.com/bytedance/sonic/loader v0.2.4 // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/bytedance/gopkg v0.1.3 // indirect + github.com/bytedance/sonic v1.14.1 // indirect + github.com/bytedance/sonic/loader v0.3.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/cloudwego/base64x v0.1.5 // indirect + github.com/cloudwego/base64x v0.1.6 // indirect github.com/coreos/go-semver v0.3.1 // indirect - github.com/coreos/go-systemd/v22 v22.5.0 // indirect + github.com/coreos/go-systemd/v22 v22.6.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/distribution/reference v0.6.0 // indirect - github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/fsnotify/fsnotify v1.8.0 // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect - github.com/gabriel-vasile/mimetype v1.4.8 // indirect - github.com/gin-contrib/sse v1.0.0 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.10 // indirect + github.com/gin-contrib/sse v1.1.0 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect - github.com/go-ole/go-ole v1.2.6 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.21.0 // indirect - github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-ole/go-ole v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.22.0 // indirect + github.com/go-openapi/jsonreference v0.21.1 // indirect + github.com/go-openapi/swag v0.24.1 // indirect + github.com/go-openapi/swag/cmdutils v0.24.0 // indirect + github.com/go-openapi/swag/conv v0.24.0 // indirect + github.com/go-openapi/swag/fileutils v0.24.0 // indirect + github.com/go-openapi/swag/jsonname v0.24.0 // indirect + github.com/go-openapi/swag/jsonutils v0.24.0 // indirect + github.com/go-openapi/swag/loading v0.24.0 // indirect + github.com/go-openapi/swag/mangling v0.24.0 // indirect + github.com/go-openapi/swag/netutils v0.24.0 // indirect + github.com/go-openapi/swag/stringutils v0.24.0 // indirect + github.com/go-openapi/swag/typeutils v0.24.0 // indirect + github.com/go-openapi/swag/yamlutils v0.24.0 // 
indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-playground/validator/v10 v10.26.0 // indirect + github.com/go-playground/validator/v10 v10.27.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/goccy/go-json v0.10.5 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/btree v1.1.3 // indirect - github.com/google/cel-go v0.23.2 // indirect - github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/cel-go v0.26.1 // indirect + github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect + github.com/google/pprof v0.0.0-20250903194437-c28834ac2320 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2 // indirect - github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/klauspost/cpuid/v2 v2.3.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect - github.com/moby/term v0.5.0 // indirect + github.com/moby/term v0.5.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect - github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.22.0 // indirect + github.com/prometheus/client_golang v1.23.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.62.0 // indirect - github.com/prometheus/procfs v0.15.1 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.17.0 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect - github.com/spf13/cobra v1.8.1 // indirect - github.com/spf13/pflag v1.0.6 // indirect - github.com/stoewer/go-strcase v1.3.0 // indirect + github.com/spf13/cobra v1.10.1 // indirect + github.com/spf13/pflag v1.0.10 // indirect + github.com/stoewer/go-strcase v1.3.1 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/ugorji/go/codec v1.2.12 // indirect + github.com/ugorji/go/codec v1.3.0 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - go.etcd.io/etcd/api/v3 v3.5.21 // indirect - go.etcd.io/etcd/client/pkg/v3 v3.5.21 
// indirect - go.etcd.io/etcd/client/v3 v3.5.21 // indirect + go.etcd.io/etcd/api/v3 v3.6.4 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.6.4 // indirect + go.etcd.io/etcd/client/v3 v3.6.4 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 // indirect - go.opentelemetry.io/otel/metric v1.37.0 // indirect - go.opentelemetry.io/otel/sdk v1.33.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect - go.opentelemetry.io/proto/otlp v1.4.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/sdk v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect + go.opentelemetry.io/proto/otlp v1.8.0 // indirect go.uber.org/automaxprocs v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect - golang.org/x/arch v0.15.0 // indirect - golang.org/x/crypto v0.39.0 // indirect - golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect - golang.org/x/net v0.41.0 // indirect - golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sync v0.15.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/term v0.32.0 // indirect - golang.org/x/text v0.26.0 // indirect - golang.org/x/tools v0.33.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20241223144023-3abc09e42ca8 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d // indirect - google.golang.org/grpc v1.69.4 // indirect - google.golang.org/protobuf v1.36.6 // indirect - gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/arch v0.21.0 // indirect + golang.org/x/crypto v0.41.0 // indirect + golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/oauth2 v0.31.0 // indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect + golang.org/x/tools v0.36.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250826171959-ef028d996bc1 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 // indirect + google.golang.org/grpc v1.75.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiextensions-apiserver v0.33.2 // indirect - k8s.io/apiserver v0.33.2 // indirect - k8s.io/cloud-provider v0.33.2 // indirect - k8s.io/controller-manager v0.33.2 // indirect - k8s.io/csi-translation-lib v0.33.2 // indirect - k8s.io/dynamic-resource-allocation v0.33.1 // indirect - k8s.io/kms v0.33.2 // indirect - k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a 
// indirect - k8s.io/kube-scheduler v0.32.7 // indirect - k8s.io/kubelet v0.33.1 // indirect - sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect - sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + k8s.io/apiextensions-apiserver v0.34.0 // indirect + k8s.io/apiserver v0.34.0 // indirect + k8s.io/cloud-provider v0.34.0 // indirect + k8s.io/controller-manager v0.34.0 // indirect + k8s.io/csi-translation-lib v0.34.0 // indirect + k8s.io/dynamic-resource-allocation v0.34.0 // indirect + k8s.io/kms v0.34.0 // indirect + k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611 // indirect + k8s.io/kubelet v0.34.0 // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect ) diff --git a/go.sum b/go.sum index 0f62b153..446e3470 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,16 @@ -cel.dev/expr v0.23.1 h1:K4KOtPCJQjVggkARsjG9RWXP6O4R73aHeJMa/dmCQQg= -cel.dev/expr v0.23.1/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= +cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY= +cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= +github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= @@ -22,43 +24,43 @@ github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYW github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= -github.com/aws/aws-sdk-go-v2 v1.38.1 h1:j7sc33amE74Rz0M/PoCpsZQ6OunLqys/m5antM0J+Z8= -github.com/aws/aws-sdk-go-v2 v1.38.1/go.mod h1:9Q0OoGQoboYIAJyslFyF1f5K1Ryddop8gqMhWx/n4Wg= 
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2 h1:sPiRHLVUIIQcoVZTNwqQcdtjkqkPopyYmIX0M5ElRf4= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2/go.mod h1:ik86P3sgV+Bk7c1tBFCwI3VxMoSEwl4YkRB9xn1s340= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2 h1:ZdzDAg075H6stMZtbD2o+PyB933M/f20e9WmCBC17wA= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2/go.mod h1:eE1IIzXG9sdZCB0pNNpMpsYTLl4YdOQD3njiVN1e/E4= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.241.0 h1:twGX//bv1QH/9pyJaqynNSo0eXGkDEdDTFy8GNPsz5M= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.241.0/go.mod h1:HDxGArx3/bUnkoFsuvTNIxEj/cR3f+IgsVh1B7Pvay8= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 h1:6+lZi2JeGKtCraAj1rpoZfKqnQ9SptseRZioejfUOLM= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0/go.mod h1:eb3gfbVIxIoGgJsi9pGne19dhCBpK6opTYpQqAmdy44= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.2 h1:oxmDEO14NBZJbK/M8y3brhMFEIGN4j8a6Aq8eY0sqlo= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.2/go.mod h1:4hH+8QCrk1uRWDPsVfsNDUup3taAjO8Dnx63au7smAU= -github.com/aws/smithy-go v1.22.5 h1:P9ATCXPMb2mPjYBgueqJNCA5S9UfktsW0tTxi+a7eqw= -github.com/aws/smithy-go v1.22.5/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= -github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5 h1:m/qynRSKYe4RKSroVqRRgMlp/cUXO54SY2upSUqfcqw= -github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5/go.mod h1:3Lf3VaiJyr3IP0gH53sZp16Tu5CmoaDSUv4KQwFQO/I= +github.com/aws/aws-sdk-go-v2 v1.38.3 h1:B6cV4oxnMs45fql4yRH+/Po/YU+597zgWqvDpYMturk= +github.com/aws/aws-sdk-go-v2 v1.38.3/go.mod h1:sDioUELIUO9Znk23YVmIk86/9DOpkbyyVb1i/gUNFXY= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6 h1:uF68eJA6+S9iVr9WgX1NaRGyQ/6MdIyc4JNUo6TN1FA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6/go.mod h1:qlPeVZCGPiobx8wb1ft0GHT5l+dc6ldnwInDFaMvC7Y= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6 h1:pa1DEC6JoI0zduhZePp3zmhWvk/xxm4NB8Hy/Tlsgos= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6/go.mod h1:gxEjPebnhWGJoaDdtDkA0JX46VRg1wcTHYe63OfX5pE= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0 h1:hGHSNZDTFnhLGUpRkQORM8uBY9R/FOkxCkuUUJBEOQ4= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0/go.mod h1:SmMqzfS4HVsOD58lwLZ79oxF58f8zVe5YdK3o+/o1Ck= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 h1:oegbebPEMA/1Jny7kvwejowCaHz1FWZAQ94WXFNCyTM= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1/go.mod h1:kemo5Myr9ac0U9JfSjMo9yHLtw+pECEHsFtJ9tqCEI8= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6 h1:LHS1YAIJXJ4K9zS+1d/xa9JAA9sL2QyXIQCQFQW/X08= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6/go.mod h1:c9PCiTEuh0wQID5/KqA32J+HAgZxN9tOGXKCiYJjTZI= +github.com/aws/smithy-go v1.23.0 h1:8n6I3gXzWJB2DxBDnfxgBaSX6oe0d/t10qGz7OKqMCE= +github.com/aws/smithy-go v1.23.0/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= +github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5 h1:MM4Y7+YqhWLZiRuZfWrAXD2rZ0maVePbzARP3adeJ+g= +github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5/go.mod h1:OCT5DIzVB2740qVgfRz0zQe/dDdvnsnFarzy6VdYNoA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod 
h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= -github.com/bytedance/sonic v1.13.2 h1:8/H1FempDZqC4VqjptGo14QQlJx8VdZJegxs6wwfqpQ= -github.com/bytedance/sonic v1.13.2/go.mod h1:o68xyaF9u2gvVBuGHPlUVCy+ZfmNNO5ETf1+KgkJhz4= -github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= -github.com/bytedance/sonic/loader v0.2.4 h1:ZWCw4stuXUsn1/+zQDqeE7JKP+QO47tz7QCNan80NzY= -github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= -github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= -github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M= +github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM= +github.com/bytedance/sonic v1.14.1 h1:FBMC0zVz5XUmE4z9wF4Jey0An5FueFvOsTKKKtwIl7w= +github.com/bytedance/sonic v1.14.1/go.mod h1:gi6uhQLMbTdeP0muCnrjHLeCUPyb70ujhnNlhOylAFc= +github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA= +github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cloudwego/base64x v0.1.5 h1:XPciSp1xaq2VCSt6lF0phncD4koWyULpl5bUxbfCyP4= -github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= -github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M= +github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU= github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= -github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo= +github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= @@ -70,8 +72,8 @@ github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5Qvfr github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= -github.com/emicklei/go-restful/v3 v3.12.1/go.mod 
h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= @@ -83,16 +85,16 @@ github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk= github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= -github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= -github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= -github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= -github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0= +github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= github.com/gin-contrib/gzip v1.2.3 h1:dAhT722RuEG330ce2agAs75z7yB+NKvX/ZM1r8w0u2U= github.com/gin-contrib/gzip v1.2.3/go.mod h1:ad72i4Bzmaypk8M762gNXa2wkxxjbz0icRNnuLJ9a/c= -github.com/gin-contrib/sse v1.0.0 h1:y3bT1mUWUxDpW4JLQg/HnTqV4rozuW4tC9eFKTxYI9E= -github.com/gin-contrib/sse v1.0.0/go.mod h1:zNuFdwarAygJBht0NTKiSi3jRf6RbqeILZ9Sp6Slhe0= +github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w= +github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM= github.com/gin-gonic/gin v1.10.1 h1:T0ujvqyCSqRopADpgPgiTT63DUQVSfojyME59Ei63pQ= github.com/gin-gonic/gin v1.10.1/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= @@ -103,66 +105,87 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= -github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= -github.com/go-openapi/jsonreference v0.21.0 
h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= -github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= -github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= -github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= +github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/go-openapi/jsonpointer v0.22.0 h1:TmMhghgNef9YXxTu1tOopo+0BGEytxA+okbry0HjZsM= +github.com/go-openapi/jsonpointer v0.22.0/go.mod h1:xt3jV88UtExdIkkL7NloURjRQjbeUgcxFblMjq2iaiU= +github.com/go-openapi/jsonreference v0.21.1 h1:bSKrcl8819zKiOgxkbVNRUBIr6Wwj9KYrDbMjRs0cDA= +github.com/go-openapi/jsonreference v0.21.1/go.mod h1:PWs8rO4xxTUqKGu+lEvvCxD5k2X7QYkKAepJyCmSTT8= +github.com/go-openapi/swag v0.24.1 h1:DPdYTZKo6AQCRqzwr/kGkxJzHhpKxZ9i/oX0zag+MF8= +github.com/go-openapi/swag v0.24.1/go.mod h1:sm8I3lCPlspsBBwUm1t5oZeWZS0s7m/A+Psg0ooRU0A= +github.com/go-openapi/swag/cmdutils v0.24.0 h1:KlRCffHwXFI6E5MV9n8o8zBRElpY4uK4yWyAMWETo9I= +github.com/go-openapi/swag/cmdutils v0.24.0/go.mod h1:uxib2FAeQMByyHomTlsP8h1TtPd54Msu2ZDU/H5Vuf8= +github.com/go-openapi/swag/conv v0.24.0 h1:ejB9+7yogkWly6pnruRX45D1/6J+ZxRu92YFivx54ik= +github.com/go-openapi/swag/conv v0.24.0/go.mod h1:jbn140mZd7EW2g8a8Y5bwm8/Wy1slLySQQ0ND6DPc2c= +github.com/go-openapi/swag/fileutils v0.24.0 h1:U9pCpqp4RUytnD689Ek/N1d2N/a//XCeqoH508H5oak= +github.com/go-openapi/swag/fileutils v0.24.0/go.mod h1:3SCrCSBHyP1/N+3oErQ1gP+OX1GV2QYFSnrTbzwli90= +github.com/go-openapi/swag/jsonname v0.24.0 h1:2wKS9bgRV/xB8c62Qg16w4AUiIrqqiniJFtZGi3dg5k= +github.com/go-openapi/swag/jsonname v0.24.0/go.mod h1:GXqrPzGJe611P7LG4QB9JKPtUZ7flE4DOVechNaDd7Q= +github.com/go-openapi/swag/jsonutils v0.24.0 h1:F1vE1q4pg1xtO3HTyJYRmEuJ4jmIp2iZ30bzW5XgZts= +github.com/go-openapi/swag/jsonutils v0.24.0/go.mod h1:vBowZtF5Z4DDApIoxcIVfR8v0l9oq5PpYRUuteVu6f0= +github.com/go-openapi/swag/loading v0.24.0 h1:ln/fWTwJp2Zkj5DdaX4JPiddFC5CHQpvaBKycOlceYc= +github.com/go-openapi/swag/loading v0.24.0/go.mod h1:gShCN4woKZYIxPxbfbyHgjXAhO61m88tmjy0lp/LkJk= +github.com/go-openapi/swag/mangling v0.24.0 h1:PGOQpViCOUroIeak/Uj/sjGAq9LADS3mOyjznmHy2pk= +github.com/go-openapi/swag/mangling v0.24.0/go.mod h1:Jm5Go9LHkycsz0wfoaBDkdc4CkpuSnIEf62brzyCbhc= +github.com/go-openapi/swag/netutils v0.24.0 h1:Bz02HRjYv8046Ycg/w80q3g9QCWeIqTvlyOjQPDjD8w= +github.com/go-openapi/swag/netutils v0.24.0/go.mod h1:WRgiHcYTnx+IqfMCtu0hy9oOaPR0HnPbmArSRN1SkZM= +github.com/go-openapi/swag/stringutils v0.24.0 h1:i4Z/Jawf9EvXOLUbT97O0HbPUja18VdBxeadyAqS1FM= +github.com/go-openapi/swag/stringutils v0.24.0/go.mod h1:5nUXB4xA0kw2df5PRipZDslPJgJut+NjL7D25zPZ/4w= +github.com/go-openapi/swag/typeutils v0.24.0 h1:d3szEGzGDf4L2y1gYOSSLeK6h46F+zibnEas2Jm/wIw= +github.com/go-openapi/swag/typeutils v0.24.0/go.mod h1:q8C3Kmk/vh2VhpCLaoR2MVWOGP8y7Jc8l82qCTd1DYI= +github.com/go-openapi/swag/yamlutils v0.24.0 h1:bhw4894A7Iw6ne+639hsBNRHg9iZg/ISrOVr+sJGp4c= +github.com/go-openapi/swag/yamlutils v0.24.0/go.mod h1:DpKv5aYuaGm/sULePoeiG8uwMpZSfReo1HR3Ik0yaG8= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= 
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.26.0 h1:SP05Nqhjcvz81uJaRfEV0YBSSSGMc/iMaVtFbr3Sw2k= -github.com/go-playground/validator/v10 v10.26.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= -github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= -github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= +github.com/go-playground/validator/v10 v10.27.0 h1:w8+XrWVMhGkxOaaowyKH35gFydVHOvC0/uWoy2Fzwn4= +github.com/go-playground/validator/v10 v10.27.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= +github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo= +github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A= -github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI= -github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= +github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= +github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/cel-go v0.23.2 h1:UdEe3CvQh3Nv+E/j9r1Y//WO0K0cSyD7/y0bzyLIMI4= -github.com/google/cel-go v0.23.2/go.mod h1:52Pb6QsDbC5kvgxvZhiL9QX1oZEkcUF/ZqaPx1J5Wwo= -github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= -github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= +github.com/google/cel-go v0.26.1 h1:iPbVVEdkhTX++hpe3lzSk7D3G3QSYqLGoHOcEio+UXQ= +github.com/google/cel-go v0.26.1/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9/go.mod 
h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/pprof v0.0.0-20250903194437-c28834ac2320 h1:c7ayAhbRP9HnEl/hg/WQOM9s0snWztfW6feWXZbGHw0= +github.com/google/pprof v0.0.0-20250903194437-c28834ac2320/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= -github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= -github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA= +github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.0 h1:FbSCl+KggFl+Ocym490i/EyXF4lPgLoUtcSWquBM0Rs= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.0/go.mod h1:qOchhhIlmRcqk/O9uCo/puJlyo07YINaIqdZfZG3Jkc= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 h1:VNqngBF40hVlDloBruUehVYC3ArSgIyScOAyMRqBxRg= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1/go.mod h1:RBRO7fro65R6tjKzYgLAFo0t1QEXY1Dp+i/bvpRiqiQ= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -182,24 +205,20 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/jonboulle/clockwork v0.4.0 
h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= -github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= +github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I= +github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2 h1:uAwqOtyrFYggq3pVf3hs1XKkBxrQ8dkgjWz3LCLJsiY= -github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2/go.mod h1:LBzS4n6GX1C69tzSd5EibZ9cGOXFuHP7GxEMDYVe1sM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= -github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= -github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= +github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -221,28 +240,29 @@ github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4 github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= -github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= +github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= +github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= -github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= -github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY= -github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o= +github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw= +github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE= +github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= +github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= -github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= -github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= +github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -250,14 +270,14 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= -github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.62.0 
h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= -github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= +github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= @@ -271,13 +291,13 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= -github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= -github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.1 h1:iS0MdW+kVTxgMoE1LAZyMiYJFKlOzLooE4MxjirtkAs= +github.com/stoewer/go-strcase v1.3.1/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -288,10 +308,8 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/stretchr/testify v1.11.0 h1:ib4sjIrwZKxE5u/Japgo/7SJV3PvgjGiRNAvTVGqQl8= -github.com/stretchr/testify v1.11.0/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/stretchr/testify v1.11.1 
h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= @@ -300,8 +318,8 @@ github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaO github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= -github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= -github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA= +github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk= @@ -310,44 +328,42 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= -go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= -go.etcd.io/etcd/api/v3 v3.5.21 h1:A6O2/JDb3tvHhiIz3xf9nJ7REHvtEFJJ3veW3FbCnS8= -go.etcd.io/etcd/api/v3 v3.5.21/go.mod h1:c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY= -go.etcd.io/etcd/client/pkg/v3 v3.5.21 h1:lPBu71Y7osQmzlflM9OfeIV2JlmpBjqBNlLtcoBqUTc= -go.etcd.io/etcd/client/pkg/v3 v3.5.21/go.mod h1:BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs= -go.etcd.io/etcd/client/v2 v2.305.21 h1:eLiFfexc2mE+pTLz9WwnoEsX5JTTpLCYVivKkmVXIRA= -go.etcd.io/etcd/client/v2 v2.305.21/go.mod h1:OKkn4hlYNf43hpjEM3Ke3aRdUkhSl8xjKjSf8eCq2J8= -go.etcd.io/etcd/client/v3 v3.5.21 h1:T6b1Ow6fNjOLOtM0xSoKNQt1ASPCLWrF9XMHcH9pEyY= -go.etcd.io/etcd/client/v3 v3.5.21/go.mod h1:mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU= -go.etcd.io/etcd/pkg/v3 v3.5.21 h1:jUItxeKyrDuVuWhdh0HtjUANwyuzcb7/FAeUfABmQsk= -go.etcd.io/etcd/pkg/v3 v3.5.21/go.mod h1:wpZx8Egv1g4y+N7JAsqi2zoUiBIUWznLjqJbylDjWgU= -go.etcd.io/etcd/raft/v3 v3.5.21 h1:dOmE0mT55dIUsX77TKBLq+RgyumsQuYeiRQnW/ylugk= -go.etcd.io/etcd/raft/v3 v3.5.21/go.mod h1:fmcuY5R2SNkklU4+fKVBQi2biVp5vafMrWUEj4TJ4Cs= -go.etcd.io/etcd/server/v3 v3.5.21 h1:9w0/k12majtgarGmlMVuhwXRI2ob3/d1Ik3X5TKo0yU= -go.etcd.io/etcd/server/v3 v3.5.21/go.mod h1:G1mOzdwuzKT1VRL7SqRchli/qcFrtLBTAQ4lV20sXXo= +go.etcd.io/bbolt v1.4.2 h1:IrUHp260R8c+zYx/Tm8QZr04CX+qWS5PGfPdevhdm1I= +go.etcd.io/bbolt v1.4.2/go.mod h1:Is8rSHO/b4f3XigBC0lL0+4FwAQv3HXEEIgFMuKHceM= +go.etcd.io/etcd/api/v3 v3.6.4 h1:7F6N7toCKcV72QmoUKa23yYLiiljMrT4xCeBL9BmXdo= +go.etcd.io/etcd/api/v3 v3.6.4/go.mod h1:eFhhvfR8Px1P6SEuLT600v+vrhdDTdcfMzmnxVXXSbk= 
+go.etcd.io/etcd/client/pkg/v3 v3.6.4 h1:9HBYrjppeOfFjBjaMTRxT3R7xT0GLK8EJMVC4xg6ok0= +go.etcd.io/etcd/client/pkg/v3 v3.6.4/go.mod h1:sbdzr2cl3HzVmxNw//PH7aLGVtY4QySjQFuaCgcRFAI= +go.etcd.io/etcd/client/v3 v3.6.4 h1:YOMrCfMhRzY8NgtzUsHl8hC2EBSnuqbR3dh84Uryl7A= +go.etcd.io/etcd/client/v3 v3.6.4/go.mod h1:jaNNHCyg2FdALyKWnd7hxZXZxZANb0+KGY+YQaEMISo= +go.etcd.io/etcd/pkg/v3 v3.6.4 h1:fy8bmXIec1Q35/jRZ0KOes8vuFxbvdN0aAFqmEfJZWA= +go.etcd.io/etcd/pkg/v3 v3.6.4/go.mod h1:kKcYWP8gHuBRcteyv6MXWSN0+bVMnfgqiHueIZnKMtE= +go.etcd.io/etcd/server/v3 v3.6.4 h1:LsCA7CzjVt+8WGrdsnh6RhC0XqCsLkBly3ve5rTxMAU= +go.etcd.io/etcd/server/v3 v3.6.4/go.mod h1:aYCL/h43yiONOv0QIR82kH/2xZ7m+IWYjzRmyQfnCAg= +go.etcd.io/raft/v3 v3.6.0 h1:5NtvbDVYpnfZWcIHgGRk9DyzkBIXOi8j+DDp1IcnUWQ= +go.etcd.io/raft/v3 v3.6.0/go.mod h1:nLvLevg6+xrVtHUmVaTcTz603gQPHfh7kUAwV6YpfGo= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 h1:PS8wXpbyaDJQ2VDHHncMe9Vct0Zn1fEjpsjrLxGJoSc= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0/go.mod h1:HDBUsEjOuRC0EzKZ1bSaRGZWUBAzo+MhAcUUORSr4D0= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.33.0 h1:iax7M131HuAm9QkZotNHEfstof92xM+N8sr3uHXc2IM= -go.opentelemetry.io/otel/sdk v1.33.0/go.mod h1:A1Q5oi7/9XaMlIWzPSxLRWOI8nG3FnzHJNbiENQuihM= -go.opentelemetry.io/otel/sdk/metric v1.31.0 h1:i9hxxLJF/9kkvfHppyLL55aW7iIJz4JjxTeYusH7zMc= -go.opentelemetry.io/otel/sdk/metric v1.31.0/go.mod h1:CRInTMVvNhUKgSAMbKyTMxqOBC0zgyxzW55lZzX43Y8= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= -go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg= -go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp 
v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.8.0 h1:fRAZQDcAFHySxpJ1TwlA1cJ4tvcrw7nXl9xWWC8N5CE= +go.opentelemetry.io/proto/otlp v1.8.0/go.mod h1:tIeYOeNBU4cvmPqpaji1P+KbB4Oloai8wN4rWzRrFF0= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= @@ -360,23 +376,23 @@ go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= -go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE= -go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI= -golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw= -golang.org/x/arch v0.15.0/go.mod h1:JmwW7aLIoRUKgaTzhkiEFxvcEiQGyOg9BMonBJUS7EE= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/arch v0.21.0 h1:iTC9o7+wP6cPWpDWkivCvQFGAHDQ59SrSxsLPcnkArw= +golang.org/x/arch v0.21.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= -golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= +golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= +golang.org/x/crypto v0.41.0/go.mod 
h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 h1:y5zboxd6LQAqYIhHnB48p0ByQ/GnQx2BE33L8BOHQkI= -golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= +golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b h1:DXr+pvt3nC887026GRP39Ej11UATqWDmWuS99x26cD0= +golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -388,32 +404,33 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= -golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= -golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo= +golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= -golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= -golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= -golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= -golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI= +golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -421,8 +438,8 @@ golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= -golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -431,24 +448,24 @@ gomodules.xyz/jsonpatch/v2 v2.5.0 h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0 gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= +gonum.org/v1/gonum v0.16.0 
h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= -google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 h1:KAeGQVN3M9nD0/bQXnr/ClcEMJ968gUXJQ9pwfSynuQ= -google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80/go.mod h1:cc8bqMqtv9gMOr0zHg2Vzff5ULhhL2IXP4sbcn32Dro= -google.golang.org/genproto/googleapis/api v0.0.0-20241223144023-3abc09e42ca8 h1:st3LcW/BPi75W4q1jJTEor/QWwbNlPlDG0JTn6XhZu0= -google.golang.org/genproto/googleapis/api v0.0.0-20241223144023-3abc09e42ca8/go.mod h1:klhJGKFyG8Tn50enBn7gizg4nXGXJ+jqEREdCWaPcV4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d h1:xJJRGY7TJcvIlpSrN3K6LAWgNFUILlO+OMAqtg9aqnw= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d/go.mod h1:3ENsm/5D1mzDyhpzeRi1NR784I0BcofWBoSc5QqqMK4= -google.golang.org/grpc v1.69.4 h1:MF5TftSMkd8GLw/m0KM6V8CMOCY6NZ1NQDPGFgbTt4A= -google.golang.org/grpc v1.69.4/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/genproto/googleapis/api v0.0.0-20250826171959-ef028d996bc1 h1:APHvLLYBhtZvsbnpkfknDZ7NyH4z5+ub/I0u8L3Oz6g= +google.golang.org/genproto/googleapis/api v0.0.0-20250826171959-ef028d996bc1/go.mod h1:xUjFWUnWDpZ/C0Gu0qloASKFb6f8/QXiiXhSPFsD668= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 h1:pmJpJEvT846VzausCQ5d7KreSROcDqmO388w5YbnltA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1/go.mod h1:GmFNa4BdJZ2a8G+wCe9Bg3wwThLrJun751XstdJt5Og= +google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= +google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= -gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= @@ -465,61 +482,56 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gorm.io/driver/mysql v1.6.0 
h1:eNbLmNTpPpTOVZi8MMxCi2aaIm0ZpInbORNXDwyLGvg= gorm.io/driver/mysql v1.6.0/go.mod h1:D/oCC2GWK3M/dqoLxnOlaNKmXz8WNTfcS9y5ovaSqKo= -gorm.io/gorm v1.30.1 h1:lSHg33jJTBxs2mgJRfRZeLDG+WZaHYCk3Wtfl6Ngzo4= -gorm.io/gorm v1.30.1/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE= -k8s.io/api v0.33.3 h1:SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8= -k8s.io/api v0.33.3/go.mod h1:01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE= -k8s.io/apiextensions-apiserver v0.33.2 h1:6gnkIbngnaUflR3XwE1mCefN3YS8yTD631JXQhsU6M8= -k8s.io/apiextensions-apiserver v0.33.2/go.mod h1:IvVanieYsEHJImTKXGP6XCOjTwv2LUMos0YWc9O+QP8= -k8s.io/apimachinery v0.33.3 h1:4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA= -k8s.io/apimachinery v0.33.3/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= -k8s.io/apiserver v0.33.2 h1:KGTRbxn2wJagJowo29kKBp4TchpO1DRO3g+dB/KOJN4= -k8s.io/apiserver v0.33.2/go.mod h1:9qday04wEAMLPWWo9AwqCZSiIn3OYSZacDyu/AcoM/M= -k8s.io/client-go v0.33.3 h1:M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA= -k8s.io/client-go v0.33.3/go.mod h1:luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg= -k8s.io/cloud-provider v0.33.2 h1:tP/18SbhytAapqg2/tGD5PFUR6VLYra+QfJ7Qn3FN34= -k8s.io/cloud-provider v0.33.2/go.mod h1:yS8ArLLLZV1+Tv6hkSYrZuYEVz+wQgiekUtaqe9Wxao= -k8s.io/component-base v0.33.3 h1:mlAuyJqyPlKZM7FyaoM/LcunZaaY353RXiOd2+B5tGA= -k8s.io/component-base v0.33.3/go.mod h1:ktBVsBzkI3imDuxYXmVxZ2zxJnYTZ4HAsVj9iF09qp4= -k8s.io/component-helpers v0.33.3 h1:fjWVORSQfI0WKzPeIFSju/gMD9sybwXBJ7oPbqQu6eM= -k8s.io/component-helpers v0.33.3/go.mod h1:7iwv+Y9Guw6X4RrnNQOyQlXcvJrVjPveHVqUA5dm31c= -k8s.io/controller-manager v0.33.2 h1:HIs8PbdTOaY6wTOvKKLwoAHSO6GeDjmYS0Gjnd6rF+c= -k8s.io/controller-manager v0.33.2/go.mod h1:n8maAdN06E3cD0h5N0wuYBv9Qi9FePl7y6Iz3pfc9PY= -k8s.io/csi-translation-lib v0.33.2 h1:QyWkVcf0rbNjc53uAqCyl9kmHCRn1O0Z4QT69y/jwHQ= -k8s.io/csi-translation-lib v0.33.2/go.mod h1:nFPX6BA20EDdIQpitb6p2wVtvLBuXsmm6D1Cwi3rDnE= -k8s.io/dynamic-resource-allocation v0.33.1 h1:xnEWV764LIsRQDTQ0tLFQMz1lY34Ep7D+/NNbrODfm4= -k8s.io/dynamic-resource-allocation v0.33.1/go.mod h1:AgBLCrIi+//A4VKljjJ7YPpJ+LeyDyTvUk7v8+Qf3pI= +gorm.io/gorm v1.30.3 h1:QiG8upl0Sg9ba2Zatfjy0fy4It2iNBL2/eMdvEkdXNs= +gorm.io/gorm v1.30.3/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE= +k8s.io/api v0.34.0 h1:L+JtP2wDbEYPUeNGbeSa/5GwFtIA662EmT2YSLOkAVE= +k8s.io/api v0.34.0/go.mod h1:YzgkIzOOlhl9uwWCZNqpw6RJy9L2FK4dlJeayUoydug= +k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc= +k8s.io/apiextensions-apiserver v0.34.0/go.mod h1:hLI4GxE1BDBy9adJKxUxCEHBGZtGfIg98Q+JmTD7+g0= +k8s.io/apimachinery v0.34.0 h1:eR1WO5fo0HyoQZt1wdISpFDffnWOvFLOOeJ7MgIv4z0= +k8s.io/apimachinery v0.34.0/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apiserver v0.34.0 h1:Z51fw1iGMqN7uJ1kEaynf2Aec1Y774PqU+FVWCFV3Jg= +k8s.io/apiserver v0.34.0/go.mod h1:52ti5YhxAvewmmpVRqlASvaqxt0gKJxvCeW7ZrwgazQ= +k8s.io/client-go v0.34.0 h1:YoWv5r7bsBfb0Hs2jh8SOvFbKzzxyNo0nSb0zC19KZo= +k8s.io/client-go v0.34.0/go.mod h1:ozgMnEKXkRjeMvBZdV1AijMHLTh3pbACPvK7zFR+QQY= +k8s.io/cloud-provider v0.34.0 h1:OgrNE+WSgfvDBQf6WS9qFM7Xr37bc0Og5kkL4hyWDmU= +k8s.io/cloud-provider v0.34.0/go.mod h1:JbMa0t6JIGDMLI7Py6bdp9TN6cfuHrWGq+E/X+Ljkmo= +k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8= +k8s.io/component-base v0.34.0/go.mod h1:RSCqUdvIjjrEm81epPcjQ/DS+49fADvGSCkIP3IC6vg= +k8s.io/component-helpers v0.34.0 h1:5T7P9XGMoUy1JDNKzHf0p/upYbeUf8ZaSf9jbx0QlIo= +k8s.io/component-helpers v0.34.0/go.mod 
h1:kaOyl5tdtnymriYcVZg4uwDBe2d1wlIpXyDkt6sVnt4= +k8s.io/controller-manager v0.34.0 h1:oCHoqS8dcFp7zDSu7HUvTpakq3isSxil3GprGGlJMsE= +k8s.io/controller-manager v0.34.0/go.mod h1:XFto21U+Mm9BT8r/Jd5E4tHCGtwjKAUFOuDcqaj2VK0= +k8s.io/csi-translation-lib v0.34.0 h1:WhCkq35XATZ+x6NKqI4u7XSYtmucuCN7jDk+mmm9XUU= +k8s.io/csi-translation-lib v0.34.0/go.mod h1:lZ+vpT3/6hx7GxXcI1mcoHxZSONvxgl2NwawzFnJP4Y= +k8s.io/dynamic-resource-allocation v0.34.0 h1:RrFNZXb2s5cvvf+KKdO92ss/e+zjGFFaDKAIpzA+Pu8= +k8s.io/dynamic-resource-allocation v0.34.0/go.mod h1:aqmoDIvXjQRhSgxQkFLl6+Ndg6MfdEOI+TQsj1j9V+g= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kms v0.33.2 h1:GFwNXX4CZGQCg9DPOaJi1/+iKidCtB9/OIAGdzRo8FI= -k8s.io/kms v0.33.2/go.mod h1:C1I8mjFFBNzfUZXYt9FZVJ8MJl7ynFbGgZFbBzkBJ3E= -k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a h1:ZV3Zr+/7s7aVbjNGICQt+ppKWsF1tehxggNfbM7XnG8= -k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= -k8s.io/kube-scheduler v0.32.7 h1:QOvu/fNEYGg1gzzpowWHFCI8SD3vJs5Iz0qebEQADd4= -k8s.io/kube-scheduler v0.32.7/go.mod h1:ez/2BnvZv2Bq1K9LpBsDgRsTvwJLAzkcpRMfY7rhLMA= -k8s.io/kubelet v0.33.1 h1:x4LCw1/iZVWOKA4RoITnuB8gMHnw31HPB3S0EF0EexE= -k8s.io/kubelet v0.33.1/go.mod h1:8WpdC9M95VmsqIdGSQrajXooTfT5otEj8pGWOm+KKfQ= -k8s.io/kubernetes v1.33.4 h1:T1d5FLUYm3/KyUeV7YJhKTR980zHCHb7K2xhCSo3lE8= -k8s.io/kubernetes v1.33.4/go.mod h1:nrt8sldmckKz2fCZhgRX3SKfS2e+CzXATPv6ITNkU00= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= +k8s.io/kms v0.34.0 h1:u+/rcxQ3Jr7gC9AY5nXuEnBcGEB7ZOIJ9cdLdyHyEjQ= +k8s.io/kms v0.34.0/go.mod h1:s1CFkLG7w9eaTYvctOxosx88fl4spqmixnNpys0JAtM= +k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611 h1:o4oKOsvSymDkZRsMAPZU7bRdwL+lPOK5VS10Dr1D6eg= +k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/kube-scheduler v0.34.0 h1:iUT5spyg0RlZ9W5dImrxSxv0yTqbsI+/J72/Iuv9ed8= +k8s.io/kube-scheduler v0.34.0/go.mod h1:7pt2HDb32lZOihbt/aamuMBvSe1o+rrd2rQC8aJyfP0= +k8s.io/kubelet v0.34.0 h1:1nZt1Q6Kfx7xCaTS9vnqR9sjZDxf3cRSQkAFCczULmc= +k8s.io/kubelet v0.34.0/go.mod h1:NqbF8ViVettlZbf9hw9DJhubaWn7rGvDDTcLMDm6tQ0= +k8s.io/kubernetes v1.34.0 h1:NvUrwPAVB4W3mSOpJ/RtNGHWWYyUP/xPaX5rUSpzA0w= +k8s.io/kubernetes v1.34.0/go.mod h1:iu+FhII+Oc/1gGWLJcer6wpyih441aNFHl7Pvm8yPto= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d h1:wAhiDyZ4Tdtt7e46e9M5ZSAJ/MnPGPs+Ki1gHw4w1R0= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= -sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8= -sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod 
h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/karpenter v1.6.1 h1:ZAC802Prk/GyKoGUu0LuzEn9fFmJLfUtMfo64derQgw= -sigs.k8s.io/karpenter v1.6.1/go.mod h1:AxCaeRjv1Pgw/Ff7vT4aqyXcg8v1UdBcfzWMCaKSVjA= -sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 h1:qPrZsv1cwQiFeieFlRqT627fVZ+tyfou/+S5S0H5ua0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= +sigs.k8s.io/controller-runtime v0.22.0 h1:mTOfibb8Hxwpx3xEkR56i7xSjB+nH4hZG37SrlCY5e0= +sigs.k8s.io/controller-runtime v0.22.0/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/karpenter v1.6.2 h1:WFayZ49CSOaDMku1iYBTsD3A9hOB2yU/U95VcSAJ8KM= +sigs.k8s.io/karpenter v1.6.2/go.mod h1:AxCaeRjv1Pgw/Ff7vT4aqyXcg8v1UdBcfzWMCaKSVjA= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/scheduler-plugins v0.32.7 h1:fGr4JKraaTe6it4PIqUlXStfctFKYxJgYkDsiU6699o= -sigs.k8s.io/scheduler-plugins v0.32.7/go.mod h1:Oem5rktj6wgFr2SUqcaInUTIBX8tlY8c4qid5vp2lBw= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/internal/constants/env.go b/internal/constants/env.go index 1e26a392..06212f20 100644 --- a/internal/constants/env.go +++ b/internal/constants/env.go @@ -73,9 +73,10 @@ const ( LdPreloadFileName = "ld.so.preload" LdPreloadFile = "/etc/ld.so.preload" - TFLibsVolumeName = "tf-libs" - TFLibsVolumeMountPath = "/tensor-fusion" - TFConnectionNamePrefix = "-tf-vgpu-" + TFLibsVolumeName = "tf-libs" + TFLibsVolumeMountPath = "/tensor-fusion" + TFConnectionNamePrefix = "-tf-vgpu-" + TFConnectionNameNoPrefix = "tf-vgpu-" HostIPFieldRef = "status.hostIP" NodeNameFieldRef = "spec.nodeName" @@ -98,8 +99,7 @@ const ( LdPreloadEnv = "LD_PRELOAD" LdPreloadLimiter = "/home/app/libcuda_limiter.so" - SharedMemDeviceName = "/dev/shm" - SharedMemMountSubPath = "shm" + SharedMemMountSubPath = "/shm" // disable GPU limiter, for emergency use DisableGpuLimiterEnv = "DISABLE_GPU_LIMITER" diff --git a/internal/controller/node_controller.go b/internal/controller/node_controller.go index caedc903..d8908847 100644 --- a/internal/controller/node_controller.go +++ b/internal/controller/node_controller.go @@ -53,6 +53,8 @@ type NodeReconciler struct { // +kubebuilder:rbac:groups=core,resources=nodes/finalizers,verbs=create;get;patch;update // Reconcile k8s nodes to create and update GPUNode +// +//nolint:gocyclo func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := log.FromContext(ctx) node := &corev1.Node{} diff --git 
a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index 6987ab77..c4a36980 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -30,7 +30,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/retry" - "k8s.io/kubernetes/pkg/scheduler/framework" + fwk "k8s.io/kube-scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" @@ -40,6 +40,8 @@ import ( const MaxGPUCounterPerAllocation = 128 const CleanUpCheckInterval = 3 * time.Minute +var GPUCapacityMap = map[string]tfv1.Resource{} + type Strategy interface { Score(gpu *tfv1.GPU) int @@ -51,7 +53,7 @@ type SimulateSchedulingFilterDetail struct { FilterStageDetails []filter.FilterDetail } -func (p *SimulateSchedulingFilterDetail) Clone() framework.StateData { +func (p *SimulateSchedulingFilterDetail) Clone() fwk.StateData { return p } @@ -882,6 +884,10 @@ func (s *GpuAllocator) handleGPUCreate(ctx context.Context, gpu *tfv1.GPU) { s.poolGpuStore[pool][gpuInMem.Name] = gpuInMem } } + + if gpu.Status.GPUModel != "" { + GPUCapacityMap[gpu.Status.GPUModel] = *gpu.Status.Capacity + } log.Info("Added GPU to store", "name", key.Name, "phase", gpu.Status.Phase) } @@ -930,6 +936,12 @@ func (s *GpuAllocator) handleGPUUpdate(ctx context.Context, gpu *tfv1.GPU) { s.gpuStore[key] = gpu.DeepCopy() log.V(6).Info("Updated GPU in store (new entry)", "name", key.Name, "phase", gpu.Status.Phase) } + + if gpu.Status.GPUModel != "" { + if _, exists := GPUCapacityMap[gpu.Status.GPUModel]; !exists { + GPUCapacityMap[gpu.Status.GPUModel] = *gpu.Status.Capacity + } + } } func syncGPUMetadataAndStatusFromCluster(old *tfv1.GPU, gpu *tfv1.GPU) { diff --git a/internal/scheduler/gpuresources/gpuresources.go b/internal/scheduler/gpuresources/gpuresources.go index 16dd1c61..ee6b6e58 100644 --- a/internal/scheduler/gpuresources/gpuresources.go +++ b/internal/scheduler/gpuresources/gpuresources.go @@ -19,6 +19,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" + fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -58,7 +59,7 @@ type GPUSchedulingStateData struct { FinalGPUs []string } -func (p *GPUSchedulingStateData) Clone() framework.StateData { +func (p *GPUSchedulingStateData) Clone() fwk.StateData { return p } @@ -93,7 +94,7 @@ func (s *GPUFit) Name() string { return Name } -func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) { +func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, _ []fwk.NodeInfo) (*framework.PreFilterResult, *fwk.Status) { // Handle progressive migration case if utils.IsProgressiveMigration() && utils.HasGPUResourceRequest(pod) { nodeNames := s.allocator.ListNonUsingNodes() @@ -102,19 +103,19 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod "use native GPU resources, available native GPU nodes: "+strconv.Itoa(len(nodeNames))) return &framework.PreFilterResult{ NodeNames: nodeNames, - }, framework.NewStatus(framework.Success, "progressive migration for native resources claim") + }, fwk.NewStatus(fwk.Success, "progressive migration for native resources claim") } // Skip non tensor-fusion mode if !utils.IsTensorFusionWorker(pod) { - return nil, 
framework.NewStatus(framework.Skip, "skip for non tensor-fusion mode") + return nil, fwk.NewStatus(fwk.Skip, "skip for non tensor-fusion mode") } // Handle tensor-fusion mode scheduling s.logger.Info("checking GPU node resources for pod", "pod", pod.Name) allocRequest, reason, err := s.allocator.ComposeAllocationRequest(pod) if err != nil { - return nil, framework.NewStatus(framework.Error, reason) + return nil, fwk.NewStatus(fwk.Error, reason) } state.Write(CycleStateAllocateRequest, allocRequest) @@ -134,7 +135,7 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod s.fh.EventRecorder().Eventf(pod, pod, v1.EventTypeWarning, "GPUQuotaOrCapacityNotEnough", "check quota and filter", "TensorFusion schedule failed, no enough resource or quotas: "+err.Error()) s.logger.Error(err, "failed to check quota and filter", "pod", pod.Name) - return nil, framework.NewStatus(framework.Unschedulable, err.Error()) + return nil, fwk.NewStatus(fwk.Unschedulable, err.Error()) } validNodesValidGPUs := lo.GroupBy(filteredGPUs, func(gpu *tfv1.GPU) string { @@ -199,51 +200,51 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod return &framework.PreFilterResult{ NodeNames: nodeNames, - }, framework.NewStatus(framework.Success) + }, fwk.NewStatus(fwk.Success) } func (s *GPUFit) PreFilterExtensions() framework.PreFilterExtensions { return nil } -func (s *GPUFit) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { +func (s *GPUFit) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status { if !utils.IsTensorFusionWorker(pod) { - return framework.NewStatus(framework.Success, "skip for non tensor-fusion mode") + return fwk.NewStatus(fwk.Success, "skip for non tensor-fusion mode") } filterResult, err := state.Read(CycleStateGPUSchedulingResult) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } - nodeName := nodeInfo.GetName() + nodeName := nodeInfo.Node().Name if _, ok := filterResult.(*GPUSchedulingStateData).NodeGPUs[nodeName]; !ok { - return framework.NewStatus(framework.Unschedulable, "no valid node found, gpu capacity not enough") + return fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") } - return framework.NewStatus(framework.Success, "") + return fwk.NewStatus(fwk.Success, "") } func (s *GPUFit) Score( ctx context.Context, - state *framework.CycleState, + state fwk.CycleState, pod *v1.Pod, - nodeInfo *framework.NodeInfo, -) (int64, *framework.Status) { + nodeInfo fwk.NodeInfo, +) (int64, *fwk.Status) { // Skip non tensor-fusion mode scheduling if !utils.IsTensorFusionWorker(pod) { - return 0, framework.NewStatus(framework.Success, "") + return 0, fwk.NewStatus(fwk.Success, "") } if state == nil { - return 0, framework.NewStatus(framework.Error, "no valid node found, gpu capacity not enough") + return 0, fwk.NewStatus(fwk.Error, "no valid node found, gpu capacity not enough") } filterResult, err := state.Read(CycleStateGPUSchedulingResult) if err != nil { - return 0, framework.NewStatus(framework.Error, err.Error()) + return 0, fwk.NewStatus(fwk.Error, err.Error()) } scheduledState := filterResult.(*GPUSchedulingStateData) - gpuScoreMap, ok := scheduledState.ValidNodeGPUScore[nodeInfo.GetName()] + gpuScoreMap, ok := scheduledState.ValidNodeGPUScore[nodeInfo.Node().Name] if !ok { - return 0, framework.NewStatus(framework.Unschedulable, "no 
valid node found, gpu capacity not enough") + return 0, fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") } // normalize to 0-100, when node has more GPUs but filtered out, // should consider it as 100 when strategy is compact_first, and consider as 0 when is low_load_first @@ -252,7 +253,7 @@ func (s *GPUFit) Score( sum += score } - notMatchingGPUScoreMap, ok := scheduledState.ValidNodeNotMatchingGPUScore[nodeInfo.GetName()] + notMatchingGPUScoreMap, ok := scheduledState.ValidNodeNotMatchingGPUScore[nodeInfo.Node().Name] if ok { for _, score := range notMatchingGPUScoreMap { sum += score @@ -265,27 +266,27 @@ func (s *GPUFit) ScoreExtensions() framework.ScoreExtensions { return nil } -func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { +func (s *GPUFit) Reserve(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) *fwk.Status { if !utils.IsTensorFusionWorker(pod) { - return framework.NewStatus(framework.Success, "skip for non tensor-fusion mode") + return fwk.NewStatus(fwk.Success, "skip for non tensor-fusion mode") } s.logger.Info("Reserving pod for GPU resources", "pod", pod.Name, "node", nodeName) allocRequest, err := state.Read(CycleStateAllocateRequest) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } schedulingResultRaw, err := state.Read(CycleStateGPUSchedulingResult) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } // set final GPUs and try update GPU allocator cache schedulingResult := schedulingResultRaw.(*GPUSchedulingStateData) gpuScoreMap, ok := schedulingResult.ValidNodeGPUScore[nodeName] if !ok { - return framework.NewStatus(framework.Unschedulable, "no valid node found, gpu capacity not enough") + return fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") } // find top N score GPUs in this node @@ -306,12 +307,12 @@ func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod * allocRequest.(*tfv1.AllocRequest), ) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } - return framework.NewStatus(framework.Success, "") + return fwk.NewStatus(fwk.Success, "") } -func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { +func (s *GPUFit) Unreserve(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) { if !utils.IsTensorFusionWorker(pod) { return } @@ -330,7 +331,7 @@ func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod }, schedulingResult.FinalGPUs, pod.ObjectMeta) } -func (s *GPUFit) PostBind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { +func (s *GPUFit) PostBind(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) { if !utils.IsTensorFusionWorker(pod) { return } diff --git a/internal/scheduler/gpuresources/gpuresources_test.go b/internal/scheduler/gpuresources/gpuresources_test.go index fb7e45b5..71af8c0f 100644 --- a/internal/scheduler/gpuresources/gpuresources_test.go +++ b/internal/scheduler/gpuresources/gpuresources_test.go @@ -14,23 +14,28 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/informers" + clientsetfake 
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/events" - "k8s.io/kubernetes/pkg/scheduler/framework" + fwk "k8s.io/kube-scheduler/framework" + framework "k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort" frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" - testutil "sigs.k8s.io/scheduler-plugins/test/util" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/utils" + internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache" + internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue" ) type GPUResourcesSuite struct { @@ -201,6 +206,7 @@ func (s *GPUResourcesSuite) SetupTest() { }, }, } + s.client = fake.NewClientBuilder().WithScheme(scheme.Scheme). WithRuntimeObjects(objList...). WithStatusSubresource( @@ -213,9 +219,11 @@ func (s *GPUResourcesSuite) SetupTest() { ). Build() + k8sObjs := make([]runtime.Object, 0, len(pods)+len(nodes)) for _, pod := range pods { err := s.client.Create(s.ctx, pod) s.NoError(err) + k8sObjs = append(k8sObjs, pod) } for _, gpu := range gpus { err := s.client.Create(s.ctx, gpu) @@ -224,6 +232,7 @@ func (s *GPUResourcesSuite) SetupTest() { for _, node := range nodes { err := s.client.Create(s.ctx, node) s.NoError(err) + k8sObjs = append(k8sObjs, node) } var registerPlugins []tf.RegisterPluginFunc @@ -233,11 +242,16 @@ func (s *GPUResourcesSuite) SetupTest() { tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), ) + fakeClientSet := clientsetfake.NewSimpleClientset(k8sObjs...) 
+ informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0) + metrics.Register() + metricsRecorder := metrics.NewMetricsAsyncRecorder(1000, time.Second, s.ctx.Done()) fwk, err := tf.NewFramework( s.ctx, registeredPlugins, "", - frameworkruntime.WithPodNominator(testutil.NewPodNominator(nil)), - frameworkruntime.WithSnapshotSharedLister(testutil.NewFakeSharedLister(pods, nodes)), + frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)), + frameworkruntime.WithSnapshotSharedLister(internalcache.NewEmptySnapshot()), frameworkruntime.WithEventRecorder(&events.FakeRecorder{}), + frameworkruntime.WithMetricsRecorder(metricsRecorder), ) s.NoError(err) s.fwk = fwk @@ -271,7 +285,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { tests := []struct { name string pod *v1.Pod - expectedStatus framework.Code + expectedStatus fwk.Code expectedNodes string }{ { @@ -282,7 +296,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "100", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-a node-b", }, { @@ -293,7 +307,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "2000", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b", }, { @@ -304,7 +318,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "100", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b", }, { @@ -315,7 +329,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "2000", constants.VRAMRequestAnnotation: "80Gi", }), - expectedStatus: framework.Unschedulable, + expectedStatus: fwk.Unschedulable, expectedNodes: "", }, { @@ -326,7 +340,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "100", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Unschedulable, + expectedStatus: fwk.Unschedulable, expectedNodes: "", }, } @@ -334,9 +348,9 @@ func (s *GPUResourcesSuite) TestPreFilter() { for _, tt := range tests { s.Run(tt.name, func() { state := framework.NewCycleState() - res, status := s.plugin.PreFilter(s.ctx, state, tt.pod) + res, status := s.plugin.PreFilter(s.ctx, state, tt.pod, []fwk.NodeInfo{}) s.Equal(tt.expectedStatus, status.Code(), status.Message()) - if tt.expectedStatus == framework.Success { + if tt.expectedStatus == fwk.Success { s.Require().NotNil(res) nodes := sort.StringSlice(res.NodeNames.UnsortedList()) nodes.Sort() @@ -351,19 +365,19 @@ func (s *GPUResourcesSuite) TestPreFilterForNonTensorFusionPod() { tests := []struct { name string pod *v1.Pod - expectedStatus framework.Code + expectedStatus fwk.Code expectedNodes string }{ { name: "pod requires 1 GPU, enough capacity", pod: s.makeNonTensorFusionPod("p1", 1), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b node-c", }, { name: "pod requires 2 GPU, enough capacity", pod: s.makeNonTensorFusionPod("p1", 2), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b node-c", }, } @@ -371,9 +385,9 @@ func (s *GPUResourcesSuite) TestPreFilterForNonTensorFusionPod() { for _, tt := range tests { s.Run(tt.name, func() { state := framework.NewCycleState() - res, status := s.plugin.PreFilter(s.ctx, state, tt.pod) + res, status := 
s.plugin.PreFilter(s.ctx, state, tt.pod, []fwk.NodeInfo{}) s.Equal(tt.expectedStatus, status.Code(), status.Message()) - if tt.expectedStatus == framework.Success { + if tt.expectedStatus == fwk.Success { s.Require().NotNil(res) nodes := sort.StringSlice(res.NodeNames.UnsortedList()) nodes.Sort() @@ -394,23 +408,23 @@ func (s *GPUResourcesSuite) TestFilter() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) tests := []struct { name string nodeName string - expectedStatus framework.Code + expectedStatus fwk.Code }{ { name: "node with available GPU", nodeName: "node-a", - expectedStatus: framework.Success, + expectedStatus: fwk.Success, }, { name: "node without available GPU", nodeName: "node-c", - expectedStatus: framework.Unschedulable, + expectedStatus: fwk.Unschedulable, }, } @@ -435,7 +449,7 @@ func (s *GPUResourcesSuite) TestScore() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) // node a as one worker consumed 10% GPU resources @@ -466,7 +480,7 @@ func (s *GPUResourcesSuite) TestReserveAndUnreserve() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) // Reserve on node-a @@ -507,7 +521,7 @@ func (s *GPUResourcesSuite) TestPostBind() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) reserveStatus := s.plugin.Reserve(s.ctx, state, pod, "node-a") @@ -629,13 +643,13 @@ func (s *GPUResourcesSuite) TestReserve_ErrorHandling() { // No pre-filter call, so state is empty status := s.plugin.Reserve(s.ctx, state, pod, "node-a") s.Error(status.AsError()) - s.Equal(framework.Error, status.Code()) + s.Equal(fwk.Error, status.Code()) // Pre-filter, but for a different node - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) status = s.plugin.Reserve(s.ctx, state, pod, "node-c-non-existent") - s.Equal(framework.Unschedulable, status.Code()) + s.Equal(fwk.Unschedulable, status.Code()) } func (s *GPUResourcesSuite) TestUnreserve_ErrorHandling() { @@ -668,7 +682,7 @@ func (s *GPUResourcesSuite) TestPostBind_ErrorHandling() { s.plugin.PostBind(s.ctx, state, pod, "node-a") // Test with a pod that doesn't exist in the client - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) reserveStatus := s.plugin.Reserve(s.ctx, state, pod, "node-a") s.Require().True(reserveStatus.IsSuccess()) @@ -688,7 +702,7 @@ func (s *GPUResourcesSuite) TestFilter_ErrorHandling() { // No pre-filter call, so state is empty status := s.plugin.Filter(s.ctx, state, pod, nodeInfo) 
s.Error(status.AsError()) - s.Equal(framework.Error, status.Code()) + s.Equal(fwk.Error, status.Code()) } func (s *GPUResourcesSuite) TestScore_ErrorHandling() { @@ -704,13 +718,13 @@ func (s *GPUResourcesSuite) TestScore_ErrorHandling() { nodeInfo.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-a"}}) _, status := s.plugin.Score(s.ctx, state, pod, nodeInfo) s.Error(status.AsError()) - s.Equal(framework.Error, status.Code()) + s.Equal(fwk.Error, status.Code()) // Pre-filter, but for a different node nodeInfo = &framework.NodeInfo{} nodeInfo.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-c-non-existent"}}) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) _, status = s.plugin.Score(s.ctx, state, pod, nodeInfo) - s.Equal(framework.Unschedulable, status.Code()) + s.Equal(fwk.Unschedulable, status.Code()) } diff --git a/internal/scheduler/gputopo/gpu_network_topo.go b/internal/scheduler/gputopo/gpu_network_topo.go index f481ea8c..197e3995 100644 --- a/internal/scheduler/gputopo/gpu_network_topo.go +++ b/internal/scheduler/gputopo/gpu_network_topo.go @@ -9,6 +9,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/klog/v2" + fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -53,6 +54,6 @@ func (s *GPUNetworkTopologyAware) Name() string { return Name } -func (s *GPUNetworkTopologyAware) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { - return framework.NewStatus(framework.Success, "") +func (s *GPUNetworkTopologyAware) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status { + return fwk.NewStatus(fwk.Success, "") } diff --git a/internal/server/router/allocator_info.go b/internal/server/router/allocator_info.go index 7c8c4f78..58a949cf 100644 --- a/internal/server/router/allocator_info.go +++ b/internal/server/router/allocator_info.go @@ -17,6 +17,7 @@ import ( tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/log" @@ -104,20 +105,20 @@ func (r *AllocatorInfoRouter) SimulateScheduleOnePod(ctx *gin.Context) { state.SetRecordPluginMetrics(false) podsToActivate := framework.NewPodsToActivate() state.Write(framework.PodsToActivateKey, podsToActivate) - state.Write(framework.StateKey(constants.SchedulerSimulationKey), &gpuallocator.SimulateSchedulingFilterDetail{ + state.Write(fwk.StateKey(constants.SchedulerSimulationKey), &gpuallocator.SimulateSchedulingFilterDetail{ FilterStageDetails: []filter.FilterDetail{}, }) // simulate schedulingCycle non side effect part - fwk := r.scheduler.Profiles[pod.Spec.SchedulerName] - if fwk == nil { + fwkInstance := r.scheduler.Profiles[pod.Spec.SchedulerName] + if fwkInstance == nil { log.FromContext(ctx).Error(nil, "scheduler framework not found", "pod", pod.Name, "namespace", pod.Namespace) ctx.JSON(http.StatusInternalServerError, gin.H{"error": "scheduler framework not found"}) return } - scheduleResult, err := r.scheduler.SchedulePod(ctx, fwk, state, pod) + scheduleResult, err := r.scheduler.SchedulePod(ctx, fwkInstance, state, pod) gpuCycleState, _ := 
state.Read(gpuresources.CycleStateGPUSchedulingResult) - simulateSchedulingFilterDetail, _ := state.Read(framework.StateKey(constants.SchedulerSimulationKey)) + simulateSchedulingFilterDetail, _ := state.Read(fwk.StateKey(constants.SchedulerSimulationKey)) if err != nil { if fitError, ok := err.(*framework.FitError); ok { ctx.JSON(http.StatusOK, gin.H{ diff --git a/internal/utils/compose.go b/internal/utils/compose.go index 93e8248c..2a62af0b 100644 --- a/internal/utils/compose.go +++ b/internal/utils/compose.go @@ -230,7 +230,7 @@ func AddTFDefaultClientConfBeforePatch( pod.Spec.Containers[injectContainerIndex].VolumeMounts, v1.VolumeMount{ Name: constants.DataVolumeName, - MountPath: constants.TFLibsVolumeMountPath, + MountPath: constants.TFDataPath + constants.SharedMemMountSubPath, SubPathExpr: constants.TFDataPathWorkerExpr, MountPropagation: ptr.To(v1.MountPropagationHostToContainer), }) @@ -462,8 +462,7 @@ func composeHypervisorContainer(spec *v1.PodSpec, pool *tfv1.GPUPool, enableVect spec.Containers[0].VolumeMounts = append(spec.Containers[0].VolumeMounts, v1.VolumeMount{ Name: constants.DataVolumeName, ReadOnly: false, - MountPath: constants.SharedMemDeviceName, - SubPath: constants.SharedMemMountSubPath, + MountPath: constants.TFDataPath, }, v1.VolumeMount{ Name: constants.TensorFusionGPUInfoConfigVolumeName, MountPath: constants.TensorFusionGPUInfoConfigMountPath, @@ -682,7 +681,7 @@ func AddWorkerConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, workerCon spec.Containers[0].VolumeMounts, v1.VolumeMount{ Name: constants.DataVolumeName, - MountPath: constants.TFLibsVolumeMountPath, + MountPath: constants.TFDataPath + constants.SharedMemMountSubPath, SubPathExpr: constants.TFDataPathWorkerExpr, MountPropagation: ptr.To(v1.MountPropagationHostToContainer), }) diff --git a/internal/webhook/v1/pod_webhook.go b/internal/webhook/v1/pod_webhook.go index 542a3ab0..8c5aca06 100644 --- a/internal/webhook/v1/pod_webhook.go +++ b/internal/webhook/v1/pod_webhook.go @@ -53,20 +53,18 @@ func SetupPodWebhookWithManager(mgr ctrl.Manager, portAllocator *portallocator.P webhookServer.Register("/mutate-v1-pod", &admission.Webhook{ Handler: &TensorFusionPodMutator{ - decoder: admission.NewDecoder(runtime.NewScheme()), - Client: mgr.GetClient(), - portAllocator: portAllocator, - pricingProvider: pricingProvider, + decoder: admission.NewDecoder(runtime.NewScheme()), + Client: mgr.GetClient(), + portAllocator: portAllocator, }, }) return nil } type TensorFusionPodMutator struct { - Client client.Client - decoder admission.Decoder - portAllocator *portallocator.PortAllocator - pricingProvider pricing.PricingProvider + Client client.Client + decoder admission.Decoder + portAllocator *portallocator.PortAllocator } // Handle implements admission.Handler interface. 
@@ -103,7 +101,7 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque return admission.Errored(http.StatusBadRequest, fmt.Errorf("failed to marshal current pod: %w", err)) } - tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, pod, m.pricingProvider) + tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, pod) if err != nil { return admission.Errored(http.StatusInternalServerError, fmt.Errorf("parse tf resources: %w", err)) } @@ -395,7 +393,7 @@ func addConnectionForRemoteFixedReplicaVirtualGPU(pod *corev1.Pod, container *co if pod.GenerateName == "" && pod.Name != "" { prefix = pod.Name + constants.TFConnectionNamePrefix } else { - prefix = pod.GenerateName + constants.TFConnectionNamePrefix + prefix = pod.GenerateName + constants.TFConnectionNameNoPrefix } connectionName := fmt.Sprintf("%s%s", prefix, utils.NewShortID(10)) connectionNamespace := pod.Namespace diff --git a/internal/webhook/v1/pod_webhook_test.go b/internal/webhook/v1/pod_webhook_test.go index d72770cc..374f2620 100644 --- a/internal/webhook/v1/pod_webhook_test.go +++ b/internal/webhook/v1/pod_webhook_test.go @@ -23,9 +23,9 @@ import ( "net/http" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" - "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/samber/lo" @@ -241,6 +241,56 @@ var _ = Describe("TensorFusionPodMutator", func() { Expect(resp.Patches).To(BeEmpty()) }) + It("should handle dedicated GPU", func() { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-local-gpu", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionEnabledLabelKey: "true", + }, + Annotations: map[string]string{ + constants.DedicatedGPUAnnotation: constants.TrueStringValue, + constants.GPUModelAnnotation: "A100", + constants.GpuPoolKey: "mock", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "main", + Image: "test-image", + }, + }, + }, + } + podBytes, err := json.Marshal(pod) + Expect(err).NotTo(HaveOccurred()) + req := admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Object: runtime.RawExtension{ + Raw: podBytes, + }, + Operation: admissionv1.Create, + Namespace: "default", + }, + } + + gpuallocator.GPUCapacityMap["A100"] = tfv1.Resource{ + Tflops: resource.MustParse("312"), + Vram: resource.MustParse("40Gi"), + } + resp := mutator.Handle(ctx, req) + Expect(resp.Allowed).To(BeTrue()) + + op, found := lo.Find(resp.Patches, func(patch jsonpatch.JsonPatchOperation) bool { + return patch.Operation == "add" && + patch.Path == "/metadata/annotations/tensor-fusion.ai~1tflops-request" + }) + Expect(found).To(BeTrue()) + Expect(op.Value).To(Equal("312")) + }) + It("should handle invalid pod specification", func() { req := admission.Request{ AdmissionRequest: admissionv1.AdmissionRequest{ @@ -533,9 +583,7 @@ var _ = Describe("TensorFusionPodMutator", func() { }, }, } - // Create a mock pricing provider for testing - mockPricingProvider := &pricing.StaticPricingProvider{} - tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, pod, mockPricingProvider) + tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, pod) Expect(err).NotTo(HaveOccurred()) Expect(tfInfo.ContainerNames).To(HaveLen(1)) Expect(tfInfo.ContainerNames[0]).To(Equal("test-container")) diff --git 
a/internal/webhook/v1/tf_parser.go b/internal/webhook/v1/tf_parser.go index cd72fbc1..2fa7b744 100644 --- a/internal/webhook/v1/tf_parser.go +++ b/internal/webhook/v1/tf_parser.go @@ -7,8 +7,8 @@ import ( "strings" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" - "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/utils" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -30,7 +30,6 @@ func ParseTensorFusionInfo( ctx context.Context, k8sClient client.Client, pod *corev1.Pod, - pricingProvider pricing.PricingProvider, ) (utils.TensorFusionInfo, error) { var info utils.TensorFusionInfo if pod.Annotations == nil { @@ -118,7 +117,7 @@ func ParseTensorFusionInfo( } // Handle dedicated GPU logic - err = handleDedicatedGPU(pod, workloadProfile, pricingProvider) + err = handleDedicatedGPU(pod, workloadProfile) if err != nil { return info, fmt.Errorf("handle dedicated GPU: %w", err) } @@ -237,7 +236,7 @@ func setDefaultQuotasIfExists(workloadProfile *tfv1.WorkloadProfile, single tfv1 } // handleDedicatedGPU handles dedicated GPU annotation by setting full GPU capacity -func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile, pricingProvider pricing.PricingProvider) error { +func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile) error { dedicatedGPU, ok := pod.Annotations[constants.DedicatedGPUAnnotation] if !ok || dedicatedGPU != constants.TrueStringValue { return nil // Not a dedicated GPU request @@ -249,16 +248,16 @@ func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile, } // Get full GPU capacity from pricing provider - tflops, vram, found := pricingProvider.GetGPUCapacityByModel(workloadProfile.Spec.GPUModel) + resource, found := gpuallocator.GPUCapacityMap[workloadProfile.Spec.GPUModel] if !found { return fmt.Errorf("could not find capacity information for GPU model: %s", workloadProfile.Spec.GPUModel) } // Set full capacity for both requests and limits - workloadProfile.Spec.Resources.Requests.Tflops = tflops - workloadProfile.Spec.Resources.Requests.Vram = vram - workloadProfile.Spec.Resources.Limits.Tflops = tflops - workloadProfile.Spec.Resources.Limits.Vram = vram + workloadProfile.Spec.Resources.Requests.Tflops = resource.Tflops + workloadProfile.Spec.Resources.Requests.Vram = resource.Vram + workloadProfile.Spec.Resources.Limits.Tflops = resource.Tflops + workloadProfile.Spec.Resources.Limits.Vram = resource.Vram return nil } diff --git a/patches/scheduler-csi-capacity-3.patch b/patches/scheduler-csi-capacity-3.patch index 29a21ae8..c5841d08 100644 --- a/patches/scheduler-csi-capacity-3.patch +++ b/patches/scheduler-csi-capacity-3.patch @@ -9,11 +9,11 @@ "strings" "time" -@@ -514,6 +516,14 @@ +@@ -543,6 +545,14 @@ } handlers = append(handlers, handlerRegistration) - case framework.CSIStorageCapacity: -+ // FIX kubernetes 1.24 and lower version API missing issue + case fwk.CSIStorageCapacity: ++ // FIX kubernetes 1.23 and lower version API missing issue + minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") + if minorVersionStr != "" { + minorVersion, err := strconv.Atoi(minorVersionStr) @@ -22,5 +22,50 @@ + } + } if handlerRegistration, err = informerFactory.Storage().V1().CSIStorageCapacities().Informer().AddEventHandler( - buildEvtResHandler(at, framework.CSIStorageCapacity), + buildEvtResHandler(at, 
fwk.CSIStorageCapacity), ); err != nil { +@@ -578,6 +588,14 @@ + } + handlers = append(handlers, handlerRegistration) + case fwk.ResourceClaim: ++ // FIX kubernetes lower version API missing issue ++ minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") ++ if minorVersionStr != "" { ++ minorVersion, err := strconv.Atoi(minorVersionStr) ++ if err != nil || minorVersion < 34 { ++ continue ++ } ++ } + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + handlerRegistration = resourceClaimCache.AddEventHandler( + buildEvtResHandler(at, fwk.ResourceClaim), +@@ -585,6 +603,14 @@ + handlers = append(handlers, handlerRegistration) + } + case fwk.ResourceSlice: ++ // FIX kubernetes lower version API missing issue ++ minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") ++ if minorVersionStr != "" { ++ minorVersion, err := strconv.Atoi(minorVersionStr) ++ if err != nil || minorVersion < 34 { ++ continue ++ } ++ } + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + if handlerRegistration, err = resourceSliceTracker.AddEventHandler( + buildEvtResHandler(at, fwk.ResourceSlice), +@@ -594,6 +620,14 @@ + handlers = append(handlers, handlerRegistration) + } + case fwk.DeviceClass: ++ // FIX kubernetes lower version API missing issue ++ minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") ++ if minorVersionStr != "" { ++ minorVersion, err := strconv.Atoi(minorVersionStr) ++ if err != nil || minorVersion < 34 { ++ continue ++ } ++ } + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + if handlerRegistration, err = informerFactory.Resource().V1().DeviceClasses().Informer().AddEventHandler( + buildEvtResHandler(at, fwk.DeviceClass), diff --git a/patches/scheduler-pdb-2.patch b/patches/scheduler-pdb-2.patch index 810bb0c6..12af371e 100644 --- a/patches/scheduler-pdb-2.patch +++ b/patches/scheduler-pdb-2.patch @@ -9,22 +9,14 @@ "sync" "sync/atomic" "time" -@@ -34,6 +36,7 @@ - "k8s.io/apimachinery/pkg/util/sets" - corelisters "k8s.io/client-go/listers/core/v1" - policylisters "k8s.io/client-go/listers/policy/v1" -+ policyv1 "k8s.io/client-go/listers/policy/v1" - corev1helpers "k8s.io/component-helpers/scheduling/corev1" - "k8s.io/klog/v2" - extenderv1 "k8s.io/kube-scheduler/extender/v1" -@@ -145,7 +148,16 @@ +@@ -148,8 +150,17 @@ func NewEvaluator(pluginName string, fh framework.Handle, i Interface, enableAsyncPreemption bool) *Evaluator { podLister := fh.SharedInformerFactory().Core().V1().Pods().Lister() - pdbLister := fh.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister() -+ + + // FIX kubernetes 1.21 and lower version API missing issue -+ var pdbLister policyv1.PodDisruptionBudgetLister ++ var pdbLister policylisters.PodDisruptionBudgetLister + minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") + if minorVersionStr != "" { + minorVersion, err := strconv.Atoi(minorVersionStr) @@ -32,6 +24,7 @@ + pdbLister = fh.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister() + } + } - ++ ev := &Evaluator{ PluginName: pluginName, + Handler: fh, diff --git a/scripts/patch-scheduler.sh b/scripts/patch-scheduler.sh index 9afbc43b..23c2837d 100755 --- a/scripts/patch-scheduler.sh +++ b/scripts/patch-scheduler.sh @@ -1,6 +1,10 @@ #!/bin/bash git apply ./patches/scheduler-csi-capacity-1.patch git apply ./patches/scheduler-csi-capacity-2.patch + +# diff -u eventhandlers.go eventhandlers-new.go > changes.patch git apply ./patches/scheduler-csi-capacity-3.patch git apply 
./patches/scheduler-pdb-1.patch + +# diff -u original_file.go modified_file.go > changes.patch git apply ./patches/scheduler-pdb-2.patch \ No newline at end of file diff --git a/test/sched/gpufit_bench_test.go b/test/sched/gpufit_bench_test.go index 20be047e..3acb53d4 100644 --- a/test/sched/gpufit_bench_test.go +++ b/test/sched/gpufit_bench_test.go @@ -42,7 +42,7 @@ func BenchmarkGPUFitPlugin(b *testing.B) { break } testPod := fixture.pods[i] - fixture.plugin.PreFilter(fixture.ctx, state, testPod) + fixture.plugin.PreFilter(fixture.ctx, state, testPod, nil) filterResult, err := state.Read(gpuResourceFitPlugin.CycleStateGPUSchedulingResult) if err != nil { b.Fatal(err) @@ -82,7 +82,7 @@ func BenchmarkGPUFitPlugin(b *testing.B) { b.Run("Filter", func(b *testing.B) { state := framework.NewCycleState() - fixture.plugin.PreFilter(fixture.ctx, state, testPod) + fixture.plugin.PreFilter(fixture.ctx, state, testPod, nil) nodeInfo := &framework.NodeInfo{} b.ResetTimer() @@ -94,7 +94,7 @@ func BenchmarkGPUFitPlugin(b *testing.B) { b.Run("Score", func(b *testing.B) { state := framework.NewCycleState() - fixture.plugin.PreFilter(fixture.ctx, state, testPod) + fixture.plugin.PreFilter(fixture.ctx, state, testPod, nil) nodeInfo := &framework.NodeInfo{} b.ResetTimer() diff --git a/test/sched/scheduler_bench_test.go b/test/sched/scheduler_bench_test.go index 65f43a13..fde318bd 100644 --- a/test/sched/scheduler_bench_test.go +++ b/test/sched/scheduler_bench_test.go @@ -6,14 +6,18 @@ import ( "os" "path/filepath" "runtime" + "strings" "testing" "time" "github.com/NexusGPU/tensor-fusion/cmd/sched" + "github.com/NexusGPU/tensor-fusion/internal/constants" gpuResourceFitPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gpuresources" gpuTopoPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gputopo" "github.com/NexusGPU/tensor-fusion/internal/utils" "go.uber.org/zap/zapcore" + "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -41,7 +45,19 @@ func defaultBenchmarkConfig() BenchmarkConfig { var testEnv *envtest.Environment -func setupKubernetes() (*rest.Config, error) { +func setupKubernetes() (*version.Version, *rest.Config, error) { + // export ENVTEST_K8S_VERSION=1.34.0 + // Run `./bin/setup-envtest use ${ENVTEST_K8S_VERSION} --bin-dir ./bin` before running the test + k8sVersion := os.Getenv("ENVTEST_K8S_VERSION") + if k8sVersion == "" { + k8sVersion = "1.31.0" + } + majorVersion := k8sVersion[:strings.Index(k8sVersion, ".")] + minorVersion := k8sVersion[strings.Index(k8sVersion, ".")+1 : strings.LastIndex(k8sVersion, ".")] + _ = os.Setenv(constants.KubeApiVersionMajorEnv, majorVersion) + _ = os.Setenv(constants.KubeApiVersionMinorEnv, minorVersion) + ver := version.MustParse(k8sVersion) + _ = feature.DefaultMutableFeatureGate.SetEmulationVersion(ver) testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{ filepath.Join("..", "..", "config", "crd", "bases"), @@ -49,15 +65,14 @@ func setupKubernetes() (*rest.Config, error) { }, ErrorIfCRDPathMissing: true, - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. 
When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", - fmt.Sprintf("1.31.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + fmt.Sprintf("%s-%s-%s", k8sVersion, runtime.GOOS, runtime.GOARCH)), + } + cfg, err := testEnv.Start() + if err != nil { + return nil, nil, err } - return testEnv.Start() + return ver, cfg, nil } // Estimated Performance: 400-500 pods/second for 1K nodes, 10K Pods cluster on Mac M4 Pro @@ -65,7 +80,7 @@ func setupKubernetes() (*rest.Config, error) { func BenchmarkScheduler(b *testing.B) { klog.SetLogger(zap.New(zap.WriteTo(os.Stderr), zap.UseDevMode(false), zap.Level(zapcore.ErrorLevel))) // Setup phase - runs once before all benchmark iterations - cfg, err := setupKubernetes() + ver, cfg, err := setupKubernetes() if err != nil { b.Fatal(err) } @@ -99,7 +114,7 @@ func BenchmarkScheduler(b *testing.B) { testCtx := ctx cc, scheduler, err := sched.SetupScheduler(testCtx, nil, - "../../config/samples/scheduler-config.yaml", true, gpuResourceFitOpt, gpuTopoOpt) + "../../config/samples/scheduler-config.yaml", true, ver, gpuResourceFitOpt, gpuTopoOpt) if err != nil { b.Fatal(err) } diff --git a/test/sched/setup.go b/test/sched/setup.go index 03e40bfa..6fa4167d 100644 --- a/test/sched/setup.go +++ b/test/sched/setup.go @@ -17,18 +17,22 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + informers "k8s.io/client-go/informers" + clientsetfake "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/events" "k8s.io/klog/v2" + internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache" + internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue" "k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort" frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - testutil "sigs.k8s.io/scheduler-plugins/test/util" ) // BenchmarkConfig holds benchmark configuration @@ -85,23 +89,35 @@ func NewBenchmarkFixture( b.Logf("%d Pods created, Needed TFLOPS: %f, Needed VRAM: %f", len(pods), neededTflops, neededVRAM) // Batch create resources for better performance - batchCreateResources(b, ctx, client, nodes, gpus, pods, realAPIServer) + k8sNativeObjects := batchCreateResources(b, ctx, client, nodes, gpus, pods, realAPIServer) // Setup allocator allocator := setupAllocator(b, ctx, client) // Setup framework and plugin - fwk, plugin := setupFrameworkAndPlugin(b, ctx, client, allocator, pods, nodes) - - return &BenchmarkFixture{ - ctx: ctx, - cancel: cancel, - plugin: plugin, - nodes: nodes, - pods: pods, - allocator: allocator, - client: client, - fwk: fwk, + if !realAPIServer { + fwk, plugin := setupFrameworkAndPlugin(b, ctx, client, allocator, k8sNativeObjects) + return &BenchmarkFixture{ + ctx: ctx, + cancel: cancel, + plugin: plugin, + nodes: nodes, + pods: pods, + allocator: allocator, + client: client, + fwk: fwk, + } + } else { + return &BenchmarkFixture{ + ctx: ctx, + cancel: cancel, + plugin: nil, + nodes: nodes, + pods: pods, + allocator: allocator, + client: client, + fwk: nil, + } } } @@ -273,7 +289,8 @@ func generatePods(count int, 
namespace, poolName string) ([]*v1.Pod, float64, fl func batchCreateResources( b *testing.B, ctx context.Context, client client.Client, nodes []*v1.Node, gpus []*tfv1.GPU, pods []*v1.Pod, realAPIServer bool, -) { +) []runtime.Object { + k8sObjs := []runtime.Object{} require.NoError(b, client.Create(ctx, &v1.Namespace{ ObjectMeta: metav1.ObjectMeta{Name: "benchmark-ns"}, })) @@ -283,6 +300,7 @@ func batchCreateResources( for _, node := range nodes { nodeCopy := node.DeepCopy() require.NoError(b, client.Create(ctx, nodeCopy)) + k8sObjs = append(k8sObjs, nodeCopy) if realAPIServer { node.ResourceVersion = nodeCopy.ResourceVersion @@ -310,13 +328,15 @@ func batchCreateResources( b.Logf("Creating %d pods", len(pods)) for _, pod := range pods { require.NoError(b, client.Create(ctx, pod)) + k8sObjs = append(k8sObjs, pod) } b.Logf("%d pods created, duration: %v", len(pods), time.Since(timer)) + return k8sObjs } func setupFrameworkAndPlugin( b *testing.B, ctx context.Context, client client.Client, - allocator *gpuallocator.GpuAllocator, pods []*v1.Pod, nodes []*v1.Node, + allocator *gpuallocator.GpuAllocator, k8sObjs []runtime.Object, ) (framework.Framework, *gpuResourceFitPlugin.GPUFit) { // Register plugins including our GPU plugin registeredPlugins := []tf.RegisterPluginFunc{ @@ -324,11 +344,16 @@ func setupFrameworkAndPlugin( tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), } - // Create framework - fwk, err := tf.NewFramework(ctx, registeredPlugins, "", - frameworkruntime.WithPodNominator(testutil.NewPodNominator(nil)), - frameworkruntime.WithSnapshotSharedLister(testutil.NewFakeSharedLister(pods, nodes)), + fakeClientSet := clientsetfake.NewSimpleClientset(k8sObjs...) + informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0) + metrics.Register() + metricsRecorder := metrics.NewMetricsAsyncRecorder(1000, time.Second, ctx.Done()) + fwk, err := tf.NewFramework( + ctx, registeredPlugins, "", + frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)), + frameworkruntime.WithSnapshotSharedLister(internalcache.NewEmptySnapshot()), frameworkruntime.WithEventRecorder(&events.FakeRecorder{}), + frameworkruntime.WithMetricsRecorder(metricsRecorder), ) require.NoError(b, err) From 52d4fd24a3d2d53a7fa0e659fb468636b1623a27 Mon Sep 17 00:00:00 2001 From: dylan Date: Tue, 9 Sep 2025 07:57:51 -0700 Subject: [PATCH 16/34] cel fliter enhancement --- .../filter/cel_filter/cel_filter.go | 1073 +++++++++++++++-- .../cel_filter/cel_filter_benchmark_test.go | 9 +- .../filter/cel_filter/expression_cache.go | 6 +- 3 files changed, 957 insertions(+), 131 deletions(-) diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter.go b/internal/gpuallocator/filter/cel_filter/cel_filter.go index a9369535..ea463b0f 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter.go @@ -3,29 +3,431 @@ package cel_filter import ( "context" "fmt" - "time" + "reflect" + "regexp" + "runtime" + "strconv" + "strings" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/google/cel-go/cel" "github.com/google/cel-go/common/types" + "github.com/google/cel-go/common/types/ref" + "github.com/google/cel-go/interpreter" "sigs.k8s.io/controller-runtime/pkg/log" ) +// Parallel processing constants +const ( + // Threshold for enabling parallel processing + ParallelThreshold = 2000 + // Default number of worker goroutines + DefaultWorkerCount = 4 +) + +// fieldUsage tracks which GPU fields are used in the 
expression +type fieldUsage struct { + labels bool + annotations bool + available bool + nodeSelector bool + runningApps bool +} + +// FastPathPredicate represents a compiled fast-path predicate function +type FastPathPredicate func(gpu *tfv1.GPU) bool + +// ExpressionPattern represents a recognized expression pattern for fast path +type ExpressionPattern struct { + Pattern *regexp.Regexp + Generator func(matches []string) FastPathPredicate +} + +// Common fast path patterns - order matters (most specific first) +var fastPathPatterns = []ExpressionPattern{ + // Complex AND pattern: gpu.available.tflops >= NUMBER && gpu.labels['KEY'] == 'VALUE' + { + Pattern: regexp.MustCompile(`^gpu\.available\.tflops\s*>=\s*([0-9]+(?:\.[0-9]+)?)\s*&&\s*gpu\.labels\['([^']+)'\]\s*==\s*'([^']+)'$`), + Generator: func(matches []string) FastPathPredicate { + threshold, _ := strconv.ParseFloat(matches[1], 64) + labelKey, labelValue := matches[2], matches[3] + return func(gpu *tfv1.GPU) bool { + return gpu.Status.Available != nil && + gpu.Status.Available.Tflops.AsApproximateFloat64() >= threshold && + gpu.Labels != nil && gpu.Labels[labelKey] == labelValue + } + }, + }, + // gpu.available.tflops >= NUMBER + { + Pattern: regexp.MustCompile(`^gpu\.available\.tflops\s*>=\s*([0-9]+(?:\.[0-9]+)?)$`), + Generator: func(matches []string) FastPathPredicate { + threshold, _ := strconv.ParseFloat(matches[1], 64) + return func(gpu *tfv1.GPU) bool { + return gpu.Status.Available != nil && gpu.Status.Available.Tflops.AsApproximateFloat64() >= threshold + } + }, + }, + // gpu.available.tflops > NUMBER + { + Pattern: regexp.MustCompile(`^gpu\.available\.tflops\s*>\s*([0-9]+(?:\.[0-9]+)?)$`), + Generator: func(matches []string) FastPathPredicate { + threshold, _ := strconv.ParseFloat(matches[1], 64) + return func(gpu *tfv1.GPU) bool { + return gpu.Status.Available != nil && gpu.Status.Available.Tflops.AsApproximateFloat64() > threshold + } + }, + }, + // gpu.available.vram >= NUMBER + { + Pattern: regexp.MustCompile(`^gpu\.available\.vram\s*>=\s*([0-9]+(?:\.[0-9]+)?)$`), + Generator: func(matches []string) FastPathPredicate { + threshold, _ := strconv.ParseFloat(matches[1], 64) + return func(gpu *tfv1.GPU) bool { + return gpu.Status.Available != nil && gpu.Status.Available.Vram.AsApproximateFloat64() >= threshold + } + }, + }, + // gpu.available.vram > NUMBER + { + Pattern: regexp.MustCompile(`^gpu\.available\.vram\s*>\s*([0-9]+(?:\.[0-9]+)?)$`), + Generator: func(matches []string) FastPathPredicate { + threshold, _ := strconv.ParseFloat(matches[1], 64) + return func(gpu *tfv1.GPU) bool { + return gpu.Status.Available != nil && gpu.Status.Available.Vram.AsApproximateFloat64() > threshold + } + }, + }, + // gpu.labels['KEY'] == 'VALUE' + { + Pattern: regexp.MustCompile(`^gpu\.labels\['([^']+)'\]\s*==\s*'([^']+)'$`), + Generator: func(matches []string) FastPathPredicate { + key, value := matches[1], matches[2] + return func(gpu *tfv1.GPU) bool { + return gpu.Labels != nil && gpu.Labels[key] == value + } + }, + }, + // gpu.annotations['KEY'] == 'VALUE' + { + Pattern: regexp.MustCompile(`^gpu\.annotations\['([^']+)'\]\s*==\s*'([^']+)'$`), + Generator: func(matches []string) FastPathPredicate { + key, value := matches[1], matches[2] + return func(gpu *tfv1.GPU) bool { + return gpu.Annotations != nil && gpu.Annotations[key] == value + } + }, + }, +} + + +// ZeroAllocActivation provides zero-allocation variable resolution for CEL +// This eliminates the need to create map[string]interface{} for each GPU +type ZeroAllocActivation 
struct { + gpu *tfv1.GPU + workerPodKey tfv1.NameNamespace + usage fieldUsage +} + +// ResolveName implements interpreter.Activation interface +func (a *ZeroAllocActivation) ResolveName(name string) (interface{}, bool) { + switch name { + case CELVarGPU: + return a.createGPUObject(), true + case CELVarWorkerPodKey: + return a.createWorkerPodKeyObject(), true + default: + return nil, false + } +} + +// Parent implements interpreter.Activation interface +func (a *ZeroAllocActivation) Parent() interpreter.Activation { + return nil +} + +// createGPUObject creates GPU object on-demand without maps +func (a *ZeroAllocActivation) createGPUObject() interface{} { + // Return GPU value with lazy caching + return &gpuVal{GPU: a.gpu} +} + + +// createWorkerPodKeyObject creates worker pod key object +func (a *ZeroAllocActivation) createWorkerPodKeyObject() interface{} { + return map[string]interface{}{ + "name": a.workerPodKey.Name, + "namespace": a.workerPodKey.Namespace, + } +} + +// gpuVal implements CEL value interface for GPU objects to eliminate map allocations +type gpuVal struct { + *tfv1.GPU + // Cached sub-values to avoid repeated allocations + labels ref.Val + annotations ref.Val + nodeSelector ref.Val + available ref.Val + runningApps ref.Val +} + +// Type implements ref.Val interface +func (v *gpuVal) Type() ref.Type { + return types.MapType +} + +// Value implements ref.Val interface +func (v *gpuVal) Value() interface{} { + return v.GPU +} + +// Equal implements ref.Val interface +func (v *gpuVal) Equal(other ref.Val) ref.Val { + if otherGPU, ok := other.(*gpuVal); ok { + return types.Bool(v.GPU.UID == otherGPU.GPU.UID) + } + return types.False +} + +// ConvertToNative implements ref.Val interface +func (v *gpuVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { + return v.GPU, nil +} + +// ConvertToType implements ref.Val interface +func (v *gpuVal) ConvertToType(typeValue ref.Type) ref.Val { + switch typeValue { + case types.TypeType: + return types.MapType + default: + return types.NewErr("type conversion error") + } +} + +// HasField implements traits.FieldTester interface +func (v *gpuVal) HasField(field string) bool { + switch field { + case GPUFieldName, GPUFieldNamespace, GPUFieldGPUModel, GPUFieldUUID, + GPUFieldPhase, GPUFieldUsedBy, GPUFieldMessage, GPUFieldLabels, + GPUFieldAnnotations, GPUFieldAvailable, GPUFieldNodeSelector, GPUFieldRunningApps: + return true + default: + return false + } +} + +// Get implements traits.Indexer interface for field access with lazy caching +func (v *gpuVal) Get(index ref.Val) ref.Val { + field, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + + switch field { + case GPUFieldName: + return types.String(v.GPU.Name) + case GPUFieldNamespace: + return types.String(v.GPU.Namespace) + case GPUFieldGPUModel: + return types.String(v.GPU.Status.GPUModel) + case GPUFieldUUID: + return types.String(v.GPU.Status.UUID) + case GPUFieldPhase: + return types.String(string(v.GPU.Status.Phase)) + case GPUFieldUsedBy: + return types.String(string(v.GPU.Status.UsedBy)) + case GPUFieldMessage: + return types.String(v.GPU.Status.Message) + case GPUFieldLabels: + // Lazy initialization with caching + if v.labels == nil { + v.labels = &labelsVal{labels: v.GPU.Labels} + } + return v.labels + case GPUFieldAnnotations: + // Lazy initialization with caching + if v.annotations == nil { + v.annotations = &labelsVal{labels: v.GPU.Annotations} + } + return v.annotations + case GPUFieldAvailable: + // Lazy 
initialization with caching + if v.available == nil { + v.available = &availableVal{available: v.GPU.Status.Available} + } + return v.available + case GPUFieldNodeSelector: + // Lazy initialization with caching + if v.nodeSelector == nil { + v.nodeSelector = &labelsVal{labels: v.GPU.Status.NodeSelector} + } + return v.nodeSelector + case GPUFieldRunningApps: + // For now, keep simple implementation - can optimize later if needed + if v.runningApps == nil { + apps := make([]interface{}, len(v.GPU.Status.RunningApps)) + for i, app := range v.GPU.Status.RunningApps { + apps[i] = map[string]interface{}{ + "name": app.Name, + "namespace": app.Namespace, + } + } + v.runningApps = types.NewDynamicList(types.DefaultTypeAdapter, apps) + } + return v.runningApps + default: + return types.NewErr("no such field: %s", field) + } +} + +// availableVal provides direct access to GPU available resources without maps +type availableVal struct { + available *tfv1.Resource +} + +// Type implements ref.Val interface +func (v *availableVal) Type() ref.Type { + return types.MapType +} + +// Value implements ref.Val interface +func (v *availableVal) Value() interface{} { + return v.available +} + +// Equal implements ref.Val interface +func (v *availableVal) Equal(other ref.Val) ref.Val { + return types.False // Not used in comparisons +} + +// ConvertToNative implements ref.Val interface +func (v *availableVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { + return v.available, nil +} + +// ConvertToType implements ref.Val interface +func (v *availableVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} + +// Get implements field access for available resources +func (v *availableVal) Get(index ref.Val) ref.Val { + field, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + + if v.available == nil { + switch field { + case "tflops": + return types.Double(0.0) + case "vram": + return types.Int(0) + default: + return types.NewErr("no such field: %s", field) + } + } + + switch field { + case "tflops": + return types.Double(v.available.Tflops.AsApproximateFloat64()) + case "vram": + return types.Int(v.available.Vram.Value()) + default: + return types.NewErr("no such field: %s", field) + } +} + +// HasField implements field testing +func (v *availableVal) HasField(field string) bool { + return field == "tflops" || field == "vram" +} + +// labelsVal provides direct access to GPU labels without copying +type labelsVal struct { + labels map[string]string +} + +// Type implements ref.Val interface +func (v *labelsVal) Type() ref.Type { + return types.MapType +} + +// Value implements ref.Val interface +func (v *labelsVal) Value() interface{} { + return v.labels +} + +// Equal implements ref.Val interface +func (v *labelsVal) Equal(other ref.Val) ref.Val { + return types.False // Not used in comparisons +} + +// ConvertToNative implements ref.Val interface +func (v *labelsVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { + return v.labels, nil +} + +// ConvertToType implements ref.Val interface +func (v *labelsVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} + +// Get implements map access for labels +func (v *labelsVal) Get(index ref.Val) ref.Val { + key, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + + if v.labels == nil { + return types.String("") + } + + value, exists := v.labels[key] + if 
!exists { + return types.String("") + } + return types.String(value) +} + // AllocRequestCELFilter converts AllocRequest to CEL filter and executes it type CELFilter struct { - cache *ExpressionCache - expression string - name string + cache *ExpressionCache + name string + // Store early filtering criteria for optimization + requiredPhase string + requiredGPUModel string + userExpression string + // Track which fields are actually used + usage fieldUsage + // Display expression for logging (read-only) + displayExpression string + // Fast path predicate for common patterns + fastPathPredicate FastPathPredicate } // NewAllocRequestCELFilter creates a new CEL filter from allocation request func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, error) { - // Convert AllocRequest to CEL expression - expression, err := convertAllocRequestToCEL(req) - if err != nil { - return nil, fmt.Errorf("failed to convert AllocRequest to CEL: %w", err) + // Extract early filtering criteria + var requiredPhase, requiredGPUModel, userExpression, displayExpression string + + if req != nil { + requiredPhase = "Ready" // Keep as Ready for compatibility with tests + requiredGPUModel = req.GPUModel + userExpression = req.CELFilterExpression + + // Build display expression for logging (not used for execution) + displayExpression = buildDisplayExpression(req) } + // Analyze field usage in user expression only + usage := analyzeFieldUsage(userExpression) + + // Try to compile fast path predicate + fastPath := compileFastPath(userExpression) + // Handle nil request case name := "AllocRequest-unknown" if req != nil { @@ -33,9 +435,14 @@ func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, e } return &CELFilter{ - cache: cache, - expression: expression, - name: name, + cache: cache, + name: name, + requiredPhase: requiredPhase, + requiredGPUModel: requiredGPUModel, + userExpression: userExpression, + usage: usage, + displayExpression: displayExpression, + fastPathPredicate: fastPath, }, nil } @@ -51,84 +458,93 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, return gpus, nil } - if f.expression == "" { - // If no expression, return all GPUs (no filtering needed) - return gpus, nil + // Pre-allocate result slice with estimated capacity + filteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) + + // Early filtering phase: apply basic filters first to reduce CEL evaluation overhead + earlyFilteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) + for _, gpu := range gpus { + // Fast path: check phase first (most common filter) + if f.requiredPhase != "" && string(gpu.Status.Phase) != f.requiredPhase { + continue + } + + // Fast path: check GPU model (second most common filter) + if f.requiredGPUModel != "" && gpu.Status.GPUModel != f.requiredGPUModel { + continue + } + + earlyFilteredGPUs = append(earlyFilteredGPUs, gpu) } - // Get compiled program from cache - program, err := f.cache.GetOrCompileProgram(f.expression) + // If no user expression, return early filtered results + if f.userExpression == "" { + log.V(1).Info("CEL filter applied (early filtering only)", + "filter", f.name, + "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), + "outputGPUs", len(earlyFilteredGPUs)) + return earlyFilteredGPUs, nil + } + + // If no GPUs passed early filtering, return empty result + if len(earlyFilteredGPUs) == 0 { + return earlyFilteredGPUs, nil + } + + // Get compiled program from cache for user expression + program, err := 
f.cache.GetOrCompileProgram(f.userExpression) if err != nil { - return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.expression, err) + return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.userExpression, err) } - var filteredGPUs []*tfv1.GPU - for _, gpu := range gpus { - // Create timeout context for CEL evaluation - evalCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond) - - // Create variables for CEL evaluation - vars := createCELVariables(*gpu, workerPodKey) - - // Evaluate with timeout - resultChan := make(chan evalResult, 1) - go func() { - result, _, evalErr := program.Eval(vars) - resultChan <- evalResult{result: result, err: evalErr} - }() - - select { - case evalRes := <-resultChan: - cancel() - if evalRes.err != nil { - log.Error(evalRes.err, "CEL expression evaluation failed", - "expression", f.expression, - "gpu", gpu.Name, - "workerPodKey", workerPodKey) - // On error, exclude the GPU (fail-safe) - continue - } - - // Convert result to boolean - if boolResult, ok := evalRes.result.(types.Bool); ok { - if bool(boolResult) { + // Use fast path if available, otherwise fall back to CEL + if f.fastPathPredicate != nil { + // Fast path: direct Go function evaluation with optional parallelization + if len(earlyFilteredGPUs) >= ParallelThreshold { + filteredGPUs = f.filterParallel(earlyFilteredGPUs) + } else { + for _, gpu := range earlyFilteredGPUs { + if f.fastPathPredicate(gpu) { filteredGPUs = append(filteredGPUs, gpu) } - } else { - log.Error(nil, "CEL expression did not return boolean", - "expression", f.expression, - "result", evalRes.result, - "gpu", gpu.Name) - // On non-boolean result, exclude the GPU (fail-safe) - continue } - case <-evalCtx.Done(): - cancel() - // Timeout - skip this GPU (fail-safe behavior) - log.V(1).Info("CEL evaluation timeout", "gpu", gpu.Name, "expression", f.expression) - continue } + + log.V(1).Info("CEL filter applied (fast path)", + "filter", f.name, + "displayExpression", f.displayExpression, + "userExpression", f.userExpression, + "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), + "outputGPUs", len(filteredGPUs)) + } else { + // Fallback to CEL evaluation for complex expressions + if len(earlyFilteredGPUs) >= ParallelThreshold { + // Use parallel evaluation for large GPU sets + filteredGPUs = f.filterFallbackParallel(ctx, program, earlyFilteredGPUs, workerPodKey) + } else { + // Sequential evaluation for smaller sets + filteredGPUs = f.filterFallbackSequential(ctx, program, earlyFilteredGPUs, workerPodKey) + } + + log.V(1).Info("CEL filter applied (CEL evaluation)", + "filter", f.name, + "displayExpression", f.displayExpression, + "userExpression", f.userExpression, + "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), + "outputGPUs", len(filteredGPUs)) } - log.V(1).Info("AllocRequest CEL filter applied", - "filter", f.name, - "expression", f.expression, - "inputGPUs", len(gpus), - "outputGPUs", len(filteredGPUs)) - return filteredGPUs, nil } -type evalResult struct { - result interface{} - err error -} - -// convertAllocRequestToCEL converts an allocation request to a CEL expression -func convertAllocRequestToCEL(req *tfv1.AllocRequest) (string, error) { +// buildDisplayExpression creates a readable expression string for logging purposes only +func buildDisplayExpression(req *tfv1.AllocRequest) string { if req == nil { - return "", nil + return "" } var conditions []string @@ -138,30 +554,24 @@ func convertAllocRequestToCEL(req 
*tfv1.AllocRequest) (string, error) { conditions = append(conditions, req.CELFilterExpression) } - // Add GPU phase condition (must be Ready) - conditions = append(conditions, "gpu.phase == 'Ready'") - - // Add GPU model filter if specified - if req.GPUModel != "" { - conditions = append(conditions, fmt.Sprintf("gpu.gpuModel == '%s'", req.GPUModel)) - } - - // If no conditions, return empty expression (no filtering) + // If no conditions, return empty expression if len(conditions) == 0 { - return "", nil + return "" } - // Combine all conditions with AND + // Combine all conditions with AND using strings.Builder for efficiency if len(conditions) == 1 { - return conditions[0], nil + return conditions[0] } - expression := conditions[0] + var builder strings.Builder + builder.WriteString(conditions[0]) for i := 1; i < len(conditions); i++ { - expression += " && " + conditions[i] + builder.WriteString(" && ") + builder.WriteString(conditions[i]) } - return expression, nil + return builder.String() } // createCELEnvironment creates a CEL environment with GPU-related variables and functions @@ -171,58 +581,469 @@ func createCELEnvironment() (*cel.Env, error) { cel.Variable(CELVarGPU, cel.MapType(cel.StringType, cel.DynType)), // Define worker pod key cel.Variable(CELVarWorkerPodKey, cel.MapType(cel.StringType, cel.StringType)), - // Define request object structure - cel.Variable(CELVarRequest, cel.MapType(cel.StringType, cel.DynType)), ) } -// createCELVariables creates variables for CEL evaluation from GPU and request information -func createCELVariables(gpu tfv1.GPU, workerPodKey tfv1.NameNamespace) map[string]interface{} { - // Convert GPU to a map for CEL evaluation - gpuMap := map[string]interface{}{ - GPUFieldName: gpu.Name, - GPUFieldNamespace: gpu.Namespace, - GPUFieldGPUModel: gpu.Status.GPUModel, - GPUFieldUUID: gpu.Status.UUID, - GPUFieldPhase: string(gpu.Status.Phase), - GPUFieldUsedBy: string(gpu.Status.UsedBy), - GPUFieldMessage: gpu.Status.Message, - GPUFieldLabels: gpu.Labels, - GPUFieldAnnotations: gpu.Annotations, + +// filterParallel processes GPUs in parallel for large datasets +func (f *CELFilter) filterParallel(gpus []*tfv1.GPU) []*tfv1.GPU { + numGPUs := len(gpus) + numWorkers := runtime.NumCPU() + if numWorkers > DefaultWorkerCount { + numWorkers = DefaultWorkerCount + } + + chunkSize := (numGPUs + numWorkers - 1) / numWorkers + resultChannels := make([]<-chan []*tfv1.GPU, numWorkers) + + // Create workers + for i := 0; i < numWorkers; i++ { + start := i * chunkSize + end := start + chunkSize + if end > numGPUs { + end = numGPUs + } + + if start >= end { + // No work for this worker + ch := make(chan []*tfv1.GPU, 1) + ch <- []*tfv1.GPU{} + close(ch) + resultChannels[i] = ch + continue + } + + chunk := gpus[start:end] + resultCh := make(chan []*tfv1.GPU, 1) + resultChannels[i] = resultCh + + // Start worker goroutine + go func(gpuChunk []*tfv1.GPU, resultCh chan<- []*tfv1.GPU) { + defer close(resultCh) + + filtered := make([]*tfv1.GPU, 0, len(gpuChunk)/2) // Estimate 50% pass rate + for _, gpu := range gpuChunk { + if f.fastPathPredicate(gpu) { + filtered = append(filtered, gpu) + } + } + resultCh <- filtered + }(chunk, resultCh) + } + + // Collect results + var totalFiltered []*tfv1.GPU + for _, ch := range resultChannels { + chunkResults := <-ch + totalFiltered = append(totalFiltered, chunkResults...) 
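+		// Note: the result channels are drained in worker index order, so the
+		// filtered slice preserves the relative ordering of the input GPUs; each
+		// channel is buffered with capacity 1, so workers never block on send.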
+ } + + return totalFiltered +} + +// filterFallbackSequential performs sequential CEL evaluation for smaller GPU sets +func (f *CELFilter) filterFallbackSequential(ctx context.Context, program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) []*tfv1.GPU { + filteredGPUs := make([]*tfv1.GPU, 0, len(gpus)/2) + log := log.FromContext(ctx) + + for i, gpu := range gpus { + // Periodic context check every 64 GPUs for very large sets + if i&63 == 0 { + select { + case <-ctx.Done(): + log.V(1).Info("CEL evaluation cancelled", "processedGPUs", len(filteredGPUs), "totalGPUs", len(gpus)) + return filteredGPUs + default: + } + } + + // Use zero-allocation activation instead of maps + activation := &ZeroAllocActivation{ + gpu: gpu, + workerPodKey: workerPodKey, + usage: f.usage, + } + + // Direct synchronous evaluation with custom activation + result, _, evalErr := program.Eval(activation) + + if evalErr != nil { + log.Error(evalErr, "CEL expression evaluation failed", + "expression", f.userExpression, + "gpu", gpu.Name, + "workerPodKey", workerPodKey) + // On error, exclude the GPU (fail-safe) + continue + } + + // Convert result to boolean + if boolResult, ok := result.(types.Bool); ok { + if bool(boolResult) { + filteredGPUs = append(filteredGPUs, gpu) + } + } else { + log.Error(nil, "CEL expression did not return boolean", + "expression", f.userExpression, + "result", result, + "gpu", gpu.Name) + // On non-boolean result, exclude the GPU (fail-safe) + continue + } + } + + return filteredGPUs +} + +// filterFallbackParallel performs parallel CEL evaluation for large GPU sets +func (f *CELFilter) filterFallbackParallel(ctx context.Context, program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) []*tfv1.GPU { + numGPUs := len(gpus) + numWorkers := runtime.NumCPU() + if numWorkers > DefaultWorkerCount { + numWorkers = DefaultWorkerCount + } + + chunkSize := (numGPUs + numWorkers - 1) / numWorkers + resultChannels := make([]<-chan []*tfv1.GPU, numWorkers) + + // Create workers + for i := 0; i < numWorkers; i++ { + start := i * chunkSize + end := start + chunkSize + if end > numGPUs { + end = numGPUs + } + + if start >= end { + // No work for this worker + ch := make(chan []*tfv1.GPU, 1) + ch <- []*tfv1.GPU{} + close(ch) + resultChannels[i] = ch + continue + } + + chunk := gpus[start:end] + resultCh := make(chan []*tfv1.GPU, 1) + resultChannels[i] = resultCh + + // Start worker goroutine + go func(gpuChunk []*tfv1.GPU, resultCh chan<- []*tfv1.GPU) { + defer close(resultCh) + + filtered := make([]*tfv1.GPU, 0, len(gpuChunk)/2) // Estimate 50% pass rate + + for _, gpu := range gpuChunk { + // Use zero-allocation activation + activation := &ZeroAllocActivation{ + gpu: gpu, + workerPodKey: workerPodKey, + usage: f.usage, + } + + // Direct synchronous evaluation + result, _, evalErr := program.Eval(activation) + if evalErr != nil { + // On error, exclude the GPU (fail-safe) + continue + } + + // Convert result to boolean + if boolResult, ok := result.(types.Bool); ok { + if bool(boolResult) { + filtered = append(filtered, gpu) + } + } + // On non-boolean result, exclude the GPU (fail-safe) + } + resultCh <- filtered + }(chunk, resultCh) + } + + // Collect results + var totalFiltered []*tfv1.GPU + for _, ch := range resultChannels { + chunkResults := <-ch + totalFiltered = append(totalFiltered, chunkResults...) 
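+		// Note: in this parallel fallback, per-GPU evaluation errors are dropped
+		// silently inside the workers (fail-safe exclude) and ctx cancellation is
+		// not re-checked, unlike filterFallbackSequential above.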
+ } + + return totalFiltered +} + + +// compileFastPath tries to compile expression into a fast path predicate +// Uses AST analysis for better pattern matching than regex +func compileFastPath(expression string) FastPathPredicate { + if expression == "" { + return nil + } + + // Try AST-based compilation first (more flexible) + if pred := compileASTFastPath(expression); pred != nil { + return pred + } + + // Fall back to regex patterns for backward compatibility + for _, pattern := range fastPathPatterns { + matches := pattern.Pattern.FindStringSubmatch(expression) + if matches != nil { + return pattern.Generator(matches) + } + } + + return nil +} + +// compileASTFastPath analyzes AST to generate fast path predicates +func compileASTFastPath(expression string) FastPathPredicate { + // Parse expression to AST + env, err := createCELEnvironment() + if err != nil { + return nil + } + + _, issues := env.Parse(expression) + if issues != nil && issues.Err() != nil { + return nil + } + + // Extract conditions from expression string (simplified approach) + conditions := extractConditionsFromString(expression) + if len(conditions) == 0 { + return nil + } + + // Generate fast path predicate + return func(gpu *tfv1.GPU) bool { + for _, condition := range conditions { + if !evaluateCondition(gpu, condition) { + return false // Short-circuit on first failure (AND logic) + } + } + return true } +} + +// astCondition represents a simple condition extracted from AST +type astCondition struct { + field string // e.g., "gpu.available.tflops", "gpu.labels['env']" + operator string // "==", "!=", ">=", ">" + value interface{} // expected value +} - // Add available information if available - if gpu.Status.Available != nil { - gpuMap[GPUFieldAvailable] = map[string]interface{}{ - ResourceFieldTFlops: gpu.Status.Available.Tflops.AsApproximateFloat64(), - ResourceFieldVRAM: gpu.Status.Available.Vram.AsApproximateFloat64(), + +// extractConditionsFromString uses enhanced pattern matching to extract conditions +// This bridges the gap between regex and full AST until full AST implementation +func extractConditionsFromString(exprStr string) []astCondition { + var conditions []astCondition + + // Split by && to handle multiple conditions + parts := strings.Split(exprStr, " && ") + + for _, part := range parts { + part = strings.TrimSpace(part) + + // Handle gpu.available.tflops >= X + if strings.Contains(part, "gpu.available.tflops") && strings.Contains(part, ">=") { + if condition := parseNumericCondition(part, "gpu.available.tflops", ">="); condition != nil { + conditions = append(conditions, *condition) + } + } else if strings.Contains(part, "gpu.available.tflops") && strings.Contains(part, ">") { + if condition := parseNumericCondition(part, "gpu.available.tflops", ">"); condition != nil { + conditions = append(conditions, *condition) + } + } + + // Handle gpu.available.vram >= X + if strings.Contains(part, "gpu.available.vram") && strings.Contains(part, ">=") { + if condition := parseNumericCondition(part, "gpu.available.vram", ">="); condition != nil { + conditions = append(conditions, *condition) + } + } + + // Handle gpu.labels['key'] == 'value' + if strings.Contains(part, "gpu.labels[") && strings.Contains(part, "==") { + if condition := parseLabelCondition(part, "gpu.labels"); condition != nil { + conditions = append(conditions, *condition) + } } + + // Handle gpu.annotations['key'] == 'value' + if strings.Contains(part, "gpu.annotations[") && strings.Contains(part, "==") { + if condition := 
parseLabelCondition(part, "gpu.annotations"); condition != nil { + conditions = append(conditions, *condition) + } + } + + // Handle gpu.gpuModel == 'value' + if strings.Contains(part, "gpu.gpuModel") && strings.Contains(part, "==") { + if condition := parseStringCondition(part, "gpu.gpuModel", "=="); condition != nil { + conditions = append(conditions, *condition) + } + } + } + + return conditions +} + +// parseNumericCondition parses numeric comparison conditions +func parseNumericCondition(expr, field, operator string) *astCondition { + parts := strings.Split(expr, operator) + if len(parts) != 2 { + return nil } + + valueStr := strings.TrimSpace(parts[1]) + value, err := strconv.ParseFloat(valueStr, 64) + if err != nil { + return nil + } + + return &astCondition{ + field: field, + operator: operator, + value: value, + } +} + +// parseLabelCondition parses label/annotation map access conditions +func parseLabelCondition(expr, fieldPrefix string) *astCondition { + // Extract key from gpu.labels['key'] == 'value' format + keyStart := strings.Index(expr, "['") + 2 + keyEnd := strings.Index(expr[keyStart:], "']") + if keyEnd == -1 { + return nil + } + key := expr[keyStart : keyStart+keyEnd] + + // Extract value + valueStart := strings.LastIndex(expr, "'") + if valueStart == -1 { + return nil + } + // Find the quote before the last quote + prevQuotePos := strings.LastIndex(expr[:valueStart], "'") + if prevQuotePos == -1 { + return nil + } + value := expr[prevQuotePos+1 : valueStart] + + return &astCondition{ + field: fieldPrefix + "['" + key + "']", + operator: "==", + value: value, + } +} - // Add node selector information - if gpu.Status.NodeSelector != nil { - gpuMap[GPUFieldNodeSelector] = gpu.Status.NodeSelector +// parseStringCondition parses simple string equality conditions +func parseStringCondition(expr, field, operator string) *astCondition { + parts := strings.Split(expr, operator) + if len(parts) != 2 { + return nil + } + + valueStr := strings.TrimSpace(parts[1]) + // Remove quotes + if strings.HasPrefix(valueStr, "'") && strings.HasSuffix(valueStr, "'") { + valueStr = valueStr[1 : len(valueStr)-1] } + + return &astCondition{ + field: field, + operator: operator, + value: valueStr, + } +} - // Add running apps information (always set, even if empty) - runningApps := make([]map[string]interface{}, len(gpu.Status.RunningApps)) - for i, app := range gpu.Status.RunningApps { - runningApps[i] = map[string]interface{}{ - AppFieldName: app.Name, - AppFieldNamespace: app.Namespace, - AppFieldCount: app.Count, +// evaluateCondition evaluates a single condition against a GPU +func evaluateCondition(gpu *tfv1.GPU, condition astCondition) bool { + switch condition.field { + case "gpu.available.tflops": + if gpu.Status.Available == nil { + return false + } + actualValue := gpu.Status.Available.Tflops.AsApproximateFloat64() + expectedValue, ok := condition.value.(float64) + if !ok { + return false } + + switch condition.operator { + case ">=": + return actualValue >= expectedValue + case ">": + return actualValue > expectedValue + default: + return false + } + + case "gpu.available.vram": + if gpu.Status.Available == nil { + return false + } + actualValue := float64(gpu.Status.Available.Vram.Value()) + expectedValue, ok := condition.value.(float64) + if !ok { + return false + } + + switch condition.operator { + case ">=": + return actualValue >= expectedValue + case ">": + return actualValue > expectedValue + default: + return false + } + + case "gpu.gpuModel": + expectedValue, ok := 
condition.value.(string) + if !ok { + return false + } + return gpu.Status.GPUModel == expectedValue + + default: + // Handle label/annotation access + if strings.HasPrefix(condition.field, "gpu.labels['") { + key := strings.TrimSuffix(strings.TrimPrefix(condition.field, "gpu.labels['"), "']") + expectedValue, ok := condition.value.(string) + if !ok { + return false + } + if gpu.Labels == nil { + return expectedValue == "" + } + return gpu.Labels[key] == expectedValue + } + + if strings.HasPrefix(condition.field, "gpu.annotations['") { + key := strings.TrimSuffix(strings.TrimPrefix(condition.field, "gpu.annotations['"), "']") + expectedValue, ok := condition.value.(string) + if !ok { + return false + } + if gpu.Annotations == nil { + return expectedValue == "" + } + return gpu.Annotations[key] == expectedValue + } + + return false } - gpuMap[GPUFieldRunningApps] = runningApps +} - // Worker pod key information - workerPodKeyMap := map[string]string{ - PodKeyFieldName: workerPodKey.Name, - PodKeyFieldNamespace: workerPodKey.Namespace, +// analyzeFieldUsage performs simple heuristic analysis of which fields are used in the expression +func analyzeFieldUsage(expression string) fieldUsage { + if expression == "" { + return fieldUsage{} } - return map[string]interface{}{ - CELVarGPU: gpuMap, - CELVarWorkerPodKey: workerPodKeyMap, + return fieldUsage{ + labels: strings.Contains(expression, "labels"), + annotations: strings.Contains(expression, "annotations"), + available: strings.Contains(expression, "available") || strings.Contains(expression, "tflops") || strings.Contains(expression, "vram"), + nodeSelector: strings.Contains(expression, "nodeSelector"), + runningApps: strings.Contains(expression, "runningApps"), } } + diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go index 8894db07..5020114e 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go @@ -13,13 +13,14 @@ import ( // Benchmark performance of the CEL filter compared to the original filter func BenchmarkFilterPerformance(b *testing.B) { // Create test data - const numGPUs = 1000 + const numGPUs = 10000 gpus := make([]*tfv1.GPU, numGPUs) for i := 0; i < numGPUs; i++ { gpuModel := "A100" - if i%3 == 0 { + switch i % 3 { + case 0: gpuModel = "V100" - } else if i%3 == 1 { + case 1: gpuModel = "H100" } @@ -254,7 +255,7 @@ func printPerformanceComparison(b *testing.B) { === GPU Filter Performance Comparison === Test Environment: -- Number of GPUs: 1000 +- Number of GPUs: 10000 - GPU Models: A100 (33%%), V100 (33%%), H100 (33%%) - GPU Phases: Ready (90%%), Pending (10%%) diff --git a/internal/gpuallocator/filter/cel_filter/expression_cache.go b/internal/gpuallocator/filter/cel_filter/expression_cache.go index 4065c3b9..f98fb1d1 100644 --- a/internal/gpuallocator/filter/cel_filter/expression_cache.go +++ b/internal/gpuallocator/filter/cel_filter/expression_cache.go @@ -88,6 +88,10 @@ func (c *ExpressionCache) GetOrCompileProgram(expression string) (cel.Program, e return nil, fmt.Errorf("failed to compile CEL expression %q: %w", expression, issues.Err()) } + // Validate result type - must return boolean + // Note: Skip type validation for now as CEL type system is complex + // Runtime validation in Filter method is sufficient + program, err := c.env.Program(ast) if err != nil { c.misses++ @@ -121,7 +125,7 @@ func (c *ExpressionCache) 
hashExpression(expression string) string { // evictLRU removes the least recently used entry from cache func (c *ExpressionCache) evictLRU() { var oldestKey string - var oldestTime time.Time = time.Now() + var oldestTime = time.Now() for key, cached := range c.cache { if cached.AccessedAt.Before(oldestTime) { From e55e53d957cc16f1bf037357dd89cedbc7854658 Mon Sep 17 00:00:00 2001 From: Joey Yang <14833440+Code2Life@users.noreply.github.com> Date: Wed, 10 Sep 2025 09:12:54 +0800 Subject: [PATCH 17/34] fix: dedicated gpu annotation causing webhook failure issue (#356) --- internal/gpuallocator/gpuallocator.go | 5 +++++ internal/webhook/v1/tf_parser.go | 1 + 2 files changed, 6 insertions(+) diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index c4a36980..c43cae70 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -850,6 +850,11 @@ func (s *GpuAllocator) handleGPUCreate(ctx context.Context, gpu *tfv1.GPU) { defer s.storeMutex.Unlock() if s.gpuStore[key] != nil { + if gpu.Status.GPUModel != "" { + if _, exists := GPUCapacityMap[gpu.Status.GPUModel]; !exists { + GPUCapacityMap[gpu.Status.GPUModel] = *gpu.Status.Capacity + } + } syncGPUMetadataAndStatusFromCluster(s.gpuStore[key], gpu) log.V(6).Info("GPU already exists in store", "name", key.Name) return diff --git a/internal/webhook/v1/tf_parser.go b/internal/webhook/v1/tf_parser.go index 2fa7b744..51da5358 100644 --- a/internal/webhook/v1/tf_parser.go +++ b/internal/webhook/v1/tf_parser.go @@ -258,6 +258,7 @@ func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile) workloadProfile.Spec.Resources.Requests.Vram = resource.Vram workloadProfile.Spec.Resources.Limits.Tflops = resource.Tflops workloadProfile.Spec.Resources.Limits.Vram = resource.Vram + workloadProfile.Spec.Qos = tfv1.QoSCritical return nil } From 0d7702431b3ddf8ca9f01605b8436f1c1000c040 Mon Sep 17 00:00:00 2001 From: Joey Yang <14833440+Code2Life@users.noreply.github.com> Date: Thu, 11 Sep 2025 17:27:34 +0800 Subject: [PATCH 18/34] fix: extract GPU map update logic into separate method and fix webhook domain name, virtual cap calculation (#357) * fix: virtual tflops/vram not calculated bug * fix: extract GPU map update logic into separate method and fix webhook domain name * fix: nvidia device plugin compatible mode state consistent issue * fix: nvidia device plugin compatible mode issue --- .vscode/launch.json | 3 +- charts/tensor-fusion/Chart.yaml | 2 +- .../admission-webhooks/mutating-webhook.yaml | 2 +- cmd/main.go | 7 +- internal/controller/gpunode_controller.go | 11 ++- internal/controller/suite_test.go | 7 +- internal/gpuallocator/gpuallocator.go | 77 ++++++++++++------- internal/gpuallocator/gpuallocator_test.go | 8 +- internal/gpuallocator/node_capacity.go | 17 +++- .../scheduler/gpuresources/gpuresources.go | 10 ++- 10 files changed, 98 insertions(+), 46 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index bce7b733..ef1ab245 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -61,7 +61,8 @@ "KUBECONFIG": "~/.kube/config-local-studio", "ENABLE_WEBHOOKS": "false", "ENABLE_SCHEDULER": "true", - "ENABLE_CR_CONTROLLER": "true" + "ENABLE_CR_CONTROLLER": "true", + "NVIDIA_OPERATOR_PROGRESSIVE_MIGRATION": "true" }, "args": [ "--metrics-path", "${workspaceFolder}/logs/metrics.log", diff --git a/charts/tensor-fusion/Chart.yaml b/charts/tensor-fusion/Chart.yaml index d18568b7..d2dc9f06 100644 --- a/charts/tensor-fusion/Chart.yaml +++ 
b/charts/tensor-fusion/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.5.7 +version: 1.5.8 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml b/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml index 7fcdda1a..242d17e0 100644 --- a/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml +++ b/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml @@ -11,7 +11,7 @@ webhooks: namespace: {{ include "tensor-fusion.namespace" . }} path: /mutate-v1-pod failurePolicy: {{ .Values.controller.admissionWebhooks.failurePolicy }} - name: mpod-v1.kb.io + name: mpod.tensor-fusion.ai rules: - apiGroups: - "" diff --git a/cmd/main.go b/cmd/main.go index f4f2f0ab..f00a6b2e 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -364,9 +364,10 @@ func startCustomResourceController( } if err = (&controller.GPUNodeReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("GPUNode"), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("GPUNode"), + Allocator: allocator, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "GPUNode") os.Exit(1) diff --git a/internal/controller/gpunode_controller.go b/internal/controller/gpunode_controller.go index 054d5922..ae503f28 100644 --- a/internal/controller/gpunode_controller.go +++ b/internal/controller/gpunode_controller.go @@ -47,8 +47,9 @@ import ( // GPUNodeReconciler reconciles a GPUNode object type GPUNodeReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder + Scheme *runtime.Scheme + Recorder record.EventRecorder + Allocator *gpuallocator.GpuAllocator } // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpunodes,verbs=get;list;watch;create;update;patch;delete @@ -158,7 +159,9 @@ func (r *GPUNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, err } -func (r *GPUNodeReconciler) checkStatusAndUpdateVirtualCapacity(ctx context.Context, hypervisorName string, node *tfv1.GPUNode, poolObj *tfv1.GPUPool) error { +func (r *GPUNodeReconciler) checkStatusAndUpdateVirtualCapacity( + ctx context.Context, hypervisorName string, node *tfv1.GPUNode, poolObj *tfv1.GPUPool, +) error { pod := &corev1.Pod{} fetchErr := r.Get(ctx, client.ObjectKey{Name: hypervisorName, Namespace: utils.CurrentNamespace()}, pod) if fetchErr != nil { @@ -183,7 +186,7 @@ func (r *GPUNodeReconciler) checkStatusAndUpdateVirtualCapacity(ctx context.Cont return nil } else { - gpuModels, err := gpuallocator.RefreshGPUNodeCapacity(ctx, r.Client, node, poolObj) + gpuModels, err := gpuallocator.RefreshGPUNodeCapacity(ctx, r.Client, node, poolObj, r.Allocator) if err != nil { return err } diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 388b938f..0ba3228a 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -180,9 +180,10 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) err = (&GPUNodeReconciler{ - Client: 
mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("GPUNode"), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("GPUNode"), + Allocator: allocator, }).SetupWithManager(mgr) Expect(err).ToNot(HaveOccurred()) diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index c43cae70..d2259a34 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -545,12 +545,13 @@ func (s *GpuAllocator) AdjustAllocation(ctx context.Context, adjustRequest tfv1. } func (s *GpuAllocator) ListNonUsingNodes() sets.Set[string] { + <-s.initializedCh set := sets.New[string]() - for nodeName, gpuNames := range s.nodeWorkerStore { + for nodeName, podNames := range s.nodeWorkerStore { // If using by TF, the node can not be used by original scheduler // If using by other scheduler, won't record as TF worker, thus the map is empty // Return non using nodes can ensure original scheduler not conflict with TF - if len(gpuNames) == 0 { + if len(podNames) == 0 { set.Insert(nodeName) } } @@ -564,6 +565,20 @@ func (s *GpuAllocator) DeallocByPodIdentifier(ctx context.Context, podIdentifier } } +func (s *GpuAllocator) GetAllocationReqByNodeName(nodeName string) []*tfv1.AllocRequest { + allocRequests := make([]*tfv1.AllocRequest, 0, 8) + for workerName := range s.nodeWorkerStore[nodeName] { + podUID := s.podNamespaceNsToPodUID[workerName.String()] + if podUID == "" { + continue + } + if request, exists := s.uniqueAllocation[podUID]; exists { + allocRequests = append(allocRequests, request) + } + } + return allocRequests +} + func (s *GpuAllocator) checkGPUCapacityAndQuota(gpu *tfv1.GPU, oldRes, newRes tfv1.Resource) (tfv1.Resource, error) { if gpu.Status.Available == nil { return tfv1.Resource{}, fmt.Errorf("GPU available is nil, skip check") @@ -870,29 +885,7 @@ func (s *GpuAllocator) handleGPUCreate(ctx context.Context, gpu *tfv1.GPU) { } s.gpuStore[key] = gpuInMem - if gpuInMem.Status.NodeSelector != nil { - gpuNodeName := gpuInMem.Status.NodeSelector[constants.KubernetesHostNameLabel] - if gpuNodeName != "" { - if _, exists := s.nodeGpuStore[gpuNodeName]; !exists { - s.nodeGpuStore[gpuNodeName] = make(map[string]*tfv1.GPU, 4) - } - s.nodeGpuStore[gpuNodeName][gpuInMem.Name] = gpuInMem - } - } - - if gpuInMem.Labels != nil { - pool := gpuInMem.Labels[constants.GpuPoolKey] - if pool != "" { - if _, exists := s.poolGpuStore[pool]; !exists { - s.poolGpuStore[pool] = make(map[string]*tfv1.GPU, 128) - } - s.poolGpuStore[pool][gpuInMem.Name] = gpuInMem - } - } - - if gpu.Status.GPUModel != "" { - GPUCapacityMap[gpu.Status.GPUModel] = *gpu.Status.Capacity - } + s.addOrUpdateGPUMaps(gpuInMem) log.Info("Added GPU to store", "name", key.Name, "phase", gpu.Status.Phase) } @@ -942,10 +935,36 @@ func (s *GpuAllocator) handleGPUUpdate(ctx context.Context, gpu *tfv1.GPU) { log.V(6).Info("Updated GPU in store (new entry)", "name", key.Name, "phase", gpu.Status.Phase) } - if gpu.Status.GPUModel != "" { - if _, exists := GPUCapacityMap[gpu.Status.GPUModel]; !exists { - GPUCapacityMap[gpu.Status.GPUModel] = *gpu.Status.Capacity + s.addOrUpdateGPUMaps(gpu) +} + +func (s *GpuAllocator) addOrUpdateGPUMaps(gpuInMem *tfv1.GPU) { + if gpuInMem.Status.NodeSelector != nil { + gpuNodeName := gpuInMem.Status.NodeSelector[constants.KubernetesHostNameLabel] + if gpuNodeName != "" { + if _, exists := s.nodeGpuStore[gpuNodeName]; !exists { + s.nodeGpuStore[gpuNodeName] = make(map[string]*tfv1.GPU, 4) + } + 
s.nodeGpuStore[gpuNodeName][gpuInMem.Name] = gpuInMem + if _, exists := s.nodeWorkerStore[gpuNodeName]; !exists { + s.nodeWorkerStore[gpuNodeName] = make(map[types.NamespacedName]struct{}, 4) + } } + + } + + if gpuInMem.Labels != nil { + pool := gpuInMem.Labels[constants.GpuPoolKey] + if pool != "" { + if _, exists := s.poolGpuStore[pool]; !exists { + s.poolGpuStore[pool] = make(map[string]*tfv1.GPU, 128) + } + s.poolGpuStore[pool][gpuInMem.Name] = gpuInMem + } + } + + if gpuInMem.Status.GPUModel != "" { + GPUCapacityMap[gpuInMem.Status.GPUModel] = *gpuInMem.Status.Capacity } } @@ -1166,7 +1185,7 @@ func (s *GpuAllocator) reconcileAllocationState() { // No workers, but node contains GPU, need include into nodeWorkerStore with empty map gpuNodeName := gpu.Status.NodeSelector[constants.KubernetesHostNameLabel] if _, exists := s.nodeWorkerStore[gpuNodeName]; !exists { - s.nodeWorkerStore[gpuNodeName] = map[types.NamespacedName]struct{}{} + s.nodeWorkerStore[gpuNodeName] = make(map[types.NamespacedName]struct{}, 4) } } diff --git a/internal/gpuallocator/gpuallocator_test.go b/internal/gpuallocator/gpuallocator_test.go index 08d78130..bb3a494d 100644 --- a/internal/gpuallocator/gpuallocator_test.go +++ b/internal/gpuallocator/gpuallocator_test.go @@ -97,7 +97,7 @@ var _ = Describe("GPU Allocator", func() { if err := k8sClient.Get(ctx, types.NamespacedName{Name: "test-pool"}, pool); err != nil { Expect(err).NotTo(HaveOccurred()) } - _, _ = RefreshGPUNodeCapacity(ctx, k8sClient, gpuNode, pool) + _, _ = RefreshGPUNodeCapacity(ctx, k8sClient, gpuNode, pool, allocator) // Verify resources were reduced on the allocated GPU gpu := getGPU(gpus[0].Name) @@ -107,8 +107,14 @@ var _ = Describe("GPU Allocator", func() { node := getGPUNode(gpu) diffTflops := node.Status.TotalTFlops.Value() - node.Status.AvailableTFlops.Value() diffVRAM := node.Status.TotalVRAM.Value() - node.Status.AvailableVRAM.Value() + + diffVirtualTflops := node.Status.VirtualTFlops.Value() - node.Status.VirtualAvailableTFlops.Value() + diffVirtualVRAM := node.Status.VirtualVRAM.Value() - node.Status.VirtualAvailableVRAM.Value() Expect(diffTflops).To(BeEquivalentTo(50)) Expect(diffVRAM).To(BeEquivalentTo(8 * 1024 * 1024 * 1024)) + + Expect(diffVirtualTflops).To(BeEquivalentTo(50)) + Expect(diffVirtualVRAM).To(BeEquivalentTo(8 * 1024 * 1024 * 1024)) }) It("should allocate multiple GPUs from the same node", func() { diff --git a/internal/gpuallocator/node_capacity.go b/internal/gpuallocator/node_capacity.go index dc7488f6..43cce870 100644 --- a/internal/gpuallocator/node_capacity.go +++ b/internal/gpuallocator/node_capacity.go @@ -11,7 +11,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -func RefreshGPUNodeCapacity(ctx context.Context, k8sClient client.Client, node *tfv1.GPUNode, pool *tfv1.GPUPool) ([]string, error) { +func RefreshGPUNodeCapacity( + ctx context.Context, k8sClient client.Client, + node *tfv1.GPUNode, pool *tfv1.GPUPool, + allocator *GpuAllocator, +) ([]string, error) { gpuList := &tfv1.GPUList{} if err := k8sClient.List(ctx, gpuList, client.MatchingLabels{constants.LabelKeyOwner: node.Name}); err != nil { return nil, fmt.Errorf("failed to list GPUs: %w", err) @@ -54,6 +58,17 @@ func RefreshGPUNodeCapacity(ctx context.Context, k8sClient client.Client, node * node.Status.VirtualTFlops = virtualTFlops node.Status.VirtualVRAM = virtualVRAM + vramAvailable := virtualVRAM.DeepCopy() + tflopsAvailable := virtualTFlops.DeepCopy() + + allocRequests := allocator.GetAllocationReqByNodeName(node.Name) + for _, 
allocRequest := range allocRequests { + vramAvailable.Sub(allocRequest.Limit.Vram) + tflopsAvailable.Sub(allocRequest.Limit.Tflops) + } + node.Status.VirtualAvailableVRAM = &vramAvailable + node.Status.VirtualAvailableTFlops = &tflopsAvailable + node.Status.Phase = tfv1.TensorFusionGPUNodePhaseRunning if !equality.Semantic.DeepEqual(node.Status, statusCopy) { diff --git a/internal/scheduler/gpuresources/gpuresources.go b/internal/scheduler/gpuresources/gpuresources.go index ee6b6e58..861b95eb 100644 --- a/internal/scheduler/gpuresources/gpuresources.go +++ b/internal/scheduler/gpuresources/gpuresources.go @@ -158,11 +158,17 @@ func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Po continue } + preAllocSize := total - matched + if preAllocSize <= 0 { + s.logger.Error(nil, "Filtering GPU error, unexpected less than 0", "pod", + pod.Name, "node", k, "totalGPU count", total, "matchedGPU count", matched) + preAllocSize = 2 + } // range if it's not in validNodesValidGPUs, add to validNodeNonMatchingGPUs - validNodeNonMatchingGPUs[k] = make([]*tfv1.GPU, 0, total-matched) + validNodeNonMatchingGPUs[k] = make([]*tfv1.GPU, 0, preAllocSize) for gpuName, gpu := range allGPUs { seen := false - // just loop because the number always <= 8 + // just loop because the number always <= 8/16 for _, matchedGPU := range matchedGPUs { if gpuName == matchedGPU.Name { seen = true From 52dc0a45d11af1c2c79d577ade95890a1ca55a2b Mon Sep 17 00:00:00 2001 From: dylan Date: Sun, 14 Sep 2025 04:18:28 -0700 Subject: [PATCH 19/34] cel fix phase filter --- .../filter/cel_filter/cel_filter.go | 181 +++++++++--------- .../cel_filter/cel_filter_benchmark_test.go | 25 +-- .../filter/cel_filter/cel_filter_test.go | 77 ++++---- 3 files changed, 144 insertions(+), 139 deletions(-) diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter.go b/internal/gpuallocator/filter/cel_filter/cel_filter.go index ea463b0f..1c3e01c9 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter.go @@ -10,10 +10,12 @@ import ( "strings" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/google/cel-go/cel" "github.com/google/cel-go/common/types" "github.com/google/cel-go/common/types/ref" "github.com/google/cel-go/interpreter" + "github.com/samber/lo" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -39,8 +41,8 @@ type FastPathPredicate func(gpu *tfv1.GPU) bool // ExpressionPattern represents a recognized expression pattern for fast path type ExpressionPattern struct { - Pattern *regexp.Regexp - Generator func(matches []string) FastPathPredicate + Pattern *regexp.Regexp + Generator func(matches []string) FastPathPredicate } // Common fast path patterns - order matters (most specific first) @@ -52,7 +54,7 @@ var fastPathPatterns = []ExpressionPattern{ threshold, _ := strconv.ParseFloat(matches[1], 64) labelKey, labelValue := matches[2], matches[3] return func(gpu *tfv1.GPU) bool { - return gpu.Status.Available != nil && + return gpu.Status.Available != nil && gpu.Status.Available.Tflops.AsApproximateFloat64() >= threshold && gpu.Labels != nil && gpu.Labels[labelKey] == labelValue } @@ -88,7 +90,7 @@ var fastPathPatterns = []ExpressionPattern{ } }, }, - // gpu.available.vram > NUMBER + // gpu.available.vram > NUMBER { Pattern: regexp.MustCompile(`^gpu\.available\.vram\s*>\s*([0-9]+(?:\.[0-9]+)?)$`), Generator: func(matches []string) FastPathPredicate { @@ -120,7 +122,6 @@ var fastPathPatterns = 
[]ExpressionPattern{ }, } - // ZeroAllocActivation provides zero-allocation variable resolution for CEL // This eliminates the need to create map[string]interface{} for each GPU type ZeroAllocActivation struct { @@ -141,7 +142,7 @@ func (a *ZeroAllocActivation) ResolveName(name string) (interface{}, bool) { } } -// Parent implements interpreter.Activation interface +// Parent implements interpreter.Activation interface func (a *ZeroAllocActivation) Parent() interpreter.Activation { return nil } @@ -152,7 +153,6 @@ func (a *ZeroAllocActivation) createGPUObject() interface{} { return &gpuVal{GPU: a.gpu} } - // createWorkerPodKeyObject creates worker pod key object func (a *ZeroAllocActivation) createWorkerPodKeyObject() interface{} { return map[string]interface{}{ @@ -166,7 +166,7 @@ type gpuVal struct { *tfv1.GPU // Cached sub-values to avoid repeated allocations labels ref.Val - annotations ref.Val + annotations ref.Val nodeSelector ref.Val available ref.Val runningApps ref.Val @@ -195,7 +195,7 @@ func (v *gpuVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { return v.GPU, nil } -// ConvertToType implements ref.Val interface +// ConvertToType implements ref.Val interface func (v *gpuVal) ConvertToType(typeValue ref.Type) ref.Val { switch typeValue { case types.TypeType: @@ -209,8 +209,8 @@ func (v *gpuVal) ConvertToType(typeValue ref.Type) ref.Val { func (v *gpuVal) HasField(field string) bool { switch field { case GPUFieldName, GPUFieldNamespace, GPUFieldGPUModel, GPUFieldUUID, - GPUFieldPhase, GPUFieldUsedBy, GPUFieldMessage, GPUFieldLabels, - GPUFieldAnnotations, GPUFieldAvailable, GPUFieldNodeSelector, GPUFieldRunningApps: + GPUFieldPhase, GPUFieldUsedBy, GPUFieldMessage, GPUFieldLabels, + GPUFieldAnnotations, GPUFieldAvailable, GPUFieldNodeSelector, GPUFieldRunningApps: return true default: return false @@ -223,7 +223,7 @@ func (v *gpuVal) Get(index ref.Val) ref.Val { if !ok { return types.NewErr("index must be string") } - + switch field { case GPUFieldName: return types.String(v.GPU.Name) @@ -246,7 +246,7 @@ func (v *gpuVal) Get(index ref.Val) ref.Val { } return v.labels case GPUFieldAnnotations: - // Lazy initialization with caching + // Lazy initialization with caching if v.annotations == nil { v.annotations = &labelsVal{labels: v.GPU.Annotations} } @@ -286,7 +286,7 @@ type availableVal struct { available *tfv1.Resource } -// Type implements ref.Val interface +// Type implements ref.Val interface func (v *availableVal) Type() ref.Type { return types.MapType } @@ -317,22 +317,22 @@ func (v *availableVal) Get(index ref.Val) ref.Val { if !ok { return types.NewErr("index must be string") } - + if v.available == nil { switch field { - case "tflops": + case ResourceFieldTFlops: return types.Double(0.0) - case "vram": + case ResourceFieldVRAM: return types.Int(0) default: return types.NewErr("no such field: %s", field) } } - + switch field { - case "tflops": + case ResourceFieldTFlops: return types.Double(v.available.Tflops.AsApproximateFloat64()) - case "vram": + case ResourceFieldVRAM: return types.Int(v.available.Vram.Value()) default: return types.NewErr("no such field: %s", field) @@ -341,7 +341,7 @@ func (v *availableVal) Get(index ref.Val) ref.Val { // HasField implements field testing func (v *availableVal) HasField(field string) bool { - return field == "tflops" || field == "vram" + return field == ResourceFieldTFlops || field == ResourceFieldVRAM } // labelsVal provides direct access to GPU labels without copying @@ -354,7 +354,7 @@ func (v *labelsVal) Type() 
ref.Type { return types.MapType } -// Value implements ref.Val interface +// Value implements ref.Val interface func (v *labelsVal) Value() interface{} { return v.labels } @@ -380,11 +380,11 @@ func (v *labelsVal) Get(index ref.Val) ref.Val { if !ok { return types.NewErr("index must be string") } - + if v.labels == nil { return types.String("") } - + value, exists := v.labels[key] if !exists { return types.String("") @@ -397,7 +397,7 @@ type CELFilter struct { cache *ExpressionCache name string // Store early filtering criteria for optimization - requiredPhase string + requiredPhases []tfv1.TensorFusionGPUPhase requiredGPUModel string userExpression string // Track which fields are actually used @@ -411,10 +411,14 @@ type CELFilter struct { // NewAllocRequestCELFilter creates a new CEL filter from allocation request func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, error) { // Extract early filtering criteria - var requiredPhase, requiredGPUModel, userExpression, displayExpression string + var requiredPhases []tfv1.TensorFusionGPUPhase + var requiredGPUModel, userExpression, displayExpression string if req != nil { - requiredPhase = "Ready" // Keep as Ready for compatibility with tests + requiredPhases = []tfv1.TensorFusionGPUPhase{ + tfv1.TensorFusionGPUPhaseRunning, + tfv1.TensorFusionGPUPhasePending, + } requiredGPUModel = req.GPUModel userExpression = req.CELFilterExpression @@ -424,7 +428,7 @@ func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, e // Analyze field usage in user expression only usage := analyzeFieldUsage(userExpression) - + // Try to compile fast path predicate fastPath := compileFastPath(userExpression) @@ -437,7 +441,7 @@ func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, e return &CELFilter{ cache: cache, name: name, - requiredPhase: requiredPhase, + requiredPhases: requiredPhases, requiredGPUModel: requiredGPUModel, userExpression: userExpression, usage: usage, @@ -464,8 +468,12 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, // Early filtering phase: apply basic filters first to reduce CEL evaluation overhead earlyFilteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) for _, gpu := range gpus { + // when running progressive migration mode, only return GPUs used by tensor-fusion + if utils.IsProgressiveMigration() && gpu.Status.UsedBy != tfv1.UsedByTensorFusion { + continue + } // Fast path: check phase first (most common filter) - if f.requiredPhase != "" && string(gpu.Status.Phase) != f.requiredPhase { + if f.requiredPhases != nil && !lo.Contains(f.requiredPhases, gpu.Status.Phase) { continue } @@ -498,7 +506,6 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.userExpression, err) } - // Use fast path if available, otherwise fall back to CEL if f.fastPathPredicate != nil { // Fast path: direct Go function evaluation with optional parallelization @@ -511,7 +518,7 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, } } } - + log.V(1).Info("CEL filter applied (fast path)", "filter", f.name, "displayExpression", f.displayExpression, @@ -528,7 +535,7 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, // Sequential evaluation for smaller sets filteredGPUs = f.filterFallbackSequential(ctx, program, earlyFilteredGPUs, workerPodKey) } - + log.V(1).Info("CEL filter applied (CEL evaluation)", 
"filter", f.name, "displayExpression", f.displayExpression, @@ -584,7 +591,6 @@ func createCELEnvironment() (*cel.Env, error) { ) } - // filterParallel processes GPUs in parallel for large datasets func (f *CELFilter) filterParallel(gpus []*tfv1.GPU) []*tfv1.GPU { numGPUs := len(gpus) @@ -592,10 +598,10 @@ func (f *CELFilter) filterParallel(gpus []*tfv1.GPU) []*tfv1.GPU { if numWorkers > DefaultWorkerCount { numWorkers = DefaultWorkerCount } - + chunkSize := (numGPUs + numWorkers - 1) / numWorkers resultChannels := make([]<-chan []*tfv1.GPU, numWorkers) - + // Create workers for i := 0; i < numWorkers; i++ { start := i * chunkSize @@ -603,7 +609,7 @@ func (f *CELFilter) filterParallel(gpus []*tfv1.GPU) []*tfv1.GPU { if end > numGPUs { end = numGPUs } - + if start >= end { // No work for this worker ch := make(chan []*tfv1.GPU, 1) @@ -612,15 +618,15 @@ func (f *CELFilter) filterParallel(gpus []*tfv1.GPU) []*tfv1.GPU { resultChannels[i] = ch continue } - + chunk := gpus[start:end] resultCh := make(chan []*tfv1.GPU, 1) resultChannels[i] = resultCh - + // Start worker goroutine go func(gpuChunk []*tfv1.GPU, resultCh chan<- []*tfv1.GPU) { defer close(resultCh) - + filtered := make([]*tfv1.GPU, 0, len(gpuChunk)/2) // Estimate 50% pass rate for _, gpu := range gpuChunk { if f.fastPathPredicate(gpu) { @@ -630,14 +636,14 @@ func (f *CELFilter) filterParallel(gpus []*tfv1.GPU) []*tfv1.GPU { resultCh <- filtered }(chunk, resultCh) } - + // Collect results var totalFiltered []*tfv1.GPU for _, ch := range resultChannels { chunkResults := <-ch totalFiltered = append(totalFiltered, chunkResults...) } - + return totalFiltered } @@ -645,7 +651,7 @@ func (f *CELFilter) filterParallel(gpus []*tfv1.GPU) []*tfv1.GPU { func (f *CELFilter) filterFallbackSequential(ctx context.Context, program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) []*tfv1.GPU { filteredGPUs := make([]*tfv1.GPU, 0, len(gpus)/2) log := log.FromContext(ctx) - + for i, gpu := range gpus { // Periodic context check every 64 GPUs for very large sets if i&63 == 0 { @@ -690,7 +696,7 @@ func (f *CELFilter) filterFallbackSequential(ctx context.Context, program cel.Pr continue } } - + return filteredGPUs } @@ -701,10 +707,10 @@ func (f *CELFilter) filterFallbackParallel(ctx context.Context, program cel.Prog if numWorkers > DefaultWorkerCount { numWorkers = DefaultWorkerCount } - + chunkSize := (numGPUs + numWorkers - 1) / numWorkers resultChannels := make([]<-chan []*tfv1.GPU, numWorkers) - + // Create workers for i := 0; i < numWorkers; i++ { start := i * chunkSize @@ -712,7 +718,7 @@ func (f *CELFilter) filterFallbackParallel(ctx context.Context, program cel.Prog if end > numGPUs { end = numGPUs } - + if start >= end { // No work for this worker ch := make(chan []*tfv1.GPU, 1) @@ -721,17 +727,17 @@ func (f *CELFilter) filterFallbackParallel(ctx context.Context, program cel.Prog resultChannels[i] = ch continue } - + chunk := gpus[start:end] resultCh := make(chan []*tfv1.GPU, 1) resultChannels[i] = resultCh - + // Start worker goroutine go func(gpuChunk []*tfv1.GPU, resultCh chan<- []*tfv1.GPU) { defer close(resultCh) - + filtered := make([]*tfv1.GPU, 0, len(gpuChunk)/2) // Estimate 50% pass rate - + for _, gpu := range gpuChunk { // Use zero-allocation activation activation := &ZeroAllocActivation{ @@ -758,30 +764,29 @@ func (f *CELFilter) filterFallbackParallel(ctx context.Context, program cel.Prog resultCh <- filtered }(chunk, resultCh) } - + // Collect results var totalFiltered []*tfv1.GPU for _, ch := range resultChannels { 
chunkResults := <-ch totalFiltered = append(totalFiltered, chunkResults...) } - + return totalFiltered } - // compileFastPath tries to compile expression into a fast path predicate // Uses AST analysis for better pattern matching than regex func compileFastPath(expression string) FastPathPredicate { if expression == "" { return nil } - - // Try AST-based compilation first (more flexible) + + // Try AST-based compilation first (more flexible) if pred := compileASTFastPath(expression); pred != nil { return pred } - + // Fall back to regex patterns for backward compatibility for _, pattern := range fastPathPatterns { matches := pattern.Pattern.FindStringSubmatch(expression) @@ -789,7 +794,7 @@ func compileFastPath(expression string) FastPathPredicate { return pattern.Generator(matches) } } - + return nil } @@ -800,18 +805,18 @@ func compileASTFastPath(expression string) FastPathPredicate { if err != nil { return nil } - + _, issues := env.Parse(expression) if issues != nil && issues.Err() != nil { return nil } - + // Extract conditions from expression string (simplified approach) conditions := extractConditionsFromString(expression) if len(conditions) == 0 { return nil } - + // Generate fast path predicate return func(gpu *tfv1.GPU) bool { for _, condition := range conditions { @@ -825,23 +830,22 @@ func compileASTFastPath(expression string) FastPathPredicate { // astCondition represents a simple condition extracted from AST type astCondition struct { - field string // e.g., "gpu.available.tflops", "gpu.labels['env']" - operator string // "==", "!=", ">=", ">" + field string // e.g., "gpu.available.tflops", "gpu.labels['env']" + operator string // "==", "!=", ">=", ">" value interface{} // expected value } - // extractConditionsFromString uses enhanced pattern matching to extract conditions // This bridges the gap between regex and full AST until full AST implementation func extractConditionsFromString(exprStr string) []astCondition { var conditions []astCondition - + // Split by && to handle multiple conditions parts := strings.Split(exprStr, " && ") - + for _, part := range parts { part = strings.TrimSpace(part) - + // Handle gpu.available.tflops >= X if strings.Contains(part, "gpu.available.tflops") && strings.Contains(part, ">=") { if condition := parseNumericCondition(part, "gpu.available.tflops", ">="); condition != nil { @@ -852,28 +856,28 @@ func extractConditionsFromString(exprStr string) []astCondition { conditions = append(conditions, *condition) } } - + // Handle gpu.available.vram >= X if strings.Contains(part, "gpu.available.vram") && strings.Contains(part, ">=") { if condition := parseNumericCondition(part, "gpu.available.vram", ">="); condition != nil { conditions = append(conditions, *condition) } } - + // Handle gpu.labels['key'] == 'value' if strings.Contains(part, "gpu.labels[") && strings.Contains(part, "==") { if condition := parseLabelCondition(part, "gpu.labels"); condition != nil { conditions = append(conditions, *condition) } } - - // Handle gpu.annotations['key'] == 'value' + + // Handle gpu.annotations['key'] == 'value' if strings.Contains(part, "gpu.annotations[") && strings.Contains(part, "==") { if condition := parseLabelCondition(part, "gpu.annotations"); condition != nil { conditions = append(conditions, *condition) } } - + // Handle gpu.gpuModel == 'value' if strings.Contains(part, "gpu.gpuModel") && strings.Contains(part, "==") { if condition := parseStringCondition(part, "gpu.gpuModel", "=="); condition != nil { @@ -881,7 +885,7 @@ func 
extractConditionsFromString(exprStr string) []astCondition { } } } - + return conditions } @@ -891,13 +895,13 @@ func parseNumericCondition(expr, field, operator string) *astCondition { if len(parts) != 2 { return nil } - + valueStr := strings.TrimSpace(parts[1]) value, err := strconv.ParseFloat(valueStr, 64) if err != nil { return nil } - + return &astCondition{ field: field, operator: operator, @@ -905,7 +909,7 @@ func parseNumericCondition(expr, field, operator string) *astCondition { } } -// parseLabelCondition parses label/annotation map access conditions +// parseLabelCondition parses label/annotation map access conditions func parseLabelCondition(expr, fieldPrefix string) *astCondition { // Extract key from gpu.labels['key'] == 'value' format keyStart := strings.Index(expr, "['") + 2 @@ -914,9 +918,9 @@ func parseLabelCondition(expr, fieldPrefix string) *astCondition { return nil } key := expr[keyStart : keyStart+keyEnd] - + // Extract value - valueStart := strings.LastIndex(expr, "'") + valueStart := strings.LastIndex(expr, "'") if valueStart == -1 { return nil } @@ -926,7 +930,7 @@ func parseLabelCondition(expr, fieldPrefix string) *astCondition { return nil } value := expr[prevQuotePos+1 : valueStart] - + return &astCondition{ field: fieldPrefix + "['" + key + "']", operator: "==", @@ -940,13 +944,13 @@ func parseStringCondition(expr, field, operator string) *astCondition { if len(parts) != 2 { return nil } - + valueStr := strings.TrimSpace(parts[1]) // Remove quotes if strings.HasPrefix(valueStr, "'") && strings.HasSuffix(valueStr, "'") { valueStr = valueStr[1 : len(valueStr)-1] } - + return &astCondition{ field: field, operator: operator, @@ -966,7 +970,7 @@ func evaluateCondition(gpu *tfv1.GPU, condition astCondition) bool { if !ok { return false } - + switch condition.operator { case ">=": return actualValue >= expectedValue @@ -975,7 +979,7 @@ func evaluateCondition(gpu *tfv1.GPU, condition astCondition) bool { default: return false } - + case "gpu.available.vram": if gpu.Status.Available == nil { return false @@ -985,7 +989,7 @@ func evaluateCondition(gpu *tfv1.GPU, condition astCondition) bool { if !ok { return false } - + switch condition.operator { case ">=": return actualValue >= expectedValue @@ -994,14 +998,14 @@ func evaluateCondition(gpu *tfv1.GPU, condition astCondition) bool { default: return false } - + case "gpu.gpuModel": expectedValue, ok := condition.value.(string) if !ok { return false } return gpu.Status.GPUModel == expectedValue - + default: // Handle label/annotation access if strings.HasPrefix(condition.field, "gpu.labels['") { @@ -1015,7 +1019,7 @@ func evaluateCondition(gpu *tfv1.GPU, condition astCondition) bool { } return gpu.Labels[key] == expectedValue } - + if strings.HasPrefix(condition.field, "gpu.annotations['") { key := strings.TrimSuffix(strings.TrimPrefix(condition.field, "gpu.annotations['"), "']") expectedValue, ok := condition.value.(string) @@ -1027,7 +1031,7 @@ func evaluateCondition(gpu *tfv1.GPU, condition astCondition) bool { } return gpu.Annotations[key] == expectedValue } - + return false } } @@ -1046,4 +1050,3 @@ func analyzeFieldUsage(expression string) fieldUsage { runningApps: strings.Contains(expression, "runningApps"), } } - diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go index 5020114e..5d1e7091 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go +++ 
b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go @@ -7,13 +7,14 @@ import ( "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" ) // Benchmark performance of the CEL filter compared to the original filter func BenchmarkFilterPerformance(b *testing.B) { // Create test data - const numGPUs = 10000 + const numGPUs = 1000000 gpus := make([]*tfv1.GPU, numGPUs) for i := 0; i < numGPUs; i++ { gpuModel := "A100" @@ -24,9 +25,9 @@ func BenchmarkFilterPerformance(b *testing.B) { gpuModel = "H100" } - phase := "Ready" + phase := constants.PhaseRunning if i%10 == 0 { - phase = "Pending" + phase = constants.PhasePending } gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", gpuModel, phase, 150.0, 40.0) @@ -44,7 +45,7 @@ func BenchmarkFilterPerformance(b *testing.B) { b.Run("OriginalFilters", func(b *testing.B) { // Import the original filter package registry := filter.NewFilterRegistry().With( - filter.NewPhaseFilter("Ready"), + filter.NewPhaseFilter(constants.PhaseRunning), filter.NewGPUModelFilter("A100"), ) @@ -149,7 +150,7 @@ func BenchmarkCachePerformance(b *testing.B) { b.Fatal(err) } - expression := "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0" + expression := "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0" b.Run("CacheHit", func(b *testing.B) { // Pre-warm cache @@ -170,7 +171,7 @@ func BenchmarkCachePerformance(b *testing.B) { b.Run("CacheMiss", func(b *testing.B) { expressions := make([]string, b.N) for i := 0; i < b.N; i++ { - expressions[i] = fmt.Sprintf("gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= %d.0", i%200+50) + expressions[i] = fmt.Sprintf("gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= %d.0", i%200+50) } b.ResetTimer() @@ -188,7 +189,7 @@ func BenchmarkExpressionComplexity(b *testing.B) { const numGPUs = 100 gpus := make([]*tfv1.GPU, numGPUs) for i := 0; i < numGPUs; i++ { - gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", "A100", "Ready", 150.0, 40.0) + gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Labels["environment"] = "production" gpu.Labels["tier"] = "high-performance" gpu.Annotations["priority"] = "critical" @@ -204,23 +205,23 @@ func BenchmarkExpressionComplexity(b *testing.B) { }{ { name: "Simple", - expression: "gpu.phase == 'Ready'", + expression: "gpu.phase == 'Running'", }, { name: "Medium", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100'", }, { name: "Complex", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0", }, { name: "VeryComplex", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'", }, { name: "UltraComplex", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production' && gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'", + expression: "gpu.phase == 
'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production' && gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'", }, } diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go index c21e4ee8..ffc903ea 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go @@ -6,6 +6,7 @@ import ( "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/api/resource" @@ -67,9 +68,9 @@ func TestCELFilter_NormalCases(t *testing.T) { name: "filter by GPU model", request: createTestAllocRequest("default", "test-workload", "A100", ""), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), - createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0), + createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "default", "V100", constants.PhaseRunning, 100.0, 32.0), + createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), }, expectedCount: 2, description: "Should filter GPUs matching the specified model A100", @@ -78,57 +79,57 @@ func TestCELFilter_NormalCases(t *testing.T) { name: "filter by GPU phase only", request: createTestAllocRequest("default", "test-workload", "", ""), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "A100", "Pending", 150.0, 40.0), - createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-4", "default", "A100", "Failed", 150.0, 40.0), + createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "default", "A100", constants.PhasePending, 150.0, 40.0), + createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-4", "default", "A100", constants.PhaseFailed, 150.0, 40.0), }, - expectedCount: 2, - description: "Should only return GPUs in Ready phase", + expectedCount: 3, + description: "Should return GPUs in Running and Pending phases", }, { name: "custom CEL expression - filter by available TFLOPS", request: createTestAllocRequest("default", "test-workload", "", "gpu.available.tflops > 120.0"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), - createTestGPU("gpu-3", "default", "H100", "Ready", 200.0, 80.0), + createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "default", "V100", constants.PhaseRunning, 100.0, 32.0), + createTestGPU("gpu-3", "default", "H100", constants.PhaseRunning, 200.0, 80.0), }, expectedCount: 2, - description: "Should filter GPUs with TFLOPS > 120 and Ready phase", + description: "Should filter GPUs with TFLOPS > 120 and Running/Pending phase", }, { name: "custom CEL expression - filter by available VRAM", request: createTestAllocRequest("default", "test-workload", "", "gpu.available.vram > 35000000000"), // > 35GB in bytes gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), // 40GB - createTestGPU("gpu-2", "default", "V100", 
"Ready", 100.0, 32.0), // 32GB - createTestGPU("gpu-3", "default", "H100", "Ready", 200.0, 80.0), // 80GB + createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), // 40GB + createTestGPU("gpu-2", "default", "V100", constants.PhaseRunning, 100.0, 32.0), // 32GB + createTestGPU("gpu-3", "default", "H100", constants.PhaseRunning, 200.0, 80.0), // 80GB }, expectedCount: 2, - description: "Should filter GPUs with VRAM > 35GB and Ready phase", + description: "Should filter GPUs with VRAM > 35GB and Running/Pending phase", }, { name: "combined model and custom CEL expression", request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "A100", "Ready", 120.0, 40.0), - createTestGPU("gpu-3", "default", "V100", "Ready", 160.0, 32.0), - createTestGPU("gpu-4", "default", "A100", "Ready", 180.0, 40.0), + createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 120.0, 40.0), + createTestGPU("gpu-3", "default", "V100", constants.PhaseRunning, 160.0, 32.0), + createTestGPU("gpu-4", "default", "A100", constants.PhaseRunning, 180.0, 40.0), }, expectedCount: 2, - description: "Should filter A100 GPUs with TFLOPS >= 150 and Ready phase", + description: "Should filter A100 GPUs with TFLOPS >= 150 and Running/Pending phase", }, { name: "filter by labels", request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['environment'] == 'production'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu1.Labels["environment"] = "production" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu2 := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Labels["environment"] = "development" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) + gpu3 := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu3.Labels["environment"] = "production" return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), @@ -139,11 +140,11 @@ func TestCELFilter_NormalCases(t *testing.T) { name: "filter by annotations", request: createTestAllocRequest("default", "test-workload", "", "gpu.annotations['priority'] == 'critical'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu1.Annotations["priority"] = "critical" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu2 := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Annotations["priority"] = "low" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) + gpu3 := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu3.Annotations["priority"] = "critical" return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), @@ -154,13 +155,13 @@ func TestCELFilter_NormalCases(t *testing.T) { name: "combined labels and annotations filter", request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", 
"default", "A100", "Ready", 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu1.Labels["tier"] = "high-performance" gpu1.Annotations["priority"] = "critical" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu2 := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Labels["tier"] = "standard" gpu2.Annotations["priority"] = "critical" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) + gpu3 := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0) gpu3.Labels["tier"] = "high-performance" gpu3.Annotations["priority"] = "low" return []*tfv1.GPU{gpu1, gpu2, gpu3} @@ -179,16 +180,16 @@ func TestCELFilter_NormalCases(t *testing.T) { name: "complex combined expression with model, resources, and metadata", request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 180.0, 40.0) + gpu1 := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 180.0, 40.0) gpu1.Labels["environment"] = "production" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 120.0, 40.0) + gpu2 := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 120.0, 40.0) gpu2.Labels["environment"] = "production" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 200.0, 40.0) + gpu3 := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 200.0, 40.0) gpu3.Labels["environment"] = "development" return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), expectedCount: 1, - description: "Should filter A100 GPUs with TFLOPS >= 150, production environment, and Ready phase", + description: "Should filter A100 GPUs with TFLOPS >= 150, production environment, and Running/Pending phase", }, } @@ -224,11 +225,11 @@ func TestCELFilter_EdgeAndExceptionCases(t *testing.T) { t.Run("CEL expressions edge cases", func(t *testing.T) { // Test GPUs for execution testGPUs := []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), + createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "default", "V100", constants.PhaseRunning, 100.0, 32.0), } // Add GPU with nil resources - gpuWithNilResources := createTestGPU("gpu-nil", "default", "A100", "Ready", 0, 0) + gpuWithNilResources := createTestGPU("gpu-nil", "default", "A100", constants.PhaseRunning, 0, 0) gpuWithNilResources.Status.Available = nil testGPUs = append(testGPUs, gpuWithNilResources) From cd1d7ddb8f5b81067526a89b95e569c99d1ab01b Mon Sep 17 00:00:00 2001 From: dylan Date: Sun, 14 Sep 2025 06:39:22 -0700 Subject: [PATCH 20/34] disable predicate fast path --- .../filter/cel_filter/cel_filter.go | 705 ++++++------------ .../cel_filter/cel_filter_benchmark_test.go | 2 +- .../filter/cel_filter/cel_filter_test.go | 116 +++ 3 files changed, 343 insertions(+), 480 deletions(-) diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter.go b/internal/gpuallocator/filter/cel_filter/cel_filter.go index 1c3e01c9..80622f23 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter.go @@ -6,14 +6,16 @@ import ( "reflect" "regexp" "runtime" - "strconv" "strings" + "sync" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + 
"github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/google/cel-go/cel" "github.com/google/cel-go/common/types" "github.com/google/cel-go/common/types/ref" + "github.com/google/cel-go/common/types/traits" "github.com/google/cel-go/interpreter" "github.com/samber/lo" "sigs.k8s.io/controller-runtime/pkg/log" @@ -27,6 +29,30 @@ const ( DefaultWorkerCount = 4 ) +// Global string pool for GPU Phase values to reduce allocations +var ( + gpuPhaseStringPool = sync.OnceValue(func() map[string]types.String { + return map[string]types.String{ + constants.PhaseUnknown: types.String(constants.PhaseUnknown), + constants.PhasePending: types.String(constants.PhasePending), + constants.PhaseUpdating: types.String(constants.PhaseUpdating), + constants.PhaseRunning: types.String(constants.PhaseRunning), + constants.PhaseMigrating: types.String(constants.PhaseMigrating), + constants.PhaseDestroying: types.String(constants.PhaseDestroying), + } + }) +) + +// getPooledPhaseString returns a pooled CEL String for the given phase +func getPooledPhaseString(phase string) ref.Val { + pool := gpuPhaseStringPool() + if pooled, exists := pool[phase]; exists { + return pooled + } + // Return error for unexpected phase values + return types.NewErr("unknown GPU phase: %s", phase) +} + // fieldUsage tracks which GPU fields are used in the expression type fieldUsage struct { labels bool @@ -45,98 +71,33 @@ type ExpressionPattern struct { Generator func(matches []string) FastPathPredicate } -// Common fast path patterns - order matters (most specific first) -var fastPathPatterns = []ExpressionPattern{ - // Complex AND pattern: gpu.available.tflops >= NUMBER && gpu.labels['KEY'] == 'VALUE' - { - Pattern: regexp.MustCompile(`^gpu\.available\.tflops\s*>=\s*([0-9]+(?:\.[0-9]+)?)\s*&&\s*gpu\.labels\['([^']+)'\]\s*==\s*'([^']+)'$`), - Generator: func(matches []string) FastPathPredicate { - threshold, _ := strconv.ParseFloat(matches[1], 64) - labelKey, labelValue := matches[2], matches[3] - return func(gpu *tfv1.GPU) bool { - return gpu.Status.Available != nil && - gpu.Status.Available.Tflops.AsApproximateFloat64() >= threshold && - gpu.Labels != nil && gpu.Labels[labelKey] == labelValue - } - }, - }, - // gpu.available.tflops >= NUMBER - { - Pattern: regexp.MustCompile(`^gpu\.available\.tflops\s*>=\s*([0-9]+(?:\.[0-9]+)?)$`), - Generator: func(matches []string) FastPathPredicate { - threshold, _ := strconv.ParseFloat(matches[1], 64) - return func(gpu *tfv1.GPU) bool { - return gpu.Status.Available != nil && gpu.Status.Available.Tflops.AsApproximateFloat64() >= threshold - } - }, - }, - // gpu.available.tflops > NUMBER - { - Pattern: regexp.MustCompile(`^gpu\.available\.tflops\s*>\s*([0-9]+(?:\.[0-9]+)?)$`), - Generator: func(matches []string) FastPathPredicate { - threshold, _ := strconv.ParseFloat(matches[1], 64) - return func(gpu *tfv1.GPU) bool { - return gpu.Status.Available != nil && gpu.Status.Available.Tflops.AsApproximateFloat64() > threshold - } - }, - }, - // gpu.available.vram >= NUMBER - { - Pattern: regexp.MustCompile(`^gpu\.available\.vram\s*>=\s*([0-9]+(?:\.[0-9]+)?)$`), - Generator: func(matches []string) FastPathPredicate { - threshold, _ := strconv.ParseFloat(matches[1], 64) - return func(gpu *tfv1.GPU) bool { - return gpu.Status.Available != nil && gpu.Status.Available.Vram.AsApproximateFloat64() >= threshold - } - }, - }, - // gpu.available.vram > NUMBER - { - Pattern: regexp.MustCompile(`^gpu\.available\.vram\s*>\s*([0-9]+(?:\.[0-9]+)?)$`), - 
Generator: func(matches []string) FastPathPredicate { - threshold, _ := strconv.ParseFloat(matches[1], 64) - return func(gpu *tfv1.GPU) bool { - return gpu.Status.Available != nil && gpu.Status.Available.Vram.AsApproximateFloat64() > threshold - } - }, - }, - // gpu.labels['KEY'] == 'VALUE' - { - Pattern: regexp.MustCompile(`^gpu\.labels\['([^']+)'\]\s*==\s*'([^']+)'$`), - Generator: func(matches []string) FastPathPredicate { - key, value := matches[1], matches[2] - return func(gpu *tfv1.GPU) bool { - return gpu.Labels != nil && gpu.Labels[key] == value - } - }, - }, - // gpu.annotations['KEY'] == 'VALUE' - { - Pattern: regexp.MustCompile(`^gpu\.annotations\['([^']+)'\]\s*==\s*'([^']+)'$`), - Generator: func(matches []string) FastPathPredicate { - key, value := matches[1], matches[2] - return func(gpu *tfv1.GPU) bool { - return gpu.Annotations != nil && gpu.Annotations[key] == value - } - }, - }, -} - // ZeroAllocActivation provides zero-allocation variable resolution for CEL // This eliminates the need to create map[string]interface{} for each GPU type ZeroAllocActivation struct { - gpu *tfv1.GPU - workerPodKey tfv1.NameNamespace + gpuVal gpuVal + workerPodKey workerPodKeyVal usage fieldUsage } +func (a *ZeroAllocActivation) init(g *tfv1.GPU, k tfv1.NameNamespace, usage fieldUsage) { + a.gpuVal.GPU = g + a.gpuVal.labels = nil + a.gpuVal.annotations = nil + a.gpuVal.nodeSelector = nil + a.gpuVal.available = nil + a.gpuVal.runningApps = nil + a.workerPodKey.name = k.Name + a.workerPodKey.namespace = k.Namespace + a.usage = usage +} + // ResolveName implements interpreter.Activation interface func (a *ZeroAllocActivation) ResolveName(name string) (interface{}, bool) { switch name { case CELVarGPU: - return a.createGPUObject(), true + return &a.gpuVal, true case CELVarWorkerPodKey: - return a.createWorkerPodKeyObject(), true + return &a.workerPodKey, true default: return nil, false } @@ -147,20 +108,165 @@ func (a *ZeroAllocActivation) Parent() interpreter.Activation { return nil } -// createGPUObject creates GPU object on-demand without maps -func (a *ZeroAllocActivation) createGPUObject() interface{} { - // Return GPU value with lazy caching - return &gpuVal{GPU: a.gpu} +type workerPodKeyVal struct { + name string + namespace string +} + +func (w *workerPodKeyVal) Type() ref.Type { return types.MapType } +func (w *workerPodKeyVal) Value() interface{} { + return map[string]string{"name": w.name, "namespace": w.namespace} +} +func (w *workerPodKeyVal) Equal(other ref.Val) ref.Val { return types.False } +func (w *workerPodKeyVal) ConvertToNative(t reflect.Type) (interface{}, error) { + return map[string]string{"name": w.name, "namespace": w.namespace}, nil +} +func (w *workerPodKeyVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (w *workerPodKeyVal) Get(index ref.Val) ref.Val { + key, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + switch key { + case "name": + return types.String(w.name) + case "namespace": + return types.String(w.namespace) + default: + return types.String("") + } +} +func (w *workerPodKeyVal) HasField(field string) bool { + return field == "name" || field == "namespace" +} + +type appVal struct { + name string + namespace string + count int64 } -// createWorkerPodKeyObject creates worker pod key object -func (a *ZeroAllocActivation) createWorkerPodKeyObject() interface{} { +func (a *appVal) Type() ref.Type { return types.MapType } +func (a *appVal) Value() interface{} 
{ return nil } +func (a *appVal) Equal(other ref.Val) ref.Val { return types.False } +func (a *appVal) ConvertToNative(t reflect.Type) (interface{}, error) { return map[string]interface{}{ - "name": a.workerPodKey.Name, - "namespace": a.workerPodKey.Namespace, + "name": a.name, + "namespace": a.namespace, + "count": a.count, + }, nil +} +func (a *appVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (a *appVal) Get(index ref.Val) ref.Val { + key, _ := index.Value().(string) + switch key { + case "name": + return types.String(a.name) + case "namespace": + return types.String(a.namespace) + case "count": + return types.Int(a.count) + default: + return types.String("") + } +} +func (a *appVal) HasField(field string) bool { + return field == "name" || field == "namespace" || field == "count" +} + +type runningAppsVal struct { + apps []tfv1.RunningAppDetail +} + +func (r *runningAppsVal) Type() ref.Type { return types.ListType } +func (r *runningAppsVal) Value() interface{} { return r.apps } +func (r *runningAppsVal) Equal(other ref.Val) ref.Val { return types.False } +func (r *runningAppsVal) ConvertToNative(t reflect.Type) (interface{}, error) { + if t.Kind() == reflect.Slice { + out := make([]map[string]interface{}, len(r.apps)) + for i, a := range r.apps { + out[i] = map[string]interface{}{ + "name": a.Name, + "namespace": a.Namespace, + "count": a.Count, + } + } + return out, nil } + return r.apps, nil +} +func (r *runningAppsVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (r *runningAppsVal) Get(index ref.Val) ref.Val { + i, ok := index.Value().(int) + if !ok { + if i64, ok2 := index.Value().(int64); ok2 { + i = int(i64) + ok = true + } + } + if !ok || i < 0 || i >= len(r.apps) { + return types.NewErr("index out of range") + } + app := r.apps[i] + return &appVal{name: app.Name, namespace: app.Namespace, count: int64(app.Count)} +} + +func (r *runningAppsVal) Size() ref.Val { return types.Int(len(r.apps)) } + +func (r *runningAppsVal) Contains(elem ref.Val) ref.Val { + av, ok := elem.(*appVal) + if !ok { + return types.False + } + for _, a := range r.apps { + if a.Name == av.name && a.Namespace == av.namespace && int64(a.Count) == av.count { + return types.True + } + } + return types.False +} +func (r *runningAppsVal) Iterator() traits.Iterator { + return &runningAppsIterator{apps: r.apps} +} +func (r *runningAppsVal) Add(elem ref.Val) ref.Val { + return types.NewErr("runningApps list is read-only") } +type runningAppsIterator struct { + apps []tfv1.RunningAppDetail + i int +} + +func (it *runningAppsIterator) Type() ref.Type { return types.IteratorType } +func (it *runningAppsIterator) Value() interface{} { return nil } +func (it *runningAppsIterator) Equal(other ref.Val) ref.Val { return types.False } +func (it *runningAppsIterator) ConvertToNative(t reflect.Type) (interface{}, error) { + return nil, fmt.Errorf("iterator cannot convert to native") +} +func (it *runningAppsIterator) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (it *runningAppsIterator) HasNext() ref.Val { + return types.Bool(it.i < len(it.apps)) +} +func (it *runningAppsIterator) Next() ref.Val { + if it.i >= len(it.apps) { + return types.NewErr("iterator past end") + } + a := it.apps[it.i] + it.i++ + return &appVal{name: a.Name, namespace: a.Namespace, count: int64(a.Count)} +} + +var _ traits.Lister = (*runningAppsVal)(nil) +var _ 
traits.Iterator = (*runningAppsIterator)(nil) + // gpuVal implements CEL value interface for GPU objects to eliminate map allocations type gpuVal struct { *tfv1.GPU @@ -234,7 +340,7 @@ func (v *gpuVal) Get(index ref.Val) ref.Val { case GPUFieldUUID: return types.String(v.GPU.Status.UUID) case GPUFieldPhase: - return types.String(string(v.GPU.Status.Phase)) + return getPooledPhaseString(string(v.GPU.Status.Phase)) case GPUFieldUsedBy: return types.String(string(v.GPU.Status.UsedBy)) case GPUFieldMessage: @@ -266,14 +372,11 @@ func (v *gpuVal) Get(index ref.Val) ref.Val { case GPUFieldRunningApps: // For now, keep simple implementation - can optimize later if needed if v.runningApps == nil { - apps := make([]interface{}, len(v.GPU.Status.RunningApps)) + apps := make([]tfv1.RunningAppDetail, len(v.GPU.Status.RunningApps)) for i, app := range v.GPU.Status.RunningApps { - apps[i] = map[string]interface{}{ - "name": app.Name, - "namespace": app.Namespace, - } + apps[i] = *app } - v.runningApps = types.NewDynamicList(types.DefaultTypeAdapter, apps) + v.runningApps = &runningAppsVal{apps: apps} } return v.runningApps default: @@ -323,7 +426,7 @@ func (v *availableVal) Get(index ref.Val) ref.Val { case ResourceFieldTFlops: return types.Double(0.0) case ResourceFieldVRAM: - return types.Int(0) + return types.Double(0.0) default: return types.NewErr("no such field: %s", field) } @@ -333,7 +436,7 @@ func (v *availableVal) Get(index ref.Val) ref.Val { case ResourceFieldTFlops: return types.Double(v.available.Tflops.AsApproximateFloat64()) case ResourceFieldVRAM: - return types.Int(v.available.Vram.Value()) + return types.Double(float64(v.available.Vram.Value())) default: return types.NewErr("no such field: %s", field) } @@ -404,8 +507,6 @@ type CELFilter struct { usage fieldUsage // Display expression for logging (read-only) displayExpression string - // Fast path predicate for common patterns - fastPathPredicate FastPathPredicate } // NewAllocRequestCELFilter creates a new CEL filter from allocation request @@ -429,9 +530,6 @@ func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, e // Analyze field usage in user expression only usage := analyzeFieldUsage(userExpression) - // Try to compile fast path predicate - fastPath := compileFastPath(userExpression) - // Handle nil request case name := "AllocRequest-unknown" if req != nil { @@ -446,7 +544,6 @@ func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, e userExpression: userExpression, usage: usage, displayExpression: displayExpression, - fastPathPredicate: fastPath, }, nil } @@ -507,44 +604,24 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, } // Use fast path if available, otherwise fall back to CEL - if f.fastPathPredicate != nil { - // Fast path: direct Go function evaluation with optional parallelization - if len(earlyFilteredGPUs) >= ParallelThreshold { - filteredGPUs = f.filterParallel(earlyFilteredGPUs) - } else { - for _, gpu := range earlyFilteredGPUs { - if f.fastPathPredicate(gpu) { - filteredGPUs = append(filteredGPUs, gpu) - } - } - } - log.V(1).Info("CEL filter applied (fast path)", - "filter", f.name, - "displayExpression", f.displayExpression, - "userExpression", f.userExpression, - "inputGPUs", len(gpus), - "earlyFilteredGPUs", len(earlyFilteredGPUs), - "outputGPUs", len(filteredGPUs)) + // Fallback to CEL evaluation for complex expressions + if len(earlyFilteredGPUs) >= ParallelThreshold { + // Use parallel evaluation for large GPU sets + 
filteredGPUs = f.filterFallbackParallel(ctx, program, earlyFilteredGPUs, workerPodKey) } else { - // Fallback to CEL evaluation for complex expressions - if len(earlyFilteredGPUs) >= ParallelThreshold { - // Use parallel evaluation for large GPU sets - filteredGPUs = f.filterFallbackParallel(ctx, program, earlyFilteredGPUs, workerPodKey) - } else { - // Sequential evaluation for smaller sets - filteredGPUs = f.filterFallbackSequential(ctx, program, earlyFilteredGPUs, workerPodKey) - } - - log.V(1).Info("CEL filter applied (CEL evaluation)", - "filter", f.name, - "displayExpression", f.displayExpression, - "userExpression", f.userExpression, - "inputGPUs", len(gpus), - "earlyFilteredGPUs", len(earlyFilteredGPUs), - "outputGPUs", len(filteredGPUs)) + // Sequential evaluation for smaller sets + filteredGPUs = f.filterFallbackSequential(ctx, program, earlyFilteredGPUs, workerPodKey) } + log.V(1).Info("CEL filter applied (CEL evaluation)", + "filter", f.name, + "displayExpression", f.displayExpression, + "userExpression", f.userExpression, + "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), + "outputGPUs", len(filteredGPUs)) + return filteredGPUs, nil } @@ -591,67 +668,11 @@ func createCELEnvironment() (*cel.Env, error) { ) } -// filterParallel processes GPUs in parallel for large datasets -func (f *CELFilter) filterParallel(gpus []*tfv1.GPU) []*tfv1.GPU { - numGPUs := len(gpus) - numWorkers := runtime.NumCPU() - if numWorkers > DefaultWorkerCount { - numWorkers = DefaultWorkerCount - } - - chunkSize := (numGPUs + numWorkers - 1) / numWorkers - resultChannels := make([]<-chan []*tfv1.GPU, numWorkers) - - // Create workers - for i := 0; i < numWorkers; i++ { - start := i * chunkSize - end := start + chunkSize - if end > numGPUs { - end = numGPUs - } - - if start >= end { - // No work for this worker - ch := make(chan []*tfv1.GPU, 1) - ch <- []*tfv1.GPU{} - close(ch) - resultChannels[i] = ch - continue - } - - chunk := gpus[start:end] - resultCh := make(chan []*tfv1.GPU, 1) - resultChannels[i] = resultCh - - // Start worker goroutine - go func(gpuChunk []*tfv1.GPU, resultCh chan<- []*tfv1.GPU) { - defer close(resultCh) - - filtered := make([]*tfv1.GPU, 0, len(gpuChunk)/2) // Estimate 50% pass rate - for _, gpu := range gpuChunk { - if f.fastPathPredicate(gpu) { - filtered = append(filtered, gpu) - } - } - resultCh <- filtered - }(chunk, resultCh) - } - - // Collect results - var totalFiltered []*tfv1.GPU - for _, ch := range resultChannels { - chunkResults := <-ch - totalFiltered = append(totalFiltered, chunkResults...) 
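// Editorial aside, not part of the patch: a minimal end-to-end usage sketch under the
// assumption that it lives in this package with context/time imported. The variable names
// and literal values are made up; NewExpressionCache, NewCELFilter, Filter and the
// AllocRequest fields are the ones exercised elsewhere in this series.
func demoFilterUsage(ctx context.Context, gpus []*tfv1.GPU) ([]*tfv1.GPU, error) {
	cache, err := NewExpressionCache(100, 5*time.Minute) // size and TTL mirror the tests
	if err != nil {
		return nil, err
	}
	req := &tfv1.AllocRequest{
		GPUModel:            "A100",
		CELFilterExpression: "gpu.available.tflops >= 150.0 && size(gpu.runningApps) == 0",
	}
	f, err := NewCELFilter(req, cache)
	if err != nil {
		return nil, err
	}
	return f.Filter(ctx, tfv1.NameNamespace{Namespace: "default", Name: "worker-0"}, gpus)
}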
- } - - return totalFiltered -} - // filterFallbackSequential performs sequential CEL evaluation for smaller GPU sets func (f *CELFilter) filterFallbackSequential(ctx context.Context, program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) []*tfv1.GPU { filteredGPUs := make([]*tfv1.GPU, 0, len(gpus)/2) log := log.FromContext(ctx) - + var activation ZeroAllocActivation for i, gpu := range gpus { // Periodic context check every 64 GPUs for very large sets if i&63 == 0 { @@ -664,14 +685,10 @@ func (f *CELFilter) filterFallbackSequential(ctx context.Context, program cel.Pr } // Use zero-allocation activation instead of maps - activation := &ZeroAllocActivation{ - gpu: gpu, - workerPodKey: workerPodKey, - usage: f.usage, - } + activation.init(gpu, workerPodKey, f.usage) // Direct synchronous evaluation with custom activation - result, _, evalErr := program.Eval(activation) + result, _, evalErr := program.Eval(&activation) if evalErr != nil { log.Error(evalErr, "CEL expression evaluation failed", @@ -683,10 +700,8 @@ func (f *CELFilter) filterFallbackSequential(ctx context.Context, program cel.Pr } // Convert result to boolean - if boolResult, ok := result.(types.Bool); ok { - if bool(boolResult) { - filteredGPUs = append(filteredGPUs, gpu) - } + if boolResult, ok := result.(types.Bool); ok && bool(boolResult) { + filteredGPUs = append(filteredGPUs, gpu) } else { log.Error(nil, "CEL expression did not return boolean", "expression", f.userExpression, @@ -710,7 +725,7 @@ func (f *CELFilter) filterFallbackParallel(ctx context.Context, program cel.Prog chunkSize := (numGPUs + numWorkers - 1) / numWorkers resultChannels := make([]<-chan []*tfv1.GPU, numWorkers) - + var activation ZeroAllocActivation // Create workers for i := 0; i < numWorkers; i++ { start := i * chunkSize @@ -740,24 +755,18 @@ func (f *CELFilter) filterFallbackParallel(ctx context.Context, program cel.Prog for _, gpu := range gpuChunk { // Use zero-allocation activation - activation := &ZeroAllocActivation{ - gpu: gpu, - workerPodKey: workerPodKey, - usage: f.usage, - } + activation.init(gpu, workerPodKey, f.usage) // Direct synchronous evaluation - result, _, evalErr := program.Eval(activation) + result, _, evalErr := program.Eval(&activation) if evalErr != nil { // On error, exclude the GPU (fail-safe) continue } // Convert result to boolean - if boolResult, ok := result.(types.Bool); ok { - if bool(boolResult) { - filtered = append(filtered, gpu) - } + if boolResult, ok := result.(types.Bool); ok && bool(boolResult) { + filtered = append(filtered, gpu) } // On non-boolean result, exclude the GPU (fail-safe) } @@ -775,273 +784,11 @@ func (f *CELFilter) filterFallbackParallel(ctx context.Context, program cel.Prog return totalFiltered } -// compileFastPath tries to compile expression into a fast path predicate -// Uses AST analysis for better pattern matching than regex -func compileFastPath(expression string) FastPathPredicate { - if expression == "" { - return nil - } - - // Try AST-based compilation first (more flexible) - if pred := compileASTFastPath(expression); pred != nil { - return pred - } - - // Fall back to regex patterns for backward compatibility - for _, pattern := range fastPathPatterns { - matches := pattern.Pattern.FindStringSubmatch(expression) - if matches != nil { - return pattern.Generator(matches) - } - } - - return nil -} - -// compileASTFastPath analyzes AST to generate fast path predicates -func compileASTFastPath(expression string) FastPathPredicate { - // Parse expression to AST - env, err 
:= createCELEnvironment() - if err != nil { - return nil - } - - _, issues := env.Parse(expression) - if issues != nil && issues.Err() != nil { - return nil - } - - // Extract conditions from expression string (simplified approach) - conditions := extractConditionsFromString(expression) - if len(conditions) == 0 { - return nil - } - - // Generate fast path predicate - return func(gpu *tfv1.GPU) bool { - for _, condition := range conditions { - if !evaluateCondition(gpu, condition) { - return false // Short-circuit on first failure (AND logic) - } - } - return true - } -} - -// astCondition represents a simple condition extracted from AST -type astCondition struct { - field string // e.g., "gpu.available.tflops", "gpu.labels['env']" - operator string // "==", "!=", ">=", ">" - value interface{} // expected value -} - -// extractConditionsFromString uses enhanced pattern matching to extract conditions -// This bridges the gap between regex and full AST until full AST implementation -func extractConditionsFromString(exprStr string) []astCondition { - var conditions []astCondition - - // Split by && to handle multiple conditions - parts := strings.Split(exprStr, " && ") - - for _, part := range parts { - part = strings.TrimSpace(part) - - // Handle gpu.available.tflops >= X - if strings.Contains(part, "gpu.available.tflops") && strings.Contains(part, ">=") { - if condition := parseNumericCondition(part, "gpu.available.tflops", ">="); condition != nil { - conditions = append(conditions, *condition) - } - } else if strings.Contains(part, "gpu.available.tflops") && strings.Contains(part, ">") { - if condition := parseNumericCondition(part, "gpu.available.tflops", ">"); condition != nil { - conditions = append(conditions, *condition) - } - } - - // Handle gpu.available.vram >= X - if strings.Contains(part, "gpu.available.vram") && strings.Contains(part, ">=") { - if condition := parseNumericCondition(part, "gpu.available.vram", ">="); condition != nil { - conditions = append(conditions, *condition) - } - } - - // Handle gpu.labels['key'] == 'value' - if strings.Contains(part, "gpu.labels[") && strings.Contains(part, "==") { - if condition := parseLabelCondition(part, "gpu.labels"); condition != nil { - conditions = append(conditions, *condition) - } - } - - // Handle gpu.annotations['key'] == 'value' - if strings.Contains(part, "gpu.annotations[") && strings.Contains(part, "==") { - if condition := parseLabelCondition(part, "gpu.annotations"); condition != nil { - conditions = append(conditions, *condition) - } - } - - // Handle gpu.gpuModel == 'value' - if strings.Contains(part, "gpu.gpuModel") && strings.Contains(part, "==") { - if condition := parseStringCondition(part, "gpu.gpuModel", "=="); condition != nil { - conditions = append(conditions, *condition) - } - } - } - - return conditions -} - -// parseNumericCondition parses numeric comparison conditions -func parseNumericCondition(expr, field, operator string) *astCondition { - parts := strings.Split(expr, operator) - if len(parts) != 2 { - return nil - } - - valueStr := strings.TrimSpace(parts[1]) - value, err := strconv.ParseFloat(valueStr, 64) - if err != nil { - return nil - } - - return &astCondition{ - field: field, - operator: operator, - value: value, - } -} - -// parseLabelCondition parses label/annotation map access conditions -func parseLabelCondition(expr, fieldPrefix string) *astCondition { - // Extract key from gpu.labels['key'] == 'value' format - keyStart := strings.Index(expr, "['") + 2 - keyEnd := 
strings.Index(expr[keyStart:], "']") - if keyEnd == -1 { - return nil - } - key := expr[keyStart : keyStart+keyEnd] - - // Extract value - valueStart := strings.LastIndex(expr, "'") - if valueStart == -1 { - return nil - } - // Find the quote before the last quote - prevQuotePos := strings.LastIndex(expr[:valueStart], "'") - if prevQuotePos == -1 { - return nil - } - value := expr[prevQuotePos+1 : valueStart] - - return &astCondition{ - field: fieldPrefix + "['" + key + "']", - operator: "==", - value: value, - } -} - -// parseStringCondition parses simple string equality conditions -func parseStringCondition(expr, field, operator string) *astCondition { - parts := strings.Split(expr, operator) - if len(parts) != 2 { - return nil - } - - valueStr := strings.TrimSpace(parts[1]) - // Remove quotes - if strings.HasPrefix(valueStr, "'") && strings.HasSuffix(valueStr, "'") { - valueStr = valueStr[1 : len(valueStr)-1] - } - - return &astCondition{ - field: field, - operator: operator, - value: valueStr, - } -} - -// evaluateCondition evaluates a single condition against a GPU -func evaluateCondition(gpu *tfv1.GPU, condition astCondition) bool { - switch condition.field { - case "gpu.available.tflops": - if gpu.Status.Available == nil { - return false - } - actualValue := gpu.Status.Available.Tflops.AsApproximateFloat64() - expectedValue, ok := condition.value.(float64) - if !ok { - return false - } - - switch condition.operator { - case ">=": - return actualValue >= expectedValue - case ">": - return actualValue > expectedValue - default: - return false - } - - case "gpu.available.vram": - if gpu.Status.Available == nil { - return false - } - actualValue := float64(gpu.Status.Available.Vram.Value()) - expectedValue, ok := condition.value.(float64) - if !ok { - return false - } - - switch condition.operator { - case ">=": - return actualValue >= expectedValue - case ">": - return actualValue > expectedValue - default: - return false - } - - case "gpu.gpuModel": - expectedValue, ok := condition.value.(string) - if !ok { - return false - } - return gpu.Status.GPUModel == expectedValue - - default: - // Handle label/annotation access - if strings.HasPrefix(condition.field, "gpu.labels['") { - key := strings.TrimSuffix(strings.TrimPrefix(condition.field, "gpu.labels['"), "']") - expectedValue, ok := condition.value.(string) - if !ok { - return false - } - if gpu.Labels == nil { - return expectedValue == "" - } - return gpu.Labels[key] == expectedValue - } - - if strings.HasPrefix(condition.field, "gpu.annotations['") { - key := strings.TrimSuffix(strings.TrimPrefix(condition.field, "gpu.annotations['"), "']") - expectedValue, ok := condition.value.(string) - if !ok { - return false - } - if gpu.Annotations == nil { - return expectedValue == "" - } - return gpu.Annotations[key] == expectedValue - } - - return false - } -} - // analyzeFieldUsage performs simple heuristic analysis of which fields are used in the expression func analyzeFieldUsage(expression string) fieldUsage { if expression == "" { return fieldUsage{} } - return fieldUsage{ labels: strings.Contains(expression, "labels"), annotations: strings.Contains(expression, "annotations"), diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go index 5d1e7091..26b825fd 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go @@ -256,7 +256,7 @@ func 
printPerformanceComparison(b *testing.B) { === GPU Filter Performance Comparison === Test Environment: -- Number of GPUs: 10000 +- Number of GPUs: 1000000 - GPU Models: A100 (33%%), V100 (33%%), H100 (33%%) - GPU Phases: Ready (90%%), Pending (10%%) diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go index ffc903ea..f882747b 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go @@ -191,6 +191,120 @@ func TestCELFilter_NormalCases(t *testing.T) { expectedCount: 1, description: "Should filter A100 GPUs with TFLOPS >= 150, production environment, and Running/Pending phase", }, + { + name: "filter by running apps - no running apps", + request: createTestAllocRequest("default", "test-workload", "", "size(gpu.runningApps) == 0"), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "app1", Namespace: "default", Count: 1}, + } + return gpu + }(), + createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + }, + expectedCount: 2, + description: "Should return GPUs with no running apps", + }, + { + name: "filter by running apps - has specific app", + request: createTestAllocRequest("default", "test-workload", "", "gpu.runningApps.exists(app, app.name == 'training-job' && app.namespace == 'ml-team')"), + gpus: []*tfv1.GPU{ + func() *tfv1.GPU { + gpu := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "training-job", Namespace: "ml-team", Count: 2}, + {Name: "other-job", Namespace: "default", Count: 1}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "other-job", Namespace: "ml-team", Count: 1}, + } + return gpu + }(), + createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + }, + expectedCount: 1, + description: "Should return GPUs running specific training job", + }, + { + name: "filter by running apps - count threshold", + request: createTestAllocRequest("default", "test-workload", "", "gpu.runningApps.all(app, app.count <= 2) && size(gpu.runningApps) > 0"), + gpus: []*tfv1.GPU{ + func() *tfv1.GPU { + gpu := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "default", Count: 1}, + {Name: "job2", Namespace: "default", Count: 2}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "default", Count: 5}, // Count > 2 + } + return gpu + }(), + createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + }, + expectedCount: 1, + description: "Should return GPUs where all running apps have count <= 2", + }, + { + name: "filter by running apps - complex condition", + request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && (size(gpu.runningApps) == 0 || gpu.runningApps.all(app, app.namespace != 
'restricted'))"), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "allowed", Count: 1}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "restricted", Count: 1}, // Restricted namespace + } + return gpu + }(), + createTestGPU("gpu-4", "default", "V100", constants.PhaseRunning, 150.0, 40.0), // Wrong model + }, + expectedCount: 2, + description: "Should return A100 GPUs with sufficient resources and no restricted apps", + }, + { + name: "filter by running apps - namespace isolation", + request: createTestAllocRequest("default", "test-workload", "", "!gpu.runningApps.exists(app, app.namespace == 'tenant-a')"), + gpus: []*tfv1.GPU{ + func() *tfv1.GPU { + gpu := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "tenant-b", Count: 1}, + {Name: "job2", Namespace: "shared", Count: 1}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "tenant-a", Count: 1}, // Should be excluded + {Name: "job2", Namespace: "tenant-b", Count: 1}, + } + return gpu + }(), + createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + }, + expectedCount: 2, + description: "Should return GPUs not running apps from tenant-a", + }, } for _, tt := range tests { @@ -208,6 +322,8 @@ func TestCELFilter_NormalCases(t *testing.T) { // Verify results require.NoError(t, err, "Filter execution should not fail") + + // Debug output for complex condition test assert.Len(t, filteredGPUs, tt.expectedCount, tt.description) // Verify filter name From f700eac4d7457b4240be3fe1dae7bb8aef0f1277 Mon Sep 17 00:00:00 2001 From: dylan Date: Sun, 14 Sep 2025 06:48:56 -0700 Subject: [PATCH 21/34] fix lint issue --- .../filter/cel_filter/cel_filter.go | 42 ++--- .../cel_filter/cel_filter_benchmark_test.go | 37 +++-- .../filter/cel_filter/cel_filter_test.go | 156 +++++++++--------- 3 files changed, 124 insertions(+), 111 deletions(-) diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter.go b/internal/gpuallocator/filter/cel_filter/cel_filter.go index 80622f23..18a0d176 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter.go @@ -130,16 +130,16 @@ func (w *workerPodKeyVal) Get(index ref.Val) ref.Val { return types.NewErr("index must be string") } switch key { - case "name": + case GPUFieldName: return types.String(w.name) - case "namespace": + case GPUFieldNamespace: return types.String(w.namespace) default: return types.String("") } } func (w *workerPodKeyVal) HasField(field string) bool { - return field == "name" || field == "namespace" + return field == GPUFieldName || field == GPUFieldNamespace } type appVal struct { @@ -291,7 +291,7 @@ func (v *gpuVal) Value() interface{} { // Equal implements ref.Val interface func (v *gpuVal) Equal(other ref.Val) ref.Val { if otherGPU, ok := other.(*gpuVal); ok { - return types.Bool(v.GPU.UID == 
otherGPU.GPU.UID) + return types.Bool(v.UID == otherGPU.UID) } return types.False } @@ -332,48 +332,48 @@ func (v *gpuVal) Get(index ref.Val) ref.Val { switch field { case GPUFieldName: - return types.String(v.GPU.Name) + return types.String(v.Name) case GPUFieldNamespace: - return types.String(v.GPU.Namespace) + return types.String(v.Namespace) case GPUFieldGPUModel: - return types.String(v.GPU.Status.GPUModel) + return types.String(v.Status.GPUModel) case GPUFieldUUID: - return types.String(v.GPU.Status.UUID) + return types.String(v.Status.UUID) case GPUFieldPhase: - return getPooledPhaseString(string(v.GPU.Status.Phase)) + return getPooledPhaseString(string(v.Status.Phase)) case GPUFieldUsedBy: - return types.String(string(v.GPU.Status.UsedBy)) + return types.String(string(v.Status.UsedBy)) case GPUFieldMessage: - return types.String(v.GPU.Status.Message) + return types.String(v.Status.Message) case GPUFieldLabels: // Lazy initialization with caching if v.labels == nil { - v.labels = &labelsVal{labels: v.GPU.Labels} + v.labels = &labelsVal{labels: v.Labels} } return v.labels case GPUFieldAnnotations: // Lazy initialization with caching if v.annotations == nil { - v.annotations = &labelsVal{labels: v.GPU.Annotations} + v.annotations = &labelsVal{labels: v.Annotations} } return v.annotations case GPUFieldAvailable: // Lazy initialization with caching if v.available == nil { - v.available = &availableVal{available: v.GPU.Status.Available} + v.available = &availableVal{available: v.Status.Available} } return v.available case GPUFieldNodeSelector: // Lazy initialization with caching if v.nodeSelector == nil { - v.nodeSelector = &labelsVal{labels: v.GPU.Status.NodeSelector} + v.nodeSelector = &labelsVal{labels: v.Status.NodeSelector} } return v.nodeSelector case GPUFieldRunningApps: // For now, keep simple implementation - can optimize later if needed if v.runningApps == nil { - apps := make([]tfv1.RunningAppDetail, len(v.GPU.Status.RunningApps)) - for i, app := range v.GPU.Status.RunningApps { + apps := make([]tfv1.RunningAppDetail, len(v.Status.RunningApps)) + for i, app := range v.Status.RunningApps { apps[i] = *app } v.runningApps = &runningAppsVal{apps: apps} @@ -559,8 +559,8 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, return gpus, nil } - // Pre-allocate result slice with estimated capacity - filteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) + // Pre-allocate result slice with estimated capacity for early filtering + var filteredGPUs []*tfv1.GPU // Early filtering phase: apply basic filters first to reduce CEL evaluation overhead earlyFilteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) @@ -608,7 +608,7 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, // Fallback to CEL evaluation for complex expressions if len(earlyFilteredGPUs) >= ParallelThreshold { // Use parallel evaluation for large GPU sets - filteredGPUs = f.filterFallbackParallel(ctx, program, earlyFilteredGPUs, workerPodKey) + filteredGPUs = f.filterFallbackParallel(program, earlyFilteredGPUs, workerPodKey) } else { // Sequential evaluation for smaller sets filteredGPUs = f.filterFallbackSequential(ctx, program, earlyFilteredGPUs, workerPodKey) @@ -716,7 +716,7 @@ func (f *CELFilter) filterFallbackSequential(ctx context.Context, program cel.Pr } // filterFallbackParallel performs parallel CEL evaluation for large GPU sets -func (f *CELFilter) filterFallbackParallel(ctx context.Context, program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) 
[]*tfv1.GPU { +func (f *CELFilter) filterFallbackParallel(program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) []*tfv1.GPU { numGPUs := len(gpus) numWorkers := runtime.NumCPU() if numWorkers > DefaultWorkerCount { diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go index 26b825fd..0cd46d02 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go @@ -11,6 +11,13 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" ) +// Test constants for repeated strings +const ( + testEnvironmentProduction = "production" + testTierHighPerformance = "high-performance" + testPriorityCritical = "critical" +) + // Benchmark performance of the CEL filter compared to the original filter func BenchmarkFilterPerformance(b *testing.B) { // Create test data @@ -30,10 +37,10 @@ func BenchmarkFilterPerformance(b *testing.B) { phase = constants.PhasePending } - gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", gpuModel, phase, 150.0, 40.0) - gpu.Labels["environment"] = "production" + gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), gpuModel, phase, 150.0, 40.0) + gpu.Labels["environment"] = testEnvironmentProduction if i%2 == 0 { - gpu.Labels["tier"] = "high-performance" + gpu.Labels["tier"] = testTierHighPerformance } gpus[i] = gpu } @@ -61,7 +68,7 @@ func BenchmarkFilterPerformance(b *testing.B) { // Benchmark CEL filter - basic filtering b.Run("CELFilter_Basic", func(b *testing.B) { - request := createTestAllocRequest("default", "test-workload", "A100", "") + request := createTestAllocRequest("A100", "") cache, err := NewExpressionCache(100, 5*time.Minute) if err != nil { b.Fatal(err) @@ -84,7 +91,7 @@ func BenchmarkFilterPerformance(b *testing.B) { // Benchmark CEL filter - complex expression b.Run("CELFilter_Complex", func(b *testing.B) { - request := createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'") + request := createTestAllocRequest("A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == '"+testEnvironmentProduction+"'") cache, err := NewExpressionCache(100, 5*time.Minute) if err != nil { b.Fatal(err) @@ -116,15 +123,15 @@ func BenchmarkFilterPerformance(b *testing.B) { "gpu.gpuModel == 'A100' && gpu.available.tflops > 100.0", "gpu.gpuModel == 'V100' && gpu.available.tflops > 80.0", "gpu.gpuModel == 'H100' && gpu.available.tflops > 180.0", - "gpu.labels['environment'] == 'production'", - "gpu.labels['tier'] == 'high-performance'", + "gpu.labels['environment'] == '" + testEnvironmentProduction + "'", + "gpu.labels['tier'] == '" + testTierHighPerformance + "'", "gpu.available.vram > 30000000000", } b.ResetTimer() for i := 0; i < b.N; i++ { expression := expressions[i%len(expressions)] - request := createTestAllocRequest("default", "test-workload", "", expression) + request := createTestAllocRequest("", expression) celFilter, err := NewCELFilter(request, cache) if err != nil { @@ -189,10 +196,10 @@ func BenchmarkExpressionComplexity(b *testing.B) { const numGPUs = 100 gpus := make([]*tfv1.GPU, numGPUs) for i := 0; i < numGPUs; i++ { - gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", "A100", constants.PhaseRunning, 150.0, 40.0) - gpu.Labels["environment"] = "production" - gpu.Labels["tier"] = "high-performance" - gpu.Annotations["priority"] = "critical" + gpu 
:= createTestGPU(fmt.Sprintf("gpu-%d", i), "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Labels["environment"] = testEnvironmentProduction + gpu.Labels["tier"] = testTierHighPerformance + gpu.Annotations["priority"] = testPriorityCritical gpus[i] = gpu } @@ -217,11 +224,11 @@ func BenchmarkExpressionComplexity(b *testing.B) { }, { name: "VeryComplex", - expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == '" + testEnvironmentProduction + "'", }, { name: "UltraComplex", - expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production' && gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == '" + testEnvironmentProduction + "' && gpu.labels['tier'] == '" + testTierHighPerformance + "' && gpu.annotations['priority'] == '" + testPriorityCritical + "'", }, } @@ -232,7 +239,7 @@ func BenchmarkExpressionComplexity(b *testing.B) { b.Fatal(err) } - request := createTestAllocRequest("default", "test-workload", "", tc.expression) + request := createTestAllocRequest("", tc.expression) celFilter, err := NewCELFilter(request, cache) if err != nil { b.Fatal(err) diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go index f882747b..72481ee9 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go @@ -13,12 +13,18 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +// Test constants for repeated strings used only in cel_filter_test.go +const ( + testEnvProduction = "production" + testPriorCritical = "critical" +) + // Helper functions for creating test data -func createTestGPU(name, namespace, gpuModel, phase string, tflops, vram float64) *tfv1.GPU { +func createTestGPU(name, gpuModel, phase string, tflops, vram float64) *tfv1.GPU { gpu := &tfv1.GPU{ ObjectMeta: metav1.ObjectMeta{ Name: name, - Namespace: namespace, + Namespace: "default", Labels: make(map[string]string), Annotations: make(map[string]string), }, @@ -41,11 +47,11 @@ func createTestGPU(name, namespace, gpuModel, phase string, tflops, vram float64 return gpu } -func createTestAllocRequest(namespace, name, gpuModel, celExpression string) *tfv1.AllocRequest { +func createTestAllocRequest(gpuModel, celExpression string) *tfv1.AllocRequest { return &tfv1.AllocRequest{ WorkloadNameNamespace: tfv1.NameNamespace{ - Name: name, - Namespace: namespace, + Name: "test-workload", + Namespace: "default", }, GPUModel: gpuModel, CELFilterExpression: celExpression, @@ -66,71 +72,71 @@ func TestCELFilter_NormalCases(t *testing.T) { }{ { name: "filter by GPU model", - request: createTestAllocRequest("default", "test-workload", "A100", ""), + request: createTestAllocRequest("A100", ""), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", constants.PhaseRunning, 100.0, 32.0), - createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", 
"V100", constants.PhaseRunning, 100.0, 32.0), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), }, expectedCount: 2, description: "Should filter GPUs matching the specified model A100", }, { name: "filter by GPU phase only", - request: createTestAllocRequest("default", "test-workload", "", ""), + request: createTestAllocRequest("", ""), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), - createTestGPU("gpu-2", "default", "A100", constants.PhasePending, 150.0, 40.0), - createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), - createTestGPU("gpu-4", "default", "A100", constants.PhaseFailed, 150.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "A100", constants.PhasePending, 150.0, 40.0), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-4", "A100", constants.PhaseFailed, 150.0, 40.0), }, expectedCount: 3, description: "Should return GPUs in Running and Pending phases", }, { name: "custom CEL expression - filter by available TFLOPS", - request: createTestAllocRequest("default", "test-workload", "", "gpu.available.tflops > 120.0"), + request: createTestAllocRequest("", "gpu.available.tflops > 120.0"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", constants.PhaseRunning, 100.0, 32.0), - createTestGPU("gpu-3", "default", "H100", constants.PhaseRunning, 200.0, 80.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), + createTestGPU("gpu-3", "H100", constants.PhaseRunning, 200.0, 80.0), }, expectedCount: 2, description: "Should filter GPUs with TFLOPS > 120 and Running/Pending phase", }, { name: "custom CEL expression - filter by available VRAM", - request: createTestAllocRequest("default", "test-workload", "", "gpu.available.vram > 35000000000"), // > 35GB in bytes + request: createTestAllocRequest("", "gpu.available.vram > 35000000000"), // > 35GB in bytes gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), // 40GB - createTestGPU("gpu-2", "default", "V100", constants.PhaseRunning, 100.0, 32.0), // 32GB - createTestGPU("gpu-3", "default", "H100", constants.PhaseRunning, 200.0, 80.0), // 80GB + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), // 40GB + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), // 32GB + createTestGPU("gpu-3", "H100", constants.PhaseRunning, 200.0, 80.0), // 80GB }, expectedCount: 2, description: "Should filter GPUs with VRAM > 35GB and Running/Pending phase", }, { name: "combined model and custom CEL expression", - request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0"), + request: createTestAllocRequest("A100", "gpu.available.tflops >= 150.0"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), - createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 120.0, 40.0), - createTestGPU("gpu-3", "default", "V100", constants.PhaseRunning, 160.0, 32.0), - createTestGPU("gpu-4", "default", "A100", constants.PhaseRunning, 180.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "A100", constants.PhaseRunning, 120.0, 40.0), + createTestGPU("gpu-3", "V100", 
constants.PhaseRunning, 160.0, 32.0), + createTestGPU("gpu-4", "A100", constants.PhaseRunning, 180.0, 40.0), }, expectedCount: 2, description: "Should filter A100 GPUs with TFLOPS >= 150 and Running/Pending phase", }, { name: "filter by labels", - request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['environment'] == 'production'"), + request: createTestAllocRequest("", "gpu.labels['environment'] == '"+testEnvProduction+"'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) - gpu1.Labels["environment"] = "production" - gpu2 := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1.Labels["environment"] = testEnvProduction + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Labels["environment"] = "development" - gpu3 := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0) - gpu3.Labels["environment"] = "production" + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu3.Labels["environment"] = testEnvProduction return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), expectedCount: 2, @@ -138,14 +144,14 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "filter by annotations", - request: createTestAllocRequest("default", "test-workload", "", "gpu.annotations['priority'] == 'critical'"), + request: createTestAllocRequest("", "gpu.annotations['priority'] == '"+testPriorCritical+"'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) - gpu1.Annotations["priority"] = "critical" - gpu2 := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1.Annotations["priority"] = testPriorCritical + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Annotations["priority"] = "low" - gpu3 := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0) - gpu3.Annotations["priority"] = "critical" + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu3.Annotations["priority"] = testPriorCritical return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), expectedCount: 2, @@ -153,15 +159,15 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "combined labels and annotations filter", - request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'"), + request: createTestAllocRequest("", "gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) gpu1.Labels["tier"] = "high-performance" - gpu1.Annotations["priority"] = "critical" - gpu2 := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1.Annotations["priority"] = testPriorCritical + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Labels["tier"] = "standard" gpu2.Annotations["priority"] = "critical" - gpu3 := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) 
gpu3.Labels["tier"] = "high-performance" gpu3.Annotations["priority"] = "low" return []*tfv1.GPU{gpu1, gpu2, gpu3} @@ -171,20 +177,20 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "empty GPU list", - request: createTestAllocRequest("default", "test-workload", "A100", ""), + request: createTestAllocRequest("A100", ""), gpus: []*tfv1.GPU{}, expectedCount: 0, description: "Should handle empty GPU list gracefully", }, { name: "complex combined expression with model, resources, and metadata", - request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'"), + request: createTestAllocRequest("A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 180.0, 40.0) - gpu1.Labels["environment"] = "production" - gpu2 := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 120.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 180.0, 40.0) + gpu1.Labels["environment"] = testEnvProduction + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 120.0, 40.0) gpu2.Labels["environment"] = "production" - gpu3 := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 200.0, 40.0) + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 200.0, 40.0) gpu3.Labels["environment"] = "development" return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), @@ -193,27 +199,27 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "filter by running apps - no running apps", - request: createTestAllocRequest("default", "test-workload", "", "size(gpu.runningApps) == 0"), + request: createTestAllocRequest("", "size(gpu.runningApps) == 0"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), func() *tfv1.GPU { - gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ {Name: "app1", Namespace: "default", Count: 1}, } return gpu }(), - createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), }, expectedCount: 2, description: "Should return GPUs with no running apps", }, { name: "filter by running apps - has specific app", - request: createTestAllocRequest("default", "test-workload", "", "gpu.runningApps.exists(app, app.name == 'training-job' && app.namespace == 'ml-team')"), + request: createTestAllocRequest("", "gpu.runningApps.exists(app, app.name == 'training-job' && app.namespace == 'ml-team')"), gpus: []*tfv1.GPU{ func() *tfv1.GPU { - gpu := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ {Name: "training-job", Namespace: "ml-team", Count: 2}, {Name: "other-job", Namespace: "default", Count: 1}, @@ -221,23 +227,23 @@ func TestCELFilter_NormalCases(t *testing.T) { return gpu }(), func() *tfv1.GPU { - gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ {Name: 
"other-job", Namespace: "ml-team", Count: 1}, } return gpu }(), - createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), }, expectedCount: 1, description: "Should return GPUs running specific training job", }, { name: "filter by running apps - count threshold", - request: createTestAllocRequest("default", "test-workload", "", "gpu.runningApps.all(app, app.count <= 2) && size(gpu.runningApps) > 0"), + request: createTestAllocRequest("", "gpu.runningApps.all(app, app.count <= 2) && size(gpu.runningApps) > 0"), gpus: []*tfv1.GPU{ func() *tfv1.GPU { - gpu := createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ {Name: "job1", Namespace: "default", Count: 1}, {Name: "job2", Namespace: "default", Count: 2}, @@ -245,47 +251,47 @@ func TestCELFilter_NormalCases(t *testing.T) { return gpu }(), func() *tfv1.GPU { - gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ {Name: "job1", Namespace: "default", Count: 5}, // Count > 2 } return gpu }(), - createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps }, expectedCount: 1, description: "Should return GPUs where all running apps have count <= 2", }, { name: "filter by running apps - complex condition", - request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && (size(gpu.runningApps) == 0 || gpu.runningApps.all(app, app.namespace != 'restricted'))"), + request: createTestAllocRequest("A100", "gpu.available.tflops >= 150.0 && (size(gpu.runningApps) == 0 || gpu.runningApps.all(app, app.namespace != 'restricted'))"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps func() *tfv1.GPU { - gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ {Name: "job1", Namespace: "allowed", Count: 1}, } return gpu }(), func() *tfv1.GPU { - gpu := createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ {Name: "job1", Namespace: "restricted", Count: 1}, // Restricted namespace } return gpu }(), - createTestGPU("gpu-4", "default", "V100", constants.PhaseRunning, 150.0, 40.0), // Wrong model + createTestGPU("gpu-4", "V100", constants.PhaseRunning, 150.0, 40.0), // Wrong model }, expectedCount: 2, description: "Should return A100 GPUs with sufficient resources and no restricted apps", }, { name: "filter by running apps - namespace isolation", - request: createTestAllocRequest("default", "test-workload", "", "!gpu.runningApps.exists(app, app.namespace == 'tenant-a')"), + request: createTestAllocRequest("", "!gpu.runningApps.exists(app, app.namespace == 'tenant-a')"), gpus: []*tfv1.GPU{ func() *tfv1.GPU { - gpu := createTestGPU("gpu-1", 
"default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ {Name: "job1", Namespace: "tenant-b", Count: 1}, {Name: "job2", Namespace: "shared", Count: 1}, @@ -293,14 +299,14 @@ func TestCELFilter_NormalCases(t *testing.T) { return gpu }(), func() *tfv1.GPU { - gpu := createTestGPU("gpu-2", "default", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ {Name: "job1", Namespace: "tenant-a", Count: 1}, // Should be excluded {Name: "job2", Namespace: "tenant-b", Count: 1}, } return gpu }(), - createTestGPU("gpu-3", "default", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps }, expectedCount: 2, description: "Should return GPUs not running apps from tenant-a", @@ -341,11 +347,11 @@ func TestCELFilter_EdgeAndExceptionCases(t *testing.T) { t.Run("CEL expressions edge cases", func(t *testing.T) { // Test GPUs for execution testGPUs := []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", constants.PhaseRunning, 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", constants.PhaseRunning, 100.0, 32.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), } // Add GPU with nil resources - gpuWithNilResources := createTestGPU("gpu-nil", "default", "A100", constants.PhaseRunning, 0, 0) + gpuWithNilResources := createTestGPU("gpu-nil", "A100", constants.PhaseRunning, 0, 0) gpuWithNilResources.Status.Available = nil testGPUs = append(testGPUs, gpuWithNilResources) @@ -455,7 +461,7 @@ func TestCELFilter_EdgeAndExceptionCases(t *testing.T) { cache, err := NewExpressionCache(10, 5*time.Minute) require.NoError(t, err) - request := createTestAllocRequest("default", "test-workload", "", tt.expression) + request := createTestAllocRequest("", tt.expression) celFilter, err := NewCELFilter(request, cache) if tt.shouldFail { From de5b0c1df59d94df33e233cd069a131482ba7e08 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Sep 2025 12:14:52 +0000 Subject: [PATCH 22/34] chore(deps): bump github.com/aws/aws-sdk-go-v2 from 1.38.3 to 1.39.0 (#362) --- go.mod | 4 ++-- go.sum | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index e8da7faf..ff05257d 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/DATA-DOG/go-sqlmock v1.5.2 github.com/NVIDIA/go-nvml v0.13.0-1 github.com/aliyun/alibaba-cloud-sdk-go v1.63.107 - github.com/aws/aws-sdk-go-v2 v1.38.3 + github.com/aws/aws-sdk-go-v2 v1.39.0 github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0 github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5 github.com/gin-contrib/gzip v1.2.3 @@ -30,6 +30,7 @@ require ( gorm.io/gorm v1.30.3 k8s.io/api v0.34.0 k8s.io/apimachinery v0.34.0 + k8s.io/apiserver v0.34.0 k8s.io/client-go v0.34.0 k8s.io/component-base v0.34.0 k8s.io/component-helpers v0.34.0 @@ -174,7 +175,6 @@ require ( gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.34.0 // indirect - k8s.io/apiserver v0.34.0 // indirect k8s.io/cloud-provider v0.34.0 // indirect k8s.io/controller-manager v0.34.0 // indirect k8s.io/csi-translation-lib v0.34.0 // indirect diff 
--git a/go.sum b/go.sum index 446e3470..b924ab09 100644 --- a/go.sum +++ b/go.sum @@ -24,8 +24,8 @@ github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYW github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= -github.com/aws/aws-sdk-go-v2 v1.38.3 h1:B6cV4oxnMs45fql4yRH+/Po/YU+597zgWqvDpYMturk= -github.com/aws/aws-sdk-go-v2 v1.38.3/go.mod h1:sDioUELIUO9Znk23YVmIk86/9DOpkbyyVb1i/gUNFXY= +github.com/aws/aws-sdk-go-v2 v1.39.0 h1:xm5WV/2L4emMRmMjHFykqiA4M/ra0DJVSWUkDyBjbg4= +github.com/aws/aws-sdk-go-v2 v1.39.0/go.mod h1:sDioUELIUO9Znk23YVmIk86/9DOpkbyyVb1i/gUNFXY= github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6 h1:uF68eJA6+S9iVr9WgX1NaRGyQ/6MdIyc4JNUo6TN1FA= github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6/go.mod h1:qlPeVZCGPiobx8wb1ft0GHT5l+dc6ldnwInDFaMvC7Y= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6 h1:pa1DEC6JoI0zduhZePp3zmhWvk/xxm4NB8Hy/Tlsgos= From 3d9b2c43340e0e63f7384c7399fabe306342ed9d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Sep 2025 12:50:55 +0000 Subject: [PATCH 23/34] chore(deps): bump gorm.io/gorm from 1.30.3 to 1.31.0 (#361) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index ff05257d..f70cbce3 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( gomodules.xyz/jsonpatch/v2 v2.5.0 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gorm.io/driver/mysql v1.6.0 - gorm.io/gorm v1.30.3 + gorm.io/gorm v1.31.0 k8s.io/api v0.34.0 k8s.io/apimachinery v0.34.0 k8s.io/apiserver v0.34.0 diff --git a/go.sum b/go.sum index b924ab09..b5f04e5f 100644 --- a/go.sum +++ b/go.sum @@ -482,8 +482,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gorm.io/driver/mysql v1.6.0 h1:eNbLmNTpPpTOVZi8MMxCi2aaIm0ZpInbORNXDwyLGvg= gorm.io/driver/mysql v1.6.0/go.mod h1:D/oCC2GWK3M/dqoLxnOlaNKmXz8WNTfcS9y5ovaSqKo= -gorm.io/gorm v1.30.3 h1:QiG8upl0Sg9ba2Zatfjy0fy4It2iNBL2/eMdvEkdXNs= -gorm.io/gorm v1.30.3/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE= +gorm.io/gorm v1.31.0 h1:0VlycGreVhK7RF/Bwt51Fk8v0xLiiiFdbGDPIZQ7mJY= +gorm.io/gorm v1.31.0/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs= k8s.io/api v0.34.0 h1:L+JtP2wDbEYPUeNGbeSa/5GwFtIA662EmT2YSLOkAVE= k8s.io/api v0.34.0/go.mod h1:YzgkIzOOlhl9uwWCZNqpw6RJy9L2FK4dlJeayUoydug= k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc= From ec36d4ad845dac0fa788ba52f6f74f2346aac0e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Sep 2025 12:59:46 +0000 Subject: [PATCH 24/34] chore(deps): bump k8s.io/client-go from 0.34.0 to 0.34.1 (#364) --- go.mod | 6 +++--- go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index f70cbce3..991d385c 100644 --- a/go.mod +++ b/go.mod @@ -28,10 +28,10 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.2.1 gorm.io/driver/mysql v1.6.0 gorm.io/gorm v1.31.0 - k8s.io/api v0.34.0 - k8s.io/apimachinery v0.34.0 + k8s.io/api v0.34.1 + k8s.io/apimachinery v0.34.1 k8s.io/apiserver v0.34.0 - k8s.io/client-go v0.34.0 + k8s.io/client-go v0.34.1 
k8s.io/component-base v0.34.0 k8s.io/component-helpers v0.34.0 k8s.io/klog/v2 v2.130.1 diff --git a/go.sum b/go.sum index b5f04e5f..59cbea42 100644 --- a/go.sum +++ b/go.sum @@ -484,16 +484,16 @@ gorm.io/driver/mysql v1.6.0 h1:eNbLmNTpPpTOVZi8MMxCi2aaIm0ZpInbORNXDwyLGvg= gorm.io/driver/mysql v1.6.0/go.mod h1:D/oCC2GWK3M/dqoLxnOlaNKmXz8WNTfcS9y5ovaSqKo= gorm.io/gorm v1.31.0 h1:0VlycGreVhK7RF/Bwt51Fk8v0xLiiiFdbGDPIZQ7mJY= gorm.io/gorm v1.31.0/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs= -k8s.io/api v0.34.0 h1:L+JtP2wDbEYPUeNGbeSa/5GwFtIA662EmT2YSLOkAVE= -k8s.io/api v0.34.0/go.mod h1:YzgkIzOOlhl9uwWCZNqpw6RJy9L2FK4dlJeayUoydug= +k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= +k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc= k8s.io/apiextensions-apiserver v0.34.0/go.mod h1:hLI4GxE1BDBy9adJKxUxCEHBGZtGfIg98Q+JmTD7+g0= -k8s.io/apimachinery v0.34.0 h1:eR1WO5fo0HyoQZt1wdISpFDffnWOvFLOOeJ7MgIv4z0= -k8s.io/apimachinery v0.34.0/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= +k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= k8s.io/apiserver v0.34.0 h1:Z51fw1iGMqN7uJ1kEaynf2Aec1Y774PqU+FVWCFV3Jg= k8s.io/apiserver v0.34.0/go.mod h1:52ti5YhxAvewmmpVRqlASvaqxt0gKJxvCeW7ZrwgazQ= -k8s.io/client-go v0.34.0 h1:YoWv5r7bsBfb0Hs2jh8SOvFbKzzxyNo0nSb0zC19KZo= -k8s.io/client-go v0.34.0/go.mod h1:ozgMnEKXkRjeMvBZdV1AijMHLTh3pbACPvK7zFR+QQY= +k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= +k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= k8s.io/cloud-provider v0.34.0 h1:OgrNE+WSgfvDBQf6WS9qFM7Xr37bc0Og5kkL4hyWDmU= k8s.io/cloud-provider v0.34.0/go.mod h1:JbMa0t6JIGDMLI7Py6bdp9TN6cfuHrWGq+E/X+Ljkmo= k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8= From 40b98a8f40fe5be2b8f4ddeee9d71babec0453fe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Sep 2025 13:47:37 +0000 Subject: [PATCH 25/34] chore(deps): bump k8s.io/component-helpers from 0.34.0 to 0.34.1 (#360) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 991d385c..e955a347 100644 --- a/go.mod +++ b/go.mod @@ -33,7 +33,7 @@ require ( k8s.io/apiserver v0.34.0 k8s.io/client-go v0.34.1 k8s.io/component-base v0.34.0 - k8s.io/component-helpers v0.34.0 + k8s.io/component-helpers v0.34.1 k8s.io/klog/v2 v2.130.1 k8s.io/kube-scheduler v0.34.0 k8s.io/kubernetes v1.34.0 diff --git a/go.sum b/go.sum index 59cbea42..d46ef39a 100644 --- a/go.sum +++ b/go.sum @@ -498,8 +498,8 @@ k8s.io/cloud-provider v0.34.0 h1:OgrNE+WSgfvDBQf6WS9qFM7Xr37bc0Og5kkL4hyWDmU= k8s.io/cloud-provider v0.34.0/go.mod h1:JbMa0t6JIGDMLI7Py6bdp9TN6cfuHrWGq+E/X+Ljkmo= k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8= k8s.io/component-base v0.34.0/go.mod h1:RSCqUdvIjjrEm81epPcjQ/DS+49fADvGSCkIP3IC6vg= -k8s.io/component-helpers v0.34.0 h1:5T7P9XGMoUy1JDNKzHf0p/upYbeUf8ZaSf9jbx0QlIo= -k8s.io/component-helpers v0.34.0/go.mod h1:kaOyl5tdtnymriYcVZg4uwDBe2d1wlIpXyDkt6sVnt4= +k8s.io/component-helpers v0.34.1 h1:gWhH3CCdwAx5P3oJqZKb4Lg5FYZTWVbdWtOI8n9U4XY= +k8s.io/component-helpers v0.34.1/go.mod h1:4VgnUH7UA/shuBur+OWoQC0xfb69sy/93ss0ybZqm3c= k8s.io/controller-manager v0.34.0 
h1:oCHoqS8dcFp7zDSu7HUvTpakq3isSxil3GprGGlJMsE= k8s.io/controller-manager v0.34.0/go.mod h1:XFto21U+Mm9BT8r/Jd5E4tHCGtwjKAUFOuDcqaj2VK0= k8s.io/csi-translation-lib v0.34.0 h1:WhCkq35XATZ+x6NKqI4u7XSYtmucuCN7jDk+mmm9XUU= From a45ba609eede9a2146098a93fa2ba32c96556ebe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Sep 2025 13:48:08 +0000 Subject: [PATCH 26/34] chore(deps): bump sigs.k8s.io/controller-runtime from 0.22.0 to 0.22.1 (#363) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e955a347..dc198d32 100644 --- a/go.mod +++ b/go.mod @@ -38,7 +38,7 @@ require ( k8s.io/kube-scheduler v0.34.0 k8s.io/kubernetes v1.34.0 k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d - sigs.k8s.io/controller-runtime v0.22.0 + sigs.k8s.io/controller-runtime v0.22.1 sigs.k8s.io/karpenter v1.6.2 sigs.k8s.io/yaml v1.6.0 ) diff --git a/go.sum b/go.sum index d46ef39a..0130fbbf 100644 --- a/go.sum +++ b/go.sum @@ -523,8 +523,8 @@ k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d/go.mod h1:OLgZIPagt7ERELqWJFomSt rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 h1:qPrZsv1cwQiFeieFlRqT627fVZ+tyfou/+S5S0H5ua0= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= -sigs.k8s.io/controller-runtime v0.22.0 h1:mTOfibb8Hxwpx3xEkR56i7xSjB+nH4hZG37SrlCY5e0= -sigs.k8s.io/controller-runtime v0.22.0/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= +sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg= +sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/karpenter v1.6.2 h1:WFayZ49CSOaDMku1iYBTsD3A9hOB2yU/U95VcSAJ8KM= From 5867f3ccc17a48ffc4cf19a2fd5f1a28deae6ede Mon Sep 17 00:00:00 2001 From: Joey Yang <14833440+Code2Life@users.noreply.github.com> Date: Wed, 17 Sep 2025 22:13:01 +0800 Subject: [PATCH 27/34] feat: preempt support for GPU workers (#366) * fix: gpu info update * feat: preempt scheduling, fix metrics scheduling bugs, add evict protection * fix: unit test issue * fix: preempt unit testing * fix: lint issue, add qos to priorityClassName converting --- .vscode/settings.json | 2 + api/v1/gpupool_types.go | 6 + api/v1/gpuresourcequota_types.go | 2 + charts/tensor-fusion/Chart.yaml | 2 +- .../crds/tensor-fusion.ai_gpupools.yaml | 6 + ...tensor-fusion.ai_tensorfusionclusters.yaml | 6 + .../templates/controller-deployment.yaml | 1 + .../templates/gpu-public-gpu-info.yaml | 18 +- .../templates/priorityclass.yaml | 23 ++ charts/tensor-fusion/values.yaml | 4 +- cmd/main.go | 2 +- .../crd/bases/tensor-fusion.ai_gpupools.yaml | 6 + ...tensor-fusion.ai_tensorfusionclusters.yaml | 6 + internal/config/rules.go | 2 +- internal/constants/constants.go | 6 +- internal/controller/pod_controller.go | 10 +- internal/controller/pod_controller_test.go | 3 - .../tensorfusioncluster_controller.go | 36 +-- internal/gpuallocator/gpuallocator.go | 104 +++++- internal/metrics/recorder.go | 125 ++++++-- internal/metrics/types.go | 4 + internal/quota/quota_store.go | 71 +++-- .../scheduler/gpuresources/gpuresources.go | 108 ++++++- .../gpuresources/gpuresources_test.go | 13 
+- internal/utils/compose.go | 1 + internal/webhook/v1/pod_webhook.go | 23 +- internal/webhook/v1/tf_parser.go | 2 - patches/scheduler-pdb-1.patch | 31 +- test/sched/gpufit_bench_test.go | 1 - test/sched/preemption_test.go | 299 ++++++++++++++++++ test/sched/scheduler_bench_test.go | 3 +- test/sched/setup.go | 32 +- 32 files changed, 833 insertions(+), 125 deletions(-) create mode 100644 charts/tensor-fusion/templates/priorityclass.yaml create mode 100644 test/sched/preemption_test.go diff --git a/.vscode/settings.json b/.vscode/settings.json index 2a261510..a5da5620 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -47,6 +47,7 @@ "envtest", "essd", "Eventf", + "evictable", "featuregate", "finalizer", "Finalizers", @@ -133,6 +134,7 @@ "schedulingconfigtemplate", "schedulingconfigtemplates", "schedulingcorev", + "schedv", "serviceaccount", "shirou", "shortuuid", diff --git a/api/v1/gpupool_types.go b/api/v1/gpupool_types.go index 08d139b5..ca9224c4 100644 --- a/api/v1/gpupool_types.go +++ b/api/v1/gpupool_types.go @@ -238,6 +238,12 @@ type QosConfig struct { Definitions []QosDefinition `json:"definitions,omitempty"` DefaultQoS QoSLevel `json:"defaultQoS,omitempty"` Pricing []QosPricing `json:"pricing,omitempty"` + + // Eviction protection price ratio applied to cost calculation during protection period + // This multiplier increases pricing for protected workloads to discourage preemption + // +optional + // +kubebuilder:default="1.2" + EvictionProtectionPriceRatio string `json:"evictionProtectionPriceRatio,omitempty"` } type QosDefinition struct { diff --git a/api/v1/gpuresourcequota_types.go b/api/v1/gpuresourcequota_types.go index 1b28520a..bb8a5ff8 100644 --- a/api/v1/gpuresourcequota_types.go +++ b/api/v1/gpuresourcequota_types.go @@ -186,6 +186,8 @@ type AllocRequest struct { // record the pod meta for quota check PodMeta metav1.ObjectMeta + + QoS QoSLevel } func (p *AllocRequest) Clone() fwk.StateData { diff --git a/charts/tensor-fusion/Chart.yaml b/charts/tensor-fusion/Chart.yaml index d2dc9f06..042d05c2 100644 --- a/charts/tensor-fusion/Chart.yaml +++ b/charts/tensor-fusion/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.5.8 +version: 1.5.9 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml index 8bc65e66..2158529c 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml @@ -562,6 +562,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml index 45bc9a47..496541bc 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml @@ -629,6 +629,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/charts/tensor-fusion/templates/controller-deployment.yaml b/charts/tensor-fusion/templates/controller-deployment.yaml index ca09a6a1..c16c4aab 100644 --- a/charts/tensor-fusion/templates/controller-deployment.yaml +++ b/charts/tensor-fusion/templates/controller-deployment.yaml @@ -32,6 +32,7 @@ spec: {{- end }} serviceAccountName: {{ include "tensor-fusion.serviceAccountName" . }} enableServiceLinks: false + priorityClassName: "system-cluster-critical" containers: - name: controller image: "{{ .Values.controller.image.repository }}:{{ .Values.controller.image.tag | default .Chart.AppVersion }}" diff --git a/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml b/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml index d473fcfa..2c88583b 100644 --- a/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml +++ b/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml @@ -45,6 +45,18 @@ data: costPerHour: 1.64 fp16TFlops: 312 + - model: A100_PCIe_40GB + fullModelName: "NVIDIA A100-PCIE-40GB" + vendor: NVIDIA + costPerHour: 1.64 + fp16TFlops: 312 + + - model: A100_PCIe_80GB + fullModelName: "NVIDIA A100-PCIE-80GB" + vendor: NVIDIA + costPerHour: 1.64 + fp16TFlops: 312 + - model: A100_SXM_40G fullModelName: "NVIDIA A100-SXM4-40GB" vendor: NVIDIA @@ -70,13 +82,13 @@ data: fp16TFlops: 312 - model: A800_PCIe_80G - fullModelName: "NVIDIA A800 80GB PCIe" + fullModelName: "NVIDIA A800-PCIE-80GB" vendor: NVIDIA costPerHour: 1.64 fp16TFlops: 312 - model: A800_PCIe_40G - fullModelName: "NVIDIA A800 40GB PCIe" + fullModelName: "NVIDIA A800-PCIE-40GB" vendor: NVIDIA costPerHour: 1.64 fp16TFlops: 312 @@ -95,7 +107,7 @@ data: fp16TFlops: 125 - model: A40 - fullModelName: "NVIDIA A40 48GB PCIe" + fullModelName: "NVIDIA A40-PCIE-48GB" vendor: NVIDIA costPerHour: 0.4 fp16TFlops: 149.7 diff --git a/charts/tensor-fusion/templates/priorityclass.yaml b/charts/tensor-fusion/templates/priorityclass.yaml new file mode 100644 index 00000000..e1f493b8 --- /dev/null +++ b/charts/tensor-fusion/templates/priorityclass.yaml @@ -0,0 +1,23 @@ +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: tensor-fusion-critical +value: 100000 +globalDefault: false 
+description: "TensorFusion critical priority" +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: tensor-fusion-high +value: 10000 +globalDefault: false +description: "TensorFusion high priority" +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: tensor-fusion-medium +value: 0 +globalDefault: false +description: "TensorFusion medium priority" diff --git a/charts/tensor-fusion/values.yaml b/charts/tensor-fusion/values.yaml index 6b9fcc0c..2c06aba6 100644 --- a/charts/tensor-fusion/values.yaml +++ b/charts/tensor-fusion/values.yaml @@ -169,8 +169,8 @@ schedulerConfig: kind: KubeSchedulerConfiguration clientConnection: kubeconfig: "" - qps: 50 - burst: 100 + qps: 1000 + burst: 2000 profiles: # Refer: https://kubernetes.io/docs/reference/scheduling/config/ - schedulerName: tensor-fusion-scheduler diff --git a/cmd/main.go b/cmd/main.go index f00a6b2e..c0bd95ea 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -586,7 +586,7 @@ func startMetricsRecorder( // Worker level map will be updated by cluster reconcile // Key is poolName, second level key is QoS level - WorkerUnitPriceMap: make(map[string]map[string]metrics.RawBillingPricing), + WorkerUnitPriceMap: make(map[string]map[string]metrics.RawBillingPricing, 8), } if enableLeaderElection { go func() { diff --git a/config/crd/bases/tensor-fusion.ai_gpupools.yaml b/config/crd/bases/tensor-fusion.ai_gpupools.yaml index 8bc65e66..2158529c 100644 --- a/config/crd/bases/tensor-fusion.ai_gpupools.yaml +++ b/config/crd/bases/tensor-fusion.ai_gpupools.yaml @@ -562,6 +562,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml b/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml index 45bc9a47..496541bc 100644 --- a/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml +++ b/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml @@ -629,6 +629,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/internal/config/rules.go b/internal/config/rules.go index dd3713bd..8bbfb556 100644 --- a/internal/config/rules.go +++ b/internal/config/rules.go @@ -132,7 +132,7 @@ func (r *AlertRule) toPostableAlert(alertQueryResult map[string]interface{}, sta labels := LabelSet{ "alertname": r.Name, "severity": r.Severity, - "job": constants.AlertJobName, + "job": constants.TensorFusionSystemName, "instance": instance, } annotations := LabelSet{ diff --git a/internal/constants/constants.go b/internal/constants/constants.go index 81470022..8ccbcba1 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -107,6 +107,8 @@ const ( // For grey release TensorFusionEnabledReplicasAnnotation = Domain + "/enabled-replicas" TensorFusionDefaultPoolKeyAnnotation = Domain + "/is-default-pool" + // Eviction protection annotation for controlling pod eviction timing + EvictionProtectionAnnotation = Domain + "/eviction-protection" NamespaceDefaultVal = 
"tensor-fusion-sys" @@ -176,7 +178,7 @@ const TFDataPath = "/run/tensor-fusion" const TFDataPathWorkerExpr = "shm/$(POD_NAMESPACE)/$(POD_NAME)" const DataVolumeName = "tf-data" const TensorFusionPoolManualCompaction = Domain + "/manual-compaction" -const AlertJobName = "tensor-fusion" +const TensorFusionSystemName = "tensor-fusion" const ( LeaderInfoConfigMapName = "tensor-fusion-operator-leader-info" @@ -202,3 +204,5 @@ const ExtraVerificationInfoPodIDKey = "authentication.kubernetes.io/pod-uid" const SchedulerSimulationKey = "simulate-schedule" const MobileGpuClockSpeedMultiplier = 0.75 +const DefaultEvictionProtectionPriceRatio = 1.2 +const NodeCriticalPriorityClassName = "system-node-critical" diff --git a/internal/controller/pod_controller.go b/internal/controller/pod_controller.go index ab335948..a7bf7c2f 100644 --- a/internal/controller/pod_controller.go +++ b/internal/controller/pod_controller.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "strconv" + "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" @@ -66,6 +67,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R if err := r.Get(ctx, req.NamespacedName, pod); err != nil { if errors.IsNotFound(err) { r.Allocator.DeallocByPodIdentifier(ctx, req.NamespacedName) + metrics.RemoveWorkerMetrics(req.Name, time.Now()) log.Info("Released GPU resources when pod deleted", "pod", req.NamespacedName) return ctrl.Result{}, nil } @@ -106,8 +108,9 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R } if pod.Labels[constants.LabelComponent] == constants.ComponentWorker { - metrics.SetWorkerMetricsByWorkload(pod) - + if pod.DeletionTimestamp.IsZero() { + metrics.SetWorkerMetricsByWorkload(pod) + } shouldReturn, err := r.handleWorkerPodFinalizer(ctx, pod) if err != nil { return ctrl.Result{}, err @@ -148,7 +151,8 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R func (r *PodReconciler) handleWorkerPodFinalizer(ctx context.Context, pod *corev1.Pod) (bool, error) { // Handle our GPU resource cleanup finalizer shouldReturn, err := utils.HandleFinalizer(ctx, pod, r.Client, func(ctx context.Context, obj *corev1.Pod) (bool, error) { - metrics.RemoveWorkerMetrics(pod.Name, pod.DeletionTimestamp.Time) + // if the Pod keep terminating, should update deletion timestamp for raw cost calculation + metrics.RemoveWorkerMetrics(pod.Name, time.Now()) counter := &v1.TensorFusionPodCounter{Client: r.Client} if err := counter.Decrease(ctx, pod); err != nil { return false, err diff --git a/internal/controller/pod_controller_test.go b/internal/controller/pod_controller_test.go index b36f140f..cf53d119 100644 --- a/internal/controller/pod_controller_test.go +++ b/internal/controller/pod_controller_test.go @@ -230,9 +230,6 @@ var _ = Describe("Pod Controller", func() { }, } _ = k8sClient.Delete(ctx, connection) - Eventually(func() error { - return k8sClient.Get(ctx, client.ObjectKeyFromObject(connection), connection) - }).Should(Satisfy(errors.IsNotFound)) }) It("should successfully create TensorFusion connection for client pod", func() { diff --git a/internal/controller/tensorfusioncluster_controller.go b/internal/controller/tensorfusioncluster_controller.go index d4f464c3..3c64429e 100644 --- a/internal/controller/tensorfusioncluster_controller.go +++ b/internal/controller/tensorfusioncluster_controller.go @@ -20,7 +20,6 @@ import ( "context" "fmt" "strconv" - "strings" "sync" "golang.org/x/time/rate" @@ -304,7 
+303,7 @@ func (r *TensorFusionClusterReconciler) reconcileGPUPool(ctx context.Context, tf } err = r.Create(ctx, gpupool) anyPoolChanged = true - r.updateMetricsRecorder(ctx, gpupool) + r.MetricsRecorder.UpdateMetricsRecorder(gpupool, true) if err != nil { errors = append(errors, fmt.Errorf("failed to create GPUPool %s: %w", key, err)) continue @@ -327,7 +326,7 @@ func (r *TensorFusionClusterReconciler) reconcileGPUPool(ctx context.Context, tf } anyPoolChanged = true } - r.updateMetricsRecorder(ctx, existingPool) + r.MetricsRecorder.UpdateMetricsRecorder(existingPool, specChanged) } } @@ -440,34 +439,3 @@ func (r *TensorFusionClusterReconciler) SetupWithManager(mgr ctrl.Manager, addLi Owns(&tfv1.GPUPool{}). Complete(r) } - -// Update metrics recorder's raw billing map -func (r *TensorFusionClusterReconciler) updateMetricsRecorder(ctx context.Context, pool *tfv1.GPUPool) { - const dollarSign = "$" - log := log.FromContext(ctx) - if pool.Spec.QosConfig == nil { - log.Info("QosConfig is nil, skip updating metrics recorder", "pool", pool.Name) - return - } - - qosConfig := pool.Spec.QosConfig - if _, ok := r.MetricsRecorder.WorkerUnitPriceMap[pool.Name]; !ok { - r.MetricsRecorder.WorkerUnitPriceMap[pool.Name] = make(map[string]metrics.RawBillingPricing) - } - pricingDetail := r.MetricsRecorder.WorkerUnitPriceMap[pool.Name] - for _, pricing := range qosConfig.Pricing { - tflopsPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerFP16TFlopsPerHour, dollarSign), 64) - vramPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerGBOfVRAMPerHour, dollarSign), 64) - limitOverRequestChargingRatio, _ := strconv.ParseFloat(pricing.LimitsOverRequestsChargingRatio, 64) - - pricingDetail[string(pricing.Qos)] = metrics.RawBillingPricing{ - TflopsPerSecond: tflopsPerHour / float64(3600), - VramPerSecond: vramPerHour / float64(3600), - - TflopsOverRequestPerSecond: tflopsPerHour / float64(3600) * limitOverRequestChargingRatio, - VramOverRequestPerSecond: vramPerHour / float64(3600) * limitOverRequestChargingRatio, - } - } - - log.V(5).Info("Updated metrics recorder", "pool", pool.Name, "pricing", pricingDetail) -} diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index d2259a34..fb475377 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -192,6 +192,37 @@ func (s *GpuAllocator) Filter( return filteredGPUs, filterDetails, nil } +func (s *GpuAllocator) FilterWithPreempt( + req *tfv1.AllocRequest, + preemptAllocRequests []*tfv1.AllocRequest, +) ([]*tfv1.GPU, []filter.FilterDetail, error) { + toFilterGPUs := []*tfv1.GPU{} + for _, preemptAllocRequest := range preemptAllocRequests { + for _, gpuName := range preemptAllocRequest.GPUNames { + gpu := s.gpuStore[types.NamespacedName{Name: gpuName}] + if gpu == nil { + return nil, nil, fmt.Errorf("gpu %s not found", gpuName) + } + gpuCopy := gpu.DeepCopy() + gpuCopy.Status.Available.Tflops.Add(preemptAllocRequest.Request.Tflops) + gpuCopy.Status.Available.Vram.Add(preemptAllocRequest.Request.Vram) + toFilterGPUs = append(toFilterGPUs, gpuCopy) + } + } + + filterRegistry := s.filterRegistry.With(filter.NewResourceFilter(req.Request)) + // Add GPU model filter if specified + if req.GPUModel != "" { + filterRegistry = filterRegistry.With(filter.NewGPUModelFilter(req.GPUModel)) + } + // No need to check count and other filters since it's always in the same node during each preempt trial + filteredGPUs, filterDetails, err := filterRegistry.Apply(s.ctx, 
req.WorkloadNameNamespace, toFilterGPUs, false) + if err != nil { + return nil, nil, fmt.Errorf("apply filters: %w", err) + } + return filteredGPUs, filterDetails, nil +} + func (s *GpuAllocator) Select(req *tfv1.AllocRequest, filteredGPUs []*tfv1.GPU) ([]*tfv1.GPU, error) { pool := &tfv1.GPUPool{} if err := s.Get(s.ctx, client.ObjectKey{Name: req.PoolName}, pool); err != nil { @@ -314,9 +345,8 @@ func (s *GpuAllocator) Alloc(req *tfv1.AllocRequest) ([]*tfv1.GPU, error) { func (s *GpuAllocator) CheckQuotaAndFilter(ctx context.Context, req *tfv1.AllocRequest, isSimulateSchedule bool) ([]*tfv1.GPU, []filter.FilterDetail, error) { <-s.initializedCh - // Fast quota check (fail fast if quota insufficient) if err := s.quotaStore.CheckQuotaAvailable(req.WorkloadNameNamespace.Namespace, req); err != nil { - return nil, nil, fmt.Errorf("quota check failed: %w", err) + return nil, nil, err } // Get GPUs from the pool using the in-memory store @@ -935,7 +965,7 @@ func (s *GpuAllocator) handleGPUUpdate(ctx context.Context, gpu *tfv1.GPU) { log.V(6).Info("Updated GPU in store (new entry)", "name", key.Name, "phase", gpu.Status.Phase) } - s.addOrUpdateGPUMaps(gpu) + s.addOrUpdateGPUMaps(s.gpuStore[key]) } func (s *GpuAllocator) addOrUpdateGPUMaps(gpuInMem *tfv1.GPU) { @@ -1138,6 +1168,68 @@ func (s *GpuAllocator) ReconcileAllocationState() { }) } +func (s *GpuAllocator) ReconcileAllocationStateForTesting() { + s.reconcileAllocationState() +} + +func (s *GpuAllocator) CheckQuotaAndFilterSingleNodePreempt( + nodeName string, allocReq *tfv1.AllocRequest, toPreemptPods sets.Set[types.NamespacedName], +) error { + <-s.initializedCh + // Only need to check total quotas when preempting + toPreemptUsage := &tfv1.GPUResourceUsage{ + Requests: tfv1.Resource{ + Tflops: resource.Quantity{}, + Vram: resource.Quantity{}, + }, + Limits: tfv1.Resource{ + Tflops: resource.Quantity{}, + Vram: resource.Quantity{}, + }, + } + workers := s.nodeWorkerStore[nodeName] + preemptAllocRequests := make([]*tfv1.AllocRequest, 0, len(workers)) + for workerName := range workers { + if !toPreemptPods.Has(workerName) { + continue + } + podUID := s.podNamespaceNsToPodUID[workerName.String()] + if podUID == "" { + continue + } + existingAllocation := s.uniqueAllocation[podUID] + if existingAllocation == nil { + continue + } + toPreemptUsage.Requests.Tflops.Add(existingAllocation.Request.Tflops) + toPreemptUsage.Requests.Vram.Add(existingAllocation.Request.Vram) + toPreemptUsage.Limits.Tflops.Add(existingAllocation.Limit.Tflops) + toPreemptUsage.Limits.Vram.Add(existingAllocation.Limit.Vram) + preemptAllocRequests = append(preemptAllocRequests, existingAllocation) + } + + if log.FromContext(s.ctx).V(5).Enabled() { + log.FromContext(s.ctx).V(5).Info("Preempting node and check quotas", "nodeName", nodeName, "toPreemptUsage", toPreemptUsage) + } + + if err := s.quotaStore.CheckTotalQuotaRelaxed(allocReq, toPreemptUsage); err != nil { + return fmt.Errorf("quota check failed during preempt: %w", err) + } + + // Get GPUs from the pool using the in-memory store + if allocReq.PoolName == "" { + return fmt.Errorf("GPU Pool name is empty, can not find GPUs during preempt") + } + filteredGPUs, _, err := s.FilterWithPreempt(allocReq, preemptAllocRequests) + if err != nil { + return err + } + if len(filteredGPUs) < int(allocReq.Count) { + return fmt.Errorf("no gpus available or valid in pool %s after filtering during preempt", allocReq.PoolName) + } + return nil +} + func (s *GpuAllocator) reconcileAllocationState() { ctx := s.ctx logger := 
log.FromContext(ctx) @@ -1314,6 +1406,11 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest return &tfv1.AllocRequest{}, "gpu count annotation is too large", nil } + qosLevel := tfv1.QoSLevel(pod.Annotations[constants.QoSLevelAnnotation]) + if qosLevel == "" { + qosLevel = tfv1.QoSMedium + } + allocRequest := tfv1.AllocRequest{ PoolName: pod.Annotations[constants.GpuPoolKey], Request: gpuRequestResource, @@ -1326,6 +1423,7 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest Namespace: pod.Namespace, }, PodMeta: pod.ObjectMeta, + QoS: qosLevel, } // for already allocated workers, set the GPU device IDs for further scaling and retrieval diff --git a/internal/metrics/recorder.go b/internal/metrics/recorder.go index f1c14a39..7f47bab6 100644 --- a/internal/metrics/recorder.go +++ b/internal/metrics/recorder.go @@ -4,6 +4,7 @@ import ( "io" "math" "strconv" + "strings" "sync" "time" @@ -19,15 +20,17 @@ import ( // Worker level metrics, include worker resources/costs status // map updated in one reconcile loop in single goroutine, thus no RW lock needed var workerMetricsLock sync.RWMutex -var workerMetricsMap = map[string]*WorkerResourceMetrics{} +var workerMetricsMap = make(map[string]*WorkerResourceMetrics, 200) // Node level metrics, include node allocation/costs status var nodeMetricsLock sync.RWMutex -var nodeMetricsMap = map[string]*NodeResourceMetrics{} +var nodeMetricsMap = make(map[string]*NodeResourceMetrics, 100) // Pool level metrics, include pool allocation/costs status var poolMetricsLock sync.RWMutex -var poolMetricsMap = map[string]*PoolResourceMetrics{} +var poolMetricsMap = make(map[string]*PoolResourceMetrics, 4) + +var settingLock sync.RWMutex var log = ctrl.Log.WithName("metrics-recorder") @@ -37,6 +40,9 @@ type MetricsRecorder struct { // Raw billing result for node and workers HourlyUnitPriceMap map[string]float64 + // Pool level eviction protection price ratio map, key is pool name + PoolEvictionProtectionPriceRatioMap map[string]string + // Worker level unit price map, key is pool name, second level key is QoS level WorkerUnitPriceMap map[string]map[string]RawBillingPricing } @@ -80,14 +86,16 @@ func SetWorkerMetricsByWorkload(pod *corev1.Pod) { // Initialize metrics if _, ok := workerMetricsMap[pod.Name]; !ok { workerMetricsMap[pod.Name] = &WorkerResourceMetrics{ - WorkerName: pod.Name, - WorkloadName: pod.Labels[constants.WorkloadKey], - PoolName: pod.Annotations[constants.GpuPoolKey], - Namespace: pod.Namespace, - QoS: pod.Annotations[constants.QoSLevelAnnotation], - podLabels: pod.Labels, - RawCost: 0, - LastRecordTime: time.Now(), + WorkerName: pod.Name, + WorkloadName: pod.Labels[constants.WorkloadKey], + PoolName: pod.Annotations[constants.GpuPoolKey], + Namespace: pod.Namespace, + QoS: pod.Annotations[constants.QoSLevelAnnotation], + podLabels: pod.Labels, + RawCost: 0, + LastRecordTime: time.Now(), + creationTime: pod.CreationTimestamp.Time, + evictionProtection: pod.Annotations[constants.EvictionProtectionAnnotation], } } @@ -287,13 +295,17 @@ func (mr *MetricsRecorder) Start() { // Clean up worker metrics that have been deleted go func() { for { - time.Sleep(5 * time.Minute) + time.Sleep(1 * time.Minute) workerMetricsLock.Lock() - for _, metrics := range workerMetricsMap { + var keysToDelete []string + for key, metrics := range workerMetricsMap { if metrics.deletionTimestamp != nil && !metrics.deletionTimestamp.IsZero() { - delete(workerMetricsMap, metrics.WorkerName) + keysToDelete = 
append(keysToDelete, key) } } + for _, key := range keysToDelete { + delete(workerMetricsMap, key) + } workerMetricsLock.Unlock() } }() @@ -306,13 +318,12 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { now := time.Now() enc := NewEncoder(config.GetGlobalConfig().MetricsFormat) - workerMetricsLock.RLock() + workerMetricsLock.Lock() activeWorkerCnt := 0 activeWorkerAndNodeByPool := map[string]*ActiveNodeAndWorker{} for _, metrics := range workerMetricsMap { - if metrics.deletionTimestamp != nil && !metrics.deletionTimestamp.IsZero() { metrics.RawCost = mr.getWorkerRawCost(metrics, metrics.deletionTimestamp.Sub(metrics.LastRecordTime)) } else { @@ -333,7 +344,9 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { nodeCnt: 0, } } - activeWorkerAndNodeByPool[metrics.PoolName].workerCnt++ + if metrics.deletionTimestamp == nil || metrics.deletionTimestamp.IsZero() { + activeWorkerAndNodeByPool[metrics.PoolName].workerCnt++ + } enc.StartLine("tf_worker_resources") enc.AddTag("namespace", metrics.Namespace) @@ -362,7 +375,7 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { enc.EndLine(now) } - workerMetricsLock.RUnlock() + workerMetricsLock.Unlock() nodeMetricsLock.RLock() @@ -439,7 +452,51 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { log.Info("metrics and raw billing recorded:", "workerCount", activeWorkerCnt, "nodeCount", len(nodeMetricsMap)) } +// Update metrics recorder's raw billing map +func (r *MetricsRecorder) UpdateMetricsRecorder(pool *tfv1.GPUPool, specChanged bool) { + const dollarSign = "$" + settingLock.Lock() + defer settingLock.Unlock() + if pool.Spec.QosConfig == nil { + log.Info("QosConfig is nil, skip updating metrics recorder", "pool", pool.Name) + return + } + + qosConfig := pool.Spec.QosConfig + if _, ok := r.WorkerUnitPriceMap[pool.Name]; !ok { + r.WorkerUnitPriceMap[pool.Name] = make(map[string]RawBillingPricing) + } + + if r.PoolEvictionProtectionPriceRatioMap == nil { + r.PoolEvictionProtectionPriceRatioMap = make(map[string]string, 4) + } + r.PoolEvictionProtectionPriceRatioMap[pool.Name] = qosConfig.EvictionProtectionPriceRatio + + pricingDetail := r.WorkerUnitPriceMap[pool.Name] + if !specChanged && len(pricingDetail) == 0 { + return + } + // Pricing potentially changed + for _, pricing := range qosConfig.Pricing { + tflopsPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerFP16TFlopsPerHour, dollarSign), 64) + vramPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerGBOfVRAMPerHour, dollarSign), 64) + limitOverRequestChargingRatio, _ := strconv.ParseFloat(pricing.LimitsOverRequestsChargingRatio, 64) + + pricingDetail[string(pricing.Qos)] = RawBillingPricing{ + TflopsPerSecond: tflopsPerHour / float64(3600), + VramPerSecond: vramPerHour / float64(3600), + + TflopsOverRequestPerSecond: tflopsPerHour / float64(3600) * limitOverRequestChargingRatio, + VramOverRequestPerSecond: vramPerHour / float64(3600) * limitOverRequestChargingRatio, + } + } + + log.V(5).Info("Updated metrics recorder", "pool", pool.Name, "pricing", pricingDetail) +} + func (mr *MetricsRecorder) getWorkerRawCost(metrics *WorkerResourceMetrics, duration time.Duration) float64 { + settingLock.RLock() + defer settingLock.RUnlock() qosPricing, ok := mr.WorkerUnitPriceMap[metrics.PoolName] // The qos pricing for this pool not set if !ok { @@ -464,7 +521,37 @@ func (mr *MetricsRecorder) getWorkerRawCost(metrics *WorkerResourceMetrics, dura rawCostVRAMLimitOverRequest := (metrics.VramBytesLimit - 
metrics.VramBytesRequest) * pricing.VramOverRequestPerSecond / constants.GiBToBytes rawCostPerVRAM := pricing.VramPerSecond * metrics.VramBytesRequest / constants.GiBToBytes - return (rawCostPerTflops + rawCostPerVRAM + rawCostTflopsLimitOverRequest + rawCostVRAMLimitOverRequest) * duration.Seconds() * float64(metrics.GPUCount) + baseCost := (rawCostPerTflops + rawCostPerVRAM + rawCostTflopsLimitOverRequest + rawCostVRAMLimitOverRequest) * duration.Seconds() * float64(metrics.GPUCount) + + // Apply eviction protection price ratio if the pod is under protection and QoS is not critical + if metrics.evictionProtection != "" && qosLevel != constants.QoSLevelCritical { + if isUnderProtection := mr.isUnderEvictionProtection(metrics); isUnderProtection { + protectionPriceRatio := mr.PoolEvictionProtectionPriceRatioMap[metrics.PoolName] + protectionPriceRatioFloat, _ := strconv.ParseFloat(protectionPriceRatio, 64) + if protectionPriceRatioFloat < 1 { + protectionPriceRatioFloat = constants.DefaultEvictionProtectionPriceRatio + } + baseCost *= protectionPriceRatioFloat + } + } + + return baseCost +} + +// isUnderEvictionProtection checks if a worker is under eviction protection +func (mr *MetricsRecorder) isUnderEvictionProtection(metrics *WorkerResourceMetrics) bool { + if metrics.evictionProtection == "" { + return false + } + + // Parse protection duration (1h, 5h, 24h, etc.) + duration, err := time.ParseDuration(metrics.evictionProtection) + if err != nil { + return false + } + + protectionEndTime := metrics.creationTime.Add(duration) + return time.Now().Before(protectionEndTime) } // unit price data comes from global config map, and multi-GPU instance should normalized with per GPU pricing, e.g. 8xA100 p4d.24xlarge price should divide by 8 diff --git a/internal/metrics/types.go b/internal/metrics/types.go index ff3449cb..df06f169 100644 --- a/internal/metrics/types.go +++ b/internal/metrics/types.go @@ -51,6 +51,10 @@ type WorkerResourceMetrics struct { // For more accurate metrics, should record the deletion timestamp to calculate duration for the last metrics deletionTimestamp *time.Time + // Fields for eviction protection tracking - private, not stored in TSDB + creationTime time.Time + evictionProtection string + podLabels map[string]string } diff --git a/internal/quota/quota_store.go b/internal/quota/quota_store.go index 4edc7445..d9450236 100644 --- a/internal/quota/quota_store.go +++ b/internal/quota/quota_store.go @@ -79,7 +79,16 @@ func (qs *QuotaStore) CheckQuotaAvailable(namespace string, req *tfv1.AllocReque if err := qs.checkSingleQuotas(entry, req); err != nil { return err } - return qs.checkTotalQuotas(entry, req) + return qs.checkTotalQuotas(entry, req, nil) +} + +func (qs *QuotaStore) CheckTotalQuotaRelaxed(req *tfv1.AllocRequest, toReleaseResource *tfv1.GPUResourceUsage) error { + entry, exists := qs.QuotaStore[req.WorkloadNameNamespace.Namespace] + if !exists { + // No quota defined for this namespace, allow allocation + return nil + } + return qs.checkTotalQuotas(entry, req, toReleaseResource) } func (qs *QuotaStore) AdjustQuota(namespace string, reqDelta tfv1.Resource, limitDelta tfv1.Resource) { @@ -103,41 +112,51 @@ func (qs *QuotaStore) checkSingleQuotas(entry *QuotaStoreEntry, req *tfv1.AllocR if single.MaxLimits != nil { if !single.MaxLimits.Tflops.IsZero() && req.Limit.Tflops.Cmp(single.MaxLimits.Tflops) > 0 { return &QuotaExceededError{ - Namespace: entry.Quota.Namespace, - Resource: MaxTFlopsLimitResource, - Requested: req.Limit.Tflops, - Limit: 
single.MaxLimits.Tflops, + Namespace: entry.Quota.Namespace, + Resource: MaxTFlopsLimitResource, + Requested: req.Limit.Tflops, + Limit: single.MaxLimits.Tflops, + Unresolvable: true, } } // Check single VRAM limit (per GPU) if !single.MaxLimits.Vram.IsZero() && req.Request.Vram.Cmp(single.MaxLimits.Vram) > 0 { return &QuotaExceededError{ - Namespace: entry.Quota.Namespace, - Resource: MaxVRAMLimitResource, - Requested: req.Request.Vram, - Limit: single.MaxLimits.Vram, + Namespace: entry.Quota.Namespace, + Resource: MaxVRAMLimitResource, + Requested: req.Request.Vram, + Limit: single.MaxLimits.Vram, + Unresolvable: true, } } // Check single GPU count limit (per worker) if single.MaxGPUCount != nil && int32(req.Count) > *single.MaxGPUCount { return &QuotaExceededError{ - Namespace: entry.Quota.Namespace, - Resource: MaxGPULimitResource, - Requested: *resource.NewQuantity(int64(req.Count), resource.DecimalSI), - Limit: *resource.NewQuantity(int64(*single.MaxGPUCount), resource.DecimalSI), + Namespace: entry.Quota.Namespace, + Resource: MaxGPULimitResource, + Requested: *resource.NewQuantity(int64(req.Count), resource.DecimalSI), + Limit: *resource.NewQuantity(int64(*single.MaxGPUCount), resource.DecimalSI), + Unresolvable: true, } } } return nil } -func (qs *QuotaStore) checkTotalQuotas(entry *QuotaStoreEntry, req *tfv1.AllocRequest) error { +func (qs *QuotaStore) checkTotalQuotas(entry *QuotaStoreEntry, req *tfv1.AllocRequest, toReleaseResource *tfv1.GPUResourceUsage) error { quotaNs := entry.Quota.Namespace + + // Check total requests if entry.Quota.Spec.Total.Requests != nil { total := entry.Quota.Spec.Total.Requests - current := entry.CurrentUsage.Requests + current := *entry.CurrentUsage.Requests.DeepCopy() + + if toReleaseResource != nil { + current.Tflops.Sub(toReleaseResource.Requests.Tflops) + current.Vram.Sub(toReleaseResource.Requests.Vram) + } err := checkTotalExceeded(req, total, current, quotaNs, true) if err != nil { return err @@ -147,13 +166,24 @@ func (qs *QuotaStore) checkTotalQuotas(entry *QuotaStoreEntry, req *tfv1.AllocRe // Check total limits if entry.Quota.Spec.Total.Limits != nil { total := entry.Quota.Spec.Total.Limits - usage := entry.CurrentUsage.Limits + usage := *entry.CurrentUsage.Limits.DeepCopy() + + if toReleaseResource != nil { + usage.Tflops.Sub(toReleaseResource.Limits.Tflops) + usage.Vram.Sub(toReleaseResource.Limits.Vram) + } err := checkTotalExceeded(req, total, usage, quotaNs, false) if err != nil { return err } } + // If it's preempt case, skip checking total workers since it's + // replacing existing workers rather than creating new ones + if toReleaseResource != nil { + return nil + } + // Check total workers, each allocation will create one worker instance if entry.Quota.Spec.Total.MaxWorkers != nil { if entry.CurrentUsage.Workers >= *entry.Quota.Spec.Total.MaxWorkers { @@ -451,10 +481,11 @@ func (qs *QuotaStore) SyncQuotasToK8s(ctx context.Context) { // QuotaExceededError represents a quota exceeded error with detailed information type QuotaExceededError struct { - Namespace string - Resource string - Requested resource.Quantity - Limit resource.Quantity + Namespace string + Resource string + Requested resource.Quantity + Limit resource.Quantity + Unresolvable bool } func (e *QuotaExceededError) Error() string { diff --git a/internal/scheduler/gpuresources/gpuresources.go b/internal/scheduler/gpuresources/gpuresources.go index 861b95eb..8dbd16cc 100644 --- a/internal/scheduler/gpuresources/gpuresources.go +++ 
b/internal/scheduler/gpuresources/gpuresources.go @@ -6,12 +6,14 @@ import ( "sort" "strconv" "strings" + "sync" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/metrics" + "github.com/NexusGPU/tensor-fusion/internal/quota" "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/samber/lo" v1 "k8s.io/api/core/v1" @@ -57,6 +59,12 @@ type GPUSchedulingStateData struct { // In Reserve stage, bind GPUs to pod, update allocator cache // In PostBind stage, fetch final GPUs call Pod patch API to update annotation FinalGPUs []string + + // Preempt pods + PreemptPods sync.Map + + // IsPreemption + IsPreemption bool } func (p *GPUSchedulingStateData) Clone() fwk.StateData { @@ -135,7 +143,16 @@ func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Po s.fh.EventRecorder().Eventf(pod, pod, v1.EventTypeWarning, "GPUQuotaOrCapacityNotEnough", "check quota and filter", "TensorFusion schedule failed, no enough resource or quotas: "+err.Error()) s.logger.Error(err, "failed to check quota and filter", "pod", pod.Name) - return nil, fwk.NewStatus(fwk.Unschedulable, err.Error()) + + if quotaErr, ok := err.(*quota.QuotaExceededError); ok { + if quotaErr.Unresolvable { + return nil, fwk.NewStatus(fwk.UnschedulableAndUnresolvable, quotaErr.Error()) + } else { + return nil, fwk.NewStatus(fwk.Unschedulable, err.Error()) + } + } else { + return nil, fwk.NewStatus(fwk.Unschedulable, err.Error()) + } } validNodesValidGPUs := lo.GroupBy(filteredGPUs, func(gpu *tfv1.GPU) string { @@ -143,10 +160,14 @@ func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Po }) validNodeNonMatchingGPUs := make(map[string][]*tfv1.GPU, len(validNodesValidGPUs)) - nodeNames := sets.New[string]() + cnt := 0 + allGPUNodeNames := sets.New[string]() nodeGPUs := s.allocator.GetNodeGpuStore() + for k := range nodeGPUs { + allGPUNodeNames.Insert(k) + } for k, matchedGPUs := range validNodesValidGPUs { - nodeNames.Insert(k) + cnt++ // get all GPUs on this node allGPUs := nodeGPUs[k] @@ -180,7 +201,7 @@ func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Po } } } - s.logger.Info("filtered valid node GPUs", "nodes count", nodeNames.Len(), "pod", pod.Name) + s.logger.Info("filtered valid node GPUs", "nodes count", cnt, "pod", pod.Name) // assign score based on different strategies score := s.allocator.Score(ctx, s.cfg, allocRequest, validNodesValidGPUs) @@ -189,7 +210,7 @@ func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Po notMatchingGPUScore := s.allocator.Score(ctx, s.cfg, allocRequest, validNodeNonMatchingGPUs) s.fh.EventRecorder().Eventf(pod, pod, v1.EventTypeNormal, "PreScheduleDone", "pre filter for TensorFusion workload", - "TensorFusion pre schedule done, valid GPU node count: "+strconv.Itoa(nodeNames.Len())) + "TensorFusion pre schedule done, valid GPU node count: "+strconv.Itoa(cnt)) if s.logger.V(6).Enabled() { jsonStr, _ := json.Marshal(validNodesValidGPUs) @@ -202,15 +223,66 @@ func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Po ValidNodeGPUScore: score, ValidNodeNotMatchingGPUScore: notMatchingGPUScore, FinalGPUs: []string{}, + PreemptPods: sync.Map{}, + IsPreemption: false, }) return &framework.PreFilterResult{ - NodeNames: nodeNames, + NodeNames: allGPUNodeNames, }, fwk.NewStatus(fwk.Success) 
} func (s *GPUFit) PreFilterExtensions() framework.PreFilterExtensions { - return nil + return s +} + +func (s *GPUFit) AddPod(ctx context.Context, state fwk.CycleState, pod *v1.Pod, podInfoToAdd fwk.PodInfo, nodeInfo fwk.NodeInfo) *fwk.Status { + stateData, err := state.Read(CycleStateGPUSchedulingResult) + if err != nil { + return fwk.NewStatus(fwk.Error, err.Error()) + } + stateDataParsed := stateData.(*GPUSchedulingStateData) + if pods, ok := stateDataParsed.PreemptPods.Load(nodeInfo.Node().Name); ok { + podsParsed := pods.(sets.Set[types.NamespacedName]) + + nameNs := types.NamespacedName{ + Namespace: podInfoToAdd.GetPod().Namespace, + Name: podInfoToAdd.GetPod().Name, + } + if podsParsed.Has(nameNs) { + podsParsed.Delete(nameNs) + } + } + return fwk.NewStatus(fwk.Success, "") +} + +func (s *GPUFit) RemovePod(ctx context.Context, state fwk.CycleState, pod *v1.Pod, podInfoToRemove fwk.PodInfo, nodeInfo fwk.NodeInfo) *fwk.Status { + stateData, err := state.Read(CycleStateGPUSchedulingResult) + if err != nil { + if fwk.ErrNotFound == err { + stateData = &GPUSchedulingStateData{ + PreemptPods: sync.Map{}, + } + state.Write(CycleStateGPUSchedulingResult, stateData) + } else { + return fwk.NewStatus(fwk.Error, err.Error()) + } + } + stateDataParsed := stateData.(*GPUSchedulingStateData) + stateDataParsed.IsPreemption = true + if pods, ok := stateDataParsed.PreemptPods.Load(nodeInfo.Node().Name); ok { + parsedPods := pods.(sets.Set[types.NamespacedName]) + parsedPods.Insert(types.NamespacedName{ + Namespace: podInfoToRemove.GetPod().Namespace, + Name: podInfoToRemove.GetPod().Name, + }) + } else { + stateDataParsed.PreemptPods.Store(nodeInfo.Node().Name, sets.New(types.NamespacedName{ + Namespace: podInfoToRemove.GetPod().Namespace, + Name: podInfoToRemove.GetPod().Name, + })) + } + return fwk.NewStatus(fwk.Success, "") } func (s *GPUFit) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status { @@ -222,6 +294,28 @@ func (s *GPUFit) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, if err != nil { return fwk.NewStatus(fwk.Error, err.Error()) } + + // k8s will RemoveAll Pods, and run Filter for high priority pod, + // then Scheduler framework will reprieve victims one by one until filter returns unschedulable + if filterResult.(*GPUSchedulingStateData).IsPreemption { + allocRequest, err := state.Read(CycleStateAllocateRequest) + allocRequestParsed := allocRequest.(*tfv1.AllocRequest) + if err != nil { + return fwk.NewStatus(fwk.Error, err.Error()) + } + podsToPreempt, ok := filterResult.(*GPUSchedulingStateData).PreemptPods.Load(nodeInfo.Node().Name) + if !ok { + return fwk.NewStatus(fwk.Unschedulable, "no pods to preempt") + } + podsToPreemptParsed := podsToPreempt.(sets.Set[types.NamespacedName]) + err = s.allocator.CheckQuotaAndFilterSingleNodePreempt( + nodeInfo.Node().Name, allocRequestParsed, podsToPreemptParsed) + if err != nil { + return fwk.NewStatus(fwk.Unschedulable, err.Error()) + } + return fwk.NewStatus(fwk.Success, "") + } + nodeName := nodeInfo.Node().Name if _, ok := filterResult.(*GPUSchedulingStateData).NodeGPUs[nodeName]; !ok { return fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") diff --git a/internal/scheduler/gpuresources/gpuresources_test.go b/internal/scheduler/gpuresources/gpuresources_test.go index 71af8c0f..5fa25150 100644 --- a/internal/scheduler/gpuresources/gpuresources_test.go +++ b/internal/scheduler/gpuresources/gpuresources_test.go @@ -7,6 +7,7 @@ import ( "testing" 
"time" + "github.com/samber/lo" "github.com/stretchr/testify/suite" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -352,7 +353,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { s.Equal(tt.expectedStatus, status.Code(), status.Message()) if tt.expectedStatus == fwk.Success { s.Require().NotNil(res) - nodes := sort.StringSlice(res.NodeNames.UnsortedList()) + nodes := sort.StringSlice(getPreFilterResult(state)) nodes.Sort() s.Equal(tt.expectedNodes, strings.Join(nodes, " ")) } @@ -623,7 +624,7 @@ func (s *GPUResourcesSuite) TestScoreExtensions() { func (s *GPUResourcesSuite) TestPreFilterExtensions() { log.FromContext(s.ctx).Info("Running TestPreFilterExtensions") - s.Nil(s.plugin.PreFilterExtensions()) + s.NotNil(s.plugin.PreFilterExtensions()) } func (s *GPUResourcesSuite) TestName() { @@ -728,3 +729,11 @@ func (s *GPUResourcesSuite) TestScore_ErrorHandling() { _, status = s.plugin.Score(s.ctx, state, pod, nodeInfo) s.Equal(fwk.Unschedulable, status.Code()) } + +func getPreFilterResult(state *framework.CycleState) []string { + data, err := state.Read(CycleStateGPUSchedulingResult) + if err != nil { + return nil + } + return lo.Keys(data.(*GPUSchedulingStateData).NodeGPUs) +} diff --git a/internal/utils/compose.go b/internal/utils/compose.go index 2a62af0b..8802c6ce 100644 --- a/internal/utils/compose.go +++ b/internal/utils/compose.go @@ -350,6 +350,7 @@ func AddTFHypervisorConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, poo // Hypervisor needs to read /proc to map pod with processID spec.HostPID = true spec.TerminationGracePeriodSeconds = constants.GracefulPeriodSeconds + spec.PriorityClassName = constants.NodeCriticalPriorityClassName enableVector := pool.Spec.ComponentConfig.Hypervisor != nil && pool.Spec.ComponentConfig.Hypervisor.EnableVector diff --git a/internal/webhook/v1/pod_webhook.go b/internal/webhook/v1/pod_webhook.go index 8c5aca06..6ea04125 100644 --- a/internal/webhook/v1/pod_webhook.go +++ b/internal/webhook/v1/pod_webhook.go @@ -164,6 +164,12 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque utils.AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod, tfInfo) utils.AddTFDefaultClientConfBeforePatch(ctx, pod, pool, tfInfo, containerIndices) + // Add priorityClass if contains higher QoS level and Pod priority class not specified + if pod.Spec.PriorityClassName == "" && + (tfInfo.Profile.Qos == tfv1.QoSHigh || tfInfo.Profile.Qos == tfv1.QoSCritical) { + pod.Spec.PriorityClassName = constants.TensorFusionSystemName + string(tfInfo.Profile.Qos) + } + // Inject initContainer and env variables patches, err := m.patchTFClient( pod, pool, tfInfo.Profile.IsLocalGPU, currentBytes, containerIndices, @@ -517,16 +523,17 @@ func (m *TensorFusionPodMutator) assignClusterHostPortFromLeader(pod *corev1.Pod } func calculateQoSLevel(profile *tfv1.WorkloadProfileSpec, pool *tfv1.GPUPool) tfv1.QoSLevel { - sameReqLimits := profile.Resources.Limits.Tflops.Cmp(profile.Resources.Requests.Tflops) == 0 && - profile.Resources.Limits.Vram.Cmp(profile.Resources.Requests.Vram) == 0 - - // set to critical if req == limits, same logic as Kubernetes QoS - if sameReqLimits { - return constants.QoSLevelCritical - } - // when not set, assign default QoS if profile.Qos == "" { + sameReqLimits := profile.Resources.Limits.Tflops.Cmp(profile.Resources.Requests.Tflops) == 0 && + profile.Resources.Limits.Vram.Cmp(profile.Resources.Requests.Vram) == 0 + + // set to high if req == limits, same logic as Kubernetes QoS + // critical QoS can preempt other 
pods, have to be set manually + if sameReqLimits { + return constants.QoSLevelHigh + } + if pool.Spec.QosConfig == nil || pool.Spec.QosConfig.DefaultQoS == "" { return constants.QoSLevelMedium } diff --git a/internal/webhook/v1/tf_parser.go b/internal/webhook/v1/tf_parser.go index 51da5358..2541b08b 100644 --- a/internal/webhook/v1/tf_parser.go +++ b/internal/webhook/v1/tf_parser.go @@ -258,7 +258,5 @@ func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile) workloadProfile.Spec.Resources.Requests.Vram = resource.Vram workloadProfile.Spec.Resources.Limits.Tflops = resource.Tflops workloadProfile.Spec.Resources.Limits.Vram = resource.Vram - workloadProfile.Spec.Qos = tfv1.QoSCritical - return nil } diff --git a/patches/scheduler-pdb-1.patch b/patches/scheduler-pdb-1.patch index ae9b966e..3a35e841 100644 --- a/patches/scheduler-pdb-1.patch +++ b/patches/scheduler-pdb-1.patch @@ -1,16 +1,38 @@ ---- ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-08-06 17:45:27 -+++ ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-08-06 17:45:19 -@@ -20,7 +20,9 @@ +--- ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-09-15 17:45:27 ++++ ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-09-15 17:45:19 +@@ -20,7 +20,10 @@ "context" "fmt" "math/rand" + "os" "sort" + "strconv" ++ "time" v1 "k8s.io/api/core/v1" policy "k8s.io/api/policy/v1" -@@ -364,5 +366,13 @@ +@@ -119,6 +122,20 @@ + // Default behavior: No additional filtering, beyond the internal requirement that the victim pod + // have lower priority than the preemptor pod. + pl.IsEligiblePod = func(nodeInfo fwk.NodeInfo, victim fwk.PodInfo, preemptor *v1.Pod) bool { ++ victimAnnotation := victim.GetPod().Annotations ++ if victimAnnotation == nil { ++ return true ++ } ++ if protectionPeriod, ok := victimAnnotation["tensor-fusion.ai/eviction-protection"]; ok { ++ duration, err := time.ParseDuration(protectionPeriod) ++ if err != nil { ++ return true ++ } ++ // Still in protection period, not allow to preempt ++ if time.Now().Before(victim.GetPod().CreationTimestamp.Add(duration)) { ++ return false ++ } ++ } + return true + } + +@@ -430,5 +447,13 @@ } func getPDBLister(informerFactory informers.SharedInformerFactory) policylisters.PodDisruptionBudgetLister { @@ -24,3 +46,4 @@ + } return informerFactory.Policy().V1().PodDisruptionBudgets().Lister() } + \ No newline at end of file diff --git a/test/sched/gpufit_bench_test.go b/test/sched/gpufit_bench_test.go index 3acb53d4..147d31e8 100644 --- a/test/sched/gpufit_bench_test.go +++ b/test/sched/gpufit_bench_test.go @@ -20,7 +20,6 @@ func BenchmarkGPUFitPlugin(b *testing.B) { NumNodes: 500, NumGPUs: 3000, NumPods: 10000, - BatchSize: 1, PoolName: "test-pool", Namespace: "test-ns", Timeout: 5 * time.Minute, diff --git a/test/sched/preemption_test.go b/test/sched/preemption_test.go new file mode 100644 index 00000000..1715d61b --- /dev/null +++ b/test/sched/preemption_test.go @@ -0,0 +1,299 @@ +package sched + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + "github.com/NexusGPU/tensor-fusion/cmd/sched" + "github.com/NexusGPU/tensor-fusion/internal/constants" + gpuResourceFitPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gpuresources" + gpuTopoPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gputopo" + 
"github.com/NexusGPU/tensor-fusion/internal/utils" + "github.com/stretchr/testify/require" + "go.uber.org/zap/zapcore" + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2" + "k8s.io/kubernetes/cmd/kube-scheduler/app" + "k8s.io/kubernetes/pkg/scheduler" + st "k8s.io/kubernetes/pkg/scheduler/testing" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/log/zap" +) + +// PreemptionTestSuite holds common test setup for preemption tests +type PreemptionTestSuite struct { + ctx context.Context + cancel context.CancelFunc + k8sClient client.Client + scheduler *scheduler.Scheduler + fixture *BenchmarkFixture + testEnv *envtest.Environment + kubeconfigPath string +} + +// SetupSuite initializes the test environment for preemption tests +func (pts *PreemptionTestSuite) SetupSuite(t *testing.T) { + klog.SetLogger(zap.New(zap.WriteTo(discardWriter{}), zap.UseDevMode(false), zap.Level(zapcore.InfoLevel))) + + // Setup test environment + ver, cfg, err := setupKubernetes() + require.NoError(t, err) + pts.testEnv = testEnv + + kubeconfigPath, err := writeKubeconfigToTempFileAndSetEnv(cfg) + require.NoError(t, err) + pts.kubeconfigPath = kubeconfigPath + + k8sClient, err := client.New(cfg, client.Options{Scheme: scheme.Scheme}) + require.NoError(t, err) + pts.k8sClient = k8sClient + + // Configure test with limited resources for preemption scenarios + benchConfig := BenchmarkConfig{ + NumNodes: 2, + NumGPUs: 4, + PoolName: "preemption-test-pool", + Namespace: "preemption-test-ns", + Timeout: 1 * time.Minute, + } + + mockBench := &testing.B{} + fixture := NewBenchmarkFixture(mockBench, benchConfig, k8sClient, true) + pts.fixture = fixture + + utils.SetProgressiveMigration(false) + + gpuResourceFitOpt := app.WithPlugin( + gpuResourceFitPlugin.Name, + gpuResourceFitPlugin.NewWithDeps(fixture.allocator, fixture.client), + ) + gpuTopoOpt := app.WithPlugin( + gpuTopoPlugin.Name, + gpuTopoPlugin.NewWithDeps(fixture.allocator, fixture.client), + ) + + ctx, cancel := context.WithCancel(context.Background()) + pts.ctx = ctx + pts.cancel = cancel + + cc, scheduler, err := sched.SetupScheduler(ctx, nil, + "../../config/samples/scheduler-config.yaml", true, ver, gpuResourceFitOpt, gpuTopoOpt) + require.NoError(t, err) + pts.scheduler = scheduler + scheduler.SchedulingQueue.Run(klog.FromContext(ctx)) + + // Start scheduler components + cc.EventBroadcaster.StartRecordingToSink(ctx.Done()) + cc.InformerFactory.Start(ctx.Done()) + cc.InformerFactory.WaitForCacheSync(ctx.Done()) + require.NoError(t, scheduler.WaitForHandlersSync(ctx)) +} + +// TearDownSuite cleans up the test environment +func (pts *PreemptionTestSuite) TearDownSuite(t *testing.T) { + if pts.cancel != nil { + pts.cancel() + } + if pts.fixture != nil { + pts.fixture.Close() + } + if pts.kubeconfigPath != "" { + require.NoError(t, cleanupKubeconfigTempFile(pts.kubeconfigPath)) + } + if pts.testEnv != nil { + require.NoError(t, pts.testEnv.Stop()) + } +} + +// discardWriter implements io.Writer to discard log output during tests +type discardWriter struct{} + +func (discardWriter) Write(p []byte) (n int, err error) { + return len(p), nil +} + +// TestPreemption tests comprehensive preemption scenarios +func TestPreemption(t *testing.T) { + suite := &PreemptionTestSuite{} + suite.SetupSuite(t) + defer suite.TearDownSuite(t) + testGPUResourcePreemption(t, suite) +} + +// TestPreemptionEvictProtection tests comprehensive preemption scenarios +func 
TestPreemptionEvictProtection(t *testing.T) { + suite := &PreemptionTestSuite{} + suite.SetupSuite(t) + defer suite.TearDownSuite(t) + testGPUResourceEvictProtection(t, suite) +} + +// testGPUResourcePreemption tests GPU shortage detection logic +func testGPUResourcePreemption(t *testing.T, suite *PreemptionTestSuite) { + // Mock cluster resources + // {"2250", "141Gi"}, // Simulate B200 + // {"989", "80Gi"}, // Simulate H100 + // {"450", "48Gi"}, // Simulate L40s + // {"312", "40Gi"}, // Simulate A100 + + // Create pods that will exhaust resources + toBeVictimPods := createPreemptionTestPodsWithQoS("victim", constants.QoSLevelMedium, 7+3+1+1, "300", "1Gi") + + for _, pod := range toBeVictimPods { + require.NoError(t, suite.k8sClient.Create(suite.ctx, pod)) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, pod) + }() + } + + // Try scheduling all pending pods + for range 12 { + suite.scheduler.ScheduleOne(suite.ctx) + } + + // schedule high priority pod + highPriorityPod := createPreemptionTestPodsWithQoS("high-priority", constants.QoSLevelHigh, 1, "300", "1Gi")[0] + require.NoError(t, suite.k8sClient.Create(suite.ctx, highPriorityPod)) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, highPriorityPod) + }() + + suite.scheduler.ScheduleOne(suite.ctx) + + // schedule critical priority pod + criticalPriorityPod := createPreemptionTestPodsWithQoS( + "critical-priority", constants.QoSLevelCritical, 1, "300", "1Gi")[0] + require.NoError(t, suite.k8sClient.Create(suite.ctx, criticalPriorityPod)) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, criticalPriorityPod) + }() + suite.scheduler.ScheduleOne(suite.ctx) + + // Preemption should be triggered and victims deleted, wait informer sync + time.Sleep(1 * time.Second) + + podList := &v1.PodList{} + err := suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + scheduledNodeMap := make(map[string]string) + for _, pod := range podList.Items { + scheduledNodeMap[pod.Name] = pod.Spec.NodeName + } + // 2 Pods deleted, 14 - 2 = 12 + require.Equal(t, 12, len(podList.Items)) + + // without Pod Controller, directly reconcile all state to simulate the Pod deletion + suite.fixture.allocator.ReconcileAllocationStateForTesting() + + // Trigger next 2 scheduling cycle, make sure the two higher priority pods are scheduled + suite.scheduler.ScheduleOne(suite.ctx) + suite.scheduler.ScheduleOne(suite.ctx) + + time.Sleep(1 * time.Second) + + err = suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + for _, pod := range podList.Items { + if strings.Contains(pod.Name, "victim") { + continue + } + scheduledNodeMap[pod.Name] = pod.Spec.NodeName + } + // not empty indicates the high priority pod is scheduled + require.NotEmpty(t, scheduledNodeMap["high-priority-0"]) + require.NotEmpty(t, scheduledNodeMap["critical-priority-0"]) +} + +func testGPUResourceEvictProtection(t *testing.T, suite *PreemptionTestSuite) { + toBeVictimPods := createPreemptionTestPodsWithQoS("victim", constants.QoSLevelMedium, 1, "2000", "2Gi") + toBeVictimPods[0].Annotations[constants.EvictionProtectionAnnotation] = "2s" + require.NoError(t, suite.k8sClient.Create(suite.ctx, toBeVictimPods[0])) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, toBeVictimPods[0]) + }() + + suite.scheduler.ScheduleOne(suite.ctx) + + toBeVictimPods = createPreemptionTestPodsWithQoS("high-priority", constants.QoSLevelHigh, 1, "2000", "2Gi") + 
require.NoError(t, suite.k8sClient.Create(suite.ctx, toBeVictimPods[0])) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, toBeVictimPods[0]) + }() + + // should not evict since it's inside protection period + suite.scheduler.ScheduleOne(suite.ctx) + + podList := &v1.PodList{} + err := suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + require.Equal(t, 2, len(podList.Items)) + + // should evict since protection period over + time.Sleep(2 * time.Second) + suite.scheduler.ScheduleOne(suite.ctx) + + suite.fixture.allocator.ReconcileAllocationStateForTesting() + + // Should schedule the new high priority pod + suite.scheduler.ScheduleOne(suite.ctx) + // waiting for binding cycle take effect + time.Sleep(300 * time.Millisecond) + + podList = &v1.PodList{} + err = suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + require.Equal(t, 1, len(podList.Items)) + require.Equal(t, "high-priority-0", podList.Items[0].Name) + require.Equal(t, "node-0", podList.Items[0].Spec.NodeName) +} + +// Helper functions +func createPreemptionTestPodsWithQoS(baseName, qosLevel string, count int, tflops, vram string) []*v1.Pod { + pods := make([]*v1.Pod, count) + for i := 0; i < count; i++ { + pod := st.MakePod(). + Namespace("preemption-test-ns"). + Name(fmt.Sprintf("%s-%d", baseName, i)). + UID(fmt.Sprintf("%s-%d", baseName, i)). + SchedulerName("tensor-fusion-scheduler"). + Res(map[v1.ResourceName]string{ + v1.ResourceCPU: "100m", + v1.ResourceMemory: "256Mi", + }). + Toleration("node.kubernetes.io/not-ready"). + ZeroTerminationGracePeriod().Obj() + + pod.Labels = map[string]string{ + constants.LabelComponent: constants.ComponentWorker, + constants.WorkloadKey: "test-workload", + } + + pod.Annotations = map[string]string{ + constants.GpuPoolKey: "preemption-test-pool", + constants.QoSLevelAnnotation: qosLevel, + constants.TFLOPSRequestAnnotation: tflops, + constants.VRAMRequestAnnotation: vram, + constants.TFLOPSLimitAnnotation: tflops, + constants.VRAMLimitAnnotation: vram, + constants.GpuCountAnnotation: "1", + } + pod.Spec.PriorityClassName = "tensor-fusion-" + qosLevel + + pods[i] = pod + } + return pods +} + +// func createPreemptionTestPodsWithEvictionProtection( +// namespace, baseName, qosLevel, protectionDuration string, count int, tflops, vram string) []*v1.Pod { +// pods := createPreemptionTestPodsWithQoS(namespace, baseName, qosLevel, count, tflops, vram) +// for _, pod := range pods { +// pod.Annotations[constants.EvictionProtectionAnnotation] = protectionDuration +// } +// return pods +// } diff --git a/test/sched/scheduler_bench_test.go b/test/sched/scheduler_bench_test.go index fde318bd..bbed548f 100644 --- a/test/sched/scheduler_bench_test.go +++ b/test/sched/scheduler_bench_test.go @@ -36,10 +36,9 @@ func defaultBenchmarkConfig() BenchmarkConfig { NumNodes: 1000, NumGPUs: 4000, NumPods: 10000, - BatchSize: 100, PoolName: "benchmark-pool", Namespace: "benchmark-ns", - Timeout: 10 * time.Minute, + Timeout: 5 * time.Minute, } } diff --git a/test/sched/setup.go b/test/sched/setup.go index 6fa4167d..5dc80e32 100644 --- a/test/sched/setup.go +++ b/test/sched/setup.go @@ -14,6 +14,7 @@ import ( gpuResourceFitPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gpuresources" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" + schedv1 "k8s.io/api/scheduling/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -31,6 +32,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -40,7 +42,6 @@ type BenchmarkConfig struct { NumNodes int NumGPUs int NumPods int - BatchSize int PoolName string Namespace string Timeout time.Duration @@ -89,7 +90,7 @@ func NewBenchmarkFixture( b.Logf("%d Pods created, Needed TFLOPS: %f, Needed VRAM: %f", len(pods), neededTflops, neededVRAM) // Batch create resources for better performance - k8sNativeObjects := batchCreateResources(b, ctx, client, nodes, gpus, pods, realAPIServer) + k8sNativeObjects := batchCreateResources(b, ctx, client, config.Namespace, nodes, gpus, pods, realAPIServer) // Setup allocator allocator := setupAllocator(b, ctx, client) @@ -178,10 +179,10 @@ func generateGPUs(totalGPUs int, nodes []*v1.Node, poolName string) ([]*tfv1.GPU // Pre-define GPU specs to avoid repeated allocations gpuSpecs := []struct{ tflops, vram string }{ - {"2250", "141Gi"}, // High-end - {"989", "80Gi"}, // Mid-range - {"450", "48Gi"}, // Entry-level - {"312", "40Gi"}, // Budget + {"2250", "141Gi"}, // Simulate B200 + {"989", "80Gi"}, // Simulate H100 + {"450", "48Gi"}, // Simulate L40s + {"312", "40Gi"}, // Simulate A100 } gpuIndex := 0 @@ -287,12 +288,27 @@ func generatePods(count int, namespace, poolName string) ([]*v1.Pod, float64, fl // Helper functions for setup func batchCreateResources( - b *testing.B, ctx context.Context, client client.Client, + b *testing.B, ctx context.Context, client client.Client, namespace string, nodes []*v1.Node, gpus []*tfv1.GPU, pods []*v1.Pod, realAPIServer bool, ) []runtime.Object { + // Create priority classes + require.NoError(b, client.Create(ctx, &schedv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "tensor-fusion-" + constants.QoSLevelCritical}, + Value: 100000, + })) + require.NoError(b, client.Create(ctx, &schedv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "tensor-fusion-" + constants.QoSLevelHigh}, + Value: 10000, + })) + require.NoError(b, client.Create(ctx, &schedv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "tensor-fusion-" + constants.QoSLevelMedium}, + Value: 100, + PreemptionPolicy: ptr.To(v1.PreemptNever), + })) + k8sObjs := []runtime.Object{} require.NoError(b, client.Create(ctx, &v1.Namespace{ - ObjectMeta: metav1.ObjectMeta{Name: "benchmark-ns"}, + ObjectMeta: metav1.ObjectMeta{Name: namespace}, })) timer := time.Now() From 4fc9dc9bbf43e00a8e63aec889b087be7b994aa9 Mon Sep 17 00:00:00 2001 From: 0x5457 <0x5457@protonmail.com> Date: Wed, 17 Sep 2025 22:16:01 +0800 Subject: [PATCH 28/34] fix: add resource validation in Bind to prevent GPU over-allocation (#365) - Add double-check for TFLOPs and VRAM availability before allocation --- internal/gpuallocator/gpuallocator.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index fb475377..a4ec9958 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -285,6 +285,19 @@ func (s *GpuAllocator) Bind( gpuNodeName = gpu.Status.NodeSelector[constants.KubernetesHostNameLabel] } + // Double-check resource availability to prevent over-allocation + if gpu.Status.Available == nil { + return nil, fmt.Errorf("GPU %s has nil available 
resources", selectedGPU) + } + if gpu.Status.Available.Tflops.Cmp(req.Request.Tflops) < 0 { + return nil, fmt.Errorf("GPU %s insufficient TFLOPs: available %s, requested %s", + selectedGPU, gpu.Status.Available.Tflops.String(), req.Request.Tflops.String()) + } + if gpu.Status.Available.Vram.Cmp(req.Request.Vram) < 0 { + return nil, fmt.Errorf("GPU %s insufficient VRAM: available %s, requested %s", + selectedGPU, gpu.Status.Available.Vram.String(), req.Request.Vram.String()) + } + // reduce available resource on the GPU status gpu.Status.Available.Tflops.Sub(req.Request.Tflops) gpu.Status.Available.Vram.Sub(req.Request.Vram) From 5f25794b3dbd34edd8716e1cfcaef59cf0a2bbdd Mon Sep 17 00:00:00 2001 From: dylan Date: Mon, 22 Sep 2025 08:35:59 -0700 Subject: [PATCH 29/34] webhook & gpu resource fit dra support --- api/v1/schedulingconfigtemplate_types.go | 16 + api/v1/zz_generated.deepcopy.go | 25 ++ ...r-fusion.ai_schedulingconfigtemplates.yaml | 14 + cmd/main.go | 2 + ...r-fusion.ai_schedulingconfigtemplates.yaml | 14 + internal/constants/constants.go | 9 + .../scheduler/gpuresources/gpuresources.go | 45 ++ .../gpuresources/gpuresources_dra_test.go | 237 ++++++++++ internal/utils/compose.go | 6 + internal/webhook/v1/pod_dra.go | 307 +++++++++++++ internal/webhook/v1/pod_webhook.go | 45 +- internal/webhook/v1/pod_webhook_dra_test.go | 413 ++++++++++++++++++ internal/webhook/v1/pod_webhook_test.go | 9 +- internal/webhook/v1/tf_parser.go | 6 + 14 files changed, 1132 insertions(+), 16 deletions(-) create mode 100644 internal/scheduler/gpuresources/gpuresources_dra_test.go create mode 100644 internal/webhook/v1/pod_dra.go create mode 100644 internal/webhook/v1/pod_webhook_dra_test.go diff --git a/api/v1/schedulingconfigtemplate_types.go b/api/v1/schedulingconfigtemplate_types.go index 44f07bef..b3243344 100644 --- a/api/v1/schedulingconfigtemplate_types.go +++ b/api/v1/schedulingconfigtemplate_types.go @@ -39,6 +39,10 @@ type SchedulingConfigTemplateSpec struct { // single GPU device multi-process queuing and fair scheduling with QoS constraint // +optional Hypervisor *HypervisorScheduling `json:"hypervisor,omitempty"` + + // enable Dynamic Resource Allocation (DRA) for GPU resource management + // +optional + DRA *DRAConfig `json:"dra,omitempty"` } type PlacementConfig struct { @@ -206,6 +210,18 @@ type MultiProcessQueuing struct { QueueLevelTimeSlices []string `json:"queueLevelTimeSlices,omitempty"` } +// DRAConfig configures Dynamic Resource Allocation support +type DRAConfig struct { + // Enable DRA mode for all workloads in this configuration template + // +optional + Enable *bool `json:"enable,omitempty"` + + // ResourceClass specifies the DRA resource class name to use + // +kubebuilder:default="tensorfusion.ai/gpu" + // +optional + ResourceClass string `json:"resourceClass,omitempty"` +} + // SchedulingConfigTemplateStatus defines the observed state of SchedulingConfigTemplate. type SchedulingConfigTemplateStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 9be4f47c..5699677a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -332,6 +332,26 @@ func (in *ComputingVendorParams) DeepCopy() *ComputingVendorParams { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *DRAConfig) DeepCopyInto(out *DRAConfig) { + *out = *in + if in.Enable != nil { + in, out := &in.Enable, &out.Enable + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DRAConfig. +func (in *DRAConfig) DeepCopy() *DRAConfig { + if in == nil { + return nil + } + out := new(DRAConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GPU) DeepCopyInto(out *GPU) { *out = *in @@ -1963,6 +1983,11 @@ func (in *SchedulingConfigTemplateSpec) DeepCopyInto(out *SchedulingConfigTempla *out = new(HypervisorScheduling) (*in).DeepCopyInto(*out) } + if in.DRA != nil { + in, out := &in.DRA, &out.DRA + *out = new(DRAConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplateSpec. diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml index 91a01eae..7c0c281b 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -143,6 +143,20 @@ spec: type: string type: object type: object + dra: + description: enable Dynamic Resource Allocation (DRA) for GPU resource + management + properties: + enable: + description: Enable DRA mode for all workloads in this configuration + template + type: boolean + resourceClass: + default: tensorfusion.ai/gpu + description: ResourceClass specifies the DRA resource class name + to use + type: string + type: object hypervisor: description: single GPU device multi-process queuing and fair scheduling with QoS constraint diff --git a/cmd/main.go b/cmd/main.go index 92021131..7f5f8721 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -33,6 +33,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/klog/v2" + resourcev1beta2 "k8s.io/api/resource/v1beta2" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -99,6 +100,7 @@ var alertEvaluatorReady chan struct{} func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(tfv1.AddToScheme(scheme)) + utilruntime.Must(resourcev1beta2.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } diff --git a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml index 91a01eae..7c0c281b 100644 --- a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -143,6 +143,20 @@ spec: type: string type: object type: object + dra: + description: enable Dynamic Resource Allocation (DRA) for GPU resource + management + properties: + enable: + description: Enable DRA mode for all workloads in this configuration + template + type: boolean + resourceClass: + default: tensorfusion.ai/gpu + description: ResourceClass specifies the DRA resource class name + to use + type: string + type: object hypervisor: description: single GPU device multi-process queuing and fair scheduling with QoS constraint diff --git a/internal/constants/constants.go b/internal/constants/constants.go index b1aa6b64..22d465cc 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -121,6 +121,15 @@ 
const ( QoSLevelMedium = "medium" QoSLevelHigh = "high" QoSLevelCritical = "critical" + + // DRA support + // annotation for pod to indicate if DRA is enabled + DRAEnabledAnnotation = Domain + "/dra-enabled" + DRAResourceClaimName = "tensor-fusion-resource-claim-%s" + // resource claim name for request + DRAResourceClaimRequestName = "tensor-fusion-resource-claim-request-%s" + + DRAClaimDefineName = "tensor-fusion-gpu-claim" ) // for avoid golang lint issues diff --git a/internal/scheduler/gpuresources/gpuresources.go b/internal/scheduler/gpuresources/gpuresources.go index 16dd1c61..949b7582 100644 --- a/internal/scheduler/gpuresources/gpuresources.go +++ b/internal/scheduler/gpuresources/gpuresources.go @@ -26,6 +26,7 @@ import ( const Name = "GPUResourcesFit" const CycleStateAllocateRequest = "allocateRequest" const CycleStateGPUSchedulingResult = "gpuSchedulingResult" + const SchedulerSimulationKey = "schedulerSimulation" var _ framework.PreFilterPlugin = &GPUFit{} @@ -105,6 +106,11 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod }, framework.NewStatus(framework.Success, "progressive migration for native resources claim") } + // Check if DRA mode is enabled for this pod + if isDRAEnabled(pod) && hasDRAClaim(pod) { + return nil, framework.NewStatus(framework.Skip, "DRA mode enabled, skipping custom GPU prefilter") + } + // Skip non tensor-fusion mode if !utils.IsTensorFusionWorker(pod) { return nil, framework.NewStatus(framework.Skip, "skip for non tensor-fusion mode") @@ -207,6 +213,11 @@ func (s *GPUFit) PreFilterExtensions() framework.PreFilterExtensions { } func (s *GPUFit) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { + // Check if DRA mode is enabled for this pod + if isDRAEnabled(pod) && hasDRAClaim(pod) { + return framework.NewStatus(framework.Skip, "DRA mode enabled, skipping custom GPU filter") + } + if !utils.IsTensorFusionWorker(pod) { return framework.NewStatus(framework.Success, "skip for non tensor-fusion mode") } @@ -228,6 +239,11 @@ func (s *GPUFit) Score( pod *v1.Pod, nodeInfo *framework.NodeInfo, ) (int64, *framework.Status) { + // Check if DRA mode is enabled for this pod + if isDRAEnabled(pod) && hasDRAClaim(pod) { + return 0, framework.NewStatus(framework.Skip, "DRA mode enabled, skipping custom GPU scoring") + } + // Skip non tensor-fusion mode scheduling if !utils.IsTensorFusionWorker(pod) { return 0, framework.NewStatus(framework.Success, "") @@ -266,6 +282,11 @@ func (s *GPUFit) ScoreExtensions() framework.ScoreExtensions { } func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { + // Check if DRA mode is enabled for this pod + if isDRAEnabled(pod) && hasDRAClaim(pod) { + return framework.NewStatus(framework.Success, "DRA mode enabled, skipping custom GPU reservation") + } + if !utils.IsTensorFusionWorker(pod) { return framework.NewStatus(framework.Success, "skip for non tensor-fusion mode") } @@ -312,6 +333,11 @@ func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod * } func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { + // Check if DRA mode is enabled for this pod + if isDRAEnabled(pod) && hasDRAClaim(pod) { + return // DRA handles unreservation + } + if !utils.IsTensorFusionWorker(pod) { return } @@ -331,6 +357,11 @@ func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod } 
func (s *GPUFit) PostBind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { + // Check if DRA mode is enabled for this pod + if isDRAEnabled(pod) && hasDRAClaim(pod) { + return // DRA handles post-bind actions + } + if !utils.IsTensorFusionWorker(pod) { return } @@ -359,3 +390,17 @@ func (s *GPUFit) PostBind(ctx context.Context, state *framework.CycleState, pod "Attach GPU device ID info", "Attach TensorFusion GPU device IDs to Pod: "+gpuIDs) } } + +// isDRAEnabled checks if DRA is enabled for a pod +func isDRAEnabled(pod *v1.Pod) bool { + if pod.Annotations == nil { + return false + } + val, ok := pod.Annotations[constants.DRAEnabledAnnotation] + return ok && val == constants.TrueStringValue +} + +// hasDRAClaim checks if a pod has DRA ResourceClaim references +func hasDRAClaim(pod *v1.Pod) bool { + return len(pod.Spec.ResourceClaims) > 0 +} diff --git a/internal/scheduler/gpuresources/gpuresources_dra_test.go b/internal/scheduler/gpuresources/gpuresources_dra_test.go new file mode 100644 index 00000000..021be137 --- /dev/null +++ b/internal/scheduler/gpuresources/gpuresources_dra_test.go @@ -0,0 +1,237 @@ +package gpuresources + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +func TestIsDRAEnabled(t *testing.T) { + tests := []struct { + name string + annotations map[string]string + expected bool + }{ + { + name: "DRA enabled annotation", + annotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.TrueStringValue, + }, + expected: true, + }, + { + name: "DRA disabled annotation", + annotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.FalseStringValue, + }, + expected: false, + }, + { + name: "no annotation", + expected: false, + }, + { + name: "other annotations", + annotations: map[string]string{ + "other.annotation": "value", + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: tt.annotations, + }, + } + + result := isDRAEnabled(pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestHasDRAClaimScheduler(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + expected bool + }{ + { + name: "pod with resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{ + {Name: "gpu-claim"}, + }, + }, + }, + expected: true, + }, + { + name: "pod with multiple resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{ + {Name: "gpu-claim"}, + {Name: "other-claim"}, + }, + }, + }, + expected: true, + }, + { + name: "pod without resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{}, + }, + expected: false, + }, + { + name: "pod with empty resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{}, + }, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := hasDRAClaim(tt.pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +// Integration test for DRA detection logic +func TestDRADetectionIntegration(t *testing.T) { + tests := []struct { + name string + draAnnotation string + hasResourceClaims bool + expectedDRA bool + expectedClaim bool + }{ + { + name: "DRA enabled with claims", + draAnnotation: 
constants.TrueStringValue, + hasResourceClaims: true, + expectedDRA: true, + expectedClaim: true, + }, + { + name: "DRA enabled without claims", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: false, + expectedDRA: true, + expectedClaim: false, + }, + { + name: "DRA disabled with claims", + draAnnotation: constants.FalseStringValue, + hasResourceClaims: true, + expectedDRA: false, + expectedClaim: true, + }, + { + name: "no DRA annotation, no claims", + hasResourceClaims: false, + expectedDRA: false, + expectedClaim: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: make(map[string]string), + }, + Spec: corev1.PodSpec{}, + } + + if tt.draAnnotation != "" { + pod.Annotations[constants.DRAEnabledAnnotation] = tt.draAnnotation + } + + if tt.hasResourceClaims { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{ + {Name: "test-claim"}, + } + } + + draEnabled := isDRAEnabled(pod) + hasClaim := hasDRAClaim(pod) + + assert.Equal(t, tt.expectedDRA, draEnabled, "DRA enabled detection mismatch") + assert.Equal(t, tt.expectedClaim, hasClaim, "Resource claim detection mismatch") + }) + } +} + +// Test the combination logic that scheduler uses +func TestSchedulerDRALogic(t *testing.T) { + tests := []struct { + name string + draAnnotation string + hasResourceClaims bool + shouldSkipScheduler bool + }{ + { + name: "DRA enabled with claims - should skip", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: true, + shouldSkipScheduler: true, + }, + { + name: "DRA enabled without claims - should not skip", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: false, + shouldSkipScheduler: false, + }, + { + name: "DRA disabled with claims - should not skip", + draAnnotation: constants.FalseStringValue, + hasResourceClaims: true, + shouldSkipScheduler: false, + }, + { + name: "no DRA, no claims - should not skip", + shouldSkipScheduler: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: make(map[string]string), + }, + Spec: corev1.PodSpec{}, + } + + if tt.draAnnotation != "" { + pod.Annotations[constants.DRAEnabledAnnotation] = tt.draAnnotation + } + + if tt.hasResourceClaims { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{ + {Name: "test-claim"}, + } + } + + // This is the actual logic used in the scheduler + shouldSkip := isDRAEnabled(pod) && hasDRAClaim(pod) + assert.Equal(t, tt.shouldSkipScheduler, shouldSkip) + }) + } +} diff --git a/internal/utils/compose.go b/internal/utils/compose.go index e7170881..e9fb79ce 100644 --- a/internal/utils/compose.go +++ b/internal/utils/compose.go @@ -79,6 +79,8 @@ type TensorFusionInfo struct { // Pod mutating webhook can not get Pod UID sometimes, // thus need pod controller to set the owner reference PendingSetPodAsOwner bool + // DRA support + DRAEnabled bool } func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo) { @@ -113,6 +115,10 @@ func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo Tens pod.Annotations[constants.IsLocalGPUAnnotation] = strconv.FormatBool(tfInfo.Profile.IsLocalGPU) // add inject container annotation for client Pod, in case user doesn't specify it pod.Annotations[constants.InjectContainerAnnotation] = strings.Join(tfInfo.ContainerNames, ",") + // add DRA enabled annotation + if tfInfo.DRAEnabled { + 
pod.Annotations[constants.DRAEnabledAnnotation] = constants.TrueStringValue + } } func AppendTFWorkerLabelsAndAnnotationsAfterTemplate( diff --git a/internal/webhook/v1/pod_dra.go b/internal/webhook/v1/pod_dra.go new file mode 100644 index 00000000..ef2bd0b3 --- /dev/null +++ b/internal/webhook/v1/pod_dra.go @@ -0,0 +1,307 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1 + +import ( + "context" + "crypto/rand" + "encoding/hex" + "fmt" + "strings" + + corev1 "k8s.io/api/core/v1" + resourcev1beta2 "k8s.io/api/resource/v1beta2" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" +) + +// DRAProcessor handles all DRA-related operations for pod admission +type DRAProcessor struct { + client.Client + enableDRA bool + resourceClass string // cached resource class to avoid repeated API calls + configLoaded bool // tracks if configuration has been loaded +} + +// generateUniqueID creates a random 8-character hex string for resource claim names +func generateUniqueID() string { + bytes := make([]byte, 4) + _, _ = rand.Read(bytes) // crypto/rand.Read always returns len(bytes), nil on success + return hex.EncodeToString(bytes) +} + +// NewDRAProcessor creates a new DRA processor +func NewDRAProcessor(client client.Client) *DRAProcessor { + return &DRAProcessor{ + Client: client, + enableDRA: false, + } +} + +// InitializeDRAConfig is kept for backward compatibility but now does nothing +// Configuration is loaded lazily on first use +func (p *DRAProcessor) InitializeDRAConfig(ctx context.Context) error { + // No-op - configuration is now loaded lazily + if p.configLoaded { + return nil + } + + // Set defaults first + p.enableDRA = false + + templateList := &tfv1.SchedulingConfigTemplateList{} + // Use the provided context to respect cancellation + err := p.List(ctx, templateList) + if err != nil { + // Log error but don't fail - fall back to defaults + // This allows webhook to work even if templates are unavailable + p.configLoaded = true + return nil + } + + // Check if any template has DRA enabled and cache the resource class + for _, template := range templateList.Items { + if template.Spec.DRA != nil { + if template.Spec.DRA.Enable != nil && *template.Spec.DRA.Enable { + p.enableDRA = true + } + // Cache the resource class from the template + if template.Spec.DRA.ResourceClass != "" { + p.resourceClass = template.Spec.DRA.ResourceClass + } + } + } + + if p.enableDRA && p.resourceClass == "" { + return fmt.Errorf("resource class is not set") + } + + p.configLoaded = true + return nil +} + +// IsDRAEnabled checks if DRA is enabled for a specific pod +func (p *DRAProcessor) IsDRAEnabled(ctx context.Context, pod *corev1.Pod) bool { + + // Check pod-level annotation first (explicit override) + if val, ok := 
pod.Annotations[constants.DRAEnabledAnnotation]; ok && val == constants.TrueStringValue { + return true + } + + // Check pod-level annotation for explicit disable + if val, ok := pod.Annotations[constants.DRAEnabledAnnotation]; ok && val == constants.FalseStringValue { + return false + } + + // Fall back to global configuration + return p.enableDRA +} + +// HasDRAClaim checks if a pod has DRA ResourceClaim references +func HasDRAClaim(pod *corev1.Pod) bool { + return len(pod.Spec.ResourceClaims) > 0 +} + +// convertToResourceClaim converts GPU resource requests to ResourceClaim +func (p *DRAProcessor) convertToResourceClaim(pod *corev1.Pod, tfInfo *utils.TensorFusionInfo) (*resourcev1beta2.ResourceClaim, error) { + + // Build CEL selector using DRA helper + celSelector, err := BuildCELSelector(pod, tfInfo) + if err != nil { + return nil, fmt.Errorf("failed to build CEL selector: %w", err) + } + + // Generate unique claim name with random suffix to avoid conflicts + var baseName string + + if pod.GenerateName != "" { + baseName = strings.TrimSuffix(pod.GenerateName, "-") + } else if pod.Name != "" { + baseName = pod.Name + } + + uniqueID := generateUniqueID() + claimName := fmt.Sprintf(constants.DRAResourceClaimName, baseName, uniqueID) + + // Use cached resource class instead of making API calls + resourceClass := p.resourceClass + + claim := &resourcev1beta2.ResourceClaim{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "resource.k8s.io/v1beta2", + Kind: "ResourceClaim", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: pod.Namespace, + // Note: We cannot set OwnerReference here because pod.UID is empty during admission. + // The controller will set the proper owner reference once the Pod is created. + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: fmt.Sprintf(constants.DRAResourceClaimRequestName, generateUniqueID()), + Exactly: &resourcev1beta2.ExactDeviceRequest{ + DeviceClassName: resourceClass, + Selectors: []resourcev1beta2.DeviceSelector{ + { + CEL: &resourcev1beta2.CELDeviceSelector{ + Expression: celSelector, + }, + }, + }, + Count: int64(tfInfo.Profile.GPUCount), + }, + }, + }, + }, + }, + } + + return claim, nil +} + +// injectResourceClaimRef adds ResourceClaim reference to Pod spec +func (p *DRAProcessor) injectResourceClaimRef(pod *corev1.Pod, claim *resourcev1beta2.ResourceClaim, containerIndices []int) { + // Add ResourceClaim reference to pod.Spec.ResourceClaims + if pod.Spec.ResourceClaims == nil { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{} + } + + claimRef := corev1.PodResourceClaim{ + Name: constants.DRAClaimDefineName, + ResourceClaimName: &claim.Name, + } + + // Check if the claim reference already exists to maintain idempotency + claimExists := false + for i, existingClaim := range pod.Spec.ResourceClaims { + if existingClaim.Name == constants.DRAClaimDefineName { + // Update existing claim to point to the new ResourceClaim name + pod.Spec.ResourceClaims[i].ResourceClaimName = &claim.Name + claimExists = true + break + } + } + + if !claimExists { + pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims, claimRef) + } + + // Add resource claim consumption to containers + for _, containerIndex := range containerIndices { + container := &pod.Spec.Containers[containerIndex] + if container.Resources.Claims == nil { + container.Resources.Claims = []corev1.ResourceClaim{} + } + + // Check if the container already has this claim to maintain 
idempotency + hasGPUClaim := false + for _, existingClaim := range container.Resources.Claims { + if existingClaim.Name == constants.DRAClaimDefineName { + hasGPUClaim = true + break + } + } + + if !hasGPUClaim { + container.Resources.Claims = append(container.Resources.Claims, corev1.ResourceClaim{ + Name: constants.DRAClaimDefineName, + }) + } + } +} + +// createResourceClaim creates a ResourceClaim object with proper error handling and retries +func (p *DRAProcessor) createResourceClaim(ctx context.Context, claim *resourcev1beta2.ResourceClaim) error { + // Try to create the ResourceClaim + if err := p.Create(ctx, claim); err != nil { + if errors.IsAlreadyExists(err) { + // Check if the existing claim is for the same pod + existingClaim := &resourcev1beta2.ResourceClaim{} + getErr := p.Get(ctx, client.ObjectKey{Name: claim.Name, Namespace: claim.Namespace}, existingClaim) + if getErr != nil { + return fmt.Errorf("failed to check existing ResourceClaim: %w", getErr) + } + // Different pod or missing labels, this is an error + return fmt.Errorf("ResourceClaim %s already exists for a different pod", claim.Name) + } + + if errors.IsInvalid(err) { + return fmt.Errorf("ResourceClaim is invalid: %w", err) + } + + if errors.IsForbidden(err) { + return fmt.Errorf("insufficient permissions to create ResourceClaim: %w", err) + } + } + + return nil +} + +// Note: patchTFClientForDRA is temporarily handled in the main pod_webhook.go +// until we can properly abstract all the TF client patching logic + +// HandleDRAAdmission handles the complete DRA admission process +func (p *DRAProcessor) HandleDRAAdmission(ctx context.Context, pod *corev1.Pod, tfInfo *utils.TensorFusionInfo, containerIndices []int) error { + // Convert GPU resources to ResourceClaim + resourceClaim, err := p.convertToResourceClaim(pod, tfInfo) + if err != nil { + return fmt.Errorf("failed to convert to ResourceClaim: %w", err) + } + + // Create ResourceClaim + if err := p.createResourceClaim(ctx, resourceClaim); err != nil { + return fmt.Errorf("failed to create ResourceClaim: %w", err) + } + // Inject ResourceClaim reference to Pod + p.injectResourceClaimRef(pod, resourceClaim, containerIndices) + return nil +} + +// TODO: support more attributes for filtering +func BuildCELSelector(pod *corev1.Pod, tfInfo *utils.TensorFusionInfo) (string, error) { + var conditions []string + + // 1. Basic resource requirements using standard DRA quantity attributes + requests := tfInfo.Profile.Resources.Requests + if !requests.Tflops.IsZero() { + conditions = append(conditions, fmt.Sprintf(`device.attributes["tflops"].quantity >= quantity("%s")`, requests.Tflops.String())) + } + if !requests.Vram.IsZero() { + conditions = append(conditions, fmt.Sprintf(`device.attributes["vram"].quantity >= quantity("%s")`, requests.Vram.String())) + } + + // 2. 
GPU model filter (if specified - basic attribute that should be widely supported) + if tfInfo.Profile.GPUModel != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["model"] == "%s"`, tfInfo.Profile.GPUModel)) + } + + // Return a basic condition if no specific requirements + if len(conditions) == 0 { + // Simple condition that should work with most DRA drivers + return `device.attributes.exists("type")`, nil + } + + return strings.Join(conditions, " && "), nil +} diff --git a/internal/webhook/v1/pod_webhook.go b/internal/webhook/v1/pod_webhook.go index 53610ffe..08962028 100644 --- a/internal/webhook/v1/pod_webhook.go +++ b/internal/webhook/v1/pod_webhook.go @@ -49,14 +49,21 @@ var httpClient = &http.Client{Timeout: 10 * time.Second} func SetupPodWebhookWithManager(mgr ctrl.Manager, portAllocator *portallocator.PortAllocator) error { webhookServer := mgr.GetWebhookServer() - webhookServer.Register("/mutate-v1-pod", - &admission.Webhook{ - Handler: &TensorFusionPodMutator{ - decoder: admission.NewDecoder(runtime.NewScheme()), - Client: mgr.GetClient(), - portAllocator: portAllocator, - }, - }) + // Initialize DRA processor + draProcessor := NewDRAProcessor(mgr.GetClient()) + if err := draProcessor.InitializeDRAConfig(context.Background()); err != nil { + return fmt.Errorf("failed to initialize DRA config: %w", err) + } + + // Initialize DRA setting from global configuration + mutator := &TensorFusionPodMutator{ + decoder: admission.NewDecoder(runtime.NewScheme()), + Client: mgr.GetClient(), + portAllocator: portAllocator, + draProcessor: draProcessor, + } + + webhookServer.Register("/mutate-v1-pod", &admission.Webhook{Handler: mutator}) return nil } @@ -64,6 +71,7 @@ type TensorFusionPodMutator struct { Client client.Client decoder admission.Decoder portAllocator *portallocator.PortAllocator + draProcessor *DRAProcessor } // Handle implements admission.Handler interface. 
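For reference, BuildCELSelector above simply joins one condition per populated field with `&&`. Assuming an illustrative profile that requests 10 TFLOPS, 8Gi of VRAM, and GPU model "A100", the generated device selector would be:

```
device.attributes["tflops"].quantity >= quantity("10") && device.attributes["vram"].quantity >= quantity("8Gi") && device.attributes["model"] == "A100"
```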
@@ -100,7 +108,7 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque return admission.Errored(http.StatusBadRequest, fmt.Errorf("failed to marshal current pod: %w", err)) } - tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, pod) + tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, m.draProcessor, pod) if err != nil { return admission.Errored(http.StatusInternalServerError, fmt.Errorf("parse tf resources: %w", err)) } @@ -159,16 +167,28 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque return admission.Allowed("no valid container to inject tensor-fusion, skipped") } - // Add defaults and tensor-fusion injection logic + // Handle DRA-specific processing if enabled + if tfInfo.DRAEnabled { + // Process DRA workload + if err := m.draProcessor.HandleDRAAdmission(ctx, pod, &tfInfo, containerIndices); err != nil { + return admission.Errored(http.StatusInternalServerError, fmt.Errorf("failed to handle DRA admission: %w", err)) + } + } + + // Common processing for both DRA and regular modes utils.AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod, tfInfo) utils.AddTFDefaultClientConfBeforePatch(ctx, pod, pool, tfInfo, containerIndices) // Inject initContainer and env variables patches, err := m.patchTFClient( - pod, pool, tfInfo.Profile.IsLocalGPU, currentBytes, containerIndices, + ctx, pod, pool, tfInfo.Profile.IsLocalGPU, currentBytes, containerIndices, ) if err != nil { - log.Error(err, "failed to patch tf client", "pod", req.Name, "namespace", req.Namespace) + mode := "regular" + if tfInfo.DRAEnabled { + mode = "DRA" + } + log.Error(err, "failed to patch tf client", "mode", mode, "pod", req.Name, "namespace", req.Namespace) return admission.Errored(http.StatusInternalServerError, err) } @@ -266,6 +286,7 @@ func (m *TensorFusionPodMutator) createOrUpdateWorkload(ctx context.Context, pod } func (m *TensorFusionPodMutator) patchTFClient( + ctx context.Context, pod *corev1.Pod, pool *tfv1.GPUPool, isLocalGPU bool, diff --git a/internal/webhook/v1/pod_webhook_dra_test.go b/internal/webhook/v1/pod_webhook_dra_test.go new file mode 100644 index 00000000..09a738b0 --- /dev/null +++ b/internal/webhook/v1/pod_webhook_dra_test.go @@ -0,0 +1,413 @@ +package v1 + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + resourcev1beta2 "k8s.io/api/resource/v1beta2" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" +) + +func TestDRAProcessor_IsDRAEnabled(t *testing.T) { + tests := []struct { + name string + processorDRA bool + podAnnotations map[string]string + expected bool + }{ + { + name: "global DRA enabled, no pod annotation", + processorDRA: true, + expected: true, + }, + { + name: "global DRA disabled, no pod annotation", + processorDRA: false, + expected: false, + }, + { + name: "global DRA disabled, pod annotation enabled", + processorDRA: false, + podAnnotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.TrueStringValue, + }, + expected: true, + }, + { + name: "global DRA enabled, pod annotation disabled", + processorDRA: true, + podAnnotations: 
map[string]string{ + constants.DRAEnabledAnnotation: constants.FalseStringValue, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + processor := &DRAProcessor{ + enableDRA: tt.processorDRA, + configLoaded: true, // Skip config loading in tests + } + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: tt.podAnnotations, + }, + } + + result := processor.IsDRAEnabled(context.Background(), pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestDRAProcessor_convertToResourceClaim(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, tfv1.AddToScheme(scheme)) + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + + // Create a SchedulingConfigTemplate with DRA config + template := &tfv1.SchedulingConfigTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-template", + }, + Spec: tfv1.SchedulingConfigTemplateSpec{ + DRA: &tfv1.DRAConfig{ + Enable: &[]bool{true}[0], + ResourceClass: "custom.tensorfusion.ai/gpu", + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(template). + Build() + + processor := &DRAProcessor{ + Client: fakeClient, + } + + // Initialize DRA config to set up the resource class cache + err := processor.InitializeDRAConfig(context.Background()) + require.NoError(t, err) + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + GenerateName: "test-pod-", + UID: types.UID("test-uid"), + }, + } + + tfInfo := &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 1, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("10"), + Vram: resource.MustParse("8Gi"), + }, + }, + }, + } + + claim, err := processor.convertToResourceClaim(pod, tfInfo) + require.NoError(t, err) + require.NotNil(t, claim) + + // Verify claim structure + assert.Contains(t, claim.Name, "test-pod-") + assert.Contains(t, claim.Name, "-gpu-claim") + assert.Equal(t, "test-namespace", claim.Namespace) + assert.Equal(t, "resource.k8s.io/v1beta2", claim.APIVersion) + assert.Equal(t, "ResourceClaim", claim.Kind) + + // Verify labels instead of owner references (since we removed owner references during admission) + require.NotNil(t, claim.Labels) + assert.Equal(t, "test-pod-", claim.Labels["tensorfusion.ai/pod"]) // Uses GenerateName as podIdentifier + assert.Equal(t, "gpu", claim.Labels["tensorfusion.ai/claim-for"]) + + // Verify device claim + require.Len(t, claim.Spec.Devices.Requests, 1) + deviceReq := claim.Spec.Devices.Requests[0] + assert.Equal(t, "gpu", deviceReq.Name) + + // Verify ExactDeviceRequest structure + require.NotNil(t, deviceReq.Exactly) + exactReq := deviceReq.Exactly + assert.Equal(t, "custom.tensorfusion.ai/gpu", exactReq.DeviceClassName) // Uses cached resource class from template + assert.Equal(t, int64(1), exactReq.Count) + + // Verify CEL selector + require.Len(t, exactReq.Selectors, 1) + require.NotNil(t, exactReq.Selectors[0].CEL) + + // The simplified CEL selector should only contain basic resource requirements + celExpression := exactReq.Selectors[0].CEL.Expression + + // Verify it contains the expected resource filters (simplified version) + assert.Contains(t, celExpression, `device.attributes["tflops"].quantity >= quantity("10")`) + assert.Contains(t, celExpression, `device.attributes["vram"].quantity >= quantity("8Gi")`) + + // Verify conditions are combined with AND + assert.Contains(t, celExpression, " && ") +} + +func 
TestDRAProcessor_injectResourceClaimRef(t *testing.T) { + processor := &DRAProcessor{} + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "container1"}, + {Name: "container2"}, + }, + }, + } + + claim := &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "test-namespace", + }, + } + + containerIndices := []int{0, 1} + + processor.injectResourceClaimRef(pod, claim, containerIndices) + + // Verify pod resource claims + require.Len(t, pod.Spec.ResourceClaims, 1) + podClaim := pod.Spec.ResourceClaims[0] + assert.Equal(t, "gpu-claim", podClaim.Name) + require.NotNil(t, podClaim.ResourceClaimName) + assert.Equal(t, "test-claim", *podClaim.ResourceClaimName) + + // Verify container resource claims + for _, idx := range containerIndices { + container := pod.Spec.Containers[idx] + require.Len(t, container.Resources.Claims, 1) + assert.Equal(t, "gpu-claim", container.Resources.Claims[0].Name) + } + + // Verify annotations + require.NotNil(t, pod.Annotations) + assert.Equal(t, constants.TrueStringValue, pod.Annotations[constants.DRAEnabledAnnotation]) +} + +func TestDRAProcessor_createResourceClaim(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + + tests := []struct { + name string + existingClaim *resourcev1beta2.ResourceClaim + expectError bool + errorType string + }{ + { + name: "successful creation", + expectError: false, + }, + { + name: "claim already exists with same pod", + existingClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "test-namespace", + Labels: map[string]string{ + "tensorfusion.ai/pod": "test-pod", + "tensorfusion.ai/claim-for": "gpu", + }, + }, + }, + expectError: false, + }, + { + name: "claim already exists with different pod", + existingClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "test-namespace", + Labels: map[string]string{ + "tensorfusion.ai/pod": "different-pod", + "tensorfusion.ai/claim-for": "gpu", + }, + }, + }, + expectError: true, + errorType: "conflict", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var fakeClient client.Client + if tt.existingClaim != nil { + fakeClient = fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(tt.existingClaim). + Build() + } else { + fakeClient = fake.NewClientBuilder(). + WithScheme(scheme). 
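+				// No pre-existing claim is seeded in this branch, so the create path runs against an empty fake client.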
+ Build() + } + + processor := &DRAProcessor{ + Client: fakeClient, + } + + claim := &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "test-namespace", + Labels: map[string]string{ + "tensorfusion.ai/pod": "test-pod", + "tensorfusion.ai/claim-for": "gpu", + }, + }, + } + + err := processor.createResourceClaim(context.Background(), claim) + + if tt.expectError { + require.Error(t, err) + if tt.errorType == "conflict" { + assert.Contains(t, err.Error(), "already exists for a different pod") + } + } else { + require.NoError(t, err) + } + }) + } +} + +func TestHasDRAClaim(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + expected bool + }{ + { + name: "pod with resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{ + {Name: "gpu-claim"}, + }, + }, + }, + expected: true, + }, + { + name: "pod without resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{}, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := HasDRAClaim(tt.pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestDRAProcessor_LazyConfigLoading(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, tfv1.AddToScheme(scheme)) + + tests := []struct { + name string + templates []tfv1.SchedulingConfigTemplate + expected bool + }{ + { + name: "DRA enabled in template", + templates: []tfv1.SchedulingConfigTemplate{ + { + ObjectMeta: metav1.ObjectMeta{Name: "template1"}, + Spec: tfv1.SchedulingConfigTemplateSpec{ + DRA: &tfv1.DRAConfig{ + Enable: &[]bool{true}[0], + ResourceClass: "test.ai/gpu", + }, + }, + }, + }, + expected: true, + }, + { + name: "DRA disabled in template", + templates: []tfv1.SchedulingConfigTemplate{ + { + ObjectMeta: metav1.ObjectMeta{Name: "template1"}, + Spec: tfv1.SchedulingConfigTemplateSpec{ + DRA: &tfv1.DRAConfig{ + Enable: &[]bool{false}[0], + }, + }, + }, + }, + expected: false, + }, + { + name: "no templates", + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := make([]client.Object, len(tt.templates)) + for i, template := range tt.templates { + objects[i] = &template + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). 
+ Build() + + processor := &DRAProcessor{ + Client: fakeClient, + } + + // Test lazy loading by calling a method that triggers config loading + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{}, + }, + } + + result := processor.IsDRAEnabled(context.Background(), pod) + assert.Equal(t, tt.expected, result) + + // Verify config was loaded + assert.True(t, processor.configLoaded) + }) + } +} diff --git a/internal/webhook/v1/pod_webhook_test.go b/internal/webhook/v1/pod_webhook_test.go index 55f29233..718b0f8d 100644 --- a/internal/webhook/v1/pod_webhook_test.go +++ b/internal/webhook/v1/pod_webhook_test.go @@ -57,8 +57,9 @@ var _ = Describe("TensorFusionPodMutator", func() { decoder = admission.NewDecoder(scheme) mutator = &TensorFusionPodMutator{ - Client: k8sClient, - decoder: decoder, + Client: k8sClient, + decoder: decoder, + draProcessor: NewDRAProcessor(k8sClient), } }) @@ -532,7 +533,7 @@ var _ = Describe("TensorFusionPodMutator", func() { }, }, } - tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, pod) + tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, mutator.draProcessor, pod) Expect(err).NotTo(HaveOccurred()) Expect(tfInfo.ContainerNames).To(HaveLen(1)) Expect(tfInfo.ContainerNames[0]).To(Equal("test-container")) @@ -564,7 +565,7 @@ var _ = Describe("TensorFusionPodMutator", func() { currentBytes, err := json.Marshal(pod) Expect(err).NotTo(HaveOccurred()) - patch, err := mutator.patchTFClient(pod, pool, false, currentBytes, []int{0}) + patch, err := mutator.patchTFClient(context.Background(), pod, pool, false, currentBytes, []int{0}) Expect(err).NotTo(HaveOccurred()) Expect(patch).NotTo(BeEmpty()) // There should be at least 2 patches (initContainers and the container env patches) diff --git a/internal/webhook/v1/tf_parser.go b/internal/webhook/v1/tf_parser.go index bf805b76..1cfcd8f9 100644 --- a/internal/webhook/v1/tf_parser.go +++ b/internal/webhook/v1/tf_parser.go @@ -28,6 +28,7 @@ type TFResource struct { func ParseTensorFusionInfo( ctx context.Context, k8sClient client.Client, + draProcessor *DRAProcessor, pod *corev1.Pod, ) (utils.TensorFusionInfo, error) { var info utils.TensorFusionInfo @@ -115,6 +116,11 @@ func ParseTensorFusionInfo( workloadProfile.Spec.GPUModel = gpuModel } + // Parse DRA enabled annotation + if draProcessor.IsDRAEnabled(ctx, pod) { + info.DRAEnabled = true + } + info.Profile = &workloadProfile.Spec info.ContainerNames = containerNames return info, nil From 4959c61425890c7d4b83def9eff9862b595c993b Mon Sep 17 00:00:00 2001 From: dylan Date: Tue, 23 Sep 2025 09:29:39 -0700 Subject: [PATCH 30/34] resource template support --- api/v1/schedulingconfigtemplate_types.go | 5 +- cmd/main.go | 10 + internal/constants/constants.go | 12 +- .../dra/resourceclaim_controller.go | 184 ++++++ .../dra/resourceclaim_controller_test.go | 557 ++++++++++++++++++ internal/webhook/v1/pod_dra.go | 224 ++----- internal/webhook/v1/pod_webhook_dra_test.go | 220 ++----- 7 files changed, 867 insertions(+), 345 deletions(-) create mode 100644 internal/controller/dra/resourceclaim_controller.go create mode 100644 internal/controller/dra/resourceclaim_controller_test.go diff --git a/api/v1/schedulingconfigtemplate_types.go b/api/v1/schedulingconfigtemplate_types.go index b3243344..8611ed99 100644 --- a/api/v1/schedulingconfigtemplate_types.go +++ b/api/v1/schedulingconfigtemplate_types.go @@ -216,10 +216,9 @@ type DRAConfig struct { // +optional Enable *bool `json:"enable,omitempty"` - // ResourceClass specifies the DRA resource class name 
to use - // +kubebuilder:default="tensorfusion.ai/gpu" + // ResourceClaimTemplateName specifies the ResourceClaim template name to use // +optional - ResourceClass string `json:"resourceClass,omitempty"` + ResourceClaimTemplateName string `json:"resourceClaimTemplateName,omitempty"` } // SchedulingConfigTemplateStatus defines the observed state of SchedulingConfigTemplate. diff --git a/cmd/main.go b/cmd/main.go index 7f5f8721..5c994fcc 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -56,6 +56,7 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/controller" + "github.com/NexusGPU/tensor-fusion/internal/controller/dra" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/metrics" "github.com/NexusGPU/tensor-fusion/internal/portallocator" @@ -397,6 +398,15 @@ func startCustomResourceController( setupLog.Error(err, "unable to create controller", "controller", "Pod") os.Exit(1) } + + // Setup ResourceClaim controller for DRA Phase 2 + if err = (&dra.ResourceClaimReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ResourceClaim") + os.Exit(1) + } if err = (&controller.NodeReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), diff --git a/internal/constants/constants.go b/internal/constants/constants.go index 22d465cc..9a44345c 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -124,12 +124,20 @@ const ( // DRA support // annotation for pod to indicate if DRA is enabled - DRAEnabledAnnotation = Domain + "/dra-enabled" - DRAResourceClaimName = "tensor-fusion-resource-claim-%s" + DRAEnabledAnnotation = Domain + "/dra-enabled" + DRACelExpressionAnnotation = Domain + "/dra-cel-expression" + + DRADriverName = Domain + ".dra-driver" + DRAResourceClaimName = "tensor-fusion-resource-claim-%s-%s" // resource claim name for request DRAResourceClaimRequestName = "tensor-fusion-resource-claim-request-%s" DRAClaimDefineName = "tensor-fusion-gpu-claim" + + TensorFusionResourceClaimTemplateLabel = Domain + "/resource-claim-template" + + // ResourceClaimTemplate related constants + DRAResourceClaimTemplateName = "tensor-fusion-gpu-template" ) // for avoid golang lint issues diff --git a/internal/controller/dra/resourceclaim_controller.go b/internal/controller/dra/resourceclaim_controller.go new file mode 100644 index 00000000..679fb8cb --- /dev/null +++ b/internal/controller/dra/resourceclaim_controller.go @@ -0,0 +1,184 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package dra + +import ( + "context" + "fmt" + + resourcev1beta2 "k8s.io/api/resource/v1beta2" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +// ResourceClaimReconciler reconciles ResourceClaim objects +type ResourceClaimReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceclaims,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +func (r *ResourceClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + + // Fetch the ResourceClaim instance + resourceClaim := &resourcev1beta2.ResourceClaim{} + if err := r.Get(ctx, req.NamespacedName, resourceClaim); err != nil { + if errors.IsNotFound(err) { + // Request object not found, could have been deleted after reconcile request. + // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers. + // Return and don't requeue + log.Info("ResourceClaim resource not found. Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. + log.Error(err, "Failed to get ResourceClaim") + return ctrl.Result{}, err + } + + // Check if this ResourceClaim is created from our ResourceClaimTemplate + if resourceClaim.Labels == nil { + // No labels, not our ResourceClaim + return ctrl.Result{}, nil + } + + labelValue, exists := resourceClaim.Labels[constants.TensorFusionResourceClaimTemplateLabel] + if !exists || labelValue != constants.TrueStringValue { + // Not our ResourceClaim, ignore + return ctrl.Result{}, nil + } + + log.Info("Processing TensorFusion ResourceClaim", "name", resourceClaim.Name, "namespace", resourceClaim.Namespace) + + // Find the owner Pod to get the CEL expression annotation + ownerPod, err := r.findOwnerPod(ctx, resourceClaim) + if err != nil { + log.Error(err, "Failed to find owner Pod") + return ctrl.Result{}, err + } + + if ownerPod == nil { + log.Info("Owner Pod not found, ResourceClaim may not have OwnerReference yet") + return ctrl.Result{RequeueAfter: constants.PendingRequeueDuration}, nil + } + + // Get CEL expression from Pod annotation + celExpression := ownerPod.Annotations[constants.DRACelExpressionAnnotation] + if celExpression == "" { + log.Info("No CEL expression found in Pod annotation", "pod", ownerPod.Name) + return ctrl.Result{}, nil + } + + // Update ResourceClaim with CEL expression + if err := r.updateResourceClaimCEL(ctx, resourceClaim, celExpression); err != nil { + log.Error(err, "Failed to update ResourceClaim CEL expression") + return ctrl.Result{}, err + } + + log.Info("Successfully updated ResourceClaim with CEL expression", "cel", celExpression) + return ctrl.Result{}, nil +} + +// findOwnerPod finds the Pod that owns this ResourceClaim +func (r *ResourceClaimReconciler) findOwnerPod(ctx context.Context, resourceClaim *resourcev1beta2.ResourceClaim) (*corev1.Pod, error) { + // Find the Pod OwnerReference (there should be exactly 
one) + var podOwnerRef *metav1.OwnerReference + for i, ownerRef := range resourceClaim.OwnerReferences { + if ownerRef.Kind == "Pod" && ownerRef.APIVersion == "v1" { + podOwnerRef = &resourceClaim.OwnerReferences[i] + break + } + } + + if podOwnerRef == nil { + return nil, nil // No Pod owner found + } + + // Get the Pod by name and namespace (UID is automatically verified by Kubernetes) + pod := &corev1.Pod{} + err := r.Get(ctx, types.NamespacedName{ + Name: podOwnerRef.Name, + Namespace: resourceClaim.Namespace, + }, pod) + if err != nil { + if errors.IsNotFound(err) { + return nil, nil // Pod was deleted + } + return nil, fmt.Errorf("failed to get owner Pod %s/%s: %w", resourceClaim.Namespace, podOwnerRef.Name, err) + } + + // Verify the UID matches (additional safety check) + if pod.UID != podOwnerRef.UID { + return nil, fmt.Errorf("Pod UID mismatch: expected %s, got %s", podOwnerRef.UID, pod.UID) + } + + return pod, nil +} + +// updateResourceClaimCEL updates the ResourceClaim's CEL selector expression +func (r *ResourceClaimReconciler) updateResourceClaimCEL(ctx context.Context, resourceClaim *resourcev1beta2.ResourceClaim, celExpression string) error { + // Check if we need to update + if len(resourceClaim.Spec.Devices.Requests) == 0 { + return fmt.Errorf("no device requests found in ResourceClaim") + } + + deviceReq := &resourceClaim.Spec.Devices.Requests[0] + if deviceReq.Exactly == nil { + return fmt.Errorf("no ExactDeviceRequest found") + } + + // Check if CEL expression is already set correctly + if len(deviceReq.Exactly.Selectors) > 0 && + deviceReq.Exactly.Selectors[0].CEL != nil && + deviceReq.Exactly.Selectors[0].CEL.Expression == celExpression { + // Already updated + return nil + } + + // Update the CEL expression + if len(deviceReq.Exactly.Selectors) == 0 { + deviceReq.Exactly.Selectors = []resourcev1beta2.DeviceSelector{{}} + } + + if deviceReq.Exactly.Selectors[0].CEL == nil { + deviceReq.Exactly.Selectors[0].CEL = &resourcev1beta2.CELDeviceSelector{} + } + + deviceReq.Exactly.Selectors[0].CEL.Expression = celExpression + + // Update the ResourceClaim + return r.Update(ctx, resourceClaim) +} + +// SetupWithManager sets up the controller with the Manager. +func (r *ResourceClaimReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&resourcev1beta2.ResourceClaim{}). 
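+		// Note: this registers the reconciler for every ResourceClaim in the cluster;
+		// Reconcile returns early for claims that lack the TensorFusion
+		// resource-claim-template label, so unrelated claims are cheap no-ops.
+		// A label-based event filter here is a possible follow-up, not assumed to exist yet.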
+ Complete(r) +} \ No newline at end of file diff --git a/internal/controller/dra/resourceclaim_controller_test.go b/internal/controller/dra/resourceclaim_controller_test.go new file mode 100644 index 00000000..ece541f9 --- /dev/null +++ b/internal/controller/dra/resourceclaim_controller_test.go @@ -0,0 +1,557 @@ +package dra + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + resourcev1beta2 "k8s.io/api/resource/v1beta2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +func TestResourceClaimReconciler_Reconcile(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + tests := []struct { + name string + resourceClaim *resourcev1beta2.ResourceClaim + pod *corev1.Pod + expectedResult ctrl.Result + expectError bool + expectUpdate bool + }{ + { + name: "ResourceClaim not found", + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "ResourceClaim without TensorFusion label", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "ResourceClaim with wrong label value", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: "false", + }, + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "ResourceClaim without owner Pod", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: constants.TrueStringValue, + }, + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + expectedResult: ctrl.Result{RequeueAfter: constants.PendingRequeueDuration}, + expectError: false, + }, + { + name: "Owner Pod without CEL annotation", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: constants.TrueStringValue, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "Successful CEL expression update", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + 
Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: constants.TrueStringValue, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + Annotations: map[string]string{ + constants.DRACelExpressionAnnotation: `device.attributes["tflops"].quantity >= quantity("10")`, + }, + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + expectUpdate: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var objects []runtime.Object + if tt.resourceClaim != nil { + objects = append(objects, tt.resourceClaim) + } + if tt.pod != nil { + objects = append(objects, tt.pod) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(objects...). + Build() + + reconciler := &ResourceClaimReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-claim", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(context.Background(), req) + + if tt.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + assert.Equal(t, tt.expectedResult, result) + + // Check if ResourceClaim was updated with CEL expression + if tt.expectUpdate && tt.resourceClaim != nil { + updatedClaim := &resourcev1beta2.ResourceClaim{} + err := fakeClient.Get(context.Background(), types.NamespacedName{ + Name: tt.resourceClaim.Name, + Namespace: tt.resourceClaim.Namespace, + }, updatedClaim) + require.NoError(t, err) + + require.Len(t, updatedClaim.Spec.Devices.Requests, 1) + deviceReq := updatedClaim.Spec.Devices.Requests[0] + require.NotNil(t, deviceReq.Exactly) + require.Len(t, deviceReq.Exactly.Selectors, 1) + require.NotNil(t, deviceReq.Exactly.Selectors[0].CEL) + assert.Equal(t, `device.attributes["tflops"].quantity >= quantity("10")`, deviceReq.Exactly.Selectors[0].CEL.Expression) + } + }) + } +} + +func TestResourceClaimReconciler_findOwnerPod(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + + tests := []struct { + name string + resourceClaim *resourcev1beta2.ResourceClaim + pod *corev1.Pod + expectedPod *corev1.Pod + expectError bool + }{ + { + name: "No owner references", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + }, + expectedPod: nil, + expectError: false, + }, + { + name: "No Pod owner reference", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "test-deployment", + UID: "deployment-uid-123", + }, + }, + }, + }, + expectedPod: nil, + expectError: false, + }, + { + name: "Pod owner not found", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: 
"Pod", + Name: "nonexistent-pod", + UID: "pod-uid-123", + }, + }, + }, + }, + expectedPod: nil, + expectError: false, + }, + { + name: "Pod UID mismatch", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "different-uid", + }, + }, + expectedPod: nil, + expectError: true, + }, + { + name: "Successful Pod lookup", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + }, + }, + expectedPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + }, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var objects []runtime.Object + if tt.pod != nil { + objects = append(objects, tt.pod) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(objects...). + Build() + + reconciler := &ResourceClaimReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + pod, err := reconciler.findOwnerPod(context.Background(), tt.resourceClaim) + + if tt.expectError { + require.Error(t, err) + assert.Nil(t, pod) + } else { + require.NoError(t, err) + if tt.expectedPod == nil { + assert.Nil(t, pod) + } else { + require.NotNil(t, pod) + assert.Equal(t, tt.expectedPod.Name, pod.Name) + assert.Equal(t, tt.expectedPod.Namespace, pod.Namespace) + assert.Equal(t, tt.expectedPod.UID, pod.UID) + } + } + }) + } +} + +func TestResourceClaimReconciler_updateResourceClaimCEL(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + + tests := []struct { + name string + resourceClaim *resourcev1beta2.ResourceClaim + celExpression string + expectError bool + expectUpdate bool + }{ + { + name: "No device requests", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{}, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: true, + }, + { + name: "No ExactDeviceRequest", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + // Exactly is nil + }, + }, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: true, + }, + { + name: "CEL expression already set correctly", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: 
"gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + Selectors: []resourcev1beta2.DeviceSelector{ + { + CEL: &resourcev1beta2.CELDeviceSelector{ + Expression: `device.attributes["tflops"].quantity >= quantity("10")`, + }, + }, + }, + }, + }, + }, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: false, + expectUpdate: false, // No update needed + }, + { + name: "Successful CEL expression update - empty selectors", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: false, + expectUpdate: true, + }, + { + name: "Successful CEL expression update - nil CEL", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + Selectors: []resourcev1beta2.DeviceSelector{ + { + // CEL is nil + }, + }, + }, + }, + }, + }, + }, + }, + celExpression: `device.attributes["vram"].quantity >= quantity("8Gi")`, + expectError: false, + expectUpdate: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(tt.resourceClaim). 
+ Build() + + reconciler := &ResourceClaimReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateResourceClaimCEL(context.Background(), tt.resourceClaim, tt.celExpression) + + if tt.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + + if tt.expectUpdate { + // Verify the CEL expression was set correctly + require.Len(t, tt.resourceClaim.Spec.Devices.Requests, 1) + deviceReq := tt.resourceClaim.Spec.Devices.Requests[0] + require.NotNil(t, deviceReq.Exactly) + require.Len(t, deviceReq.Exactly.Selectors, 1) + require.NotNil(t, deviceReq.Exactly.Selectors[0].CEL) + assert.Equal(t, tt.celExpression, deviceReq.Exactly.Selectors[0].CEL.Expression) + } + } + }) + } +} \ No newline at end of file diff --git a/internal/webhook/v1/pod_dra.go b/internal/webhook/v1/pod_dra.go index ef2bd0b3..87f73154 100644 --- a/internal/webhook/v1/pod_dra.go +++ b/internal/webhook/v1/pod_dra.go @@ -18,15 +18,10 @@ package v1 import ( "context" - "crypto/rand" - "encoding/hex" "fmt" "strings" corev1 "k8s.io/api/core/v1" - resourcev1beta2 "k8s.io/api/resource/v1beta2" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" @@ -37,16 +32,9 @@ import ( // DRAProcessor handles all DRA-related operations for pod admission type DRAProcessor struct { client.Client - enableDRA bool - resourceClass string // cached resource class to avoid repeated API calls - configLoaded bool // tracks if configuration has been loaded -} - -// generateUniqueID creates a random 8-character hex string for resource claim names -func generateUniqueID() string { - bytes := make([]byte, 4) - _, _ = rand.Read(bytes) // crypto/rand.Read always returns len(bytes), nil on success - return hex.EncodeToString(bytes) + enableDRA bool + resourceClaimTemplateName string // cached ResourceClaimTemplate name + configLoaded bool // tracks if configuration has been loaded } // NewDRAProcessor creates a new DRA processor @@ -67,6 +55,7 @@ func (p *DRAProcessor) InitializeDRAConfig(ctx context.Context) error { // Set defaults first p.enableDRA = false + p.resourceClaimTemplateName = constants.DRAResourceClaimTemplateName templateList := &tfv1.SchedulingConfigTemplateList{} // Use the provided context to respect cancellation @@ -78,29 +67,29 @@ func (p *DRAProcessor) InitializeDRAConfig(ctx context.Context) error { return nil } - // Check if any template has DRA enabled and cache the resource class + // Check if any template has DRA enabled and cache the ResourceClaimTemplateName for _, template := range templateList.Items { if template.Spec.DRA != nil { if template.Spec.DRA.Enable != nil && *template.Spec.DRA.Enable { p.enableDRA = true } - // Cache the resource class from the template - if template.Spec.DRA.ResourceClass != "" { - p.resourceClass = template.Spec.DRA.ResourceClass + // Cache the ResourceClaimTemplateName from the template + if template.Spec.DRA.ResourceClaimTemplateName != "" { + p.resourceClaimTemplateName = template.Spec.DRA.ResourceClaimTemplateName } } } - if p.enableDRA && p.resourceClass == "" { - return fmt.Errorf("resource class is not set") - } - p.configLoaded = true return nil } // IsDRAEnabled checks if DRA is enabled for a specific pod func (p *DRAProcessor) IsDRAEnabled(ctx context.Context, pod *corev1.Pod) bool { + // Load configuration if not yet loaded (lazy loading) + if !p.configLoaded { + _ = p.InitializeDRAConfig(ctx) // Ignore error to maintain 
backward compatibility + } // Check pod-level annotation first (explicit override) if val, ok := pod.Annotations[constants.DRAEnabledAnnotation]; ok && val == constants.TrueStringValue { @@ -121,161 +110,29 @@ func HasDRAClaim(pod *corev1.Pod) bool { return len(pod.Spec.ResourceClaims) > 0 } -// convertToResourceClaim converts GPU resource requests to ResourceClaim -func (p *DRAProcessor) convertToResourceClaim(pod *corev1.Pod, tfInfo *utils.TensorFusionInfo) (*resourcev1beta2.ResourceClaim, error) { +// HandleDRAAdmission handles the complete DRA admission process +func (p *DRAProcessor) HandleDRAAdmission(ctx context.Context, pod *corev1.Pod, tfInfo *utils.TensorFusionInfo, containerIndices []int) error { + // Load DRA configuration if needed + if err := p.InitializeDRAConfig(ctx); err != nil { + return fmt.Errorf("failed to load DRA config: %w", err) + } - // Build CEL selector using DRA helper + // Convert GPU resources to ResourceClaimTemplate reference and store CEL in annotation celSelector, err := BuildCELSelector(pod, tfInfo) if err != nil { - return nil, fmt.Errorf("failed to build CEL selector: %w", err) - } - - // Generate unique claim name with random suffix to avoid conflicts - var baseName string - - if pod.GenerateName != "" { - baseName = strings.TrimSuffix(pod.GenerateName, "-") - } else if pod.Name != "" { - baseName = pod.Name - } - - uniqueID := generateUniqueID() - claimName := fmt.Sprintf(constants.DRAResourceClaimName, baseName, uniqueID) - - // Use cached resource class instead of making API calls - resourceClass := p.resourceClass - - claim := &resourcev1beta2.ResourceClaim{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "resource.k8s.io/v1beta2", - Kind: "ResourceClaim", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: claimName, - Namespace: pod.Namespace, - // Note: We cannot set OwnerReference here because pod.UID is empty during admission. - // The controller will set the proper owner reference once the Pod is created. 
- }, - Spec: resourcev1beta2.ResourceClaimSpec{ - Devices: resourcev1beta2.DeviceClaim{ - Requests: []resourcev1beta2.DeviceRequest{ - { - Name: fmt.Sprintf(constants.DRAResourceClaimRequestName, generateUniqueID()), - Exactly: &resourcev1beta2.ExactDeviceRequest{ - DeviceClassName: resourceClass, - Selectors: []resourcev1beta2.DeviceSelector{ - { - CEL: &resourcev1beta2.CELDeviceSelector{ - Expression: celSelector, - }, - }, - }, - Count: int64(tfInfo.Profile.GPUCount), - }, - }, - }, - }, - }, - } - - return claim, nil -} - -// injectResourceClaimRef adds ResourceClaim reference to Pod spec -func (p *DRAProcessor) injectResourceClaimRef(pod *corev1.Pod, claim *resourcev1beta2.ResourceClaim, containerIndices []int) { - // Add ResourceClaim reference to pod.Spec.ResourceClaims - if pod.Spec.ResourceClaims == nil { - pod.Spec.ResourceClaims = []corev1.PodResourceClaim{} - } - - claimRef := corev1.PodResourceClaim{ - Name: constants.DRAClaimDefineName, - ResourceClaimName: &claim.Name, - } - - // Check if the claim reference already exists to maintain idempotency - claimExists := false - for i, existingClaim := range pod.Spec.ResourceClaims { - if existingClaim.Name == constants.DRAClaimDefineName { - // Update existing claim to point to the new ResourceClaim name - pod.Spec.ResourceClaims[i].ResourceClaimName = &claim.Name - claimExists = true - break - } - } - - if !claimExists { - pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims, claimRef) - } - - // Add resource claim consumption to containers - for _, containerIndex := range containerIndices { - container := &pod.Spec.Containers[containerIndex] - if container.Resources.Claims == nil { - container.Resources.Claims = []corev1.ResourceClaim{} - } - - // Check if the container already has this claim to maintain idempotency - hasGPUClaim := false - for _, existingClaim := range container.Resources.Claims { - if existingClaim.Name == constants.DRAClaimDefineName { - hasGPUClaim = true - break - } - } - - if !hasGPUClaim { - container.Resources.Claims = append(container.Resources.Claims, corev1.ResourceClaim{ - Name: constants.DRAClaimDefineName, - }) - } - } -} - -// createResourceClaim creates a ResourceClaim object with proper error handling and retries -func (p *DRAProcessor) createResourceClaim(ctx context.Context, claim *resourcev1beta2.ResourceClaim) error { - // Try to create the ResourceClaim - if err := p.Create(ctx, claim); err != nil { - if errors.IsAlreadyExists(err) { - // Check if the existing claim is for the same pod - existingClaim := &resourcev1beta2.ResourceClaim{} - getErr := p.Get(ctx, client.ObjectKey{Name: claim.Name, Namespace: claim.Namespace}, existingClaim) - if getErr != nil { - return fmt.Errorf("failed to check existing ResourceClaim: %w", getErr) - } - // Different pod or missing labels, this is an error - return fmt.Errorf("ResourceClaim %s already exists for a different pod", claim.Name) - } - - if errors.IsInvalid(err) { - return fmt.Errorf("ResourceClaim is invalid: %w", err) - } - - if errors.IsForbidden(err) { - return fmt.Errorf("insufficient permissions to create ResourceClaim: %w", err) - } + return fmt.Errorf("failed to build CEL selector: %w", err) } - return nil -} - -// Note: patchTFClientForDRA is temporarily handled in the main pod_webhook.go -// until we can properly abstract all the TF client patching logic + // Inject ResourceClaimTemplate reference to Pod + p.injectResourceClaimTemplateRef(pod) -// HandleDRAAdmission handles the complete DRA admission process -func (p *DRAProcessor) 
HandleDRAAdmission(ctx context.Context, pod *corev1.Pod, tfInfo *utils.TensorFusionInfo, containerIndices []int) error { - // Convert GPU resources to ResourceClaim - resourceClaim, err := p.convertToResourceClaim(pod, tfInfo) - if err != nil { - return fmt.Errorf("failed to convert to ResourceClaim: %w", err) + // Mark pod with DRA enabled annotation + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) } + pod.Annotations[constants.DRAEnabledAnnotation] = constants.TrueStringValue + pod.Annotations[constants.DRACelExpressionAnnotation] = celSelector - // Create ResourceClaim - if err := p.createResourceClaim(ctx, resourceClaim); err != nil { - return fmt.Errorf("failed to create ResourceClaim: %w", err) - } - // Inject ResourceClaim reference to Pod - p.injectResourceClaimRef(pod, resourceClaim, containerIndices) return nil } @@ -305,3 +162,30 @@ func BuildCELSelector(pod *corev1.Pod, tfInfo *utils.TensorFusionInfo) (string, return strings.Join(conditions, " && "), nil } + +// injectResourceClaimTemplateRef adds ResourceClaimTemplate reference to Pod spec +func (p *DRAProcessor) injectResourceClaimTemplateRef(pod *corev1.Pod) { + // Add ResourceClaimTemplate reference to pod.Spec.ResourceClaims + if pod.Spec.ResourceClaims == nil { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{} + } + + // Use ResourceClaimTemplate instead of direct ResourceClaim + claimRef := corev1.PodResourceClaim{ + Name: constants.DRAClaimDefineName, + ResourceClaimTemplateName: &p.resourceClaimTemplateName, + } + + // Check if the claim reference already exists to maintain idempotency + claimExists := false + for _, existingClaim := range pod.Spec.ResourceClaims { + if existingClaim.Name == constants.DRAClaimDefineName { + claimExists = true + break + } + } + + if !claimExists { + pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims, claimRef) + } +} diff --git a/internal/webhook/v1/pod_webhook_dra_test.go b/internal/webhook/v1/pod_webhook_dra_test.go index 09a738b0..fd625cc9 100644 --- a/internal/webhook/v1/pod_webhook_dra_test.go +++ b/internal/webhook/v1/pod_webhook_dra_test.go @@ -7,11 +7,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" - resourcev1beta2 "k8s.io/api/resource/v1beta2" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -74,10 +72,9 @@ func TestDRAProcessor_IsDRAEnabled(t *testing.T) { } } -func TestDRAProcessor_convertToResourceClaim(t *testing.T) { +func TestDRAProcessor_HandleDRAAdmission(t *testing.T) { scheme := runtime.NewScheme() require.NoError(t, tfv1.AddToScheme(scheme)) - require.NoError(t, resourcev1beta2.AddToScheme(scheme)) // Create a SchedulingConfigTemplate with DRA config template := &tfv1.SchedulingConfigTemplate{ @@ -86,8 +83,8 @@ func TestDRAProcessor_convertToResourceClaim(t *testing.T) { }, Spec: tfv1.SchedulingConfigTemplateSpec{ DRA: &tfv1.DRAConfig{ - Enable: &[]bool{true}[0], - ResourceClass: "custom.tensorfusion.ai/gpu", + Enable: &[]bool{true}[0], + ResourceClaimTemplateName: "custom-gpu-template", }, }, } @@ -101,16 +98,15 @@ func TestDRAProcessor_convertToResourceClaim(t *testing.T) { Client: fakeClient, } - // Initialize DRA config to set up the resource class cache - err := processor.InitializeDRAConfig(context.Background()) - require.NoError(t, err) - pod := &corev1.Pod{ 
ObjectMeta: metav1.ObjectMeta{ - Name: "test-pod", - Namespace: "test-namespace", - GenerateName: "test-pod-", - UID: types.UID("test-uid"), + Name: "test-pod", + Namespace: "test-namespace", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "test-container"}, + }, }, } @@ -126,180 +122,64 @@ func TestDRAProcessor_convertToResourceClaim(t *testing.T) { }, } - claim, err := processor.convertToResourceClaim(pod, tfInfo) + containerIndices := []int{0} + + // Test HandleDRAAdmission + err := processor.HandleDRAAdmission(context.Background(), pod, tfInfo, containerIndices) require.NoError(t, err) - require.NotNil(t, claim) - - // Verify claim structure - assert.Contains(t, claim.Name, "test-pod-") - assert.Contains(t, claim.Name, "-gpu-claim") - assert.Equal(t, "test-namespace", claim.Namespace) - assert.Equal(t, "resource.k8s.io/v1beta2", claim.APIVersion) - assert.Equal(t, "ResourceClaim", claim.Kind) - - // Verify labels instead of owner references (since we removed owner references during admission) - require.NotNil(t, claim.Labels) - assert.Equal(t, "test-pod-", claim.Labels["tensorfusion.ai/pod"]) // Uses GenerateName as podIdentifier - assert.Equal(t, "gpu", claim.Labels["tensorfusion.ai/claim-for"]) - - // Verify device claim - require.Len(t, claim.Spec.Devices.Requests, 1) - deviceReq := claim.Spec.Devices.Requests[0] - assert.Equal(t, "gpu", deviceReq.Name) - - // Verify ExactDeviceRequest structure - require.NotNil(t, deviceReq.Exactly) - exactReq := deviceReq.Exactly - assert.Equal(t, "custom.tensorfusion.ai/gpu", exactReq.DeviceClassName) // Uses cached resource class from template - assert.Equal(t, int64(1), exactReq.Count) - - // Verify CEL selector - require.Len(t, exactReq.Selectors, 1) - require.NotNil(t, exactReq.Selectors[0].CEL) - - // The simplified CEL selector should only contain basic resource requirements - celExpression := exactReq.Selectors[0].CEL.Expression - - // Verify it contains the expected resource filters (simplified version) + + // Verify CEL expression is stored in Pod annotation + celExpression := pod.Annotations[constants.DRACelExpressionAnnotation] + require.NotEmpty(t, celExpression) assert.Contains(t, celExpression, `device.attributes["tflops"].quantity >= quantity("10")`) assert.Contains(t, celExpression, `device.attributes["vram"].quantity >= quantity("8Gi")`) - // Verify conditions are combined with AND - assert.Contains(t, celExpression, " && ") -} + // Verify DRA enabled annotation is set + assert.Equal(t, constants.TrueStringValue, pod.Annotations[constants.DRAEnabledAnnotation]) -func TestDRAProcessor_injectResourceClaimRef(t *testing.T) { - processor := &DRAProcessor{} + // Verify ResourceClaimTemplate reference is added to Pod + require.Len(t, pod.Spec.ResourceClaims, 1) + podClaim := pod.Spec.ResourceClaims[0] + assert.Equal(t, constants.DRAClaimDefineName, podClaim.Name) + require.NotNil(t, podClaim.ResourceClaimTemplateName) + assert.Equal(t, "custom-gpu-template", *podClaim.ResourceClaimTemplateName) + + // Verify processor has cached the ResourceClaimTemplateName + assert.Equal(t, "custom-gpu-template", processor.resourceClaimTemplateName) +} +func TestBuildCELSelector(t *testing.T) { pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pod", Namespace: "test-namespace", }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Name: "container1"}, - {Name: "container2"}, - }, - }, - } - - claim := &resourcev1beta2.ResourceClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-claim", - Namespace: 
"test-namespace", - }, - } - - containerIndices := []int{0, 1} - - processor.injectResourceClaimRef(pod, claim, containerIndices) - - // Verify pod resource claims - require.Len(t, pod.Spec.ResourceClaims, 1) - podClaim := pod.Spec.ResourceClaims[0] - assert.Equal(t, "gpu-claim", podClaim.Name) - require.NotNil(t, podClaim.ResourceClaimName) - assert.Equal(t, "test-claim", *podClaim.ResourceClaimName) - - // Verify container resource claims - for _, idx := range containerIndices { - container := pod.Spec.Containers[idx] - require.Len(t, container.Resources.Claims, 1) - assert.Equal(t, "gpu-claim", container.Resources.Claims[0].Name) } - // Verify annotations - require.NotNil(t, pod.Annotations) - assert.Equal(t, constants.TrueStringValue, pod.Annotations[constants.DRAEnabledAnnotation]) -} - -func TestDRAProcessor_createResourceClaim(t *testing.T) { - scheme := runtime.NewScheme() - require.NoError(t, resourcev1beta2.AddToScheme(scheme)) - - tests := []struct { - name string - existingClaim *resourcev1beta2.ResourceClaim - expectError bool - errorType string - }{ - { - name: "successful creation", - expectError: false, - }, - { - name: "claim already exists with same pod", - existingClaim: &resourcev1beta2.ResourceClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-claim", - Namespace: "test-namespace", - Labels: map[string]string{ - "tensorfusion.ai/pod": "test-pod", - "tensorfusion.ai/claim-for": "gpu", - }, - }, - }, - expectError: false, - }, - { - name: "claim already exists with different pod", - existingClaim: &resourcev1beta2.ResourceClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-claim", - Namespace: "test-namespace", - Labels: map[string]string{ - "tensorfusion.ai/pod": "different-pod", - "tensorfusion.ai/claim-for": "gpu", - }, + tfInfo := &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 2, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("20"), + Vram: resource.MustParse("16Gi"), }, }, - expectError: true, - errorType: "conflict", + GPUModel: "H100", }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - var fakeClient client.Client - if tt.existingClaim != nil { - fakeClient = fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(tt.existingClaim). - Build() - } else { - fakeClient = fake.NewClientBuilder(). - WithScheme(scheme). 
- Build() - } - - processor := &DRAProcessor{ - Client: fakeClient, - } - - claim := &resourcev1beta2.ResourceClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-claim", - Namespace: "test-namespace", - Labels: map[string]string{ - "tensorfusion.ai/pod": "test-pod", - "tensorfusion.ai/claim-for": "gpu", - }, - }, - } + celExpression, err := BuildCELSelector(pod, tfInfo) + require.NoError(t, err) + require.NotEmpty(t, celExpression) - err := processor.createResourceClaim(context.Background(), claim) + // Verify it contains the expected resource filters + assert.Contains(t, celExpression, `device.attributes["tflops"].quantity >= quantity("20")`) + assert.Contains(t, celExpression, `device.attributes["vram"].quantity >= quantity("16Gi")`) + assert.Contains(t, celExpression, `device.attributes["model"] == "H100"`) - if tt.expectError { - require.Error(t, err) - if tt.errorType == "conflict" { - assert.Contains(t, err.Error(), "already exists for a different pod") - } - } else { - require.NoError(t, err) - } - }) - } + // Verify conditions are combined with AND + assert.Contains(t, celExpression, " && ") } func TestHasDRAClaim(t *testing.T) { @@ -352,8 +232,8 @@ func TestDRAProcessor_LazyConfigLoading(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "template1"}, Spec: tfv1.SchedulingConfigTemplateSpec{ DRA: &tfv1.DRAConfig{ - Enable: &[]bool{true}[0], - ResourceClass: "test.ai/gpu", + Enable: &[]bool{true}[0], + ResourceClaimTemplateName: "test-gpu-template", }, }, }, From ff9efd2244b1550ebc2e5364b430a867332606ea Mon Sep 17 00:00:00 2001 From: dylan Date: Wed, 24 Sep 2025 07:48:13 -0700 Subject: [PATCH 31/34] support resource claim cel builder --- internal/constants/constants.go | 2 - internal/gpuallocator/gpuallocator.go | 2 +- internal/webhook/v1/pod_dra.go | 24 +++- internal/webhook/v1/pod_webhook_dra_test.go | 146 +++++++++++++++++--- 4 files changed, 147 insertions(+), 27 deletions(-) diff --git a/internal/constants/constants.go b/internal/constants/constants.go index 9a44345c..2d8eae1a 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -74,8 +74,6 @@ const ( // In remote vGPU mode, selected workload is set by user with /workload annotation or generated by system SelectedWorkloadAnnotation = Domain + "/selected-workload" - CELFilterExpressionAnnotation = Domain + "/cel-filter-expression" - WorkloadModeAnnotation = Domain + "/workload-mode" WorkloadModeDynamic = "dynamic" WorkloadModeFixed = "fixed" diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index 44deb3c4..2e8e54fd 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -1355,7 +1355,7 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest Limit: gpuLimitResource, DisableCELFilter: disableCELFilter, - CELFilterExpression: pod.Annotations[constants.CELFilterExpressionAnnotation], + CELFilterExpression: pod.Annotations[constants.DRACelExpressionAnnotation], Count: uint(count), GPUModel: pod.Annotations[constants.GPUModelAnnotation], diff --git a/internal/webhook/v1/pod_dra.go b/internal/webhook/v1/pod_dra.go index 87f73154..cd1b7c8c 100644 --- a/internal/webhook/v1/pod_dra.go +++ b/internal/webhook/v1/pod_dra.go @@ -136,7 +136,7 @@ func (p *DRAProcessor) HandleDRAAdmission(ctx context.Context, pod *corev1.Pod, return nil } -// TODO: support more attributes for filtering +// BuildCELSelector constructs a CEL expression for DRA device selection based on TensorFusion requirements func 
BuildCELSelector(pod *corev1.Pod, tfInfo *utils.TensorFusionInfo) (string, error) { var conditions []string @@ -154,6 +154,28 @@ func BuildCELSelector(pod *corev1.Pod, tfInfo *utils.TensorFusionInfo) (string, conditions = append(conditions, fmt.Sprintf(`device.attributes["model"] == "%s"`, tfInfo.Profile.GPUModel)) } + // 3. GPU count requirement (important for multi-GPU workloads) + if tfInfo.Profile.GPUCount > 0 { + conditions = append(conditions, fmt.Sprintf(`int(device.attributes["gpu_count"]) >= %d`, tfInfo.Profile.GPUCount)) + } + + // 4. Pool name filter (for resource isolation and scheduling preferences) + if tfInfo.Profile.PoolName != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["pool_name"] == "%s"`, tfInfo.Profile.PoolName)) + } + + // 5. Workload name filter (for workload-specific device assignment) + if tfInfo.WorkloadName != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["workload_name"] == "%s"`, tfInfo.WorkloadName)) + // Workload namespace is same as pod namespace in TensorFusion + conditions = append(conditions, fmt.Sprintf(`device.attributes["workload_namespace"] == "%s"`, pod.Namespace)) + } + + // 6. Pod namespace filter (for namespace-based device isolation) + if pod.Namespace != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["pod_namespace"] == "%s"`, pod.Namespace)) + } + // Return a basic condition if no specific requirements if len(conditions) == 0 { // Simple condition that should work with most DRA drivers diff --git a/internal/webhook/v1/pod_webhook_dra_test.go b/internal/webhook/v1/pod_webhook_dra_test.go index fd625cc9..e6fce827 100644 --- a/internal/webhook/v1/pod_webhook_dra_test.go +++ b/internal/webhook/v1/pod_webhook_dra_test.go @@ -149,37 +149,137 @@ func TestDRAProcessor_HandleDRAAdmission(t *testing.T) { } func TestBuildCELSelector(t *testing.T) { - pod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pod", - Namespace: "test-namespace", + tests := []struct { + name string + pod *corev1.Pod + tfInfo *utils.TensorFusionInfo + expectedConditions []string + unexpectedConditions []string + }{ + { + name: "Basic resource filters", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 2, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("20"), + Vram: resource.MustParse("16Gi"), + }, + }, + GPUModel: "H100", + }, + }, + expectedConditions: []string{ + `device.attributes["tflops"].quantity >= quantity("20")`, + `device.attributes["vram"].quantity >= quantity("16Gi")`, + `device.attributes["model"] == "H100"`, + `int(device.attributes["gpu_count"]) >= 2`, + `device.attributes["pod_namespace"] == "test-namespace"`, + }, }, - } - - tfInfo := &utils.TensorFusionInfo{ - Profile: &tfv1.WorkloadProfileSpec{ - GPUCount: 2, - Resources: tfv1.Resources{ - Requests: tfv1.Resource{ - Tflops: resource.MustParse("20"), - Vram: resource.MustParse("16Gi"), + { + name: "All filters including pool and workload", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "production", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 1, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("10"), + Vram: resource.MustParse("8Gi"), + }, + }, + GPUModel: "A100", + PoolName: "high-priority", + }, + WorkloadName: 
"ml-training-job", + }, + expectedConditions: []string{ + `device.attributes["tflops"].quantity >= quantity("10")`, + `device.attributes["vram"].quantity >= quantity("8Gi")`, + `device.attributes["model"] == "A100"`, + `int(device.attributes["gpu_count"]) >= 1`, + `device.attributes["pool_name"] == "high-priority"`, + `device.attributes["workload_name"] == "ml-training-job"`, + `device.attributes["workload_namespace"] == "production"`, + `device.attributes["pod_namespace"] == "production"`, + }, + }, + { + name: "Zero resources fallback to default condition", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 0, // Zero count should not add condition + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + // Zero resources + }, + }, + }, + }, + expectedConditions: []string{ + `device.attributes["pod_namespace"] == "default"`, + }, + }, + { + name: "Empty resources fallback to basic condition", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + // All empty/zero values }, }, - GPUModel: "H100", + expectedConditions: []string{ + `device.attributes.exists("type")`, + }, }, } - celExpression, err := BuildCELSelector(pod, tfInfo) - require.NoError(t, err) - require.NotEmpty(t, celExpression) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + celExpression, err := BuildCELSelector(tt.pod, tt.tfInfo) + require.NoError(t, err) + require.NotEmpty(t, celExpression) + + // Verify expected conditions are present + for _, condition := range tt.expectedConditions { + assert.Contains(t, celExpression, condition, "Expected condition not found: %s", condition) + } - // Verify it contains the expected resource filters - assert.Contains(t, celExpression, `device.attributes["tflops"].quantity >= quantity("20")`) - assert.Contains(t, celExpression, `device.attributes["vram"].quantity >= quantity("16Gi")`) - assert.Contains(t, celExpression, `device.attributes["model"] == "H100"`) + // Verify unexpected conditions are not present + for _, condition := range tt.unexpectedConditions { + assert.NotContains(t, celExpression, condition, "Unexpected condition found: %s", condition) + } - // Verify conditions are combined with AND - assert.Contains(t, celExpression, " && ") + // Verify proper AND joining (unless it's the fallback condition) + if len(tt.expectedConditions) > 1 { + assert.Contains(t, celExpression, " && ", "Conditions should be joined with &&") + } + }) + } } func TestHasDRAClaim(t *testing.T) { From 1afc62de324099fd876d7554152dd4c8e07e2736 Mon Sep 17 00:00:00 2001 From: dylan Date: Sun, 28 Sep 2025 05:24:38 -0700 Subject: [PATCH 32/34] fix conflict for gpuresources.go --- internal/scheduler/gpuresources/gpuresources.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/scheduler/gpuresources/gpuresources.go b/internal/scheduler/gpuresources/gpuresources.go index 23ba873b..bc893087 100644 --- a/internal/scheduler/gpuresources/gpuresources.go +++ b/internal/scheduler/gpuresources/gpuresources.go @@ -117,7 +117,7 @@ func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Po // Check if DRA mode is enabled for this pod if isDRAEnabled(pod) && hasDRAClaim(pod) { - return nil, framework.NewStatus(framework.Skip, "DRA mode enabled, skipping custom GPU prefilter") + 
return nil, fwk.NewStatus(fwk.Skip, "DRA mode enabled, skipping custom GPU prefilter") } // Skip non tensor-fusion mode @@ -294,7 +294,7 @@ func (s *GPUFit) RemovePod(ctx context.Context, state fwk.CycleState, pod *v1.Po func (s *GPUFit) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status { // Check if DRA mode is enabled for this pod if isDRAEnabled(pod) && hasDRAClaim(pod) { - return framework.NewStatus(framework.Skip, "DRA mode enabled, skipping custom GPU filter") + return fwk.NewStatus(fwk.Skip, "DRA mode enabled, skipping custom GPU filter") } if !utils.IsTensorFusionWorker(pod) { @@ -338,8 +338,8 @@ func (s *GPUFit) Score( ctx context.Context, state fwk.CycleState, pod *v1.Pod, - nodeInfo *framework.NodeInfo, -) (int64, *framework.Status) { + nodeInfo fwk.NodeInfo, +) (int64, *fwk.Status) { // Skip non tensor-fusion mode scheduling if !utils.IsTensorFusionWorker(pod) { return 0, fwk.NewStatus(fwk.Success, "") @@ -377,7 +377,7 @@ func (s *GPUFit) ScoreExtensions() framework.ScoreExtensions { return nil } -func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { +func (s *GPUFit) Reserve(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) *fwk.Status { if !utils.IsTensorFusionWorker(pod) { return fwk.NewStatus(fwk.Success, "skip for non tensor-fusion mode") } @@ -423,7 +423,7 @@ func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod * return fwk.NewStatus(fwk.Success, "") } -func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { +func (s *GPUFit) Unreserve(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) { if !utils.IsTensorFusionWorker(pod) { return } @@ -442,7 +442,7 @@ func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod }, schedulingResult.FinalGPUs, pod.ObjectMeta) } -func (s *GPUFit) PostBind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { +func (s *GPUFit) PostBind(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) { if !utils.IsTensorFusionWorker(pod) { return } From efbce3fcf00949e6c8363b2e054bc1adcc0e0495 Mon Sep 17 00:00:00 2001 From: dylan Date: Sun, 28 Sep 2025 08:17:33 -0700 Subject: [PATCH 33/34] 1. support resource slice build and destory 2. 
make resource slice build and dra request build in the same logic --- cmd/main.go | 25 +- internal/constants/constants.go | 4 + .../dra/resourceclaim_controller.go | 58 +++-- .../dra/resourceclaim_controller_test.go | 13 +- .../dra/resourceslice_controller.go | 216 ++++++++++++++++++ internal/webhook/v1/pod_dra.go | 26 +-- 6 files changed, 287 insertions(+), 55 deletions(-) create mode 100644 internal/controller/dra/resourceslice_controller.go diff --git a/cmd/main.go b/cmd/main.go index a50ce745..b0ec36e7 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -67,25 +67,8 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/NexusGPU/tensor-fusion/internal/version" webhookcorev1 "github.com/NexusGPU/tensor-fusion/internal/webhook/v1" - "k8s.io/apimachinery/pkg/runtime" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" k8sVer "k8s.io/apimachinery/pkg/util/version" "k8s.io/apiserver/pkg/util/feature" - "k8s.io/client-go/kubernetes" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" - _ "k8s.io/client-go/plugin/pkg/client/auth" - "k8s.io/client-go/rest" - "k8s.io/klog/v2" - "k8s.io/kubernetes/cmd/kube-scheduler/app" - "k8s.io/kubernetes/pkg/scheduler" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/healthz" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/metrics/filters" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/controller-runtime/pkg/webhook" - "sigs.k8s.io/yaml" // +kubebuilder:scaffold:imports ) @@ -437,6 +420,14 @@ func startCustomResourceController( setupLog.Error(err, "unable to create controller", "controller", "ResourceClaim") os.Exit(1) } + // Setup ResourceSlice controller for DRA Phase 2 + if err = (&dra.ResourceSliceReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ResourceSlice") + os.Exit(1) + } if err = (&controller.NodeReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), diff --git a/internal/constants/constants.go b/internal/constants/constants.go index 67a3dde6..77648769 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -140,6 +140,10 @@ const ( // ResourceClaimTemplate related constants DRAResourceClaimTemplateName = "tensor-fusion-gpu-template" + + // ResourceSlice related constants + DRAResourceSliceName = "tensor-fusion-resource-slice-%s" + DRAResourceSlicePool = "tensor-fusion-resource-slice-pool-%s" ) // for avoid golang lint issues diff --git a/internal/controller/dra/resourceclaim_controller.go b/internal/controller/dra/resourceclaim_controller.go index 679fb8cb..6d18b234 100644 --- a/internal/controller/dra/resourceclaim_controller.go +++ b/internal/controller/dra/resourceclaim_controller.go @@ -20,8 +20,9 @@ import ( "context" "fmt" - resourcev1beta2 "k8s.io/api/resource/v1beta2" + "github.com/NexusGPU/tensor-fusion/internal/utils" corev1 "k8s.io/api/core/v1" + resourcev1beta2 "k8s.io/api/resource/v1beta2" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -88,20 +89,23 @@ func (r *ResourceClaimReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{RequeueAfter: constants.PendingRequeueDuration}, nil } - // Get CEL expression from Pod annotation - celExpression := ownerPod.Annotations[constants.DRACelExpressionAnnotation] - if 
celExpression == "" { - log.Info("No CEL expression found in Pod annotation", "pod", ownerPod.Name) - return ctrl.Result{}, nil - } - // Update ResourceClaim with CEL expression - if err := r.updateResourceClaimCEL(ctx, resourceClaim, celExpression); err != nil { + if err := r.updateResourceClaimCEL(resourceClaim, ownerPod); err != nil { log.Error(err, "Failed to update ResourceClaim CEL expression") return ctrl.Result{}, err } + // Update ResourceClaim with capacity request + if err := r.updateCapacityRequest(resourceClaim, ownerPod); err != nil { + log.Error(err, "Failed to update ResourceClaim capacity request") + return ctrl.Result{}, err + } + + if err := r.Update(ctx, resourceClaim); err != nil { + log.Error(err, "Failed to update ResourceClaim") + return ctrl.Result{}, err + } - log.Info("Successfully updated ResourceClaim with CEL expression", "cel", celExpression) + log.Info("Successfully updated ResourceClaim") return ctrl.Result{}, nil } @@ -142,7 +146,7 @@ func (r *ResourceClaimReconciler) findOwnerPod(ctx context.Context, resourceClai } // updateResourceClaimCEL updates the ResourceClaim's CEL selector expression -func (r *ResourceClaimReconciler) updateResourceClaimCEL(ctx context.Context, resourceClaim *resourcev1beta2.ResourceClaim, celExpression string) error { +func (r *ResourceClaimReconciler) updateResourceClaimCEL(resourceClaim *resourcev1beta2.ResourceClaim, pod *corev1.Pod) error { // Check if we need to update if len(resourceClaim.Spec.Devices.Requests) == 0 { return fmt.Errorf("no device requests found in ResourceClaim") @@ -153,6 +157,13 @@ func (r *ResourceClaimReconciler) updateResourceClaimCEL(ctx context.Context, re return fmt.Errorf("no ExactDeviceRequest found") } + // Get CEL expression from Pod annotation + celExpression := pod.Annotations[constants.DRACelExpressionAnnotation] + + if celExpression == "" { + return nil + } + // Check if CEL expression is already set correctly if len(deviceReq.Exactly.Selectors) > 0 && deviceReq.Exactly.Selectors[0].CEL != nil && @@ -172,8 +183,27 @@ func (r *ResourceClaimReconciler) updateResourceClaimCEL(ctx context.Context, re deviceReq.Exactly.Selectors[0].CEL.Expression = celExpression - // Update the ResourceClaim - return r.Update(ctx, resourceClaim) + return nil +} + +func (r *ResourceClaimReconciler) updateCapacityRequest(resourceClaim *resourcev1beta2.ResourceClaim, pod *corev1.Pod) error { + if len(resourceClaim.Spec.Devices.Requests) == 0 { + return fmt.Errorf("no device requests found in ResourceClaim") + } + + deviceReq := &resourceClaim.Spec.Devices.Requests[0] + if deviceReq.Exactly == nil { + return fmt.Errorf("no ExactDeviceRequest found") + } + gpuRequestResource, err := utils.GetGPUResource(pod, true) + if err != nil { + return fmt.Errorf("failed to get GPU resource: %w", err) + } + //TODO extract to constants + deviceReq.Exactly.Capacity.Requests["tflops"] = gpuRequestResource.Tflops + deviceReq.Exactly.Capacity.Requests["vram"] = gpuRequestResource.Vram + + return nil } // SetupWithManager sets up the controller with the Manager. @@ -181,4 +211,4 @@ func (r *ResourceClaimReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&resourcev1beta2.ResourceClaim{}). 
Complete(r) -} \ No newline at end of file +} diff --git a/internal/controller/dra/resourceclaim_controller_test.go b/internal/controller/dra/resourceclaim_controller_test.go index ece541f9..aeebbda7 100644 --- a/internal/controller/dra/resourceclaim_controller_test.go +++ b/internal/controller/dra/resourceclaim_controller_test.go @@ -31,7 +31,7 @@ func TestResourceClaimReconciler_Reconcile(t *testing.T) { expectUpdate bool }{ { - name: "ResourceClaim not found", + name: "ResourceClaim not found", expectedResult: ctrl.Result{}, expectError: false, }, @@ -535,7 +535,14 @@ func TestResourceClaimReconciler_updateResourceClaimCEL(t *testing.T) { Scheme: scheme, } - err := reconciler.updateResourceClaimCEL(context.Background(), tt.resourceClaim, tt.celExpression) + mockPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + constants.DRACelExpressionAnnotation: tt.celExpression, + }, + }, + } + err := reconciler.updateResourceClaimCEL(tt.resourceClaim, mockPod) if tt.expectError { require.Error(t, err) @@ -554,4 +561,4 @@ func TestResourceClaimReconciler_updateResourceClaimCEL(t *testing.T) { } }) } -} \ No newline at end of file +} diff --git a/internal/controller/dra/resourceslice_controller.go b/internal/controller/dra/resourceslice_controller.go new file mode 100644 index 00000000..64a00ce8 --- /dev/null +++ b/internal/controller/dra/resourceslice_controller.go @@ -0,0 +1,216 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package dra + +import ( + "context" + "fmt" + + resourcev1beta2 "k8s.io/api/resource/v1beta2" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +// ResourceSliceReconciler reconciles ResourceSlice objects based on GPUNode and GPU changes +type ResourceSliceReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceslices,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpunodes,verbs=get;list;watch +//+kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpus,verbs=get;list;watch +//+kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools,verbs=get;list;watch + +// Reconcile processes GPUNode changes and generates/updates corresponding ResourceSlices +func (r *ResourceSliceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + log.Info("Reconciling ResourceSlice for GPUNode", "name", req.Name) + + // Fetch the GPUNode + gpuNode := &tfv1.GPUNode{} + if err := r.Get(ctx, req.NamespacedName, gpuNode); err != nil { + if errors.IsNotFound(err) { + // GPUNode was deleted, clean up associated ResourceSlice + return r.cleanupResourceSlice(ctx, req.Name) + } + log.Error(err, "Failed to get GPUNode") + return ctrl.Result{}, err + } + + // If GPUNode is being deleted, clean up ResourceSlice + if !gpuNode.DeletionTimestamp.IsZero() { + return r.cleanupResourceSlice(ctx, gpuNode.Name) + } + // Get all GPUs owned by this node + gpuList := &tfv1.GPUList{} + if err := r.List(ctx, gpuList, client.MatchingLabels{constants.LabelKeyOwner: gpuNode.Name}); err != nil { + log.Error(err, "Failed to list GPUs for node") + return ctrl.Result{}, err + } + + // Skip if no GPUs discovered yet + if len(gpuList.Items) == 0 { + log.Info("No GPUs discovered for node yet, skipping ResourceSlice generation") + return ctrl.Result{}, nil + } + + // Generate/update ResourceSlice for this node + if err := r.reconcileResourceSlice(ctx, gpuNode, gpuList.Items); err != nil { + log.Error(err, "Failed to reconcile ResourceSlice") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +// reconcileResourceSlice creates or updates the ResourceSlice for a GPUNode +func (r *ResourceSliceReconciler) reconcileResourceSlice(ctx context.Context, gpuNode *tfv1.GPUNode, gpus []tfv1.GPU) error { + log := log.FromContext(ctx) + + resourceSliceName := fmt.Sprintf(constants.DRAResourceSliceName, gpuNode.Name) + resourceSlice := &resourcev1beta2.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceSliceName, + }, + } + + _, err := controllerutil.CreateOrUpdate(ctx, r.Client, resourceSlice, func() error { + // Set basic spec fields + resourceSlice.Spec.Driver = constants.DRADriverName + resourceSlice.Spec.NodeName = &gpuNode.Name + resourceSlice.Spec.Pool = resourcev1beta2.ResourcePool{ + Name: gpuNode.Labels[constants.GpuPoolKey], + Generation: gpuNode.Generation, + ResourceSliceCount: 1, + } + + // Generate devices list + devices, 
err := r.generateDevices(ctx, gpus) + if err != nil { + return fmt.Errorf("failed to generate devices: %w", err) + } + resourceSlice.Spec.Devices = devices + + // Set labels for easy identification + if resourceSlice.Labels == nil { + resourceSlice.Labels = make(map[string]string) + } + resourceSlice.Labels[constants.LabelKeyOwner] = gpuNode.Name + return nil + }) + + if err != nil { + return fmt.Errorf("failed to create or update ResourceSlice: %w", err) + } + + log.Info("Successfully reconciled ResourceSlice", "resourceSlice", resourceSliceName) + return nil +} + +// generateDevices creates the device list for ResourceSlice based on physical GPUs +func (r *ResourceSliceReconciler) generateDevices(_ context.Context, gpus []tfv1.GPU) ([]resourcev1beta2.Device, error) { + devices := make([]resourcev1beta2.Device, 0, len(gpus)) + + // Calculate virtual capacities for proportional allocation + + for _, gpu := range gpus { + if gpu.Status.Capacity == nil { + continue + } + //TODO extract to constants + poolName := gpu.Labels[constants.GpuPoolKey] + device := resourcev1beta2.Device{ + Name: gpu.Status.UUID, + Attributes: map[resourcev1beta2.QualifiedName]resourcev1beta2.DeviceAttribute{ + "model": { + StringValue: &gpu.Status.GPUModel, + }, + "pool_name": { + StringValue: &poolName, + }, + "pod_namespace": { + StringValue: &gpu.Namespace, + }, + }, + Capacity: map[resourcev1beta2.QualifiedName]resourcev1beta2.DeviceCapacity{ + "tflops": { + Value: gpu.Status.Capacity.Tflops, + }, + "vram": { + Value: gpu.Status.Capacity.Vram, + }, + }, + AllowMultipleAllocations: func() *bool { b := true; return &b }(), + } + + devices = append(devices, device) + } + + return devices, nil +} + +// cleanupResourceSlice removes the ResourceSlice associated with a deleted GPUNode +func (r *ResourceSliceReconciler) cleanupResourceSlice(ctx context.Context, nodeName string) (ctrl.Result, error) { + log := log.FromContext(ctx) + + resourceSliceName := fmt.Sprintf(constants.DRAResourceSliceName, nodeName) + resourceSlice := &resourcev1beta2.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceSliceName, + }, + } + + err := r.Delete(ctx, resourceSlice) + if err != nil && !errors.IsNotFound(err) { + log.Error(err, "Failed to delete ResourceSlice", "name", resourceSliceName) + return ctrl.Result{}, err + } + + log.Info("Successfully cleaned up ResourceSlice", "name", resourceSliceName) + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager +func (r *ResourceSliceReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&tfv1.GPUNode{}). + Watches(&tfv1.GPU{}, handler.EnqueueRequestsFromMapFunc( + func(ctx context.Context, obj client.Object) []reconcile.Request { + // Get the owner GPUNode name from GPU labels + if labels := obj.GetLabels(); labels != nil { + if nodeName, ok := labels[constants.LabelKeyOwner]; ok { + return []reconcile.Request{ + {NamespacedName: types.NamespacedName{Name: nodeName}}, + } + } + } + return nil + })). + Complete(r) +} diff --git a/internal/webhook/v1/pod_dra.go b/internal/webhook/v1/pod_dra.go index cd1b7c8c..6a55fc4f 100644 --- a/internal/webhook/v1/pod_dra.go +++ b/internal/webhook/v1/pod_dra.go @@ -140,38 +140,22 @@ func (p *DRAProcessor) HandleDRAAdmission(ctx context.Context, pod *corev1.Pod, func BuildCELSelector(pod *corev1.Pod, tfInfo *utils.TensorFusionInfo) (string, error) { var conditions []string - // 1. 
Basic resource requirements using standard DRA quantity attributes - requests := tfInfo.Profile.Resources.Requests - if !requests.Tflops.IsZero() { - conditions = append(conditions, fmt.Sprintf(`device.attributes["tflops"].quantity >= quantity("%s")`, requests.Tflops.String())) - } - if !requests.Vram.IsZero() { - conditions = append(conditions, fmt.Sprintf(`device.attributes["vram"].quantity >= quantity("%s")`, requests.Vram.String())) - } - - // 2. GPU model filter (if specified - basic attribute that should be widely supported) + // 1. GPU model filter (if specified - basic attribute that should be widely supported) if tfInfo.Profile.GPUModel != "" { conditions = append(conditions, fmt.Sprintf(`device.attributes["model"] == "%s"`, tfInfo.Profile.GPUModel)) } - // 3. GPU count requirement (important for multi-GPU workloads) + // 2. GPU count requirement (important for multi-GPU workloads) if tfInfo.Profile.GPUCount > 0 { - conditions = append(conditions, fmt.Sprintf(`int(device.attributes["gpu_count"]) >= %d`, tfInfo.Profile.GPUCount)) + conditions = append(conditions, fmt.Sprintf(`size(devices) >= %d`, tfInfo.Profile.GPUCount)) } - // 4. Pool name filter (for resource isolation and scheduling preferences) + // 3. Pool name filter (for resource isolation and scheduling preferences) if tfInfo.Profile.PoolName != "" { conditions = append(conditions, fmt.Sprintf(`device.attributes["pool_name"] == "%s"`, tfInfo.Profile.PoolName)) } - // 5. Workload name filter (for workload-specific device assignment) - if tfInfo.WorkloadName != "" { - conditions = append(conditions, fmt.Sprintf(`device.attributes["workload_name"] == "%s"`, tfInfo.WorkloadName)) - // Workload namespace is same as pod namespace in TensorFusion - conditions = append(conditions, fmt.Sprintf(`device.attributes["workload_namespace"] == "%s"`, pod.Namespace)) - } - - // 6. Pod namespace filter (for namespace-based device isolation) + // 4. 
Pod namespace filter (for namespace-based device isolation) if pod.Namespace != "" { conditions = append(conditions, fmt.Sprintf(`device.attributes["pod_namespace"] == "%s"`, pod.Namespace)) } From 7d95fef8e230e4dad255dead35a07674bf8f8d9a Mon Sep 17 00:00:00 2001 From: dylan Date: Sat, 4 Oct 2025 07:47:12 -0700 Subject: [PATCH 34/34] feat: Added DRA CEL filter support - Implemented DRA CEL filters in GPU allocation requests - Added benchmarks for basic and complex expressions - Updated the resource slice controller to support Kubernetes hostname labels --- .../dra/resourceslice_controller.go | 2 + .../cel_filter/cel_filter_benchmark_test.go | 41 ++++ .../filter/cel_filter/dra_cel_filter.go | 216 ++++++++++++++++++ 3 files changed, 259 insertions(+) create mode 100644 internal/gpuallocator/filter/cel_filter/dra_cel_filter.go diff --git a/internal/controller/dra/resourceslice_controller.go b/internal/controller/dra/resourceslice_controller.go index 64a00ce8..fbd03f6f 100644 --- a/internal/controller/dra/resourceslice_controller.go +++ b/internal/controller/dra/resourceslice_controller.go @@ -122,6 +122,7 @@ func (r *ResourceSliceReconciler) reconcileResourceSlice(ctx context.Context, gp resourceSlice.Labels = make(map[string]string) } resourceSlice.Labels[constants.LabelKeyOwner] = gpuNode.Name + resourceSlice.Labels[constants.KubernetesHostNameLabel] = gpuNode.Name return nil }) @@ -144,6 +145,7 @@ func (r *ResourceSliceReconciler) generateDevices(_ context.Context, gpus []tfv1 continue } //TODO extract to constants + //TODO quota support poolName := gpu.Labels[constants.GpuPoolKey] device := resourcev1beta2.Device{ Name: gpu.Status.UUID, diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go index 39fcd907..254baf7c 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go @@ -9,6 +9,7 @@ import ( tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" + dracel "k8s.io/dynamic-resource-allocation/cel" ) // Test constants for repeated strings @@ -147,6 +148,46 @@ func BenchmarkFilterPerformance(b *testing.B) { } }) + // Benchmark DRA CEL filter - basic filtering + b.Run("DRACELFilter_Basic", func(b *testing.B) { + request := createTestAllocRequest("A100", "") + cache := dracel.NewCache(100, dracel.Features{}) + + draFilter, err := NewDRACELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filteredGPUs, err := draFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + + // Benchmark DRA CEL filter - complex expression + b.Run("DRACELFilter_Complex", func(b *testing.B) { + request := createTestAllocRequest("", "device.attributes['model'].string == 'A100' && device.attributes['label.environment'].string == '"+testEnvironmentProduction+"'") + cache := dracel.NewCache(100, dracel.Features{}) + + draFilter, err := NewDRACELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filteredGPUs, err := draFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + // Print performance comparison report after benchmarks printPerformanceComparison(b) } diff --git 
a/internal/gpuallocator/filter/cel_filter/dra_cel_filter.go b/internal/gpuallocator/filter/cel_filter/dra_cel_filter.go new file mode 100644 index 00000000..83b73c93 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/dra_cel_filter.go @@ -0,0 +1,216 @@ +package cel_filter + +import ( + "context" + "encoding/json" + "fmt" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" + "github.com/samber/lo" + resourceapi "k8s.io/api/resource/v1" + dracel "k8s.io/dynamic-resource-allocation/cel" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// DRACELFilter implements CEL filtering using k8s.io/dynamic-resource-allocation/cel +type DRACELFilter struct { + name string + requiredPhases []tfv1.TensorFusionGPUPhase + userExpression string + cache *dracel.Cache + displayExpression string +} + +// NewDRACELFilter creates a new DRA-based CEL filter from allocation request +func NewDRACELFilter(req *tfv1.AllocRequest, cache *dracel.Cache) (*DRACELFilter, error) { + // Extract early filtering criteria + var requiredPhases []tfv1.TensorFusionGPUPhase + var userExpression, displayExpression string + + if req != nil { + requiredPhases = []tfv1.TensorFusionGPUPhase{ + tfv1.TensorFusionGPUPhaseRunning, + tfv1.TensorFusionGPUPhasePending, + } + userExpression = req.CELFilterExpression + displayExpression = buildDisplayExpression(req) + } + + // Handle nil request case + name := "AllocRequest-unknown" + if req != nil { + name = fmt.Sprintf("AllocRequest-%s", req.WorkloadNameNamespace.String()) + } + + // Validate expression if provided + if userExpression != "" && cache != nil { + result := cache.Check(userExpression) + if result.Error != nil { + return nil, fmt.Errorf("failed to compile CEL expression %q: %w", userExpression, result.Error) + } + } + + return &DRACELFilter{ + name: name, + requiredPhases: requiredPhases, + userExpression: userExpression, + cache: cache, + displayExpression: displayExpression, + }, nil +} + +// Name returns the filter name +func (f *DRACELFilter) Name() string { + return f.name +} + +// Filter applies the CEL expression to filter GPUs +func (f *DRACELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []*tfv1.GPU) ([]*tfv1.GPU, error) { + log := log.FromContext(ctx) + if len(gpus) == 0 { + return gpus, nil + } + + // Early filtering phase: apply basic filters first + earlyFilteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) + for _, gpu := range gpus { + // Progressive migration mode check + if utils.IsProgressiveMigration() && gpu.Status.UsedBy != tfv1.UsedByTensorFusion { + continue + } + + // Fast path: check phase first (most common filter) + if f.requiredPhases != nil && !lo.Contains(f.requiredPhases, gpu.Status.Phase) { + continue + } + + earlyFilteredGPUs = append(earlyFilteredGPUs, gpu) + } + + // If no user expression, return early filtered results + if f.userExpression == "" { + log.V(1).Info("DRA CEL filter applied (early filtering only)", + "filter", f.name, + "inputGPUs", len(gpus), + "outputGPUs", len(earlyFilteredGPUs)) + return earlyFilteredGPUs, nil + } + + // If no GPUs passed early filtering, return empty result + if len(earlyFilteredGPUs) == 0 { + return earlyFilteredGPUs, nil + } + + // Get compiled expression from cache + compiledExpr := f.cache.GetOrCompile(f.userExpression) + if compiledExpr.Error != nil { + return nil, fmt.Errorf("failed to compile CEL expression %q: %w", f.userExpression, compiledExpr.Error) + } + + // 
Apply CEL filtering using DRA + filteredGPUs := make([]*tfv1.GPU, 0, len(earlyFilteredGPUs)) + for _, gpu := range earlyFilteredGPUs { + // Convert GPU to DRA Device + device, err := convertGPUToDevice(gpu) + if err != nil { + log.Error(err, "Failed to convert GPU to Device", "gpu", gpu.Name) + continue + } + + // Evaluate CEL expression + matches, details, err := compiledExpr.DeviceMatches(ctx, device) + if err != nil { + log.Error(err, "CEL expression evaluation failed", + "expression", f.userExpression, + "gpu", gpu.Name, + "details", details) + // On error, exclude the GPU (fail-safe) + continue + } + + if matches { + filteredGPUs = append(filteredGPUs, gpu) + } + } + + log.V(1).Info("DRA CEL filter applied", + "filter", f.name, + "displayExpression", f.displayExpression, + "userExpression", f.userExpression, + "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), + "outputGPUs", len(filteredGPUs)) + + return filteredGPUs, nil +} + +// convertGPUToDevice converts tfv1.GPU to dracel.Device +func convertGPUToDevice(gpu *tfv1.GPU) (dracel.Device, error) { + if gpu == nil { + return dracel.Device{}, fmt.Errorf("GPU is nil") + } + + allowMultiple := true + device := dracel.Device{ + Driver: constants.DRADriverName, + AllowMultipleAllocations: &allowMultiple, + Attributes: make(map[resourceapi.QualifiedName]resourceapi.DeviceAttribute), + Capacity: make(map[resourceapi.QualifiedName]resourceapi.DeviceCapacity), + } + + // Map basic attributes + device.Attributes[GPUFieldName] = resourceapi.DeviceAttribute{StringValue: &gpu.Name} + device.Attributes[GPUFieldNamespace] = resourceapi.DeviceAttribute{StringValue: &gpu.Namespace} + model := gpu.Status.GPUModel + device.Attributes[GPUFieldGPUModel] = resourceapi.DeviceAttribute{StringValue: &model} + uuid := gpu.Status.UUID + device.Attributes[GPUFieldUUID] = resourceapi.DeviceAttribute{StringValue: &uuid} + usedBy := string(gpu.Status.UsedBy) + device.Attributes[GPUFieldUsedBy] = resourceapi.DeviceAttribute{StringValue: &usedBy} + message := gpu.Status.Message + device.Attributes[GPUFieldMessage] = resourceapi.DeviceAttribute{StringValue: &message} + + // Map labels with prefix + if len(gpu.Labels) > 0 { + for k, v := range gpu.Labels { + labelValue := v + device.Attributes[resourceapi.QualifiedName(fmt.Sprintf("%s.%s", GPUFieldLabels, k))] = resourceapi.DeviceAttribute{StringValue: &labelValue} + } + } + + // Map annotations with prefix + if len(gpu.Annotations) > 0 { + for k, v := range gpu.Annotations { + annotationValue := v + device.Attributes[resourceapi.QualifiedName(fmt.Sprintf("%s.%s", GPUFieldAnnotations, k))] = resourceapi.DeviceAttribute{StringValue: &annotationValue} + } + } + + // Map nodeSelector with prefix + if len(gpu.Status.NodeSelector) > 0 { + for k, v := range gpu.Status.NodeSelector { + selectorValue := v + device.Attributes[resourceapi.QualifiedName(fmt.Sprintf("%s.%s", GPUFieldNodeSelector, k))] = resourceapi.DeviceAttribute{StringValue: &selectorValue} + } + } + + // Map runningApps as JSON string + if len(gpu.Status.RunningApps) > 0 { + appsJSON, err := json.Marshal(gpu.Status.RunningApps) + if err != nil { + return dracel.Device{}, fmt.Errorf("failed to marshal runningApps: %w", err) + } + appsStr := string(appsJSON) + device.Attributes[GPUFieldRunningApps] = resourceapi.DeviceAttribute{StringValue: &appsStr} + } + + // Map capacity (tflops and vram) - DRA experimental version maintains capacity state + if gpu.Status.Capacity != nil { + device.Capacity[ResourceFieldTFlops] = 
resourceapi.DeviceCapacity{Value: gpu.Status.Capacity.Tflops} + device.Capacity[ResourceFieldVRAM] = resourceapi.DeviceCapacity{Value: gpu.Status.Capacity.Vram} + } + + return device, nil +}
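
For reference, a minimal usage sketch of the DRA-backed CEL filter introduced in this series, following the same setup as the benchmark above. The `AllocRequest` literal, the zero-value worker key, and the attribute name `model` in the expression are illustrative assumptions drawn from the test fixtures; only `dracel.NewCache`, `NewDRACELFilter`, and `Filter` are defined by the patch itself.

```go
// Sketch only: exercising the DRA-backed CEL filter in isolation, under the
// same assumptions as the benchmark (the "model" attribute mirrors the test
// fixtures and the ResourceSlice attributes built by the controller).
package main

import (
	"context"
	"fmt"

	tfv1 "github.com/NexusGPU/tensor-fusion/api/v1"
	celfilter "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter/cel_filter"
	dracel "k8s.io/dynamic-resource-allocation/cel"
)

func main() {
	ctx := context.Background()

	// Compiled-expression cache shared across allocation requests,
	// sized as in the benchmark (100 entries, default feature set).
	cache := dracel.NewCache(100, dracel.Features{})

	// Hypothetical allocation request; in the real flow the expression comes
	// from the pod's DRACelExpressionAnnotation via ComposeAllocationRequest.
	req := &tfv1.AllocRequest{
		CELFilterExpression: `device.attributes['model'].string == 'A100'`,
	}

	f, err := celfilter.NewDRACELFilter(req, cache)
	if err != nil {
		panic(err) // invalid CEL expressions are rejected at construction time
	}

	// Candidate GPUs would normally come from the allocator's in-memory store;
	// a zero-value worker key is enough for this sketch.
	var candidates []*tfv1.GPU
	selected, err := f.Filter(ctx, tfv1.NameNamespace{}, candidates)
	if err != nil {
		panic(err)
	}
	fmt.Printf("CEL filter kept %d of %d candidate GPUs\n", len(selected), len(candidates))
}
```

In the allocator itself the same pieces are wired through `ComposeAllocationRequest`, which now reads the expression from `constants.DRACelExpressionAnnotation` rather than a separate CEL-filter annotation.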