diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index dfae921c..f56d3f6c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@v5 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: '~1.24' diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index 3b4fec3f..42354dfe 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -1,5 +1,8 @@ name: E2E Tests +permissions: + contents: read + on: workflow_dispatch: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1528d13e..b4be4381 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,13 +28,13 @@ jobs: strategy: matrix: # from https://github.com/kubernetes-sigs/controller-tools/blob/main/envtest-releases.yaml - envtest_k8s_version: [1.23.5, 1.33.0] + envtest_k8s_version: [1.23.5, 1.34.0] steps: - name: Clone the code uses: actions/checkout@v5 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: '~1.24' diff --git a/.vscode/launch.json b/.vscode/launch.json index bce7b733..ef1ab245 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -61,7 +61,8 @@ "KUBECONFIG": "~/.kube/config-local-studio", "ENABLE_WEBHOOKS": "false", "ENABLE_SCHEDULER": "true", - "ENABLE_CR_CONTROLLER": "true" + "ENABLE_CR_CONTROLLER": "true", + "NVIDIA_OPERATOR_PROGRESSIVE_MIGRATION": "true" }, "args": [ "--metrics-path", "${workspaceFolder}/logs/metrics.log", diff --git a/.vscode/settings.json b/.vscode/settings.json index 1285d84e..a5da5620 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -25,6 +25,7 @@ "clientcmdapi", "clientgoscheme", "clientset", + "clientsetfake", "cloudnative", "cloudprovider", "clusterissuers", @@ -46,6 +47,8 @@ "envtest", "essd", "Eventf", + "evictable", + "featuregate", "finalizer", "Finalizers", "frameworkruntime", @@ -78,6 +81,8 @@ "iface", "imageutils", "influxdata", + "internalcache", + "internalqueue", "jsonpatch", "karpenter", "karpv", @@ -129,6 +134,7 @@ "schedulingconfigtemplate", "schedulingconfigtemplates", "schedulingcorev", + "schedv", "serviceaccount", "shirou", "shortuuid", diff --git a/api/v1/gpupool_types.go b/api/v1/gpupool_types.go index 08d139b5..ca9224c4 100644 --- a/api/v1/gpupool_types.go +++ b/api/v1/gpupool_types.go @@ -238,6 +238,12 @@ type QosConfig struct { Definitions []QosDefinition `json:"definitions,omitempty"` DefaultQoS QoSLevel `json:"defaultQoS,omitempty"` Pricing []QosPricing `json:"pricing,omitempty"` + + // Eviction protection price ratio applied to cost calculation during protection period + // This multiplier increases pricing for protected workloads to discourage preemption + // +optional + // +kubebuilder:default="1.2" + EvictionProtectionPriceRatio string `json:"evictionProtectionPriceRatio,omitempty"` } type QosDefinition struct { diff --git a/api/v1/gpuresourcequota_types.go b/api/v1/gpuresourcequota_types.go index 46e068b7..2b41b512 100644 --- a/api/v1/gpuresourcequota_types.go +++ b/api/v1/gpuresourcequota_types.go @@ -19,7 +19,7 @@ package v1 import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/kubernetes/pkg/scheduler/framework" + fwk "k8s.io/kube-scheduler/framework" ) // GPUResourceQuotaSpec defines the desired state of GPUResourceQuota @@ -192,6 +192,12 @@ type AllocRequest struct { // cel filter expression CELFilterExpression string + + QoS QoSLevel +} + +func (p 
*AllocRequest) Clone() fwk.StateData { + return p } type GPUAllocationInfo struct { @@ -209,7 +215,7 @@ type AdjustRequest struct { NewLimit Resource } -func (ar *AllocRequest) Clone() framework.StateData { +func (ar *AdjustRequest) Clone() fwk.StateData { return ar } diff --git a/api/v1/schedulingconfigtemplate_types.go b/api/v1/schedulingconfigtemplate_types.go index 44f07bef..8611ed99 100644 --- a/api/v1/schedulingconfigtemplate_types.go +++ b/api/v1/schedulingconfigtemplate_types.go @@ -39,6 +39,10 @@ type SchedulingConfigTemplateSpec struct { // single GPU device multi-process queuing and fair scheduling with QoS constraint // +optional Hypervisor *HypervisorScheduling `json:"hypervisor,omitempty"` + + // enable Dynamic Resource Allocation (DRA) for GPU resource management + // +optional + DRA *DRAConfig `json:"dra,omitempty"` } type PlacementConfig struct { @@ -206,6 +210,17 @@ type MultiProcessQueuing struct { QueueLevelTimeSlices []string `json:"queueLevelTimeSlices,omitempty"` } +// DRAConfig configures Dynamic Resource Allocation support +type DRAConfig struct { + // Enable DRA mode for all workloads in this configuration template + // +optional + Enable *bool `json:"enable,omitempty"` + + // ResourceClaimTemplateName specifies the ResourceClaim template name to use + // +optional + ResourceClaimTemplateName string `json:"resourceClaimTemplateName,omitempty"` +} + // SchedulingConfigTemplateStatus defines the observed state of SchedulingConfigTemplate. type SchedulingConfigTemplateStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 9be4f47c..5699677a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -332,6 +332,26 @@ func (in *ComputingVendorParams) DeepCopy() *ComputingVendorParams { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DRAConfig) DeepCopyInto(out *DRAConfig) { + *out = *in + if in.Enable != nil { + in, out := &in.Enable, &out.Enable + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DRAConfig. +func (in *DRAConfig) DeepCopy() *DRAConfig { + if in == nil { + return nil + } + out := new(DRAConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GPU) DeepCopyInto(out *GPU) { *out = *in @@ -1963,6 +1983,11 @@ func (in *SchedulingConfigTemplateSpec) DeepCopyInto(out *SchedulingConfigTempla *out = new(HypervisorScheduling) (*in).DeepCopyInto(*out) } + if in.DRA != nil { + in, out := &in.DRA, &out.DRA + *out = new(DRAConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingConfigTemplateSpec. diff --git a/charts/tensor-fusion/Chart.yaml b/charts/tensor-fusion/Chart.yaml index 59de69d1..042d05c2 100644 --- a/charts/tensor-fusion/Chart.yaml +++ b/charts/tensor-fusion/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.5.5 +version: 1.5.9 # This is the version number of the application being deployed. 
This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml index 8bc65e66..2158529c 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml @@ -562,6 +562,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml index 91a01eae..7c0c281b 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -143,6 +143,20 @@ spec: type: string type: object type: object + dra: + description: enable Dynamic Resource Allocation (DRA) for GPU resource + management + properties: + enable: + description: Enable DRA mode for all workloads in this configuration + template + type: boolean + resourceClass: + default: tensorfusion.ai/gpu + description: ResourceClass specifies the DRA resource class name + to use + type: string + type: object hypervisor: description: single GPU device multi-process queuing and fair scheduling with QoS constraint diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml index 45bc9a47..496541bc 100644 --- a/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml +++ b/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml @@ -629,6 +629,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml b/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml index 7fcdda1a..242d17e0 100644 --- a/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml +++ b/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml @@ -11,7 +11,7 @@ webhooks: namespace: {{ include "tensor-fusion.namespace" . }} path: /mutate-v1-pod failurePolicy: {{ .Values.controller.admissionWebhooks.failurePolicy }} - name: mpod-v1.kb.io + name: mpod.tensor-fusion.ai rules: - apiGroups: - "" diff --git a/charts/tensor-fusion/templates/controller-deployment.yaml b/charts/tensor-fusion/templates/controller-deployment.yaml index ca09a6a1..c16c4aab 100644 --- a/charts/tensor-fusion/templates/controller-deployment.yaml +++ b/charts/tensor-fusion/templates/controller-deployment.yaml @@ -32,6 +32,7 @@ spec: {{- end }} serviceAccountName: {{ include "tensor-fusion.serviceAccountName" . 
}} enableServiceLinks: false + priorityClassName: "system-cluster-critical" containers: - name: controller image: "{{ .Values.controller.image.repository }}:{{ .Values.controller.image.tag | default .Chart.AppVersion }}" diff --git a/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml b/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml index d473fcfa..2c88583b 100644 --- a/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml +++ b/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml @@ -45,6 +45,18 @@ data: costPerHour: 1.64 fp16TFlops: 312 + - model: A100_PCIe_40GB + fullModelName: "NVIDIA A100-PCIE-40GB" + vendor: NVIDIA + costPerHour: 1.64 + fp16TFlops: 312 + + - model: A100_PCIe_80GB + fullModelName: "NVIDIA A100-PCIE-80GB" + vendor: NVIDIA + costPerHour: 1.64 + fp16TFlops: 312 + - model: A100_SXM_40G fullModelName: "NVIDIA A100-SXM4-40GB" vendor: NVIDIA @@ -70,13 +82,13 @@ data: fp16TFlops: 312 - model: A800_PCIe_80G - fullModelName: "NVIDIA A800 80GB PCIe" + fullModelName: "NVIDIA A800-PCIE-80GB" vendor: NVIDIA costPerHour: 1.64 fp16TFlops: 312 - model: A800_PCIe_40G - fullModelName: "NVIDIA A800 40GB PCIe" + fullModelName: "NVIDIA A800-PCIE-40GB" vendor: NVIDIA costPerHour: 1.64 fp16TFlops: 312 @@ -95,7 +107,7 @@ data: fp16TFlops: 125 - model: A40 - fullModelName: "NVIDIA A40 48GB PCIe" + fullModelName: "NVIDIA A40-PCIE-48GB" vendor: NVIDIA costPerHour: 0.4 fp16TFlops: 149.7 diff --git a/charts/tensor-fusion/templates/priorityclass.yaml b/charts/tensor-fusion/templates/priorityclass.yaml new file mode 100644 index 00000000..e1f493b8 --- /dev/null +++ b/charts/tensor-fusion/templates/priorityclass.yaml @@ -0,0 +1,23 @@ +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: tensor-fusion-critical +value: 100000 +globalDefault: false +description: "TensorFusion critical priority" +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: tensor-fusion-high +value: 10000 +globalDefault: false +description: "TensorFusion high priority" +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: tensor-fusion-medium +value: 0 +globalDefault: false +description: "TensorFusion medium priority" diff --git a/charts/tensor-fusion/values.yaml b/charts/tensor-fusion/values.yaml index cf4865f4..2c06aba6 100644 --- a/charts/tensor-fusion/values.yaml +++ b/charts/tensor-fusion/values.yaml @@ -31,7 +31,7 @@ controller: image: repository: tensorfusion/tensor-fusion-operator # Overrides the image tag whose default is the chart appVersion. - tag: "latest" + tag: "1.43.4" # This is for setting Kubernetes Annotations to a Pod. 
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ @@ -120,7 +120,7 @@ agent: image: repository: tensorfusion/tensor-fusion-agent - tag: "latest" + tag: "1.0.0" resources: requests: @@ -169,8 +169,8 @@ schedulerConfig: kind: KubeSchedulerConfiguration clientConnection: kubeconfig: "" - qps: 50 - burst: 100 + qps: 1000 + burst: 2000 profiles: # Refer: https://kubernetes.io/docs/reference/scheduling/config/ - schedulerName: tensor-fusion-scheduler diff --git a/cmd/main.go b/cmd/main.go index 92021131..b0ec36e7 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -33,6 +33,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/klog/v2" + resourcev1beta2 "k8s.io/api/resource/v1beta2" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -55,6 +56,7 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/controller" + "github.com/NexusGPU/tensor-fusion/internal/controller/dra" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/metrics" "github.com/NexusGPU/tensor-fusion/internal/portallocator" @@ -65,6 +67,8 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/NexusGPU/tensor-fusion/internal/version" webhookcorev1 "github.com/NexusGPU/tensor-fusion/internal/webhook/v1" + k8sVer "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apiserver/pkg/util/feature" // +kubebuilder:scaffold:imports ) @@ -99,6 +103,7 @@ var alertEvaluatorReady chan struct{} func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(tfv1.AddToScheme(scheme)) + utilruntime.Must(resourcev1beta2.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } @@ -204,6 +209,14 @@ func main() { _ = os.Setenv(constants.KubeApiVersionMajorEnv, version.Major) _ = os.Setenv(constants.KubeApiVersionMinorEnv, version.Minor) + // TODO: there will still be risk after FeatureGate removed when the feature is stable for a long time + // To be compatible with long-term k8s version, need to patch Kubernetes source code + k8sVersion := k8sVer.MustParseSemantic(version.String()) + err = feature.DefaultMutableFeatureGate.SetEmulationVersion(k8sVersion) + if err != nil { + setupLog.Error(err, "unable to set k8s version for feature gating") + } + alertEvaluatorReady = make(chan struct{}) setupTimeSeriesAndWatchGlobalConfigChanges(ctx, mgr) @@ -217,9 +230,11 @@ func main() { // Initialize GPU allocator and set up watches allocator, portAllocator := startTensorFusionAllocators(ctx, mgr) - startWebhook(mgr, portAllocator) + // Create pricing provider for webhook + pricingProvider := pricing.NewStaticPricingProvider() + startWebhook(mgr, portAllocator, pricingProvider) - scheduler := startScheduler(ctx, allocator, mgr) + scheduler := startScheduler(ctx, allocator, mgr, k8sVersion) startCustomResourceController(ctx, mgr, metricsRecorder, allocator, portAllocator) @@ -356,9 +371,10 @@ func startCustomResourceController( } if err = (&controller.GPUNodeReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("GPUNode"), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("GPUNode"), + Allocator: allocator, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "GPUNode") os.Exit(1) @@ -395,6 +411,23 @@ 
func startCustomResourceController( setupLog.Error(err, "unable to create controller", "controller", "Pod") os.Exit(1) } + + // Setup ResourceClaim controller for DRA Phase 2 + if err = (&dra.ResourceClaimReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ResourceClaim") + os.Exit(1) + } + // Setup ResourceSlice controller for DRA Phase 2 + if err = (&dra.ResourceSliceReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ResourceSlice") + os.Exit(1) + } if err = (&controller.NodeReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), @@ -441,11 +474,15 @@ func startCustomResourceController( } } -func startWebhook(mgr manager.Manager, portAllocator *portallocator.PortAllocator) { +func startWebhook( + mgr manager.Manager, + portAllocator *portallocator.PortAllocator, + pricingProvider pricing.PricingProvider, +) { if os.Getenv(constants.EnableWebhookEnv) == constants.FalseStringValue { return } - if err := webhookcorev1.SetupPodWebhookWithManager(mgr, portAllocator); err != nil { + if err := webhookcorev1.SetupPodWebhookWithManager(mgr, portAllocator, pricingProvider); err != nil { setupLog.Error(err, "unable to create webhook", "webhook", "Pod") os.Exit(1) } @@ -455,6 +492,7 @@ func startScheduler( ctx context.Context, allocator *gpuallocator.GpuAllocator, mgr manager.Manager, + k8sVersion *k8sVer.Version, ) *scheduler.Scheduler { if os.Getenv(constants.EnableSchedulerEnv) == constants.FalseStringValue { return nil @@ -473,7 +511,9 @@ func startScheduler( gpuTopoPlugin.NewWithDeps(allocator, mgr.GetClient()), ) - cc, scheduler, err := sched.SetupScheduler(ctx, mgr, schedulerConfigPath, false, gpuResourceFitOpt, gpuTopoOpt) + cc, scheduler, err := sched.SetupScheduler( + ctx, mgr, schedulerConfigPath, false, k8sVersion, gpuResourceFitOpt, gpuTopoOpt, + ) if err != nil { setupLog.Error(err, "unable to create tensor fusion scheduler") os.Exit(1) @@ -570,7 +610,7 @@ func startMetricsRecorder( // Worker level map will be updated by cluster reconcile // Key is poolName, second level key is QoS level - WorkerUnitPriceMap: make(map[string]map[string]metrics.RawBillingPricing), + WorkerUnitPriceMap: make(map[string]map[string]metrics.RawBillingPricing, 8), } if enableLeaderElection { go func() { diff --git a/cmd/sched/setup.go b/cmd/sched/setup.go index 2818fba2..20b28f96 100644 --- a/cmd/sched/setup.go +++ b/cmd/sched/setup.go @@ -22,6 +22,8 @@ import ( "strings" utilerrors "k8s.io/apimachinery/pkg/util/errors" + k8sVer "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/events" "k8s.io/component-base/configz" "k8s.io/klog/v2" @@ -50,6 +52,7 @@ func SetupScheduler( mgr manager.Manager, schedulerConfigPath string, disableHttpEndpoint bool, + k8sVersion *k8sVer.Version, outOfTreeRegistryOptions ...app.Option, ) (*schedulerserverconfig.CompletedConfig, *scheduler.Scheduler, error) { opts := options.NewOptions() @@ -73,6 +76,12 @@ func SetupScheduler( return nil, nil, err } + // Setup enumerationVersion again since it's overridden by the config + err = feature.DefaultMutableFeatureGate.SetEmulationVersion(k8sVersion) + if err != nil { + return nil, nil, err + } + if cfg, err := latest.Default(); err != nil { return nil, nil, err } else { diff --git a/config/crd/bases/tensor-fusion.ai_gpupools.yaml 
b/config/crd/bases/tensor-fusion.ai_gpupools.yaml index 8bc65e66..2158529c 100644 --- a/config/crd/bases/tensor-fusion.ai_gpupools.yaml +++ b/config/crd/bases/tensor-fusion.ai_gpupools.yaml @@ -562,6 +562,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml index 91a01eae..7c0c281b 100644 --- a/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml +++ b/config/crd/bases/tensor-fusion.ai_schedulingconfigtemplates.yaml @@ -143,6 +143,20 @@ spec: type: string type: object type: object + dra: + description: enable Dynamic Resource Allocation (DRA) for GPU resource + management + properties: + enable: + description: Enable DRA mode for all workloads in this configuration + template + type: boolean + resourceClass: + default: tensorfusion.ai/gpu + description: ResourceClass specifies the DRA resource class name + to use + type: string + type: object hypervisor: description: single GPU device multi-process queuing and fair scheduling with QoS constraint diff --git a/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml b/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml index 45bc9a47..496541bc 100644 --- a/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml +++ b/config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml @@ -629,6 +629,12 @@ spec: type: integer type: object type: array + evictionProtectionPriceRatio: + default: "1.2" + description: |- + Eviction protection price ratio applied to cost calculation during protection period + This multiplier increases pricing for protected workloads to discourage preemption + type: string pricing: items: properties: diff --git a/config/samples/dynamic-config.yaml b/config/samples/dynamic-config.yaml index c3102f3b..ae9350a3 100644 --- a/config/samples/dynamic-config.yaml +++ b/config/samples/dynamic-config.yaml @@ -1,23 +1,260 @@ metricsTTL: 30d # default to 'influx', influx v2 line protocol -metricsFormat: json +metricsFormat: influx -alertRules: -- name: GPUTFlopsFull - query: | - SELECT - node, - pool, - uuid, - avg(compute_percentage) AS compute_used - FROM tf_gpu_usage - WHERE compute_percentage > {{ .Threshold }} AND {{ .Conditions }} - GROUP BY node, pool, uuid - threshold: 97 - evaluationInterval: 30s - consecutiveCount: 4 - severity: P1 - summary: "GPU TFlops Full, used {{ .compute_used }}% on {{ .node }} {{ .uuid }}" - alertTargetInstance: "{{ .uuid }}" - description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has TFlops usage above {{ .Threshold }}% for 4 consecutive 30s, average usage: {{ .compute_used }}%" \ No newline at end of file +alertRules: + # Worker TFlops throttled alert + - name: WorkerTFlopsThrottled + query: | + SELECT workload, worker, uuid, node, MAX(compute_throttled_cnt)-MIN(compute_throttled_cnt) as throttled_increase + FROM tf_worker_usage + WHERE {{ .Conditions }} + GROUP BY workload, worker, uuid, node + HAVING throttled_increase > {{ .Threshold }} + threshold: 0 + evaluationInterval: 15s + consecutiveCount: 3 + severity: P1 + summary: "Worker TFlops Throttled" + description: "Worker {{ .worker }} from Node {{ .node }} is using more than {{ .Threshold 
}}% of its TFlops limit" + alertTargetInstance: "{{ .worker }}-{{ .uuid }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # Worker VRAM switching too frequent alert + - name: WorkerVRAMSwitchCountIncreasing + query: | + SELECT workload, worker, uuid, node, MAX(vram_resumed_cnt)-MIN(vram_resumed_cnt) as switch_increase + FROM tf_worker_usage + WHERE {{ .Conditions }} + GROUP BY workload, worker, uuid, node + HAVING switch_increase > {{ .Threshold }} + threshold: 0 + evaluationInterval: 2m + consecutiveCount: 1 + severity: P1 + summary: "Worker VRAM Switch Count Increasing" + description: "Worker {{ .worker }} from Node {{ .node }} has switched VRAM {{ .switch_increase }} times in last 2 minutes, GPU may be too hot" + alertTargetInstance: "{{ .worker }}-{{ .uuid }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # Worker can not scale up/scheduled alert + - name: WorkerAllocationFailed + query: | + SELECT pool, (MAX(total_allocation_fail_cnt) - MIN(total_allocation_fail_cnt)) as failure_increase, + FROM tf_system_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING failure_increase > {{ .Threshold }} + threshold: 0 + evaluationInterval: 30s + consecutiveCount: 1 + severity: P1 + summary: "Worker allocation failed for GPU Pool {{ .pool }}" + description: "Worker allocation failed, {{ .failure_increase }} times in last 30 seconds for GPU Pool {{ .pool }}" + alertTargetInstance: "{{ .pool }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # Single GPU Alerts + + # GPU VRAM Full Alert + - name: GPUVRAMFull + query: | + SELECT + node, + pool, + uuid, + avg(memory_percentage) AS memory_used + FROM tf_gpu_usage + WHERE memory_percentage > {{ .Threshold }} AND {{ .Conditions }} + GROUP BY node, pool, uuid + threshold: 97 + evaluationInterval: 30s + consecutiveCount: 2 + severity: P1 + summary: "GPU VRAM Full, used {{ .memory_used }}% on {{ .node }} {{ .uuid }}" + alertTargetInstance: "{{ .uuid }}" + description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has VRAM usage above {{ .Threshold }}% for 2 consecutive 30s, average usage: {{ .memory_used }}%" + + # GPU TFlops Full Alert + - name: GPUTFlopsFull + query: | + SELECT + node, + pool, + uuid, + avg(compute_percentage) AS compute_used + FROM tf_gpu_usage + WHERE compute_percentage > {{ .Threshold }} AND {{ .Conditions }} + GROUP BY node, pool, uuid + threshold: 97 + evaluationInterval: 30s + consecutiveCount: 4 + severity: P1 + summary: "GPU TFlops Full, used {{ .compute_used }}% on {{ .node }} {{ .uuid }}" + alertTargetInstance: "{{ .uuid }}" + description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has TFlops usage above {{ .Threshold }}% for 4 consecutive 30s, average usage: {{ .compute_used }}%" + + # GPU Temperature alert + - name: GPUTemperatureHigh + query: | + SELECT + node, + pool, + uuid, + avg(temperature) AS avg_temperature + FROM tf_gpu_usage + WHERE temperature > {{ .Threshold }} AND {{ .Conditions }} + GROUP BY node, pool, uuid + threshold: 90 + evaluationInterval: 30s + consecutiveCount: 3 + severity: P1 + summary: "GPU Temperature High, {{ .avg_temperature }}°C on {{ .node }} {{ .uuid }}" + alertTargetInstance: "{{ .uuid }}" + description: "GPU {{ .uuid }} from Node {{ .node }} has temperature above {{ .Threshold }}°C, Average temperature: {{ .avg_temperature }}, GPU Pool: {{ .pool }}" + runbookURL: "https://tensor-fusion.ai/guide/troubleshooting/handbook" + + # GPU Pool Alerts + + # Node TFlops allocation alert 
+ - name: NodeTFlopsAllocationCritical + query: | + SELECT node, pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING tflops_available < {{ .Threshold }} + threshold: 5 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P0 + summary: "Available TFlops below threshold, remaining {{ .tflops_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + - name: NodeTFlopsAllocationWarning + query: | + SELECT node, pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING tflops_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Node available TFlops below threshold, remaining {{ .tflops_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + # Pool TFlops allocation alert - Total + - name: PoolTotalTFlopsAllocationCritical + query: | + SELECT pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING tflops_available < {{ .Threshold }} + threshold: 5 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P0 + summary: "Pool available TFlops below threshold, remaining {{ .tflops_available }}%" + description: "Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .pool }}" + + - name: PoolTotalTFlopsAllocationWarning + query: | + SELECT pool, (100 - avg(allocated_tflops_percent)) as tflops_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING tflops_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Pool available TFlops below threshold, remaining {{ .tflops_available }}%" + description: "Pool {{ .pool }} has available TFlops below {{ .Threshold }}%" + alertTargetInstance: "{{ .pool }}" + + # Node VRAM allocation alert + - name: NodeVRAMAllocationCritical + query: | + SELECT node, pool, (100 - avg(allocated_vram_percent)) as vram_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING vram_available < {{ .Threshold }} + threshold: 5 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Node available VRAM below threshold, remaining {{ .vram_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available VRAM below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + - name: NodeVRAMAllocationWarning + query: | + SELECT node, pool, (100 - avg(allocated_vram_percent)) as vram_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY node, pool + HAVING vram_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Node available VRAM below threshold, remaining {{ .vram_available }}% for {{ .node }}" + description: "Node {{ .node }} in Pool {{ .pool }} has available VRAM below {{ .Threshold }}%" + alertTargetInstance: "{{ .node }}" + + # Pool VRAM allocation alert + - name: PoolVRAMAllocationWarning + query: | + SELECT pool, (100 - avg(allocated_vram_percent)) as vram_available + FROM tf_node_metrics + WHERE {{ .Conditions }} + GROUP BY pool + HAVING 
vram_available < {{ .Threshold }} + threshold: 10 + evaluationInterval: 1m + consecutiveCount: 2 + severity: P1 + summary: "Pool available VRAM below threshold, remaining {{ .vram_available }}% for {{ .pool }}" + description: "Pool {{ .pool }} has available VRAM below {{ .Threshold }}%" + alertTargetInstance: "{{ .pool }}" + + # Empty or Idle GPU Alert + - name: EmptyGPU + query: | + SELECT DISTINCT node + FROM tf_node_metrics + WHERE {{ .Conditions }} AND node NOT IN ( + SELECT DISTINCT node + FROM tf_worker_usage + WHERE {{ .Conditions }} + ) + threshold: 0 + evaluationInterval: 5m + consecutiveCount: 2 + severity: P2 + summary: "Empty GPU without any workload, Node {{ .node }}" + description: "GPU Node {{ .node }} has no workload running, should be decommissioned" + alertTargetInstance: "{{ .node }}" + + - name: IdleGPU + query: | + SELECT node, pool, uuid, avg(compute_percentage) as compute, avg(memory_percentage) vram + FROM tf_gpu_usage + WHERE {{ .Conditions }} + GROUP BY node, pool, uuid + HAVING compute < 1 and vram < {{ .Threshold }}; + threshold: 5 + evaluationInterval: 10m + consecutiveCount: 3 + severity: P2 + summary: "Idle GPU found: {{ .uuid }} on Node {{ .node }}" + description: "GPU {{ .uuid }} on Node {{ .node }} in Pool {{ .pool }} has been idle for 3 consecutive 10m, compute: {{ .compute }}, vram: {{ .vram }}" + alertTargetInstance: "{{ .uuid }}" diff --git a/go.mod b/go.mod index 63af41a8..0a8a5ece 100644 --- a/go.mod +++ b/go.mod @@ -6,171 +6,185 @@ require ( github.com/DATA-DOG/go-sqlmock v1.5.2 github.com/NVIDIA/go-nvml v0.13.0-1 github.com/aliyun/alibaba-cloud-sdk-go v1.63.107 - github.com/aws/aws-sdk-go-v2 v1.38.1 - github.com/aws/aws-sdk-go-v2/service/ec2 v1.241.0 - github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5 + github.com/aws/aws-sdk-go-v2 v1.39.0 + github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0 + github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5 github.com/gin-contrib/gzip v1.2.3 github.com/gin-gonic/gin v1.10.1 + github.com/go-sql-driver/mysql v1.9.3 github.com/google/cel-go v0.23.2 github.com/influxdata/line-protocol/v2 v2.2.1 github.com/lithammer/shortuuid/v4 v4.2.0 github.com/mitchellh/mapstructure v1.5.0 - github.com/onsi/ginkgo/v2 v2.23.4 - github.com/onsi/gomega v1.38.0 + github.com/onsi/ginkgo/v2 v2.25.3 + github.com/onsi/gomega v1.38.2 github.com/pkg/errors v0.9.1 github.com/samber/lo v1.51.0 github.com/shirou/gopsutil v3.21.11+incompatible - github.com/stretchr/testify v1.11.0 - go.opentelemetry.io/otel v1.37.0 - golang.org/x/time v0.12.0 + github.com/stretchr/testify v1.11.1 + go.opentelemetry.io/otel v1.38.0 + go.uber.org/zap v1.27.0 + golang.org/x/time v0.13.0 gomodules.xyz/jsonpatch/v2 v2.5.0 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gorm.io/driver/mysql v1.6.0 - gorm.io/gorm v1.30.1 - k8s.io/api v0.33.3 - k8s.io/apimachinery v0.33.3 - k8s.io/client-go v0.33.3 - k8s.io/component-base v0.33.3 - k8s.io/component-helpers v0.33.3 + gorm.io/gorm v1.31.0 + k8s.io/api v0.34.1 + k8s.io/apimachinery v0.34.1 + k8s.io/apiserver v0.34.0 + k8s.io/client-go v0.34.1 + k8s.io/component-base v0.34.0 + k8s.io/component-helpers v0.34.1 k8s.io/klog/v2 v2.130.1 - k8s.io/kubernetes v1.33.3 - k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 - sigs.k8s.io/controller-runtime v0.21.0 - sigs.k8s.io/karpenter v1.6.1 - sigs.k8s.io/scheduler-plugins v0.32.7 + k8s.io/kube-scheduler v0.34.0 + k8s.io/kubernetes v1.34.0 + k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d + sigs.k8s.io/controller-runtime v0.22.1 + sigs.k8s.io/karpenter v1.6.2 
sigs.k8s.io/yaml v1.6.0 ) require ( - cel.dev/expr v0.23.1 // indirect + cel.dev/expr v0.24.0 // indirect filippo.io/edwards25519 v1.1.0 // indirect - github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/Masterminds/semver/v3 v3.4.0 // indirect github.com/NYTimes/gziphandler v1.1.1 // indirect github.com/antlr4-go/antlr/v4 v4.13.1 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.2 // indirect - github.com/aws/smithy-go v1.22.5 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6 // indirect + github.com/aws/smithy-go v1.23.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect - github.com/bytedance/sonic v1.13.2 // indirect - github.com/bytedance/sonic/loader v0.2.4 // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/bytedance/gopkg v0.1.3 // indirect + github.com/bytedance/sonic v1.14.1 // indirect + github.com/bytedance/sonic/loader v0.3.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/cloudwego/base64x v0.1.5 // indirect + github.com/cloudwego/base64x v0.1.6 // indirect github.com/coreos/go-semver v0.3.1 // indirect - github.com/coreos/go-systemd/v22 v22.5.0 // indirect + github.com/coreos/go-systemd/v22 v22.6.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/distribution/reference v0.6.0 // indirect - github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/fsnotify/fsnotify v1.8.0 // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect - github.com/gabriel-vasile/mimetype v1.4.8 // indirect - github.com/gin-contrib/sse v1.0.0 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.10 // indirect + github.com/gin-contrib/sse v1.1.0 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect - github.com/go-ole/go-ole v1.2.6 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.21.0 // indirect - github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-ole/go-ole v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.22.0 // indirect + github.com/go-openapi/jsonreference v0.21.1 // indirect + github.com/go-openapi/swag v0.24.1 // indirect + github.com/go-openapi/swag/cmdutils v0.24.0 // indirect + github.com/go-openapi/swag/conv v0.24.0 // indirect + github.com/go-openapi/swag/fileutils v0.24.0 // indirect + github.com/go-openapi/swag/jsonname v0.24.0 // indirect + github.com/go-openapi/swag/jsonutils v0.24.0 // indirect + github.com/go-openapi/swag/loading 
v0.24.0 // indirect + github.com/go-openapi/swag/mangling v0.24.0 // indirect + github.com/go-openapi/swag/netutils v0.24.0 // indirect + github.com/go-openapi/swag/stringutils v0.24.0 // indirect + github.com/go-openapi/swag/typeutils v0.24.0 // indirect + github.com/go-openapi/swag/yamlutils v0.24.0 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-playground/validator/v10 v10.26.0 // indirect - github.com/go-sql-driver/mysql v1.8.1 // indirect + github.com/go-playground/validator/v10 v10.27.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/goccy/go-json v0.10.5 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/btree v1.1.3 // indirect - github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/cel-go v0.26.1 // indirect + github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect + github.com/google/pprof v0.0.0-20250903194437-c28834ac2320 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2 // indirect - github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/klauspost/cpuid/v2 v2.3.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect - github.com/moby/term v0.5.0 // indirect + github.com/moby/term v0.5.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect - github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.22.0 // indirect + github.com/prometheus/client_golang v1.23.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.62.0 // indirect - github.com/prometheus/procfs v0.15.1 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.17.0 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect - github.com/spf13/cobra v1.8.1 // indirect - github.com/spf13/pflag v1.0.6 // indirect - github.com/stoewer/go-strcase v1.3.0 // indirect + github.com/spf13/cobra v1.10.1 // indirect + github.com/spf13/pflag v1.0.10 // indirect + github.com/stoewer/go-strcase v1.3.1 // indirect 
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/ugorji/go/codec v1.2.12 // indirect + github.com/ugorji/go/codec v1.3.0 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - go.etcd.io/etcd/api/v3 v3.5.21 // indirect - go.etcd.io/etcd/client/pkg/v3 v3.5.21 // indirect - go.etcd.io/etcd/client/v3 v3.5.21 // indirect + go.etcd.io/etcd/api/v3 v3.6.4 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.6.4 // indirect + go.etcd.io/etcd/client/v3 v3.6.4 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 // indirect - go.opentelemetry.io/otel/metric v1.37.0 // indirect - go.opentelemetry.io/otel/sdk v1.33.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect - go.opentelemetry.io/proto/otlp v1.4.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/sdk v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect + go.opentelemetry.io/proto/otlp v1.8.0 // indirect go.uber.org/automaxprocs v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect - go.uber.org/zap v1.27.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect - golang.org/x/arch v0.15.0 // indirect - golang.org/x/crypto v0.39.0 // indirect - golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect - golang.org/x/net v0.41.0 // indirect - golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sync v0.15.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/term v0.32.0 // indirect - golang.org/x/text v0.26.0 // indirect - golang.org/x/tools v0.33.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20241223144023-3abc09e42ca8 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d // indirect - google.golang.org/grpc v1.69.4 // indirect - google.golang.org/protobuf v1.36.6 // indirect - gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/arch v0.21.0 // indirect + golang.org/x/crypto v0.41.0 // indirect + golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/oauth2 v0.31.0 // indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect + golang.org/x/tools v0.36.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250826171959-ef028d996bc1 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 // indirect + google.golang.org/grpc v1.75.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - 
k8s.io/apiextensions-apiserver v0.33.2 // indirect - k8s.io/apiserver v0.33.2 // indirect - k8s.io/cloud-provider v0.33.2 // indirect - k8s.io/controller-manager v0.33.2 // indirect - k8s.io/csi-translation-lib v0.33.2 // indirect - k8s.io/dynamic-resource-allocation v0.33.1 // indirect - k8s.io/kms v0.33.2 // indirect - k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a // indirect - k8s.io/kube-scheduler v0.32.7 // indirect - k8s.io/kubelet v0.33.1 // indirect - sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect - sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + k8s.io/apiextensions-apiserver v0.34.0 // indirect + k8s.io/cloud-provider v0.34.0 // indirect + k8s.io/controller-manager v0.34.0 // indirect + k8s.io/csi-translation-lib v0.34.0 // indirect + k8s.io/dynamic-resource-allocation v0.34.0 // indirect + k8s.io/kms v0.34.0 // indirect + k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611 // indirect + k8s.io/kubelet v0.34.0 // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect ) diff --git a/go.sum b/go.sum index e98c785d..0130fbbf 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,16 @@ -cel.dev/expr v0.23.1 h1:K4KOtPCJQjVggkARsjG9RWXP6O4R73aHeJMa/dmCQQg= -cel.dev/expr v0.23.1/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= +cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY= +cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= +github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= @@ -22,43 +24,43 @@ github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYW github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= github.com/avast/retry-go v3.0.0+incompatible 
h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= -github.com/aws/aws-sdk-go-v2 v1.38.1 h1:j7sc33amE74Rz0M/PoCpsZQ6OunLqys/m5antM0J+Z8= -github.com/aws/aws-sdk-go-v2 v1.38.1/go.mod h1:9Q0OoGQoboYIAJyslFyF1f5K1Ryddop8gqMhWx/n4Wg= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2 h1:sPiRHLVUIIQcoVZTNwqQcdtjkqkPopyYmIX0M5ElRf4= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2/go.mod h1:ik86P3sgV+Bk7c1tBFCwI3VxMoSEwl4YkRB9xn1s340= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2 h1:ZdzDAg075H6stMZtbD2o+PyB933M/f20e9WmCBC17wA= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2/go.mod h1:eE1IIzXG9sdZCB0pNNpMpsYTLl4YdOQD3njiVN1e/E4= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.241.0 h1:twGX//bv1QH/9pyJaqynNSo0eXGkDEdDTFy8GNPsz5M= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.241.0/go.mod h1:HDxGArx3/bUnkoFsuvTNIxEj/cR3f+IgsVh1B7Pvay8= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 h1:6+lZi2JeGKtCraAj1rpoZfKqnQ9SptseRZioejfUOLM= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0/go.mod h1:eb3gfbVIxIoGgJsi9pGne19dhCBpK6opTYpQqAmdy44= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.2 h1:oxmDEO14NBZJbK/M8y3brhMFEIGN4j8a6Aq8eY0sqlo= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.2/go.mod h1:4hH+8QCrk1uRWDPsVfsNDUup3taAjO8Dnx63au7smAU= -github.com/aws/smithy-go v1.22.5 h1:P9ATCXPMb2mPjYBgueqJNCA5S9UfktsW0tTxi+a7eqw= -github.com/aws/smithy-go v1.22.5/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= -github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5 h1:m/qynRSKYe4RKSroVqRRgMlp/cUXO54SY2upSUqfcqw= -github.com/awslabs/operatorpkg v0.0.0-20250721225858-4e7491c57aa5/go.mod h1:3Lf3VaiJyr3IP0gH53sZp16Tu5CmoaDSUv4KQwFQO/I= +github.com/aws/aws-sdk-go-v2 v1.39.0 h1:xm5WV/2L4emMRmMjHFykqiA4M/ra0DJVSWUkDyBjbg4= +github.com/aws/aws-sdk-go-v2 v1.39.0/go.mod h1:sDioUELIUO9Znk23YVmIk86/9DOpkbyyVb1i/gUNFXY= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6 h1:uF68eJA6+S9iVr9WgX1NaRGyQ/6MdIyc4JNUo6TN1FA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.6/go.mod h1:qlPeVZCGPiobx8wb1ft0GHT5l+dc6ldnwInDFaMvC7Y= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6 h1:pa1DEC6JoI0zduhZePp3zmhWvk/xxm4NB8Hy/Tlsgos= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.6/go.mod h1:gxEjPebnhWGJoaDdtDkA0JX46VRg1wcTHYe63OfX5pE= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0 h1:hGHSNZDTFnhLGUpRkQORM8uBY9R/FOkxCkuUUJBEOQ4= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.0/go.mod h1:SmMqzfS4HVsOD58lwLZ79oxF58f8zVe5YdK3o+/o1Ck= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 h1:oegbebPEMA/1Jny7kvwejowCaHz1FWZAQ94WXFNCyTM= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1/go.mod h1:kemo5Myr9ac0U9JfSjMo9yHLtw+pECEHsFtJ9tqCEI8= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6 h1:LHS1YAIJXJ4K9zS+1d/xa9JAA9sL2QyXIQCQFQW/X08= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.6/go.mod h1:c9PCiTEuh0wQID5/KqA32J+HAgZxN9tOGXKCiYJjTZI= +github.com/aws/smithy-go v1.23.0 h1:8n6I3gXzWJB2DxBDnfxgBaSX6oe0d/t10qGz7OKqMCE= +github.com/aws/smithy-go v1.23.0/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= +github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5 h1:MM4Y7+YqhWLZiRuZfWrAXD2rZ0maVePbzARP3adeJ+g= +github.com/awslabs/operatorpkg v0.0.0-20250903180825-ba7ac0af36e5/go.mod 
h1:OCT5DIzVB2740qVgfRz0zQe/dDdvnsnFarzy6VdYNoA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= -github.com/bytedance/sonic v1.13.2 h1:8/H1FempDZqC4VqjptGo14QQlJx8VdZJegxs6wwfqpQ= -github.com/bytedance/sonic v1.13.2/go.mod h1:o68xyaF9u2gvVBuGHPlUVCy+ZfmNNO5ETf1+KgkJhz4= -github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= -github.com/bytedance/sonic/loader v0.2.4 h1:ZWCw4stuXUsn1/+zQDqeE7JKP+QO47tz7QCNan80NzY= -github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= -github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= -github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M= +github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM= +github.com/bytedance/sonic v1.14.1 h1:FBMC0zVz5XUmE4z9wF4Jey0An5FueFvOsTKKKtwIl7w= +github.com/bytedance/sonic v1.14.1/go.mod h1:gi6uhQLMbTdeP0muCnrjHLeCUPyb70ujhnNlhOylAFc= +github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA= +github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cloudwego/base64x v0.1.5 h1:XPciSp1xaq2VCSt6lF0phncD4koWyULpl5bUxbfCyP4= -github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= -github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M= +github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU= github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= -github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo= +github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= @@ -70,8 +72,8 @@ github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5Qvfr github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= 
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= -github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= @@ -83,16 +85,16 @@ github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk= github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= -github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= -github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= -github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= -github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0= +github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= github.com/gin-contrib/gzip v1.2.3 h1:dAhT722RuEG330ce2agAs75z7yB+NKvX/ZM1r8w0u2U= github.com/gin-contrib/gzip v1.2.3/go.mod h1:ad72i4Bzmaypk8M762gNXa2wkxxjbz0icRNnuLJ9a/c= -github.com/gin-contrib/sse v1.0.0 h1:y3bT1mUWUxDpW4JLQg/HnTqV4rozuW4tC9eFKTxYI9E= -github.com/gin-contrib/sse v1.0.0/go.mod h1:zNuFdwarAygJBht0NTKiSi3jRf6RbqeILZ9Sp6Slhe0= +github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w= +github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM= github.com/gin-gonic/gin v1.10.1 h1:T0ujvqyCSqRopADpgPgiTT63DUQVSfojyME59Ei63pQ= github.com/gin-gonic/gin v1.10.1/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= @@ -103,66 +105,87 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= -github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= 
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= -github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= -github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= -github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= -github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= +github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/go-openapi/jsonpointer v0.22.0 h1:TmMhghgNef9YXxTu1tOopo+0BGEytxA+okbry0HjZsM= +github.com/go-openapi/jsonpointer v0.22.0/go.mod h1:xt3jV88UtExdIkkL7NloURjRQjbeUgcxFblMjq2iaiU= +github.com/go-openapi/jsonreference v0.21.1 h1:bSKrcl8819zKiOgxkbVNRUBIr6Wwj9KYrDbMjRs0cDA= +github.com/go-openapi/jsonreference v0.21.1/go.mod h1:PWs8rO4xxTUqKGu+lEvvCxD5k2X7QYkKAepJyCmSTT8= +github.com/go-openapi/swag v0.24.1 h1:DPdYTZKo6AQCRqzwr/kGkxJzHhpKxZ9i/oX0zag+MF8= +github.com/go-openapi/swag v0.24.1/go.mod h1:sm8I3lCPlspsBBwUm1t5oZeWZS0s7m/A+Psg0ooRU0A= +github.com/go-openapi/swag/cmdutils v0.24.0 h1:KlRCffHwXFI6E5MV9n8o8zBRElpY4uK4yWyAMWETo9I= +github.com/go-openapi/swag/cmdutils v0.24.0/go.mod h1:uxib2FAeQMByyHomTlsP8h1TtPd54Msu2ZDU/H5Vuf8= +github.com/go-openapi/swag/conv v0.24.0 h1:ejB9+7yogkWly6pnruRX45D1/6J+ZxRu92YFivx54ik= +github.com/go-openapi/swag/conv v0.24.0/go.mod h1:jbn140mZd7EW2g8a8Y5bwm8/Wy1slLySQQ0ND6DPc2c= +github.com/go-openapi/swag/fileutils v0.24.0 h1:U9pCpqp4RUytnD689Ek/N1d2N/a//XCeqoH508H5oak= +github.com/go-openapi/swag/fileutils v0.24.0/go.mod h1:3SCrCSBHyP1/N+3oErQ1gP+OX1GV2QYFSnrTbzwli90= +github.com/go-openapi/swag/jsonname v0.24.0 h1:2wKS9bgRV/xB8c62Qg16w4AUiIrqqiniJFtZGi3dg5k= +github.com/go-openapi/swag/jsonname v0.24.0/go.mod h1:GXqrPzGJe611P7LG4QB9JKPtUZ7flE4DOVechNaDd7Q= +github.com/go-openapi/swag/jsonutils v0.24.0 h1:F1vE1q4pg1xtO3HTyJYRmEuJ4jmIp2iZ30bzW5XgZts= +github.com/go-openapi/swag/jsonutils v0.24.0/go.mod h1:vBowZtF5Z4DDApIoxcIVfR8v0l9oq5PpYRUuteVu6f0= +github.com/go-openapi/swag/loading v0.24.0 h1:ln/fWTwJp2Zkj5DdaX4JPiddFC5CHQpvaBKycOlceYc= +github.com/go-openapi/swag/loading v0.24.0/go.mod h1:gShCN4woKZYIxPxbfbyHgjXAhO61m88tmjy0lp/LkJk= +github.com/go-openapi/swag/mangling v0.24.0 h1:PGOQpViCOUroIeak/Uj/sjGAq9LADS3mOyjznmHy2pk= +github.com/go-openapi/swag/mangling v0.24.0/go.mod h1:Jm5Go9LHkycsz0wfoaBDkdc4CkpuSnIEf62brzyCbhc= +github.com/go-openapi/swag/netutils v0.24.0 h1:Bz02HRjYv8046Ycg/w80q3g9QCWeIqTvlyOjQPDjD8w= +github.com/go-openapi/swag/netutils v0.24.0/go.mod h1:WRgiHcYTnx+IqfMCtu0hy9oOaPR0HnPbmArSRN1SkZM= +github.com/go-openapi/swag/stringutils v0.24.0 h1:i4Z/Jawf9EvXOLUbT97O0HbPUja18VdBxeadyAqS1FM= +github.com/go-openapi/swag/stringutils v0.24.0/go.mod h1:5nUXB4xA0kw2df5PRipZDslPJgJut+NjL7D25zPZ/4w= +github.com/go-openapi/swag/typeutils v0.24.0 h1:d3szEGzGDf4L2y1gYOSSLeK6h46F+zibnEas2Jm/wIw= +github.com/go-openapi/swag/typeutils v0.24.0/go.mod h1:q8C3Kmk/vh2VhpCLaoR2MVWOGP8y7Jc8l82qCTd1DYI= +github.com/go-openapi/swag/yamlutils v0.24.0 h1:bhw4894A7Iw6ne+639hsBNRHg9iZg/ISrOVr+sJGp4c= +github.com/go-openapi/swag/yamlutils v0.24.0/go.mod h1:DpKv5aYuaGm/sULePoeiG8uwMpZSfReo1HR3Ik0yaG8= github.com/go-playground/assert/v2 v2.2.0 
h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.26.0 h1:SP05Nqhjcvz81uJaRfEV0YBSSSGMc/iMaVtFbr3Sw2k= -github.com/go-playground/validator/v10 v10.26.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= -github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= -github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= +github.com/go-playground/validator/v10 v10.27.0 h1:w8+XrWVMhGkxOaaowyKH35gFydVHOvC0/uWoy2Fzwn4= +github.com/go-playground/validator/v10 v10.27.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= +github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo= +github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A= -github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI= -github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= +github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= +github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/cel-go v0.23.2 h1:UdEe3CvQh3Nv+E/j9r1Y//WO0K0cSyD7/y0bzyLIMI4= -github.com/google/cel-go v0.23.2/go.mod h1:52Pb6QsDbC5kvgxvZhiL9QX1oZEkcUF/ZqaPx1J5Wwo= -github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= -github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= +github.com/google/cel-go v0.26.1 h1:iPbVVEdkhTX++hpe3lzSk7D3G3QSYqLGoHOcEio+UXQ= +github.com/google/cel-go v0.26.1/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= 
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/pprof v0.0.0-20250903194437-c28834ac2320 h1:c7ayAhbRP9HnEl/hg/WQOM9s0snWztfW6feWXZbGHw0= +github.com/google/pprof v0.0.0-20250903194437-c28834ac2320/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= -github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= -github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA= +github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.0 h1:FbSCl+KggFl+Ocym490i/EyXF4lPgLoUtcSWquBM0Rs= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.0/go.mod h1:qOchhhIlmRcqk/O9uCo/puJlyo07YINaIqdZfZG3Jkc= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 h1:VNqngBF40hVlDloBruUehVYC3ArSgIyScOAyMRqBxRg= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1/go.mod h1:RBRO7fro65R6tjKzYgLAFo0t1QEXY1Dp+i/bvpRiqiQ= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -182,24 +205,20 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod 
h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= -github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= +github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I= +github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2 h1:uAwqOtyrFYggq3pVf3hs1XKkBxrQ8dkgjWz3LCLJsiY= -github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2/go.mod h1:LBzS4n6GX1C69tzSd5EibZ9cGOXFuHP7GxEMDYVe1sM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= -github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= -github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= +github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -221,28 +240,29 @@ github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4 github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= -github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= +github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= +github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= -github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= -github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY= -github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o= +github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw= +github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE= +github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= +github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= -github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= -github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= +github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -250,14 +270,14 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= -github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang 
v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= -github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= +github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= @@ -271,13 +291,13 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= -github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= -github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.1 h1:iS0MdW+kVTxgMoE1LAZyMiYJFKlOzLooE4MxjirtkAs= +github.com/stoewer/go-strcase v1.3.1/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -288,10 +308,8 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.10.0/go.mod 
h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/stretchr/testify v1.11.0 h1:ib4sjIrwZKxE5u/Japgo/7SJV3PvgjGiRNAvTVGqQl8= -github.com/stretchr/testify v1.11.0/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= @@ -300,8 +318,8 @@ github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaO github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= -github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= -github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA= +github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk= @@ -310,44 +328,42 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= -go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= -go.etcd.io/etcd/api/v3 v3.5.21 h1:A6O2/JDb3tvHhiIz3xf9nJ7REHvtEFJJ3veW3FbCnS8= -go.etcd.io/etcd/api/v3 v3.5.21/go.mod h1:c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY= -go.etcd.io/etcd/client/pkg/v3 v3.5.21 h1:lPBu71Y7osQmzlflM9OfeIV2JlmpBjqBNlLtcoBqUTc= -go.etcd.io/etcd/client/pkg/v3 v3.5.21/go.mod h1:BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs= -go.etcd.io/etcd/client/v2 v2.305.21 h1:eLiFfexc2mE+pTLz9WwnoEsX5JTTpLCYVivKkmVXIRA= -go.etcd.io/etcd/client/v2 v2.305.21/go.mod h1:OKkn4hlYNf43hpjEM3Ke3aRdUkhSl8xjKjSf8eCq2J8= -go.etcd.io/etcd/client/v3 v3.5.21 h1:T6b1Ow6fNjOLOtM0xSoKNQt1ASPCLWrF9XMHcH9pEyY= -go.etcd.io/etcd/client/v3 v3.5.21/go.mod h1:mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU= -go.etcd.io/etcd/pkg/v3 v3.5.21 h1:jUItxeKyrDuVuWhdh0HtjUANwyuzcb7/FAeUfABmQsk= -go.etcd.io/etcd/pkg/v3 v3.5.21/go.mod h1:wpZx8Egv1g4y+N7JAsqi2zoUiBIUWznLjqJbylDjWgU= -go.etcd.io/etcd/raft/v3 v3.5.21 h1:dOmE0mT55dIUsX77TKBLq+RgyumsQuYeiRQnW/ylugk= -go.etcd.io/etcd/raft/v3 v3.5.21/go.mod h1:fmcuY5R2SNkklU4+fKVBQi2biVp5vafMrWUEj4TJ4Cs= -go.etcd.io/etcd/server/v3 v3.5.21 h1:9w0/k12majtgarGmlMVuhwXRI2ob3/d1Ik3X5TKo0yU= -go.etcd.io/etcd/server/v3 v3.5.21/go.mod h1:G1mOzdwuzKT1VRL7SqRchli/qcFrtLBTAQ4lV20sXXo= +go.etcd.io/bbolt v1.4.2 
h1:IrUHp260R8c+zYx/Tm8QZr04CX+qWS5PGfPdevhdm1I= +go.etcd.io/bbolt v1.4.2/go.mod h1:Is8rSHO/b4f3XigBC0lL0+4FwAQv3HXEEIgFMuKHceM= +go.etcd.io/etcd/api/v3 v3.6.4 h1:7F6N7toCKcV72QmoUKa23yYLiiljMrT4xCeBL9BmXdo= +go.etcd.io/etcd/api/v3 v3.6.4/go.mod h1:eFhhvfR8Px1P6SEuLT600v+vrhdDTdcfMzmnxVXXSbk= +go.etcd.io/etcd/client/pkg/v3 v3.6.4 h1:9HBYrjppeOfFjBjaMTRxT3R7xT0GLK8EJMVC4xg6ok0= +go.etcd.io/etcd/client/pkg/v3 v3.6.4/go.mod h1:sbdzr2cl3HzVmxNw//PH7aLGVtY4QySjQFuaCgcRFAI= +go.etcd.io/etcd/client/v3 v3.6.4 h1:YOMrCfMhRzY8NgtzUsHl8hC2EBSnuqbR3dh84Uryl7A= +go.etcd.io/etcd/client/v3 v3.6.4/go.mod h1:jaNNHCyg2FdALyKWnd7hxZXZxZANb0+KGY+YQaEMISo= +go.etcd.io/etcd/pkg/v3 v3.6.4 h1:fy8bmXIec1Q35/jRZ0KOes8vuFxbvdN0aAFqmEfJZWA= +go.etcd.io/etcd/pkg/v3 v3.6.4/go.mod h1:kKcYWP8gHuBRcteyv6MXWSN0+bVMnfgqiHueIZnKMtE= +go.etcd.io/etcd/server/v3 v3.6.4 h1:LsCA7CzjVt+8WGrdsnh6RhC0XqCsLkBly3ve5rTxMAU= +go.etcd.io/etcd/server/v3 v3.6.4/go.mod h1:aYCL/h43yiONOv0QIR82kH/2xZ7m+IWYjzRmyQfnCAg= +go.etcd.io/raft/v3 v3.6.0 h1:5NtvbDVYpnfZWcIHgGRk9DyzkBIXOi8j+DDp1IcnUWQ= +go.etcd.io/raft/v3 v3.6.0/go.mod h1:nLvLevg6+xrVtHUmVaTcTz603gQPHfh7kUAwV6YpfGo= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 h1:PS8wXpbyaDJQ2VDHHncMe9Vct0Zn1fEjpsjrLxGJoSc= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0/go.mod h1:HDBUsEjOuRC0EzKZ1bSaRGZWUBAzo+MhAcUUORSr4D0= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.33.0 h1:iax7M131HuAm9QkZotNHEfstof92xM+N8sr3uHXc2IM= -go.opentelemetry.io/otel/sdk v1.33.0/go.mod h1:A1Q5oi7/9XaMlIWzPSxLRWOI8nG3FnzHJNbiENQuihM= -go.opentelemetry.io/otel/sdk/metric v1.31.0 h1:i9hxxLJF/9kkvfHppyLL55aW7iIJz4JjxTeYusH7zMc= -go.opentelemetry.io/otel/sdk/metric v1.31.0/go.mod h1:CRInTMVvNhUKgSAMbKyTMxqOBC0zgyxzW55lZzX43Y8= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= -go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg= -go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= 
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.8.0 h1:fRAZQDcAFHySxpJ1TwlA1cJ4tvcrw7nXl9xWWC8N5CE= +go.opentelemetry.io/proto/otlp v1.8.0/go.mod h1:tIeYOeNBU4cvmPqpaji1P+KbB4Oloai8wN4rWzRrFF0= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= @@ -360,23 +376,23 @@ go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= -go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE= -go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI= -golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw= -golang.org/x/arch v0.15.0/go.mod h1:JmwW7aLIoRUKgaTzhkiEFxvcEiQGyOg9BMonBJUS7EE= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/arch v0.21.0 h1:iTC9o7+wP6cPWpDWkivCvQFGAHDQ59SrSxsLPcnkArw= +golang.org/x/arch v0.21.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= -golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= +golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= +golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 h1:y5zboxd6LQAqYIhHnB48p0ByQ/GnQx2BE33L8BOHQkI= -golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= +golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b h1:DXr+pvt3nC887026GRP39Ej11UATqWDmWuS99x26cD0= +golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -388,32 +404,33 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= -golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= -golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo= +golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= -golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= -golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= -golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= -golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI= +golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -421,8 +438,8 @@ golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= -golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -431,24 +448,24 @@ gomodules.xyz/jsonpatch/v2 v2.5.0 
h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0 gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= -google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 h1:KAeGQVN3M9nD0/bQXnr/ClcEMJ968gUXJQ9pwfSynuQ= -google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80/go.mod h1:cc8bqMqtv9gMOr0zHg2Vzff5ULhhL2IXP4sbcn32Dro= -google.golang.org/genproto/googleapis/api v0.0.0-20241223144023-3abc09e42ca8 h1:st3LcW/BPi75W4q1jJTEor/QWwbNlPlDG0JTn6XhZu0= -google.golang.org/genproto/googleapis/api v0.0.0-20241223144023-3abc09e42ca8/go.mod h1:klhJGKFyG8Tn50enBn7gizg4nXGXJ+jqEREdCWaPcV4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d h1:xJJRGY7TJcvIlpSrN3K6LAWgNFUILlO+OMAqtg9aqnw= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d/go.mod h1:3ENsm/5D1mzDyhpzeRi1NR784I0BcofWBoSc5QqqMK4= -google.golang.org/grpc v1.69.4 h1:MF5TftSMkd8GLw/m0KM6V8CMOCY6NZ1NQDPGFgbTt4A= -google.golang.org/grpc v1.69.4/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/genproto/googleapis/api v0.0.0-20250826171959-ef028d996bc1 h1:APHvLLYBhtZvsbnpkfknDZ7NyH4z5+ub/I0u8L3Oz6g= +google.golang.org/genproto/googleapis/api v0.0.0-20250826171959-ef028d996bc1/go.mod h1:xUjFWUnWDpZ/C0Gu0qloASKFb6f8/QXiiXhSPFsD668= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 h1:pmJpJEvT846VzausCQ5d7KreSROcDqmO388w5YbnltA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1/go.mod h1:GmFNa4BdJZ2a8G+wCe9Bg3wwThLrJun751XstdJt5Og= +google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= +google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= -gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod 
h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= @@ -465,61 +482,56 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gorm.io/driver/mysql v1.6.0 h1:eNbLmNTpPpTOVZi8MMxCi2aaIm0ZpInbORNXDwyLGvg= gorm.io/driver/mysql v1.6.0/go.mod h1:D/oCC2GWK3M/dqoLxnOlaNKmXz8WNTfcS9y5ovaSqKo= -gorm.io/gorm v1.30.1 h1:lSHg33jJTBxs2mgJRfRZeLDG+WZaHYCk3Wtfl6Ngzo4= -gorm.io/gorm v1.30.1/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE= -k8s.io/api v0.33.3 h1:SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8= -k8s.io/api v0.33.3/go.mod h1:01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE= -k8s.io/apiextensions-apiserver v0.33.2 h1:6gnkIbngnaUflR3XwE1mCefN3YS8yTD631JXQhsU6M8= -k8s.io/apiextensions-apiserver v0.33.2/go.mod h1:IvVanieYsEHJImTKXGP6XCOjTwv2LUMos0YWc9O+QP8= -k8s.io/apimachinery v0.33.3 h1:4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA= -k8s.io/apimachinery v0.33.3/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= -k8s.io/apiserver v0.33.2 h1:KGTRbxn2wJagJowo29kKBp4TchpO1DRO3g+dB/KOJN4= -k8s.io/apiserver v0.33.2/go.mod h1:9qday04wEAMLPWWo9AwqCZSiIn3OYSZacDyu/AcoM/M= -k8s.io/client-go v0.33.3 h1:M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA= -k8s.io/client-go v0.33.3/go.mod h1:luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg= -k8s.io/cloud-provider v0.33.2 h1:tP/18SbhytAapqg2/tGD5PFUR6VLYra+QfJ7Qn3FN34= -k8s.io/cloud-provider v0.33.2/go.mod h1:yS8ArLLLZV1+Tv6hkSYrZuYEVz+wQgiekUtaqe9Wxao= -k8s.io/component-base v0.33.3 h1:mlAuyJqyPlKZM7FyaoM/LcunZaaY353RXiOd2+B5tGA= -k8s.io/component-base v0.33.3/go.mod h1:ktBVsBzkI3imDuxYXmVxZ2zxJnYTZ4HAsVj9iF09qp4= -k8s.io/component-helpers v0.33.3 h1:fjWVORSQfI0WKzPeIFSju/gMD9sybwXBJ7oPbqQu6eM= -k8s.io/component-helpers v0.33.3/go.mod h1:7iwv+Y9Guw6X4RrnNQOyQlXcvJrVjPveHVqUA5dm31c= -k8s.io/controller-manager v0.33.2 h1:HIs8PbdTOaY6wTOvKKLwoAHSO6GeDjmYS0Gjnd6rF+c= -k8s.io/controller-manager v0.33.2/go.mod h1:n8maAdN06E3cD0h5N0wuYBv9Qi9FePl7y6Iz3pfc9PY= -k8s.io/csi-translation-lib v0.33.2 h1:QyWkVcf0rbNjc53uAqCyl9kmHCRn1O0Z4QT69y/jwHQ= -k8s.io/csi-translation-lib v0.33.2/go.mod h1:nFPX6BA20EDdIQpitb6p2wVtvLBuXsmm6D1Cwi3rDnE= -k8s.io/dynamic-resource-allocation v0.33.1 h1:xnEWV764LIsRQDTQ0tLFQMz1lY34Ep7D+/NNbrODfm4= -k8s.io/dynamic-resource-allocation v0.33.1/go.mod h1:AgBLCrIi+//A4VKljjJ7YPpJ+LeyDyTvUk7v8+Qf3pI= +gorm.io/gorm v1.31.0 h1:0VlycGreVhK7RF/Bwt51Fk8v0xLiiiFdbGDPIZQ7mJY= +gorm.io/gorm v1.31.0/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs= +k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= +k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= +k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc= +k8s.io/apiextensions-apiserver v0.34.0/go.mod h1:hLI4GxE1BDBy9adJKxUxCEHBGZtGfIg98Q+JmTD7+g0= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= +k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apiserver v0.34.0 h1:Z51fw1iGMqN7uJ1kEaynf2Aec1Y774PqU+FVWCFV3Jg= +k8s.io/apiserver v0.34.0/go.mod h1:52ti5YhxAvewmmpVRqlASvaqxt0gKJxvCeW7ZrwgazQ= +k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= +k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= +k8s.io/cloud-provider v0.34.0 h1:OgrNE+WSgfvDBQf6WS9qFM7Xr37bc0Og5kkL4hyWDmU= +k8s.io/cloud-provider v0.34.0/go.mod h1:JbMa0t6JIGDMLI7Py6bdp9TN6cfuHrWGq+E/X+Ljkmo= 
+k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8= +k8s.io/component-base v0.34.0/go.mod h1:RSCqUdvIjjrEm81epPcjQ/DS+49fADvGSCkIP3IC6vg= +k8s.io/component-helpers v0.34.1 h1:gWhH3CCdwAx5P3oJqZKb4Lg5FYZTWVbdWtOI8n9U4XY= +k8s.io/component-helpers v0.34.1/go.mod h1:4VgnUH7UA/shuBur+OWoQC0xfb69sy/93ss0ybZqm3c= +k8s.io/controller-manager v0.34.0 h1:oCHoqS8dcFp7zDSu7HUvTpakq3isSxil3GprGGlJMsE= +k8s.io/controller-manager v0.34.0/go.mod h1:XFto21U+Mm9BT8r/Jd5E4tHCGtwjKAUFOuDcqaj2VK0= +k8s.io/csi-translation-lib v0.34.0 h1:WhCkq35XATZ+x6NKqI4u7XSYtmucuCN7jDk+mmm9XUU= +k8s.io/csi-translation-lib v0.34.0/go.mod h1:lZ+vpT3/6hx7GxXcI1mcoHxZSONvxgl2NwawzFnJP4Y= +k8s.io/dynamic-resource-allocation v0.34.0 h1:RrFNZXb2s5cvvf+KKdO92ss/e+zjGFFaDKAIpzA+Pu8= +k8s.io/dynamic-resource-allocation v0.34.0/go.mod h1:aqmoDIvXjQRhSgxQkFLl6+Ndg6MfdEOI+TQsj1j9V+g= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kms v0.33.2 h1:GFwNXX4CZGQCg9DPOaJi1/+iKidCtB9/OIAGdzRo8FI= -k8s.io/kms v0.33.2/go.mod h1:C1I8mjFFBNzfUZXYt9FZVJ8MJl7ynFbGgZFbBzkBJ3E= -k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a h1:ZV3Zr+/7s7aVbjNGICQt+ppKWsF1tehxggNfbM7XnG8= -k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= -k8s.io/kube-scheduler v0.32.7 h1:QOvu/fNEYGg1gzzpowWHFCI8SD3vJs5Iz0qebEQADd4= -k8s.io/kube-scheduler v0.32.7/go.mod h1:ez/2BnvZv2Bq1K9LpBsDgRsTvwJLAzkcpRMfY7rhLMA= -k8s.io/kubelet v0.33.1 h1:x4LCw1/iZVWOKA4RoITnuB8gMHnw31HPB3S0EF0EexE= -k8s.io/kubelet v0.33.1/go.mod h1:8WpdC9M95VmsqIdGSQrajXooTfT5otEj8pGWOm+KKfQ= -k8s.io/kubernetes v1.33.3 h1:dBx5Z2ZhR8kNzAwCoCz4j1niUbUrNUDVxeSj4/Ienu0= -k8s.io/kubernetes v1.33.3/go.mod h1:nrt8sldmckKz2fCZhgRX3SKfS2e+CzXATPv6ITNkU00= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= +k8s.io/kms v0.34.0 h1:u+/rcxQ3Jr7gC9AY5nXuEnBcGEB7ZOIJ9cdLdyHyEjQ= +k8s.io/kms v0.34.0/go.mod h1:s1CFkLG7w9eaTYvctOxosx88fl4spqmixnNpys0JAtM= +k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611 h1:o4oKOsvSymDkZRsMAPZU7bRdwL+lPOK5VS10Dr1D6eg= +k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/kube-scheduler v0.34.0 h1:iUT5spyg0RlZ9W5dImrxSxv0yTqbsI+/J72/Iuv9ed8= +k8s.io/kube-scheduler v0.34.0/go.mod h1:7pt2HDb32lZOihbt/aamuMBvSe1o+rrd2rQC8aJyfP0= +k8s.io/kubelet v0.34.0 h1:1nZt1Q6Kfx7xCaTS9vnqR9sjZDxf3cRSQkAFCczULmc= +k8s.io/kubelet v0.34.0/go.mod h1:NqbF8ViVettlZbf9hw9DJhubaWn7rGvDDTcLMDm6tQ0= +k8s.io/kubernetes v1.34.0 h1:NvUrwPAVB4W3mSOpJ/RtNGHWWYyUP/xPaX5rUSpzA0w= +k8s.io/kubernetes v1.34.0/go.mod h1:iu+FhII+Oc/1gGWLJcer6wpyih441aNFHl7Pvm8yPto= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d h1:wAhiDyZ4Tdtt7e46e9M5ZSAJ/MnPGPs+Ki1gHw4w1R0= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= -sigs.k8s.io/controller-runtime v0.21.0 
h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8= -sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/karpenter v1.6.1 h1:ZAC802Prk/GyKoGUu0LuzEn9fFmJLfUtMfo64derQgw= -sigs.k8s.io/karpenter v1.6.1/go.mod h1:AxCaeRjv1Pgw/Ff7vT4aqyXcg8v1UdBcfzWMCaKSVjA= -sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 h1:qPrZsv1cwQiFeieFlRqT627fVZ+tyfou/+S5S0H5ua0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= +sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg= +sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/karpenter v1.6.2 h1:WFayZ49CSOaDMku1iYBTsD3A9hOB2yU/U95VcSAJ8KM= +sigs.k8s.io/karpenter v1.6.2/go.mod h1:AxCaeRjv1Pgw/Ff7vT4aqyXcg8v1UdBcfzWMCaKSVjA= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/scheduler-plugins v0.32.7 h1:fGr4JKraaTe6it4PIqUlXStfctFKYxJgYkDsiU6699o= -sigs.k8s.io/scheduler-plugins v0.32.7/go.mod h1:Oem5rktj6wgFr2SUqcaInUTIBX8tlY8c4qid5vp2lBw= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/internal/cloudprovider/common/utils.go b/internal/cloudprovider/common/utils.go index fbe882ab..788ba96d 100644 --- a/internal/cloudprovider/common/utils.go +++ b/internal/cloudprovider/common/utils.go @@ -131,6 +131,16 @@ func CalculateLeastCostGPUNodes(ctx context.Context, provider types.GPUNodeProvi nodes := make([]tfv1.GPUNodeClaimSpec, 0, bestNumInstances) for i := int64(0); i < bestNumInstances; i++ { + + tflopsQuantity, err := resource.ParseQuantity(fmt.Sprintf("%f", bestInstance.FP16TFlopsPerGPU*float64(bestInstance.GPUCount))) + if err != nil { + return nil, fmt.Errorf("failed to parse GPUDeviceOffered: %v", err) + } + + vramQuantity, err := resource.ParseQuantity(fmt.Sprintf("%dGi", bestInstance.VRAMGigabytesPerGPU*bestInstance.GPUCount)) + if err != nil { + return nil, fmt.Errorf("failed to parse VRAMOffered: %v", err) + } nodes = append(nodes, tfv1.GPUNodeClaimSpec{ NodeName: fmt.Sprintf("%s-%s", pool.Name, generateRandomString(8)), InstanceType: bestInstance.InstanceType, @@ -139,8 +149,8 @@ func CalculateLeastCostGPUNodes(ctx context.Context, provider types.GPUNodeProvi Zone: zone, CapacityType: preferredCapacityType, - TFlopsOffered: 
resource.MustParse(fmt.Sprintf("%f", bestInstance.FP16TFlopsPerGPU*float64(bestInstance.GPUCount))), - VRAMOffered: resource.MustParse(fmt.Sprintf("%dGi", bestInstance.VRAMGigabytesPerGPU*bestInstance.GPUCount)), + TFlopsOffered: tflopsQuantity, + VRAMOffered: vramQuantity, GPUDeviceOffered: bestInstance.GPUCount, ExtraParams: cluster.Spec.ComputingVendor.Params.ExtraParams, diff --git a/internal/cloudprovider/karpenter/nodeclaim.go b/internal/cloudprovider/karpenter/nodeclaim.go index 2877e80d..15c8dcc0 100644 --- a/internal/cloudprovider/karpenter/nodeclaim.go +++ b/internal/cloudprovider/karpenter/nodeclaim.go @@ -318,7 +318,11 @@ func (p KarpenterGPUNodeProvider) buildNodeClaim(ctx context.Context, param *tfv // Add GPU resources if specified (Karpenter supports nvidia.com/gpu) if param.GPUDeviceOffered > 0 { - resourceRequests[karpenterConfig.GPUResourceName] = resource.MustParse(fmt.Sprintf("%d", param.GPUDeviceOffered)) + quantity, err := resource.ParseQuantity(fmt.Sprintf("%d", param.GPUDeviceOffered)) + if err != nil { + return nil, fmt.Errorf("failed to parse GPUDeviceOffered: %v", err) + } + resourceRequests[karpenterConfig.GPUResourceName] = quantity } // query nodeClass and build NodeClassRef diff --git a/internal/cloudprovider/pricing/pricing.go b/internal/cloudprovider/pricing/pricing.go index 33ee529f..45dd09bb 100644 --- a/internal/cloudprovider/pricing/pricing.go +++ b/internal/cloudprovider/pricing/pricing.go @@ -31,6 +31,7 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/types" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" + "k8s.io/apimachinery/pkg/api/resource" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -39,11 +40,17 @@ const ( providerAzure = "azure" ) +// CompleteGPUInfo combines GpuInfo with VRAM information from instance data +type CompleteGPUInfo struct { + *config.GpuInfo + VRAMGigabytes int32 +} + // Global data initialized at package load time var ( globalAWSGPUInstanceData map[string]GPUNodeInstanceInfoAndPrice globalAzureGPUInstanceData map[string]GPUNodeInstanceInfoAndPrice - tflopsMap map[string]*config.GpuInfo + tflopsMap map[string]*CompleteGPUInfo ) var readyCh = make(chan struct{}) @@ -51,8 +58,9 @@ var initOnce sync.Once // PricingProvider provides pricing information and calculations for instance types type PricingProvider interface { - GetPricing(instanceType, capacityType tfv1.CapacityTypeEnum) (float64, bool) - GetGPUNodeInstanceTypeInfo(region string) ([]string, bool) + GetPricing(instanceType string, capacityType tfv1.CapacityTypeEnum, region string) (float64, bool) + GetRegionalGPUNodeInstanceTypes(region string) ([]types.GPUNodeInstanceInfo, bool) + GetGPUCapacityByModel(gpuModel string) (resource.Quantity, resource.Quantity, bool) } type GPUNodeInstanceInfoAndPrice struct { @@ -77,7 +85,7 @@ var awsCSV string var azureCSV string func init() { - tflopsMap = make(map[string]*config.GpuInfo, 100) + tflopsMap = make(map[string]*CompleteGPUInfo, 100) } func SetTflopsMapAndInitGPUPricingInfo(ctx context.Context, gpuInfos *[]config.GpuInfo) { @@ -86,8 +94,14 @@ func SetTflopsMapAndInitGPUPricingInfo(ctx context.Context, gpuInfos *[]config.G return } for _, gpuInfo := range *gpuInfos { - tflopsMap[gpuInfo.FullModelName] = &gpuInfo - tflopsMap[gpuInfo.Model] = &gpuInfo + if tflopsMap[gpuInfo.FullModelName] != nil { + continue + } + completeInfo := &CompleteGPUInfo{ + GpuInfo: &gpuInfo, + } + tflopsMap[gpuInfo.FullModelName] = completeInfo + tflopsMap[gpuInfo.Model] = 
completeInfo } initOnce.Do(func() { @@ -151,6 +165,11 @@ func loadCSVInstanceDataFromPath(ctx context.Context, data []byte, provider stri } instanceInfo.FP16TFlopsPerGPU = gpuInfo.Fp16TFlops.AsApproximateFloat64() + // Fill VRAM information if not already set + if gpuInfo.VRAMGigabytes == 0 { + gpuInfo.VRAMGigabytes = instanceInfo.VRAMGigabytesPerGPU + } + instanceInfoAndPrice := GPUNodeInstanceInfoAndPrice{ GPUNodeInstanceInfo: instanceInfo, onDemandPrice: prices[0], @@ -416,3 +435,19 @@ func (p *StaticPricingProvider) GetRegionalGPUNodeInstanceTypes(region string) ( return instanceTypes, len(instanceTypes) > 0 } + +// GetGPUCapacityByModel gets the full capacity (TFlops and VRAM) for a GPU model +// Returns TFlops, VRAM, and whether found +func (p *StaticPricingProvider) GetGPUCapacityByModel(gpuModel string) (resource.Quantity, resource.Quantity, bool) { + <-readyCh + + gpuInfo, exists := tflopsMap[gpuModel] + if !exists { + return resource.Quantity{}, resource.Quantity{}, false + } + + tflops := gpuInfo.Fp16TFlops + vram := *resource.NewQuantity(int64(gpuInfo.VRAMGigabytes)*constants.GiBToBytes, resource.BinarySI) + + return tflops, vram, true +} diff --git a/internal/config/rules.go b/internal/config/rules.go index dd3713bd..8bbfb556 100644 --- a/internal/config/rules.go +++ b/internal/config/rules.go @@ -132,7 +132,7 @@ func (r *AlertRule) toPostableAlert(alertQueryResult map[string]interface{}, sta labels := LabelSet{ "alertname": r.Name, "severity": r.Severity, - "job": constants.AlertJobName, + "job": constants.TensorFusionSystemName, "instance": instance, } annotations := LabelSet{ diff --git a/internal/constants/constants.go b/internal/constants/constants.go index b1aa6b64..77648769 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -30,6 +30,7 @@ const ( LabelKeyClusterOwner = Domain + "/cluster" LabelKeyNodeClass = Domain + "/node-class" LabelKeyPodTemplateHash = Domain + "/pod-template-hash" + LabelNodeSelectorHash = Domain + "/node-selector-hash" LabelComponent = Domain + "/component" // used by TF connection, for matching the related connections when worker Pod state changed LabelWorkerName = Domain + "/worker-name" @@ -69,13 +70,12 @@ const ( GPUModelAnnotation = Domain + "/gpu-model" // GPU ID list is assigned by scheduler, should not specified by user GPUDeviceIDsAnnotation = Domain + "/gpu-ids" + DedicatedGPUAnnotation = Domain + "/dedicated-gpu" SetPendingOwnedWorkloadAnnotation = Domain + "/pending-owned-workload" PricingAnnotation = Domain + "/hourly-pricing" // In remote vGPU mode, selected workload is set by user with /workload annotation or generated by system SelectedWorkloadAnnotation = Domain + "/selected-workload" - CELFilterExpressionAnnotation = Domain + "/cel-filter-expression" - WorkloadModeAnnotation = Domain + "/workload-mode" WorkloadModeDynamic = "dynamic" WorkloadModeFixed = "fixed" @@ -108,6 +108,8 @@ const ( // For grey release TensorFusionEnabledReplicasAnnotation = Domain + "/enabled-replicas" TensorFusionDefaultPoolKeyAnnotation = Domain + "/is-default-pool" + // Eviction protection annotation for controlling pod eviction timing + EvictionProtectionAnnotation = Domain + "/eviction-protection" NamespaceDefaultVal = "tensor-fusion-sys" @@ -121,6 +123,27 @@ const ( QoSLevelMedium = "medium" QoSLevelHigh = "high" QoSLevelCritical = "critical" + + // DRA support + // annotation for pod to indicate if DRA is enabled + DRAEnabledAnnotation = Domain + "/dra-enabled" + DRACelExpressionAnnotation = Domain + 
"/dra-cel-expression" + + DRADriverName = Domain + ".dra-driver" + DRAResourceClaimName = "tensor-fusion-resource-claim-%s-%s" + // resource claim name for request + DRAResourceClaimRequestName = "tensor-fusion-resource-claim-request-%s" + + DRAClaimDefineName = "tensor-fusion-gpu-claim" + + TensorFusionResourceClaimTemplateLabel = Domain + "/resource-claim-template" + + // ResourceClaimTemplate related constants + DRAResourceClaimTemplateName = "tensor-fusion-gpu-template" + + // ResourceSlice related constants + DRAResourceSliceName = "tensor-fusion-resource-slice-%s" + DRAResourceSlicePool = "tensor-fusion-resource-slice-pool-%s" ) // for avoid golang lint issues @@ -177,7 +200,7 @@ const TFDataPath = "/run/tensor-fusion" const TFDataPathWorkerExpr = "shm/$(POD_NAMESPACE)/$(POD_NAME)" const DataVolumeName = "tf-data" const TensorFusionPoolManualCompaction = Domain + "/manual-compaction" -const AlertJobName = "tensor-fusion" +const TensorFusionSystemName = "tensor-fusion" const ( LeaderInfoConfigMapName = "tensor-fusion-operator-leader-info" @@ -203,3 +226,5 @@ const ExtraVerificationInfoPodIDKey = "authentication.kubernetes.io/pod-uid" const SchedulerSimulationKey = "simulate-schedule" const MobileGpuClockSpeedMultiplier = 0.75 +const DefaultEvictionProtectionPriceRatio = 1.2 +const NodeCriticalPriorityClassName = "system-node-critical" diff --git a/internal/constants/env.go b/internal/constants/env.go index 1e26a392..06212f20 100644 --- a/internal/constants/env.go +++ b/internal/constants/env.go @@ -73,9 +73,10 @@ const ( LdPreloadFileName = "ld.so.preload" LdPreloadFile = "/etc/ld.so.preload" - TFLibsVolumeName = "tf-libs" - TFLibsVolumeMountPath = "/tensor-fusion" - TFConnectionNamePrefix = "-tf-vgpu-" + TFLibsVolumeName = "tf-libs" + TFLibsVolumeMountPath = "/tensor-fusion" + TFConnectionNamePrefix = "-tf-vgpu-" + TFConnectionNameNoPrefix = "tf-vgpu-" HostIPFieldRef = "status.hostIP" NodeNameFieldRef = "spec.nodeName" @@ -98,8 +99,7 @@ const ( LdPreloadEnv = "LD_PRELOAD" LdPreloadLimiter = "/home/app/libcuda_limiter.so" - SharedMemDeviceName = "/dev/shm" - SharedMemMountSubPath = "shm" + SharedMemMountSubPath = "/shm" // disable GPU limiter, for emergency use DisableGpuLimiterEnv = "DISABLE_GPU_LIMITER" diff --git a/internal/controller/dra/resourceclaim_controller.go b/internal/controller/dra/resourceclaim_controller.go new file mode 100644 index 00000000..6d18b234 --- /dev/null +++ b/internal/controller/dra/resourceclaim_controller.go @@ -0,0 +1,214 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package dra + +import ( + "context" + "fmt" + + "github.com/NexusGPU/tensor-fusion/internal/utils" + corev1 "k8s.io/api/core/v1" + resourcev1beta2 "k8s.io/api/resource/v1beta2" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +// ResourceClaimReconciler reconciles ResourceClaim objects +type ResourceClaimReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceclaims,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +func (r *ResourceClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + + // Fetch the ResourceClaim instance + resourceClaim := &resourcev1beta2.ResourceClaim{} + if err := r.Get(ctx, req.NamespacedName, resourceClaim); err != nil { + if errors.IsNotFound(err) { + // Request object not found, could have been deleted after reconcile request. + // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers. + // Return and don't requeue + log.Info("ResourceClaim resource not found. Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. + log.Error(err, "Failed to get ResourceClaim") + return ctrl.Result{}, err + } + + // Check if this ResourceClaim is created from our ResourceClaimTemplate + if resourceClaim.Labels == nil { + // No labels, not our ResourceClaim + return ctrl.Result{}, nil + } + + labelValue, exists := resourceClaim.Labels[constants.TensorFusionResourceClaimTemplateLabel] + if !exists || labelValue != constants.TrueStringValue { + // Not our ResourceClaim, ignore + return ctrl.Result{}, nil + } + + log.Info("Processing TensorFusion ResourceClaim", "name", resourceClaim.Name, "namespace", resourceClaim.Namespace) + + // Find the owner Pod to get the CEL expression annotation + ownerPod, err := r.findOwnerPod(ctx, resourceClaim) + if err != nil { + log.Error(err, "Failed to find owner Pod") + return ctrl.Result{}, err + } + + if ownerPod == nil { + log.Info("Owner Pod not found, ResourceClaim may not have OwnerReference yet") + return ctrl.Result{RequeueAfter: constants.PendingRequeueDuration}, nil + } + + // Update ResourceClaim with CEL expression + if err := r.updateResourceClaimCEL(resourceClaim, ownerPod); err != nil { + log.Error(err, "Failed to update ResourceClaim CEL expression") + return ctrl.Result{}, err + } + // Update ResourceClaim with capacity request + if err := r.updateCapacityRequest(resourceClaim, ownerPod); err != nil { + log.Error(err, "Failed to update ResourceClaim capacity request") + return ctrl.Result{}, err + } + + if err := r.Update(ctx, resourceClaim); err != nil { + log.Error(err, "Failed to update ResourceClaim") + return ctrl.Result{}, err + } + + log.Info("Successfully updated ResourceClaim") + return ctrl.Result{}, nil +} + +// findOwnerPod finds the Pod that owns this ResourceClaim +func (r *ResourceClaimReconciler) findOwnerPod(ctx context.Context, resourceClaim 
*resourcev1beta2.ResourceClaim) (*corev1.Pod, error) {
+	// Find the Pod OwnerReference (there should be exactly one)
+	var podOwnerRef *metav1.OwnerReference
+	for i, ownerRef := range resourceClaim.OwnerReferences {
+		if ownerRef.Kind == "Pod" && ownerRef.APIVersion == "v1" {
+			podOwnerRef = &resourceClaim.OwnerReferences[i]
+			break
+		}
+	}
+
+	if podOwnerRef == nil {
+		return nil, nil // No Pod owner found
+	}
+
+	// Get the Pod by name and namespace (the owner UID is verified below)
+	pod := &corev1.Pod{}
+	err := r.Get(ctx, types.NamespacedName{
+		Name:      podOwnerRef.Name,
+		Namespace: resourceClaim.Namespace,
+	}, pod)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			return nil, nil // Pod was deleted
+		}
+		return nil, fmt.Errorf("failed to get owner Pod %s/%s: %w", resourceClaim.Namespace, podOwnerRef.Name, err)
+	}
+
+	// Verify the UID matches (additional safety check)
+	if pod.UID != podOwnerRef.UID {
+		return nil, fmt.Errorf("pod UID mismatch: expected %s, got %s", podOwnerRef.UID, pod.UID)
+	}
+
+	return pod, nil
+}
+
+// updateResourceClaimCEL updates the ResourceClaim's CEL selector expression
+func (r *ResourceClaimReconciler) updateResourceClaimCEL(resourceClaim *resourcev1beta2.ResourceClaim, pod *corev1.Pod) error {
+	// Check if we need to update
+	if len(resourceClaim.Spec.Devices.Requests) == 0 {
+		return fmt.Errorf("no device requests found in ResourceClaim")
+	}
+
+	deviceReq := &resourceClaim.Spec.Devices.Requests[0]
+	if deviceReq.Exactly == nil {
+		return fmt.Errorf("no ExactDeviceRequest found")
+	}
+
+	// Get CEL expression from Pod annotation (DRACelExpressionAnnotation),
+	// e.g. device.attributes["tflops"].quantity >= quantity("10")
+	celExpression := pod.Annotations[constants.DRACelExpressionAnnotation]
+
+	if celExpression == "" {
+		return nil
+	}
+
+	// Skip the update if the CEL expression is already set correctly
+	if len(deviceReq.Exactly.Selectors) > 0 &&
+		deviceReq.Exactly.Selectors[0].CEL != nil &&
+		deviceReq.Exactly.Selectors[0].CEL.Expression == celExpression {
+		return nil
+	}
+
+	// Update the CEL expression
+	if len(deviceReq.Exactly.Selectors) == 0 {
+		deviceReq.Exactly.Selectors = []resourcev1beta2.DeviceSelector{{}}
+	}
+
+	if deviceReq.Exactly.Selectors[0].CEL == nil {
+		deviceReq.Exactly.Selectors[0].CEL = &resourcev1beta2.CELDeviceSelector{}
+	}
+
+	deviceReq.Exactly.Selectors[0].CEL.Expression = celExpression
+
+	return nil
+}
+
+// updateCapacityRequest copies the Pod's requested TFlops and VRAM into the claim's capacity requests
+func (r *ResourceClaimReconciler) updateCapacityRequest(resourceClaim *resourcev1beta2.ResourceClaim, pod *corev1.Pod) error {
+	if len(resourceClaim.Spec.Devices.Requests) == 0 {
+		return fmt.Errorf("no device requests found in ResourceClaim")
+	}
+
+	deviceReq := &resourceClaim.Spec.Devices.Requests[0]
+	if deviceReq.Exactly == nil {
+		return fmt.Errorf("no ExactDeviceRequest found")
+	}
+	// Guard against a missing capacity block to avoid writing into a nil map
+	if deviceReq.Exactly.Capacity == nil || deviceReq.Exactly.Capacity.Requests == nil {
+		return fmt.Errorf("no capacity requests defined in ResourceClaim device request")
+	}
+	gpuRequestResource, err := utils.GetGPUResource(pod, true)
+	if err != nil {
+		return fmt.Errorf("failed to get GPU resource: %w", err)
+	}
+	//TODO extract to constants
+	deviceReq.Exactly.Capacity.Requests["tflops"] = gpuRequestResource.Tflops
+	deviceReq.Exactly.Capacity.Requests["vram"] = gpuRequestResource.Vram
+
+	return nil
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *ResourceClaimReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&resourcev1beta2.ResourceClaim{}).
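+		// Note: all ResourceClaims in the cluster are watched here; Reconcile filters on the
+		// TensorFusion resource-claim-template label before mutating anything.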
+ Complete(r) +} diff --git a/internal/controller/dra/resourceclaim_controller_test.go b/internal/controller/dra/resourceclaim_controller_test.go new file mode 100644 index 00000000..aeebbda7 --- /dev/null +++ b/internal/controller/dra/resourceclaim_controller_test.go @@ -0,0 +1,564 @@ +package dra + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + resourcev1beta2 "k8s.io/api/resource/v1beta2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +func TestResourceClaimReconciler_Reconcile(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + tests := []struct { + name string + resourceClaim *resourcev1beta2.ResourceClaim + pod *corev1.Pod + expectedResult ctrl.Result + expectError bool + expectUpdate bool + }{ + { + name: "ResourceClaim not found", + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "ResourceClaim without TensorFusion label", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "ResourceClaim with wrong label value", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: "false", + }, + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "ResourceClaim without owner Pod", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: constants.TrueStringValue, + }, + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + expectedResult: ctrl.Result{RequeueAfter: constants.PendingRequeueDuration}, + expectError: false, + }, + { + name: "Owner Pod without CEL annotation", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionResourceClaimTemplateLabel: constants.TrueStringValue, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + }, + { + name: "Successful CEL expression update", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + Labels: map[string]string{ + 
constants.TensorFusionResourceClaimTemplateLabel: constants.TrueStringValue, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + Annotations: map[string]string{ + constants.DRACelExpressionAnnotation: `device.attributes["tflops"].quantity >= quantity("10")`, + }, + }, + }, + expectedResult: ctrl.Result{}, + expectError: false, + expectUpdate: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var objects []runtime.Object + if tt.resourceClaim != nil { + objects = append(objects, tt.resourceClaim) + } + if tt.pod != nil { + objects = append(objects, tt.pod) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(objects...). + Build() + + reconciler := &ResourceClaimReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: "test-claim", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(context.Background(), req) + + if tt.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + assert.Equal(t, tt.expectedResult, result) + + // Check if ResourceClaim was updated with CEL expression + if tt.expectUpdate && tt.resourceClaim != nil { + updatedClaim := &resourcev1beta2.ResourceClaim{} + err := fakeClient.Get(context.Background(), types.NamespacedName{ + Name: tt.resourceClaim.Name, + Namespace: tt.resourceClaim.Namespace, + }, updatedClaim) + require.NoError(t, err) + + require.Len(t, updatedClaim.Spec.Devices.Requests, 1) + deviceReq := updatedClaim.Spec.Devices.Requests[0] + require.NotNil(t, deviceReq.Exactly) + require.Len(t, deviceReq.Exactly.Selectors, 1) + require.NotNil(t, deviceReq.Exactly.Selectors[0].CEL) + assert.Equal(t, `device.attributes["tflops"].quantity >= quantity("10")`, deviceReq.Exactly.Selectors[0].CEL.Expression) + } + }) + } +} + +func TestResourceClaimReconciler_findOwnerPod(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + + tests := []struct { + name string + resourceClaim *resourcev1beta2.ResourceClaim + pod *corev1.Pod + expectedPod *corev1.Pod + expectError bool + }{ + { + name: "No owner references", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + }, + expectedPod: nil, + expectError: false, + }, + { + name: "No Pod owner reference", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "test-deployment", + UID: "deployment-uid-123", + }, + }, + }, + }, + expectedPod: nil, + expectError: false, + }, + { + name: "Pod owner not found", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: 
"nonexistent-pod", + UID: "pod-uid-123", + }, + }, + }, + }, + expectedPod: nil, + expectError: false, + }, + { + name: "Pod UID mismatch", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "different-uid", + }, + }, + expectedPod: nil, + expectError: true, + }, + { + name: "Successful Pod lookup", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Pod", + Name: "test-pod", + UID: "pod-uid-123", + }, + }, + }, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + }, + }, + expectedPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: "pod-uid-123", + }, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var objects []runtime.Object + if tt.pod != nil { + objects = append(objects, tt.pod) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(objects...). + Build() + + reconciler := &ResourceClaimReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + pod, err := reconciler.findOwnerPod(context.Background(), tt.resourceClaim) + + if tt.expectError { + require.Error(t, err) + assert.Nil(t, pod) + } else { + require.NoError(t, err) + if tt.expectedPod == nil { + assert.Nil(t, pod) + } else { + require.NotNil(t, pod) + assert.Equal(t, tt.expectedPod.Name, pod.Name) + assert.Equal(t, tt.expectedPod.Namespace, pod.Namespace) + assert.Equal(t, tt.expectedPod.UID, pod.UID) + } + } + }) + } +} + +func TestResourceClaimReconciler_updateResourceClaimCEL(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, resourcev1beta2.AddToScheme(scheme)) + + tests := []struct { + name string + resourceClaim *resourcev1beta2.ResourceClaim + celExpression string + expectError bool + expectUpdate bool + }{ + { + name: "No device requests", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{}, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: true, + }, + { + name: "No ExactDeviceRequest", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + // Exactly is nil + }, + }, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: true, + }, + { + name: "CEL expression already set correctly", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: 
&resourcev1beta2.ExactDeviceRequest{ + Count: 1, + Selectors: []resourcev1beta2.DeviceSelector{ + { + CEL: &resourcev1beta2.CELDeviceSelector{ + Expression: `device.attributes["tflops"].quantity >= quantity("10")`, + }, + }, + }, + }, + }, + }, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: false, + expectUpdate: false, // No update needed + }, + { + name: "Successful CEL expression update - empty selectors", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + }, + }, + }, + }, + }, + }, + celExpression: `device.attributes["tflops"].quantity >= quantity("10")`, + expectError: false, + expectUpdate: true, + }, + { + name: "Successful CEL expression update - nil CEL", + resourceClaim: &resourcev1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-claim", + Namespace: "default", + }, + Spec: resourcev1beta2.ResourceClaimSpec{ + Devices: resourcev1beta2.DeviceClaim{ + Requests: []resourcev1beta2.DeviceRequest{ + { + Name: "gpu-request", + Exactly: &resourcev1beta2.ExactDeviceRequest{ + Count: 1, + Selectors: []resourcev1beta2.DeviceSelector{ + { + // CEL is nil + }, + }, + }, + }, + }, + }, + }, + }, + celExpression: `device.attributes["vram"].quantity >= quantity("8Gi")`, + expectError: false, + expectUpdate: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(tt.resourceClaim). + Build() + + reconciler := &ResourceClaimReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + mockPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + constants.DRACelExpressionAnnotation: tt.celExpression, + }, + }, + } + err := reconciler.updateResourceClaimCEL(tt.resourceClaim, mockPod) + + if tt.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + + if tt.expectUpdate { + // Verify the CEL expression was set correctly + require.Len(t, tt.resourceClaim.Spec.Devices.Requests, 1) + deviceReq := tt.resourceClaim.Spec.Devices.Requests[0] + require.NotNil(t, deviceReq.Exactly) + require.Len(t, deviceReq.Exactly.Selectors, 1) + require.NotNil(t, deviceReq.Exactly.Selectors[0].CEL) + assert.Equal(t, tt.celExpression, deviceReq.Exactly.Selectors[0].CEL.Expression) + } + } + }) + } +} diff --git a/internal/controller/dra/resourceslice_controller.go b/internal/controller/dra/resourceslice_controller.go new file mode 100644 index 00000000..fbd03f6f --- /dev/null +++ b/internal/controller/dra/resourceslice_controller.go @@ -0,0 +1,218 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package dra + +import ( + "context" + "fmt" + + resourcev1beta2 "k8s.io/api/resource/v1beta2" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +// ResourceSliceReconciler reconciles ResourceSlice objects based on GPUNode and GPU changes +type ResourceSliceReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceslices,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpunodes,verbs=get;list;watch +//+kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpus,verbs=get;list;watch +//+kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools,verbs=get;list;watch + +// Reconcile processes GPUNode changes and generates/updates corresponding ResourceSlices +func (r *ResourceSliceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + log.Info("Reconciling ResourceSlice for GPUNode", "name", req.Name) + + // Fetch the GPUNode + gpuNode := &tfv1.GPUNode{} + if err := r.Get(ctx, req.NamespacedName, gpuNode); err != nil { + if errors.IsNotFound(err) { + // GPUNode was deleted, clean up associated ResourceSlice + return r.cleanupResourceSlice(ctx, req.Name) + } + log.Error(err, "Failed to get GPUNode") + return ctrl.Result{}, err + } + + // If GPUNode is being deleted, clean up ResourceSlice + if !gpuNode.DeletionTimestamp.IsZero() { + return r.cleanupResourceSlice(ctx, gpuNode.Name) + } + // Get all GPUs owned by this node + gpuList := &tfv1.GPUList{} + if err := r.List(ctx, gpuList, client.MatchingLabels{constants.LabelKeyOwner: gpuNode.Name}); err != nil { + log.Error(err, "Failed to list GPUs for node") + return ctrl.Result{}, err + } + + // Skip if no GPUs discovered yet + if len(gpuList.Items) == 0 { + log.Info("No GPUs discovered for node yet, skipping ResourceSlice generation") + return ctrl.Result{}, nil + } + + // Generate/update ResourceSlice for this node + if err := r.reconcileResourceSlice(ctx, gpuNode, gpuList.Items); err != nil { + log.Error(err, "Failed to reconcile ResourceSlice") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +// reconcileResourceSlice creates or updates the ResourceSlice for a GPUNode +func (r *ResourceSliceReconciler) reconcileResourceSlice(ctx context.Context, gpuNode *tfv1.GPUNode, gpus []tfv1.GPU) error { + log := log.FromContext(ctx) + + resourceSliceName := fmt.Sprintf(constants.DRAResourceSliceName, gpuNode.Name) + resourceSlice := &resourcev1beta2.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceSliceName, + }, + } + + _, err := controllerutil.CreateOrUpdate(ctx, r.Client, resourceSlice, func() error { + // Set basic spec fields + resourceSlice.Spec.Driver = constants.DRADriverName + resourceSlice.Spec.NodeName = &gpuNode.Name + resourceSlice.Spec.Pool = resourcev1beta2.ResourcePool{ + Name: gpuNode.Labels[constants.GpuPoolKey], + Generation: gpuNode.Generation, + ResourceSliceCount: 1, + } + + // Generate devices list + devices, 
err := r.generateDevices(ctx, gpus) + if err != nil { + return fmt.Errorf("failed to generate devices: %w", err) + } + resourceSlice.Spec.Devices = devices + + // Set labels for easy identification + if resourceSlice.Labels == nil { + resourceSlice.Labels = make(map[string]string) + } + resourceSlice.Labels[constants.LabelKeyOwner] = gpuNode.Name + resourceSlice.Labels[constants.KubernetesHostNameLabel] = gpuNode.Name + return nil + }) + + if err != nil { + return fmt.Errorf("failed to create or update ResourceSlice: %w", err) + } + + log.Info("Successfully reconciled ResourceSlice", "resourceSlice", resourceSliceName) + return nil +} + +// generateDevices creates the device list for ResourceSlice based on physical GPUs +func (r *ResourceSliceReconciler) generateDevices(_ context.Context, gpus []tfv1.GPU) ([]resourcev1beta2.Device, error) { + devices := make([]resourcev1beta2.Device, 0, len(gpus)) + + // Calculate virtual capacities for proportional allocation + + for _, gpu := range gpus { + if gpu.Status.Capacity == nil { + continue + } + //TODO extract to constants + //TODO quota support + poolName := gpu.Labels[constants.GpuPoolKey] + device := resourcev1beta2.Device{ + Name: gpu.Status.UUID, + Attributes: map[resourcev1beta2.QualifiedName]resourcev1beta2.DeviceAttribute{ + "model": { + StringValue: &gpu.Status.GPUModel, + }, + "pool_name": { + StringValue: &poolName, + }, + "pod_namespace": { + StringValue: &gpu.Namespace, + }, + }, + Capacity: map[resourcev1beta2.QualifiedName]resourcev1beta2.DeviceCapacity{ + "tflops": { + Value: gpu.Status.Capacity.Tflops, + }, + "vram": { + Value: gpu.Status.Capacity.Vram, + }, + }, + AllowMultipleAllocations: func() *bool { b := true; return &b }(), + } + + devices = append(devices, device) + } + + return devices, nil +} + +// cleanupResourceSlice removes the ResourceSlice associated with a deleted GPUNode +func (r *ResourceSliceReconciler) cleanupResourceSlice(ctx context.Context, nodeName string) (ctrl.Result, error) { + log := log.FromContext(ctx) + + resourceSliceName := fmt.Sprintf(constants.DRAResourceSliceName, nodeName) + resourceSlice := &resourcev1beta2.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceSliceName, + }, + } + + err := r.Delete(ctx, resourceSlice) + if err != nil && !errors.IsNotFound(err) { + log.Error(err, "Failed to delete ResourceSlice", "name", resourceSliceName) + return ctrl.Result{}, err + } + + log.Info("Successfully cleaned up ResourceSlice", "name", resourceSliceName) + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager +func (r *ResourceSliceReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&tfv1.GPUNode{}). + Watches(&tfv1.GPU{}, handler.EnqueueRequestsFromMapFunc( + func(ctx context.Context, obj client.Object) []reconcile.Request { + // Get the owner GPUNode name from GPU labels + if labels := obj.GetLabels(); labels != nil { + if nodeName, ok := labels[constants.LabelKeyOwner]; ok { + return []reconcile.Request{ + {NamespacedName: types.NamespacedName{Name: nodeName}}, + } + } + } + return nil + })). 
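+		// GPU changes are mapped back to the owning GPUNode via the owner label above,
+		// so device additions and removals refresh that node's ResourceSlice.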
+ Complete(r) +} diff --git a/internal/controller/gpunode_controller.go b/internal/controller/gpunode_controller.go index 9035ecdd..ae503f28 100644 --- a/internal/controller/gpunode_controller.go +++ b/internal/controller/gpunode_controller.go @@ -47,8 +47,9 @@ import ( // GPUNodeReconciler reconciles a GPUNode object type GPUNodeReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder + Scheme *runtime.Scheme + Recorder record.EventRecorder + Allocator *gpuallocator.GpuAllocator } // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpunodes,verbs=get;list;watch;create;update;patch;delete @@ -140,7 +141,7 @@ func (r *GPUNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } - hypervisorName, err := r.reconcileHypervisorPod(ctx, node, poolObj) + hypervisorName, err := r.reconcileHypervisorPod(ctx, node, poolObj, coreNode) if err != nil { return ctrl.Result{}, err } @@ -158,7 +159,9 @@ func (r *GPUNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, err } -func (r *GPUNodeReconciler) checkStatusAndUpdateVirtualCapacity(ctx context.Context, hypervisorName string, node *tfv1.GPUNode, poolObj *tfv1.GPUPool) error { +func (r *GPUNodeReconciler) checkStatusAndUpdateVirtualCapacity( + ctx context.Context, hypervisorName string, node *tfv1.GPUNode, poolObj *tfv1.GPUPool, +) error { pod := &corev1.Pod{} fetchErr := r.Get(ctx, client.ObjectKey{Name: hypervisorName, Namespace: utils.CurrentNamespace()}, pod) if fetchErr != nil { @@ -183,7 +186,7 @@ func (r *GPUNodeReconciler) checkStatusAndUpdateVirtualCapacity(ctx context.Cont return nil } else { - gpuModels, err := gpuallocator.RefreshGPUNodeCapacity(ctx, r.Client, node, poolObj) + gpuModels, err := gpuallocator.RefreshGPUNodeCapacity(ctx, r.Client, node, poolObj, r.Allocator) if err != nil { return err } @@ -319,7 +322,12 @@ func (r *GPUNodeReconciler) reconcileNodeDiscoveryJob( return nil } -func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx context.Context, node *tfv1.GPUNode, pool *tfv1.GPUPool) (string, error) { +func (r *GPUNodeReconciler) reconcileHypervisorPod( + ctx context.Context, + node *tfv1.GPUNode, + pool *tfv1.GPUPool, + k8sNode *corev1.Node, +) (string, error) { log := log.FromContext(ctx) if pool.Spec.ComponentConfig == nil || pool.Spec.ComponentConfig.Hypervisor == nil { @@ -361,7 +369,7 @@ func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx context.Context, node *tf } log.Info("hypervisor pod not found, creating new one", "node", node.Name) - if err := r.createHypervisorPod(ctx, key, node, pool); err != nil { + if err := r.createHypervisorPod(ctx, key, node, pool, k8sNode); err != nil { if errors.IsAlreadyExists(err) { log.Info("hypervisor pod already exists, skip creation", "node", node.Name) return "", nil @@ -372,7 +380,13 @@ func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx context.Context, node *tf return key.Name, nil } -func (r *GPUNodeReconciler) createHypervisorPod(ctx context.Context, key client.ObjectKey, node *tfv1.GPUNode, pool *tfv1.GPUPool) error { +func (r *GPUNodeReconciler) createHypervisorPod( + ctx context.Context, + key client.ObjectKey, + node *tfv1.GPUNode, + pool *tfv1.GPUPool, + k8sNode *corev1.Node, +) error { log := log.FromContext(ctx) podTmpl := &corev1.PodTemplate{} @@ -447,7 +461,11 @@ func (r *GPUNodeReconciler) createHypervisorPod(ctx context.Context, key client. 
}) err = controllerutil.SetControllerReference(node, newPod, r.Scheme) if err != nil { - return fmt.Errorf("failed to set controller reference: %w", err) + return fmt.Errorf("failed to set controller reference for hypervisor: %w", err) + } + // also set node owned by k8s node to allow Karpenter to delete the node while hypervisor exists + if err := controllerutil.SetOwnerReference(k8sNode, newPod, r.Scheme); err != nil { + return fmt.Errorf("failed to set owner reference for hypervisor: %w", err) } // create hypervisor pod diff --git a/internal/controller/gpupool_controller.go b/internal/controller/gpupool_controller.go index 987eb81b..da8c63aa 100644 --- a/internal/controller/gpupool_controller.go +++ b/internal/controller/gpupool_controller.go @@ -30,13 +30,16 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/metrics" utils "github.com/NexusGPU/tensor-fusion/internal/utils" "golang.org/x/time/rate" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/retry" "k8s.io/client-go/util/workqueue" + schedulingcorev1 "k8s.io/component-helpers/scheduling/corev1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -83,6 +86,9 @@ type GPUPoolReconciler struct { // and requeue until current time after that, start provisioning loop var provisioningInitializationMinTime = map[string]time.Time{} +// When GPU nodeSelector changed, trigger all node update +var poolSelectorChangeMap = map[string]string{} + // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools/status,verbs=get;update;patch // +kubebuilder:rbac:groups=tensor-fusion.ai,resources=gpupools/finalizers,verbs=update @@ -116,6 +122,10 @@ func (r *GPUPoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } + if err := r.reconcilePoolSelectorChange(ctx, pool); err != nil { + return ctrl.Result{}, err + } + if err := r.reconcilePoolCurrentCapacityAndReadiness(ctx, pool); err != nil { return ctrl.Result{}, err } @@ -404,6 +414,59 @@ func (r *GPUPoolReconciler) reconcilePoolComponents(ctx context.Context, pool *t return ctrlResult, utilerrors.NewAggregate(errs) } +func (r *GPUPoolReconciler) reconcilePoolSelectorChange(ctx context.Context, pool *tfv1.GPUPool) error { + if pool.Spec.NodeManagerConfig != nil && pool.Spec.NodeManagerConfig.NodeSelector != nil { + hash := utils.GetObjectHash(pool.Spec.NodeManagerConfig.NodeSelector) + if poolSelectorChangeMap[pool.Name] == hash { + return nil + } + + // hash has changed, or first reconcile, should check all k8s nodes + nodes := &corev1.NodeList{} + selectors := utils.GetInitialGPUNodeSelector() + if err := r.List(ctx, nodes, client.MatchingLabels{selectors[0]: selectors[1]}); err != nil { + return err + } + for _, node := range nodes.Items { + // skip no label or deleting nodes + if node.Labels == nil || !node.DeletionTimestamp.IsZero() { + continue + } + matches, err := schedulingcorev1.MatchNodeSelectorTerms(&node, pool.Spec.NodeManagerConfig.NodeSelector) + if err != nil { + return err + } + if matches { + if err := UpdateK8SNodeSelectorHash(ctx, r.Client, &node, hash); err != nil { + return err + } + } + } + 
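+		// Record the propagated selector hash so later reconciles with an unchanged
+		// nodeSelector can return early without re-listing nodes.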
poolSelectorChangeMap[pool.Name] = hash + return nil + } + return nil +} + +func UpdateK8SNodeSelectorHash(ctx context.Context, k8sClient client.Client, node *corev1.Node, hash string) error { + // skip nodes that already injected the hash + if node.Labels[constants.LabelNodeSelectorHash] == hash { + return nil + } + // update label to trigger the GPUNode reconcile + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + latest := &corev1.Node{} + if err := k8sClient.Get(ctx, client.ObjectKey{Name: node.Name}, latest); err != nil { + return err + } + latest.Labels[constants.LabelNodeSelectorHash] = hash + return k8sClient.Update(ctx, latest) + }); err != nil { + return err + } + return nil +} + func (r *GPUPoolReconciler) cleanUpPool(ctx context.Context, pool *tfv1.GPUPool) (bool, error) { log := log.FromContext(ctx) log.Info("TensorFusionGPUPool is being deleted", "name", pool.Name) diff --git a/internal/controller/gpupool_controller_test.go b/internal/controller/gpupool_controller_test.go index 50b033cd..e3be7a99 100644 --- a/internal/controller/gpupool_controller_test.go +++ b/internal/controller/gpupool_controller_test.go @@ -42,6 +42,14 @@ var _ = Describe("GPUPool Controller", func() { pool := tfEnv.GetGPUPool(0) g.Expect(pool.Status.Phase).Should(Equal(tfv1.TensorFusionPoolPhaseRunning)) }).Should(Succeed()) + Eventually(func(g Gomega) { + nodeList := tfEnv.GetGPUNodeList(0) + for _, gpuNode := range nodeList.Items { + node := &corev1.Node{} + g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: gpuNode.Name}, node)).Should(Succeed()) + g.Expect(node.Labels).To(HaveKey(constants.LabelNodeSelectorHash)) + } + }).Should(Succeed()) tfEnv.Cleanup() }) }) diff --git a/internal/controller/node_controller.go b/internal/controller/node_controller.go index 3a9c652d..d8908847 100644 --- a/internal/controller/node_controller.go +++ b/internal/controller/node_controller.go @@ -19,8 +19,6 @@ package controller import ( "context" "fmt" - "os" - "strings" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" @@ -55,6 +53,8 @@ type NodeReconciler struct { // +kubebuilder:rbac:groups=core,resources=nodes/finalizers,verbs=create;get;patch;update // Reconcile k8s nodes to create and update GPUNode +// +//nolint:gocyclo func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := log.FromContext(ctx) node := &corev1.Node{} @@ -86,12 +86,15 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. return ctrl.Result{}, err } if !matched { - // delete gpunode if no matched pool - if err := r.Delete(ctx, &tfv1.GPUNode{ - ObjectMeta: metav1.ObjectMeta{ - Name: node.Name, - }, - }); err != nil { + existingGPUNode := &tfv1.GPUNode{} + if err := r.Get(ctx, client.ObjectKey{Name: node.Name}, existingGPUNode); err != nil { + if errors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("can not get gpuNode(%s) : %w", node.Name, err) + } + // delete existing gpunode if no matched pool + if err := r.Delete(ctx, existingGPUNode); err != nil { // requeue if the gpunode is not generated if errors.IsNotFound(err) { return ctrl.Result{}, nil @@ -121,6 +124,14 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
return ctrl.Result{}, nil } + // update k8s node hash + hash := utils.GetObjectHash(pool.Spec.NodeManagerConfig.NodeSelector) + if node.Labels[constants.LabelNodeSelectorHash] != hash { + if err := UpdateK8SNodeSelectorHash(ctx, r.Client, node, hash); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to update k8s node hash: %w", err) + } + } + provisioningMode := pool.Spec.NodeManagerConfig.ProvisioningMode isDirectManagedMode := provisioningMode == tfv1.ProvisioningModeProvisioned isManagedNode := isDirectManagedMode || provisioningMode == tfv1.ProvisioningModeKarpenter @@ -199,11 +210,7 @@ func (r *NodeReconciler) generateGPUNode(node *corev1.Node, pool *tfv1.GPUPool, // SetupWithManager sets up the controller with the Manager. func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error { // must choose an initial label selector to avoid performance impact in large Kubernetes clusters - selector := os.Getenv("INITIAL_GPU_NODE_LABEL_SELECTOR") - if selector == "" { - selector = constants.InitialGPUNodeSelector - } - selectors := strings.Split(selector, "=") + selectors := utils.GetInitialGPUNodeSelector() p, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{ MatchLabels: map[string]string{ selectors[0]: selectors[1], diff --git a/internal/controller/pod_controller.go b/internal/controller/pod_controller.go index ab335948..a7bf7c2f 100644 --- a/internal/controller/pod_controller.go +++ b/internal/controller/pod_controller.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "strconv" + "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" @@ -66,6 +67,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R if err := r.Get(ctx, req.NamespacedName, pod); err != nil { if errors.IsNotFound(err) { r.Allocator.DeallocByPodIdentifier(ctx, req.NamespacedName) + metrics.RemoveWorkerMetrics(req.Name, time.Now()) log.Info("Released GPU resources when pod deleted", "pod", req.NamespacedName) return ctrl.Result{}, nil } @@ -106,8 +108,9 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R } if pod.Labels[constants.LabelComponent] == constants.ComponentWorker { - metrics.SetWorkerMetricsByWorkload(pod) - + if pod.DeletionTimestamp.IsZero() { + metrics.SetWorkerMetricsByWorkload(pod) + } shouldReturn, err := r.handleWorkerPodFinalizer(ctx, pod) if err != nil { return ctrl.Result{}, err @@ -148,7 +151,8 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R func (r *PodReconciler) handleWorkerPodFinalizer(ctx context.Context, pod *corev1.Pod) (bool, error) { // Handle our GPU resource cleanup finalizer shouldReturn, err := utils.HandleFinalizer(ctx, pod, r.Client, func(ctx context.Context, obj *corev1.Pod) (bool, error) { - metrics.RemoveWorkerMetrics(pod.Name, pod.DeletionTimestamp.Time) + // if the Pod keep terminating, should update deletion timestamp for raw cost calculation + metrics.RemoveWorkerMetrics(pod.Name, time.Now()) counter := &v1.TensorFusionPodCounter{Client: r.Client} if err := counter.Decrease(ctx, pod); err != nil { return false, err diff --git a/internal/controller/pod_controller_test.go b/internal/controller/pod_controller_test.go index b36f140f..cf53d119 100644 --- a/internal/controller/pod_controller_test.go +++ b/internal/controller/pod_controller_test.go @@ -230,9 +230,6 @@ var _ = Describe("Pod Controller", func() { }, } _ = k8sClient.Delete(ctx, connection) - Eventually(func() error { - return k8sClient.Get(ctx, 
client.ObjectKeyFromObject(connection), connection) - }).Should(Satisfy(errors.IsNotFound)) }) It("should successfully create TensorFusion connection for client pod", func() { diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 388b938f..0ba3228a 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -180,9 +180,10 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) err = (&GPUNodeReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("GPUNode"), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("GPUNode"), + Allocator: allocator, }).SetupWithManager(mgr) Expect(err).ToNot(HaveOccurred()) diff --git a/internal/controller/tensorfusioncluster_controller.go b/internal/controller/tensorfusioncluster_controller.go index a2f8ba12..3c64429e 100644 --- a/internal/controller/tensorfusioncluster_controller.go +++ b/internal/controller/tensorfusioncluster_controller.go @@ -20,7 +20,6 @@ import ( "context" "fmt" "strconv" - "strings" "sync" "golang.org/x/time/rate" @@ -43,7 +42,6 @@ import ( "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/metrics" utils "github.com/NexusGPU/tensor-fusion/internal/utils" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -305,7 +303,7 @@ func (r *TensorFusionClusterReconciler) reconcileGPUPool(ctx context.Context, tf } err = r.Create(ctx, gpupool) anyPoolChanged = true - r.updateMetricsRecorder(ctx, gpupool) + r.MetricsRecorder.UpdateMetricsRecorder(gpupool, true) if err != nil { errors = append(errors, fmt.Errorf("failed to create GPUPool %s: %w", key, err)) continue @@ -328,7 +326,7 @@ func (r *TensorFusionClusterReconciler) reconcileGPUPool(ctx context.Context, tf } anyPoolChanged = true } - r.updateMetricsRecorder(ctx, existingPool) + r.MetricsRecorder.UpdateMetricsRecorder(existingPool, specChanged) } } @@ -382,7 +380,6 @@ func (r *TensorFusionClusterReconciler) checkTFClusterComponentsReady(ctx contex constants.LabelKeyOwner: tfc.GetName(), })) if err != nil { - r.Recorder.Eventf(tfc, corev1.EventTypeWarning, "CheckComponentStatusError", err.Error()) return false, nil, fmt.Errorf("failed to list GPUPools: %w", err) } if len(pools.Items) != len(tfc.Spec.GPUPools) { @@ -411,7 +408,6 @@ func (r *TensorFusionClusterReconciler) updateTFClusterStatus(ctx context.Contex } } if err := r.Status().Update(ctx, tfc); err != nil { - r.Recorder.Eventf(tfc, corev1.EventTypeWarning, "UpdateClusterStatusError", err.Error()) return err } return nil @@ -443,34 +439,3 @@ func (r *TensorFusionClusterReconciler) SetupWithManager(mgr ctrl.Manager, addLi Owns(&tfv1.GPUPool{}). 
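+		// Changes to owned GPUPools requeue the owning cluster, so status aggregation and
+		// pricing updates (now handled by MetricsRecorder.UpdateMetricsRecorder) stay current.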
Complete(r) } - -// Update metrics recorder's raw billing map -func (r *TensorFusionClusterReconciler) updateMetricsRecorder(ctx context.Context, pool *tfv1.GPUPool) { - const dollarSign = "$" - log := log.FromContext(ctx) - if pool.Spec.QosConfig == nil { - log.Info("QosConfig is nil, skip updating metrics recorder", "pool", pool.Name) - return - } - - qosConfig := pool.Spec.QosConfig - if _, ok := r.MetricsRecorder.WorkerUnitPriceMap[pool.Name]; !ok { - r.MetricsRecorder.WorkerUnitPriceMap[pool.Name] = make(map[string]metrics.RawBillingPricing) - } - pricingDetail := r.MetricsRecorder.WorkerUnitPriceMap[pool.Name] - for _, pricing := range qosConfig.Pricing { - tflopsPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerFP16TFlopsPerHour, dollarSign), 64) - vramPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerGBOfVRAMPerHour, dollarSign), 64) - limitOverRequestChargingRatio, _ := strconv.ParseFloat(pricing.LimitsOverRequestsChargingRatio, 64) - - pricingDetail[string(pricing.Qos)] = metrics.RawBillingPricing{ - TflopsPerSecond: tflopsPerHour / float64(3600), - VramPerSecond: vramPerHour / float64(3600), - - TflopsOverRequestPerSecond: tflopsPerHour / float64(3600) * limitOverRequestChargingRatio, - VramOverRequestPerSecond: vramPerHour / float64(3600) * limitOverRequestChargingRatio, - } - } - - log.V(5).Info("Updated metrics recorder", "pool", pool.Name, "pricing", pricingDetail) -} diff --git a/internal/controller/tensorfusionworkload_controller.go b/internal/controller/tensorfusionworkload_controller.go index 1ec0d722..bc8ced78 100644 --- a/internal/controller/tensorfusionworkload_controller.go +++ b/internal/controller/tensorfusionworkload_controller.go @@ -347,7 +347,6 @@ func (r *TensorFusionWorkloadReconciler) updateStatus( readyCondition.Status = metav1.ConditionFalse readyCondition.Reason = "WorkerFailed" readyCondition.Message = fmt.Sprintf("Failed workers num: %d", failedWorkers) - r.Recorder.Eventf(workload, corev1.EventTypeWarning, "WorkerFailed", "Failed workers num: %d", failedWorkers) } else if workload.Spec.IsDynamicReplica() { // for dynamic replicas, if no worker failed, indicate workload is running phase = tfv1.TensorFusionWorkloadPhaseRunning diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter.go b/internal/gpuallocator/filter/cel_filter/cel_filter.go index a9369535..18a0d176 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter.go @@ -3,29 +3,533 @@ package cel_filter import ( "context" "fmt" - "time" + "reflect" + "regexp" + "runtime" + "strings" + "sync" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/google/cel-go/cel" "github.com/google/cel-go/common/types" + "github.com/google/cel-go/common/types/ref" + "github.com/google/cel-go/common/types/traits" + "github.com/google/cel-go/interpreter" + "github.com/samber/lo" "sigs.k8s.io/controller-runtime/pkg/log" ) +// Parallel processing constants +const ( + // Threshold for enabling parallel processing + ParallelThreshold = 2000 + // Default number of worker goroutines + DefaultWorkerCount = 4 +) + +// Global string pool for GPU Phase values to reduce allocations +var ( + gpuPhaseStringPool = sync.OnceValue(func() map[string]types.String { + return map[string]types.String{ + constants.PhaseUnknown: types.String(constants.PhaseUnknown), + constants.PhasePending: 
types.String(constants.PhasePending), + constants.PhaseUpdating: types.String(constants.PhaseUpdating), + constants.PhaseRunning: types.String(constants.PhaseRunning), + constants.PhaseMigrating: types.String(constants.PhaseMigrating), + constants.PhaseDestroying: types.String(constants.PhaseDestroying), + } + }) +) + +// getPooledPhaseString returns a pooled CEL String for the given phase +func getPooledPhaseString(phase string) ref.Val { + pool := gpuPhaseStringPool() + if pooled, exists := pool[phase]; exists { + return pooled + } + // Return error for unexpected phase values + return types.NewErr("unknown GPU phase: %s", phase) +} + +// fieldUsage tracks which GPU fields are used in the expression +type fieldUsage struct { + labels bool + annotations bool + available bool + nodeSelector bool + runningApps bool +} + +// FastPathPredicate represents a compiled fast-path predicate function +type FastPathPredicate func(gpu *tfv1.GPU) bool + +// ExpressionPattern represents a recognized expression pattern for fast path +type ExpressionPattern struct { + Pattern *regexp.Regexp + Generator func(matches []string) FastPathPredicate +} + +// ZeroAllocActivation provides zero-allocation variable resolution for CEL +// This eliminates the need to create map[string]interface{} for each GPU +type ZeroAllocActivation struct { + gpuVal gpuVal + workerPodKey workerPodKeyVal + usage fieldUsage +} + +func (a *ZeroAllocActivation) init(g *tfv1.GPU, k tfv1.NameNamespace, usage fieldUsage) { + a.gpuVal.GPU = g + a.gpuVal.labels = nil + a.gpuVal.annotations = nil + a.gpuVal.nodeSelector = nil + a.gpuVal.available = nil + a.gpuVal.runningApps = nil + a.workerPodKey.name = k.Name + a.workerPodKey.namespace = k.Namespace + a.usage = usage +} + +// ResolveName implements interpreter.Activation interface +func (a *ZeroAllocActivation) ResolveName(name string) (interface{}, bool) { + switch name { + case CELVarGPU: + return &a.gpuVal, true + case CELVarWorkerPodKey: + return &a.workerPodKey, true + default: + return nil, false + } +} + +// Parent implements interpreter.Activation interface +func (a *ZeroAllocActivation) Parent() interpreter.Activation { + return nil +} + +type workerPodKeyVal struct { + name string + namespace string +} + +func (w *workerPodKeyVal) Type() ref.Type { return types.MapType } +func (w *workerPodKeyVal) Value() interface{} { + return map[string]string{"name": w.name, "namespace": w.namespace} +} +func (w *workerPodKeyVal) Equal(other ref.Val) ref.Val { return types.False } +func (w *workerPodKeyVal) ConvertToNative(t reflect.Type) (interface{}, error) { + return map[string]string{"name": w.name, "namespace": w.namespace}, nil +} +func (w *workerPodKeyVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (w *workerPodKeyVal) Get(index ref.Val) ref.Val { + key, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + switch key { + case GPUFieldName: + return types.String(w.name) + case GPUFieldNamespace: + return types.String(w.namespace) + default: + return types.String("") + } +} +func (w *workerPodKeyVal) HasField(field string) bool { + return field == GPUFieldName || field == GPUFieldNamespace +} + +type appVal struct { + name string + namespace string + count int64 +} + +func (a *appVal) Type() ref.Type { return types.MapType } +func (a *appVal) Value() interface{} { return nil } +func (a *appVal) Equal(other ref.Val) ref.Val { return types.False } +func (a *appVal) ConvertToNative(t 
reflect.Type) (interface{}, error) { + return map[string]interface{}{ + "name": a.name, + "namespace": a.namespace, + "count": a.count, + }, nil +} +func (a *appVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (a *appVal) Get(index ref.Val) ref.Val { + key, _ := index.Value().(string) + switch key { + case "name": + return types.String(a.name) + case "namespace": + return types.String(a.namespace) + case "count": + return types.Int(a.count) + default: + return types.String("") + } +} +func (a *appVal) HasField(field string) bool { + return field == "name" || field == "namespace" || field == "count" +} + +type runningAppsVal struct { + apps []tfv1.RunningAppDetail +} + +func (r *runningAppsVal) Type() ref.Type { return types.ListType } +func (r *runningAppsVal) Value() interface{} { return r.apps } +func (r *runningAppsVal) Equal(other ref.Val) ref.Val { return types.False } +func (r *runningAppsVal) ConvertToNative(t reflect.Type) (interface{}, error) { + if t.Kind() == reflect.Slice { + out := make([]map[string]interface{}, len(r.apps)) + for i, a := range r.apps { + out[i] = map[string]interface{}{ + "name": a.Name, + "namespace": a.Namespace, + "count": a.Count, + } + } + return out, nil + } + return r.apps, nil +} +func (r *runningAppsVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (r *runningAppsVal) Get(index ref.Val) ref.Val { + i, ok := index.Value().(int) + if !ok { + if i64, ok2 := index.Value().(int64); ok2 { + i = int(i64) + ok = true + } + } + if !ok || i < 0 || i >= len(r.apps) { + return types.NewErr("index out of range") + } + app := r.apps[i] + return &appVal{name: app.Name, namespace: app.Namespace, count: int64(app.Count)} +} + +func (r *runningAppsVal) Size() ref.Val { return types.Int(len(r.apps)) } + +func (r *runningAppsVal) Contains(elem ref.Val) ref.Val { + av, ok := elem.(*appVal) + if !ok { + return types.False + } + for _, a := range r.apps { + if a.Name == av.name && a.Namespace == av.namespace && int64(a.Count) == av.count { + return types.True + } + } + return types.False +} +func (r *runningAppsVal) Iterator() traits.Iterator { + return &runningAppsIterator{apps: r.apps} +} +func (r *runningAppsVal) Add(elem ref.Val) ref.Val { + return types.NewErr("runningApps list is read-only") +} + +type runningAppsIterator struct { + apps []tfv1.RunningAppDetail + i int +} + +func (it *runningAppsIterator) Type() ref.Type { return types.IteratorType } +func (it *runningAppsIterator) Value() interface{} { return nil } +func (it *runningAppsIterator) Equal(other ref.Val) ref.Val { return types.False } +func (it *runningAppsIterator) ConvertToNative(t reflect.Type) (interface{}, error) { + return nil, fmt.Errorf("iterator cannot convert to native") +} +func (it *runningAppsIterator) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} +func (it *runningAppsIterator) HasNext() ref.Val { + return types.Bool(it.i < len(it.apps)) +} +func (it *runningAppsIterator) Next() ref.Val { + if it.i >= len(it.apps) { + return types.NewErr("iterator past end") + } + a := it.apps[it.i] + it.i++ + return &appVal{name: a.Name, namespace: a.Namespace, count: int64(a.Count)} +} + +var _ traits.Lister = (*runningAppsVal)(nil) +var _ traits.Iterator = (*runningAppsIterator)(nil) + +// gpuVal implements CEL value interface for GPU objects to eliminate map allocations +type gpuVal struct { + *tfv1.GPU + // Cached 
sub-values to avoid repeated allocations + labels ref.Val + annotations ref.Val + nodeSelector ref.Val + available ref.Val + runningApps ref.Val +} + +// Type implements ref.Val interface +func (v *gpuVal) Type() ref.Type { + return types.MapType +} + +// Value implements ref.Val interface +func (v *gpuVal) Value() interface{} { + return v.GPU +} + +// Equal implements ref.Val interface +func (v *gpuVal) Equal(other ref.Val) ref.Val { + if otherGPU, ok := other.(*gpuVal); ok { + return types.Bool(v.UID == otherGPU.UID) + } + return types.False +} + +// ConvertToNative implements ref.Val interface +func (v *gpuVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { + return v.GPU, nil +} + +// ConvertToType implements ref.Val interface +func (v *gpuVal) ConvertToType(typeValue ref.Type) ref.Val { + switch typeValue { + case types.TypeType: + return types.MapType + default: + return types.NewErr("type conversion error") + } +} + +// HasField implements traits.FieldTester interface +func (v *gpuVal) HasField(field string) bool { + switch field { + case GPUFieldName, GPUFieldNamespace, GPUFieldGPUModel, GPUFieldUUID, + GPUFieldPhase, GPUFieldUsedBy, GPUFieldMessage, GPUFieldLabels, + GPUFieldAnnotations, GPUFieldAvailable, GPUFieldNodeSelector, GPUFieldRunningApps: + return true + default: + return false + } +} + +// Get implements traits.Indexer interface for field access with lazy caching +func (v *gpuVal) Get(index ref.Val) ref.Val { + field, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + + switch field { + case GPUFieldName: + return types.String(v.Name) + case GPUFieldNamespace: + return types.String(v.Namespace) + case GPUFieldGPUModel: + return types.String(v.Status.GPUModel) + case GPUFieldUUID: + return types.String(v.Status.UUID) + case GPUFieldPhase: + return getPooledPhaseString(string(v.Status.Phase)) + case GPUFieldUsedBy: + return types.String(string(v.Status.UsedBy)) + case GPUFieldMessage: + return types.String(v.Status.Message) + case GPUFieldLabels: + // Lazy initialization with caching + if v.labels == nil { + v.labels = &labelsVal{labels: v.Labels} + } + return v.labels + case GPUFieldAnnotations: + // Lazy initialization with caching + if v.annotations == nil { + v.annotations = &labelsVal{labels: v.Annotations} + } + return v.annotations + case GPUFieldAvailable: + // Lazy initialization with caching + if v.available == nil { + v.available = &availableVal{available: v.Status.Available} + } + return v.available + case GPUFieldNodeSelector: + // Lazy initialization with caching + if v.nodeSelector == nil { + v.nodeSelector = &labelsVal{labels: v.Status.NodeSelector} + } + return v.nodeSelector + case GPUFieldRunningApps: + // For now, keep simple implementation - can optimize later if needed + if v.runningApps == nil { + apps := make([]tfv1.RunningAppDetail, len(v.Status.RunningApps)) + for i, app := range v.Status.RunningApps { + apps[i] = *app + } + v.runningApps = &runningAppsVal{apps: apps} + } + return v.runningApps + default: + return types.NewErr("no such field: %s", field) + } +} + +// availableVal provides direct access to GPU available resources without maps +type availableVal struct { + available *tfv1.Resource +} + +// Type implements ref.Val interface +func (v *availableVal) Type() ref.Type { + return types.MapType +} + +// Value implements ref.Val interface +func (v *availableVal) Value() interface{} { + return v.available +} + +// Equal implements ref.Val interface +func (v *availableVal) Equal(other 
ref.Val) ref.Val { + return types.False // Not used in comparisons +} + +// ConvertToNative implements ref.Val interface +func (v *availableVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { + return v.available, nil +} + +// ConvertToType implements ref.Val interface +func (v *availableVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} + +// Get implements field access for available resources +func (v *availableVal) Get(index ref.Val) ref.Val { + field, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + + if v.available == nil { + switch field { + case ResourceFieldTFlops: + return types.Double(0.0) + case ResourceFieldVRAM: + return types.Double(0.0) + default: + return types.NewErr("no such field: %s", field) + } + } + + switch field { + case ResourceFieldTFlops: + return types.Double(v.available.Tflops.AsApproximateFloat64()) + case ResourceFieldVRAM: + return types.Double(float64(v.available.Vram.Value())) + default: + return types.NewErr("no such field: %s", field) + } +} + +// HasField implements field testing +func (v *availableVal) HasField(field string) bool { + return field == ResourceFieldTFlops || field == ResourceFieldVRAM +} + +// labelsVal provides direct access to GPU labels without copying +type labelsVal struct { + labels map[string]string +} + +// Type implements ref.Val interface +func (v *labelsVal) Type() ref.Type { + return types.MapType +} + +// Value implements ref.Val interface +func (v *labelsVal) Value() interface{} { + return v.labels +} + +// Equal implements ref.Val interface +func (v *labelsVal) Equal(other ref.Val) ref.Val { + return types.False // Not used in comparisons +} + +// ConvertToNative implements ref.Val interface +func (v *labelsVal) ConvertToNative(typeDesc reflect.Type) (interface{}, error) { + return v.labels, nil +} + +// ConvertToType implements ref.Val interface +func (v *labelsVal) ConvertToType(typeValue ref.Type) ref.Val { + return types.NewErr("type conversion not supported") +} + +// Get implements map access for labels +func (v *labelsVal) Get(index ref.Val) ref.Val { + key, ok := index.Value().(string) + if !ok { + return types.NewErr("index must be string") + } + + if v.labels == nil { + return types.String("") + } + + value, exists := v.labels[key] + if !exists { + return types.String("") + } + return types.String(value) +} + // AllocRequestCELFilter converts AllocRequest to CEL filter and executes it type CELFilter struct { - cache *ExpressionCache - expression string - name string + cache *ExpressionCache + name string + // Store early filtering criteria for optimization + requiredPhases []tfv1.TensorFusionGPUPhase + requiredGPUModel string + userExpression string + // Track which fields are actually used + usage fieldUsage + // Display expression for logging (read-only) + displayExpression string } // NewAllocRequestCELFilter creates a new CEL filter from allocation request func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, error) { - // Convert AllocRequest to CEL expression - expression, err := convertAllocRequestToCEL(req) - if err != nil { - return nil, fmt.Errorf("failed to convert AllocRequest to CEL: %w", err) + // Extract early filtering criteria + var requiredPhases []tfv1.TensorFusionGPUPhase + var requiredGPUModel, userExpression, displayExpression string + + if req != nil { + requiredPhases = []tfv1.TensorFusionGPUPhase{ + tfv1.TensorFusionGPUPhaseRunning, + 
tfv1.TensorFusionGPUPhasePending, + } + requiredGPUModel = req.GPUModel + userExpression = req.CELFilterExpression + + // Build display expression for logging (not used for execution) + displayExpression = buildDisplayExpression(req) } + // Analyze field usage in user expression only + usage := analyzeFieldUsage(userExpression) + // Handle nil request case name := "AllocRequest-unknown" if req != nil { @@ -33,9 +537,13 @@ func NewCELFilter(req *tfv1.AllocRequest, cache *ExpressionCache) (*CELFilter, e } return &CELFilter{ - cache: cache, - expression: expression, - name: name, + cache: cache, + name: name, + requiredPhases: requiredPhases, + requiredGPUModel: requiredGPUModel, + userExpression: userExpression, + usage: usage, + displayExpression: displayExpression, }, nil } @@ -51,84 +559,76 @@ func (f *CELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, return gpus, nil } - if f.expression == "" { - // If no expression, return all GPUs (no filtering needed) - return gpus, nil - } - - // Get compiled program from cache - program, err := f.cache.GetOrCompileProgram(f.expression) - if err != nil { - return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.expression, err) - } - + // Pre-allocate result slice with estimated capacity for early filtering var filteredGPUs []*tfv1.GPU + // Early filtering phase: apply basic filters first to reduce CEL evaluation overhead + earlyFilteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) for _, gpu := range gpus { - // Create timeout context for CEL evaluation - evalCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond) - - // Create variables for CEL evaluation - vars := createCELVariables(*gpu, workerPodKey) - - // Evaluate with timeout - resultChan := make(chan evalResult, 1) - go func() { - result, _, evalErr := program.Eval(vars) - resultChan <- evalResult{result: result, err: evalErr} - }() - - select { - case evalRes := <-resultChan: - cancel() - if evalRes.err != nil { - log.Error(evalRes.err, "CEL expression evaluation failed", - "expression", f.expression, - "gpu", gpu.Name, - "workerPodKey", workerPodKey) - // On error, exclude the GPU (fail-safe) - continue - } + // when running progressive migration mode, only return GPUs used by tensor-fusion + if utils.IsProgressiveMigration() && gpu.Status.UsedBy != tfv1.UsedByTensorFusion { + continue + } + // Fast path: check phase first (most common filter) + if f.requiredPhases != nil && !lo.Contains(f.requiredPhases, gpu.Status.Phase) { + continue + } - // Convert result to boolean - if boolResult, ok := evalRes.result.(types.Bool); ok { - if bool(boolResult) { - filteredGPUs = append(filteredGPUs, gpu) - } - } else { - log.Error(nil, "CEL expression did not return boolean", - "expression", f.expression, - "result", evalRes.result, - "gpu", gpu.Name) - // On non-boolean result, exclude the GPU (fail-safe) - continue - } - case <-evalCtx.Done(): - cancel() - // Timeout - skip this GPU (fail-safe behavior) - log.V(1).Info("CEL evaluation timeout", "gpu", gpu.Name, "expression", f.expression) + // Fast path: check GPU model (second most common filter) + if f.requiredGPUModel != "" && gpu.Status.GPUModel != f.requiredGPUModel { continue } + + earlyFilteredGPUs = append(earlyFilteredGPUs, gpu) + } + + // If no user expression, return early filtered results + if f.userExpression == "" { + log.V(1).Info("CEL filter applied (early filtering only)", + "filter", f.name, + "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), + "outputGPUs", 
len(earlyFilteredGPUs)) + return earlyFilteredGPUs, nil + } + + // If no GPUs passed early filtering, return empty result + if len(earlyFilteredGPUs) == 0 { + return earlyFilteredGPUs, nil + } + + // Get compiled program from cache for user expression + program, err := f.cache.GetOrCompileProgram(f.userExpression) + if err != nil { + return nil, fmt.Errorf("failed to get CEL program for expression %q: %w", f.userExpression, err) + } + + // Use fast path if available, otherwise fall back to CEL + + // Fallback to CEL evaluation for complex expressions + if len(earlyFilteredGPUs) >= ParallelThreshold { + // Use parallel evaluation for large GPU sets + filteredGPUs = f.filterFallbackParallel(program, earlyFilteredGPUs, workerPodKey) + } else { + // Sequential evaluation for smaller sets + filteredGPUs = f.filterFallbackSequential(ctx, program, earlyFilteredGPUs, workerPodKey) } - log.V(1).Info("AllocRequest CEL filter applied", + log.V(1).Info("CEL filter applied (CEL evaluation)", "filter", f.name, - "expression", f.expression, + "displayExpression", f.displayExpression, + "userExpression", f.userExpression, "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), "outputGPUs", len(filteredGPUs)) return filteredGPUs, nil } -type evalResult struct { - result interface{} - err error -} - -// convertAllocRequestToCEL converts an allocation request to a CEL expression -func convertAllocRequestToCEL(req *tfv1.AllocRequest) (string, error) { +// buildDisplayExpression creates a readable expression string for logging purposes only +func buildDisplayExpression(req *tfv1.AllocRequest) string { if req == nil { - return "", nil + return "" } var conditions []string @@ -138,30 +638,24 @@ func convertAllocRequestToCEL(req *tfv1.AllocRequest) (string, error) { conditions = append(conditions, req.CELFilterExpression) } - // Add GPU phase condition (must be Ready) - conditions = append(conditions, "gpu.phase == 'Ready'") - - // Add GPU model filter if specified - if req.GPUModel != "" { - conditions = append(conditions, fmt.Sprintf("gpu.gpuModel == '%s'", req.GPUModel)) - } - - // If no conditions, return empty expression (no filtering) + // If no conditions, return empty expression if len(conditions) == 0 { - return "", nil + return "" } - // Combine all conditions with AND + // Combine all conditions with AND using strings.Builder for efficiency if len(conditions) == 1 { - return conditions[0], nil + return conditions[0] } - expression := conditions[0] + var builder strings.Builder + builder.WriteString(conditions[0]) for i := 1; i < len(conditions); i++ { - expression += " && " + conditions[i] + builder.WriteString(" && ") + builder.WriteString(conditions[i]) } - return expression, nil + return builder.String() } // createCELEnvironment creates a CEL environment with GPU-related variables and functions @@ -171,58 +665,135 @@ func createCELEnvironment() (*cel.Env, error) { cel.Variable(CELVarGPU, cel.MapType(cel.StringType, cel.DynType)), // Define worker pod key cel.Variable(CELVarWorkerPodKey, cel.MapType(cel.StringType, cel.StringType)), - // Define request object structure - cel.Variable(CELVarRequest, cel.MapType(cel.StringType, cel.DynType)), ) } -// createCELVariables creates variables for CEL evaluation from GPU and request information -func createCELVariables(gpu tfv1.GPU, workerPodKey tfv1.NameNamespace) map[string]interface{} { - // Convert GPU to a map for CEL evaluation - gpuMap := map[string]interface{}{ - GPUFieldName: gpu.Name, - GPUFieldNamespace: gpu.Namespace, - 
GPUFieldGPUModel: gpu.Status.GPUModel, - GPUFieldUUID: gpu.Status.UUID, - GPUFieldPhase: string(gpu.Status.Phase), - GPUFieldUsedBy: string(gpu.Status.UsedBy), - GPUFieldMessage: gpu.Status.Message, - GPUFieldLabels: gpu.Labels, - GPUFieldAnnotations: gpu.Annotations, - } +// filterFallbackSequential performs sequential CEL evaluation for smaller GPU sets +func (f *CELFilter) filterFallbackSequential(ctx context.Context, program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) []*tfv1.GPU { + filteredGPUs := make([]*tfv1.GPU, 0, len(gpus)/2) + log := log.FromContext(ctx) + var activation ZeroAllocActivation + for i, gpu := range gpus { + // Periodic context check every 64 GPUs for very large sets + if i&63 == 0 { + select { + case <-ctx.Done(): + log.V(1).Info("CEL evaluation cancelled", "processedGPUs", len(filteredGPUs), "totalGPUs", len(gpus)) + return filteredGPUs + default: + } + } - // Add available information if available - if gpu.Status.Available != nil { - gpuMap[GPUFieldAvailable] = map[string]interface{}{ - ResourceFieldTFlops: gpu.Status.Available.Tflops.AsApproximateFloat64(), - ResourceFieldVRAM: gpu.Status.Available.Vram.AsApproximateFloat64(), + // Use zero-allocation activation instead of maps + activation.init(gpu, workerPodKey, f.usage) + + // Direct synchronous evaluation with custom activation + result, _, evalErr := program.Eval(&activation) + + if evalErr != nil { + log.Error(evalErr, "CEL expression evaluation failed", + "expression", f.userExpression, + "gpu", gpu.Name, + "workerPodKey", workerPodKey) + // On error, exclude the GPU (fail-safe) + continue + } + + // Convert result to boolean + if boolResult, ok := result.(types.Bool); ok && bool(boolResult) { + filteredGPUs = append(filteredGPUs, gpu) + } else { + log.Error(nil, "CEL expression did not return boolean", + "expression", f.userExpression, + "result", result, + "gpu", gpu.Name) + // On non-boolean result, exclude the GPU (fail-safe) + continue } } - // Add node selector information - if gpu.Status.NodeSelector != nil { - gpuMap[GPUFieldNodeSelector] = gpu.Status.NodeSelector + return filteredGPUs +} + +// filterFallbackParallel performs parallel CEL evaluation for large GPU sets +func (f *CELFilter) filterFallbackParallel(program cel.Program, gpus []*tfv1.GPU, workerPodKey tfv1.NameNamespace) []*tfv1.GPU { + numGPUs := len(gpus) + numWorkers := runtime.NumCPU() + if numWorkers > DefaultWorkerCount { + numWorkers = DefaultWorkerCount } - // Add running apps information (always set, even if empty) - runningApps := make([]map[string]interface{}, len(gpu.Status.RunningApps)) - for i, app := range gpu.Status.RunningApps { - runningApps[i] = map[string]interface{}{ - AppFieldName: app.Name, - AppFieldNamespace: app.Namespace, - AppFieldCount: app.Count, + chunkSize := (numGPUs + numWorkers - 1) / numWorkers + resultChannels := make([]<-chan []*tfv1.GPU, numWorkers) + var activation ZeroAllocActivation + // Create workers + for i := 0; i < numWorkers; i++ { + start := i * chunkSize + end := start + chunkSize + if end > numGPUs { + end = numGPUs + } + + if start >= end { + // No work for this worker + ch := make(chan []*tfv1.GPU, 1) + ch <- []*tfv1.GPU{} + close(ch) + resultChannels[i] = ch + continue } + + chunk := gpus[start:end] + resultCh := make(chan []*tfv1.GPU, 1) + resultChannels[i] = resultCh + + // Start worker goroutine + go func(gpuChunk []*tfv1.GPU, resultCh chan<- []*tfv1.GPU) { + defer close(resultCh) + + filtered := make([]*tfv1.GPU, 0, len(gpuChunk)/2) // Estimate 50% pass rate 
+ activation := activation // shadow with a per-goroutine copy: sharing one ZeroAllocActivation across workers is a data race + + for _, gpu := range gpuChunk { + // Use zero-allocation activation + activation.init(gpu, workerPodKey, f.usage) + + // Direct synchronous evaluation + result, _, evalErr := program.Eval(&activation) + if evalErr != nil { + // On error, exclude the GPU (fail-safe) + continue + } + + // Convert result to boolean + if boolResult, ok := result.(types.Bool); ok && bool(boolResult) { + filtered = append(filtered, gpu) + } + // On non-boolean result, exclude the GPU (fail-safe) + } + resultCh <- filtered + }(chunk, resultCh) } - gpuMap[GPUFieldRunningApps] = runningApps - // Worker pod key information - workerPodKeyMap := map[string]string{ - PodKeyFieldName: workerPodKey.Name, - PodKeyFieldNamespace: workerPodKey.Namespace, + // Collect results + var totalFiltered []*tfv1.GPU + for _, ch := range resultChannels { + chunkResults := <-ch + totalFiltered = append(totalFiltered, chunkResults...) } - return map[string]interface{}{ - CELVarGPU: gpuMap, - CELVarWorkerPodKey: workerPodKeyMap, + return totalFiltered +} + +// analyzeFieldUsage performs simple heuristic analysis of which fields are used in the expression +func analyzeFieldUsage(expression string) fieldUsage { + if expression == "" { + return fieldUsage{} + } + return fieldUsage{ + labels: strings.Contains(expression, "labels"), + annotations: strings.Contains(expression, "annotations"), + available: strings.Contains(expression, "available") || strings.Contains(expression, "tflops") || strings.Contains(expression, "vram"), + nodeSelector: strings.Contains(expression, "nodeSelector"), + runningApps: strings.Contains(expression, "runningApps"), } } diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go index 8894db07..254baf7c 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_benchmark_test.go @@ -7,31 +7,41 @@ import ( "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator/filter" + dracel "k8s.io/dynamic-resource-allocation/cel" +) + +// Test constants for repeated strings +const ( + testEnvironmentProduction = "production" + testTierHighPerformance = "high-performance" + testPriorityCritical = "critical" ) // Benchmark performance of the CEL filter compared to the original filter func BenchmarkFilterPerformance(b *testing.B) { // Create test data - const numGPUs = 1000 + const numGPUs = 1000000 gpus := make([]*tfv1.GPU, numGPUs) for i := 0; i < numGPUs; i++ { gpuModel := "A100" - if i%3 == 0 { + switch i % 3 { + case 0: gpuModel = "V100" - } else if i%3 == 1 { + case 1: gpuModel = "H100" } - phase := "Ready" + phase := constants.PhaseRunning if i%10 == 0 { - phase = "Pending" + phase = constants.PhasePending } - gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", gpuModel, phase, 150.0, 40.0) - gpu.Labels["environment"] = "production" + gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), gpuModel, phase, 150.0, 40.0) + gpu.Labels["environment"] = testEnvironmentProduction if i%2 == 0 { - gpu.Labels["tier"] = "high-performance" + gpu.Labels["tier"] = testTierHighPerformance } gpus[i] = gpu } @@ -43,7 +53,7 @@ func BenchmarkFilterPerformance(b *testing.B) { b.Run("OriginalFilters", func(b *testing.B) { // Import the original filter package registry := filter.NewFilterRegistry().With( - filter.NewPhaseFilter("Ready"), +
filter.NewPhaseFilter(constants.PhaseRunning), filter.NewGPUModelFilter("A100"), ) @@ -59,7 +69,7 @@ func BenchmarkFilterPerformance(b *testing.B) { // Benchmark CEL filter - basic filtering b.Run("CELFilter_Basic", func(b *testing.B) { - request := createTestAllocRequest("default", "test-workload", "A100", "") + request := createTestAllocRequest("A100", "") cache, err := NewExpressionCache(100, 5*time.Minute) if err != nil { b.Fatal(err) @@ -82,7 +92,7 @@ func BenchmarkFilterPerformance(b *testing.B) { // Benchmark CEL filter - complex expression b.Run("CELFilter_Complex", func(b *testing.B) { - request := createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'") + request := createTestAllocRequest("A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == '"+testEnvironmentProduction+"'") cache, err := NewExpressionCache(100, 5*time.Minute) if err != nil { b.Fatal(err) @@ -114,15 +124,16 @@ func BenchmarkFilterPerformance(b *testing.B) { "gpu.gpuModel == 'A100' && gpu.available.tflops > 100.0", "gpu.gpuModel == 'V100' && gpu.available.tflops > 80.0", "gpu.gpuModel == 'H100' && gpu.available.tflops > 180.0", - "gpu.labels['environment'] == 'production'", - "gpu.labels['tier'] == 'high-performance'", + + "gpu.labels['environment'] == '" + testEnvironmentProduction + "'", + "gpu.labels['tier'] == '" + testTierHighPerformance + "'", "gpu.available.vram > 30000000000", } b.ResetTimer() for i := 0; i < b.N; i++ { expression := expressions[i%len(expressions)] - request := createTestAllocRequest("default", "test-workload", "", expression) + request := createTestAllocRequest("", expression) celFilter, err := NewCELFilter(request, cache) if err != nil { @@ -137,6 +148,46 @@ func BenchmarkFilterPerformance(b *testing.B) { } }) + // Benchmark DRA CEL filter - basic filtering + b.Run("DRACELFilter_Basic", func(b *testing.B) { + request := createTestAllocRequest("A100", "") + cache := dracel.NewCache(100, dracel.Features{}) + + draFilter, err := NewDRACELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filteredGPUs, err := draFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + + // Benchmark DRA CEL filter - complex expression + b.Run("DRACELFilter_Complex", func(b *testing.B) { + request := createTestAllocRequest("", "device.attributes['model'].string == 'A100' && device.attributes['label.environment'].string == '"+testEnvironmentProduction+"'") + cache := dracel.NewCache(100, dracel.Features{}) + + draFilter, err := NewDRACELFilter(request, cache) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filteredGPUs, err := draFilter.Filter(ctx, workerPodKey, gpus) + if err != nil { + b.Fatal(err) + } + _ = filteredGPUs + } + }) + // Print performance comparison report after benchmarks printPerformanceComparison(b) } @@ -148,7 +199,7 @@ func BenchmarkCachePerformance(b *testing.B) { b.Fatal(err) } - expression := "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0" + expression := "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0" b.Run("CacheHit", func(b *testing.B) { // Pre-warm cache @@ -169,7 +220,7 @@ func BenchmarkCachePerformance(b *testing.B) { b.Run("CacheMiss", func(b *testing.B) { expressions := make([]string, b.N) for i := 0; i < b.N; i++ { - expressions[i] = fmt.Sprintf("gpu.phase == 'Ready' && 
gpu.gpuModel == 'A100' && gpu.available.tflops >= %d.0", i%200+50) + expressions[i] = fmt.Sprintf("gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= %d.0", i%200+50) } b.ResetTimer() @@ -187,10 +238,10 @@ func BenchmarkExpressionComplexity(b *testing.B) { const numGPUs = 100 gpus := make([]*tfv1.GPU, numGPUs) for i := 0; i < numGPUs; i++ { - gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "default", "A100", "Ready", 150.0, 40.0) - gpu.Labels["environment"] = "production" - gpu.Labels["tier"] = "high-performance" - gpu.Annotations["priority"] = "critical" + gpu := createTestGPU(fmt.Sprintf("gpu-%d", i), "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Labels["environment"] = testEnvironmentProduction + gpu.Labels["tier"] = testTierHighPerformance + gpu.Annotations["priority"] = testPriorityCritical gpus[i] = gpu } @@ -203,23 +254,23 @@ func BenchmarkExpressionComplexity(b *testing.B) { }{ { name: "Simple", - expression: "gpu.phase == 'Ready'", + expression: "gpu.phase == 'Running'", }, { name: "Medium", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100'", }, { name: "Complex", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0", }, { name: "VeryComplex", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == '" + testEnvironmentProduction + "'", }, { name: "UltraComplex", - expression: "gpu.phase == 'Ready' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production' && gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'", + expression: "gpu.phase == 'Running' && gpu.gpuModel == 'A100' && gpu.available.tflops >= 150.0 && gpu.labels['environment'] == '" + testEnvironmentProduction + "' && gpu.labels['tier'] == '" + testTierHighPerformance + "' && gpu.annotations['priority'] == '" + testPriorityCritical + "'", }, } @@ -230,7 +281,7 @@ func BenchmarkExpressionComplexity(b *testing.B) { b.Fatal(err) } - request := createTestAllocRequest("default", "test-workload", "", tc.expression) + request := createTestAllocRequest("", tc.expression) celFilter, err := NewCELFilter(request, cache) if err != nil { b.Fatal(err) @@ -254,7 +305,7 @@ func printPerformanceComparison(b *testing.B) { === GPU Filter Performance Comparison === Test Environment: -- Number of GPUs: 1000 +- Number of GPUs: 1000000 - GPU Models: A100 (33%%), V100 (33%%), H100 (33%%) - GPU Phases: Ready (90%%), Pending (10%%) diff --git a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go index c21e4ee8..72481ee9 100644 --- a/internal/gpuallocator/filter/cel_filter/cel_filter_test.go +++ b/internal/gpuallocator/filter/cel_filter/cel_filter_test.go @@ -6,18 +6,25 @@ import ( "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +// Test constants for repeated strings used only in cel_filter_test.go +const ( + testEnvProduction = 
"production" + testPriorCritical = "critical" +) + // Helper functions for creating test data -func createTestGPU(name, namespace, gpuModel, phase string, tflops, vram float64) *tfv1.GPU { +func createTestGPU(name, gpuModel, phase string, tflops, vram float64) *tfv1.GPU { gpu := &tfv1.GPU{ ObjectMeta: metav1.ObjectMeta{ Name: name, - Namespace: namespace, + Namespace: "default", Labels: make(map[string]string), Annotations: make(map[string]string), }, @@ -40,11 +47,11 @@ func createTestGPU(name, namespace, gpuModel, phase string, tflops, vram float64 return gpu } -func createTestAllocRequest(namespace, name, gpuModel, celExpression string) *tfv1.AllocRequest { +func createTestAllocRequest(gpuModel, celExpression string) *tfv1.AllocRequest { return &tfv1.AllocRequest{ WorkloadNameNamespace: tfv1.NameNamespace{ - Name: name, - Namespace: namespace, + Name: "test-workload", + Namespace: "default", }, GPUModel: gpuModel, CELFilterExpression: celExpression, @@ -65,71 +72,71 @@ func TestCELFilter_NormalCases(t *testing.T) { }{ { name: "filter by GPU model", - request: createTestAllocRequest("default", "test-workload", "A100", ""), + request: createTestAllocRequest("A100", ""), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), - createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), }, expectedCount: 2, description: "Should filter GPUs matching the specified model A100", }, { name: "filter by GPU phase only", - request: createTestAllocRequest("default", "test-workload", "", ""), + request: createTestAllocRequest("", ""), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "A100", "Pending", 150.0, 40.0), - createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-4", "default", "A100", "Failed", 150.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "A100", constants.PhasePending, 150.0, 40.0), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-4", "A100", constants.PhaseFailed, 150.0, 40.0), }, - expectedCount: 2, - description: "Should only return GPUs in Ready phase", + expectedCount: 3, + description: "Should return GPUs in Running and Pending phases", }, { name: "custom CEL expression - filter by available TFLOPS", - request: createTestAllocRequest("default", "test-workload", "", "gpu.available.tflops > 120.0"), + request: createTestAllocRequest("", "gpu.available.tflops > 120.0"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), - createTestGPU("gpu-3", "default", "H100", "Ready", 200.0, 80.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), + createTestGPU("gpu-3", "H100", constants.PhaseRunning, 200.0, 80.0), }, expectedCount: 2, - description: "Should filter GPUs with TFLOPS > 120 and Ready phase", + description: "Should filter GPUs with TFLOPS > 120 and Running/Pending phase", }, { name: "custom CEL expression - filter by available VRAM", - request: createTestAllocRequest("default", 
"test-workload", "", "gpu.available.vram > 35000000000"), // > 35GB in bytes + request: createTestAllocRequest("", "gpu.available.vram > 35000000000"), // > 35GB in bytes gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), // 40GB - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), // 32GB - createTestGPU("gpu-3", "default", "H100", "Ready", 200.0, 80.0), // 80GB + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), // 40GB + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), // 32GB + createTestGPU("gpu-3", "H100", constants.PhaseRunning, 200.0, 80.0), // 80GB }, expectedCount: 2, - description: "Should filter GPUs with VRAM > 35GB and Ready phase", + description: "Should filter GPUs with VRAM > 35GB and Running/Pending phase", }, { name: "combined model and custom CEL expression", - request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0"), + request: createTestAllocRequest("A100", "gpu.available.tflops >= 150.0"), gpus: []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "A100", "Ready", 120.0, 40.0), - createTestGPU("gpu-3", "default", "V100", "Ready", 160.0, 32.0), - createTestGPU("gpu-4", "default", "A100", "Ready", 180.0, 40.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "A100", constants.PhaseRunning, 120.0, 40.0), + createTestGPU("gpu-3", "V100", constants.PhaseRunning, 160.0, 32.0), + createTestGPU("gpu-4", "A100", constants.PhaseRunning, 180.0, 40.0), }, expectedCount: 2, - description: "Should filter A100 GPUs with TFLOPS >= 150 and Ready phase", + description: "Should filter A100 GPUs with TFLOPS >= 150 and Running/Pending phase", }, { name: "filter by labels", - request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['environment'] == 'production'"), + request: createTestAllocRequest("", "gpu.labels['environment'] == '"+testEnvProduction+"'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) - gpu1.Labels["environment"] = "production" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1.Labels["environment"] = testEnvProduction + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Labels["environment"] = "development" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) - gpu3.Labels["environment"] = "production" + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu3.Labels["environment"] = testEnvProduction return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), expectedCount: 2, @@ -137,14 +144,14 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "filter by annotations", - request: createTestAllocRequest("default", "test-workload", "", "gpu.annotations['priority'] == 'critical'"), + request: createTestAllocRequest("", "gpu.annotations['priority'] == '"+testPriorCritical+"'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) - gpu1.Annotations["priority"] = "critical" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu1.Annotations["priority"] = testPriorCritical + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 
150.0, 40.0) gpu2.Annotations["priority"] = "low" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) - gpu3.Annotations["priority"] = "critical" + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu3.Annotations["priority"] = testPriorCritical return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), expectedCount: 2, @@ -152,15 +159,15 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "combined labels and annotations filter", - request: createTestAllocRequest("default", "test-workload", "", "gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'"), + request: createTestAllocRequest("", "gpu.labels['tier'] == 'high-performance' && gpu.annotations['priority'] == 'critical'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) gpu1.Labels["tier"] = "high-performance" - gpu1.Annotations["priority"] = "critical" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 150.0, 40.0) + gpu1.Annotations["priority"] = testPriorCritical + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) gpu2.Labels["tier"] = "standard" gpu2.Annotations["priority"] = "critical" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 150.0, 40.0) + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) gpu3.Labels["tier"] = "high-performance" gpu3.Annotations["priority"] = "low" return []*tfv1.GPU{gpu1, gpu2, gpu3} @@ -170,25 +177,139 @@ func TestCELFilter_NormalCases(t *testing.T) { }, { name: "empty GPU list", - request: createTestAllocRequest("default", "test-workload", "A100", ""), + request: createTestAllocRequest("A100", ""), gpus: []*tfv1.GPU{}, expectedCount: 0, description: "Should handle empty GPU list gracefully", }, { name: "complex combined expression with model, resources, and metadata", - request: createTestAllocRequest("default", "test-workload", "A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'"), + request: createTestAllocRequest("A100", "gpu.available.tflops >= 150.0 && gpu.labels['environment'] == 'production'"), gpus: func() []*tfv1.GPU { - gpu1 := createTestGPU("gpu-1", "default", "A100", "Ready", 180.0, 40.0) - gpu1.Labels["environment"] = "production" - gpu2 := createTestGPU("gpu-2", "default", "A100", "Ready", 120.0, 40.0) + gpu1 := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 180.0, 40.0) + gpu1.Labels["environment"] = testEnvProduction + gpu2 := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 120.0, 40.0) gpu2.Labels["environment"] = "production" - gpu3 := createTestGPU("gpu-3", "default", "A100", "Ready", 200.0, 40.0) + gpu3 := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 200.0, 40.0) gpu3.Labels["environment"] = "development" return []*tfv1.GPU{gpu1, gpu2, gpu3} }(), expectedCount: 1, - description: "Should filter A100 GPUs with TFLOPS >= 150, production environment, and Ready phase", + description: "Should filter A100 GPUs with TFLOPS >= 150, production environment, and Running/Pending phase", + }, + { + name: "filter by running apps - no running apps", + request: createTestAllocRequest("", "size(gpu.runningApps) == 0"), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: 
"app1", Namespace: "default", Count: 1}, + } + return gpu + }(), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), + }, + expectedCount: 2, + description: "Should return GPUs with no running apps", + }, + { + name: "filter by running apps - has specific app", + request: createTestAllocRequest("", "gpu.runningApps.exists(app, app.name == 'training-job' && app.namespace == 'ml-team')"), + gpus: []*tfv1.GPU{ + func() *tfv1.GPU { + gpu := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "training-job", Namespace: "ml-team", Count: 2}, + {Name: "other-job", Namespace: "default", Count: 1}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "other-job", Namespace: "ml-team", Count: 1}, + } + return gpu + }(), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), + }, + expectedCount: 1, + description: "Should return GPUs running specific training job", + }, + { + name: "filter by running apps - count threshold", + request: createTestAllocRequest("", "gpu.runningApps.all(app, app.count <= 2) && size(gpu.runningApps) > 0"), + gpus: []*tfv1.GPU{ + func() *tfv1.GPU { + gpu := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "default", Count: 1}, + {Name: "job2", Namespace: "default", Count: 2}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "default", Count: 5}, // Count > 2 + } + return gpu + }(), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + }, + expectedCount: 1, + description: "Should return GPUs where all running apps have count <= 2", + }, + { + name: "filter by running apps - complex condition", + request: createTestAllocRequest("A100", "gpu.available.tflops >= 150.0 && (size(gpu.runningApps) == 0 || gpu.runningApps.all(app, app.namespace != 'restricted'))"), + gpus: []*tfv1.GPU{ + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "allowed", Count: 1}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "restricted", Count: 1}, // Restricted namespace + } + return gpu + }(), + createTestGPU("gpu-4", "V100", constants.PhaseRunning, 150.0, 40.0), // Wrong model + }, + expectedCount: 2, + description: "Should return A100 GPUs with sufficient resources and no restricted apps", + }, + { + name: "filter by running apps - namespace isolation", + request: createTestAllocRequest("", "!gpu.runningApps.exists(app, app.namespace == 'tenant-a')"), + gpus: []*tfv1.GPU{ + func() *tfv1.GPU { + gpu := createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "tenant-b", Count: 1}, + {Name: "job2", Namespace: "shared", Count: 1}, + } + return gpu + }(), + func() *tfv1.GPU { + gpu := createTestGPU("gpu-2", "A100", 
constants.PhaseRunning, 150.0, 40.0) + gpu.Status.RunningApps = []*tfv1.RunningAppDetail{ + {Name: "job1", Namespace: "tenant-a", Count: 1}, // Should be excluded + {Name: "job2", Namespace: "tenant-b", Count: 1}, + } + return gpu + }(), + createTestGPU("gpu-3", "A100", constants.PhaseRunning, 150.0, 40.0), // No running apps + }, + expectedCount: 2, + description: "Should return GPUs not running apps from tenant-a", }, } @@ -207,6 +328,8 @@ func TestCELFilter_NormalCases(t *testing.T) { // Verify results require.NoError(t, err, "Filter execution should not fail") + + // Debug output for complex condition test assert.Len(t, filteredGPUs, tt.expectedCount, tt.description) // Verify filter name @@ -224,11 +347,11 @@ func TestCELFilter_EdgeAndExceptionCases(t *testing.T) { t.Run("CEL expressions edge cases", func(t *testing.T) { // Test GPUs for execution testGPUs := []*tfv1.GPU{ - createTestGPU("gpu-1", "default", "A100", "Ready", 150.0, 40.0), - createTestGPU("gpu-2", "default", "V100", "Ready", 100.0, 32.0), + createTestGPU("gpu-1", "A100", constants.PhaseRunning, 150.0, 40.0), + createTestGPU("gpu-2", "V100", constants.PhaseRunning, 100.0, 32.0), } // Add GPU with nil resources - gpuWithNilResources := createTestGPU("gpu-nil", "default", "A100", "Ready", 0, 0) + gpuWithNilResources := createTestGPU("gpu-nil", "A100", constants.PhaseRunning, 0, 0) gpuWithNilResources.Status.Available = nil testGPUs = append(testGPUs, gpuWithNilResources) @@ -338,7 +461,7 @@ func TestCELFilter_EdgeAndExceptionCases(t *testing.T) { cache, err := NewExpressionCache(10, 5*time.Minute) require.NoError(t, err) - request := createTestAllocRequest("default", "test-workload", "", tt.expression) + request := createTestAllocRequest("", tt.expression) celFilter, err := NewCELFilter(request, cache) if tt.shouldFail { diff --git a/internal/gpuallocator/filter/cel_filter/dra_cel_filter.go b/internal/gpuallocator/filter/cel_filter/dra_cel_filter.go new file mode 100644 index 00000000..83b73c93 --- /dev/null +++ b/internal/gpuallocator/filter/cel_filter/dra_cel_filter.go @@ -0,0 +1,216 @@ +package cel_filter + +import ( + "context" + "encoding/json" + "fmt" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" + "github.com/samber/lo" + resourceapi "k8s.io/api/resource/v1" + dracel "k8s.io/dynamic-resource-allocation/cel" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// DRACELFilter implements CEL filtering using k8s.io/dynamic-resource-allocation/cel +type DRACELFilter struct { + name string + requiredPhases []tfv1.TensorFusionGPUPhase + userExpression string + cache *dracel.Cache + displayExpression string +} + +// NewDRACELFilter creates a new DRA-based CEL filter from allocation request +func NewDRACELFilter(req *tfv1.AllocRequest, cache *dracel.Cache) (*DRACELFilter, error) { + // Extract early filtering criteria + var requiredPhases []tfv1.TensorFusionGPUPhase + var userExpression, displayExpression string + + if req != nil { + requiredPhases = []tfv1.TensorFusionGPUPhase{ + tfv1.TensorFusionGPUPhaseRunning, + tfv1.TensorFusionGPUPhasePending, + } + userExpression = req.CELFilterExpression + displayExpression = buildDisplayExpression(req) + } + + // Handle nil request case + name := "AllocRequest-unknown" + if req != nil { + name = fmt.Sprintf("AllocRequest-%s", req.WorkloadNameNamespace.String()) + } + + // Validate expression if provided + if userExpression != "" && cache != nil { + result := 
cache.Check(userExpression) + if result.Error != nil { + return nil, fmt.Errorf("failed to compile CEL expression %q: %w", userExpression, result.Error) + } + } + + return &DRACELFilter{ + name: name, + requiredPhases: requiredPhases, + userExpression: userExpression, + cache: cache, + displayExpression: displayExpression, + }, nil +} + +// Name returns the filter name +func (f *DRACELFilter) Name() string { + return f.name +} + +// Filter applies the CEL expression to filter GPUs +func (f *DRACELFilter) Filter(ctx context.Context, workerPodKey tfv1.NameNamespace, gpus []*tfv1.GPU) ([]*tfv1.GPU, error) { + log := log.FromContext(ctx) + if len(gpus) == 0 { + return gpus, nil + } + + // Early filtering phase: apply basic filters first + earlyFilteredGPUs := make([]*tfv1.GPU, 0, len(gpus)) + for _, gpu := range gpus { + // Progressive migration mode check + if utils.IsProgressiveMigration() && gpu.Status.UsedBy != tfv1.UsedByTensorFusion { + continue + } + + // Fast path: check phase first (most common filter) + if f.requiredPhases != nil && !lo.Contains(f.requiredPhases, gpu.Status.Phase) { + continue + } + + earlyFilteredGPUs = append(earlyFilteredGPUs, gpu) + } + + // If no user expression, return early filtered results + if f.userExpression == "" { + log.V(1).Info("DRA CEL filter applied (early filtering only)", + "filter", f.name, + "inputGPUs", len(gpus), + "outputGPUs", len(earlyFilteredGPUs)) + return earlyFilteredGPUs, nil + } + + // If no GPUs passed early filtering, return empty result + if len(earlyFilteredGPUs) == 0 { + return earlyFilteredGPUs, nil + } + + // Get compiled expression from cache + compiledExpr := f.cache.GetOrCompile(f.userExpression) + if compiledExpr.Error != nil { + return nil, fmt.Errorf("failed to compile CEL expression %q: %w", f.userExpression, compiledExpr.Error) + } + + // Apply CEL filtering using DRA + filteredGPUs := make([]*tfv1.GPU, 0, len(earlyFilteredGPUs)) + for _, gpu := range earlyFilteredGPUs { + // Convert GPU to DRA Device + device, err := convertGPUToDevice(gpu) + if err != nil { + log.Error(err, "Failed to convert GPU to Device", "gpu", gpu.Name) + continue + } + + // Evaluate CEL expression + matches, details, err := compiledExpr.DeviceMatches(ctx, device) + if err != nil { + log.Error(err, "CEL expression evaluation failed", + "expression", f.userExpression, + "gpu", gpu.Name, + "details", details) + // On error, exclude the GPU (fail-safe) + continue + } + + if matches { + filteredGPUs = append(filteredGPUs, gpu) + } + } + + log.V(1).Info("DRA CEL filter applied", + "filter", f.name, + "displayExpression", f.displayExpression, + "userExpression", f.userExpression, + "inputGPUs", len(gpus), + "earlyFilteredGPUs", len(earlyFilteredGPUs), + "outputGPUs", len(filteredGPUs)) + + return filteredGPUs, nil +} + +// convertGPUToDevice converts tfv1.GPU to dracel.Device +func convertGPUToDevice(gpu *tfv1.GPU) (dracel.Device, error) { + if gpu == nil { + return dracel.Device{}, fmt.Errorf("GPU is nil") + } + + allowMultiple := true + device := dracel.Device{ + Driver: constants.DRADriverName, + AllowMultipleAllocations: &allowMultiple, + Attributes: make(map[resourceapi.QualifiedName]resourceapi.DeviceAttribute), + Capacity: make(map[resourceapi.QualifiedName]resourceapi.DeviceCapacity), + } + + // Map basic attributes + device.Attributes[GPUFieldName] = resourceapi.DeviceAttribute{StringValue: &gpu.Name} + device.Attributes[GPUFieldNamespace] = resourceapi.DeviceAttribute{StringValue: &gpu.Namespace} + model := gpu.Status.GPUModel + 
device.Attributes[GPUFieldGPUModel] = resourceapi.DeviceAttribute{StringValue: &model} + uuid := gpu.Status.UUID + device.Attributes[GPUFieldUUID] = resourceapi.DeviceAttribute{StringValue: &uuid} + usedBy := string(gpu.Status.UsedBy) + device.Attributes[GPUFieldUsedBy] = resourceapi.DeviceAttribute{StringValue: &usedBy} + message := gpu.Status.Message + device.Attributes[GPUFieldMessage] = resourceapi.DeviceAttribute{StringValue: &message} + + // Map labels with prefix + if len(gpu.Labels) > 0 { + for k, v := range gpu.Labels { + labelValue := v + device.Attributes[resourceapi.QualifiedName(fmt.Sprintf("%s.%s", GPUFieldLabels, k))] = resourceapi.DeviceAttribute{StringValue: &labelValue} + } + } + + // Map annotations with prefix + if len(gpu.Annotations) > 0 { + for k, v := range gpu.Annotations { + annotationValue := v + device.Attributes[resourceapi.QualifiedName(fmt.Sprintf("%s.%s", GPUFieldAnnotations, k))] = resourceapi.DeviceAttribute{StringValue: &annotationValue} + } + } + + // Map nodeSelector with prefix + if len(gpu.Status.NodeSelector) > 0 { + for k, v := range gpu.Status.NodeSelector { + selectorValue := v + device.Attributes[resourceapi.QualifiedName(fmt.Sprintf("%s.%s", GPUFieldNodeSelector, k))] = resourceapi.DeviceAttribute{StringValue: &selectorValue} + } + } + + // Map runningApps as JSON string + if len(gpu.Status.RunningApps) > 0 { + appsJSON, err := json.Marshal(gpu.Status.RunningApps) + if err != nil { + return dracel.Device{}, fmt.Errorf("failed to marshal runningApps: %w", err) + } + appsStr := string(appsJSON) + device.Attributes[GPUFieldRunningApps] = resourceapi.DeviceAttribute{StringValue: &appsStr} + } + + // Map capacity (tflops and vram) - DRA experimental version maintains capacity state + if gpu.Status.Capacity != nil { + device.Capacity[ResourceFieldTFlops] = resourceapi.DeviceCapacity{Value: gpu.Status.Capacity.Tflops} + device.Capacity[ResourceFieldVRAM] = resourceapi.DeviceCapacity{Value: gpu.Status.Capacity.Vram} + } + + return device, nil +} diff --git a/internal/gpuallocator/filter/cel_filter/expression_cache.go b/internal/gpuallocator/filter/cel_filter/expression_cache.go index 4065c3b9..f98fb1d1 100644 --- a/internal/gpuallocator/filter/cel_filter/expression_cache.go +++ b/internal/gpuallocator/filter/cel_filter/expression_cache.go @@ -88,6 +88,10 @@ func (c *ExpressionCache) GetOrCompileProgram(expression string) (cel.Program, e return nil, fmt.Errorf("failed to compile CEL expression %q: %w", expression, issues.Err()) } + // Validate result type - must return boolean + // Note: Skip type validation for now as CEL type system is complex + // Runtime validation in Filter method is sufficient + program, err := c.env.Program(ast) if err != nil { c.misses++ @@ -121,7 +125,7 @@ func (c *ExpressionCache) hashExpression(expression string) string { // evictLRU removes the least recently used entry from cache func (c *ExpressionCache) evictLRU() { var oldestKey string - var oldestTime time.Time = time.Now() + var oldestTime = time.Now() for key, cached := range c.cache { if cached.AccessedAt.Before(oldestTime) { diff --git a/internal/gpuallocator/gpuallocator.go b/internal/gpuallocator/gpuallocator.go index 44deb3c4..21b0c5a8 100644 --- a/internal/gpuallocator/gpuallocator.go +++ b/internal/gpuallocator/gpuallocator.go @@ -31,7 +31,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/retry" - "k8s.io/kubernetes/pkg/scheduler/framework" + fwk "k8s.io/kube-scheduler/framework" 
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" @@ -41,6 +41,8 @@ import ( const MaxGPUCounterPerAllocation = 128 const CleanUpCheckInterval = 3 * time.Minute +var GPUCapacityMap = map[string]tfv1.Resource{} + type Strategy interface { Score(gpu *tfv1.GPU) int @@ -52,7 +54,7 @@ type SimulateSchedulingFilterDetail struct { FilterStageDetails []filter.FilterDetail } -func (p *SimulateSchedulingFilterDetail) Clone() framework.StateData { +func (p *SimulateSchedulingFilterDetail) Clone() fwk.StateData { return p } @@ -250,6 +252,37 @@ func (s *GpuAllocator) applyLegacyFilters( return filteredGPUs, filterDetails, nil } +func (s *GpuAllocator) FilterWithPreempt( + req *tfv1.AllocRequest, + preemptAllocRequests []*tfv1.AllocRequest, +) ([]*tfv1.GPU, []filter.FilterDetail, error) { + toFilterGPUs := []*tfv1.GPU{} + for _, preemptAllocRequest := range preemptAllocRequests { + for _, gpuName := range preemptAllocRequest.GPUNames { + gpu := s.gpuStore[types.NamespacedName{Name: gpuName}] + if gpu == nil { + return nil, nil, fmt.Errorf("gpu %s not found", gpuName) + } + gpuCopy := gpu.DeepCopy() + gpuCopy.Status.Available.Tflops.Add(preemptAllocRequest.Request.Tflops) + gpuCopy.Status.Available.Vram.Add(preemptAllocRequest.Request.Vram) + toFilterGPUs = append(toFilterGPUs, gpuCopy) + } + } + + filterRegistry := s.filterRegistry.With(filter.NewResourceFilter(req.Request)) + // Add GPU model filter if specified + if req.GPUModel != "" { + filterRegistry = filterRegistry.With(filter.NewGPUModelFilter(req.GPUModel)) + } + // No need to check count and other filters since it's always in the same node during each preempt trial + filteredGPUs, filterDetails, err := filterRegistry.Apply(s.ctx, req.WorkloadNameNamespace, toFilterGPUs, false) + if err != nil { + return nil, nil, fmt.Errorf("apply filters: %w", err) + } + return filteredGPUs, filterDetails, nil +} + func (s *GpuAllocator) Select(req *tfv1.AllocRequest, filteredGPUs []*tfv1.GPU) ([]*tfv1.GPU, error) { pool := &tfv1.GPUPool{} if err := s.Get(s.ctx, client.ObjectKey{Name: req.PoolName}, pool); err != nil { @@ -312,6 +345,19 @@ func (s *GpuAllocator) Bind( gpuNodeName = gpu.Status.NodeSelector[constants.KubernetesHostNameLabel] } + // Double-check resource availability to prevent over-allocation + if gpu.Status.Available == nil { + return nil, fmt.Errorf("GPU %s has nil available resources", selectedGPU) + } + if gpu.Status.Available.Tflops.Cmp(req.Request.Tflops) < 0 { + return nil, fmt.Errorf("GPU %s insufficient TFLOPs: available %s, requested %s", + selectedGPU, gpu.Status.Available.Tflops.String(), req.Request.Tflops.String()) + } + if gpu.Status.Available.Vram.Cmp(req.Request.Vram) < 0 { + return nil, fmt.Errorf("GPU %s insufficient VRAM: available %s, requested %s", + selectedGPU, gpu.Status.Available.Vram.String(), req.Request.Vram.String()) + } + // reduce available resource on the GPU status gpu.Status.Available.Tflops.Sub(req.Request.Tflops) gpu.Status.Available.Vram.Sub(req.Request.Vram) @@ -372,9 +418,8 @@ func (s *GpuAllocator) Alloc(req *tfv1.AllocRequest) ([]*tfv1.GPU, error) { func (s *GpuAllocator) CheckQuotaAndFilter(ctx context.Context, req *tfv1.AllocRequest, isSimulateSchedule bool) ([]*tfv1.GPU, []filter.FilterDetail, error) { <-s.initializedCh - // Fast quota check (fail fast if quota insufficient) if err := s.quotaStore.CheckQuotaAvailable(req.WorkloadNameNamespace.Namespace, req); err != nil { - return nil, nil, 
fmt.Errorf("quota check failed: %w", err) + return nil, nil, err } // Get GPUs from the pool using the in-memory store @@ -604,12 +649,13 @@ func (s *GpuAllocator) AdjustAllocation(ctx context.Context, adjustRequest tfv1. } func (s *GpuAllocator) ListNonUsingNodes() sets.Set[string] { + <-s.initializedCh set := sets.New[string]() - for nodeName, gpuNames := range s.nodeWorkerStore { + for nodeName, podNames := range s.nodeWorkerStore { // If using by TF, the node can not be used by original scheduler // If using by other scheduler, won't record as TF worker, thus the map is empty // Return non using nodes can ensure original scheduler not conflict with TF - if len(gpuNames) == 0 { + if len(podNames) == 0 { set.Insert(nodeName) } } @@ -623,6 +669,20 @@ func (s *GpuAllocator) DeallocByPodIdentifier(ctx context.Context, podIdentifier } } +func (s *GpuAllocator) GetAllocationReqByNodeName(nodeName string) []*tfv1.AllocRequest { + allocRequests := make([]*tfv1.AllocRequest, 0, 8) + for workerName := range s.nodeWorkerStore[nodeName] { + podUID := s.podNamespaceNsToPodUID[workerName.String()] + if podUID == "" { + continue + } + if request, exists := s.uniqueAllocation[podUID]; exists { + allocRequests = append(allocRequests, request) + } + } + return allocRequests +} + func (s *GpuAllocator) checkGPUCapacityAndQuota(gpu *tfv1.GPU, oldRes, newRes tfv1.Resource) (tfv1.Resource, error) { if gpu.Status.Available == nil { return tfv1.Resource{}, fmt.Errorf("GPU available is nil, skip check") @@ -909,6 +969,11 @@ func (s *GpuAllocator) handleGPUCreate(ctx context.Context, gpu *tfv1.GPU) { defer s.storeMutex.Unlock() if s.gpuStore[key] != nil { + if gpu.Status.GPUModel != "" { + if _, exists := GPUCapacityMap[gpu.Status.GPUModel]; !exists { + GPUCapacityMap[gpu.Status.GPUModel] = *gpu.Status.Capacity + } + } syncGPUMetadataAndStatusFromCluster(s.gpuStore[key], gpu) log.V(6).Info("GPU already exists in store", "name", key.Name) return @@ -924,25 +989,7 @@ func (s *GpuAllocator) handleGPUCreate(ctx context.Context, gpu *tfv1.GPU) { } s.gpuStore[key] = gpuInMem - if gpuInMem.Status.NodeSelector != nil { - gpuNodeName := gpuInMem.Status.NodeSelector[constants.KubernetesHostNameLabel] - if gpuNodeName != "" { - if _, exists := s.nodeGpuStore[gpuNodeName]; !exists { - s.nodeGpuStore[gpuNodeName] = make(map[string]*tfv1.GPU, 4) - } - s.nodeGpuStore[gpuNodeName][gpuInMem.Name] = gpuInMem - } - } - - if gpuInMem.Labels != nil { - pool := gpuInMem.Labels[constants.GpuPoolKey] - if pool != "" { - if _, exists := s.poolGpuStore[pool]; !exists { - s.poolGpuStore[pool] = make(map[string]*tfv1.GPU, 128) - } - s.poolGpuStore[pool][gpuInMem.Name] = gpuInMem - } - } + s.addOrUpdateGPUMaps(gpuInMem) log.Info("Added GPU to store", "name", key.Name, "phase", gpu.Status.Phase) } @@ -991,6 +1038,38 @@ func (s *GpuAllocator) handleGPUUpdate(ctx context.Context, gpu *tfv1.GPU) { s.gpuStore[key] = gpu.DeepCopy() log.V(6).Info("Updated GPU in store (new entry)", "name", key.Name, "phase", gpu.Status.Phase) } + + s.addOrUpdateGPUMaps(s.gpuStore[key]) +} + +func (s *GpuAllocator) addOrUpdateGPUMaps(gpuInMem *tfv1.GPU) { + if gpuInMem.Status.NodeSelector != nil { + gpuNodeName := gpuInMem.Status.NodeSelector[constants.KubernetesHostNameLabel] + if gpuNodeName != "" { + if _, exists := s.nodeGpuStore[gpuNodeName]; !exists { + s.nodeGpuStore[gpuNodeName] = make(map[string]*tfv1.GPU, 4) + } + s.nodeGpuStore[gpuNodeName][gpuInMem.Name] = gpuInMem + if _, exists := s.nodeWorkerStore[gpuNodeName]; !exists { + 
s.nodeWorkerStore[gpuNodeName] = make(map[types.NamespacedName]struct{}, 4) + } + } + + } + + if gpuInMem.Labels != nil { + pool := gpuInMem.Labels[constants.GpuPoolKey] + if pool != "" { + if _, exists := s.poolGpuStore[pool]; !exists { + s.poolGpuStore[pool] = make(map[string]*tfv1.GPU, 128) + } + s.poolGpuStore[pool][gpuInMem.Name] = gpuInMem + } + } + + if gpuInMem.Status.GPUModel != "" { + GPUCapacityMap[gpuInMem.Status.GPUModel] = *gpuInMem.Status.Capacity + } } func syncGPUMetadataAndStatusFromCluster(old *tfv1.GPU, gpu *tfv1.GPU) { @@ -1163,6 +1242,68 @@ func (s *GpuAllocator) ReconcileAllocationState() { }) } +func (s *GpuAllocator) ReconcileAllocationStateForTesting() { + s.reconcileAllocationState() +} + +func (s *GpuAllocator) CheckQuotaAndFilterSingleNodePreempt( + nodeName string, allocReq *tfv1.AllocRequest, toPreemptPods sets.Set[types.NamespacedName], +) error { + <-s.initializedCh + // Only need to check total quotas when preempting + toPreemptUsage := &tfv1.GPUResourceUsage{ + Requests: tfv1.Resource{ + Tflops: resource.Quantity{}, + Vram: resource.Quantity{}, + }, + Limits: tfv1.Resource{ + Tflops: resource.Quantity{}, + Vram: resource.Quantity{}, + }, + } + workers := s.nodeWorkerStore[nodeName] + preemptAllocRequests := make([]*tfv1.AllocRequest, 0, len(workers)) + for workerName := range workers { + if !toPreemptPods.Has(workerName) { + continue + } + podUID := s.podNamespaceNsToPodUID[workerName.String()] + if podUID == "" { + continue + } + existingAllocation := s.uniqueAllocation[podUID] + if existingAllocation == nil { + continue + } + toPreemptUsage.Requests.Tflops.Add(existingAllocation.Request.Tflops) + toPreemptUsage.Requests.Vram.Add(existingAllocation.Request.Vram) + toPreemptUsage.Limits.Tflops.Add(existingAllocation.Limit.Tflops) + toPreemptUsage.Limits.Vram.Add(existingAllocation.Limit.Vram) + preemptAllocRequests = append(preemptAllocRequests, existingAllocation) + } + + if log.FromContext(s.ctx).V(5).Enabled() { + log.FromContext(s.ctx).V(5).Info("Preempting node and check quotas", "nodeName", nodeName, "toPreemptUsage", toPreemptUsage) + } + + if err := s.quotaStore.CheckTotalQuotaRelaxed(allocReq, toPreemptUsage); err != nil { + return fmt.Errorf("quota check failed during preempt: %w", err) + } + + // Get GPUs from the pool using the in-memory store + if allocReq.PoolName == "" { + return fmt.Errorf("GPU Pool name is empty, can not find GPUs during preempt") + } + filteredGPUs, _, err := s.FilterWithPreempt(allocReq, preemptAllocRequests) + if err != nil { + return err + } + if len(filteredGPUs) < int(allocReq.Count) { + return fmt.Errorf("no gpus available or valid in pool %s after filtering during preempt", allocReq.PoolName) + } + return nil +} + func (s *GpuAllocator) reconcileAllocationState() { ctx := s.ctx logger := log.FromContext(ctx) @@ -1210,7 +1351,7 @@ func (s *GpuAllocator) reconcileAllocationState() { // No workers, but node contains GPU, need include into nodeWorkerStore with empty map gpuNodeName := gpu.Status.NodeSelector[constants.KubernetesHostNameLabel] if _, exists := s.nodeWorkerStore[gpuNodeName]; !exists { - s.nodeWorkerStore[gpuNodeName] = map[types.NamespacedName]struct{}{} + s.nodeWorkerStore[gpuNodeName] = make(map[types.NamespacedName]struct{}, 4) } } @@ -1339,6 +1480,11 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest return &tfv1.AllocRequest{}, "gpu count annotation is too large", nil } + qosLevel := tfv1.QoSLevel(pod.Annotations[constants.QoSLevelAnnotation]) + if qosLevel == "" { 
+ qosLevel = tfv1.QoSMedium + } + disableCELFilter := false if disabledFeatures, exists := pod.Annotations[constants.DisableFeaturesAnnotation]; exists { disabledFeaturesList := strings.Split(disabledFeatures, ",") @@ -1355,7 +1501,7 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest Limit: gpuLimitResource, DisableCELFilter: disableCELFilter, - CELFilterExpression: pod.Annotations[constants.CELFilterExpressionAnnotation], + CELFilterExpression: pod.Annotations[constants.DRACelExpressionAnnotation], Count: uint(count), GPUModel: pod.Annotations[constants.GPUModelAnnotation], @@ -1364,6 +1510,7 @@ func (s *GpuAllocator) ComposeAllocationRequest(pod *v1.Pod) (*tfv1.AllocRequest Namespace: pod.Namespace, }, PodMeta: pod.ObjectMeta, + QoS: qosLevel, } // for already allocated workers, set the GPU device IDs for further scaling and retrieval diff --git a/internal/gpuallocator/gpuallocator_test.go b/internal/gpuallocator/gpuallocator_test.go index 08d78130..bb3a494d 100644 --- a/internal/gpuallocator/gpuallocator_test.go +++ b/internal/gpuallocator/gpuallocator_test.go @@ -97,7 +97,7 @@ var _ = Describe("GPU Allocator", func() { if err := k8sClient.Get(ctx, types.NamespacedName{Name: "test-pool"}, pool); err != nil { Expect(err).NotTo(HaveOccurred()) } - _, _ = RefreshGPUNodeCapacity(ctx, k8sClient, gpuNode, pool) + _, _ = RefreshGPUNodeCapacity(ctx, k8sClient, gpuNode, pool, allocator) // Verify resources were reduced on the allocated GPU gpu := getGPU(gpus[0].Name) @@ -107,8 +107,14 @@ var _ = Describe("GPU Allocator", func() { node := getGPUNode(gpu) diffTflops := node.Status.TotalTFlops.Value() - node.Status.AvailableTFlops.Value() diffVRAM := node.Status.TotalVRAM.Value() - node.Status.AvailableVRAM.Value() + + diffVirtualTflops := node.Status.VirtualTFlops.Value() - node.Status.VirtualAvailableTFlops.Value() + diffVirtualVRAM := node.Status.VirtualVRAM.Value() - node.Status.VirtualAvailableVRAM.Value() Expect(diffTflops).To(BeEquivalentTo(50)) Expect(diffVRAM).To(BeEquivalentTo(8 * 1024 * 1024 * 1024)) + + Expect(diffVirtualTflops).To(BeEquivalentTo(50)) + Expect(diffVirtualVRAM).To(BeEquivalentTo(8 * 1024 * 1024 * 1024)) }) It("should allocate multiple GPUs from the same node", func() { diff --git a/internal/gpuallocator/node_capacity.go b/internal/gpuallocator/node_capacity.go index dc7488f6..43cce870 100644 --- a/internal/gpuallocator/node_capacity.go +++ b/internal/gpuallocator/node_capacity.go @@ -11,7 +11,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -func RefreshGPUNodeCapacity(ctx context.Context, k8sClient client.Client, node *tfv1.GPUNode, pool *tfv1.GPUPool) ([]string, error) { +func RefreshGPUNodeCapacity( + ctx context.Context, k8sClient client.Client, + node *tfv1.GPUNode, pool *tfv1.GPUPool, + allocator *GpuAllocator, +) ([]string, error) { gpuList := &tfv1.GPUList{} if err := k8sClient.List(ctx, gpuList, client.MatchingLabels{constants.LabelKeyOwner: node.Name}); err != nil { return nil, fmt.Errorf("failed to list GPUs: %w", err) @@ -54,6 +58,17 @@ func RefreshGPUNodeCapacity(ctx context.Context, k8sClient client.Client, node * node.Status.VirtualTFlops = virtualTFlops node.Status.VirtualVRAM = virtualVRAM + vramAvailable := virtualVRAM.DeepCopy() + tflopsAvailable := virtualTFlops.DeepCopy() + + allocRequests := allocator.GetAllocationReqByNodeName(node.Name) + for _, allocRequest := range allocRequests { + vramAvailable.Sub(allocRequest.Limit.Vram) + tflopsAvailable.Sub(allocRequest.Limit.Tflops) + } + 
node.Status.VirtualAvailableVRAM = &vramAvailable + node.Status.VirtualAvailableTFlops = &tflopsAvailable + node.Status.Phase = tfv1.TensorFusionGPUNodePhaseRunning if !equality.Semantic.DeepEqual(node.Status, statusCopy) { diff --git a/internal/metrics/encoders/influx.go b/internal/metrics/encoders/influx.go index a459c7ee..4d089759 100644 --- a/internal/metrics/encoders/influx.go +++ b/internal/metrics/encoders/influx.go @@ -4,6 +4,7 @@ import ( "time" metricsProto "github.com/influxdata/line-protocol/v2/lineprotocol" + "k8s.io/klog/v2" ) // InfluxStrategy implements InfluxDB line protocol encoding @@ -28,7 +29,12 @@ func (s *InfluxStrategy) AddTag(key, value string) { } func (s *InfluxStrategy) AddField(key string, value any) { - s.enc.AddField(key, metricsProto.MustNewValue(value)) + v, parsed := metricsProto.NewValue(value) + if !parsed { + klog.Error("metrics influx encoder failed to parse value: ", key, value) + return + } + s.enc.AddField(key, v) } func (s *InfluxStrategy) EndLine(timestamp time.Time) { diff --git a/internal/metrics/recorder.go b/internal/metrics/recorder.go index 9050df00..7f47bab6 100644 --- a/internal/metrics/recorder.go +++ b/internal/metrics/recorder.go @@ -4,6 +4,7 @@ import ( "io" "math" "strconv" + "strings" "sync" "time" @@ -19,15 +20,17 @@ import ( // Worker level metrics, include worker resources/costs status // map updated in one reconcile loop in single goroutine, thus no RW lock needed var workerMetricsLock sync.RWMutex -var workerMetricsMap = map[string]*WorkerResourceMetrics{} +var workerMetricsMap = make(map[string]*WorkerResourceMetrics, 200) // Node level metrics, include node allocation/costs status var nodeMetricsLock sync.RWMutex -var nodeMetricsMap = map[string]*NodeResourceMetrics{} +var nodeMetricsMap = make(map[string]*NodeResourceMetrics, 100) // Pool level metrics, include pool allocation/costs status var poolMetricsLock sync.RWMutex -var poolMetricsMap = map[string]*PoolResourceMetrics{} +var poolMetricsMap = make(map[string]*PoolResourceMetrics, 4) + +var settingLock sync.RWMutex var log = ctrl.Log.WithName("metrics-recorder") @@ -37,6 +40,9 @@ type MetricsRecorder struct { // Raw billing result for node and workers HourlyUnitPriceMap map[string]float64 + // Pool level eviction protection price ratio map, key is pool name + PoolEvictionProtectionPriceRatioMap map[string]string + // Worker level unit price map, key is pool name, second level key is QoS level WorkerUnitPriceMap map[string]map[string]RawBillingPricing } @@ -80,14 +86,16 @@ func SetWorkerMetricsByWorkload(pod *corev1.Pod) { // Initialize metrics if _, ok := workerMetricsMap[pod.Name]; !ok { workerMetricsMap[pod.Name] = &WorkerResourceMetrics{ - WorkerName: pod.Name, - WorkloadName: pod.Labels[constants.WorkloadKey], - PoolName: pod.Annotations[constants.GpuPoolKey], - Namespace: pod.Namespace, - QoS: pod.Annotations[constants.QoSLevelAnnotation], - podLabels: pod.Labels, - RawCost: 0, - LastRecordTime: time.Now(), + WorkerName: pod.Name, + WorkloadName: pod.Labels[constants.WorkloadKey], + PoolName: pod.Annotations[constants.GpuPoolKey], + Namespace: pod.Namespace, + QoS: pod.Annotations[constants.QoSLevelAnnotation], + podLabels: pod.Labels, + RawCost: 0, + LastRecordTime: time.Now(), + creationTime: pod.CreationTimestamp.Time, + evictionProtection: pod.Annotations[constants.EvictionProtectionAnnotation], } } @@ -187,19 +195,37 @@ func SetPoolMetrics(poolObj *tfv1.GPUPool) { } if poolObj.Status.VirtualAvailableTFlops != nil && poolObj.Status.VirtualAvailableVRAM != nil { 
- poolMetricsMap[poolObj.Name].AllocatedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedVramBytes / - poolObj.Status.VirtualVRAM.AsApproximateFloat64() * 100 + virtualVRAM := poolObj.Status.VirtualVRAM.AsApproximateFloat64() + virtualTFlops := poolObj.Status.VirtualTFlops.AsApproximateFloat64() + + if virtualVRAM > 0 { + poolMetricsMap[poolObj.Name].AllocatedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedVramBytes / virtualVRAM * 100 + } else { + poolMetricsMap[poolObj.Name].AllocatedVramPercentToVirtualCap = 0 + } - poolMetricsMap[poolObj.Name].AllocatedTflopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedTflops / - poolObj.Status.VirtualTFlops.AsApproximateFloat64() * 100 - poolMetricsMap[poolObj.Name].AssignedLimitedTFlops = poolObj.Status.VirtualTFlops.AsApproximateFloat64() - + if virtualTFlops > 0 { + poolMetricsMap[poolObj.Name].AllocatedTflopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AllocatedTflops / virtualTFlops * 100 + } else { + poolMetricsMap[poolObj.Name].AllocatedTflopsPercentToVirtualCap = 0 + } + + poolMetricsMap[poolObj.Name].AssignedLimitedTFlops = virtualTFlops - poolObj.Status.VirtualAvailableTFlops.AsApproximateFloat64() - poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes = poolObj.Status.VirtualVRAM.AsApproximateFloat64() - + poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes = virtualVRAM - poolObj.Status.VirtualAvailableVRAM.AsApproximateFloat64() - poolMetricsMap[poolObj.Name].AssignedLimitedTFlopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedTFlops / - poolObj.Status.VirtualTFlops.AsApproximateFloat64() * 100 - poolMetricsMap[poolObj.Name].AssignedLimitedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes / - poolObj.Status.VirtualVRAM.AsApproximateFloat64() * 100 + + if virtualTFlops > 0 { + poolMetricsMap[poolObj.Name].AssignedLimitedTFlopsPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedTFlops / virtualTFlops * 100 + } else { + poolMetricsMap[poolObj.Name].AssignedLimitedTFlopsPercentToVirtualCap = 0 + } + + if virtualVRAM > 0 { + poolMetricsMap[poolObj.Name].AssignedLimitedVramPercentToVirtualCap = poolMetricsMap[poolObj.Name].AssignedLimitedVramBytes / virtualVRAM * 100 + } else { + poolMetricsMap[poolObj.Name].AssignedLimitedVramPercentToVirtualCap = 0 + } } poolMetricsMap[poolObj.Name].GPUCount = int(poolObj.Status.TotalGPUs) } @@ -269,13 +295,17 @@ func (mr *MetricsRecorder) Start() { // Clean up worker metrics that have been deleted go func() { for { - time.Sleep(5 * time.Minute) + time.Sleep(1 * time.Minute) workerMetricsLock.Lock() - for _, metrics := range workerMetricsMap { + var keysToDelete []string + for key, metrics := range workerMetricsMap { if metrics.deletionTimestamp != nil && !metrics.deletionTimestamp.IsZero() { - delete(workerMetricsMap, metrics.WorkerName) + keysToDelete = append(keysToDelete, key) } } + for _, key := range keysToDelete { + delete(workerMetricsMap, key) + } workerMetricsLock.Unlock() } }() @@ -288,13 +318,12 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { now := time.Now() enc := NewEncoder(config.GetGlobalConfig().MetricsFormat) - workerMetricsLock.RLock() + workerMetricsLock.Lock() activeWorkerCnt := 0 activeWorkerAndNodeByPool := map[string]*ActiveNodeAndWorker{} for _, metrics := range workerMetricsMap { - if metrics.deletionTimestamp != nil && !metrics.deletionTimestamp.IsZero() { metrics.RawCost = mr.getWorkerRawCost(metrics, 
metrics.deletionTimestamp.Sub(metrics.LastRecordTime)) } else { @@ -315,7 +344,9 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { nodeCnt: 0, } } - activeWorkerAndNodeByPool[metrics.PoolName].workerCnt++ + if metrics.deletionTimestamp == nil || metrics.deletionTimestamp.IsZero() { + activeWorkerAndNodeByPool[metrics.PoolName].workerCnt++ + } enc.StartLine("tf_worker_resources") enc.AddTag("namespace", metrics.Namespace) @@ -344,7 +375,7 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { enc.EndLine(now) } - workerMetricsLock.RUnlock() + workerMetricsLock.Unlock() nodeMetricsLock.RLock() @@ -395,16 +426,16 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { enc.StartLine("tf_pool_metrics") enc.AddTag("pool", metrics.PoolName) enc.AddTag("phase", metrics.Phase) - enc.AddField("allocatedTflops", metrics.AllocatedTflops) - enc.AddField("allocatedTflopsPercent", metrics.AllocatedTflopsPercent) - enc.AddField("allocatedTflopsPercentVirtual", metrics.AllocatedTflopsPercentToVirtualCap) - enc.AddField("allocatedVramBytes", metrics.AllocatedVramBytes) - enc.AddField("allocatedVramPercent", metrics.AllocatedVramPercent) - enc.AddField("allocatedVramPercentVirtual", metrics.AllocatedVramPercentToVirtualCap) - enc.AddField("assignedLimitedTFlops", metrics.AssignedLimitedTFlops) - enc.AddField("assignedLimitedVramBytes", metrics.AssignedLimitedVramBytes) - enc.AddField("assignedLimitedTFlopsPercentVirtual", metrics.AssignedLimitedTFlopsPercentToVirtualCap) - enc.AddField("assignedLimitedVramPercentVirtual", metrics.AssignedLimitedVramPercentToVirtualCap) + enc.AddField("allocated_tflops", metrics.AllocatedTflops) + enc.AddField("allocated_tflops_percent", metrics.AllocatedTflopsPercent) + enc.AddField("allocated_tflops_percent_virtual", metrics.AllocatedTflopsPercentToVirtualCap) + enc.AddField("allocated_vram_bytes", metrics.AllocatedVramBytes) + enc.AddField("allocated_vram_percent", metrics.AllocatedVramPercent) + enc.AddField("allocated_vram_percent_virtual", metrics.AllocatedVramPercentToVirtualCap) + enc.AddField("limited_tflops", metrics.AssignedLimitedTFlops) + enc.AddField("limited_vram_bytes", metrics.AssignedLimitedVramBytes) + enc.AddField("limited_tflops_percent_virtual", metrics.AssignedLimitedTFlopsPercentToVirtualCap) + enc.AddField("limited_vram_percent_virtual", metrics.AssignedLimitedVramPercentToVirtualCap) enc.AddField("gpu_count", int64(metrics.GPUCount)) enc.EndLine(now) } @@ -421,7 +452,51 @@ func (mr *MetricsRecorder) RecordMetrics(writer io.Writer) { log.Info("metrics and raw billing recorded:", "workerCount", activeWorkerCnt, "nodeCount", len(nodeMetricsMap)) } +// Update metrics recorder's raw billing map +func (r *MetricsRecorder) UpdateMetricsRecorder(pool *tfv1.GPUPool, specChanged bool) { + const dollarSign = "$" + settingLock.Lock() + defer settingLock.Unlock() + if pool.Spec.QosConfig == nil { + log.Info("QosConfig is nil, skip updating metrics recorder", "pool", pool.Name) + return + } + + qosConfig := pool.Spec.QosConfig + if _, ok := r.WorkerUnitPriceMap[pool.Name]; !ok { + r.WorkerUnitPriceMap[pool.Name] = make(map[string]RawBillingPricing) + } + + if r.PoolEvictionProtectionPriceRatioMap == nil { + r.PoolEvictionProtectionPriceRatioMap = make(map[string]string, 4) + } + r.PoolEvictionProtectionPriceRatioMap[pool.Name] = qosConfig.EvictionProtectionPriceRatio + + pricingDetail := r.WorkerUnitPriceMap[pool.Name] + if !specChanged && len(pricingDetail) == 0 { + return + } + // Pricing potentially changed + for _, pricing := 
range qosConfig.Pricing { + tflopsPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerFP16TFlopsPerHour, dollarSign), 64) + vramPerHour, _ := strconv.ParseFloat(strings.TrimPrefix(pricing.Requests.PerGBOfVRAMPerHour, dollarSign), 64) + limitOverRequestChargingRatio, _ := strconv.ParseFloat(pricing.LimitsOverRequestsChargingRatio, 64) + + pricingDetail[string(pricing.Qos)] = RawBillingPricing{ + TflopsPerSecond: tflopsPerHour / float64(3600), + VramPerSecond: vramPerHour / float64(3600), + + TflopsOverRequestPerSecond: tflopsPerHour / float64(3600) * limitOverRequestChargingRatio, + VramOverRequestPerSecond: vramPerHour / float64(3600) * limitOverRequestChargingRatio, + } + } + + log.V(5).Info("Updated metrics recorder", "pool", pool.Name, "pricing", pricingDetail) +} + func (mr *MetricsRecorder) getWorkerRawCost(metrics *WorkerResourceMetrics, duration time.Duration) float64 { + settingLock.RLock() + defer settingLock.RUnlock() qosPricing, ok := mr.WorkerUnitPriceMap[metrics.PoolName] // The qos pricing for this pool not set if !ok { @@ -446,7 +521,37 @@ func (mr *MetricsRecorder) getWorkerRawCost(metrics *WorkerResourceMetrics, dura rawCostVRAMLimitOverRequest := (metrics.VramBytesLimit - metrics.VramBytesRequest) * pricing.VramOverRequestPerSecond / constants.GiBToBytes rawCostPerVRAM := pricing.VramPerSecond * metrics.VramBytesRequest / constants.GiBToBytes - return (rawCostPerTflops + rawCostPerVRAM + rawCostTflopsLimitOverRequest + rawCostVRAMLimitOverRequest) * duration.Seconds() * float64(metrics.GPUCount) + baseCost := (rawCostPerTflops + rawCostPerVRAM + rawCostTflopsLimitOverRequest + rawCostVRAMLimitOverRequest) * duration.Seconds() * float64(metrics.GPUCount) + + // Apply eviction protection price ratio if the pod is under protection and QoS is not critical + if metrics.evictionProtection != "" && qosLevel != constants.QoSLevelCritical { + if isUnderProtection := mr.isUnderEvictionProtection(metrics); isUnderProtection { + protectionPriceRatio := mr.PoolEvictionProtectionPriceRatioMap[metrics.PoolName] + protectionPriceRatioFloat, _ := strconv.ParseFloat(protectionPriceRatio, 64) + if protectionPriceRatioFloat < 1 { + protectionPriceRatioFloat = constants.DefaultEvictionProtectionPriceRatio + } + baseCost *= protectionPriceRatioFloat + } + } + + return baseCost +} + +// isUnderEvictionProtection checks if a worker is under eviction protection +func (mr *MetricsRecorder) isUnderEvictionProtection(metrics *WorkerResourceMetrics) bool { + if metrics.evictionProtection == "" { + return false + } + + // Parse protection duration (1h, 5h, 24h, etc.) + duration, err := time.ParseDuration(metrics.evictionProtection) + if err != nil { + return false + } + + protectionEndTime := metrics.creationTime.Add(duration) + return time.Now().Before(protectionEndTime) } // unit price data comes from global config map, and multi-GPU instance should normalized with per GPU pricing, e.g. 
8xA100 p4d.24xlarge price should divide by 8 diff --git a/internal/metrics/types.go b/internal/metrics/types.go index ff3449cb..df06f169 100644 --- a/internal/metrics/types.go +++ b/internal/metrics/types.go @@ -51,6 +51,10 @@ type WorkerResourceMetrics struct { // For more accurate metrics, should record the deletion timestamp to calculate duration for the last metrics deletionTimestamp *time.Time + // Fields for eviction protection tracking - private, not stored in TSDB + creationTime time.Time + evictionProtection string + podLabels map[string]string } diff --git a/internal/quota/quota_store.go b/internal/quota/quota_store.go index 4edc7445..d9450236 100644 --- a/internal/quota/quota_store.go +++ b/internal/quota/quota_store.go @@ -79,7 +79,16 @@ func (qs *QuotaStore) CheckQuotaAvailable(namespace string, req *tfv1.AllocReque if err := qs.checkSingleQuotas(entry, req); err != nil { return err } - return qs.checkTotalQuotas(entry, req) + return qs.checkTotalQuotas(entry, req, nil) +} + +func (qs *QuotaStore) CheckTotalQuotaRelaxed(req *tfv1.AllocRequest, toReleaseResource *tfv1.GPUResourceUsage) error { + entry, exists := qs.QuotaStore[req.WorkloadNameNamespace.Namespace] + if !exists { + // No quota defined for this namespace, allow allocation + return nil + } + return qs.checkTotalQuotas(entry, req, toReleaseResource) } func (qs *QuotaStore) AdjustQuota(namespace string, reqDelta tfv1.Resource, limitDelta tfv1.Resource) { @@ -103,41 +112,51 @@ func (qs *QuotaStore) checkSingleQuotas(entry *QuotaStoreEntry, req *tfv1.AllocR if single.MaxLimits != nil { if !single.MaxLimits.Tflops.IsZero() && req.Limit.Tflops.Cmp(single.MaxLimits.Tflops) > 0 { return &QuotaExceededError{ - Namespace: entry.Quota.Namespace, - Resource: MaxTFlopsLimitResource, - Requested: req.Limit.Tflops, - Limit: single.MaxLimits.Tflops, + Namespace: entry.Quota.Namespace, + Resource: MaxTFlopsLimitResource, + Requested: req.Limit.Tflops, + Limit: single.MaxLimits.Tflops, + Unresolvable: true, } } // Check single VRAM limit (per GPU) if !single.MaxLimits.Vram.IsZero() && req.Request.Vram.Cmp(single.MaxLimits.Vram) > 0 { return &QuotaExceededError{ - Namespace: entry.Quota.Namespace, - Resource: MaxVRAMLimitResource, - Requested: req.Request.Vram, - Limit: single.MaxLimits.Vram, + Namespace: entry.Quota.Namespace, + Resource: MaxVRAMLimitResource, + Requested: req.Request.Vram, + Limit: single.MaxLimits.Vram, + Unresolvable: true, } } // Check single GPU count limit (per worker) if single.MaxGPUCount != nil && int32(req.Count) > *single.MaxGPUCount { return &QuotaExceededError{ - Namespace: entry.Quota.Namespace, - Resource: MaxGPULimitResource, - Requested: *resource.NewQuantity(int64(req.Count), resource.DecimalSI), - Limit: *resource.NewQuantity(int64(*single.MaxGPUCount), resource.DecimalSI), + Namespace: entry.Quota.Namespace, + Resource: MaxGPULimitResource, + Requested: *resource.NewQuantity(int64(req.Count), resource.DecimalSI), + Limit: *resource.NewQuantity(int64(*single.MaxGPUCount), resource.DecimalSI), + Unresolvable: true, } } } return nil } -func (qs *QuotaStore) checkTotalQuotas(entry *QuotaStoreEntry, req *tfv1.AllocRequest) error { +func (qs *QuotaStore) checkTotalQuotas(entry *QuotaStoreEntry, req *tfv1.AllocRequest, toReleaseResource *tfv1.GPUResourceUsage) error { quotaNs := entry.Quota.Namespace + + // Check total requests if entry.Quota.Spec.Total.Requests != nil { total := entry.Quota.Spec.Total.Requests - current := entry.CurrentUsage.Requests + current := *entry.CurrentUsage.Requests.DeepCopy() 
+ + if toReleaseResource != nil { + current.Tflops.Sub(toReleaseResource.Requests.Tflops) + current.Vram.Sub(toReleaseResource.Requests.Vram) + } err := checkTotalExceeded(req, total, current, quotaNs, true) if err != nil { return err @@ -147,13 +166,24 @@ func (qs *QuotaStore) checkTotalQuotas(entry *QuotaStoreEntry, req *tfv1.AllocRe // Check total limits if entry.Quota.Spec.Total.Limits != nil { total := entry.Quota.Spec.Total.Limits - usage := entry.CurrentUsage.Limits + usage := *entry.CurrentUsage.Limits.DeepCopy() + + if toReleaseResource != nil { + usage.Tflops.Sub(toReleaseResource.Limits.Tflops) + usage.Vram.Sub(toReleaseResource.Limits.Vram) + } err := checkTotalExceeded(req, total, usage, quotaNs, false) if err != nil { return err } } + // If it's preempt case, skip checking total workers since it's + // replacing existing workers rather than creating new ones + if toReleaseResource != nil { + return nil + } + // Check total workers, each allocation will create one worker instance if entry.Quota.Spec.Total.MaxWorkers != nil { if entry.CurrentUsage.Workers >= *entry.Quota.Spec.Total.MaxWorkers { @@ -451,10 +481,11 @@ func (qs *QuotaStore) SyncQuotasToK8s(ctx context.Context) { // QuotaExceededError represents a quota exceeded error with detailed information type QuotaExceededError struct { - Namespace string - Resource string - Requested resource.Quantity - Limit resource.Quantity + Namespace string + Resource string + Requested resource.Quantity + Limit resource.Quantity + Unresolvable bool } func (e *QuotaExceededError) Error() string { diff --git a/internal/scheduler/gpuresources/gpuresources.go b/internal/scheduler/gpuresources/gpuresources.go index 16dd1c61..bc893087 100644 --- a/internal/scheduler/gpuresources/gpuresources.go +++ b/internal/scheduler/gpuresources/gpuresources.go @@ -6,12 +6,14 @@ import ( "sort" "strconv" "strings" + "sync" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/metrics" + "github.com/NexusGPU/tensor-fusion/internal/quota" "github.com/NexusGPU/tensor-fusion/internal/utils" "github.com/samber/lo" v1 "k8s.io/api/core/v1" @@ -19,6 +21,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" + fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -26,6 +29,7 @@ import ( const Name = "GPUResourcesFit" const CycleStateAllocateRequest = "allocateRequest" const CycleStateGPUSchedulingResult = "gpuSchedulingResult" + const SchedulerSimulationKey = "schedulerSimulation" var _ framework.PreFilterPlugin = &GPUFit{} @@ -56,9 +60,15 @@ type GPUSchedulingStateData struct { // In Reserve stage, bind GPUs to pod, update allocator cache // In PostBind stage, fetch final GPUs call Pod patch API to update annotation FinalGPUs []string + + // Preempt pods + PreemptPods sync.Map + + // IsPreemption + IsPreemption bool } -func (p *GPUSchedulingStateData) Clone() framework.StateData { +func (p *GPUSchedulingStateData) Clone() fwk.StateData { return p } @@ -93,7 +103,7 @@ func (s *GPUFit) Name() string { return Name } -func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) { +func (s *GPUFit) PreFilter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, _ 
[]fwk.NodeInfo) (*framework.PreFilterResult, *fwk.Status) { // Handle progressive migration case if utils.IsProgressiveMigration() && utils.HasGPUResourceRequest(pod) { nodeNames := s.allocator.ListNonUsingNodes() @@ -102,19 +112,24 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod "use native GPU resources, available native GPU nodes: "+strconv.Itoa(len(nodeNames))) return &framework.PreFilterResult{ NodeNames: nodeNames, - }, framework.NewStatus(framework.Success, "progressive migration for native resources claim") + }, fwk.NewStatus(fwk.Success, "progressive migration for native resources claim") + } + + // Check if DRA mode is enabled for this pod + if isDRAEnabled(pod) && hasDRAClaim(pod) { + return nil, fwk.NewStatus(fwk.Skip, "DRA mode enabled, skipping custom GPU prefilter") } // Skip non tensor-fusion mode if !utils.IsTensorFusionWorker(pod) { - return nil, framework.NewStatus(framework.Skip, "skip for non tensor-fusion mode") + return nil, fwk.NewStatus(fwk.Skip, "skip for non tensor-fusion mode") } // Handle tensor-fusion mode scheduling s.logger.Info("checking GPU node resources for pod", "pod", pod.Name) allocRequest, reason, err := s.allocator.ComposeAllocationRequest(pod) if err != nil { - return nil, framework.NewStatus(framework.Error, reason) + return nil, fwk.NewStatus(fwk.Error, reason) } state.Write(CycleStateAllocateRequest, allocRequest) @@ -134,7 +149,16 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod s.fh.EventRecorder().Eventf(pod, pod, v1.EventTypeWarning, "GPUQuotaOrCapacityNotEnough", "check quota and filter", "TensorFusion schedule failed, no enough resource or quotas: "+err.Error()) s.logger.Error(err, "failed to check quota and filter", "pod", pod.Name) - return nil, framework.NewStatus(framework.Unschedulable, err.Error()) + + if quotaErr, ok := err.(*quota.QuotaExceededError); ok { + if quotaErr.Unresolvable { + return nil, fwk.NewStatus(fwk.UnschedulableAndUnresolvable, quotaErr.Error()) + } else { + return nil, fwk.NewStatus(fwk.Unschedulable, err.Error()) + } + } else { + return nil, fwk.NewStatus(fwk.Unschedulable, err.Error()) + } } validNodesValidGPUs := lo.GroupBy(filteredGPUs, func(gpu *tfv1.GPU) string { @@ -142,10 +166,14 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod }) validNodeNonMatchingGPUs := make(map[string][]*tfv1.GPU, len(validNodesValidGPUs)) - nodeNames := sets.New[string]() + cnt := 0 + allGPUNodeNames := sets.New[string]() nodeGPUs := s.allocator.GetNodeGpuStore() + for k := range nodeGPUs { + allGPUNodeNames.Insert(k) + } for k, matchedGPUs := range validNodesValidGPUs { - nodeNames.Insert(k) + cnt++ // get all GPUs on this node allGPUs := nodeGPUs[k] @@ -157,11 +185,17 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod continue } + preAllocSize := total - matched + if preAllocSize <= 0 { + s.logger.Error(nil, "Filtering GPU error, unexpected less than 0", "pod", + pod.Name, "node", k, "totalGPU count", total, "matchedGPU count", matched) + preAllocSize = 2 + } // range if it's not in validNodesValidGPUs, add to validNodeNonMatchingGPUs - validNodeNonMatchingGPUs[k] = make([]*tfv1.GPU, 0, total-matched) + validNodeNonMatchingGPUs[k] = make([]*tfv1.GPU, 0, preAllocSize) for gpuName, gpu := range allGPUs { seen := false - // just loop because the number always <= 8 + // just loop because the number always <= 8/16 for _, matchedGPU := range matchedGPUs { if gpuName == matchedGPU.Name { seen = true 
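The PreFilter hunk above maps a `quota.QuotaExceededError` with `Unresolvable` set to `UnschedulableAndUnresolvable` via a direct type assertion, which only works because `CheckQuotaAndFilter` now returns the quota error unwrapped instead of wrapping it. Below is a minimal standalone sketch of that interaction; `QuotaExceededError` is a stand-in for the real type and the status strings are placeholders for the framework codes, not the actual API.

```go
package main

import (
	"errors"
	"fmt"
)

// QuotaExceededError is a stand-in for quota.QuotaExceededError with its new
// Unresolvable flag; the real type also carries namespace and quantities.
type QuotaExceededError struct {
	Resource     string
	Unresolvable bool
}

func (e *QuotaExceededError) Error() string { return "quota exceeded: " + e.Resource }

// statusFor mimics the PreFilter branching: unresolvable quota violations are
// terminal, everything else stays merely unschedulable (placeholder strings,
// not the real scheduler framework codes).
func statusFor(err error) string {
	if qe, ok := err.(*QuotaExceededError); ok && qe.Unresolvable {
		return "UnschedulableAndUnresolvable"
	}
	return "Unschedulable"
}

func main() {
	base := &QuotaExceededError{Resource: "tflops", Unresolvable: true}

	// A direct type assertion only matches the unwrapped error value, which is
	// why CheckQuotaAndFilter now returns the quota error as-is.
	wrapped := fmt.Errorf("quota check failed: %w", base)
	_, ok := wrapped.(*QuotaExceededError)
	fmt.Println(ok) // false: the assertion misses the wrapped error
	var qe *QuotaExceededError
	fmt.Println(errors.As(wrapped, &qe)) // true: errors.As would still find it

	fmt.Println(statusFor(base))             // UnschedulableAndUnresolvable
	fmt.Println(statusFor(errors.New("no"))) // Unschedulable
}
```

If the allocator kept wrapping the error with `fmt.Errorf("...: %w", err)`, the assertion in PreFilter would miss it and every quota violation would likely degrade to plain `Unschedulable`; switching to `errors.As` would be the alternative way to keep wrapping while preserving the check.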
@@ -173,7 +207,7 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod } } } - s.logger.Info("filtered valid node GPUs", "nodes count", nodeNames.Len(), "pod", pod.Name) + s.logger.Info("filtered valid node GPUs", "nodes count", cnt, "pod", pod.Name) // assign score based on different strategies score := s.allocator.Score(ctx, s.cfg, allocRequest, validNodesValidGPUs) @@ -182,7 +216,7 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod notMatchingGPUScore := s.allocator.Score(ctx, s.cfg, allocRequest, validNodeNonMatchingGPUs) s.fh.EventRecorder().Eventf(pod, pod, v1.EventTypeNormal, "PreScheduleDone", "pre filter for TensorFusion workload", - "TensorFusion pre schedule done, valid GPU node count: "+strconv.Itoa(nodeNames.Len())) + "TensorFusion pre schedule done, valid GPU node count: "+strconv.Itoa(cnt)) if s.logger.V(6).Enabled() { jsonStr, _ := json.Marshal(validNodesValidGPUs) @@ -195,55 +229,133 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod ValidNodeGPUScore: score, ValidNodeNotMatchingGPUScore: notMatchingGPUScore, FinalGPUs: []string{}, + PreemptPods: sync.Map{}, + IsPreemption: false, }) return &framework.PreFilterResult{ - NodeNames: nodeNames, - }, framework.NewStatus(framework.Success) + NodeNames: allGPUNodeNames, + }, fwk.NewStatus(fwk.Success) } func (s *GPUFit) PreFilterExtensions() framework.PreFilterExtensions { - return nil + return s +} + +func (s *GPUFit) AddPod(ctx context.Context, state fwk.CycleState, pod *v1.Pod, podInfoToAdd fwk.PodInfo, nodeInfo fwk.NodeInfo) *fwk.Status { + stateData, err := state.Read(CycleStateGPUSchedulingResult) + if err != nil { + return fwk.NewStatus(fwk.Error, err.Error()) + } + stateDataParsed := stateData.(*GPUSchedulingStateData) + if pods, ok := stateDataParsed.PreemptPods.Load(nodeInfo.Node().Name); ok { + podsParsed := pods.(sets.Set[types.NamespacedName]) + + nameNs := types.NamespacedName{ + Namespace: podInfoToAdd.GetPod().Namespace, + Name: podInfoToAdd.GetPod().Name, + } + if podsParsed.Has(nameNs) { + podsParsed.Delete(nameNs) + } + } + return fwk.NewStatus(fwk.Success, "") +} + +func (s *GPUFit) RemovePod(ctx context.Context, state fwk.CycleState, pod *v1.Pod, podInfoToRemove fwk.PodInfo, nodeInfo fwk.NodeInfo) *fwk.Status { + stateData, err := state.Read(CycleStateGPUSchedulingResult) + if err != nil { + if fwk.ErrNotFound == err { + stateData = &GPUSchedulingStateData{ + PreemptPods: sync.Map{}, + } + state.Write(CycleStateGPUSchedulingResult, stateData) + } else { + return fwk.NewStatus(fwk.Error, err.Error()) + } + } + stateDataParsed := stateData.(*GPUSchedulingStateData) + stateDataParsed.IsPreemption = true + if pods, ok := stateDataParsed.PreemptPods.Load(nodeInfo.Node().Name); ok { + parsedPods := pods.(sets.Set[types.NamespacedName]) + parsedPods.Insert(types.NamespacedName{ + Namespace: podInfoToRemove.GetPod().Namespace, + Name: podInfoToRemove.GetPod().Name, + }) + } else { + stateDataParsed.PreemptPods.Store(nodeInfo.Node().Name, sets.New(types.NamespacedName{ + Namespace: podInfoToRemove.GetPod().Namespace, + Name: podInfoToRemove.GetPod().Name, + })) + } + return fwk.NewStatus(fwk.Success, "") } -func (s *GPUFit) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { +func (s *GPUFit) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status { + // Check if DRA mode is enabled for this pod + if 
isDRAEnabled(pod) && hasDRAClaim(pod) { + return fwk.NewStatus(fwk.Skip, "DRA mode enabled, skipping custom GPU filter") + } + if !utils.IsTensorFusionWorker(pod) { - return framework.NewStatus(framework.Success, "skip for non tensor-fusion mode") + return fwk.NewStatus(fwk.Success, "skip for non tensor-fusion mode") } filterResult, err := state.Read(CycleStateGPUSchedulingResult) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) + } + + // k8s will RemoveAll Pods, and run Filter for high priority pod, + // then Scheduler framework will reprieve victims one by one until filter returns unschedulable + if filterResult.(*GPUSchedulingStateData).IsPreemption { + allocRequest, err := state.Read(CycleStateAllocateRequest) + allocRequestParsed := allocRequest.(*tfv1.AllocRequest) + if err != nil { + return fwk.NewStatus(fwk.Error, err.Error()) + } + podsToPreempt, ok := filterResult.(*GPUSchedulingStateData).PreemptPods.Load(nodeInfo.Node().Name) + if !ok { + return fwk.NewStatus(fwk.Unschedulable, "no pods to preempt") + } + podsToPreemptParsed := podsToPreempt.(sets.Set[types.NamespacedName]) + err = s.allocator.CheckQuotaAndFilterSingleNodePreempt( + nodeInfo.Node().Name, allocRequestParsed, podsToPreemptParsed) + if err != nil { + return fwk.NewStatus(fwk.Unschedulable, err.Error()) + } + return fwk.NewStatus(fwk.Success, "") } - nodeName := nodeInfo.GetName() + + nodeName := nodeInfo.Node().Name if _, ok := filterResult.(*GPUSchedulingStateData).NodeGPUs[nodeName]; !ok { - return framework.NewStatus(framework.Unschedulable, "no valid node found, gpu capacity not enough") + return fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") } - return framework.NewStatus(framework.Success, "") + return fwk.NewStatus(fwk.Success, "") } func (s *GPUFit) Score( ctx context.Context, - state *framework.CycleState, + state fwk.CycleState, pod *v1.Pod, - nodeInfo *framework.NodeInfo, -) (int64, *framework.Status) { + nodeInfo fwk.NodeInfo, +) (int64, *fwk.Status) { // Skip non tensor-fusion mode scheduling if !utils.IsTensorFusionWorker(pod) { - return 0, framework.NewStatus(framework.Success, "") + return 0, fwk.NewStatus(fwk.Success, "") } if state == nil { - return 0, framework.NewStatus(framework.Error, "no valid node found, gpu capacity not enough") + return 0, fwk.NewStatus(fwk.Error, "no valid node found, gpu capacity not enough") } filterResult, err := state.Read(CycleStateGPUSchedulingResult) if err != nil { - return 0, framework.NewStatus(framework.Error, err.Error()) + return 0, fwk.NewStatus(fwk.Error, err.Error()) } scheduledState := filterResult.(*GPUSchedulingStateData) - gpuScoreMap, ok := scheduledState.ValidNodeGPUScore[nodeInfo.GetName()] + gpuScoreMap, ok := scheduledState.ValidNodeGPUScore[nodeInfo.Node().Name] if !ok { - return 0, framework.NewStatus(framework.Unschedulable, "no valid node found, gpu capacity not enough") + return 0, fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") } // normalize to 0-100, when node has more GPUs but filtered out, // should consider it as 100 when strategy is compact_first, and consider as 0 when is low_load_first @@ -252,7 +364,7 @@ func (s *GPUFit) Score( sum += score } - notMatchingGPUScoreMap, ok := scheduledState.ValidNodeNotMatchingGPUScore[nodeInfo.GetName()] + notMatchingGPUScoreMap, ok := scheduledState.ValidNodeNotMatchingGPUScore[nodeInfo.Node().Name] if ok { for _, score := range notMatchingGPUScoreMap { sum += 
score @@ -265,27 +377,27 @@ func (s *GPUFit) ScoreExtensions() framework.ScoreExtensions { return nil } -func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { +func (s *GPUFit) Reserve(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) *fwk.Status { if !utils.IsTensorFusionWorker(pod) { - return framework.NewStatus(framework.Success, "skip for non tensor-fusion mode") + return fwk.NewStatus(fwk.Success, "skip for non tensor-fusion mode") } s.logger.Info("Reserving pod for GPU resources", "pod", pod.Name, "node", nodeName) allocRequest, err := state.Read(CycleStateAllocateRequest) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } schedulingResultRaw, err := state.Read(CycleStateGPUSchedulingResult) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } // set final GPUs and try update GPU allocator cache schedulingResult := schedulingResultRaw.(*GPUSchedulingStateData) gpuScoreMap, ok := schedulingResult.ValidNodeGPUScore[nodeName] if !ok { - return framework.NewStatus(framework.Unschedulable, "no valid node found, gpu capacity not enough") + return fwk.NewStatus(fwk.Unschedulable, "no valid node found, gpu capacity not enough") } // find top N score GPUs in this node @@ -306,12 +418,12 @@ func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod * allocRequest.(*tfv1.AllocRequest), ) if err != nil { - return framework.NewStatus(framework.Error, err.Error()) + return fwk.NewStatus(fwk.Error, err.Error()) } - return framework.NewStatus(framework.Success, "") + return fwk.NewStatus(fwk.Success, "") } -func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { +func (s *GPUFit) Unreserve(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) { if !utils.IsTensorFusionWorker(pod) { return } @@ -330,7 +442,7 @@ func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod }, schedulingResult.FinalGPUs, pod.ObjectMeta) } -func (s *GPUFit) PostBind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { +func (s *GPUFit) PostBind(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeName string) { if !utils.IsTensorFusionWorker(pod) { return } @@ -359,3 +471,17 @@ func (s *GPUFit) PostBind(ctx context.Context, state *framework.CycleState, pod "Attach GPU device ID info", "Attach TensorFusion GPU device IDs to Pod: "+gpuIDs) } } + +// isDRAEnabled checks if DRA is enabled for a pod +func isDRAEnabled(pod *v1.Pod) bool { + if pod.Annotations == nil { + return false + } + val, ok := pod.Annotations[constants.DRAEnabledAnnotation] + return ok && val == constants.TrueStringValue +} + +// hasDRAClaim checks if a pod has DRA ResourceClaim references +func hasDRAClaim(pod *v1.Pod) bool { + return len(pod.Spec.ResourceClaims) > 0 +} diff --git a/internal/scheduler/gpuresources/gpuresources_dra_test.go b/internal/scheduler/gpuresources/gpuresources_dra_test.go new file mode 100644 index 00000000..021be137 --- /dev/null +++ b/internal/scheduler/gpuresources/gpuresources_dra_test.go @@ -0,0 +1,237 @@ +package gpuresources + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/NexusGPU/tensor-fusion/internal/constants" +) + +func 
TestIsDRAEnabled(t *testing.T) { + tests := []struct { + name string + annotations map[string]string + expected bool + }{ + { + name: "DRA enabled annotation", + annotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.TrueStringValue, + }, + expected: true, + }, + { + name: "DRA disabled annotation", + annotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.FalseStringValue, + }, + expected: false, + }, + { + name: "no annotation", + expected: false, + }, + { + name: "other annotations", + annotations: map[string]string{ + "other.annotation": "value", + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: tt.annotations, + }, + } + + result := isDRAEnabled(pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestHasDRAClaimScheduler(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + expected bool + }{ + { + name: "pod with resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{ + {Name: "gpu-claim"}, + }, + }, + }, + expected: true, + }, + { + name: "pod with multiple resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{ + {Name: "gpu-claim"}, + {Name: "other-claim"}, + }, + }, + }, + expected: true, + }, + { + name: "pod without resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{}, + }, + expected: false, + }, + { + name: "pod with empty resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{}, + }, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := hasDRAClaim(tt.pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +// Integration test for DRA detection logic +func TestDRADetectionIntegration(t *testing.T) { + tests := []struct { + name string + draAnnotation string + hasResourceClaims bool + expectedDRA bool + expectedClaim bool + }{ + { + name: "DRA enabled with claims", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: true, + expectedDRA: true, + expectedClaim: true, + }, + { + name: "DRA enabled without claims", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: false, + expectedDRA: true, + expectedClaim: false, + }, + { + name: "DRA disabled with claims", + draAnnotation: constants.FalseStringValue, + hasResourceClaims: true, + expectedDRA: false, + expectedClaim: true, + }, + { + name: "no DRA annotation, no claims", + hasResourceClaims: false, + expectedDRA: false, + expectedClaim: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: make(map[string]string), + }, + Spec: corev1.PodSpec{}, + } + + if tt.draAnnotation != "" { + pod.Annotations[constants.DRAEnabledAnnotation] = tt.draAnnotation + } + + if tt.hasResourceClaims { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{ + {Name: "test-claim"}, + } + } + + draEnabled := isDRAEnabled(pod) + hasClaim := hasDRAClaim(pod) + + assert.Equal(t, tt.expectedDRA, draEnabled, "DRA enabled detection mismatch") + assert.Equal(t, tt.expectedClaim, hasClaim, "Resource claim detection mismatch") + }) + } +} + +// Test the combination logic that scheduler uses +func TestSchedulerDRALogic(t *testing.T) { + tests := []struct { + name string + draAnnotation string + hasResourceClaims bool + 
shouldSkipScheduler bool + }{ + { + name: "DRA enabled with claims - should skip", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: true, + shouldSkipScheduler: true, + }, + { + name: "DRA enabled without claims - should not skip", + draAnnotation: constants.TrueStringValue, + hasResourceClaims: false, + shouldSkipScheduler: false, + }, + { + name: "DRA disabled with claims - should not skip", + draAnnotation: constants.FalseStringValue, + hasResourceClaims: true, + shouldSkipScheduler: false, + }, + { + name: "no DRA, no claims - should not skip", + shouldSkipScheduler: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: make(map[string]string), + }, + Spec: corev1.PodSpec{}, + } + + if tt.draAnnotation != "" { + pod.Annotations[constants.DRAEnabledAnnotation] = tt.draAnnotation + } + + if tt.hasResourceClaims { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{ + {Name: "test-claim"}, + } + } + + // This is the actual logic used in the scheduler + shouldSkip := isDRAEnabled(pod) && hasDRAClaim(pod) + assert.Equal(t, tt.shouldSkipScheduler, shouldSkip) + }) + } +} diff --git a/internal/scheduler/gpuresources/gpuresources_test.go b/internal/scheduler/gpuresources/gpuresources_test.go index fb7e45b5..5fa25150 100644 --- a/internal/scheduler/gpuresources/gpuresources_test.go +++ b/internal/scheduler/gpuresources/gpuresources_test.go @@ -7,6 +7,7 @@ import ( "testing" "time" + "github.com/samber/lo" "github.com/stretchr/testify/suite" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -14,23 +15,28 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/informers" + clientsetfake "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/events" - "k8s.io/kubernetes/pkg/scheduler/framework" + fwk "k8s.io/kube-scheduler/framework" + framework "k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort" frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" - testutil "sigs.k8s.io/scheduler-plugins/test/util" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/utils" + internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache" + internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue" ) type GPUResourcesSuite struct { @@ -201,6 +207,7 @@ func (s *GPUResourcesSuite) SetupTest() { }, }, } + s.client = fake.NewClientBuilder().WithScheme(scheme.Scheme). WithRuntimeObjects(objList...). WithStatusSubresource( @@ -213,9 +220,11 @@ func (s *GPUResourcesSuite) SetupTest() { ). 
Build() + k8sObjs := make([]runtime.Object, 0, len(pods)+len(nodes)) for _, pod := range pods { err := s.client.Create(s.ctx, pod) s.NoError(err) + k8sObjs = append(k8sObjs, pod) } for _, gpu := range gpus { err := s.client.Create(s.ctx, gpu) @@ -224,6 +233,7 @@ func (s *GPUResourcesSuite) SetupTest() { for _, node := range nodes { err := s.client.Create(s.ctx, node) s.NoError(err) + k8sObjs = append(k8sObjs, node) } var registerPlugins []tf.RegisterPluginFunc @@ -233,11 +243,16 @@ func (s *GPUResourcesSuite) SetupTest() { tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), ) + fakeClientSet := clientsetfake.NewSimpleClientset(k8sObjs...) + informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0) + metrics.Register() + metricsRecorder := metrics.NewMetricsAsyncRecorder(1000, time.Second, s.ctx.Done()) fwk, err := tf.NewFramework( s.ctx, registeredPlugins, "", - frameworkruntime.WithPodNominator(testutil.NewPodNominator(nil)), - frameworkruntime.WithSnapshotSharedLister(testutil.NewFakeSharedLister(pods, nodes)), + frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)), + frameworkruntime.WithSnapshotSharedLister(internalcache.NewEmptySnapshot()), frameworkruntime.WithEventRecorder(&events.FakeRecorder{}), + frameworkruntime.WithMetricsRecorder(metricsRecorder), ) s.NoError(err) s.fwk = fwk @@ -271,7 +286,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { tests := []struct { name string pod *v1.Pod - expectedStatus framework.Code + expectedStatus fwk.Code expectedNodes string }{ { @@ -282,7 +297,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "100", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-a node-b", }, { @@ -293,7 +308,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "2000", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b", }, { @@ -304,7 +319,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "100", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b", }, { @@ -315,7 +330,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "2000", constants.VRAMRequestAnnotation: "80Gi", }), - expectedStatus: framework.Unschedulable, + expectedStatus: fwk.Unschedulable, expectedNodes: "", }, { @@ -326,7 +341,7 @@ func (s *GPUResourcesSuite) TestPreFilter() { constants.TFLOPSRequestAnnotation: "100", constants.VRAMRequestAnnotation: "10Gi", }), - expectedStatus: framework.Unschedulable, + expectedStatus: fwk.Unschedulable, expectedNodes: "", }, } @@ -334,11 +349,11 @@ func (s *GPUResourcesSuite) TestPreFilter() { for _, tt := range tests { s.Run(tt.name, func() { state := framework.NewCycleState() - res, status := s.plugin.PreFilter(s.ctx, state, tt.pod) + res, status := s.plugin.PreFilter(s.ctx, state, tt.pod, []fwk.NodeInfo{}) s.Equal(tt.expectedStatus, status.Code(), status.Message()) - if tt.expectedStatus == framework.Success { + if tt.expectedStatus == fwk.Success { s.Require().NotNil(res) - nodes := sort.StringSlice(res.NodeNames.UnsortedList()) + nodes := sort.StringSlice(getPreFilterResult(state)) nodes.Sort() s.Equal(tt.expectedNodes, strings.Join(nodes, " ")) } @@ -351,19 +366,19 @@ func (s *GPUResourcesSuite) 
TestPreFilterForNonTensorFusionPod() { tests := []struct { name string pod *v1.Pod - expectedStatus framework.Code + expectedStatus fwk.Code expectedNodes string }{ { name: "pod requires 1 GPU, enough capacity", pod: s.makeNonTensorFusionPod("p1", 1), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b node-c", }, { name: "pod requires 2 GPU, enough capacity", pod: s.makeNonTensorFusionPod("p1", 2), - expectedStatus: framework.Success, + expectedStatus: fwk.Success, expectedNodes: "node-b node-c", }, } @@ -371,9 +386,9 @@ func (s *GPUResourcesSuite) TestPreFilterForNonTensorFusionPod() { for _, tt := range tests { s.Run(tt.name, func() { state := framework.NewCycleState() - res, status := s.plugin.PreFilter(s.ctx, state, tt.pod) + res, status := s.plugin.PreFilter(s.ctx, state, tt.pod, []fwk.NodeInfo{}) s.Equal(tt.expectedStatus, status.Code(), status.Message()) - if tt.expectedStatus == framework.Success { + if tt.expectedStatus == fwk.Success { s.Require().NotNil(res) nodes := sort.StringSlice(res.NodeNames.UnsortedList()) nodes.Sort() @@ -394,23 +409,23 @@ func (s *GPUResourcesSuite) TestFilter() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) tests := []struct { name string nodeName string - expectedStatus framework.Code + expectedStatus fwk.Code }{ { name: "node with available GPU", nodeName: "node-a", - expectedStatus: framework.Success, + expectedStatus: fwk.Success, }, { name: "node without available GPU", nodeName: "node-c", - expectedStatus: framework.Unschedulable, + expectedStatus: fwk.Unschedulable, }, } @@ -435,7 +450,7 @@ func (s *GPUResourcesSuite) TestScore() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) // node a as one worker consumed 10% GPU resources @@ -466,7 +481,7 @@ func (s *GPUResourcesSuite) TestReserveAndUnreserve() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) // Reserve on node-a @@ -507,7 +522,7 @@ func (s *GPUResourcesSuite) TestPostBind() { constants.TFLOPSLimitAnnotation: "100", constants.VRAMLimitAnnotation: "40Gi", }) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) reserveStatus := s.plugin.Reserve(s.ctx, state, pod, "node-a") @@ -609,7 +624,7 @@ func (s *GPUResourcesSuite) TestScoreExtensions() { func (s *GPUResourcesSuite) TestPreFilterExtensions() { log.FromContext(s.ctx).Info("Running TestPreFilterExtensions") - s.Nil(s.plugin.PreFilterExtensions()) + s.NotNil(s.plugin.PreFilterExtensions()) } func (s *GPUResourcesSuite) TestName() { @@ -629,13 +644,13 @@ func (s *GPUResourcesSuite) TestReserve_ErrorHandling() { // No pre-filter call, so state is empty status := s.plugin.Reserve(s.ctx, state, pod, "node-a") s.Error(status.AsError()) - s.Equal(framework.Error, status.Code()) + s.Equal(fwk.Error, status.Code()) // 
Pre-filter, but for a different node - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) status = s.plugin.Reserve(s.ctx, state, pod, "node-c-non-existent") - s.Equal(framework.Unschedulable, status.Code()) + s.Equal(fwk.Unschedulable, status.Code()) } func (s *GPUResourcesSuite) TestUnreserve_ErrorHandling() { @@ -668,7 +683,7 @@ func (s *GPUResourcesSuite) TestPostBind_ErrorHandling() { s.plugin.PostBind(s.ctx, state, pod, "node-a") // Test with a pod that doesn't exist in the client - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) reserveStatus := s.plugin.Reserve(s.ctx, state, pod, "node-a") s.Require().True(reserveStatus.IsSuccess()) @@ -688,7 +703,7 @@ func (s *GPUResourcesSuite) TestFilter_ErrorHandling() { // No pre-filter call, so state is empty status := s.plugin.Filter(s.ctx, state, pod, nodeInfo) s.Error(status.AsError()) - s.Equal(framework.Error, status.Code()) + s.Equal(fwk.Error, status.Code()) } func (s *GPUResourcesSuite) TestScore_ErrorHandling() { @@ -704,13 +719,21 @@ func (s *GPUResourcesSuite) TestScore_ErrorHandling() { nodeInfo.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-a"}}) _, status := s.plugin.Score(s.ctx, state, pod, nodeInfo) s.Error(status.AsError()) - s.Equal(framework.Error, status.Code()) + s.Equal(fwk.Error, status.Code()) // Pre-filter, but for a different node nodeInfo = &framework.NodeInfo{} nodeInfo.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-c-non-existent"}}) - _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod) + _, preFilterStatus := s.plugin.PreFilter(s.ctx, state, pod, []fwk.NodeInfo{}) s.Require().True(preFilterStatus.IsSuccess()) _, status = s.plugin.Score(s.ctx, state, pod, nodeInfo) - s.Equal(framework.Unschedulable, status.Code()) + s.Equal(fwk.Unschedulable, status.Code()) +} + +func getPreFilterResult(state *framework.CycleState) []string { + data, err := state.Read(CycleStateGPUSchedulingResult) + if err != nil { + return nil + } + return lo.Keys(data.(*GPUSchedulingStateData).NodeGPUs) } diff --git a/internal/scheduler/gputopo/gpu_network_topo.go b/internal/scheduler/gputopo/gpu_network_topo.go index f481ea8c..197e3995 100644 --- a/internal/scheduler/gputopo/gpu_network_topo.go +++ b/internal/scheduler/gputopo/gpu_network_topo.go @@ -9,6 +9,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/klog/v2" + fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -53,6 +54,6 @@ func (s *GPUNetworkTopologyAware) Name() string { return Name } -func (s *GPUNetworkTopologyAware) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { - return framework.NewStatus(framework.Success, "") +func (s *GPUNetworkTopologyAware) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status { + return fwk.NewStatus(fwk.Success, "") } diff --git a/internal/server/router/allocator_info.go b/internal/server/router/allocator_info.go index 7c8c4f78..58a949cf 100644 --- a/internal/server/router/allocator_info.go +++ b/internal/server/router/allocator_info.go @@ -17,6 +17,7 @@ import ( tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" v1 "k8s.io/api/core/v1" 
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + fwk "k8s.io/kube-scheduler/framework" "k8s.io/kubernetes/pkg/scheduler" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/log" @@ -104,20 +105,20 @@ func (r *AllocatorInfoRouter) SimulateScheduleOnePod(ctx *gin.Context) { state.SetRecordPluginMetrics(false) podsToActivate := framework.NewPodsToActivate() state.Write(framework.PodsToActivateKey, podsToActivate) - state.Write(framework.StateKey(constants.SchedulerSimulationKey), &gpuallocator.SimulateSchedulingFilterDetail{ + state.Write(fwk.StateKey(constants.SchedulerSimulationKey), &gpuallocator.SimulateSchedulingFilterDetail{ FilterStageDetails: []filter.FilterDetail{}, }) // simulate schedulingCycle non side effect part - fwk := r.scheduler.Profiles[pod.Spec.SchedulerName] - if fwk == nil { + fwkInstance := r.scheduler.Profiles[pod.Spec.SchedulerName] + if fwkInstance == nil { log.FromContext(ctx).Error(nil, "scheduler framework not found", "pod", pod.Name, "namespace", pod.Namespace) ctx.JSON(http.StatusInternalServerError, gin.H{"error": "scheduler framework not found"}) return } - scheduleResult, err := r.scheduler.SchedulePod(ctx, fwk, state, pod) + scheduleResult, err := r.scheduler.SchedulePod(ctx, fwkInstance, state, pod) gpuCycleState, _ := state.Read(gpuresources.CycleStateGPUSchedulingResult) - simulateSchedulingFilterDetail, _ := state.Read(framework.StateKey(constants.SchedulerSimulationKey)) + simulateSchedulingFilterDetail, _ := state.Read(fwk.StateKey(constants.SchedulerSimulationKey)) if err != nil { if fitError, ok := err.(*framework.FitError); ok { ctx.JSON(http.StatusOK, gin.H{ diff --git a/internal/utils/compose.go b/internal/utils/compose.go index e7170881..22752f45 100644 --- a/internal/utils/compose.go +++ b/internal/utils/compose.go @@ -16,6 +16,10 @@ import ( "k8s.io/utils/ptr" ) +var injectLibResource v1.ResourceList = v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("20m"), + v1.ResourceMemory: resource.MustParse("64Mi"), +} var nodeDiscoveryDefaultRequests v1.ResourceList = v1.ResourceList{ v1.ResourceCPU: resource.MustParse("20m"), v1.ResourceMemory: resource.MustParse("64Mi"), @@ -79,6 +83,8 @@ type TensorFusionInfo struct { // Pod mutating webhook can not get Pod UID sometimes, // thus need pod controller to set the owner reference PendingSetPodAsOwner bool + // DRA support + DRAEnabled bool } func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo) { @@ -113,6 +119,10 @@ func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo Tens pod.Annotations[constants.IsLocalGPUAnnotation] = strconv.FormatBool(tfInfo.Profile.IsLocalGPU) // add inject container annotation for client Pod, in case user doesn't specify it pod.Annotations[constants.InjectContainerAnnotation] = strings.Join(tfInfo.ContainerNames, ",") + // add DRA enabled annotation + if tfInfo.DRAEnabled { + pod.Annotations[constants.DRAEnabledAnnotation] = constants.TrueStringValue + } } func AppendTFWorkerLabelsAndAnnotationsAfterTemplate( @@ -175,6 +185,11 @@ func AddTFDefaultClientConfBeforePatch( MountPath: constants.TFLibsVolumeMountPath, }, }, + Resources: v1.ResourceRequirements{ + Requests: injectLibResource, + Limits: injectLibResource, + }, + Env: convertDisabledFeatures4InjectLib(pod.Annotations[constants.DisableFeaturesAnnotation]), }) pod.Spec.Volumes = append(pod.Spec.Volumes, v1.Volume{ Name: constants.TFLibsVolumeName, @@ -220,10 +235,9 @@ func AddTFDefaultClientConfBeforePatch( 
pod.Spec.Containers[injectContainerIndex].VolumeMounts = append( pod.Spec.Containers[injectContainerIndex].VolumeMounts, v1.VolumeMount{ - Name: constants.DataVolumeName, - MountPath: constants.SharedMemDeviceName, - SubPath: constants.SharedMemMountSubPath, - // + constants.TFLibsVolumeMountPath, SubPathExpr: constants.TFDataPathWorkerExpr, + Name: constants.DataVolumeName, + MountPath: constants.TFDataPath + constants.SharedMemMountSubPath, + SubPathExpr: constants.TFDataPathWorkerExpr, MountPropagation: ptr.To(v1.MountPropagationHostToContainer), }) @@ -302,22 +316,47 @@ func AddTFDefaultClientConfBeforePatch( } func convertDisabledFeaturesToEnvs(disabledFeatures string, envList []v1.EnvVar) []v1.EnvVar { - disabledFeaturesList := strings.Split(disabledFeatures, ",") - for _, feature := range disabledFeaturesList { + disabledFeaturesList := strings.SplitSeq(disabledFeatures, ",") + for feature := range disabledFeaturesList { if feat, ok := featureShortcutMap[feature]; ok { - envList = append(envList, v1.EnvVar{ - Name: feat.EnvName, - Value: feat.EnvValue, - }) + if !lo.ContainsBy(envList, func(item v1.EnvVar) bool { + return item.Name == feat.EnvName + }) { + envList = append(envList, v1.EnvVar{ + Name: feat.EnvName, + Value: feat.EnvValue, + }) + } } } return envList } +func convertDisabledFeatures4InjectLib(disabledFeatures string) []v1.EnvVar { + if disabledFeatures == "" { + return []v1.EnvVar{} + } + disabledFeaturesList := strings.SplitSeq(disabledFeatures, ",") + + // GPU limiter by-pass take effect in bootstrap stage, add special handling here + for feature := range disabledFeaturesList { + if feature == constants.BuiltInFeaturesGpuLimiter { + return []v1.EnvVar{ + { + Name: featureShortcutMap[feature].EnvName, + Value: featureShortcutMap[feature].EnvValue, + }, + } + } + } + return []v1.EnvVar{} +} + func AddTFHypervisorConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, pool *tfv1.GPUPool) { // Hypervisor needs to read /proc to map pod with processID spec.HostPID = true spec.TerminationGracePeriodSeconds = constants.GracefulPeriodSeconds + spec.PriorityClassName = constants.NodeCriticalPriorityClassName enableVector := pool.Spec.ComponentConfig.Hypervisor != nil && pool.Spec.ComponentConfig.Hypervisor.EnableVector @@ -430,8 +469,7 @@ func composeHypervisorContainer(spec *v1.PodSpec, pool *tfv1.GPUPool, enableVect spec.Containers[0].VolumeMounts = append(spec.Containers[0].VolumeMounts, v1.VolumeMount{ Name: constants.DataVolumeName, ReadOnly: false, - MountPath: constants.SharedMemDeviceName, - SubPath: constants.SharedMemMountSubPath, + MountPath: constants.TFDataPath, }, v1.VolumeMount{ Name: constants.TensorFusionGPUInfoConfigVolumeName, MountPath: constants.TensorFusionGPUInfoConfigMountPath, @@ -649,12 +687,9 @@ func AddWorkerConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, workerCon spec.Containers[0].VolumeMounts = append( spec.Containers[0].VolumeMounts, v1.VolumeMount{ - Name: constants.DataVolumeName, - MountPath: constants.SharedMemDeviceName, - // TODO not working. 
- // + constants.TFLibsVolumeMountPath - // SubPathExpr: constants.TFDataPathWorkerExpr, - SubPath: constants.SharedMemMountSubPath, + Name: constants.DataVolumeName, + MountPath: constants.TFDataPath + constants.SharedMemMountSubPath, + SubPathExpr: constants.TFDataPathWorkerExpr, MountPropagation: ptr.To(v1.MountPropagationHostToContainer), }) spec.Containers[0].Env = append(spec.Containers[0].Env, v1.EnvVar{ diff --git a/internal/utils/reconcile.go b/internal/utils/reconcile.go index ebc091ac..23026cf7 100644 --- a/internal/utils/reconcile.go +++ b/internal/utils/reconcile.go @@ -214,6 +214,15 @@ func IsTensorFusionWorker(pod *corev1.Pod) bool { return pod.Labels[constants.LabelComponent] == constants.ComponentWorker } +func GetInitialGPUNodeSelector() []string { + selector := os.Getenv("INITIAL_GPU_NODE_LABEL_SELECTOR") + if selector == "" { + selector = constants.InitialGPUNodeSelector + } + selectors := strings.Split(selector, "=") + return selectors +} + var GPUResourceNames = []corev1.ResourceName{ "nvidia.com/gpu", "amd.com/gpu", diff --git a/internal/webhook/v1/pod_dra.go b/internal/webhook/v1/pod_dra.go new file mode 100644 index 00000000..6a55fc4f --- /dev/null +++ b/internal/webhook/v1/pod_dra.go @@ -0,0 +1,197 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1 + +import ( + "context" + "fmt" + "strings" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" +) + +// DRAProcessor handles all DRA-related operations for pod admission +type DRAProcessor struct { + client.Client + enableDRA bool + resourceClaimTemplateName string // cached ResourceClaimTemplate name + configLoaded bool // tracks if configuration has been loaded +} + +// NewDRAProcessor creates a new DRA processor +func NewDRAProcessor(client client.Client) *DRAProcessor { + return &DRAProcessor{ + Client: client, + enableDRA: false, + } +} + +// InitializeDRAConfig is kept for backward compatibility but now does nothing +// Configuration is loaded lazily on first use +func (p *DRAProcessor) InitializeDRAConfig(ctx context.Context) error { + // No-op - configuration is now loaded lazily + if p.configLoaded { + return nil + } + + // Set defaults first + p.enableDRA = false + p.resourceClaimTemplateName = constants.DRAResourceClaimTemplateName + + templateList := &tfv1.SchedulingConfigTemplateList{} + // Use the provided context to respect cancellation + err := p.List(ctx, templateList) + if err != nil { + // Log error but don't fail - fall back to defaults + // This allows webhook to work even if templates are unavailable + p.configLoaded = true + return nil + } + + // Check if any template has DRA enabled and cache the ResourceClaimTemplateName + for _, template := range templateList.Items { + if template.Spec.DRA != nil { + if template.Spec.DRA.Enable != nil && *template.Spec.DRA.Enable { + p.enableDRA = true + } + // Cache the ResourceClaimTemplateName from the template + if template.Spec.DRA.ResourceClaimTemplateName != "" { + p.resourceClaimTemplateName = template.Spec.DRA.ResourceClaimTemplateName + } + } + } + + p.configLoaded = true + return nil +} + +// IsDRAEnabled checks if DRA is enabled for a specific pod +func (p *DRAProcessor) IsDRAEnabled(ctx context.Context, pod *corev1.Pod) bool { + // Load configuration if not yet loaded (lazy loading) + if !p.configLoaded { + _ = p.InitializeDRAConfig(ctx) // Ignore error to maintain backward compatibility + } + + // Check pod-level annotation first (explicit override) + if val, ok := pod.Annotations[constants.DRAEnabledAnnotation]; ok && val == constants.TrueStringValue { + return true + } + + // Check pod-level annotation for explicit disable + if val, ok := pod.Annotations[constants.DRAEnabledAnnotation]; ok && val == constants.FalseStringValue { + return false + } + + // Fall back to global configuration + return p.enableDRA +} + +// HasDRAClaim checks if a pod has DRA ResourceClaim references +func HasDRAClaim(pod *corev1.Pod) bool { + return len(pod.Spec.ResourceClaims) > 0 +} + +// HandleDRAAdmission handles the complete DRA admission process +func (p *DRAProcessor) HandleDRAAdmission(ctx context.Context, pod *corev1.Pod, tfInfo *utils.TensorFusionInfo, containerIndices []int) error { + // Load DRA configuration if needed + if err := p.InitializeDRAConfig(ctx); err != nil { + return fmt.Errorf("failed to load DRA config: %w", err) + } + + // Convert GPU resources to ResourceClaimTemplate reference and store CEL in annotation + celSelector, err := BuildCELSelector(pod, tfInfo) + if err != nil { + return fmt.Errorf("failed to build CEL selector: %w", err) + } + + // Inject ResourceClaimTemplate reference to Pod + 
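// --- Editorial sketch (not part of the patch) ---------------------------------
// A minimal, standalone illustration of the enablement precedence implemented by
// DRAProcessor.IsDRAEnabled above: an explicit pod annotation always wins over the
// cluster-wide default derived from SchedulingConfigTemplate. The literal
// annotation key below is only a stand-in for constants.DRAEnabledAnnotation.
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// draEnabledFor mirrors the three-way decision: "true" forces DRA on, "false"
// forces it off, and anything else falls back to the global default.
func draEnabledFor(pod *corev1.Pod, globalDefault bool) bool {
	switch pod.Annotations["tensor-fusion.ai/dra-enabled"] { // assumed key, see constants.DRAEnabledAnnotation
	case "true":
		return true
	case "false":
		return false
	default:
		return globalDefault
	}
}

func main() {
	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
		Annotations: map[string]string{"tensor-fusion.ai/dra-enabled": "false"},
	}}
	// Pod-level opt-out beats a cluster-wide default of "enabled".
	fmt.Println(draEnabledFor(pod, true)) // false
}
// --- End of sketch -------------------------------------------------------------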
p.injectResourceClaimTemplateRef(pod) + + // Mark pod with DRA enabled annotation + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) + } + pod.Annotations[constants.DRAEnabledAnnotation] = constants.TrueStringValue + pod.Annotations[constants.DRACelExpressionAnnotation] = celSelector + + return nil +} + +// BuildCELSelector constructs a CEL expression for DRA device selection based on TensorFusion requirements +func BuildCELSelector(pod *corev1.Pod, tfInfo *utils.TensorFusionInfo) (string, error) { + var conditions []string + + // 1. GPU model filter (if specified - basic attribute that should be widely supported) + if tfInfo.Profile.GPUModel != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["model"] == "%s"`, tfInfo.Profile.GPUModel)) + } + + // 2. GPU count requirement (important for multi-GPU workloads) + if tfInfo.Profile.GPUCount > 0 { + conditions = append(conditions, fmt.Sprintf(`size(devices) >= %d`, tfInfo.Profile.GPUCount)) + } + + // 3. Pool name filter (for resource isolation and scheduling preferences) + if tfInfo.Profile.PoolName != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["pool_name"] == "%s"`, tfInfo.Profile.PoolName)) + } + + // 4. Pod namespace filter (for namespace-based device isolation) + if pod.Namespace != "" { + conditions = append(conditions, fmt.Sprintf(`device.attributes["pod_namespace"] == "%s"`, pod.Namespace)) + } + + // Return a basic condition if no specific requirements + if len(conditions) == 0 { + // Simple condition that should work with most DRA drivers + return `device.attributes.exists("type")`, nil + } + + return strings.Join(conditions, " && "), nil +} + +// injectResourceClaimTemplateRef adds ResourceClaimTemplate reference to Pod spec +func (p *DRAProcessor) injectResourceClaimTemplateRef(pod *corev1.Pod) { + // Add ResourceClaimTemplate reference to pod.Spec.ResourceClaims + if pod.Spec.ResourceClaims == nil { + pod.Spec.ResourceClaims = []corev1.PodResourceClaim{} + } + + // Use ResourceClaimTemplate instead of direct ResourceClaim + claimRef := corev1.PodResourceClaim{ + Name: constants.DRAClaimDefineName, + ResourceClaimTemplateName: &p.resourceClaimTemplateName, + } + + // Check if the claim reference already exists to maintain idempotency + claimExists := false + for _, existingClaim := range pod.Spec.ResourceClaims { + if existingClaim.Name == constants.DRAClaimDefineName { + claimExists = true + break + } + } + + if !claimExists { + pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims, claimRef) + } +} diff --git a/internal/webhook/v1/pod_webhook.go b/internal/webhook/v1/pod_webhook.go index 53610ffe..a7773ece 100644 --- a/internal/webhook/v1/pod_webhook.go +++ b/internal/webhook/v1/pod_webhook.go @@ -37,6 +37,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/constants" "github.com/NexusGPU/tensor-fusion/internal/portallocator" "github.com/NexusGPU/tensor-fusion/internal/utils" @@ -46,17 +47,24 @@ import ( var httpClient = &http.Client{Timeout: 10 * time.Second} // SetupPodWebhookWithManager registers the webhook for Pod in the manager. 
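// --- Editorial sketch (not part of the patch) ---------------------------------
// The CEL selector built by BuildCELSelector above is a plain "&&"-joined string.
// For a profile requesting two H100s in pool "prod" from namespace "ml", the
// expression would take the shape assembled below. This only illustrates the
// output format; the attribute names ("model", "pool_name", "pod_namespace") are
// the ones used in BuildCELSelector and must match what the DRA driver publishes.
package main

import (
	"fmt"
	"strings"
)

func main() {
	conditions := []string{
		`device.attributes["model"] == "H100"`,
		`size(devices) >= 2`,
		`device.attributes["pool_name"] == "prod"`,
		`device.attributes["pod_namespace"] == "ml"`,
	}
	// Conditions are joined with a logical AND, exactly as in BuildCELSelector.
	fmt.Println(strings.Join(conditions, " && "))
}
// --- End of sketch -------------------------------------------------------------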
-func SetupPodWebhookWithManager(mgr ctrl.Manager, portAllocator *portallocator.PortAllocator) error { +func SetupPodWebhookWithManager(mgr ctrl.Manager, portAllocator *portallocator.PortAllocator, pricingProvider pricing.PricingProvider) error { webhookServer := mgr.GetWebhookServer() - webhookServer.Register("/mutate-v1-pod", - &admission.Webhook{ - Handler: &TensorFusionPodMutator{ - decoder: admission.NewDecoder(runtime.NewScheme()), - Client: mgr.GetClient(), - portAllocator: portAllocator, - }, - }) + // Initialize DRA processor + draProcessor := NewDRAProcessor(mgr.GetClient()) + if err := draProcessor.InitializeDRAConfig(context.Background()); err != nil { + return fmt.Errorf("failed to initialize DRA config: %w", err) + } + + // Initialize DRA setting from global configuration + mutator := &TensorFusionPodMutator{ + decoder: admission.NewDecoder(runtime.NewScheme()), + Client: mgr.GetClient(), + portAllocator: portAllocator, + draProcessor: draProcessor, + } + + webhookServer.Register("/mutate-v1-pod", &admission.Webhook{Handler: mutator}) return nil } @@ -64,6 +72,7 @@ type TensorFusionPodMutator struct { Client client.Client decoder admission.Decoder portAllocator *portallocator.PortAllocator + draProcessor *DRAProcessor } // Handle implements admission.Handler interface. @@ -100,7 +109,7 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque return admission.Errored(http.StatusBadRequest, fmt.Errorf("failed to marshal current pod: %w", err)) } - tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, pod) + tfInfo, err := ParseTensorFusionInfo(ctx, m.Client, m.draProcessor, pod) if err != nil { return admission.Errored(http.StatusInternalServerError, fmt.Errorf("parse tf resources: %w", err)) } @@ -159,16 +168,34 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque return admission.Allowed("no valid container to inject tensor-fusion, skipped") } - // Add defaults and tensor-fusion injection logic + // Handle DRA-specific processing if enabled + if tfInfo.DRAEnabled { + // Process DRA workload + if err := m.draProcessor.HandleDRAAdmission(ctx, pod, &tfInfo, containerIndices); err != nil { + return admission.Errored(http.StatusInternalServerError, fmt.Errorf("failed to handle DRA admission: %w", err)) + } + } + + // Common processing for both DRA and regular modes utils.AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod, tfInfo) utils.AddTFDefaultClientConfBeforePatch(ctx, pod, pool, tfInfo, containerIndices) + // Add priorityClass if contains higher QoS level and Pod priority class not specified + if pod.Spec.PriorityClassName == "" && + (tfInfo.Profile.Qos == tfv1.QoSHigh || tfInfo.Profile.Qos == tfv1.QoSCritical) { + pod.Spec.PriorityClassName = constants.TensorFusionSystemName + string(tfInfo.Profile.Qos) + } + // Inject initContainer and env variables patches, err := m.patchTFClient( - pod, pool, tfInfo.Profile.IsLocalGPU, currentBytes, containerIndices, + ctx, pod, pool, tfInfo.Profile.IsLocalGPU, currentBytes, containerIndices, ) if err != nil { - log.Error(err, "failed to patch tf client", "pod", req.Name, "namespace", req.Namespace) + mode := "regular" + if tfInfo.DRAEnabled { + mode = "DRA" + } + log.Error(err, "failed to patch tf client", "mode", mode, "pod", req.Name, "namespace", req.Namespace) return admission.Errored(http.StatusInternalServerError, err) } @@ -266,6 +293,7 @@ func (m *TensorFusionPodMutator) createOrUpdateWorkload(ctx context.Context, pod } func (m *TensorFusionPodMutator) patchTFClient( + 
ctx context.Context, pod *corev1.Pod, pool *tfv1.GPUPool, isLocalGPU bool, @@ -392,7 +420,7 @@ func addConnectionForRemoteFixedReplicaVirtualGPU(pod *corev1.Pod, container *co if pod.GenerateName == "" && pod.Name != "" { prefix = pod.Name + constants.TFConnectionNamePrefix } else { - prefix = pod.GenerateName + constants.TFConnectionNamePrefix + prefix = pod.GenerateName + constants.TFConnectionNameNoPrefix } connectionName := fmt.Sprintf("%s%s", prefix, utils.NewShortID(10)) connectionNamespace := pod.Namespace @@ -516,16 +544,17 @@ func (m *TensorFusionPodMutator) assignClusterHostPortFromLeader(pod *corev1.Pod } func calculateQoSLevel(profile *tfv1.WorkloadProfileSpec, pool *tfv1.GPUPool) tfv1.QoSLevel { - sameReqLimits := profile.Resources.Limits.Tflops.Cmp(profile.Resources.Requests.Tflops) == 0 && - profile.Resources.Limits.Vram.Cmp(profile.Resources.Requests.Vram) == 0 - - // set to critical if req == limits, same logic as Kubernetes QoS - if sameReqLimits { - return constants.QoSLevelCritical - } - // when not set, assign default QoS if profile.Qos == "" { + sameReqLimits := profile.Resources.Limits.Tflops.Cmp(profile.Resources.Requests.Tflops) == 0 && + profile.Resources.Limits.Vram.Cmp(profile.Resources.Requests.Vram) == 0 + + // set to high if req == limits, same logic as Kubernetes QoS + // critical QoS can preempt other pods, have to be set manually + if sameReqLimits { + return constants.QoSLevelHigh + } + if pool.Spec.QosConfig == nil || pool.Spec.QosConfig.DefaultQoS == "" { return constants.QoSLevelMedium } diff --git a/internal/webhook/v1/pod_webhook_dra_test.go b/internal/webhook/v1/pod_webhook_dra_test.go new file mode 100644 index 00000000..e6fce827 --- /dev/null +++ b/internal/webhook/v1/pod_webhook_dra_test.go @@ -0,0 +1,393 @@ +package v1 + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/utils" +) + +func TestDRAProcessor_IsDRAEnabled(t *testing.T) { + tests := []struct { + name string + processorDRA bool + podAnnotations map[string]string + expected bool + }{ + { + name: "global DRA enabled, no pod annotation", + processorDRA: true, + expected: true, + }, + { + name: "global DRA disabled, no pod annotation", + processorDRA: false, + expected: false, + }, + { + name: "global DRA disabled, pod annotation enabled", + processorDRA: false, + podAnnotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.TrueStringValue, + }, + expected: true, + }, + { + name: "global DRA enabled, pod annotation disabled", + processorDRA: true, + podAnnotations: map[string]string{ + constants.DRAEnabledAnnotation: constants.FalseStringValue, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + processor := &DRAProcessor{ + enableDRA: tt.processorDRA, + configLoaded: true, // Skip config loading in tests + } + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: tt.podAnnotations, + }, + } + + result := processor.IsDRAEnabled(context.Background(), pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +func 
TestDRAProcessor_HandleDRAAdmission(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, tfv1.AddToScheme(scheme)) + + // Create a SchedulingConfigTemplate with DRA config + template := &tfv1.SchedulingConfigTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-template", + }, + Spec: tfv1.SchedulingConfigTemplateSpec{ + DRA: &tfv1.DRAConfig{ + Enable: &[]bool{true}[0], + ResourceClaimTemplateName: "custom-gpu-template", + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(template). + Build() + + processor := &DRAProcessor{ + Client: fakeClient, + } + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "test-container"}, + }, + }, + } + + tfInfo := &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 1, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("10"), + Vram: resource.MustParse("8Gi"), + }, + }, + }, + } + + containerIndices := []int{0} + + // Test HandleDRAAdmission + err := processor.HandleDRAAdmission(context.Background(), pod, tfInfo, containerIndices) + require.NoError(t, err) + + // Verify CEL expression is stored in Pod annotation + celExpression := pod.Annotations[constants.DRACelExpressionAnnotation] + require.NotEmpty(t, celExpression) + assert.Contains(t, celExpression, `device.attributes["tflops"].quantity >= quantity("10")`) + assert.Contains(t, celExpression, `device.attributes["vram"].quantity >= quantity("8Gi")`) + + // Verify DRA enabled annotation is set + assert.Equal(t, constants.TrueStringValue, pod.Annotations[constants.DRAEnabledAnnotation]) + + // Verify ResourceClaimTemplate reference is added to Pod + require.Len(t, pod.Spec.ResourceClaims, 1) + podClaim := pod.Spec.ResourceClaims[0] + assert.Equal(t, constants.DRAClaimDefineName, podClaim.Name) + require.NotNil(t, podClaim.ResourceClaimTemplateName) + assert.Equal(t, "custom-gpu-template", *podClaim.ResourceClaimTemplateName) + + // Verify processor has cached the ResourceClaimTemplateName + assert.Equal(t, "custom-gpu-template", processor.resourceClaimTemplateName) +} + +func TestBuildCELSelector(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + tfInfo *utils.TensorFusionInfo + expectedConditions []string + unexpectedConditions []string + }{ + { + name: "Basic resource filters", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 2, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("20"), + Vram: resource.MustParse("16Gi"), + }, + }, + GPUModel: "H100", + }, + }, + expectedConditions: []string{ + `device.attributes["tflops"].quantity >= quantity("20")`, + `device.attributes["vram"].quantity >= quantity("16Gi")`, + `device.attributes["model"] == "H100"`, + `int(device.attributes["gpu_count"]) >= 2`, + `device.attributes["pod_namespace"] == "test-namespace"`, + }, + }, + { + name: "All filters including pool and workload", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "production", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 1, + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + Tflops: resource.MustParse("10"), + Vram: resource.MustParse("8Gi"), + }, + }, + GPUModel: 
"A100", + PoolName: "high-priority", + }, + WorkloadName: "ml-training-job", + }, + expectedConditions: []string{ + `device.attributes["tflops"].quantity >= quantity("10")`, + `device.attributes["vram"].quantity >= quantity("8Gi")`, + `device.attributes["model"] == "A100"`, + `int(device.attributes["gpu_count"]) >= 1`, + `device.attributes["pool_name"] == "high-priority"`, + `device.attributes["workload_name"] == "ml-training-job"`, + `device.attributes["workload_namespace"] == "production"`, + `device.attributes["pod_namespace"] == "production"`, + }, + }, + { + name: "Zero resources fallback to default condition", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + GPUCount: 0, // Zero count should not add condition + Resources: tfv1.Resources{ + Requests: tfv1.Resource{ + // Zero resources + }, + }, + }, + }, + expectedConditions: []string{ + `device.attributes["pod_namespace"] == "default"`, + }, + }, + { + name: "Empty resources fallback to basic condition", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "", + }, + }, + tfInfo: &utils.TensorFusionInfo{ + Profile: &tfv1.WorkloadProfileSpec{ + // All empty/zero values + }, + }, + expectedConditions: []string{ + `device.attributes.exists("type")`, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + celExpression, err := BuildCELSelector(tt.pod, tt.tfInfo) + require.NoError(t, err) + require.NotEmpty(t, celExpression) + + // Verify expected conditions are present + for _, condition := range tt.expectedConditions { + assert.Contains(t, celExpression, condition, "Expected condition not found: %s", condition) + } + + // Verify unexpected conditions are not present + for _, condition := range tt.unexpectedConditions { + assert.NotContains(t, celExpression, condition, "Unexpected condition found: %s", condition) + } + + // Verify proper AND joining (unless it's the fallback condition) + if len(tt.expectedConditions) > 1 { + assert.Contains(t, celExpression, " && ", "Conditions should be joined with &&") + } + }) + } +} + +func TestHasDRAClaim(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + expected bool + }{ + { + name: "pod with resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + ResourceClaims: []corev1.PodResourceClaim{ + {Name: "gpu-claim"}, + }, + }, + }, + expected: true, + }, + { + name: "pod without resource claims", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{}, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := HasDRAClaim(tt.pod) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestDRAProcessor_LazyConfigLoading(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, tfv1.AddToScheme(scheme)) + + tests := []struct { + name string + templates []tfv1.SchedulingConfigTemplate + expected bool + }{ + { + name: "DRA enabled in template", + templates: []tfv1.SchedulingConfigTemplate{ + { + ObjectMeta: metav1.ObjectMeta{Name: "template1"}, + Spec: tfv1.SchedulingConfigTemplateSpec{ + DRA: &tfv1.DRAConfig{ + Enable: &[]bool{true}[0], + ResourceClaimTemplateName: "test-gpu-template", + }, + }, + }, + }, + expected: true, + }, + { + name: "DRA disabled in template", + templates: []tfv1.SchedulingConfigTemplate{ + { + ObjectMeta: metav1.ObjectMeta{Name: "template1"}, + Spec: tfv1.SchedulingConfigTemplateSpec{ + DRA: 
&tfv1.DRAConfig{ + Enable: &[]bool{false}[0], + }, + }, + }, + }, + expected: false, + }, + { + name: "no templates", + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := make([]client.Object, len(tt.templates)) + for i, template := range tt.templates { + objects[i] = &template + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + Build() + + processor := &DRAProcessor{ + Client: fakeClient, + } + + // Test lazy loading by calling a method that triggers config loading + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{}, + }, + } + + result := processor.IsDRAEnabled(context.Background(), pod) + assert.Equal(t, tt.expected, result) + + // Verify config was loaded + assert.True(t, processor.configLoaded) + }) + } +} diff --git a/internal/webhook/v1/pod_webhook_test.go b/internal/webhook/v1/pod_webhook_test.go index 55f29233..6f01b3ed 100644 --- a/internal/webhook/v1/pod_webhook_test.go +++ b/internal/webhook/v1/pod_webhook_test.go @@ -25,6 +25,7 @@ import ( tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/samber/lo" @@ -57,8 +58,9 @@ var _ = Describe("TensorFusionPodMutator", func() { decoder = admission.NewDecoder(scheme) mutator = &TensorFusionPodMutator{ - Client: k8sClient, - decoder: decoder, + Client: k8sClient, + decoder: decoder, + draProcessor: NewDRAProcessor(k8sClient), } }) @@ -240,6 +242,56 @@ var _ = Describe("TensorFusionPodMutator", func() { Expect(resp.Patches).To(BeEmpty()) }) + It("should handle dedicated GPU", func() { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-local-gpu", + Namespace: "default", + Labels: map[string]string{ + constants.TensorFusionEnabledLabelKey: "true", + }, + Annotations: map[string]string{ + constants.DedicatedGPUAnnotation: constants.TrueStringValue, + constants.GPUModelAnnotation: "A100", + constants.GpuPoolKey: "mock", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "main", + Image: "test-image", + }, + }, + }, + } + podBytes, err := json.Marshal(pod) + Expect(err).NotTo(HaveOccurred()) + req := admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Object: runtime.RawExtension{ + Raw: podBytes, + }, + Operation: admissionv1.Create, + Namespace: "default", + }, + } + + gpuallocator.GPUCapacityMap["A100"] = tfv1.Resource{ + Tflops: resource.MustParse("312"), + Vram: resource.MustParse("40Gi"), + } + resp := mutator.Handle(ctx, req) + Expect(resp.Allowed).To(BeTrue()) + + op, found := lo.Find(resp.Patches, func(patch jsonpatch.JsonPatchOperation) bool { + return patch.Operation == "add" && + patch.Path == "/metadata/annotations/tensor-fusion.ai~1tflops-request" + }) + Expect(found).To(BeTrue()) + Expect(op.Value).To(Equal("312")) + }) + It("should handle invalid pod specification", func() { req := admission.Request{ AdmissionRequest: admissionv1.AdmissionRequest{ @@ -532,7 +584,7 @@ var _ = Describe("TensorFusionPodMutator", func() { }, }, } - tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, pod) + tfInfo, err := ParseTensorFusionInfo(ctx, k8sClient, mutator.draProcessor, pod) Expect(err).NotTo(HaveOccurred()) Expect(tfInfo.ContainerNames).To(HaveLen(1)) 
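// --- Editorial sketch (not part of the patch) ---------------------------------
// What the dedicated-GPU path in the test above boils down to: look up the full
// capacity of the requested model and pin requests and limits to it, so the
// workload owns the whole card. The capacity table here is a local stand-in for
// gpuallocator.GPUCapacityMap; the A100 numbers match the test values.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

type capacity struct {
	Tflops resource.Quantity
	Vram   resource.Quantity
}

var capacityByModel = map[string]capacity{
	"A100": {Tflops: resource.MustParse("312"), Vram: resource.MustParse("40Gi")},
}

// fullCapacityFor returns the request/limit pair for a dedicated GPU of the
// given model, or an error when the model is unknown.
func fullCapacityFor(model string) (capacity, error) {
	c, ok := capacityByModel[model]
	if !ok {
		return capacity{}, fmt.Errorf("no capacity information for GPU model %q", model)
	}
	return c, nil
}

func main() {
	c, err := fullCapacityFor("A100")
	if err != nil {
		panic(err)
	}
	// Requests and limits end up identical: 312 TFLOPS and 40Gi VRAM.
	fmt.Println(c.Tflops.String(), c.Vram.String())
}
// --- End of sketch -------------------------------------------------------------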
Expect(tfInfo.ContainerNames[0]).To(Equal("test-container")) @@ -564,7 +616,7 @@ var _ = Describe("TensorFusionPodMutator", func() { currentBytes, err := json.Marshal(pod) Expect(err).NotTo(HaveOccurred()) - patch, err := mutator.patchTFClient(pod, pool, false, currentBytes, []int{0}) + patch, err := mutator.patchTFClient(context.Background(), pod, pool, false, currentBytes, []int{0}) Expect(err).NotTo(HaveOccurred()) Expect(patch).NotTo(BeEmpty()) // There should be at least 2 patches (initContainers and the container env patches) diff --git a/internal/webhook/v1/tf_parser.go b/internal/webhook/v1/tf_parser.go index bf805b76..14f1ad3a 100644 --- a/internal/webhook/v1/tf_parser.go +++ b/internal/webhook/v1/tf_parser.go @@ -8,6 +8,7 @@ import ( tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" "github.com/NexusGPU/tensor-fusion/internal/constants" + "github.com/NexusGPU/tensor-fusion/internal/gpuallocator" "github.com/NexusGPU/tensor-fusion/internal/utils" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -28,6 +29,7 @@ type TFResource struct { func ParseTensorFusionInfo( ctx context.Context, k8sClient client.Client, + draProcessor *DRAProcessor, pod *corev1.Pod, ) (utils.TensorFusionInfo, error) { var info utils.TensorFusionInfo @@ -115,6 +117,16 @@ func ParseTensorFusionInfo( workloadProfile.Spec.GPUModel = gpuModel } + // Parse DRA enabled annotation + if draProcessor.IsDRAEnabled(ctx, pod) { + info.DRAEnabled = true + } + // Handle dedicated GPU logic + err = handleDedicatedGPU(pod, workloadProfile) + if err != nil { + return info, fmt.Errorf("handle dedicated GPU: %w", err) + } + info.Profile = &workloadProfile.Spec info.ContainerNames = containerNames return info, nil @@ -227,3 +239,29 @@ func setDefaultQuotasIfExists(workloadProfile *tfv1.WorkloadProfile, single tfv1 } } } + +// handleDedicatedGPU handles dedicated GPU annotation by setting full GPU capacity +func handleDedicatedGPU(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile) error { + dedicatedGPU, ok := pod.Annotations[constants.DedicatedGPUAnnotation] + if !ok || dedicatedGPU != constants.TrueStringValue { + return nil // Not a dedicated GPU request + } + + // Must have GPU model specified for dedicated GPU + if workloadProfile.Spec.GPUModel == "" { + return fmt.Errorf("dedicated GPU requires gpu-model annotation to be specified") + } + + // Get full GPU capacity from pricing provider + resource, found := gpuallocator.GPUCapacityMap[workloadProfile.Spec.GPUModel] + if !found { + return fmt.Errorf("could not find capacity information for GPU model: %s", workloadProfile.Spec.GPUModel) + } + + // Set full capacity for both requests and limits + workloadProfile.Spec.Resources.Requests.Tflops = resource.Tflops + workloadProfile.Spec.Resources.Requests.Vram = resource.Vram + workloadProfile.Spec.Resources.Limits.Tflops = resource.Tflops + workloadProfile.Spec.Resources.Limits.Vram = resource.Vram + return nil +} diff --git a/internal/webhook/v1/webhook_suite_test.go b/internal/webhook/v1/webhook_suite_test.go index 4e5d369b..26a6685d 100644 --- a/internal/webhook/v1/webhook_suite_test.go +++ b/internal/webhook/v1/webhook_suite_test.go @@ -27,6 +27,7 @@ import ( "time" tfv1 "github.com/NexusGPU/tensor-fusion/api/v1" + "github.com/NexusGPU/tensor-fusion/internal/cloudprovider/pricing" "github.com/NexusGPU/tensor-fusion/internal/config" "github.com/NexusGPU/tensor-fusion/internal/portallocator" . 
"github.com/onsi/ginkgo/v2" @@ -134,11 +135,13 @@ var _ = BeforeSuite(func() { }) Expect(err).NotTo(HaveOccurred()) + // Create a mock pricing provider for testing + mockPricingProvider := &pricing.StaticPricingProvider{} err = SetupPodWebhookWithManager(mgr, &portallocator.PortAllocator{ PortRangeStartCluster: 42000, PortRangeEndCluster: 62000, BitmapCluster: make([]uint64, (62000-42000)/64+1), - }) + }, mockPricingProvider) Expect(err).NotTo(HaveOccurred()) // +kubebuilder:scaffold:webhook diff --git a/patches/scheduler-csi-capacity-3.patch b/patches/scheduler-csi-capacity-3.patch index 29a21ae8..c5841d08 100644 --- a/patches/scheduler-csi-capacity-3.patch +++ b/patches/scheduler-csi-capacity-3.patch @@ -9,11 +9,11 @@ "strings" "time" -@@ -514,6 +516,14 @@ +@@ -543,6 +545,14 @@ } handlers = append(handlers, handlerRegistration) - case framework.CSIStorageCapacity: -+ // FIX kubernetes 1.24 and lower version API missing issue + case fwk.CSIStorageCapacity: ++ // FIX kubernetes 1.23 and lower version API missing issue + minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") + if minorVersionStr != "" { + minorVersion, err := strconv.Atoi(minorVersionStr) @@ -22,5 +22,50 @@ + } + } if handlerRegistration, err = informerFactory.Storage().V1().CSIStorageCapacities().Informer().AddEventHandler( - buildEvtResHandler(at, framework.CSIStorageCapacity), + buildEvtResHandler(at, fwk.CSIStorageCapacity), ); err != nil { +@@ -578,6 +588,14 @@ + } + handlers = append(handlers, handlerRegistration) + case fwk.ResourceClaim: ++ // FIX kubernetes lower version API missing issue ++ minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") ++ if minorVersionStr != "" { ++ minorVersion, err := strconv.Atoi(minorVersionStr) ++ if err != nil || minorVersion < 34 { ++ continue ++ } ++ } + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + handlerRegistration = resourceClaimCache.AddEventHandler( + buildEvtResHandler(at, fwk.ResourceClaim), +@@ -585,6 +603,14 @@ + handlers = append(handlers, handlerRegistration) + } + case fwk.ResourceSlice: ++ // FIX kubernetes lower version API missing issue ++ minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") ++ if minorVersionStr != "" { ++ minorVersion, err := strconv.Atoi(minorVersionStr) ++ if err != nil || minorVersion < 34 { ++ continue ++ } ++ } + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + if handlerRegistration, err = resourceSliceTracker.AddEventHandler( + buildEvtResHandler(at, fwk.ResourceSlice), +@@ -594,6 +620,14 @@ + handlers = append(handlers, handlerRegistration) + } + case fwk.DeviceClass: ++ // FIX kubernetes lower version API missing issue ++ minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") ++ if minorVersionStr != "" { ++ minorVersion, err := strconv.Atoi(minorVersionStr) ++ if err != nil || minorVersion < 34 { ++ continue ++ } ++ } + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) { + if handlerRegistration, err = informerFactory.Resource().V1().DeviceClasses().Informer().AddEventHandler( + buildEvtResHandler(at, fwk.DeviceClass), diff --git a/patches/scheduler-pdb-1.patch b/patches/scheduler-pdb-1.patch index ae9b966e..3a35e841 100644 --- a/patches/scheduler-pdb-1.patch +++ b/patches/scheduler-pdb-1.patch @@ -1,16 +1,38 @@ ---- ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-08-06 17:45:27 -+++ 
../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-08-06 17:45:19 -@@ -20,7 +20,9 @@ +--- ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-09-15 17:45:27 ++++ ../vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go 2025-09-15 17:45:19 +@@ -20,7 +20,10 @@ "context" "fmt" "math/rand" + "os" "sort" + "strconv" ++ "time" v1 "k8s.io/api/core/v1" policy "k8s.io/api/policy/v1" -@@ -364,5 +366,13 @@ +@@ -119,6 +122,20 @@ + // Default behavior: No additional filtering, beyond the internal requirement that the victim pod + // have lower priority than the preemptor pod. + pl.IsEligiblePod = func(nodeInfo fwk.NodeInfo, victim fwk.PodInfo, preemptor *v1.Pod) bool { ++ victimAnnotation := victim.GetPod().Annotations ++ if victimAnnotation == nil { ++ return true ++ } ++ if protectionPeriod, ok := victimAnnotation["tensor-fusion.ai/eviction-protection"]; ok { ++ duration, err := time.ParseDuration(protectionPeriod) ++ if err != nil { ++ return true ++ } ++ // Still in protection period, not allow to preempt ++ if time.Now().Before(victim.GetPod().CreationTimestamp.Add(duration)) { ++ return false ++ } ++ } + return true + } + +@@ -430,5 +447,13 @@ } func getPDBLister(informerFactory informers.SharedInformerFactory) policylisters.PodDisruptionBudgetLister { @@ -24,3 +46,4 @@ + } return informerFactory.Policy().V1().PodDisruptionBudgets().Lister() } + \ No newline at end of file diff --git a/patches/scheduler-pdb-2.patch b/patches/scheduler-pdb-2.patch index 810bb0c6..12af371e 100644 --- a/patches/scheduler-pdb-2.patch +++ b/patches/scheduler-pdb-2.patch @@ -9,22 +9,14 @@ "sync" "sync/atomic" "time" -@@ -34,6 +36,7 @@ - "k8s.io/apimachinery/pkg/util/sets" - corelisters "k8s.io/client-go/listers/core/v1" - policylisters "k8s.io/client-go/listers/policy/v1" -+ policyv1 "k8s.io/client-go/listers/policy/v1" - corev1helpers "k8s.io/component-helpers/scheduling/corev1" - "k8s.io/klog/v2" - extenderv1 "k8s.io/kube-scheduler/extender/v1" -@@ -145,7 +148,16 @@ +@@ -148,8 +150,17 @@ func NewEvaluator(pluginName string, fh framework.Handle, i Interface, enableAsyncPreemption bool) *Evaluator { podLister := fh.SharedInformerFactory().Core().V1().Pods().Lister() - pdbLister := fh.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister() -+ + + // FIX kubernetes 1.21 and lower version API missing issue -+ var pdbLister policyv1.PodDisruptionBudgetLister ++ var pdbLister policylisters.PodDisruptionBudgetLister + minorVersionStr := os.Getenv("KUBE_API_VERSION_MINOR") + if minorVersionStr != "" { + minorVersion, err := strconv.Atoi(minorVersionStr) @@ -32,6 +24,7 @@ + pdbLister = fh.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister() + } + } - ++ ev := &Evaluator{ PluginName: pluginName, + Handler: fh, diff --git a/scripts/patch-scheduler.sh b/scripts/patch-scheduler.sh index 9afbc43b..23c2837d 100755 --- a/scripts/patch-scheduler.sh +++ b/scripts/patch-scheduler.sh @@ -1,6 +1,10 @@ #!/bin/bash git apply ./patches/scheduler-csi-capacity-1.patch git apply ./patches/scheduler-csi-capacity-2.patch + +# diff -u eventhandlers.go eventhandlers-new.go > changes.patch git apply ./patches/scheduler-csi-capacity-3.patch git apply ./patches/scheduler-pdb-1.patch + +# diff -u original_file.go modified_file.go > changes.patch git apply ./patches/scheduler-pdb-2.patch \ No newline at end of file diff --git a/test/sched/gpufit_bench_test.go 
b/test/sched/gpufit_bench_test.go index 20be047e..147d31e8 100644 --- a/test/sched/gpufit_bench_test.go +++ b/test/sched/gpufit_bench_test.go @@ -20,7 +20,6 @@ func BenchmarkGPUFitPlugin(b *testing.B) { NumNodes: 500, NumGPUs: 3000, NumPods: 10000, - BatchSize: 1, PoolName: "test-pool", Namespace: "test-ns", Timeout: 5 * time.Minute, @@ -42,7 +41,7 @@ func BenchmarkGPUFitPlugin(b *testing.B) { break } testPod := fixture.pods[i] - fixture.plugin.PreFilter(fixture.ctx, state, testPod) + fixture.plugin.PreFilter(fixture.ctx, state, testPod, nil) filterResult, err := state.Read(gpuResourceFitPlugin.CycleStateGPUSchedulingResult) if err != nil { b.Fatal(err) @@ -82,7 +81,7 @@ func BenchmarkGPUFitPlugin(b *testing.B) { b.Run("Filter", func(b *testing.B) { state := framework.NewCycleState() - fixture.plugin.PreFilter(fixture.ctx, state, testPod) + fixture.plugin.PreFilter(fixture.ctx, state, testPod, nil) nodeInfo := &framework.NodeInfo{} b.ResetTimer() @@ -94,7 +93,7 @@ func BenchmarkGPUFitPlugin(b *testing.B) { b.Run("Score", func(b *testing.B) { state := framework.NewCycleState() - fixture.plugin.PreFilter(fixture.ctx, state, testPod) + fixture.plugin.PreFilter(fixture.ctx, state, testPod, nil) nodeInfo := &framework.NodeInfo{} b.ResetTimer() diff --git a/test/sched/preemption_test.go b/test/sched/preemption_test.go new file mode 100644 index 00000000..1715d61b --- /dev/null +++ b/test/sched/preemption_test.go @@ -0,0 +1,299 @@ +package sched + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + "github.com/NexusGPU/tensor-fusion/cmd/sched" + "github.com/NexusGPU/tensor-fusion/internal/constants" + gpuResourceFitPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gpuresources" + gpuTopoPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gputopo" + "github.com/NexusGPU/tensor-fusion/internal/utils" + "github.com/stretchr/testify/require" + "go.uber.org/zap/zapcore" + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2" + "k8s.io/kubernetes/cmd/kube-scheduler/app" + "k8s.io/kubernetes/pkg/scheduler" + st "k8s.io/kubernetes/pkg/scheduler/testing" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/log/zap" +) + +// PreemptionTestSuite holds common test setup for preemption tests +type PreemptionTestSuite struct { + ctx context.Context + cancel context.CancelFunc + k8sClient client.Client + scheduler *scheduler.Scheduler + fixture *BenchmarkFixture + testEnv *envtest.Environment + kubeconfigPath string +} + +// SetupSuite initializes the test environment for preemption tests +func (pts *PreemptionTestSuite) SetupSuite(t *testing.T) { + klog.SetLogger(zap.New(zap.WriteTo(discardWriter{}), zap.UseDevMode(false), zap.Level(zapcore.InfoLevel))) + + // Setup test environment + ver, cfg, err := setupKubernetes() + require.NoError(t, err) + pts.testEnv = testEnv + + kubeconfigPath, err := writeKubeconfigToTempFileAndSetEnv(cfg) + require.NoError(t, err) + pts.kubeconfigPath = kubeconfigPath + + k8sClient, err := client.New(cfg, client.Options{Scheme: scheme.Scheme}) + require.NoError(t, err) + pts.k8sClient = k8sClient + + // Configure test with limited resources for preemption scenarios + benchConfig := BenchmarkConfig{ + NumNodes: 2, + NumGPUs: 4, + PoolName: "preemption-test-pool", + Namespace: "preemption-test-ns", + Timeout: 1 * time.Minute, + } + + mockBench := &testing.B{} + fixture := NewBenchmarkFixture(mockBench, benchConfig, k8sClient, true) + pts.fixture 
= fixture + + utils.SetProgressiveMigration(false) + + gpuResourceFitOpt := app.WithPlugin( + gpuResourceFitPlugin.Name, + gpuResourceFitPlugin.NewWithDeps(fixture.allocator, fixture.client), + ) + gpuTopoOpt := app.WithPlugin( + gpuTopoPlugin.Name, + gpuTopoPlugin.NewWithDeps(fixture.allocator, fixture.client), + ) + + ctx, cancel := context.WithCancel(context.Background()) + pts.ctx = ctx + pts.cancel = cancel + + cc, scheduler, err := sched.SetupScheduler(ctx, nil, + "../../config/samples/scheduler-config.yaml", true, ver, gpuResourceFitOpt, gpuTopoOpt) + require.NoError(t, err) + pts.scheduler = scheduler + scheduler.SchedulingQueue.Run(klog.FromContext(ctx)) + + // Start scheduler components + cc.EventBroadcaster.StartRecordingToSink(ctx.Done()) + cc.InformerFactory.Start(ctx.Done()) + cc.InformerFactory.WaitForCacheSync(ctx.Done()) + require.NoError(t, scheduler.WaitForHandlersSync(ctx)) +} + +// TearDownSuite cleans up the test environment +func (pts *PreemptionTestSuite) TearDownSuite(t *testing.T) { + if pts.cancel != nil { + pts.cancel() + } + if pts.fixture != nil { + pts.fixture.Close() + } + if pts.kubeconfigPath != "" { + require.NoError(t, cleanupKubeconfigTempFile(pts.kubeconfigPath)) + } + if pts.testEnv != nil { + require.NoError(t, pts.testEnv.Stop()) + } +} + +// discardWriter implements io.Writer to discard log output during tests +type discardWriter struct{} + +func (discardWriter) Write(p []byte) (n int, err error) { + return len(p), nil +} + +// TestPreemption tests comprehensive preemption scenarios +func TestPreemption(t *testing.T) { + suite := &PreemptionTestSuite{} + suite.SetupSuite(t) + defer suite.TearDownSuite(t) + testGPUResourcePreemption(t, suite) +} + +// TestPreemptionEvictProtection tests comprehensive preemption scenarios +func TestPreemptionEvictProtection(t *testing.T) { + suite := &PreemptionTestSuite{} + suite.SetupSuite(t) + defer suite.TearDownSuite(t) + testGPUResourceEvictProtection(t, suite) +} + +// testGPUResourcePreemption tests GPU shortage detection logic +func testGPUResourcePreemption(t *testing.T, suite *PreemptionTestSuite) { + // Mock cluster resources + // {"2250", "141Gi"}, // Simulate B200 + // {"989", "80Gi"}, // Simulate H100 + // {"450", "48Gi"}, // Simulate L40s + // {"312", "40Gi"}, // Simulate A100 + + // Create pods that will exhaust resources + toBeVictimPods := createPreemptionTestPodsWithQoS("victim", constants.QoSLevelMedium, 7+3+1+1, "300", "1Gi") + + for _, pod := range toBeVictimPods { + require.NoError(t, suite.k8sClient.Create(suite.ctx, pod)) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, pod) + }() + } + + // Try scheduling all pending pods + for range 12 { + suite.scheduler.ScheduleOne(suite.ctx) + } + + // schedule high priority pod + highPriorityPod := createPreemptionTestPodsWithQoS("high-priority", constants.QoSLevelHigh, 1, "300", "1Gi")[0] + require.NoError(t, suite.k8sClient.Create(suite.ctx, highPriorityPod)) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, highPriorityPod) + }() + + suite.scheduler.ScheduleOne(suite.ctx) + + // schedule critical priority pod + criticalPriorityPod := createPreemptionTestPodsWithQoS( + "critical-priority", constants.QoSLevelCritical, 1, "300", "1Gi")[0] + require.NoError(t, suite.k8sClient.Create(suite.ctx, criticalPriorityPod)) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, criticalPriorityPod) + }() + suite.scheduler.ScheduleOne(suite.ctx) + + // Preemption should be triggered and victims deleted, wait informer sync + time.Sleep(1 * 
time.Second) + + podList := &v1.PodList{} + err := suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + scheduledNodeMap := make(map[string]string) + for _, pod := range podList.Items { + scheduledNodeMap[pod.Name] = pod.Spec.NodeName + } + // 2 Pods deleted, 14 - 2 = 12 + require.Equal(t, 12, len(podList.Items)) + + // without Pod Controller, directly reconcile all state to simulate the Pod deletion + suite.fixture.allocator.ReconcileAllocationStateForTesting() + + // Trigger next 2 scheduling cycle, make sure the two higher priority pods are scheduled + suite.scheduler.ScheduleOne(suite.ctx) + suite.scheduler.ScheduleOne(suite.ctx) + + time.Sleep(1 * time.Second) + + err = suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + for _, pod := range podList.Items { + if strings.Contains(pod.Name, "victim") { + continue + } + scheduledNodeMap[pod.Name] = pod.Spec.NodeName + } + // not empty indicates the high priority pod is scheduled + require.NotEmpty(t, scheduledNodeMap["high-priority-0"]) + require.NotEmpty(t, scheduledNodeMap["critical-priority-0"]) +} + +func testGPUResourceEvictProtection(t *testing.T, suite *PreemptionTestSuite) { + toBeVictimPods := createPreemptionTestPodsWithQoS("victim", constants.QoSLevelMedium, 1, "2000", "2Gi") + toBeVictimPods[0].Annotations[constants.EvictionProtectionAnnotation] = "2s" + require.NoError(t, suite.k8sClient.Create(suite.ctx, toBeVictimPods[0])) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, toBeVictimPods[0]) + }() + + suite.scheduler.ScheduleOne(suite.ctx) + + toBeVictimPods = createPreemptionTestPodsWithQoS("high-priority", constants.QoSLevelHigh, 1, "2000", "2Gi") + require.NoError(t, suite.k8sClient.Create(suite.ctx, toBeVictimPods[0])) + defer func() { + _ = suite.k8sClient.Delete(suite.ctx, toBeVictimPods[0]) + }() + + // should not evict since it's inside protection period + suite.scheduler.ScheduleOne(suite.ctx) + + podList := &v1.PodList{} + err := suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + require.Equal(t, 2, len(podList.Items)) + + // should evict since protection period over + time.Sleep(2 * time.Second) + suite.scheduler.ScheduleOne(suite.ctx) + + suite.fixture.allocator.ReconcileAllocationStateForTesting() + + // Should schedule the new high priority pod + suite.scheduler.ScheduleOne(suite.ctx) + // waiting for binding cycle take effect + time.Sleep(300 * time.Millisecond) + + podList = &v1.PodList{} + err = suite.k8sClient.List(suite.ctx, podList, &client.ListOptions{Namespace: "preemption-test-ns"}) + require.NoError(t, err) + require.Equal(t, 1, len(podList.Items)) + require.Equal(t, "high-priority-0", podList.Items[0].Name) + require.Equal(t, "node-0", podList.Items[0].Spec.NodeName) +} + +// Helper functions +func createPreemptionTestPodsWithQoS(baseName, qosLevel string, count int, tflops, vram string) []*v1.Pod { + pods := make([]*v1.Pod, count) + for i := 0; i < count; i++ { + pod := st.MakePod(). + Namespace("preemption-test-ns"). + Name(fmt.Sprintf("%s-%d", baseName, i)). + UID(fmt.Sprintf("%s-%d", baseName, i)). + SchedulerName("tensor-fusion-scheduler"). + Res(map[v1.ResourceName]string{ + v1.ResourceCPU: "100m", + v1.ResourceMemory: "256Mi", + }). + Toleration("node.kubernetes.io/not-ready"). 
+ ZeroTerminationGracePeriod().Obj() + + pod.Labels = map[string]string{ + constants.LabelComponent: constants.ComponentWorker, + constants.WorkloadKey: "test-workload", + } + + pod.Annotations = map[string]string{ + constants.GpuPoolKey: "preemption-test-pool", + constants.QoSLevelAnnotation: qosLevel, + constants.TFLOPSRequestAnnotation: tflops, + constants.VRAMRequestAnnotation: vram, + constants.TFLOPSLimitAnnotation: tflops, + constants.VRAMLimitAnnotation: vram, + constants.GpuCountAnnotation: "1", + } + pod.Spec.PriorityClassName = "tensor-fusion-" + qosLevel + + pods[i] = pod + } + return pods +} + +// func createPreemptionTestPodsWithEvictionProtection( +// namespace, baseName, qosLevel, protectionDuration string, count int, tflops, vram string) []*v1.Pod { +// pods := createPreemptionTestPodsWithQoS(namespace, baseName, qosLevel, count, tflops, vram) +// for _, pod := range pods { +// pod.Annotations[constants.EvictionProtectionAnnotation] = protectionDuration +// } +// return pods +// } diff --git a/test/sched/scheduler_bench_test.go b/test/sched/scheduler_bench_test.go index 65f43a13..bbed548f 100644 --- a/test/sched/scheduler_bench_test.go +++ b/test/sched/scheduler_bench_test.go @@ -6,14 +6,18 @@ import ( "os" "path/filepath" "runtime" + "strings" "testing" "time" "github.com/NexusGPU/tensor-fusion/cmd/sched" + "github.com/NexusGPU/tensor-fusion/internal/constants" gpuResourceFitPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gpuresources" gpuTopoPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gputopo" "github.com/NexusGPU/tensor-fusion/internal/utils" "go.uber.org/zap/zapcore" + "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -32,16 +36,27 @@ func defaultBenchmarkConfig() BenchmarkConfig { NumNodes: 1000, NumGPUs: 4000, NumPods: 10000, - BatchSize: 100, PoolName: "benchmark-pool", Namespace: "benchmark-ns", - Timeout: 10 * time.Minute, + Timeout: 5 * time.Minute, } } var testEnv *envtest.Environment -func setupKubernetes() (*rest.Config, error) { +func setupKubernetes() (*version.Version, *rest.Config, error) { + // export ENVTEST_K8S_VERSION=1.34.0 + // Run `./bin/setup-envtest use ${ENVTEST_K8S_VERSION} --bin-dir ./bin` before running the test + k8sVersion := os.Getenv("ENVTEST_K8S_VERSION") + if k8sVersion == "" { + k8sVersion = "1.31.0" + } + majorVersion := k8sVersion[:strings.Index(k8sVersion, ".")] + minorVersion := k8sVersion[strings.Index(k8sVersion, ".")+1 : strings.LastIndex(k8sVersion, ".")] + _ = os.Setenv(constants.KubeApiVersionMajorEnv, majorVersion) + _ = os.Setenv(constants.KubeApiVersionMinorEnv, minorVersion) + ver := version.MustParse(k8sVersion) + _ = feature.DefaultMutableFeatureGate.SetEmulationVersion(ver) testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{ filepath.Join("..", "..", "config", "crd", "bases"), @@ -49,15 +64,14 @@ func setupKubernetes() (*rest.Config, error) { }, ErrorIfCRDPathMissing: true, - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. 
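// --- Editorial sketch (not part of the patch) ---------------------------------
// A condensed version of the eviction-protection check patched into
// default_preemption.go earlier in this diff and exercised by the test above:
// a victim carrying the "tensor-fusion.ai/eviction-protection" annotation may
// only be preempted once the duration, counted from its creation timestamp, has
// elapsed. Unparseable values fall through to "eligible", matching the patch.
package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// eligibleForPreemption reports whether the pod is outside its protection window.
func eligibleForPreemption(pod *corev1.Pod, now time.Time) bool {
	protection, ok := pod.Annotations["tensor-fusion.ai/eviction-protection"]
	if !ok {
		return true
	}
	d, err := time.ParseDuration(protection)
	if err != nil {
		return true // malformed annotation: do not block preemption
	}
	return !now.Before(pod.CreationTimestamp.Add(d))
}

func main() {
	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
		CreationTimestamp: metav1.Now(),
		Annotations:       map[string]string{"tensor-fusion.ai/eviction-protection": "2s"},
	}}
	fmt.Println(eligibleForPreemption(pod, time.Now()))                    // false: still protected
	fmt.Println(eligibleForPreemption(pod, time.Now().Add(5*time.Second))) // true: window elapsed
}
// --- End of sketch -------------------------------------------------------------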
BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", - fmt.Sprintf("1.31.0-%s-%s", runtime.GOOS, runtime.GOARCH)), + fmt.Sprintf("%s-%s-%s", k8sVersion, runtime.GOOS, runtime.GOARCH)), + } + cfg, err := testEnv.Start() + if err != nil { + return nil, nil, err } - return testEnv.Start() + return ver, cfg, nil } // Estimated Performance: 400-500 pods/second for 1K nodes, 10K Pods cluster on Mac M4 Pro @@ -65,7 +79,7 @@ func setupKubernetes() (*rest.Config, error) { func BenchmarkScheduler(b *testing.B) { klog.SetLogger(zap.New(zap.WriteTo(os.Stderr), zap.UseDevMode(false), zap.Level(zapcore.ErrorLevel))) // Setup phase - runs once before all benchmark iterations - cfg, err := setupKubernetes() + ver, cfg, err := setupKubernetes() if err != nil { b.Fatal(err) } @@ -99,7 +113,7 @@ func BenchmarkScheduler(b *testing.B) { testCtx := ctx cc, scheduler, err := sched.SetupScheduler(testCtx, nil, - "../../config/samples/scheduler-config.yaml", true, gpuResourceFitOpt, gpuTopoOpt) + "../../config/samples/scheduler-config.yaml", true, ver, gpuResourceFitOpt, gpuTopoOpt) if err != nil { b.Fatal(err) } diff --git a/test/sched/setup.go b/test/sched/setup.go index 03e40bfa..5dc80e32 100644 --- a/test/sched/setup.go +++ b/test/sched/setup.go @@ -14,21 +14,27 @@ import ( gpuResourceFitPlugin "github.com/NexusGPU/tensor-fusion/internal/scheduler/gpuresources" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" + schedv1 "k8s.io/api/scheduling/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + informers "k8s.io/client-go/informers" + clientsetfake "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/events" "k8s.io/klog/v2" + internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache" + internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue" "k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort" frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/kubernetes/pkg/scheduler/metrics" st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - testutil "sigs.k8s.io/scheduler-plugins/test/util" ) // BenchmarkConfig holds benchmark configuration @@ -36,7 +42,6 @@ type BenchmarkConfig struct { NumNodes int NumGPUs int NumPods int - BatchSize int PoolName string Namespace string Timeout time.Duration @@ -85,23 +90,35 @@ func NewBenchmarkFixture( b.Logf("%d Pods created, Needed TFLOPS: %f, Needed VRAM: %f", len(pods), neededTflops, neededVRAM) // Batch create resources for better performance - batchCreateResources(b, ctx, client, nodes, gpus, pods, realAPIServer) + k8sNativeObjects := batchCreateResources(b, ctx, client, config.Namespace, nodes, gpus, pods, realAPIServer) // Setup allocator allocator := setupAllocator(b, ctx, client) // Setup framework and plugin - fwk, plugin := setupFrameworkAndPlugin(b, ctx, client, allocator, pods, nodes) - - return &BenchmarkFixture{ - ctx: ctx, - cancel: cancel, - plugin: plugin, - nodes: nodes, - pods: pods, - allocator: allocator, - client: client, - fwk: fwk, + if !realAPIServer { + fwk, plugin := setupFrameworkAndPlugin(b, ctx, client, allocator, k8sNativeObjects) + return &BenchmarkFixture{ + ctx: ctx, + cancel: cancel, + plugin: plugin, 
+ nodes: nodes, + pods: pods, + allocator: allocator, + client: client, + fwk: fwk, + } + } else { + return &BenchmarkFixture{ + ctx: ctx, + cancel: cancel, + plugin: nil, + nodes: nodes, + pods: pods, + allocator: allocator, + client: client, + fwk: nil, + } } } @@ -162,10 +179,10 @@ func generateGPUs(totalGPUs int, nodes []*v1.Node, poolName string) ([]*tfv1.GPU // Pre-define GPU specs to avoid repeated allocations gpuSpecs := []struct{ tflops, vram string }{ - {"2250", "141Gi"}, // High-end - {"989", "80Gi"}, // Mid-range - {"450", "48Gi"}, // Entry-level - {"312", "40Gi"}, // Budget + {"2250", "141Gi"}, // Simulate B200 + {"989", "80Gi"}, // Simulate H100 + {"450", "48Gi"}, // Simulate L40s + {"312", "40Gi"}, // Simulate A100 } gpuIndex := 0 @@ -271,11 +288,27 @@ func generatePods(count int, namespace, poolName string) ([]*v1.Pod, float64, fl // Helper functions for setup func batchCreateResources( - b *testing.B, ctx context.Context, client client.Client, + b *testing.B, ctx context.Context, client client.Client, namespace string, nodes []*v1.Node, gpus []*tfv1.GPU, pods []*v1.Pod, realAPIServer bool, -) { +) []runtime.Object { + // Create priority classes + require.NoError(b, client.Create(ctx, &schedv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "tensor-fusion-" + constants.QoSLevelCritical}, + Value: 100000, + })) + require.NoError(b, client.Create(ctx, &schedv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "tensor-fusion-" + constants.QoSLevelHigh}, + Value: 10000, + })) + require.NoError(b, client.Create(ctx, &schedv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "tensor-fusion-" + constants.QoSLevelMedium}, + Value: 100, + PreemptionPolicy: ptr.To(v1.PreemptNever), + })) + + k8sObjs := []runtime.Object{} require.NoError(b, client.Create(ctx, &v1.Namespace{ - ObjectMeta: metav1.ObjectMeta{Name: "benchmark-ns"}, + ObjectMeta: metav1.ObjectMeta{Name: namespace}, })) timer := time.Now() @@ -283,6 +316,7 @@ func batchCreateResources( for _, node := range nodes { nodeCopy := node.DeepCopy() require.NoError(b, client.Create(ctx, nodeCopy)) + k8sObjs = append(k8sObjs, nodeCopy) if realAPIServer { node.ResourceVersion = nodeCopy.ResourceVersion @@ -310,13 +344,15 @@ func batchCreateResources( b.Logf("Creating %d pods", len(pods)) for _, pod := range pods { require.NoError(b, client.Create(ctx, pod)) + k8sObjs = append(k8sObjs, pod) } b.Logf("%d pods created, duration: %v", len(pods), time.Since(timer)) + return k8sObjs } func setupFrameworkAndPlugin( b *testing.B, ctx context.Context, client client.Client, - allocator *gpuallocator.GpuAllocator, pods []*v1.Pod, nodes []*v1.Node, + allocator *gpuallocator.GpuAllocator, k8sObjs []runtime.Object, ) (framework.Framework, *gpuResourceFitPlugin.GPUFit) { // Register plugins including our GPU plugin registeredPlugins := []tf.RegisterPluginFunc{ @@ -324,11 +360,16 @@ func setupFrameworkAndPlugin( tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), } - // Create framework - fwk, err := tf.NewFramework(ctx, registeredPlugins, "", - frameworkruntime.WithPodNominator(testutil.NewPodNominator(nil)), - frameworkruntime.WithSnapshotSharedLister(testutil.NewFakeSharedLister(pods, nodes)), + fakeClientSet := clientsetfake.NewSimpleClientset(k8sObjs...) 
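+	// the fake clientset is seeded with the nodes and pods created above, so the
+	// informer-backed scheduling queue below can act as the pod nominator without
+	// a real API server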
+ informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0) + metrics.Register() + metricsRecorder := metrics.NewMetricsAsyncRecorder(1000, time.Second, ctx.Done()) + fwk, err := tf.NewFramework( + ctx, registeredPlugins, "", + frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)), + frameworkruntime.WithSnapshotSharedLister(internalcache.NewEmptySnapshot()), frameworkruntime.WithEventRecorder(&events.FakeRecorder{}), + frameworkruntime.WithMetricsRecorder(metricsRecorder), ) require.NoError(b, err)
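+	// NOTE: this framework uses an empty snapshot and the real scheduling queue as
+	// the pod nominator, so the GPU plugins are exercised without running a full
+	// scheduler or API server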