From 840b71f794911b437f645e6fbcb62e986cb2b2a7 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 11 Feb 2026 13:35:15 +0800 Subject: [PATCH 1/6] fix: support device allocation for multi-container pods with init containers Signed-off-by: Tim --- .../nvidiadevice/nvinternal/plugin/util.go | 20 ++++++++- pkg/device/devices.go | 42 +++++++++++++++---- pkg/util/util.go | 4 +- 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util.go index 6a6adfdb3..a161f0e65 100644 --- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util.go +++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util.go @@ -61,9 +61,27 @@ func GetNextDeviceRequest(dtype string, p corev1.Pod) (corev1.Container, device. if !ok { return corev1.Container{}, res, errors.New("device request not found") } + + // The annotation format follows the order: init containers first, then regular containers + // Index mapping: + // 0 to len(InitContainers)-1: init containers + // len(InitContainers) to len(InitContainers)+len(Containers)-1: regular containers + initContainerCount := len(p.Spec.InitContainers) + for ctridx, ctrDevice := range pd { if len(ctrDevice) > 0 { - return p.Spec.Containers[ctridx], ctrDevice, nil + if ctridx < initContainerCount { + // This is an init container + klog.Infof("Found device request in init container at index %d, name: %s", ctridx, p.Spec.InitContainers[ctridx].Name) + return p.Spec.InitContainers[ctridx], ctrDevice, nil + } else { + // This is a regular container + regularContainerIdx := ctridx - initContainerCount + if regularContainerIdx < len(p.Spec.Containers) { + klog.Infof("Found device request in container at index %d (original idx: %d), name: %s", regularContainerIdx, ctridx, p.Spec.Containers[regularContainerIdx].Name) + return p.Spec.Containers[regularContainerIdx], ctrDevice, nil + } + } } } return corev1.Container{}, res, errors.New("device request not 
found") diff --git a/pkg/device/devices.go b/pkg/device/devices.go index 8c7de8120..ab8c40116 100644 --- a/pkg/device/devices.go +++ b/pkg/device/devices.go @@ -386,9 +386,11 @@ func DecodePodDevices(checklist map[string]string, annos map[string]string) (Pod if err != nil { return PodDevices{}, nil } - if len(cd) == 0 { - continue - } + // IMPORTANT: Do NOT skip empty ContainerDevices! + // We must preserve the index mapping between annotation entries and pod containers. + // The annotation format is: "dev1:;dev2:;;dev3:;" where ; separates containers + // If we skip empty entries, the index mapping will be broken for multi-container pods + // (especially pods with init containers where some containers don't use devices) pd[devID] = append(pd[devID], cd) } } @@ -484,24 +486,48 @@ func ExtractMigTemplatesFromUUID(uuid string) (int, int, error) { } func Resourcereqs(pod *corev1.Pod) (counts PodDeviceRequests) { - counts = make(PodDeviceRequests, len(pod.Spec.Containers)) + // Total containers = init containers + regular containers + totalContainers := len(pod.Spec.InitContainers) + len(pod.Spec.Containers) + counts = make(PodDeviceRequests, totalContainers) klog.V(4).InfoS("Processing resource requirements", "pod", klog.KObj(pod), - "containerCount", len(pod.Spec.Containers)) + "initContainerCount", len(pod.Spec.InitContainers), + "containerCount", len(pod.Spec.Containers), + "totalContainers", totalContainers) //Count Nvidia GPU cnt := int32(0) - for i := range pod.Spec.Containers { + + // Process init containers first (indices 0 to len(InitContainers)-1) + for i := range pod.Spec.InitContainers { devices := GetDevices() counts[i] = make(ContainerDeviceRequests) - klog.V(5).InfoS("Processing container resources", + klog.V(5).InfoS("Processing init container resources", "pod", klog.KObj(pod), "containerIndex", i, + "containerName", pod.Spec.InitContainers[i].Name) + for idx, val := range devices { + request := val.GenerateResourceRequests(&pod.Spec.InitContainers[i]) 
+ if request.Nums > 0 { + cnt += request.Nums + counts[i][idx] = request + } + } + } + + // Process regular containers (indices len(InitContainers) to totalContainers-1) + initContainerOffset := len(pod.Spec.InitContainers) + for i := range pod.Spec.Containers { + devices := GetDevices() + counts[initContainerOffset+i] = make(ContainerDeviceRequests) + klog.V(5).InfoS("Processing container resources", + "pod", klog.KObj(pod), + "containerIndex", initContainerOffset+i, "containerName", pod.Spec.Containers[i].Name) for idx, val := range devices { request := val.GenerateResourceRequests(&pod.Spec.Containers[i]) if request.Nums > 0 { cnt += request.Nums - counts[i][idx] = request + counts[initContainerOffset+i][idx] = request } } } diff --git a/pkg/util/util.go b/pkg/util/util.go index fc9dc272d..41ba9e6d7 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -95,7 +95,9 @@ func GetPendingPod(ctx context.Context, node string) (*corev1.Pod, error) { if phase, ok := p.Annotations[DeviceBindPhase]; !ok { continue } else { - if strings.Compare(phase, DeviceBindAllocating) != 0 { + // Allow both "allocating" and "success" phases for multi-container pods + // where some containers have already been allocated but others are still pending + if strings.Compare(phase, DeviceBindAllocating) != 0 && strings.Compare(phase, DeviceBindSuccess) != 0 { continue } } From fe46dd87c44600f727079797a2d568992335f0e5 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 11 Feb 2026 13:54:47 +0800 Subject: [PATCH 2/6] feat(device): fix whitespace formatting in Resourcereqs function Remove trailing whitespace from blank lines in the Resourcereqs function to maintain consistent code style. 
Signed-off-by: Tim --- pkg/device/devices.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/device/devices.go b/pkg/device/devices.go index ab8c40116..aba5a5b5b 100644 --- a/pkg/device/devices.go +++ b/pkg/device/devices.go @@ -496,7 +496,7 @@ func Resourcereqs(pod *corev1.Pod) (counts PodDeviceRequests) { "totalContainers", totalContainers) //Count Nvidia GPU cnt := int32(0) - + // Process init containers first (indices 0 to len(InitContainers)-1) for i := range pod.Spec.InitContainers { devices := GetDevices() @@ -513,7 +513,7 @@ func Resourcereqs(pod *corev1.Pod) (counts PodDeviceRequests) { } } } - + // Process regular containers (indices len(InitContainers) to totalContainers-1) initContainerOffset := len(pod.Spec.InitContainers) for i := range pod.Spec.Containers { From 2e7760e9fbb809033537d74462d6d6b41f4bed15 Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 24 Feb 2026 16:54:48 +0800 Subject: [PATCH 3/6] test(device): fix PodDevices encode/decode roundtrip test expectations Update test cases to reflect actual decode behavior where trailing ";" in annotation format produces an extra empty ContainerDevices element. 
- Add explicit 'want' field to separate input args from expected output - Document the encode/decode roundtrip behavior in test comments - Fix assertion to compare against expected 'want' values instead of args - Add missing empty ContainerDevices to Test_DecodePodDevices expected output Signed-off-by: Tim --- pkg/device/devices_test.go | 44 +++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/pkg/device/devices_test.go b/pkg/device/devices_test.go index d728c6366..9d53f94ef 100644 --- a/pkg/device/devices_test.go +++ b/pkg/device/devices_test.go @@ -128,12 +128,21 @@ func TestPodDevicesCoding(t *testing.T) { tests := []struct { name string args PodDevices + // want is the expected result after encode->decode roundtrip + // Due to the annotation format ending with ";", decode will produce an extra empty ContainerDevices + want PodDevices }{ { name: "one pod one container use zero device", args: PodDevices{ "NVIDIA": PodSingleDevice{}, }, + // Empty PodSingleDevice encodes to "", which still decodes to one empty ContainerDevices entry (not an empty PodSingleDevice) + want: PodDevices{ + "NVIDIA": PodSingleDevice{ + ContainerDevices{}, + }, + }, }, { name: "one pod one container use one device", @@ -144,6 +153,15 @@ func TestPodDevicesCoding(t *testing.T) { }, }, }, + // Encodes to "UUID1,Type1,1000,30:;", trailing ";" produces extra empty ContainerDevices + want: PodDevices{ + "NVIDIA": PodSingleDevice{ + ContainerDevices{ + ContainerDevice{0, "UUID1", "Type1", 1000, 30, nil}, + }, + ContainerDevices{}, + }, + }, }, { name: "one pod two container, every container use one device", @@ -157,6 +175,18 @@ func TestPodDevicesCoding(t *testing.T) { }, }, }, + // Encodes to "UUID1,Type1,1000,30:;UUID1,Type1,1000,30:;", trailing ";" produces extra empty ContainerDevices + want: PodDevices{ + "NVIDIA": PodSingleDevice{ + ContainerDevices{ + ContainerDevice{0, "UUID1", "Type1", 1000, 30, nil}, + }, + ContainerDevices{ + ContainerDevice{0, "UUID1", "Type1", 1000, 30, 
nil}, + }, + ContainerDevices{}, + }, + }, }, { name: "one pod one container use two devices", @@ -168,6 +198,16 @@ func TestPodDevicesCoding(t *testing.T) { }, }, }, + // Encodes to "UUID1,Type1,1000,30:UUID2,Type1,1000,30:;", trailing ";" produces extra empty ContainerDevices + want: PodDevices{ + "NVIDIA": PodSingleDevice{ + ContainerDevices{ + ContainerDevice{0, "UUID1", "Type1", 1000, 30, nil}, + ContainerDevice{0, "UUID2", "Type1", 1000, 30, nil}, + }, + ContainerDevices{}, + }, + }, }, } for _, test := range tests { @@ -175,7 +215,7 @@ func TestPodDevicesCoding(t *testing.T) { s := EncodePodDevices(inRequestDevices, test.args) fmt.Println(s) got, _ := DecodePodDevices(inRequestDevices, s) - assert.DeepEqual(t, test.args, got) + assert.DeepEqual(t, test.want, got) }) } } @@ -217,6 +257,7 @@ func Test_DecodePodDevices(t *testing.T) { SupportDevices["NVIDIA"]: "GPU-8dcd427f-483b-b48f-d7e5-75fb19a52b76,NVIDIA,500,3:;GPU-ebe7c3f7-303d-558d-435e-99a160631fe4,NVIDIA,500,3:;", }, }, + // Trailing ";" produces an extra empty ContainerDevices want: PodDevices{ "NVIDIA": { { @@ -235,6 +276,7 @@ func Test_DecodePodDevices(t *testing.T) { Usedcores: 3, }, }, + {}, }, }, wantErr: nil, From 5f4d1d176944e593f6b36169e739b998db907e3f Mon Sep 17 00:00:00 2001 From: TimWang <7367474+haitwang-cloud@users.noreply.github.com> Date: Wed, 25 Feb 2026 17:04:41 +0800 Subject: [PATCH 4/6] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Signed-off-by: Tim --- pkg/util/util.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/util/util.go b/pkg/util/util.go index 41ba9e6d7..fae7f79d0 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -97,7 +97,7 @@ func GetPendingPod(ctx context.Context, node string) (*corev1.Pod, error) { } else { // Allow both "allocating" and "success" phases for multi-container pods // where some containers have already been allocated but others are still 
pending - if strings.Compare(phase, DeviceBindAllocating) != 0 && strings.Compare(phase, DeviceBindSuccess) != 0 { + if phase != DeviceBindAllocating && phase != DeviceBindSuccess { continue } } From d5867217e205c375966884f07520ddbe84adfde5 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 25 Mar 2026 15:46:23 +0800 Subject: [PATCH 5/6] test(device): add unit tests for device allocation in regular and init containers Signed-off-by: Tim --- .../nvinternal/plugin/util_test.go | 204 +++++++++++++ pkg/device/devices_test.go | 276 ++++++++++++++++++ 2 files changed, 480 insertions(+) diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go index 2f0b988f6..7a9a8e402 100644 --- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go +++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go @@ -161,6 +161,210 @@ func TestGenerateMigTemplate(t *testing.T) { } } +func TestGetNextDeviceRequest_DeviceInRegularContainer(t *testing.T) { + // Save and restore InRequestDevices + oldInRequestDevices := device.InRequestDevices + defer func() { device.InRequestDevices = oldInRequestDevices }() + + device.InRequestDevices = map[string]string{ + "NVIDIA": "hami.io/vgpu-devices-to-allocate", + } + + // Pod with no init containers, one regular container with a device + // Annotation format: "UUID,Type,mem,cores:;" + // After split by ";", we get ["UUID,Type,mem,cores:", ""] + // Index 0 maps to regular container 0 (since no init containers) + pod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: map[string]string{ + "hami.io/vgpu-devices-to-allocate": "GPU-abc123,NVIDIA,1000,30:;", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "main-container"}, + }, + }, + } + + ctr, ctrDevices, err := GetNextDeviceRequest("NVIDIA", pod) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ctr.Name != 
"main-container" { + t.Errorf("expected container name 'main-container', got '%s'", ctr.Name) + } + if len(ctrDevices) != 1 { + t.Fatalf("expected 1 device, got %d", len(ctrDevices)) + } + if ctrDevices[0].UUID != "GPU-abc123" { + t.Errorf("expected UUID 'GPU-abc123', got '%s'", ctrDevices[0].UUID) + } +} + +func TestGetNextDeviceRequest_DeviceInInitContainer(t *testing.T) { + oldInRequestDevices := device.InRequestDevices + defer func() { device.InRequestDevices = oldInRequestDevices }() + + device.InRequestDevices = map[string]string{ + "NVIDIA": "hami.io/vgpu-devices-to-allocate", + } + + // Pod with 1 init container (has device) and 1 regular container (no device) + // Annotation: "GPU-init1,NVIDIA,500,10:;;" + // After split by ";": ["GPU-init1,NVIDIA,500,10:", "", ""] + // Index 0 -> init container 0 (has device), Index 1 -> regular container 0 (empty) + pod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-init", + Namespace: "default", + Annotations: map[string]string{ + "hami.io/vgpu-devices-to-allocate": "GPU-init1,NVIDIA,500,10:;;", + }, + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + {Name: "init-with-gpu"}, + }, + Containers: []corev1.Container{ + {Name: "main-no-gpu"}, + }, + }, + } + + ctr, ctrDevices, err := GetNextDeviceRequest("NVIDIA", pod) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ctr.Name != "init-with-gpu" { + t.Errorf("expected container name 'init-with-gpu', got '%s'", ctr.Name) + } + if len(ctrDevices) != 1 { + t.Fatalf("expected 1 device, got %d", len(ctrDevices)) + } + if ctrDevices[0].UUID != "GPU-init1" { + t.Errorf("expected UUID 'GPU-init1', got '%s'", ctrDevices[0].UUID) + } +} + +func TestGetNextDeviceRequest_DeviceInRegularContainerWithInitOffset(t *testing.T) { + oldInRequestDevices := device.InRequestDevices + defer func() { device.InRequestDevices = oldInRequestDevices }() + + device.InRequestDevices = map[string]string{ + "NVIDIA": "hami.io/vgpu-devices-to-allocate", 
+ } + + // Pod with 2 init containers (no device) and 1 regular container (has device) + // Annotation: ";;GPU-main1,NVIDIA,2000,50:;" + // After split by ";": ["", "", "GPU-main1,NVIDIA,2000,50:", ""] + // Index 0 -> init container 0 (empty) + // Index 1 -> init container 1 (empty) + // Index 2 -> regular container 0 (has device, regularIdx = 2 - 2 = 0) + pod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-offset", + Namespace: "default", + Annotations: map[string]string{ + "hami.io/vgpu-devices-to-allocate": ";;GPU-main1,NVIDIA,2000,50:;", + }, + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + {Name: "init1-no-gpu"}, + {Name: "init2-no-gpu"}, + }, + Containers: []corev1.Container{ + {Name: "main-with-gpu"}, + }, + }, + } + + ctr, ctrDevices, err := GetNextDeviceRequest("NVIDIA", pod) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ctr.Name != "main-with-gpu" { + t.Errorf("expected container name 'main-with-gpu', got '%s'", ctr.Name) + } + if len(ctrDevices) != 1 { + t.Fatalf("expected 1 device, got %d", len(ctrDevices)) + } + if ctrDevices[0].UUID != "GPU-main1" { + t.Errorf("expected UUID 'GPU-main1', got '%s'", ctrDevices[0].UUID) + } +} + +func TestGetNextDeviceRequest_NoDeviceFound(t *testing.T) { + oldInRequestDevices := device.InRequestDevices + defer func() { device.InRequestDevices = oldInRequestDevices }() + + device.InRequestDevices = map[string]string{ + "NVIDIA": "hami.io/vgpu-devices-to-allocate", + } + + // Pod with annotation but all containers have empty devices + // Annotation: ";;" + // After split by ";": ["", "", ""] + pod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-empty", + Namespace: "default", + Annotations: map[string]string{ + "hami.io/vgpu-devices-to-allocate": ";;", + }, + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + {Name: "init1"}, + }, + Containers: []corev1.Container{ + {Name: "main1"}, + }, + }, + } + + _, _, err := 
GetNextDeviceRequest("NVIDIA", pod) + if err == nil { + t.Fatal("expected error 'device request not found', got nil") + } + if err.Error() != "device request not found" { + t.Errorf("expected error 'device request not found', got '%s'", err.Error()) + } +} + +func TestGetNextDeviceRequest_DeviceTypeNotFound(t *testing.T) { + oldInRequestDevices := device.InRequestDevices + defer func() { device.InRequestDevices = oldInRequestDevices }() + + device.InRequestDevices = map[string]string{ + "NVIDIA": "hami.io/vgpu-devices-to-allocate", + } + + // Pod with annotation for NVIDIA, but we ask for a non-existent device type + pod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-notype", + Namespace: "default", + Annotations: map[string]string{ + "hami.io/vgpu-devices-to-allocate": "GPU-abc,NVIDIA,1000,30:;", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "main"}, + }, + }, + } + + _, _, err := GetNextDeviceRequest("AMD", pod) + if err == nil { + t.Fatal("expected error 'device request not found', got nil") + } +} + func Test_PodAllocationTrySuccess(t *testing.T) { // Initialize fake clientset and pre-load test data client.KubeClient = fake.NewSimpleClientset() diff --git a/pkg/device/devices_test.go b/pkg/device/devices_test.go index 9d53f94ef..407c44c03 100644 --- a/pkg/device/devices_test.go +++ b/pkg/device/devices_test.go @@ -24,6 +24,7 @@ import ( "gotest.tools/v3/assert" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/Project-HAMi/HAMi/pkg/util" @@ -844,6 +845,281 @@ func TestEncodeContainerDeviceType(t *testing.T) { } } +// mockDevices is a minimal implementation of the Devices interface for testing Resourcereqs. 
+type mockDevices struct { + resourceRequest ContainerDeviceRequest +} + +func (m *mockDevices) CommonWord() string { return "mock" } +func (m *mockDevices) MutateAdmission(_ *corev1.Container, _ *corev1.Pod) (bool, error) { + return false, nil +} +func (m *mockDevices) CheckHealth(_ string, _ *corev1.Node) (bool, bool) { return true, true } +func (m *mockDevices) NodeCleanUp(_ string) error { return nil } +func (m *mockDevices) GetResourceNames() ResourceNames { return ResourceNames{} } +func (m *mockDevices) GetNodeDevices(_ corev1.Node) ([]*DeviceInfo, error) { + return nil, nil +} +func (m *mockDevices) LockNode(_ *corev1.Node, _ *corev1.Pod) error { return nil } +func (m *mockDevices) ReleaseNodeLock(_ *corev1.Node, _ *corev1.Pod) error { return nil } +func (m *mockDevices) GenerateResourceRequests(ctr *corev1.Container) ContainerDeviceRequest { + // Return the mock request only if the container has the resource annotation we look for + for rName := range ctr.Resources.Limits { + if string(rName) == "nvidia.com/gpu" { + return m.resourceRequest + } + } + return ContainerDeviceRequest{} +} +func (m *mockDevices) PatchAnnotations(_ *corev1.Pod, _ *map[string]string, _ PodDevices) map[string]string { + return nil +} +func (m *mockDevices) ScoreNode(_ *corev1.Node, _ PodSingleDevice, _ []*DeviceUsage, _ string) float32 { + return 0 +} +func (m *mockDevices) AddResourceUsage(_ *corev1.Pod, _ *DeviceUsage, _ *ContainerDevice) error { + return nil +} +func (m *mockDevices) Fit(_ []*DeviceUsage, _ ContainerDeviceRequest, _ *corev1.Pod, _ *NodeInfo, _ *PodDevices) (bool, map[string]ContainerDevices, string) { + return false, nil, "" +} + +func TestResourcereqs_OnlyRegularContainers(t *testing.T) { + // Setup mock device + oldDevicesMap := DevicesMap + defer func() { DevicesMap = oldDevicesMap }() + + DevicesMap = map[string]Devices{ + "NVIDIA": &mockDevices{ + resourceRequest: ContainerDeviceRequest{ + Nums: 1, + Type: "NVIDIA", + Memreq: 1000, + Coresreq: 10, + }, + 
}, + } + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "main", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("1"), + }, + }, + }, + }, + }, + } + + counts := Resourcereqs(pod) + + // No init containers, so length == number of regular containers + assert.Equal(t, len(counts), 1) + assert.Equal(t, counts[0]["NVIDIA"].Nums, int32(1)) +} + +func TestResourcereqs_WithInitContainers(t *testing.T) { + oldDevicesMap := DevicesMap + defer func() { DevicesMap = oldDevicesMap }() + + DevicesMap = map[string]Devices{ + "NVIDIA": &mockDevices{ + resourceRequest: ContainerDeviceRequest{ + Nums: 1, + Type: "NVIDIA", + Memreq: 1000, + Coresreq: 10, + }, + }, + } + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + { + Name: "init-no-gpu", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{}, + }, + }, + { + Name: "init-with-gpu", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("1"), + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "main-with-gpu", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("1"), + }, + }, + }, + }, + }, + } + + counts := Resourcereqs(pod) + + // Total containers = 2 init + 1 regular = 3 + assert.Equal(t, len(counts), 3) + + // Index 0: init-no-gpu - should have no device requests + _, hasNvidia0 := counts[0]["NVIDIA"] + assert.Equal(t, hasNvidia0, false) + + // Index 1: init-with-gpu - should have device request + assert.Equal(t, counts[1]["NVIDIA"].Nums, int32(1)) + + // Index 2: main-with-gpu - should have device request (initContainerOffset=2, so index 2) + assert.Equal(t, 
counts[2]["NVIDIA"].Nums, int32(1)) +} + +func TestResourcereqs_NoDeviceRequests(t *testing.T) { + oldDevicesMap := DevicesMap + defer func() { DevicesMap = oldDevicesMap }() + + DevicesMap = map[string]Devices{ + "NVIDIA": &mockDevices{ + resourceRequest: ContainerDeviceRequest{ + Nums: 1, + Type: "NVIDIA", + Memreq: 1000, + Coresreq: 10, + }, + }, + } + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + { + Name: "init-no-gpu", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{}, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "main-no-gpu", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{}, + }, + }, + }, + }, + } + + counts := Resourcereqs(pod) + + // Total = 1 init + 1 regular = 2 + assert.Equal(t, len(counts), 2) + // No GPU requests in either container + _, hasNvidia0 := counts[0]["NVIDIA"] + assert.Equal(t, hasNvidia0, false) + _, hasNvidia1 := counts[1]["NVIDIA"] + assert.Equal(t, hasNvidia1, false) +} + +func TestResourcereqs_MultipleInitAndRegularContainers(t *testing.T) { + oldDevicesMap := DevicesMap + defer func() { DevicesMap = oldDevicesMap }() + + DevicesMap = map[string]Devices{ + "NVIDIA": &mockDevices{ + resourceRequest: ContainerDeviceRequest{ + Nums: 2, + Type: "NVIDIA", + Memreq: 2000, + Coresreq: 20, + }, + }, + } + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + { + Name: "init1", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("2"), + }, + }, + }, + { + Name: "init2", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{}, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "main1", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + 
"nvidia.com/gpu": resource.MustParse("2"), + }, + }, + }, + { + Name: "main2", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("2"), + }, + }, + }, + }, + }, + } + + counts := Resourcereqs(pod) + + // Total = 2 init + 2 regular = 4 + assert.Equal(t, len(counts), 4) + + // Index 0: init1 with GPU + assert.Equal(t, counts[0]["NVIDIA"].Nums, int32(2)) + + // Index 1: init2 without GPU + _, hasNvidia1 := counts[1]["NVIDIA"] + assert.Equal(t, hasNvidia1, false) + + // Index 2: main1 with GPU (offset=2) + assert.Equal(t, counts[2]["NVIDIA"].Nums, int32(2)) + + // Index 3: main2 with GPU (offset=2) + assert.Equal(t, counts[3]["NVIDIA"].Nums, int32(2)) +} + func TestCheckUUID(t *testing.T) { GPUUseUUID := "hami.io/gpu-use-uuid" GPUNoUseUUID := "hami.io/gpu-no-use-uuid" From 39534a7125731784370152eae4d28075239a90c0 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 25 Mar 2026 15:50:34 +0800 Subject: [PATCH 6/6] fix(tests): improve formatting in mockDevices methods for consistency Signed-off-by: Tim --- .../nvinternal/plugin/util_test.go | 2 +- pkg/device/devices_test.go | 28 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go index 7a9a8e402..77479851f 100644 --- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go +++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/util_test.go @@ -99,7 +99,7 @@ func TestGenerateMigTemplate(t *testing.T) { expectedPos: 1, expectedReset: true, expectedMig: map[string]int32{ - "1g.5gb": 1, + "1g.5gb": 1, "2g.10gb": 3, }, }, diff --git a/pkg/device/devices_test.go b/pkg/device/devices_test.go index 407c44c03..88f5d28cd 100644 --- a/pkg/device/devices_test.go +++ b/pkg/device/devices_test.go @@ -850,7 +850,7 @@ type mockDevices struct { resourceRequest ContainerDeviceRequest } -func (m *mockDevices) CommonWord() 
string { return "mock" } +func (m *mockDevices) CommonWord() string { return "mock" } func (m *mockDevices) MutateAdmission(_ *corev1.Container, _ *corev1.Pod) (bool, error) { return false, nil } @@ -860,7 +860,7 @@ func (m *mockDevices) GetResourceNames() ResourceNames { retur func (m *mockDevices) GetNodeDevices(_ corev1.Node) ([]*DeviceInfo, error) { return nil, nil } -func (m *mockDevices) LockNode(_ *corev1.Node, _ *corev1.Pod) error { return nil } +func (m *mockDevices) LockNode(_ *corev1.Node, _ *corev1.Pod) error { return nil } func (m *mockDevices) ReleaseNodeLock(_ *corev1.Node, _ *corev1.Pod) error { return nil } func (m *mockDevices) GenerateResourceRequests(ctr *corev1.Container) ContainerDeviceRequest { // Return the mock request only if the container has the resource annotation we look for @@ -892,9 +892,9 @@ func TestResourcereqs_OnlyRegularContainers(t *testing.T) { DevicesMap = map[string]Devices{ "NVIDIA": &mockDevices{ resourceRequest: ContainerDeviceRequest{ - Nums: 1, - Type: "NVIDIA", - Memreq: 1000, + Nums: 1, + Type: "NVIDIA", + Memreq: 1000, Coresreq: 10, }, }, @@ -933,9 +933,9 @@ func TestResourcereqs_WithInitContainers(t *testing.T) { DevicesMap = map[string]Devices{ "NVIDIA": &mockDevices{ resourceRequest: ContainerDeviceRequest{ - Nums: 1, - Type: "NVIDIA", - Memreq: 1000, + Nums: 1, + Type: "NVIDIA", + Memreq: 1000, Coresreq: 10, }, }, @@ -999,9 +999,9 @@ func TestResourcereqs_NoDeviceRequests(t *testing.T) { DevicesMap = map[string]Devices{ "NVIDIA": &mockDevices{ resourceRequest: ContainerDeviceRequest{ - Nums: 1, - Type: "NVIDIA", - Memreq: 1000, + Nums: 1, + Type: "NVIDIA", + Memreq: 1000, Coresreq: 10, }, }, @@ -1050,9 +1050,9 @@ func TestResourcereqs_MultipleInitAndRegularContainers(t *testing.T) { DevicesMap = map[string]Devices{ "NVIDIA": &mockDevices{ resourceRequest: ContainerDeviceRequest{ - Nums: 2, - Type: "NVIDIA", - Memreq: 2000, + Nums: 2, + Type: "NVIDIA", + Memreq: 2000, Coresreq: 20, }, },