Commit 4463f9b

Merge pull request #1394 from openshift-cherrypick-robot/cherry-pick-1390-to-release-4.20
OCPBUGS-62023: release-4.20: e2e: set of fixes/changes to ovs pinning testing
2 parents 29c7e13 + db4422a commit 4463f9b

File tree

2 files changed, +73 -53 lines
  • test/e2e/performanceprofile/functests


test/e2e/performanceprofile/functests/7_performance_kubelet_node/cgroups.go

Lines changed: 48 additions & 47 deletions
@@ -278,13 +278,14 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 // we need to wait as process affinity can change
 time.Sleep(30 * time.Second)
 ctnCpuset := taskSet(ctx, containerPid, workerRTNode)
-testlog.Infof("Cpus used by ovn Containers are %s", ctnCpuset.String())
+testlog.Infof("Cpus used by ovn Containers are %s", ctnCpuset)
 pidList, err := ovsPids(ctx, ovsSystemdServices, workerRTNode)
 Expect(err).ToNot(HaveOccurred())
-cpumaskList, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
+pidToCPUs, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
 Expect(err).ToNot(HaveOccurred())
-for _, cpumask := range cpumaskList {
-Expect(ctnCpuset).To(Equal(cpumask), "affinity of ovn kube node pods(%s) do not match with ovservices(%s)", ctnCpuset.String(), cpumask.String())
+for pid, cpumask := range pidToCPUs {
+testlog.Infof("OVS service pid %s is using cpus %s", pid, cpumask)
+Expect(ctnCpuset.Equals(cpumask)).To(BeTrue(), "affinity of ovn kube node pods(%s) do not match with ovservices pid %s (%s)", ctnCpuset, pid, cpumask)
 }

 })

@@ -327,15 +328,16 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 // we need to wait as process affinity can change
 time.Sleep(30 * time.Second)
 ctnCpuset := taskSet(ctx, containerPid, workerRTNode)
-testlog.Infof("Container of ovn pod %s is using cpus %s", ovnPod.Name, ctnCpuset.String())
+testlog.Infof("Container of ovn pod %s is using cpus %s", ovnPod.Name, ctnCpuset)

 pidList, err := ovsPids(ctx, ovsSystemdServices, workerRTNode)
 Expect(err).ToNot(HaveOccurred())

-cpumaskList, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
+pidToCPUs, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
 Expect(err).ToNot(HaveOccurred())
-for _, cpumask := range cpumaskList {
-Expect(ctnCpuset).To(Equal(cpumask), "affinity of ovn kube node pods(%s) do not match with ovservices(%s)", ctnCpuset.String(), cpumask.String())
+for pid, cpumask := range pidToCPUs {
+testlog.Infof("OVS service pid %s is using cpus %s", pid, cpumask)
+Expect(ctnCpuset.Equals(cpumask)).To(BeTrue(), "affinity of ovn kube node pods(%s) do not match with ovservices pid %s (%s)", ctnCpuset, pid, cpumask)
 }
 deleteTestPod(ctx, testpod)

@@ -399,17 +401,18 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 // we need to wait as process affinity can change
 time.Sleep(30 * time.Second)
 ovnContainerCpuset1 := taskSet(ctx, containerPid, workerRTNode)
-testlog.Infof("Container of ovn pod %s is using cpus %s", ovnPod.Name, ovnContainerCpuset1.String())
+testlog.Infof("Container of ovn pod %s is using cpus %s", ovnPod.Name, ovnContainerCpuset1)
 pidList, err := ovsPids(ctx, ovsSystemdServices, workerRTNode)
 Expect(err).ToNot(HaveOccurred())

 // We wait for 30 seconds for ovs process cpu affinity to be updated
 time.Sleep(30 * time.Second)
 // Verify ovs-vswitchd and ovsdb-server process affinity is updated
-cpumaskList1, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
+pidToCPUs, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
 Expect(err).ToNot(HaveOccurred())
-for _, cpumask := range cpumaskList1 {
-Expect(ovnContainerCpuset1).To(Equal(cpumask), "affinity of ovn kube node pods(%s) do not match with ovservices(%s)", ovnContainerCpuset1.String(), cpumask.String())
+for pid, cpumask := range pidToCPUs {
+testlog.Infof("OVS service pid %s is using cpus %s", pid, cpumask)
+Expect(ovnContainerCpuset1.Equals(cpumask)).To(BeTrue(), "affinity of ovn kube node pods(%s) do not match with ovservices pid %s (%s)", ovnContainerCpuset1, pid, cpumask)
 }
 // Delete testpod1
 testlog.Infof("Deleting pod %v", testpod1.Name)

@@ -418,15 +421,16 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 time.Sleep(30 * time.Second)
 // Check the cpus of ovnkubenode pods
 ovnContainerCpuset2 := taskSet(ctx, containerPid, workerRTNode)
-testlog.Infof("cpus used by ovn kube node pods after deleting pod %v is %v", testpod1.Name, ovnContainerCpuset2.String())
+testlog.Infof("cpus used by ovn kube node pods after deleting pod %v is %v", testpod1.Name, ovnContainerCpuset2)
 // we wait some time for ovs process affinity to change
 time.Sleep(30 * time.Second)

 // Verify ovs-vswitchd and ovsdb-server process affinity is updated
-cpumaskList2, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
+pidToCPUs, err = getCPUMaskForPids(ctx, pidList, workerRTNode)
 Expect(err).ToNot(HaveOccurred())
-for _, cpumask := range cpumaskList2 {
-Expect(ovnContainerCpuset2).To(Equal(cpumask), "affinity of ovn kube node pods(%s) do not match with ovservices(%s)", ovnContainerCpuset2.String(), cpumask.String())
+for pid, cpumask := range pidToCPUs {
+testlog.Infof("OVS service pid %s is using cpus %s", pid, cpumask)
+Expect(ovnContainerCpuset2.Equals(cpumask)).To(BeTrue(), "affinity of ovn kube node pods(%s) do not match with ovservices pid %s (%s)", ovnContainerCpuset2, pid, cpumask)
 }
 // Delete testpod2
 deleteTestPod(context.TODO(), testpod2)

@@ -452,15 +456,15 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 pidList, err := ovsPids(ctx, ovsSystemdServices, workerRTNode)
 Expect(err).ToNot(HaveOccurred())
 Eventually(func() bool {
-cpumaskList, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
+pidToCPUs, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
 Expect(err).ToNot(HaveOccurred(), "Unable to fetch affinity of ovs services")
-for _, cpumask := range cpumaskList {
-testlog.Warningf("ovs services cpu mask is %s instead of %s", cpumask.String(), onlineCPUSet.String())
+for pid, cpumask := range pidToCPUs {
 // since cpuset.CPUSet contains map in its struct field we can't compare
 // the structs directly. After the deployment is deleted, the cpu mask
 // of ovs services should contain all cpus , which is generally 0-N (where
 // N is total number of cpus, this should be easy to compare.
 if !cpumask.Equals(onlineCPUSet) {
+testlog.Warningf("ovs servics pid %s cpu mask is %s instead of %s", pid, cpumask, onlineCPUSet)
 return false
 }
 }

@@ -477,26 +481,28 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 // we need to wait as process affinity can change
 time.Sleep(30 * time.Second)
 ovnContainerCpuset := taskSet(ctx, containerPid, workerRTNode)
-testlog.Infof("Container of ovn pod %s is using cpus %s", ovnPod.Name, ovnContainerCpuset.String())
+testlog.Infof("Container of ovn pod %s is using cpus %s", ovnPod.Name, ovnContainerCpuset)
 pidList, err := ovsPids(ctx, ovsSystemdServices, workerRTNode)
 Expect(err).ToNot(HaveOccurred())

 //wait for 30 seconds for ovs process to have its cpu affinity updated
 time.Sleep(30 * time.Second)
 // Verify ovs-vswitchd and ovsdb-server process affinity is updated
-cpumaskList1, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
+pidToCPUs1, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
 Expect(err).ToNot(HaveOccurred())
-for _, cpumask := range cpumaskList1 {
-Expect(ovnContainerCpuset).To(Equal(cpumask), "affinity of ovn kube node pods(%s) do not match with ovservices(%s)", ovnContainerCpuset.String(), cpumask.String())
+for pid, cpumask := range pidToCPUs1 {
+testlog.Infof("OVS service pid %s is using cpus %s", pid, cpumask)
+Expect(ovnContainerCpuset).To(Equal(cpumask), "affinity of ovn kube node pods(%s) do not match with ovservices pid %s (%s)", ovnContainerCpuset, pid, cpumask)
 }

 testlog.Info("Rebooting the node")
 rebootCmd := "chroot /rootfs systemctl reboot"
 testlog.TaggedInfof("Reboot", "Node %q: Rebooting", workerRTNode.Name)
 _, _ = nodes.ExecCommand(ctx, workerRTNode, []string{"sh", "-c", rebootCmd})
 testlog.Info("Node Rebooted")
-
-By("Waiting for node to be ready after reboot")
+By("Waiting for node to go into not ready state after reboot")
+nodes.WaitForNotReadyOrFail("Reboot", workerRTNode.Name, 10*time.Minute, 30*time.Second)
+By("Waiting for node to be ready again after reboot")
 nodes.WaitForReadyOrFail("Reboot", workerRTNode.Name, 10*time.Minute, 30*time.Second)

 // After reboot verify test pod created using deployment is running

@@ -535,16 +541,17 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 // we need to wait as process affinity can change
 time.Sleep(30 * time.Second)
 ovnContainerCpusetAfterReboot := taskSet(ctx, containerPid, workerRTNode)
-testlog.Infof("cpus used by ovn kube node pods %v", ovnContainerCpusetAfterReboot.String())
+testlog.Infof("cpus used by ovn kube node pods %v", ovnContainerCpusetAfterReboot)
 pidListAfterReboot, err := ovsPids(ctx, ovsSystemdServices, workerRTNode)

 Expect(err).ToNot(HaveOccurred())

 // Verify ovs-vswitchd and ovsdb-server process affinity is updated
-cpumaskList2, err := getCPUMaskForPids(ctx, pidListAfterReboot, workerRTNode)
+pidToCPUs2, err := getCPUMaskForPids(ctx, pidListAfterReboot, workerRTNode)
 Expect(err).ToNot(HaveOccurred())
-for _, cpumask := range cpumaskList2 {
-Expect(ovnContainerCpusetAfterReboot).To(Equal(cpumask), "affinity of ovn kube node pods(%s) do not match with ovservices(%s)", ovnContainerCpusetAfterReboot.String(), cpumask.String())
+for pid, cpumask := range pidToCPUs2 {
+testlog.Infof("OVS service pid %s is using cpus %s", pid, cpumask)
+Expect(ovnContainerCpusetAfterReboot).To(Equal(cpumask), "affinity of ovn kube node pods(%s) do not match with ovservices pid %s (%s)", ovnContainerCpusetAfterReboot, pid, cpumask)
 }
 })

@@ -562,7 +569,7 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 cpumask := strings.Split(line, ":")
 threadsCpuset, err := cpuset.Parse(strings.TrimSpace(cpumask[1]))
 Expect(err).ToNot(HaveOccurred())
-Expect(threadsCpuset).To(Equal(onlineCPUSet))
+Expect(threadsCpuset.Equals(onlineCPUSet), "actual cpuset %s not equals to expected cpuset %s", threadsCpuset, onlineCPUSet)
 }
 }

@@ -584,16 +591,12 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 // wait till the ovs process affinity is reverted back
 pidList, err := ovsPids(ctx, ovsSystemdServices, workerRTNode)
 Expect(err).ToNot(HaveOccurred())
-cpumaskList, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
-Expect(err).ToNot(HaveOccurred())
 Eventually(func() bool {
-for _, cpumask := range cpumaskList {
-testlog.Warningf("ovs services cpu mask is %s instead of %s", cpumask.String(), onlineCPUSet.String())
-// since cpuset.CPUSet contains map in its struct field we can't compare
-// the structs directly. After the deployment is delete, the cpu mask
-// of ovs services should contain all cpus , which is generally 0-N (where
-// N is total number of cpus, this should be easy to compare.
-if cpumask.String() != onlineCPUSet.String() {
+pidToCPUs, err := getCPUMaskForPids(ctx, pidList, workerRTNode)
+Expect(err).ToNot(HaveOccurred())
+for pid, cpumask := range pidToCPUs {
+if !cpumask.Equals(onlineCPUSet) {
+testlog.Warningf("ovs servics pid %s cpu mask is %s instead of %s", pid, cpumask, onlineCPUSet)
 return false
 }
 }

@@ -644,7 +647,7 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 cpumask := strings.Split(line, ":")
 threadsCpuset, err := cpuset.Parse(strings.TrimSpace(cpumask[1]))
 Expect(err).ToNot(HaveOccurred())
-testlog.Infof("ovs-switchd thread CpuAffinity: %s, pod %s Affinity: %s", threadsCpuset.String(), pod.Name, podcpus.String())
+testlog.Infof("ovs-switchd thread CpuAffinity: %s, pod %s Affinity: %s", threadsCpuset, pod.Name, podcpus)
 Expect(podcpus.IsSubsetOf(threadsCpuset)).To(Equal(false))
 }
 }

@@ -704,7 +707,7 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
 cpumask := strings.Split(line, ":")
 threadsCpuset, err := cpuset.Parse(strings.TrimSpace(cpumask[1]))
 Expect(err).ToNot(HaveOccurred())
-testlog.Infof("ovs-switchd thread CpuAffinity: %s, pod %s Affinity: %s", threadsCpuset.String(), pod.Name, podcpus.String())
+testlog.Infof("ovs-switchd thread CpuAffinity: %s, pod %s Affinity: %s", threadsCpuset, pod.Name, podcpus)
 Expect(podcpus.IsSubsetOf(threadsCpuset)).To(Equal(false))
 }
 }

@@ -801,9 +804,8 @@ func ovnPodContainers(ovnKubeNodePod *corev1.Pod) ([]string, error) {
 }

 // getCPUMaskForPids returns cpu mask of ovs process pids
-func getCPUMaskForPids(ctx context.Context, pidList []string, targetNode *corev1.Node) ([]cpuset.CPUSet, error) {
-var cpumaskList []cpuset.CPUSet
-
+func getCPUMaskForPids(ctx context.Context, pidList []string, targetNode *corev1.Node) (map[string]cpuset.CPUSet, error) {
+pidToCPUSet := make(map[string]cpuset.CPUSet, len(pidList))
 for _, pid := range pidList {
 cmd := []string{"taskset", "-pc", pid}
 out, err := nodes.ExecCommand(ctx, targetNode, cmd)

@@ -817,11 +819,10 @@ func getCPUMaskForPids(ctx context.Context, pidList []string, targetNode *corev1
 if err != nil {
 return nil, fmt.Errorf("failed to parse cpuset: %s", err)
 }
-
-cpumaskList = append(cpumaskList, maskSet)
+pidToCPUSet[pid] = maskSet
 }

-return cpumaskList, nil
+return pidToCPUSet, nil
 }

 func newDeployment() *appsv1.Deployment {
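
For context on the pattern above: getCPUMaskForPids now returns a map keyed by PID rather than a slice of cpusets, and the assertions compare cpusets with cpuset's Equals method (the test's own comment notes that cpuset.CPUSet holds a map internally, so the structs are not compared directly). Below is a minimal standalone sketch of that per-PID comparison, assuming the k8s.io/utils/cpuset package and using made-up PIDs and CPU numbers; it is an illustration of the technique, not code from this commit.

package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

func main() {
	// Made-up values standing in for the container cpuset reported by taskset and
	// the per-PID masks collected from the OVS systemd services.
	ctnCpuset := cpuset.New(2, 3, 4, 5)
	pidToCPUs := map[string]cpuset.CPUSet{
		"1234": cpuset.New(2, 3, 4, 5), // e.g. ovs-vswitchd
		"5678": cpuset.New(2, 3),       // e.g. ovsdb-server, deliberately mismatched
	}

	for pid, cpumask := range pidToCPUs {
		// Equals compares CPU membership; the PID key lets a mismatch be attributed
		// to a specific OVS process in the message.
		if !ctnCpuset.Equals(cpumask) {
			fmt.Printf("pid %s affinity %s does not match container cpuset %s\n", pid, cpumask, ctnCpuset)
			continue
		}
		fmt.Printf("pid %s affinity %s matches container cpuset %s\n", pid, cpumask, ctnCpuset)
	}
}

Keying by PID is what allows the added testlog.Infof lines and failure messages in the diff to name the specific OVS process whose affinity does not match.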

test/e2e/performanceprofile/functests/utils/nodes/nodes.go

Lines changed: 25 additions & 6 deletions
@@ -10,6 +10,7 @@ import (
 "strings"
 "time"

+. "github.com/onsi/ginkgo/v2"
 . "github.com/onsi/gomega"

 "sigs.k8s.io/yaml"

@@ -482,22 +483,39 @@ func GetNodeInterfaces(ctx context.Context, node corev1.Node) ([]NodeInterface,
 return nodeInterfaces, err
 }

-func WaitForReadyOrFail(tag, nodeName string, timeout, polling time.Duration) {
-testlog.Infof("%s: waiting for node %q: to be ready", tag, nodeName)
-EventuallyWithOffset(1, func() (bool, error) {
+func WaitForState(tag, nodeName string, timeout, polling time.Duration, stateFunc func(*corev1.Node) bool) {
+GinkgoHelper()
+Eventually(func() (bool, error) {
 node, err := GetByName(nodeName)
 if err != nil {
 // intentionally tolerate error
-testlog.Infof("wait for node %q ready: %v", nodeName, err)
+testlog.Warningf("failed to get node %q: %v. retrying...", nodeName, err)
 return false, nil
 }
+return stateFunc(node), nil
+}).WithTimeout(timeout).WithPolling(polling).Should(BeTrue(), "node %q did not reach desired state within timeout", nodeName)
+}
+
+func WaitForReadyOrFail(tag, nodeName string, timeout, polling time.Duration) {
+testlog.Infof("%s: waiting for node %q: to be ready", tag, nodeName)
+WaitForState(tag, nodeName, timeout, polling, func(node *corev1.Node) bool {
 ready := isNodeReady(*node)
 testlog.Infof("node %q ready=%v", nodeName, ready)
-return ready, nil
-}).WithTimeout(timeout).WithPolling(polling).Should(BeTrue(), "post reboot: cannot get readiness status after reboot for node %q", nodeName)
+return ready
+})
 testlog.Infof("%s: node %q: reported ready", tag, nodeName)
 }

+func WaitForNotReadyOrFail(tag, nodeName string, timeout, polling time.Duration) {
+testlog.Infof("%s: waiting for node %q: to be not ready", tag, nodeName)
+WaitForState(tag, nodeName, timeout, polling, func(node *corev1.Node) bool {
+ready := isNodeReady(*node)
+testlog.Infof("node %q ready=%v", nodeName, ready)
+return !ready
+})
+testlog.Infof("%s: node %q: reported not ready", tag, nodeName)
+}
+
 func isNodeReady(node corev1.Node) bool {
 for _, c := range node.Status.Conditions {
 if c.Type == corev1.NodeReady {

@@ -509,6 +527,7 @@ func isNodeReady(node corev1.Node) bool {

 // ContainerPid returns container process pid using crictl inspect command
 func ContainerPid(ctx context.Context, node *corev1.Node, containerId string) (string, error) {
+GinkgoHelper()
 var err error
 var criInfo CrictlInfo
 var cridata = []byte{}
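
For context on the helper above: WaitForState generalizes the readiness poll so that WaitForReadyOrFail and the new WaitForNotReadyOrFail both reuse it, which lets the reboot test first confirm the node actually left the Ready state before waiting for it to come back. Below is a rough framework-free sketch of the same poll-until-predicate idea; the Node type, getByName, and the short timings are illustrative stand-ins, not the real corev1 or nodes API, and the real helper wraps Gomega's Eventually instead of a hand-rolled loop.

package main

import (
	"fmt"
	"time"
)

// Node is an illustrative stand-in for *corev1.Node.
type Node struct {
	Name  string
	Ready bool
}

// getByName is a hypothetical lookup standing in for nodes.GetByName.
func getByName(name string) (*Node, error) {
	return &Node{Name: name, Ready: true}, nil
}

// waitForState polls until stateFunc reports the desired condition or the timeout
// expires, mirroring the shape of the WaitForState helper added in this commit.
func waitForState(nodeName string, timeout, polling time.Duration, stateFunc func(*Node) bool) error {
	deadline := time.Now().Add(timeout)
	for {
		node, err := getByName(nodeName)
		// Lookup errors are tolerated and simply retried, as in the real helper.
		if err == nil && stateFunc(node) {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("node %q did not reach desired state within %v", nodeName, timeout)
		}
		time.Sleep(polling)
	}
}

func main() {
	// Short timings for the sketch; the test itself uses 10*time.Minute and 30*time.Second.
	if err := waitForState("worker-0", 2*time.Second, 500*time.Millisecond, func(n *Node) bool { return !n.Ready }); err != nil {
		fmt.Println(err) // times out here because the stub node is always Ready
	}
	if err := waitForState("worker-0", 2*time.Second, 500*time.Millisecond, func(n *Node) bool { return n.Ready }); err != nil {
		fmt.Println(err)
	}
}

In the test itself the same two waits are issued through these helpers with a 10-minute timeout and 30-second polling interval, as shown in the cgroups.go diff above.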
