Skip to content

Commit 3d5d645

Browse files
qat,e2e: add heartbeat and auto-reset validations
Signed-off-by: Hyeongju Johannes Lee <hyeongju.lee@intel.com>
1 parent e2a82e8 commit 3d5d645

File tree

2 files changed

+111
-4
lines changed

2 files changed

+111
-4
lines changed

test/e2e/qat/qatplugin_dpdk.go

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"k8s.io/apimachinery/pkg/labels"
3030
"k8s.io/kubernetes/test/e2e/framework"
3131
e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
32+
e2ejob "k8s.io/kubernetes/test/e2e/framework/job"
3233
e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
3334
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
3435
admissionapi "k8s.io/pod-security-admission/api"
@@ -38,6 +39,8 @@ const (
3839
qatPluginKustomizationYaml = "deployments/qat_plugin/overlays/e2e/kustomization.yaml"
3940
cryptoTestYaml = "deployments/qat_dpdk_app/crypto-perf/crypto-perf-dpdk-pod-requesting-qat-cy.yaml"
4041
compressTestYaml = "deployments/qat_dpdk_app/compress-perf/compress-perf-dpdk-pod-requesting-qat-dc.yaml"
42+
cyResource = "qat.intel.com/cy"
43+
dcResource = "qat.intel.com/dc"
4144
)
4245

4346
const (
@@ -111,14 +114,14 @@ func describeQatDpdkPlugin() {
111114
}
112115
})
113116

114-
ginkgo.Context("When QAT resources are available with crypto (cy) services enabled [Resource:cy]", func() {
117+
ginkgo.Context("When QAT resources are continuously available with crypto (cy) services enabled [Resource:cy]", func() {
115118
// This BeforeEach runs even before the JustBeforeEach above.
116119
ginkgo.BeforeEach(func() {
117120
ginkgo.By("creating a configMap before plugin gets deployed")
118121
e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "configmap", "--from-literal", "qat.conf=ServicesEnabled=sym;asym", "qat-config")
119122

120123
ginkgo.By("setting resourceName for cy services")
121-
resourceName = "qat.intel.com/cy"
124+
resourceName = cyResource
122125
})
123126

124127
ginkgo.It("deploys a crypto pod (openssl) requesting QAT resources [App:openssl]", func(ctx context.Context) {
@@ -139,13 +142,13 @@ func describeQatDpdkPlugin() {
139142
})
140143
})
141144

142-
ginkgo.Context("When QAT resources are available with compress (dc) services enabled [Resource:dc]", func() {
145+
ginkgo.Context("When QAT resources are continuously available with compress (dc) services enabled [Resource:dc]", func() {
143146
ginkgo.BeforeEach(func() {
144147
ginkgo.By("creating a configMap before plugin gets deployed")
145148
e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "configmap", "--from-literal", "qat.conf=ServicesEnabled=dc", "qat-config")
146149

147150
ginkgo.By("setting resourceName for dc services")
148-
resourceName = "qat.intel.com/dc"
151+
resourceName = dcResource
149152
})
150153

151154
ginkgo.It("deploys a compress pod (openssl) requesting QAT resources [App:openssl]", func(ctx context.Context) {
@@ -165,6 +168,59 @@ func describeQatDpdkPlugin() {
165168
ginkgo.It("does nothing", func() {})
166169
})
167170
})
171+
172+
ginkgo.Context("When a QAT device goes unresponsive", func() {
	ginkgo.When("QAT's auto-reset is off", func() {
		ginkgo.BeforeEach(func() {
			ginkgo.By("creating a configMap before plugin gets deployed")
			// The arguments are handed to kubectl as-is (no shell is involved), so bash's
			// $'...' quoting must not be used here: it would end up verbatim in the key name.
			// The "\n" is already a real newline produced by the Go string literal, and the
			// key is "ServicesEnabled", matching the cy/dc contexts in this file.
			e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "configmap", "--from-literal", "qat.conf=ServicesEnabled=dc\nAutoresetEnabled=off", "qat-config")

			ginkgo.By("setting resourceName for dc services")
			resourceName = dcResource
		})

		ginkgo.It("checks if the heartbeat is read correctly [Functionality:heartbeat]", func(ctx context.Context) {
			injectError(ctx, f, resourceName)

			// With auto-reset off the failed device must stay failed, so the resource
			// count is required to drop to zero; anything else is a test failure.
			ginkgo.By("seeing if there is zero resource")
			if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resourceName, 100*time.Second, utils.WaitForZeroResource); err != nil {
				framework.Failf("unable to wait for nodes to have no resource: %v", err)
			}
		})
	})

	ginkgo.When("QAT's autoreset is on", func() {
		ginkgo.BeforeEach(func() {
			ginkgo.By("creating a configMap before plugin gets deployed")
			// See the auto-reset-off case: plain "key=value" lines separated by a real
			// newline, without shell quoting, and with the "ServicesEnabled" key.
			e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "configmap", "--from-literal", "qat.conf=ServicesEnabled=dc\nAutoresetEnabled=on", "qat-config")

			ginkgo.By("setting resourceName for dc services")
			resourceName = dcResource
		})

		ginkgo.It("checks if an injected error gets solved [Functionality:auto-reset]", func(ctx context.Context) {
			injectError(ctx, f, resourceName)

			ginkgo.By("seeing if there is zero resource")
			if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resourceName, 100*time.Second, utils.WaitForZeroResource); err != nil {
				// This is deliberately Logf rather than Failf: auto-reset may repair the
				// device before the plugin ever notices the failure, in which case the
				// resource never drops to zero.
				//
				// The wait itself is still required. Without it, the check for a positive
				// resource below could run before the plugin has recognized the injected
				// error, and the error could then outlive this It() spec.
				framework.Logf("unable to wait for nodes to have no resource: %v", err)
			}

			ginkgo.By("seeing if there is positive allocatable resource")
			if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resourceName, 300*time.Second, utils.WaitForPositiveResource); err != nil {
				framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err)
			}

			ginkgo.By("checking if openssl pod runs successfully")
			runCpaSampleCode(ctx, f, compression, resourceName)
		})
	})
})
168224
}
169225

170226
func runCpaSampleCode(ctx context.Context, f *framework.Framework, runTests int, resourceName v1.ResourceName) {
@@ -199,3 +255,38 @@ func runCpaSampleCode(ctx context.Context, f *framework.Framework, runTests int,
199255
err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.ObjectMeta.Name, f.Namespace.Name, 300*time.Second)
200256
gomega.Expect(err).To(gomega.BeNil(), utils.GetPodLogs(ctx, f, pod.ObjectMeta.Name, pod.Spec.Containers[0].Name))
201257
}
258+
259+
func injectError(ctx context.Context, f *framework.Framework, resourceName v1.ResourceName) {
260+
nodeName, _ := utils.FindNodeAndResourceCapacity(f, ctx, resourceName.String())
261+
yes := true
262+
263+
job := e2ejob.NewTestJobOnNode("success", "qat-inject-error", v1.RestartPolicyNever, 1, 1, nil, 0, nodeName)
264+
job.Spec.Template.Spec.Containers[0].Command = []string{
265+
"/bin/sh",
266+
}
267+
job.Spec.Template.Spec.Containers[0].Args = []string{
268+
"-c",
269+
"find /sys/kernel/debug/qat_*/heartbeat/ -name inject_error -exec sh -c 'echo 1 > {}' \\;",
270+
}
271+
job.Spec.Template.Spec.Containers[0].VolumeMounts = []v1.VolumeMount{{
272+
Name: "debugfs",
273+
MountPath: "/sys/kernel/debug/",
274+
}}
275+
job.Spec.Template.Spec.Volumes = []v1.Volume{{
276+
Name: "debugfs",
277+
VolumeSource: v1.VolumeSource{
278+
HostPath: &v1.HostPathVolumeSource{
279+
Path: "/sys/kernel/debug/",
280+
},
281+
},
282+
}}
283+
job.Spec.Template.Spec.Containers[0].SecurityContext = &v1.SecurityContext{
284+
Privileged: &yes,
285+
}
286+
287+
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
288+
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
289+
290+
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, nil, 1)
291+
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
292+
}

test/e2e/utils/utils.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,3 +365,19 @@ func Kubectl(ns string, cmd string, opt string, file string) {
365365
msg := e2ekubectl.RunKubectlOrDie(ns, cmd, opt, path)
366366
framework.Logf("%s", msg)
367367
}
368+
369+
func FindNodeAndResourceCapacity(f *framework.Framework, ctx context.Context, resourceName string) (string, int64) {
370+
nodelist, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
371+
if err != nil {
372+
framework.Failf("failed to list Nodes: %v", err)
373+
}
374+
375+
// we have at least one node with resource capacity
376+
for _, item := range nodelist.Items {
377+
if q, ok := item.Status.Allocatable[v1.ResourceName(resourceName)]; ok && q.Value() > 0 {
378+
return item.Name, q.Value()
379+
}
380+
}
381+
382+
return "", 0
383+
}

0 commit comments

Comments
 (0)