diff --git a/README.md b/README.md index e7968f971..be51e9128 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ $ make deploy or build a catalog and deploy from OLM: ``` $ make catalog && make catalog-deploy -``` +``` ### FileIntegrity API: @@ -49,6 +49,7 @@ spec: - key: "myNode" operator: "Exists" effect: "NoSchedule" + priorityClassName: "system-cluster-critical" config: name: "myconfig" namespace: "openshift-file-integrity" @@ -62,10 +63,11 @@ status: In the `spec`: * **nodeSelector**: Selector for nodes to schedule the scan instances on. * **tolerations**: Specify tolerations to schedule on nodes with custom taints. When not specified, a default toleration allowing running on master and infra nodes is applied. +* **priorityClassName**: (Optional) Specifies the `PriorityClass` for the pods created by the operator. If the PriorityClass is invalid or not found, it will be ignored and cleared from the spec. * **config**: Point to a ConfigMap containing an AIDE configuration to use instead of the CoreOS optimized default. See "Applying an AIDE config" below. * **config.gracePeriod**: The number of seconds to pause in between AIDE integrity checks. Frequent AIDE checks on a node may be resource intensive, so it can be useful to specify a longer interval. Defaults to 900 (15 mins). * **config.maxBackups**: The maximum number of AIDE database and log backups (leftover from the re-init process) to keep on a node. Older backups beyond this number are automatically pruned by the daemon. Defaults to 5. -* **config.initialDelay**: An optional field. The number of seconds to wait before starting the first AIDE integrity check. Defaults to 0. +* **config.initialDelay**: An optional field. The number of seconds to wait before starting the first AIDE integrity check. Defaults to 0. In the `status`: * **phase**: The running status of the `FileIntegrity` instance. Can be `Initializing`, `Pending`, or `Active`. 
`Initializing` is displayed if the FileIntegrity is currently initializing or re-initializing the AIDE database, `Pending` if the FileIntegrity deployment is still being created, and `Active` if the scans are active and ongoing. For node scan results, see the `FileIntegrityNodeStatus` objects explained below. diff --git a/bundle/manifests/file-integrity-operator.clusterserviceversion.yaml b/bundle/manifests/file-integrity-operator.clusterserviceversion.yaml index bcb2cf0d8..ca0b94487 100644 --- a/bundle/manifests/file-integrity-operator.clusterserviceversion.yaml +++ b/bundle/manifests/file-integrity-operator.clusterserviceversion.yaml @@ -19,7 +19,7 @@ metadata: ] capabilities: Seamless Upgrades categories: Monitoring,Security - createdAt: "2024-11-21T18:02:08Z" + createdAt: "2026-01-12T12:08:39Z" olm.skipRange: '>=1.0.0 <1.3.6' operatorframework.io/cluster-monitoring: "true" operatorframework.io/suggested-namespace: openshift-file-integrity @@ -67,6 +67,14 @@ spec: - get - list - watch + - apiGroups: + - scheduling.k8s.io + resources: + - priorityclasses + verbs: + - get + - list + - watch serviceAccountName: file-integrity-operator deployments: - name: file-integrity-operator @@ -320,4 +328,3 @@ spec: - image: quay.io/file-integrity-operator/file-integrity-operator:latest name: operator version: 1.3.6 - replaces: file-integrity-operator.v1.3.5 diff --git a/bundle/manifests/fileintegrity.openshift.io_fileintegrities.yaml b/bundle/manifests/fileintegrity.openshift.io_fileintegrities.yaml index 1f01499f4..7eba199b3 100644 --- a/bundle/manifests/fileintegrity.openshift.io_fileintegrities.yaml +++ b/bundle/manifests/fileintegrity.openshift.io_fileintegrities.yaml @@ -78,6 +78,12 @@ spec: additionalProperties: type: string type: object + priorityClassName: + description: |- + Specifies the PriorityClass to use for the pods created by the operator. + This is an optional field. If PriorityClass is invalid or not found, + it will be ignored and cleared from the spec. 
+ type: string tolerations: default: - effect: NoSchedule diff --git a/config/crd/bases/fileintegrity.openshift.io_fileintegrities.yaml b/config/crd/bases/fileintegrity.openshift.io_fileintegrities.yaml index 091050295..65cf3c924 100644 --- a/config/crd/bases/fileintegrity.openshift.io_fileintegrities.yaml +++ b/config/crd/bases/fileintegrity.openshift.io_fileintegrities.yaml @@ -78,6 +78,12 @@ spec: additionalProperties: type: string type: object + priorityClassName: + description: |- + Specifies the PriorityClass to use for the pods created by the operator. + This is an optional field. If PriorityClass is invalid or not found, + it will be ignored and cleared from the spec. + type: string tolerations: default: - effect: NoSchedule diff --git a/config/manifests/bases/file-integrity-operator.clusterserviceversion.yaml b/config/manifests/bases/file-integrity-operator.clusterserviceversion.yaml index e3ca45e84..5fd5a28e6 100644 --- a/config/manifests/bases/file-integrity-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/file-integrity-operator.clusterserviceversion.yaml @@ -19,7 +19,7 @@ metadata: ] capabilities: Seamless Upgrades categories: Monitoring,Security - olm.skipRange: '>=1.0.0 <1.3.5-dev' + olm.skipRange: '>=1.0.0 <1.3.6' operatorframework.io/cluster-monitoring: "true" operatorframework.io/suggested-namespace: openshift-file-integrity operators.openshift.io/infrastructure-features: '["disconnected", "fips"]' diff --git a/config/rbac/operator_clusterrole.yaml b/config/rbac/operator_clusterrole.yaml index 2e01b0b97..55363f6f2 100644 --- a/config/rbac/operator_clusterrole.yaml +++ b/config/rbac/operator_clusterrole.yaml @@ -11,3 +11,11 @@ rules: - get - list - watch + - apiGroups: + - scheduling.k8s.io + resources: + - priorityclasses + verbs: + - get + - list + - watch diff --git a/pkg/apis/fileintegrity/v1alpha1/fileintegrity_types.go b/pkg/apis/fileintegrity/v1alpha1/fileintegrity_types.go index 1d3c7cc5e..656692053 100644 --- 
a/pkg/apis/fileintegrity/v1alpha1/fileintegrity_types.go +++ b/pkg/apis/fileintegrity/v1alpha1/fileintegrity_types.go @@ -49,6 +49,10 @@ type FileIntegritySpec struct { // Specifies tolerations for custom taints. Defaults to allowing scheduling on master and infra nodes. // +kubebuilder:default={{key: "node-role.kubernetes.io/master", operator: "Exists", effect: "NoSchedule"},{key: "node-role.kubernetes.io/infra", operator: "Exists", effect: "NoSchedule"}} Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + // Specifies the PriorityClass to use for the pods created by the operator. + // This is an optional field. If PriorityClass is invalid or not found, + // it will be ignored and cleared from the spec. + PriorityClassName string `json:"priorityClassName,omitempty"` } // FileIntegrityConfig defines the name, namespace, and data key for an AIDE config to use for integrity checking. diff --git a/pkg/controller/fileintegrity/fileintegrity_controller.go b/pkg/controller/fileintegrity/fileintegrity_controller.go index b53971b2f..5e2ae8b3e 100644 --- a/pkg/controller/fileintegrity/fileintegrity_controller.go +++ b/pkg/controller/fileintegrity/fileintegrity_controller.go @@ -23,6 +23,7 @@ import ( "github.com/openshift/file-integrity-operator/pkg/common" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + schedulingv1 "k8s.io/api/scheduling/v1" kerr "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -44,7 +45,12 @@ func AddFileIntegrityController(mgr manager.Manager, met *metrics.Metrics) error // newReconciler returns a new reconcile.Reconciler func newFileIntegrityReconciler(mgr manager.Manager, met *metrics.Metrics) reconcile.Reconciler { - return &FileIntegrityReconciler{Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Metrics: met} + return &FileIntegrityReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Metrics: met, + Recorder: 
mgr.GetEventRecorderFor("fileintegrityctrl"), + } } // add adds a new Controller to mgr with r as the reconcile.Reconciler @@ -359,6 +365,29 @@ func (r *FileIntegrityReconciler) reconcileUserConfig(instance *v1alpha1.FileInt return true, nil } +// validatePriorityClass checks if the specified PriorityClass exists and is valid. +// Returns true if valid (or empty), false otherwise. Following the pattern of compliance-operator, +// if PriorityClass is invalid or not found, it will be ignored. +func (r *FileIntegrityReconciler) validatePriorityClass(ctx context.Context, pcName string, logger logr.Logger) bool { + if pcName == "" { + return true // Empty is valid (uses default) + } + + pc := &schedulingv1.PriorityClass{} + err := r.Client.Get(ctx, types.NamespacedName{Name: pcName}, pc) + if err != nil { + if kerr.IsNotFound(err) { + logger.Info("PriorityClass not found, ignoring", "priorityClassName", pcName) + return false + } + logger.Error(err, "Error checking PriorityClass, ignoring", "priorityClassName", pcName) + return false + } + + logger.Info("PriorityClass validated successfully", "priorityClassName", pcName, "value", pc.Value) + return true +} + // gets the image of the first container in the operator deployment spec. We expect this to be the deployment named // file-integrity-operator in the openshift-file-integrity namespace. func (r *FileIntegrityReconciler) getOperatorDeploymentImage() (string, error) { @@ -395,6 +424,29 @@ func (r *FileIntegrityReconciler) FileIntegrityControllerReconcile(request recon return reconcile.Result{}, err } + // Validate PriorityClass if specified. If invalid or not found, it will be ignored. 
+ if instance.Spec.PriorityClassName != "" { + if !r.validatePriorityClass(context.TODO(), instance.Spec.PriorityClassName, reqLogger) { + reqLogger.Info("Invalid or non-existent PriorityClass specified, will be ignored", + "priorityClassName", instance.Spec.PriorityClassName) + // Generate Warning event for the user + r.Recorder.Eventf(instance, corev1.EventTypeWarning, "PriorityClass", + "Error while getting priority class '%s', PriorityClass not found or invalid", + instance.Spec.PriorityClassName) + // Clear the PriorityClassName to avoid DaemonSet pod creation failures + instanceCopy := instance.DeepCopy() + instanceCopy.Spec.PriorityClassName = "" + if err := r.Client.Update(context.TODO(), instanceCopy); err != nil { + reqLogger.Error(err, "Error clearing invalid PriorityClassName") + // Continue anyway, the daemonset will fail to create pods but won't block reconciliation + } else { + reqLogger.Info("Cleared invalid PriorityClassName from FileIntegrity spec") + // Requeue to reconcile with the updated spec + return reconcile.Result{Requeue: true}, nil + } + } + } + // Get the operator image to later set as the daemonSet image. They need to always match before we deprecate RELATED_IMAGE_OPERATOR. 
operatorImage, err := r.getOperatorDeploymentImage() if err != nil { @@ -538,9 +590,10 @@ func (r *FileIntegrityReconciler) FileIntegrityControllerReconcile(request recon imgNeedsUpdate := updateDSImage(dsCopy, operatorImage, reqLogger) nsNeedsUpdate := updateDSNodeSelector(dsCopy, instance, reqLogger) tolsNeedsUpdate := updateDSTolerations(dsCopy, instance, reqLogger) + pcNeedsUpdate := updateDSPriorityClassName(dsCopy, instance, reqLogger) volsNeedUpdate := updateDSContainerVolumes(dsCopy, instance, operatorImage, reqLogger) - if argsNeedUpdate || imgNeedsUpdate || nsNeedsUpdate || tolsNeedsUpdate || volsNeedUpdate || scriptsUpdated { + if argsNeedUpdate || imgNeedsUpdate || nsNeedsUpdate || tolsNeedsUpdate || pcNeedsUpdate || volsNeedUpdate || scriptsUpdated { if err := r.Client.Update(context.TODO(), dsCopy); err != nil { return reconcile.Result{}, err } @@ -602,6 +655,17 @@ func updateDSTolerations(currentDS *appsv1.DaemonSet, fi *v1alpha1.FileIntegrity return needsUpdate } +func updateDSPriorityClassName(currentDS *appsv1.DaemonSet, fi *v1alpha1.FileIntegrity, logger logr.Logger) bool { + pcRef := &currentDS.Spec.Template.Spec.PriorityClassName + expectedPC := fi.Spec.PriorityClassName + needsUpdate := *pcRef != expectedPC + if needsUpdate { + logger.Info("FileIntegrity needed priorityClassName update") + *pcRef = expectedPC + } + return needsUpdate +} + // Returns true when the daemon pod args derived from the FileIntegrity object differ from the current DS. // Returns false if there was no difference.
// If an update is needed, this will update the arguments from the given DaemonSet @@ -730,6 +794,7 @@ func reinitAideDaemonset(reinitDaemonSetName string, fi *v1alpha1.FileIntegrity, Spec: corev1.PodSpec{ NodeSelector: selector.MatchLabels, Tolerations: fi.Spec.Tolerations, + PriorityClassName: fi.Spec.PriorityClassName, ServiceAccountName: common.OperatorServiceAccountName, InitContainers: []corev1.Container{ { @@ -855,6 +920,7 @@ func aideDaemonset(dsName string, fi *v1alpha1.FileIntegrity, operatorImage stri Spec: corev1.PodSpec{ NodeSelector: fi.Spec.NodeSelector, Tolerations: fi.Spec.Tolerations, + PriorityClassName: fi.Spec.PriorityClassName, ServiceAccountName: common.DaemonServiceAccountName, InitContainers: []corev1.Container{ { diff --git a/pkg/controller/fileintegrity/setup.go b/pkg/controller/fileintegrity/setup.go index 3f3ee54ad..a618870ec 100644 --- a/pkg/controller/fileintegrity/setup.go +++ b/pkg/controller/fileintegrity/setup.go @@ -22,6 +22,7 @@ import ( "github.com/openshift/file-integrity-operator/pkg/controller/metrics" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ctrlLog "sigs.k8s.io/controller-runtime/pkg/log" @@ -32,6 +33,7 @@ type FileIntegrityReconciler struct { client.Client *runtime.Scheme *metrics.Metrics + Recorder record.EventRecorder } // These are perms for all controllers. 
@@ -48,6 +50,7 @@ type FileIntegrityReconciler struct { //+kubebuilder:rbac:groups=fileintegrity.openshift.io,resources=fileintegrities/status,verbs=get;update;patch //+kubebuilder:rbac:groups=fileintegrity.openshift.io,resources=fileintegrities/finalizers,verbs=update //+kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;watch;create;update +//+kubebuilder:rbac:groups=scheduling.k8s.io,resources=priorityclasses,verbs=get;list;watch //+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch // Reconcile is part of the main kubernetes reconciliation loop which aims to diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index bfa62bb60..026dcb59e 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -1037,6 +1037,72 @@ func TestFileIntegrityTolerations(t *testing.T) { assertSingleNodeConditionIsSuccess(t, f, testIntegrityNamePrefix+"-tolerations", taintedNode, namespace, 2*time.Second, 5*time.Minute) } +func TestFileIntegrityPriorityClassName(t *testing.T) { + f, testctx, namespace := setupPriorityClassTest(t, testIntegrityNamePrefix+"-priorityclass") + defer testctx.Cleanup() + defer func() { + if err := cleanNodes(f, namespace); err != nil { + t.Fatal(err) + } + if err := resetBundleTestMetrics(f, namespace); err != nil { + t.Fatal(err) + } + }() + defer logContainerOutput(t, f, namespace, testIntegrityNamePrefix+"-priorityclass") + + // wait to go active. 
+ err := waitForScanStatus(t, f, namespace, testIntegrityNamePrefix+"-priorityclass", v1alpha1.PhaseActive) + if err != nil { + t.Errorf("Timeout waiting for scan status") + } + + t.Log("Verifying that the DaemonSet pods have the correct priorityClassName") + dsName := common.DaemonSetName(testIntegrityNamePrefix + "-priorityclass") + if err := verifyDaemonSetPriorityClassName(t, f, namespace, dsName, "system-node-critical"); err != nil { + t.Errorf("Failed to verify priorityClassName: %v", err) + } +} + +func TestFileIntegrityInvalidPriorityClassName(t *testing.T) { + f, testctx, namespace := setupInvalidPriorityClassTest(t, testIntegrityNamePrefix+"-invalidpc") + defer testctx.Cleanup() + defer func() { + if err := cleanNodes(f, namespace); err != nil { + t.Fatal(err) + } + if err := resetBundleTestMetrics(f, namespace); err != nil { + t.Fatal(err) + } + }() + defer logContainerOutput(t, f, namespace, testIntegrityNamePrefix+"-invalidpc") + + // wait to go active even with invalid priority class (it should be cleared) + err := waitForScanStatus(t, f, namespace, testIntegrityNamePrefix+"-invalidpc", v1alpha1.PhaseActive) + if err != nil { + t.Errorf("Timeout waiting for scan status") + } + + t.Log("Verifying that the invalid PriorityClassName was cleared") + fileIntegrity := &v1alpha1.FileIntegrity{} + err = f.Client.Get(context.TODO(), types.NamespacedName{Name: testIntegrityNamePrefix + "-invalidpc", Namespace: namespace}, fileIntegrity) + if err != nil { + t.Errorf("Failed to get FileIntegrity: %v", err) + } + if fileIntegrity.Spec.PriorityClassName != "" { + t.Errorf("Expected priorityClassName to be cleared, but got: %s", fileIntegrity.Spec.PriorityClassName) + } + + t.Log("Verifying that the DaemonSet was created without priorityClassName") + dsName := common.DaemonSetName(testIntegrityNamePrefix + "-invalidpc") + ds, err := f.KubeClient.AppsV1().DaemonSets(namespace).Get(context.TODO(), dsName, metav1.GetOptions{}) + if err != nil { + t.Errorf("Failed to 
get DaemonSet: %v", err) + } + if ds.Spec.Template.Spec.PriorityClassName != "" { + t.Errorf("Expected DaemonSet priorityClassName to be empty, but got: %s", ds.Spec.Template.Spec.PriorityClassName) + } +} + func TestFileIntegrityLogCompress(t *testing.T) { f, testctx, namespace := setupTest(t) testName := testIntegrityNamePrefix + "-logcompress" diff --git a/tests/e2e/helpers.go b/tests/e2e/helpers.go index 42156e269..589f29e83 100644 --- a/tests/e2e/helpers.go +++ b/tests/e2e/helpers.go @@ -1008,6 +1008,113 @@ func setupTolerationTest(t *testing.T, integrityName string) (*framework.Framewo return f, testctx, namespace, taintedNodeName } +func verifyDaemonSetPriorityClassName(t *testing.T, f *framework.Framework, namespace, dsName, expectedPriorityClass string) error { + ds, err := f.KubeClient.AppsV1().DaemonSets(namespace).Get(goctx.TODO(), dsName, metav1.GetOptions{}) + if err != nil { + return err + } + + actualPriorityClass := ds.Spec.Template.Spec.PriorityClassName + if actualPriorityClass != expectedPriorityClass { + return errors.Errorf("Expected priorityClassName %s but got %s", expectedPriorityClass, actualPriorityClass) + } + + t.Logf("DaemonSet %s has correct priorityClassName: %s", dsName, actualPriorityClass) + return nil +} + +func setupPriorityClassTest(t *testing.T, integrityName string) (*framework.Framework, *framework.Context, string) { + testctx := setupTestRequirements(t) + namespace, err := testctx.GetOperatorNamespace() + if err != nil { + t.Errorf("could not get namespace: %v", err) + } + f := framework.Global + + testctx.AddCleanupFn(cleanUp(t, namespace)) + setupFileIntegrityOperatorCluster(t, testctx) + + t.Log("Creating FileIntegrity object for PriorityClassName tests") + testIntegrityCheck := &v1alpha1.FileIntegrity{ + ObjectMeta: metav1.ObjectMeta{ + Name: integrityName, + Namespace: namespace, + }, + Spec: v1alpha1.FileIntegritySpec{ + NodeSelector: map[string]string{ + "node-role.kubernetes.io/worker": "", + }, + Config: 
v1alpha1.FileIntegrityConfig{ + GracePeriod: defaultTestGracePeriod, + }, + PriorityClassName: "system-node-critical", + }, + } + cleanupOptions := framework.CleanupOptions{ + TestContext: testctx, + Timeout: cleanupTimeout, + RetryInterval: cleanupRetryInterval, + } + err = f.Client.Create(goctx.TODO(), testIntegrityCheck, &cleanupOptions) + if err != nil { + t.Errorf("could not create fileintegrity object: %v", err) + } + + dsName := common.DaemonSetName(testIntegrityCheck.Name) + err = waitForDaemonSet(daemonSetIsReady(f.KubeClient, dsName, namespace)) + if err != nil { + t.Errorf("Timed out waiting for DaemonSet %s", dsName) + } + + return f, testctx, namespace +} + +func setupInvalidPriorityClassTest(t *testing.T, integrityName string) (*framework.Framework, *framework.Context, string) { + testctx := setupTestRequirements(t) + namespace, err := testctx.GetOperatorNamespace() + if err != nil { + t.Errorf("could not get namespace: %v", err) + } + f := framework.Global + + testctx.AddCleanupFn(cleanUp(t, namespace)) + setupFileIntegrityOperatorCluster(t, testctx) + + t.Log("Creating FileIntegrity object with invalid PriorityClassName") + testIntegrityCheck := &v1alpha1.FileIntegrity{ + ObjectMeta: metav1.ObjectMeta{ + Name: integrityName, + Namespace: namespace, + }, + Spec: v1alpha1.FileIntegritySpec{ + NodeSelector: map[string]string{ + "node-role.kubernetes.io/worker": "", + }, + Config: v1alpha1.FileIntegrityConfig{ + GracePeriod: defaultTestGracePeriod, + }, + PriorityClassName: "non-existent-priority-class", + }, + } + cleanupOptions := framework.CleanupOptions{ + TestContext: testctx, + Timeout: cleanupTimeout, + RetryInterval: cleanupRetryInterval, + } + err = f.Client.Create(goctx.TODO(), testIntegrityCheck, &cleanupOptions) + if err != nil { + t.Errorf("could not create fileintegrity object: %v", err) + } + + dsName := common.DaemonSetName(testIntegrityCheck.Name) + err = waitForDaemonSet(daemonSetIsReady(f.KubeClient, dsName, namespace)) + if err != 
nil { + t.Errorf("Timed out waiting for DaemonSet %s", dsName) + } + + return f, testctx, namespace +} + func updateFileIntegrityConfig(t *testing.T, f *framework.Framework, integrityName, configMapName, namespace, key string, interval, timeout time.Duration) { var lastErr error pollErr := wait.PollImmediate(interval, timeout, func() (bool, error) {