From 91f85d9682a0c7537fdc3d5a2b358d95d8cf1189 Mon Sep 17 00:00:00 2001 From: Thomas Jungblut Date: Wed, 29 Oct 2025 16:57:12 +0100 Subject: [PATCH] RFE-7051: add unsupported dedicated events etcd This PR contains a dedicated in-memory etcd deployment that will run on one control plane host and configures the kube-apiserver to send events to it. Signed-off-by: Thomas Jungblut --- .../etcd/dedicated-event-etcd-deployment.yaml | 158 +++++++++++++ bindata/etcd/dedicated-event-etcd-svc.yaml | 17 ++ pkg/operator/ceohelpers/podsubstitution.go | 2 +- .../ceohelpers/unsupported_override.go | 5 + .../dedicated_etcd_controller.go | 221 ++++++++++++++++++ pkg/operator/starter.go | 15 ++ pkg/tlshelpers/tlshelpers.go | 2 + 7 files changed, 419 insertions(+), 1 deletion(-) create mode 100644 bindata/etcd/dedicated-event-etcd-deployment.yaml create mode 100644 bindata/etcd/dedicated-event-etcd-svc.yaml create mode 100644 pkg/operator/dedicatedetcdcontroller/dedicated_etcd_controller.go diff --git a/bindata/etcd/dedicated-event-etcd-deployment.yaml b/bindata/etcd/dedicated-event-etcd-deployment.yaml new file mode 100644 index 0000000000..adb214b14e --- /dev/null +++ b/bindata/etcd/dedicated-event-etcd-deployment.yaml @@ -0,0 +1,158 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dedicated-event-etcd + namespace: openshift-etcd + labels: + app: dedicated-event-etcd + k8s-app: dedicated-event-etcd +spec: + strategy: + type: "Recreate" + replicas: 1 + selector: + matchLabels: + app: dedicated-event-etcd + k8s-app: dedicated-event-etcd + template: + metadata: + name: dedicated-event-etcd + annotations: + kubectl.kubernetes.io/default-container: etcdctl + labels: + app: dedicated-event-etcd + k8s-app: dedicated-event-etcd + spec: + hostNetwork: true + nodeSelector: + node-role.kubernetes.io/control-plane: '' + kubernetes.io/hostname: {{.NodeName}} + tolerations: + - operator: "Exists" + containers: + - name: etcdctl + image: {{.Image}} + imagePullPolicy: IfNotPresent + terminationMessagePolicy: FallbackToLogsOnError + command: + - "/bin/bash" + - "-c" + - "trap TERM INT; sleep infinity & wait" + volumeMounts: + - mountPath: /var/lib/etcd/ + name: data-dir + - mountPath: /etcd-all-bundles + name: etcd-ca-bundle + - mountPath: /etcd-all-certs + name: etcd-all-certs + env: + # export ETCDCTL_ENDPOINTS="https://${MY_POD_IP}:20379" + # export ETCDCTL_CACERT="/etcd-all-bundles/ca-bundle.crt" + # export ETCDCTL_CERT="/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.crt" + # export ETCDCTL_KEY="/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.key" + + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: ETCD_DATA_DIR + value: "/var/lib/etcd" + - name: ETCDCTL_ENDPOINTS + value: "https://${MY_POD_IP}:20379" + - name: ETCDCTL_CACERT + value: "/etcd-all-bundles/ca-bundle.crt" + - name: ETCDCTL_CERT + value: "/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.crt" + - name: ETCDCTL_KEY + value: "/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.key" + - name: etcd + image: {{.Image}} + imagePullPolicy: IfNotPresent + terminationMessagePolicy: FallbackToLogsOnError + env: + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: + - /bin/sh + - -c + - | + #!/bin/sh + set -euo pipefail + set -x + + export ETCD_NAME=events-etcd + + echo "----------------" + env | grep ETCD | grep -v NODE + echo "----------------" + echo "$MY_NODE_NAME" + echo "$MY_POD_IP" + echo "----------------" + ls -l /etcd-all-certs + echo "----------------" + ls -l /etcd-all-bundles + echo "----------------" + + etcd \ + --data-dir=/var/lib/etcd \ + --logger=zap \ + --log-level=WARN \ + --snapshot-count=10000 \ + --quota-backend-bytes 8589934592 \ + --cert-file="/etcd-all-certs/etcd-serving-${MY_NODE_NAME}.crt" \ + --key-file="/etcd-all-certs/etcd-serving-${MY_NODE_NAME}.key" \ + --trusted-ca-file="/etcd-all-bundles/ca-bundle.crt" \ + --client-cert-auth=true \ + --initial-cluster="${ETCD_NAME}=https://${MY_POD_IP}:20380" \ + --initial-advertise-peer-urls="https://${MY_POD_IP}:20380" \ + --listen-peer-urls="https://${MY_POD_IP}:20380" \ + --peer-cert-file="/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.crt"\ + --peer-key-file="/etcd-all-certs/etcd-peer-${MY_NODE_NAME}.key" \ + --peer-trusted-ca-file="/etcd-all-bundles/ca-bundle.crt" \ + --peer-client-cert-auth=true \ + --advertise-client-urls=https://${MY_POD_IP}:20379 \ + --listen-client-urls=https://0.0.0.0:20379 + + ports: + - containerPort: 20379 + name: events-etcd + protocol: TCP + - containerPort: 20380 + # shortened to fit into 15 chars + name: events-etcdpeer + protocol: TCP + resources: + limits: + memory: 8Gi + securityContext: + privileged: true + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /var/lib/etcd/ + name: data-dir + - mountPath: /etcd-all-bundles + name: etcd-ca-bundle + - mountPath: /etcd-all-certs + name: etcd-all-certs + volumes: + - configMap: + name: etcd-ca-bundle + name: etcd-ca-bundle + - secret: + secretName: etcd-all-certs + name: etcd-all-certs + - name: data-dir + emptyDir: + medium: Memory + sizeLimit: 8Gi + diff --git a/bindata/etcd/dedicated-event-etcd-svc.yaml b/bindata/etcd/dedicated-event-etcd-svc.yaml new file mode 100644 index 0000000000..cbf52610ad --- /dev/null +++ b/bindata/etcd/dedicated-event-etcd-svc.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + namespace: openshift-etcd + name: events-etcd + annotations: + prometheus.io/scrape: "false" + prometheus.io/scheme: https + labels: + k8s-app: dedicated-event-etcd +spec: + selector: + k8s-app: dedicated-event-etcd + ports: + - name: events-etcd + port: 20379 + protocol: TCP diff --git a/pkg/operator/ceohelpers/podsubstitution.go b/pkg/operator/ceohelpers/podsubstitution.go index fb1e92d4db..1143ab1202 100644 --- a/pkg/operator/ceohelpers/podsubstitution.go +++ b/pkg/operator/ceohelpers/podsubstitution.go @@ -125,7 +125,7 @@ func GetPodSubstitution( } // RenderTemplate renders a Pod template from the Assets with the data from a PodSubstitutionTemplate -func RenderTemplate(templateName string, subs *PodSubstitutionTemplate) (string, error) { +func RenderTemplate[T interface{}](templateName string, subs *T) (string, error) { fm := template.FuncMap{"quote": func(arg reflect.Value) string { return "\"" + arg.String() + "\"" }} diff --git a/pkg/operator/ceohelpers/unsupported_override.go b/pkg/operator/ceohelpers/unsupported_override.go index f91ecf7765..38210ca314 100644 --- a/pkg/operator/ceohelpers/unsupported_override.go +++ b/pkg/operator/ceohelpers/unsupported_override.go @@ -16,6 +16,11 @@ func isUnsupportedUnsafeEtcd(spec *operatorv1.StaticPodOperatorSpec) (bool, erro return tryGetUnsupportedValue(spec, "useUnsupportedUnsafeNonHANonProductionUnstableEtcd") } +// IsDedicatedEtcdForEventsEnabled returns true if useUnsupportedDedicatedEtcdForEvents key is set to any parsable value +func IsDedicatedEtcdForEventsEnabled(spec *operatorv1.StaticPodOperatorSpec) (bool, error) { + return tryGetUnsupportedValue(spec, "useUnsupportedDedicatedEtcdForEvents") +} + func tryGetUnsupportedValue(spec *operatorv1.StaticPodOperatorSpec, key string) (bool, error) { unsupportedConfig := map[string]interface{}{} if spec.UnsupportedConfigOverrides.Raw == nil { diff --git a/pkg/operator/dedicatedetcdcontroller/dedicated_etcd_controller.go b/pkg/operator/dedicatedetcdcontroller/dedicated_etcd_controller.go new file mode 100644 index 0000000000..c76d485d69 --- /dev/null +++ b/pkg/operator/dedicatedetcdcontroller/dedicated_etcd_controller.go @@ -0,0 +1,221 @@ +package dedicatedetcdcontroller + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "slices" + "strings" + "time" + + configv1informers "github.com/openshift/client-go/config/informers/externalversions/config/v1" + configv1listers "github.com/openshift/client-go/config/listers/config/v1" + opv1 "github.com/openshift/client-go/operator/clientset/versioned/typed/operator/v1" + "github.com/openshift/cluster-etcd-operator/pkg/dnshelpers" + "k8s.io/apimachinery/pkg/runtime" + + "github.com/openshift/cluster-etcd-operator/bindata" + "github.com/openshift/cluster-etcd-operator/pkg/operator/ceohelpers" + "github.com/openshift/cluster-etcd-operator/pkg/operator/health" + "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" + "github.com/openshift/library-go/pkg/controller/factory" + "github.com/openshift/library-go/pkg/operator/events" + "github.com/openshift/library-go/pkg/operator/v1helpers" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/kubernetes" + corev1listers "k8s.io/client-go/listers/core/v1" + "sigs.k8s.io/yaml" +) + +// DedicatedEtcdController manages the lifecycle of a dedicated etcd pod for events storage +type DedicatedEtcdController struct { + staticPodClient v1helpers.StaticPodOperatorClient + operatorClient opv1.OperatorV1Interface + kubeClient kubernetes.Interface + masterNodeLister corev1listers.NodeLister + networkLister configv1listers.NetworkLister + masterNodeSelector labels.Selector + etcdImage string +} + +type EventsEtcdDeploymentSubstitution struct { + Image string + NodeName string +} + +// NewDedicatedEtcdController creates a new controller for managing dedicated etcd pods +func NewDedicatedEtcdController( + livenessChecker *health.MultiAlivenessChecker, + etcdImage string, + staticPodClient v1helpers.StaticPodOperatorClient, + kubeClient kubernetes.Interface, + kubeInformersForNamespaces v1helpers.KubeInformersForNamespaces, + masterNodeLister corev1listers.NodeLister, + masterNodeSelector labels.Selector, + operatorClient opv1.OperatorV1Interface, + networkLister configv1informers.NetworkInformer, + eventRecorder events.Recorder) factory.Controller { + c := &DedicatedEtcdController{ + staticPodClient: staticPodClient, + kubeClient: kubeClient, + masterNodeLister: masterNodeLister, + masterNodeSelector: masterNodeSelector, + etcdImage: etcdImage, + operatorClient: operatorClient, + networkLister: networkLister.Lister(), + } + + syncer := health.NewDefaultCheckingSyncWrapper(c.sync) + livenessChecker.Add("DedicatedEtcdController", syncer) + + return factory.New(). + ResyncEvery(1*time.Minute). + WithSync(syncer.Sync). + WithInformers( + staticPodClient.Informer(), + kubeInformersForNamespaces.InformersFor(operatorclient.TargetNamespace).Core().V1().ConfigMaps().Informer(), + networkLister.Informer(), + ). + ToController("DedicatedEtcdController", eventRecorder.WithComponentSuffix("dedicated-etcd-controller")) +} + +func (c *DedicatedEtcdController) sync(ctx context.Context, _ factory.SyncContext) error { + operatorSpec, _, _, err := c.staticPodClient.GetStaticPodOperatorState() + if err != nil { + return err + } + + // Check if the dedicated etcd for events feature is enabled via unsupported override + enabled, err := ceohelpers.IsDedicatedEtcdForEventsEnabled(operatorSpec) + if err != nil { + return fmt.Errorf("failed to check dedicated etcd override: %w", err) + } + + if !enabled { + return nil + } + + network, err := c.networkLister.Get("cluster") + if err != nil { + return fmt.Errorf("failed to get cluster network: %w", err) + } + + controlPlaneNodes, err := c.masterNodeLister.List(c.masterNodeSelector) + if err != nil { + return fmt.Errorf("failed to list master nodes: %w", err) + } + + slices.SortFunc(controlPlaneNodes, func(a, b *corev1.Node) int { + return strings.Compare(a.Name, b.Name) + }) + + destinedEventNode := controlPlaneNodes[0] + destinedEventNodeHostname := destinedEventNode.Labels["kubernetes.io/hostname"] + etcdURLHost, err := dnshelpers.GetEscapedPreferredInternalIPAddressForNodeName(network, destinedEventNode) + if err != nil { + return fmt.Errorf("failed to get etcd URL hostname for node %s: %w", destinedEventNode.Name, err) + } + + depClient := c.kubeClient.AppsV1().Deployments(operatorclient.TargetNamespace) + + renderedDeployment, err := ceohelpers.RenderTemplate("etcd/dedicated-event-etcd-deployment.yaml", + &EventsEtcdDeploymentSubstitution{Image: c.etcdImage, NodeName: destinedEventNodeHostname}) + if err != nil { + return fmt.Errorf("failed to render events-etcd deployment: %w", err) + } + + var deployment appsv1.Deployment + if err := yaml.Unmarshal([]byte(renderedDeployment), &deployment); err != nil { + return fmt.Errorf("failed to unmarshal events-etcd deployment: %w", err) + } + + svcTemplate := bindata.MustAsset("etcd/dedicated-event-etcd-svc.yaml") + var svc corev1.Service + if err := yaml.Unmarshal(svcTemplate, &svc); err != nil { + return fmt.Errorf("failed to unmarshal events-etcd svc: %w", err) + } + + if _, err = depClient.Get(ctx, deployment.Name, metav1.GetOptions{}); err != nil { + if errors.IsNotFound(err) { + _, err := depClient.Create(ctx, &deployment, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create events-etcd deployment: %w", err) + } + } else { + return fmt.Errorf("failed to get events-etcd deployment: %w", err) + } + } + + _, err = depClient.Update(ctx, &deployment, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update events-etcd deployment: %w", err) + } + + svcClient := c.kubeClient.CoreV1().Services(operatorclient.TargetNamespace) + if _, err := svcClient.Get(ctx, svc.Name, metav1.GetOptions{}); err != nil { + if errors.IsNotFound(err) { + _, err := svcClient.Create(ctx, &svc, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create events-etcd service: %w", err) + } + } else { + return fmt.Errorf("failed to get events-etcd service: %v", err) + } + } + + updatedSvc, err := svcClient.Update(ctx, &svc, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update events-etcd service: %w", err) + } + + kas, err := c.operatorClient.KubeAPIServers().Get(ctx, "cluster", metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get kas cluster: %w", err) + } + + existingUnsupportedConfigOverrides := map[string]interface{}{} + if len(kas.Spec.UnsupportedConfigOverrides.Raw) > 0 { + if err := json.NewDecoder(bytes.NewBuffer(kas.Spec.UnsupportedConfigOverrides.Raw)).Decode(&existingUnsupportedConfigOverrides); err != nil { + return fmt.Errorf("failed to decode existing unsupported config overrides: %w", err) + } + } + + /* + apiVersion: operator.openshift.io/v1 + kind: KubeAPIServer + spec: + unsupportedConfigOverrides: + apiServerArguments: + etcd-servers-overrides: + - "/events#https://10.0.0.5:20379" + + --- OR via SVC --- + - "/events#https://events-etcd.openshift-etcd.svc:20379" + + TODO This would require KAS to run with dnsPolicy=ClusterFirstWithHostNet + https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-s-dns-policy + + */ + + if err := unstructured.SetNestedStringSlice(existingUnsupportedConfigOverrides, + []string{fmt.Sprintf("/events#https://%s:%d", etcdURLHost, updatedSvc.Spec.Ports[0].Port)}, + []string{"apiServerArguments", "etcd-servers-overrides"}...); err != nil { + return fmt.Errorf("failed to update disabled config-overrides: %w", err) + } + + kas.Spec.UnsupportedConfigOverrides = runtime.RawExtension{Object: &unstructured.Unstructured{Object: existingUnsupportedConfigOverrides}} + _, err = c.operatorClient.KubeAPIServers().Update(ctx, kas, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update kas cluster: %w", err) + } + + // one can verify whether it works by watching the event prefix with etcdctl + // sh-5.1# etcdctl watch /kubernetes.io/events --prefix + return nil +} diff --git a/pkg/operator/starter.go b/pkg/operator/starter.go index e0e829a7d0..b221b9014f 100644 --- a/pkg/operator/starter.go +++ b/pkg/operator/starter.go @@ -19,6 +19,7 @@ import ( operatorversionedclient "github.com/openshift/client-go/operator/clientset/versioned" operatorversionedclientv1alpha1 "github.com/openshift/client-go/operator/clientset/versioned/typed/operator/v1alpha1" operatorv1informers "github.com/openshift/client-go/operator/informers/externalversions" + "github.com/openshift/cluster-etcd-operator/pkg/operator/dedicatedetcdcontroller" "github.com/openshift/library-go/pkg/controller/controllercmd" "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/configobserver/featuregates" @@ -493,6 +494,19 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle controllerContext.EventRecorder, ) + dedicatedEtcdController := dedicatedetcdcontroller.NewDedicatedEtcdController( + AlivenessChecker, + os.Getenv("IMAGE"), + operatorClient, + kubeClient, + kubeInformersForNamespaces, + controlPlaneNodeLister, + controlPlaneNodeLabelSelector, + operatorConfigClient.OperatorV1(), + networkInformer, + controllerContext.EventRecorder, + ) + enabledAutoBackupFeature, err := backuphelpers.AutoBackupFeatureGateEnabled(featureGateAccessor) if err != nil { return fmt.Errorf("could not determine AutoBackupFeatureGateEnabled, aborting controller start: %w", err) @@ -654,6 +668,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle go unsupportedConfigOverridesController.Run(ctx, 1) go scriptController.Run(ctx, 1) go defragController.Run(ctx, 1) + go dedicatedEtcdController.Run(ctx, 1) go envVarController.Run(1, ctx.Done()) go staticPodControllers.Start(ctx) diff --git a/pkg/tlshelpers/tlshelpers.go b/pkg/tlshelpers/tlshelpers.go index 1ceea66e67..f5623f8da7 100644 --- a/pkg/tlshelpers/tlshelpers.go +++ b/pkg/tlshelpers/tlshelpers.go @@ -65,7 +65,9 @@ func getServerHostNames(nodeInternalIPs []string) []string { "etcd.kube-system.svc", "etcd.kube-system.svc.cluster.local", "etcd.openshift-etcd.svc", + "events-etcd.openshift-etcd.svc", "etcd.openshift-etcd.svc.cluster.local", + "events-etcd.openshift-etcd.svc.cluster.local", "127.0.0.1", "::1", // "0:0:0:0:0:0:0:1" will be automatically collapsed to "::1", so we don't have to add it on top