From c0d2e60a5735269abb85e881f6298e92ec1507ab Mon Sep 17 00:00:00 2001 From: Jose Valdes Date: Wed, 4 Feb 2026 18:43:22 -0500 Subject: [PATCH 1/6] [nodeconfig] set log flush in kubeletconfig this commit explicitly configures kubelet's logging flush frequency to 5 seconds to ensure log entries are written to disk in near real-time. --- pkg/nodeconfig/nodeconfig.go | 23 +++++++++++++++-------- pkg/nodeconfig/nodeconfig_test.go | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pkg/nodeconfig/nodeconfig.go b/pkg/nodeconfig/nodeconfig.go index 1e9b84acf0..d5dcfb72b3 100644 --- a/pkg/nodeconfig/nodeconfig.go +++ b/pkg/nodeconfig/nodeconfig.go @@ -22,6 +22,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" clientcmdv1 "k8s.io/client-go/tools/clientcmd/api/v1" + logsapi "k8s.io/component-base/logs/api/v1" "k8s.io/kubectl/pkg/drain" kubeletconfigv1 "k8s.io/kubelet/config/v1" kubeletconfig "k8s.io/kubelet/config/v1beta1" @@ -743,14 +744,20 @@ func generateKubeletConfiguration(clusterDNS string) kubeletconfig.KubeletConfig Enabled: &falseBool, }, }, - ClusterDomain: "cluster.local", - ClusterDNS: []string{clusterDNS}, - CgroupsPerQOS: &falseBool, - RuntimeRequestTimeout: meta.Duration{Duration: 10 * time.Minute}, - MaxPods: 250, - KubeAPIQPS: &kubeAPIQPS, - KubeAPIBurst: 100, - SerializeImagePulls: &falseBool, + ClusterDomain: "cluster.local", + ClusterDNS: []string{clusterDNS}, + CgroupsPerQOS: &falseBool, + RuntimeRequestTimeout: meta.Duration{Duration: 10 * time.Minute}, + MaxPods: 250, + KubeAPIQPS: &kubeAPIQPS, + KubeAPIBurst: 100, + SerializeImagePulls: &falseBool, + Logging: logsapi.LoggingConfiguration{ + FlushFrequency: logsapi.TimeOrMetaDuration{ + Duration: meta.Duration{Duration: 5 * time.Second}, + SerializeAsString: true, + }, + }, EnableSystemLogHandler: &trueBool, EnableSystemLogQuery: &trueBool, FeatureGates: map[string]bool{ diff --git a/pkg/nodeconfig/nodeconfig_test.go b/pkg/nodeconfig/nodeconfig_test.go 
index 51ec04bb02..3356b1e745 100644 --- a/pkg/nodeconfig/nodeconfig_test.go +++ b/pkg/nodeconfig/nodeconfig_test.go @@ -84,7 +84,7 @@ func TestCreateKubeletConf(t *testing.T) { { name: "valid cidr", cidr: "10.0.128.8/24", - expectedSpec: "{\"kind\":\"KubeletConfiguration\",\"apiVersion\":\"kubelet.config.k8s.io/v1beta1\",\"syncFrequency\":\"0s\",\"fileCheckFrequency\":\"0s\",\"httpCheckFrequency\":\"0s\",\"rotateCertificates\":true,\"serverTLSBootstrap\":true,\"authentication\":{\"x509\":{\"clientCAFile\":\"C:\\\\k\\\\kubelet-ca.crt\"},\"webhook\":{\"cacheTTL\":\"0s\"},\"anonymous\":{\"enabled\":false}},\"authorization\":{\"webhook\":{\"cacheAuthorizedTTL\":\"0s\",\"cacheUnauthorizedTTL\":\"0s\"}},\"clusterDomain\":\"cluster.local\",\"clusterDNS\":[\"10.0.128.10\"],\"streamingConnectionIdleTimeout\":\"0s\",\"nodeStatusUpdateFrequency\":\"0s\",\"nodeStatusReportFrequency\":\"0s\",\"imageMinimumGCAge\":\"0s\",\"imageMaximumGCAge\":\"0s\",\"volumeStatsAggPeriod\":\"0s\",\"cgroupsPerQOS\":false,\"cpuManagerReconcilePeriod\":\"0s\",\"runtimeRequestTimeout\":\"10m0s\",\"maxPods\":250,\"resolvConf\":\"\",\"kubeAPIQPS\":50,\"kubeAPIBurst\":100,\"serializeImagePulls\":false,\"evictionHard\":{\"imagefs.available\":\"15%\",\"nodefs.available\":\"10%\"},\"evictionPressureTransitionPeriod\":\"0s\",\"featureGates\":{\"NodeLogQuery\":true,\"RotateKubeletServerCertificate\":true},\"memorySwap\":{},\"containerLogMaxSize\":\"50Mi\",\"systemReserved\":{\"cpu\":\"500m\",\"ephemeral-storage\":\"1Gi\",\"memory\":\"2Gi\"},\"enforceNodeAllocatable\":[\"none\"],\"logging\":{\"flushFrequency\":0,\"verbosity\":0,\"options\":{\"text\":{\"infoBufferSize\":\"0\"},\"json\":{\"infoBufferSize\":\"0\"}}},\"enableSystemLogHandler\":true,\"enableSystemLogQuery\":true,\"shutdownGracePeriod\":\"0s\",\"shutdownGracePeriodCriticalPods\":\"0s\",\"crashLoopBackOff\":{},\"registerWithTaints\":[{\"key\":\"os\",\"value\":\"Windows\",\"effect\":\"NoSchedule\"}],\"registerNode\":true,\"containerRuntimeEndpoint\"
:\"npipe://./pipe/containerd-containerd\"}", + expectedSpec: "{\"kind\":\"KubeletConfiguration\",\"apiVersion\":\"kubelet.config.k8s.io/v1beta1\",\"syncFrequency\":\"0s\",\"fileCheckFrequency\":\"0s\",\"httpCheckFrequency\":\"0s\",\"rotateCertificates\":true,\"serverTLSBootstrap\":true,\"authentication\":{\"x509\":{\"clientCAFile\":\"C:\\\\k\\\\kubelet-ca.crt\"},\"webhook\":{\"cacheTTL\":\"0s\"},\"anonymous\":{\"enabled\":false}},\"authorization\":{\"webhook\":{\"cacheAuthorizedTTL\":\"0s\",\"cacheUnauthorizedTTL\":\"0s\"}},\"clusterDomain\":\"cluster.local\",\"clusterDNS\":[\"10.0.128.10\"],\"streamingConnectionIdleTimeout\":\"0s\",\"nodeStatusUpdateFrequency\":\"0s\",\"nodeStatusReportFrequency\":\"0s\",\"imageMinimumGCAge\":\"0s\",\"imageMaximumGCAge\":\"0s\",\"volumeStatsAggPeriod\":\"0s\",\"cgroupsPerQOS\":false,\"cpuManagerReconcilePeriod\":\"0s\",\"runtimeRequestTimeout\":\"10m0s\",\"maxPods\":250,\"resolvConf\":\"\",\"kubeAPIQPS\":50,\"kubeAPIBurst\":100,\"serializeImagePulls\":false,\"evictionHard\":{\"imagefs.available\":\"15%\",\"nodefs.available\":\"10%\"},\"evictionPressureTransitionPeriod\":\"0s\",\"featureGates\":{\"NodeLogQuery\":true,\"RotateKubeletServerCertificate\":true},\"memorySwap\":{},\"containerLogMaxSize\":\"50Mi\",\"systemReserved\":{\"cpu\":\"500m\",\"ephemeral-storage\":\"1Gi\",\"memory\":\"2Gi\"},\"enforceNodeAllocatable\":[\"none\"],\"logging\":{\"flushFrequency\":\"5s\",\"verbosity\":0,\"options\":{\"text\":{\"infoBufferSize\":\"0\"},\"json\":{\"infoBufferSize\":\"0\"}}},\"enableSystemLogHandler\":true,\"enableSystemLogQuery\":true,\"shutdownGracePeriod\":\"0s\",\"shutdownGracePeriodCriticalPods\":\"0s\",\"crashLoopBackOff\":{},\"registerWithTaints\":[{\"key\":\"os\",\"value\":\"Windows\",\"effect\":\"NoSchedule\"}],\"registerNode\":true,\"containerRuntimeEndpoint\":\"npipe://./pipe/containerd-containerd\"}", expectedErr: false, }, { From b07f05580e99231ba326017f3d4a100444f219b2 Mon Sep 17 00:00:00 2001 From: Jose Valdes Date: Fri, 6 
Feb 2026 11:27:56 -0500 Subject: [PATCH 2/6] [vendor] require k8s.io/component-base ran: go mod tidy && go mod vendor --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 6b18a86aba..86af9f8f8b 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,7 @@ require ( k8s.io/apimachinery v0.34.4 k8s.io/client-go v0.34.4 k8s.io/cloud-provider v0.34.4 + k8s.io/component-base v0.34.4 k8s.io/klog/v2 v2.130.1 k8s.io/kubectl v0.34.4 k8s.io/kubelet v0.34.4 @@ -153,7 +154,6 @@ require ( k8s.io/apiextensions-apiserver v0.34.4 // indirect k8s.io/apiserver v0.34.4 // indirect k8s.io/cli-runtime v0.34.4 // indirect - k8s.io/component-base v0.34.4 // indirect k8s.io/controller-manager v0.34.4 // indirect k8s.io/kube-openapi v0.0.0-20260127142750-a19766b6e2d4 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect From 18f00af302ffa4f3ff419cd63e192ba2dbcc0630 Mon Sep 17 00:00:00 2001 From: Jose Valdes Date: Wed, 4 Feb 2026 14:56:26 -0500 Subject: [PATCH 3/6] [services] add log rotation for kubelet this commit configures kubelet service to use kube-log-runner wrapper to enable optional log rotation on Windows nodes. This prevents unbounded log growth that could exhaust disk space. Log rotation parameters are configurable via environment variables: - SERVICES_LOG_FILE_SIZE: Size limit before rotation - SERVICES_LOG_FILE_AGE: Retention period for rotated logs - SERVICES_LOG_FLUSH_INTERVAL: Flush interval to disk This approach is necessary because Windows services don't have a native mechanism for output redirection, and Kubernetes has deprecated the --log-file flag for components. 
--- pkg/services/init.go | 25 +++ pkg/services/services.go | 88 ++++++++- pkg/services/services_test.go | 329 ++++++++++++++++++++++++++++++++++ 3 files changed, 440 insertions(+), 2 deletions(-) create mode 100644 pkg/services/init.go diff --git a/pkg/services/init.go b/pkg/services/init.go new file mode 100644 index 0000000000..4fc3af5f4d --- /dev/null +++ b/pkg/services/init.go @@ -0,0 +1,25 @@ +package services + +import ctrl "sigs.k8s.io/controller-runtime" + +var logFileSize, logFileAge, flushInterval string + +func init() { + log := ctrl.Log.WithName("services").WithName("init") + + var err error + logFileSize, err = getEnvQuantityString(logFileSizeEnvVar) + if err != nil { + log.Error(err, "cannot load environment variable", "name", logFileSizeEnvVar) + } + + logFileAge, err = getEnvDurationString(logFileAgeEnvVar) + if err != nil { + log.Error(err, "cannot load environment variable", "name", logFileAgeEnvVar) + } + + flushInterval, err = getEnvDurationString(logFlushIntervalEnvVar) + if err != nil { + log.Error(err, "cannot load environment variable", "name", logFlushIntervalEnvVar) + } +} diff --git a/pkg/services/services.go b/pkg/services/services.go index de40e8539f..caf490d013 100644 --- a/pkg/services/services.go +++ b/pkg/services/services.go @@ -2,10 +2,13 @@ package services import ( "fmt" + "os" "path/filepath" "strings" + "time" config "github.com/openshift/api/config/v1" + "k8s.io/apimachinery/pkg/api/resource" "github.com/openshift/windows-machine-config-operator/pkg/cluster" "github.com/openshift/windows-machine-config-operator/pkg/ignition" @@ -22,6 +25,13 @@ const ( // hostnameOverrideVar is the variable that should be replaced with the value of the desired instance hostname hostnameOverrideVar = "HOSTNAME_OVERRIDE" NodeIPVar = "NODE_IP" + + // logFileSizeEnvVar is the environment variable name for log file size limit + logFileSizeEnvVar = "SERVICES_LOG_FILE_SIZE" + // logFileAgeEnvVar is the environment variable name for log file age 
retention + logFileAgeEnvVar = "SERVICES_LOG_FILE_AGE" + // logFlushIntervalEnvVar is the environment variable name for log flush interval + logFlushIntervalEnvVar = "SERVICES_LOG_FLUSH_INTERVAL" ) // GenerateManifest returns the expected state of the Windows service configmap. If debug is true, debug logging @@ -222,8 +232,7 @@ func getKubeletServiceConfiguration(argsFromIginition map[string]string, debug b preScripts = append(preScripts, hostnameOverridePowershellVar) } - kubeletServiceCmd := fmt.Sprintf("%s -log-file=%s %s", - windows.KubeLogRunnerPath, windows.KubeletLog, windows.KubeletPath) + kubeletServiceCmd := getLogRunnerForCmd(windows.KubeletPath, windows.KubeletLog) for _, arg := range kubeletArgs { kubeletServiceCmd += fmt.Sprintf(" %s", arg) @@ -307,3 +316,78 @@ func getHostnameCmd(platformType config.PlatformType) string { return "" } } + +// getLogRunnerForCmd returns the command string to run the given commandPath with kube-log-runner +// logging to the given logfilePath. Log rotation parameters can be configured via environment variables. 
+func getLogRunnerForCmd(commandPath, logfilePath string) string { + cmdBuilder := strings.Builder{} + // log runner path must be first + cmdBuilder.WriteString(windows.KubeLogRunnerPath) + + // add log file option + cmdBuilder.WriteString(" -log-file=") + cmdBuilder.WriteString(logfilePath) + + if logFileSize != "" { + // log file size limit before creating a backup + cmdBuilder.WriteString(" -log-file-size=") + cmdBuilder.WriteString(logFileSize) + } + + if logFileAge != "" { + // log retention for backup files created after the size limit is reached + cmdBuilder.WriteString(" -log-file-age=") + cmdBuilder.WriteString(logFileAge) + } + + if flushInterval != "" { + // flush to ensure recent log entries are written to disk in near real-time + cmdBuilder.WriteString(" -flush-interval=") + cmdBuilder.WriteString(flushInterval) + } + + // last, add the target command to be run + cmdBuilder.WriteString(" " + commandPath) + + return cmdBuilder.String() +} + +// getEnvQuantityString returns the string value of the environment variable for the given key +// if it represents a valid and non-negative quantity, otherwise returns error +func getEnvQuantityString(key string) (string, error) { + value := os.Getenv(key) + value = strings.TrimSpace(value) + if value == "" { + // not present + return "", nil + } + // validate value as quantity + q, err := resource.ParseQuantity(value) + if err != nil { + return "", fmt.Errorf("invalid quantity value for %s: %w", key, err) + } + if q.Sign() < 0 { + return "", fmt.Errorf("quantity cannot be negative for %s", key) + } + return value, nil +} + +// getEnvDurationString returns the string value of the environment variable for the given key +// if it represents a valid and non-negative duration, otherwise returns error +func getEnvDurationString(key string) (string, error) { + value := os.Getenv(key) + value = strings.TrimSpace(value) + if value == "" { + // not present + return "", nil + } + if strings.HasPrefix(value, "-") { + return 
"", fmt.Errorf("duration cannot be negative for %s", key) + } + + // validate value as duration + if _, err := time.ParseDuration(value); err != nil { + return "", fmt.Errorf("invalid duration value for %s: %w", key, err) + } + return value, nil +} diff --git a/pkg/services/services_test.go b/pkg/services/services_test.go index ebbb0c70d1..d0cc7e3028 100644 --- a/pkg/services/services_test.go +++ b/pkg/services/services_test.go @@ -1,6 +1,7 @@ package services import ( + "strings" "testing" config "github.com/openshift/api/config/v1" @@ -154,3 +155,331 @@ func TestHybridOverlayConfiguration(t *testing.T) { }) } } + +func TestGetEnvDuration(t *testing.T) { + tests := []struct { + name string + envValue string + set bool + expected string + expectError bool + }{ + { + name: "unset environment variable returns empty string", + set: false, + expected: "", + expectError: false, + }, + { + name: "empty string returns empty string", + envValue: "", + set: true, + expected: "", + expectError: false, + }, + { + name: "whitespace-only string returns empty string", + envValue: " ", + set: true, + expected: "", + expectError: false, + }, + { + name: "valid duration in seconds", + envValue: "5s", + set: true, + expected: "5s", + expectError: false, + }, + { + name: "valid duration with leading/trailing whitespace is trimmed", + envValue: " 30s ", + set: true, + expected: "30s", + expectError: false, + }, + { + name: "zero duration is valid", + envValue: "0s", + set: true, + expected: "0s", + expectError: false, + }, + { + name: "invalid duration string returns error", + envValue: "notaduration", + set: true, + expected: "", + expectError: true, + }, + { + name: "number without unit returns error", + envValue: "100", + set: true, + expected: "", + expectError: true, + }, + { + name: "negative duration returns error", + envValue: "-5s", + set: true, + expected: "", + expectError: true, + }, + { + name: "duration with invalid unit returns error", + envValue: "10x", + set: true, + 
expected: "", + expectError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if tc.set { + t.Setenv("TEST_DURATION_VAR", tc.envValue) + } + result, err := getEnvDurationString("TEST_DURATION_VAR") + if tc.expectError { + assert.Error(t, err) + assert.Empty(t, result) + assert.Contains(t, err.Error(), "TEST_DURATION_VAR") + } else { + assert.NoError(t, err) + assert.Equal(t, tc.expected, result) + } + }) + } +} + +func TestGetEnvQuantity(t *testing.T) { + tests := []struct { + name string + envValue string + set bool + expected string + expectError bool + }{ + { + name: "unset environment variable returns empty string", + set: false, + expected: "", + expectError: false, + }, + { + name: "empty string returns empty string", + envValue: "", + set: true, + expected: "", + expectError: false, + }, + { + name: "whitespace-only string returns empty string", + envValue: " ", + set: true, + expected: "", + expectError: false, + }, + { + name: "valid integer quantity", + envValue: "100", + set: true, + expected: "100", + expectError: false, + }, + { + name: "valid quantity with suffix", + envValue: "100M", + set: true, + expected: "100M", + expectError: false, + }, + { + name: "valid quantity with leading/trailing whitespace is trimmed", + envValue: " 100Mi ", + set: true, + expected: "100Mi", + expectError: false, + }, + { + name: "zero quantity is valid", + envValue: "0", + set: true, + expected: "0", + expectError: false, + }, + { + name: "negative quantity returns error", + envValue: "-1", + set: true, + expected: "", + expectError: true, + }, + { + name: "invalid quantity returns error", + envValue: "notaquantity", + set: true, + expected: "", + expectError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if tc.set { + t.Setenv("TEST_QUANTITY_VAR", tc.envValue) + } + result, err := getEnvQuantityString("TEST_QUANTITY_VAR") + if tc.expectError { + assert.Error(t, err) + assert.Empty(t, result) + 
assert.Contains(t, err.Error(), "TEST_QUANTITY_VAR") + } else { + assert.NoError(t, err) + assert.Equal(t, tc.expected, result) + } + }) + } +} + +func TestGetLogRunnerForCmd(t *testing.T) { + origLogFileSize := logFileSize + origLogFileAge := logFileAge + origFlushInterval := flushInterval + t.Cleanup(func() { + logFileSize = origLogFileSize + logFileAge = origLogFileAge + flushInterval = origFlushInterval + }) + + tests := []struct { + name string + commandPath string + logfilePath string + logFileSize string + logFileAge string + flushInterval string + expectedContains []string + expectedNotContains []string + }{ + { + name: "basic command with no optional parameters", + commandPath: windows.KubeletPath, + logfilePath: windows.KubeletLog, + logFileSize: "", + logFileAge: "", + flushInterval: "", + expectedContains: []string{ + windows.KubeLogRunnerPath, + "-log-file=" + windows.KubeletLog, + windows.KubeletPath, + }, + expectedNotContains: []string{ + "-log-file-size=", + "-log-file-age=", + "-flush-interval=", + }, + }, + { + name: "command with log file size set", + commandPath: windows.KubeProxyPath, + logfilePath: windows.KubeProxyLog, + logFileSize: "100Mi", + logFileAge: "", + flushInterval: "", + expectedContains: []string{ + windows.KubeLogRunnerPath, + "-log-file=" + windows.KubeProxyLog, + "-log-file-size=100Mi", + windows.KubeProxyPath, + }, + expectedNotContains: []string{ + "-log-file-age=", + "-flush-interval=", + }, + }, + { + name: "command with log file age set", + commandPath: windows.KubeletPath, + logfilePath: windows.KubeletLog, + logFileSize: "", + logFileAge: "24h", + flushInterval: "", + expectedContains: []string{ + windows.KubeLogRunnerPath, + "-log-file=" + windows.KubeletLog, + "-log-file-age=24h", + windows.KubeletPath, + }, + expectedNotContains: []string{ + "-log-file-size=", + "-flush-interval=", + }, + }, + { + name: "command with flush interval set", + commandPath: windows.KubeletPath, + logfilePath: windows.KubeletLog, + 
logFileSize: "", + logFileAge: "", + flushInterval: "5s", + expectedContains: []string{ + windows.KubeLogRunnerPath, + "-log-file=" + windows.KubeletLog, + "-flush-interval=5s", + windows.KubeletPath, + }, + expectedNotContains: []string{ + "-log-file-size=", + "-log-file-age=", + }, + }, + { + name: "command with all optional parameters set", + commandPath: windows.KubeletPath, + logfilePath: windows.KubeletLog, + logFileSize: "50Mi", + logFileAge: "48h", + flushInterval: "10s", + expectedContains: []string{ + windows.KubeLogRunnerPath, + "-log-file=" + windows.KubeletLog, + "-log-file-size=50Mi", + "-log-file-age=48h", + "-flush-interval=10s", + windows.KubeletPath, + }, + expectedNotContains: []string{}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // Set package-level variables for this test case + logFileSize = tc.logFileSize + logFileAge = tc.logFileAge + flushInterval = tc.flushInterval + + result := getLogRunnerForCmd(tc.commandPath, tc.logfilePath) + + for _, expected := range tc.expectedContains { + assert.Contains(t, result, expected, + "Command should contain: %s\nActual command: %s", expected, result) + } + for _, notExpected := range tc.expectedNotContains { + assert.NotContains(t, result, notExpected, + "Command should not contain: %s\nActual command: %s", notExpected, result) + } + + // Verify ordering: KubeLogRunnerPath must come first, commandPath must come last + assert.True(t, len(result) > 0, "Result should not be empty") + assert.Equal(t, 0, strings.Index(result, windows.KubeLogRunnerPath), + "KubeLogRunnerPath must be at the start of the command") + expectedSuffix := " " + tc.commandPath + assert.True(t, len(result) >= len(expectedSuffix) && + result[len(result)-len(expectedSuffix):] == expectedSuffix, + "Command path must be at the end of the command string.\nActual: %s", result) + }) + } +} From 6af7e0252d85e618b8e9fd5b73fdf50fdfc986eb Mon Sep 17 00:00:00 2001 From: Jose Valdes Date: Tue, 17 Feb 2026 
15:45:25 -0500 Subject: [PATCH 4/6] [services] add log rotation for kubeproxy this commit configures kubeproxy service to use kube-log-runner wrapper to enable optional log rotation on Windows nodes. This prevents unbounded log growth that could exhaust disk space. Log rotation parameters are configurable via environment variables: - SERVICES_LOG_FILE_SIZE: Size limit before rotation (suggested: 100M) - SERVICES_LOG_FILE_AGE: Retention period for rotated logs (suggested: 168h) - SERVICES_LOG_FLUSH_INTERVAL: Flush interval to disk (suggested: 5s) --- pkg/services/services.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/services/services.go b/pkg/services/services.go index caf490d013..b62e1db649 100644 --- a/pkg/services/services.go +++ b/pkg/services/services.go @@ -153,9 +153,8 @@ func hybridOverlayConfiguration(apiServerEndpoint, vxlanPort string, debug bool) // kubeProxyConfiguration returns the Service definition for kube-proxy func kubeProxyConfiguration(debug bool) servicescm.Service { - cmd := fmt.Sprintf("%s -log-file=%s %s --config %s --windows-service", windows.KubeLogRunnerPath, windows.KubeProxyLog, - windows.KubeProxyPath, windows.KubeProxyConfigPath) - + cmd := getLogRunnerForCmd(windows.KubeProxyPath, windows.KubeProxyLog) + cmd = fmt.Sprintf("%s --config %s --windows-service", cmd, windows.KubeProxyConfigPath) verbosity := "0" if debug { verbosity = "4" From d33ba7454221fc9f73b4402481966a2656127286 Mon Sep 17 00:00:00 2001 From: Jose Valdes Date: Fri, 6 Feb 2026 11:39:32 -0500 Subject: [PATCH 5/6] [hack] set shorter service log file rotation in CI --- hack/common.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hack/common.sh b/hack/common.sh index eff9ac5656..2cb6cf0a70 100644 --- a/hack/common.sh +++ b/hack/common.sh @@ -380,6 +380,10 @@ deleteParallelUpgradeCheckerResources() { } +# Enables debug logging and sets a smaller size for the services log file in the operator pod to make it easier to 
+# troubleshoot issues in CI. +# The method for patching the deployment depends on the OLM version, which is detected by checking for the presence +# of a subscription (OLMv0) or clusterextension (OLMv1). enable_debug_logging() { if [[ $(oc get -n $WMCO_DEPLOY_NAMESPACE pod -l name=windows-machine-config-operator -ojson) == *"--debugLogging"* ]]; then echo "Debug logging already enabled" @@ -390,13 +394,16 @@ enable_debug_logging() { WMCO_SUB=$(oc get sub -n "$WMCO_DEPLOY_NAMESPACE" --no-headers 2>/dev/null | awk '{print $1}') if [[ -n "$WMCO_SUB" ]]; then echo "Detected OLMv0, patching subscription $WMCO_SUB" - oc patch subscription $WMCO_SUB -n $WMCO_DEPLOY_NAMESPACE --type=merge -p '{"spec":{"config":{"env":[{"name":"ARGS","value":"--debugLogging"}]}}}' + oc patch subscription $WMCO_SUB -n $WMCO_DEPLOY_NAMESPACE --type=merge -p '{"spec":{"config":{"env":[{"name":"ARGS","value":"--debugLogging"},{"name":"SERVICES_LOG_FILE_SIZE","value":"1M"}]}}}' # delete the deployment to ensure the changes are picked up in a timely matter oc delete deployment -n $WMCO_DEPLOY_NAMESPACE windows-machine-config-operator elif oc get clusterextension windows-machine-config-operator &>/dev/null; then echo "Detected OLMv1, patching deployment directly..." 
- # Add debug env variable to the WMCO manager container - oc set env deployment/windows-machine-config-operator -n "$WMCO_DEPLOY_NAMESPACE" ARGS="--debugLogging" -c manager + # Add debug env variable and log file limit to the WMCO manager container + oc set env deployment/windows-machine-config-operator -n "$WMCO_DEPLOY_NAMESPACE" \ + ARGS="--debugLogging" \ + SERVICES_LOG_FILE_SIZE="1M" \ + -c manager # force restart to pick up the env variable change oc scale deployment/windows-machine-config-operator -n "$WMCO_DEPLOY_NAMESPACE" --replicas=0 oc scale deployment/windows-machine-config-operator -n "$WMCO_DEPLOY_NAMESPACE" --replicas=1 From 857920f18f9ecbbe3ada4b1748307e5acd4424f8 Mon Sep 17 00:00:00 2001 From: Jose Valdes Date: Wed, 18 Feb 2026 23:12:33 -0500 Subject: [PATCH 6/6] [docs] add log rotation information this commit documents the automatic log rotation for managed Windows services in the Enabled features section in the README.md --- README.md | 29 +++++++++++- docs/log-rotation-managed-services.md | 63 +++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 docs/log-rotation-managed-services.md diff --git a/README.md b/README.md index 3745c76bfa..9a6364aa5f 100644 --- a/README.md +++ b/README.md @@ -246,6 +246,31 @@ in a healthy state with no disruptions. ## Enabled features +### Automatic log rotation for managed Windows services + +Automatic rotation of the log files for the managed Windows services is available to prevent disk space +exhaustion. It uses [kube-log-runner](https://github.com/kubernetes/kubernetes/tree/master/staging/src/k8s.io/component-base/logs/kube-log-runner) +as a wrapper binary that executes the service while capturing stdout/stderr, rotating logs based on size +and automatically cleaning up old files based on age. 
+ +For details on the log rotation naming convention, please refer to the [kube-log-runner documentation](https://github.com/kubernetes/kubernetes/tree/master/staging/src/k8s.io/component-base/logs/kube-log-runner) + +The log rotation functionality is disabled by default, causing log files to grow indefinitely. + +Managed Windows services with log rotation capabilities: +- kubelet +- kube-proxy + +Not yet supported: +- containerd +- csi-proxy +- windows_exporter +- hybrid-overlay-node +- azure-cloud-node-manager + +For instructions to enable, customize or disable log rotation refer to +[log rotation for managed Windows services documentation](docs/log-rotation-managed-services.md). + ### Autoscaling Windows nodes Cluster autoscaling is supported for Windows instances. @@ -257,7 +282,7 @@ Cluster autoscaling is supported for Windows instances. Windows instances brought up with WMCO are set up with the containerd container runtime. As WMCO installs and manages the container runtime, it is recommended not to preinstall containerd in MachineSet or BYOH Windows instances. -### Cluster-wide proxy +### Cluster-wide proxy WMCO supports using a [cluster-wide proxy](https://docs.openshift.com/container-platform/latest/networking/enable-cluster-wide-proxy.html) to route egress traffic from Windows nodes on OpenShift Container Platform. @@ -297,7 +322,7 @@ Some valid values could be: `$mirrorRegistry/oss/kubernetes/pause:3.9`, `$mirror ### Horizontal Pod Autoscaling Horizontal Pod autoscaling is available for Windows workloads. -Please follow the [Horizontal Pod autoscaling docs](https://docs.openshift.com/container-platform/latest/nodes/pods/nodes-pods-autoscaling.html) +Please follow the [Horizontal Pod autoscaling docs](https://docs.openshift.com/container-platform/latest/nodes/pods/nodes-pods-autoscaling.html) to create a horizontal pod autoscaler object for CPU and memory utilization of Windows workloads. 
## Limitations diff --git a/docs/log-rotation-managed-services.md b/docs/log-rotation-managed-services.md new file mode 100644 index 0000000000..7db64fbd7b --- /dev/null +++ b/docs/log-rotation-managed-services.md @@ -0,0 +1,63 @@ +# Log rotation for managed Windows services + +Log rotation for managed Windows services is available for WMCO 10.22+. This feature rotates log files based +on configurable size and age thresholds and is configured via environment variables in the operator. + +## Enabling log rotation for managed Windows services + +To enable and customize the log rotation behavior, add the following environment variables to the subscription (OLMv0). +The operator will restart to load the newly added environment variables and apply log rotation to the +managed services. This will result in a reconfiguration of the existing Windows nodes, one at a time, until all +nodes have been handled, to minimize disruption. + +### Setting environment variables in the subscription: +```yaml +kind: Subscription +spec: + config: + env: + - name: SERVICES_LOG_FILE_SIZE + value: "100M" # Rotate when log reaches this size (suggested: 100M) + - name: SERVICES_LOG_FILE_AGE + value: "168h" # Keep rotated logs for this duration (e.g: 168h/7 days) + - name: SERVICES_LOG_FLUSH_INTERVAL + value: "5s" # Flush logs to disk at this interval (suggested: 5s) +``` + +### Patching the subscription using the CLI: +```shell script +oc patch subscription <subscription-name> -n <namespace> \ + --type=merge \ + -p '{"spec":{"config":{"env":[{"name":"SERVICES_LOG_FILE_SIZE","value":"100M"},{"name":"SERVICES_LOG_FILE_AGE","value":"168h"},{"name":"SERVICES_LOG_FLUSH_INTERVAL","value":"5s"}]}}}' +``` + +### Patching the operator deployment using the CLI (OLMv1 or manual installs): + +```shell script + oc set env deployment/windows-machine-config-operator -n <namespace> -c manager \ + SERVICES_LOG_FILE_SIZE="100M" \ + SERVICES_LOG_FILE_AGE="168h" \ + SERVICES_LOG_FLUSH_INTERVAL="5s" +``` +where: +- `<namespace>`: The namespace where the operator is 
installed (e.g., `openshift-windows-machine-config-operator`) +- `<subscription-name>`: The name of the subscription used to install the operator (e.g., `windows-machine-config-operator-subscription`) + +## Disabling log rotation for managed Windows services + +To disable log rotation, remove the `SERVICES_LOG_FILE_SIZE`, `SERVICES_LOG_FILE_AGE`, and `SERVICES_LOG_FLUSH_INTERVAL` +environment variables from the subscription or operator deployment. + +## Behavior when log rotation settings change + +**Effect on existing log files:** When rotation settings are changed (enabled, disabled, or updated), any previously +rotated log files are retained according to the `SERVICES_LOG_FILE_AGE` value that was in effect when they were +created. Once that retention period expires, the files are cleaned up automatically. New log files and any future +rotated files will follow the updated rotation rules going forward. + +**Operator and node behavior:** Any change to the `SERVICES_LOG_FILE_SIZE`, `SERVICES_LOG_FILE_AGE`, or +`SERVICES_LOG_FLUSH_INTERVAL` environment variables—whether in the subscription (OLMv0) or the operator deployment +(OLMv1 / manual installs)—will cause the operator to restart in order to load the updated configuration. After +restarting, the operator will reconfigure each Windows node one at a time to apply the new log rotation settings, +minimizing disruption. Note that service continuity during reconfiguration is not guaranteed; brief interruptions +to managed services (such as kubelet or kube-proxy) may occur on each node as it is reconfigured.