From b251f50f0eb4029415c24280ea8f31773acd6ef6 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 18 Dec 2025 14:36:13 +0100 Subject: [PATCH 1/7] CLOUDP-351614: Fix monitoring failure after disabling TLS on AppDB When TLS is disabled on AppDB, clear stale TLS params from monitoring config's additionalParams to prevent the monitoring agent from trying to use certificate files that no longer exist. Added addTLSParams/clearTLSParams functions that use shared constants to ensure add and remove operations stay in sync. --- .evergreen.yml | 16 +++- changelog/20251216_fix_tls_monitoring.md | 6 ++ controllers/om/deployment.go | 74 ++++++++++++++----- controllers/om/deployment/testing_utils.go | 2 +- controllers/om/deployment_test.go | 41 ++++++++-- .../operator/appdbreplicaset_controller.go | 44 ++++++++--- .../appdbreplicaset_controller_test.go | 74 +++++++++++++++++++ controllers/operator/common_controller.go | 2 +- .../mongodbshardedcluster_controller.go | 2 +- .../mongodbshardedcluster_controller_test.go | 2 +- .../operator/mongodbstandalone_controller.go | 2 +- .../mongodbstandalone_controller_test.go | 2 +- 12 files changed, 228 insertions(+), 39 deletions(-) create mode 100644 changelog/20251216_fix_tls_monitoring.md diff --git a/.evergreen.yml b/.evergreen.yml index f39693d58..3f8905ea6 100644 --- a/.evergreen.yml +++ b/.evergreen.yml @@ -1439,7 +1439,21 @@ buildvariants: tags: [ "pr_patch", "staging", "e2e_test_suite", "static" ] run_on: - ubuntu2404-large - <<: *base_om8_dependency + depends_on: + - name: build_om_images + variant: build_om80_images + - name: build_operator_ubi + variant: init_test_run + - name: build_init_database_image_ubi + variant: init_test_run + - name: build_test_image + variant: init_test_run + - name: build_init_appdb_images_ubi + variant: init_test_run + - name: build_init_om_images_ubi + variant: init_test_run + - name: publish_helm_chart + variant: init_test_run tasks: - name: e2e_static_ops_manager_kind_only_task_group - name: e2e_static_ops_manager_kind_6_0_only_task_group diff --git a/changelog/20251216_fix_tls_monitoring.md b/changelog/20251216_fix_tls_monitoring.md new file mode 100644 index 000000000..ec63eeb2e --- /dev/null +++ b/changelog/20251216_fix_tls_monitoring.md @@ -0,0 +1,6 @@ +--- +kind: fix +date: 2025-12-16 +--- + +* Fixed an issue where monitoring agents would fail after disabling TLS on a MongoDB deployment. diff --git a/controllers/om/deployment.go b/controllers/om/deployment.go index 3b75e19fe..23545e949 100644 --- a/controllers/om/deployment.go +++ b/controllers/om/deployment.go @@ -251,15 +251,16 @@ func (d Deployment) MergeShardedCluster(opts DeploymentShardedClusterMergeOption return shardsScheduledForRemoval, nil } -// AddMonitoringAndBackup adds monitoring and backup agents to each process -// The automation agent will update the agents versions to the latest version automatically +// ConfigureMonitoringAndBackup configures monitoring and backup agents for each process. +// This is called on every reconcile to ensure the monitoring/backup config matches the desired state. +// The automation agent will update the agents versions to the latest version automatically. // Note, that these two are deliberately combined as all clients (standalone, rs etc.) need both backup and monitoring -// together -func (d Deployment) AddMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) { +// together. +func (d Deployment) ConfigureMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) { if len(d.getProcesses()) == 0 { return } - d.AddMonitoring(log, tls, caFilepath) + d.ConfigureMonitoring(log, tls, caFilepath) d.addBackup(log) } @@ -277,8 +278,9 @@ func (d Deployment) GetReplicaSetByName(name string) ReplicaSet { return nil } -// AddMonitoring adds monitoring agents for all processes in the deployment -func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) { +// ConfigureMonitoring configures monitoring agents for all processes in the deployment. +// This is called on every reconcile to ensure the monitoring config matches the desired state. +func (d Deployment) ConfigureMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) { if len(d.getProcesses()) == 0 { return } @@ -306,23 +308,61 @@ func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath s monitoringVersion["hostname"] = p.HostName() if tls { - additionalParams := map[string]string{ - "useSslForAllConnections": "true", - "sslTrustedServerCertificates": caFilePath, + pemKeyFile := "" + if pem := p.EnsureTLSConfig()["PEMKeyFile"]; pem != nil { + pemKeyFile = pem.(string) } - - pemKeyFile := p.EnsureTLSConfig()["PEMKeyFile"] - if pemKeyFile != nil { - additionalParams["sslClientCertificate"] = pemKeyFile.(string) - } - - monitoringVersion["additionalParams"] = additionalParams + params := map[string]string{} + addTLSParams(params, caFilePath, pemKeyFile) + monitoringVersion["additionalParams"] = params + } else { + // Clear TLS-specific params when TLS is disabled to prevent monitoring from + // trying to use certificate files that no longer exist. + // We only clear TLS fields, preserving any other additionalParams that may be set. + clearTLSParamsFromMonitoringVersion(monitoringVersion) } } d.setMonitoringVersions(monitoringVersions) } +// TLS param keys for monitoring additionalParams. +const ( + tlsParamUseSsl = "useSslForAllConnections" + tlsParamTrustedCert = "sslTrustedServerCertificates" + tlsParamClientCert = "sslClientCertificate" +) + +func addTLSParams(params map[string]string, caFilePath, pemKeyFile string) { + params[tlsParamUseSsl] = "true" + params[tlsParamTrustedCert] = caFilePath + if pemKeyFile != "" { + params[tlsParamClientCert] = pemKeyFile + } +} + +func clearTLSParams(params map[string]string) { + delete(params, tlsParamUseSsl) + delete(params, tlsParamTrustedCert) + delete(params, tlsParamClientCert) +} + +// clearTLSParamsFromMonitoringVersion removes TLS-specific fields from the monitoring +// version's additionalParams. If additionalParams becomes empty after removing TLS fields, +// the entire map is removed. +func clearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) { + additionalParams, ok := monitoringVersion["additionalParams"].(map[string]string) + if !ok { + return + } + + clearTLSParams(additionalParams) + + if len(additionalParams) == 0 { + delete(monitoringVersion, "additionalParams") + } +} + // RemoveMonitoringAndBackup removes both monitoring and backup agent configurations. This must be called when the // Mongodb resource is being removed, otherwise UI will show non-existing agents in the "servers" tab func (d Deployment) RemoveMonitoringAndBackup(names []string, log *zap.SugaredLogger) { diff --git a/controllers/om/deployment/testing_utils.go b/controllers/om/deployment/testing_utils.go index 79f0ebf82..0a5333ccc 100644 --- a/controllers/om/deployment/testing_utils.go +++ b/controllers/om/deployment/testing_utils.go @@ -37,7 +37,7 @@ func CreateFromReplicaSet(mongoDBImage string, forceEnterprise bool, rs *mdb.Mon lastConfig.ToMap(), zap.S(), ) - d.AddMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer) + d.ConfigureMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer) d.ConfigureTLS(rs.Spec.GetSecurity(), util.CAFilePathInContainer) return d } diff --git a/controllers/om/deployment_test.go b/controllers/om/deployment_test.go index 281731187..2dad50bdb 100644 --- a/controllers/om/deployment_test.go +++ b/controllers/om/deployment_test.go @@ -489,12 +489,12 @@ func TestConfiguringTlsProcessFromOpsManager(t *testing.T) { } } -func TestAddMonitoring(t *testing.T) { +func TestConfigureMonitoring(t *testing.T) { d := NewDeployment() rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs")) d.MergeReplicaSet(rs0, nil, nil, zap.S()) - d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer) + d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer) expectedMonitoringVersions := []interface{}{ map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion}, @@ -504,16 +504,16 @@ func TestAddMonitoring(t *testing.T) { assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions()) // adding again - nothing changes - d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer) + d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer) assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions()) } -func TestAddMonitoringTls(t *testing.T) { +func TestConfigureMonitoringTls(t *testing.T) { d := NewDeployment() rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs")) d.MergeReplicaSet(rs0, nil, nil, zap.S()) - d.AddMonitoring(zap.S(), true, util.CAFilePathInContainer) + d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer) expectedAdditionalParams := map[string]string{ "useSslForAllConnections": "true", @@ -528,10 +528,39 @@ func TestAddMonitoringTls(t *testing.T) { assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions()) // adding again - nothing changes - d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer) + d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer) assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions()) } +func TestConfigureMonitoringTLSDisable(t *testing.T) { + d := NewDeployment() + + rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs")) + d.MergeReplicaSet(rs0, nil, nil, zap.S()) + d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer) + + // verify TLS is present in additionalParams + expectedAdditionalParams := map[string]string{ + "useSslForAllConnections": "true", + "sslTrustedServerCertificates": util.CAFilePathInContainer, + } + expectedMonitoringVersionsWithTls := []interface{}{ + map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams}, + map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams}, + map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams}, + } + assert.Equal(t, expectedMonitoringVersionsWithTls, d.getMonitoringVersions()) + + // disabling TLS should clear additionalParams (CLOUDP-351614) + d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer) + expectedMonitoringVersionsWithoutTls := []interface{}{ + map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion}, + map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion}, + map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion}, + } + assert.Equal(t, expectedMonitoringVersionsWithoutTls, d.getMonitoringVersions()) +} + func TestAddBackup(t *testing.T) { d := NewDeployment() diff --git a/controllers/operator/appdbreplicaset_controller.go b/controllers/operator/appdbreplicaset_controller.go index 86f61b52b..5d45914e3 100644 --- a/controllers/operator/appdbreplicaset_controller.go +++ b/controllers/operator/appdbreplicaset_controller.go @@ -1430,9 +1430,17 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger monitoringVersions := ac.MonitoringVersions for _, p := range ac.Processes { found := false - for _, m := range monitoringVersions { + for i, m := range monitoringVersions { if m.Hostname == p.HostName { found = true + if !tls { + // Clear TLS-specific params when TLS is disabled to prevent monitoring from + // trying to use certificate files that no longer exist. + clearTLSParams(monitoringVersions[i].AdditionalParams) + if len(monitoringVersions[i].AdditionalParams) == 0 { + monitoringVersions[i].AdditionalParams = nil + } + } break } } @@ -1442,15 +1450,12 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger Name: om.MonitoringAgentDefaultVersion, } if tls { - additionalParams := map[string]string{ - "useSslForAllConnections": "true", - "sslTrustedServerCertificates": appdbCAFilePath, - } - pemKeyFile := p.Args26.Get("net.tls.certificateKeyFile") - if pemKeyFile != nil { - additionalParams["sslClientCertificate"] = pemKeyFile.String() + pemKeyFile := "" + if pem := p.Args26.Get("net.tls.certificateKeyFile"); pem != nil { + pemKeyFile = pem.String() } - monitoringVersion.AdditionalParams = additionalParams + monitoringVersion.AdditionalParams = map[string]string{} + addTLSParams(monitoringVersion.AdditionalParams, appdbCAFilePath, pemKeyFile) } log.Debugw("Added monitoring agent configuration", "host", p.HostName, "tls", tls) monitoringVersions = append(monitoringVersions, monitoringVersion) @@ -1459,6 +1464,27 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger ac.MonitoringVersions = monitoringVersions } +// TLS param keys for monitoring additionalParams. +const ( + tlsParamUseSsl = "useSslForAllConnections" + tlsParamTrustedCert = "sslTrustedServerCertificates" + tlsParamClientCert = "sslClientCertificate" +) + +func addTLSParams(params map[string]string, caFilePath, pemKeyFile string) { + params[tlsParamUseSsl] = "true" + params[tlsParamTrustedCert] = caFilePath + if pemKeyFile != "" { + params[tlsParamClientCert] = pemKeyFile + } +} + +func clearTLSParams(params map[string]string) { + delete(params, tlsParamUseSsl) + delete(params, tlsParamTrustedCert) + delete(params, tlsParamClientCert) +} + // registerAppDBHostsWithProject uses the Hosts API to add each process in the AppDB to the project func (r *ReconcileAppDbReplicaSet) registerAppDBHostsWithProject(hostnames []string, conn om.Connection, opsManagerPassword string, log *zap.SugaredLogger) error { getHostsResult, err := conn.GetHosts() diff --git a/controllers/operator/appdbreplicaset_controller_test.go b/controllers/operator/appdbreplicaset_controller_test.go index df6d6b1ed..a24106b6f 100644 --- a/controllers/operator/appdbreplicaset_controller_test.go +++ b/controllers/operator/appdbreplicaset_controller_test.go @@ -1457,3 +1457,77 @@ func createRunningAppDB(ctx context.Context, t *testing.T, startingMembers int, assert.Equal(t, ok, res) return reconciler } + +// TestClearTLSParams tests CLOUDP-351614 fix: +// When TLS is disabled on AppDB, TLS-specific params should be cleared from +// the monitoring config's additionalParams to prevent the monitoring agent +// from trying to use certificate files that no longer exist. +func TestClearTLSParams(t *testing.T) { + tests := []struct { + name string + input map[string]string + expectedOutput map[string]string + }{ + { + name: "nil map", + input: nil, + expectedOutput: nil, + }, + { + name: "empty map", + input: map[string]string{}, + expectedOutput: map[string]string{}, + }, + { + name: "only TLS params", + input: map[string]string{ + "useSslForAllConnections": "true", + "sslTrustedServerCertificates": "/some/path/ca.pem", + "sslClientCertificate": "/some/path/cert.pem", + }, + expectedOutput: map[string]string{}, + }, + { + name: "mixed params - TLS and non-TLS", + input: map[string]string{ + "useSslForAllConnections": "true", + "sslTrustedServerCertificates": "/some/path/ca.pem", + "sslClientCertificate": "/some/path/cert.pem", + "someOtherParam": "someValue", + "anotherParam": "anotherValue", + }, + expectedOutput: map[string]string{ + "someOtherParam": "someValue", + "anotherParam": "anotherValue", + }, + }, + { + name: "only non-TLS params", + input: map[string]string{ + "someOtherParam": "someValue", + "anotherParam": "anotherValue", + }, + expectedOutput: map[string]string{ + "someOtherParam": "someValue", + "anotherParam": "anotherValue", + }, + }, + { + name: "partial TLS params", + input: map[string]string{ + "useSslForAllConnections": "true", + "someOtherParam": "someValue", + }, + expectedOutput: map[string]string{ + "someOtherParam": "someValue", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + clearTLSParams(tt.input) + assert.Equal(t, tt.expectedOutput, tt.input) + }) + } +} diff --git a/controllers/operator/common_controller.go b/controllers/operator/common_controller.go index da6e00c1a..2af4690d1 100644 --- a/controllers/operator/common_controller.go +++ b/controllers/operator/common_controller.go @@ -1075,7 +1075,7 @@ func ReconcileReplicaSetAC(ctx context.Context, d om.Deployment, spec mdbv1.DbCo } d.MergeReplicaSet(rs, spec.GetAdditionalMongodConfig().ToMap(), lastMongodConfig, log) - d.AddMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath) + d.ConfigureMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath) d.ConfigureTLS(spec.GetSecurity(), caFilePath) d.ConfigureInternalClusterAuthentication(rs.GetProcessNames(), spec.GetSecurity().GetInternalClusterAuthenticationMode(), internalClusterPath) diff --git a/controllers/operator/mongodbshardedcluster_controller.go b/controllers/operator/mongodbshardedcluster_controller.go index 1cb9e6b31..66d954a28 100644 --- a/controllers/operator/mongodbshardedcluster_controller.go +++ b/controllers/operator/mongodbshardedcluster_controller.go @@ -2006,7 +2006,7 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c return err } - d.AddMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath) + d.ConfigureMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath) d.ConfigureTLS(sc.Spec.GetSecurity(), opts.caFilePath) setupInternalClusterAuth(d, sc.Name, sc.GetSecurity().GetInternalClusterAuthenticationMode(), diff --git a/controllers/operator/mongodbshardedcluster_controller_test.go b/controllers/operator/mongodbshardedcluster_controller_test.go index c49a2852d..b6c3335b4 100644 --- a/controllers/operator/mongodbshardedcluster_controller_test.go +++ b/controllers/operator/mongodbshardedcluster_controller_test.go @@ -1676,7 +1676,7 @@ func createDeploymentFromShardedCluster(t *testing.T, updatable v1.CustomResourc Finalizing: false, }) assert.NoError(t, err) - d.AddMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer) + d.ConfigureMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer) return d } diff --git a/controllers/operator/mongodbstandalone_controller.go b/controllers/operator/mongodbstandalone_controller.go index 56b4531ca..27f641b68 100644 --- a/controllers/operator/mongodbstandalone_controller.go +++ b/controllers/operator/mongodbstandalone_controller.go @@ -365,7 +365,7 @@ func (r *ReconcileMongoDbStandalone) updateOmDeployment(ctx context.Context, con d.MergeStandalone(standaloneOmObject, s.Spec.AdditionalMongodConfig.ToMap(), lastStandaloneConfig.ToMap(), nil) // TODO change last argument in separate PR - d.AddMonitoringAndBackup(log, s.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer) + d.ConfigureMonitoringAndBackup(log, s.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer) d.ConfigureTLS(s.Spec.GetSecurity(), util.CAFilePathInContainer) return nil }, diff --git a/controllers/operator/mongodbstandalone_controller_test.go b/controllers/operator/mongodbstandalone_controller_test.go index da9edab70..757ac47aa 100644 --- a/controllers/operator/mongodbstandalone_controller_test.go +++ b/controllers/operator/mongodbstandalone_controller_test.go @@ -458,6 +458,6 @@ func createDeploymentFromStandalone(st *mdbv1.MongoDB) om.Deployment { } d.MergeStandalone(process, st.Spec.AdditionalMongodConfig.ToMap(), lastConfig.ToMap(), nil) - d.AddMonitoringAndBackup(zap.S(), st.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer) + d.ConfigureMonitoringAndBackup(zap.S(), st.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer) return d } From 4f47621ee30e4956ff565fcb033d6f14713e2f91 Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 18 Dec 2025 15:00:25 +0100 Subject: [PATCH 2/7] om8 dep --- .evergreen.yml | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/.evergreen.yml b/.evergreen.yml index 3f8905ea6..f39693d58 100644 --- a/.evergreen.yml +++ b/.evergreen.yml @@ -1439,21 +1439,7 @@ buildvariants: tags: [ "pr_patch", "staging", "e2e_test_suite", "static" ] run_on: - ubuntu2404-large - depends_on: - - name: build_om_images - variant: build_om80_images - - name: build_operator_ubi - variant: init_test_run - - name: build_init_database_image_ubi - variant: init_test_run - - name: build_test_image - variant: init_test_run - - name: build_init_appdb_images_ubi - variant: init_test_run - - name: build_init_om_images_ubi - variant: init_test_run - - name: publish_helm_chart - variant: init_test_run + <<: *base_om8_dependency tasks: - name: e2e_static_ops_manager_kind_only_task_group - name: e2e_static_ops_manager_kind_6_0_only_task_group From 738c53c2d6be28dc06ab104f629f27b8599761fc Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Thu, 18 Dec 2025 15:54:34 +0100 Subject: [PATCH 3/7] fix goconst: 1 --- controllers/operator/appdbreplicaset_controller.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/controllers/operator/appdbreplicaset_controller.go b/controllers/operator/appdbreplicaset_controller.go index 5d45914e3..09dc3a55d 100644 --- a/controllers/operator/appdbreplicaset_controller.go +++ b/controllers/operator/appdbreplicaset_controller.go @@ -87,8 +87,8 @@ const ( // Used to convey to the operator to force reconfigure agent. At the moment // it is used for DR in case of Multi-Cluster AppDB when after a cluster outage // there is no primary in the AppDB deployment. - ForceReconfigureAnnotation = "mongodb.com/v1.forceReconfigure" - + ForceReconfigureAnnotation = "mongodb.com/v1.forceReconfigure" + trueString = "trueString" ForcedReconfigureAlreadyPerformedAnnotation = "mongodb.com/v1.forceReconfigurePerformed" ) @@ -717,7 +717,7 @@ func (r *ReconcileAppDbReplicaSet) ReconcileAppDB(ctx context.Context, opsManage opsManager.Annotations = map[string]string{} } - if val, ok := opsManager.Annotations[ForceReconfigureAnnotation]; ok && val == "true" { + if val, ok := opsManager.Annotations[ForceReconfigureAnnotation]; ok && val == trueString { annotationsToAdd := map[string]string{ForcedReconfigureAlreadyPerformedAnnotation: timeutil.Now()} err := annotations.SetAnnotations(ctx, opsManager, annotationsToAdd, r.client) @@ -1248,7 +1248,7 @@ func (r *ReconcileAppDbReplicaSet) buildAppDbAutomationConfig(ctx context.Contex // it checks this with the user provided annotation and if the operator has actually performed a force reconfigure already func shouldPerformForcedReconfigure(annotations map[string]string) bool { if val, ok := annotations[ForceReconfigureAnnotation]; ok { - if val == "true" { + if val == trueString { if _, ok := annotations[ForcedReconfigureAlreadyPerformedAnnotation]; !ok { return true } @@ -1472,7 +1472,7 @@ const ( ) func addTLSParams(params map[string]string, caFilePath, pemKeyFile string) { - params[tlsParamUseSsl] = "true" + params[tlsParamUseSsl] = trueString params[tlsParamTrustedCert] = caFilePath if pemKeyFile != "" { params[tlsParamClientCert] = pemKeyFile From 4cd5dd30bf7d86c5ac5179cac93626191b20893c Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 19 Dec 2025 10:18:07 +0100 Subject: [PATCH 4/7] Fix monitoring TLS configuration bugs 1. Fix type assertion in clearTLSParamsFromMonitoringVersion to handle both map[string]string (locally created) and map[string]interface{} (from JSON/API unmarshaling) 2. Fix addMonitoring to update TLS params on existing monitoring entries when TLS is enabled (previously only added TLS for new entries) 3. Fix trueString constant to be "true" instead of "trueString" --- controllers/om/deployment.go | 22 ++++++++++++++----- .../operator/appdbreplicaset_controller.go | 14 ++++++++++-- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/controllers/om/deployment.go b/controllers/om/deployment.go index 23545e949..dde1fd9b8 100644 --- a/controllers/om/deployment.go +++ b/controllers/om/deployment.go @@ -351,15 +351,25 @@ func clearTLSParams(params map[string]string) { // version's additionalParams. If additionalParams becomes empty after removing TLS fields, // the entire map is removed. func clearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) { - additionalParams, ok := monitoringVersion["additionalParams"].(map[string]string) - if !ok { + additionalParamsRaw, exists := monitoringVersion["additionalParams"] + if !exists || additionalParamsRaw == nil { return } - clearTLSParams(additionalParams) - - if len(additionalParams) == 0 { - delete(monitoringVersion, "additionalParams") + // Handle both map[string]string (locally created) and map[string]interface{} (from JSON/API) + switch params := additionalParamsRaw.(type) { + case map[string]string: + clearTLSParams(params) + if len(params) == 0 { + delete(monitoringVersion, "additionalParams") + } + case map[string]interface{}: + delete(params, tlsParamUseSsl) + delete(params, tlsParamTrustedCert) + delete(params, tlsParamClientCert) + if len(params) == 0 { + delete(monitoringVersion, "additionalParams") + } } } diff --git a/controllers/operator/appdbreplicaset_controller.go b/controllers/operator/appdbreplicaset_controller.go index 09dc3a55d..c310d7bea 100644 --- a/controllers/operator/appdbreplicaset_controller.go +++ b/controllers/operator/appdbreplicaset_controller.go @@ -88,7 +88,7 @@ const ( // it is used for DR in case of Multi-Cluster AppDB when after a cluster outage // there is no primary in the AppDB deployment. ForceReconfigureAnnotation = "mongodb.com/v1.forceReconfigure" - trueString = "trueString" + trueString = "true" ForcedReconfigureAlreadyPerformedAnnotation = "mongodb.com/v1.forceReconfigurePerformed" ) @@ -1433,7 +1433,17 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger for i, m := range monitoringVersions { if m.Hostname == p.HostName { found = true - if !tls { + if tls { + // Add/update TLS params when TLS is enabled + pemKeyFile := "" + if pem := p.Args26.Get("net.tls.certificateKeyFile"); pem != nil { + pemKeyFile = pem.String() + } + if monitoringVersions[i].AdditionalParams == nil { + monitoringVersions[i].AdditionalParams = map[string]string{} + } + addTLSParams(monitoringVersions[i].AdditionalParams, appdbCAFilePath, pemKeyFile) + } else { // Clear TLS-specific params when TLS is disabled to prevent monitoring from // trying to use certificate files that no longer exist. clearTLSParams(monitoringVersions[i].AdditionalParams) From 773363772be8e38a03b652d05592442a3ccdf55b Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 19 Dec 2025 13:56:01 +0100 Subject: [PATCH 5/7] ccleanup --- controllers/om/deployment.go | 103 ++++-------------- controllers/om/monitoring_tls.go | 41 +++++++ .../operator/appdbreplicaset_controller.go | 77 ++++--------- .../appdbreplicaset_controller_test.go | 2 +- .../mongodbmultireplicaset_controller_test.go | 4 +- ...odbshardedcluster_controller_multi_test.go | 2 +- 6 files changed, 87 insertions(+), 142 deletions(-) create mode 100644 controllers/om/monitoring_tls.go diff --git a/controllers/om/deployment.go b/controllers/om/deployment.go index dde1fd9b8..f1aa4ae98 100644 --- a/controllers/om/deployment.go +++ b/controllers/om/deployment.go @@ -6,6 +6,7 @@ import ( "fmt" "math" "regexp" + "slices" "github.com/blang/semver" "github.com/spf13/cast" @@ -264,11 +265,6 @@ func (d Deployment) ConfigureMonitoringAndBackup(log *zap.SugaredLogger, tls boo d.addBackup(log) } -// DEPRECATED: this shouldn't be used as it may panic because of different underlying type; use GetReplicaSets instead -func (d Deployment) ReplicaSets() []ReplicaSet { - return d["replicaSets"].([]ReplicaSet) -} - func (d Deployment) GetReplicaSetByName(name string) ReplicaSet { for _, rs := range d.GetReplicaSets() { if rs.Name() == name { @@ -284,95 +280,38 @@ func (d Deployment) ConfigureMonitoring(log *zap.SugaredLogger, tls bool, caFile if len(d.getProcesses()) == 0 { return } + monitoringVersions := d.getMonitoringVersions() for _, p := range d.getProcesses() { - found := false - var monitoringVersion map[string]interface{} - for _, m := range monitoringVersions { - monitoringVersion = m.(map[string]interface{}) - if monitoringVersion["hostname"] == p.HostName() { - found = true - break - } - } + hostname := p.HostName() + pemKeyFile := p.EnsureTLSConfig()["PEMKeyFile"] - if !found { - monitoringVersion = map[string]interface{}{ - "hostname": p.HostName(), + foundIdx := slices.IndexFunc(monitoringVersions, func(m interface{}) bool { + return m.(map[string]interface{})["hostname"] == hostname + }) + + if foundIdx == -1 { + mv := map[string]interface{}{ + "hostname": hostname, "name": MonitoringAgentDefaultVersion, } - log.Debugw("Added monitoring agent configuration", "host", p.HostName(), "tls", tls) - monitoringVersions = append(monitoringVersions, monitoringVersion) - } - - monitoringVersion["hostname"] = p.HostName() - - if tls { - pemKeyFile := "" - if pem := p.EnsureTLSConfig()["PEMKeyFile"]; pem != nil { - pemKeyFile = pem.(string) + if tls { + mv["additionalParams"] = NewTLSParams(caFilePath, pemKeyFile) } - params := map[string]string{} - addTLSParams(params, caFilePath, pemKeyFile) - monitoringVersion["additionalParams"] = params + log.Debugw("Added monitoring agent configuration", "host", hostname, "tls", tls) + monitoringVersions = append(monitoringVersions, mv) } else { - // Clear TLS-specific params when TLS is disabled to prevent monitoring from - // trying to use certificate files that no longer exist. - // We only clear TLS fields, preserving any other additionalParams that may be set. - clearTLSParamsFromMonitoringVersion(monitoringVersion) + mv := monitoringVersions[foundIdx].(map[string]interface{}) + if tls { + mv["additionalParams"] = NewTLSParams(caFilePath, pemKeyFile) + } else { + ClearTLSParamsFromMonitoringVersion(mv) + } } - } d.setMonitoringVersions(monitoringVersions) } -// TLS param keys for monitoring additionalParams. -const ( - tlsParamUseSsl = "useSslForAllConnections" - tlsParamTrustedCert = "sslTrustedServerCertificates" - tlsParamClientCert = "sslClientCertificate" -) - -func addTLSParams(params map[string]string, caFilePath, pemKeyFile string) { - params[tlsParamUseSsl] = "true" - params[tlsParamTrustedCert] = caFilePath - if pemKeyFile != "" { - params[tlsParamClientCert] = pemKeyFile - } -} - -func clearTLSParams(params map[string]string) { - delete(params, tlsParamUseSsl) - delete(params, tlsParamTrustedCert) - delete(params, tlsParamClientCert) -} - -// clearTLSParamsFromMonitoringVersion removes TLS-specific fields from the monitoring -// version's additionalParams. If additionalParams becomes empty after removing TLS fields, -// the entire map is removed. -func clearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) { - additionalParamsRaw, exists := monitoringVersion["additionalParams"] - if !exists || additionalParamsRaw == nil { - return - } - - // Handle both map[string]string (locally created) and map[string]interface{} (from JSON/API) - switch params := additionalParamsRaw.(type) { - case map[string]string: - clearTLSParams(params) - if len(params) == 0 { - delete(monitoringVersion, "additionalParams") - } - case map[string]interface{}: - delete(params, tlsParamUseSsl) - delete(params, tlsParamTrustedCert) - delete(params, tlsParamClientCert) - if len(params) == 0 { - delete(monitoringVersion, "additionalParams") - } - } -} - // RemoveMonitoringAndBackup removes both monitoring and backup agent configurations. This must be called when the // Mongodb resource is being removed, otherwise UI will show non-existing agents in the "servers" tab func (d Deployment) RemoveMonitoringAndBackup(names []string, log *zap.SugaredLogger) { diff --git a/controllers/om/monitoring_tls.go b/controllers/om/monitoring_tls.go new file mode 100644 index 000000000..6ac18c915 --- /dev/null +++ b/controllers/om/monitoring_tls.go @@ -0,0 +1,41 @@ +package om + +// TLS param keys for monitoring additionalParams. +const ( + TLSParamUseSsl = "useSslForAllConnections" + TLSParamTrustedCert = "sslTrustedServerCertificates" + TLSParamClientCert = "sslClientCertificate" +) + +// NewTLSParams creates and returns a new map with TLS parameters. +func NewTLSParams(caFilePath string, pemKeyFile interface{}) map[string]string { + params := map[string]string{ + TLSParamUseSsl: "true", + TLSParamTrustedCert: caFilePath, + } + if pemKeyFile != nil && pemKeyFile.(string) != "" { + params[TLSParamClientCert] = pemKeyFile.(string) + } + return params +} + +func clearTLSParamsFromMap[V any](params map[string]V) { + delete(params, TLSParamUseSsl) + delete(params, TLSParamTrustedCert) + delete(params, TLSParamClientCert) +} + +// ClearTLSParams removes TLS-specific parameters from the given params map. +func ClearTLSParams(params map[string]string) { + clearTLSParamsFromMap(params) +} + +// ClearTLSParamsFromMonitoringVersion removes TLS-specific fields from the monitoring +// version's additionalParams. +func ClearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) { + params, ok := monitoringVersion["additionalParams"].(map[string]interface{}) + if !ok { + return + } + clearTLSParamsFromMap(params) +} diff --git a/controllers/operator/appdbreplicaset_controller.go b/controllers/operator/appdbreplicaset_controller.go index c310d7bea..1aedefb0d 100644 --- a/controllers/operator/appdbreplicaset_controller.go +++ b/controllers/operator/appdbreplicaset_controller.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "path" + "slices" "sort" "strconv" "strings" @@ -1427,72 +1428,36 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger if len(ac.Processes) == 0 { return } + monitoringVersions := ac.MonitoringVersions for _, p := range ac.Processes { - found := false - for i, m := range monitoringVersions { - if m.Hostname == p.HostName { - found = true - if tls { - // Add/update TLS params when TLS is enabled - pemKeyFile := "" - if pem := p.Args26.Get("net.tls.certificateKeyFile"); pem != nil { - pemKeyFile = pem.String() - } - if monitoringVersions[i].AdditionalParams == nil { - monitoringVersions[i].AdditionalParams = map[string]string{} - } - addTLSParams(monitoringVersions[i].AdditionalParams, appdbCAFilePath, pemKeyFile) - } else { - // Clear TLS-specific params when TLS is disabled to prevent monitoring from - // trying to use certificate files that no longer exist. - clearTLSParams(monitoringVersions[i].AdditionalParams) - if len(monitoringVersions[i].AdditionalParams) == 0 { - monitoringVersions[i].AdditionalParams = nil - } - } - break - } - } - if !found { - monitoringVersion := automationconfig.MonitoringVersion{ - Hostname: p.HostName, + hostname := p.HostName + pemKeyFile := p.Args26.Get("net.tls.certificateKeyFile").String() + + foundIdx := slices.IndexFunc(monitoringVersions, func(m automationconfig.MonitoringVersion) bool { + return m.Hostname == hostname + }) + + if foundIdx == -1 { + mv := automationconfig.MonitoringVersion{ + Hostname: hostname, Name: om.MonitoringAgentDefaultVersion, } if tls { - pemKeyFile := "" - if pem := p.Args26.Get("net.tls.certificateKeyFile"); pem != nil { - pemKeyFile = pem.String() - } - monitoringVersion.AdditionalParams = map[string]string{} - addTLSParams(monitoringVersion.AdditionalParams, appdbCAFilePath, pemKeyFile) + mv.AdditionalParams = om.NewTLSParams(appdbCAFilePath, pemKeyFile) + } + log.Debugw("Added monitoring agent configuration", "host", hostname, "tls", tls) + monitoringVersions = append(monitoringVersions, mv) + } else { + if tls { + monitoringVersions[foundIdx].AdditionalParams = om.NewTLSParams(appdbCAFilePath, pemKeyFile) + } else { + om.ClearTLSParams(monitoringVersions[foundIdx].AdditionalParams) } - log.Debugw("Added monitoring agent configuration", "host", p.HostName, "tls", tls) - monitoringVersions = append(monitoringVersions, monitoringVersion) } } ac.MonitoringVersions = monitoringVersions -} - -// TLS param keys for monitoring additionalParams. -const ( - tlsParamUseSsl = "useSslForAllConnections" - tlsParamTrustedCert = "sslTrustedServerCertificates" - tlsParamClientCert = "sslClientCertificate" -) - -func addTLSParams(params map[string]string, caFilePath, pemKeyFile string) { - params[tlsParamUseSsl] = trueString - params[tlsParamTrustedCert] = caFilePath - if pemKeyFile != "" { - params[tlsParamClientCert] = pemKeyFile - } -} -func clearTLSParams(params map[string]string) { - delete(params, tlsParamUseSsl) - delete(params, tlsParamTrustedCert) - delete(params, tlsParamClientCert) } // registerAppDBHostsWithProject uses the Hosts API to add each process in the AppDB to the project diff --git a/controllers/operator/appdbreplicaset_controller_test.go b/controllers/operator/appdbreplicaset_controller_test.go index a24106b6f..15fab0da7 100644 --- a/controllers/operator/appdbreplicaset_controller_test.go +++ b/controllers/operator/appdbreplicaset_controller_test.go @@ -1526,7 +1526,7 @@ func TestClearTLSParams(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - clearTLSParams(tt.input) + om.ClearTLSParams(tt.input) assert.Equal(t, tt.expectedOutput, tt.input) }) } diff --git a/controllers/operator/mongodbmultireplicaset_controller_test.go b/controllers/operator/mongodbmultireplicaset_controller_test.go index 0f98b1522..13c9a5818 100644 --- a/controllers/operator/mongodbmultireplicaset_controller_test.go +++ b/controllers/operator/mongodbmultireplicaset_controller_test.go @@ -911,7 +911,7 @@ func TestScaling(t *testing.T) { dep, err := omConnectionFactory.GetConnection().ReadDeployment() assert.NoError(t, err) - replicaSets := dep.ReplicaSets() + replicaSets := dep.GetReplicaSets() assert.Len(t, replicaSets, 1) members := replicaSets[0].Members() @@ -932,7 +932,7 @@ func TestScaling(t *testing.T) { dep, err = omConnectionFactory.GetConnection().ReadDeployment() assert.NoError(t, err) - replicaSets = dep.ReplicaSets() + replicaSets = dep.GetReplicaSets() assert.Len(t, replicaSets, 1) members = replicaSets[0].Members() diff --git a/controllers/operator/mongodbshardedcluster_controller_multi_test.go b/controllers/operator/mongodbshardedcluster_controller_multi_test.go index 20e5eeee9..4a5a89e14 100644 --- a/controllers/operator/mongodbshardedcluster_controller_multi_test.go +++ b/controllers/operator/mongodbshardedcluster_controller_multi_test.go @@ -1277,7 +1277,7 @@ func TestReconcileForComplexMultiClusterYaml(t *testing.T) { require.NoError(t, err) automationConfig, err := omConnectionFactory.GetConnection().ReadAutomationConfig() require.NoError(t, err) - normalizedActualReplicaSets, err := normalizeObjectToInterfaceMap(map[string]any{"replicaSets": automationConfig.Deployment.ReplicaSets()}) + normalizedActualReplicaSets, err := normalizeObjectToInterfaceMap(map[string]any{"replicaSets": automationConfig.Deployment.GetReplicaSets()}) require.NoError(t, err) if !assert.Equal(t, normalizedExpectedReplicaSets, normalizedActualReplicaSets) { visualDiff, err := getVisualJsonDiff(normalizedExpectedReplicaSets, normalizedActualReplicaSets) From 6c53570012ac80788b8ab0f2b13c2b4434738b2b Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 19 Dec 2025 14:40:20 +0100 Subject: [PATCH 6/7] fix tests --- controllers/om/monitoring_tls.go | 18 +++++++++++++----- .../operator/appdbreplicaset_controller.go | 1 - 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/controllers/om/monitoring_tls.go b/controllers/om/monitoring_tls.go index 6ac18c915..82cbfa5db 100644 --- a/controllers/om/monitoring_tls.go +++ b/controllers/om/monitoring_tls.go @@ -31,11 +31,19 @@ func ClearTLSParams(params map[string]string) { } // ClearTLSParamsFromMonitoringVersion removes TLS-specific fields from the monitoring -// version's additionalParams. +// version's additionalParams. If additionalParams becomes empty after removing TLS fields, +// it is deleted from the monitoring version. func ClearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) { - params, ok := monitoringVersion["additionalParams"].(map[string]interface{}) - if !ok { - return + var isEmpty bool + switch params := monitoringVersion["additionalParams"].(type) { + case map[string]string: + clearTLSParamsFromMap(params) + isEmpty = len(params) == 0 + case map[string]interface{}: + clearTLSParamsFromMap(params) + isEmpty = len(params) == 0 + } + if isEmpty { + delete(monitoringVersion, "additionalParams") } - clearTLSParamsFromMap(params) } diff --git a/controllers/operator/appdbreplicaset_controller.go b/controllers/operator/appdbreplicaset_controller.go index 1aedefb0d..e2490f92b 100644 --- a/controllers/operator/appdbreplicaset_controller.go +++ b/controllers/operator/appdbreplicaset_controller.go @@ -1457,7 +1457,6 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger } } ac.MonitoringVersions = monitoringVersions - } // registerAppDBHostsWithProject uses the Hosts API to add each process in the AppDB to the project From 1445360b5d7644faddc10744adaaa69f07ac229c Mon Sep 17 00:00:00 2001 From: Nam Nguyen Date: Fri, 19 Dec 2025 19:39:39 +0100 Subject: [PATCH 7/7] address feedback --- controllers/om/deployment.go | 6 +++--- controllers/om/deployment_test.go | 6 +++--- controllers/operator/appdbreplicaset_controller.go | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/controllers/om/deployment.go b/controllers/om/deployment.go index f1aa4ae98..9f5b8c54a 100644 --- a/controllers/om/deployment.go +++ b/controllers/om/deployment.go @@ -262,7 +262,7 @@ func (d Deployment) ConfigureMonitoringAndBackup(log *zap.SugaredLogger, tls boo return } d.ConfigureMonitoring(log, tls, caFilepath) - d.addBackup(log) + d.ConfigureBackup(log) } func (d Deployment) GetReplicaSetByName(name string) ReplicaSet { @@ -1058,8 +1058,8 @@ func (d Deployment) removeMonitoring(processNames []string) { d.setMonitoringVersions(updatedMonitoringVersions) } -// addBackup adds backup agent configuration for each of the processes of deployment -func (d Deployment) addBackup(log *zap.SugaredLogger) { +// ConfigureBackup adds backup agent configuration for each of the processes of deployment +func (d Deployment) ConfigureBackup(log *zap.SugaredLogger) { backupVersions := d.getBackupVersions() for _, p := range d.getProcesses() { found := false diff --git a/controllers/om/deployment_test.go b/controllers/om/deployment_test.go index 2dad50bdb..fcc6156b3 100644 --- a/controllers/om/deployment_test.go +++ b/controllers/om/deployment_test.go @@ -561,12 +561,12 @@ func TestConfigureMonitoringTLSDisable(t *testing.T) { assert.Equal(t, expectedMonitoringVersionsWithoutTls, d.getMonitoringVersions()) } -func TestAddBackup(t *testing.T) { +func TestConfigureBackup(t *testing.T) { d := NewDeployment() rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs")) d.MergeReplicaSet(rs0, nil, nil, zap.S()) - d.addBackup(zap.S()) + d.ConfigureBackup(zap.S()) expectedBackupVersions := []interface{}{ map[string]interface{}{"hostname": "my-rs-0.some.host", "name": BackupAgentDefaultVersion}, @@ -576,7 +576,7 @@ func TestAddBackup(t *testing.T) { assert.Equal(t, expectedBackupVersions, d.getBackupVersions()) // adding again - nothing changes - d.addBackup(zap.S()) + d.ConfigureBackup(zap.S()) assert.Equal(t, expectedBackupVersions, d.getBackupVersions()) } diff --git a/controllers/operator/appdbreplicaset_controller.go b/controllers/operator/appdbreplicaset_controller.go index e2490f92b..15387bfff 100644 --- a/controllers/operator/appdbreplicaset_controller.go +++ b/controllers/operator/appdbreplicaset_controller.go @@ -1179,7 +1179,7 @@ func (r *ReconcileAppDbReplicaSet) buildAppDbAutomationConfig(ctx context.Contex }). AddModifications(func(automationConfig *automationconfig.AutomationConfig) { if acType == monitoring { - addMonitoring(automationConfig, log, rs.GetSecurity().IsTLSEnabled()) + configureMonitoring(automationConfig, log, rs.GetSecurity().IsTLSEnabled()) automationConfig.ReplicaSets = []automationconfig.ReplicaSet{} automationConfig.Processes = []automationconfig.Process{} } @@ -1424,7 +1424,7 @@ func setBaseUrlForAgents(ac *automationconfig.AutomationConfig, url string) { } } -func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger, tls bool) { +func configureMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger, tls bool) { if len(ac.Processes) == 0 { return }