Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions changelog/20251216_fix_tls_monitoring.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
kind: fix
date: 2025-12-16
---

* Fixed an issue where monitoring agents would fail after disabling TLS on a MongoDB deployment.
Comment on lines +1 to +6
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM!

77 changes: 33 additions & 44 deletions controllers/om/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"math"
"regexp"
"slices"

"github.com/blang/semver"
"github.com/spf13/cast"
Expand Down Expand Up @@ -251,21 +252,17 @@ func (d Deployment) MergeShardedCluster(opts DeploymentShardedClusterMergeOption
return shardsScheduledForRemoval, nil
}

// AddMonitoringAndBackup adds monitoring and backup agents to each process
// The automation agent will update the agents versions to the latest version automatically
// ConfigureMonitoringAndBackup configures monitoring and backup agents for each process.
// This is called on every reconcile to ensure the monitoring/backup config matches the desired state.
// The automation agent will update the agents versions to the latest version automatically.
// Note that these two are deliberately combined, as all clients (standalone, rs, etc.) need both backup and monitoring
// together
func (d Deployment) AddMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
// together.
func (d Deployment) ConfigureMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
if len(d.getProcesses()) == 0 {
return
}
d.AddMonitoring(log, tls, caFilepath)
d.addBackup(log)
}

// DEPRECATED: this shouldn't be used as it may panic because of different underlying type; use GetReplicaSets instead
func (d Deployment) ReplicaSets() []ReplicaSet {
return d["replicaSets"].([]ReplicaSet)
d.ConfigureMonitoring(log, tls, caFilepath)
d.ConfigureBackup(log)
}

func (d Deployment) GetReplicaSetByName(name string) ReplicaSet {
Expand All @@ -277,48 +274,40 @@ func (d Deployment) GetReplicaSetByName(name string) ReplicaSet {
return nil
}

// AddMonitoring adds monitoring agents for all processes in the deployment
func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
// ConfigureMonitoring configures monitoring agents for all processes in the deployment.
// This is called on every reconcile to ensure the monitoring config matches the desired state.
func (d Deployment) ConfigureMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
if len(d.getProcesses()) == 0 {
return
}

monitoringVersions := d.getMonitoringVersions()
for _, p := range d.getProcesses() {
found := false
var monitoringVersion map[string]interface{}
for _, m := range monitoringVersions {
monitoringVersion = m.(map[string]interface{})
if monitoringVersion["hostname"] == p.HostName() {
found = true
break
}
}
hostname := p.HostName()
pemKeyFile := p.EnsureTLSConfig()["PEMKeyFile"]

if !found {
monitoringVersion = map[string]interface{}{
"hostname": p.HostName(),
foundIdx := slices.IndexFunc(monitoringVersions, func(m interface{}) bool {
return m.(map[string]interface{})["hostname"] == hostname
})

if foundIdx == -1 {
mv := map[string]interface{}{
"hostname": hostname,
"name": MonitoringAgentDefaultVersion,
}
log.Debugw("Added monitoring agent configuration", "host", p.HostName(), "tls", tls)
monitoringVersions = append(monitoringVersions, monitoringVersion)
}

monitoringVersion["hostname"] = p.HostName()

if tls {
additionalParams := map[string]string{
"useSslForAllConnections": "true",
"sslTrustedServerCertificates": caFilePath,
if tls {
mv["additionalParams"] = NewTLSParams(caFilePath, pemKeyFile)
}

pemKeyFile := p.EnsureTLSConfig()["PEMKeyFile"]
if pemKeyFile != nil {
additionalParams["sslClientCertificate"] = pemKeyFile.(string)
log.Debugw("Added monitoring agent configuration", "host", hostname, "tls", tls)
monitoringVersions = append(monitoringVersions, mv)
} else {
mv := monitoringVersions[foundIdx].(map[string]interface{})
if tls {
mv["additionalParams"] = NewTLSParams(caFilePath, pemKeyFile)
} else {
ClearTLSParamsFromMonitoringVersion(mv)
}

monitoringVersion["additionalParams"] = additionalParams
}

}
d.setMonitoringVersions(monitoringVersions)
}
Expand Down Expand Up @@ -1069,8 +1058,8 @@ func (d Deployment) removeMonitoring(processNames []string) {
d.setMonitoringVersions(updatedMonitoringVersions)
}

// addBackup adds backup agent configuration for each of the processes of deployment
func (d Deployment) addBackup(log *zap.SugaredLogger) {
// ConfigureBackup adds backup agent configuration for each process of the deployment.
func (d Deployment) ConfigureBackup(log *zap.SugaredLogger) {
backupVersions := d.getBackupVersions()
for _, p := range d.getProcesses() {
found := false
Expand Down
2 changes: 1 addition & 1 deletion controllers/om/deployment/testing_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func CreateFromReplicaSet(mongoDBImage string, forceEnterprise bool, rs *mdb.Mon
lastConfig.ToMap(),
zap.S(),
)
d.AddMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
d.ConfigureMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
d.ConfigureTLS(rs.Spec.GetSecurity(), util.CAFilePathInContainer)
return d
}
47 changes: 38 additions & 9 deletions controllers/om/deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,12 +489,12 @@ func TestConfiguringTlsProcessFromOpsManager(t *testing.T) {
}
}

func TestAddMonitoring(t *testing.T) {
func TestConfigureMonitoring(t *testing.T) {
d := NewDeployment()

rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
d.MergeReplicaSet(rs0, nil, nil, zap.S())
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)

expectedMonitoringVersions := []interface{}{
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
Expand All @@ -504,16 +504,16 @@ func TestAddMonitoring(t *testing.T) {
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())

// adding again - nothing changes
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
}

func TestAddMonitoringTls(t *testing.T) {
func TestConfigureMonitoringTls(t *testing.T) {
d := NewDeployment()

rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
d.MergeReplicaSet(rs0, nil, nil, zap.S())
d.AddMonitoring(zap.S(), true, util.CAFilePathInContainer)
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)

expectedAdditionalParams := map[string]string{
"useSslForAllConnections": "true",
Expand All @@ -528,16 +528,45 @@ func TestAddMonitoringTls(t *testing.T) {
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())

// adding again - nothing changes
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
}

func TestAddBackup(t *testing.T) {
func TestConfigureMonitoringTLSDisable(t *testing.T) {
d := NewDeployment()

rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
d.MergeReplicaSet(rs0, nil, nil, zap.S())
d.addBackup(zap.S())
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)

// verify TLS is present in additionalParams
expectedAdditionalParams := map[string]string{
"useSslForAllConnections": "true",
"sslTrustedServerCertificates": util.CAFilePathInContainer,
}
expectedMonitoringVersionsWithTls := []interface{}{
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
}
assert.Equal(t, expectedMonitoringVersionsWithTls, d.getMonitoringVersions())

// disabling TLS should clear additionalParams (CLOUDP-351614)
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
expectedMonitoringVersionsWithoutTls := []interface{}{
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion},
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion},
}
assert.Equal(t, expectedMonitoringVersionsWithoutTls, d.getMonitoringVersions())
}

func TestConfigureBackup(t *testing.T) {
d := NewDeployment()

rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
d.MergeReplicaSet(rs0, nil, nil, zap.S())
d.ConfigureBackup(zap.S())

expectedBackupVersions := []interface{}{
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": BackupAgentDefaultVersion},
Expand All @@ -547,7 +576,7 @@ func TestAddBackup(t *testing.T) {
assert.Equal(t, expectedBackupVersions, d.getBackupVersions())

// adding again - nothing changes
d.addBackup(zap.S())
d.ConfigureBackup(zap.S())
assert.Equal(t, expectedBackupVersions, d.getBackupVersions())
}

Expand Down
49 changes: 49 additions & 0 deletions controllers/om/monitoring_tls.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package om

// Keys written into a monitoring version's additionalParams map to configure
// TLS for the monitoring agent. NewTLSParams populates them and the
// ClearTLSParams helpers remove them.
const (
// TLSParamUseSsl is the key NewTLSParams sets to "true".
TLSParamUseSsl = "useSslForAllConnections"
// TLSParamTrustedCert is the key under which NewTLSParams stores the CA file path.
TLSParamTrustedCert = "sslTrustedServerCertificates"
// TLSParamClientCert is the key under which NewTLSParams stores the PEM key
// file; it is only set when a non-empty PEM key file is supplied.
TLSParamClientCert = "sslClientCertificate"
)

// NewTLSParams creates and returns a new map holding the TLS parameters for a
// monitoring version's additionalParams.
//
// The returned map always contains TLSParamUseSsl (set to "true") and
// TLSParamTrustedCert (set to caFilePath). TLSParamClientCert is added only
// when pemKeyFile holds a non-empty string; nil, an empty string, or a value of
// any other dynamic type is ignored.
func NewTLSParams(caFilePath string, pemKeyFile interface{}) map[string]string {
	params := map[string]string{
		TLSParamUseSsl:      "true",
		TLSParamTrustedCert: caFilePath,
	}
	// pemKeyFile is untyped, so use the comma-ok form: a non-string value must
	// be skipped instead of panicking on the type assertion.
	if clientCert, ok := pemKeyFile.(string); ok && clientCert != "" {
		params[TLSParamClientCert] = clientCert
	}
	return params
}

// clearTLSParamsFromMap removes every TLS-related key from params in place,
// regardless of the map's value type. Keys that are not present are ignored.
func clearTLSParamsFromMap[V any](params map[string]V) {
	for _, key := range [...]string{TLSParamUseSsl, TLSParamTrustedCert, TLSParamClientCert} {
		delete(params, key)
	}
}

// ClearTLSParams removes the TLS-specific keys (TLSParamUseSsl,
// TLSParamTrustedCert and TLSParamClientCert) from params in place. All other
// entries are left untouched, and the map itself is kept even if it ends up
// empty.
func ClearTLSParams(params map[string]string) {
clearTLSParamsFromMap(params)
}

// ClearTLSParamsFromMonitoringVersion strips the TLS-specific keys from the
// "additionalParams" entry of monitoringVersion, in place.
//
// Only map[string]string and map[string]interface{} values are handled; a
// missing entry or a value of any other type leaves monitoringVersion
// untouched. When no keys remain once the TLS ones are removed, the
// "additionalParams" entry itself is deleted.
func ClearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) {
	const paramsKey = "additionalParams"

	switch additional := monitoringVersion[paramsKey].(type) {
	case map[string]string:
		clearTLSParamsFromMap(additional)
		if len(additional) > 0 {
			return
		}
	case map[string]interface{}:
		clearTLSParamsFromMap(additional)
		if len(additional) > 0 {
			return
		}
	default:
		// Absent or of an unsupported type: nothing to clean up.
		return
	}

	// Nothing is left in the params map, so drop the empty entry.
	delete(monitoringVersion, paramsKey)
}
54 changes: 27 additions & 27 deletions controllers/operator/appdbreplicaset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"path"
"slices"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -87,8 +88,8 @@ const (
// Used to convey to the operator to force reconfigure agent. At the moment
// it is used for DR in case of Multi-Cluster AppDB when after a cluster outage
// there is no primary in the AppDB deployment.
ForceReconfigureAnnotation = "mongodb.com/v1.forceReconfigure"

ForceReconfigureAnnotation = "mongodb.com/v1.forceReconfigure"
trueString = "true"
ForcedReconfigureAlreadyPerformedAnnotation = "mongodb.com/v1.forceReconfigurePerformed"
)

Expand Down Expand Up @@ -717,7 +718,7 @@ func (r *ReconcileAppDbReplicaSet) ReconcileAppDB(ctx context.Context, opsManage
opsManager.Annotations = map[string]string{}
}

if val, ok := opsManager.Annotations[ForceReconfigureAnnotation]; ok && val == "true" {
if val, ok := opsManager.Annotations[ForceReconfigureAnnotation]; ok && val == trueString {
annotationsToAdd := map[string]string{ForcedReconfigureAlreadyPerformedAnnotation: timeutil.Now()}

err := annotations.SetAnnotations(ctx, opsManager, annotationsToAdd, r.client)
Expand Down Expand Up @@ -1178,7 +1179,7 @@ func (r *ReconcileAppDbReplicaSet) buildAppDbAutomationConfig(ctx context.Contex
}).
AddModifications(func(automationConfig *automationconfig.AutomationConfig) {
if acType == monitoring {
addMonitoring(automationConfig, log, rs.GetSecurity().IsTLSEnabled())
configureMonitoring(automationConfig, log, rs.GetSecurity().IsTLSEnabled())
automationConfig.ReplicaSets = []automationconfig.ReplicaSet{}
automationConfig.Processes = []automationconfig.Process{}
}
Expand Down Expand Up @@ -1248,7 +1249,7 @@ func (r *ReconcileAppDbReplicaSet) buildAppDbAutomationConfig(ctx context.Contex
// it checks this with the user provided annotation and if the operator has actually performed a force reconfigure already
func shouldPerformForcedReconfigure(annotations map[string]string) bool {
if val, ok := annotations[ForceReconfigureAnnotation]; ok {
if val == "true" {
if val == trueString {
if _, ok := annotations[ForcedReconfigureAlreadyPerformedAnnotation]; !ok {
return true
}
Expand Down Expand Up @@ -1423,37 +1424,36 @@ func setBaseUrlForAgents(ac *automationconfig.AutomationConfig, url string) {
}
}

func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger, tls bool) {
func configureMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger, tls bool) {
if len(ac.Processes) == 0 {
return
}

monitoringVersions := ac.MonitoringVersions
for _, p := range ac.Processes {
found := false
for _, m := range monitoringVersions {
if m.Hostname == p.HostName {
found = true
break
}
}
if !found {
monitoringVersion := automationconfig.MonitoringVersion{
Hostname: p.HostName,
hostname := p.HostName
pemKeyFile := p.Args26.Get("net.tls.certificateKeyFile").String()

foundIdx := slices.IndexFunc(monitoringVersions, func(m automationconfig.MonitoringVersion) bool {
return m.Hostname == hostname
})

if foundIdx == -1 {
mv := automationconfig.MonitoringVersion{
Hostname: hostname,
Name: om.MonitoringAgentDefaultVersion,
}
if tls {
additionalParams := map[string]string{
"useSslForAllConnections": "true",
"sslTrustedServerCertificates": appdbCAFilePath,
}
pemKeyFile := p.Args26.Get("net.tls.certificateKeyFile")
if pemKeyFile != nil {
additionalParams["sslClientCertificate"] = pemKeyFile.String()
}
monitoringVersion.AdditionalParams = additionalParams
mv.AdditionalParams = om.NewTLSParams(appdbCAFilePath, pemKeyFile)
}
log.Debugw("Added monitoring agent configuration", "host", hostname, "tls", tls)
monitoringVersions = append(monitoringVersions, mv)
} else {
if tls {
monitoringVersions[foundIdx].AdditionalParams = om.NewTLSParams(appdbCAFilePath, pemKeyFile)
} else {
om.ClearTLSParams(monitoringVersions[foundIdx].AdditionalParams)
}
log.Debugw("Added monitoring agent configuration", "host", p.HostName, "tls", tls)
monitoringVersions = append(monitoringVersions, monitoringVersion)
}
}
ac.MonitoringVersions = monitoringVersions
Expand Down
Loading