Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
a5a4d71
feat: test rollingops
patriciareinoso Apr 17, 2026
12aaa1f
minimum fix ut
patriciareinoso Apr 18, 2026
c987485
fix lint
patriciareinoso Apr 18, 2026
05bb961
fix pre commit
patriciareinoso Apr 18, 2026
50cd931
fix charms pack
patriciareinoso Apr 18, 2026
711ae13
Update mongodb_operator.py
patriciareinoso Apr 18, 2026
e25fa6a
fix common name
patriciareinoso Apr 19, 2026
71a15b4
fix python version
patriciareinoso Apr 20, 2026
f55e244
fix python versio
patriciareinoso Apr 20, 2026
71c3162
remove sync
patriciareinoso Apr 20, 2026
7e389a4
new iteration
patriciareinoso Apr 20, 2026
dae5581
Merge branch '8/edge' into DPE-9684-rolling-ops
patriciareinoso Apr 20, 2026
70c0816
fix merge
patriciareinoso Apr 20, 2026
0cac66b
skip vault for now
patriciareinoso Apr 20, 2026
2478f7c
improve workflows
patriciareinoso Apr 21, 2026
136f1db
restore build charms
patriciareinoso Apr 21, 2026
f767a7b
restore tls uts
patriciareinoso Apr 21, 2026
a83c4a1
Merge branch '8/edge' into DPE-9684-rolling-ops
patriciareinoso Apr 22, 2026
b038087
improve workflows
patriciareinoso Apr 22, 2026
9b7d83c
Merge branch '8/edge' into DPE-9684-rolling-ops
patriciareinoso Apr 22, 2026
9b2fcce
fix merge
patriciareinoso Apr 22, 2026
23cd69b
fix precommit
patriciareinoso Apr 23, 2026
9eb0e61
fix mypy
patriciareinoso Apr 23, 2026
c9063cf
avoid crashing
patriciareinoso Apr 23, 2026
0ab14b5
optional retry mongod is ready
patriciareinoso Apr 24, 2026
29d73b4
Merge branch '8/edge' into DPE-9684-rolling-ops
patriciareinoso Apr 24, 2026
fdbcd32
add waiting on mongostls
patriciareinoso Apr 24, 2026
4219c35
wait on ldap
patriciareinoso Apr 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
988 changes: 530 additions & 458 deletions poetry.lock

Large diffs are not rendered by default.

13 changes: 7 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ classifiers = [
"Intended Audience :: System Administrators",
"Operating System :: POSIX :: Linux",
]
requires-python = ">=3.10,<4.0"
requires-python = ">=3.12,<4.0"

dependencies = [
"poetry-core (>=2.0)",
"ops (~=3.5.2)",
"overrides (~=7.7.0)",
"cryptography", # tls_certificates lib v3
"jsonschema (~=4.24.0)", # tls_certificates lib v3
"pydantic (~=2.11.0)",
"pydantic (~=2.12.5)",
"pydantic-settings",
"pyyaml (~=6.0.2)",
"tenacity (~=9.0.0)",
Expand All @@ -41,6 +41,7 @@ dependencies = [
"charm-refresh (>=3.1.0.2,<4.0.0.0)",
"google-cloud-storage (~=2.16.0)",
"google-api-core (~=2.17.0)",
"charmlibs-rollingops @ git+https://github.com/patriciareinoso/charmlibs@DPE-9350-logs-location#subdirectory=rollingops",
"pytest-interface-tester (>=3.4.1,<4.0.0)",
"hvac (>=2.4.0,<3.0.0)",
"python-hcl2"
Expand Down Expand Up @@ -83,15 +84,15 @@ build-backend = "poetry_dynamic_versioning.backend"
cryptography = "*" # tls_certificates lib v3
jsonschema = "^4.24.0" # tls_certificates lib v3
ops = "~3.5.2"
pydantic = "~2.11.0"
pydantic = "~2.12.5"
pydantic-settings = "*"
pymongo = "*"
cosl = "*" # loki_push_api

[tool.poetry.group.dev.dependencies]
pre-commit = "^4.0.1"
ruff = "^0.7.2"
pydantic = "~2.11.0"
pydantic = "~2.12.5"
mypy = "*"
types-PyYAML = "*"
types-python-dateutil = "*"
Expand All @@ -101,7 +102,7 @@ optional = true

[tool.poetry.group.format.dependencies]
ruff = "^0.7.2"
pydantic = "~2.11.0"
pydantic = "~2.12.5"
mypy = "*"
types-PyYAML = "*"

Expand All @@ -113,7 +114,7 @@ ruff = "^0.7.2"
tomli = "*"
codespell = "^2.2.6"
shellcheck-py = "^0.10.0.1"
pydantic = "~2.11.0"
pydantic = "~2.12.5"
mypy = "*"
types-PyYAML = "*"

Expand Down
2 changes: 1 addition & 1 deletion scripts/build_lib_for_integration.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ CHARMS_PATH="./tests/charms"
VERSION_TAG="test/0.0.0+dirty"

# Default values
declare -a TEST_CHARMS=("${CHARMS_PATH}/mongodb_test_charm" "${CHARMS_PATH}/mongodb_k8s_test_charm" "${CHARMS_PATH}/mongos_test_charm" "${CHARMS_PATH}/mongos_k8s_test_charm")
declare -a TEST_CHARMS=("${CHARMS_PATH}/mongodb_test_charm") #"${CHARMS_PATH}/mongos_test_charm") # "${CHARMS_PATH}/mongodb_k8s_test_charm" "${CHARMS_PATH}/mongos_k8s_test_charm")
PLATFORM="ubuntu@24.04:$(dpkg --print-architecture)"

POSITIONAL_ARGS=()
Expand Down
1 change: 1 addition & 0 deletions single_kernel_mongo/config/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class PeerRelationNames(str, Enum):
ROUTER_PEERS = "router-peers"
STATUS_PEERS = "status-peers"
LDAP_PEERS = "ldap-peers"
ROLLINGOPS_PEERS = "rollingops-peers"


class RelationNames(str, Enum):
Expand Down
18 changes: 18 additions & 0 deletions single_kernel_mongo/config/statuses.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ class MongoDBStatuses(Enum):
message="Waiting for mongodb-exporter to start...",
check="MongoDB Exporter status check.",
)
WAITING_FOR_RESTART = StatusObject(
status="waiting",
message="Waiting for MongoDB restart.",
)
INVALID_SHARDING_REL = StatusObject(
status="blocked",
message="The sharding interface cannot be used by replica sets.",
Expand Down Expand Up @@ -92,6 +96,11 @@ class MongoDBStatuses(Enum):
action="Set the role config to a valid value: `replication`, `shard` or `config-server`.",
running="blocking",
)
RESTARTING = StatusObject(
status="maintenance",
message="Restarting MongoDB.",
running="blocking",
)


class MongosStatuses(Enum):
Expand All @@ -112,6 +121,10 @@ class MongosStatuses(Enum):
message="Waiting for mongos to start...",
check="mongos process status check.",
)
WAITING_FOR_RESTART = StatusObject(
status="waiting",
message="Waiting for mongos restart.",
)
INVALID_REL = StatusObject(
status="blocked",
message="The relation is invalid.",
Expand Down Expand Up @@ -180,6 +193,11 @@ class MongosStatuses(Enum):
STARTING_MONGOS = StatusObject(
status="maintenance", message="Starting mongos.", running="blocking"
)
RESTARTING = StatusObject(
status="maintenance",
message="Restarting mongos.",
running="blocking",
)

@classmethod
def missing_tls(cls, internal: bool) -> StatusObject:
Expand Down
9 changes: 8 additions & 1 deletion single_kernel_mongo/core/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import charm_refresh
import jinja2
from charmlibs.rollingops import RollingOpsManager
from data_platform_helpers.advanced_statuses.models import StatusObject
from data_platform_helpers.advanced_statuses.protocol import ManagerStatusProtocol
from data_platform_helpers.advanced_statuses.types import Scope
Expand Down Expand Up @@ -110,6 +111,7 @@ class OperatorProtocol(ABC, Object, ManagerStatusProtocol):
tls_events: TLSEventsHandler
ldap_events: LDAPEventHandler
sysctl_config: Config
rollingops_manager: RollingOpsManager

if TYPE_CHECKING:

Expand Down Expand Up @@ -206,6 +208,11 @@ def restart_charm_services(self, force: bool = False) -> None:
"""Restart the relevant services with updated config."""
...

@abstractmethod
def rolling_restart_charm_services(self, force: bool = False) -> None:
    """Request an async lock to restart the relevant services.

    Abstract hook: concrete operators provide the implementation.

    Args:
        force: Defaults to False. Presumably carries the same meaning as the
            `force` argument of `restart_charm_services` -- TODO confirm
            against the concrete implementations.
    """
    ...

@abstractmethod
def get_relation_feasible_status(self, name: str) -> StatusObject | None:
"""Checks if the relation is feasible in this context."""
Expand Down Expand Up @@ -356,7 +363,7 @@ def remove_ca_cert_from_trust_store(self, file: TrustStoreFiles):
# Update CA certificates to remove the certificate from the trust store
self.workload.exec(["update-ca-certificates"])
# Restart the service
self.restart_charm_services(force=True)
self.rolling_restart_charm_services(force=True)

def write_thp_config_file(self):
"""Writes the unit file to enable Transparent Huge Pages."""
Expand Down
42 changes: 2 additions & 40 deletions single_kernel_mongo/events/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,12 @@
)
from ops.framework import Object

from single_kernel_mongo.config.statuses import MongosStatuses
from single_kernel_mongo.exceptions import (
DatabaseRequestedHasNotRunYetError,
DeferrableError,
DeferrableFailedHookChecksError,
MissingCredentialsError,
NonDeferrableFailedHookChecksError,
WaitingForSecretsError,
WorkloadServiceError,
)
from single_kernel_mongo.lib.charms.data_platform_libs.v0.data_interfaces import (
DatabaseCreatedEvent,
Expand Down Expand Up @@ -170,49 +167,14 @@ def _on_database_created(self, event: DatabaseCreatedEvent) -> None:

def _handle_changed_secrets(self, event: SecretChangedEvent):
"""SecretChanged event handler, which is used to propagate the updated passwords."""
try:
self.manager.handle_secret_changed(event.secret.label or "")
except (DeferrableError, DeferrableFailedHookChecksError):
event.defer()
except NonDeferrableFailedHookChecksError as e:
logger.info(f"Skipping {str(type(event))}: {str(e)}")
except WaitingForSecretsError as e:
logger.info(f"Skipping {str(type(event))}: {str(e)}")
self.dependent.state.statuses.add(
MongosStatuses.WAITING_FOR_SECRETS.value,
scope="unit",
component=self.charm.name,
)
except WorkloadServiceError:
# Some status was already set and a log was already displayed in
# `restart_charm_services`
return
self.manager.handle_secret_changed(event.secret.label or "")

def _on_relation_changed(self, event: RelationChangedEvent) -> None:
"""Relation changed event handler.

The manager will update the mongos configuration and restart it.
"""
try:
self.manager.update_mongos_and_restart()
except (
DeferrableError,
DeferrableFailedHookChecksError,
) as e:
defer_event_with_info_log(logger, event, str(type(event)), str(e))
except NonDeferrableFailedHookChecksError as e:
logger.info(f"Skipping {str(type(event))}: {str(e)}")
except (WaitingForSecretsError, MissingCredentialsError) as e:
logger.info(f"Skipping {str(type(event))}: {str(e)}")
self.dependent.state.statuses.add(
MongosStatuses.WAITING_FOR_SECRETS.value,
scope="unit",
component=self.charm.name,
)
except WorkloadServiceError:
# Some status was already set and a log was already displayed in
# `restart_charm_services`
return
self.manager.async_update_mongos_and_restart()

def _on_relation_broken(self, event: RelationBrokenEvent) -> None:
"""On relation broken event, we cleanup the users and mongos instance."""
Expand Down
5 changes: 4 additions & 1 deletion single_kernel_mongo/events/ldap.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,10 @@ def _on_certificate_removed(self, event: CertificateRemovedEvent) -> None:
self.manager.remove_ldap_certificates()

def _on_restart_if_ready(self, event: RestartIfReadyEvent) -> None:
"""Custom ops revent to trigger restart of leader with a single source of truth."""
"""Custom ops revent to trigger restart of leader with a single source of truth.

Also executed by follower units on relation changed event.
"""
action = "restart-ldap-if-ready"
try:
self.manager.restart_when_ready()
Expand Down
53 changes: 47 additions & 6 deletions single_kernel_mongo/managers/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from logging import getLogger
from typing import TYPE_CHECKING

from charmlibs.rollingops import OperationResult
from data_platform_helpers.advanced_statuses.models import StatusObject
from ops.framework import Object
from ops.model import Relation
Expand All @@ -21,8 +22,11 @@
from single_kernel_mongo.exceptions import (
DeferrableError,
DeferrableFailedHookChecksError,
MissingConfigServerError,
MissingCredentialsError,
NonDeferrableFailedHookChecksError,
WaitingForSecretsError,
WorkloadServiceError,
)
from single_kernel_mongo.lib.charms.data_platform_libs.v0.data_interfaces import (
DatabaseProviderData,
Expand Down Expand Up @@ -326,11 +330,13 @@ def update_mongos_and_restart(self) -> None:

if updated_keyfile or updated_config or not self.dependent.is_mongos_running():
logger.info("Restarting mongos with new secrets.")
self.charm.status_handler.set_running_status(
MongosStatuses.STARTING_MONGOS.value, scope="unit"
)

self.dependent.restart_charm_services()
try:
self.dependent.restart_charm_services(force=False)
except MissingConfigServerError as e:
raise NonDeferrableFailedHookChecksError from e
except WorkloadServiceError as e:
raise DeferrableError from e

# Restart on highly loaded databases can be very slow (up to 10-20 minutes).
if not self.dependent.is_mongos_running():
Expand All @@ -340,7 +346,7 @@ def update_mongos_and_restart(self) -> None:
scope="unit",
component=self.dependent.name,
)
raise DeferrableError
raise DeferrableError("Mongos is not running.")

self.state.statuses.set(
CharmStatuses.ACTIVE_IDLE.value, scope="unit", component=self.dependent.name
Expand All @@ -352,6 +358,29 @@ def update_mongos_and_restart(self) -> None:

self.dependent.share_connection_info()

def update_mongos_and_restart_callback(self) -> OperationResult:
    """Callback used by the rolling operation that updates and restarts mongos.

    Runs `update_mongos_and_restart` and translates its outcome into an
    `OperationResult`.

    Returns:
        `OperationResult.RETRY_RELEASE` when a deferrable error was raised,
        `OperationResult.RELEASE` on success and for every other handled error.
    """
    # Every handled path except the deferrable one ends with a plain release.
    outcome = OperationResult.RELEASE
    try:
        self.update_mongos_and_restart()
    except (DeferrableError, DeferrableFailedHookChecksError) as err:
        logger.info("Deferrable error during mongos update and restart. %s", err)
        outcome = OperationResult.RETRY_RELEASE
    except NonDeferrableFailedHookChecksError as err:
        logger.info("Non deferrable error during mongos update and restart. %s", err)
    except (WaitingForSecretsError, MissingCredentialsError) as err:
        logger.info("Skipping mongos update and restart: %s", err)
        # Surface the missing secrets on the unit status.
        self.state.statuses.add(
            MongosStatuses.WAITING_FOR_SECRETS.value,
            scope="unit",
            component=self.charm.name,
        )
    return outcome

def handle_secret_changed(self, secret_label: str | None) -> None:
"""If the certificates are rotated for example, handle it immediately.

Expand All @@ -373,7 +402,19 @@ def handle_secret_changed(self, secret_label: str | None) -> None:
return

# This will take care of updating everything that needs updating
self.update_mongos_and_restart()
self.async_update_mongos_and_restart()

def async_update_mongos_and_restart(self) -> None:
    """Queue a mongos update-and-restart through the rolling-ops manager.

    This method only records a status and requests the lock; it does not call
    `update_mongos_and_restart` itself. The lock request names
    `update_mongos_and_restart_callback` as its callback id and passes
    `max_retry=2`.
    """
    # NOTE(review): MongosStatuses also defines WAITING_FOR_RESTART; confirm
    # that WAITING_FOR_MONGOS_START is the intended status here.
    self.state.statuses.add(
        MongosStatuses.WAITING_FOR_MONGOS_START.value,
        scope="unit",
        component=self.dependent.name,
    )
    self.dependent.rollingops_manager.request_async_lock(
        callback_id="update_mongos_and_restart_callback", max_retry=2
    )
    logger.info("Requested an async lock to update Mongos and restart.")

def remove_users_and_cleanup_mongo(self, relation: Relation) -> None:
"""Proceeds on relation broken."""
Expand Down
9 changes: 7 additions & 2 deletions single_kernel_mongo/managers/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,16 @@ def configure_and_restart(self, force: bool = False) -> None:
"""Re-configure if needed and restart the service if needed."""
current_config_file = "\n".join(self.workload.read(self.file))
current_config_file_content = safe_load(current_config_file)

new_content = self.build_config()

if force or not self.workload.active() or new_content != current_config_file_content:
config_changed = new_content != current_config_file_content
should_restart = force or not self.workload.active() or config_changed

if config_changed:
logger.info("Workload config changed. Writing the new config.")
self.workload.write(self.file, safe_dump(new_content))
if should_restart:
logger.info("Workload should restart now.")
self.workload.restart()


Expand Down
6 changes: 3 additions & 3 deletions single_kernel_mongo/managers/ldap.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def restart_when_ready(self) -> None:
case LdapState.ACTIVE:
self.share_hash_with_mongos()
logger.info("Restarting mongodb server for LDAP integration")
self.dependent.restart_charm_services()
self.dependent.rolling_restart_charm_services(force=False)
self.state.statuses.set(
LdapStatuses.ACTIVE_IDLE.value, scope="unit", component=self.name
)
Expand All @@ -149,7 +149,7 @@ def clean_ldap_credentials_and_uri(self) -> None:
self.remove_hash_from_mongos()

if self.state.db_initialised: # Don't restart if we haven't initialised the DB yet.
self.dependent.restart_charm_services()
self.dependent.rolling_restart_charm_services(force=False)

self.state.statuses.clear(scope="unit", component=self.name)
statuses = self.get_statuses(scope="unit", recompute=True)
Expand Down Expand Up @@ -202,7 +202,7 @@ def remove_ldap_certificates(self) -> None:
local_cert_file.unlink()

if self.state.db_initialised: # Don't restart if we haven't initialised the DB yet.
self.dependent.restart_charm_services()
self.dependent.rolling_restart_charm_services(force=False)

statuses = self.get_statuses(scope="unit", recompute=True)
self.state.statuses.clear(scope="unit", component=self.name)
Expand Down
Loading
Loading