[DPE-9752] Add topology observer #50
base: 9/edge
Changes from all commits: c042adf, 572e889, 48ba7d2, f7816b0, 8d3cc01, 3c91b2b, 6c46244, c7d98ff, fd44750, 06d1bc6, 5460bf2, 49f748b, 4c93025, 3378430, 7a85afb, 887b6ce, 438490f, 1474d53, 29fadf7, 613ef61, 36a4ba3, ba70e90, 88b7ae0, 55af2d0, 0c5040e
@@ -250,6 +250,14 @@ def _on_unit_fully_started(self, event: UnitFullyStarted) -> None:
         self.charm.unit.open_port("tcp", CLIENT_PORT)
         self.charm.unit.open_port("tcp", TLS_PORT)
 
+        if not self.charm.unit.is_leader():
+            return
+
+        try:
+            self.charm.topology_manager.start_observer()
Contributor: Why isn't this run at the beginning of …

Contributor (author): Not necessarily on the health of the server, rather on the starting procedure being completed on the leader unit. Two things to consider here: …
+        except (ValkeyWorkloadCommandError, ValueError) as e:
+            logger.error("Failed to start topology observer: %s", e)
+
     def _on_peer_relation_changed(self, _: ops.RelationChangedEvent) -> None:
         """Handle event received by all units when a unit's relation data changes."""
         try:
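The same `start_observer()` call reappears in the update-status and leader-elected hooks further down, which is only safe if starting the observer is idempotent. The observer implementation itself is not part of this diff; the following is a minimal sketch of one way such an idempotent start could work, assuming a pid-file-guarded background process (the file path, command, and function shape are hypothetical, not the charm's actual API):

```python
import logging
import os
import subprocess
from pathlib import Path

logger = logging.getLogger(__name__)

# Hypothetical location; the real charm may track the observer differently.
OBSERVER_PID_FILE = Path("/tmp/topology-observer.pid")


def start_observer(command: list[str]) -> None:
    """Start the observer in the background unless it is already running."""
    if OBSERVER_PID_FILE.exists():
        pid = int(OBSERVER_PID_FILE.read_text())
        try:
            os.kill(pid, 0)  # signal 0 checks existence without killing
            logger.debug("Topology observer already running (pid %d)", pid)
            return
        except ProcessLookupError:
            logger.debug("Stale pid file, starting a fresh observer")

    process = subprocess.Popen(command, start_new_session=True)
    OBSERVER_PID_FILE.write_text(str(process.pid))
    logger.info("Started topology observer (pid %d)", process.pid)
```

With a guard like this, calling `start_observer()` from every hook that might be the first to run on the leader is harmless, which matches how the diff repeats the call across hooks.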
@@ -264,6 +272,15 @@ def _on_peer_relation_changed(self, _: ops.RelationChangedEvent) -> None:
         for lock in [StartLock(self.charm.state), RestartLock(self.charm.state)]:
             lock.process()
 
+        if not self.charm.state.unit_server.is_active:
Contributor: Similar question here: the health of the unit's service should not have an impact on the topology observer process start, or am I missing something?

Contributor (author): No, the health doesn't impact the observer, only whether the starting procedure has completed (and that the unit is not in the process of being removed).

Contributor: For context: The …
+            return
+
+        # need to pick up scaling operations, TLS switchover, CA rotation and so on
+        try:
+            self.charm.topology_manager.restart_observer()
+        except (ValkeyWorkloadCommandError, ValueError) as e:
+            logger.error("Failed to restart topology observer: %s", e)
+
     def _on_peer_relation_departed(self, _: ops.RelationDepartedEvent) -> None:
         """Handle event received by all units when a unit departs."""
         try:
@@ -272,10 +289,27 @@ def _on_peer_relation_departed(self, _: ops.RelationDepartedEvent) -> None:
             logger.error(f"Failed to update sentinel quorum: {e}")
             # not critical to defer here, we can wait for the next relation change
+
+        if not self.charm.unit.is_leader() or not self.charm.state.unit_server.is_active:
+            return
+
+        try:
+            self.charm.topology_manager.restart_observer()
Contributor: Why do we restart in this hook?

Contributor (author): When a unit leaves, the observer's connection parameters (endpoints) should be updated so that it no longer connects to that unit.
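That reply also explains why a restart (rather than a plain start) is used in the relation-changed and relation-departed hooks: the observer re-reads its connection parameters on startup. A rough sketch of that idea, where every name (`state.servers`, `endpoint`, the manager functions) is an assumption rather than the charm's real API:

```python
def restart_observer(topology_manager, state) -> None:
    """Sketch only: restart the observer so it re-reads connection parameters."""
    # Rebuild the endpoint list from units currently in the peer relation;
    # a departed unit is absent from it, so the observer stops contacting it.
    endpoints = [server.endpoint for server in state.servers]
    topology_manager.stop_observer()
    topology_manager.start_observer(endpoints=endpoints)
```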
+        except (ValkeyWorkloadCommandError, ValueError) as e:
+            logger.error("Failed to restart topology observer: %s", e)
+
     def _on_update_status(self, event: ops.UpdateStatusEvent) -> None:
         """Handle the update-status event."""
         if not self.charm.state.unit_server.is_started:
             logger.warning("Service not started")
             return
+
+        if not self.charm.unit.is_leader():
+            return
+
+        try:
+            self.charm.topology_manager.start_observer()
+        except (ValkeyWorkloadCommandError, ValueError) as e:
+            logger.error("Failed to start topology observer: %s", e)
 
     def _on_leader_elected(self, event: ops.LeaderElectedEvent) -> None:
         """Handle the leader-elected event."""
@@ -294,6 +328,12 @@ def _on_leader_elected(self, event: ops.LeaderElectedEvent) -> None:
         if not self.charm.unit.is_leader():
             return
 
+        if self.charm.state.unit_server.is_active:
+            try:
+                self.charm.topology_manager.start_observer()
+            except (ValkeyWorkloadCommandError, ValueError) as e:
+                logger.error("Failed to start topology observer: %s", e)
+
         if self.charm.state.cluster.internal_users_credentials:
             logger.debug("Internal user credentials already set")
             return
@@ -367,6 +407,12 @@ def _on_config_changed(self, event: ops.ConfigChangedEvent) -> None:
             event.defer()
             return
 
+        # propagate updated credentials to topology observer
+        try:
+            self.charm.topology_manager.restart_observer()
+        except (ValkeyWorkloadCommandError, ValueError) as e:
+            logger.error("Failed to restart topology observer: %s", e)
+
     def _on_secret_changed(self, event: ops.SecretChangedEvent) -> None:
         """Handle the secret_changed event."""
         if not (admin_secret_id := self.charm.config.get(INTERNAL_USERS_PASSWORD_CONFIG)):
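Note that no separate credential-passing API is needed here: if the observer re-reads its parameters on startup, as sketched after the relation-departed hunk above, then restarting it is what propagates the new credentials.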
@@ -384,15 +430,25 @@ def _on_secret_changed(self, event: ops.SecretChangedEvent) -> None:
         ):
             event.defer()
             return
 
+        # propagate updated credentials to topology observer
+        try:
+            self.charm.topology_manager.restart_observer()
+        except (ValkeyWorkloadCommandError, ValueError) as e:
+            logger.error("Failed to restart topology observer: %s", e)
+
+        return
 
         # from here, code is only relevant for non-leader units
         if event.secret.label and event.secret.label.endswith(INTERNAL_USERS_SECRET_LABEL_SUFFIX):
             # leader unit processed the secret change from user, non-leader units can replicate
             try:
                 self.charm.config_manager.set_acl_file()
+                self.charm.config_manager.set_sentinel_acl_file()
                 if self.charm.state.unit_server.is_started:
                     self.charm.cluster_manager.reload_acl_file()
+                    # todo: request rolling restart
+                    self.charm.sentinel_manager.restart_service()
                 # update the local unit admin password to match the leader
                 self.charm.config_manager.update_local_valkey_admin_password()
                 if self.charm.state.unit_server.is_started:
@@ -454,8 +510,11 @@ def _update_internal_users_password(self, secret_id: str) -> None:
         logger.info("Password(s) for internal users have changed")
         try:
             self.charm.config_manager.set_acl_file(passwords=new_passwords)
+            self.charm.config_manager.set_sentinel_acl_file(passwords=new_passwords)
             if self.charm.state.unit_server.is_started:
                 self.charm.cluster_manager.reload_acl_file()
+                # todo: request rolling restart
+                self.charm.sentinel_manager.restart_service()
             self.charm.state.cluster.update(
                 {
                     f"{user.value.replace('-', '_')}_password": new_passwords[user.value]
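`reload_acl_file()` lets the running Valkey server pick up the rewritten ACL file without restarting the process, while Sentinel is restarted explicitly. The charm's implementation is not shown in this diff; conceptually it amounts to issuing the server's `ACL LOAD` command, for example via a redis-py-compatible client (the function shape and connection details below are placeholders):

```python
from redis import Redis  # valkey-py exposes the same client API


def reload_acl_file(host: str, port: int, admin_password: str) -> None:
    """Ask a running Valkey server to re-read users from its configured aclfile."""
    client = Redis(host=host, port=port, password=admin_password)
    # ACL LOAD re-applies the aclfile in place, so the server keeps running
    # and clients stay connected.
    client.execute_command("ACL LOAD")
```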
@@ -549,6 +608,9 @@ def _on_storage_detaching(self, event: ops.StorageDetachingEvent) -> None:
                 primary_ip,
             )
 
+        if self.charm.unit.is_leader():
+            self.charm.topology_manager.stop_observer()
+
         # stop valkey and sentinel processes
         self.charm.workload.stop()
         active_sentinels = [
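`stop_observer()` is the counterpart of the start call. Continuing the same hypothetical pid-file sketch from above (again, not the charm's actual implementation):

```python
import os
import signal
from pathlib import Path

# Must match whatever start_observer wrote; hypothetical, as above.
OBSERVER_PID_FILE = Path("/tmp/topology-observer.pid")


def stop_observer() -> None:
    """Terminate the background observer if it is running, then clean up."""
    if not OBSERVER_PID_FILE.exists():
        return
    pid = int(OBSERVER_PID_FILE.read_text())
    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        pass  # process already gone; nothing to stop
    OBSERVER_PID_FILE.unlink(missing_ok=True)
```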