From c042adf16a369714ce489bc9194d2d6d99031b77 Mon Sep 17 00:00:00 2001 From: reneradoi Date: Thu, 16 Apr 2026 17:28:22 +0200 Subject: [PATCH 1/6] add K8s client class, create and update services and pod labels --- poetry.lock | 144 +++++++++++++++++++++++++++++++-- pyproject.toml | 1 + src/common/exceptions.py | 4 + src/common/k8s_client.py | 102 +++++++++++++++++++++++ src/events/external_clients.py | 24 ++++-- src/literals.py | 8 ++ src/managers/sentinel.py | 39 +++++++++ tests/unit/conftest.py | 5 ++ 8 files changed, 315 insertions(+), 12 deletions(-) create mode 100644 src/common/k8s_client.py diff --git a/poetry.lock b/poetry.lock index 2f17dc38..8b2d8c36 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand. [[package]] name = "allure-pytest" @@ -66,7 +66,7 @@ version = "4.13.0" description = "High-level concurrency and networking framework on top of asyncio or Trio" optional = false python-versions = ">=3.10" -groups = ["integration"] +groups = ["main", "integration"] files = [ {file = "anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708"}, {file = "anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc"}, @@ -97,7 +97,7 @@ version = "2026.2.25" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" -groups = ["integration"] +groups = ["main", "integration"] files = [ {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"}, {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"}, @@ -674,13 +674,113 @@ files = [ {file = "durationpy-0.10.tar.gz", hash = "sha256:1fa6893409a6e739c9c72334fc65cca1f355dbdd93405d30f726deb5bde42fba"}, ] +[[package]] +name = "h11" +version = "0.16.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, + {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, +] + +[[package]] +name = "h2" +version = "4.3.0" +description = "Pure-Python HTTP/2 protocol implementation" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd"}, + {file = "h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1"}, +] + +[package.dependencies] +hpack = ">=4.1,<5" +hyperframe = ">=6.1,<7" + +[[package]] +name = "hpack" +version = "4.1.0" +description = "Pure-Python HPACK header encoding" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496"}, + {file = "hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca"}, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"}, + {file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.16" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<1.0)"] + +[[package]] +name = "httpx" +version = "0.28.1" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, + {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} +httpcore = "==1.*" +idna = "*" + +[package.extras] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "hyperframe" +version = "6.1.0" +description = "Pure-Python HTTP/2 framing" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5"}, + {file = "hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08"}, +] + [[package]] name = "idna" version = "3.11" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.8" -groups = ["integration"] +groups = ["main", "integration"] files = [ {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"}, {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"}, @@ -753,7 +853,7 @@ files = [ ] [package.dependencies] -certifi = ">=14.05.14" +certifi = ">=14.5.14" durationpy = ">=0.7" python-dateutil = ">=2.5.3" pyyaml = ">=5.4.1" @@ -767,6 +867,38 @@ websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" adal = ["adal (>=1.0.2)"] google-auth = ["google-auth (>=1.0.1)"] +[[package]] +name = "lightkube" +version = "0.19.1" +description = "Lightweight kubernetes client library" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "lightkube-0.19.1-py3-none-any.whl", hash = "sha256:49fef08a1c7aa42082820111ffd5dbbaf78f54c99385810690fc9d94eef5c80d"}, + {file = "lightkube-0.19.1.tar.gz", hash = "sha256:4c8526068024c194c02fbc0ca6021922feb4b1b9d741d330129f873b27e0fe97"}, +] + +[package.dependencies] +httpx = {version = ">=0.28.1,<1.0.0", extras = ["http2"]} +lightkube-models = ">=1.15.12.0" +pyyaml = "*" + +[package.extras] +jinja-templates = ["jinja2"] + +[[package]] +name = "lightkube-models" +version = "1.35.0.8" +description = "Models and Resources for lightkube module" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "lightkube_models-1.35.0.8-py3-none-any.whl", hash = "sha256:d01fce42f96baf47a77a571bff59d6a513e96ae043fc03cfaaaaf79c609c4441"}, + {file = "lightkube_models-1.35.0.8.tar.gz", hash = "sha256:dbc624596a7d94e6c43c5deda972be964202e0e8f26e2ab8e61d589d710b5e22"}, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -1541,4 +1673,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "1bf4d9f6216b7c7cd55932889297619563057999e263c02090a73c7e903ccc6d" +content-hash = "54a93e43d0c1bbb152b9f6f3ac3fdc4d75bd5916d2a77624e9b8ced5f7222495" diff --git a/pyproject.toml b/pyproject.toml index 7ffa319c..1ff49048 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ tenacity = "*" data-platform-helpers = ">=0.1.7" validators = ">=0.35.0" dpcharmlibs-interfaces = ">=1.0.2" +lightkube = ">=0.19.0" [tool.poetry.requires-plugins] poetry-plugin-export = ">=1.8" diff --git a/src/common/exceptions.py b/src/common/exceptions.py index 26ced3d0..e3e19222 100644 --- a/src/common/exceptions.py +++ b/src/common/exceptions.py @@ -66,3 +66,7 @@ class RequestingLockTimedOutError(Exception): class ValkeyCertificatesNotReadyError(Exception): """Custom Exception if not all units have stored the TLS certificates.""" + + +class KubernetesClientError(Exception): + """Custom Exception if a connection to the Kubernetes Cluster API fails.""" diff --git a/src/common/k8s_client.py b/src/common/k8s_client.py new file mode 100644 index 00000000..e206503f --- /dev/null +++ b/src/common/k8s_client.py @@ -0,0 +1,102 @@ +# Copyright 2026 Canonical Ltd. +# See LICENSE file for licensing details. + +"""K8sClient utility class to connect to the Kubernetes API server.""" + +import logging + +from lightkube.core.client import Client +from lightkube.core.exceptions import ApiError +from lightkube.models.core_v1 import ServicePort, ServiceSpec +from lightkube.models.meta_v1 import ObjectMeta +from lightkube.resources.core_v1 import Pod, Service + +from common.exceptions import KubernetesClientError + +logger = logging.getLogger(__name__) + + +class K8sClient: + """Expose Kubernetes API commands to the charm.""" + + def __init__(self, namespace: str, app_name: str): + self.namespace = namespace + self.app_name = app_name + self.client = Client() + + def ensure_endpoint_service(self, role: str, port: int) -> None: + """Create or update a K8s service. + + Args: + role(str): name of the role to create the service for, e.g "primary" or "replicas" + port(int): the port number to set for the service + """ + service_name = f"{self.app_name}-{role}" + service_port = ServicePort(port=port, targetPort=port) + + try: + service = self.client.get(res=Service, name=service_name, namespace=self.namespace) + if service.spec.ports != [service_port]: + service.spec.ports = [service_port] + self.client.patch(Service, service_name, service) + logger.info("Updated Kubernetes service %s to port %s", service_name, port) + return + except ApiError as e: + # 404 will be raised if service does not exist yet + if e.status.code != 404: + raise KubernetesClientError from e + + pod0 = self.client.get( + res=Pod, + name=self.app_name + "-0", + namespace=self.namespace, + ) + + service = Service( + apiVersion="v1", + kind="Service", + metadata=ObjectMeta( + namespace=self.namespace, + name=service_name, + ownerReferences=pod0.metadata.ownerReferences, + ), + spec=ServiceSpec( + selector={"application-name": self.app_name, "role": role}, + ports=[service_port], + type="ClusterIP", + ), + ) + + try: + self.client.create(service) + logger.info("Created Kubernetes service %s for port %s", service_name, port) + except ApiError as e: + logger.error("Kubernetes service creation failed: %s", e) + raise KubernetesClientError from e + + def update_pod_label(self, pod_name: str, role: str) -> None: + """Create or update a label for a pod. + + Args: + pod_name(str): the name of the pod + role(str): name of the role to create the label for, e.g "primary" or "replicas" + """ + try: + pod = self.client.get(Pod, pod_name, namespace=self.namespace) + except ApiError as e: + raise KubernetesClientError from e + + if not pod.metadata.labels: + pod.metadata.labels = {} + + if pod.metadata.labels.get("role") == role: + return + + logger.info("Updating pod %s to role %s", pod_name, role) + pod.metadata.labels["application-name"] = self.app_name + pod.metadata.labels["role"] = role + try: + self.client.patch(Pod, pod_name, pod) + except ApiError as e: + logger.error("Failed to update Kubernetes pod labels: %s", e) + raise KubernetesClientError from e diff --git a/src/events/external_clients.py b/src/events/external_clients.py index c23bd2b5..4e80b8b4 100644 --- a/src/events/external_clients.py +++ b/src/events/external_clients.py @@ -23,6 +23,7 @@ ) from common.exceptions import ( + KubernetesClientError, ValkeyACLLoadError, ValkeyCannotGetPrimaryIPError, ValkeyServicesFailedToStartError, @@ -178,15 +179,26 @@ def _on_bulk_resources_requested( self.charm.state.cluster.update({"client_user_epoch": time.time()}) def _on_peer_relation_changed(self, event: ops.RelationChangedEvent) -> None: - """Handle peer relation changes in regard to external client relations.""" - if ( - not self.charm.state.unit_server.is_started - or not self.charm.state.external_client_relations - ): + """Handle peer relation changes in regard to external client relations. + + This handler catches all changes from scaling operations, TLS switchover, TLS CA rotation, + IP changes, etc. + """ + if not self.charm.state.unit_server.is_started: + return + + if self.charm.unit.is_leader(): + try: + self.charm.sentinel_manager.reconcile_k8s_services() + except (KubernetesClientError, ValkeyCannotGetPrimaryIPError) as e: + logger.error("Error updating Kubernetes services: %s", e) + event.defer() + return + + if not self.charm.state.external_client_relations: return if self.charm.unit.is_leader(): - # this catches all changes from scaling operations, TLS switchover, IP changes, etc. try: self._update_client_relations() except (ValkeyCannotGetPrimaryIPError, ValkeyWorkloadCommandError) as e: diff --git a/src/literals.py b/src/literals.py index abb04f45..9168e365 100644 --- a/src/literals.py +++ b/src/literals.py @@ -122,3 +122,11 @@ class TLSCARotationState(StrEnum): NEW_CA_DETECTED = "new-ca-detected" NEW_CA_ADDED = "new-ca-added" CA_UPDATED = "ca-updated" + + +class K8sService(StrEnum): + """Services managed by the charm in Kubernetes.""" + + PRIMARY = "primary" + REPLICAS = "replicas" + SENTINELS = "sentinels" diff --git a/src/managers/sentinel.py b/src/managers/sentinel.py index 00140b9d..7a235479 100644 --- a/src/managers/sentinel.py +++ b/src/managers/sentinel.py @@ -20,6 +20,7 @@ ValkeyCannotGetPrimaryIPError, ValkeyWorkloadCommandError, ) +from common.k8s_client import K8sClient from core.base_workload import WorkloadBase from core.cluster_state import ClusterState from literals import ( @@ -29,6 +30,8 @@ SENTINEL_TLS_PORT, TLS_PORT, CharmUsers, + K8sService, + Substrate, ) from statuses import CharmStatuses @@ -46,6 +49,12 @@ def __init__(self, state: ClusterState, workload: WorkloadBase): self.workload = workload self.admin_user = CharmUsers.SENTINEL_CHARM_ADMIN.value + if self.state.substrate == Substrate.K8S: + self.k8s_client = K8sClient( + namespace=self.state.model.name, + app_name=self.state.model.app.name, + ) + @property def admin_password(self) -> str: """Get the password of the admin user for the sentinel service.""" @@ -359,3 +368,33 @@ def set_quorum(self, quorum: int) -> None: """Set the quorum for the sentinel cluster.""" client = self._get_sentinel_client() client.set(self.state.endpoint, PRIMARY_NAME, "quorum", str(quorum)) + + def reconcile_k8s_services(self) -> None: + """Create or update the services and pod labels in Kubernetes.""" + if self.state.substrate == Substrate.VM: + return + + valkey_port = TLS_PORT if self.state.unit_server.is_tls_enabled else CLIENT_PORT + sentinel_port = ( + SENTINEL_TLS_PORT if self.state.unit_server.is_tls_enabled else SENTINEL_PORT + ) + + self.k8s_client.ensure_endpoint_service(role=K8sService.PRIMARY.value, port=valkey_port) + self.k8s_client.ensure_endpoint_service(role=K8sService.REPLICAS.value, port=valkey_port) + self.k8s_client.ensure_endpoint_service( + role=K8sService.SENTINELS.value, port=sentinel_port + ) + + primary_endpoint = self.get_primary_ip() + for unit in self.state.servers: + if not unit.is_active: + continue + + pod_name = unit.unit_name.replace("/", "-") + self.k8s_client.update_pod_label( + pod_name=pod_name, + role=K8sService.PRIMARY.value + if unit.get_endpoint(Substrate.K8S) == primary_endpoint + else K8sService.REPLICAS.value, + ) + self.k8s_client.update_pod_label(pod_name=pod_name, role=K8sService.SENTINELS.value) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 41a6a18c..515b6912 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -27,6 +27,11 @@ def mock_bind_address(mocker): ) +@pytest.fixture(autouse=True) +def mock_k8s_client(mocker): + mocker.patch("lightkube.core.client.GenericSyncClient") + + @pytest.fixture(autouse=True) def tenacity_wait(mocker): mocker.patch("tenacity.nap.time") From 572e889f914d8219cf2b085e31a85e27b98ec9c6 Mon Sep 17 00:00:00 2001 From: reneradoi Date: Fri, 17 Apr 2026 09:01:41 +0200 Subject: [PATCH 2/6] update rust to 1.94.0 to build `lightkube` --- charmcraft.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charmcraft.yaml b/charmcraft.yaml index 795caf07..c309e121 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -75,7 +75,7 @@ parts: # rpds-py (Python package) >=0.19.0 requires rustc >=1.76, which is not available in the # Ubuntu 22.04 archive. Install rustc and cargo using rustup instead of the Ubuntu archive rustup set profile minimal - rustup default 1.90.0 # renovate: charmcraft-rust-latest + rustup default 1.94.0 # renovate: charmcraft-rust-latest craftctl default # Include requirements.txt in *.charm artifact for easier debugging From 48ba7d20e9da0fd0387294f668a2b58a7e0f3dd1 Mon Sep 17 00:00:00 2001 From: reneradoi Date: Fri, 17 Apr 2026 11:06:33 +0200 Subject: [PATCH 3/6] publish service endpoints in client relation on K8s --- src/common/k8s_client.py | 3 ++- src/literals.py | 1 - src/managers/sentinel.py | 26 +++++++++++++++++--------- tests/unit/test_client_relation.py | 8 ++++---- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/common/k8s_client.py b/src/common/k8s_client.py index e206503f..3e0ad1d0 100644 --- a/src/common/k8s_client.py +++ b/src/common/k8s_client.py @@ -9,6 +9,7 @@ from lightkube.core.exceptions import ApiError from lightkube.models.core_v1 import ServicePort, ServiceSpec from lightkube.models.meta_v1 import ObjectMeta +from lightkube.types import PatchType from lightkube.resources.core_v1 import Pod, Service from common.exceptions import KubernetesClientError @@ -38,7 +39,7 @@ def ensure_endpoint_service(self, role: str, port: int) -> None: service = self.client.get(res=Service, name=service_name, namespace=self.namespace) if service.spec.ports != [service_port]: service.spec.ports = [service_port] - self.client.patch(Service, service_name, service) + self.client.patch(Service, service_name, service, patch_type=PatchType.MERGE) logger.info("Updated Kubernetes service %s to port %s", service_name, port) return except ApiError as e: diff --git a/src/literals.py b/src/literals.py index 9168e365..063b5b33 100644 --- a/src/literals.py +++ b/src/literals.py @@ -129,4 +129,3 @@ class K8sService(StrEnum): PRIMARY = "primary" REPLICAS = "replicas" - SENTINELS = "sentinels" diff --git a/src/managers/sentinel.py b/src/managers/sentinel.py index 7a235479..57116917 100644 --- a/src/managers/sentinel.py +++ b/src/managers/sentinel.py @@ -5,6 +5,7 @@ """Manager for all sentinel related tasks.""" import logging +import socket import tenacity from data_platform_helpers.advanced_statuses.models import StatusObject @@ -141,16 +142,30 @@ def get_primary_ip(self) -> str: def get_primary_endpoint(self) -> str: """Get the endpoint of the primary node, consisting of address and port.""" - primary_address = self.get_primary_ip() port = TLS_PORT if self.state.unit_server.is_tls_enabled else CLIENT_PORT + if self.state.substrate == Substrate.K8S: + # get the DNS name of the K8s service + primary_address = socket.getfqdn( + f"{self.state.model.app.name}-{K8sService.PRIMARY.value}" + ) + return f"{primary_address}:{port}" + + primary_address = self.get_primary_ip() return f"{primary_address}:{port}" def get_replica_endpoints(self) -> str: """Get the endpoints of all replica nodes, consisting of address and port.""" port = TLS_PORT if self.state.unit_server.is_tls_enabled else CLIENT_PORT - client = self._get_sentinel_client() + if self.state.substrate == Substrate.K8S: + # get the DNS name of the K8s service + replicas_address = socket.getfqdn( + f"{self.state.model.app.name}-{K8sService.REPLICAS.value}" + ) + return f"{replicas_address}:{port}" + + client = self._get_sentinel_client() replica_list = client.replicas_primary(hostname=self.state.endpoint) return ",".join(sorted([f"{replica['ip']}:{port}" for replica in replica_list])) @@ -375,15 +390,9 @@ def reconcile_k8s_services(self) -> None: return valkey_port = TLS_PORT if self.state.unit_server.is_tls_enabled else CLIENT_PORT - sentinel_port = ( - SENTINEL_TLS_PORT if self.state.unit_server.is_tls_enabled else SENTINEL_PORT - ) self.k8s_client.ensure_endpoint_service(role=K8sService.PRIMARY.value, port=valkey_port) self.k8s_client.ensure_endpoint_service(role=K8sService.REPLICAS.value, port=valkey_port) - self.k8s_client.ensure_endpoint_service( - role=K8sService.SENTINELS.value, port=sentinel_port - ) primary_endpoint = self.get_primary_ip() for unit in self.state.servers: @@ -397,4 +406,3 @@ def reconcile_k8s_services(self) -> None: if unit.get_endpoint(Substrate.K8S) == primary_endpoint else K8sService.REPLICAS.value, ) - self.k8s_client.update_pod_label(pod_name=pod_name, role=K8sService.SENTINELS.value) diff --git a/tests/unit/test_client_relation.py b/tests/unit/test_client_relation.py index ebf8cff7..d2310fe0 100644 --- a/tests/unit/test_client_relation.py +++ b/tests/unit/test_client_relation.py @@ -93,8 +93,8 @@ def test_add_new_client_user(cloud_spec): assert response["resource"] == key_prefix assert response["request-id"] == request_id assert response["salt"] == salt - assert response["endpoints"] == f"{primary_endpoint}:{CLIENT_PORT}" - assert response["read-only-endpoints"] == f"{replica_endpoint}:{CLIENT_PORT}" + assert response["endpoints"] == f"valkey-primary:{CLIENT_PORT}" + assert response["read-only-endpoints"] == f"valkey-replicas:{CLIENT_PORT}" assert response["sentinel-endpoints"] == f"{primary_endpoint}:{SENTINEL_PORT}" assert response["version"] == valkey_version assert response["mode"] == "sentinel" @@ -161,8 +161,8 @@ def test_add_new_client_user_v0(cloud_spec): secret_tls = state_out.get_secret(id=secret_tls_id) assert response["database"] == key_prefix - assert response["endpoints"] == f"{primary_endpoint}:{CLIENT_PORT}" - assert response["read-only-endpoints"] == f"{replica_endpoint}:{CLIENT_PORT}" + assert response["endpoints"] == f"valkey-primary:{CLIENT_PORT}" + assert response["read-only-endpoints"] == f"valkey-replicas:{CLIENT_PORT}" assert response["sentinel_endpoints"] == f"{primary_endpoint}:{SENTINEL_PORT}" assert response["version"] == valkey_version assert response["mode"] == "sentinel" From f7816b0c6258b5fc4ca01bfd39e3fdc98ddf926a Mon Sep 17 00:00:00 2001 From: reneradoi Date: Fri, 17 Apr 2026 12:37:25 +0200 Subject: [PATCH 4/6] add K8s services to SANs DNS in TLS certificate requests --- src/common/k8s_client.py | 2 +- src/managers/sentinel.py | 9 ++------- src/managers/tls.py | 14 ++++++++++++-- tests/unit/test_tls.py | 9 ++++++++- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/common/k8s_client.py b/src/common/k8s_client.py index 3e0ad1d0..d4858e2d 100644 --- a/src/common/k8s_client.py +++ b/src/common/k8s_client.py @@ -9,8 +9,8 @@ from lightkube.core.exceptions import ApiError from lightkube.models.core_v1 import ServicePort, ServiceSpec from lightkube.models.meta_v1 import ObjectMeta -from lightkube.types import PatchType from lightkube.resources.core_v1 import Pod, Service +from lightkube.types import PatchType from common.exceptions import KubernetesClientError diff --git a/src/managers/sentinel.py b/src/managers/sentinel.py index 57116917..431b7f05 100644 --- a/src/managers/sentinel.py +++ b/src/managers/sentinel.py @@ -5,7 +5,6 @@ """Manager for all sentinel related tasks.""" import logging -import socket import tenacity from data_platform_helpers.advanced_statuses.models import StatusObject @@ -146,9 +145,7 @@ def get_primary_endpoint(self) -> str: if self.state.substrate == Substrate.K8S: # get the DNS name of the K8s service - primary_address = socket.getfqdn( - f"{self.state.model.app.name}-{K8sService.PRIMARY.value}" - ) + primary_address = f"{self.state.model.app.name}-{K8sService.PRIMARY.value}" return f"{primary_address}:{port}" primary_address = self.get_primary_ip() @@ -160,9 +157,7 @@ def get_replica_endpoints(self) -> str: if self.state.substrate == Substrate.K8S: # get the DNS name of the K8s service - replicas_address = socket.getfqdn( - f"{self.state.model.app.name}-{K8sService.REPLICAS.value}" - ) + replicas_address = f"{self.state.model.app.name}-{K8sService.REPLICAS.value}" return f"{replicas_address}:{port}" client = self._get_sentinel_client() diff --git a/src/managers/tls.py b/src/managers/tls.py index 59b0512e..51202424 100644 --- a/src/managers/tls.py +++ b/src/managers/tls.py @@ -27,7 +27,13 @@ from common.exceptions import ValkeyWorkloadCommandError from core.base_workload import WorkloadBase from core.cluster_state import ClusterState -from literals import TLS_CLIENT_PRIVATE_KEY_CONFIG, TLSCARotationState, TLSState +from literals import ( + TLS_CLIENT_PRIVATE_KEY_CONFIG, + K8sService, + Substrate, + TLSCARotationState, + TLSState, +) from statuses import CharmStatuses, TLSStatuses logger = logging.getLogger(__name__) @@ -107,7 +113,7 @@ def build_sans_ip(self) -> frozenset[str]: extra_sans = [san.strip() for san in extra_sans_config.split(",")] sans_ip = {san for san in extra_sans if self._is_ip_address(san)} - if self.state.substrate == "k8s": + if self.state.substrate == Substrate.K8S: return frozenset(sans_ip) sans_ip.add(self.state.bind_address) @@ -141,6 +147,10 @@ def build_sans_dns(self) -> frozenset[str]: sans_dns.add(self.state.unit_server.unit_name.replace("/", "")) sans_dns.add(self.state.hostname) + if self.state.substrate == Substrate.K8S: + sans_dns.add(f"{self.state.model.app.name}-{K8sService.PRIMARY.value}") + sans_dns.add(f"{self.state.model.app.name}-{K8sService.REPLICAS.value}") + return frozenset(sans_dns) def read_and_validate_private_key(self, private_key_secret_id: str) -> PrivateKey | None: diff --git a/tests/unit/test_tls.py b/tests/unit/test_tls.py index 8fa8aefd..8314d641 100644 --- a/tests/unit/test_tls.py +++ b/tests/unit/test_tls.py @@ -1117,6 +1117,7 @@ def test_set_extra_sans_config_option(cloud_spec): current_sans_value = ( "X509v3 Subject Alternative Name: \n " "DNS:valkey0, DNS:valkey-0.valkey-endpoints, " + "DNS:valkey-primary, DNS:valkey-replicas, " "IP Address:127.1.1.1, IP Address:192.0.2.0" ) with ( @@ -1151,6 +1152,7 @@ def test_set_extra_sans_config_option_unit_placeholder(cloud_spec): current_sans_value = ( "X509v3 Subject Alternative Name: \n " "DNS:myhostname, DNS:valkey0, DNS:valkey-0.valkey-endpoints, " + "DNS:valkey-primary, DNS:valkey-replicas, " "IP Address:127.1.1.1, IP Address:192.168.1.100, IP Address:192.0.2.0" ) with ( @@ -1258,7 +1260,12 @@ def test_set_extra_sans_config_option_no_update(cloud_spec): model=testing.Model(name="my-vm-model", type="lxd", cloud_spec=cloud_spec), ) - current_sans_value = "X509v3 Subject Alternative Name: \n DNS:myhostname, DNS:valkey0, DNS:valkey-0.valkey-endpoints, IP Address:192.168.1.100" + current_sans_value = ( + "X509v3 Subject Alternative Name: \n " + "DNS:myhostname, DNS:valkey0, DNS:valkey-0.valkey-endpoints, " + "DNS:valkey-primary, DNS:valkey-replicas, " + "IP Address:192.168.1.100" + ) with ( patch("workload_k8s.ValkeyK8sWorkload.exec", return_value=[current_sans_value]), patch("workload_k8s.ValkeyK8sWorkload.exec", return_value=[current_sans_value]), From 1474d534fd3dcfb8e27c7f8768dd3f5cb6269846 Mon Sep 17 00:00:00 2001 From: reneradoi Date: Thu, 23 Apr 2026 08:36:04 +0200 Subject: [PATCH 5/6] additional error handling --- src/common/k8s_client.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/common/k8s_client.py b/src/common/k8s_client.py index d4858e2d..a7eb3e02 100644 --- a/src/common/k8s_client.py +++ b/src/common/k8s_client.py @@ -47,11 +47,14 @@ def ensure_endpoint_service(self, role: str, port: int) -> None: if e.status.code != 404: raise KubernetesClientError from e - pod0 = self.client.get( - res=Pod, - name=self.app_name + "-0", - namespace=self.namespace, - ) + try: + pod0 = self.client.get( + res=Pod, + name=self.app_name + "-0", + namespace=self.namespace, + ) + except ApiError as e: + raise KubernetesClientError from e service = Service( apiVersion="v1", From 36a4ba32356f68f47f2dc49f3b44f3c0e416fbc2 Mon Sep 17 00:00:00 2001 From: reneradoi Date: Thu, 23 Apr 2026 12:20:29 +0200 Subject: [PATCH 6/6] move substrate check to event handler, instantiate `k8s_client` as `None` in VM --- src/events/external_clients.py | 9 +++++++-- src/managers/sentinel.py | 4 +--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/events/external_clients.py b/src/events/external_clients.py index 4e80b8b4..453fd44e 100644 --- a/src/events/external_clients.py +++ b/src/events/external_clients.py @@ -30,7 +30,12 @@ ValkeyTLSLoadError, ValkeyWorkloadCommandError, ) -from literals import CERTIFICATE_TRANSFER_RELATION, EXTERNAL_CLIENTS_RELATION, PEER_RELATION +from literals import ( + CERTIFICATE_TRANSFER_RELATION, + EXTERNAL_CLIENTS_RELATION, + PEER_RELATION, + Substrate, +) if TYPE_CHECKING: from charm import ValkeyCharm @@ -187,7 +192,7 @@ def _on_peer_relation_changed(self, event: ops.RelationChangedEvent) -> None: if not self.charm.state.unit_server.is_started: return - if self.charm.unit.is_leader(): + if self.charm.unit.is_leader() and self.charm.state.substrate == Substrate.K8S: try: self.charm.sentinel_manager.reconcile_k8s_services() except (KubernetesClientError, ValkeyCannotGetPrimaryIPError) as e: diff --git a/src/managers/sentinel.py b/src/managers/sentinel.py index 431b7f05..ff404126 100644 --- a/src/managers/sentinel.py +++ b/src/managers/sentinel.py @@ -49,6 +49,7 @@ def __init__(self, state: ClusterState, workload: WorkloadBase): self.workload = workload self.admin_user = CharmUsers.SENTINEL_CHARM_ADMIN.value + self.k8s_client: K8sClient | None = None if self.state.substrate == Substrate.K8S: self.k8s_client = K8sClient( namespace=self.state.model.name, @@ -381,9 +382,6 @@ def set_quorum(self, quorum: int) -> None: def reconcile_k8s_services(self) -> None: """Create or update the services and pod labels in Kubernetes.""" - if self.state.substrate == Substrate.VM: - return - valkey_port = TLS_PORT if self.state.unit_server.is_tls_enabled else CLIENT_PORT self.k8s_client.ensure_endpoint_service(role=K8sService.PRIMARY.value, port=valkey_port)