From 53520112252917dd3b50b6e17bf2d6ec9780c24c Mon Sep 17 00:00:00 2001 From: xeniape Date: Wed, 15 Oct 2025 13:51:14 +0200 Subject: [PATCH 01/10] chore: ensure metrics are correctly exposed --- .../hbase/pages/usage-guide/monitoring.adoc | 19 +++- rust/operator-binary/src/crd/mod.rs | 30 +++++ rust/operator-binary/src/hbase_controller.rs | 104 ++++++++++++++++-- 3 files changed, 140 insertions(+), 13 deletions(-) diff --git a/docs/modules/hbase/pages/usage-guide/monitoring.adoc b/docs/modules/hbase/pages/usage-guide/monitoring.adoc index 2c9b7570..8391ec70 100644 --- a/docs/modules/hbase/pages/usage-guide/monitoring.adoc +++ b/docs/modules/hbase/pages/usage-guide/monitoring.adoc @@ -6,5 +6,20 @@ See xref:operators:monitoring.adoc[] for more details. Starting with HBase 2.6 the URL for Prometheus metrics has changed. This is because HBase offers now a built-in endpoint for this purpose. -This endpoint is available from the UI service. -For example, in the case of the master service, the URL is `http://:16010/prometheus`. +This endpoint is available from the `metrics` Services. +For example, in the case of the master Service, the URL is `http://-master--metrics:16010/prometheus`. + +== Authentication when using TLS + +HBase exposes metrics through the same port as their web UI. Hence, when configuring HBase with TLS the metrics are also secured by TLS, +and the clients scraping the metrics endpoint need to authenticate against it. This could for example be accomplished by utilizing mTLS +between Kubernetes Pods with the xref:home:secret-operator:index.adoc[Secret Operator]. + +When using Prometheus `ServiceMonitor` for scraping, the `address` label needs relabeling to use the `headless` Service instead of the +`metrics` Service. This is because per default Prometheus targets the Pod IPs as endpoints, but since the Pod IPs are not +part of the certificate, the authentication will fail. Instead, the FQDN of the Pods, which can be added to the certificate, is used, but +this FQDN is only available through the `headless` Service. + +A more detailed explanation can be found in the xref:home:nifi:usage_guide/monitoring.adoc[NiFi Operator Monitoring Docs] with a similar situation +and an example of a Prometheus `ServiceMonitor` configured for TLS in the +https://github.com/stackabletech/demos/blob/main/stacks/monitoring/prometheus-service-monitors.yaml[Monitoring Stack{external-link-icon}^]. diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 4a67187a..69d91b44 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -69,16 +69,20 @@ pub const HBASE_UI_PORT_NAME_HTTP: &str = "ui-http"; pub const HBASE_UI_PORT_NAME_HTTPS: &str = "ui-https"; pub const HBASE_REST_PORT_NAME_HTTP: &str = "rest-http"; pub const HBASE_REST_PORT_NAME_HTTPS: &str = "rest-https"; +pub const HBASE_METRICS_PORT_NAME: &str = "metrics"; pub const HBASE_MASTER_PORT: u16 = 16000; // HBase always uses 16010, regardless of http or https. On 2024-01-17 we decided in Arch-meeting that we want to stick // the port numbers to what the product is doing, so we get the least surprise for users - even when this means we have // inconsistency between Stackable products. pub const HBASE_MASTER_UI_PORT: u16 = 16010; +pub const HBASE_MASTER_METRICS_PORT: u16 = 16010; pub const HBASE_REGIONSERVER_PORT: u16 = 16020; pub const HBASE_REGIONSERVER_UI_PORT: u16 = 16030; +pub const HBASE_REGIONSERVER_METRICS_PORT: u16 = 16030; pub const HBASE_REST_PORT: u16 = 8080; pub const HBASE_REST_UI_PORT: u16 = 8085; +pub const HBASE_REST_METRICS_PORT: u16 = 8085; pub const LISTENER_VOLUME_NAME: &str = "listener"; pub const LISTENER_VOLUME_DIR: &str = "/stackable/listener"; @@ -542,6 +546,24 @@ impl v1alpha1::HbaseCluster { } } + /// Returns required metrics port name and metrics port number tuples depending on the role. + /// The metrics are available over the UI port. + pub fn metrics_ports(&self, role: &HbaseRole) -> Vec<(String, u16)> { + match role { + HbaseRole::Master => vec![( + HBASE_METRICS_PORT_NAME.to_string(), + HBASE_MASTER_METRICS_PORT, + )], + HbaseRole::RegionServer => vec![( + HBASE_METRICS_PORT_NAME.to_string(), + HBASE_REGIONSERVER_METRICS_PORT, + )], + HbaseRole::RestServer => { + vec![(HBASE_METRICS_PORT_NAME.to_string(), HBASE_REST_METRICS_PORT)] + } + } + } + pub fn service_port(&self, role: &HbaseRole) -> u16 { match role { HbaseRole::Master => HBASE_MASTER_PORT, @@ -550,6 +572,14 @@ impl v1alpha1::HbaseCluster { } } + pub fn metrics_port(&self, role: &HbaseRole) -> u16 { + match role { + HbaseRole::Master => HBASE_MASTER_METRICS_PORT, + HbaseRole::RegionServer => HBASE_REGIONSERVER_METRICS_PORT, + HbaseRole::RestServer => HBASE_REST_METRICS_PORT, + } + } + /// Name of the port used by the Web UI, which depends on HTTPS usage pub fn ui_port_name(&self) -> String { if self.has_https_enabled() { diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index ea19a09a..5fa13469 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -45,7 +45,7 @@ use stackable_operator::{ core::{DeserializeGuard, error_boundary}, runtime::controller::Action, }, - kvp::{Label, LabelError, Labels, ObjectLabels}, + kvp::{Annotations, Label, LabelError, Labels, ObjectLabels}, logging::controller::ReconcilerError, memory::{BinaryMultiple, MemoryQuantity}, product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, @@ -427,6 +427,14 @@ pub async fn reconcile_hbase( let rg_service = build_rolegroup_service(hbase, &hbase_role, &rolegroup, &resolved_product_image)?; + + let rg_metrics_service = build_rolegroup_metrics_service( + hbase, + &hbase_role, + &rolegroup, + &resolved_product_image, + )?; + let rg_configmap = build_rolegroup_config_map( hbase, &client.kubernetes_cluster_info, @@ -452,6 +460,12 @@ pub async fn reconcile_hbase( .with_context(|_| ApplyRoleGroupServiceSnafu { rolegroup: rolegroup.clone(), })?; + cluster_resources + .add(client, rg_metrics_service) + .await + .with_context(|_| ApplyRoleGroupServiceSnafu { + rolegroup: rolegroup.clone(), + })?; cluster_resources .add(client, rg_configmap) .await @@ -739,12 +753,9 @@ fn build_rolegroup_service( }) .collect(); - let prometheus_label = - Label::try_from(("prometheus.io/scrape", "true")).context(BuildLabelSnafu)?; - let metadata = ObjectMetaBuilder::new() .name_and_namespace(hbase) - .name(headless_service_name(&rolegroup.object_name())) + .name(rolegroup.rolegroup_headless_service_name()) .ownerreference_from_resource(hbase, None, Some(true)) .context(ObjectMissingMetadataForOwnerRefSnafu)? .with_recommended_labels(build_recommended_labels( @@ -754,7 +765,6 @@ fn build_rolegroup_service( &rolegroup.role_group, )) .context(ObjectMetaSnafu)? - .with_label(prometheus_label) .build(); let service_selector = @@ -778,6 +788,82 @@ fn build_rolegroup_service( }) } +/// The rolegroup metrics [`Service`] is a service that exposes metrics and a prometheus scraping label. +pub fn build_rolegroup_metrics_service( + hbase: &v1alpha1::HbaseCluster, + hbase_role: &HbaseRole, + rolegroup: &RoleGroupRef, + resolved_product_image: &ResolvedProductImage, +) -> Result { + let ports = hbase + .metrics_ports(hbase_role) + .into_iter() + .map(|(name, value)| ServicePort { + name: Some(name), + port: i32::from(value), + protocol: Some("TCP".to_string()), + ..ServicePort::default() + }) + .collect(); + + let service_selector = + Labels::role_group_selector(hbase, APP_NAME, &rolegroup.role, &rolegroup.role_group) + .context(BuildLabelSnafu)?; + + Ok(Service { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(hbase) + .name(rolegroup.rolegroup_metrics_service_name()) + .ownerreference_from_resource(hbase, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + hbase, + &resolved_product_image.app_version_label_value, + &rolegroup.role, + &rolegroup.role_group, + )) + .context(ObjectMetaSnafu)? + .with_label(Label::try_from(("prometheus.io/scrape", "true")).context(LabelBuildSnafu)?) + .with_annotations(prometheus_annotations(hbase, hbase_role)) + .build(), + spec: Some(ServiceSpec { + // Internal communication does not need to be exposed + type_: Some("ClusterIP".to_string()), + cluster_ip: Some("None".to_string()), + ports: Some(ports), + selector: Some(service_selector.into()), + publish_not_ready_addresses: Some(true), + ..ServiceSpec::default() + }), + status: None, + }) +} + +/// Common annotations for Prometheus +/// +/// These annotations can be used in a ServiceMonitor. +/// +/// see also +fn prometheus_annotations(hbase: &v1alpha1::HbaseCluster, hbase_role: &HbaseRole) -> Annotations { + Annotations::try_from([ + ("prometheus.io/path".to_owned(), "/prometheus".to_owned()), + ( + "prometheus.io/port".to_owned(), + hbase.metrics_port(hbase_role).to_string(), + ), + ( + "prometheus.io/scheme".to_owned(), + if hbase.has_https_enabled() { + "https".to_owned() + } else { + "http".to_owned() + }, + ), + ("prometheus.io/scrape".to_owned(), "true".to_owned()), + ]) + .expect("should be valid annotations") +} + /// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. /// /// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding [`Service`] (from [`build_rolegroup_service`]). @@ -1088,7 +1174,7 @@ fn build_rolegroup_statefulset( match_labels: Some(statefulset_match_labels.into()), ..LabelSelector::default() }, - service_name: Some(headless_service_name(&rolegroup_ref.object_name())), + service_name: Some(rolegroup_ref.rolegroup_headless_service_name()), template: pod_template, volume_claim_templates: listener_pvc, ..StatefulSetSpec::default() @@ -1198,10 +1284,6 @@ fn build_hbase_env_sh( Ok(result) } -fn headless_service_name(role_group_name: &str) -> String { - format!("{name}-headless", name = role_group_name) -} - #[cfg(test)] mod test { use rstest::rstest; From 8495dda4134b7063788b2de260949a523e4184c4 Mon Sep 17 00:00:00 2001 From: xeniape Date: Wed, 15 Oct 2025 14:15:45 +0200 Subject: [PATCH 02/10] add changelog entry --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a4a1636..510f5ae5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,14 +10,18 @@ - `EOS_CHECK_MODE` (`--eos-check-mode`) to set the EoS check mode. Currently, only "offline" is supported. - `EOS_INTERVAL` (`--eos-interval`) to set the interval in which the operator checks if it is EoS. - `EOS_DISABLED` (`--eos-disabled`) to disable the EoS checker completely. +- Add `metrics` Services ([#701]). ### Changed - Changed env-vars to be consistent with config-utils in the entrypoint script ([#700]). +- BREAKING: The `prometheus.io/scrape` label moved from the `headless` Service to the `metrics` Service, which + uses `metrics` as the port name instead of the previous `ui-http`/`ui-https` port name ([#701]). [#691]: https://github.com/stackabletech/hbase-operator/pull/691 [#697]: https://github.com/stackabletech/hbase-operator/pull/697 [#700]: https://github.com/stackabletech/hbase-operator/pull/700 +[#701]: https://github.com/stackabletech/hbase-operator/pull/701 ## [25.7.0] - 2025-07-23 From 3cce09063c1b90af817059e209ad38f32bc7999a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 27 Oct 2025 13:12:10 +0100 Subject: [PATCH 03/10] Update rust/operator-binary/src/crd/mod.rs Co-authored-by: Techassi --- rust/operator-binary/src/crd/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 69d91b44..9ec82475 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -551,15 +551,15 @@ impl v1alpha1::HbaseCluster { pub fn metrics_ports(&self, role: &HbaseRole) -> Vec<(String, u16)> { match role { HbaseRole::Master => vec![( - HBASE_METRICS_PORT_NAME.to_string(), + HBASE_METRICS_PORT_NAME.to_owned(), HBASE_MASTER_METRICS_PORT, )], HbaseRole::RegionServer => vec![( - HBASE_METRICS_PORT_NAME.to_string(), + HBASE_METRICS_PORT_NAME.to_owned(), HBASE_REGIONSERVER_METRICS_PORT, )], HbaseRole::RestServer => { - vec![(HBASE_METRICS_PORT_NAME.to_string(), HBASE_REST_METRICS_PORT)] + vec![(HBASE_METRICS_PORT_NAME.to_owned(), HBASE_REST_METRICS_PORT)] } } } From e992b5be43167f0f9816fd868d82495ce22175cd Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 27 Oct 2025 13:12:23 +0100 Subject: [PATCH 04/10] Update rust/operator-binary/src/hbase_controller.rs Co-authored-by: Techassi --- rust/operator-binary/src/hbase_controller.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 5fa13469..98c9c584 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -801,7 +801,7 @@ pub fn build_rolegroup_metrics_service( .map(|(name, value)| ServicePort { name: Some(name), port: i32::from(value), - protocol: Some("TCP".to_string()), + protocol: Some("TCP".to_owned()), ..ServicePort::default() }) .collect(); From 969e1ae056717ecfbe7ba22e3b1ed3b6a87074e8 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 27 Oct 2025 13:12:37 +0100 Subject: [PATCH 05/10] Apply suggestions from code review Co-authored-by: Techassi --- rust/operator-binary/src/hbase_controller.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 98c9c584..50beaa3f 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -828,8 +828,8 @@ pub fn build_rolegroup_metrics_service( .build(), spec: Some(ServiceSpec { // Internal communication does not need to be exposed - type_: Some("ClusterIP".to_string()), - cluster_ip: Some("None".to_string()), + type_: Some("ClusterIP".to_owned()), + cluster_ip: Some("None".to_owned()), ports: Some(ports), selector: Some(service_selector.into()), publish_not_ready_addresses: Some(true), From aabbecfd79c528232ddea003c5df77a979a7f715 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 27 Oct 2025 14:17:52 +0100 Subject: [PATCH 06/10] add metrics service to tls cert --- rust/operator-binary/src/hbase_controller.rs | 1 + rust/operator-binary/src/kerberos.rs | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 50beaa3f..bef8c2f6 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -1092,6 +1092,7 @@ fn build_rolegroup_statefulset( if hbase.has_kerberos_enabled() { add_kerberos_pod_config( hbase, + rolegroup_ref, &mut hbase_container, &mut pod_builder, merged_config diff --git a/rust/operator-binary/src/kerberos.rs b/rust/operator-binary/src/kerberos.rs index e3ae1781..904dc00f 100644 --- a/rust/operator-binary/src/kerberos.rs +++ b/rust/operator-binary/src/kerberos.rs @@ -11,6 +11,7 @@ use stackable_operator::{ }, }, kube::{ResourceExt, runtime::reflector::ObjectRef}, + role_utils::RoleGroupRef, shared::time::Duration, utils::cluster_info::KubernetesClusterInfo, }; @@ -230,6 +231,7 @@ pub fn kerberos_ssl_client_settings(hbase: &v1alpha1::HbaseCluster) -> BTreeMap< pub fn add_kerberos_pod_config( hbase: &v1alpha1::HbaseCluster, + rolegroup_ref: &RoleGroupRef, cb: &mut ContainerBuilder, pb: &mut PodBuilder, requested_secret_lifetime: Duration, @@ -264,6 +266,8 @@ pub fn add_kerberos_pod_config( SecretOperatorVolumeSourceBuilder::new(https_secret_class) .with_pod_scope() .with_node_scope() + // We need to add the metrics service for scraping + .with_service_scope(rolegroup_ref.rolegroup_metrics_service_name()) .with_format(SecretFormat::TlsPkcs12) .with_tls_pkcs12_password(TLS_STORE_PASSWORD) .with_auto_tls_cert_lifetime(requested_secret_lifetime) From 8dfd7d6b0f7bfdbf75b37aec1b7ff91c342c46bf Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 27 Oct 2025 16:00:36 +0100 Subject: [PATCH 07/10] clean up port retrieval methods --- rust/operator-binary/src/crd/mod.rs | 144 +++++++++---------- rust/operator-binary/src/hbase_controller.rs | 117 +++------------ 2 files changed, 90 insertions(+), 171 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 9ec82475..9e81266f 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -65,11 +65,11 @@ pub const SSL_CLIENT_XML: &str = "ssl-client.xml"; pub const HBASE_CLUSTER_DISTRIBUTED: &str = "hbase.cluster.distributed"; pub const HBASE_ROOTDIR: &str = "hbase.rootdir"; -pub const HBASE_UI_PORT_NAME_HTTP: &str = "ui-http"; -pub const HBASE_UI_PORT_NAME_HTTPS: &str = "ui-https"; -pub const HBASE_REST_PORT_NAME_HTTP: &str = "rest-http"; -pub const HBASE_REST_PORT_NAME_HTTPS: &str = "rest-https"; -pub const HBASE_METRICS_PORT_NAME: &str = "metrics"; +const HBASE_UI_PORT_NAME_HTTP: &str = "ui-http"; +const HBASE_UI_PORT_NAME_HTTPS: &str = "ui-https"; +const HBASE_REST_PORT_NAME_HTTP: &str = "rest-http"; +const HBASE_REST_PORT_NAME_HTTPS: &str = "rest-https"; +const HBASE_METRICS_PORT_NAME: &str = "metrics"; pub const HBASE_MASTER_PORT: u16 = 16000; // HBase always uses 16010, regardless of http or https. On 2024-01-17 we decided in Arch-meeting that we want to stick @@ -517,78 +517,6 @@ impl v1alpha1::HbaseCluster { .as_ref() .map(|a| a.tls_secret_class.clone()) } - - /// Returns required port name and port number tuples depending on the role. - /// Hbase versions 2.6.* will have two ports for each role. The metrics are available over the - /// UI port. - pub fn ports(&self, role: &HbaseRole) -> Vec<(String, u16)> { - match role { - HbaseRole::Master => vec![ - ("master".to_string(), HBASE_MASTER_PORT), - (self.ui_port_name(), HBASE_MASTER_UI_PORT), - ], - HbaseRole::RegionServer => vec![ - ("regionserver".to_string(), HBASE_REGIONSERVER_PORT), - (self.ui_port_name(), HBASE_REGIONSERVER_UI_PORT), - ], - HbaseRole::RestServer => vec![ - ( - if self.has_https_enabled() { - HBASE_REST_PORT_NAME_HTTPS - } else { - HBASE_REST_PORT_NAME_HTTP - } - .to_string(), - HBASE_REST_PORT, - ), - (self.ui_port_name(), HBASE_REST_UI_PORT), - ], - } - } - - /// Returns required metrics port name and metrics port number tuples depending on the role. - /// The metrics are available over the UI port. - pub fn metrics_ports(&self, role: &HbaseRole) -> Vec<(String, u16)> { - match role { - HbaseRole::Master => vec![( - HBASE_METRICS_PORT_NAME.to_owned(), - HBASE_MASTER_METRICS_PORT, - )], - HbaseRole::RegionServer => vec![( - HBASE_METRICS_PORT_NAME.to_owned(), - HBASE_REGIONSERVER_METRICS_PORT, - )], - HbaseRole::RestServer => { - vec![(HBASE_METRICS_PORT_NAME.to_owned(), HBASE_REST_METRICS_PORT)] - } - } - } - - pub fn service_port(&self, role: &HbaseRole) -> u16 { - match role { - HbaseRole::Master => HBASE_MASTER_PORT, - HbaseRole::RegionServer => HBASE_REGIONSERVER_PORT, - HbaseRole::RestServer => HBASE_REST_PORT, - } - } - - pub fn metrics_port(&self, role: &HbaseRole) -> u16 { - match role { - HbaseRole::Master => HBASE_MASTER_METRICS_PORT, - HbaseRole::RegionServer => HBASE_REGIONSERVER_METRICS_PORT, - HbaseRole::RestServer => HBASE_REST_METRICS_PORT, - } - } - - /// Name of the port used by the Web UI, which depends on HTTPS usage - pub fn ui_port_name(&self) -> String { - if self.has_https_enabled() { - HBASE_UI_PORT_NAME_HTTPS - } else { - HBASE_UI_PORT_NAME_HTTP - } - .to_string() - } } pub fn merged_env(rolegroup_config: Option<&BTreeMap>) -> Vec { @@ -789,6 +717,68 @@ impl HbaseRole { }; Ok(pvc) } + + /// Returns required port name and port number tuples depending on the role. + /// Hbase versions 2.6.* will have two ports for each role. The metrics are available over the + /// UI port. + pub fn ports(&self, hbase: &v1alpha1::HbaseCluster) -> Vec<(String, u16)> { + vec![ + (self.data_port_name(hbase), self.data_port()), + (Self::ui_port_name(hbase).to_string(), self.ui_port()), + ] + } + + pub fn data_port(&self) -> u16 { + match self { + HbaseRole::Master => HBASE_MASTER_PORT, + HbaseRole::RegionServer => HBASE_REGIONSERVER_PORT, + HbaseRole::RestServer => HBASE_REST_PORT, + } + } + + pub fn data_port_name(&self, hbase: &v1alpha1::HbaseCluster) -> String { + match self { + HbaseRole::Master | HbaseRole::RegionServer => self.to_string(), + HbaseRole::RestServer => { + if hbase.has_https_enabled() { + HBASE_REST_PORT_NAME_HTTPS.to_owned() + } else { + HBASE_REST_PORT_NAME_HTTP.to_owned() + } + } + } + } + + pub fn ui_port(&self) -> u16 { + match self { + HbaseRole::Master => HBASE_MASTER_UI_PORT, + HbaseRole::RegionServer => HBASE_REGIONSERVER_UI_PORT, + HbaseRole::RestServer => HBASE_REST_UI_PORT, + } + } + + /// Name of the port used by the Web UI, which depends on HTTPS usage + pub fn ui_port_name(hbase: &v1alpha1::HbaseCluster) -> &str { + if hbase.has_https_enabled() { + HBASE_UI_PORT_NAME_HTTPS + } else { + HBASE_UI_PORT_NAME_HTTP + } + } + + pub fn metrics_port(&self) -> u16 { + match self { + HbaseRole::Master => HBASE_MASTER_METRICS_PORT, + HbaseRole::RegionServer => HBASE_REGIONSERVER_METRICS_PORT, + HbaseRole::RestServer => HBASE_REST_METRICS_PORT, + } + } + + pub fn metrics_port_name(&self) -> &str { + match self { + _ => HBASE_METRICS_PORT_NAME, + } + } } fn default_resources(role: &HbaseRole) -> ResourcesFragment { diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index bef8c2f6..f43f187b 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -75,10 +75,9 @@ use crate::{ }, crd::{ APP_NAME, AnyServiceConfig, Container, HBASE_ENV_SH, HBASE_MASTER_PORT, - HBASE_MASTER_UI_PORT, HBASE_REGIONSERVER_PORT, HBASE_REGIONSERVER_UI_PORT, - HBASE_REST_PORT_NAME_HTTP, HBASE_REST_PORT_NAME_HTTPS, HBASE_SITE_XML, HbaseClusterStatus, - HbaseRole, JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, - SSL_CLIENT_XML, SSL_SERVER_XML, merged_env, v1alpha1, + HBASE_MASTER_UI_PORT, HBASE_REGIONSERVER_PORT, HBASE_REGIONSERVER_UI_PORT, HBASE_SITE_XML, + HbaseClusterStatus, HbaseRole, JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, + LISTENER_VOLUME_NAME, SSL_CLIENT_XML, SSL_SERVER_XML, merged_env, v1alpha1, }, discovery::build_discovery_configmap, kerberos::{ @@ -108,9 +107,6 @@ const HBASE_LOG_CONFIG_TMP_DIR: &str = "/stackable/tmp/log_config"; const DOCKER_IMAGE_BASE_NAME: &str = "hbase"; -const HBASE_MASTER_PORT_NAME: &str = "master"; -const HBASE_REGIONSERVER_PORT_NAME: &str = "regionserver"; - pub struct Ctx { pub client: stackable_operator::client::Client, pub product_config: ProductConfigManager, @@ -125,21 +121,6 @@ pub enum Error { #[snafu(display("missing secret lifetime"))] MissingSecretLifetime, - #[snafu(display("object defines no version"))] - ObjectHasNoVersion, - - #[snafu(display("object defines no namespace"))] - ObjectHasNoNamespace, - - #[snafu(display("object defines no master role"))] - NoMasterRole, - - #[snafu(display("the HBase role [{role}] is missing from spec"))] - MissingHbaseRole { role: String }, - - #[snafu(display("object defines no regionserver role"))] - NoRegionServerRole, - #[snafu(display("failed to create cluster resources"))] CreateClusterResources { source: stackable_operator::cluster_resources::Error, @@ -206,12 +187,6 @@ pub enum Error { cm_name: String, }, - #[snafu(display("failed to retrieve the entry {entry} for config map {cm_name}"))] - MissingConfigMapEntry { - entry: &'static str, - cm_name: String, - }, - #[snafu(display("failed to patch service account"))] ApplyServiceAccount { source: stackable_operator::cluster_resources::Error, @@ -228,9 +203,6 @@ pub enum Error { role: String, }, - #[snafu(display("failed to retrieve Hbase role group: {source}"))] - UnidentifiedHbaseRoleGroup { source: crate::crd::Error }, - #[snafu(display("failed to resolve and merge config for role and role group"))] FailedToResolveConfig { source: crate::crd::Error }, @@ -289,9 +261,6 @@ pub enum Error { #[snafu(display("unknown role [{role}]"))] UnknownHbaseRole { source: ParseError, role: String }, - #[snafu(display("authorization is only supported from HBase 2.6 onwards"))] - AuthorizationNotSupported, - #[snafu(display("failed to configure logging"))] ConfigureLogging { source: LoggingError }, @@ -742,8 +711,8 @@ fn build_rolegroup_service( rolegroup: &RoleGroupRef, resolved_product_image: &ResolvedProductImage, ) -> Result { - let ports = hbase - .ports(hbase_role) + let ports = hbase_role + .ports(hbase) .into_iter() .map(|(name, value)| ServicePort { name: Some(name), @@ -795,16 +764,12 @@ pub fn build_rolegroup_metrics_service( rolegroup: &RoleGroupRef, resolved_product_image: &ResolvedProductImage, ) -> Result { - let ports = hbase - .metrics_ports(hbase_role) - .into_iter() - .map(|(name, value)| ServicePort { - name: Some(name), - port: i32::from(value), - protocol: Some("TCP".to_owned()), - ..ServicePort::default() - }) - .collect(); + let ports = vec![ServicePort { + name: Some(hbase_role.metrics_port_name().to_owned()), + port: i32::from(hbase_role.metrics_port()), + protocol: Some("TCP".to_owned()), + ..ServicePort::default() + }]; let service_selector = Labels::role_group_selector(hbase, APP_NAME, &rolegroup.role, &rolegroup.role_group) @@ -849,7 +814,7 @@ fn prometheus_annotations(hbase: &v1alpha1::HbaseCluster, hbase_role: &HbaseRole ("prometheus.io/path".to_owned(), "/prometheus".to_owned()), ( "prometheus.io/port".to_owned(), - hbase.metrics_port(hbase_role).to_string(), + hbase_role.metrics_port().to_string(), ), ( "prometheus.io/scheme".to_owned(), @@ -879,8 +844,8 @@ fn build_rolegroup_statefulset( ) -> Result { let hbase_version = &resolved_product_image.app_version_label_value; - let ports = hbase - .ports(hbase_role) + let ports = hbase_role + .ports(hbase) .into_iter() .map(|(name, value)| ContainerPort { name: Some(name), @@ -890,38 +855,12 @@ fn build_rolegroup_statefulset( }) .collect(); - let probe_template = match hbase_role { - HbaseRole::Master => Probe { - tcp_socket: Some(TCPSocketAction { - port: IntOrString::String(HBASE_MASTER_PORT_NAME.to_string()), - ..TCPSocketAction::default() - }), - ..Probe::default() - }, - HbaseRole::RegionServer => Probe { - tcp_socket: Some(TCPSocketAction { - port: IntOrString::String(HBASE_REGIONSERVER_PORT_NAME.to_string()), - ..TCPSocketAction::default() - }), - ..Probe::default() - }, - HbaseRole::RestServer => Probe { - // We cant use HTTPGetAction, as it returns a 401 in case kerberos is enabled, and there is currently no way - // to tell Kubernetes an 401 is healthy. As an alternative we run curl ourselves and check the http status - // code there. - tcp_socket: Some(TCPSocketAction { - port: IntOrString::String( - if hbase.has_https_enabled() { - HBASE_REST_PORT_NAME_HTTPS - } else { - HBASE_REST_PORT_NAME_HTTP - } - .to_string(), - ), - ..TCPSocketAction::default() - }), - ..Probe::default() - }, + let probe_template = Probe { + tcp_socket: Some(TCPSocketAction { + port: IntOrString::String(hbase_role.data_port_name(hbase)), + ..TCPSocketAction::default() + }), + ..Probe::default() }; let startup_probe = Probe { @@ -967,12 +906,6 @@ fn build_rolegroup_statefulset( let role_name = hbase_role.cli_role_name(); let mut hbase_container = ContainerBuilder::new("hbase").expect("ContainerBuilder not created"); - let rest_http_port_name = if hbase.has_https_enabled() { - HBASE_REST_PORT_NAME_HTTPS - } else { - HBASE_REST_PORT_NAME_HTTP - }; - hbase_container .image_from_product_image(resolved_product_image) .command(command()) @@ -980,13 +913,9 @@ fn build_rolegroup_statefulset( {entrypoint} {role} {port} {port_name} {ui_port_name}", entrypoint = "/stackable/hbase/bin/hbase-entrypoint.sh".to_string(), role = role_name, - port = hbase.service_port(hbase_role).to_string(), - port_name = match hbase_role { - HbaseRole::Master => HBASE_MASTER_PORT_NAME, - HbaseRole::RegionServer => HBASE_REGIONSERVER_PORT_NAME, - HbaseRole::RestServer => rest_http_port_name, - }, - ui_port_name = hbase.ui_port_name(), + port = hbase_role.data_port(), + port_name = hbase_role.data_port_name(hbase), + ui_port_name = HbaseRole::ui_port_name(hbase), }]) .add_env_vars(merged_env) // Needed for the `containerdebug` process to log it's tracing information to. From 04ae9f1cd2e461531b74dfb5cf00f34555874830 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 27 Oct 2025 16:08:51 +0100 Subject: [PATCH 08/10] pre commit --- rust/operator-binary/src/crd/mod.rs | 6 ++---- rust/operator-binary/src/hbase_controller.rs | 2 +- .../kuttl/logging/test_log_aggregation.py | 18 +++++++++--------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 9e81266f..b5d350b6 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -774,10 +774,8 @@ impl HbaseRole { } } - pub fn metrics_port_name(&self) -> &str { - match self { - _ => HBASE_METRICS_PORT_NAME, - } + pub fn metrics_port_name() -> &'static str { + HBASE_METRICS_PORT_NAME } } diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index f43f187b..b360bb3d 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -765,7 +765,7 @@ pub fn build_rolegroup_metrics_service( resolved_product_image: &ResolvedProductImage, ) -> Result { let ports = vec![ServicePort { - name: Some(hbase_role.metrics_port_name().to_owned()), + name: Some(HbaseRole::metrics_port_name().to_owned()), port: i32::from(hbase_role.metrics_port()), protocol: Some("TCP".to_owned()), ..ServicePort::default() diff --git a/tests/templates/kuttl/logging/test_log_aggregation.py b/tests/templates/kuttl/logging/test_log_aggregation.py index d7c7c0fa..48ad19c1 100755 --- a/tests/templates/kuttl/logging/test_log_aggregation.py +++ b/tests/templates/kuttl/logging/test_log_aggregation.py @@ -23,9 +23,9 @@ def check_sent_events(): }, ) - assert ( - response.status_code == 200 - ), "Cannot access the API of the vector aggregator." + assert response.status_code == 200, ( + "Cannot access the API of the vector aggregator." + ) result = response.json() @@ -35,13 +35,13 @@ def check_sent_events(): componentId = transform["componentId"] if componentId == "filteredInvalidEvents": - assert ( - sentEvents is None or sentEvents["sentEventsTotal"] == 0 - ), "Invalid log events were sent." + assert sentEvents is None or sentEvents["sentEventsTotal"] == 0, ( + "Invalid log events were sent." + ) else: - assert ( - sentEvents is not None and sentEvents["sentEventsTotal"] > 0 - ), f'No events were sent in "{componentId}".' + assert sentEvents is not None and sentEvents["sentEventsTotal"] > 0, ( + f'No events were sent in "{componentId}".' + ) if __name__ == "__main__": From b562ea9bc6eedc695de3004fe465bb6f5f5303a9 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 29 Oct 2025 11:39:34 +0100 Subject: [PATCH 09/10] Apply suggestions from code review Co-authored-by: Techassi --- docs/modules/hbase/pages/usage-guide/monitoring.adoc | 2 +- rust/operator-binary/src/crd/mod.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/modules/hbase/pages/usage-guide/monitoring.adoc b/docs/modules/hbase/pages/usage-guide/monitoring.adoc index 8391ec70..7e2998c6 100644 --- a/docs/modules/hbase/pages/usage-guide/monitoring.adoc +++ b/docs/modules/hbase/pages/usage-guide/monitoring.adoc @@ -16,7 +16,7 @@ and the clients scraping the metrics endpoint need to authenticate against it. T between Kubernetes Pods with the xref:home:secret-operator:index.adoc[Secret Operator]. When using Prometheus `ServiceMonitor` for scraping, the `address` label needs relabeling to use the `headless` Service instead of the -`metrics` Service. This is because per default Prometheus targets the Pod IPs as endpoints, but since the Pod IPs are not +`metrics` Service. This is because by default Prometheus targets the Pod IPs as endpoints, but since the Pod IPs are not part of the certificate, the authentication will fail. Instead, the FQDN of the Pods, which can be added to the certificate, is used, but this FQDN is only available through the `headless` Service. diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index b5d350b6..35285ae0 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -719,7 +719,8 @@ impl HbaseRole { } /// Returns required port name and port number tuples depending on the role. - /// Hbase versions 2.6.* will have two ports for each role. The metrics are available over the + /// + /// Hbase versions 2.6.* will have two ports for each role. The metrics are available on the /// UI port. pub fn ports(&self, hbase: &v1alpha1::HbaseCluster) -> Vec<(String, u16)> { vec![ From 33855440bc7c981dbba1e70bcff36a85be26eddd Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 30 Oct 2025 10:27:17 +0100 Subject: [PATCH 10/10] use boolean for https instead of hbase cluster --- rust/operator-binary/src/crd/mod.rs | 9 ++++++--- rust/operator-binary/src/hbase_controller.rs | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 35285ae0..789e621c 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -725,7 +725,10 @@ impl HbaseRole { pub fn ports(&self, hbase: &v1alpha1::HbaseCluster) -> Vec<(String, u16)> { vec![ (self.data_port_name(hbase), self.data_port()), - (Self::ui_port_name(hbase).to_string(), self.ui_port()), + ( + Self::ui_port_name(hbase.has_https_enabled()).to_string(), + self.ui_port(), + ), ] } @@ -759,8 +762,8 @@ impl HbaseRole { } /// Name of the port used by the Web UI, which depends on HTTPS usage - pub fn ui_port_name(hbase: &v1alpha1::HbaseCluster) -> &str { - if hbase.has_https_enabled() { + pub fn ui_port_name(has_https_enabled: bool) -> &'static str { + if has_https_enabled { HBASE_UI_PORT_NAME_HTTPS } else { HBASE_UI_PORT_NAME_HTTP diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index b360bb3d..15524afd 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -915,7 +915,7 @@ fn build_rolegroup_statefulset( role = role_name, port = hbase_role.data_port(), port_name = hbase_role.data_port_name(hbase), - ui_port_name = HbaseRole::ui_port_name(hbase), + ui_port_name = HbaseRole::ui_port_name(hbase.has_https_enabled()), }]) .add_env_vars(merged_env) // Needed for the `containerdebug` process to log it's tracing information to.