From 14b7b8a6836ee11516041525b3dcea1a96f22c87 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 14 Aug 2025 09:45:12 +0200 Subject: [PATCH 1/5] chore: Reduce severity of Pod eviction errors --- Cargo.lock | 1 + Cargo.nix | 4 ++ Cargo.toml | 1 + rust/operator-binary/Cargo.toml | 1 + .../src/restart_controller/pod.rs | 49 ++++++++++++++++--- 5 files changed, 50 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e8652d7..676f1d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2529,6 +2529,7 @@ dependencies = [ "built", "clap", "futures 0.3.31", + "http", "serde", "serde_json", "snafu 0.8.6", diff --git a/Cargo.nix b/Cargo.nix index ec60d5d..32a04d7 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -8249,6 +8249,10 @@ rec { packageId = "futures 0.3.31"; features = [ "compat" ]; } + { + name = "http"; + packageId = "http"; + } { name = "serde"; packageId = "serde"; diff --git a/Cargo.toml b/Cargo.toml index dfbbe4a..0343016 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ anyhow = "1.0" built = { version = "0.8", features = ["chrono", "git2"] } clap = "4.5" futures = { version = "0.3", features = ["compat"] } +http = "1.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" snafu = "0.8" diff --git a/rust/operator-binary/Cargo.toml b/rust/operator-binary/Cargo.toml index 78bad2c..f4f0508 100644 --- a/rust/operator-binary/Cargo.toml +++ b/rust/operator-binary/Cargo.toml @@ -13,6 +13,7 @@ stackable-operator.workspace = true anyhow.workspace = true clap.workspace = true +http.workspace = true futures.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/rust/operator-binary/src/restart_controller/pod.rs b/rust/operator-binary/src/restart_controller/pod.rs index 59f5a97..4143e19 100644 --- a/rust/operator-binary/src/restart_controller/pod.rs +++ b/rust/operator-binary/src/restart_controller/pod.rs @@ -1,6 +1,7 @@ use std::{sync::Arc, time::Duration}; use futures::StreamExt; +use http::StatusCode; 
use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ client::Client, @@ -11,10 +12,10 @@ use stackable_operator::{ kube::{ self, api::{EvictParams, PartialObjectMeta}, - core::DynamicObject, + core::{DynamicObject, ErrorResponse}, runtime::{ Controller, - controller::Action, + controller::{self, Action}, events::{Recorder, Reporter}, reflector::ObjectRef, watcher, @@ -96,10 +97,7 @@ pub async fn start(client: &Client, watch_namespace: &WatchNamespace) { // The event_recorder needs to be shared across all invocations, so that // events are correctly aggregated let event_recorder = event_recorder.clone(); - async move { - report_controller_reconciled(&event_recorder, FULL_CONTROLLER_NAME, &result) - .await; - } + async move { report_result(result, event_recorder).await } }, ) .await; @@ -192,6 +190,45 @@ async fn reconcile(pod: Arc>, ctx: Arc) -> Result>, Action), + controller::Error, + >, + event_recorder: Arc, +) { + if let Err(controller::Error::ReconcilerFailed( + Error::EvictPod { + source: evict_pod_error, + }, + _, + )) = &result + { + const TOO_MANY_REQUESTS_HTTP_CODE: u16 = StatusCode::TOO_MANY_REQUESTS.as_u16(); + if let kube::Error::Api(ErrorResponse { + code: TOO_MANY_REQUESTS_HTTP_CODE, + .. + }) = evict_pod_error + { + tracing::info!( + ?evict_pod_error, + "Tried to evict Pod, but wasn't allowed to do so, as it would violate the Pod's disruption budget. 
Retrying later" + ); + } + } + + report_controller_reconciled(&event_recorder, FULL_CONTROLLER_NAME, &result).await; +} + fn error_policy(_obj: Arc>, _error: &Error, _ctx: Arc) -> Action { Action::requeue(Duration::from_secs(5)) } From 41bad1ea7c42502d2a6120605f85e5f4c03f72c8 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 14 Aug 2025 09:52:27 +0200 Subject: [PATCH 2/5] clippy --- rust/operator-binary/src/restart_controller/pod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/operator-binary/src/restart_controller/pod.rs b/rust/operator-binary/src/restart_controller/pod.rs index 4143e19..b24a67b 100644 --- a/rust/operator-binary/src/restart_controller/pod.rs +++ b/rust/operator-binary/src/restart_controller/pod.rs @@ -199,6 +199,7 @@ async fn reconcile(pod: Arc>, ctx: Arc) -> Result>, Action), From 3af4448009d299607dd6340973e43f6ba6ea520a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 14 Aug 2025 09:57:27 +0200 Subject: [PATCH 3/5] changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9d1e4d..6bd966e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Changed + +- Reduce severity of Pod eviction errors. Previously, the operator would produce lots of + `Cannot evict pod as it would violate the pod's disruption budget` errors. With this fix, the + error is reduced to an info instead ([#372]). 
+ +[#372]: https://github.com/stackabletech/commons-operator/pull/372 + ## [25.7.0] - 2025-07-23 ## [25.7.0-rc1] - 2025-07-18 From 2bfcca5f35de52aa5e5c85ac67424578448dc8e1 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 14 Aug 2025 10:01:16 +0200 Subject: [PATCH 4/5] Update slab to fix RUSTSEC-2025-0047 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 676f1d1..678af9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2453,9 +2453,9 @@ dependencies = [ [[package]] name = "slab" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "smallvec" From 9d2667a0b8cad9f1abc39128c353f2debf0a17ec Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 14 Aug 2025 10:14:02 +0200 Subject: [PATCH 5/5] Regenerate nix lockfile --- Cargo.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.nix b/Cargo.nix index 32a04d7..f1c58bd 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -8020,9 +8020,9 @@ rec { }; "slab" = rec { crateName = "slab"; - version = "0.4.10"; + version = "0.4.11"; edition = "2018"; - sha256 = "03f5a9gdp33mngya4qwq2555138pj74pl015scv57wsic5rikp04"; + sha256 = "12bm4s88rblq02jjbi1dw31984w61y2ldn13ifk5gsqgy97f8aks"; authors = [ "Carl Lerche " ];