diff --git a/CHANGELOG.md b/CHANGELOG.md index f9d1e4d..6bd966e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Changed + +- Reduce severity of Pod eviction errors. Previously, the operator would produce lots of + `Cannot evict pod as it would violate the pod's disruption budget` errors. With this fix, the + error is reduced to an info instead ([#372]). + +[#372]: https://github.com/stackabletech/commons-operator/pull/372 + ## [25.7.0] - 2025-07-23 ## [25.7.0-rc1] - 2025-07-18 diff --git a/Cargo.lock b/Cargo.lock index e8652d7..678af9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2453,9 +2453,9 @@ dependencies = [ [[package]] name = "slab" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "smallvec" @@ -2529,6 +2529,7 @@ dependencies = [ "built", "clap", "futures 0.3.31", + "http", "serde", "serde_json", "snafu 0.8.6", diff --git a/Cargo.nix b/Cargo.nix index ec60d5d..f1c58bd 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -8020,9 +8020,9 @@ rec { }; "slab" = rec { crateName = "slab"; - version = "0.4.10"; + version = "0.4.11"; edition = "2018"; - sha256 = "03f5a9gdp33mngya4qwq2555138pj74pl015scv57wsic5rikp04"; + sha256 = "12bm4s88rblq02jjbi1dw31984w61y2ldn13ifk5gsqgy97f8aks"; authors = [ "Carl Lerche " ]; @@ -8249,6 +8249,10 @@ rec { packageId = "futures 0.3.31"; features = [ "compat" ]; } + { + name = "http"; + packageId = "http"; + } { name = "serde"; packageId = "serde"; diff --git a/Cargo.toml b/Cargo.toml index dfbbe4a..0343016 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ anyhow = "1.0" built = { version = "0.8", features = ["chrono", "git2"] } clap = "4.5" futures = { version = "0.3", features = ["compat"] } +http = 
"1.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" snafu = "0.8" diff --git a/rust/operator-binary/Cargo.toml b/rust/operator-binary/Cargo.toml index 78bad2c..f4f0508 100644 --- a/rust/operator-binary/Cargo.toml +++ b/rust/operator-binary/Cargo.toml @@ -13,6 +13,7 @@ stackable-operator.workspace = true anyhow.workspace = true clap.workspace = true +http.workspace = true futures.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/rust/operator-binary/src/restart_controller/pod.rs b/rust/operator-binary/src/restart_controller/pod.rs index 59f5a97..b24a67b 100644 --- a/rust/operator-binary/src/restart_controller/pod.rs +++ b/rust/operator-binary/src/restart_controller/pod.rs @@ -1,6 +1,7 @@ use std::{sync::Arc, time::Duration}; use futures::StreamExt; +use http::StatusCode; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ client::Client, @@ -11,10 +12,10 @@ use stackable_operator::{ kube::{ self, api::{EvictParams, PartialObjectMeta}, - core::DynamicObject, + core::{DynamicObject, ErrorResponse}, runtime::{ Controller, - controller::Action, + controller::{self, Action}, events::{Recorder, Reporter}, reflector::ObjectRef, watcher, @@ -96,10 +97,7 @@ pub async fn start(client: &Client, watch_namespace: &WatchNamespace) { // The event_recorder needs to be shared across all invocations, so that // events are correctly aggregated let event_recorder = event_recorder.clone(); - async move { - report_controller_reconciled(&event_recorder, FULL_CONTROLLER_NAME, &result) - .await; - } + async move { report_result(result, event_recorder).await } }, ) .await; @@ -192,6 +190,48 @@ async fn reconcile(pod: Arc>, ctx: Arc) -> Result>, Action), + controller::Error, + >, + event_recorder: Arc, +) { + if let Err(controller::Error::ReconcilerFailed( + Error::EvictPod { + source: evict_pod_error, + }, + _, + )) = &result + { + const TOO_MANY_REQUESTS_HTTP_CODE: u16 = StatusCode::TOO_MANY_REQUESTS.as_u16(); + if let 
kube::Error::Api(ErrorResponse { + code: TOO_MANY_REQUESTS_HTTP_CODE, + .. + }) = evict_pod_error + { + tracing::info!( + ?evict_pod_error, + "Tried to evict Pod, but wasn't allowed to do so, as it would violate the Pod's disruption budget. Retrying later" + ); + // Already logged at info level above; skip the generic result reporting below. + return; + } + } + + report_controller_reconciled(&event_recorder, FULL_CONTROLLER_NAME, &result).await; +} + fn error_policy(_obj: Arc>, _error: &Error, _ctx: Arc) -> Action { Action::requeue(Duration::from_secs(5)) }