From 0cdb012d3624675805348a95c1e4598d454bb57b Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Tue, 31 Mar 2026 17:27:39 +0300 Subject: [PATCH 1/3] Add PVC shrink and invalid maxUnavailable string validations to webhook --- CHANGELOG.md | 2 + .../v1beta1/cassandradatacenter_webhook.go | 18 +++ .../cassandradatacenter_webhook_test.go | 109 ++++++++++++++++++ 3 files changed, 129 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f17038a7..a76fc377 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti ## unreleased +* [ENHANCEMENT] [#912](https://github.com/k8ssandra/cass-operator/issues/912) Add new webhook validations for maxUnavailable string format as well as PVC sizes + ## v1.30.0 * [CHANGE] [#905](https://github.com/k8ssandra/cass-operator/issues/905) Relax the ServerVersion checks to structure only without separating OSS/DSE/HCD. diff --git a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go index 501a592a..5305086e 100644 --- a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go +++ b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go @@ -12,6 +12,7 @@ import ( apiequality "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/intstr" ctrl "sigs.k8s.io/controller-runtime" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook" @@ -19,6 +20,7 @@ import ( "github.com/google/go-cmp/cmp" api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" + corev1 "k8s.io/api/core/v1" ) // log is for logging in this package. @@ -134,6 +136,13 @@ func ValidateSingleDatacenter(dc *api.CassandraDatacenter) error { return attemptedTo("define config dse-yaml with %s", serverStr) } + if dc.Spec.MaxUnavailable != nil { + maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(dc.Spec.MaxUnavailable, int(dc.Spec.Size), true) + if err != nil || maxUnavailable < 0 { + return attemptedTo("use invalid maxUnavailable value '%s'", dc.Spec.MaxUnavailable.String()) + } + } + // if using multiple nodes per worker, requests and limits should be set for both cpu and memory if dc.Spec.AllowMultipleNodesPerWorker { if dc.Spec.Resources.Requests.Cpu().IsZero() || @@ -180,6 +189,15 @@ func ValidateDatacenterFieldChanges(oldDc *api.CassandraDatacenter, newDc *api.C oldClaimSpec := oldDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec.DeepCopy() newClaimSpec := newDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec.DeepCopy() + if oldClaimSpec != nil && newClaimSpec != nil { + oldStorageRequest := oldClaimSpec.Resources.Requests[corev1.ResourceStorage] + newStorageRequest := newClaimSpec.Resources.Requests[corev1.ResourceStorage] + + if oldStorageRequest.Cmp(newStorageRequest) > 0 { + return attemptedTo( + "shrink storageConfig.CassandraDataVolumeClaimSpec from %s to %s", oldStorageRequest.String(), newStorageRequest.String()) + } + } // CassandraDataVolumeClaimSpec changes are disallowed if metav1.HasAnnotation(newDc.ObjectMeta, api.AllowStorageChangesAnnotation) && newDc.Annotations[api.AllowStorageChangesAnnotation] == "true" { diff --git a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go index 37aeae86..99c79498 100644 --- a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go +++ b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go @@ -11,6 +11,7 @@ import ( api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" "github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" corev1 "k8s.io/api/core/v1" @@ -127,6 +128,36 @@ func Test_ValidateSingleDatacenter(t *testing.T) { }, errString: "", }, + { + name: "Valid maxUnavailable percentage", + dc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + ServerType: "cassandra", + ServerVersion: "5.0.0", + Size: 3, + MaxUnavailable: ptr.To(intstr.Parse("50%")), + }, + }, + errString: "", + }, + { + name: "Invalid maxUnavailable string", + dc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + ServerType: "cassandra", + ServerVersion: "5.0.0", + Size: 3, + MaxUnavailable: ptr.To(intstr.FromString("invalid")), + }, + }, + errString: "attempted to use invalid maxUnavailable value 'invalid'", + }, { name: "Dse Workloads in Cassandra Invalid", dc: &api.CassandraDatacenter{ @@ -676,6 +707,84 @@ func Test_ValidateDatacenterFieldChanges(t *testing.T) { }, errString: "", }, + { + name: "storage requests size shrink is rejected even when storage changes are allowed", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + StorageClassName: storageName, + AccessModes: []corev1.PersistentVolumeAccessMode{"ReadWriteOnce"}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{"storage": resource.MustParse("2Gi")}, + }, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + Annotations: map[string]string{ + api.AllowStorageChangesAnnotation: "true", + }, + }, + Spec: api.CassandraDatacenterSpec{ + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + StorageClassName: storageName, + AccessModes: []corev1.PersistentVolumeAccessMode{"ReadWriteOnce"}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{"storage": resource.MustParse("1Gi")}, + }, + }, + }, + }, + }, + errString: "shrink storageConfig.CassandraDataVolumeClaimSpec from 2Gi to 1Gi", + }, + { + name: "storage requests have different unit but same size", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + StorageClassName: storageName, + AccessModes: []corev1.PersistentVolumeAccessMode{"ReadWriteOnce"}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{"storage": resource.MustParse("1024Mi")}, + }, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + Annotations: map[string]string{ + api.AllowStorageChangesAnnotation: "true", + }, + }, + Spec: api.CassandraDatacenterSpec{ + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + StorageClassName: storageName, + AccessModes: []corev1.PersistentVolumeAccessMode{"ReadWriteOnce"}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{"storage": resource.MustParse("1Gi")}, + }, + }, + }, + }, + }, + errString: "", + }, { name: "Removing a rack", oldDc: &api.CassandraDatacenter{ From 3ef7f3ff5a219fe924f5d33a26c7657cb8fc081c Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Wed, 15 Apr 2026 18:09:52 +0300 Subject: [PATCH 2/3] Add validation to prevent scaling up / scaling down if previous scaling operation has not completed yet --- CHANGELOG.md | 1 + .../v1beta1/cassandradatacenter_types.go | 3 + .../v1beta1/cassandradatacenter_webhook.go | 18 ++ .../cassandradatacenter_webhook_test.go | 166 ++++++++++++++++++ 4 files changed, 188 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a76fc377..98d308f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti ## unreleased * [ENHANCEMENT] [#912](https://github.com/k8ssandra/cass-operator/issues/912) Add new webhook validations for maxUnavailable string format as well as PVC sizes +* [ENHANCEMENT] [#902](https://github.com/k8ssandra/cass-operator/issues/902) If scaling down or scaling up process is still ongoing, the webhook will prevent changing the cluster size. ## v1.30.0 diff --git a/apis/cassandra/v1beta1/cassandradatacenter_types.go b/apis/cassandra/v1beta1/cassandradatacenter_types.go index df0e23f4..62f23758 100644 --- a/apis/cassandra/v1beta1/cassandradatacenter_types.go +++ b/apis/cassandra/v1beta1/cassandradatacenter_types.go @@ -88,6 +88,9 @@ const ( // EnableParallelCleanupWithinRackAnnotation speeds up post-scale-out cleanup by processing nodes in parallel within a rack. EnableParallelCleanupWithinRackAnnotation = "cassandra.datastax.com/enable-parallel-cleanup-within-rack" + // BypassWebhookValidationsAnnotation allows bypassing CassandraDatacenter update webhook validations + BypassWebhookValidationsAnnotation = "cassandra.datastax.com/bypass-webhook-validations" + AllowUpdateAlways AllowUpdateType = "always" AllowUpdateOnce AllowUpdateType = "once" diff --git a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go index 5305086e..99766b2a 100644 --- a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go +++ b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go @@ -87,6 +87,13 @@ func (v *CassandraDatacenterCustomValidator) ValidateUpdate(ctx context.Context, log.Info("Validation for CassandraDatacenter upon update", "name", dc.GetName()) + if metav1.HasAnnotation(dc.ObjectMeta, api.BypassWebhookValidationsAnnotation) && + dc.Annotations[api.BypassWebhookValidationsAnnotation] == "true" { + log.Info("Webhook validations bypassed with annotation") + + return nil, nil + } + if err := ValidateSingleDatacenter(dc); err != nil { return nil, err } @@ -210,6 +217,17 @@ func ValidateDatacenterFieldChanges(oldDc *api.CassandraDatacenter, newDc *api.C return attemptedTo("change storageConfig.CassandraDataVolumeClaimSpec, diff: %s", pvcSourceDiff) } + if oldDc.Spec.Size != newDc.Spec.Size { + if oldDc.GetConditionStatus(api.DatacenterScalingUp) == corev1.ConditionTrue { + return attemptedTo("change size while datacenter is still scaling up") + } + + if oldDc.GetConditionStatus(api.DatacenterScalingDown) == corev1.ConditionTrue { + // Here we don't want to allow any changes as scaling down is a lot heavier operation + return attemptedTo("change size while datacenter is still scaling down") + } + } + // Topology changes - Racks // - Rack Name and Zone changes are disallowed. // - Removing racks is not supported. diff --git a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go index 99c79498..b805a835 100644 --- a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go +++ b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go @@ -4,6 +4,7 @@ package v1beta1 import ( + "context" "encoding/json" "strings" "testing" @@ -841,6 +842,74 @@ func Test_ValidateDatacenterFieldChanges(t *testing.T) { }, errString: "", }, + { + name: "Scaling is rejected while datacenter is still scaling up", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 3, + }, + Status: api.CassandraDatacenterStatus{ + Conditions: []api.DatacenterCondition{ + { + Type: api.DatacenterScalingUp, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 6, + }, + }, + errString: "change size while datacenter is still scaling up", + }, + { + name: "Scaling is rejected while datacenter is still scaling down", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 6, + }, + Status: api.CassandraDatacenterStatus{ + Conditions: []api.DatacenterCondition{ + { + Type: api.DatacenterScalingDown, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 3, + }, + }, + errString: "change size while datacenter is still scaling down", + }, { name: "Changed a rack name", oldDc: &api.CassandraDatacenter{ @@ -1029,6 +1098,103 @@ func Test_ValidateDatacenterFieldChanges(t *testing.T) { } } +func TestValidateUpdateBypassAnnotation(t *testing.T) { + validator := &CassandraDatacenterCustomValidator{} + + tests := []struct { + name string + oldDc *api.CassandraDatacenter + newDc *api.CassandraDatacenter + errString string + }{ + { + name: "Scaling up even more is rejected while datacenter is still scaling up without bypass annotation", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 6, + }, + Status: api.CassandraDatacenterStatus{ + Conditions: []api.DatacenterCondition{ + { + Type: api.DatacenterScalingUp, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 9, + }, + }, + errString: "change size while datacenter is still scaling up", + }, + { + name: "Scaling up even more is allowed if bypass is set while datacenter is still scaling up from previous operation", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 6, + }, + Status: api.CassandraDatacenterStatus{ + Conditions: []api.DatacenterCondition{ + { + Type: api.DatacenterScalingUp, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + Annotations: map[string]string{ + api.BypassWebhookValidationsAnnotation: "true", + }, + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 9, + }, + }, + errString: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + warnings, err := validator.ValidateUpdate(context.Background(), tt.oldDc, tt.newDc) + assert.Empty(t, warnings) + + if tt.errString == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.errString) + } + }) + } +} + var fqlEnabledConfig string = `{"cassandra-yaml": { "full_query_logging_options": { "log_dir": "/var/log/cassandra/fql" From cfe4bbb14ec80e31651fe0dccbc47bb048a22ebd Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Fri, 17 Apr 2026 10:27:34 +0300 Subject: [PATCH 3/3] Add proper checks to the test scaling that it has finished before trying to scale again --- tests/scale_up_stop_resume/scale_up_park_unpark_suite_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/scale_up_stop_resume/scale_up_park_unpark_suite_test.go b/tests/scale_up_stop_resume/scale_up_park_unpark_suite_test.go index 6e689410..93630478 100644 --- a/tests/scale_up_stop_resume/scale_up_park_unpark_suite_test.go +++ b/tests/scale_up_stop_resume/scale_up_park_unpark_suite_test.go @@ -76,6 +76,8 @@ var _ = Describe(testName, func() { FormatOutput(json) ns.WaitForOutputAndLog(step, k, "true true true true", 1200) + ns.WaitForDatacenterReady(dcName) + step = "scale up to 5 nodes" json = "{\"spec\": {\"size\": 5}}" k = kubectl.PatchMerge(dcResource, json) @@ -89,6 +91,8 @@ var _ = Describe(testName, func() { FormatOutput(json) ns.WaitForOutputAndLog(step, k, "true true true true true", 1200) + ns.WaitForDatacenterReady(dcName) + step = "stopping the dc" json = "{\"spec\": {\"stopped\": true}}" k = kubectl.PatchMerge(dcResource, json)