From eeacac91f2c7d66e8afc77daaf1d5f806445f757 Mon Sep 17 00:00:00 2001
From: Michael Burman
Date: Fri, 20 Mar 2026 15:44:36 +0200
Subject: [PATCH 1/3] Add support for maxUnavailable in the Spec for
 Kubernetes 1.35 and up.

---
 CHANGELOG.md                                  |  1 +
 .../v1beta1/cassandradatacenter_types.go      |  5 ++
 .../v1beta1/zz_generated.deepcopy.go          |  6 ++
 ...dra.datastax.com_cassandradatacenters.yaml |  9 ++
 pkg/reconciliation/construct_statefulset.go   | 42 +++++----
 .../construct_statefulset_test.go             | 90 +++++++++++++++++++
 6 files changed, 138 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d027bd4c..537784bf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti
 
 ## unreleased
 
+* [FEATURE] [#893](https://github.com/k8ssandra/cass-operator/issues/893) Add support for maxUnavailable (Kubernetes 1.35 and up). This allows modifying Cassandra pods in parallel, speeding up changes in larger clusters. Accepts an integer or a percentage, but never targets more than one rack at a time.
 * [ENHANCEMENT] [#888](https://github.com/k8ssandra/cass-operator/issues/888) Add new metrics around all calls to the mgmt-api. This allows to track if some calls are taking longer to execute than expected.
 
 ## v1.29.1
diff --git a/apis/cassandra/v1beta1/cassandradatacenter_types.go b/apis/cassandra/v1beta1/cassandradatacenter_types.go
index a000a785..39f9aeb3 100644
--- a/apis/cassandra/v1beta1/cassandradatacenter_types.go
+++ b/apis/cassandra/v1beta1/cassandradatacenter_types.go
@@ -14,6 +14,7 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/validation"
 )
 
@@ -281,6 +282,10 @@ type CassandraDatacenterSpec struct {
 	// Setting to 0 might cause multiple Cassandra pods to restart at the same time despite PodDisruptionBudget settings.
 	MinReadySeconds *int32 `json:"minReadySeconds,omitempty"`
 
+	// MaxUnavailable sets the maximum number of pods that can be modified simultaneously during an update. This can at most target a single rack, so values higher than rack size will have no effect. Requires Kubernetes 1.35 or higher.
+	// +kubebuilder:validation:XIntOrString
+	MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`
+
 	// ReadOnlyRootFilesystem makes the cassandra container to be run with a read-only root filesystem. This is enabled by default when using OSS Cassandra 4.1.0 and or newer, DSE 6.8 and newer (from datastax/dse-mgmtapi-6_8 repository) or HCD.
 	// If serverImage override is used, this setting defaults to false.
 	ReadOnlyRootFilesystem *bool `json:"readOnlyRootFilesystem,omitempty"`
diff --git a/apis/cassandra/v1beta1/zz_generated.deepcopy.go b/apis/cassandra/v1beta1/zz_generated.deepcopy.go
index a1b4a073..4af796e9 100644
--- a/apis/cassandra/v1beta1/zz_generated.deepcopy.go
+++ b/apis/cassandra/v1beta1/zz_generated.deepcopy.go
@@ -24,6 +24,7 @@ import (
 	"encoding/json"
 	"k8s.io/api/core/v1"
 	runtime "k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/intstr"
 )
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
@@ -362,6 +363,11 @@ func (in *CassandraDatacenterSpec) DeepCopyInto(out *CassandraDatacenterSpec) {
 		*out = new(int32)
 		**out = **in
 	}
+	if in.MaxUnavailable != nil {
+		in, out := &in.MaxUnavailable, &out.MaxUnavailable
+		*out = new(intstr.IntOrString)
+		**out = **in
+	}
 	if in.ReadOnlyRootFilesystem != nil {
 		in, out := &in.ReadOnlyRootFilesystem, &out.ReadOnlyRootFilesystem
 		*out = new(bool)
diff --git a/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml b/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml
index 26a2dda9..83672e4e 100644
--- a/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml
+++ b/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml
@@ -350,6 +350,15 @@ spec:
                 - serverSecretName
                 type: object
               type: object
+            maxUnavailable:
+              anyOf:
+              - type: integer
+              - type: string
+              description: MaxUnavailable sets the maximum number of pods that can
+                be modified simultaneously during an update. This can at most target
+                a single rack, so values higher than rack size will have no effect.
+                Requires Kubernetes 1.35 or higher.
+              x-kubernetes-int-or-string: true
             minReadySeconds:
               description: |-
                 MinReadySeconds sets the minimum number of seconds for which a newly created pod should be ready without any of its containers crashing, for it to be considered available. Defaults to 5 seconds and is set in the StatefulSet spec.
diff --git a/pkg/reconciliation/construct_statefulset.go b/pkg/reconciliation/construct_statefulset.go
index f10d4733..ff67cc44 100644
--- a/pkg/reconciliation/construct_statefulset.go
+++ b/pkg/reconciliation/construct_statefulset.go
@@ -158,21 +158,8 @@ func newStatefulSetForCassandraDatacenter(
 		result.Spec.ServiceName = sts.Spec.ServiceName
 	}
 
-	if dc.Spec.CanaryUpgrade {
-		var partition int32
-		if dc.Spec.CanaryUpgradeCount == 0 || dc.Spec.CanaryUpgradeCount > replicaCountInt32 {
-			partition = replicaCountInt32
-		} else {
-			partition = replicaCountInt32 - dc.Spec.CanaryUpgradeCount
-		}
-
-		strategy := appsv1.StatefulSetUpdateStrategy{
-			Type: appsv1.RollingUpdateStatefulSetStrategyType,
-			RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{
-				Partition: &partition,
-			},
-		}
-		result.Spec.UpdateStrategy = strategy
+	if strategy := buildStatefulSetUpdateStrategy(dc, replicaCountInt32); strategy != nil {
+		result.Spec.UpdateStrategy = *strategy
 	}
 
 	if dc.Spec.MinReadySeconds != nil {
@@ -185,6 +172,31 @@ func newStatefulSetForCassandraDatacenter(
 	return result, nil
 }
 
+func buildStatefulSetUpdateStrategy(dc *api.CassandraDatacenter, replicaCount int32) *appsv1.StatefulSetUpdateStrategy {
+	if !dc.Spec.CanaryUpgrade && dc.Spec.MaxUnavailable == nil {
+		return nil
+	}
+
+	rollingUpdate := &appsv1.RollingUpdateStatefulSetStrategy{
+		MaxUnavailable: dc.Spec.MaxUnavailable,
+	}
+
+	if dc.Spec.CanaryUpgrade {
+		var partition int32
+		if dc.Spec.CanaryUpgradeCount == 0 || dc.Spec.CanaryUpgradeCount > replicaCount {
+			partition = replicaCount
+		} else {
+			partition = replicaCount - dc.Spec.CanaryUpgradeCount
+		}
+		rollingUpdate.Partition = &partition
+	}
+
+	return &appsv1.StatefulSetUpdateStrategy{
+		Type:          appsv1.RollingUpdateStatefulSetStrategyType,
+		RollingUpdate: rollingUpdate,
+	}
+}
+
 func legacyInternodeMount(dc *api.CassandraDatacenter, sts *appsv1.StatefulSet) bool {
 	if serverconfig.LegacyInternodeEnabled(dc) {
 		return true
diff --git a/pkg/reconciliation/construct_statefulset_test.go b/pkg/reconciliation/construct_statefulset_test.go
index 28061926..50f4ec6b 100644
--- a/pkg/reconciliation/construct_statefulset_test.go
+++ b/pkg/reconciliation/construct_statefulset_test.go
@@ -14,6 +14,7 @@ import (
 	"github.com/stretchr/testify/require"
 	appsv1 "k8s.io/api/apps/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/utils/ptr"
 
 	api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
@@ -770,6 +771,95 @@ func TestMinReadySecondsChange(t *testing.T) {
 	assert.Equal(int32(10), sts.Spec.MinReadySeconds)
 }
 
+func TestMaxUnavailableChange(t *testing.T) {
+	tests := []struct {
+		name            string
+		maxUnavailable  intstr.IntOrString
+		expectedRolling *appsv1.RollingUpdateStatefulSetStrategy
+	}{
+		{
+			name:           "integer",
+			maxUnavailable: intstr.FromInt32(1),
+			expectedRolling: &appsv1.RollingUpdateStatefulSetStrategy{
+				MaxUnavailable: ptr.To(intstr.FromInt32(1)),
+			},
+		},
+		{
+			name:           "percentage",
+			maxUnavailable: intstr.Parse("25%"),
+			expectedRolling: &appsv1.RollingUpdateStatefulSetStrategy{
+				MaxUnavailable: ptr.To(intstr.Parse("25%")),
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			dc := &api.CassandraDatacenter{
+				Spec: api.CassandraDatacenterSpec{
+					ClusterName:   "test",
+					ServerType:    "cassandra",
+					ServerVersion: "4.0.7",
+					StorageConfig: api.StorageConfig{
+						CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{},
+					},
+					Racks: []api.Rack{
+						{
+							Name: "r1",
+						},
+					},
+					PodTemplateSpec: &corev1.PodTemplateSpec{},
+					MaxUnavailable:  ptr.To(tt.maxUnavailable),
+				},
+			}
+
+			sts, err := newStatefulSetForCassandraDatacenter(nil, dc.Spec.Racks[0].Name, dc, 3, imageRegistry)
+			require.NoError(t, err, "failed to build statefulset")
+
+			expectedStrategy := appsv1.StatefulSetUpdateStrategy{
+				Type:          appsv1.RollingUpdateStatefulSetStrategyType,
+				RollingUpdate: tt.expectedRolling,
+			}
+			assert.Equal(t, expectedStrategy, sts.Spec.UpdateStrategy)
+		})
+	}
+}
+
+func TestMaxUnavailableMergedWithCanaryUpgrade(t *testing.T) {
+	dc := &api.CassandraDatacenter{
+		Spec: api.CassandraDatacenterSpec{
+			ClusterName:        "test",
+			ServerType:         "cassandra",
+			ServerVersion:      "4.0.7",
+			CanaryUpgrade:      true,
+			CanaryUpgradeCount: 1,
+			MaxUnavailable:     ptr.To(intstr.Parse("25%")),
+			StorageConfig: api.StorageConfig{
+				CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{},
+			},
+			Racks: []api.Rack{
+				{
+					Name: "r1",
+				},
+			},
+			PodTemplateSpec: &corev1.PodTemplateSpec{},
+		},
+	}
+
+	sts, err := newStatefulSetForCassandraDatacenter(nil, dc.Spec.Racks[0].Name, dc, 3, imageRegistry)
+	require.NoError(t, err, "failed to build statefulset")
+
+	expectedStrategy := appsv1.StatefulSetUpdateStrategy{
+		Type: appsv1.RollingUpdateStatefulSetStrategyType,
+		RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{
+			Partition:      ptr.To(int32(2)),
+			MaxUnavailable: ptr.To(intstr.Parse("25%")),
+		},
+	}
+
+	assert.Equal(t, expectedStrategy, sts.Spec.UpdateStrategy)
+}
+
 func TestAddManagementApiServerSecurity(t *testing.T) {
 	require := require.New(t)
 	dc := &api.CassandraDatacenter{
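Note for reviewers: below is a minimal standalone sketch, not part of the patch series, of the strategy object the new buildStatefulSetUpdateStrategy helper emits when canary upgrades and maxUnavailable are combined. The values mirror TestMaxUnavailableMergedWithCanaryUpgrade above (3 replicas, canary count 1, maxUnavailable 25%), and the imports are the same packages the patch itself uses.

// Sketch: the merged RollingUpdate strategy with both knobs set.
package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
)

func main() {
	maxUnavailable := intstr.Parse("25%") // an integer such as intstr.FromInt32(2) works too
	partition := int32(2)                 // replicas (3) minus canaryUpgradeCount (1)

	strategy := appsv1.StatefulSetUpdateStrategy{
		Type: appsv1.RollingUpdateStatefulSetStrategyType,
		RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{
			Partition:      &partition,
			MaxUnavailable: &maxUnavailable,
		},
	}
	fmt.Printf("%+v\n", strategy)
}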
From 5090ecbeb3e39196ed7bf60d96fb5f945768c289 Mon Sep 17 00:00:00 2001
From: Michael Burman
Date: Mon, 23 Mar 2026 14:16:18 +0200
Subject: [PATCH 2/3] Make PodDisruptionBudget follow the MaxUnavailable
 changes

---
 .../v1beta1/cassandradatacenter_types.go      |  3 +-
 ...dra.datastax.com_cassandradatacenters.yaml |  7 ++-
 pkg/reconciliation/constructor.go             | 19 ++++++
 pkg/reconciliation/constructor_test.go        | 46 +++++++++++++++++++
 4 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/apis/cassandra/v1beta1/cassandradatacenter_types.go b/apis/cassandra/v1beta1/cassandradatacenter_types.go
index 39f9aeb3..83aeaa1d 100644
--- a/apis/cassandra/v1beta1/cassandradatacenter_types.go
+++ b/apis/cassandra/v1beta1/cassandradatacenter_types.go
@@ -282,7 +282,8 @@ type CassandraDatacenterSpec struct {
 	// Setting to 0 might cause multiple Cassandra pods to restart at the same time despite PodDisruptionBudget settings.
 	MinReadySeconds *int32 `json:"minReadySeconds,omitempty"`
 
-	// MaxUnavailable sets the maximum number of pods that can be modified simultaneously during an update. This can at most target a single rack, so values higher than rack size will have no effect. Requires Kubernetes 1.35 or higher.
+	// MaxUnavailable sets the maximum number of rack pods that can be modified simultaneously during an update. This can at most target a single rack, so values higher than rack size will have no effect. Requires Kubernetes 1.35 or higher. A percentage is
+	// calculated against the size of a single rack, not the entire datacenter.
 	// +kubebuilder:validation:XIntOrString
 	MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`
 
diff --git a/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml b/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml
index 83672e4e..e7754077 100644
--- a/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml
+++ b/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml
@@ -354,10 +354,9 @@ spec:
               anyOf:
               - type: integer
               - type: string
-              description: MaxUnavailable sets the maximum number of pods that can
-                be modified simultaneously during an update. This can at most target
-                a single rack, so values higher than rack size will have no effect.
-                Requires Kubernetes 1.35 or higher.
+              description: |-
+                MaxUnavailable sets the maximum number of rack pods that can be modified simultaneously during an update. This can at most target a single rack, so values higher than rack size will have no effect. Requires Kubernetes 1.35 or higher. A percentage is
+                calculated against the size of a single rack, not the entire datacenter.
               x-kubernetes-int-or-string: true
             minReadySeconds:
               description: |-
diff --git a/pkg/reconciliation/constructor.go b/pkg/reconciliation/constructor.go
index 10ffe6be..208bb6c2 100644
--- a/pkg/reconciliation/constructor.go
+++ b/pkg/reconciliation/constructor.go
@@ -23,6 +23,25 @@ import (
 // newPodDisruptionBudgetForDatacenter creates a PodDisruptionBudget object for the Datacenter
 func newPodDisruptionBudgetForDatacenter(dc *api.CassandraDatacenter) *policyv1.PodDisruptionBudget {
 	minAvailable := intstr.FromInt(int(dc.Spec.Size - 1))
+
+	if dc.Spec.MaxUnavailable != nil {
+		racks := dc.GetRacks()
+		rackNodeCounts := api.SplitRacks(int(dc.Spec.Size), len(racks))
+		maxRackNodeCount := 0
+		for _, rackNodeCount := range rackNodeCounts {
+			if rackNodeCount > maxRackNodeCount {
+				maxRackNodeCount = rackNodeCount
+			}
+		}
+
+		if maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(dc.Spec.MaxUnavailable, maxRackNodeCount, true); err == nil {
+			// Even if someone decides to set "200%" or something similarly silly here, it doesn't matter as we only process a single rack when it comes to MaxUnavailable changes
+			calculatedMinAvailable := int(dc.Spec.Size) - maxUnavailable
+			minAvailable = intstr.FromInt(calculatedMinAvailable)
+		}
+		// If err was not nil, we'll stick to the original minAvailable of size-1
+	}
+
 	labels := dc.GetDatacenterLabels()
 	oplabels.AddOperatorLabels(labels, dc)
 	selectorLabels := dc.GetDatacenterLabels()
diff --git a/pkg/reconciliation/constructor_test.go b/pkg/reconciliation/constructor_test.go
index eee018f8..b2166ba1 100644
--- a/pkg/reconciliation/constructor_test.go
+++ b/pkg/reconciliation/constructor_test.go
@@ -6,6 +6,8 @@ import (
 	api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
 	"github.com/stretchr/testify/assert"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/utils/ptr"
 )
 
 func TestPodDisruptionBudget(t *testing.T) {
@@ -29,3 +31,47 @@ func TestPodDisruptionBudget(t *testing.T) {
 	assert.Equal("dc1", pdb.Spec.Selector.MatchLabels["cassandra.datastax.com/datacenter"])
 	assert.Equal(pdb.Spec.MinAvailable.IntVal, dc.Spec.Size-1)
 }
+
+func TestPodDisruptionBudgetIntMaxUnavailable(t *testing.T) {
+	assert := assert.New(t)
+
+	dc := &api.CassandraDatacenter{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "dc1",
+			Namespace: "test",
+		},
+		Spec: api.CassandraDatacenterSpec{
+			Size:           6,
+			MaxUnavailable: ptr.To(intstr.FromInt(2)),
+		},
+	}
+
+	pdb := newPodDisruptionBudgetForDatacenter(dc)
+	assert.Equal(int32(4), pdb.Spec.MinAvailable.IntVal)
+}
+
+func TestPodDisruptionBudgetPercentageMaxUnavailable(t *testing.T) {
+	assert := assert.New(t)
+
+	dc := &api.CassandraDatacenter{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "dc1",
+			Namespace: "test",
+		},
+		Spec: api.CassandraDatacenterSpec{
+			Size: 6,
+			Racks: []api.Rack{
+				{Name: "rack1"},
+				{Name: "rack2"},
+			},
+			MaxUnavailable: ptr.To(intstr.Parse("50%")),
+		},
+	}
+
+	pdb := newPodDisruptionBudgetForDatacenter(dc)
+	assert.Equal(int32(4), pdb.Spec.MinAvailable.IntVal) // 50% of a 3-node rack rounds up to 2
+
+	dc.Spec.MaxUnavailable = ptr.To(intstr.Parse("100%"))
+	pdb = newPodDisruptionBudgetForDatacenter(dc)
+	assert.Equal(int32(3), pdb.Spec.MinAvailable.IntVal)
+}
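Note for reviewers: a minimal standalone sketch, not part of the patches, of the PodDisruptionBudget arithmetic this patch introduces, worked with the same values as TestPodDisruptionBudgetPercentageMaxUnavailable (size 6, two racks of 3, maxUnavailable 50%). The ptrTo helper is local to the sketch; the tests use k8s.io/utils/ptr instead.

// Sketch: scaling a percentage against the largest rack, rounding up.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
)

func ptrTo(v intstr.IntOrString) *intstr.IntOrString { return &v }

func main() {
	size := 6
	maxRackNodeCount := 3 // api.SplitRacks(6, 2) splits 6 nodes evenly into two racks of 3

	// 50% of 3, rounded up: ceil(1.5) = 2 pods may be unavailable.
	maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(ptrTo(intstr.Parse("50%")), maxRackNodeCount, true)
	if err != nil {
		panic(err)
	}

	// The PDB then keeps size - maxUnavailable = 4 pods available.
	fmt.Println(size - maxUnavailable)
}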
From cb2b4be9e8893ca47ce466cdc6e78582356f36e4 Mon Sep 17 00:00:00 2001
From: Michael Burman
Date: Tue, 24 Mar 2026 10:41:40 +0200
Subject: [PATCH 3/3] Cap maxUnavailable at 100% of the largest rack

---
 pkg/reconciliation/constructor.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pkg/reconciliation/constructor.go b/pkg/reconciliation/constructor.go
index 208bb6c2..b1dc6be3 100644
--- a/pkg/reconciliation/constructor.go
+++ b/pkg/reconciliation/constructor.go
@@ -35,7 +35,9 @@ func newPodDisruptionBudgetForDatacenter(dc *api.CassandraDatacenter) *policyv1.
 	}
 
 	if maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(dc.Spec.MaxUnavailable, maxRackNodeCount, true); err == nil {
-		// Even if someone decides to set "200%" or something similarly silly here, it doesn't matter as we only process a single rack when it comes to MaxUnavailable changes
+		if maxUnavailable > maxRackNodeCount {
+			maxUnavailable = maxRackNodeCount
+		}
 		calculatedMinAvailable := int(dc.Spec.Size) - maxUnavailable
 		minAvailable = intstr.FromInt(calculatedMinAvailable)
 	}
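Note for reviewers: a minimal standalone sketch, not part of the patch, of the effect of the clamp added here. An oversized value such as "200%" is now capped at the largest rack size, so the budget can never permit losing more than one full rack; the size and rack values below are illustrative only.

// Sketch: clamping the scaled value before computing minAvailable.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
)

func main() {
	size, maxRackNodeCount := 6, 3
	val := intstr.Parse("200%")

	// 200% of 3 scales to 6 before the clamp.
	maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(&val, maxRackNodeCount, true)
	if err != nil {
		panic(err)
	}
	if maxUnavailable > maxRackNodeCount { // the clamp this patch adds
		maxUnavailable = maxRackNodeCount
	}
	fmt.Println(size - maxUnavailable) // prints 3, not 0
}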