diff --git a/CHANGELOG.md b/CHANGELOG.md index d027bd4c..537784bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti ## unreleased +* [FEATURE] [#893](https://github.com/k8ssandra/cass-operator/issues/893) Add support for maxUnavailable (Kubernetes 1.35 and up). This allows to make changes to the Cassandra pods in parallel, thus speeding up changes in larger clusters. Allows integer or percentage setting, but will never target more than one rack at a time. * [ENHANCEMENT] [#888](https://github.com/k8ssandra/cass-operator/issues/888) Add new metrics around all calls to the mgmt-api. This allows to track if some calls are taking longer to execute than expected. ## v1.29.1 diff --git a/apis/cassandra/v1beta1/cassandradatacenter_types.go b/apis/cassandra/v1beta1/cassandradatacenter_types.go index a000a785..83aeaa1d 100644 --- a/apis/cassandra/v1beta1/cassandradatacenter_types.go +++ b/apis/cassandra/v1beta1/cassandradatacenter_types.go @@ -14,6 +14,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/validation" ) @@ -281,6 +282,11 @@ type CassandraDatacenterSpec struct { // Setting to 0 might cause multiple Cassandra pods to restart at the same time despite PodDisruptionBudget settings. MinReadySeconds *int32 `json:"minReadySeconds,omitempty"` + // MaxUnavailable sets the maximum number of rack pods that can be modified simultaneously during an update. This can at most target a single rack, so values higher than rack size will have no effect. Requires Kubernetes 1.35 or higher. Setting percentage will + // calculate against single rack's percentage of pods, not the entire datacenter. + // +kubebuilder:validation:XIntOrString + MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"` + // ReadOnlyRootFilesystem makes the cassandra container to be run with a read-only root filesystem. This is enabled by default when using OSS Cassandra 4.1.0 and or newer, DSE 6.8 and newer (from datastax/dse-mgmtapi-6_8 repository) or HCD. // If serverImage override is used, this setting defaults to false. ReadOnlyRootFilesystem *bool `json:"readOnlyRootFilesystem,omitempty"` diff --git a/apis/cassandra/v1beta1/zz_generated.deepcopy.go b/apis/cassandra/v1beta1/zz_generated.deepcopy.go index a1b4a073..4af796e9 100644 --- a/apis/cassandra/v1beta1/zz_generated.deepcopy.go +++ b/apis/cassandra/v1beta1/zz_generated.deepcopy.go @@ -24,6 +24,7 @@ import ( "encoding/json" "k8s.io/api/core/v1" runtime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/intstr" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. @@ -362,6 +363,11 @@ func (in *CassandraDatacenterSpec) DeepCopyInto(out *CassandraDatacenterSpec) { *out = new(int32) **out = **in } + if in.MaxUnavailable != nil { + in, out := &in.MaxUnavailable, &out.MaxUnavailable + *out = new(intstr.IntOrString) + **out = **in + } if in.ReadOnlyRootFilesystem != nil { in, out := &in.ReadOnlyRootFilesystem, &out.ReadOnlyRootFilesystem *out = new(bool) diff --git a/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml b/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml index 26a2dda9..e7754077 100644 --- a/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml +++ b/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml @@ -350,6 +350,14 @@ spec: - serverSecretName type: object type: object + maxUnavailable: + anyOf: + - type: integer + - type: string + description: |- + MaxUnavailable sets the maximum number of rack pods that can be modified simultaneously during an update. This can at most target a single rack, so values higher than rack size will have no effect. Requires Kubernetes 1.35 or higher. Setting percentage will + calculate against single rack's percentage of pods, not the entire datacenter. + x-kubernetes-int-or-string: true minReadySeconds: description: |- MinReadySeconds sets the minimum number of seconds for which a newly created pod should be ready without any of its containers crashing, for it to be considered available. Defaults to 5 seconds and is set in the StatefulSet spec. diff --git a/pkg/reconciliation/construct_statefulset.go b/pkg/reconciliation/construct_statefulset.go index f10d4733..ff67cc44 100644 --- a/pkg/reconciliation/construct_statefulset.go +++ b/pkg/reconciliation/construct_statefulset.go @@ -158,21 +158,8 @@ func newStatefulSetForCassandraDatacenter( result.Spec.ServiceName = sts.Spec.ServiceName } - if dc.Spec.CanaryUpgrade { - var partition int32 - if dc.Spec.CanaryUpgradeCount == 0 || dc.Spec.CanaryUpgradeCount > replicaCountInt32 { - partition = replicaCountInt32 - } else { - partition = replicaCountInt32 - dc.Spec.CanaryUpgradeCount - } - - strategy := appsv1.StatefulSetUpdateStrategy{ - Type: appsv1.RollingUpdateStatefulSetStrategyType, - RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{ - Partition: &partition, - }, - } - result.Spec.UpdateStrategy = strategy + if strategy := buildStatefulSetUpdateStrategy(dc, replicaCountInt32); strategy != nil { + result.Spec.UpdateStrategy = *strategy } if dc.Spec.MinReadySeconds != nil { @@ -185,6 +172,31 @@ func newStatefulSetForCassandraDatacenter( return result, nil } +func buildStatefulSetUpdateStrategy(dc *api.CassandraDatacenter, replicaCount int32) *appsv1.StatefulSetUpdateStrategy { + if !dc.Spec.CanaryUpgrade && dc.Spec.MaxUnavailable == nil { + return nil + } + + rollingUpdate := &appsv1.RollingUpdateStatefulSetStrategy{ + MaxUnavailable: dc.Spec.MaxUnavailable, + } + + if dc.Spec.CanaryUpgrade { + var partition int32 + if dc.Spec.CanaryUpgradeCount == 0 || dc.Spec.CanaryUpgradeCount > replicaCount { + partition = replicaCount + } else { + partition = replicaCount - dc.Spec.CanaryUpgradeCount + } + rollingUpdate.Partition = &partition + } + + return &appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.RollingUpdateStatefulSetStrategyType, + RollingUpdate: rollingUpdate, + } +} + func legacyInternodeMount(dc *api.CassandraDatacenter, sts *appsv1.StatefulSet) bool { if serverconfig.LegacyInternodeEnabled(dc) { return true diff --git a/pkg/reconciliation/construct_statefulset_test.go b/pkg/reconciliation/construct_statefulset_test.go index 28061926..50f4ec6b 100644 --- a/pkg/reconciliation/construct_statefulset_test.go +++ b/pkg/reconciliation/construct_statefulset_test.go @@ -14,6 +14,7 @@ import ( "github.com/stretchr/testify/require" appsv1 "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" @@ -770,6 +771,95 @@ func TestMinReadySecondsChange(t *testing.T) { assert.Equal(int32(10), sts.Spec.MinReadySeconds) } +func TestMaxUnavailableChange(t *testing.T) { + tests := []struct { + name string + maxUnavailable intstr.IntOrString + expectedRolling *appsv1.RollingUpdateStatefulSetStrategy + }{ + { + name: "integer", + maxUnavailable: intstr.FromInt32(1), + expectedRolling: &appsv1.RollingUpdateStatefulSetStrategy{ + MaxUnavailable: ptr.To(intstr.FromInt32(1)), + }, + }, + { + name: "percentage", + maxUnavailable: intstr.Parse("25%"), + expectedRolling: &appsv1.RollingUpdateStatefulSetStrategy{ + MaxUnavailable: ptr.To(intstr.Parse("25%")), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dc := &api.CassandraDatacenter{ + Spec: api.CassandraDatacenterSpec{ + ClusterName: "test", + ServerType: "cassandra", + ServerVersion: "4.0.7", + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{}, + }, + Racks: []api.Rack{ + { + Name: "r1", + }, + }, + PodTemplateSpec: &corev1.PodTemplateSpec{}, + MaxUnavailable: ptr.To(tt.maxUnavailable), + }, + } + + sts, err := newStatefulSetForCassandraDatacenter(nil, dc.Spec.Racks[0].Name, dc, 3, imageRegistry) + require.NoError(t, err, "failed to build statefulset") + + expectedStrategy := appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.RollingUpdateStatefulSetStrategyType, + RollingUpdate: tt.expectedRolling, + } + assert.Equal(t, expectedStrategy, sts.Spec.UpdateStrategy) + }) + } +} + +func TestMaxUnavailableMergedWithCanaryUpgrade(t *testing.T) { + dc := &api.CassandraDatacenter{ + Spec: api.CassandraDatacenterSpec{ + ClusterName: "test", + ServerType: "cassandra", + ServerVersion: "4.0.7", + CanaryUpgrade: true, + CanaryUpgradeCount: 1, + MaxUnavailable: ptr.To(intstr.Parse("25%")), + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{}, + }, + Racks: []api.Rack{ + { + Name: "r1", + }, + }, + PodTemplateSpec: &corev1.PodTemplateSpec{}, + }, + } + + sts, err := newStatefulSetForCassandraDatacenter(nil, dc.Spec.Racks[0].Name, dc, 3, imageRegistry) + require.NoError(t, err, "failed to build statefulset") + + expectedStrategy := appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.RollingUpdateStatefulSetStrategyType, + RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{ + Partition: ptr.To(int32(2)), + MaxUnavailable: ptr.To(intstr.Parse("25%")), + }, + } + + assert.Equal(t, expectedStrategy, sts.Spec.UpdateStrategy) +} + func TestAddManagementApiServerSecurity(t *testing.T) { require := require.New(t) dc := &api.CassandraDatacenter{ diff --git a/pkg/reconciliation/constructor.go b/pkg/reconciliation/constructor.go index 10ffe6be..b1dc6be3 100644 --- a/pkg/reconciliation/constructor.go +++ b/pkg/reconciliation/constructor.go @@ -23,6 +23,27 @@ import ( // newPodDisruptionBudgetForDatacenter creates a PodDisruptionBudget object for the Datacenter func newPodDisruptionBudgetForDatacenter(dc *api.CassandraDatacenter) *policyv1.PodDisruptionBudget { minAvailable := intstr.FromInt(int(dc.Spec.Size - 1)) + + if dc.Spec.MaxUnavailable != nil { + racks := dc.GetRacks() + rackNodeCounts := api.SplitRacks(int(dc.Spec.Size), len(racks)) + maxRackNodeCount := 0 + for _, rackNodeCount := range rackNodeCounts { + if rackNodeCount > maxRackNodeCount { + maxRackNodeCount = rackNodeCount + } + } + + if maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(dc.Spec.MaxUnavailable, maxRackNodeCount, true); err == nil { + if maxUnavailable > maxRackNodeCount { + maxUnavailable = maxRackNodeCount + } + calculatedMinAvailable := int(dc.Spec.Size) - maxUnavailable + minAvailable = intstr.FromInt(calculatedMinAvailable) + } + // If err was not nil, we'll stick to the original minAvailable of size-1 + } + labels := dc.GetDatacenterLabels() oplabels.AddOperatorLabels(labels, dc) selectorLabels := dc.GetDatacenterLabels() diff --git a/pkg/reconciliation/constructor_test.go b/pkg/reconciliation/constructor_test.go index eee018f8..b2166ba1 100644 --- a/pkg/reconciliation/constructor_test.go +++ b/pkg/reconciliation/constructor_test.go @@ -6,6 +6,8 @@ import ( api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" ) func TestPodDisruptionBudget(t *testing.T) { @@ -29,3 +31,47 @@ func TestPodDisruptionBudget(t *testing.T) { assert.Equal("dc1", pdb.Spec.Selector.MatchLabels["cassandra.datastax.com/datacenter"]) assert.Equal(pdb.Spec.MinAvailable.IntVal, dc.Spec.Size-1) } + +func TestPodDisruptionBudgetIntMaxUnavailable(t *testing.T) { + assert := assert.New(t) + + dc := &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dc1", + Namespace: "test", + }, + Spec: api.CassandraDatacenterSpec{ + Size: 6, + MaxUnavailable: ptr.To(intstr.FromInt(2)), + }, + } + + pdb := newPodDisruptionBudgetForDatacenter(dc) + assert.Equal(int32(4), pdb.Spec.MinAvailable.IntVal) +} + +func TestPodDisruptionBudgetPercentageMaxUnavailable(t *testing.T) { + assert := assert.New(t) + + dc := &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dc1", + Namespace: "test", + }, + Spec: api.CassandraDatacenterSpec{ + Size: 6, + Racks: []api.Rack{ + {Name: "rack1"}, + {Name: "rack2"}, + }, + MaxUnavailable: ptr.To(intstr.Parse("50%")), + }, + } + + pdb := newPodDisruptionBudgetForDatacenter(dc) + assert.Equal(int32(4), pdb.Spec.MinAvailable.IntVal) // This was roundup + + dc.Spec.MaxUnavailable = ptr.To(intstr.Parse("100%")) + pdb = newPodDisruptionBudgetForDatacenter(dc) + assert.Equal(int32(3), pdb.Spec.MinAvailable.IntVal) +}