Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti

## unreleased

* [FEATURE] [#893](https://github.com/k8ssandra/cass-operator/issues/893) Add support for maxUnavailable (Kubernetes 1.35 and up). This allows changes to the Cassandra pods to be made in parallel, speeding up rollouts in larger clusters. Accepts an integer or a percentage, but will never target more than one rack at a time.
* [ENHANCEMENT] [#888](https://github.com/k8ssandra/cass-operator/issues/888) Add new metrics around all calls to the mgmt-api. This makes it possible to track whether some calls are taking longer to execute than expected.

## v1.29.1
Expand Down
6 changes: 6 additions & 0 deletions apis/cassandra/v1beta1/cassandradatacenter_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/validation"
)

Expand Down Expand Up @@ -281,6 +282,11 @@ type CassandraDatacenterSpec struct {
// Setting to 0 might cause multiple Cassandra pods to restart at the same time despite PodDisruptionBudget settings.
MinReadySeconds *int32 `json:"minReadySeconds,omitempty"`

// MaxUnavailable sets the maximum number of rack pods that can be modified simultaneously during an update. This can at most target a single rack, so values higher than rack size will have no effect. Requires Kubernetes 1.35 or higher. Setting percentage will
// calculate against single rack's percentage of pods, not the entire datacenter.
// +kubebuilder:validation:XIntOrString
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`

// ReadOnlyRootFilesystem makes the cassandra container to be run with a read-only root filesystem. This is enabled by default when using OSS Cassandra 4.1.0 and or newer, DSE 6.8 and newer (from datastax/dse-mgmtapi-6_8 repository) or HCD.
// If serverImage override is used, this setting defaults to false.
ReadOnlyRootFilesystem *bool `json:"readOnlyRootFilesystem,omitempty"`
Expand Down
6 changes: 6 additions & 0 deletions apis/cassandra/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,14 @@ spec:
- serverSecretName
type: object
type: object
maxUnavailable:
anyOf:
- type: integer
- type: string
description: |-
MaxUnavailable sets the maximum number of rack pods that can be modified simultaneously during an update. This can at most target a single rack, so values higher than rack size will have no effect. Requires Kubernetes 1.35 or higher. Setting percentage will
calculate against single rack's percentage of pods, not the entire datacenter.
x-kubernetes-int-or-string: true
minReadySeconds:
description: |-
MinReadySeconds sets the minimum number of seconds for which a newly created pod should be ready without any of its containers crashing, for it to be considered available. Defaults to 5 seconds and is set in the StatefulSet spec.
Expand Down
42 changes: 27 additions & 15 deletions pkg/reconciliation/construct_statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,21 +158,8 @@ func newStatefulSetForCassandraDatacenter(
result.Spec.ServiceName = sts.Spec.ServiceName
}

if dc.Spec.CanaryUpgrade {
var partition int32
if dc.Spec.CanaryUpgradeCount == 0 || dc.Spec.CanaryUpgradeCount > replicaCountInt32 {
partition = replicaCountInt32
} else {
partition = replicaCountInt32 - dc.Spec.CanaryUpgradeCount
}

strategy := appsv1.StatefulSetUpdateStrategy{
Type: appsv1.RollingUpdateStatefulSetStrategyType,
RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{
Partition: &partition,
},
}
result.Spec.UpdateStrategy = strategy
if strategy := buildStatefulSetUpdateStrategy(dc, replicaCountInt32); strategy != nil {
result.Spec.UpdateStrategy = *strategy
}

if dc.Spec.MinReadySeconds != nil {
Expand All @@ -185,6 +172,31 @@ func newStatefulSetForCassandraDatacenter(
return result, nil
}

// buildStatefulSetUpdateStrategy derives the StatefulSet update strategy from the
// datacenter spec. It returns nil when neither canary upgrades nor maxUnavailable
// are configured, signalling the caller to keep the StatefulSet's default strategy.
//
// When set, spec.maxUnavailable is copied into the rolling update verbatim; when
// canary upgrades are enabled, the partition is positioned so that only the last
// CanaryUpgradeCount replicas are updated (a count of 0 or one exceeding the
// replica count freezes the rollout entirely by partitioning at replicaCount).
func buildStatefulSetUpdateStrategy(dc *api.CassandraDatacenter, replicaCount int32) *appsv1.StatefulSetUpdateStrategy {
	canary := dc.Spec.CanaryUpgrade
	maxUnavailable := dc.Spec.MaxUnavailable

	if !canary && maxUnavailable == nil {
		return nil
	}

	rolling := appsv1.RollingUpdateStatefulSetStrategy{
		MaxUnavailable: maxUnavailable,
	}

	if canary {
		// Default to freezing the whole rollout; narrow the partition only when
		// the canary count is a usable value (non-zero and within replica range).
		partition := replicaCount
		if count := dc.Spec.CanaryUpgradeCount; count != 0 && count <= replicaCount {
			partition = replicaCount - count
		}
		rolling.Partition = &partition
	}

	return &appsv1.StatefulSetUpdateStrategy{
		Type:          appsv1.RollingUpdateStatefulSetStrategyType,
		RollingUpdate: &rolling,
	}
}

func legacyInternodeMount(dc *api.CassandraDatacenter, sts *appsv1.StatefulSet) bool {
if serverconfig.LegacyInternodeEnabled(dc) {
return true
Expand Down
90 changes: 90 additions & 0 deletions pkg/reconciliation/construct_statefulset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/stretchr/testify/require"
appsv1 "k8s.io/api/apps/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"

api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
Expand Down Expand Up @@ -770,6 +771,95 @@ func TestMinReadySecondsChange(t *testing.T) {
assert.Equal(int32(10), sts.Spec.MinReadySeconds)
}

// TestMaxUnavailableChange verifies that spec.maxUnavailable — given either as an
// integer or as a percentage — is propagated unchanged into the StatefulSet's
// rolling update strategy.
func TestMaxUnavailableChange(t *testing.T) {
	cases := []struct {
		name            string
		value           intstr.IntOrString
		expectedRolling *appsv1.RollingUpdateStatefulSetStrategy
	}{
		{
			name:  "integer",
			value: intstr.FromInt32(1),
			expectedRolling: &appsv1.RollingUpdateStatefulSetStrategy{
				MaxUnavailable: ptr.To(intstr.FromInt32(1)),
			},
		},
		{
			name:  "percentage",
			value: intstr.Parse("25%"),
			expectedRolling: &appsv1.RollingUpdateStatefulSetStrategy{
				MaxUnavailable: ptr.To(intstr.Parse("25%")),
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			dc := &api.CassandraDatacenter{
				Spec: api.CassandraDatacenterSpec{
					ClusterName:   "test",
					ServerType:    "cassandra",
					ServerVersion: "4.0.7",
					StorageConfig: api.StorageConfig{
						CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{},
					},
					Racks:           []api.Rack{{Name: "r1"}},
					PodTemplateSpec: &corev1.PodTemplateSpec{},
					MaxUnavailable:  ptr.To(tc.value),
				},
			}

			sts, err := newStatefulSetForCassandraDatacenter(nil, dc.Spec.Racks[0].Name, dc, 3, imageRegistry)
			require.NoError(t, err, "failed to build statefulset")

			want := appsv1.StatefulSetUpdateStrategy{
				Type:          appsv1.RollingUpdateStatefulSetStrategyType,
				RollingUpdate: tc.expectedRolling,
			}
			assert.Equal(t, want, sts.Spec.UpdateStrategy)
		})
	}
}

// TestMaxUnavailableMergedWithCanaryUpgrade verifies that maxUnavailable and the
// canary-upgrade partition are merged into a single rolling update strategy
// rather than one setting overriding the other.
func TestMaxUnavailableMergedWithCanaryUpgrade(t *testing.T) {
	maxUnavailable := intstr.Parse("25%")

	dc := &api.CassandraDatacenter{
		Spec: api.CassandraDatacenterSpec{
			ClusterName:        "test",
			ServerType:         "cassandra",
			ServerVersion:      "4.0.7",
			CanaryUpgrade:      true,
			CanaryUpgradeCount: 1,
			MaxUnavailable:     ptr.To(maxUnavailable),
			StorageConfig: api.StorageConfig{
				CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{},
			},
			Racks:           []api.Rack{{Name: "r1"}},
			PodTemplateSpec: &corev1.PodTemplateSpec{},
		},
	}

	sts, err := newStatefulSetForCassandraDatacenter(nil, dc.Spec.Racks[0].Name, dc, 3, imageRegistry)
	require.NoError(t, err, "failed to build statefulset")

	// 3 replicas with a canary count of 1 leaves the partition at 2.
	want := appsv1.StatefulSetUpdateStrategy{
		Type: appsv1.RollingUpdateStatefulSetStrategyType,
		RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{
			Partition:      ptr.To(int32(2)),
			MaxUnavailable: ptr.To(maxUnavailable),
		},
	}
	assert.Equal(t, want, sts.Spec.UpdateStrategy)
}

func TestAddManagementApiServerSecurity(t *testing.T) {
require := require.New(t)
dc := &api.CassandraDatacenter{
Expand Down
21 changes: 21 additions & 0 deletions pkg/reconciliation/constructor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,27 @@ import (
// newPodDisruptionBudgetForDatacenter creates a PodDisruptionBudget object for the Datacenter
func newPodDisruptionBudgetForDatacenter(dc *api.CassandraDatacenter) *policyv1.PodDisruptionBudget {
minAvailable := intstr.FromInt(int(dc.Spec.Size - 1))

if dc.Spec.MaxUnavailable != nil {
racks := dc.GetRacks()
rackNodeCounts := api.SplitRacks(int(dc.Spec.Size), len(racks))
maxRackNodeCount := 0
for _, rackNodeCount := range rackNodeCounts {
if rackNodeCount > maxRackNodeCount {
maxRackNodeCount = rackNodeCount
}
}

if maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(dc.Spec.MaxUnavailable, maxRackNodeCount, true); err == nil {
if maxUnavailable > maxRackNodeCount {
maxUnavailable = maxRackNodeCount
}
calculatedMinAvailable := int(dc.Spec.Size) - maxUnavailable
minAvailable = intstr.FromInt(calculatedMinAvailable)
}
// If err was not nil, we'll stick to the original minAvailable of size-1
}

labels := dc.GetDatacenterLabels()
oplabels.AddOperatorLabels(labels, dc)
selectorLabels := dc.GetDatacenterLabels()
Expand Down
46 changes: 46 additions & 0 deletions pkg/reconciliation/constructor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
)

func TestPodDisruptionBudget(t *testing.T) {
Expand All @@ -29,3 +31,47 @@ func TestPodDisruptionBudget(t *testing.T) {
assert.Equal("dc1", pdb.Spec.Selector.MatchLabels["cassandra.datastax.com/datacenter"])
assert.Equal(pdb.Spec.MinAvailable.IntVal, dc.Spec.Size-1)
}

// TestPodDisruptionBudgetIntMaxUnavailable checks that an integer maxUnavailable
// lowers the PodDisruptionBudget's minAvailable to size - maxUnavailable.
func TestPodDisruptionBudgetIntMaxUnavailable(t *testing.T) {
	dc := &api.CassandraDatacenter{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "dc1",
			Namespace: "test",
		},
		Spec: api.CassandraDatacenterSpec{
			Size:           6,
			MaxUnavailable: ptr.To(intstr.FromInt(2)),
		},
	}

	// size 6 - maxUnavailable 2 => minAvailable 4
	pdb := newPodDisruptionBudgetForDatacenter(dc)
	assert.Equal(t, int32(4), pdb.Spec.MinAvailable.IntVal)
}

// TestPodDisruptionBudgetPercentageMaxUnavailable checks that a percentage
// maxUnavailable is scaled against the largest rack (not the whole datacenter)
// when computing the PodDisruptionBudget's minAvailable.
func TestPodDisruptionBudgetPercentageMaxUnavailable(t *testing.T) {
	dc := &api.CassandraDatacenter{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "dc1",
			Namespace: "test",
		},
		Spec: api.CassandraDatacenterSpec{
			Size: 6,
			Racks: []api.Rack{
				{Name: "rack1"},
				{Name: "rack2"},
			},
			MaxUnavailable: ptr.To(intstr.Parse("50%")),
		},
	}

	// 50% of the largest rack (3 nodes) rounds up to 2 unavailable => minAvailable 4.
	pdb := newPodDisruptionBudgetForDatacenter(dc)
	assert.Equal(t, int32(4), pdb.Spec.MinAvailable.IntVal)

	// 100% of a 3-node rack allows 3 unavailable => minAvailable 3.
	dc.Spec.MaxUnavailable = ptr.To(intstr.Parse("100%"))
	pdb = newPodDisruptionBudgetForDatacenter(dc)
	assert.Equal(t, int32(3), pdb.Spec.MinAvailable.IntVal)
}
Loading