diff --git a/CHANGELOG.md b/CHANGELOG.md index f17038a7f..98d308f0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti ## unreleased +* [ENHANCEMENT] [#912](https://github.com/k8ssandra/cass-operator/issues/912) Add new webhook validations for maxUnavailable string format as well as PVC sizes +* [ENHANCEMENT] [#902](https://github.com/k8ssandra/cass-operator/issues/902) If scaling down or scaling up process is still ongoing, the webhook will prevent changing the cluster size. + ## v1.30.0 * [CHANGE] [#905](https://github.com/k8ssandra/cass-operator/issues/905) Relax the ServerVersion checks to structure only without separating OSS/DSE/HCD. diff --git a/apis/cassandra/v1beta1/cassandradatacenter_types.go b/apis/cassandra/v1beta1/cassandradatacenter_types.go index df0e23f4e..62f23758d 100644 --- a/apis/cassandra/v1beta1/cassandradatacenter_types.go +++ b/apis/cassandra/v1beta1/cassandradatacenter_types.go @@ -88,6 +88,9 @@ const ( // EnableParallelCleanupWithinRackAnnotation speeds up post-scale-out cleanup by processing nodes in parallel within a rack. EnableParallelCleanupWithinRackAnnotation = "cassandra.datastax.com/enable-parallel-cleanup-within-rack" + // BypassWebhookValidationsAnnotation allows bypassing CassandraDatacenter update webhook validations + BypassWebhookValidationsAnnotation = "cassandra.datastax.com/bypass-webhook-validations" + AllowUpdateAlways AllowUpdateType = "always" AllowUpdateOnce AllowUpdateType = "once" diff --git a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go index 501a592a0..99766b2a4 100644 --- a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go +++ b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook.go @@ -12,6 +12,7 @@ import ( apiequality "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/intstr" ctrl "sigs.k8s.io/controller-runtime" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook" @@ -19,6 +20,7 @@ import ( "github.com/google/go-cmp/cmp" api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" + corev1 "k8s.io/api/core/v1" ) // log is for logging in this package. @@ -85,6 +87,13 @@ func (v *CassandraDatacenterCustomValidator) ValidateUpdate(ctx context.Context, log.Info("Validation for CassandraDatacenter upon update", "name", dc.GetName()) + if metav1.HasAnnotation(dc.ObjectMeta, api.BypassWebhookValidationsAnnotation) && + dc.Annotations[api.BypassWebhookValidationsAnnotation] == "true" { + log.Info("Webhook validations bypassed with annotation") + + return nil, nil + } + if err := ValidateSingleDatacenter(dc); err != nil { return nil, err } @@ -134,6 +143,13 @@ func ValidateSingleDatacenter(dc *api.CassandraDatacenter) error { return attemptedTo("define config dse-yaml with %s", serverStr) } + if dc.Spec.MaxUnavailable != nil { + maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(dc.Spec.MaxUnavailable, int(dc.Spec.Size), true) + if err != nil || maxUnavailable < 0 { + return attemptedTo("use invalid maxUnavailable value '%s'", dc.Spec.MaxUnavailable.String()) + } + } + // if using multiple nodes per worker, requests and limits should be set for both cpu and memory if dc.Spec.AllowMultipleNodesPerWorker { if dc.Spec.Resources.Requests.Cpu().IsZero() || @@ -180,6 +196,15 @@ func ValidateDatacenterFieldChanges(oldDc *api.CassandraDatacenter, newDc *api.C oldClaimSpec := oldDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec.DeepCopy() newClaimSpec := newDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec.DeepCopy() + if oldClaimSpec != nil && newClaimSpec != nil { + oldStorageRequest := oldClaimSpec.Resources.Requests[corev1.ResourceStorage] + newStorageRequest := newClaimSpec.Resources.Requests[corev1.ResourceStorage] + + if oldStorageRequest.Cmp(newStorageRequest) > 0 { + return attemptedTo( + "shrink storageConfig.CassandraDataVolumeClaimSpec from %s to %s", oldStorageRequest.String(), newStorageRequest.String()) + } + } // CassandraDataVolumeClaimSpec changes are disallowed if metav1.HasAnnotation(newDc.ObjectMeta, api.AllowStorageChangesAnnotation) && newDc.Annotations[api.AllowStorageChangesAnnotation] == "true" { @@ -192,6 +217,17 @@ func ValidateDatacenterFieldChanges(oldDc *api.CassandraDatacenter, newDc *api.C return attemptedTo("change storageConfig.CassandraDataVolumeClaimSpec, diff: %s", pvcSourceDiff) } + if oldDc.Spec.Size != newDc.Spec.Size { + if oldDc.GetConditionStatus(api.DatacenterScalingUp) == corev1.ConditionTrue { + return attemptedTo("change size while datacenter is still scaling up") + } + + if oldDc.GetConditionStatus(api.DatacenterScalingDown) == corev1.ConditionTrue { + // Here we don't want to allow any changes as scaling down is a lot heavier operation + return attemptedTo("change size while datacenter is still scaling down") + } + } + // Topology changes - Racks // - Rack Name and Zone changes are disallowed. // - Removing racks is not supported. diff --git a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go index 37aeae866..b805a835a 100644 --- a/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go +++ b/internal/webhooks/cassandra/v1beta1/cassandradatacenter_webhook_test.go @@ -4,6 +4,7 @@ package v1beta1 import ( + "context" "encoding/json" "strings" "testing" @@ -11,6 +12,7 @@ import ( api "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" "github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" corev1 "k8s.io/api/core/v1" @@ -127,6 +129,36 @@ func Test_ValidateSingleDatacenter(t *testing.T) { }, errString: "", }, + { + name: "Valid maxUnavailable percentage", + dc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + ServerType: "cassandra", + ServerVersion: "5.0.0", + Size: 3, + MaxUnavailable: ptr.To(intstr.Parse("50%")), + }, + }, + errString: "", + }, + { + name: "Invalid maxUnavailable string", + dc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + ServerType: "cassandra", + ServerVersion: "5.0.0", + Size: 3, + MaxUnavailable: ptr.To(intstr.FromString("invalid")), + }, + }, + errString: "attempted to use invalid maxUnavailable value 'invalid'", + }, { name: "Dse Workloads in Cassandra Invalid", dc: &api.CassandraDatacenter{ @@ -676,6 +708,84 @@ func Test_ValidateDatacenterFieldChanges(t *testing.T) { }, errString: "", }, + { + name: "storage requests size shrink is rejected even when storage changes are allowed", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + StorageClassName: storageName, + AccessModes: []corev1.PersistentVolumeAccessMode{"ReadWriteOnce"}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{"storage": resource.MustParse("2Gi")}, + }, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + Annotations: map[string]string{ + api.AllowStorageChangesAnnotation: "true", + }, + }, + Spec: api.CassandraDatacenterSpec{ + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + StorageClassName: storageName, + AccessModes: []corev1.PersistentVolumeAccessMode{"ReadWriteOnce"}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{"storage": resource.MustParse("1Gi")}, + }, + }, + }, + }, + }, + errString: "shrink storageConfig.CassandraDataVolumeClaimSpec from 2Gi to 1Gi", + }, + { + name: "storage requests have different unit but same size", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + StorageClassName: storageName, + AccessModes: []corev1.PersistentVolumeAccessMode{"ReadWriteOnce"}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{"storage": resource.MustParse("1024Mi")}, + }, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + Annotations: map[string]string{ + api.AllowStorageChangesAnnotation: "true", + }, + }, + Spec: api.CassandraDatacenterSpec{ + StorageConfig: api.StorageConfig{ + CassandraDataVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + StorageClassName: storageName, + AccessModes: []corev1.PersistentVolumeAccessMode{"ReadWriteOnce"}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{"storage": resource.MustParse("1Gi")}, + }, + }, + }, + }, + }, + errString: "", + }, { name: "Removing a rack", oldDc: &api.CassandraDatacenter{ @@ -732,6 +842,74 @@ func Test_ValidateDatacenterFieldChanges(t *testing.T) { }, errString: "", }, + { + name: "Scaling is rejected while datacenter is still scaling up", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 3, + }, + Status: api.CassandraDatacenterStatus{ + Conditions: []api.DatacenterCondition{ + { + Type: api.DatacenterScalingUp, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 6, + }, + }, + errString: "change size while datacenter is still scaling up", + }, + { + name: "Scaling is rejected while datacenter is still scaling down", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 6, + }, + Status: api.CassandraDatacenterStatus{ + Conditions: []api.DatacenterCondition{ + { + Type: api.DatacenterScalingDown, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 3, + }, + }, + errString: "change size while datacenter is still scaling down", + }, { name: "Changed a rack name", oldDc: &api.CassandraDatacenter{ @@ -920,6 +1098,103 @@ func Test_ValidateDatacenterFieldChanges(t *testing.T) { } } +func TestValidateUpdateBypassAnnotation(t *testing.T) { + validator := &CassandraDatacenterCustomValidator{} + + tests := []struct { + name string + oldDc *api.CassandraDatacenter + newDc *api.CassandraDatacenter + errString string + }{ + { + name: "Scaling up even more is rejected while datacenter is still scaling up without bypass annotation", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 6, + }, + Status: api.CassandraDatacenterStatus{ + Conditions: []api.DatacenterCondition{ + { + Type: api.DatacenterScalingUp, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 9, + }, + }, + errString: "change size while datacenter is still scaling up", + }, + { + name: "Scaling up even more is allowed if bypass is set while datacenter is still scaling up from previous operation", + oldDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 6, + }, + Status: api.CassandraDatacenterStatus{ + Conditions: []api.DatacenterCondition{ + { + Type: api.DatacenterScalingUp, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + newDc: &api.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exampleDC", + Annotations: map[string]string{ + api.BypassWebhookValidationsAnnotation: "true", + }, + }, + Spec: api.CassandraDatacenterSpec{ + Racks: []api.Rack{{ + Name: "rack0", + }}, + Size: 9, + }, + }, + errString: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + warnings, err := validator.ValidateUpdate(context.Background(), tt.oldDc, tt.newDc) + assert.Empty(t, warnings) + + if tt.errString == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.errString) + } + }) + } +} + var fqlEnabledConfig string = `{"cassandra-yaml": { "full_query_logging_options": { "log_dir": "/var/log/cassandra/fql" diff --git a/tests/scale_up_stop_resume/scale_up_park_unpark_suite_test.go b/tests/scale_up_stop_resume/scale_up_park_unpark_suite_test.go index 6e6894106..936304788 100644 --- a/tests/scale_up_stop_resume/scale_up_park_unpark_suite_test.go +++ b/tests/scale_up_stop_resume/scale_up_park_unpark_suite_test.go @@ -76,6 +76,8 @@ var _ = Describe(testName, func() { FormatOutput(json) ns.WaitForOutputAndLog(step, k, "true true true true", 1200) + ns.WaitForDatacenterReady(dcName) + step = "scale up to 5 nodes" json = "{\"spec\": {\"size\": 5}}" k = kubectl.PatchMerge(dcResource, json) @@ -89,6 +91,8 @@ var _ = Describe(testName, func() { FormatOutput(json) ns.WaitForOutputAndLog(step, k, "true true true true true", 1200) + ns.WaitForDatacenterReady(dcName) + step = "stopping the dc" json = "{\"spec\": {\"stopped\": true}}" k = kubectl.PatchMerge(dcResource, json)