diff --git a/.github/workflows/kindIntegTest.yml b/.github/workflows/kindIntegTest.yml
index b1f4f031..9c173a2d 100644
--- a/.github/workflows/kindIntegTest.yml
+++ b/.github/workflows/kindIntegTest.yml
@@ -59,7 +59,6 @@ jobs:
     strategy:
       matrix:
         integration_test:
-          - upgrade_operator # Test is not setup to run against 4.0
          - additional_seeds #TODO: Fails against C* 4.0, fix in https://github.com/k8ssandra/cass-operator/issues/459
          - scale_down_unbalanced_racks #TODO: Fails against C* 4.0 and DSE 6.8, fix in https://github.com/k8ssandra/cass-operator/issues/459
     runs-on: ubuntu-latest
@@ -187,6 +186,7 @@ jobs:
           - superuser-secret-provided
           - test_bad_config_and_fix
           - test_mtls_mgmt_api
+          - upgrade_operator
         # More than 3 workers tests:
           - add_racks
           #- additional_seeds #TODO: Fails against C* 4.0, fix in https://github.com/k8ssandra/cass-operator/issues/459
diff --git a/.github/workflows/workflow-integration-tests.yaml b/.github/workflows/workflow-integration-tests.yaml
index 87f3c485..2cd7e693 100644
--- a/.github/workflows/workflow-integration-tests.yaml
+++ b/.github/workflows/workflow-integration-tests.yaml
@@ -63,7 +63,6 @@ jobs:
         version:
           - "3.11.14"
         integration_test:
-          - upgrade_operator
           - cdc_successful
           - additional_seeds
           - scale_down_unbalanced_racks
@@ -205,6 +204,7 @@ jobs:
           - cluster_wide_install
           - config_change
           - config_secret
+          - upgrade_operator # Unnecessary to run against multiple versions
           #- multi_cluster_management # cluster_wide_install verifies the same thing
           #- oss_test_all_the_things # This is now the smoke test, see kind_smoke_tests job
           - scale_down
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2862bf48..e361ce20 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti

 ## unreleased

+* [CHANGE] [#566](https://github.com/k8ssandra/cass-operator/issues/566) BREAKING: StatefulSets are no longer updated automatically when the CassandraDatacenter has not been modified, unless the annotation "cassandra.datastax.com/autoupdate-spec" is set on the CassandraDatacenter with the value "always" or "once". Users of config secrets should set this annotation to "always" to keep their existing behavior. For other users this means that, for example, an operator upgrade no longer automatically applies updated settings or a new system-logger image; the benefit is that upgrading the operator no longer triggers a rolling restart of the cluster. A new condition, "RequiresUpdate", indicates that such a change may be pending; it is set to True until the next refresh of the reconcile has happened.
 * [CHANGE] [#618](https://github.com/k8ssandra/cass-operator/issues/618) Update dependencies to support controller-runtime 0.17.2, modify required parts.
 * [ENHANCEMENT] [#628](https://github.com/k8ssandra/cass-operator/issues/628) Replace pod task can replace any node, including those that have crashed
 * [ENHANCEMENT] [#532](https://github.com/k8ssandra/cass-operator/issues/532) Instead of rejecting updates/creates with deprecated fields, return kubectl warnings.
diff --git a/Makefile b/Makefile
index 1ea11a10..46065bff 100644
--- a/Makefile
+++ b/Makefile
@@ -139,9 +139,9 @@ test: manifests generate fmt vet lint envtest ## Run tests.
 integ-test: kustomize cert-manager helm ## Run integration tests from directory M_INTEG_DIR or set M_INTEG_DIR=all to run all the integration tests.
 ifeq ($(M_INTEG_DIR), all)
 	# Run all the tests (exclude kustomize & testdata directories)
-	cd tests && go test -v ./... -timeout 300m --ginkgo.progress --ginkgo.v
+	cd tests && go test -v ./... -timeout 300m --ginkgo.show-node-events --ginkgo.v
 else
-	cd tests/${M_INTEG_DIR} && go test -v ./... -timeout 300m --ginkgo.progress --ginkgo.v
+	cd tests/${M_INTEG_DIR} && go test -v ./... -timeout 300m --ginkgo.show-node-events --ginkgo.v
 endif

 .PHONY: version
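The contract in the changelog entry above is driven entirely by one annotation. As a minimal client-side sketch (not part of this diff; the helper name and the controller-runtime client are assumptions), opting a CassandraDatacenter back into StatefulSet updates looks like this:

```go
package example

import (
	"context"

	cassapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// markForAutoUpdate is a hypothetical helper: it sets the
// cassandra.datastax.com/autoupdate-spec annotation ("always" or "once") so
// the operator may update the StatefulSets again.
func markForAutoUpdate(ctx context.Context, c client.Client, key types.NamespacedName, value cassapi.AllowUpdateType) error {
	dc := &cassapi.CassandraDatacenter{}
	if err := c.Get(ctx, key, dc); err != nil {
		return err
	}

	// Patch only the metadata so concurrent spec changes are not clobbered.
	patch := client.MergeFrom(dc.DeepCopy())
	metav1.SetMetaDataAnnotation(&dc.ObjectMeta, cassapi.UpdateAllowedAnnotation, string(value))
	return c.Patch(ctx, dc, patch)
}
```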
diff --git a/apis/cassandra/v1beta1/cassandradatacenter_types.go b/apis/cassandra/v1beta1/cassandradatacenter_types.go
index 97a52be7..e2d17732 100644
--- a/apis/cassandra/v1beta1/cassandradatacenter_types.go
+++ b/apis/cassandra/v1beta1/cassandradatacenter_types.go
@@ -65,6 +65,12 @@ const (
 	// cluster has gone through scale up operation.
 	NoAutomatedCleanupAnnotation = "cassandra.datastax.com/no-cleanup"

+	// UpdateAllowedAnnotation marks the Datacenter as allowing updates to the StatefulSet spec even if the CassandraDatacenter object was not modified. Allowed values are "once" and "always".
+	UpdateAllowedAnnotation = "cassandra.datastax.com/autoupdate-spec"
+
+	AllowUpdateAlways AllowUpdateType = "always"
+	AllowUpdateOnce   AllowUpdateType = "once"
+
 	CassNodeState = "cassandra.datastax.com/node-state"

 	ProgressUpdating ProgressState = "Updating"
@@ -74,6 +80,8 @@ const (
 	DefaultInternodePort = 7000
 )

+type AllowUpdateType string
+
 // ProgressState - this type exists so there's no chance of pushing random strings to our progress status
 type ProgressState string
@@ -379,6 +387,7 @@ const (
 	DatacenterRollingRestart DatacenterConditionType = "RollingRestart"
 	DatacenterValid          DatacenterConditionType = "Valid"
 	DatacenterDecommission   DatacenterConditionType = "Decommission"
+	DatacenterRequiresUpdate DatacenterConditionType = "RequiresUpdate"

 	// DatacenterHealthy indicates if QUORUM can be reached from all deployed nodes.
 	// If this check fails, certain operations such as scaling up will not proceed.
@@ -961,3 +970,7 @@ func (dc *CassandraDatacenter) DatacenterName() string {
 func (dc *CassandraDatacenter) UseClientImage() bool {
 	return dc.Spec.ServerType == "cassandra" && semver.Compare(fmt.Sprintf("v%s", dc.Spec.ServerVersion), "v4.1.0") >= 0
 }
+
+func (dc *CassandraDatacenter) GenerationChanged() bool {
+	return dc.Status.ObservedGeneration < dc.Generation
+}
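The new GenerationChanged helper leans on standard Kubernetes semantics: the API server bumps metadata.generation only for spec changes, while annotation and status edits leave it untouched. A small standalone illustration (not part of the diff):

```go
package main

import (
	"fmt"

	cassapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
)

func main() {
	dc := &cassapi.CassandraDatacenter{}
	dc.Generation = 3                // bumped by the API server on each spec change
	dc.Status.ObservedGeneration = 2 // last generation the operator reconciled

	fmt.Println(dc.GenerationChanged()) // true: a spec change is pending

	dc.Status.ObservedGeneration = 3
	fmt.Println(dc.GenerationChanged()) // false: nothing new to apply
}
```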
diff --git a/apis/cassandra/v1beta1/cassandradatacenter_webhook.go b/apis/cassandra/v1beta1/cassandradatacenter_webhook.go
index b697a4c4..bacf3379 100644
--- a/apis/cassandra/v1beta1/cassandradatacenter_webhook.go
+++ b/apis/cassandra/v1beta1/cassandradatacenter_webhook.go
@@ -25,6 +25,7 @@ import (
 	"github.com/k8ssandra/cass-operator/pkg/images"

+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	ctrl "sigs.k8s.io/controller-runtime"
 	logf "sigs.k8s.io/controller-runtime/pkg/log"
@@ -336,6 +337,13 @@ func ValidateServiceLabelsAndAnnotations(dc CassandraDatacenter) error {
 		}
 	}

+	if metav1.HasAnnotation(dc.ObjectMeta, UpdateAllowedAnnotation) {
+		updateType := AllowUpdateType(dc.Annotations[UpdateAllowedAnnotation])
+		if updateType != AllowUpdateAlways && updateType != AllowUpdateOnce {
+			return attemptedTo("use %s annotation with value other than 'once' or 'always'", UpdateAllowedAnnotation)
+		}
+	}
+
 	return nil
 }
diff --git a/apis/cassandra/v1beta1/webhook_test.go b/apis/cassandra/v1beta1/webhook_test.go
index 925a74d7..c7210ce1 100644
--- a/apis/cassandra/v1beta1/webhook_test.go
+++ b/apis/cassandra/v1beta1/webhook_test.go
@@ -318,6 +318,38 @@ func Test_ValidateSingleDatacenter(t *testing.T) {
 			},
 			errString: "configure DatacenterService with reserved annotations and/or labels (prefixes cassandra.datastax.com and/or k8ssandra.io)",
 		},
+		{
+			name: "Allow upgrade should not accept invalid values",
+			dc: &CassandraDatacenter{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "exampleDC",
+					Annotations: map[string]string{
+						"cassandra.datastax.com/autoupdate-spec": "invalid",
+					},
+				},
+				Spec: CassandraDatacenterSpec{
+					ServerType:    "dse",
+					ServerVersion: "6.8.42",
+				},
+			},
+			errString: "use cassandra.datastax.com/autoupdate-spec annotation with value other than 'once' or 'always'",
+		},
+		{
+			name: "Allow upgrade should accept once value",
+			dc: &CassandraDatacenter{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "exampleDC",
+					Annotations: map[string]string{
+						"cassandra.datastax.com/autoupdate-spec": "once",
+					},
+				},
+				Spec: CassandraDatacenterSpec{
+					ServerType:    "dse",
+					ServerVersion: "6.8.42",
+				},
+			},
+			errString: "",
+		},
 	}

 	for _, tt := range tests {
diff --git a/apis/control/v1alpha1/cassandratask_types.go b/apis/control/v1alpha1/cassandratask_types.go
index b54eb213..d02703b3 100644
--- a/apis/control/v1alpha1/cassandratask_types.go
+++ b/apis/control/v1alpha1/cassandratask_types.go
@@ -80,6 +80,7 @@ const (
 	CommandMove           CassandraCommand = "move"
 	CommandGarbageCollect CassandraCommand = "garbagecollect"
 	CommandFlush          CassandraCommand = "flush"
+	CommandRefresh        CassandraCommand = "refresh"
 )

 type CassandraJob struct {
@@ -167,6 +168,8 @@ const (
 	JobFailed JobConditionType = "Failed"
 	// JobRunning means the job is currently executing
 	JobRunning JobConditionType = "Running"
+	// DatacenterUpdated means the CassandraDatacenter was marked for an update by this task
+	DatacenterUpdated JobConditionType = "DatacenterUpdated"
 )

 //+kubebuilder:object:root=true
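A "refresh" task targets the Datacenter object itself rather than its pods. Below is a sketch of building one programmatically, using the Spec.Datacenter and Spec.Jobs fields the task controller reads; the helper name is hypothetical:

```go
package example

import (
	api "github.com/k8ssandra/cass-operator/apis/control/v1alpha1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// newRefreshTask is a hypothetical helper showing the shape of a "refresh"
// CassandraTask: the reconciler reacts to it by setting autoupdate-spec=once
// on the targeted CassandraDatacenter and waiting for the annotation to
// disappear.
func newRefreshTask(dcName, namespace string) *api.CassandraTask {
	return &api.CassandraTask{
		ObjectMeta: metav1.ObjectMeta{
			Name:      dcName + "-refresh",
			Namespace: namespace,
		},
		Spec: api.CassandraTaskSpec{
			Datacenter: corev1.ObjectReference{Name: dcName, Namespace: namespace},
			Jobs: []api.CassandraJob{
				{Name: "refresh-" + dcName, Command: api.CommandRefresh},
			},
		},
	}
}
```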
diff --git a/internal/controllers/cassandra/cassandradatacenter_controller.go b/internal/controllers/cassandra/cassandradatacenter_controller.go
index 7226984a..60376c41 100644
--- a/internal/controllers/cassandra/cassandradatacenter_controller.go
+++ b/internal/controllers/cassandra/cassandradatacenter_controller.go
@@ -179,7 +179,7 @@ func (r *CassandraDatacenterReconciler) SetupWithManager(mgr ctrl.Manager) error
 	// Create a new managed controller builder
 	c := ctrl.NewControllerManagedBy(mgr).
 		Named("cassandradatacenter_controller").
-		For(&api.CassandraDatacenter{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
+		For(&api.CassandraDatacenter{}, builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, predicate.AnnotationChangedPredicate{}))). // We might want to consider annotation filtering
 		Owns(&appsv1.StatefulSet{}, builder.WithPredicates(managedByCassandraOperatorPredicate)).
 		Owns(&policyv1.PodDisruptionBudget{}, builder.WithPredicates(managedByCassandraOperatorPredicate)).
 		Owns(&corev1.Service{}, builder.WithPredicates(managedByCassandraOperatorPredicate))
diff --git a/internal/controllers/control/cassandratask_controller.go b/internal/controllers/control/cassandratask_controller.go
index 14f25e30..45bd78fd 100644
--- a/internal/controllers/control/cassandratask_controller.go
+++ b/internal/controllers/control/cassandratask_controller.go
@@ -309,12 +309,20 @@ JobDefinition:
 			flush(taskConfig)
 		case api.CommandGarbageCollect:
 			gc(taskConfig)
+		case api.CommandRefresh:
+			// This targets the Datacenter only
+			res, err = r.refreshDatacenter(ctx, dc, &cassTask)
+			if err != nil {
+				return ctrl.Result{}, err
+			}
+			completed = taskConfig.Completed
+			break JobDefinition
 		default:
 			err = fmt.Errorf("unknown job command: %s", job.Command)
 			return ctrl.Result{}, err
 		}

-		if !r.HasCondition(cassTask, api.JobRunning, metav1.ConditionTrue) {
+		if !r.HasCondition(&cassTask, api.JobRunning, metav1.ConditionTrue) {
 			valid, errValidate := taskConfig.Validate()
 			if errValidate != nil && valid {
 				// Retry, this is a transient error
@@ -423,7 +431,7 @@ func (r *CassandraTaskReconciler) SetupWithManager(mgr ctrl.Manager) error {
 		Complete(r)
 }

-func (r *CassandraTaskReconciler) HasCondition(task api.CassandraTask, condition api.JobConditionType, status metav1.ConditionStatus) bool {
+func (r *CassandraTaskReconciler) HasCondition(task *api.CassandraTask, condition api.JobConditionType, status metav1.ConditionStatus) bool {
 	for _, cond := range task.Status.Conditions {
 		if cond.Type == string(condition) {
 			return cond.Status == status
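The predicate change above is what lets the refresh flow work at all: annotation edits do not bump the generation, so with only GenerationChangedPredicate the controller would never see them. An illustrative standalone check of the combined predicate:

```go
package main

import (
	"fmt"

	cassapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/predicate"
)

func main() {
	pred := predicate.Or(
		predicate.GenerationChangedPredicate{},
		predicate.AnnotationChangedPredicate{},
	)

	oldDc := &cassapi.CassandraDatacenter{}
	oldDc.Generation = 1

	// Annotation-only change: the generation stays the same, but the event
	// now passes the filter, so adding autoupdate-spec wakes the controller.
	newDc := oldDc.DeepCopy()
	metav1.SetMetaDataAnnotation(&newDc.ObjectMeta, cassapi.UpdateAllowedAnnotation, string(cassapi.AllowUpdateOnce))
	fmt.Println(pred.Update(event.UpdateEvent{ObjectOld: oldDc, ObjectNew: newDc})) // true

	// Status-only change: still filtered out.
	statusOnly := oldDc.DeepCopy()
	statusOnly.Status.ObservedGeneration = 1
	fmt.Println(pred.Update(event.UpdateEvent{ObjectOld: oldDc, ObjectNew: statusOnly})) // false
}
```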
"github.com/onsi/ginkgo/v2" @@ -683,7 +684,6 @@ var _ = Describe("CassandraTask controller tests", func() { AfterEach(func() { deleteDatacenter(testNamespaceName) - // Expect(k8sClient.Delete(context.TODO(), testDc)).Should(Succeed()) }) Context("Restart", func() { @@ -781,4 +781,48 @@ var _ = Describe("CassandraTask controller tests", func() { }) }) }) + Describe("Execute jobs against Datacenters", func() { + var testNamespaceName string + BeforeEach(func() { + testNamespaceName = fmt.Sprintf("test-task-%d", rand.Int31()) + By("create datacenter", createDatacenter(testDatacenterName, testNamespaceName)) + }) + + AfterEach(func() { + deleteDatacenter(testNamespaceName) + }) + + Context("Refresh", func() { + It("Adds an annotation if CassandraDatacenter does not have one and waits for completion", func() { + taskKey, task := buildTask(api.CommandRefresh, testNamespaceName) + Expect(k8sClient.Create(context.Background(), task)).Should(Succeed()) + + dc := &cassdcapi.CassandraDatacenter{} + Eventually(func() bool { + if err := k8sClient.Get(context.TODO(), types.NamespacedName{Name: testDatacenterName, Namespace: testNamespaceName}, dc); err != nil { + return false + } + if metav1.HasAnnotation(dc.ObjectMeta, cassapi.UpdateAllowedAnnotation) { + return dc.Annotations[cassapi.UpdateAllowedAnnotation] == "once" + } + return false + }, "5s", "50ms").Should(BeTrue()) + + delete(dc.Annotations, cassapi.UpdateAllowedAnnotation) + Expect(k8sClient.Update(context.Background(), dc)).Should(Succeed()) + + _ = waitForTaskCompletion(taskKey) + }) + It("Completes if autoupdate-spec is always allowed", func() { + dc := &cassdcapi.CassandraDatacenter{} + Expect(k8sClient.Get(context.TODO(), types.NamespacedName{Name: testDatacenterName, Namespace: testNamespaceName}, dc)).To(Succeed()) + metav1.SetMetaDataAnnotation(&dc.ObjectMeta, cassapi.UpdateAllowedAnnotation, string(cassapi.AllowUpdateAlways)) + Expect(k8sClient.Update(context.Background(), dc)).Should(Succeed()) + + taskKey, task := buildTask(api.CommandRefresh, testNamespaceName) + Expect(k8sClient.Create(context.Background(), task)).Should(Succeed()) + _ = waitForTaskCompletion(taskKey) + }) + }) + }) }) diff --git a/internal/controllers/control/jobs.go b/internal/controllers/control/jobs.go index fec73abf..3fa41f7f 100644 --- a/internal/controllers/control/jobs.go +++ b/internal/controllers/control/jobs.go @@ -13,6 +13,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" cassapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" @@ -404,6 +405,45 @@ func compact(taskConfig *TaskConfiguration) { taskConfig.AsyncFunc = compactAsync } +// Refresh CassandraDatacenter + +func (r *CassandraTaskReconciler) refreshDatacenter(ctx context.Context, dc *cassapi.CassandraDatacenter, task *api.CassandraTask) (ctrl.Result, error) { + // If there's no "always" annotation, add "once" annotation and check that it's removed (that indicates finished) + if metav1.HasAnnotation(dc.ObjectMeta, cassapi.UpdateAllowedAnnotation) { + // No need to add anything, process is still going or it was always allowed + val := cassapi.AllowUpdateType(dc.Annotations[cassapi.UpdateAllowedAnnotation]) + if val == cassapi.AllowUpdateAlways { + // Nothing to do here, the autoupdate is set + return ctrl.Result{}, nil + } else { + // Still waiting for the refresh to happen + return ctrl.Result{RequeueAfter: 
diff --git a/internal/controllers/control/jobs.go b/internal/controllers/control/jobs.go
index fec73abf..3fa41f7f 100644
--- a/internal/controllers/control/jobs.go
+++ b/internal/controllers/control/jobs.go
@@ -13,6 +13,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"

 	cassapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
@@ -404,6 +405,45 @@ func compact(taskConfig *TaskConfiguration) {
 	taskConfig.AsyncFunc = compactAsync
}

+// Refresh CassandraDatacenter
+
+func (r *CassandraTaskReconciler) refreshDatacenter(ctx context.Context, dc *cassapi.CassandraDatacenter, task *api.CassandraTask) (ctrl.Result, error) {
+	// If there's no "always" annotation, add the "once" annotation and wait until it is removed (which indicates the refresh has finished)
+	if metav1.HasAnnotation(dc.ObjectMeta, cassapi.UpdateAllowedAnnotation) {
+		// No need to add anything, the process is still running or updates are always allowed
+		val := cassapi.AllowUpdateType(dc.Annotations[cassapi.UpdateAllowedAnnotation])
+		if val == cassapi.AllowUpdateAlways {
+			// Nothing to do here, autoupdate is set
+			return ctrl.Result{}, nil
+		} else {
+			// Still waiting for the refresh to happen
+			return ctrl.Result{RequeueAfter: JobRunningRequeue}, nil
+		}
+	}
+
+	if r.HasCondition(task, api.DatacenterUpdated, metav1.ConditionTrue) {
+		// The refresh has completed, since the annotation is gone
+		return ctrl.Result{}, nil
+	}
+
+	// Let's start the process
+	patch := client.MergeFrom(dc.DeepCopy())
+
+	metav1.SetMetaDataAnnotation(&dc.ObjectMeta, cassapi.UpdateAllowedAnnotation, string(cassapi.AllowUpdateOnce))
+
+	if err := r.Patch(ctx, dc, patch); err != nil {
+		return ctrl.Result{}, err
+	}
+
+	taskPatch := client.MergeFrom(task.DeepCopy())
+	if modified := SetCondition(task, api.DatacenterUpdated, metav1.ConditionTrue, "Datacenter updated to update spec once"); modified {
+		if err := r.Client.Status().Patch(ctx, task, taskPatch); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+	return ctrl.Result{RequeueAfter: JobRunningRequeue}, nil
+}
+
 // Common functions

 func isCassandraUp(pod *corev1.Pod) bool {
diff --git a/pkg/reconciliation/constructor.go b/pkg/reconciliation/constructor.go
index d8053671..bbd3dad5 100644
--- a/pkg/reconciliation/constructor.go
+++ b/pkg/reconciliation/constructor.go
@@ -10,6 +10,7 @@ import (
 	"github.com/k8ssandra/cass-operator/pkg/oplabels"
 	"github.com/k8ssandra/cass-operator/pkg/utils"

+	corev1 "k8s.io/api/core/v1"
 	policyv1 "k8s.io/api/policy/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
@@ -62,11 +63,23 @@ func setOperatorProgressStatus(rc *ReconciliationContext, newState api.ProgressS
 		if rc.Datacenter.Status.DatacenterName == nil {
 			rc.Datacenter.Status.DatacenterName = &rc.Datacenter.Spec.DatacenterName
 		}
+		rc.setCondition(api.NewDatacenterCondition(api.DatacenterRequiresUpdate, corev1.ConditionFalse))
 	}

 	if err := rc.Client.Status().Patch(rc.Ctx, rc.Datacenter, patch); err != nil {
 		rc.ReqLogger.Error(err, "error updating the Cassandra Operator Progress state")
 		return err
 	}

+	// The autoupdate-spec=once annotation is temporary and should be removed after the first successful reconcile
+	if metav1.HasAnnotation(rc.Datacenter.ObjectMeta, api.UpdateAllowedAnnotation) && rc.Datacenter.Annotations[api.UpdateAllowedAnnotation] == string(api.AllowUpdateOnce) {
+		// remove the annotation
+		patch = client.MergeFrom(rc.Datacenter.DeepCopy())
+		delete(rc.Datacenter.ObjectMeta.Annotations, api.UpdateAllowedAnnotation)
+		if err := rc.Client.Patch(rc.Ctx, rc.Datacenter, patch); err != nil {
+			rc.ReqLogger.Error(err, "error removing the autoupdate-spec=once annotation")
+			return err
+		}
+	}
+	return nil
 }
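Taken together, jobs.go and constructor.go define a simple handshake: the task adds autoupdate-spec=once, and the reconciler removes it once it has successfully applied the pending changes. A hypothetical client-side wait that relies on that handshake (a sketch, assuming apimachinery's wait package):

```go
package example

import (
	"context"
	"time"

	cassapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/wait"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// waitForRefresh polls until the autoupdate-spec annotation disappears,
// which per the contract above means the refresh has finished.
func waitForRefresh(ctx context.Context, c client.Client, key types.NamespacedName) error {
	return wait.PollUntilContextTimeout(ctx, 2*time.Second, 5*time.Minute, true,
		func(ctx context.Context) (bool, error) {
			dc := &cassapi.CassandraDatacenter{}
			if err := c.Get(ctx, key, dc); err != nil {
				return false, err
			}
			_, present := dc.Annotations[cassapi.UpdateAllowedAnnotation]
			return !present, nil // done once the operator has removed the annotation
		})
}
```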
diff --git a/pkg/reconciliation/reconcile_racks.go b/pkg/reconciliation/reconcile_racks.go
index 4f518e93..56804e76 100644
--- a/pkg/reconciliation/reconcile_racks.go
+++ b/pkg/reconciliation/reconcile_racks.go
@@ -166,6 +166,11 @@ func (rc *ReconciliationContext) CheckRackCreation() result.ReconcileResult {
 	return result.Continue()
 }

+func (rc *ReconciliationContext) UpdateAllowed() bool {
+	// The annotation values are validated in the webhook, so checking for the annotation's presence is enough here
+	return rc.Datacenter.GenerationChanged() || metav1.HasAnnotation(rc.Datacenter.ObjectMeta, api.UpdateAllowedAnnotation)
+}
+
 func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult {
 	logger := rc.ReqLogger
 	dc := rc.Datacenter
@@ -199,7 +204,20 @@ func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult {
 			return result.Error(err)
 		}

-		if !utils.ResourcesHaveSameHash(statefulSet, desiredSts) {
+		if !utils.ResourcesHaveSameHash(statefulSet, desiredSts) && !rc.UpdateAllowed() {
+			logger.
+				WithValues("rackName", rackName).
+				Info("update is blocked, but statefulset needs an update. Marking datacenter as requiring update.")
+			dcPatch := client.MergeFrom(dc.DeepCopy())
+			rc.setCondition(api.NewDatacenterCondition(api.DatacenterRequiresUpdate, corev1.ConditionTrue))
+			if err := rc.Client.Status().Patch(rc.Ctx, dc, dcPatch); err != nil {
+				logger.Error(err, "error patching datacenter status for updating")
+				return result.Error(err)
+			}
+			return result.Continue()
+		}
+
+		if !utils.ResourcesHaveSameHash(statefulSet, desiredSts) && rc.UpdateAllowed() {
 			logger.
 				WithValues("rackName", rackName).
 				Info("statefulset needs an update")
@@ -378,7 +396,6 @@ func (rc *ReconciliationContext) CheckRackForceUpgrade() result.ReconcileResult
 		if err := rc.Client.Update(rc.Ctx, statefulSet); err != nil {
 			if errors.IsInvalid(err) {
 				if err = rc.deleteStatefulSet(statefulSet); err != nil {
-					// logger.Error(err, "Failed to delete the StatefulSet", "Invalid", errors.IsInvalid(err), "Forbidden", errors.IsForbidden(err))
 					return result.Error(err)
 				}
 			} else {
diff --git a/pkg/reconciliation/reconcile_racks_test.go b/pkg/reconciliation/reconcile_racks_test.go
index c1bf7fa3..6f9c28d9 100644
--- a/pkg/reconciliation/reconcile_racks_test.go
+++ b/pkg/reconciliation/reconcile_racks_test.go
@@ -24,6 +24,7 @@ import (
 	"github.com/k8ssandra/cass-operator/pkg/utils"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"
+	"github.com/stretchr/testify/require"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -349,6 +350,34 @@ func TestCheckRackPodTemplate_CanaryUpgrade(t *testing.T) {
 	assert.True(t, result.Completed())
 }

+func TestCheckRackPodTemplate_GenerationCheck(t *testing.T) {
+	assert := assert.New(t)
+	rc, _, cleanupMockSrc := setupTest()
+	defer cleanupMockSrc()
+
+	require.NoError(t, rc.CalculateRackInformation())
+
+	res := rc.CheckRackCreation()
+	assert.False(res.Completed(), "CheckRackCreation did not complete as expected")
+
+	// Update the generation manually and now verify we won't do updates to StatefulSets if the generation hasn't changed
+	rc.Datacenter.Status.ObservedGeneration = rc.Datacenter.Generation
+	rc.Datacenter.Spec.ServerVersion = "6.8.44"
+
+	res = rc.CheckRackPodTemplate()
+	assert.Equal(result.Continue(), res)
+	cond, found := rc.Datacenter.GetCondition(api.DatacenterRequiresUpdate)
+	assert.True(found)
+	assert.Equal(corev1.ConditionTrue, cond.Status)
+
+	// Add the annotation
+	metav1.SetMetaDataAnnotation(&rc.Datacenter.ObjectMeta, api.UpdateAllowedAnnotation, string(api.AllowUpdateAlways))
+	rc.Datacenter.Spec.ServerVersion = "6.8.44" // This needs to be reapplied, since we call Patch in CheckRackPodTemplate()
+
+	res = rc.CheckRackPodTemplate()
+	assert.True(res.Completed())
+}
+
 func TestReconcilePods(t *testing.T) {
 	t.Skip()
 	rc, _, cleanupMockScr := setupTest()
diff --git a/pkg/reconciliation/testing.go b/pkg/reconciliation/testing.go
index b7f447ef..35e35215 100644
--- a/pkg/reconciliation/testing.go
+++ b/pkg/reconciliation/testing.go
@@ -77,8 +77,9 @@ func CreateMockReconciliationContext(
 	// Instance a cassandraDatacenter
 	cassandraDatacenter := &api.CassandraDatacenter{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      name,
-			Namespace: namespace,
+			Name:       name,
+			Namespace:  namespace,
+			Generation: 1,
 		},
 		Spec: api.CassandraDatacenterSpec{
 			Size: size,
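With UpdateAllowed in place, CheckRackPodTemplate effectively makes a three-way decision per rack. A compact restatement with hypothetical names, for illustration only:

```go
package example

// updateDecision restates the branching that CheckRackPodTemplate now
// performs per rack; these names are illustrative, not part of the diff.
type updateDecision int

const (
	noChange           updateDecision = iota // hashes match: nothing to do
	markRequiresUpdate                       // change pending but blocked: set RequiresUpdate=True and continue
	rollStatefulSet                          // change pending and allowed: apply the new StatefulSet spec
)

func decide(hashesDiffer, updateAllowed bool) updateDecision {
	switch {
	case !hashesDiffer:
		return noChange
	case !updateAllowed:
		// Generation unchanged and no autoupdate-spec annotation present
		return markRequiresUpdate
	default:
		return rollStatefulSet
	}
}
```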
diff --git a/tests/testdata/cluster-with-config-secret.yaml b/tests/testdata/cluster-with-config-secret.yaml
index f1a04816..bfa432ca 100644
--- a/tests/testdata/cluster-with-config-secret.yaml
+++ b/tests/testdata/cluster-with-config-secret.yaml
@@ -2,6 +2,8 @@ apiVersion: cassandra.datastax.com/v1beta1
 kind: CassandraDatacenter
 metadata:
   name: dc1
+  annotations:
+    cassandra.datastax.com/autoupdate-spec: always
 spec:
   clusterName: cluster1
   serverType: cassandra
diff --git a/tests/testdata/default-three-rack-three-node-dc-4x.yaml b/tests/testdata/default-three-rack-three-node-dc-4x.yaml
index 1eb88920..68e0ec79 100644
--- a/tests/testdata/default-three-rack-three-node-dc-4x.yaml
+++ b/tests/testdata/default-three-rack-three-node-dc-4x.yaml
@@ -5,7 +5,7 @@ metadata:
 spec:
   clusterName: cluster1
   serverType: cassandra
-  serverVersion: 4.0.3
+  serverVersion: 4.1.4
   managementApiAuth:
     insecure: {}
   size: 3
diff --git a/tests/testdata/operator-1.7.1-oss-dc.yaml b/tests/testdata/operator-1.7.1-oss-dc.yaml
index 24c0a98d..9811dc1f 100644
--- a/tests/testdata/operator-1.7.1-oss-dc.yaml
+++ b/tests/testdata/operator-1.7.1-oss-dc.yaml
@@ -5,7 +5,7 @@ metadata:
 spec:
   clusterName: cluster1
   serverType: cassandra
-  serverVersion: "3.11.7"
+  serverVersion: "3.11.15"
   managementApiAuth:
     insecure: {}
   size: 3
diff --git a/tests/upgrade_operator/kustomization.yaml b/tests/upgrade_operator/kustomization.yaml
index f487080d..bb670495 100644
--- a/tests/upgrade_operator/kustomization.yaml
+++ b/tests/upgrade_operator/kustomization.yaml
@@ -4,4 +4,4 @@ namespace: test-upgrade-operator
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
-- github.com/k8ssandra/cass-operator/config/deployments/default?ref=v1.12.0
+- github.com/k8ssandra/cass-operator/config/deployments/default?ref=v1.19.1
diff --git a/tests/upgrade_operator/upgrade_operator_suite_test.go b/tests/upgrade_operator/upgrade_operator_suite_test.go
index 3e2e837e..6a8eafd4 100644
--- a/tests/upgrade_operator/upgrade_operator_suite_test.go
+++ b/tests/upgrade_operator/upgrade_operator_suite_test.go
@@ -20,7 +20,7 @@ var (
 	testName   = "Upgrade Operator"
 	namespace  = "test-upgrade-operator"
 	dcName     = "dc1"
-	dcYaml     = "../testdata/operator-1.7.1-oss-dc.yaml"
+	dcYaml     = "../testdata/default-three-rack-three-node-dc-4x.yaml"
 	dcResource = fmt.Sprintf("CassandraDatacenter/%s", dcName)
 	dcLabel    = fmt.Sprintf("cassandra.datastax.com/datacenter=%s", dcName)
 	ns         = ginkgo_util.NewWrapper(testName, namespace)
@@ -48,7 +48,7 @@ func TestLifecycle(t *testing.T) {

 // InstallOldOperator installs the oldest supported upgrade path (for Kubernetes 1.25)
 func InstallOldOperator() {
-	step := "install cass-operator 1.12.0"
+	step := "install cass-operator 1.19.1"
 	By(step)

 	err := kustomize.DeployDir(namespace, "upgrade_operator")
@@ -82,55 +82,78 @@ var _ = Describe(testName, func() {
 			ns.WaitForDatacenterReady(dcName)

 			// Get UID of the cluster pod
-			// step = "get Cassandra pods UID"
-			// k = kubectl.Get("pod/cluster1-dc1-r1-sts-0").FormatOutput("jsonpath={.metadata.uid}")
-			// createdPodUID := ns.OutputAndLog(step, k)
+			step = "get Cassandra pods UID"
+			k = kubectl.Get("pod/cluster1-dc1-r1-sts-0").FormatOutput("jsonpath={.metadata.uid}")
+			createdPodUID := ns.OutputAndLog(step, k)

-			step = "get name of 1.12.0 operator pod"
+			step = "get name of 1.19.1 operator pod"
 			json := "jsonpath={.items[].metadata.name}"
 			k = kubectl.Get("pods").WithFlag("selector", "name=cass-operator").FormatOutput(json)
 			oldOperatorName := ns.OutputAndLog(step, k)

 			UpgradeOperator()

-			step = "wait for 1.12.0 operator pod to be removed"
+			step = "wait for 1.19.1 operator pod to be removed"
 			k = kubectl.Get("pods").WithFlag("field-selector", fmt.Sprintf("metadata.name=%s", oldOperatorName))
 			ns.WaitForOutputAndLog(step, k, "", 60)

 			ns.WaitForOperatorReady()

-			// give the operator a minute to reconcile and update the datacenter
-			time.Sleep(1 * time.Minute)
-
+			// Let the new operator acquire the lock and start reconciling, if it is going to
+			time.Sleep(60 * time.Second)
 			ns.WaitForDatacenterReady(dcName)
-
 			ns.ExpectDoneReconciling(dcName)

 			// Verify Pod hasn't restarted
-			// step = "get Cassandra pods UID"
-			// k = kubectl.Get("pod/cluster1-dc1-r1-sts-0").FormatOutput("jsonpath={.metadata.uid}")
-			// postUpgradeCassPodUID := ns.OutputAndLog(step, k)
+			step = "get Cassandra pods UID"
+			k = kubectl.Get("pod/cluster1-dc1-r1-sts-0").FormatOutput("jsonpath={.metadata.uid}")
+			postUpgradeCassPodUID := ns.OutputAndLog(step, k)

-			// Expect(createdPodUID).To(Equal(postUpgradeCassPodUID))
+			Expect(createdPodUID).To(Equal(postUpgradeCassPodUID))

 			// Verify PodDisruptionBudget is available (1.11 updates from v1beta1 -> v1)
-			json = "jsonpath={.items[].metadata.name}"
-			k = kubectl.Get("poddisruptionbudgets").WithLabel("cassandra.datastax.com/datacenter").FormatOutput(json)
-			err = ns.WaitForOutputContains(k, "dc1-pdb", 20)
-			Expect(err).ToNot(HaveOccurred())
-
-			// Update Cassandra version to ensure we can still do changes
-			step = "perform cassandra upgrade"
-			json = "{\"spec\": {\"serverVersion\": \"3.11.14\"}}"
+			// json = "jsonpath={.items[].metadata.name}"
+			// k = kubectl.Get("poddisruptionbudgets").WithLabel("cassandra.datastax.com/datacenter").FormatOutput(json)
+			// err = ns.WaitForOutputContains(k, "dc1-pdb", 20)
+			// Expect(err).ToNot(HaveOccurred())
+
+			// Get the current system-logger image and verify the operator upgrade alone did not update it
+			step = "get Cassandra pod system-logger"
+			k = kubectl.Get("pod/cluster1-dc1-r1-sts-0").FormatOutput("jsonpath={.spec.containers[?(@.name == 'server-system-logger')].image}")
+			loggerImage := ns.OutputAndLog(step, k)
+			Expect(loggerImage).To(Equal("cr.k8ssandra.io/k8ssandra/system-logger:v1.19.1"))
+
+			// Add the annotation to allow the operator to update the StatefulSets
+			step = "add annotation to allow upgrade"
+			json = "{\"metadata\": {\"annotations\": {\"cassandra.datastax.com/autoupdate-spec\": \"once\"}}}"
 			k = kubectl.PatchMerge(dcResource, json)
 			ns.ExecAndLog(step, k)

+			// Wait for the operator to reconcile the datacenter
 			ns.WaitForDatacenterOperatorProgress(dcName, "Updating", 60)
 			ns.WaitForDatacenterReady(dcName)
 			ns.WaitForDatacenterReadyPodCount(dcName, 3)
 			ns.ExpectDoneReconciling(dcName)

+			// Verify the pod has been restarted
+			step = "get Cassandra pods UID"
+			k = kubectl.Get("pod/cluster1-dc1-r1-sts-0").FormatOutput("jsonpath={.metadata.uid}")
+			postAllowUpgradeUID := ns.OutputAndLog(step, k)
+
+			Expect(postUpgradeCassPodUID).ToNot(Equal(postAllowUpgradeUID))
+
+			// Verify the Pod now has an updated system-logger container image
+			step = "get Cassandra pod system-logger"
+			k = kubectl.Get("pod/cluster1-dc1-r1-sts-0").FormatOutput("jsonpath={.spec.containers[?(@.name == 'server-system-logger')].image}")
+			loggerImageNew := ns.OutputAndLog(step, k)
+			Expect(loggerImage).To(Not(Equal(loggerImageNew)))
+
+			// Verify the autoupdate-spec=once annotation was removed from the CassandraDatacenter
+			step = "get CassandraDatacenter autoupdate-spec annotation"
+			k = kubectl.Get("CassandraDatacenter", dcName).FormatOutput("jsonpath={.metadata.annotations}")
+			annotations := ns.OutputAndLog(step, k)
+
+			Expect(annotations).To(Not(ContainSubstring("cassandra.datastax.com/autoupdate-spec")))
+
 			// Verify delete still works correctly and that we won't leave any resources behind
 			step = "deleting the dc"
 			k = kubectl.DeleteFromFiles(dcYaml)