diff --git a/pkg/controllers/updaterun/controller.go b/pkg/controllers/updaterun/controller.go index cc8da2847..e55467e30 100644 --- a/pkg/controllers/updaterun/controller.go +++ b/pkg/controllers/updaterun/controller.go @@ -45,10 +45,10 @@ type Reconciler struct { func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtime.Result, error) { startTime := time.Now() - klog.V(2).InfoS("ClusterResourceStagedUpdateRun reconciliation starts", "stagedUpdateRun", req.NamespacedName) + klog.V(2).InfoS("StagedUpdateRun reconciliation starts", "stagedUpdateRun", req.NamespacedName) defer func() { latency := time.Since(startTime).Milliseconds() - klog.V(2).InfoS("ClusterResourceStagedUpdateRun reconciliation ends", "stagedUpdateRun", req.NamespacedName, "latency", latency) + klog.V(2).InfoS("StagedUpdateRun reconciliation ends", "stagedUpdateRun", req.NamespacedName, "latency", latency) }() var updateRun placementv1alpha1.ClusterResourceStagedUpdateRun @@ -72,7 +72,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim var updatingStageIndex int var tobeUpdatedBinding, tobeDeletedBinding []*placementv1beta1.ClusterResourceBinding var err error - if !condition.IsConditionStatusTrue(meta.FindStatusCondition(updateRun.Status.Conditions, string(placementv1alpha1.StagedUpdateRunConditionInitialized)), updateRun.Generation) { + initCond := meta.FindStatusCondition(updateRun.Status.Conditions, string(placementv1alpha1.StagedUpdateRunConditionInitialized)) + if !condition.IsConditionStatusTrue(initCond, updateRun.Generation) { + if condition.IsConditionStatusFalse(initCond, updateRun.Generation) { + klog.InfoS("The stagedUpdateRun has failed to initialize", "errorMsg", initCond.Message, "stagedUpdateRun", runObjRef) + return runtime.Result{}, nil + } klog.V(2).InfoS("The stagedUpdateRun is not initialized", "stagedUpdateRun", runObjRef) if tobeUpdatedBinding, tobeDeletedBinding, err = r.initialize(ctx, &updateRun); err != nil { klog.ErrorS(err, "Failed to initialize the stagedUpdateRun", "stagedUpdateRun", runObjRef) @@ -83,7 +88,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim return runtime.Result{}, err } updatingStageIndex = 0 //start from the first stage - klog.V(2).InfoS("The stagedUpdateRun is initialized", "stagedUpdateRun", runObjRef) + klog.V(2).InfoS("Initialized the stagedUpdateRun", "stagedUpdateRun", runObjRef) } else { klog.V(2).InfoS("The stagedUpdateRun is initialized", "stagedUpdateRun", runObjRef) // Check if the stagedUpdateRun is finished @@ -100,9 +105,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim } return runtime.Result{}, err } - klog.V(2).InfoS("The stagedUpdateRun is validated", "stagedUpdateRun", runObjRef) + klog.V(2).InfoS("Validated the stagedUpdateRun", "stagedUpdateRun", runObjRef) } - // the previous run is completed but the update probably failed + // the previous run is completed but the update to the status failed if updatingStageIndex == -1 { klog.V(2).InfoS("the stagedUpdateRun is completed", "stagedUpdateRun", runObjRef) return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, &updateRun) @@ -110,12 +115,20 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim // execute the update run klog.V(2).InfoS("Continue to execute the stagedUpdateRun", "updatingStageIndex", updatingStageIndex, "stagedUpdateRun", runObjRef) finished, executeErr := r.executeUpdateRun(ctx, &updateRun, updatingStageIndex, tobeUpdatedBinding, tobeDeletedBinding) + if executeErr != nil { + // errStagedUpdatedAborted cannot be retried + if errors.Is(executeErr, errStagedUpdatedAborted) { + return runtime.Result{}, r.recordUpdateRunFailed(ctx, &updateRun, executeErr.Error()) + } + return runtime.Result{}, executeErr + } if finished { klog.V(2).InfoS("The stagedUpdateRun is finished", "stagedUpdateRun", runObjRef) return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, &updateRun) } - // requeue if the update run is not finished or there is an execute error - return runtime.Result{Requeue: true}, executeErr + // retry in 5 seconds if the update run is not finished + klog.V(2).InfoS("The stagedUpdateRun is not finished yet", "stagedUpdateRun", runObjRef) + return runtime.Result{RequeueAfter: time.Second * 5}, nil } // handleDelete handles the deletion of the stagedUpdateRun object @@ -123,7 +136,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim // We will delete all the dependent resources, such as approvalRequest objects, of the stagedUpdateRun object. func (r *Reconciler) handleDelete(ctx context.Context, updateRun *placementv1alpha1.ClusterResourceStagedUpdateRun) error { runObjRef := klog.KObj(updateRun) - // delete all the associated snapshots + // delete all the associated approvalRequests approvalRequest := &placementv1alpha1.ApprovalRequest{} if err := r.Client.DeleteAllOf(ctx, approvalRequest, client.InNamespace(updateRun.GetNamespace()), client.MatchingLabels{placementv1alpha1.TargetUpdateRunLabel: updateRun.GetName()}); err != nil { klog.ErrorS(err, "Failed to delete all associated approvalRequests", "stagedUpdateRun", runObjRef) @@ -150,9 +163,9 @@ func (r *Reconciler) ensureFinalizer(ctx context.Context, updateRun *placementv1 // SetupWithManager sets up the controller with the Manager. func (r *Reconciler) SetupWithManager(mgr runtime.Manager) error { - r.recorder = mgr.GetEventRecorderFor("stagedupdaterun-controller") + r.recorder = mgr.GetEventRecorderFor("clusterresource-stagedupdaterun-controller") return runtime.NewControllerManagedBy(mgr). - Named("stagedupdaterun-controller"). + Named("clusterresource-stagedupdaterun-controller"). For(&placementv1alpha1.ClusterResourceStagedUpdateRun{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). Watches(&placementv1alpha1.ApprovalRequest{}, &handler.Funcs{ // We only care about when an approval request is approved. diff --git a/pkg/controllers/updaterun/executing.go b/pkg/controllers/updaterun/executing.go index 4827be405..f8b079721 100644 --- a/pkg/controllers/updaterun/executing.go +++ b/pkg/controllers/updaterun/executing.go @@ -29,7 +29,7 @@ func (r *Reconciler) executeUpdateRun(ctx context.Context, updateRun *placementv executingErr := r.executeUpdatingStage(ctx, updateRun, updatingStageIndex, tobeUpdatedBinding) if errors.Is(executingErr, errStagedUpdatedAborted) { markStageUpdatingFailed(updatingStage, updateRun.Generation, executingErr.Error()) - return true, r.recordUpdateRunFailed(ctx, updateRun, executingErr.Error()) + return true, executingErr } if updatingErr := r.recordUpdateRunStatus(ctx, updateRun); updatingErr != nil { return false, updatingErr