Skip to content

Commit

Permalink
pkg/cvo/status: Filter out immediate UpdateEffectNone errors nested i…
Browse files Browse the repository at this point in the history
…n MultipleErrors in Failing condition

Various errors get propagated to users, such as the summarized task
graph error. For example, in the form of the message in the Failing
condition. However, update errors set with the update effect of
UpdateEffectNone can confuse users, as these primarily informing
messages get displayed together with valid update errors that heavily
impact the update. This can result in a message such as:

{
  "lastTransitionTime": "2023-06-20T13:40:12Z",
  "message": "Multiple errors are preventing progress:\n* Cluster
  operator authentication is updating versions\n* Could not update
  customresourcedefinition \"alertingrules.monitoring.openshift.io\"
  (512 of 993): the object is invalid, possibly due to local cluster
  configuration",
  "reason": "MultipleErrors",
  "status": "True",
  "type": "Failing"
}

The Failing condition is not true because of the UpdateEffectNone
error ("Cluster operator authentication is updating versions"), but
its message still gets displayed.

This commit makes sure that update errors that do not heavily affect
the update will be removed from the MultipleErrors error in the Failing
condition message to an extent.

The filtered out errors from the message will still be displayed in the
logs and in other places, such as the ReconciliationIssues condition.

The original code handles correctly situations where the status failure
is an UpdateEffectNone error. The new changes leave such errors be. In
case the MultipleErrors error contains only UpdateEffectNone errors, the
error is unchanged to keep the original logic unchanged and keep the
commit simple. The goal of this commit is to remove unimportant messages
from MultipleErrors errors that contain valid messages in the Failing
condition.

The current code contains an override to set the Failing condition
when history is empty or the CVO is reconciling. This commit will keep
this logic functional. This means the filtering is only applied
when history is not empty and the CVO is not reconciling the payload.
  • Loading branch information
DavidHurta authored and openshift-cherrypick-robot committed Sep 4, 2024
1 parent 6d25104 commit 875c0d8
Show file tree
Hide file tree
Showing 2 changed files with 267 additions and 17 deletions.
92 changes: 75 additions & 17 deletions pkg/cvo/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,37 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
})
}

progressReason, progressMessage, skipFailure := convertErrorToProgressing(cvStatus.History, now.Time, status)
failure := status.Failure
failingReason, failingMessage := getReasonMessageFromError(failure)
var skipFailure bool
var progressReason, progressMessage string
if !status.Reconciling && len(cvStatus.History) != 0 {
progressReason, progressMessage, skipFailure = convertErrorToProgressing(now.Time, failure)
failure = filterErrorForFailingCondition(failure, payload.UpdateEffectNone)
filteredFailingReason, filteredFailingMessage := getReasonMessageFromError(failure)
if failingReason != filteredFailingReason {
klog.Infof("Filtered failure reason changed from '%s' to '%s'", failingReason, filteredFailingReason)
}
if failingMessage != filteredFailingMessage {
klog.Infof("Filtered failure message changed from '%s' to '%s'", failingMessage, filteredFailingMessage)
}
failingReason, failingMessage = filteredFailingReason, filteredFailingMessage
}

// set the failing condition
failingCondition := configv1.ClusterOperatorStatusCondition{
Type: ClusterStatusFailing,
Status: configv1.ConditionFalse,
LastTransitionTime: now,
}
if failure != nil && !skipFailure {
failingCondition.Status = configv1.ConditionTrue
failingCondition.Reason = failingReason
failingCondition.Message = failingMessage
}
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, failingCondition)

// update progressing
if err := status.Failure; err != nil && !skipFailure {
var reason string
msg := progressMessage
Expand All @@ -307,16 +336,6 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
msg = "an error occurred"
}

// set the failing condition
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{
Type: ClusterStatusFailing,
Status: configv1.ConditionTrue,
Reason: reason,
Message: err.Error(),
LastTransitionTime: now,
})

// update progressing
if status.Reconciling {
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorProgressing,
Expand All @@ -336,9 +355,6 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
}

} else {
// clear the failure condition
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{Type: ClusterStatusFailing, Status: configv1.ConditionFalse, LastTransitionTime: now})

// update progressing
if status.Reconciling {
message := fmt.Sprintf("Cluster version is %s", version)
Expand Down Expand Up @@ -413,6 +429,48 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
}
}

// getReasonMessageFromError returns the reason and the message from an error.
// If the reason or message is not available, an empty string is returned.
func getReasonMessageFromError(err error) (reason, message string) {
if uErr, ok := err.(*payload.UpdateError); ok {
reason = uErr.Reason
}
if err != nil {
message = err.Error()
}
return reason, message
}

// filterErrorForFailingCondition filters out update errors based on the given
// updateEffect from a MultipleError error. If the err has the reason
// MultipleErrors, its immediate nested errors are filtered out and the error
// is recreated. If all nested errors are filtered out, nil is returned.
func filterErrorForFailingCondition(err error, updateEffect payload.UpdateEffectType) error {
if err == nil {
return nil
}
if uErr, ok := err.(*payload.UpdateError); ok && uErr.Reason == "MultipleErrors" {
if nested, ok := uErr.Nested.(interface{ Errors() []error }); ok {
filtered := nested.Errors()
filtered = filterOutUpdateErrors(filtered, updateEffect)
return newMultipleError(filtered)
}
}
return err
}

// filterOutUpdateErrors filters out update errors of the given effect.
func filterOutUpdateErrors(errs []error, updateEffect payload.UpdateEffectType) []error {
filtered := make([]error, 0, len(errs))
for _, err := range errs {
if uErr, ok := err.(*payload.UpdateError); ok && uErr.UpdateEffect == updateEffect {
continue
}
filtered = append(filtered, err)
}
return filtered
}

func setImplicitlyEnabledCapabilitiesCondition(cvStatus *configv1.ClusterVersionStatus, implicitlyEnabled []configv1.ClusterVersionCapability,
now metav1.Time) {

Expand Down Expand Up @@ -479,11 +537,11 @@ func setDesiredReleaseAcceptedCondition(cvStatus *configv1.ClusterVersionStatus,
// how an update error is interpreted. An error may simply need to be reported but does not indicate the update is
// failing. An error may indicate the update is failing or that if the error continues for a defined interval the
// update is failing.
func convertErrorToProgressing(history []configv1.UpdateHistory, now time.Time, status *SyncWorkerStatus) (reason string, message string, ok bool) {
if len(history) == 0 || status.Failure == nil || status.Reconciling {
func convertErrorToProgressing(now time.Time, statusFailure error) (reason string, message string, ok bool) {
if statusFailure == nil {
return "", "", false
}
uErr, ok := status.Failure.(*payload.UpdateError)
uErr, ok := statusFailure.(*payload.UpdateError)
if !ok {
return "", "", false
}
Expand Down
192 changes: 192 additions & 0 deletions pkg/cvo/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,13 @@ func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t
Reason: "MultipleErrors",
Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is updating versions",
},
shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true,
expectedConditionModified: &configv1.ClusterOperatorStatusCondition{
Type: ClusterStatusFailing,
Status: configv1.ConditionTrue,
Reason: "ClusterOperatorNotAvailable",
Message: "Cluster operator A is not available",
},
},
{
name: "MultipleErrors of UpdateEffectNone and UpdateEffectNone",
Expand Down Expand Up @@ -527,6 +534,11 @@ func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t
Reason: "MultipleErrors",
Message: "Multiple errors are preventing progress:\n* Cluster operator A is updating versions\n* Cluster operator B is getting conscious",
},
shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true,
expectedConditionModified: &configv1.ClusterOperatorStatusCondition{
Type: ClusterStatusFailing,
Status: configv1.ConditionFalse,
},
},
{
name: "MultipleErrors of UpdateEffectFail, UpdateEffectFailAfterInterval, and UpdateEffectNone",
Expand Down Expand Up @@ -562,6 +574,13 @@ func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t
Reason: "MultipleErrors",
Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is updating versions\n* Cluster operator C is degraded",
},
shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true,
expectedConditionModified: &configv1.ClusterOperatorStatusCondition{
Type: ClusterStatusFailing,
Status: configv1.ConditionTrue,
Reason: "MultipleErrors",
Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator C is degraded",
},
},
}
for _, tc := range tests {
Expand Down Expand Up @@ -601,3 +620,176 @@ func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t
})
}
}

func Test_filterOutUpdateErrors(t *testing.T) {
type args struct {
errs []error
updateEffectType payload.UpdateEffectType
}
tests := []struct {
name string
args args
want []error
}{
{
name: "empty errors",
args: args{
errs: []error{},
updateEffectType: payload.UpdateEffectNone,
},
want: []error{},
},
{
name: "single update error of the specified value",
args: args{
errs: []error{
&payload.UpdateError{
Name: "None",
UpdateEffect: payload.UpdateEffectNone,
},
},
updateEffectType: payload.UpdateEffectNone,
},
want: []error{},
},
{
name: "errors do not contain update errors of the specified value",
args: args{
errs: []error{
&payload.UpdateError{
Name: "Fail",
UpdateEffect: payload.UpdateEffectFail,
},
&payload.UpdateError{
Name: "Report",
UpdateEffect: payload.UpdateEffectReport,
},
&payload.UpdateError{
Name: "Fail After Interval",
UpdateEffect: payload.UpdateEffectFailAfterInterval,
},
},
updateEffectType: payload.UpdateEffectNone,
},
want: []error{
&payload.UpdateError{
Name: "Fail",
UpdateEffect: payload.UpdateEffectFail,
},
&payload.UpdateError{
Name: "Report",
UpdateEffect: payload.UpdateEffectReport,
},
&payload.UpdateError{
Name: "Fail After Interval",
UpdateEffect: payload.UpdateEffectFailAfterInterval,
},
},
},
{
name: "errors contain update errors of the specified value UpdateEffectNone",
args: args{
errs: []error{
&payload.UpdateError{
Name: "Fail After Interval",
UpdateEffect: payload.UpdateEffectFailAfterInterval,
},
&payload.UpdateError{
Name: "None #1",
UpdateEffect: payload.UpdateEffectNone,
},
&payload.UpdateError{
Name: "Report",
UpdateEffect: payload.UpdateEffectReport,
},
&payload.UpdateError{
Name: "None #2",
UpdateEffect: payload.UpdateEffectNone,
},
},
updateEffectType: payload.UpdateEffectNone,
},
want: []error{
&payload.UpdateError{
Name: "Fail After Interval",
UpdateEffect: payload.UpdateEffectFailAfterInterval,
},
&payload.UpdateError{
Name: "Report",
UpdateEffect: payload.UpdateEffectReport,
},
},
},
{
name: "errors contain update errors of the specified value UpdateEffectReport",
args: args{
errs: []error{
&payload.UpdateError{
Name: "Fail After Interval",
UpdateEffect: payload.UpdateEffectFailAfterInterval,
},
&payload.UpdateError{
Name: "None #1",
UpdateEffect: payload.UpdateEffectNone,
},
&payload.UpdateError{
Name: "Report",
UpdateEffect: payload.UpdateEffectReport,
},
&payload.UpdateError{
Name: "None #2",
UpdateEffect: payload.UpdateEffectNone,
},
},
updateEffectType: payload.UpdateEffectReport,
},
want: []error{
&payload.UpdateError{
Name: "Fail After Interval",
UpdateEffect: payload.UpdateEffectFailAfterInterval,
},
&payload.UpdateError{
Name: "None #1",
UpdateEffect: payload.UpdateEffectNone,
},
&payload.UpdateError{
Name: "None #2",
UpdateEffect: payload.UpdateEffectNone,
},
},
},
{
name: "errors contain only update errors of the specified value UpdateEffectNone",
args: args{
errs: []error{
&payload.UpdateError{
Name: "None #1",
UpdateEffect: payload.UpdateEffectNone,
},
&payload.UpdateError{
Name: "None #2",
UpdateEffect: payload.UpdateEffectNone,
},
&payload.UpdateError{
Name: "None #3",
UpdateEffect: payload.UpdateEffectNone,
},
&payload.UpdateError{
Name: "None #4",
UpdateEffect: payload.UpdateEffectNone,
},
},
updateEffectType: payload.UpdateEffectNone,
},
want: []error{},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
filtered := filterOutUpdateErrors(tt.args.errs, tt.args.updateEffectType)
if difference := cmp.Diff(filtered, tt.want); difference != "" {
t.Errorf("got errors differ from expected:\n%s", difference)
}
})
}
}

0 comments on commit 875c0d8

Please sign in to comment.