Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add additional monitoring rules to the PrometheusRule #791

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 73 additions & 9 deletions controllers/argocd_metrics_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -582,13 +582,6 @@ func newServiceMonitor(namespace, name, matchLabel string) *monitoringv1.Service
}

func newPrometheusRule(namespace string) *monitoringv1.PrometheusRule {
// The namespace used in the alert rule is not the namespace of the
// running application, it is the namespace that the corresponding
// ArgoCD application metadata was created in. This is needed to
// scope this alert rule to only fire for applications managed
// by the ArgoCD instance installed in this namespace.
expr := fmt.Sprintf("argocd_app_info{namespace=\"%s\",sync_status=\"OutOfSync\"} > 0", namespace)

objectMeta := metav1.ObjectMeta{
Name: alertRuleName,
Namespace: namespace,
Expand All @@ -602,17 +595,88 @@ func newPrometheusRule(namespace string) *monitoringv1.PrometheusRule {
Alert: "ArgoCDSyncAlert",
Annotations: map[string]string{
"summary": "Argo CD application is out of sync",
"description": "Argo CD application {{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.",
"description": "Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.",
},
Expr: intstr.IntOrString{
Type: intstr.String,
// The namespace used in the alert rule is not the namespace of the
// running application, it is the namespace that the corresponding
// ArgoCD application metadata was created in. This is needed to
// scope this alert rule to only fire for applications managed
// by the ArgoCD instance installed in this namespace.
StrVal: fmt.Sprintf("argocd_app_info{namespace=\"%s\",sync_status=\"OutOfSync\"} > 0", namespace),
},
For: "5m",
Labels: map[string]string{
"severity": "warning",
},
},
{
Alert: "ArgoCDUnknownSyncAlert",
Annotations: map[string]string{
"summary": "Argo CD application sync state is unknown",
"description": "Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is in an unknown sync state. Check ArgoCDUnknownSyncAlert status, this often occurs when the Application is misconfigured.",
},
Expr: intstr.IntOrString{
Type: intstr.String,
StrVal: expr,
StrVal: fmt.Sprintf("argocd_app_info{namespace=\"%s\",sync_status=\"Unknown\"} > 0", namespace),
},
For: "5m",
Labels: map[string]string{
"severity": "critical",
},
},
{
Alert: "ArgoCDHealthAlert",
Annotations: map[string]string{
"summary": "Argo CD application is not healthy",
"description": "Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is not healthy. Check ArgoCDHealthAlert status, this alert is designed to notify that an application managed by Argo CD is not in a healthy, suspended, progressing or degraded state.",
},
Expr: intstr.IntOrString{
Type: intstr.String,
// General warning for applications that are not healthy. The Healthy and
// Suspended statuses are ignored because they are expected; Degraded and
// Progressing are handled by the dedicated rules below.
StrVal: fmt.Sprintf("argocd_app_info{namespace=\"%s\", health_status!~\"Healthy|Suspended|Progressing|Degraded\"} > 0", namespace),
},
For: "5m",
Labels: map[string]string{
"severity": "warning",
},
},
{
Alert: "ArgoCDDegradedAlert",
Annotations: map[string]string{
"summary": "Argo CD application is degraded",
"description": "Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is degraded. Check ArgoCDDegradedAlert status, this alert is designed to notify that an application managed by Argo CD is degraded.",
},
Expr: intstr.IntOrString{
Type: intstr.String,
// Specific warning of degraded state
StrVal: fmt.Sprintf("argocd_app_info{namespace=\"%s\", health_status=\"Degraded\"} > 0", namespace),
},
For: "5m",
Labels: map[string]string{
"severity": "critical",
},
},
{
Alert: "ArgoCDProgressingAlert",
Annotations: map[string]string{
"summary": "Argo CD application has been progressing for more than 10 minutes",
"description": "Argo CD application {{ $labels.namespace }}/{{ $labels.name }} has been progressing for more than 10 minutes. Check ArgoCDProgressingAlert status, this alert is designed to notify when an application is taking a long time to exit the Progressing state.",
},
Expr: intstr.IntOrString{
Type: intstr.String,
// This rule is used to notify when an application is stuck in the progressing
// state for more than 10m.
StrVal: fmt.Sprintf("argocd_app_info{namespace=\"%s\", health_status=\"Progressing\"} > 0", namespace),
},
For: "10m",
Labels: map[string]string{
"severity": "warning",
},
},
},
},
},
Expand Down
48 changes: 41 additions & 7 deletions controllers/argocd_metrics_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,38 @@ func TestReconciler_add_prometheus_rule(t *testing.T) {
namespace: "namespace-two",
},
}
testMonitoringRules := []struct {
name string
duration string
expr string
}{
{
name: "ArgoCDSyncAlert",
duration: "5m",
expr: "argocd_app_info{namespace=\"%s\",sync_status=\"OutOfSync\"} > 0",
},
{
name: "ArgoCDUnknownSyncAlert",
duration: "5m",
expr: "argocd_app_info{namespace=\"%s\",sync_status=\"Unknown\"} > 0",
},
{
name: "ArgoCDHealthAlert",
duration: "5m",
expr: "argocd_app_info{namespace=\"%s\", health_status!~\"Healthy|Suspended|Progressing|Degraded\"} > 0",
},
{
name: "ArgoCDDegradedAlert",
duration: "5m",
expr: "argocd_app_info{namespace=\"%s\", health_status=\"Degraded\"} > 0",
},
{
name: "ArgoCDProgressingAlert",
duration: "10m",
expr: "argocd_app_info{namespace=\"%s\", health_status=\"Progressing\"} > 0",
},
}

flagPtr := false
for _, tc := range testCases {
r := newMetricsReconciler(t, tc.namespace, tc.instanceName, &flagPtr)
Expand All @@ -327,13 +359,15 @@ func TestReconciler_add_prometheus_rule(t *testing.T) {
assert.Equal(t, rule.OwnerReferences[0].Kind, argocdKind)
assert.Equal(t, rule.OwnerReferences[0].Name, tc.instanceName)

assert.Equal(t, rule.Spec.Groups[0].Rules[0].Alert, "ArgoCDSyncAlert")
assert.Assert(t, rule.Spec.Groups[0].Rules[0].Annotations["summary"] != "")
assert.Assert(t, rule.Spec.Groups[0].Rules[0].Annotations["description"] != "")
assert.Assert(t, rule.Spec.Groups[0].Rules[0].Labels["severity"] != "")
assert.Equal(t, rule.Spec.Groups[0].Rules[0].For, "5m")
expr := fmt.Sprintf("argocd_app_info{namespace=\"%s\",sync_status=\"OutOfSync\"} > 0", tc.namespace)
assert.Equal(t, rule.Spec.Groups[0].Rules[0].Expr.StrVal, expr)
for index, testMonitoringRule := range testMonitoringRules {
assert.Equal(t, rule.Spec.Groups[0].Rules[index].Alert, testMonitoringRule.name)
assert.Assert(t, rule.Spec.Groups[0].Rules[index].Annotations["summary"] != "")
assert.Assert(t, rule.Spec.Groups[0].Rules[index].Annotations["description"] != "")
assert.Assert(t, rule.Spec.Groups[0].Rules[index].Labels["severity"] != "")
assert.Equal(t, rule.Spec.Groups[0].Rules[index].For, testMonitoringRule.duration)
expr := fmt.Sprintf(testMonitoringRule.expr, tc.namespace)
assert.Equal(t, rule.Spec.Groups[0].Rules[index].Expr.StrVal, expr)
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,35 @@ spec:
- alert: ArgoCDSyncAlert
annotations:
summary: Argo CD application is out of sync
description: Argo CD application {{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.
expr: argocd_app_info{namespace="openshift-gitops",sync_status="OutOfSync"} > 0
labels:
severity: warning
- alert: ArgoCDUnknownSyncAlert
annotations:
summary: Argo CD application sync state is unknown
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is in an unknown sync state. Check ArgoCDUnknownSyncAlert status, this often occurs when the Application is misconfigured.
expr: argocd_app_info{namespace="openshift-gitops",sync_status="Unknown"} > 0
labels:
severity: critical
- alert: ArgoCDHealthAlert
annotations:
summary: Argo CD application is not healthy
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is not healthy. Check ArgoCDHealthAlert status, this alert is designed to notify that an application managed by Argo CD is not in a healthy, suspended, progressing or degraded state.
expr: argocd_app_info{namespace="openshift-gitops", health_status!~"Healthy|Suspended|Progressing|Degraded"} > 0
labels:
severity: warning
- alert: ArgoCDDegradedAlert
annotations:
summary: Argo CD application is degraded
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is degraded. Check ArgoCDDegradedAlert status, this alert is designed to notify that an application managed by Argo CD is degraded.
expr: argocd_app_info{namespace="openshift-gitops", health_status="Degraded"} > 0
labels:
severity: critical
- alert: ArgoCDProgressingAlert
annotations:
summary: Argo CD application has been progressing for more than 10 minutes
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} has been progressing for more than 10 minutes. Check ArgoCDProgressingAlert status, this alert is designed to notify when an application is taking a long time to exit the Progressing state.
expr: argocd_app_info{namespace="openshift-gitops", health_status="Progressing"} > 0
labels:
severity: warning
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,40 @@ spec:
- alert: ArgoCDSyncAlert
annotations:
summary: Argo CD application is out of sync
description: Argo CD application {{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.
expr: argocd_app_info{namespace="openshift-gitops",sync_status="OutOfSync"} > 0
for: 5m
labels:
severity: warning
- alert: ArgoCDUnknownSyncAlert
annotations:
summary: Argo CD application sync state is unknown
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is in an unknown sync state. Check ArgoCDUnknownSyncAlert status, this often occurs when the Application is misconfigured.
expr: argocd_app_info{namespace="openshift-gitops",sync_status="Unknown"} > 0
for: 5m
labels:
severity: critical
- alert: ArgoCDHealthAlert
annotations:
summary: Argo CD application is not healthy
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is not healthy. Check ArgoCDHealthAlert status, this alert is designed to notify that an application managed by Argo CD is not in a healthy, suspended, progressing or degraded state.
expr: argocd_app_info{namespace="openshift-gitops", health_status!~"Healthy|Suspended|Progressing|Degraded"} > 0
for: 5m
labels:
severity: warning
- alert: ArgoCDDegradedAlert
annotations:
summary: Argo CD application is degraded
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is degraded. Check ArgoCDDegradedAlert status, this alert is designed to notify that an application managed by Argo CD is degraded.
expr: argocd_app_info{namespace="openshift-gitops", health_status="Degraded"} > 0
for: 5m
labels:
severity: critical
- alert: ArgoCDProgressingAlert
annotations:
summary: Argo CD application has been progressing for more than 10 minutes
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} has been progressing for more than 10 minutes. Check ArgoCDProgressingAlert status, this alert is designed to notify when an application is taking a long time to exit the Progressing state.
expr: argocd_app_info{namespace="openshift-gitops", health_status="Progressing"} > 0
for: 10m
labels:
severity: warning
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,40 @@ spec:
- alert: ArgoCDSyncAlert
annotations:
summary: Argo CD application is out of sync
description: Argo CD application {{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.
expr: argocd_app_info{namespace="openshift-gitops",sync_status="OutOfSync"} > 0
for: 5m
labels:
severity: warning
severity: warning
- alert: ArgoCDUnknownSyncAlert
annotations:
summary: Argo CD application sync state is unknown
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is in an unknown sync state. Check ArgoCDUnknownSyncAlert status, this often occurs when the Application is misconfigured.
expr: argocd_app_info{namespace="openshift-gitops",sync_status="Unknown"} > 0
for: 5m
labels:
severity: critical
- alert: ArgoCDHealthAlert
annotations:
summary: Argo CD application is not healthy
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is not healthy. Check ArgoCDHealthAlert status, this alert is designed to notify that an application managed by Argo CD is not in a healthy, suspended, progressing or degraded state.
expr: argocd_app_info{namespace="openshift-gitops", health_status!~"Healthy|Suspended|Progressing|Degraded"} > 0
for: 5m
labels:
severity: warning
- alert: ArgoCDDegradedAlert
annotations:
summary: Argo CD application is degraded
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is degraded. Check ArgoCDDegradedAlert status, this alert is designed to notify that an application managed by Argo CD is degraded.
expr: argocd_app_info{namespace="openshift-gitops", health_status="Degraded"} > 0
for: 5m
labels:
severity: critical
- alert: ArgoCDProgressingAlert
annotations:
summary: Argo CD application has been progressing for more than 10 minutes
description: Argo CD application {{ $labels.namespace }}/{{ $labels.name }} has been progressing for more than 10 minutes. Check ArgoCDProgressingAlert status, this alert is designed to notify when an application is taking a long time to exit the Progressing state.
expr: argocd_app_info{namespace="openshift-gitops", health_status="Progressing"} > 0
for: 10m
labels:
severity: warning