Skip to content

Commit

Permalink
Add option to exclude alerts from the results
Browse files Browse the repository at this point in the history
The use case behind this: sometimes you want to define
a so-called Watchdog or DeadMansSwitch alert that is always
firing, in order to monitor that the alerting is working.

When such a Watchdog is defined the list of all alerts will
always be Critical. Thus we add a flag to exclude certain alerts.
  • Loading branch information
martialblog committed Dec 16, 2024
1 parent b37570f commit b5fab46
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 6 deletions.
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,13 @@ Examples:
| total=2 firing=1 pending=0 inactive=1
Flags:
-h, --help help for alert
-n, --name strings The name of one or more specific alerts to check.
This parameter can be repeated e.G.: '--name alert1 --name alert2'
If no name is given, all alerts will be evaluated
-T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK")
-P, --problems Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed
--exclude-alert stringArray Alerts to ignore. Can be used multiple times and supports regex.
-h, --help help for alert
-n, --name strings The name of one or more specific alerts to check.
This parameter can be repeated e.G.: '--name alert1 --name alert2'
If no name is given, all alerts will be evaluated
-T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK")
-P, --problems Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed
```
#### Checking all defined alerts
Expand Down
21 changes: 21 additions & 0 deletions cmd/alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"errors"
"fmt"
"regexp"
"strings"

"github.com/NETWAYS/check_prometheus/internal/alert"
Expand All @@ -15,6 +16,7 @@ import (
// AlertConfig bundles the options that control the alert check.
// The fields are filled from command line flags (e.g. --name,
// --exclude-alert and --no-alerts-state).
type AlertConfig struct {
	// AlertName holds the names given via --name. According to the flag
	// help, all alerts are evaluated when no name is given.
	AlertName []string

	// Group is presumably a list of rule group names to restrict the
	// check to — NOTE(review): its flag binding is not visible here, confirm.
	Group []string

	// ExcludeAlerts holds the regular expressions given via
	// --exclude-alert; alerting rules whose name matches one of them
	// are skipped.
	ExcludeAlerts []string

	// ProblemsOnly presumably corresponds to the --problems flag (show
	// only alerts that are not inactive/OK) — TODO confirm against init().
	ProblemsOnly bool

	// NoAlertsState is the state to assign when no alerts are found
	// (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). The flag default is "OK".
	NoAlertsState string
}
Expand Down Expand Up @@ -115,6 +117,11 @@ inactive = 0`,
continue
}

if matches(rl.AlertingRule.Name, cliAlertConfig.ExcludeAlerts) {
// If the alert matches a regex from the list we can skip it.
continue
}

// Handle Inactive Alerts
if len(rl.AlertingRule.Alerts) == 0 {
// Counting states for perfdata
Expand Down Expand Up @@ -197,6 +204,8 @@ func init() {

fs.StringVarP(&cliAlertConfig.NoAlertsState, "no-alerts-state", "T", "OK", "State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK")

fs.StringArrayVar(&cliAlertConfig.ExcludeAlerts, "exclude-alert", []string{}, "Alerts to ignore. Can be used multiple times and supports regex.")

fs.StringSliceVarP(&cliAlertConfig.AlertName, "name", "n", nil,
"The name of one or more specific alerts to check."+
"\nThis parameter can be repeated e.G.: '--name alert1 --name alert2'"+
Expand All @@ -222,3 +231,15 @@ func convertStateToInt(state string) (int, error) {
return check.Unknown, errors.New("invalid state")
}
}

// matches reports whether input matches at least one of the regular
// expressions in regexToExclude.
//
// Patterns are not anchored: a pattern matches when it is found anywhere
// in input, so use ^ and $ to require a full-name match. An empty or nil
// pattern list never matches.
//
// The patterns come straight from the command line, so a malformed
// expression must not crash the check. A pattern that fails to compile is
// treated as matching nothing and the remaining patterns are still tried.
func matches(input string, regexToExclude []string) bool {
	for _, pattern := range regexToExclude {
		re, err := regexp.Compile(pattern)
		if err != nil {
			// Invalid user-supplied regex: it cannot match any alert, so
			// skip it instead of panicking like regexp.MustCompile would.
			continue
		}

		if re.MatchString(input) {
			return true
		}
	}

	return false
}
13 changes: 13 additions & 0 deletions cmd/alert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,19 @@ exit status 2
\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00
|total=2 firing=1 pending=1 inactive=0
exit status 2
`,
},
{
name: "alert-problems-only-with-exlude",
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write([]byte(`{"status":"success","data":{"groups":[{"name":"Foo","file":"alerts.yaml","rules":[{"state":"inactive","name":"HostOutOfMemory","query":"up","duration":120,"labels":{"severity":"critical"},"annotations":{"description":"Foo","summary":"Foo"},"alerts":[],"health":"ok","evaluationTime":0.000553928,"lastEvaluation":"2022-11-24T14:08:17.597083058Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.000581212,"lastEvaluation":"2022-11-24T14:08:17.59706083Z"},{"name":"SQL","file":"alerts.yaml","rules":[{"state":"pending","name":"SqlAccessDeniedRate","query":"mysql","duration":17280000,"labels":{"severity":"warning"},"annotations":{"description":"MySQL","summary":"MySQL"},"alerts":[{"labels":{"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"},"annotations":{"description":"MySQL","summary":"MySQL"},"state":"pending","activeAt":"2022-11-21T10:38:35.373483748Z","value":"4.03448275862069e-01"}],"health":"ok","evaluationTime":0.002909617,"lastEvaluation":"2022-11-24T14:08:25.375220595Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.003046259,"lastEvaluation":"2022-11-24T14:08:25.375096825Z"},{"name":"TLS","file":"alerts.yaml","rules":[{"state":"firing","name":"BlackboxTLS","query":"SSL","duration":0,"labels":{"severity":"critical"},"annotations":{"description":"TLS","summary":"TLS"},"alerts":[{"labels":{"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"},"annotations":{"description":"TLS","summary":"TLS"},"state":"firing","activeAt":"2022-11-24T05:11:27.211699259Z","value":"-6.065338210999966e+06"}],"health":"ok","evaluationTime":0.000713955,"lastEvaluation":"2022-11-24T14:08:17.212720815Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.000738927,"lastEvaluation":"2022-11-24T14:08:17.212700182Z"}]}}`))
})),
args: []string{"run", "../main.go", "alert", "--problems", "--exclude-alert", "Sql.*DeniedRate"},
expected: `[CRITICAL] - 1 Alerts: 1 Firing - 0 Pending - 0 Inactive
\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00
exit status 2
`,
},
Expand Down

0 comments on commit b5fab46

Please sign in to comment.