diff --git a/api/v1/backup_types.go b/api/v1/backup_types.go index 61b40a575..99425fd71 100644 --- a/api/v1/backup_types.go +++ b/api/v1/backup_types.go @@ -33,6 +33,10 @@ type BackupSpec struct { // +optional PromURL string `json:"promURL,omitempty"` + // ClusterName sets the kubernetes cluster name to send to pushgateway for grouping metrics + // +optional + ClusterName string `json:"clusterName,omitempty"` + // StatsURL sets an arbitrary URL where the restic container posts metrics and // information about the snapshots to. This is in addition to the prometheus // pushgateway. diff --git a/api/v1/check_types.go b/api/v1/check_types.go index ffa3379a3..51c912a4d 100644 --- a/api/v1/check_types.go +++ b/api/v1/check_types.go @@ -18,6 +18,10 @@ type CheckSpec struct { // +optional PromURL string `json:"promURL,omitempty"` + // ClusterName sets the kubernetes cluster name to send to pushgateway for grouping metrics + // +optional + ClusterName string `json:"clusterName,omitempty"` + // KeepJobs amount of jobs to keep for later analysis. // // Deprecated: Use FailedJobsHistoryLimit and SuccessfulJobsHistoryLimit respectively. 
diff --git a/cmd/operator/main.go b/cmd/operator/main.go index 399568f2b..d98df171c 100644 --- a/cmd/operator/main.go +++ b/cmd/operator/main.go @@ -73,7 +73,7 @@ var ( &cli.StringFlag{Destination: &cfg.Config.GlobalStatsURL, Name: "globalstatsurl", EnvVars: []string{"BACKUP_GLOBALSTATSURL"}, Usage: "set the URL to post metrics globally"}, &cli.StringFlag{Destination: &cfg.Config.MetricsBindAddress, Name: "metrics-bindaddress", EnvVars: []string{"BACKUP_METRICS_BINDADDRESS"}, Value: ":8080", Usage: "set the bind address for the prometheus endpoint"}, &cli.StringFlag{Destination: &cfg.Config.PromURL, Name: "promurl", EnvVars: []string{"BACKUP_PROMURL"}, Value: "http://127.0.0.1/", Usage: "set the operator wide default prometheus push gateway"}, - + &cli.StringFlag{Destination: &cfg.Config.ClusterName, Name: "clusterName", EnvVars: []string{"CLUSTER_NAME"}, Value: "default", Usage: "set the operator wide kubernetes cluster name to send to push gateway for grouping metrics"}, &cli.StringFlag{Destination: &cfg.Config.RestartPolicy, Name: "restartpolicy", EnvVars: []string{"BACKUP_RESTARTPOLICY"}, Value: "OnFailure", Usage: "set the RestartPolicy for the backup jobs. 
According to https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/, this should be 'OnFailure' for jobs that terminate"}, &cli.StringFlag{Destination: &cfg.Config.PodFilter, Name: "podfilter", EnvVars: []string{"BACKUP_PODFILTER"}, Value: "backupPod=true", Usage: "the filter used to find the backup pods"}, &cli.StringFlag{Destination: &cfg.Config.ServiceAccount, Name: "podexecaccountname", Aliases: []string{"serviceaccount"}, EnvVars: []string{"BACKUP_PODEXECACCOUNTNAME"}, Value: "pod-executor", Usage: "set the service account name that should be used for the pod command execution"}, diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 90d33dbd6..b051476c4 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -91,9 +91,10 @@ func initTest(t *testing.T) *testEnvironment { ctx := context.Background() cfg.Config = &cfg.Configuration{ - Hostname: os.Getenv("HOSTNAME"), - PromURL: os.Getenv("PROM_URL"), - WebhookURL: os.Getenv("STATS_URL"), + Hostname: os.Getenv("HOSTNAME"), + PromURL: os.Getenv("PROM_URL"), + ClusterName: os.Getenv("CLUSTER_NAME"), + WebhookURL: os.Getenv("STATS_URL"), RestoreS3Endpoint: os.Getenv("RESTORE_S3ENDPOINT"), RestoreS3AccessKey: os.Getenv("RESTORE_ACCESSKEYID"), @@ -105,7 +106,7 @@ func initTest(t *testing.T) *testEnvironment { } mainLogger := zapr.NewLogger(zaptest.NewLogger(t)) - statHandler := stats.NewHandler(cfg.Config.PromURL, cfg.Config.Hostname, cfg.Config.WebhookURL, mainLogger) + statHandler := stats.NewHandler(cfg.Config.PromURL, cfg.Config.ClusterName, cfg.Config.Hostname, cfg.Config.WebhookURL, mainLogger) resticCli := cli.New(ctx, mainLogger, statHandler) cleanupDirs(t) diff --git a/cmd/restic/main.go b/cmd/restic/main.go index fce1bd609..326c886ba 100644 --- a/cmd/restic/main.go +++ b/cmd/restic/main.go @@ -55,6 +55,7 @@ var ( &cli.BoolFlag{Destination: &cfg.Config.SkipPreBackup, Name: "skipPreBackup", EnvVars: []string{"SKIP_PREBACKUP"}, 
Usage: "If the job should skip the backup command and only backup volumes."}, &cli.StringFlag{Destination: &cfg.Config.PromURL, Name: "promURL", EnvVars: []string{"PROM_URL"}, Usage: "Sets the URL of a prometheus push gateway to report metrics."}, + &cli.StringFlag{Destination: &cfg.Config.ClusterName, Name: "clusterName", EnvVars: []string{"CLUSTER_NAME"}, Usage: "Sets the Kubernetes cluster name for grouping metrics in push gateway"}, &cli.StringFlag{Destination: &cfg.Config.WebhookURL, Name: "webhookURL", Aliases: []string{"statsURL"}, EnvVars: []string{"STATS_URL"}, Usage: "Sets the URL of a server which will retrieve a webhook after the action completes."}, &cli.StringFlag{Destination: &cfg.Config.Hostname, Name: "hostname", EnvVars: []string{"HOSTNAME"}, Usage: "Sets the hostname to use in reports.", Hidden: true, Required: true}, @@ -120,7 +121,7 @@ func resticMain(c *cli.Context) error { ctx, cancel := context.WithCancel(c.Context) cancelOnTermination(cancel, resticLog) - statHandler := stats.NewHandler(cfg.Config.PromURL, cfg.Config.Hostname, cfg.Config.WebhookURL, resticLog) + statHandler := stats.NewHandler(cfg.Config.PromURL, cfg.Config.ClusterName, cfg.Config.Hostname, cfg.Config.WebhookURL, resticLog) resticCLI := resticCli.New(ctx, resticLog.WithName("restic"), statHandler) diff --git a/config/crd/apiextensions.k8s.io/v1/k8up.io_backups.yaml b/config/crd/apiextensions.k8s.io/v1/k8up.io_backups.yaml index 8639918a3..6c035267b 100644 --- a/config/crd/apiextensions.k8s.io/v1/k8up.io_backups.yaml +++ b/config/crd/apiextensions.k8s.io/v1/k8up.io_backups.yaml @@ -436,6 +436,10 @@ spec: type: object type: array type: object + clusterName: + description: ClusterName sets the kubernetes cluster name to send + to pushgateway for grouping metrics + type: string failedJobsHistoryLimit: description: |- FailedJobsHistoryLimit amount of failed jobs to keep for later analysis. 
diff --git a/config/crd/apiextensions.k8s.io/v1/k8up.io_checks.yaml b/config/crd/apiextensions.k8s.io/v1/k8up.io_checks.yaml index aadf5b7f9..0c84965ef 100644 --- a/config/crd/apiextensions.k8s.io/v1/k8up.io_checks.yaml +++ b/config/crd/apiextensions.k8s.io/v1/k8up.io_checks.yaml @@ -431,6 +431,10 @@ spec: type: object type: array type: object + clusterName: + description: ClusterName sets the kubernetes cluster name to send + to pushgateway for grouping metrics + type: string failedJobsHistoryLimit: description: |- FailedJobsHistoryLimit amount of failed jobs to keep for later analysis. diff --git a/config/crd/apiextensions.k8s.io/v1/k8up.io_schedules.yaml b/config/crd/apiextensions.k8s.io/v1/k8up.io_schedules.yaml index 9d8375356..1991e5960 100644 --- a/config/crd/apiextensions.k8s.io/v1/k8up.io_schedules.yaml +++ b/config/crd/apiextensions.k8s.io/v1/k8up.io_schedules.yaml @@ -1742,6 +1742,10 @@ spec: type: object type: array type: object + clusterName: + description: ClusterName sets the kubernetes cluster name to + send to pushgateway for grouping metrics + type: string concurrentRunsAllowed: type: boolean failedJobsHistoryLimit: @@ -2574,6 +2578,10 @@ spec: type: object type: array type: object + clusterName: + description: ClusterName sets the kubernetes cluster name to + send to pushgateway for grouping metrics + type: string concurrentRunsAllowed: type: boolean failedJobsHistoryLimit: diff --git a/config/samples/k8up_v1_schedule.yaml b/config/samples/k8up_v1_schedule.yaml index f4cc489b6..f0b30058b 100644 --- a/config/samples/k8up_v1_schedule.yaml +++ b/config/samples/k8up_v1_schedule.yaml @@ -41,6 +41,7 @@ spec: failedJobsHistoryLimit: 2 successfulJobsHistoryLimit: 2 promURL: http://minio.minio:9000 + clusterName: default resources: requests: memory: "64Mi" @@ -55,6 +56,7 @@ spec: cpu: "250m" schedule: '@hourly-random' promURL: http://minio.minio:9000 + clusterName: default prune: schedule: '*/4 * * * *' retention: diff --git a/operator/cfg/config.go 
b/operator/cfg/config.go index bf76d5665..07a539c39 100644 --- a/operator/cfg/config.go +++ b/operator/cfg/config.go @@ -75,6 +75,7 @@ type Configuration struct { PodExecRoleName string PodFilter string PromURL string + ClusterName string RestartPolicy string SkipWithoutAnnotation bool diff --git a/restic/cfg/config.go b/restic/cfg/config.go index e2101c064..3fda2031b 100644 --- a/restic/cfg/config.go +++ b/restic/cfg/config.go @@ -35,8 +35,9 @@ type Configuration struct { SkipPreBackup bool - PromURL string - WebhookURL string + PromURL string + ClusterName string + WebhookURL string Hostname string KubeConfig string diff --git a/restic/stats/handler.go b/restic/stats/handler.go index 13767a777..12329b45a 100644 --- a/restic/stats/handler.go +++ b/restic/stats/handler.go @@ -23,14 +23,16 @@ type Handler struct { promURL string promHostname string webhookURL string + clusterName string log logr.Logger } -func NewHandler(promURL, promHostname, webhookURL string, log logr.Logger) *Handler { +func NewHandler(promURL, clusterName, promHostname, webhookURL string, log logr.Logger) *Handler { return &Handler{ promHostname: promHostname, promURL: promURL, webhookURL: webhookURL, + clusterName: clusterName, log: log.WithName("statsHandler"), } } @@ -55,7 +57,7 @@ func (h *Handler) SendPrometheus(promStats cli.PrometheusProvider) error { func (h *Handler) updatePrometheus(collector prometheus.Collector) error { return push.New(h.promURL, subsystem).Collector(collector). - Grouping("instance", h.promHostname). + Grouping("instance", h.promHostname).Grouping("cluster", h.clusterName). Add() }