diff --git a/api/v1alpha1/nodemaintenance_types.go b/api/v1alpha1/nodemaintenance_types.go
index 43a37cb..e2ee2c5 100644
--- a/api/v1alpha1/nodemaintenance_types.go
+++ b/api/v1alpha1/nodemaintenance_types.go
@@ -49,6 +49,11 @@ const (
 	ConditionChangedEventType = "ConditionChanged"
 )
 
+const (
+	// MaintenanceFinalizerName is the name of the finalizer used by NodeMaintenance controllers
+	MaintenanceFinalizerName = "maintenance.finalizers.nvidia.com"
+)
+
 // NodeMaintenanceSpec defines the desired state of NodeMaintenance
 type NodeMaintenanceSpec struct {
 	// RequestorID MUST follow domain name notation format (https://tools.ietf.org/html/rfc1035#section-2.3.1)
@@ -144,6 +149,8 @@ type NodeMaintenanceStatus struct {
 	// +listMapKey=type
 	Conditions []metav1.Condition `json:"conditions,omitempty" patchMergeKey:"type" patchStrategy:"merge" protobuf:"bytes,1,rep,name=conditions"`
 
+	// WaitForCompletion is the list of namespaced names (namespace/name) of pods that we wait to complete
+	WaitForCompletion []string `json:"waitForCompletion,omitempty"`
 	// Drain represents the drain status of the node
 	Drain *DrainStatus `json:"drain,omitempty"`
 }
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 0b67074..e333c3a 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -245,6 +245,11 @@ func (in *NodeMaintenanceStatus) DeepCopyInto(out *NodeMaintenanceStatus) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
+	if in.WaitForCompletion != nil {
+		in, out := &in.WaitForCompletion, &out.WaitForCompletion
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
 	if in.Drain != nil {
 		in, out := &in.Drain, &out.Drain
 		*out = new(DrainStatus)
diff --git a/cmd/maintenance-manager/main.go b/cmd/maintenance-manager/main.go
index c8d19c0..4beb78c 100644
--- a/cmd/maintenance-manager/main.go
+++ b/cmd/maintenance-manager/main.go
@@ -24,19 +24,24 @@ import (
 	// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
 	// to ensure that exec-entrypoint and run can make use of them.
+ "k8s.io/client-go/kubernetes" _ "k8s.io/client-go/plugin/pkg/client/auth" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/healthz" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" maintenancev1alpha1 "github.com/Mellanox/maintenance-operator/api/v1alpha1" "github.com/Mellanox/maintenance-operator/internal/controller" + "github.com/Mellanox/maintenance-operator/internal/cordon" operatorlog "github.com/Mellanox/maintenance-operator/internal/log" + "github.com/Mellanox/maintenance-operator/internal/podcompletion" "github.com/Mellanox/maintenance-operator/internal/scheduler" "github.com/Mellanox/maintenance-operator/internal/version" //+kubebuilder:scaffold:imports @@ -104,7 +109,8 @@ func main() { TLSOpts: tlsOpts, }) - mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + restConfig := ctrl.GetConfigOrDie() + mgr, err := ctrl.NewManager(restConfig, ctrl.Options{ Scheme: scheme, Metrics: metricsserver.Options{ BindAddress: metricsAddr, @@ -132,12 +138,22 @@ func main() { os.Exit(1) } + k8sInterface, err := kubernetes.NewForConfig(restConfig) + if err != nil { + setupLog.Error(err, "unable to create kubernetes interface") + os.Exit(1) + } + + mgrClient := mgr.GetClient() + nmrOptions := controller.NewNodeMaintenanceReconcilerOptions() if err = (&controller.NodeMaintenanceReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Options: nmrOptions, - }).SetupWithManager(mgr); err != nil { + Client: mgrClient, + Scheme: mgr.GetScheme(), + Options: nmrOptions, + CordonHandler: cordon.NewCordonHandler(mgrClient, k8sInterface), + WaitPodCompletionHandler: podcompletion.NewPodCompletionHandler(mgrClient), + }).SetupWithManager(mgr, ctrl.Log.WithName("NodeMaintenanceReconciler")); err != nil { setupLog.Error(err, "unable to create controller", "controller", "NodeMaintenance") os.Exit(1) } @@ -145,7 +161,7 @@ func main() { nmsrOptions := controller.NewNodeMaintenanceSchedulerReconcilerOptions() nmsrLog := ctrl.Log.WithName("NodeMaintenanceScheduler") if err = (&controller.NodeMaintenanceSchedulerReconciler{ - Client: mgr.GetClient(), + Client: mgrClient, Scheme: mgr.GetScheme(), Options: nmsrOptions, Log: nmsrLog, @@ -156,7 +172,7 @@ func main() { } if err = (&controller.MaintenanceOperatorConfigReconciler{ - Client: mgr.GetClient(), + Client: mgrClient, Scheme: mgr.GetScheme(), NodeMaintenanceReconcierOptions: nmrOptions, SchedulerReconcierOptions: nmsrOptions, @@ -175,8 +191,20 @@ func main() { os.Exit(1) } + ctx := ctrl.SetupSignalHandler() + // index fields in mgr cache + + // pod spec.nodeName used in nodemaintenance controller. 
+	err = mgr.GetCache().IndexField(ctx, &corev1.Pod{}, "spec.nodeName", func(o client.Object) []string {
+		return []string{o.(*corev1.Pod).Spec.NodeName}
+	})
+	if err != nil {
+		setupLog.Error(err, "failed to index field for cache")
+		os.Exit(1)
+	}
+
 	setupLog.Info("starting manager")
-	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
+	if err := mgr.Start(ctx); err != nil {
 		setupLog.Error(err, "problem running manager")
 		os.Exit(1)
 	}
diff --git a/config/crd/bases/maintenance.nvidia.com_nodemaintenances.yaml b/config/crd/bases/maintenance.nvidia.com_nodemaintenances.yaml
index e93cf93..91a230a 100644
--- a/config/crd/bases/maintenance.nvidia.com_nodemaintenances.yaml
+++ b/config/crd/bases/maintenance.nvidia.com_nodemaintenances.yaml
@@ -245,6 +245,12 @@ spec:
                       type: string
                     type: array
                 type: object
+              waitForCompletion:
+                description: WaitForCompletion is the list of namespaced names (namespace/name)
+                  of pods that we wait to complete
+                items:
+                  type: string
+                type: array
             type: object
         type: object
     served: true
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index 3a21273..1bced71 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -17,6 +17,19 @@ rules:
   verbs:
   - get
   - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - delete
+  - get
+  - list
+  - patch
+  - update
   - watch
 - apiGroups:
   - maintenance.nvidia.com
diff --git a/go.mod b/go.mod
index fde8c74..876458b 100644
--- a/go.mod
+++ b/go.mod
@@ -10,19 +10,26 @@ require (
 	github.com/onsi/gomega v1.33.1
 	github.com/pkg/errors v0.9.1
 	go.uber.org/zap v1.26.0
-	k8s.io/api v0.30.2
-	k8s.io/apimachinery v0.30.2
-	k8s.io/client-go v0.30.2
+	k8s.io/api v0.30.3
+	k8s.io/apimachinery v0.30.3
+	k8s.io/client-go v0.30.3
+	k8s.io/kubectl v0.30.3
 	sigs.k8s.io/controller-runtime v0.18.4
 )
 
 require (
+	github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
+	github.com/MakeNowJust/heredoc v1.0.0 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cespare/xxhash/v2 v2.2.0 // indirect
+	github.com/chai2010/gettext-go v1.0.2 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/emicklei/go-restful/v3 v3.11.0 // indirect
+	github.com/evanphx/json-patch v4.12.0+incompatible // indirect
 	github.com/evanphx/json-patch/v5 v5.9.0 // indirect
+	github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
 	github.com/fsnotify/fsnotify v1.7.0 // indirect
+	github.com/go-errors/errors v1.4.2 // indirect
 	github.com/go-logr/zapr v1.3.0 // indirect
 	github.com/go-openapi/jsonpointer v0.19.6 // indirect
 	github.com/go-openapi/jsonreference v0.20.2 // indirect
@@ -31,28 +38,45 @@ require (
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 	github.com/golang/protobuf v1.5.4 // indirect
+	github.com/google/btree v1.0.1 // indirect
 	github.com/google/gnostic-models v0.6.8 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/gofuzz v1.2.0 // indirect
 	github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect
+	github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
 	github.com/google/uuid v1.3.0 // indirect
+	github.com/gorilla/websocket v1.5.0 // indirect
+	github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
 	github.com/imdario/mergo v0.3.6 // indirect
+	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
+
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/mitchellh/go-wordwrap v1.0.1 // indirect + github.com/moby/spdystream v0.2.0 // indirect + github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect + github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/prometheus/client_golang v1.16.0 // indirect github.com/prometheus/client_model v0.4.0 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/spf13/cobra v1.7.0 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/xlab/treeprint v1.2.0 // indirect + go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect + golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.20.0 // indirect golang.org/x/term v0.20.0 // indirect golang.org/x/text v0.15.0 // indirect @@ -65,10 +89,14 @@ require ( gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.30.1 // indirect + k8s.io/cli-runtime v0.30.3 // indirect + k8s.io/component-base v0.30.3 // indirect k8s.io/klog/v2 v2.120.1 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 // indirect + sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect sigs.k8s.io/yaml v1.3.0 // indirect ) diff --git a/go.sum b/go.sum index b9544b9..a1d3011 100644 --- a/go.sum +++ b/go.sum @@ -1,19 +1,43 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= +github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= +github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.2.0 
h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chai2010/gettext-go v1.0.2 h1:1Lwwip6Q2QGsAdl/ZKPCwTe9fe0CjlUbqj5bFNSjIRk= +github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= +github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84= github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d h1:105gxyaGwCFad8crR9dcMQWvV9Hvulu6hwUh4tWPJnM= +github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZMPRZwes7CROmyNKgQzC3XPs6L/G2EJLHddWejkmf4= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= +github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= @@ -28,14 +52,31 @@ github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da 
h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= +github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -44,10 +85,19 @@ github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= +github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gregjones/httpcache 
v0.0.0-20180305231024-9cad4c3443a7 h1:pdN6V1QBWetyv/0+wjACpqVH+eVULgEjkurDLq3goeM= +github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -61,27 +111,42 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0= +github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= +github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= +github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= +github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0= +github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod 
h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= +github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= +github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= @@ -90,19 +155,31 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 
+github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= +github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY= +go.starlark.net v0.0.0-20230525235612-a134d8f9ddca/go.mod h1:jxU+3+j+71eXOW14274+SmmuW82qJzl6iZSeqEtTGds= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -112,10 +189,18 @@ go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -123,17 +208,27 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync 
v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -144,6 +239,10 @@ golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= @@ -155,8 +254,24 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -170,24 +285,36 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.30.2 h1:+ZhRj+28QT4UOH+BKznu4CBgPWgkXO7XAvMcMl0qKvI= -k8s.io/api v0.30.2/go.mod h1:ULg5g9JvOev2dG0u2hig4Z7tQ2hHIuS+m8MNZ+X6EmI= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +k8s.io/api v0.30.3 h1:ImHwK9DCsPA9uoU3rVh4QHAHHK5dTSv1nxJUapx8hoQ= +k8s.io/api v0.30.3/go.mod h1:GPc8jlzoe5JG3pb0KJCSLX5oAFIW3/qNJITlDj8BH04= k8s.io/apiextensions-apiserver v0.30.1 h1:4fAJZ9985BmpJG6PkoxVRpXv9vmPUOVzl614xarePws= k8s.io/apiextensions-apiserver v0.30.1/go.mod h1:R4GuSrlhgq43oRY9sF2IToFh7PVlF1JjfWdoG3pixk4= -k8s.io/apimachinery v0.30.2 h1:fEMcnBj6qkzzPGSVsAZtQThU62SmQ4ZymlXRC5yFSCg= -k8s.io/apimachinery v0.30.2/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= -k8s.io/client-go v0.30.2 h1:sBIVJdojUNPDU/jObC+18tXWcTJVcwyqS9diGdWHk50= -k8s.io/client-go v0.30.2/go.mod h1:JglKSWULm9xlJLx4KCkfLLQ7XwtlbflV6uFFSHTMgVs= +k8s.io/apimachinery v0.30.3 h1:q1laaWCmrszyQuSQCfNB8cFgCuDAoPszKY4ucAjDwHc= +k8s.io/apimachinery v0.30.3/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= 
+k8s.io/cli-runtime v0.30.3 h1:aG69oRzJuP2Q4o8dm+f5WJIX4ZBEwrvdID0+MXyUY6k= +k8s.io/cli-runtime v0.30.3/go.mod h1:hwrrRdd9P84CXSKzhHxrOivAR9BRnkMt0OeP5mj7X30= +k8s.io/client-go v0.30.3 h1:bHrJu3xQZNXIi8/MoxYtZBBWQQXwy16zqJwloXXfD3k= +k8s.io/client-go v0.30.3/go.mod h1:8d4pf8vYu665/kUbsxWAQ/JDBNWqfFeZnvFiVdmx89U= +k8s.io/component-base v0.30.3 h1:Ci0UqKWf4oiwy8hr1+E3dsnliKnkMLZMVbWzeorlk7s= +k8s.io/component-base v0.30.3/go.mod h1:C1SshT3rGPCuNtBs14RmVD2xW0EhRSeLvBh7AGk1quA= k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/kubectl v0.30.3 h1:YIBBvMdTW0xcDpmrOBzcpUVsn+zOgjMYIu7kAq+yqiI= +k8s.io/kubectl v0.30.3/go.mod h1:IcR0I9RN2+zzTRUa1BzZCm4oM0NLOawE6RzlDvd1Fpo= k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHvm5BZw= sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 h1:XX3Ajgzov2RKUdc5jW3t5jwY7Bo7dcRm+tFxT+NfgY0= +sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3/go.mod h1:9n16EZKMhXBNSiUC5kSdFQJkdH3zbxS/JoO619G1VAY= +sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 h1:W6cLQc5pnqM7vh3b7HvGNfXrJ/xL6BDMS0v1V/HHg5U= +sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3/go.mod h1:JWP1Fj0VWGHyw3YUPjXSQnRnrwezrZSrApfX5S0nIag= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= diff --git a/internal/controller/nodemaintenance_controller.go b/internal/controller/nodemaintenance_controller.go index a71bce8..44810c1 100644 --- a/internal/controller/nodemaintenance_controller.go +++ b/internal/controller/nodemaintenance_controller.go @@ -18,25 +18,41 @@ package controller import ( "context" + "errors" "sync" "time" "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1" + "github.com/Mellanox/maintenance-operator/internal/cordon" "github.com/Mellanox/maintenance-operator/internal/k8sutils" + operatorlog "github.com/Mellanox/maintenance-operator/internal/log" + "github.com/Mellanox/maintenance-operator/internal/podcompletion" ) -var 
defaultMaxNodeMaintenanceTime = 1600 * time.Second
+var (
+	defaultMaxNodeMaintenanceTime = 1600 * time.Second
+	waitPodCompletionRequeueTime  = 10 * time.Second
+)
+
+const (
+	// ReadyTimeAnnotation is an annotation that contains the time NodeMaintenance transitioned to Ready state
+	ReadyTimeAnnotation = "maintenance.nvidia.com/ready-time"
+)
 
 // NewNodeMaintenanceReconcilerOptions creates new *NodeMaintenanceReconcilerOptions
 func NewNodeMaintenanceReconcilerOptions() *NodeMaintenanceReconcilerOptions {
@@ -82,13 +98,17 @@ type NodeMaintenanceReconciler struct {
 	Scheme        *runtime.Scheme
 	EventRecorder record.EventRecorder
 
-	Options *NodeMaintenanceReconcilerOptions
+	Options                  *NodeMaintenanceReconcilerOptions
+	CordonHandler            cordon.Handler
+	WaitPodCompletionHandler podcompletion.Handler
 }
 
 //+kubebuilder:rbac:groups=maintenance.nvidia.com,resources=nodemaintenances,verbs=get;list;watch;create;update;patch;delete
 //+kubebuilder:rbac:groups=maintenance.nvidia.com,resources=nodemaintenances/status,verbs=get;update;patch
 //+kubebuilder:rbac:groups=maintenance.nvidia.com,resources=nodemaintenances/finalizers,verbs=update
 //+kubebuilder:rbac:groups="",resources=events,verbs=create
+//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;update;patch
+//+kubebuilder:rbac:groups="",resources=pods,verbs=get;watch;list;update;patch;delete
 
 // Reconcile is part of the main kubernetes reconciliation loop which aims to
 // move the current state of the cluster closer to the desired state.
@@ -98,6 +118,7 @@ type NodeMaintenanceReconciler struct {
 func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
 	reqLog := log.FromContext(ctx)
 	reqLog.Info("got request", "name", req.NamespacedName)
+	var err error
 
 	// load any stored options
 	r.Options.Load()
@@ -105,7 +126,7 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 
 	// get NodeMaintenance object
 	nm := &maintenancev1.NodeMaintenance{}
-	if err := r.Get(ctx, types.NamespacedName{Namespace: req.Namespace, Name: req.Name}, nm); err != nil {
+	if err = r.Get(ctx, types.NamespacedName{Namespace: req.Namespace, Name: req.Name}, nm); err != nil {
 		if k8serrors.IsNotFound(err) {
 			reqLog.Info("NodeMaintenance object not found, nothing to do.")
 			return reconcile.Result{}, nil
@@ -113,49 +134,326 @@
 		return reconcile.Result{}, err
 	}
 
+	// get node object
+	node := &corev1.Node{}
+	err = r.Client.Get(ctx, types.NamespacedName{Name: nm.Spec.NodeName}, node)
+	if err != nil {
+		if k8serrors.IsNotFound(err) {
+			reqLog.Info("node not found", "name", nm.Spec.NodeName)
+			// node not found, remove finalizer from NodeMaintenance if exists
+			err = k8sutils.RemoveFinalizer(ctx, r.Client, nm, maintenancev1.MaintenanceFinalizerName)
+			if err != nil {
+				reqLog.Error(err, "failed to remove finalizer for NodeMaintenance", "namespace", nm.Namespace, "name", nm.Name)
+			}
+			return reconcile.Result{}, err
+		}
+		return reconcile.Result{}, err
+	}
+
+	// set owner reference if not set
+	if !k8sutils.HasOwnerRef(node, nm) {
+		err = k8sutils.SetOwnerRef(ctx, r.Client, node, nm)
+		if err != nil {
+			reqLog.Error(err, "failed to set owner reference for NodeMaintenance", "namespace", nm.Namespace, "name", nm.Name)
+		}
+	}
+
 	// Handle its state according to Ready Condition
 	state := k8sutils.GetReadyConditionReason(nm)
+	res := ctrl.Result{}
 
-	var err error
-	//nolint:gocritic
 	switch state {
 	case maintenancev1.ConditionReasonUninitialized:
-		err = r.handleUninitiaized(ctx, reqLog, nm)
+		err = r.handleUninitializedState(ctx, reqLog, nm)
 		if err != nil {
-			reqLog.Error(err, "failed to handle uninitialized NodeMaintenance object")
+			reqLog.Error(err, "failed to handle uninitialized state for NodeMaintenance object")
+		}
+	case maintenancev1.ConditionReasonScheduled:
+		err = r.handleScheduledState(ctx, reqLog, nm)
+		if err != nil {
+			reqLog.Error(err, "failed to handle scheduled state for NodeMaintenance object")
+		}
+	case maintenancev1.ConditionReasonCordon:
+		err = r.handleCordonState(ctx, reqLog, nm, node)
+		if err != nil {
+			reqLog.Error(err, "failed to handle cordon state for NodeMaintenance object")
+		}
+	case maintenancev1.ConditionReasonWaitForPodCompletion:
+		res, err = r.handleWaitPodCompletionState(ctx, reqLog, nm, node)
+		if err != nil {
+			reqLog.Error(err, "failed to handle waitForPodCompletion state for NodeMaintenance object")
+		}
+	case maintenancev1.ConditionReasonDraining:
+		// TODO(adrianc): implement
+	case maintenancev1.ConditionReasonReady:
+		err = r.handleReadyState(ctx, reqLog, nm, node)
+		if err != nil {
+			reqLog.Error(err, "failed to handle Ready state for NodeMaintenance object")
 		}
 	}
 
-	return ctrl.Result{}, err
+	return res, err
 }
 
-// handleUninitiaized handles NodeMaintenance in ConditionReasonUninitialized state
+// handleUninitializedState handles NodeMaintenance in ConditionReasonUninitialized state
 // it eventually sets NodeMaintenance Ready condition Reason to ConditionReasonPending
-func (r *NodeMaintenanceReconciler) handleUninitiaized(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance) error {
+func (r *NodeMaintenanceReconciler) handleUninitializedState(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance) error {
 	reqLog.Info("Handle Uninitialized NodeMaintenance")
 
-	// set Ready condition to ConditionReasonPending and update object
-	changed := k8sutils.SetReadyConditionReason(nm, maintenancev1.ConditionReasonPending)
+	// Set Ready condition to ConditionReasonPending and update object
+	err := k8sutils.SetReadyConditionReason(ctx, r.Client, nm, maintenancev1.ConditionReasonPending)
+	if err != nil {
+		reqLog.Error(err, "failed to update status for NodeMaintenance object")
+		return err
+	}
+
+	// emit state change event
+	r.EventRecorder.Event(
+		nm, corev1.EventTypeNormal, maintenancev1.ConditionChangedEventType, maintenancev1.ConditionReasonPending)
+
+	return nil
+}
+
+// handleScheduledState handles NodeMaintenance in ConditionReasonScheduled state
+// it eventually sets NodeMaintenance Ready condition Reason to ConditionReasonCordon
+func (r *NodeMaintenanceReconciler) handleScheduledState(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance) error {
+	reqLog.Info("Handle Scheduled NodeMaintenance")
+
+	// handle finalizers
 	var err error
-	if changed {
-		err = r.Status().Update(ctx, nm)
+	if nm.GetDeletionTimestamp().IsZero() {
+		// conditionally add finalizer
+		err = k8sutils.AddFinalizer(ctx, r.Client, nm, maintenancev1.MaintenanceFinalizerName)
 		if err != nil {
-			reqLog.Error(err, "failed to update status for NodeMaintenance object")
+			reqLog.Error(err, "failed to set finalizer for NodeMaintenance", "namespace", nm.Namespace, "name", nm.Name)
+			return err
 		}
+	} else {
+		// object is being deleted, remove finalizer if exists and return
+		reqLog.Info("NodeMaintenance object is deleting, removing maintenance finalizer", "namespace", nm.Namespace, "name", nm.Name)
+		err = k8sutils.RemoveFinalizer(ctx, r.Client, nm, maintenancev1.MaintenanceFinalizerName)
+		if err != nil {
+			reqLog.Error(err, "failed to remove finalizer for NodeMaintenance", "namespace", nm.Namespace, "name", nm.Name)
+		}
+		return err
+	}
+
+	// TODO(adrianc): in openshift, we should pause MCP here
+
+	// Set Ready condition to ConditionReasonCordon and update object
+	err = k8sutils.SetReadyConditionReason(ctx, r.Client, nm, maintenancev1.ConditionReasonCordon)
+	if err != nil {
+		reqLog.Error(err, "failed to update status for NodeMaintenance object")
+		return err
 	}
 
 	// emit state change event
 	r.EventRecorder.Event(
-		nm, corev1.EventTypeNormal, maintenancev1.ConditionChangedEventType, maintenancev1.ConditionReasonPending)
+		nm, corev1.EventTypeNormal, maintenancev1.ConditionChangedEventType, maintenancev1.ConditionReasonCordon)
 
-	return err
+	return nil
+}
+func (r *NodeMaintenanceReconciler) handleCordonState(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance, node *corev1.Node) error {
+	reqLog.Info("Handle Cordon NodeMaintenance")
+	var err error
+
+	if !nm.GetDeletionTimestamp().IsZero() {
+		reqLog.Info("NodeMaintenance object is deleting")
+		if nm.Spec.Cordon {
+			reqLog.Info("handle uncordon of node", "node", node.Name)
+			err = r.CordonHandler.HandleUnCordon(ctx, reqLog, nm, node)
+			if err != nil {
+				return err
+			}
+		}
+
+		// TODO(adrianc): unpause MCP in OCP when support is added.
+
+		reqLog.Info("remove maintenance finalizer for node maintenance", "namespace", nm.Namespace, "name", nm.Name)
+		err = k8sutils.RemoveFinalizer(ctx, r.Client, nm, maintenancev1.MaintenanceFinalizerName)
+		if err != nil {
+			reqLog.Error(err, "failed to remove finalizer for NodeMaintenance", "namespace", nm.Namespace, "name", nm.Name)
+		}
+		return err
+	}
+
+	if nm.Spec.Cordon {
+		err = r.CordonHandler.HandleCordon(ctx, reqLog, nm, node)
+		if err != nil {
+			return err
+		}
+	}
+
+	err = k8sutils.SetReadyConditionReason(ctx, r.Client, nm, maintenancev1.ConditionReasonWaitForPodCompletion)
+	if err != nil {
+		reqLog.Error(err, "failed to update status for NodeMaintenance object")
+		return err
+	}
+
+	r.EventRecorder.Event(
+		nm, corev1.EventTypeNormal, maintenancev1.ConditionChangedEventType, maintenancev1.ConditionReasonWaitForPodCompletion)
+
+	return nil
+}
+
+func (r *NodeMaintenanceReconciler) handleWaitPodCompletionState(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance, node *corev1.Node) (ctrl.Result, error) {
+	reqLog.Info("Handle WaitPodCompletion NodeMaintenance")
+	// handle finalizers
+	var err error
+	var res ctrl.Result
+
+	if !nm.GetDeletionTimestamp().IsZero() {
+		// object is being deleted, handle cleanup.
+		reqLog.Info("NodeMaintenance object is deleting")
+		if nm.Spec.Cordon {
+			reqLog.Info("handle uncordon of node", "node", node.Name)
+			err = r.CordonHandler.HandleUnCordon(ctx, reqLog, nm, node)
+			if err != nil {
+				return res, err
+			}
+		}
+
+		// TODO(adrianc): unpause MCP in OCP when support is added.
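+		// NOTE: the finalizer is removed only after a successful uncordon above,
+		// so a node is never left cordoned once its NodeMaintenance object is gone.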
+
+		// remove finalizer if exists and return
+		reqLog.Info("NodeMaintenance object is deleting, removing maintenance finalizer", "namespace", nm.Namespace, "name", nm.Name)
+		err = k8sutils.RemoveFinalizer(ctx, r.Client, nm, maintenancev1.MaintenanceFinalizerName)
+		if err != nil {
+			reqLog.Error(err, "failed to remove finalizer for NodeMaintenance", "namespace", nm.Namespace, "name", nm.Name)
+		}
+		return res, err
+	}
+
+	if nm.Spec.WaitForPodCompletion != nil {
+		waitingForPods, err := r.WaitPodCompletionHandler.HandlePodCompletion(ctx, reqLog, nm)
+
+		if err == nil {
+			if len(waitingForPods) > 0 {
+				reqLog.Info("waiting for pods to finish", "pods", waitingForPods)
+				return ctrl.Result{Requeue: true, RequeueAfter: waitPodCompletionRequeueTime}, nil
+			}
+		} else if !errors.Is(err, podcompletion.ErrPodCompletionTimeout) {
+			reqLog.Error(err, "failed to handle waitPodCompletion")
+			return res, err
+		}
+		// Note(adrianc): we get here if waitingForPods is zero length or timeout reached, in any case
+		// we can progress to next step for this NodeMaintenance
+	}
+
+	// update condition and send event
+	err = k8sutils.SetReadyConditionReason(ctx, r.Client, nm, maintenancev1.ConditionReasonReady)
+	if err != nil {
+		reqLog.Error(err, "failed to update status for NodeMaintenance object")
+		return res, err
+	}
+
+	r.EventRecorder.Event(
+		nm, corev1.EventTypeNormal, maintenancev1.ConditionChangedEventType, maintenancev1.ConditionReasonReady)
+
+	return res, nil
+}
+
+func (r *NodeMaintenanceReconciler) handleReadyState(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance, node *corev1.Node) error {
+	reqLog.Info("Handle Ready NodeMaintenance")
+	// handle finalizers
+	var err error
+
+	if !nm.GetDeletionTimestamp().IsZero() {
+		// object is being deleted, handle cleanup.
+		reqLog.Info("NodeMaintenance object is deleting")
+		if nm.Spec.Cordon {
+			reqLog.Info("handle uncordon of node", "node", node.Name)
+			err = r.CordonHandler.HandleUnCordon(ctx, reqLog, nm, node)
+			if err != nil {
+				return err
+			}
+		}
+
+		// TODO(adrianc): unpause MCP in OCP when support is added.
+
+		// remove finalizer if exists and return
+		reqLog.Info("NodeMaintenance object is deleting, removing maintenance finalizer", "namespace", nm.Namespace, "name", nm.Name)
+		err = k8sutils.RemoveFinalizer(ctx, r.Client, nm, maintenancev1.MaintenanceFinalizerName)
+		if err != nil {
+			reqLog.Error(err, "failed to remove finalizer for NodeMaintenance", "namespace", nm.Namespace, "name", nm.Name)
+		}
+		return err
+	}
+
+	// set ready-time annotation
+	if !metav1.HasAnnotation(nm.ObjectMeta, ReadyTimeAnnotation) ||
+		nm.Annotations[ReadyTimeAnnotation] == "" {
+		metav1.SetMetaDataAnnotation(&nm.ObjectMeta, ReadyTimeAnnotation, time.Now().UTC().Format(time.RFC3339))
+		err := r.Update(ctx, nm)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
 }
 
 // SetupWithManager sets up the controller with the Manager.
-func (r *NodeMaintenanceReconciler) SetupWithManager(mgr ctrl.Manager) error {
+func (r *NodeMaintenanceReconciler) SetupWithManager(mgr ctrl.Manager, log logr.Logger) error {
 	r.EventRecorder = mgr.GetEventRecorderFor("nodemaintenancereconciler")
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&maintenancev1.NodeMaintenance{}).
+		For(&maintenancev1.NodeMaintenance{}, builder.WithPredicates(NewReadyConditionChangedPredicate(log))).
Complete(r) } + +// NewReadyConditionChangedPredicate creates a new ReadyConditionChangedPredicate +func NewReadyConditionChangedPredicate(log logr.Logger) ReadyConditionChangedPredicate { + return ReadyConditionChangedPredicate{ + Funcs: predicate.Funcs{}, + log: log, + } +} + +// ReadyConditionChangedPredicate will trigger enqueue of Event for reconcile in the following cases: +// 1. A change in NodeMaintenance Ready Condition +// 2. Update to the object occurred and deletion timestamp is set +// 3. NodeMaintenance created +// 4. NodeMaintenance deleted +// 5. generic event received +type ReadyConditionChangedPredicate struct { + predicate.Funcs + + log logr.Logger +} + +// Update implements Predicate. +func (p ReadyConditionChangedPredicate) Update(e event.TypedUpdateEvent[client.Object]) bool { + if e.ObjectOld == nil { + p.log.Error(nil, "old object is nil in update event, ignoring event.") + return false + } + if e.ObjectNew == nil { + p.log.Error(nil, "new object is nil in update event, ignoring event.") + return false + } + + oldO, ok := e.ObjectOld.(*maintenancev1.NodeMaintenance) + if !ok { + p.log.Error(nil, "failed to cast old object to NodeMaintenance in update event, ignoring event.") + return false + } + + newO, ok := e.ObjectNew.(*maintenancev1.NodeMaintenance) + if !ok { + p.log.Error(nil, "failed to cast new object to NodeMaintenance in update event, ignoring event.") + return false + } + + oldRCR := k8sutils.GetReadyConditionReason(oldO) + newRCR := k8sutils.GetReadyConditionReason(newO) + + process := oldRCR != newRCR || !newO.GetDeletionTimestamp().IsZero() + + p.log.V(operatorlog.DebugLevel).Info("Update event for NodeMaintenance", + "name", newO.Name, "namespace", newO.Namespace, + "condition-changed", oldRCR != newRCR, "old", oldRCR, "new", newRCR, + "deleting", !newO.GetDeletionTimestamp().IsZero(), "process", process) + + return process +} diff --git a/internal/controller/nodemaintenance_controller_test.go b/internal/controller/nodemaintenance_controller_test.go index c2a4024..a70a678 100644 --- a/internal/controller/nodemaintenance_controller_test.go +++ b/internal/controller/nodemaintenance_controller_test.go @@ -24,13 +24,18 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1" + "github.com/Mellanox/maintenance-operator/internal/cordon" "github.com/Mellanox/maintenance-operator/internal/k8sutils" + "github.com/Mellanox/maintenance-operator/internal/podcompletion" "github.com/Mellanox/maintenance-operator/internal/testutils" ) @@ -38,6 +43,7 @@ var _ = Describe("NodeMaintenance Controller", func() { Context("Envtests", func() { var nmObjectsToCleanup []*maintenancev1.NodeMaintenance var nodeObjectsToCleanup []*corev1.Node + var podObjectsToCleanup []*corev1.Pod var reconciler *NodeMaintenanceReconciler var options *NodeMaintenanceReconcilerOptions @@ -67,14 +73,22 @@ var _ = Describe("NodeMaintenance Controller", func() { By("create NodeMaintenanceReconciler") options = NewNodeMaintenanceReconcilerOptions() reconciler = &NodeMaintenanceReconciler{ - Client: k8sClient, - Scheme: k8sClient.Scheme(), - Options: options, + Client: k8sClient, + Scheme: k8sClient.Scheme(), + Options: options, + CordonHandler: cordon.NewCordonHandler(k8sClient, k8sInterface), + WaitPodCompletionHandler: podcompletion.NewPodCompletionHandler(k8sClient), } // setup reconciler with manager By("setup NodeMaintenanceReconciler with controller manager") - Expect(reconciler.SetupWithManager(mgr)).ToNot(HaveOccurred()) + Expect(reconciler.SetupWithManager(mgr, ctrllog.Log.WithName("NodeMaintenanceReconciler"))). 
+ ToNot(HaveOccurred()) + + // set up index fields + Expect(mgr.GetCache().IndexField(testCtx, &corev1.Pod{}, "spec.nodeName", func(o client.Object) []string { + return []string{o.(*corev1.Pod).Spec.NodeName} + })).ToNot(HaveOccurred()) // start manager testMgrCtx, cancel := context.WithCancel(testCtx) @@ -99,7 +113,22 @@ var _ = Describe("NodeMaintenance Controller", func() { AfterEach(func() { By("Cleanup NodeMaintenance resources") for _, nm := range nmObjectsToCleanup { - Expect(k8sClient.Delete(testCtx, nm)).To(Succeed()) + err := k8sClient.Delete(testCtx, nm) + if err != nil && k8serrors.IsNotFound(err) { + err = nil + } + Expect(err).ToNot(HaveOccurred()) + } + By("Wait for NodeMaintenance resources to be deleted") + for _, nm := range nmObjectsToCleanup { + Eventually(func() bool { + err := k8sClient.Get(testCtx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, nm) + if err != nil && k8serrors.IsNotFound(err) { + return true + } + return false + + }).WithTimeout(10 * time.Second).WithPolling(1 * time.Second).Should(BeTrue()) } nmObjectsToCleanup = make([]*maintenancev1.NodeMaintenance, 0) @@ -108,6 +137,17 @@ var _ = Describe("NodeMaintenance Controller", func() { Expect(k8sClient.Delete(testCtx, n)).To(Succeed()) } nodeObjectsToCleanup = make([]*corev1.Node, 0) + + By("Cleanup Pod resources") + for _, p := range podObjectsToCleanup { + var grace int64 + err := k8sClient.Delete(testCtx, p, &client.DeleteOptions{GracePeriodSeconds: &grace, Preconditions: &metav1.Preconditions{UID: &p.UID}}) + if err != nil && k8serrors.IsNotFound(err) { + err = nil + } + Expect(err).ToNot(HaveOccurred()) + } + podObjectsToCleanup = make([]*corev1.Pod, 0) }) It("Should transition new NodeMaintenance Resource to Pending", func() { @@ -116,24 +156,82 @@ var _ = Describe("NodeMaintenance Controller", func() { nmObjectsToCleanup = append(nmObjectsToCleanup, nm) By("Eventually NodeMaintenance condition is set to Pending") - Eventually(func() string { - nm := &maintenancev1.NodeMaintenance{} - err := k8sClient.Get(testCtx, types.NamespacedName{Namespace: "default", Name: "test-nm"}, nm) - if err == nil { - return k8sutils.GetReadyConditionReason(nm) - } - return "" - }).WithTimeout(10 * time.Second).WithPolling(1 * time.Second).Should(Equal(maintenancev1.ConditionReasonPending)) + Eventually(testutils.GetReadyConditionReasonForFn(testCtx, k8sClient, client.ObjectKeyFromObject(nm))). + WithTimeout(10 * time.Second).WithPolling(1 * time.Second).Should(Equal(maintenancev1.ConditionReasonPending)) By("ConditionChanged event with Pending msg is sent for NodeMaintenance") - Eventually(func() string { - el := &corev1.EventList{} - err := k8sClient.List(testCtx, el, client.MatchingFields{"involvedObject.uid": string(nm.UID)}) - if err == nil && len(el.Items) > 0 { - return el.Items[0].Message + Eventually(testutils.EventsForObjFn(testCtx, k8sClient, nm.UID)).WithTimeout(10 * time.Second). 
+ WithPolling(1 * time.Second).Should(Equal([]string{maintenancev1.ConditionReasonPending}))
+ })
+
+ It("Full lifecycle of NodeMaintenance", func() {
+ By("Create NodeMaintenance")
+ nm := testutils.GetTestNodeMaintenance("test-nm", "test-node-0", "some-operator.nvidia.com", "")
+ nm.Spec.WaitForPodCompletion = &maintenancev1.WaitForPodCompletionSpec{}
+ Expect(k8sClient.Create(testCtx, nm)).ToNot(HaveOccurred())
+ nmObjectsToCleanup = append(nmObjectsToCleanup, nm)
+
+ By("Create test pod")
+ pod := testutils.GetTestPod("test-pod", "test-node-0", nil)
+ Expect(k8sClient.Create(testCtx, pod)).ToNot(HaveOccurred())
+ podObjectsToCleanup = append(podObjectsToCleanup, pod)
+
+ By("Eventually NodeMaintenance condition is set to Pending")
+ Eventually(testutils.GetReadyConditionReasonForFn(testCtx, k8sClient, client.ObjectKeyFromObject(nm))).
+ WithTimeout(10 * time.Second).WithPolling(1 * time.Second).Should(Equal(maintenancev1.ConditionReasonPending))
+
+ By("Set NodeMaintenance to Scheduled")
+ Expect(k8sClient.Get(testCtx, types.NamespacedName{Namespace: "default", Name: "test-nm"}, nm)).ToNot(HaveOccurred())
+ Expect(k8sutils.SetReadyConditionReason(testCtx, k8sClient, nm, maintenancev1.ConditionReasonScheduled)).ToNot(HaveOccurred())
+
+ By("Eventually NodeMaintenance condition is set to WaitForPodCompletion")
+ Eventually(testutils.GetReadyConditionReasonForFn(testCtx, k8sClient, client.ObjectKeyFromObject(nm))).
+ WithTimeout(10 * time.Second).WithPolling(1 * time.Second).Should(Equal(maintenancev1.ConditionReasonWaitForPodCompletion))
+
+ By("Consistently NodeMaintenance remains in WaitForPodCompletion")
+ Consistently(testutils.GetReadyConditionReasonForFn(testCtx, k8sClient, client.ObjectKeyFromObject(nm))).
+ Within(time.Second).WithPolling(100 * time.Millisecond).
+ Should(Equal(maintenancev1.ConditionReasonWaitForPodCompletion))
+
+ By("After deleting pod, NodeMaintenance is eventually Ready")
+ // NOTE(adrianc): for pods we must provide DeleteOptions as below, else the apiserver will not delete the pod object
+ var grace int64
+ Expect(k8sClient.Delete(testCtx, pod, &client.DeleteOptions{GracePeriodSeconds: &grace, Preconditions: &metav1.Preconditions{UID: &pod.UID}})).
+ ToNot(HaveOccurred())
+ Eventually(testutils.GetReadyConditionReasonForFn(testCtx, k8sClient, client.ObjectKeyFromObject(nm))).
+ WithTimeout(20 * time.Second).WithPolling(1 * time.Second).Should(Equal(maintenancev1.ConditionReasonReady))
+
+ By("Validating expected node state and NodeMaintenance annotations")
+ node := &corev1.Node{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-node-0",
+ },
+ }
+ Expect(k8sClient.Get(testCtx, client.ObjectKeyFromObject(node), node)).ToNot(HaveOccurred())
+ Expect(node.Spec.Unschedulable).To(BeTrue())
+
+ Expect(k8sClient.Get(testCtx, client.ObjectKeyFromObject(nm), nm)).ToNot(HaveOccurred())
+ Expect(nm.Annotations[cordon.NodeInitialStateUnschedulableAnnot]).To(Equal("false"))
+ Expect(nm.Annotations[podcompletion.WaitForPodCompletionStartAnnot]).ToNot(BeEmpty())
+ Expect(nm.Annotations[ReadyTimeAnnotation]).ToNot(BeEmpty())
+
+ By("ConditionChanged events are sent for NodeMaintenance")
+ Eventually(testutils.EventsForObjFn(testCtx, k8sClient, nm.UID)).WithTimeout(10 * time.Second).
+ WithPolling(1 * time.Second).Should(ContainElements( + maintenancev1.ConditionReasonPending, maintenancev1.ConditionReasonCordon, + maintenancev1.ConditionReasonWaitForPodCompletion, maintenancev1.ConditionReasonReady)) + + By("Should Uncordon node after NodeMaintenance is deleted") + Expect(k8sClient.Delete(testCtx, nm)).ToNot(HaveOccurred()) + Eventually(func() bool { + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node-0", + }, } - return "" - }).WithTimeout(10 * time.Second).WithPolling(1 * time.Second).Should(Equal(maintenancev1.ConditionReasonPending)) + Expect(k8sClient.Get(testCtx, client.ObjectKeyFromObject(node), node)).ToNot(HaveOccurred()) + return node.Spec.Unschedulable + }).WithTimeout(10 * time.Second).WithPolling(1 * time.Second).Should(BeFalse()) }) }) @@ -150,5 +248,4 @@ var _ = Describe("NodeMaintenance Controller", func() { }) }) }) - }) diff --git a/internal/controller/nodemaintenancescheduler_controller.go b/internal/controller/nodemaintenancescheduler_controller.go index 902a356..5be775a 100644 --- a/internal/controller/nodemaintenancescheduler_controller.go +++ b/internal/controller/nodemaintenancescheduler_controller.go @@ -183,45 +183,41 @@ func (r *NodeMaintenanceSchedulerReconciler) Reconcile(ctx context.Context, req wg := sync.WaitGroup{} for _, nm := range toSchedule { nm := nm - changed := k8sutils.SetReadyConditionReason(nm, maintenancev1.ConditionReasonScheduled) - if changed { - wg.Add(1) - go func() { - defer wg.Done() - // update status - // TODO(adrianc): use Patch? - err := r.Client.Status().Update(ctx, nm) - if err != nil { - r.Log.Error(err, "failed to update condition for NodeMaintenance", "name", nm.Name, "namespace", nm.Namespace) - - return - } + wg.Add(1) + go func() { + defer wg.Done() + // update status + // TODO(adrianc): use Patch? + err := k8sutils.SetReadyConditionReason(ctx, r.Client, nm, maintenancev1.ConditionReasonScheduled) + if err != nil { + r.Log.Error(err, "failed to update condition for NodeMaintenance", "name", nm.Name, "namespace", nm.Namespace) + return + } - // emit event - r.EventRecorder.Event(nm, corev1.EventTypeNormal, maintenancev1.ConditionChangedEventType, maintenancev1.ConditionReasonScheduled) - - // wait for condition to be updated in cache - err = wait.PollUntilContextTimeout(ctx, 500*time.Millisecond, 10*time.Second, false, func(ctx context.Context) (done bool, err error) { - updatedNm := &maintenancev1.NodeMaintenance{} - innerErr := r.Client.Get(ctx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, updatedNm) - if innerErr != nil { - if k8serrors.IsNotFound(innerErr) { - return true, nil - } - r.Log.Error(innerErr, "failed to get NodeMaintenance object while waiting for condition update. 
retrying", "name", nm.Name, "namespace", nm.Namespace) - return false, nil - } - if k8sutils.GetReadyConditionReason(updatedNm) == maintenancev1.ConditionReasonScheduled { + // emit event + r.EventRecorder.Event(nm, corev1.EventTypeNormal, maintenancev1.ConditionChangedEventType, maintenancev1.ConditionReasonScheduled) + + // wait for condition to be updated in cache + err = wait.PollUntilContextTimeout(ctx, 500*time.Millisecond, 10*time.Second, false, func(ctx context.Context) (done bool, err error) { + updatedNm := &maintenancev1.NodeMaintenance{} + innerErr := r.Client.Get(ctx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, updatedNm) + if innerErr != nil { + if k8serrors.IsNotFound(innerErr) { return true, nil } + r.Log.Error(innerErr, "failed to get NodeMaintenance object while waiting for condition update. retrying", "name", nm.Name, "namespace", nm.Namespace) return false, nil - }) - if err != nil { - // Note(adrianc): if this happens we rely on the fact that caches are updated until next reconcile call - r.Log.Error(err, "failed while waiting for condition for NodeMaintenance", "name", nm.Name, "namespace", nm.Namespace) } - }() - } + if k8sutils.GetReadyConditionReason(updatedNm) == maintenancev1.ConditionReasonScheduled { + return true, nil + } + return false, nil + }) + if err != nil { + // Note(adrianc): if this happens we rely on the fact that caches are updated until next reconcile call + r.Log.Error(err, "failed while waiting for condition for NodeMaintenance", "name", nm.Name, "namespace", nm.Namespace) + } + }() } // wait for all updates to finish wg.Wait() diff --git a/internal/controller/nodemaintenancescheduler_controller_test.go b/internal/controller/nodemaintenancescheduler_controller_test.go index 1b64b5d..dc1d5ad 100644 --- a/internal/controller/nodemaintenancescheduler_controller_test.go +++ b/internal/controller/nodemaintenancescheduler_controller_test.go @@ -126,8 +126,8 @@ var _ = Describe("NodeMaintenanceScheduler Controller", func() { nmObjectsToCleanup = append(nmObjectsToCleanup, nodeMaintenanceResource) By("update Ready condition reason to Pending") - k8sutils.SetReadyConditionReason(nodeMaintenanceResource, maintenancev1.ConditionReasonPending) - Expect(k8sClient.Status().Update(testCtx, nodeMaintenanceResource)) + Expect(k8sutils.SetReadyConditionReason(testCtx, k8sClient, nodeMaintenanceResource, maintenancev1.ConditionReasonPending)). + ToNot(HaveOccurred()) By("Eventually NodeMaintenance condition is set to Scheduled") Eventually(func() string { diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 8b7c90c..c3ca775 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -25,6 +25,7 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" @@ -41,6 +42,7 @@ import ( var cfg *rest.Config var k8sClient client.Client +var k8sInterface kubernetes.Interface var testEnv *envtest.Environment func TestControllers(t *testing.T) { @@ -81,6 +83,10 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) Expect(k8sClient).NotTo(BeNil()) + k8sInterface, err = kubernetes.NewForConfig(cfg) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sInterface).NotTo(BeNil()) + // init operator vars vars.OperatorNamespace = "default" diff --git a/internal/cordon/cordon.go b/internal/cordon/cordon.go new file mode 100644 index 0000000..148f499 --- /dev/null +++ b/internal/cordon/cordon.go @@ -0,0 +1,111 @@ +/* +Copyright 2024, NVIDIA CORPORATION & AFFILIATES + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package cordon + +import ( + "context" + "fmt" + + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/kubectl/pkg/drain" + "sigs.k8s.io/controller-runtime/pkg/client" + + maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1" +) + +const ( + // NodeInitialStateUnschedulableAnnot stores the "unschedulable" initial state of the node + NodeInitialStateUnschedulableAnnot string = "maintenance.nvidia.com/node-initial-state.unschedulable" + + // FalseString is a string representation of "false" boolean value. + FalseString string = "false" +) + +// Handler is an interface to handle cordon/uncordon of nodes +type Handler interface { + // HandleCordon handles cordon of nodes. in an idempotent manner. + HandleCordon(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance, node *corev1.Node) error + // HandleUnCordon handles uncordon for node. in an idempotent mannere. 
+ HandleUnCordon(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance, node *corev1.Node) error +} + +// NewCordonHandler creates a new cordon Handler +func NewCordonHandler(c client.Client, k kubernetes.Interface) Handler { + return &cordonHandler{ + k8sclient: c, + k8sInterface: k, + } +} + +// cordonHandler implements Handler interface +type cordonHandler struct { + k8sclient client.Client + k8sInterface kubernetes.Interface +} + +// HandleCordon handles cordon of nodes +func (c *cordonHandler) HandleCordon(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance, node *corev1.Node) error { + // conditionally set node-initial-state annot + if !metav1.HasAnnotation(nm.ObjectMeta, NodeInitialStateUnschedulableAnnot) { + // set annotation + metav1.SetMetaDataAnnotation(&nm.ObjectMeta, NodeInitialStateUnschedulableAnnot, fmt.Sprintf("%v", node.Spec.Unschedulable)) + err := c.k8sclient.Update(ctx, nm) + if err != nil { + return err + } + } + + // cordon node if its initial state was not unschedulable and the node is currently schedulable (i.e not cordoned) + if nm.Annotations[NodeInitialStateUnschedulableAnnot] == FalseString && + !node.Spec.Unschedulable { + helper := &drain.Helper{Ctx: ctx, Client: c.k8sInterface} + err := drain.RunCordonOrUncordon(helper, node, true) + if err != nil { + reqLog.Error(err, "failed to cordon node", "name", node.Name) + return err + } + } + + return nil +} + +// HandleUnCordon handles uncordon for node +func (c *cordonHandler) HandleUnCordon(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance, node *corev1.Node) error { + // uncordon node + if nm.Annotations[NodeInitialStateUnschedulableAnnot] == FalseString && + node.Spec.Unschedulable { + helper := &drain.Helper{Ctx: ctx, Client: c.k8sInterface} + err := drain.RunCordonOrUncordon(helper, node, false) + if err != nil { + reqLog.Error(err, "failed to uncordon node", "name", node.Name) + return err + } + } + + // remove nodeInitialStateUnschedulableAnnot annotation + if metav1.HasAnnotation(nm.ObjectMeta, NodeInitialStateUnschedulableAnnot) { + delete(nm.Annotations, NodeInitialStateUnschedulableAnnot) + if err := c.k8sclient.Update(ctx, nm); err != nil { + reqLog.Error(err, "failed to update NodeMaintenance annotations") + return err + } + } + + return nil +} diff --git a/internal/cordon/cordon_suite_test.go b/internal/cordon/cordon_suite_test.go new file mode 100644 index 0000000..a0a3fdd --- /dev/null +++ b/internal/cordon/cordon_suite_test.go @@ -0,0 +1,29 @@ +/* + Copyright 2024, NVIDIA CORPORATION & AFFILIATES + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cordon_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestCordon(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "cordon test Suite") +} diff --git a/internal/cordon/cordon_test.go b/internal/cordon/cordon_test.go new file mode 100644 index 0000000..ae8c0b2 --- /dev/null +++ b/internal/cordon/cordon_test.go @@ -0,0 +1,153 @@ +/* + Copyright 2024, NVIDIA CORPORATION & AFFILIATES + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cordon_test + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + ks8fake "k8s.io/client-go/kubernetes/fake" + "sigs.k8s.io/controller-runtime/pkg/client" + ctrfake "sigs.k8s.io/controller-runtime/pkg/client/fake" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + + maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1" + "github.com/Mellanox/maintenance-operator/internal/cordon" + operatorlog "github.com/Mellanox/maintenance-operator/internal/log" + "github.com/Mellanox/maintenance-operator/internal/testutils" +) + +var _ = BeforeSuite(func() { + operatorlog.InitLog() +}) + +var _ = Describe("cordon tests", func() { + var fakeClient client.Client + var fakeInterface kubernetes.Interface + var testCtx context.Context + + var node *corev1.Node + var nm *maintenancev1.NodeMaintenance + var handler cordon.Handler + + BeforeEach(func() { + testCtx = context.Background() + node = testutils.GetTestNodes("node", 1, false)[0] + nm = testutils.GetTestNodeMaintenance("test-nm", node.Name, "test.nvidia.com", "") + + s := runtime.NewScheme() + maintenancev1.AddToScheme(s) + fakeClient = ctrfake.NewClientBuilder(). + WithScheme(s). + WithStatusSubresource(&maintenancev1.NodeMaintenance{}). + WithObjects(nm). 
+ Build() + fakeInterface = ks8fake.NewSimpleClientset(node) + handler = cordon.NewCordonHandler(fakeClient, fakeInterface) + }) + + Context("Test HandleCordon", func() { + It("cordons node", func() { + Expect(handler.HandleCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + + Expect(fakeClient.Get(testCtx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, nm)).ToNot(HaveOccurred()) + Expect(metav1.HasAnnotation(nm.ObjectMeta, cordon.NodeInitialStateUnschedulableAnnot)).To(BeTrue()) + Expect(nm.Annotations[cordon.NodeInitialStateUnschedulableAnnot]).To(Equal("false")) + node, err := fakeInterface.CoreV1().Nodes().Get(testCtx, node.Name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(node.Spec.Unschedulable).To(BeTrue()) + }) + + It("does not cordon node if initial state annotation is set to true", func() { + metav1.SetMetaDataAnnotation(&nm.ObjectMeta, cordon.NodeInitialStateUnschedulableAnnot, "true") + Expect(handler.HandleCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + + node, err := fakeInterface.CoreV1().Nodes().Get(testCtx, node.Name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(node.Spec.Unschedulable).To(BeFalse()) + }) + + It("succeeds multiple calls to HandleCordon", func() { + Expect(handler.HandleCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + Expect(handler.HandleCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + + node, err := fakeInterface.CoreV1().Nodes().Get(testCtx, node.Name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(node.Spec.Unschedulable).To(BeTrue()) + }) + }) + + Context("Test HandleUnCordon", func() { + It("uncordons node and removes annotation", func() { + Expect(handler.HandleCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + Expect(handler.HandleUnCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + + node, err := fakeInterface.CoreV1().Nodes().Get(testCtx, node.Name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(node.Spec.Unschedulable).To(BeFalse()) + Expect(fakeClient.Get(testCtx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, nm)).ToNot(HaveOccurred()) + Expect(metav1.HasAnnotation(nm.ObjectMeta, cordon.NodeInitialStateUnschedulableAnnot)).To(BeFalse()) + + }) + + It("succeeds if node is not cordoned", func() { + Expect(handler.HandleUnCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + + node, err := fakeInterface.CoreV1().Nodes().Get(testCtx, node.Name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(node.Spec.Unschedulable).To(BeFalse()) + Expect(fakeClient.Get(testCtx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, nm)).ToNot(HaveOccurred()) + Expect(metav1.HasAnnotation(nm.ObjectMeta, cordon.NodeInitialStateUnschedulableAnnot)).To(BeFalse()) + }) + + It("uncordons multiple calls", func() { + Expect(handler.HandleCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + Expect(handler.HandleUnCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + Expect(handler.HandleUnCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred()) + }) + + It("succeeds if node is not cordoned with initial state annotation", func() { + 
metav1.SetMetaDataAnnotation(&nm.ObjectMeta, cordon.NodeInitialStateUnschedulableAnnot, "false")
+ Expect(fakeClient.Update(testCtx, nm)).ToNot(HaveOccurred())
+ Expect(handler.HandleUnCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred())
+
+ node, err := fakeInterface.CoreV1().Nodes().Get(testCtx, node.Name, metav1.GetOptions{})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(node.Spec.Unschedulable).To(BeFalse())
+ Expect(fakeClient.Get(testCtx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, nm)).ToNot(HaveOccurred())
+ Expect(metav1.HasAnnotation(nm.ObjectMeta, cordon.NodeInitialStateUnschedulableAnnot)).To(BeFalse())
+ })
+
+ It("does not uncordon node if initial state annotation is true", func() {
+ metav1.SetMetaDataAnnotation(&nm.ObjectMeta, cordon.NodeInitialStateUnschedulableAnnot, "true")
+ node.Spec.Unschedulable = true
+ _, err := fakeInterface.CoreV1().Nodes().Update(testCtx, node, metav1.UpdateOptions{})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(handler.HandleUnCordon(testCtx, ctrllog.Log.WithName("cordonHandler"), nm, node)).ToNot(HaveOccurred())
+ node, err = fakeInterface.CoreV1().Nodes().Get(testCtx, node.Name, metav1.GetOptions{})
+ Expect(err).ToNot(HaveOccurred())
+ Expect(node.Spec.Unschedulable).To(BeTrue())
+ })
+ })
+
+})
diff --git a/internal/k8sutils/k8sutils.go b/internal/k8sutils/k8sutils.go
index 3976391..a65e777 100644
--- a/internal/k8sutils/k8sutils.go
+++ b/internal/k8sutils/k8sutils.go
@@ -17,9 +17,14 @@ package k8sutils
 import (
+ "context"
+ "slices"
+
 corev1 "k8s.io/api/core/v1"
 "k8s.io/apimachinery/pkg/api/meta"
 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
)
@@ -34,26 +39,36 @@ func GetReadyConditionReason(nm *maintenancev1.NodeMaintenance) string {
}
// SetReadyConditionReason sets or updates Ready condition in nm.Status with reason.
-// returns true if conditions were updated in nm object or false otherwise.
-func SetReadyConditionReason(nm *maintenancev1.NodeMaintenance, reason string) (changed bool) {
- return SetReadyConditionReasonMsg(nm, reason, "")
+// In addition, it updates the status of the object in the k8s API if required.
+// Returns an error if one occurred.
+func SetReadyConditionReason(ctx context.Context, client client.Client, nm *maintenancev1.NodeMaintenance, reason string) error {
+ return SetReadyConditionReasonMsg(ctx, client, nm, reason, "")
}
// SetReadyConditionReasonMsg sets or updates Ready condition in nm.Status with reason and msg.
-// returns true if conditions were updated in nm object or false otherwise.
-func SetReadyConditionReasonMsg(nm *maintenancev1.NodeMaintenance, reason string, msg string) (changed bool) {
+// In addition, it updates the status of the object in the k8s API if required.
+// Returns an error if one occurred.
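+//
+// A minimal usage sketch (illustrative only; assumes ctx, a client.Client c and an nm object
+// are in scope; the helper mutates nm in place and persists the status update):
+//
+//	if err := k8sutils.SetReadyConditionReasonMsg(ctx, c, nm, maintenancev1.ConditionReasonCordon, "cordoning node"); err != nil {
+//		return err
+//	}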
+func SetReadyConditionReasonMsg(ctx context.Context, client client.Client, nm *maintenancev1.NodeMaintenance, reason string, msg string) error {
 status := metav1.ConditionFalse
 if reason == maintenancev1.ConditionReasonReady {
 status = metav1.ConditionTrue
 }
 cond := metav1.Condition{
- Type: maintenancev1.ConditionTypeReady,
- Status: status,
- Reason: reason,
- Message: msg,
+ Type: maintenancev1.ConditionTypeReady,
+ Status: status,
+ ObservedGeneration: nm.Generation,
+ Reason: reason,
+ Message: msg,
+ }
+
+ changed := meta.SetStatusCondition(&nm.Status.Conditions, cond)
+ var err error
+ if changed {
+ err = client.Status().Update(ctx, nm)
 }
- return meta.SetStatusCondition(&nm.Status.Conditions, cond)
+
+ return err
}
// IsUnderMaintenance returns true if NodeMaintenance is currently undergoing maintenance
@@ -80,3 +95,48 @@ func IsNodeReady(n *corev1.Node) bool {
func IsNodeUnschedulable(n *corev1.Node) bool {
 return n.Spec.Unschedulable
}
+
+// AddFinalizer conditionally adds finalizer to NodeMaintenance
+func AddFinalizer(ctx context.Context, k8sClient client.Client, nm *maintenancev1.NodeMaintenance, finalizer string) error {
+ instanceFinalizers := nm.GetFinalizers()
+ if !slices.Contains(instanceFinalizers, finalizer) {
+ nm.SetFinalizers(append(instanceFinalizers, finalizer))
+ if err := k8sClient.Update(ctx, nm); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// RemoveFinalizer conditionally removes finalizer from NodeMaintenance
+func RemoveFinalizer(ctx context.Context, k8sClient client.Client, nm *maintenancev1.NodeMaintenance, finalizer string) error {
+ instanceFinalizers := nm.GetFinalizers()
+ i := slices.Index(instanceFinalizers, finalizer)
+ if i >= 0 {
+ newFinalizers := slices.Delete(instanceFinalizers, i, i+1)
+ nm.SetFinalizers(newFinalizers)
+ if err := k8sClient.Update(ctx, nm); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// SetOwnerRef conditionally sets owner reference of object to owner
+func SetOwnerRef(ctx context.Context, k8sClient client.Client, owner metav1.Object, object client.Object) error {
+ err := controllerutil.SetOwnerReference(owner, object, k8sClient.Scheme())
+ if err != nil {
+ return err
+ }
+ return k8sClient.Update(ctx, object)
+}
+
+// HasOwnerRef returns true if object is owned by owner
+func HasOwnerRef(owner metav1.Object, object metav1.Object) bool {
+ for _, o := range object.GetOwnerReferences() {
+ if o.UID == owner.GetUID() {
+ return true
+ }
+ }
+ return false
+}
diff --git a/internal/k8sutils/k8sutils_test.go b/internal/k8sutils/k8sutils_test.go
index e019db5..4003381 100644
--- a/internal/k8sutils/k8sutils_test.go
+++ b/internal/k8sutils/k8sutils_test.go
@@ -17,41 +17,73 @@ package k8sutils_test
 import (
- maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
- "github.com/Mellanox/maintenance-operator/internal/k8sutils"
+ "context"
+
+ . "github.com/onsi/ginkgo/v2"
+ .
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1" + "github.com/Mellanox/maintenance-operator/internal/k8sutils" + "github.com/Mellanox/maintenance-operator/internal/testutils" ) var _ = Describe("k8sutils Tests", func() { - Context("GetReadyConditionReason", func() { - nm := &maintenancev1.NodeMaintenance{} - Expect(k8sutils.GetReadyConditionReason(nm)).To(Equal(maintenancev1.ConditionReasonUninitialized)) - _ = k8sutils.SetReadyConditionReason(nm, maintenancev1.ConditionReasonPending) - Expect(k8sutils.GetReadyConditionReason(nm)).To(Equal(maintenancev1.ConditionReasonPending)) + var fakeClient client.Client + var testCtx context.Context + + BeforeEach(func() { + testCtx = context.Background() + s := runtime.NewScheme() + maintenancev1.AddToScheme(s) + corev1.AddToScheme(s) + fakeClient = fake.NewClientBuilder().WithScheme(s).WithStatusSubresource(&maintenancev1.NodeMaintenance{}).Build() + }) + Context("GetReadyConditionReason", func() { + It("Gets condition as expected", func() { + nm := testutils.GetTestNodeMaintenance("test", "node-1", "test.nvidia.com", "") + Expect(k8sutils.GetReadyConditionReason(nm)).To(Equal(maintenancev1.ConditionReasonUninitialized)) + Expect(fakeClient.Create(testCtx, nm)).ToNot(HaveOccurred()) + Expect(k8sutils.SetReadyConditionReason(testCtx, fakeClient, nm, maintenancev1.ConditionReasonPending)).ToNot(HaveOccurred()) + Expect(k8sutils.GetReadyConditionReason(nm)).To(Equal(maintenancev1.ConditionReasonPending)) + }) }) Context("SetReadyConditionReason", func() { - nm := &maintenancev1.NodeMaintenance{} - changed := k8sutils.SetReadyConditionReason(nm, maintenancev1.ConditionReasonCordon) - Expect(changed).To(BeTrue()) - Expect(nm.Status.Conditions[0].Type).To(Equal(maintenancev1.ConditionTypeReady)) - Expect(nm.Status.Conditions[0].Reason).To(Equal(maintenancev1.ConditionReasonCordon)) + It("Sets condition as expected", func() { + nm := testutils.GetTestNodeMaintenance("test", "node-1", "test.nvidia.com", "") + nm.Generation = 4 + Expect(fakeClient.Create(testCtx, nm)).ToNot(HaveOccurred()) + Expect(k8sutils.SetReadyConditionReason(testCtx, fakeClient, nm, maintenancev1.ConditionReasonCordon)).ToNot(HaveOccurred()) + By("Object is updated in place") + Expect(nm.Status.Conditions[0].Type).To(Equal(maintenancev1.ConditionTypeReady)) + Expect(nm.Status.Conditions[0].Reason).To(Equal(maintenancev1.ConditionReasonCordon)) + Expect(nm.Status.Conditions[0].ObservedGeneration).To(Equal(nm.Generation)) + By("Object is updated in k8s") + Expect(fakeClient.Get(testCtx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, nm)).ToNot(HaveOccurred()) + Expect(nm.Status.Conditions[0].Type).To(Equal(maintenancev1.ConditionTypeReady)) + Expect(nm.Status.Conditions[0].Reason).To(Equal(maintenancev1.ConditionReasonCordon)) + Expect(nm.Status.Conditions[0].ObservedGeneration).To(Equal(nm.Generation)) + }) }) Context("SetReadyConditionReasonMsg", func() { - It("Sets condition as expected", func() { - nm := &maintenancev1.NodeMaintenance{} - changed := k8sutils.SetReadyConditionReasonMsg(nm, maintenancev1.ConditionReasonCordon, "foobar") - Expect(changed).To(BeTrue()) + It("Sets condition as expected with msg", func() { + nm := testutils.GetTestNodeMaintenance("test", "node-1", 
"test.nvidia.com", "") + Expect(fakeClient.Create(testCtx, nm)).ToNot(HaveOccurred()) + By("updating status should succeed") + Expect(k8sutils.SetReadyConditionReasonMsg(testCtx, fakeClient, nm, maintenancev1.ConditionReasonCordon, "foobar")).ToNot(HaveOccurred()) Expect(nm.Status.Conditions[0].Type).To(Equal(maintenancev1.ConditionTypeReady)) Expect(nm.Status.Conditions[0].Reason).To(Equal(maintenancev1.ConditionReasonCordon)) Expect(nm.Status.Conditions[0].Message).To(Equal("foobar")) - changed = k8sutils.SetReadyConditionReasonMsg(nm, maintenancev1.ConditionReasonCordon, "foobar") - Expect(changed).To(BeFalse()) + By("updating again to same value should succeed without calling k8s client") + Expect(k8sutils.SetReadyConditionReasonMsg(testCtx, nil, nm, maintenancev1.ConditionReasonCordon, "foobar")).ToNot(HaveOccurred()) Expect(nm.Status.Conditions[0].Type).To(Equal(maintenancev1.ConditionTypeReady)) Expect(nm.Status.Conditions[0].Reason).To(Equal(maintenancev1.ConditionReasonCordon)) Expect(nm.Status.Conditions[0].Message).To(Equal("foobar")) @@ -59,7 +91,12 @@ var _ = Describe("k8sutils Tests", func() { }) Context("IsUnderMaintenance", func() { - nm := &maintenancev1.NodeMaintenance{} + var nm *maintenancev1.NodeMaintenance + + BeforeEach(func() { + nm = &maintenancev1.NodeMaintenance{} + }) + It("Returns false if not under maintenance", func() { By("no condition") nm.Status.Conditions = nil @@ -88,7 +125,11 @@ var _ = Describe("k8sutils Tests", func() { }) Context("IsNodeReady", func() { - node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}} + var node *corev1.Node + + BeforeEach(func() { + node = &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}} + }) It("Returns true when node is Ready", func() { node.Status.Conditions = []corev1.NodeCondition{{Type: corev1.NodeReady, Status: corev1.ConditionTrue}} @@ -102,7 +143,11 @@ var _ = Describe("k8sutils Tests", func() { }) Context("IsNodeUnschedulable", func() { - node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}} + var node *corev1.Node + + BeforeEach(func() { + node = &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}} + }) It("Returns true when node is unschedulable", func() { node.Spec.Unschedulable = true @@ -115,4 +160,89 @@ var _ = Describe("k8sutils Tests", func() { }) }) + Context("AddFinalizer", func() { + var nm *maintenancev1.NodeMaintenance + + BeforeEach(func() { + nm = testutils.GetTestNodeMaintenance("test", "node-1", "test.nvidia.com", "") + }) + + It("adds finalizer to object and updates in k8s", func() { + Expect(fakeClient.Create(testCtx, nm)).ToNot(HaveOccurred()) + Expect(k8sutils.AddFinalizer(testCtx, fakeClient, nm, "test-finalizer")).ToNot(HaveOccurred()) + Expect(nm.Finalizers).To(ContainElement("test-finalizer")) + Expect(fakeClient.Get(testCtx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, nm)).ToNot(HaveOccurred()) + Expect(nm.Finalizers).To(ContainElement("test-finalizer")) + Expect(nm.Finalizers).To(HaveLen(1)) + }) + + It("does nothing if finalizer exists", func() { + nm.Finalizers = append(nm.Finalizers, "test-finalizer") + Expect(k8sutils.AddFinalizer(testCtx, nil, nm, "test-finalizer")).ToNot(HaveOccurred()) + Expect(nm.Finalizers).To(ContainElement("test-finalizer")) + Expect(nm.Finalizers).To(HaveLen(1)) + }) + }) + + Context("RemoveFinalizer", func() { + var nm *maintenancev1.NodeMaintenance + + BeforeEach(func() { + nm = testutils.GetTestNodeMaintenance("test", "node-1", "test.nvidia.com", "") + }) + + It("does nothing if finalizer does 
not exits", func() { + nm.Finalizers = append(nm.Finalizers, "foo") + Expect(k8sutils.RemoveFinalizer(testCtx, nil, nm, "test-finalizer")).ToNot(HaveOccurred()) + Expect(nm.Finalizers).To(ContainElement("foo")) + Expect(nm.Finalizers).To(HaveLen(1)) + }) + + It("removes finalizer if exists", func() { + nm.Finalizers = append(nm.Finalizers, "test-finalizer") + Expect(fakeClient.Create(testCtx, nm)).ToNot(HaveOccurred()) + Expect(k8sutils.RemoveFinalizer(testCtx, fakeClient, nm, "test-finalizer")).ToNot(HaveOccurred()) + Expect(nm.Finalizers).To(BeEmpty()) + Expect(fakeClient.Get(testCtx, types.NamespacedName{Namespace: nm.Namespace, Name: nm.Name}, nm)). + ToNot(HaveOccurred()) + Expect(nm.Finalizers).To(BeEmpty()) + }) + }) + + Context("SetOwnerRef", func() { + var node *corev1.Node + var nm *maintenancev1.NodeMaintenance + + BeforeEach(func() { + node = &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1", UID: "abcdef"}} + nm = testutils.GetTestNodeMaintenance("test", "node-1", "foo.bar", "") + nm.UID = "efgh" + }) + + It("sets owner reference for object", func() { + Expect(fakeClient.Create(testCtx, nm)).ToNot(HaveOccurred()) + Expect(k8sutils.SetOwnerRef(testCtx, fakeClient, node, nm)).ToNot(HaveOccurred()) + Expect(nm.OwnerReferences[0].UID).To(Equal(node.UID)) + }) + }) + + Context("HasOwnerRef", func() { + var node *corev1.Node + var nm *maintenancev1.NodeMaintenance + + BeforeEach(func() { + node = &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1", UID: "abcdef"}} + nm = testutils.GetTestNodeMaintenance("test", "node-1", "foo.bar", "") + nm.UID = "efghij" + }) + + It("returns true if object has owner reference to owned", func() { + nm.OwnerReferences = []metav1.OwnerReference{{UID: node.UID}} + Expect(k8sutils.HasOwnerRef(node, nm)).To(BeTrue()) + }) + + It("returns false if object does not have owner reference to owned", func() { + Expect(k8sutils.HasOwnerRef(node, nm)).To(BeFalse()) + }) + }) }) diff --git a/internal/log/log.go b/internal/log/log.go index f033950..9a60b13 100644 --- a/internal/log/log.go +++ b/internal/log/log.go @@ -25,6 +25,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" ) +const ( + DebugLevel = int(zapcore.DebugLevel) +) + // Options stores controller-runtime (zap) log config var Options = &zap.Options{ Development: true, diff --git a/internal/podcompletion/podcompletion.go b/internal/podcompletion/podcompletion.go new file mode 100644 index 0000000..ee7d8fc --- /dev/null +++ b/internal/podcompletion/podcompletion.go @@ -0,0 +1,126 @@ +/* +Copyright 2024, NVIDIA CORPORATION & AFFILIATES + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+package podcompletion
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "time"
+
+ "github.com/go-logr/logr"
+ corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/fields"
+ "k8s.io/apimachinery/pkg/labels"
+ "k8s.io/apimachinery/pkg/types"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+
+ maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
+)
+
+const (
+ WaitForPodCompletionStartAnnot string = "maintenance.nvidia.com/wait-pod-completion-start-time"
+)
+
+// ErrPodCompletionTimeout is a custom error to convey that the Pod completion timeout has been reached.
+var ErrPodCompletionTimeout = errors.New("PodCompletionTimeoutError")
+
+// Handler is an interface to handle waiting for pod completion for NodeMaintenance
+type Handler interface {
+ // HandlePodCompletion handles waiting for pods to complete. It returns the list of pods still being
+ // waited on, or an error if one occurred. ErrPodCompletionTimeout is returned if the timeout was exceeded.
+ HandlePodCompletion(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance) ([]string, error)
+}
+
+// NewPodCompletionHandler creates a new WaitPodCompletion Handler
+func NewPodCompletionHandler(c client.Client) Handler {
+ return &podCompletionHandler{
+ k8sclient: c,
+ }
+}
+
+// podCompletionHandler implements Handler interface
+type podCompletionHandler struct {
+ k8sclient client.Client
+}
+
+// HandlePodCompletion handles pod completion for node
+func (p *podCompletionHandler) HandlePodCompletion(ctx context.Context, reqLog logr.Logger, nm *maintenancev1.NodeMaintenance) ([]string, error) {
+ var err error
+ var startTime time.Time
+
+ if !metav1.HasAnnotation(nm.ObjectMeta, WaitForPodCompletionStartAnnot) ||
+ nm.Annotations[WaitForPodCompletionStartAnnot] == "" {
+ // set waitForPodCompletion time annotation
+ startTime = time.Now().UTC()
+ metav1.SetMetaDataAnnotation(&nm.ObjectMeta, WaitForPodCompletionStartAnnot, startTime.Format(time.RFC3339))
+ err = p.k8sclient.Update(ctx, nm)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ // take start time from waitForPodCompletion time annotation
+ startTime, err = time.Parse(time.RFC3339, nm.Annotations[WaitForPodCompletionStartAnnot])
+ if err != nil {
+ // if we failed to parse annotation, reset it so we can eventually succeed.
+ reqLog.Error(err, "failed to parse annotation.
resetting annotation", "key", WaitForPodCompletionStartAnnot, "value", nm.Annotations[WaitForPodCompletionStartAnnot]) + delete(nm.Annotations, WaitForPodCompletionStartAnnot) + innerErr := p.k8sclient.Update(ctx, nm) + if innerErr != nil { + reqLog.Error(innerErr, "failed to reset wait for pod completion annotation", "key", WaitForPodCompletionStartAnnot) + } + return nil, err + } + } + reqLog.Info("HandlePodCompletion", "start-time", startTime) + + // check expire time + if nm.Spec.WaitForPodCompletion.TimeoutSecond > 0 { + timeNow := time.Now() + timeExpire := startTime.Add(time.Duration(nm.Spec.WaitForPodCompletion.TimeoutSecond * uint32(time.Second))) + if timeNow.After(timeExpire) { + reqLog.Error(nil, "HandlePodCompletion timeout reached") + return nil, ErrPodCompletionTimeout + } + } + + // list pods with given selector for given node + podList := &corev1.PodList{} + selectorLabels, err := labels.Parse(nm.Spec.WaitForPodCompletion.PodSelector) + if err != nil { + return nil, fmt.Errorf("failed to parse Spec.WaitForPodCompletion.PodSelector as label selectors") + } + selectorFields := fields.OneTermEqualSelector("spec.nodeName", nm.Spec.NodeName) + + err = p.k8sclient.List(ctx, podList, &client.ListOptions{LabelSelector: selectorLabels, FieldSelector: selectorFields}) + if err != nil { + return nil, fmt.Errorf("failed to list pods. %w", err) + } + + waitingPods := make([]string, 0, len(podList.Items)) + for _, p := range podList.Items { + waitingPods = append(waitingPods, types.NamespacedName{Namespace: p.Namespace, Name: p.Name}.String()) + } + + // update status + nm.Status.WaitForCompletion = waitingPods + if err = p.k8sclient.Status().Update(ctx, nm); err != nil { + return nil, fmt.Errorf("failed to update NodeMaintenance status. %w", err) + } + + return waitingPods, nil +} diff --git a/internal/podcompletion/podcompletion_suite_test.go b/internal/podcompletion/podcompletion_suite_test.go new file mode 100644 index 0000000..8725707 --- /dev/null +++ b/internal/podcompletion/podcompletion_suite_test.go @@ -0,0 +1,29 @@ +/* + Copyright 2024, NVIDIA CORPORATION & AFFILIATES + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package podcompletion_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestPodCompletion(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "cordon test Suite") +} diff --git a/internal/podcompletion/podcompletion_test.go b/internal/podcompletion/podcompletion_test.go new file mode 100644 index 0000000..60a64fd --- /dev/null +++ b/internal/podcompletion/podcompletion_test.go @@ -0,0 +1,111 @@ +/* + Copyright 2024, NVIDIA CORPORATION & AFFILIATES + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package podcompletion_test + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + ctrfake "sigs.k8s.io/controller-runtime/pkg/client/fake" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + + maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1" + operatorlog "github.com/Mellanox/maintenance-operator/internal/log" + "github.com/Mellanox/maintenance-operator/internal/podcompletion" + "github.com/Mellanox/maintenance-operator/internal/testutils" +) + +var _ = BeforeSuite(func() { + operatorlog.InitLog() +}) + +var _ = Describe("podcompletion tests", func() { + var fakeClient client.Client + var testCtx context.Context + + var nm *maintenancev1.NodeMaintenance + var handler podcompletion.Handler + + BeforeEach(func() { + testCtx = context.Background() + nm = testutils.GetTestNodeMaintenance("test-nm", "node-0", "test.nvidia.com", "") + nm.Spec.WaitForPodCompletion = &maintenancev1.WaitForPodCompletionSpec{} + + testPod := testutils.GetTestPod("test-pod", "node-0", map[string]string{"foo": "bar"}) + + s := runtime.NewScheme() + corev1.AddToScheme(s) + maintenancev1.AddToScheme(s) + fakeClient = ctrfake.NewClientBuilder(). + WithScheme(s). + WithStatusSubresource(&maintenancev1.NodeMaintenance{}). + WithObjects(nm, testPod). + WithIndex(&corev1.Pod{}, "spec.nodeName", func(o client.Object) []string { + return []string{o.(*corev1.Pod).Spec.NodeName} + }). 
+ Build()
+ handler = podcompletion.NewPodCompletionHandler(fakeClient)
+ })
+
+ Context("Test HandlePodCompletion", func() {
+ It("returns expected list of pods and updates start time annotation", func() {
+ nm.Spec.WaitForPodCompletion.PodSelector = ""
+ Expect(fakeClient.Update(testCtx, nm)).ToNot(HaveOccurred())
+ pods, err := handler.HandlePodCompletion(testCtx, ctrllog.Log.WithName("podCompletionHandler"), nm)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(pods).To(Equal([]string{"default/test-pod"}))
+
+ Expect(fakeClient.Get(testCtx, client.ObjectKeyFromObject(nm), nm)).ToNot(HaveOccurred())
+ Expect(nm.Status.WaitForCompletion).To(Equal([]string{"default/test-pod"}))
+ Expect(metav1.HasAnnotation(nm.ObjectMeta, podcompletion.WaitForPodCompletionStartAnnot)).To(BeTrue())
+ t1, e := time.Parse(time.RFC3339, nm.Annotations[podcompletion.WaitForPodCompletionStartAnnot])
+ Expect(e).ToNot(HaveOccurred())
+
+ // call again, expect wait-pod-completion-start annotation value to remain the same
+ time.Sleep(1 * time.Second)
+ _, err = handler.HandlePodCompletion(testCtx, ctrllog.Log.WithName("podCompletionHandler"), nm)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(fakeClient.Get(testCtx, client.ObjectKeyFromObject(nm), nm)).ToNot(HaveOccurred())
+ t2, e := time.Parse(time.RFC3339, nm.Annotations[podcompletion.WaitForPodCompletionStartAnnot])
+ Expect(e).ToNot(HaveOccurred())
+ Expect(t1.Equal(t2)).To(BeTrue())
+ })
+
+ It("returns pod completion timeout error if timeout elapsed and pods are still running", func() {
+ nm.Spec.WaitForPodCompletion.TimeoutSecond = 1
+ metav1.SetMetaDataAnnotation(&nm.ObjectMeta, podcompletion.WaitForPodCompletionStartAnnot,
+ time.Now().Add(-2*time.Second).Format(time.RFC3339))
+ _, err := handler.HandlePodCompletion(testCtx, ctrllog.Log.WithName("podCompletionHandler"), nm)
+ Expect(err).To(HaveOccurred())
+ Expect(err).To(Equal(podcompletion.ErrPodCompletionTimeout))
+ })
+
+ It("succeeds if there are no pods to wait for", func() {
+ nm.Spec.WaitForPodCompletion.PodSelector = "bar=baz"
+ Expect(fakeClient.Update(testCtx, nm)).ToNot(HaveOccurred())
+ pods, err := handler.HandlePodCompletion(testCtx, ctrllog.Log.WithName("podCompletionHandler"), nm)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(pods).To(BeEmpty())
+ })
+ })
+})
diff --git a/internal/testutils/testutils.go b/internal/testutils/testutils.go
index 630d0af..e8489d1 100644
--- a/internal/testutils/testutils.go
+++ b/internal/testutils/testutils.go
@@ -18,12 +18,16 @@ package testutils
 import (
+ "context"
 "fmt"
 corev1 "k8s.io/api/core/v1"
 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/types"
+ "sigs.k8s.io/controller-runtime/pkg/client"
 maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
+ "github.com/Mellanox/maintenance-operator/internal/k8sutils"
)
// GetTestNodes used to create node objects for testing controllers
@@ -41,6 +45,7 @@ func GetTestNodes(nodePrefix string, numOfNodes int, unschedulable bool) []*core
 return nodes
}
+// GetTestNodeMaintenance used to create NodeMaintenance object for tests
func GetTestNodeMaintenance(name, nodeName, requestorID, reason string) *maintenancev1.NodeMaintenance {
 nm := &maintenancev1.NodeMaintenance{
 ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "default"},
@@ -62,3 +67,53 @@ func GetTestNodeMaintenance(name, nodeName, requestorID, reason string) *mainten
 }
 return nm
}
+
+// GetTestPod used to create pod objects for tests
+func GetTestPod(name, nodeName string, labels map[string]string) *corev1.Pod {
+ return &corev1.Pod{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: "default",
+ Labels: labels,
+ },
+ Spec: corev1.PodSpec{
+ NodeName: nodeName,
+ Containers: []corev1.Container{
+ {
+ Name: "foo",
+ Image: "bar",
+ },
+ },
+ },
+ }
+}
+
+// GetReadyConditionReasonForFn returns a function that, when called, returns the NodeMaintenance
+// Ready condition Reason; to be used in tests, e.g. in Eventually() or Consistently() blocks.
+func GetReadyConditionReasonForFn(ctx context.Context, c client.Client, ok client.ObjectKey) func() string {
+ return func() string {
+ nm := &maintenancev1.NodeMaintenance{}
+ err := c.Get(ctx, ok, nm)
+ if err == nil {
+ return k8sutils.GetReadyConditionReason(nm)
+ }
+ return ""
+ }
+}
+
+// EventsForObjFn returns a function that, when called, returns Event messages for a
+// NodeMaintenance; to be used in tests, e.g. in Eventually() or Consistently() blocks.
+func EventsForObjFn(ctx context.Context, c client.Client, objUID types.UID) func() []string {
+ return func() []string {
+ el := &corev1.EventList{}
+ err := c.List(ctx, el, client.MatchingFields{"involvedObject.uid": string(objUID)})
+ if err == nil && len(el.Items) > 0 {
+ eMsgs := make([]string, 0, len(el.Items))
+ for _, e := range el.Items {
+ eMsgs = append(eMsgs, e.Message)
+ }
+ return eMsgs
+ }
+ return nil
+ }
+}
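For context, a minimal sketch of how the handlers introduced in this change are expected to compose inside a NodeMaintenance reconcile step. This is illustrative only; the reconciler wiring itself is outside this hunk, and names such as r.CordonHandler, r.WaitPodCompletionHandler and reqLog mirror the reconciler fields set up in cmd/maintenance-manager/main.go:

	// illustrative reconcile fragment, not part of the diff
	node := &corev1.Node{}
	if err := r.Client.Get(ctx, types.NamespacedName{Name: nm.Spec.NodeName}, node); err != nil {
		return ctrl.Result{}, err
	}
	// cordon first; the handler records the node's initial schedulability on nm
	if err := r.CordonHandler.HandleCordon(ctx, reqLog, nm, node); err != nil {
		return ctrl.Result{}, err
	}
	// then wait for the selected pods to complete, requeueing while any remain
	waitingPods, err := r.WaitPodCompletionHandler.HandlePodCompletion(ctx, reqLog, nm)
	if err != nil && !errors.Is(err, podcompletion.ErrPodCompletionTimeout) {
		return ctrl.Result{}, err
	}
	if err == nil && len(waitingPods) > 0 {
		return ctrl.Result{RequeueAfter: 10 * time.Second}, nil // arbitrary requeue interval
	}
	// on timeout or when no pods remain, proceed to the next state (e.g. Ready)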