Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Maintenance operator config controller #6

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ linters:
- errname
- exportloopref
- fatcontext
- forcetypeassert
- funlen
- ginkgolinter
- goconst
Expand Down
46 changes: 24 additions & 22 deletions cmd/maintenance-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ import (
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"

maintenancev1alpha1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
"github.com/Mellanox/maintenance-operator/internal/controller"
operatorlog "github.com/Mellanox/maintenance-operator/internal/log"
"github.com/Mellanox/maintenance-operator/internal/scheduler"
"github.com/Mellanox/maintenance-operator/internal/version"
//+kubebuilder:scaffold:imports
Expand Down Expand Up @@ -73,18 +73,14 @@ func main() {
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
flag.BoolVar(&printVersion, "version", false, "print version and exit")

opts := zap.Options{
Development: true,
}
opts.BindFlags(flag.CommandLine)
operatorlog.BindFlags(flag.CommandLine)
flag.Parse()

if printVersion {
fmt.Printf("%s\n", version.GetVersionString())
os.Exit(0)
}

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
operatorlog.InitLog()

setupLog.Info("Maintenance Operator", "version", version.GetVersionString())

Expand Down Expand Up @@ -136,30 +132,36 @@ func main() {
os.Exit(1)
}

nmrOptions := controller.NewNodeMaintenanceReconcilerOptions()
if err = (&controller.NodeMaintenanceReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Options: nmrOptions,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NodeMaintenance")
os.Exit(1)
}
if err = (&controller.MaintenanceOperatorConfigReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),

nmsrOptions := controller.NewNodeMaintenanceSchedulerReconcilerOptions()
nmsrLog := ctrl.Log.WithName("NodeMaintenanceScheduler")
if err = (&controller.NodeMaintenanceSchedulerReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Options: nmsrOptions,
Log: nmsrLog,
Sched: scheduler.NewDefaultScheduler(nmsrLog.WithName("DefaultScheduler")),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "MaintenanceOperatorConfig")
setupLog.Error(err, "unable to create controller", "controller", "NodeMaintenanceScheduler")
os.Exit(1)
}
nmSchedulerReconcilerLog := ctrl.Log.WithName("NodeMaintenanceScheduler")
if err = (&controller.NodeMaintenanceSchedulerReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
MaxUnavailable: nil,
MaxParallelOperations: nil,
Log: nmSchedulerReconcilerLog,
Sched: scheduler.NewDefaultScheduler(nmSchedulerReconcilerLog.WithName("DefaultScheduler")),

if err = (&controller.MaintenanceOperatorConfigReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
NodeMaintenanceReconcierOptions: nmrOptions,
SchedulerReconcierOptions: nmsrOptions,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NodeMaintenanceScheduler")
setupLog.Error(err, "unable to create controller", "controller", "MaintenanceOperatorConfig")
os.Exit(1)
}
//+kubebuilder:scaffold:builder
Expand Down
6 changes: 6 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,11 @@ spec:
requests:
cpu: 10m
memory: 64Mi
env:
- name: OPERATOR_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace

serviceAccountName: controller-manager
terminationGracePeriodSeconds: 10
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.33.1
github.com/pkg/errors v0.9.1
go.uber.org/zap v1.26.0
k8s.io/api v0.30.2
k8s.io/apimachinery v0.30.2
k8s.io/client-go v0.30.2
Expand Down Expand Up @@ -49,7 +50,6 @@ require (
github.com/prometheus/procfs v0.12.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.26.0 // indirect
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/oauth2 v0.12.0 // indirect
Expand Down
42 changes: 40 additions & 2 deletions internal/controller/maintenanceoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,30 @@ package controller

import (
"context"
"time"

k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"

maintenancev1alpha1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
operatorlog "github.com/Mellanox/maintenance-operator/internal/log"
"github.com/Mellanox/maintenance-operator/internal/vars"
)

const (
// defaultMaintenanceOperatorConifgName is the only MaintenanceOperatorConfig
// object name that Reconcile acts on; requests for any other name are ignored.
defaultMaintenanceOperatorConifgName = "default"
)

// MaintenanceOperatorConfigReconciler reconciles a MaintenanceOperatorConfig object.
// On each reconcile of the default config object it copies values from the
// object's spec into the option holders below and updates the operator log level.
type MaintenanceOperatorConfigReconciler struct {
client.Client
Scheme *runtime.Scheme

// SchedulerReconcierOptions is where Reconcile stores the spec's
// MaxUnavailable and MaxParallelOperations values.
SchedulerReconcierOptions *NodeMaintenanceSchedulerReconcilerOptions
// NodeMaintenanceReconcierOptions is where Reconcile stores the spec's
// MaxNodeMaintenanceTimeSeconds, converted to a time.Duration.
NodeMaintenanceReconcierOptions *NodeMaintenanceReconcilerOptions
}

//+kubebuilder:rbac:groups=maintenance.nvidia.com,resources=maintenanceoperatorconfigs,verbs=get;list;watch;create;update;patch;delete
Expand All @@ -45,13 +56,40 @@ type MaintenanceOperatorConfigReconciler struct {
func (r *MaintenanceOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx)
	logger.Info("got request", "name", req.NamespacedName)

	// Only the well-known config object living in the operator namespace is
	// honoured; everything else is acknowledged and dropped.
	isDefaultConfig := req.Name == defaultMaintenanceOperatorConifgName &&
		req.Namespace == vars.OperatorNamespace
	if !isDefaultConfig {
		logger.Info("request for non default MaintenanceOperatorConfig, ignoring")
		return ctrl.Result{}, nil
	}

	// Fetch the object; a missing object is not an error and needs no requeue.
	operatorCfg := &maintenancev1.MaintenanceOperatorConfig{}
	switch getErr := r.Client.Get(ctx, req.NamespacedName, operatorCfg); {
	case getErr == nil:
		// object retrieved, continue below
	case k8serrors.IsNotFound(getErr):
		return ctrl.Result{}, nil
	default:
		return ctrl.Result{}, getErr
	}
	spec := &operatorCfg.Spec

	// Publish the scheduler reconciler options.
	logger.Info("store scheduler reconciler options",
		"MaxUnavailable", spec.MaxUnavailable,
		"MaxParallelOperations", spec.MaxParallelOperations)
	r.SchedulerReconcierOptions.Store(spec.MaxUnavailable, spec.MaxParallelOperations)

	// Publish the NodeMaintenance reconciler options; the spec holds seconds.
	logger.Info("store nodeMaintenance reconciler options",
		"MaxNodeMaintenanceTimeSeconds", spec.MaxNodeMaintenanceTimeSeconds)
	maxMaintenanceTime := time.Duration(spec.MaxNodeMaintenanceTimeSeconds) * time.Second
	r.NodeMaintenanceReconcierOptions.Store(maxMaintenanceTime)

	// Apply the requested log level; a failure is returned to the caller.
	logger.Info("setting operator log level", "LogLevel", spec.LogLevel)
	if lvlErr := operatorlog.SetLogLevel(string(spec.LogLevel)); lvlErr != nil {
		return ctrl.Result{}, lvlErr
	}

	return ctrl.Result{}, nil
}

// SetupWithManager sets up the controller with the Manager.
// It registers this reconciler as the handler for MaintenanceOperatorConfig
// objects and returns any error reported while building the controller.
func (r *MaintenanceOperatorConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
	// The builder accepts exactly one For(...) call, so the primary type is
	// registered once, through the maintenancev1 alias used by the rest of this file.
	return ctrl.NewControllerManagedBy(mgr).
		For(&maintenancev1.MaintenanceOperatorConfig{}).
		Complete(r)
}
103 changes: 102 additions & 1 deletion internal/controller/maintenanceoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,71 @@ package controller

import (
"context"
"sync"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
ctrl "sigs.k8s.io/controller-runtime"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
operatorlog "github.com/Mellanox/maintenance-operator/internal/log"
)

var _ = Describe("MaintenanceOperatorConfig Controller", func() {
var reconciler *MaintenanceOperatorConfigReconciler

// test context, TODO(adrianc): use ginkgo spec context
var testCtx context.Context

BeforeEach(func() {
testCtx = context.Background()

// create controller manager
By("create controller manager")
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
Scheme: k8sClient.Scheme(),
Metrics: metricsserver.Options{BindAddress: "0"},
})
Expect(err).ToNot(HaveOccurred())

// create reconciler
By("create MaintenanceOperatorConfigReconciler")
reconciler = &MaintenanceOperatorConfigReconciler{
Client: k8sClient,
Scheme: k8sClient.Scheme(),
SchedulerReconcierOptions: NewNodeMaintenanceSchedulerReconcilerOptions(),
NodeMaintenanceReconcierOptions: NewNodeMaintenanceReconcilerOptions(),
}

// setup reconciler with manager
By("setup MaintenanceOperatorConfigReconciler with controller manager")
Expect(reconciler.SetupWithManager(mgr)).ToNot(HaveOccurred())

// start manager
testMgrCtx, cancel := context.WithCancel(testCtx)
By("start manager")
wg := sync.WaitGroup{}
wg.Add(1)
go func() {
defer wg.Done()
defer GinkgoRecover()
By("Start controller manager")
err := mgr.Start(testMgrCtx)
Expect(err).ToNot(HaveOccurred())
}()

DeferCleanup(func() {
By("Shut down controller manager")
cancel()
wg.Wait()
})
})

It("Should Reconcile MaintenanceOperatorConfig resource", func() {
It("Should Reconcile MaintenanceOperatorConfig resource with defaults", func() {
oc := &maintenancev1.MaintenanceOperatorConfig{
ObjectMeta: metav1.ObjectMeta{Name: "default", Namespace: "default"},
Spec: maintenancev1.MaintenanceOperatorConfigSpec{},
Expand All @@ -43,5 +91,58 @@ var _ = Describe("MaintenanceOperatorConfig Controller", func() {
DeferCleanup(func() {
Expect(k8sClient.Delete(testCtx, oc)).ToNot(HaveOccurred())
})

Consistently(func(g Gomega) {
reconciler.SchedulerReconcierOptions.Load()
reconciler.NodeMaintenanceReconcierOptions.Load()
g.Expect(reconciler.SchedulerReconcierOptions.MaxParallelOperations()).To(Equal(&intstr.IntOrString{Type: intstr.Int, IntVal: 1}))
g.Expect(reconciler.SchedulerReconcierOptions.MaxUnavailable()).To(BeNil())
g.Expect(reconciler.NodeMaintenanceReconcierOptions.MaxNodeMaintenanceTime()).To(Equal(defaultMaxNodeMaintenanceTime))
}).ProbeEvery(100 * time.Millisecond).Within(time.Second).Should(Succeed())
})

It("Should Reconcile MaintenanceOperatorConfig resource with specified values", func() {
By("create MaintenanceOperatorConfig with non default values")
oc := &maintenancev1.MaintenanceOperatorConfig{
ObjectMeta: metav1.ObjectMeta{Name: "default", Namespace: "default"},
Spec: maintenancev1.MaintenanceOperatorConfigSpec{
MaxParallelOperations: &intstr.IntOrString{Type: intstr.Int, IntVal: 3},
MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 3},
MaxNodeMaintenanceTimeSeconds: 300,
LogLevel: "debug",
},
}
Expect(k8sClient.Create(testCtx, oc)).ToNot(HaveOccurred())
DeferCleanup(func() {
Expect(k8sClient.Delete(testCtx, oc)).ToNot(HaveOccurred())
})

By("check MaintenanceOperatorConfig values were updated")
Eventually(func(g Gomega) {
reconciler.SchedulerReconcierOptions.Load()
reconciler.NodeMaintenanceReconcierOptions.Load()
g.Expect(reconciler.SchedulerReconcierOptions.MaxParallelOperations()).To(Equal(oc.Spec.MaxParallelOperations))
g.Expect(reconciler.SchedulerReconcierOptions.MaxUnavailable()).To(Equal(oc.Spec.MaxUnavailable))
g.Expect(reconciler.NodeMaintenanceReconcierOptions.MaxNodeMaintenanceTime()).
To(Equal(time.Second * time.Duration(oc.Spec.MaxNodeMaintenanceTimeSeconds)))
g.Expect(operatorlog.GetLogLevel()).To(BeEquivalentTo(oc.Spec.LogLevel))
}).ProbeEvery(100 * time.Millisecond).Within(time.Second).Should(Succeed())

By("update MaintenanceOperatorConfig")
oc.Spec.MaxParallelOperations = &intstr.IntOrString{Type: intstr.Int, IntVal: 5}
oc.Spec.MaxUnavailable = nil
oc.Spec.LogLevel = "info"
Expect(k8sClient.Update(testCtx, oc)).ToNot(HaveOccurred())

By("check MaintenanceOperatorConfig values were updated")
Eventually(func(g Gomega) {
reconciler.SchedulerReconcierOptions.Load()
reconciler.NodeMaintenanceReconcierOptions.Load()
g.Expect(reconciler.SchedulerReconcierOptions.MaxParallelOperations()).To(Equal(oc.Spec.MaxParallelOperations))
g.Expect(reconciler.SchedulerReconcierOptions.MaxUnavailable()).To(Equal(oc.Spec.MaxUnavailable))
g.Expect(reconciler.NodeMaintenanceReconcierOptions.MaxNodeMaintenanceTime()).
To(Equal(time.Second * time.Duration(oc.Spec.MaxNodeMaintenanceTimeSeconds)))
g.Expect(operatorlog.GetLogLevel()).To(BeEquivalentTo(oc.Spec.LogLevel))
}).ProbeEvery(100 * time.Millisecond).Within(time.Second).Should(Succeed())
})
})
Loading
Loading