Merge pull request #6 from adrianchiris/maintenance-operator-config-controller

Maintenance operator config controller

ykulazhenkov authored Jul 29, 2024
2 parents 3705ff1 + 724e665 commit afa2376
Showing 14 changed files with 528 additions and 139 deletions.
1 change: 0 additions & 1 deletion .golangci.yml
@@ -56,7 +56,6 @@ linters:
- errname
- exportloopref
- fatcontext
- forcetypeassert
- funlen
- ginkgolinter
- goconst
46 changes: 24 additions & 22 deletions cmd/maintenance-manager/main.go
@@ -31,12 +31,12 @@ import (
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"

maintenancev1alpha1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
"github.com/Mellanox/maintenance-operator/internal/controller"
operatorlog "github.com/Mellanox/maintenance-operator/internal/log"
"github.com/Mellanox/maintenance-operator/internal/scheduler"
"github.com/Mellanox/maintenance-operator/internal/version"
//+kubebuilder:scaffold:imports
@@ -73,18 +73,14 @@ func main() {
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
flag.BoolVar(&printVersion, "version", false, "print version and exit")

opts := zap.Options{
Development: true,
}
opts.BindFlags(flag.CommandLine)
operatorlog.BindFlags(flag.CommandLine)
flag.Parse()

if printVersion {
fmt.Printf("%s\n", version.GetVersionString())
os.Exit(0)
}

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
operatorlog.InitLog()

setupLog.Info("Maintenance Operator", "version", version.GetVersionString())

@@ -136,30 +132,36 @@ func main() {
os.Exit(1)
}

nmrOptions := controller.NewNodeMaintenanceReconcilerOptions()
if err = (&controller.NodeMaintenanceReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Options: nmrOptions,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NodeMaintenance")
os.Exit(1)
}
if err = (&controller.MaintenanceOperatorConfigReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),

nmsrOptions := controller.NewNodeMaintenanceSchedulerReconcilerOptions()
nmsrLog := ctrl.Log.WithName("NodeMaintenanceScheduler")
if err = (&controller.NodeMaintenanceSchedulerReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Options: nmsrOptions,
Log: nmsrLog,
Sched: scheduler.NewDefaultScheduler(nmsrLog.WithName("DefaultScheduler")),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "MaintenanceOperatorConfig")
setupLog.Error(err, "unable to create controller", "controller", "NodeMaintenanceScheduler")
os.Exit(1)
}
nmSchedulerReconcilerLog := ctrl.Log.WithName("NodeMaintenanceScheduler")
if err = (&controller.NodeMaintenanceSchedulerReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
MaxUnavailable: nil,
MaxParallelOperations: nil,
Log: nmSchedulerReconcilerLog,
Sched: scheduler.NewDefaultScheduler(nmSchedulerReconcilerLog.WithName("DefaultScheduler")),

if err = (&controller.MaintenanceOperatorConfigReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
NodeMaintenanceReconcierOptions: nmrOptions,
SchedulerReconcierOptions: nmsrOptions,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NodeMaintenanceScheduler")
setupLog.Error(err, "unable to create controller", "controller", "MaintenanceOperatorConfig")
os.Exit(1)
}
//+kubebuilder:scaffold:builder
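The NewNodeMaintenanceReconcilerOptions and NewNodeMaintenanceSchedulerReconcilerOptions values wired in above are shared between the config controller (which calls Store) and the respective reconcilers (which call Load); their definitions live in files not shown in this excerpt. The following is a hypothetical sketch, assuming a mutex-guarded pending/active pattern, of how the scheduler options object could look — the type name, fields, and defaults are illustrative, not the repository's actual code.

```go
package controller

import (
	"sync"

	"k8s.io/apimachinery/pkg/util/intstr"
)

// nodeMaintenanceSchedulerOptionsSketch holds scheduler settings that the
// MaintenanceOperatorConfig controller may update at runtime.
type nodeMaintenanceSchedulerOptionsSketch struct {
	mu sync.Mutex

	// pending values written by the config controller via Store
	pendingMaxUnavailable        *intstr.IntOrString
	pendingMaxParallelOperations *intstr.IntOrString

	// active values used by the scheduler for the current reconcile
	maxUnavailable        *intstr.IntOrString
	maxParallelOperations *intstr.IntOrString
}

// newNodeMaintenanceSchedulerOptionsSketch returns options with the defaults
// exercised in the tests below (MaxParallelOperations=1, MaxUnavailable unset).
func newNodeMaintenanceSchedulerOptionsSketch() *nodeMaintenanceSchedulerOptionsSketch {
	one := intstr.FromInt32(1)
	return &nodeMaintenanceSchedulerOptionsSketch{
		pendingMaxParallelOperations: &one,
		maxParallelOperations:        &one,
	}
}

// Store records new values; they take effect on the next Load.
func (o *nodeMaintenanceSchedulerOptionsSketch) Store(maxUnavailable, maxParallelOperations *intstr.IntOrString) {
	o.mu.Lock()
	defer o.mu.Unlock()
	o.pendingMaxUnavailable = maxUnavailable
	o.pendingMaxParallelOperations = maxParallelOperations
}

// Load promotes the most recently stored values to the active set.
func (o *nodeMaintenanceSchedulerOptionsSketch) Load() {
	o.mu.Lock()
	defer o.mu.Unlock()
	o.maxUnavailable = o.pendingMaxUnavailable
	o.maxParallelOperations = o.pendingMaxParallelOperations
}

// MaxUnavailable returns the active MaxUnavailable value.
func (o *nodeMaintenanceSchedulerOptionsSketch) MaxUnavailable() *intstr.IntOrString {
	o.mu.Lock()
	defer o.mu.Unlock()
	return o.maxUnavailable
}

// MaxParallelOperations returns the active MaxParallelOperations value.
func (o *nodeMaintenanceSchedulerOptionsSketch) MaxParallelOperations() *intstr.IntOrString {
	o.mu.Lock()
	defer o.mu.Unlock()
	return o.maxParallelOperations
}
```

With this shape the config controller can call Store at any time, while the scheduler calls Load once at the start of each reconcile so a single pass always sees a consistent set of values.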
6 changes: 6 additions & 0 deletions config/manager/manager.yaml
@@ -98,5 +98,11 @@ spec:
requests:
cpu: 10m
memory: 64Mi
env:
- name: OPERATOR_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace

serviceAccountName: controller-manager
terminationGracePeriodSeconds: 10
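The OPERATOR_NAMESPACE variable injected above is what the config controller later compares against via vars.OperatorNamespace. The internal/vars package is not part of this excerpt; below is a minimal sketch, assuming it simply reads the environment at startup — the helper name and the "default" fallback are illustrative assumptions, not the repository's actual code.

```go
package vars

import "os"

// OperatorNamespace is the namespace the operator runs in; the config
// controller only acts on the default MaintenanceOperatorConfig found here.
var OperatorNamespace = getEnvOrDefault("OPERATOR_NAMESPACE", "default")

// getEnvOrDefault returns the value of key, or def if it is unset or empty.
func getEnvOrDefault(key, def string) string {
	if v, ok := os.LookupEnv(key); ok && v != "" {
		return v
	}
	return def
}
```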
2 changes: 1 addition & 1 deletion go.mod
@@ -9,6 +9,7 @@ require (
github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.33.1
github.com/pkg/errors v0.9.1
go.uber.org/zap v1.26.0
k8s.io/api v0.30.2
k8s.io/apimachinery v0.30.2
k8s.io/client-go v0.30.2
@@ -49,7 +50,6 @@ require (
github.com/prometheus/procfs v0.12.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.26.0 // indirect
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/oauth2 v0.12.0 // indirect
42 changes: 40 additions & 2 deletions internal/controller/maintenanceoperatorconfig_controller.go
@@ -18,19 +18,30 @@ package controller

import (
"context"
"time"

k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"

maintenancev1alpha1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
operatorlog "github.com/Mellanox/maintenance-operator/internal/log"
"github.com/Mellanox/maintenance-operator/internal/vars"
)

const (
defaultMaintenanceOperatorConifgName = "default"
)

// MaintenanceOperatorConfigReconciler reconciles a MaintenanceOperatorConfig object
type MaintenanceOperatorConfigReconciler struct {
client.Client
Scheme *runtime.Scheme

SchedulerReconcierOptions *NodeMaintenanceSchedulerReconcilerOptions
NodeMaintenanceReconcierOptions *NodeMaintenanceReconcilerOptions
}

//+kubebuilder:rbac:groups=maintenance.nvidia.com,resources=maintenanceoperatorconfigs,verbs=get;list;watch;create;update;patch;delete
@@ -45,13 +56,40 @@ type MaintenanceOperatorConfigReconciler struct {
func (r *MaintenanceOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
reqLog := log.FromContext(ctx)
reqLog.Info("got request", "name", req.NamespacedName)
if req.Name != defaultMaintenanceOperatorConifgName || req.Namespace != vars.OperatorNamespace {
reqLog.Info("request for non default MaintenanceOperatorConfig, ignoring")
return ctrl.Result{}, nil
}

cfg := &maintenancev1.MaintenanceOperatorConfig{}
err := r.Client.Get(ctx, req.NamespacedName, cfg)
if err != nil {
if k8serrors.IsNotFound(err) {
return ctrl.Result{}, nil
}
return ctrl.Result{}, err
}

// handle reconcilers options
reqLog.Info("store scheduler reconciler options", "MaxUnavailable", cfg.Spec.MaxUnavailable,
"MaxParallelOperations", cfg.Spec.MaxParallelOperations)
r.SchedulerReconcierOptions.Store(cfg.Spec.MaxUnavailable, cfg.Spec.MaxParallelOperations)
reqLog.Info("store nodeMaintenance reconciler options", "MaxNodeMaintenanceTimeSeconds", cfg.Spec.MaxNodeMaintenanceTimeSeconds)
r.NodeMaintenanceReconcierOptions.Store(time.Second * time.Duration(cfg.Spec.MaxNodeMaintenanceTimeSeconds))

// handle log level
reqLog.Info("setting operator log level", "LogLevel", cfg.Spec.LogLevel)
err = operatorlog.SetLogLevel(string(cfg.Spec.LogLevel))
if err != nil {
return ctrl.Result{}, err
}

return ctrl.Result{}, nil
}

// SetupWithManager sets up the controller with the Manager.
func (r *MaintenanceOperatorConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&maintenancev1alpha1.MaintenanceOperatorConfig{}).
For(&maintenancev1.MaintenanceOperatorConfig{}).
Complete(r)
}
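operatorlog (github.com/Mellanox/maintenance-operator/internal/log) is referenced here and in main.go via BindFlags, InitLog, SetLogLevel, and GetLogLevel, but its implementation is not included in this excerpt. A hypothetical sketch of such a package, assuming it wraps controller-runtime's zap integration around a shared zap.AtomicLevel so the level can be changed at runtime — all names and the flag-precedence choice are assumptions, not the actual repository code:

```go
package log

import (
	"flag"

	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	ctrl "sigs.k8s.io/controller-runtime"
	crzap "sigs.k8s.io/controller-runtime/pkg/log/zap"
)

// level is shared between the logger built in InitLog and SetLogLevel, so the
// config controller can change verbosity without rebuilding loggers.
var level = zap.NewAtomicLevel()

// options collects the standard controller-runtime zap flags (--zap-devel, ...).
var options = crzap.Options{Development: true}

// BindFlags registers the controller-runtime zap flags on the given FlagSet.
func BindFlags(fs *flag.FlagSet) {
	options.BindFlags(fs)
}

// InitLog builds the root logger. In this sketch the shared AtomicLevel takes
// precedence over any level supplied via flags.
func InitLog() {
	options.Level = level
	ctrl.SetLogger(crzap.New(crzap.UseFlagOptions(&options)))
}

// SetLogLevel parses a level string such as "debug" or "info" and applies it.
func SetLogLevel(lvl string) error {
	parsed, err := zapcore.ParseLevel(lvl)
	if err != nil {
		return err
	}
	level.SetLevel(parsed)
	return nil
}

// GetLogLevel returns the currently active level as a string.
func GetLogLevel() string {
	return level.Level().String()
}
```

With this shape, a SetLogLevel call from the config controller takes effect immediately for every logger derived from the controller-runtime root logger.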
103 changes: 102 additions & 1 deletion internal/controller/maintenanceoperatorconfig_controller_test.go
@@ -18,23 +18,71 @@ package controller

import (
"context"
"sync"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
ctrl "sigs.k8s.io/controller-runtime"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

maintenancev1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
operatorlog "github.com/Mellanox/maintenance-operator/internal/log"
)

var _ = Describe("MaintenanceOperatorConfig Controller", func() {
var reconciler *MaintenanceOperatorConfigReconciler

// test context, TODO(adrianc): use ginkgo spec context
var testCtx context.Context

BeforeEach(func() {
testCtx = context.Background()

// create controller manager
By("create controller manager")
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
Scheme: k8sClient.Scheme(),
Metrics: metricsserver.Options{BindAddress: "0"},
})
Expect(err).ToNot(HaveOccurred())

// create reconciler
By("create MaintenanceOperatorConfigReconciler")
reconciler = &MaintenanceOperatorConfigReconciler{
Client: k8sClient,
Scheme: k8sClient.Scheme(),
SchedulerReconcierOptions: NewNodeMaintenanceSchedulerReconcilerOptions(),
NodeMaintenanceReconcierOptions: NewNodeMaintenanceReconcilerOptions(),
}

// setup reconciler with manager
By("setup MaintenanceOperatorConfigReconciler with controller manager")
Expect(reconciler.SetupWithManager(mgr)).ToNot(HaveOccurred())

// start manager
testMgrCtx, cancel := context.WithCancel(testCtx)
By("start manager")
wg := sync.WaitGroup{}
wg.Add(1)
go func() {
defer wg.Done()
defer GinkgoRecover()
By("Start controller manager")
err := mgr.Start(testMgrCtx)
Expect(err).ToNot(HaveOccurred())
}()

DeferCleanup(func() {
By("Shut down controller manager")
cancel()
wg.Wait()
})
})

It("Should Reconcile MaintenanceOperatorConfig resource", func() {
It("Should Reconcile MaintenanceOperatorConfig resource with defaults", func() {
oc := &maintenancev1.MaintenanceOperatorConfig{
ObjectMeta: metav1.ObjectMeta{Name: "default", Namespace: "default"},
Spec: maintenancev1.MaintenanceOperatorConfigSpec{},
@@ -43,5 +91,58 @@ var _ = Describe("MaintenanceOperatorConfig Controller", func() {
DeferCleanup(func() {
Expect(k8sClient.Delete(testCtx, oc)).ToNot(HaveOccurred())
})

Consistently(func(g Gomega) {
reconciler.SchedulerReconcierOptions.Load()
reconciler.NodeMaintenanceReconcierOptions.Load()
g.Expect(reconciler.SchedulerReconcierOptions.MaxParallelOperations()).To(Equal(&intstr.IntOrString{Type: intstr.Int, IntVal: 1}))
g.Expect(reconciler.SchedulerReconcierOptions.MaxUnavailable()).To(BeNil())
g.Expect(reconciler.NodeMaintenanceReconcierOptions.MaxNodeMaintenanceTime()).To(Equal(defaultMaxNodeMaintenanceTime))
}).ProbeEvery(100 * time.Millisecond).Within(time.Second).Should(Succeed())
})

It("Should Reconcile MaintenanceOperatorConfig resource with specified values", func() {
By("create MaintenanceOperatorConfig with non default values")
oc := &maintenancev1.MaintenanceOperatorConfig{
ObjectMeta: metav1.ObjectMeta{Name: "default", Namespace: "default"},
Spec: maintenancev1.MaintenanceOperatorConfigSpec{
MaxParallelOperations: &intstr.IntOrString{Type: intstr.Int, IntVal: 3},
MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 3},
MaxNodeMaintenanceTimeSeconds: 300,
LogLevel: "debug",
},
}
Expect(k8sClient.Create(testCtx, oc)).ToNot(HaveOccurred())
DeferCleanup(func() {
Expect(k8sClient.Delete(testCtx, oc)).ToNot(HaveOccurred())
})

By("check MaintenanceOperatorConfig values were updated")
Eventually(func(g Gomega) {
reconciler.SchedulerReconcierOptions.Load()
reconciler.NodeMaintenanceReconcierOptions.Load()
g.Expect(reconciler.SchedulerReconcierOptions.MaxParallelOperations()).To(Equal(oc.Spec.MaxParallelOperations))
g.Expect(reconciler.SchedulerReconcierOptions.MaxUnavailable()).To(Equal(oc.Spec.MaxUnavailable))
g.Expect(reconciler.NodeMaintenanceReconcierOptions.MaxNodeMaintenanceTime()).
To(Equal(time.Second * time.Duration(oc.Spec.MaxNodeMaintenanceTimeSeconds)))
g.Expect(operatorlog.GetLogLevel()).To(BeEquivalentTo(oc.Spec.LogLevel))
}).ProbeEvery(100 * time.Millisecond).Within(time.Second).Should(Succeed())

By("update MaintenanceOperatorConfig")
oc.Spec.MaxParallelOperations = &intstr.IntOrString{Type: intstr.Int, IntVal: 5}
oc.Spec.MaxUnavailable = nil
oc.Spec.LogLevel = "info"
Expect(k8sClient.Update(testCtx, oc)).ToNot(HaveOccurred())

By("check MaintenanceOperatorConfig values were updated")
Eventually(func(g Gomega) {
reconciler.SchedulerReconcierOptions.Load()
reconciler.NodeMaintenanceReconcierOptions.Load()
g.Expect(reconciler.SchedulerReconcierOptions.MaxParallelOperations()).To(Equal(oc.Spec.MaxParallelOperations))
g.Expect(reconciler.SchedulerReconcierOptions.MaxUnavailable()).To(Equal(oc.Spec.MaxUnavailable))
g.Expect(reconciler.NodeMaintenanceReconcierOptions.MaxNodeMaintenanceTime()).
To(Equal(time.Second * time.Duration(oc.Spec.MaxNodeMaintenanceTimeSeconds)))
g.Expect(operatorlog.GetLogLevel()).To(BeEquivalentTo(oc.Spec.LogLevel))
}).ProbeEvery(100 * time.Millisecond).Within(time.Second).Should(Succeed())
})
})