Skip to content

Commit

Permalink
[release-1.12] Don't rely on InfraStructureTopology for infra HA (#3196
Browse files Browse the repository at this point in the history
…) (#3202)

* [release-1.12] Don't rely on InfraStructureTopology for infra HA (#3196)

This is a manual backport of #3186

Signed-off-by: Oren Cohen <[email protected]>

* [release-1.12] Fixes for HA, based on #3209

Signed-off-by: Oren Cohen <[email protected]>

---------

Signed-off-by: Oren Cohen <[email protected]>
Co-authored-by: kubevirt-bot <[email protected]>
  • Loading branch information
orenc1 and kubevirt-bot authored Dec 25, 2024
1 parent d767e65 commit 271a689
Show file tree
Hide file tree
Showing 30 changed files with 591 additions and 149 deletions.
5 changes: 5 additions & 0 deletions api/v1beta1/hyperconverged_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,11 @@ type HyperConvergedStatus struct {
// SystemHealthStatus reflects the health of HCO and its secondary resources, based on the aggregated conditions.
// +optional
SystemHealthStatus string `json:"systemHealthStatus,omitempty"`

// InfrastructureHighlyAvailable describes whether the cluster has only one worker node
// (false) or more (true).
// +optional
InfrastructureHighlyAvailable *bool `json:"infrastructureHighlyAvailable,omitempty"`
}

type Version struct {
Expand Down
5 changes: 5 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions api/v1beta1/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions cmd/hyperconverged-cluster-operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ import (
hcov1beta1 "github.com/kubevirt/hyperconverged-cluster-operator/api/v1beta1"
"github.com/kubevirt/hyperconverged-cluster-operator/cmd/cmdcommon"
"github.com/kubevirt/hyperconverged-cluster-operator/controllers/hyperconverged"
"github.com/kubevirt/hyperconverged-cluster-operator/controllers/nodes"
"github.com/kubevirt/hyperconverged-cluster-operator/controllers/observability"
"github.com/kubevirt/hyperconverged-cluster-operator/controllers/operands"
"github.com/kubevirt/hyperconverged-cluster-operator/pkg/monitoring/metrics"
Expand Down Expand Up @@ -181,6 +182,13 @@ func main() {
}
}

// Create a new Nodes reconciler
if err := nodes.RegisterReconciler(mgr); err != nil {
logger.Error(err, "failed to register the Nodes controller")
eventEmitter.EmitEvent(nil, corev1.EventTypeWarning, "InitError", "Unable to register Nodes controller; "+err.Error())
os.Exit(1)
}

err = createPriorityClass(ctx, mgr)
cmdHelper.ExitOnError(err, "Failed creating PriorityClass")

Expand Down
5 changes: 5 additions & 0 deletions config/crd/bases/hco.kubevirt.io_hyperconvergeds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4810,6 +4810,11 @@ spec:
DataImportSchedule is the cron expression that is used in for the hard-coded data import cron templates. HCO
generates the value of this field once and stored in the status field, so will survive restart.
type: string
infrastructureHighlyAvailable:
description: |-
InfrastructureHighlyAvailable describes whether the cluster has only one worker node
(false) or more (true).
type: boolean
observedGeneration:
description: |-
ObservedGeneration reflects the HyperConverged resource generation. If the ObservedGeneration is less than the
Expand Down
9 changes: 9 additions & 0 deletions controllers/commontestutils/testUtils.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,9 @@ func (ClusterInfoMock) IsControlPlaneHighlyAvailable() bool {
// IsInfrastructureHighlyAvailable always returns true for this mock.
func (ClusterInfoMock) IsInfrastructureHighlyAvailable() bool {
return true
}
// SetHighAvailabilityMode is a no-op for this mock: both arguments are ignored
// and nil is always returned.
func (ClusterInfoMock) SetHighAvailabilityMode(_ context.Context, _ client.Client) error {
return nil
}
// GetDomain always returns the fixed string "domain".
func (ClusterInfoMock) GetDomain() string {
return "domain"
}
Expand Down Expand Up @@ -354,6 +357,9 @@ func (ClusterInfoSNOMock) IsControlPlaneHighlyAvailable() bool {
// IsInfrastructureHighlyAvailable always returns false for this mock.
func (ClusterInfoSNOMock) IsInfrastructureHighlyAvailable() bool {
return false
}
// SetHighAvailabilityMode is a no-op for this mock: both arguments are ignored
// and nil is always returned.
func (ClusterInfoSNOMock) SetHighAvailabilityMode(_ context.Context, _ client.Client) error {
return nil
}
// GetDomain always returns the fixed string "domain".
func (ClusterInfoSNOMock) GetDomain() string {
return "domain"
}
Expand Down Expand Up @@ -411,6 +417,9 @@ func (ClusterInfoSRCPHAIMock) IsControlPlaneHighlyAvailable() bool {
// IsInfrastructureHighlyAvailable always returns true for this mock.
func (ClusterInfoSRCPHAIMock) IsInfrastructureHighlyAvailable() bool {
return true
}
// SetHighAvailabilityMode is a no-op for this mock: both arguments are ignored
// and nil is always returned.
func (ClusterInfoSRCPHAIMock) SetHighAvailabilityMode(_ context.Context, _ client.Client) error {
return nil
}
// GetPod returns pod, an identifier declared outside this method.
// NOTE(review): pod is presumably a package-level test fixture; confirm against its declaration.
func (ClusterInfoSRCPHAIMock) GetPod() *corev1.Pod {
return pod
}
Expand Down
8 changes: 7 additions & 1 deletion controllers/hyperconverged/hyperconverged_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"os"
"reflect"

"k8s.io/utils/ptr"

"github.com/blang/semver/v4"
jsonpatch "github.com/evanphx/json-patch/v5"
"github.com/go-logr/logr"
Expand Down Expand Up @@ -162,7 +164,8 @@ func add(mgr manager.Manager, r reconcile.Reconciler, ci hcoutil.ClusterInfo) er
err = c.Watch(
source.Kind(mgr.GetCache(), &hcov1beta1.HyperConverged{}),
&operatorhandler.InstrumentedEnqueueRequestForObject{},
predicate.Or(predicate.GenerationChangedPredicate{}, predicate.AnnotationChangedPredicate{}))
predicate.Or(predicate.GenerationChangedPredicate{}, predicate.AnnotationChangedPredicate{},
predicate.ResourceVersionChangedPredicate{}))
if err != nil {
return err
}
Expand Down Expand Up @@ -419,6 +422,9 @@ func (r *ReconcileHyperConverged) doReconcile(req *common.HcoRequest) (reconcile
if init {
r.eventEmitter.EmitEvent(req.Instance, corev1.EventTypeNormal, "InitHCO", "Initiating the HyperConverged")
r.setInitialConditions(req)

req.Instance.Status.InfrastructureHighlyAvailable = ptr.To(hcoutil.GetClusterInfo().IsInfrastructureHighlyAvailable())
req.StatusDirty = true
}

r.setLabels(req)
Expand Down
137 changes: 137 additions & 0 deletions controllers/nodes/nodes_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package nodes

import (
"context"
"maps"

"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/utils/ptr"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/event"

hcov1beta1 "github.com/kubevirt/hyperconverged-cluster-operator/api/v1beta1"

operatorhandler "github.com/operator-framework/operator-lib/handler"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util"
)

// log is the logger shared by everything in the nodes controller package.
var log = logf.Log.WithName("controller_nodes")

// RegisterReconciler builds the Nodes reconciler from the given manager and
// registers it, together with its Node watch, into that same manager.
// It returns whatever error the registration produced, or nil on success.
func RegisterReconciler(mgr manager.Manager) error {
	nodesReconciler := newReconciler(mgr)

	return add(mgr, nodesReconciler)
}

// newReconciler creates the ReconcileNodeCounter, wiring in the manager's client.
func newReconciler(mgr manager.Manager) reconcile.Reconciler {
	return &ReconcileNodeCounter{
		client: mgr.GetClient(),
	}
}

// add creates the "nodes-controller" controller in mgr, with r as its
// reconcile.Reconciler, and makes it watch the cluster's Node objects.
// Node events are filtered by nodeCountChangePredicate before being enqueued.
// It returns the first error hit while creating the controller or the watch.
func add(mgr manager.Manager, r reconcile.Reconciler) error {
	c, err := controller.New("nodes-controller", mgr, controller.Options{Reconciler: r})
	if err != nil {
		return err
	}

	// Event source for the cluster's nodes, backed by the manager's cache
	nodeSource := source.Kind(mgr.GetCache(), &corev1.Node{})

	return c.Watch(
		nodeSource,
		&operatorhandler.InstrumentedEnqueueRequestForObject{},
		nodeCountChangePredicate{},
	)
}

// nodeCountChangePredicate filters the Node events that reach the nodes
// controller. Create and Delete events always pass, Update events pass only
// when the node's labels changed, and Generic events are always dropped.
type nodeCountChangePredicate struct {
	predicate.Funcs
}

// Update reports whether the node's labels differ between the old and the new
// object of the event.
//
// An event that is missing either object is logged and rejected, mirroring
// controller-runtime's built-in predicates, instead of panicking on a nil
// object.
func (nodeCountChangePredicate) Update(e event.UpdateEvent) bool {
	if e.ObjectOld == nil {
		log.Error(nil, "Update event has no old object to update", "event", e)
		return false
	}
	if e.ObjectNew == nil {
		log.Error(nil, "Update event has no new object to update", "event", e)
		return false
	}

	return !maps.Equal(e.ObjectOld.GetLabels(), e.ObjectNew.GetLabels())
}

// Create always accepts the event: a node was added.
func (nodeCountChangePredicate) Create(_ event.CreateEvent) bool {
	return true
}

// Delete always accepts the event: a node was removed.
func (nodeCountChangePredicate) Delete(_ event.DeleteEvent) bool {
	return true
}

// Generic always rejects the event.
func (nodeCountChangePredicate) Generic(_ event.GenericEvent) bool {
	return false
}

// ReconcileNodeCounter is the reconciler of the nodes controller. Its Reconcile
// method refreshes the high-availability mode held by the ClusterInfo singleton
// and updates the HyperConverged status accordingly.
type ReconcileNodeCounter struct {
// client is set from mgr.GetClient() in newReconciler.
// NOTE(review): described by the original author as a split client that reads
// objects from the cache and writes to the apiserver - confirm against the
// manager configuration.
client client.Client
}

// Reconcile refreshes the high-availability mode held by the ClusterInfo
// singleton and mirrors the resulting infrastructure HA flag into the status
// of the HyperConverged CR.
//
// The incoming request is ignored; the HyperConverged CR is always looked up by
// its well-known name in the operator namespace taken from the environment.
// Nothing is written when the CR does not exist, when it is being deleted, or
// when its status already carries the current value.
//
// A non-nil error is returned when refreshing the HA mode, resolving the
// operator namespace, reading the CR or updating its status fails.
func (r *ReconcileNodeCounter) Reconcile(ctx context.Context, _ reconcile.Request) (reconcile.Result, error) {
	log.Info("Triggered by a node count change")

	clusterInfo := hcoutil.GetClusterInfo()
	if err := clusterInfo.SetHighAvailabilityMode(ctx, r.client); err != nil {
		return reconcile.Result{}, err
	}

	namespace, err := hcoutil.GetOperatorNamespaceFromEnv()
	if err != nil {
		return reconcile.Result{}, err
	}

	hco := &hcov1beta1.HyperConverged{}
	hcoKey := types.NamespacedName{
		Name:      hcoutil.HyperConvergedName,
		Namespace: namespace,
	}
	if err = r.client.Get(ctx, hcoKey, hco); err != nil {
		if errors.IsNotFound(err) {
			// No HyperConverged CR yet: there is no status to update
			return reconcile.Result{}, nil
		}
		return reconcile.Result{}, err
	}

	// Do not touch the status of a CR that is being deleted
	if !hco.ObjectMeta.DeletionTimestamp.IsZero() {
		return reconcile.Result{}, nil
	}

	// Read the flag once, so the value compared below is exactly the value stored
	infraHA := clusterInfo.IsInfrastructureHighlyAvailable()
	if current := hco.Status.InfrastructureHighlyAvailable; current != nil && *current == infraHA {
		return reconcile.Result{}, nil
	}

	hco.Status.InfrastructureHighlyAvailable = ptr.To(infraHA)
	if err = r.client.Status().Update(ctx, hco); err != nil {
		return reconcile.Result{}, err
	}

	return reconcile.Result{}, nil
}
Loading

0 comments on commit 271a689

Please sign in to comment.