Skip to content

Commit

Permalink
Honor fail-on-init-error when no resources are found
Browse files Browse the repository at this point in the history
As implemented, GFD (gpu-feature-discovery) will not fail if no resources are detected -- even
if fail-on-init-error is set. This change ensures that fail-on-init-error
is honored if no resources are detected.

Signed-off-by: Evan Lezar <[email protected]>
  • Loading branch information
elezar committed Oct 31, 2024
1 parent 78c5937 commit 6413d86
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
5 changes: 4 additions & 1 deletion cmd/gpu-feature-discovery/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,10 @@ func start(c *cli.Context, cfg *Config) error {
}
klog.Infof("\nRunning with config:\n%v", string(configJSON))

manager := resource.NewManager(config)
manager, err := resource.NewManager(config)
if err != nil {
return err
}
vgpul := vgpu.NewVGPULib(vgpu.NewNvidiaPCILib())

var clientSets flags.ClientSets
Expand Down
24 changes: 17 additions & 7 deletions internal/resource/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,26 @@
package resource

import (
"fmt"

"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"k8s.io/klog/v2"

spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
)

// NewManager is a factory method that creates a resource Manager based on the specified config.
func NewManager(config *spec.Config) Manager {
return WithConfig(getManager(), config)
// NewManager constructs the resource Manager for the current system and
// applies the supplied config to it.
//
// If getManager reports an error, the outcome depends on the
// FailOnInitError flag in config: when set, the error is returned to the
// caller; otherwise the error is logged and a null manager is returned.
func NewManager(config *spec.Config) (Manager, error) {
	detected, err := getManager()
	if err == nil {
		// Happy path: wrap the detected manager according to the config.
		return WithConfig(detected, config), nil
	}

	if *config.Flags.FailOnInitError {
		return nil, err
	}

	// Initialization errors are tolerated: log and fall back to the null manager.
	klog.ErrorS(err, "using empty manager")
	return NewNullManager(), nil
}

// WithConfig modifies a manager depending on the specified config.
Expand All @@ -39,7 +50,7 @@ func WithConfig(manager Manager, config *spec.Config) Manager {
}

// getManager returns the resource manager depending on the system configuration.
func getManager() Manager {
func getManager() (Manager, error) {
// logWithReason logs the output of the has* / is* checks from the info.Interface
logWithReason := func(f func() (bool, string), tag string) bool {
is, reason := f()
Expand All @@ -63,12 +74,11 @@ func getManager() Manager {

if hasNVML {
klog.Info("Using NVML manager")
return NewNVMLManager()
return NewNVMLManager(), nil
} else if isTegra {
klog.Info("Using CUDA manager")
return NewCudaManager()
return NewCudaManager(), nil
}

klog.Warning("No valid resources detected; using empty manager.")
return NewNullManager()
return nil, fmt.Errorf("no valid resource detected")
}

0 comments on commit 6413d86

Please sign in to comment.