From e6ec8d3d8f8f0999e2401be3489f1f4f8d8b6ef4 Mon Sep 17 00:00:00 2001 From: Daniel Kleinstein Date: Sun, 27 Oct 2024 19:00:54 +0200 Subject: [PATCH] bugfix: Allow for optional timeslicing configuration This fixes a vestigial bug from the introduction of MPS. Now that two timeslicing configurations are available, it's possible for MPS to be configured instead of timeslicing. But since the TimeSlicing field was made non-optional - its existence is forced when unmarshalling, and then the parsing fails because no resources are specified under the empty TimeSlicing field. Signed-off-by: Daniel Kleinstein --- api/config/v1/config.go | 8 ++++++-- api/config/v1/sharing.go | 6 +++--- internal/lm/mig-strategy_test.go | 10 +++++----- internal/lm/nvml_test.go | 2 +- internal/lm/resource_test.go | 6 +++--- internal/rm/rm_test.go | 6 +++--- 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/api/config/v1/config.go b/api/config/v1/config.go index 0af2c8125..0ab9531d1 100644 --- a/api/config/v1/config.go +++ b/api/config/v1/config.go @@ -97,9 +97,13 @@ func DisableResourceNamingInConfig(logger logger, config *Config) { config.Resources.MIGs = nil // Disable renaming / device selection in Sharing.TimeSlicing.Resources - config.Sharing.TimeSlicing.disableResoureRenaming(logger, "timeSlicing") + if (config.sharing.TimeSlicing != nil) { + config.Sharing.TimeSlicing.disableResoureRenaming(logger, "timeSlicing") + } // Disable renaming / device selection in Sharing.MPS.Resources - config.Sharing.MPS.disableResoureRenaming(logger, "mps") + if (config.Sharing.MPS != nil) { + config.Sharing.MPS.disableResoureRenaming(logger, "mps") + } } // parseConfig parses a config file as either YAML of JSON and unmarshals it into a Config struct. diff --git a/api/config/v1/sharing.go b/api/config/v1/sharing.go index 82906fd36..e084f67a3 100644 --- a/api/config/v1/sharing.go +++ b/api/config/v1/sharing.go @@ -19,7 +19,7 @@ package v1 // Sharing encapsulates the set of sharing strategies that are supported. type Sharing struct { // TimeSlicing defines the set of replicas to be made for timeSlicing available resources. - TimeSlicing ReplicatedResources `json:"timeSlicing,omitempty" yaml:"timeSlicing,omitempty"` + TimeSlicing *ReplicatedResources `json:"timeSlicing,omitempty" yaml:"timeSlicing,omitempty"` // MPS defines the set of replicas to be shared using MPS MPS *ReplicatedResources `json:"mps,omitempty" yaml:"mps,omitempty"` } @@ -38,7 +38,7 @@ func (s *Sharing) SharingStrategy() SharingStrategy { return SharingStrategyMPS } - if s.TimeSlicing.isReplicated() { + if s.TimeSlicing != nil && s.TimeSlicing.isReplicated() { return SharingStrategyTimeSlicing } return SharingStrategyNone @@ -49,5 +49,5 @@ func (s *Sharing) ReplicatedResources() *ReplicatedResources { if s.MPS != nil { return s.MPS } - return &s.TimeSlicing + return s.TimeSlicing } diff --git a/internal/lm/mig-strategy_test.go b/internal/lm/mig-strategy_test.go index 73fcbac95..005fc9615 100644 --- a/internal/lm/mig-strategy_test.go +++ b/internal/lm/mig-strategy_test.go @@ -58,7 +58,7 @@ func TestMigStrategyNoneLabels(t *testing.T) { devices: []resource.Device{ rt.NewFullGPU(), }, - timeSlicing: spec.ReplicatedResources{ + timeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -83,7 +83,7 @@ func TestMigStrategyNoneLabels(t *testing.T) { rt.NewFullGPU(), rt.NewFullGPU(), }, - timeSlicing: spec.ReplicatedResources{ + timeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -107,7 +107,7 @@ func TestMigStrategyNoneLabels(t *testing.T) { devices: []resource.Device{ rt.NewMigEnabledDevice(), }, - timeSlicing: spec.ReplicatedResources{ + timeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -129,7 +129,7 @@ func TestMigStrategyNoneLabels(t *testing.T) { rt.NewMigEnabledDevice(), rt.NewMigEnabledDevice(), }, - timeSlicing: spec.ReplicatedResources{ + timeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -151,7 +151,7 @@ func TestMigStrategyNoneLabels(t *testing.T) { rt.NewMigEnabledDevice(), rt.NewFullGPU(), }, - timeSlicing: spec.ReplicatedResources{ + timeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", diff --git a/internal/lm/nvml_test.go b/internal/lm/nvml_test.go index 073721cd8..812e7f82e 100644 --- a/internal/lm/nvml_test.go +++ b/internal/lm/nvml_test.go @@ -103,7 +103,7 @@ func TestSharingLabeler(t *testing.T) { description: "config with timeslicing replicas", config: &spec.Config{ Sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Replicas: 2, diff --git a/internal/lm/resource_test.go b/internal/lm/resource_test.go index c2e3b3e5c..3f1801885 100644 --- a/internal/lm/resource_test.go +++ b/internal/lm/resource_test.go @@ -55,7 +55,7 @@ func TestGPUResourceLabeler(t *testing.T) { description: "time-slicing ignores non-matching resource", count: 1, sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/not-gpu", @@ -79,7 +79,7 @@ func TestGPUResourceLabeler(t *testing.T) { description: "time-slicing appends suffix and doubles count", count: 1, sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -103,7 +103,7 @@ func TestGPUResourceLabeler(t *testing.T) { description: "time-slicing renamed does not append suffix and doubles count", count: 1, sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", diff --git a/internal/rm/rm_test.go b/internal/rm/rm_test.go index 063ed4034..26c10f6c7 100644 --- a/internal/rm/rm_test.go +++ b/internal/rm/rm_test.go @@ -53,7 +53,7 @@ func TestValidateRequest(t *testing.T) { { description: "timeslicing with single device", sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -73,7 +73,7 @@ func TestValidateRequest(t *testing.T) { { description: "timeslicing with two devices", sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -93,7 +93,7 @@ func TestValidateRequest(t *testing.T) { { description: "timeslicing with two devices -- failRequestsGreaterThanOne", sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ FailRequestsGreaterThanOne: true, Resources: []spec.ReplicatedResource{ {