From e5f49f99285998eb70c0fd896eb1a3fd47459cfc Mon Sep 17 00:00:00 2001 From: Daniel Kleinstein Date: Sun, 27 Oct 2024 19:00:54 +0200 Subject: [PATCH 1/3] bugfix: Allow for optional timeslicing configuration This fixes a vestigial bug from the introduction of MPS. Now that two timeslicing configurations are available, it's possible for MPS to be configured instead of timeslicing. But since the TimeSlicing field was made non-optional - its existence is forced when unmarshalling, and then the parsing fails because no resources are specified under the empty TimeSlicing field. Signed-off-by: Daniel Kleinstein --- api/config/v1/config.go | 8 ++++++-- api/config/v1/sharing.go | 6 +++--- internal/lm/mig-strategy_test.go | 2 +- internal/lm/nvml_test.go | 2 +- internal/lm/resource_test.go | 8 ++++---- internal/rm/rm_test.go | 6 +++--- 6 files changed, 18 insertions(+), 14 deletions(-) diff --git a/api/config/v1/config.go b/api/config/v1/config.go index c102c4239..2017a374d 100644 --- a/api/config/v1/config.go +++ b/api/config/v1/config.go @@ -96,9 +96,13 @@ func DisableResourceNamingInConfig(logger logger, config *Config) { config.Resources.MIGs = nil // Disable renaming / device selection in Sharing.TimeSlicing.Resources - config.Sharing.TimeSlicing.disableResoureRenaming(logger, "timeSlicing") + if (config.sharing.TimeSlicing != nil) { + config.Sharing.TimeSlicing.disableResoureRenaming(logger, "timeSlicing") + } // Disable renaming / device selection in Sharing.MPS.Resources - config.Sharing.MPS.disableResoureRenaming(logger, "mps") + if (config.Sharing.MPS != nil) { + config.Sharing.MPS.disableResoureRenaming(logger, "mps") + } } // parseConfig parses a config file as either YAML of JSON and unmarshals it into a Config struct. diff --git a/api/config/v1/sharing.go b/api/config/v1/sharing.go index 82906fd36..e084f67a3 100644 --- a/api/config/v1/sharing.go +++ b/api/config/v1/sharing.go @@ -19,7 +19,7 @@ package v1 // Sharing encapsulates the set of sharing strategies that are supported. type Sharing struct { // TimeSlicing defines the set of replicas to be made for timeSlicing available resources. - TimeSlicing ReplicatedResources `json:"timeSlicing,omitempty" yaml:"timeSlicing,omitempty"` + TimeSlicing *ReplicatedResources `json:"timeSlicing,omitempty" yaml:"timeSlicing,omitempty"` // MPS defines the set of replicas to be shared using MPS MPS *ReplicatedResources `json:"mps,omitempty" yaml:"mps,omitempty"` } @@ -38,7 +38,7 @@ func (s *Sharing) SharingStrategy() SharingStrategy { return SharingStrategyMPS } - if s.TimeSlicing.isReplicated() { + if s.TimeSlicing != nil && s.TimeSlicing.isReplicated() { return SharingStrategyTimeSlicing } return SharingStrategyNone @@ -49,5 +49,5 @@ func (s *Sharing) ReplicatedResources() *ReplicatedResources { if s.MPS != nil { return s.MPS } - return &s.TimeSlicing + return s.TimeSlicing } diff --git a/internal/lm/mig-strategy_test.go b/internal/lm/mig-strategy_test.go index 73fcbac95..7adb09515 100644 --- a/internal/lm/mig-strategy_test.go +++ b/internal/lm/mig-strategy_test.go @@ -183,7 +183,7 @@ func TestMigStrategyNoneLabels(t *testing.T) { }, }, Sharing: spec.Sharing{ - TimeSlicing: tc.timeSlicing, + TimeSlicing: &tc.timeSlicing, }, } diff --git a/internal/lm/nvml_test.go b/internal/lm/nvml_test.go index 073721cd8..812e7f82e 100644 --- a/internal/lm/nvml_test.go +++ b/internal/lm/nvml_test.go @@ -103,7 +103,7 @@ func TestSharingLabeler(t *testing.T) { description: "config with timeslicing replicas", config: &spec.Config{ Sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Replicas: 2, diff --git a/internal/lm/resource_test.go b/internal/lm/resource_test.go index c2e3b3e5c..6a3d7ae7f 100644 --- a/internal/lm/resource_test.go +++ b/internal/lm/resource_test.go @@ -55,7 +55,7 @@ func TestGPUResourceLabeler(t *testing.T) { description: "time-slicing ignores non-matching resource", count: 1, sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/not-gpu", @@ -79,7 +79,7 @@ func TestGPUResourceLabeler(t *testing.T) { description: "time-slicing appends suffix and doubles count", count: 1, sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -103,7 +103,7 @@ func TestGPUResourceLabeler(t *testing.T) { description: "time-slicing renamed does not append suffix and doubles count", count: 1, sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -422,7 +422,7 @@ func TestMigResourceLabeler(t *testing.T) { t.Run(tc.description, func(t *testing.T) { config := &spec.Config{ Sharing: spec.Sharing{ - TimeSlicing: tc.timeSlicing, + TimeSlicing: &tc.timeSlicing, }, } l, err := NewMIGResourceLabeler(tc.resourceName, config, device, tc.count) diff --git a/internal/rm/rm_test.go b/internal/rm/rm_test.go index 063ed4034..26c10f6c7 100644 --- a/internal/rm/rm_test.go +++ b/internal/rm/rm_test.go @@ -53,7 +53,7 @@ func TestValidateRequest(t *testing.T) { { description: "timeslicing with single device", sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -73,7 +73,7 @@ func TestValidateRequest(t *testing.T) { { description: "timeslicing with two devices", sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ Resources: []spec.ReplicatedResource{ { Name: "nvidia.com/gpu", @@ -93,7 +93,7 @@ func TestValidateRequest(t *testing.T) { { description: "timeslicing with two devices -- failRequestsGreaterThanOne", sharing: spec.Sharing{ - TimeSlicing: spec.ReplicatedResources{ + TimeSlicing: &spec.ReplicatedResources{ FailRequestsGreaterThanOne: true, Resources: []spec.ReplicatedResource{ { From 14c6485b49199674ddc3ee3691d4a14af969e80e Mon Sep 17 00:00:00 2001 From: Daniel Kleinstein Date: Mon, 28 Oct 2024 13:46:48 +0000 Subject: [PATCH 2/3] config: Remove unnecessary nil checks Signed-off-by: Daniel Kleinstein --- api/config/v1/config.go | 8 ++------ api/config/v1/sharing.go | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/api/config/v1/config.go b/api/config/v1/config.go index 2017a374d..c102c4239 100644 --- a/api/config/v1/config.go +++ b/api/config/v1/config.go @@ -96,13 +96,9 @@ func DisableResourceNamingInConfig(logger logger, config *Config) { config.Resources.MIGs = nil // Disable renaming / device selection in Sharing.TimeSlicing.Resources - if (config.sharing.TimeSlicing != nil) { - config.Sharing.TimeSlicing.disableResoureRenaming(logger, "timeSlicing") - } + config.Sharing.TimeSlicing.disableResoureRenaming(logger, "timeSlicing") // Disable renaming / device selection in Sharing.MPS.Resources - if (config.Sharing.MPS != nil) { - config.Sharing.MPS.disableResoureRenaming(logger, "mps") - } + config.Sharing.MPS.disableResoureRenaming(logger, "mps") } // parseConfig parses a config file as either YAML of JSON and unmarshals it into a Config struct. diff --git a/api/config/v1/sharing.go b/api/config/v1/sharing.go index e084f67a3..ab7681124 100644 --- a/api/config/v1/sharing.go +++ b/api/config/v1/sharing.go @@ -38,7 +38,7 @@ func (s *Sharing) SharingStrategy() SharingStrategy { return SharingStrategyMPS } - if s.TimeSlicing != nil && s.TimeSlicing.isReplicated() { + if s.TimeSlicing.isReplicated() { return SharingStrategyTimeSlicing } return SharingStrategyNone From 72b9e0bffa37773356aa41b4aa8dc19495cf9b44 Mon Sep 17 00:00:00 2001 From: Daniel Kleinstein Date: Mon, 28 Oct 2024 16:36:50 +0200 Subject: [PATCH 3/3] resource: Fix resourceLabeler's sharingDisabled to check if a strategy is defined Signed-off-by: Daniel Kleinstein --- internal/lm/resource.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/lm/resource.go b/internal/lm/resource.go index 5ed73ca18..5a0d5428c 100644 --- a/internal/lm/resource.go +++ b/internal/lm/resource.go @@ -215,7 +215,7 @@ func (rl resourceLabeler) getReplicas() int { // sharingDisabled checks whether the resourceLabeler has sharing disabled // TODO: The nil check here is because we call NewGPUResourceLabeler with a nil config when sharing is disabled. func (rl resourceLabeler) sharingDisabled() bool { - return rl.sharing == nil + return rl.sharing == nil || (rl.sharing.SharingStrategy() == spec.SharingStrategyNone) } // isShared checks whether the resource is shared.