Skip to content

Commit

Permalink
Add resource.sharing-strategy labels
Browse files Browse the repository at this point in the history
This change adds sharing-strategy labels per resource.

This label takes one of the values none, mps, or time-slicing, depending
on the sharing configuration. For invalid configurations, the label is
set to an empty string.

Signed-off-by: Evan Lezar <[email protected]>
  • Loading branch information
elezar committed Feb 6, 2024
1 parent 31214a5 commit 51dbc89
Show file tree
Hide file tree
Showing 9 changed files with 379 additions and 258 deletions.
1 change: 1 addition & 0 deletions internal/lm/mig-strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ func newInvalidMigStrategyLabeler(device resource.Device, reason string) (Labele

rl.updateLabel(labels, "count", 0)
rl.updateLabel(labels, "replicas", 0)
rl.updateLabel(labels, "sharing-strategy", "")
rl.updateLabel(labels, "memory", 0)

return labels, nil
Expand Down
222 changes: 118 additions & 104 deletions internal/lm/mig-strategy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,14 @@ func TestMigStrategyNoneLabels(t *testing.T) {
rt.NewFullGPU(),
},
expectedLabels: Labels{
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.sharing-strategy": "none",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
},
},
{
Expand All @@ -66,13 +67,14 @@ func TestMigStrategyNoneLabels(t *testing.T) {
},
},
expectedLabels: Labels{
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "2",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "2",
"nvidia.com/gpu.sharing-strategy": "time-slicing",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
},
},
{
Expand All @@ -90,13 +92,14 @@ func TestMigStrategyNoneLabels(t *testing.T) {
},
},
expectedLabels: Labels{
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "2",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "2",
"nvidia.com/gpu.sharing-strategy": "time-slicing",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
},
},
{
Expand All @@ -113,10 +116,11 @@ func TestMigStrategyNoneLabels(t *testing.T) {
},
},
expectedLabels: Labels{
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.sharing-strategy": "none",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
},
},
{
Expand All @@ -134,10 +138,11 @@ func TestMigStrategyNoneLabels(t *testing.T) {
},
},
expectedLabels: Labels{
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.sharing-strategy": "none",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
},
},
{
Expand All @@ -155,13 +160,14 @@ func TestMigStrategyNoneLabels(t *testing.T) {
},
},
expectedLabels: Labels{
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "2",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "2",
"nvidia.com/gpu.sharing-strategy": "time-slicing",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL-SHARED",
},
},
}
Expand Down Expand Up @@ -212,14 +218,15 @@ func TestMigStrategySingleLabels(t *testing.T) {
rt.NewFullGPU(),
},
expectedLabels: Labels{
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.sharing-strategy": "none",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
"nvidia.com/mig.strategy": "single",
},
},
{
Expand All @@ -229,14 +236,15 @@ func TestMigStrategySingleLabels(t *testing.T) {
rt.NewFullGPU(),
},
expectedLabels: Labels{
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.sharing-strategy": "none",
"nvidia.com/gpu.memory": "300",
"nvidia.com/gpu.product": "MOCKMODEL",
"nvidia.com/mig.strategy": "single",
},
},
{
Expand All @@ -247,19 +255,20 @@ func TestMigStrategySingleLabels(t *testing.T) {
),
},
expectedLabels: Labels{
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.memory": "100",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-1g.100gb",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.multiprocessors": "0",
"nvidia.com/gpu.slices.gi": "1",
"nvidia.com/gpu.slices.ci": "2",
"nvidia.com/gpu.engines.copy": "0",
"nvidia.com/gpu.engines.decoder": "0",
"nvidia.com/gpu.engines.encoder": "0",
"nvidia.com/gpu.engines.jpeg": "0",
"nvidia.com/gpu.engines.ofa": "0",
"nvidia.com/gpu.count": "1",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.sharing-strategy": "none",
"nvidia.com/gpu.memory": "100",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-1g.100gb",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.multiprocessors": "0",
"nvidia.com/gpu.slices.gi": "1",
"nvidia.com/gpu.slices.ci": "2",
"nvidia.com/gpu.engines.copy": "0",
"nvidia.com/gpu.engines.decoder": "0",
"nvidia.com/gpu.engines.encoder": "0",
"nvidia.com/gpu.engines.jpeg": "0",
"nvidia.com/gpu.engines.ofa": "0",
},
},
{
Expand Down Expand Up @@ -287,19 +296,20 @@ func TestMigStrategySingleLabels(t *testing.T) {
),
},
expectedLabels: Labels{
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.memory": "100",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-1g.100gb",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.multiprocessors": "12",
"nvidia.com/gpu.slices.gi": "1",
"nvidia.com/gpu.slices.ci": "2",
"nvidia.com/gpu.engines.copy": "13",
"nvidia.com/gpu.engines.decoder": "14",
"nvidia.com/gpu.engines.encoder": "15",
"nvidia.com/gpu.engines.jpeg": "16",
"nvidia.com/gpu.engines.ofa": "17",
"nvidia.com/gpu.count": "2",
"nvidia.com/gpu.replicas": "1",
"nvidia.com/gpu.sharing-strategy": "none",
"nvidia.com/gpu.memory": "100",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-1g.100gb",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.multiprocessors": "12",
"nvidia.com/gpu.slices.gi": "1",
"nvidia.com/gpu.slices.ci": "2",
"nvidia.com/gpu.engines.copy": "13",
"nvidia.com/gpu.engines.decoder": "14",
"nvidia.com/gpu.engines.encoder": "15",
"nvidia.com/gpu.engines.jpeg": "16",
"nvidia.com/gpu.engines.ofa": "17",
},
},
{
Expand All @@ -309,11 +319,12 @@ func TestMigStrategySingleLabels(t *testing.T) {
},
isInvalid: true,
expectedLabels: Labels{
"nvidia.com/gpu.count": "0",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.memory": "0",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.count": "0",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.sharing-strategy": "",
"nvidia.com/gpu.memory": "0",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
"nvidia.com/mig.strategy": "single",
},
},
{
Expand All @@ -326,11 +337,12 @@ func TestMigStrategySingleLabels(t *testing.T) {
},
isInvalid: true,
expectedLabels: Labels{
"nvidia.com/gpu.count": "0",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.memory": "0",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.count": "0",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.sharing-strategy": "",
"nvidia.com/gpu.memory": "0",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
"nvidia.com/mig.strategy": "single",
},
},
{
Expand All @@ -343,14 +355,15 @@ func TestMigStrategySingleLabels(t *testing.T) {
},
isInvalid: true,
expectedLabels: Labels{
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "0",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.memory": "0",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "0",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.sharing-strategy": "",
"nvidia.com/gpu.memory": "0",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
"nvidia.com/mig.strategy": "single",
},
},
{
Expand All @@ -364,14 +377,15 @@ func TestMigStrategySingleLabels(t *testing.T) {
},
isInvalid: true,
expectedLabels: Labels{
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "0",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.memory": "0",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
"nvidia.com/mig.strategy": "single",
"nvidia.com/gpu.compute.major": "8",
"nvidia.com/gpu.compute.minor": "0",
"nvidia.com/gpu.family": "ampere",
"nvidia.com/gpu.count": "0",
"nvidia.com/gpu.replicas": "0",
"nvidia.com/gpu.sharing-strategy": "",
"nvidia.com/gpu.memory": "0",
"nvidia.com/gpu.product": "MOCKMODEL-MIG-INVALID",
"nvidia.com/mig.strategy": "single",
},
},
}
Expand Down
Loading

0 comments on commit 51dbc89

Please sign in to comment.