Skip to content

Commit

Permalink
Add GPU entity to workloadmeta
Browse files Browse the repository at this point in the history
  • Loading branch information
gjulianm committed Dec 11, 2024
1 parent c239913 commit 6f0a929
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 0 deletions.
8 changes: 8 additions & 0 deletions comp/core/workloadmeta/def/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ type Component interface {
// to all entities with kind KindProcess.
ListProcesses() []*Process

// GetGPU returns metadata about a GPU device. It fetches the entity
// with kind KindGPU and the given ID.
GetGPU(id string) (*GPU, error)

// ListGPUs returns metadata about all known GPU devices, equivalent
// to all entities with kind KindGPU.
ListGPUs() []*GPU

// ListProcessesWithFilter returns all the processes for which the passed
// filter evaluates to true.
ListProcessesWithFilter(filterFunc EntityFilterFunc[*Process]) []*Process
Expand Down
55 changes: 55 additions & 0 deletions comp/core/workloadmeta/def/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const (
KindECSTask Kind = "ecs_task"
KindContainerImageMetadata Kind = "container_image_metadata"
KindProcess Kind = "process"
KindGPU Kind = "gpu"
)

// Source is the source name of an entity.
Expand Down Expand Up @@ -1349,3 +1350,57 @@ func (e EventBundle) Acknowledge() {
// InitHelper is a function type that receives a context, the workloadmeta
// Component and the config Component, and returns an error. It should be
// provided as a helper so that the component can be passed into the init hook
// for additional start-time configuration.
type InitHelper func(context.Context, Component, config.Component) error

// GPU represents a GPU resource.
//
// It embeds EntityID and EntityMeta and adds GPU-specific attributes on top
// of them.
type GPU struct {
EntityID
EntityMeta
// Vendor is the vendor of the GPU device (presumably a name such as
// "nvidia"; confirm the exact values against the collector).
Vendor string
// Model is the model of the GPU device.
Model string
// ActivePIDs is a list of process IDs associated with this GPU —
// presumably the processes currently using the device (TODO confirm).
// Merge discards the destination's list when the incoming entity has a
// non-nil one.
ActivePIDs []int
}

// Compile-time check that *GPU satisfies the Entity interface.
var _ Entity = (*GPU)(nil)

// GetID implements Entity#GetID. It returns the EntityID embedded in the GPU.
func (g GPU) GetID() EntityID {
	id := g.EntityID
	return id
}

// Merge implements Entity#Merge.
//
// The entity passed in must be a *GPU; any other concrete type results in an
// error and leaves the receiver untouched.
func (g *GPU) Merge(e Entity) error {
	incoming, isGPU := e.(*GPU)
	if !isGPU {
		return fmt.Errorf("cannot merge GPU with different kind %T", e)
	}

	// When the incoming entity carries a PID list, drop the receiver's list
	// first so that merge() takes the latest active PIDs from the source.
	if incoming.ActivePIDs != nil {
		g.ActivePIDs = nil
	}

	return merge(g, incoming)
}

// DeepCopy implements Entity#DeepCopy. It returns a pointer to a copy of the
// GPU produced by deepcopy.Copy.
func (g GPU) DeepCopy() Entity {
	clone := deepcopy.Copy(g).(GPU)
	return &clone
}

// String implements Entity#String.
//
// The output consists of an "Entity ID" section, an "Entity Meta" section
// (both rendered by the embedded types with the given verbosity), followed by
// one line each for the vendor, the model and the active PIDs.
func (g GPU) String(verbose bool) string {
	var out strings.Builder

	_, _ = fmt.Fprintln(&out, "----------- Entity ID -----------")
	_, _ = fmt.Fprintln(&out, g.EntityID.String(verbose))

	_, _ = fmt.Fprintln(&out, "----------- Entity Meta -----------")
	_, _ = fmt.Fprintln(&out, g.EntityMeta.String(verbose))

	// GPU-specific attributes, printed as "<label> <value>" lines in order.
	details := []struct {
		label string
		value any
	}{
		{"Vendor:", g.Vendor},
		{"Model:", g.Model},
		{"Active PIDs:", g.ActivePIDs},
	}
	for _, d := range details {
		_, _ = fmt.Fprintln(&out, d.label, d.value)
	}

	return out.String()
}
33 changes: 33 additions & 0 deletions comp/core/workloadmeta/def/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,36 @@ func TestMergeECSContainer(t *testing.T) {
assert.Nil(t, container2.ECSContainer)
assert.EqualValues(t, container1.ECSContainer.DisplayName, "ecs-container-1")
}

// TestMergeGPU checks the result of merging one GPU entity into another with
// the same ID: an empty Model on the destination is filled from the source,
// the destination's ActivePIDs are replaced by the source's list, and a
// Vendor that is equal on both sides is left as is.
func TestMergeGPU(t *testing.T) {
	gpu1 := GPU{
		EntityID: EntityID{
			Kind: KindGPU,
			ID:   "gpu-1-id",
		},
		EntityMeta: EntityMeta{
			Name: "gpu-1",
		},
		Vendor:     "nvidia",
		Model:      "",
		ActivePIDs: []int{123, 456},
	}
	gpu2 := GPU{
		EntityID: EntityID{
			Kind: KindGPU,
			ID:   "gpu-1-id",
		},
		EntityMeta: EntityMeta{
			Name: "gpu-1",
		},
		Vendor:     "nvidia",
		Model:      "tesla",
		ActivePIDs: []int{654},
	}

	err := gpu1.Merge(&gpu2)
	assert.NoError(t, err)

	// testify expects (expected, actual); keeping that order makes failure
	// messages label the two values correctly.
	assert.Equal(t, "tesla", gpu1.Model)
	assert.ElementsMatch(t, []int{654}, gpu1.ActivePIDs)
	assert.Equal(t, "nvidia", gpu1.Vendor)
}
22 changes: 22 additions & 0 deletions comp/core/workloadmeta/impl/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,28 @@ func (w *workloadmeta) ListKubernetesMetadata(filterFunc wmdef.EntityFilterFunc[
return metadata
}

// GetGPU implements Store#GetGPU.
//
// It looks up the entity of kind KindGPU with the given ID and returns it as
// a *wmdef.GPU. Any lookup error is returned unchanged, with a nil GPU.
func (w *workloadmeta) GetGPU(id string) (*wmdef.GPU, error) {
	found, lookupErr := w.getEntityByKind(wmdef.KindGPU, id)
	if lookupErr == nil {
		return found.(*wmdef.GPU), nil
	}

	return nil, lookupErr
}

// ListGPUs implements Store#ListGPUs.
//
// It returns every entity of kind KindGPU as a *wmdef.GPU. The result is
// always a non-nil slice, even when there are no GPU entities.
func (w *workloadmeta) ListGPUs() []*wmdef.GPU {
	found := w.listEntitiesByKind(wmdef.KindGPU)

	gpus := make([]*wmdef.GPU, 0, len(found))
	for _, entity := range found {
		gpus = append(gpus, entity.(*wmdef.GPU))
	}

	return gpus
}

// Notify implements Store#Notify
func (w *workloadmeta) Notify(events []wmdef.CollectorEvent) {
if len(events) > 0 {
Expand Down

0 comments on commit 6f0a929

Please sign in to comment.