Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement the "limit" modifier for binding #2051

Merged
merged 1 commit into from
Oct 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/hwloc/hwloc-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,18 @@ typedef struct {
} prte_hwloc_topo_data_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_hwloc_topo_data_t);

/**
 * Per-object cache of data used when computing process placement.
 *
 * An instance is attached to the userdata pointer of a topology
 * object the first time that object is considered in a placement
 * computation; objects that have never been considered keep a
 * NULL userdata.
 */
typedef struct {
/* PMIx object base - the class instance names pmix_object_t as parent */
pmix_object_t super;
/* count of procs charged against this object: zeroed by the class
 * constructor and by prte_hwloc_base_reset_counters(), incremented
 * by the binding code when a proc is placed on the object while a
 * per-object limit is in effect, and compared against that limit
 * to decide whether the object must be skipped */
unsigned nprocs;
} prte_hwloc_obj_data_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_hwloc_obj_data_t);

/* define binding policies */
typedef uint16_t prte_binding_policy_t;
Expand Down Expand Up @@ -263,6 +275,9 @@ PRTE_EXPORT hwloc_obj_t prte_hwloc_base_get_obj_by_type(hwloc_topology_t topo,
hwloc_obj_type_t target,
unsigned int instance);

/**
 * Reset all object counters.
 *
 * Walks every topology stored in prte_node_topologies and sets the
 * nprocs field to zero on topology objects that carry a cached
 * prte_hwloc_obj_data_t in their userdata. Objects with NULL
 * userdata are left untouched. Takes no arguments and returns
 * nothing.
 */
PRTE_EXPORT void prte_hwloc_base_reset_counters(void);

/**
* Get the number of pu's under a given hwloc object.
*/
Expand Down
8 changes: 8 additions & 0 deletions src/hwloc/hwloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -683,3 +683,11 @@ PMIX_CLASS_INSTANCE(prte_hwloc_topo_data_t,
pmix_object_t,
topo_data_const, NULL);


/* Constructor for prte_hwloc_obj_data_t: a freshly created
 * object cache starts with no procs counted against it. */
static void obj_data_const(prte_hwloc_obj_data_t *objdata)
{
    objdata->nprocs = 0;
}
/* register the class: parent is pmix_object_t, no destructor needed */
PMIX_CLASS_INSTANCE(prte_hwloc_obj_data_t, pmix_object_t, obj_data_const, NULL);
60 changes: 60 additions & 0 deletions src/hwloc/hwloc_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -1860,3 +1860,63 @@ int prte_hwloc_print(char **output, char *prefix, hwloc_topology_t src)
*output = tmp;
return PRTE_SUCCESS;
}

/**
 * Zero the cached proc counter on every object found at one depth
 * of a topology.
 *
 * @param topo   topology to scan
 * @param depth  depth to scan; may be a regular (non-negative) depth
 *               or one of hwloc's special negative "virtual" depths
 *
 * Objects that never had a prte_hwloc_obj_data_t attached (NULL
 * userdata) are skipped, as is any index for which hwloc fails to
 * return an object.
 */
static void reset_counters_at_depth(hwloc_topology_t topo, int depth)
{
    prte_hwloc_obj_data_t *objcnt;
    hwloc_obj_t obj;
    unsigned width, w;

    /* a width of zero simply means the loop body never runs */
    width = hwloc_get_nbobjs_by_depth(topo, depth);

    for (w = 0; w < width; w++) {
        obj = hwloc_get_obj_by_depth(topo, depth, w);
        /* hwloc returns NULL if no object exists at this depth/index */
        if (NULL == obj || NULL == obj->userdata) {
            continue;
        }
        objcnt = (prte_hwloc_obj_data_t *) obj->userdata;
        objcnt->nprocs = 0;
    }
}

/**
 * Reset all object counters.
 *
 * Sets nprocs back to zero on every object of interest (NUMA node,
 * package, L1/L2/L3 cache, core, PU) that carries cached object data,
 * in every topology stored in prte_node_topologies.
 */
void prte_hwloc_base_reset_counters(void)
{
    prte_topology_t *ptopo;
    hwloc_topology_t topo;
    hwloc_obj_type_t type;
    unsigned depth, d;
    int n;

    /* this can be a fairly expensive operation as we must traverse
     * all objects of interest in all topologies since we cannot
     * know which ones might have been used. Fortunately, we almost
     * always have only one topology, and there aren't that many
     * objects in it - so this normally goes fairly quickly
     */

    for (n = 0; n < prte_node_topologies->size; n++) {
        ptopo = (prte_topology_t *) pmix_pointer_array_get_item(prte_node_topologies, n);
        if (NULL == ptopo || NULL == ptopo->topo) {
            continue;
        }
        topo = ptopo->topo;

#if defined(HWLOC_API_VERSION) && HWLOC_API_VERSION >= 0x20000
        /* starting with hwloc 2.0, NUMA nodes are no longer part of
         * the main object tree - they live at a special virtual depth
         * and are therefore never seen by the depth scan below, so
         * they must be reset explicitly
         */
        reset_counters_at_depth(topo, HWLOC_TYPE_DEPTH_NUMANODE);
#endif

        /* get the max depth of the topology */
        depth = hwloc_topology_get_depth(topo);

        /* start at the first depth below the top machine level */
        for (d = 1; d < depth; d++) {
            /* get the object type at this depth */
            type = hwloc_get_depth_type(topo, d);
            /* if it isn't one of interest, then ignore it */
            if (HWLOC_OBJ_NUMANODE != type && HWLOC_OBJ_PACKAGE != type &&
                HWLOC_OBJ_L1CACHE != type && HWLOC_OBJ_L2CACHE != type &&
                HWLOC_OBJ_L3CACHE != type &&
                HWLOC_OBJ_CORE != type && HWLOC_OBJ_PU != type) {
                continue;
            }

            /* clear the counter on every object at this depth */
            reset_counters_at_depth(topo, (int) d);
        }
    }
}
16 changes: 16 additions & 0 deletions src/mca/rmaps/base/rmaps_base_binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc,
hwloc_obj_t target;
hwloc_cpuset_t tgtcpus, tmpcpus;
int nobjs, n;
prte_hwloc_obj_data_t *objcnt;

pmix_output_verbose(5, prte_rmaps_base_framework.framework_output,
"mca:rmaps: bind %s with policy %s",
Expand Down Expand Up @@ -95,6 +96,18 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc,

for (n=0; n < nobjs; n++) {
tmp_obj = prte_hwloc_base_get_obj_by_type(node->topology->topo, options->hwb, n);
// if a limit on the number of procs/object has been set,
// then check it here
if (NULL == tmp_obj->userdata) {
objcnt = PMIX_NEW(prte_hwloc_obj_data_t);
tmp_obj->userdata = (void*)objcnt;
} else {
objcnt = (prte_hwloc_obj_data_t*)tmp_obj->userdata;
}
if (0 < options->limit && options->limit <= objcnt->nprocs) {
// skip this object
continue;
}
tmpcpus = tmp_obj->cpuset;
hwloc_bitmap_and(prte_rmaps_base.available, node->available, tmpcpus);
hwloc_bitmap_and(prte_rmaps_base.available, prte_rmaps_base.available, prte_rmaps_base.baseset);
Expand All @@ -115,6 +128,9 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc,
}
if (0 < ncpus) {
trg_obj = tmp_obj;
if (0 < options->limit) {
objcnt->nprocs++;
}
break;
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/mca/rmaps/base/rmaps_base_map_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_BINDING_LIMIT, (void**) &u16ptr, PMIX_UINT16)) {
options.limit = u16;
// reset any prior counters
prte_hwloc_base_reset_counters();
}

pmix_output_verbose(5, prte_rmaps_base_framework.framework_output,
Expand Down
Loading