Skip to content

Commit

Permalink
Implement the "limit" modifier for binding
Browse files Browse the repository at this point in the history
Sometimes a user wants to map/bind procs by an object type,
but needs to limit the number of procs bound to any particular
object to some arbitrary number (i.e., not the number of
available CPUs on the object). An example might be to map/bind
to a cache level, but limit the number of procs on any given
cache to some smaller number before moving to the next cache.

Docs were updated to explain this in a prior commit.

Signed-off-by: Ralph Castain <[email protected]>
  • Loading branch information
rhc54 committed Oct 19, 2024
1 parent 4474dfc commit 30ce690
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 0 deletions.
15 changes: 15 additions & 0 deletions src/hwloc/hwloc-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,18 @@ typedef struct {
} prte_hwloc_topo_data_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_hwloc_topo_data_t);

/**
* Per-object bookkeeping used when computing process placement.
*
* One instance is attached to the userdata pointer of a topology
* object the first time that object is considered in a placement
* computation. It is used to enforce the binding "limit" modifier,
* i.e. a cap on the number of procs bound to any single object.
*/
typedef struct {
/* PMIx object base class */
pmix_object_t super;
/* number of procs counted against this object so far; compared
* against the binding limit and zeroed by
* prte_hwloc_base_reset_counters() */
unsigned nprocs;
} prte_hwloc_obj_data_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_hwloc_obj_data_t);

/* define binding policies */
typedef uint16_t prte_binding_policy_t;
Expand Down Expand Up @@ -263,6 +275,9 @@ PRTE_EXPORT hwloc_obj_t prte_hwloc_base_get_obj_by_type(hwloc_topology_t topo,
hwloc_obj_type_t target,
unsigned int instance);

/**
 * Reset all object counters.
 *
 * Zeroes the nprocs field of the prte_hwloc_obj_data_t cached in the
 * userdata of topology objects, across every topology stored in
 * prte_node_topologies. Objects with no cached data are left untouched.
 * Takes no arguments and returns nothing.
 */
PRTE_EXPORT void prte_hwloc_base_reset_counters(void);

/**
* Get the number of pu's under a given hwloc object.
*/
Expand Down
8 changes: 8 additions & 0 deletions src/hwloc/hwloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -683,3 +683,11 @@ PMIX_CLASS_INSTANCE(prte_hwloc_topo_data_t,
pmix_object_t,
topo_data_const, NULL);


/* Constructor registered for the prte_hwloc_obj_data_t class:
 * a new instance starts with no procs counted against its object.
 * No destructor is registered since nothing is allocated here.
 */
static void obj_data_const(prte_hwloc_obj_data_t *data)
{
    data->nprocs = 0U;
}
PMIX_CLASS_INSTANCE(prte_hwloc_obj_data_t,
                    pmix_object_t,
                    obj_data_const, NULL);
60 changes: 60 additions & 0 deletions src/hwloc/hwloc_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -1860,3 +1860,63 @@ int prte_hwloc_print(char **output, char *prefix, hwloc_topology_t src)
*output = tmp;
return PRTE_SUCCESS;
}

/**
 * Zero the per-object proc counters in every known node topology.
 *
 * For each topology in prte_node_topologies, every object of a type
 * that can be a binding target (NUMA node, package, L1/L2/L3 cache,
 * core, PU) is visited. If the object's userdata is non-NULL it is
 * taken to point at a prte_hwloc_obj_data_t and its nprocs field is
 * set back to zero. Objects without userdata are left untouched.
 *
 * Objects are looked up by type instead of by walking the normal
 * depths of the tree. In hwloc 2.x, NUMA nodes are memory objects
 * placed on a special virtual level that lies outside the range
 * reported by hwloc_topology_get_depth(), so a walk over normal
 * depths never reaches them and their counters would never be reset.
 */
void prte_hwloc_base_reset_counters(void)
{
    /* object types whose counters must be cleared */
    static const hwloc_obj_type_t types[] = {
        HWLOC_OBJ_NUMANODE,
        HWLOC_OBJ_PACKAGE,
        HWLOC_OBJ_L1CACHE,
        HWLOC_OBJ_L2CACHE,
        HWLOC_OBJ_L3CACHE,
        HWLOC_OBJ_CORE,
        HWLOC_OBJ_PU
    };
    const size_t ntypes = sizeof(types) / sizeof(types[0]);
    prte_topology_t *ptopo;
    hwloc_topology_t topo;
    hwloc_obj_t obj;
    prte_hwloc_obj_data_t *objcnt;
    size_t t;
    int nobjs, w;
    int n;

    /* this can be a fairly expensive operation as we must traverse
     * all objects of interest in all topologies since we cannot
     * know which ones might have been used. Fortunately, we almost
     * always have only one topology, and there aren't that many
     * objects in it - so this normally goes fairly quickly
     */

    for (n = 0; n < prte_node_topologies->size; n++) {
        ptopo = (prte_topology_t *) pmix_pointer_array_get_item(prte_node_topologies, n);
        /* skip empty slots and entries whose hwloc topology
         * has not been filled in */
        if (NULL == ptopo || NULL == ptopo->topo) {
            continue;
        }
        topo = ptopo->topo;

        for (t = 0; t < ntypes; t++) {
            /* zero if the topology has no object of this type,
             * negative if the type cannot be resolved to a single
             * level - in both cases the loop below is a no-op */
            nobjs = hwloc_get_nbobjs_by_type(topo, types[t]);

            /* clear the counter on every object of this type
             * that has one attached */
            for (w = 0; w < nobjs; w++) {
                obj = hwloc_get_obj_by_type(topo, types[t], (unsigned) w);
                if (NULL == obj || NULL == obj->userdata) {
                    continue;
                }
                objcnt = (prte_hwloc_obj_data_t *) obj->userdata;
                objcnt->nprocs = 0;
            }
        }
    }
}
16 changes: 16 additions & 0 deletions src/mca/rmaps/base/rmaps_base_binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc,
hwloc_obj_t target;
hwloc_cpuset_t tgtcpus, tmpcpus;
int nobjs, n;
prte_hwloc_obj_data_t *objcnt;

pmix_output_verbose(5, prte_rmaps_base_framework.framework_output,
"mca:rmaps: bind %s with policy %s",
Expand Down Expand Up @@ -95,6 +96,18 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc,

for (n=0; n < nobjs; n++) {
tmp_obj = prte_hwloc_base_get_obj_by_type(node->topology->topo, options->hwb, n);
// if a limit on the number of procs/object has been set,
// then check it here
if (NULL == tmp_obj->userdata) {
objcnt = PMIX_NEW(prte_hwloc_obj_data_t);
tmp_obj->userdata = (void*)objcnt;
} else {
objcnt = (prte_hwloc_obj_data_t*)tmp_obj->userdata;
}
if (0 < options->limit && options->limit <= objcnt->nprocs) {
// skip this object
continue;
}
tmpcpus = tmp_obj->cpuset;
hwloc_bitmap_and(prte_rmaps_base.available, node->available, tmpcpus);
hwloc_bitmap_and(prte_rmaps_base.available, prte_rmaps_base.available, prte_rmaps_base.baseset);
Expand All @@ -115,6 +128,9 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc,
}
if (0 < ncpus) {
trg_obj = tmp_obj;
if (0 < options->limit) {
objcnt->nprocs++;
}
break;
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/mca/rmaps/base/rmaps_base_map_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_BINDING_LIMIT, (void**) &u16ptr, PMIX_UINT16)) {
options.limit = u16;
// reset any prior counters
prte_hwloc_base_reset_counters();
}

pmix_output_verbose(5, prte_rmaps_base_framework.framework_output,
Expand Down

0 comments on commit 30ce690

Please sign in to comment.