From 18997775b58488a62e5a2543bb52cd456f7d4576 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 18 Oct 2024 07:43:23 -0600 Subject: [PATCH] Implement the PMIX_SPAWN_CHILD_SEP support Provide an optional way to determine the fate of child jobs if/when the parent job terminates. In the current implementation, we do not support continuation after parent job abnormally terminates - has to be a normal termination. Users can toggle the behavior by providing the PMIX_SPAWN_CHILD_SEP attribute in their job info passed to PMIx_Spawn. In the absence of that attribute, we default to allowing the child to continue executing. Provide an output when child jobs are terminated warning the user that this has happened, and why. Signed-off-by: Ralph Castain (cherry picked from commit 747da0436ec94c359c85bc13ee6efccc60085a2f) --- src/docs/show-help-files/help-state-base.txt | 17 ++++++++++ src/mca/state/base/help-state-base.txt | 19 ++++++++++- src/mca/state/dvm/state_dvm.c | 35 ++++++++++++++------ src/prted/pmix/pmix_server_dyn.c | 6 ++++ src/util/attr.c | 2 ++ src/util/attr.h | 3 ++ 6 files changed, 70 insertions(+), 12 deletions(-) diff --git a/src/docs/show-help-files/help-state-base.txt b/src/docs/show-help-files/help-state-base.txt index 46799d9b17..83a9969fe9 100644 --- a/src/docs/show-help-files/help-state-base.txt +++ b/src/docs/show-help-files/help-state-base.txt @@ -40,3 +40,20 @@ behavior: You must specify one of the above in combination with NOTIFYERRORS in order to receive notifications of errors. Please correct the situation and try again. +# +[child-term] +At least one child job is being terminated due to termination of +its parent: + + Parent: %s + Child: %s + +This behavior is controlled by setting the PMIX_SPAWN_CHILD_SEP attribute +in the job info provided at time of spawn for the child job. When set to +"true", the runtime will "separate" the child from its parent and allow +it to continue execution after parent termination. 
Note that this is only +true for parents that normally terminate - abnormal termination will always +result in a complete teardown of all child jobs. + +In the absence of the attribute, the runtime will default to the "true" +behavior. diff --git a/src/mca/state/base/help-state-base.txt b/src/mca/state/base/help-state-base.txt index 52ee955188..83a9969fe9 100644 --- a/src/mca/state/base/help-state-base.txt +++ b/src/mca/state/base/help-state-base.txt @@ -3,7 +3,7 @@ # Copyright (c) 2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2019 Intel, Inc. All rights reserved. -# Copyright (c) 2022 Nanook Consulting. All rights reserved. +# Copyright (c) 2022-2024 Nanook Consulting All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -40,3 +40,20 @@ behavior: You must specify one of the above in combination with NOTIFYERRORS in order to receive notifications of errors. Please correct the situation and try again. +# +[child-term] +At least one child job is being terminated due to termination of +its parent: + + Parent: %s + Child: %s + +This behavior is controlled by setting the PMIX_SPAWN_CHILD_SEP attribute +in the job info provided at time of spawn for the child job. When set to +"true", the runtime will "separate" the child from its parent and allow +it to continue execution after parent termination. Note that this is only +true for parents that normally terminate - abnormal termination will always +result in a complete teardown of all child jobs. + +In the absence of the attribute, the runtime will default to the "true" +behavior. 
diff --git a/src/mca/state/dvm/state_dvm.c b/src/mca/state/dvm/state_dvm.c index f9acf4944a..ed2f099437 100644 --- a/src/mca/state/dvm/state_dvm.c +++ b/src/mca/state/dvm/state_dvm.c @@ -29,6 +29,7 @@ #include "src/util/pmix_output.h" #include "src/util/proc_info.h" #include "src/util/session_dir.h" +#include "src/util/pmix_show_help.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/filem/filem.h" @@ -505,7 +506,7 @@ static void check_complete(int fd, short args, void *cbdata) prte_state_caddy_t *caddy = (prte_state_caddy_t *) cbdata; prte_job_t *jdata, *jptr; prte_proc_t *proc; - int i, rc; + int i, rc, nprocs; prte_node_t *node; prte_job_map_t *map; int32_t index; @@ -519,7 +520,7 @@ static void check_complete(int fd, short args, void *cbdata) hwloc_obj_t obj; hwloc_obj_type_t type; hwloc_cpuset_t boundcpus, tgt; - bool takeall; + bool takeall, sep, *sepptr = &sep; PRTE_HIDE_UNUSED_PARAMS(fd, args); PMIX_ACQUIRE_OBJECT(caddy); @@ -815,21 +816,33 @@ static void check_complete(int fd, short args, void *cbdata) prte_state_base_check_fds(jdata); } - /* if this job was a launcher, then we need to abort all of its - * child jobs that might still be running */ + /* if this job started child jobs, then we need to abort all of its + * child jobs that might still be running unless designated to + * run independently of their parent */ if (0 < pmix_list_get_size(&jdata->children)) { PMIX_CONSTRUCT(&procs, pmix_pointer_array_t); pmix_pointer_array_init(&procs, 1, INT_MAX, 1); + nprocs = 0; PMIX_LIST_FOREACH(jptr, &jdata->children, prte_job_t) { - proc = PMIX_NEW(prte_proc_t); - PMIX_LOAD_PROCID(&proc->name, jptr->nspace, PMIX_RANK_WILDCARD); - pmix_pointer_array_add(&procs, proc); + if (prte_get_attribute(&jptr->attributes, PRTE_JOB_CHILD_SEP, (void**)&sepptr, PMIX_BOOL) && !sep) { + proc = PMIX_NEW(prte_proc_t); + PMIX_LOAD_PROCID(&proc->name, jptr->nspace, PMIX_RANK_WILDCARD); + pmix_pointer_array_add(&procs, proc); + ++nprocs; + if (1 == nprocs) { + // output a 
warning message that at least one child is being terminated + pmix_show_help("help-state-base.txt", "child-term", true, + jdata->nspace, jptr->nspace); + } + } } - prte_plm.terminate_procs(&procs); - for (i = 0; i < procs.size; i++) { - if (NULL != (proc = (prte_proc_t *) pmix_pointer_array_get_item(&procs, i))) { - PMIX_RELEASE(proc); + if (0 < nprocs) { + prte_plm.terminate_procs(&procs); + for (i = 0; i < procs.size; i++) { + if (NULL != (proc = (prte_proc_t *) pmix_pointer_array_get_item(&procs, i))) { + PMIX_RELEASE(proc); + } } } PMIX_DESTRUCT(&procs); diff --git a/src/prted/pmix/pmix_server_dyn.c b/src/prted/pmix/pmix_server_dyn.c index 8a2ba05f02..1797c61d02 100644 --- a/src/prted/pmix/pmix_server_dyn.c +++ b/src/prted/pmix/pmix_server_dyn.c @@ -518,6 +518,12 @@ static void interim(int sd, short args, void *cbdata) prte_set_attribute(&jdata->attributes, PRTE_JOB_CONTINUOUS, PRTE_ATTR_GLOBAL, &flag, PMIX_BOOL); + /*** CHILD INDEPENDENCE ***/ + } else if (PMIX_CHECK_KEY(info, PMIX_SPAWN_CHILD_SEP)) { + flag = PMIX_INFO_TRUE(info); + prte_set_attribute(&jdata->attributes, PRTE_JOB_CHILD_SEP, PRTE_ATTR_GLOBAL, + &flag, PMIX_BOOL); + /*** MAX RESTARTS ***/ } else if (PMIX_CHECK_KEY(info, PMIX_MAX_RESTARTS)) { for (i = 0; i < jdata->apps->size; i++) { diff --git a/src/util/attr.c b/src/util/attr.c index 98b45ab5c1..a04d49bf41 100644 --- a/src/util/attr.c +++ b/src/util/attr.c @@ -499,6 +499,8 @@ const char *prte_attr_key_to_str(prte_attribute_key_t key) return "DISPLAY PARSEABLE OUTPUT"; case PRTE_JOB_EXTEND_DVM: return "EXTEND DVM"; + case PRTE_JOB_CHILD_SEP: + return "CHILD SEP"; case PRTE_PROC_NOBARRIER: return "PROC-NOBARRIER"; diff --git a/src/util/attr.h b/src/util/attr.h index 13eecb8344..9ea4359bdd 100644 --- a/src/util/attr.h +++ b/src/util/attr.h @@ -220,6 +220,9 @@ typedef uint16_t prte_job_flags_t; // are to be displayed #define PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT (PRTE_JOB_START_KEY + 110) // bool - display output in machine parsable format #define 
PRTE_JOB_EXTEND_DVM (PRTE_JOB_START_KEY + 111) // bool - DVM is being extended +#define PRTE_JOB_CHILD_SEP (PRTE_JOB_START_KEY + 116) // bool - child job is to be considered independent + // from its parent, do not terminate if + // parent dies first #define PRTE_JOB_MAX_KEY (PRTE_JOB_START_KEY + 200)