Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance archdetect to support detection of NVIDIA GPUs + using that in EESSI init script #767

5 changes: 5 additions & 0 deletions init/bash
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ if [ $? -eq 0 ]; then
show_msg "Prepending site path $EESSI_SITE_MODULEPATH to \$MODULEPATH..."
module use $EESSI_SITE_MODULEPATH

# Optionally extend $MODULEPATH with the accelerator-specific module
# directory. The expansion is quoted so that an empty value is tested
# reliably and a path containing whitespace is passed to `module use`
# as a single word.
if [ -n "${EESSI_MODULEPATH_ACCEL}" ]; then
    show_msg "Prepending $EESSI_MODULEPATH_ACCEL to \$MODULEPATH..."
    module use "$EESSI_MODULEPATH_ACCEL"
fi

#show_msg ""
#show_msg "*** Known problems in the ${EESSI_VERSION} software stack ***"
#show_msg ""
Expand Down
22 changes: 20 additions & 2 deletions init/eessi_archdetect.sh
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,25 @@ cpupath(){
fi
}

#######################################
# Print the accelerator sub-path for this host.
# Globals:
#   EESSI_ACCELERATOR_TARGET_OVERRIDE (read) - when non-empty, its value is
#     printed as-is and no detection is attempted.
# Outputs:
#   "accel/nvidia/cc<NN>" on stdout, where <NN> is the compute capability
#   that nvidia-smi reports for the first listed GPU with the dot removed
#   (e.g. "8.0" -> "cc80"). Nothing is printed when nvidia-smi is missing,
#   fails, or returns output without a numeric compute capability.
# Returns:
#   0 in all cases; an empty stdout means "no accelerator found".
#######################################
accelpath() {
    local nvidia_smi gpu_info first_gpu cuda_cc

    # If EESSI_ACCELERATOR_TARGET_OVERRIDE is set, use it
    if [ -n "${EESSI_ACCELERATOR_TARGET_OVERRIDE:-}" ]; then
        log "DEBUG" "accelpath: Override variable set as '$EESSI_ACCELERATOR_TARGET_OVERRIDE' "
        printf '%s\n' "${EESSI_ACCELERATOR_TARGET_OVERRIDE}"
        # return (not exit) so the function is also safe to call from a
        # shell that must keep running
        return 0
    fi

    # check for NVIDIA GPUs via nvidia-smi command
    if ! nvidia_smi=$(command -v nvidia-smi); then
        log "DEBUG" "accelpath: nvidia-smi command not found"
        return 0
    fi
    log "DEBUG" "accelpath: nvidia-smi command found @ ${nvidia_smi}"

    # nvidia-smi being installed does not guarantee the query works; when it
    # exits non-zero its output is an error message and must not be turned
    # into an accelerator path.
    if ! gpu_info=$(nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap --format=csv,noheader); then
        log "DEBUG" "accelpath: nvidia-smi query failed: ${gpu_info}"
        return 0
    fi

    # Only the first GPU is considered; compute_cap is the 4th CSV field.
    first_gpu=${gpu_info%%$'\n'*}
    IFS=',' read -r _ _ _ cuda_cc _ <<< "${first_gpu}"
    cuda_cc=${cuda_cc//[[:space:]]/}
    cuda_cc=${cuda_cc//./}

    # Refuse anything that is not purely numeric (e.g. "No devices were found").
    if [[ ! "${cuda_cc}" =~ ^[0-9]+$ ]]; then
        log "DEBUG" "accelpath: could not derive CUDA compute capability from nvidia-smi output '${first_gpu}'"
        return 0
    fi

    echo "accel/nvidia/cc${cuda_cc}"
}

# Parse command line arguments
# Usage text echoed when no recognised <action> argument is given.
USAGE="Usage: eessi_archdetect.sh [-h][-d][-a] <action: cpupath or accelpath>"

while getopts 'hdva' OPTION; do
case "$OPTION" in
Expand All @@ -168,5 +185,6 @@ ARGUMENT=${1:-none}

# Dispatch on the requested action. The catch-all arm must come last:
# case arms are tried in order, so a `*)` placed before "accelpath" would
# make that action unreachable.
case "$ARGUMENT" in
    "cpupath") cpupath; exit;;
    "accelpath") accelpath; exit;;
    *) echo "$USAGE"; log "ERROR" "Missing <action> argument (possible actions: 'cpupath', 'accelpath')";;
esac
16 changes: 16 additions & 0 deletions init/eessi_environment_variables
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ if [ -d $EESSI_PREFIX ]; then
break
fi
done
# Ask archdetect for the accelerator sub-path of this host; empty output is
# treated as "no accelerator". Assignment and export are separate statements
# so the exit status of the command substitution is not masked by `export`.
EESSI_ACCEL_PATH=$("${EESSI_INIT_DIR_PATH}/eessi_archdetect.sh" accelpath)
export EESSI_ACCEL_PATH
if [ -z "${EESSI_ACCEL_PATH}" ]; then
    show_msg "archdetect could not find any accelerators"
else
    # Use $EESSI_GPU_SOFTWARE_SUBDIR_OVERRIDE when it is set and non-empty,
    # otherwise fall back to $EESSI_SOFTWARE_SUBDIR.
    # (The parameter name inside ${...:-...} must not carry a leading '$';
    # that form is a "bad substitution" error in bash.)
    EESSI_GPU_SOFTWARE_SUBDIR=${EESSI_GPU_SOFTWARE_SUBDIR_OVERRIDE:-${EESSI_SOFTWARE_SUBDIR}}
    EESSI_GPU_SOFTWARE_PATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_GPU_SOFTWARE_SUBDIR}
    if [ -d "${EESSI_GPU_SOFTWARE_PATH}/${EESSI_ACCEL_PATH}" ]; then
        show_msg "archdetect found accelerator: ${EESSI_ACCEL_PATH}"
    fi
fi
elif [ "$EESSI_USE_ARCHSPEC" == "1" ]; then
# note: eessi_software_subdir_for_host.py will pick up value from $EESSI_SOFTWARE_SUBDIR_OVERRIDE if it's defined!
export EESSI_EPREFIX_PYTHON=$EESSI_EPREFIX/usr/bin/python3
Expand Down Expand Up @@ -106,6 +116,12 @@ if [ -d $EESSI_PREFIX ]; then
false
fi

# Set and export EESSI_MODULEPATH_ACCEL only when an accelerator path is known
# and the corresponding module directory exists. In every other case the
# variable is left unset, so a later non-empty check on it cannot pick up a
# path built from empty components or pointing at a missing directory.
unset EESSI_MODULEPATH_ACCEL
if [ -n "${EESSI_ACCEL_PATH}" ]; then
    EESSI_MODULEPATH_ACCEL=${EESSI_GPU_SOFTWARE_PATH}/${EESSI_ACCEL_PATH}/${EESSI_MODULE_SUBDIR}
    if [ -d "${EESSI_MODULEPATH_ACCEL}" ]; then
        export EESSI_MODULEPATH_ACCEL
        show_msg "Using ${EESSI_MODULEPATH_ACCEL} as additional directory (for accelerators) to be added to MODULEPATH."
    else
        unset EESSI_MODULEPATH_ACCEL
    fi
fi

# Fix wrong path for RHEL >=8 libcurl
# This is required here because we ship curl in our compat layer. If we only provided
# curl as a module file we could instead do this via a `modluafooter` in an EasyBuild
Expand Down