From a122c6e638ce8c0e909031c7e8fde96054317925 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 12 Nov 2024 20:34:52 +0100 Subject: [PATCH 01/45] adjust installpath if EESSI_ACCELERATOR_TARGET is set --- EESSI-extend-2023.06-easybuild.eb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index 8e328c3ece..bfe7931c8f 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -87,6 +87,16 @@ if (os.getenv("EESSI_CVMFS_INSTALL") ~= nil) then end eessi_cvmfs_install = true easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH") + eessi_accelerator_target = os.getenv("EESSI_ACCELERATOR_TARGET") + if (eessi_accelerator_target ~= nil) then + cuda_compute_capability = string.match(eessi_accelerator_target, "^nvidia/cc([0-9][0-9])$") + if (cuda_compute_capability ~= nil) then + easybuild_installpath = pathJoin(easybuild_installpath, 'accel', eessi_accelerator_target) + easybuild_cuda_compute_capabilities = cuda_compute_capability:sub(1, 1) .. "." .. cuda_compute_capability:sub(2, 2) + else + LmodError("Incorrect value for $EESSI_ACCELERATOR_TARGET: " .. 
eessi_accelerator_target) + end + end elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then -- Make sure no other EESSI install environment variables are set if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then @@ -146,6 +156,11 @@ setenv ("EASYBUILD_UMASK", "022") -- Allow this module to be loaded when running EasyBuild setenv ("EASYBUILD_ALLOW_LOADED_MODULES", "EasyBuild,EESSI-extend") +-- Set environment variables if building for CUDA compute capabilities +if (easybuild_cuda_compute_capabilities ~= nil) then + setenv ("EASYBUILD_CUDA_COMPUTE_CAPABILITIES", easybuild_cuda_compute_capabilities) +end + -- Set all related environment variables if we have project or user installations (including extending MODULEPATH) if (user_modulepath ~= nil) then -- Use a more restrictive umask for this case From 7cead8e19dde11b17bf75ba721b63fad4f7b736f Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 12 Nov 2024 20:44:36 +0100 Subject: [PATCH 02/45] add rebuild easystack for EESSI-extend --- .../2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml new file mode 100644 index 0000000000..e4c658784f --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml @@ -0,0 +1,6 @@ +# 2024.11.12 +# for installations under /cvmfs, if EESSI_ACCELERATOR_TARGET is set, +# EESSI-extend should adjust EASYBUILD_INSTALLPATH and set +# EASYBUILD_CUDA_COMPUTE_CAPABILITIES +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb From 58c1da5ff401aea4d9829329f439cad7b1e28e1e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 11:00:15 +0100 Subject: [PATCH 03/45] adding changes from 
PR #812 to validate them here --- EESSI-install-software.sh | 9 ++- load_eessi_extend_module.sh | 106 ++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) create mode 100755 load_eessi_extend_module.sh diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 65c43d5ac5..b365edd71b 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -273,7 +273,14 @@ unset EESSI_PROJECT_INSTALL unset EESSI_SITE_INSTALL export EESSI_CVMFS_INSTALL=1 module unload EESSI-extend -module load EESSI-extend/${EESSI_VERSION}-easybuild + +# The EESSI-extend module is being loaded (or installed if it doesn't exist yet). +# The script requires the EESSI_VERSION given as argument, a couple of +# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the +# function check_exit_code defined. +# NOTE, the script exits if those variables/functions are undefined. +export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} +source load_eessi_extend_module.sh ${EESSI_VERSION} if [ ! -z "${shared_fs_path}" ]; then shared_eb_sourcepath=${shared_fs_path}/easybuild/sources diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh new file mode 100755 index 0000000000..bc277c5f0a --- /dev/null +++ b/load_eessi_extend_module.sh @@ -0,0 +1,106 @@ +# Script to load the environment module for EESSI-extend. +# If that module is not available yet, a specific version will be installed using the latest EasyBuild. +# +# This script must be sourced, since it makes changes in the current environment, like loading an EESSI-extend module. 
+# +# Assumptions (if one is not satisfied the script prints a message and exits) +# - EESSI version is given as first argument +# - TMPDIR is set +# - EB is set +# - EASYBUILD_INSTALLPATH needs to be set +# - Function check_exit_code is defined; +# scripts/utils.sh in EESSI/software-layer repository defines this function, hence +# scripts/utils.sh shall be sourced before this script is run +# +# This script is part of the EESSI software layer, see +# https://github.com/EESSI/software-layer.git +# +# author: Kenneth Hoste (@boegel, HPC-UGent) +# author: Alan O'Cais (@ocaisa, CECAM) +# author: Thomas Roeblitz (@trz42, University of Bergen) +# +# license: GPLv2 +# +# +set -o pipefail + +# this script is *sourced*, not executed, so can't rely on $0 to determine path to self or script name +# $BASH_SOURCE points to correct path or script name, see also http://mywiki.wooledge.org/BashFAQ/028 +if [ $# -ne 1 ]; then + echo "Usage: source ${BASH_SOURCE} " >&2 + exit 1 +fi + +EESSI_EXTEND_VERSION="${1}-easybuild" + +# make sure that environment variables that we expect to be set are indeed set +if [ -z "${TMPDIR}" ]; then + echo "\$TMPDIR is not set; exiting" >&2 + exit 2 +fi + +# ${EB} is used to specify which 'eb' command should be used; +# can potentially be more than just 'eb', for example when using 'eb --optarch=GENERIC' +if [ -z "${EB}" ]; then + echo "\$EB is not set; exiting" >&2 + exit 2 +fi + +# ${EASYBUILD_INSTALLPATH} points to the installation path and needs to be set +if [ -z "${EASYBUILD_INSTALLPATH}" ]; then + echo "\$EASYBUILD_INSTALLPATH is not set; exiting" >&2 + exit 2 +fi + +# make sure that utility functions are defined (cfr. scripts/utils.sh script in EESSI/software-layer repo) +type check_exit_code +if [ $? -ne 0 ]; then + echo "check_exit_code function is not defined; exiting" >&2 + exit 3 +fi + +echo ">> Checking for EESSI-extend module..." 
+ +ml_av_eessi_extend_out=${TMPDIR}/ml_av_eessi_extend.out +module avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + +if [[ $? -eq 0 ]]; then + echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!" +else + echo_yellow ">> No module yet for EESSI-extend/${EESSI_EXTEND_VERSION}, installing it..." + + EB_TMPDIR=${TMPDIR}/ebtmp + echo ">> Using temporary installation of EasyBuild (in ${EB_TMPDIR})..." + pip_install_out=${TMPDIR}/pip_install.out + pip3 install --prefix ${EB_TMPDIR} easybuild &> ${pip_install_out} + + # keep track of original $PATH and $PYTHONPATH values, so we can restore them + ORIG_PATH=${PATH} + ORIG_PYTHONPATH=${PYTHONPATH} + + echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..." + export PATH=${EB_TMPDIR}/bin:${PATH} + export PYTHONPATH=$(ls -d ${EB_TMPDIR}/lib/python*/site-packages):${PYTHONPATH} + eb_install_out=${TMPDIR}/eb_install.out + ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" + fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})" + ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" 2>&1 | tee ${eb_install_out} + check_exit_code $? "${ok_msg}" "${fail_msg}" + + # restore origin $PATH and $PYTHONPATH values, and clean up environment variables that are no longer needed + export PATH=${ORIG_PATH} + export PYTHONPATH=${ORIG_PYTHONPATH} + unset EB_TMPDIR ORIG_PATH ORIG_PYTHONPATH + + module --ignore-cache avail EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + if [[ $? -eq 0 ]]; then + echo_green ">> EESSI-extend/${EESSI_EXTEND_VERSION} module installed!" + else + fatal_error "EESSI-extend/${EESSI_EXTEND_VERSION} module failed to install?! (output of 'pip install' in ${pip_install_out}, output of 'eb' in ${eb_install_out}, output of 'module avail EESSI-extend' in ${ml_av_eessi_extend_out})" + fi +fi + +echo ">> Loading EESSI-extend/${EESSI_EXTEND_VERSION} module..." 
+module --ignore-cache load EESSI-extend/${EESSI_EXTEND_VERSION} + +unset EESSI_EXTEND_VERSION From 679d18051080b38204aa400aeb393e2ab5a50de0 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 11:54:00 +0100 Subject: [PATCH 04/45] use script that loads and installs EESSI-extend --- EESSI-install-software.sh | 4 ++++ scripts/gpu_support/nvidia/install_cuda_and_libraries.sh | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index b365edd71b..50eacc52bf 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -248,6 +248,10 @@ fi temp_install_storage=${TMPDIR}/temp_install_storage mkdir -p ${temp_install_storage} if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then + # need to ensure that some variables will be available to the script + # TMPDIR, EB, EESSI_VERSION, for EASYBUILD_INSTALLPATH (EESSI_PREFIX, + # EESSI_OS_TYPE, EESSI_SOFTWARE_SUBDIR_OVERRIDE) + export TMPDIR EB EESSI_VERSION EESSI_PREFIX EESSI_OS_TYPE EESSI_SOFTWARE_SUBDIR_OVERRIDE ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ -t ${temp_install_storage} \ --accept-cuda-eula \ diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index e6343595ad..51d139bec5 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -112,7 +112,14 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do unset EESSI_USER_INSTALL export EESSI_SITE_INSTALL=1 module unload EESSI-extend - module load EESSI-extend/${EESSI_VERSION}-easybuild + + # The EESSI-extend module is being loaded (or installed if it doesn't exist yet). + # The script requires the EESSI_VERSION given as argument, a couple of + # environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the + # function check_exit_code defined. 
+ # NOTE, the script exits if those variables/functions are undefined. + export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} + source load_eessi_extend_module.sh ${EESSI_VERSION} # Install modules in hidden .modules dir to keep track of what was installed before # (this action is temporary, and we do not call Lmod again within the current shell context, but in EasyBuild From f9243d8f85a616b83c599a01c8a7bf871a89c163 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 12:14:07 +0100 Subject: [PATCH 05/45] facilitate a bit debugging --- load_eessi_extend_module.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index bc277c5f0a..f424c5b79b 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -103,4 +103,7 @@ fi echo ">> Loading EESSI-extend/${EESSI_EXTEND_VERSION} module..." module --ignore-cache load EESSI-extend/${EESSI_EXTEND_VERSION} +echo ">> Determining how to load EESSI-extend/${EESSI_EXTEND_VERSION} module..." +module --ignore-cache spider EESSI-extend/${EESSI_EXTEND_VERSION} + unset EESSI_EXTEND_VERSION From 68a882f707e70ed74ac7fa5235550461604caf63 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 12:23:09 +0100 Subject: [PATCH 06/45] fix module command arg typo --- load_eessi_extend_module.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index f424c5b79b..366da66a5f 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -92,7 +92,7 @@ else export PYTHONPATH=${ORIG_PYTHONPATH} unset EB_TMPDIR ORIG_PATH ORIG_PYTHONPATH - module --ignore-cache avail EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + module --ignore_cache avail EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} if [[ $? 
-eq 0 ]]; then echo_green ">> EESSI-extend/${EESSI_EXTEND_VERSION} module installed!" else @@ -101,9 +101,6 @@ else fi echo ">> Loading EESSI-extend/${EESSI_EXTEND_VERSION} module..." -module --ignore-cache load EESSI-extend/${EESSI_EXTEND_VERSION} - -echo ">> Determining how to load EESSI-extend/${EESSI_EXTEND_VERSION} module..." -module --ignore-cache spider EESSI-extend/${EESSI_EXTEND_VERSION} +module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION} unset EESSI_EXTEND_VERSION From 37c1e9c5f29f6d9696e087629ea5572f4da82649 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 12:33:26 +0100 Subject: [PATCH 07/45] run module avail with --ignore_cache --- load_eessi_extend_module.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index 366da66a5f..88ddcf7f46 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -62,7 +62,9 @@ fi echo ">> Checking for EESSI-extend module..." ml_av_eessi_extend_out=${TMPDIR}/ml_av_eessi_extend.out -module avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} +# need to use --ignore_cache to avoid the case that the module was removed (to be +# rebuilt) but it is still in the cache +module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} if [[ $? -eq 0 ]]; then echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!" 
From a83cde888ef1d341c96adce6563ce1c4f2f8d14f Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 14:10:05 +0100 Subject: [PATCH 08/45] use previous overlay-upper dir(s) as left-most lowerdirs --- eessi_container.sh | 49 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index fc97f9877c..b6adc60503 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -746,9 +746,21 @@ do # to be able to see the contents of the read-write session we have to mount # the fuse-overlayfs (in read-only mode) on top of the CernVM-FS repository - echo "While processing '${cvmfs_repo_name}' to be mounted 'read-only' we detected an overlay-upper" - echo " directory (${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper) likely from a previous" - echo " session. Will use it as left-most directory in 'lowerdir' argument for fuse-overlayfs." + echo "While processing '${cvmfs_repo_name}' to be mounted 'read-only'" + echo " we detected one or more overlay-upper* directories" + echo " (${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper*)" + echo " likely originating from a previous session. Will use then as" + echo " left-most directory in 'lowerdir' argument for fuse-overlayfs." 
+ + lowerdirs=/cvmfs_ro/${cvmfs_repo_name} + # check if there are more overlay-upper directories, e.g., with three digit suffix + for dir in $(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n); do + lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper-${dir}:${lowerdirs} + done + # finally add most recent overlay-upper to lowerdirs + lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper:${lowerdirs} + [[ ${VERBOSE} -eq 1 ]] && ls ${EESSI_TMPDIR}/${cvmfs_repo_name} + [[ ${VERBOSE} -eq 1 ]] && echo ${lowerdirs} # make the target CernVM-FS repository available under /cvmfs_ro export EESSI_READONLY="container:cvmfs2 ${cvmfs_repo_name} /cvmfs_ro/${cvmfs_repo_name}" @@ -757,12 +769,7 @@ do # now, put the overlay-upper read-only on top of the repo and make it available under the usual prefix /cvmfs EESSI_READONLY_OVERLAY="container:fuse-overlayfs" - # The contents of the previous session are available under - # ${EESSI_TMPDIR} which is bind mounted to ${TMP_IN_CONTAINER}. - # Hence, we have to use ${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper - # the left-most directory given for the lowerdir argument is put on top, - # and with no upperdir=... 
the whole overlayfs is made available read-only - EESSI_READONLY_OVERLAY+=" -o lowerdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper:/cvmfs_ro/${cvmfs_repo_name}" + EESSI_READONLY_OVERLAY+=" -o lowerdir=${lowerdirs}" EESSI_READONLY_OVERLAY+=" /cvmfs/${cvmfs_repo_name}" export EESSI_READONLY_OVERLAY @@ -778,7 +785,27 @@ do export EESSI_FUSE_MOUNTS fi elif [[ ${cvmfs_repo_access} == "rw" ]] ; then - # use repo-specific overlay directories + # use repo-specific overlay directories; if there is already an + # overlay-upper (e.g., from a previous run) move it to overlay-upper-SEQ + # and create a new one; all overlay-upper-SEQs must be added to lowerdir + # starting with the lowest number first and preprending it to the lowerdir + # setting + lowerdirs=/cvmfs_ro/${cvmfs_repo_name} + if [ -d ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper ]; then + # determine next sequence number + last_seq_num=$(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n | tail -n 1 | sed -e 's/^0*//') + if [ -n ${last_seq_num} ]; then + last_seq_num=0 + fi + next_seq_num=$(($last_seq_num + 1)) + next_ovl_upper=$(printf "overlay-upper-%03d" ${next_seq_num}) + mv ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper ${EESSI_TMPDIR}/${cvmfs_repo_name}/${next_ovl_upper} + for dir in $(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n); do + lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper-${dir}:${lowerdirs} + done + [[ ${VERBOSE} -eq 1 ]] && ls ${EESSI_TMPDIR}/${cvmfs_repo_name} + [[ ${VERBOSE} -eq 1 ]] && echo ${lowerdirs} + fi mkdir -p ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper mkdir -p ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-work [[ ${VERBOSE} -eq 1 ]] && echo -e "TMP directory contents:\n$(ls -l ${EESSI_TMPDIR})" @@ -789,7 +816,7 @@ do EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" - EESSI_WRITABLE_OVERLAY+=" -o 
lowerdir=/cvmfs_ro/${cvmfs_repo_name}" + EESSI_WRITABLE_OVERLAY+=" -o lowerdir=${lowerdirs}" EESSI_WRITABLE_OVERLAY+=" -o upperdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper" EESSI_WRITABLE_OVERLAY+=" -o workdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-work" EESSI_WRITABLE_OVERLAY+=" /cvmfs/${cvmfs_repo_name}" From 16dce19146c8adf696aa8037dc691a2944d072ab Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 21:19:14 +0100 Subject: [PATCH 09/45] use alternative approach to remove software to be rebuilt --- EESSI-determine-rebuilds.sh | 123 ++++++++++++++++++++++++++++++++++++ EESSI-remove-software.sh | 13 ++-- bot/build.sh | 50 ++++++++++++++- eessi_container.sh | 54 ++++++++++------ 4 files changed, 216 insertions(+), 24 deletions(-) create mode 100755 EESSI-determine-rebuilds.sh diff --git a/EESSI-determine-rebuilds.sh b/EESSI-determine-rebuilds.sh new file mode 100755 index 0000000000..4f4d5ab713 --- /dev/null +++ b/EESSI-determine-rebuilds.sh @@ -0,0 +1,123 @@ +#!/bin/bash +# +# Script to determine which parts of the EESSI software stack (version set through init/eessi_defaults) +# have to be rebuilt + +# see example parsing of command line arguments at +# https://wiki.bash-hackers.org/scripting/posparams#using_a_while_loop +# https://stackoverflow.com/questions/192249/how-do-i-parse-command-line-arguments-in-bash + +display_help() { + echo "usage: $0 [OPTIONS]" + echo " -g | --generic - instructs script to build for generic architecture target" + echo " -h | --help - display this usage information" +} + +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + -g|--generic) + DETECTION_PARAMETERS="--generic" + shift + ;; + -h|--help) + display_help # Call your function + # no shifting needed here, we're done. 
+ exit 0 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" + +TOPDIR=$(dirname $(realpath $0)) + +export TMPDIR=$(mktemp -d /tmp/eessi-remove.XXXXXXXX) + +source $TOPDIR/scripts/utils.sh + +echo ">> Determining software subdirectory to use for current build host..." +if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then + export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) + echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script" +else + echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" +fi + +echo ">> Setting up environment..." + +source $TOPDIR/init/bash + +if [ -d $EESSI_CVMFS_REPO ]; then + echo_green "$EESSI_CVMFS_REPO available, OK!" +else + fatal_error "$EESSI_CVMFS_REPO is not available!" +fi + +if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then + fatal_error "Failed to determine software subdirectory?!" +elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then + fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" +else + echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" +fi + +echo ">> Configuring EasyBuild..." +EB="eb" +source $TOPDIR/configure_easybuild + +echo ">> Setting up \$MODULEPATH..." +# make sure no modules are loaded +module --force purge +# ignore current $MODULEPATH entirely +module unuse $MODULEPATH +module use $EASYBUILD_INSTALLPATH/modules/all +if [[ -z ${MODULEPATH} ]]; then + fatal_error "Failed to set up \$MODULEPATH?!" 
+else + echo_green ">> MODULEPATH set up: ${MODULEPATH}" +fi + +# assume there's only one diff file that corresponds to the PR patch file +pr_diff=$(ls [0-9]*.diff | head -1) + +# if this script is run as root, use PR patch file to determine if software needs to be removed first +changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") +if [ -z ${changed_easystacks_rebuilds} ]; then + echo "No software needs to be removed." +else + for easystack_file in ${changed_easystacks_rebuilds}; do + # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file + eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') + + # load EasyBuild module (will be installed if it's not available yet) + source ${TOPDIR}/load_easybuild_module.sh ${eb_version} + + if [ -f ${easystack_file} ]; then + echo_green "Software rebuild(s) requested in ${easystack_file}, so determining which existing installation have to be removed..." + # we need to remove existing installation directories first, + # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) + # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) + rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + for app in ${rebuild_apps}; do + app_dir=${EASYBUILD_INSTALLPATH}/software/${app} + app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua + echo_yellow "Removing ${app_dir} and ${app_module}..." + find ${app_dir} -type d | sed -e 's/^/REMOVE_DIRECTORY /' + find ${app_dir} -type f | sed -e 's/^/REMOVE_FILE /' + echo "REMOVE_MODULE ${app_module}" + done + else + fatal_error "Easystack file ${easystack_file} not found!" 
+ fi + done +fi diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 98576efcb0..c0818db393 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -114,11 +114,14 @@ if [ $EUID -eq 0 ]; then source ${TOPDIR}/load_easybuild_module.sh ${eb_version} if [ -f ${easystack_file} ]; then - echo_green "Software rebuild(s) requested in ${easystack_file}, so determining which existing installation have to be removed..." + echo_green "Software rebuild(s) requested in ${easystack_file}, so" + echo_green " determining which existing installation have to be removed (assuming contents" + echo_green " have been made writable/deletable)..." # we need to remove existing installation directories first, # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) - rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + # rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') for app in ${rebuild_apps}; do # Returns e.g. 
/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2/modules/all: app_modulepath=$(module --terse av ${app} 2>&1 | head -n 1 | sed 's/://') @@ -126,9 +129,11 @@ if [ $EUID -eq 0 ]; then app_installprefix=$(dirname $(dirname ${app_modulepath})) app_dir=${app_installprefix}/software/${app} app_module=${app_installprefix}/modules/all/${app}.lua + # app_dir=${EASYBUILD_INSTALLPATH}/software/${app} + # app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua echo_yellow "Removing ${app_dir} and ${app_module}..." - rm -rf ${app_dir} - rm -rf ${app_module} + rm -rdfv ${app_dir} + rm -rdfv ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" diff --git a/bot/build.sh b/bot/build.sh index 3fd343e96f..718fceafc0 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -200,6 +200,49 @@ changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed if [[ -z "${changed_easystacks_rebuilds}" ]]; then echo "This PR does not add any easystack files in a rebuilds subdirectory, so let's skip the removal step." 
else + # determine which software packages (and modules) have to be removed + TARBALL_TMP_DETERMINE_STEP_DIR=${PREVIOUS_TMP_DIR}/determine_step + mkdir -p ${TARBALL_TMP_DETERMINE_STEP_DIR} + + # prepare arguments to eessi_container.sh specific to determine step + declare -a DETERMINE_STEP_ARGS=() + DETERMINE_STEP_ARGS+=("--save" "${TARBALL_TMP_DETERMINE_STEP_DIR}") + DETERMINE_STEP_ARGS+=("--storage" "${STORAGE}") + + # create tmp file for output of determine step + determine_outerr=$(mktemp determine.outerr.XXXX) + + echo "Executing command to determine software to be removed:" + echo "${software_layer_dir}/eessi_container.sh ${COMMON_ARGS[@]} ${DETERMINE_STEP_ARGS[@]}" + echo " -- ${software_layer_dir}/EESSI-determine-rebuilds.sh \"${DETERMINE_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${determine_outerr}" + ${software_layer_dir}/eessi_container.sh "${COMMON_ARGS[@]}" "${DETERMINE_STEP_ARGS[@]}" \ + -- ${software_layer_dir}/EESSI-determine-rebuilds.sh "${DETERMINE_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${determine_outerr} + + # process output file + # for each line containing 'REMOVE_DIRECTORY some_path' + # create a new directory ${STORAGE}/lower_dirs/some_path_stripped + # where the prefix /cvmfs/repo_name is removed from some_path + # set permission of the directory to u+rwx + # for each line containing 'REMOVE_FILE some_file_path' + # touch a new file ${STORAGE}/lower_dirs/some_file_path_stripped + # where the prefix /cvmfs/repo_name is removed from some_file_path + # set permission of the file to u+rw + + LOWER_DIRS="${STORAGE}/lower_dirs" + mkdir -p "${LOWER_DIRS}" + + grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs + cat ${determine_outerr}.rm_dirs | while read remove_dir; do + mkdir -p ${STORAGE}/lower_dirs/${remove_dir} + chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} + done + + grep ^REMOVE_FILE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_files + cat ${determine_outerr}.rm_files | while read 
remove_file; do + touch ${STORAGE}/lower_dirs/${remove_file} + chmod u+rw ${STORAGE}/lower_dirs/${remove_file} + done + # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} @@ -208,9 +251,14 @@ else declare -a REMOVAL_STEP_ARGS=() REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}") REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}") + # add fakeroot option in order to be able to remove software, see: # https://github.com/EESSI/software-layer/issues/312 - REMOVAL_STEP_ARGS+=("--fakeroot") + # REMOVAL_STEP_ARGS+=("--fakeroot") + + if [[ ! -z ${LOWER_DIRS} ]]; then + REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") + fi # create tmp file for output of removal step removal_outerr=$(mktemp remove.outerr.XXXX) diff --git a/eessi_container.sh b/eessi_container.sh index b6adc60503..661d0f63f1 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -89,6 +89,11 @@ display_help() { echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs," echo " MODE==install for a CUDA installation, MODE==run to" echo " attach a GPU, MODE==all for both [default: false]" + echo " -o | --lower-dirs DIRS - list of ':' separated directories that are used" + echo " in front of the default lower dir (CVMFS repo);" + echo " fuse-overlayfs will merge all lower directories;" + echo " the option can be used to make certain directories" + echo " in the CVMFS repo writable [default: none]" echo " -r | --repository CFG - configuration file or identifier defining the" echo " repository to use; can be given multiple times;" echo " CFG may include a suffix ',access={ro,rw}' to" @@ -125,6 +130,7 @@ FAKEROOT=0 VERBOSE=0 STORAGE= LIST_REPOS=0 +LOWER_DIRS= MODE="shell" SETUP_NVIDIA=0 REPOSITORIES=() @@ -182,6 +188,10 @@ while [[ $# -gt 0 ]]; do NVIDIA_MODE="$2" shift 2 ;; + -o|--lower-dirs) + LOWER_DIRS="$2" + shift 2 + ;; -r|--repository) 
REPOSITORIES+=("$2") shift 2 @@ -753,10 +763,10 @@ do echo " left-most directory in 'lowerdir' argument for fuse-overlayfs." lowerdirs=/cvmfs_ro/${cvmfs_repo_name} - # check if there are more overlay-upper directories, e.g., with three digit suffix - for dir in $(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n); do - lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper-${dir}:${lowerdirs} - done + # # check if there are more overlay-upper directories, e.g., with three digit suffix + # for dir in $(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n); do + # lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper-${dir}:${lowerdirs} + # done # finally add most recent overlay-upper to lowerdirs lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper:${lowerdirs} [[ ${VERBOSE} -eq 1 ]] && ls ${EESSI_TMPDIR}/${cvmfs_repo_name} @@ -791,21 +801,21 @@ do # starting with the lowest number first and preprending it to the lowerdir # setting lowerdirs=/cvmfs_ro/${cvmfs_repo_name} - if [ -d ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper ]; then - # determine next sequence number - last_seq_num=$(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n | tail -n 1 | sed -e 's/^0*//') - if [ -n ${last_seq_num} ]; then - last_seq_num=0 - fi - next_seq_num=$(($last_seq_num + 1)) - next_ovl_upper=$(printf "overlay-upper-%03d" ${next_seq_num}) - mv ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper ${EESSI_TMPDIR}/${cvmfs_repo_name}/${next_ovl_upper} - for dir in $(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n); do - lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper-${dir}:${lowerdirs} - done - [[ ${VERBOSE} -eq 1 ]] && ls ${EESSI_TMPDIR}/${cvmfs_repo_name} - [[ ${VERBOSE} -eq 1 ]] && echo ${lowerdirs} - fi + # if [ -d 
${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper ]; then + # # determine next sequence number + # last_seq_num=$(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n | tail -n 1 | sed -e 's/^0*//') + # if [ -n ${last_seq_num} ]; then + # last_seq_num=0 + # fi + # next_seq_num=$(($last_seq_num + 1)) + # next_ovl_upper=$(printf "overlay-upper-%03d" ${next_seq_num}) + # mv ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper ${EESSI_TMPDIR}/${cvmfs_repo_name}/${next_ovl_upper} + # for dir in $(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n); do + # lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper-${dir}:${lowerdirs} + # done + # [[ ${VERBOSE} -eq 1 ]] && ls ${EESSI_TMPDIR}/${cvmfs_repo_name} + # [[ ${VERBOSE} -eq 1 ]] && echo ${lowerdirs} + # fi mkdir -p ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper mkdir -p ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-work [[ ${VERBOSE} -eq 1 ]] && echo -e "TMP directory contents:\n$(ls -l ${EESSI_TMPDIR})" @@ -816,6 +826,12 @@ do EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" + if [[ ! 
-z ${LOWER_DIRS} ]]; then + # need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as + # separator while the lowerdir overlayfs option uses ':' + export BIND_PATHS="${BIND_PATHS},${LOWER_DIRS/:/,}" + lowerdirs=${LOWER_DIRS}:${lowerdirs}" + fi EESSI_WRITABLE_OVERLAY+=" -o lowerdir=${lowerdirs}" EESSI_WRITABLE_OVERLAY+=" -o upperdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper" EESSI_WRITABLE_OVERLAY+=" -o workdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-work" From bcb0b654bef3bbc1d5f8008a773054b15ac32292 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 21:38:57 +0100 Subject: [PATCH 10/45] fix syntax error --- eessi_container.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_container.sh b/eessi_container.sh index 661d0f63f1..069da99d14 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -830,7 +830,7 @@ do # need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as # separator while the lowerdir overlayfs option uses ':' export BIND_PATHS="${BIND_PATHS},${LOWER_DIRS/:/,}" - lowerdirs=${LOWER_DIRS}:${lowerdirs}" + lowerdirs=${LOWER_DIRS}:${lowerdirs} fi EESSI_WRITABLE_OVERLAY+=" -o lowerdir=${lowerdirs}" EESSI_WRITABLE_OVERLAY+=" -o upperdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper" From 0fb455d47477415fa34dff711c04abf5e32655db Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 21:53:38 +0100 Subject: [PATCH 11/45] remove script is not run by root anymore --- EESSI-remove-software.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index c0818db393..582ed61a7f 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -101,7 +101,7 @@ fi pr_diff=$(ls [0-9]*.diff | head -1) # if this script is run as root, use PR patch file to determine if software needs to be removed first -if [ $EUID -eq 0 ]; then +if [ $EUID -ne 0 ]; then 
changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep 'easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") if [ -z ${changed_easystacks_rebuilds} ]; then echo "No software needs to be removed." @@ -141,5 +141,6 @@ if [ $EUID -eq 0 ]; then done fi else - fatal_error "This script can only be run by root!" + # fatal_error "This script can only be run by root!" + fatal_error "This script must not be run by root!" fi From 75e9c35720a83c722eb47c315d5456e2af151bfd Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 22:06:13 +0100 Subject: [PATCH 12/45] add debug info --- load_eessi_extend_module.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index 88ddcf7f46..a712ab31af 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -81,6 +81,10 @@ else ORIG_PYTHONPATH=${PYTHONPATH} echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..." 
+ ls -l ${EASYBUILD_INSTALLPATH} + ls -lR ${EASYBUILD_INSTALLPATH}/software/EESSI-extend + whoami + export PATH=${EB_TMPDIR}/bin:${PATH} export PYTHONPATH=$(ls -d ${EB_TMPDIR}/lib/python*/site-packages):${PYTHONPATH} eb_install_out=${TMPDIR}/eb_install.out From 5ba88d383418be6cb49c808f7efa563456766c04 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 22:14:17 +0100 Subject: [PATCH 13/45] set 777 perms --- bot/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 718fceafc0..223891117d 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -234,13 +234,13 @@ else grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs cat ${determine_outerr}.rm_dirs | while read remove_dir; do mkdir -p ${STORAGE}/lower_dirs/${remove_dir} - chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} + chmod ugo+rwx ${STORAGE}/lower_dirs/${remove_dir} done grep ^REMOVE_FILE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_files cat ${determine_outerr}.rm_files | while read remove_file; do touch ${STORAGE}/lower_dirs/${remove_file} - chmod u+rw ${STORAGE}/lower_dirs/${remove_file} + chmod ugo+rw ${STORAGE}/lower_dirs/${remove_file} done # prepare directory to store tarball of tmp for removal and build steps From 47fe73c87b8949c781c121095918148848bf5bbc Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 22:15:56 +0100 Subject: [PATCH 14/45] add more debug info --- load_eessi_extend_module.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index a712ab31af..fa3708f269 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -81,8 +81,8 @@ else ORIG_PYTHONPATH=${PYTHONPATH} echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..." 
- ls -l ${EASYBUILD_INSTALLPATH} - ls -lR ${EASYBUILD_INSTALLPATH}/software/EESSI-extend + ls -lisa ${EASYBUILD_INSTALLPATH} + ls -lisaR ${EASYBUILD_INSTALLPATH}/software/EESSI-extend whoami export PATH=${EB_TMPDIR}/bin:${PATH} From acf987c936e5396a6324656ca513ae72d0ae169d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 22:22:11 +0100 Subject: [PATCH 15/45] add even more debug info --- load_eessi_extend_module.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index fa3708f269..1ab50daba0 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -91,7 +91,9 @@ else ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})" ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" 2>&1 | tee ${eb_install_out} - check_exit_code $? "${ok_msg}" "${fail_msg}" + ec=$? + ls -lisaR ${EASYBUILD_INSTALLPATH}/software/EESSI-extend + check_exit_code $ec "${ok_msg}" "${fail_msg}" # restore origin $PATH and $PYTHONPATH values, and clean up environment variables that are no longer needed export PATH=${ORIG_PATH} From 32e6d69a0a6b87979f06e960f40f6a2602afe288 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 22:29:59 +0100 Subject: [PATCH 16/45] add lower dirs to build step --- bot/build.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bot/build.sh b/bot/build.sh index 223891117d..6bf7827d66 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -296,6 +296,10 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi +if [[ ! 
-z ${LOWER_DIRS} ]]; then + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") +fi + # create tmp file for output of build step build_outerr=$(mktemp build.outerr.XXXX) From afa1356c5cd55544efffae89532833711e056741 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 22:45:14 +0100 Subject: [PATCH 17/45] limit perm to 700 --- bot/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 6bf7827d66..132770fd01 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -234,13 +234,13 @@ else grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs cat ${determine_outerr}.rm_dirs | while read remove_dir; do mkdir -p ${STORAGE}/lower_dirs/${remove_dir} - chmod ugo+rwx ${STORAGE}/lower_dirs/${remove_dir} + chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} done grep ^REMOVE_FILE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_files cat ${determine_outerr}.rm_files | while read remove_file; do touch ${STORAGE}/lower_dirs/${remove_file} - chmod ugo+rw ${STORAGE}/lower_dirs/${remove_file} + chmod u+rw ${STORAGE}/lower_dirs/${remove_file} done # prepare directory to store tarball of tmp for removal and build steps From 9eebad7be3b3cbc1bd3b14a8614a13cc79c25f82 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 22:55:27 +0100 Subject: [PATCH 18/45] source configure_easybuild and run in subshell --- load_eessi_extend_module.sh | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index 1ab50daba0..9d13f313f3 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -80,20 +80,20 @@ else ORIG_PATH=${PATH} ORIG_PYTHONPATH=${PYTHONPATH} - echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..." 
- ls -lisa ${EASYBUILD_INSTALLPATH} - ls -lisaR ${EASYBUILD_INSTALLPATH}/software/EESSI-extend - whoami - - export PATH=${EB_TMPDIR}/bin:${PATH} - export PYTHONPATH=$(ls -d ${EB_TMPDIR}/lib/python*/site-packages):${PYTHONPATH} - eb_install_out=${TMPDIR}/eb_install.out - ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" - fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})" - ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" 2>&1 | tee ${eb_install_out} - ec=$? - ls -lisaR ${EASYBUILD_INSTALLPATH}/software/EESSI-extend - check_exit_code $ec "${ok_msg}" "${fail_msg}" + # source configure_easybuild to use correct eb settings + ( + EESSI_MAIN_DIR=$(dirname $(readlink -f $BASH_SOURCE)) + source ${EESSI_MAIN_DIR}/configure_easybuild + + echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..." + export PATH=${EB_TMPDIR}/bin:${PATH} + export PYTHONPATH=$(ls -d ${EB_TMPDIR}/lib/python*/site-packages):${PYTHONPATH} + eb_install_out=${TMPDIR}/eb_install.out + ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" + fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})" + ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" 2>&1 | tee ${eb_install_out} + check_exit_code $? 
"${ok_msg}" "${fail_msg}" + ) # restore origin $PATH and $PYTHONPATH values, and clean up environment variables that are no longer needed export PATH=${ORIG_PATH} From 2af57ee8678a1b291f830a64e7e8c7e733aac5c2 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 13 Nov 2024 23:11:48 +0100 Subject: [PATCH 19/45] set 770 perms --- bot/build.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 132770fd01..6a382b2fa4 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -222,11 +222,11 @@ else # for each line containing 'REMOVE_DIRECTORY some_path' # create a new directory ${STORAGE}/lower_dirs/some_path_stripped # where the prefix /cvmfs/repo_name is removed from some_path - # set permission of the directory to u+rwx + # set permission of the directory to ug+rwx # for each line containing 'REMOVE_FILE some_file_path' # touch a new file ${STORAGE}/lower_dirs/some_file_path_stripped # where the prefix /cvmfs/repo_name is removed from some_file_path - # set permission of the file to u+rw + # set permission of the file to ug+rw LOWER_DIRS="${STORAGE}/lower_dirs" mkdir -p "${LOWER_DIRS}" @@ -234,13 +234,13 @@ else grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs cat ${determine_outerr}.rm_dirs | while read remove_dir; do mkdir -p ${STORAGE}/lower_dirs/${remove_dir} - chmod u+rwx ${STORAGE}/lower_dirs/${remove_dir} + chmod ug+rwx ${STORAGE}/lower_dirs/${remove_dir} done grep ^REMOVE_FILE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_files cat ${determine_outerr}.rm_files | while read remove_file; do touch ${STORAGE}/lower_dirs/${remove_file} - chmod u+rw ${STORAGE}/lower_dirs/${remove_file} + chmod ug+rw ${STORAGE}/lower_dirs/${remove_file} done # prepare directory to store tarball of tmp for removal and build steps From 2184b303be49c5f0911ef65c70636a573a68b01c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 09:25:44 +0100 Subject: [PATCH 
20/45] alternative approach to remove files/directories; one-by-one in depth-first order --- EESSI-remove-software.sh | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 582ed61a7f..10f57fe678 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -132,8 +132,21 @@ if [ $EUID -ne 0 ]; then # app_dir=${EASYBUILD_INSTALLPATH}/software/${app} # app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua echo_yellow "Removing ${app_dir} and ${app_module}..." - rm -rdfv ${app_dir} - rm -rdfv ${app_module} + # rm -rdfv ${app_dir} + # rm -rdfv ${app_module} + # 1st remove files in depth-first order + for filepath in $(find ${app_dir} -depth -type f); do + echo " removing file ${filepath}" + rm -fv ${filepath} + done + # 2nd remove directories in depth-first order + for dirpath in $(find ${app_dir} -depth -type d); do + echo " removing directory ${dirpath}" + rmdir -v ${dirpath} + done + # 3rd remove module file + echo " removing module file ${app_module}" + rm -fv ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" 
From 2438c476acc05930de540e3a89f38f8ffd7dc2c4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 10:44:32 +0100 Subject: [PATCH 21/45] pre-package hook for EESSI-extend that stats all files/dirs in installation directory --- eb_hooks.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/eb_hooks.py b/eb_hooks.py index 03642656ea..20f0281492 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -962,6 +962,29 @@ def inject_gpu_property(ec): return ec +def pre_package_hook(self, *args, **kwargs): + """Main pre-package hook: trigger custom functions based on software name.""" + if self.name in PRE_PACKAGE_HOOKS: + PRE_PACKAGE_HOOKS[self.name](self, *args, **kwargs) + + +def pre_package_eessi_extend(self, *args, **kwargs): + """ + Pre-package hook for EESSI-extend: ls/stat all files/directories to work around + 'permission denied' issue when package got removed (and this hook is run when + the package is being rebuilt) + """ + if self.name == 'EESSI-extend': + dir_tree = [] + for root, _, files in os.walk(self.installdir): + dir_tree.append(root) + for f in files: + tree.append(os.path.join(root, f)) + for entry in dir_tree: + print(entry) + os.stat(entry) + + PARSE_HOOKS = { 'casacore': parse_hook_casacore_disable_vectorize, 'CGAL': parse_hook_cgal_toolchainopts_precise, @@ -1020,4 +1043,7 @@ def inject_gpu_property(ec): POST_POSTPROC_HOOKS = { 'CUDA': post_postproc_cuda, 'cuDNN': post_postproc_cudnn, + +PRE_PACKAGE_HOOKS = { + 'EESSI-extend': pre_package_eessi_extend, } From 29004cee3f49e3c38a6953842fb6f71a465e13ca Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 11:03:48 +0100 Subject: [PATCH 22/45] fix syntax error --- eb_hooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eb_hooks.py b/eb_hooks.py index 20f0281492..ea722b8c04 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -1043,6 +1043,7 @@ def pre_package_eessi_extend(self, *args, **kwargs): POST_POSTPROC_HOOKS = { 'CUDA': post_postproc_cuda, 
'cuDNN': post_postproc_cudnn, +} PRE_PACKAGE_HOOKS = { 'EESSI-extend': pre_package_eessi_extend, From cf076be39d8d4f4320f7397b6cf8f3c155fcb407 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 11:09:43 +0100 Subject: [PATCH 23/45] fix another code error --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index ea722b8c04..62c5ee3eb6 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -979,7 +979,7 @@ def pre_package_eessi_extend(self, *args, **kwargs): for root, _, files in os.walk(self.installdir): dir_tree.append(root) for f in files: - tree.append(os.path.join(root, f)) + dir_tree.append(os.path.join(root, f)) for entry in dir_tree: print(entry) os.stat(entry) From 60b3f31d7ff411adff4e3d7aa639d0d7ca54e5f8 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 11:26:52 +0100 Subject: [PATCH 24/45] print file permissions --- eb_hooks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 62c5ee3eb6..9601aa1264 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -3,6 +3,7 @@ import glob import os import re +import stat import easybuild.tools.environment as env from easybuild.easyblocks.generic.configuremake import obtain_config_guess @@ -982,7 +983,7 @@ def pre_package_eessi_extend(self, *args, **kwargs): dir_tree.append(os.path.join(root, f)) for entry in dir_tree: print(entry) - os.stat(entry) + print(stat.S_IMODE(os.lstat(entry).st_mode)) PARSE_HOOKS = { From ace7d7e81571dd3130120958e55a129ffd8281c0 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 11:33:52 +0100 Subject: [PATCH 25/45] convert permissions to octal value --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 9601aa1264..b4a9affddf 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -983,7 +983,7 @@ def pre_package_eessi_extend(self, *args, **kwargs): dir_tree.append(os.path.join(root, f)) for entry 
in dir_tree: print(entry) - print(stat.S_IMODE(os.lstat(entry).st_mode)) + print(oct(stat.S_IMODE(os.lstat(entry).st_mode))) PARSE_HOOKS = { From e969bae487d8147c910b4996a6836ffb41000f95 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Thu, 14 Nov 2024 10:37:01 +0000 Subject: [PATCH 26/45] clean `${tmpdir}` after iterations in `install_cuda_and_libraries.sh` --- scripts/gpu_support/nvidia/install_cuda_and_libraries.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index e6343595ad..e1ca42fc9a 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -239,9 +239,9 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do echo_green "all installations at ${EESSI_SITE_SOFTWARE_PATH}/software/... succeeded!" fi - # clean up tmpdir - rm -rf "${tmpdir}" - # Restore MODULEPATH for next loop iteration MODULEPATH=${SAVE_MODULEPATH} done + +# clean up tmpdir +rm -rf "${tmpdir}" From f760fd25b088e00a010f73b9b92f28cae5d62cb3 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 11:55:14 +0100 Subject: [PATCH 27/45] show file creation time too --- eb_hooks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index b4a9affddf..5877d7c3b8 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -4,6 +4,7 @@ import os import re import stat +import time import easybuild.tools.environment as env from easybuild.easyblocks.generic.configuremake import obtain_config_guess @@ -983,7 +984,8 @@ def pre_package_eessi_extend(self, *args, **kwargs): dir_tree.append(os.path.join(root, f)) for entry in dir_tree: print(entry) - print(oct(stat.S_IMODE(os.lstat(entry).st_mode))) + statinfo = os.lstat(entry) + print(f"file: {entry}\nperms: {oct(stat.S_IMODE(statinfo.st_mode))}\ncreated: {time.ctime(statinfo.st_ctime)}\n") 
PARSE_HOOKS = { From 731514599738160303d548053929f80ee33ab490 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 14:33:03 +0100 Subject: [PATCH 28/45] disable hook, use recursive rm and do ls on specific directory --- EESSI-remove-software.sh | 34 +++++++++++++++++++--------------- eb_hooks.py | 2 +- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 10f57fe678..29a47c0686 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -132,21 +132,25 @@ if [ $EUID -ne 0 ]; then # app_dir=${EASYBUILD_INSTALLPATH}/software/${app} # app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua echo_yellow "Removing ${app_dir} and ${app_module}..." - # rm -rdfv ${app_dir} - # rm -rdfv ${app_module} - # 1st remove files in depth-first order - for filepath in $(find ${app_dir} -depth -type f); do - echo " removing file ${filepath}" - rm -fv ${filepath} - done - # 2nd remove directories in depth-first order - for dirpath in $(find ${app_dir} -depth -type d); do - echo " removing directory ${dirpath}" - rmdir -v ${dirpath} - done - # 3rd remove module file - echo " removing module file ${app_module}" - rm -fv ${app_module} + # suggestion: use the recursive rm's and ls a specific + # directory only (${app_dir}/easybuild) + rm -rdfv ${app_dir} + rm -rdfv ${app_module} + ls ${app_dir}/easybuild || true + + ## 1st remove files in depth-first order + #for filepath in $(find ${app_dir} -depth -type f); do + # echo " removing file ${filepath}" + # rm -fv ${filepath} + #done + ## 2nd remove directories in depth-first order + #for dirpath in $(find ${app_dir} -depth -type d); do + # echo " removing directory ${dirpath}" + # rmdir -v ${dirpath} + #done + ## 3rd remove module file + #echo " removing module file ${app_module}" + #rm -fv ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" 
diff --git a/eb_hooks.py b/eb_hooks.py index 5877d7c3b8..fd88d8399a 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -1049,5 +1049,5 @@ def pre_package_eessi_extend(self, *args, **kwargs): } PRE_PACKAGE_HOOKS = { - 'EESSI-extend': pre_package_eessi_extend, + # 'EESSI-extend': pre_package_eessi_extend, } From ea6a842479893ef2bab7755b09906f6b30ebb7b4 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 14:52:22 +0100 Subject: [PATCH 29/45] recreate 'installdir/easybuild' and use keeppreviousinstall option --- EESSI-remove-software.sh | 4 +++- .../2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml | 4 +++- load_eessi_extend_module.sh | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 29a47c0686..2e89a9fceb 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -136,7 +136,9 @@ if [ $EUID -ne 0 ]; then # directory only (${app_dir}/easybuild) rm -rdfv ${app_dir} rm -rdfv ${app_module} - ls ${app_dir}/easybuild || true + # ls didn't change the result (permission denied) + # ls ${app_dir}/easybuild || true + mkdir -p ${app_dir}/easybuild ## 1st remove files in depth-first order #for filepath in $(find ${app_dir} -depth -type f); do diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml index e4c658784f..86ec70d6ec 100644 --- a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml @@ -3,4 +3,6 @@ # EESSI-extend should adjust EASYBUILD_INSTALLPATH and set # EASYBUILD_CUDA_COMPUTE_CAPABILITIES easyconfigs: - - EESSI-extend-2023.06-easybuild.eb + - EESSI-extend-2023.06-easybuild.eb: + options: + keeppreviousinstall: True diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index 9d13f313f3..257570bc50 100755 --- 
a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -91,7 +91,7 @@ else eb_install_out=${TMPDIR}/eb_install.out ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})" - ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" 2>&1 | tee ${eb_install_out} + ${EB} --keeppreviousinstall True "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" 2>&1 | tee ${eb_install_out} check_exit_code $? "${ok_msg}" "${fail_msg}" ) From e5d3692a9530bc098ec126823123051850913715 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 14 Nov 2024 15:01:28 +0100 Subject: [PATCH 30/45] remove using keeppreviousinstall --- .../2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml | 4 +--- load_eessi_extend_module.sh | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml index 86ec70d6ec..e4c658784f 100644 --- a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml @@ -3,6 +3,4 @@ # EESSI-extend should adjust EASYBUILD_INSTALLPATH and set # EASYBUILD_CUDA_COMPUTE_CAPABILITIES easyconfigs: - - EESSI-extend-2023.06-easybuild.eb: - options: - keeppreviousinstall: True + - EESSI-extend-2023.06-easybuild.eb diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index 257570bc50..9d13f313f3 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -91,7 +91,7 @@ else eb_install_out=${TMPDIR}/eb_install.out ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... 
(output: ${eb_install_out})" - ${EB} --keeppreviousinstall True "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" 2>&1 | tee ${eb_install_out} + ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" 2>&1 | tee ${eb_install_out} check_exit_code $? "${ok_msg}" "${fail_msg}" ) From 0fe18c28878b42043e715e859461db827c93df03 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 15 Nov 2024 10:07:34 +0100 Subject: [PATCH 31/45] show contents and recreate specific/problematic directory --- EESSI-remove-software.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 2e89a9fceb..8b882b38a9 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -136,9 +136,14 @@ if [ $EUID -ne 0 ]; then # directory only (${app_dir}/easybuild) rm -rdfv ${app_dir} rm -rdfv ${app_module} + echo_yellow "Contents of ${app_dir}/easybuild (should not exist)" + ls -l ${app_dir}/easybuild || true # ls didn't change the result (permission denied) # ls ${app_dir}/easybuild || true + # 2nd idea: recreate some directory mkdir -p ${app_dir}/easybuild + echo_yellow "Contents of ${app_dir}/easybuild after it got recreated with 'mkdir -p' (should be empty)" + ls -l ${app_dir}/easybuild || true ## 1st remove files in depth-first order #for filepath in $(find ${app_dir} -depth -type f); do From ddfec84cc9b8333c54480c56f43388369d551fef Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 15 Nov 2024 10:17:24 +0100 Subject: [PATCH 32/45] only make directories and module file writable --- bot/build.sh | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index 6a382b2fa4..7b4414983d 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -223,7 +223,11 @@ else # create a new directory ${STORAGE}/lower_dirs/some_path_stripped # where the prefix /cvmfs/repo_name is removed from some_path # set permission of the directory to ug+rwx - # for each line containing 'REMOVE_FILE some_file_path' + # 
SKIP for each line containing 'REMOVE_FILE some_file_path' + # SKIP touch a new file ${STORAGE}/lower_dirs/some_file_path_stripped + # SKIP where the prefix /cvmfs/repo_name is removed from some_file_path + # SKIP set permission of the file to ug+rw + # for each line containing 'REMOVE_MODULE some_file_path' # touch a new file ${STORAGE}/lower_dirs/some_file_path_stripped # where the prefix /cvmfs/repo_name is removed from some_file_path # set permission of the file to ug+rw @@ -237,10 +241,16 @@ else chmod ug+rwx ${STORAGE}/lower_dirs/${remove_dir} done - grep ^REMOVE_FILE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_files - cat ${determine_outerr}.rm_files | while read remove_file; do - touch ${STORAGE}/lower_dirs/${remove_file} - chmod ug+rw ${STORAGE}/lower_dirs/${remove_file} + # grep ^REMOVE_FILE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_files + # cat ${determine_outerr}.rm_files | while read remove_file; do + # touch ${STORAGE}/lower_dirs/${remove_file} + # chmod ug+rw ${STORAGE}/lower_dirs/${remove_file} + # done + + grep ^REMOVE_MODULE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_modules + cat ${determine_outerr}.rm_modules | while read remove_module; do + touch ${STORAGE}/lower_dirs/${remove_module} + chmod ug+rw ${STORAGE}/lower_dirs/${remove_module} done # prepare directory to store tarball of tmp for removal and build steps From c7916ec5ffd2939e6d0862f6b46477335dbe87b2 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 15 Nov 2024 10:18:23 +0100 Subject: [PATCH 33/45] remove hooks --- eb_hooks.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index fd88d8399a..03642656ea 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -3,8 +3,6 @@ import glob import os import re -import stat -import time import easybuild.tools.environment as env from easybuild.easyblocks.generic.configuremake import obtain_config_guess @@ -964,30 +962,6 @@ def 
inject_gpu_property(ec): return ec -def pre_package_hook(self, *args, **kwargs): - """Main pre-package hook: trigger custom functions based on software name.""" - if self.name in PRE_PACKAGE_HOOKS: - PRE_PACKAGE_HOOKS[self.name](self, *args, **kwargs) - - -def pre_package_eessi_extend(self, *args, **kwargs): - """ - Pre-package hook for EESSI-extend: ls/stat all files/directories to work around - 'permission denied' issue when package got removed (and this hook is run when - the package is being rebuilt) - """ - if self.name == 'EESSI-extend': - dir_tree = [] - for root, _, files in os.walk(self.installdir): - dir_tree.append(root) - for f in files: - dir_tree.append(os.path.join(root, f)) - for entry in dir_tree: - print(entry) - statinfo = os.lstat(entry) - print(f"file: {entry}\nperms: {oct(stat.S_IMODE(statinfo.st_mode))}\ncreated: {time.ctime(statinfo.st_ctime)}\n") - - PARSE_HOOKS = { 'casacore': parse_hook_casacore_disable_vectorize, 'CGAL': parse_hook_cgal_toolchainopts_precise, @@ -1047,7 +1021,3 @@ def pre_package_eessi_extend(self, *args, **kwargs): 'CUDA': post_postproc_cuda, 'cuDNN': post_postproc_cudnn, } - -PRE_PACKAGE_HOOKS = { - # 'EESSI-extend': pre_package_eessi_extend, -} From 1c24b7ab796e684131939ce034e07af0bd94f984 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 15 Nov 2024 10:27:33 +0100 Subject: [PATCH 34/45] clean up lower dir changes --- eessi_container.sh | 48 +++++++++++----------------------------------- 1 file changed, 11 insertions(+), 37 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index 069da99d14..73244778cf 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -756,21 +756,9 @@ do # to be able to see the contents of the read-write session we have to mount # the fuse-overlayfs (in read-only mode) on top of the CernVM-FS repository - echo "While processing '${cvmfs_repo_name}' to be mounted 'read-only'" - echo " we detected one or more overlay-upper* directories" - echo " 
(${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper*)" - echo " likely originating from a previous session. Will use then as" - echo " left-most directory in 'lowerdir' argument for fuse-overlayfs." - - lowerdirs=/cvmfs_ro/${cvmfs_repo_name} - # # check if there are more overlay-upper directories, e.g., with three digit suffix - # for dir in $(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n); do - # lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper-${dir}:${lowerdirs} - # done - # finally add most recent overlay-upper to lowerdirs - lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper:${lowerdirs} - [[ ${VERBOSE} -eq 1 ]] && ls ${EESSI_TMPDIR}/${cvmfs_repo_name} - [[ ${VERBOSE} -eq 1 ]] && echo ${lowerdirs} + echo "While processing '${cvmfs_repo_name}' to be mounted 'read-only' we detected an overlay-upper" + echo " directory (${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper) likely from a previous" + echo " session. Will use it as left-most directory in 'lowerdir' argument for fuse-overlayfs." # make the target CernVM-FS repository available under /cvmfs_ro export EESSI_READONLY="container:cvmfs2 ${cvmfs_repo_name} /cvmfs_ro/${cvmfs_repo_name}" @@ -779,7 +767,12 @@ do # now, put the overlay-upper read-only on top of the repo and make it available under the usual prefix /cvmfs EESSI_READONLY_OVERLAY="container:fuse-overlayfs" - EESSI_READONLY_OVERLAY+=" -o lowerdir=${lowerdirs}" + # The contents of the previous session are available under + # ${EESSI_TMPDIR} which is bind mounted to ${TMP_IN_CONTAINER}. + # Hence, we have to use ${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper + # the left-most directory given for the lowerdir argument is put on top, + # and with no upperdir=... 
the whole overlayfs is made available read-only + EESSI_READONLY_OVERLAY+=" -o lowerdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper:/cvmfs_ro/${cvmfs_repo_name}" EESSI_READONLY_OVERLAY+=" /cvmfs/${cvmfs_repo_name}" export EESSI_READONLY_OVERLAY @@ -795,27 +788,7 @@ do export EESSI_FUSE_MOUNTS fi elif [[ ${cvmfs_repo_access} == "rw" ]] ; then - # use repo-specific overlay directories; if there is already an - # overlay-upper (e.g., from a previous run) move it to overlay-upper-SEQ - # and create a new one; all overlay-upper-SEQs must be added to lowerdir - # starting with the lowest number first and preprending it to the lowerdir - # setting - lowerdirs=/cvmfs_ro/${cvmfs_repo_name} - # if [ -d ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper ]; then - # # determine next sequence number - # last_seq_num=$(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n | tail -n 1 | sed -e 's/^0*//') - # if [ -n ${last_seq_num} ]; then - # last_seq_num=0 - # fi - # next_seq_num=$(($last_seq_num + 1)) - # next_ovl_upper=$(printf "overlay-upper-%03d" ${next_seq_num}) - # mv ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper ${EESSI_TMPDIR}/${cvmfs_repo_name}/${next_ovl_upper} - # for dir in $(ls ${EESSI_TMPDIR}/${cvmfs_repo_name} | grep -E "overlay-upper-[0-9]{3}" | cut -f3 -d- | sort -n); do - # lowerdirs=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper-${dir}:${lowerdirs} - # done - # [[ ${VERBOSE} -eq 1 ]] && ls ${EESSI_TMPDIR}/${cvmfs_repo_name} - # [[ ${VERBOSE} -eq 1 ]] && echo ${lowerdirs} - # fi + # use repo-specific overlay directories mkdir -p ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-upper mkdir -p ${EESSI_TMPDIR}/${cvmfs_repo_name}/overlay-work [[ ${VERBOSE} -eq 1 ]] && echo -e "TMP directory contents:\n$(ls -l ${EESSI_TMPDIR})" @@ -826,6 +799,7 @@ do EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" + lowerdirs=/cvmfs_ro/${cvmfs_repo_name} if [[ ! 
-z ${LOWER_DIRS} ]]; then # need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as # separator while the lowerdir overlayfs option uses ':' From 5fd8fd71dc88b15cbfa5b7053ac0ec85be610606 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 15 Nov 2024 10:41:55 +0100 Subject: [PATCH 35/45] need to create directory for module file in lower_dirs --- bot/build.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bot/build.sh b/bot/build.sh index 7b4414983d..d07c5ab641 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -249,6 +249,9 @@ else grep ^REMOVE_MODULE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_modules cat ${determine_outerr}.rm_modules | while read remove_module; do + module_parent_dir=$(dirname ${STORAGE}/lower_dirs/${remove_module}) + mkdir -p ${module_parent_dir} + chmod ug+rw ${module_parent_dir} touch ${STORAGE}/lower_dirs/${remove_module} chmod ug+rw ${STORAGE}/lower_dirs/${remove_module} done From f7baeafec103cbe01c0aadebdec3fe3074178ebc Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 15 Nov 2024 10:51:10 +0100 Subject: [PATCH 36/45] add try-amend keeppreviousinstal=True option in easystack --- .../2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml index e4c658784f..cd0ecd1108 100644 --- a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml @@ -3,4 +3,6 @@ # EESSI-extend should adjust EASYBUILD_INSTALLPATH and set # EASYBUILD_CUDA_COMPUTE_CAPABILITIES easyconfigs: - - EESSI-extend-2023.06-easybuild.eb + - EESSI-extend-2023.06-easybuild.eb: + options: + try-amend: keeppreviousinstall=True From 3db71ef615064b6a5a23c540d19f9ef9fe9e4dbc Mon Sep 17 00:00:00 2001 
From: Thomas Roeblitz Date: Fri, 15 Nov 2024 11:05:53 +0100 Subject: [PATCH 37/45] need keeppreviousinstall already in load_eessi_... script --- load_eessi_extend_module.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index 9d13f313f3..f24cf98757 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -91,7 +91,7 @@ else eb_install_out=${TMPDIR}/eb_install.out ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})" - ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" 2>&1 | tee ${eb_install_out} + ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" --try-amend=keeppreviousinstall=True 2>&1 | tee ${eb_install_out} check_exit_code $? "${ok_msg}" "${fail_msg}" ) From c56712e2d40d95219126de1c06d4bca24e1a6338 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 15 Nov 2024 14:16:47 +0100 Subject: [PATCH 38/45] revert to using standard method (fakeroot) to remove existing installation --- EESSI-determine-rebuilds.sh | 123 ------------------ EESSI-remove-software.sh | 43 ++---- bot/build.sh | 66 +--------- .../20241112-eb-4.9.4-EESSI-extend.yml | 8 +- eessi_container.sh | 19 +-- load_eessi_extend_module.sh | 2 + 6 files changed, 17 insertions(+), 244 deletions(-) delete mode 100755 EESSI-determine-rebuilds.sh diff --git a/EESSI-determine-rebuilds.sh b/EESSI-determine-rebuilds.sh deleted file mode 100755 index 4f4d5ab713..0000000000 --- a/EESSI-determine-rebuilds.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash -# -# Script to determine which parts of the EESSI software stack (version set through init/eessi_defaults) -# have to be rebuilt - -# see example parsing of command line arguments at -# https://wiki.bash-hackers.org/scripting/posparams#using_a_while_loop -# https://stackoverflow.com/questions/192249/how-do-i-parse-command-line-arguments-in-bash - 
-display_help() { - echo "usage: $0 [OPTIONS]" - echo " -g | --generic - instructs script to build for generic architecture target" - echo " -h | --help - display this usage information" -} - -POSITIONAL_ARGS=() - -while [[ $# -gt 0 ]]; do - case $1 in - -g|--generic) - DETECTION_PARAMETERS="--generic" - shift - ;; - -h|--help) - display_help # Call your function - # no shifting needed here, we're done. - exit 0 - ;; - -*|--*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) # No more options - POSITIONAL_ARGS+=("$1") # save positional arg - shift - ;; - esac -done - -set -- "${POSITIONAL_ARGS[@]}" - -TOPDIR=$(dirname $(realpath $0)) - -export TMPDIR=$(mktemp -d /tmp/eessi-remove.XXXXXXXX) - -source $TOPDIR/scripts/utils.sh - -echo ">> Determining software subdirectory to use for current build host..." -if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then - export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS) - echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script" -else - echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" -fi - -echo ">> Setting up environment..." - -source $TOPDIR/init/bash - -if [ -d $EESSI_CVMFS_REPO ]; then - echo_green "$EESSI_CVMFS_REPO available, OK!" -else - fatal_error "$EESSI_CVMFS_REPO is not available!" -fi - -if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then - fatal_error "Failed to determine software subdirectory?!" -elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then - fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!" -else - echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!" -fi - -echo ">> Configuring EasyBuild..." -EB="eb" -source $TOPDIR/configure_easybuild - -echo ">> Setting up \$MODULEPATH..." 
-# make sure no modules are loaded -module --force purge -# ignore current $MODULEPATH entirely -module unuse $MODULEPATH -module use $EASYBUILD_INSTALLPATH/modules/all -if [[ -z ${MODULEPATH} ]]; then - fatal_error "Failed to set up \$MODULEPATH?!" -else - echo_green ">> MODULEPATH set up: ${MODULEPATH}" -fi - -# assume there's only one diff file that corresponds to the PR patch file -pr_diff=$(ls [0-9]*.diff | head -1) - -# if this script is run as root, use PR patch file to determine if software needs to be removed first -changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") -if [ -z ${changed_easystacks_rebuilds} ]; then - echo "No software needs to be removed." -else - for easystack_file in ${changed_easystacks_rebuilds}; do - # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file - eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g') - - # load EasyBuild module (will be installed if it's not available yet) - source ${TOPDIR}/load_easybuild_module.sh ${eb_version} - - if [ -f ${easystack_file} ]; then - echo_green "Software rebuild(s) requested in ${easystack_file}, so determining which existing installation have to be removed..." - # we need to remove existing installation directories first, - # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) - # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) - rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') - for app in ${rebuild_apps}; do - app_dir=${EASYBUILD_INSTALLPATH}/software/${app} - app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua - echo_yellow "Removing ${app_dir} and ${app_module}..." 
- find ${app_dir} -type d | sed -e 's/^/REMOVE_DIRECTORY /' - find ${app_dir} -type f | sed -e 's/^/REMOVE_FILE /' - echo "REMOVE_MODULE ${app_module}" - done - else - fatal_error "Easystack file ${easystack_file} not found!" - fi - done -fi diff --git a/EESSI-remove-software.sh b/EESSI-remove-software.sh index 8b882b38a9..1a03a7af98 100755 --- a/EESSI-remove-software.sh +++ b/EESSI-remove-software.sh @@ -101,7 +101,7 @@ fi pr_diff=$(ls [0-9]*.diff | head -1) # if this script is run as root, use PR patch file to determine if software needs to be removed first -if [ $EUID -ne 0 ]; then +if [ $EUID -eq 0 ]; then changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep 'easystacks/.*yml$' | egrep -v 'known-issues|missing' | grep "/rebuilds/") if [ -z ${changed_easystacks_rebuilds} ]; then echo "No software needs to be removed." @@ -114,14 +114,11 @@ if [ $EUID -ne 0 ]; then source ${TOPDIR}/load_easybuild_module.sh ${eb_version} if [ -f ${easystack_file} ]; then - echo_green "Software rebuild(s) requested in ${easystack_file}, so" - echo_green " determining which existing installation have to be removed (assuming contents" - echo_green " have been made writable/deletable)..." + echo_green "Software rebuild(s) requested in ${easystack_file}, so determining which existing installation have to be removed..." 
# we need to remove existing installation directories first, # so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R]) # * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion) - # rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') - rebuild_apps=$(eb --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') + rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}') for app in ${rebuild_apps}; do # Returns e.g. /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2/modules/all: app_modulepath=$(module --terse av ${app} 2>&1 | head -n 1 | sed 's/://') @@ -129,35 +126,12 @@ if [ $EUID -ne 0 ]; then app_installprefix=$(dirname $(dirname ${app_modulepath})) app_dir=${app_installprefix}/software/${app} app_module=${app_installprefix}/modules/all/${app}.lua - # app_dir=${EASYBUILD_INSTALLPATH}/software/${app} - # app_module=${EASYBUILD_INSTALLPATH}/modules/all/${app}.lua echo_yellow "Removing ${app_dir} and ${app_module}..." 
- # suggestion: use the recursive rm's and ls a specific - # directory only (${app_dir}/easybuild) - rm -rdfv ${app_dir} - rm -rdfv ${app_module} - echo_yellow "Contents of ${app_dir}/easybuild (should not exist)" - ls -l ${app_dir}/easybuild || true - # ls didn't change the result (permission denied) - # ls ${app_dir}/easybuild || true - # 2nd idea: recreate some directory + rm -rf ${app_dir} + rm -rf ${app_module} + # recreate some directory to work around permission denied + # issues when rebuilding the package mkdir -p ${app_dir}/easybuild - echo_yellow "Contents of ${app_dir}/easybuild after it got recreated with 'mkdir -p' (should be empty)" - ls -l ${app_dir}/easybuild || true - - ## 1st remove files in depth-first order - #for filepath in $(find ${app_dir} -depth -type f); do - # echo " removing file ${filepath}" - # rm -fv ${filepath} - #done - ## 2nd remove directories in depth-first order - #for dirpath in $(find ${app_dir} -depth -type d); do - # echo " removing directory ${dirpath}" - # rmdir -v ${dirpath} - #done - ## 3rd remove module file - #echo " removing module file ${app_module}" - #rm -fv ${app_module} done else fatal_error "Easystack file ${easystack_file} not found!" @@ -165,6 +139,5 @@ if [ $EUID -ne 0 ]; then done fi else - # fatal_error "This script can only be run by root!" - fatal_error "This script must not be run by root!" + fatal_error "This script can only be run by root!" fi diff --git a/bot/build.sh b/bot/build.sh index d07c5ab641..81b3ef4660 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -200,62 +200,6 @@ changed_easystacks_rebuilds=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed if [[ -z "${changed_easystacks_rebuilds}" ]]; then echo "This PR does not add any easystack files in a rebuilds subdirectory, so let's skip the removal step." 
else - # determine which software packages (and modules) have to be removed - TARBALL_TMP_DETERMINE_STEP_DIR=${PREVIOUS_TMP_DIR}/determine_step - mkdir -p ${TARBALL_TMP_DETERMINE_STEP_DIR} - - # prepare arguments to eessi_container.sh specific to determine step - declare -a DETERMINE_STEP_ARGS=() - DETERMINE_STEP_ARGS+=("--save" "${TARBALL_TMP_DETERMINE_STEP_DIR}") - DETERMINE_STEP_ARGS+=("--storage" "${STORAGE}") - - # create tmp file for output of determine step - determine_outerr=$(mktemp determine.outerr.XXXX) - - echo "Executing command to determine software to be removed:" - echo "${software_layer_dir}/eessi_container.sh ${COMMON_ARGS[@]} ${DETERMINE_STEP_ARGS[@]}" - echo " -- ${software_layer_dir}/EESSI-determine-rebuilds.sh \"${DETERMINE_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${determine_outerr}" - ${software_layer_dir}/eessi_container.sh "${COMMON_ARGS[@]}" "${DETERMINE_STEP_ARGS[@]}" \ - -- ${software_layer_dir}/EESSI-determine-rebuilds.sh "${DETERMINE_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${determine_outerr} - - # process output file - # for each line containing 'REMOVE_DIRECTORY some_path' - # create a new directory ${STORAGE}/lower_dirs/some_path_stripped - # where the prefix /cvmfs/repo_name is removed from some_path - # set permission of the directory to ug+rwx - # SKIP for each line containing 'REMOVE_FILE some_file_path' - # SKIP touch a new file ${STORAGE}/lower_dirs/some_file_path_stripped - # SKIP where the prefix /cvmfs/repo_name is removed from some_file_path - # SKIP set permission of the file to ug+rw - # for each line containing 'REMOVE_MODULE some_file_path' - # touch a new file ${STORAGE}/lower_dirs/some_file_path_stripped - # where the prefix /cvmfs/repo_name is removed from some_file_path - # set permission of the file to ug+rw - - LOWER_DIRS="${STORAGE}/lower_dirs" - mkdir -p "${LOWER_DIRS}" - - grep ^REMOVE_DIRECTORY ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_dirs - cat ${determine_outerr}.rm_dirs | while read 
remove_dir; do - mkdir -p ${STORAGE}/lower_dirs/${remove_dir} - chmod ug+rwx ${STORAGE}/lower_dirs/${remove_dir} - done - - # grep ^REMOVE_FILE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_files - # cat ${determine_outerr}.rm_files | while read remove_file; do - # touch ${STORAGE}/lower_dirs/${remove_file} - # chmod ug+rw ${STORAGE}/lower_dirs/${remove_file} - # done - - grep ^REMOVE_MODULE ${determine_outerr} | cut -f4- -d'/' > ${determine_outerr}.rm_modules - cat ${determine_outerr}.rm_modules | while read remove_module; do - module_parent_dir=$(dirname ${STORAGE}/lower_dirs/${remove_module}) - mkdir -p ${module_parent_dir} - chmod ug+rw ${module_parent_dir} - touch ${STORAGE}/lower_dirs/${remove_module} - chmod ug+rw ${STORAGE}/lower_dirs/${remove_module} - done - # prepare directory to store tarball of tmp for removal and build steps TARBALL_TMP_REMOVAL_STEP_DIR=${PREVIOUS_TMP_DIR}/removal_step mkdir -p ${TARBALL_TMP_REMOVAL_STEP_DIR} @@ -267,11 +211,7 @@ else # add fakeroot option in order to be able to remove software, see: # https://github.com/EESSI/software-layer/issues/312 - # REMOVAL_STEP_ARGS+=("--fakeroot") - - if [[ ! -z ${LOWER_DIRS} ]]; then - REMOVAL_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") - fi + REMOVAL_STEP_ARGS+=("--fakeroot") # create tmp file for output of removal step removal_outerr=$(mktemp remove.outerr.XXXX) @@ -309,10 +249,6 @@ if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi -if [[ ! 
-z ${LOWER_DIRS} ]]; then - BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") -fi - # create tmp file for output of build step build_outerr=$(mktemp build.outerr.XXXX) diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml index cd0ecd1108..2129eebee0 100644 --- a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml @@ -3,6 +3,8 @@ # EESSI-extend should adjust EASYBUILD_INSTALLPATH and set # EASYBUILD_CUDA_COMPUTE_CAPABILITIES easyconfigs: - - EESSI-extend-2023.06-easybuild.eb: - options: - try-amend: keeppreviousinstall=True + - EESSI-extend-2023.06-easybuild.eb +# the options are added to load_eessi_extend_module.sh +# - EESSI-extend-2023.06-easybuild.eb: +# options: +# try-amend: keeppreviousinstall=True diff --git a/eessi_container.sh b/eessi_container.sh index 73244778cf..fc97f9877c 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -89,11 +89,6 @@ display_help() { echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs," echo " MODE==install for a CUDA installation, MODE==run to" echo " attach a GPU, MODE==all for both [default: false]" - echo " -o | --lower-dirs DIRS - list of ':' separated directories that are used" - echo " in front of the default lower dir (CVMFS repo);" - echo " fuse-overlayfs will merge all lower directories;" - echo " the option can be used to make certain directories" - echo " in the CVMFS repo writable [default: none]" echo " -r | --repository CFG - configuration file or identifier defining the" echo " repository to use; can be given multiple times;" echo " CFG may include a suffix ',access={ro,rw}' to" @@ -130,7 +125,6 @@ FAKEROOT=0 VERBOSE=0 STORAGE= LIST_REPOS=0 -LOWER_DIRS= MODE="shell" SETUP_NVIDIA=0 REPOSITORIES=() @@ -188,10 +182,6 @@ while [[ $# -gt 0 ]]; do 
NVIDIA_MODE="$2" shift 2 ;; - -o|--lower-dirs) - LOWER_DIRS="$2" - shift 2 - ;; -r|--repository) REPOSITORIES+=("$2") shift 2 @@ -799,14 +789,7 @@ do EESSI_FUSE_MOUNTS+=("--fusemount" "${EESSI_READONLY}") EESSI_WRITABLE_OVERLAY="container:fuse-overlayfs" - lowerdirs=/cvmfs_ro/${cvmfs_repo_name} - if [[ ! -z ${LOWER_DIRS} ]]; then - # need to convert ':' in LOWER_DIRS to ',' because bind mounts use ',' as - # separator while the lowerdir overlayfs option uses ':' - export BIND_PATHS="${BIND_PATHS},${LOWER_DIRS/:/,}" - lowerdirs=${LOWER_DIRS}:${lowerdirs} - fi - EESSI_WRITABLE_OVERLAY+=" -o lowerdir=${lowerdirs}" + EESSI_WRITABLE_OVERLAY+=" -o lowerdir=/cvmfs_ro/${cvmfs_repo_name}" EESSI_WRITABLE_OVERLAY+=" -o upperdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-upper" EESSI_WRITABLE_OVERLAY+=" -o workdir=${TMP_IN_CONTAINER}/${cvmfs_repo_name}/overlay-work" EESSI_WRITABLE_OVERLAY+=" /cvmfs/${cvmfs_repo_name}" diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index f24cf98757..d3ba524053 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -91,6 +91,8 @@ else eb_install_out=${TMPDIR}/eb_install.out ok_msg="EESSI-extend/${EESSI_EXTEND_VERSION} installed, let's go!" fail_msg="Installing EESSI-extend/${EESSI_EXTEND_VERSION} failed, that's not good... (output: ${eb_install_out})" + # while always adding --try-amend=keep... may do no harm, we could make + # an attempt to figure out if it is needed, e.g., when we are rebuilding ${EB} "EESSI-extend-${EESSI_EXTEND_VERSION}.eb" --try-amend=keeppreviousinstall=True 2>&1 | tee ${eb_install_out} check_exit_code $? 
"${ok_msg}" "${fail_msg}" ) From d1264e11e6dac91e3e87ba0b35a658426fa9e3c0 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 15 Nov 2024 19:55:53 +0100 Subject: [PATCH 39/45] first load/install EESSI-extend module, then install CUDA and libraries --- EESSI-install-software.sh | 40 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 50eacc52bf..bb103d8c21 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -229,6 +229,27 @@ if [[ "${EESSI_CVMFS_REPO}" != /cvmfs/dev.eessi.io ]]; then ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} fi +echo ">> Configuring EasyBuild..." + +# Make sure that we use the EESSI_CVMFS_INSTALL +# Since the path is set when loading EESSI-extend, we reload it to make sure it works - even if it is already loaded +# Note we need to do this after running install_cuda_and_libraries, since that does installations in the EESSI_SITE_INSTALL +unset EESSI_USER_INSTALL +unset EESSI_PROJECT_INSTALL +unset EESSI_SITE_INSTALL +export EESSI_CVMFS_INSTALL=1 +module unload EESSI-extend + +# The EESSI-extend module is being loaded (or installed if it doesn't exist yet). +# The script requires the EESSI_VERSION given as argument, a couple of +# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the +# function check_exit_code defined. +# NOTE, the script exits if those variables/functions are undefined. +export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} +# Loading the EESSI-extend module may adjust the value of EASYBUILD_INSTALLPATH, +# e.g., to point to the installation directory for accelerators. 
+source load_eessi_extend_module.sh ${EESSI_VERSION} + # Install full CUDA SDK and cu* libraries in host_injections # Hardcode this for now, see if it works # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install @@ -267,25 +288,6 @@ if command_exists "nvidia-smi"; then fi -echo ">> Configuring EasyBuild..." - -# Make sure that we use the EESSI_CVMFS_INSTALL -# Since the path is set when loading EESSI-extend, we reload it to make sure it works - even if it is already loaded -# Note we need to do this after running install_cuda_and_libraries, since that does installations in the EESSI_SITE_INSTALL -unset EESSI_USER_INSTALL -unset EESSI_PROJECT_INSTALL -unset EESSI_SITE_INSTALL -export EESSI_CVMFS_INSTALL=1 -module unload EESSI-extend - -# The EESSI-extend module is being loaded (or installed if it doesn't exist yet). -# The script requires the EESSI_VERSION given as argument, a couple of -# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the -# function check_exit_code defined. -# NOTE, the script exits if those variables/functions are undefined. -export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} -source load_eessi_extend_module.sh ${EESSI_VERSION} - if [ ! 
-z "${shared_fs_path}" ]; then shared_eb_sourcepath=${shared_fs_path}/easybuild/sources echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path" From 66c36c898a208fcdce1258f7bde53f08a5fa28f6 Mon Sep 17 00:00:00 2001 From: Richard Top Date: Mon, 18 Nov 2024 12:59:16 +0000 Subject: [PATCH 40/45] clean `${tmpdir}` content for each iteration in `install_cuda_and_libraries.sh` --- scripts/gpu_support/nvidia/install_cuda_and_libraries.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index e1ca42fc9a..ee219fb444 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -239,9 +239,9 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do echo_green "all installations at ${EESSI_SITE_SOFTWARE_PATH}/software/... succeeded!" fi + # clean up tmpdir content + rm -rf "${tmpdir}"/* + # Restore MODULEPATH for next loop iteration MODULEPATH=${SAVE_MODULEPATH} done - -# clean up tmpdir -rm -rf "${tmpdir}" From fedfd77582a42fa81d60754bd4054e5f4a8a14b9 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 18 Nov 2024 20:05:59 +0100 Subject: [PATCH 41/45] tweak script to just load EESSI-extend, and exit if it couldn't be loaded --- .../nvidia/install_cuda_and_libraries.sh | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index 51d139bec5..87b7653749 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -112,14 +112,19 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do unset EESSI_USER_INSTALL export EESSI_SITE_INSTALL=1 module unload EESSI-extend - - # The EESSI-extend module is being loaded 
(or installed if it doesn't exist yet). - # The script requires the EESSI_VERSION given as argument, a couple of - # environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the - # function check_exit_code defined. - # NOTE, the script exits if those variables/functions are undefined. - export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} - source load_eessi_extend_module.sh ${EESSI_VERSION} + ml_av_eessi_extend_out=${tmpdir}/ml_av_eessi_extend.out + # need to use --ignore_cache to avoid the case that the module was removed (to be + # rebuilt) but it is still in the cache and the rebuild failed + EESSI_EXTEND_VERSION=${EESSI_VERSION}-easybuild + module --ignore_cache avail 2>&1 | grep -i EESSI-extend/${EESSI_EXTEND_VERSION} &> ${ml_av_eessi_extend_out} + if [[ $? -eq 0 ]]; then + echo_green ">> Module for EESSI-extend/${EESSI_EXTEND_VERSION} found!" + else + error="\nNo module for EESSI-extend/${EESSI_EXTEND_VERSION} found\nwhile EESSI has been initialised to use software under ${EESSI_SOFTWARE_PATH}\n" + fatal_error "${error}" + fi + module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION} + unset EESSI_EXTEND_VERSION # Install modules in hidden .modules dir to keep track of what was installed before # (this action is temporary, and we do not call Lmod again within the current shell context, but in EasyBuild From 635fe8894523376d00666420b659e6d08b37ef25 Mon Sep 17 00:00:00 2001 From: ocaisa Date: Tue, 19 Nov 2024 10:06:07 +0100 Subject: [PATCH 42/45] Remove tempdir after running `install_cuda_and_libraries.sh` --- scripts/gpu_support/nvidia/install_cuda_and_libraries.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index ee219fb444..119316dab2 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh 
@@ -245,3 +245,5 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do # Restore MODULEPATH for next loop iteration MODULEPATH=${SAVE_MODULEPATH} done +# Remove the temporary directory +rm -rf "${tmpdir}" From 83d501c8724e0877298a1ee49302864429ac7a46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Tue, 19 Nov 2024 16:01:25 +0100 Subject: [PATCH 43/45] add GDRCopy-2.4-GCCcore-13.2.0.eb --- .../software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml index 03c9ec8f98..d9b7dca5d5 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml @@ -12,3 +12,4 @@ easyconfigs: options: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21783 from-commit: 5fa3db9eb36f91cba3fbf351549f8ba2849abc33 + - GDRCopy-2.4-GCCcore-13.2.0.eb From 8e87c33bd7d7015877daae975f94404e025cfb7a Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 20 Nov 2024 21:51:40 +0100 Subject: [PATCH 44/45] various changes to address suggestions - EESSI-install-software.sh - rearranged setting up build environment and improved comments to make choices more clear - moved test if EasyBuild exists into install_cuda_and_libraries.sh, removed unnecessary export of environment variables and clarified comments accordingly - .../rebuilds/20241112-eb-4.9.4-EESSI-extend.yml - removed outdated comments - load_eessi_extend_module.sh - replaced sourcing configure_easybuild with two needed EASYBUILD_ environment settings - install_cuda_and_libraries.sh - removed comment and setting of EESSI_SITE_INSTALL - clarified comment when saving MODULEPATH - added a check if the required EasyBuild version exists and only if so use it to process the easystack file that required it --- 
EESSI-install-software.sh | 39 +++++++------------ .../20241112-eb-4.9.4-EESSI-extend.yml | 4 -- load_eessi_extend_module.sh | 4 +- .../nvidia/install_cuda_and_libraries.sh | 16 +++++--- 4 files changed, 25 insertions(+), 38 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index bb103d8c21..83c06c2184 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -231,54 +231,41 @@ fi echo ">> Configuring EasyBuild..." -# Make sure that we use the EESSI_CVMFS_INSTALL -# Since the path is set when loading EESSI-extend, we reload it to make sure it works - even if it is already loaded -# Note we need to do this after running install_cuda_and_libraries, since that does installations in the EESSI_SITE_INSTALL +# Make sure EESSI-extend is not loaded, and configure location variables for a +# CVMFS installation +module unload EESSI-extend unset EESSI_USER_INSTALL unset EESSI_PROJECT_INSTALL unset EESSI_SITE_INSTALL export EESSI_CVMFS_INSTALL=1 -module unload EESSI-extend -# The EESSI-extend module is being loaded (or installed if it doesn't exist yet). +# We now run 'source load_eessi_extend_module.sh' to load or install and load the +# EESSI-extend module which sets up all build environment settings. # The script requires the EESSI_VERSION given as argument, a couple of -# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the -# function check_exit_code defined. -# NOTE, the script exits if those variables/functions are undefined. +# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the +# function check_exit_code defined. +# NOTE 1, the script exits if those variables/functions are undefined. +# NOTE 2, loading the EESSI-extend module may adjust the value of EASYBUILD_INSTALLPATH, +# e.g., to point to the installation directory for accelerators. +# NOTE 3, we have to set a default for EASYBUILD_INSTALLPATH here in cases the +# EESSI-extend module itself needs to be installed. 
export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} -# Loading the EESSI-extend module may adjust the value of EASYBUILD_INSTALLPATH, -# e.g., to point to the installation directory for accelerators. source load_eessi_extend_module.sh ${EESSI_VERSION} # Install full CUDA SDK and cu* libraries in host_injections # Hardcode this for now, see if it works # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install # Allow skipping CUDA SDK install in e.g. CI environments -# The install_cuda... script uses EasyBuild. So, we need to check if we have EB -# or skip this step. echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary" -module_avail_out=$TMPDIR/ml.out -module avail 2>&1 | grep EasyBuild &> ${module_avail_out} -if [[ $? -eq 0 ]]; then - echo_green ">> Found an EasyBuild module" -else - echo_yellow ">> No EasyBuild module found: skipping step to install CUDA (see output in ${module_avail_out})" - export skip_cuda_install=True -fi - temp_install_storage=${TMPDIR}/temp_install_storage mkdir -p ${temp_install_storage} if [ -z "${skip_cuda_install}" ] || [ ! 
"${skip_cuda_install}" ]; then - # need to ensure that some variables will be available to the script - # TMPDIR, EB, EESSI_VERSION, for EASYBUILD_INSTALLPATH (EESSI_PREFIX, - # EESSI_OS_TYPE, EESSI_SOFTWARE_SUBDIR_OVERRIDE) - export TMPDIR EB EESSI_VERSION EESSI_PREFIX EESSI_OS_TYPE EESSI_SOFTWARE_SUBDIR_OVERRIDE ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ -t ${temp_install_storage} \ --accept-cuda-eula \ --accept-cudnn-eula else - echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found" + echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" fi # Install NVIDIA drivers in host_injections (if they exist) diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml index 2129eebee0..e4c658784f 100644 --- a/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241112-eb-4.9.4-EESSI-extend.yml @@ -4,7 +4,3 @@ # EASYBUILD_CUDA_COMPUTE_CAPABILITIES easyconfigs: - EESSI-extend-2023.06-easybuild.eb -# the options are added to load_eessi_extend_module.sh -# - EESSI-extend-2023.06-easybuild.eb: -# options: -# try-amend: keeppreviousinstall=True diff --git a/load_eessi_extend_module.sh b/load_eessi_extend_module.sh index d3ba524053..62b6e3f3ae 100755 --- a/load_eessi_extend_module.sh +++ b/load_eessi_extend_module.sh @@ -82,8 +82,8 @@ else # source configure_easybuild to use correct eb settings ( - EESSI_MAIN_DIR=$(dirname $(readlink -f $BASH_SOURCE)) - source ${EESSI_MAIN_DIR}/configure_easybuild + export EASYBUILD_PREFIX=${TMPDIR}/easybuild + export EASYBUILD_READ_ONLY_INSTALLDIR=1 echo ">> Final installation in ${EASYBUILD_INSTALLPATH}..." 
export PATH=${EB_TMPDIR}/bin:${PATH} diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index f7405e3ca4..cd4d1daf38 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -77,10 +77,6 @@ done # Make sure EESSI is initialised check_eessi_initialised -# Make sure that `EESSI-extend` will install in the site installation path EESSI_SITE_SOFTWARE_PATH -export EESSI_SITE_INSTALL=1 -echo "EESSI_SITE_SOFTWARE_PATH=${EESSI_SITE_SOFTWARE_PATH}" - # we need a directory we can use for temporary storage if [[ -z "${TEMP_DIR}" ]]; then tmpdir=$(mktemp -d) @@ -93,7 +89,7 @@ else fi echo "Created temporary directory '${tmpdir}'" -# use EESSI_SITE_SOFTWARE_PATH/.modules/all as MODULEPATH +# Store MODULEPATH so it can be restored at the end of each loop iteration SAVE_MODULEPATH=${MODULEPATH} for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do @@ -103,8 +99,16 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do eb_version=$(echo ${EASYSTACK_FILE} | sed 's/.*eb-\([0-9.]*\).*/\1/g') # Load EasyBuild version for this easystack file _before_ loading EESSI-extend - module avail EasyBuild + module_avail_out=${tmpdir}/ml.out + module avail 2>&1 | grep EasyBuild/${eb_version} &> ${module_avail_out} + if [[ $? 
-eq 0 ]]; then + echo_green ">> Found an EasyBuild/${eb_version} module" + else + echo_yellow ">> No EasyBuild/${eb_version} module found: skipping step to install easystack file ${EASYSTACK_FILE} (see output in ${module_avail_out})" + continue + fi module load EasyBuild/${eb_version} + # Make sure EESSI-extend does a site install here # We need to reload it with the current environment variables set unset EESSI_CVMFS_INSTALL From 9812aa5cb5b6c168c03725fcb18d8b73257a0c68 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Thu, 21 Nov 2024 08:14:41 +0100 Subject: [PATCH 45/45] catch FATAL: messages when checking build result --- bot/check-build.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/bot/check-build.sh b/bot/check-build.sh index f185b18dda..41aeab258e 100755 --- a/bot/check-build.sh +++ b/bot/check-build.sh @@ -17,6 +17,7 @@ # - SUCCESS (all of) # - working directory contains slurm-JOBID.out file # - working directory contains eessi*tar.gz +# - no message FATAL # - no message ERROR # - no message FAILED # - no message ' required modules missing:' @@ -25,6 +26,7 @@ # - FAILED (one of ... implemented as NOT SUCCESS) # - no slurm-JOBID.out file # - no tarball +# - message with FATAL # - message with ERROR # - message with FAILED # - message with ' required modules missing:' @@ -105,6 +107,16 @@ else [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" fi +FATAL=-1 +if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then + GP_fatal='FATAL: ' + grep_out=$(grep -v "^>> searching for " ${job_dir}/${job_out} | grep "${GP_fatal}") + [[ $? 
-eq 0 ]] && FATAL=1 || FATAL=0 + # have to be careful to not add searched for pattern into slurm out file + [[ ${VERBOSE} -ne 0 ]] && echo ">> searching for '"${GP_fatal}"'" + [[ ${VERBOSE} -ne 0 ]] && echo "${grep_out}" +fi + ERROR=-1 if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]]; then GP_error='ERROR: ' @@ -163,6 +175,7 @@ fi [[ ${VERBOSE} -ne 0 ]] && echo "SUMMARY: ${job_dir}/${job_out}" [[ ${VERBOSE} -ne 0 ]] && echo " : ()" +[[ ${VERBOSE} -ne 0 ]] && echo " FATAL......: $([[ $FATAL -eq 1 ]] && echo 'yes' || echo 'no') (no)" [[ ${VERBOSE} -ne 0 ]] && echo " ERROR......: $([[ $ERROR -eq 1 ]] && echo 'yes' || echo 'no') (no)" [[ ${VERBOSE} -ne 0 ]] && echo " FAILED.....: $([[ $FAILED -eq 1 ]] && echo 'yes' || echo 'no') (no)" [[ ${VERBOSE} -ne 0 ]] && echo " REQ_MISSING: $([[ $MISSING -eq 1 ]] && echo 'yes' || echo 'no') (no)" @@ -190,6 +203,7 @@ job_result_file=_bot_job${SLURM_JOB_ID}.result # Default reason: if [[ ${SLURM_OUTPUT_FOUND} -eq 1 ]] && \ + [[ ${FATAL} -eq 0 ]] && \ [[ ${ERROR} -eq 0 ]] && \ [[ ${FAILED} -eq 0 ]] && \ [[ ${MISSING} -eq 0 ]] && \ @@ -223,6 +237,7 @@ fi #
_Details_
#
# :white_check_mark: job output file slurm-4682.out
+# :white_check_mark: no message matching FATAL:
# :white_check_mark: no message matching ERROR:
# :white_check_mark: no message matching FAILED:
# :white_check_mark: no message matching required modules missing:
@@ -264,6 +279,7 @@ fi #
_Details_
#
# :white_check_mark: job output file slurm-4682.out
+# :x: no message matching FATAL:
# :x: no message matching ERROR:
# :white_check_mark: no message matching FAILED:
# :x: no message matching required modules missing:
@@ -381,6 +397,10 @@ success_msg="job output file ${job_out}" failure_msg="no job output file ${job_out}" comment_details_list=${comment_details_list}$(add_detail ${SLURM_OUTPUT_FOUND} 1 "${success_msg}" "${failure_msg}") +success_msg="no message matching ${GP_fatal}" +failure_msg="found message matching ${GP_fatal}" +comment_details_list=${comment_details_list}$(add_detail ${FATAL} 0 "${success_msg}" "${failure_msg}") + success_msg="no message matching ${GP_error}" failure_msg="found message matching ${GP_error}" comment_details_list=${comment_details_list}$(add_detail ${ERROR} 0 "${success_msg}" "${failure_msg}")