Merge branch 'main' into smg/detect-and-track-integration-test

SainsburyWellcomeCentre · Dec 9, 2024 · ad14219 · ad14219
2 parents 32952e4 + ba60fa9
commit ad14219
Show file tree

Hide file tree

Showing 14 changed files with 1,124 additions and 90 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -6,6 +6,7 @@ recursive-include guides *.md
 recursive-include crabs/tracker *.md
 recursive-include bash_scripts *.sh
 recursive-include notebooks *.py
+recursive-include notebooks *.ipynb
 recursive-include scripts *.py
 recursive-include crabs *.yaml
 recursive-include guides *.png

diff --git a/README.md b/README.md
@@ -138,7 +138,7 @@ To track crabs in a new video, using a trained detector and a tracker, run the f
 detect-and-track-video --trained_model_path <path-to-ckpt-file> --video_path <path-to-input-video>
 ```
 
-This will produce a `tracking_output_<timestamp>` directory with the output from tracking under the current working directory.
+This will produce a `tracking_output_<timestamp>` directory with the output from tracking under the current working directory. To avoid adding the `<timestamp>` suffix to the directory name, run the command with the `--output_dir_no_timestamp` flag. To see the full list of possible arguments to the `detect-and-track-video` command, run it with the `--help` flag.
 
 The tracking output consists of:
 - a .csv file named `<video-name>_tracks.csv`, with the tracked bounding boxes data;
@@ -153,8 +153,6 @@ If a file with ground-truth annotations is passed to the command (with the `--an
 
 <!-- When used in combination with the `--save_video` flag, the tracked video will contain predicted bounding boxes in red, and ground-truth bounding boxes in green. -- PR 216-->
 
-To see the full list of possible arguments to the `evaluate-detector` command, run it with the `--help` flag.
-
 ## Task-specific guides
 For further information on specific tasks, such as launching a training job or evaluating a set of models in the HPC cluster, please see [our guides](guides).
 

diff --git a/bash_scripts/run_detect_and_track_array.sh b/bash_scripts/run_detect_and_track_array.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+
+#SBATCH -p gpu # # partition
+#SBATCH --gres=gpu:1 # For any GPU: --gres=gpu:1. For a specific one: --gres=gpu:rtx5000
+#SBATCH -N 1   # number of nodes
+#SBATCH --ntasks-per-node 8 # 2 # max number of tasks per node
+#SBATCH --mem 32G # memory pool for all cores
+#SBATCH -t 3-00:00 # time (D-HH:MM)
+#SBATCH -o slurm_array.%A-%a.%N.out
+#SBATCH -e slurm_array.%A-%a.%N.err
+#SBATCH --mail-type=ALL
+#SBATCH [email protected]
+#SBATCH --array=0-233%25
+
+
+# NOTE on SBATCH command for array jobs
+# with "SBATCH --array=0-n%m" ---> runs n+1 separate jobs (task IDs 0 to n), but not more than m at a time.
+# the number of array jobs should match the number of input files
+
+
+# memory: exported so that the python process launched below inherits the setting
+# see https://pytorch.org/docs/stable/notes/cuda.html#environment-variables
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+
+# -----------------------------
+# Error settings for bash
+# -----------------------------
+# see https://wizardzines.com/comics/bash-errors/
+set -e  # do not continue after errors
+set -u  # throw error if variable is unset
+set -o pipefail  # make the pipe fail if any part of it fails
+
+# ---------------------
+# Define variables
+# ----------------------
+
+# Path to the trained model
+CKPT_PATH="/ceph/zoo/users/sminano/ml-runs-all/ml-runs/317777717624044570/40b1688a76d94bd08175cb380d0a6e0e/checkpoints/last.ckpt"
+
+# Path to the tracking config file
+TRACKING_CONFIG_FILE="/ceph/zoo/users/sminano/cluster_tracking_config.yaml"
+
+# List of videos to run inference on: define VIDEOS_DIR and VIDEO_FILENAME
+# NOTE: if any of the paths have spaces, put the path in quotes, but stopping and re-starting at the wildcard.
+# e.g.: "/ceph/zoo/users/sminano/ml-runs-all/ml-runs-scratch/763954951706829194/"*"/checkpoints"
+# e.g.: "checkpoint-epoch="*".ckpt"
+# List of videos to run inference on
+VIDEOS_DIR="/ceph/zoo/users/sminano/escape_clips_all"
+VIDEO_FILENAME=*.mov
+mapfile -t LIST_VIDEOS < <(find $VIDEOS_DIR -type f -name $VIDEO_FILENAME)
+
+
+# Set output directory name
+# by default under current working directory
+OUTPUT_DIR_NAME="tracking_output_slurm_$SLURM_ARRAY_JOB_ID"
+
+# Select optional output
+SAVE_VIDEO=true
+SAVE_FRAMES=false
+
+
+# version of the codebase
+GIT_BRANCH=main
+
+# --------------------
+# Check inputs
+# --------------------
+# Check that the number of input videos matches SLURM_ARRAY_TASK_COUNT
+# (the number of tasks in this job array); if not, exit with an error
+if [[ $SLURM_ARRAY_TASK_COUNT -ne ${#LIST_VIDEOS[@]} ]]; then
+    echo "The number of array tasks does not match the number of input videos"
+    exit 1
+fi
+
+# -----------------------------
+# Create virtual environment
+# -----------------------------
+module load miniconda
+
+# Define an environment for each task of the array job,
+# in the temporary directory of the compute node
+ENV_NAME=crabs-dev-$SLURM_ARRAY_JOB_ID-$SLURM_ARRAY_TASK_ID
+ENV_PREFIX=$TMPDIR/$ENV_NAME
+
+# create environment
+conda create \
+    --prefix $ENV_PREFIX \
+    -y \
+    python=3.10
+
+# activate environment
+source activate $ENV_PREFIX
+
+# install crabs package in virtual env, from the GIT_BRANCH branch on GitHub
+python -m pip install git+https://github.com/SainsburyWellcomeCentre/crabs-exploration.git@$GIT_BRANCH
+
+# log pip and python locations
+echo $ENV_PREFIX
+which python
+which pip
+
+# print the version of crabs package (last number is the commit hash)
+echo "Git branch: $GIT_BRANCH"
+conda list crabs
+echo "-----"
+
+# ------------------------------------
+# GPU specs
+# ------------------------------------
+echo "Memory used per GPU before inference"
+echo $(nvidia-smi --query-gpu=name,memory.total,memory.free,memory.used --format=csv) #noheader
+echo "-----"
+
+
+# -------------------------
+# Run detect-and-track
+# -------------------------
+# video used in this job: one video per array task, selected by task ID
+INPUT_VIDEO=${LIST_VIDEOS[${SLURM_ARRAY_TASK_ID}]}
+
+echo "Running inference on $INPUT_VIDEO using trained model at $CKPT_PATH"
+
+# Set flags based on boolean variables
+if [ "$SAVE_FRAMES" = "true" ]; then
+    SAVE_FRAMES_FLAG="--save_frames"
+else
+    SAVE_FRAMES_FLAG=""
+fi
+
+if [ "$SAVE_VIDEO" = "true" ]; then
+    SAVE_VIDEO_FLAG="--save_video"
+else
+    SAVE_VIDEO_FLAG=""
+fi
+
+# run detect-and-track command
+# - to save all results from the array job in the same output directory
+#   we use --output_dir_no_timestamp
+# - the output directory is created under SLURM_SUBMIT_DIR by default
+# - paths are quoted so that they survive spaces; the optional flags are left
+#   unquoted on purpose, so that an empty flag expands to no argument at all
+detect-and-track-video  \
+    --trained_model_path "$CKPT_PATH"  \
+    --video_path "$INPUT_VIDEO"  \
+    --config_file "$TRACKING_CONFIG_FILE"  \
+    --output_dir "$OUTPUT_DIR_NAME"  \
+    --output_dir_no_timestamp  \
+    --accelerator gpu  \
+    $SAVE_FRAMES_FLAG  \
+    $SAVE_VIDEO_FLAG
+
+# copy tracking config to output directory, named after the input video
+shopt -s extglob  # Enable extended globbing
+
+# get input video filename without directory and without extension
+INPUT_VIDEO_NO_EXT="${INPUT_VIDEO##*/}"
+INPUT_VIDEO_NO_EXT="${INPUT_VIDEO_NO_EXT%.*}"
+
+cp "$TRACKING_CONFIG_FILE" "$SLURM_SUBMIT_DIR"/"$OUTPUT_DIR_NAME"/"$INPUT_VIDEO_NO_EXT"_config.yaml
+
+
+echo "Copied $TRACKING_CONFIG_FILE to $OUTPUT_DIR_NAME"
+
+
+# -----------------------------
+# Delete virtual environment
+# ----------------------------
+conda deactivate
+conda remove \
+    --prefix $ENV_PREFIX \
+    --all \
+    -y
diff --git a/bash_scripts/run_evaluation_array.sh b/bash_scripts/run_evaluation_array.sh
@@ -17,12 +17,6 @@
 # with "SBATCH --array=0-n%m" ---> runs n separate jobs, but not more than m at a time.
 # the number of array jobs should match the number of input files
 
-# ---------------------
-# Source bashrc
-# ----------------------
-# Otherwise `which python` points to the miniconda module's Python
-# source ~/.bashrc
-
 
 # memory
 # see https://pytorch.org/docs/stable/notes/cuda.html#environment-variables
@@ -144,3 +138,13 @@ evaluate-detector  \
  --mlflow_folder $MLFLOW_FOLDER \
  $USE_TEST_SET_FLAG
 echo "-----"
+
+
+# -----------------------------
+# Delete virtual environment
+# ----------------------------
+conda deactivate
+conda remove \
+    --prefix $ENV_PREFIX \
+    --all \
+    -y
diff --git a/bash_scripts/run_training_array.sh b/bash_scripts/run_training_array.sh
@@ -17,12 +17,6 @@
 # with "SBATCH --array=0-n%m" ---> runs n separate jobs, but not more than m at a time.
 # the number of array jobs should match the number of input files
 
-# ---------------------
-# Source bashrc
-# ----------------------
-# Otherwise `which python` points to the miniconda module's Python
-# source ~/.bashrc
-
 
 # memory
 # see https://pytorch.org/docs/stable/notes/cuda.html#environment-variables
@@ -115,3 +109,12 @@ train-detector  \
  --experiment_name $EXPERIMENT_NAME \
  --seed_n $SPLIT_SEED \
  --mlflow_folder $MLFLOW_FOLDER \
+
+# -----------------------------
+# Delete virtual environment
+# ----------------------------
+conda deactivate
+conda remove \
+    --prefix $ENV_PREFIX \
+    --all \
+    -y
diff --git a/bash_scripts/run_training_single.sh b/bash_scripts/run_training_single.sh
@@ -12,13 +12,6 @@
 #SBATCH [email protected]
 
 
-# ---------------------
-# Source bashrc
-# ----------------------
-# Otherwise `which python` points to the miniconda module's Python
-# source ~/.bashrc
-
-
 # memory
 # see https://pytorch.org/docs/stable/notes/cuda.html#environment-variables
 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
@@ -101,3 +94,12 @@ train-detector  \
  --experiment_name $EXPERIMENT_NAME \
  --seed_n $SPLIT_SEED \
  --mlflow_folder $MLFLOW_FOLDER \
+
+# -----------------------------
+# Delete virtual environment
+# ----------------------------
+conda deactivate
+conda remove \
+    --prefix $ENV_PREFIX \
+    --all \
+    -y
diff --git a/crabs/tracker/track_video.py b/crabs/tracker/track_video.py
@@ -69,7 +69,7 @@ def __init__(self, args: argparse.Namespace) -> None:
 
         # input video data
         self.input_video_path = args.video_path
-        self.input_video_file_root = f"{Path(self.input_video_path).stem}"
+        self.input_video_file_root = Path(self.input_video_path).stem
 
         # tracking output directory root name
         self.tracking_output_dir_root = args.output_dir
@@ -92,15 +92,19 @@ def prep_outputs(self):
 
         This method:
         - creates a timestamped directory to store the tracking output.
+          Optionally the timestamp can be omitted.
         - sets the name of the output csv file for the tracked bounding boxes.
         - sets up the output video path if required.
         - sets up the frames subdirectory path if required.
         """
         # Create output directory
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        self.tracking_output_dir = Path(
-            self.tracking_output_dir_root + f"_{timestamp}"
-        )
+        if self.args.output_dir_no_timestamp:
+            self.tracking_output_dir = Path(self.tracking_output_dir_root)
+        else:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            self.tracking_output_dir = Path(
+                self.tracking_output_dir_root + f"_{timestamp}"
+            )
         self.tracking_output_dir.mkdir(parents=True, exist_ok=True)
 
         # Set name of output csv file
@@ -366,7 +370,9 @@ def tracking_parse_args(args):
         default="tracking_output",
         help=(
             "Root name of the directory to save the tracking output. "
-            "The name of the output directory is appended with a timestamp. "
+            "By default, the name of the output directory is appended with "
+            "a timestamp. The timestamp can be omitted with the "
+            "--output_dir_no_timestamp flag. "
             "The tracking output consist of a .csv. file named "
             "<video-name>_tracks.csv with the tracked bounding boxes. "
             "Optionally, it can include a video file named "
@@ -375,6 +381,14 @@ def tracking_parse_args(args):
             "Default: tracking_output_<timestamp>. "
         ),
     )
+    parser.add_argument(
+        "--output_dir_no_timestamp",
+        action="store_true",
+        help=(
+            "Flag to disable appending a timestamp to the output "
+            "directory name. "
+        ),
+    )
     parser.add_argument(
         "--save_video",
         action="store_true",