Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring tsvtool #338

Merged
merged 49 commits into from
Oct 6, 2022
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
e8f725f
change diagnosis into group and subgroup
camillebrianceau Aug 4, 2022
a7c8b21
change diagnosis into group and subgroup
camillebrianceau Aug 4, 2022
4f9c5fa
change diagnosis into group and subgroup
camillebrianceau Aug 4, 2022
4f8952a
change diagnosis into group and subgroup
camillebrianceau Aug 4, 2022
32a0b1f
Update tsvtools tests
camillebrianceau Aug 4, 2022
99b3799
Update tsvtools tests
camillebrianceau Aug 4, 2022
0639387
Merge pull request #1 from aramis-lab/dev
camillebrianceau Aug 9, 2022
d5abf01
simplify kfold and split
camillebrianceau Aug 12, 2022
1eedee3
Merge branch 'cb_tsvtools_refactoring' of github.com:camillebrianceau…
camillebrianceau Aug 12, 2022
d17ef3d
update doc
camillebrianceau Aug 12, 2022
0bc462a
Add docs and add some changes
camillebrianceau Aug 23, 2022
d0515a3
Update tests
camillebrianceau Aug 23, 2022
bfc2cc1
update tests
camillebrianceau Aug 24, 2022
2a6880b
update getlabels
camillebrianceau Aug 25, 2022
f69bfea
Update tsvtools
camillebrianceau Aug 30, 2022
09ceb5e
update docs
camillebrianceau Aug 30, 2022
dfe2813
update jenkins
camillebrianceau Sep 1, 2022
9f5afc3
update tests
camillebrianceau Sep 1, 2022
6c038f4
Update test tsvtools
camillebrianceau Sep 6, 2022
e872228
test
camillebrianceau Sep 6, 2022
f2b24ee
update Jenkins
camillebrianceau Sep 6, 2022
72de85f
update
camillebrianceau Sep 12, 2022
a225c14
update
camillebrianceau Sep 12, 2022
e1cd587
update docs
camillebrianceau Sep 12, 2022
7348a58
update docs
camillebrianceau Sep 12, 2022
2477722
update kfold
camillebrianceau Sep 12, 2022
4d2e204
add get_metadata function
camillebrianceau Sep 13, 2022
3f32399
add tests
camillebrianceau Sep 14, 2022
518bdd8
Changes 30/09
camillebrianceau Sep 30, 2022
43f5fc9
Update
camillebrianceau Oct 4, 2022
8fff1a7
update
camillebrianceau Oct 5, 2022
210a197
update
camillebrianceau Oct 5, 2022
05cb9e1
Update clinicadl/tsvtools/analysis/analysis_cli.py
camillebrianceau Oct 5, 2022
45cae3d
update
camillebrianceau Oct 5, 2022
ca54819
resolve conflict
camillebrianceau Oct 5, 2022
81e9d6f
update
camillebrianceau Oct 5, 2022
8d649b2
Merge pull request #2 from aramis-lab/dev
camillebrianceau Oct 5, 2022
b903e75
update
camillebrianceau Oct 5, 2022
bb405e6
Merge branch 'cb_tsvtools_refactoring' of github.com:camillebrianceau…
camillebrianceau Oct 5, 2022
d5f1081
update after review
camillebrianceau Oct 5, 2022
a57e3b2
update tests
camillebrianceau Oct 5, 2022
6bd7e22
update tests
camillebrianceau Oct 5, 2022
269672c
Update clinicadl/tsvtools/get_metadata/get_metadata.py
camillebrianceau Oct 6, 2022
189aab3
Update clinicadl/tsvtools/get_metadata/get_metadata.py
camillebrianceau Oct 6, 2022
a33c239
Update clinicadl/tsvtools/get_progression/get_progression_cli.py
camillebrianceau Oct 6, 2022
7021b56
Update clinicadl/tsvtools/get_progression/get_progression_cli.py
camillebrianceau Oct 6, 2022
75e20df
Update clinicadl/tsvtools/split/split.py
camillebrianceau Oct 6, 2022
bbd2b53
Update clinicadl/tsvtools/split/split.py
camillebrianceau Oct 6, 2022
d360dea
couple of changes before merging
camillebrianceau Oct 6, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 19 additions & 16 deletions .jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,21 +65,24 @@ pipeline {
'''
}
}
stage('TSVTOOL tests Linux') {
stage('tsvtools tests Linux') {
steps {
echo 'Testing tsvtool tasks...'
sh "echo 'Agent name: ${NODE_NAME}'"
sh '''
source "${CONDA_HOME}/etc/profile.d/conda.sh"
conda activate "${CONDA_ENV}"
cd $WORKSPACE/tests
poetry run pytest \
--junitxml=./test-reports/test_tsvtool_report.xml \
--verbose \
--disable-warnings \
test_tsvtool.py
conda deactivate
'''
catchError(buildResult: 'FAILURE', stageResult: 'UNSTABLE'){
echo 'Testing tsvtool tasks...'
sh "echo 'Agent name: ${NODE_NAME}'"
sh '''
source "${CONDA_HOME}/etc/profile.d/conda.sh"
conda activate "${CONDA_ENV}"
cd $WORKSPACE/tests
poetry run pytest \
--junitxml=./test-reports/test_tsvtool_report.xml \
--verbose \
--disable-warnings \
test_tsvtool.py
conda deactivate
'''
}

}
post {
always {
Expand Down Expand Up @@ -137,7 +140,7 @@ pipeline {
}
}
}
stage('Extract tests Linux') {
stage('Prepare data tests Linux') {
steps {
echo 'Testing extract task...'
sh "echo 'Agent name: ${NODE_NAME}'"
Expand All @@ -151,7 +154,7 @@ pipeline {
--junitxml=./test-reports/test_extract_report.xml \
--verbose \
--disable-warnings \
test_extract.py
test_prepare_data.py
conda deactivate
'''
}
Expand Down
4 changes: 2 additions & 2 deletions clinicadl/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import click

from clinicadl.extract.extract_cli import cli as extract_cli
from clinicadl.generate.generate_cli import cli as generate_cli
from clinicadl.interpret.interpret_cli import cli as interpret_cli
from clinicadl.predict.predict_cli import cli as predict_cli
from clinicadl.prepare_data.prepare_data_cli import cli as prepare_data_cli
from clinicadl.quality_check.qc_cli import cli as qc_cli
from clinicadl.random_search.random_search_cli import cli as random_search_cli
from clinicadl.train.train_cli import cli as train_cli
Expand Down Expand Up @@ -39,7 +39,7 @@ def cli(verbosity):
cli.add_command(tsvtools_cli)
cli.add_command(train_cli)
cli.add_command(generate_cli)
cli.add_command(extract_cli)
cli.add_command(prepare_data_cli)
cli.add_command(predict_cli)
cli.add_command(interpret_cli)
cli.add_command(qc_cli)
Expand Down
2 changes: 1 addition & 1 deletion clinicadl/generate/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import torch
from clinica.utils.inputs import RemoteFileStructure, clinica_file_reader, fetch_file

from clinicadl.extract.extract_utils import compute_extract_json
from clinicadl.prepare_data.prepare_data_utils import compute_extract_json
from clinicadl.utils.caps_dataset.data import CapsDataset
from clinicadl.utils.maps_manager.iotools import check_and_clean, commandline_to_json
from clinicadl.utils.preprocessing import write_preprocessing
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def DeepLearningPrepareData(caps_directory, tsv_file, n_proc, parameters):
from clinicadl.utils.exceptions import ClinicaDLArgumentError
from clinicadl.utils.preprocessing import write_preprocessing

from .extract_utils import check_mask_list, compute_folder_and_file_type
from .prepare_data_utils import check_mask_list, compute_folder_and_file_type

logger = getLogger("clinicadl")

Expand Down Expand Up @@ -70,7 +70,7 @@ def write_output_imgs(output_mode, container, subfolder):
if parameters["mode"] == "image" or not parameters["prepare_dl"]:

def prepare_image(file):
from .extract_utils import extract_images
from .prepare_data_utils import extract_images

logger.debug(f" Processing of {file}.")
container = container_from_filename(file)
Expand All @@ -84,7 +84,7 @@ def prepare_image(file):
elif parameters["prepare_dl"] and parameters["mode"] == "slice":

def prepare_slice(file):
from .extract_utils import extract_slices
from .prepare_data_utils import extract_slices

logger.debug(f" Processing of {file}.")
container = container_from_filename(file)
Expand All @@ -103,7 +103,7 @@ def prepare_slice(file):
elif parameters["prepare_dl"] and parameters["mode"] == "patch":

def prepare_patch(file):
from .extract_utils import extract_patches
from .prepare_data_utils import extract_patches

logger.debug(f" Processing of {file}.")
container = container_from_filename(file)
Expand All @@ -121,7 +121,7 @@ def prepare_patch(file):
elif parameters["prepare_dl"] and parameters["mode"] == "roi":

def prepare_roi(file):
from .extract_utils import extract_roi
from .prepare_data_utils import extract_roi

logger.debug(f" Processing of {file}.")
container = container_from_filename(file)
Expand All @@ -134,7 +134,7 @@ def prepare_roi(file):
parameters["roi_template"] = parameters["roi_custom_template"]
parameters["roi_mask_pattern"] = parameters["roi_custom_mask_pattern"]
else:
from .extract_utils import PATTERN_DICT, TEMPLATE_DICT
from .prepare_data_utils import PATTERN_DICT, TEMPLATE_DICT

parameters["roi_template"] = TEMPLATE_DICT[parameters["preprocessing"]]
parameters["roi_mask_pattern"] = PATTERN_DICT[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from clinicadl.utils import cli_param

from .extract import DeepLearningPrepareData
from .extract_utils import get_parameters_dict
from .prepare_data import DeepLearningPrepareData
from .prepare_data_utils import get_parameters_dict


@click.command(name="image", no_args_is_help=True)
Expand Down Expand Up @@ -297,7 +297,7 @@ def list_commands(self, ctx):
return self.commands.keys()


@click.group(cls=RegistrationOrderGroup, name="extract", no_args_is_help=True)
@click.group(cls=RegistrationOrderGroup, name="prepare-data", no_args_is_help=True)
def cli() -> None:
"""Extract Pytorch tensors from nifti images."""
pass
Expand Down
38 changes: 25 additions & 13 deletions clinicadl/tsvtools/analysis/analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# coding: utf-8

import os
from copy import copy
from os import path
from warnings import warn

Expand All @@ -9,6 +10,7 @@

from clinicadl.utils.tsvtools_utils import (
add_demographics,
cleaning_nan_diagnoses,
find_label,
first_session,
next_session,
Expand All @@ -33,6 +35,7 @@ def demographics_analysis(merged_tsv, formatted_data_path, results_path, diagnos

merged_df = pd.read_csv(merged_tsv, sep="\t")
merged_df.set_index(["participant_id", "session_id"], inplace=True)
merged_df = cleaning_nan_diagnoses(merged_df)
parent_directory = path.abspath(path.join(results_path, os.pardir))
os.makedirs(parent_directory, exist_ok=True)

Expand Down Expand Up @@ -70,29 +73,38 @@ def demographics_analysis(merged_tsv, formatted_data_path, results_path, diagnos

# Need all values for mean and variance (age, MMSE and scans)
diagnosis_dict = dict.fromkeys(diagnoses)
if not path.exists(formatted_data_path):
print(
f"getlabels.tsv file with all sessions was not found. "
# f"Loads baseline version instead."
)

for diagnosis in diagnoses:
diagnosis_dict[diagnosis] = {"age": [], "MMSE": [], "scans": []}
diagnosis_path = path.join(formatted_data_path, diagnosis + ".tsv")
if not path.exists(diagnosis_path):
print(
f"TSV file with all sessions was not found for diagnosis {diagnosis}. "
f"Loads baseline version instead."
)
diagnosis_path = path.join(formatted_data_path, diagnosis + "_baseline.tsv")
diagnosis_df = pd.read_csv(diagnosis_path, sep="\t")
diagnosis_demographics_df = add_demographics(diagnosis_df, merged_df, diagnosis)
getlabels_df = pd.read_csv(formatted_data_path, sep="\t")

interest_columns = getlabels_df.index.values
diagnosis_copy_df = copy(getlabels_df)
for i in interest_columns:
if diagnosis_copy_df.loc[i, "group"] != diagnosis:
diagnosis_copy_df.drop((i), inplace=True)

# diagnosis_df = pd.read_csv(diagnosis_path, sep="\t")
diagnosis_demographics_df = add_demographics(
diagnosis_copy_df, merged_df, diagnosis
)
diagnosis_demographics_df.set_index(
["participant_id", "session_id"], inplace=True
)
diagnosis_df.set_index(["participant_id", "session_id"], inplace=True)

for subject, subject_df in diagnosis_df.groupby(level=0):
diagnosis_copy_df.set_index(["participant_id", "session_id"], inplace=True)
for subject, subject_df in diagnosis_copy_df.groupby(level=0):
first_session_id = first_session(subject_df)
feature_absence = isinstance(
merged_df.loc[(subject, first_session_id), "diagnosis"], float
)
while feature_absence:
first_session_id = next_session(subject_df, first_session_id)

feature_absence = isinstance(
merged_df.loc[(subject, first_session_id), "diagnosis"], float
)
Expand Down Expand Up @@ -185,6 +197,6 @@ def demographics_analysis(merged_tsv, formatted_data_path, results_path, diagnos
f"NaN values were found for {key} values associated to diagnosis {diagnosis}"
)

results_df.index.name = "diagnosis"
results_df.index.name = "group"

results_df.to_csv(results_path, sep="\t")
8 changes: 3 additions & 5 deletions clinicadl/tsvtools/analysis/analysis_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

@click.command(name="analysis", no_args_is_help=True)
@cli_param.argument.merged_tsv
@cli_param.argument.formatted_data_directory
@cli_param.argument.formatted_data_tsv
@cli_param.argument.results_directory
@cli_param.option.diagnoses
def cli(merged_tsv, formatted_data_directory, results_directory, diagnoses):
def cli(merged_tsv, formatted_data_tsv, results_directory, diagnoses):
"""Demographic analysis of the extracted labels.

MERGED_TSV is the output of `clinica iotools merge-tsv`.
Expand All @@ -19,9 +19,7 @@ def cli(merged_tsv, formatted_data_directory, results_directory, diagnoses):
"""
from .analysis import demographics_analysis

demographics_analysis(
merged_tsv, formatted_data_directory, results_directory, diagnoses
)
demographics_analysis(merged_tsv, formatted_data_tsv, results_directory, diagnoses)


if __name__ == "__main__":
Expand Down
6 changes: 3 additions & 3 deletions clinicadl/tsvtools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .analysis.analysis_cli import cli as analysis_cli
from .getlabels.getlabels_cli import cli as getlabels_cli
from .kfold.kfold_cli import cli as kfold_cli
from .restrict.restrict_cli import cli as restrict_cli
from .prepare_experiment.prepare_experiment_cli import cli as prepare_experiment_cli
from .split.split_cli import cli as split_cli


Expand All @@ -14,19 +14,19 @@ def list_commands(self, ctx):
return self.commands.keys()


@click.group(cls=RegistrationOrderGroup, name="tsvtool", no_args_is_help=True)
@click.group(cls=RegistrationOrderGroup, name="tsvtools", no_args_is_help=True)
def cli() -> None:
"""
Manipulation of TSV files to prepare and manage input data.
"""
pass


cli.add_command(restrict_cli)
cli.add_command(getlabels_cli)
cli.add_command(analysis_cli)
cli.add_command(split_cli)
cli.add_command(kfold_cli)
cli.add_command(prepare_experiment_cli)

if __name__ == "__main__":
cli()
Empty file.
6 changes: 6 additions & 0 deletions clinicadl/tsvtools/get_metadata/get_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def get_metadata():
camillebrianceau marked this conversation as resolved.
Show resolved Hide resolved
return None


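# TODO: get_metadata is a placeholder and currently returns None.
# Planned input: the TSV file to complete; planned output: that TSV file with more metadata added.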
8 changes: 8 additions & 0 deletions clinicadl/tsvtools/get_metadata/get_metadata_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import List

import click

from clinicadl.utils import cli_param

if __name__ == "__main__":
cli()
Loading