[ENH] Skip processed visits for t1-volume-tissue-segmentation #1403

Draft · wants to merge 4 commits into dev
@@ -3,6 +3,8 @@
from nipype import config

from clinica.pipelines.engine import Pipeline
from clinica.utils.bids import Visit
from clinica.utils.filemanip import extract_visits

cfg = dict(execution={"parameterize_dirs": False})
config.update_config(cfg)
@@ -63,6 +65,78 @@ def get_output_fields(self) -> List[str]:
"t1_mni",
]

def get_processed_visits(self) -> list[Visit]:
"""Return a list of visits for which the pipeline is assumed to have run already.

Before running the pipeline, for a given visit, if the DARTEL input tissue images (for the
requested dartel tissues) as well as the native space and unmodulated MNI space tissue
probability maps (for the requested tissue classes) already exist, then the visit is added
to this list.
The pipeline will further skip these visits and run processing only for the remaining
visits.
"""
from functools import reduce

from clinica.utils.filemanip import extract_visits
from clinica.utils.input_files import (
t1_volume_dartel_input_tissue,
t1_volume_native_tpm,
t1_volume_native_tpm_in_mni,
)
from clinica.utils.inputs import clinica_file_reader

if not self.caps_directory.is_dir():
return []
visits = [
set(extract_visits(x[0]))
for x in [
clinica_file_reader(
self.subjects,
self.sessions,
self.caps_directory,
pattern,
)
for pattern in [
t1_volume_dartel_input_tissue(tissue_number=i)
for i in self.parameters["dartel_tissues"]
]
]
]
visits.extend(
[
set(extract_visits(x[0]))
for x in [
clinica_file_reader(
self.subjects,
self.sessions,
self.caps_directory,
pattern,
)
for pattern in [
t1_volume_native_tpm(tissue_number=i)
for i in self.parameters["tissue_classes"]
]
]
]
)
visits.extend(
[
set(extract_visits(x[0]))
for x in [
clinica_file_reader(
self.subjects,
self.sessions,
self.caps_directory,
pattern,
)
for pattern in [
t1_volume_native_tpm_in_mni(tissue_number=i, modulation=False)
for i in self.parameters["tissue_classes"]
]
]
]
)

return sorted(list(reduce(lambda x, y: x.intersection(y), visits)))
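A minimal sketch, not part of this diff, of how a caller might consume the visits returned by get_processed_visits to filter the subject/session lists before processing. filter_out_processed is a hypothetical helper; it assumes Visit supports equality and hashing, consistent with its use in set() and sorted() above.

from clinica.utils.bids import Visit


def filter_out_processed(
    subjects: list[str], sessions: list[str], processed: list[Visit]
) -> tuple[list[str], list[str]]:
    """Keep only the (subject, session) pairs that are not already processed."""
    processed_set = set(processed)
    remaining = [
        (sub, ses)
        for sub, ses in zip(subjects, sessions)
        if Visit(sub, ses) not in processed_set
    ]
    if not remaining:
        return [], []
    kept_subjects, kept_sessions = zip(*remaining)
    return list(kept_subjects), list(kept_sessions)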

def _build_input_node(self):
"""Build and connect an input node to the pipeline.

77 changes: 74 additions & 3 deletions clinica/utils/testing_utils.py
@@ -10,6 +10,8 @@

from clinica.pipelines.dwi.utils import DWIDataset

from .spm import SPMTissue, get_spm_tissue_from_index

__all__ = [
"build_test_image_cubic_object",
"build_bids_directory",
@@ -209,15 +211,84 @@ def _build_subjects(directory: Path, configuration: dict) -> None:
for pipeline_name, pipeline_config in configuration[
"pipelines"
].items():
(directory / "subjects" / sub / ses / pipeline_name).mkdir(
exist_ok=True
)
if pipeline_name.startswith("t1_volume"):
(directory / "subjects" / sub / ses / "t1").mkdir(exist_ok=True)
else:
(directory / "subjects" / sub / ses / pipeline_name).mkdir(
exist_ok=True
)
if pipeline_name == "t1_linear":
_build_t1_linear(directory, sub, ses, pipeline_config)
if pipeline_name == "pet_linear":
_build_pet_linear(directory, sub, ses, pipeline_config)
if pipeline_name == "t1":
_build_t1(directory, sub, ses, configuration)
if pipeline_name == "t1_volume_tissue_segmentation":
_build_t1_volume_tissue_segmentation(
directory, sub, ses, pipeline_config
)


def _build_t1_volume_tissue_segmentation(
directory: Path, sub: str, ses: str, config: dict
) -> None:
"""Build a fake t1-volume-tissue-segmentation file structure in a CAPS directory."""
segmentation_folder = (
directory / "subjects" / sub / ses / "t1" / "spm" / "segmentation"
)
segmentation_folder.mkdir(parents=True, exist_ok=True)
common_filename_part = f"{sub}_{ses}_T1w_segm-"
_build_t1_volume_tissue_segmentation_native_space(
segmentation_folder, common_filename_part, config
)
_build_t1_volume_tissue_segmentation_dartel_input(
segmentation_folder, common_filename_part, config
)
_build_t1_volume_tissue_segmentation_normalized_space(
segmentation_folder, common_filename_part, config
)


def _extract_tissues_from_config(config: dict, key: str) -> list[SPMTissue]:
if (tissue_classes := config.get(key, None)) is not None:
return [get_spm_tissue_from_index(i) for i in tissue_classes]
return [tissue for tissue in SPMTissue]


def _build_t1_volume_tissue_segmentation_native_space(
segmentation_folder: Path, common_filename_part: str, config: dict
) -> None:
(segmentation_folder / "native_space").mkdir(exist_ok=True)
for tissue in _extract_tissues_from_config(config, "tissue_classes"):
(
segmentation_folder
/ "native_space"
/ f"{common_filename_part}{tissue.value}_probability.nii.gz"
).touch()


def _build_t1_volume_tissue_segmentation_dartel_input(
segmentation_folder: Path, common_filename_part: str, config: dict
) -> None:
(segmentation_folder / "dartel_input").mkdir(exist_ok=True)
for tissue in _extract_tissues_from_config(config, "dartel_tissues"):
(
segmentation_folder
/ "dartel_input"
/ f"{common_filename_part}{tissue.value}_dartelinput.nii.gz"
).touch()


def _build_t1_volume_tissue_segmentation_normalized_space(
segmentation_folder: Path, common_filename_part: str, config: dict
) -> None:
(segmentation_folder / "normalized_space").mkdir(exist_ok=True)
for tissue in _extract_tissues_from_config(config, "tissue_classes"):
(
segmentation_folder
/ "normalized_space"
/ f"{common_filename_part}{tissue.value}_space-Ixi549Space_modulated-off_probability.nii.gz"
).touch()
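A minimal usage sketch of the testing helpers above, assuming build_caps_directory accepts the configuration dictionary format used in the tests below and returns the path of the generated CAPS directory as a pathlib.Path; the exact tissue names in the resulting filenames (e.g. graymatter) come from the SPMTissue enum and are shown here for illustration only.

import tempfile
from pathlib import Path

from clinica.utils.testing_utils import build_caps_directory

with tempfile.TemporaryDirectory() as tmp:
    caps = build_caps_directory(
        Path(tmp) / "caps",
        {
            "pipelines": {
                "t1_volume_tissue_segmentation": {
                    "tissue_classes": (1,),
                    "dartel_tissues": (1,),
                }
            },
            "subjects": {"sub-01": ["ses-M000"]},
        },
    )
    # List the fake (empty) segmentation files created under
    # subjects/sub-01/ses-M000/t1/spm/segmentation/{native_space,dartel_input,normalized_space}
    for path in sorted(caps.rglob("*.nii.gz")):
        print(path.relative_to(caps))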


def _build_t1_linear(directory: Path, sub: str, ses: str, config: dict) -> None:
@@ -1,4 +1,9 @@
from clinica.utils.testing_utils import build_caps_directory
import shutil

from packaging.version import Version

from clinica.utils.bids import Visit
from clinica.utils.testing_utils import build_bids_directory, build_caps_directory


def test_t1_volume_tissue_segmentation_info_loading(tmp_path):
@@ -28,7 +33,6 @@ def test_t1_volume_tissue_segmentation_info_loading(tmp_path):

def test_t1_volume_tissue_segmentation_dependencies(tmp_path, mocker):
from packaging.specifiers import SpecifierSet
from packaging.version import Version

from clinica.pipelines.t1_volume_tissue_segmentation.t1_volume_tissue_segmentation_pipeline import (
T1VolumeTissueSegmentation,
@@ -53,3 +57,158 @@ def test_t1_volume_tissue_segmentation_dependencies(tmp_path, mocker):
ThirdPartySoftware.SPM, SpecifierSet(">=12"), Version("12.7219")
),
]


def test_t1_volume_tissue_segmentation_get_processed_visits_empty(tmp_path, mocker):
from clinica.pipelines.t1_volume_tissue_segmentation.t1_volume_tissue_segmentation_pipeline import (
T1VolumeTissueSegmentation,
)

mocker.patch(
"clinica.utils.check_dependency._get_spm_version",
return_value=Version("12.7219"),
)
bids = build_bids_directory(
tmp_path / "bids", {"sub-01": ["ses-M000", "ses-M006"], "sub-02": ["ses-M000"]}
)
caps = build_caps_directory(tmp_path / "caps", {})

pipeline = T1VolumeTissueSegmentation(
bids_directory=str(bids),
caps_directory=str(caps),
parameters={
"tissue_classes": (1, 2, 3),
"dartel_tissues": (1, 2, 3),
},
)
assert pipeline.get_processed_visits() == []


def test_t1_volume_tissue_segmentation_get_processed_visits(tmp_path, mocker):
"""Test the get_processed_visits for T1VolumeTissueSegmentation.

We build a CAPS dataset with the following structure:

caps2
├── dataset_description.json
└── subjects
└── sub-01
└── ses-M000
└── t1
└── spm
└── segmentation
├── dartel_input
│ └── sub-01_ses-M000_T1w_segm-XXXXXXXX_dartelinput.nii.gz
├── native_space
│ └── sub-01_ses-M000_T1w_segm-YYYYYYYY_probability.nii.gz
└── normalized_space
└── sub-01_ses-M000_T1w_segm-YYYYYYYY_space-Ixi549Space_modulated-off_probability.nii.gz

The same structure is created for several subjects and sessions.
We can control the values of XXXXXXXX and YYYYYYYY through the lists of tissues passed
to the CAPS generator (XXXXXXXX is controlled by 'dartel_tissues', while YYYYYYYY is controlled by 'tissue_classes').

The purpose of this test is to verify that, depending on the tissues requested by the user,
the pipeline identifies as already processed only the visits having ALL images for
the tissues of interest. If at least one image is missing, the visit will be processed
again (and therefore won't appear in the list of "processed" visits).
"""
from clinica.pipelines.t1_volume_tissue_segmentation.t1_volume_tissue_segmentation_pipeline import (
T1VolumeTissueSegmentation,
)

mocker.patch(
"clinica.utils.check_dependency._get_spm_version",
return_value=Version("12.7219"),
)
bids = build_bids_directory(
tmp_path / "bids",
{"sub-01": ["ses-M000", "ses-M006"], "sub-02": ["ses-M000", "ses-M012"]},
)
caps = build_caps_directory(
tmp_path / "caps",
{
"pipelines": {
"t1_volume_tissue_segmentation": {
"tissue_classes": (1, 2, 3, 4),
"dartel_tissues": (2, 4, 5, 6),
}
},
"subjects": {
"sub-01": ["ses-M006"],
"sub-02": ["ses-M000", "ses-M012"],
},
},
)
pipeline = T1VolumeTissueSegmentation(
bids_directory=str(bids),
caps_directory=str(caps),
parameters={
"tissue_classes": (1, 2, 5, 6),
"dartel_tissues": (2, 4, 6),
},
)
# No visit considered already processed since we are asking for tissues 1, 2, 5, and 6
# and the CAPS folder only contains tissues 1, 2, 3, and 4
assert pipeline.get_processed_visits() == []

pipeline = T1VolumeTissueSegmentation(
bids_directory=str(bids),
caps_directory=str(caps),
parameters={
"tissue_classes": (1, 2, 3, 4),
"dartel_tissues": (1, 2, 3),
},
)
# No visit considered already processed since we are asking for dartel tissues 1, 2, and 3
# and the CAPS folder only contains tissues 2, 4, 5, and 6 (1 and 3 are missing...)
assert pipeline.get_processed_visits() == []

pipeline = T1VolumeTissueSegmentation(
bids_directory=str(bids),
caps_directory=str(caps),
parameters={
"tissue_classes": (1, 2, 3),
"dartel_tissues": (2, 4, 5),
},
)
# All visits are considered processed since we are asking for tissues that are present in the CAPS folder
assert pipeline.get_processed_visits() == [
Visit("sub-01", "ses-M006"),
Visit("sub-02", "ses-M000"),
Visit("sub-02", "ses-M012"),
]

# Delete the folder "dartel_input" altogether for subject 02 session M000 (but keep the other folders)
shutil.rmtree(
tmp_path
/ "caps"
/ "subjects"
/ "sub-02"
/ "ses-M000"
/ "t1"
/ "spm"
/ "segmentation"
/ "dartel_input"
)
# Check that subject 02 session M000 is not considered a processed visit anymore
assert pipeline.get_processed_visits() == [
Visit("sub-01", "ses-M006"),
Visit("sub-02", "ses-M012"),
]

# Delete a single file in the "native_space" folder for subject 01 session M006 (keep other files and folders)
(
tmp_path
/ "caps"
/ "subjects"
/ "sub-01"
/ "ses-M006"
/ "t1"
/ "spm"
/ "segmentation"
/ "native_space"
/ "sub-01_ses-M006_T1w_segm-graymatter_probability.nii.gz"
).unlink()
# Check that subject 01 session M006 is not considered a processed visit anymore
assert pipeline.get_processed_visits() == [Visit("sub-02", "ses-M012")]