Skip to content

Commit

Permalink
[REF] OASIS-to-BIDS writing sessions tsv files now use simpler code w…
Browse files Browse the repository at this point in the history
…ith data frames (#1336)

* First proposition

* change column order

* Changes upon suggestions
  • Loading branch information
AliceJoubert authored Nov 7, 2024
1 parent 768d83e commit d9e0012
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 87 deletions.
11 changes: 6 additions & 5 deletions clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,23 +102,24 @@ def _create_participants_tsv(
encoding="utf-8",
)

@staticmethod
def _create_sessions_tsv(
self,
clinical_data_dir: Path,
bids_dir: Path,
bids_ids: list[str],
) -> None:
from .oasis_to_bids_utils import create_sessions_dict, write_sessions_tsv
from .oasis_to_bids_utils import create_sessions_df, write_sessions_tsv

sessions_dict = create_sessions_dict(
sessions_df = create_sessions_df(
clinical_data_dir=clinical_data_dir,
clinical_specifications_folder=Path(__file__).parents[1] / "specifications",
bids_ids=bids_ids,
)

write_sessions_tsv(bids_dir, sessions_dict)
write_sessions_tsv(bids_dir, sessions_df)

def _create_scans_tsv(self, bids_dir: Path) -> None:
@staticmethod
def _create_scans_tsv(bids_dir: Path) -> None:
    """Delegate the scans TSV writing for ``bids_dir`` to ``write_scans_tsv``.

    Parameters
    ----------
    bids_dir : Path
        The path to the BIDS directory, forwarded unchanged to
        ``oasis_to_bids_utils.write_scans_tsv``.
    """
    # Imported at call time, like the other helpers of this converter.
    from . import oasis_to_bids_utils as utils

    utils.write_scans_tsv(bids_dir)
Expand Down
107 changes: 55 additions & 52 deletions clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
from pathlib import Path
from typing import Iterable
from typing import Iterable, Union

import pandas as pd

from clinica.iotools.bids_utils import StudyName, bids_id_factory

__all__ = ["create_sessions_dict", "write_sessions_tsv", "write_scans_tsv"]
__all__ = ["create_sessions_df", "write_sessions_tsv", "write_scans_tsv"]


def create_sessions_dict(
def _convert_cdr_to_diagnosis(cdr: Union[int, float, str]) -> str:
    """Convert a CDR score into a diagnosis label.

    Parameters
    ----------
    cdr : int, float or str
        The CDR score. Any real number is accepted (Python ``int``/``float``
        as well as NumPy scalars); non-numeric values such as the ``"n/a"``
        placeholder are tolerated.

    Returns
    -------
    str :
        ``"CN"`` if the score is 0, ``"AD"`` if it is strictly positive,
        ``"n/a"`` otherwise (negative score, NaN, or non-numeric value).
    """
    from numbers import Real

    if cdr == 0:
        return "CN"
    # Check against Real rather than int: scores such as 0.5 or 1.0 are
    # floats, and NumPy integer scalars are not instances of ``int``.
    if isinstance(cdr, Real) and cdr > 0:
        return "AD"
    # Negative scores, NaN (all comparisons with NaN are False) and
    # non-numeric values all end up here.
    return "n/a"


def create_sessions_df(
clinical_data_dir: Path,
clinical_specifications_folder: Path,
bids_ids: Iterable[str],
) -> dict:
"""Extract the information regarding the sessions and store them in a dictionary (session M000 only).
) -> pd.DataFrame:
"""Extract the information regarding sessions M000 and store them in a dataframe.
Parameters
----------
Expand All @@ -24,85 +33,79 @@ def create_sessions_dict(
The path to the clinical file folder.
bids_ids : list of str
The list of bids ids.
The list of bids ids which are in the BIDS directory.
Returns
-------
dict :
Session dict.
pd.Dataframe :
Session df.
"""

study = StudyName.OASIS.value
location = f"{study} location"
spec = pd.read_csv(clinical_specifications_folder / "sessions.tsv", sep="\t")[
[study, location, "BIDS CLINICA"]
].dropna()
sessions_dict = {}

for loc in spec[location].unique():
file = pd.read_excel(clinical_data_dir / loc)
file["BIDS ID"] = file.ID.apply(
lambda x: bids_id_factory(StudyName.OASIS).from_original_study_id(x)
)
file.set_index("BIDS ID", drop=True, inplace=True)
sessions_df = pd.DataFrame()
for _, row in spec[spec[location] == loc].iterrows():
sessions_df[row["BIDS CLINICA"]] = file[row[[study]]]

sessions_df = sessions_df.loc[bids_ids]
sessions_df["diagnosis"] = sessions_df["diagnosis"].apply(
lambda x: "AD" if x > 0 else "CN"
sessions_df = pd.DataFrame()
if len(spec[location].unique()) == 1:
loc = spec[location].unique()[0]
else:
raise ValueError(
f"OASIS1 metadata is supposed to be contained in only 1 file, {len(spec[location].unique())} were detected : {spec[location].unique()}"
)
sessions_df["session_id"] = "ses-M000"

for bids_id, row in sessions_df.iterrows():
sessions_dict.update(
{bids_id: {"M000": {label: value for label, value in row.items()}}}
)
file = pd.read_excel(clinical_data_dir / loc)
file["BIDS ID"] = file.ID.apply(
lambda x: bids_id_factory(StudyName.OASIS).from_original_study_id(x)
)
file.set_index("BIDS ID", drop=True, inplace=True)

for _, row in spec[spec[location] == loc].iterrows():
sessions_df[row["BIDS CLINICA"]] = file[row[[study]]]

return sessions_dict
missing_subjects = set(bids_ids) - set(sessions_df.index)
for ms in missing_subjects:
sessions_df.loc[ms] = ["n/a" for _ in sessions_df.columns]

sessions_df = sessions_df.loc[bids_ids]

def write_sessions_tsv(bids_dir: Path, sessions_dict: dict) -> None:
"""Create <participant_id>_sessions.tsv files.
sessions_df["diagnosis"] = sessions_df["diagnosis"].apply(
lambda x: _convert_cdr_to_diagnosis(x)
)

Basically writes the content of the function
`clinica.iotools.bids_utils.create_sessions_dict` in several TSV files
following the BIDS specification.
sessions_df.insert(loc=0, column="session_id", value="ses-M000")

return sessions_df


def write_sessions_tsv(bids_dir: Path, sessions_df: pd.DataFrame) -> None:
    """Write one ``<participant_id>_sessions.tsv`` file per row of ``sessions_df``.

    Each row is written as a single-line TSV file (header + values) at
    ``<bids_dir>/<participant_id>/<participant_id>_sessions.tsv``, where
    ``<participant_id>`` is the index label of the row.

    Parameters
    ----------
    bids_dir : Path
        The path to the BIDS directory. The subject folders are not created
        by this function.

    sessions_df : DataFrame
        Contains sessions metadata, indexed by participant ID.

        .. note::
            This is the output of the function
            `clinica.iotools.converters.oasis_to_bids.oasis_to_bids_utils.create_sessions_df`.

    See also
    --------
    create_sessions_df
    """
    for subject, data in sessions_df.iterrows():
        session_path = bids_dir / subject
        # ``iterrows`` yields each row as a Series; transpose it back into a
        # one-row frame so the column names become the TSV header.
        data.to_frame().T.to_csv(
            session_path / f"{subject}_sessions.tsv",
            sep="\t",
            encoding="utf8",
            index=False,
        )


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from os import write
from pathlib import Path

import numpy as np
Expand All @@ -7,7 +6,8 @@
from pandas.testing import assert_frame_equal

from clinica.iotools.converters.oasis_to_bids.oasis_to_bids_utils import (
create_sessions_dict,
_convert_cdr_to_diagnosis,
create_sessions_df,
write_scans_tsv,
write_sessions_tsv,
)
Expand Down Expand Up @@ -104,52 +104,76 @@ def _build_bids_dir(bids_dir: Path) -> None:


@pytest.fixture
def expected() -> dict:
def expected() -> pd.DataFrame:
expected = {
"sub-OASIS10001": {
"M000": {
"session_id": "ses-M000",
"cdr_global": 0,
"MMS": 29,
"diagnosis": "CN",
},
"session_id": "ses-M000",
"cdr_global": 0,
"MMS": 29,
"diagnosis": "CN",
},
"sub-OASIS10002": {
"M000": {
"session_id": "ses-M000",
"cdr_global": 0,
"MMS": 29,
"diagnosis": "CN",
}
"session_id": "ses-M000",
"cdr_global": 0,
"MMS": 29,
"diagnosis": "CN",
},
}

expected = pd.DataFrame.from_dict(expected).T
expected.index.names = ["BIDS ID"]

return expected


def test_create_sessions_dict_success(
def test_create_sessions_df_success(
tmp_path,
clinical_data_path: Path,
sessions_path_success: Path,
expected: dict,
expected: pd.DataFrame,
):
result = create_sessions_dict(
result = create_sessions_df(
clinical_data_path,
sessions_path_success,
["sub-OASIS10001", "sub-OASIS10002"],
)
assert_frame_equal(expected, result, check_like=True, check_dtype=False)

assert result == expected

def test_create_sessions_df_missing_clinical_data(
    tmp_path,
    clinical_data_path: Path,
    sessions_path_success: Path,
    expected: pd.DataFrame,
):
    """Check the row produced for a requested subject absent from the clinical data.

    ``sub-OASIS10004`` is requested in addition to the two subjects of the
    ``expected`` fixture; its row must hold ``"n/a"`` in every clinical column
    and the usual ``"ses-M000"`` session id.
    """
    requested_ids = ["sub-OASIS10001", "sub-OASIS10002", "sub-OASIS10004"]
    result = create_sessions_df(
        clinical_data_path,
        sessions_path_success,
        requested_ids,
    )

    row_without_data = {
        "session_id": "ses-M000",
        "diagnosis": "n/a",
        "cdr_global": "n/a",
        "MMS": "n/a",
    }
    missing_line = pd.DataFrame.from_dict({"sub-OASIS10004": row_without_data}).T
    missing_line.index.names = ["BIDS ID"]

    # Column order and dtypes are deliberately not part of the comparison.
    expected_with_missing = pd.concat([expected, missing_line])
    assert_frame_equal(
        expected_with_missing, result, check_like=True, check_dtype=False
    )

def test_create_sessions_dict_error(

def test_create_sessions_df_file_not_found(
tmp_path,
clinical_data_path: Path,
sessions_path_error: Path,
expected: dict,
):
with pytest.raises(FileNotFoundError):
create_sessions_dict(
create_sessions_df(
clinical_data_path,
sessions_path_error,
["sub-OASIS10001", "sub-OASIS10002"],
Expand All @@ -161,22 +185,21 @@ def test_write_sessions_tsv(
clinical_data_path: Path,
bids_dir: Path,
sessions_path_success: Path,
expected: dict,
expected: pd.DataFrame,
):
sessions = create_sessions_dict(
sessions = create_sessions_df(
clinical_data_path,
sessions_path_success,
["sub-OASIS10001", "sub-OASIS10002"],
)
write_sessions_tsv(tmp_path / "BIDS", sessions)
sessions_files = list((tmp_path / "BIDS").rglob("*.tsv"))
write_sessions_tsv(bids_dir, sessions)
sessions_files = list(bids_dir.rglob("*.tsv"))

assert len(sessions_files) == 2
for file in sessions_files:
assert_frame_equal(
pd.read_csv(file, sep="\t").set_index("session_id", drop=False),
pd.DataFrame(expected[file.parent.name]).T.set_index(
"session_id", drop=False
),
pd.read_csv(file, sep="\t").reset_index(drop=True),
expected.loc[[file.parent.name]].reset_index(drop=True),
check_like=True,
check_dtype=False,
)
Expand Down Expand Up @@ -211,3 +234,17 @@ def test_write_scans_tsv(tmp_path, bids_dir: Path) -> None:
assert file["filename"].loc[0] == f"anat/{image_path.name}"
elif sub == "sub-OASIS10002":
assert file.empty


@pytest.mark.parametrize(
    "cdr,diagnosis",
    [
        (0, "CN"),
        (12, "AD"),
        (-2, "n/a"),
        ("n/a", "n/a"),
        ("foo", "n/a"),
    ],
)
def test_convert_cdr_to_diagnosis(cdr, diagnosis):
    """Check the label returned for zero, positive, negative and non-numeric CDR values."""
    converted = _convert_cdr_to_diagnosis(cdr)
    assert converted == diagnosis

0 comments on commit d9e0012

Please sign in to comment.