From e1bc3e5f1316a6e4b2db37c2b9a8f06660ded855 Mon Sep 17 00:00:00 2001
From: Jeremi Levesque
Date: Tue, 29 Oct 2024 16:09:26 -0400
Subject: [PATCH] Add dps to data zip on dvc

---
 dwi_ml/unit_tests/test_dataset.py        | 31 +++++--
 .../tests/test_create_hdf5_dataset.py    | 92 ++++---------------
 2 files changed, 42 insertions(+), 81 deletions(-)

diff --git a/dwi_ml/unit_tests/test_dataset.py b/dwi_ml/unit_tests/test_dataset.py
index b6591700..b65b8b33 100755
--- a/dwi_ml/unit_tests/test_dataset.py
+++ b/dwi_ml/unit_tests/test_dataset.py
@@ -5,6 +5,7 @@
 import h5py
 import torch
+import numpy as np
 
 from dipy.io.stateful_tractogram import StatefulTractogram
 
 from dwi_ml.data.dataset.multi_subject_containers import \
@@ -24,18 +25,13 @@ from dwi_ml.unit_tests.utils.data_and_models_for_tests import \
     fetch_testing_data
 
-dps_key = 'mean_color_dps'
+dps_key_1 = 'mean_color_dps'
+dps_key_2 = 'mock_2d_dps'
 
 
 def test_multisubjectdataset(script_runner):
     data_dir = fetch_testing_data()
 
-    # Adding dps in the data to test better!
-    # toDO manage our test data!!
-    ret = script_runner.run(
-        'pytest', 'scripts_python/tests/test_create_hdf5_dataset.py')
-    assert ret.success
-
     hdf5_filename = os.path.join(data_dir, 'hdf5_file.hdf5')
 
     _non_lazy_version(hdf5_filename)
@@ -95,6 +91,7 @@ def _verify_mri(mri_data, training_set, group_number):
 def _verify_sft_data(sft_data, group_number):
     expected_nb = TEST_EXPECTED_NB_STREAMLINES[group_number]
     assert len(sft_data.as_sft()) == expected_nb
+    expected_mock_2d_dps = np.random.RandomState(42).rand(expected_nb, 42)
 
     # First streamline's first coordinate:
     # Also verifying accessing by index
@@ -102,12 +99,28 @@ def _verify_sft_data(sft_data, group_number):
     assert type(list_one) == StatefulTractogram
     assert len(list_one) == 1
     assert len(list_one.streamlines[0][0, :]) == 3  # a x, y, z coordinate
-    assert dps_key in list_one.data_per_streamline.keys()
+
+    # Both dps keys should be present in the sft's
+    # data_per_streamline, and the stored values
+    # should match the expected data.
+    assert dps_key_1 in list_one.data_per_streamline.keys()
+    assert dps_key_2 in list_one.data_per_streamline.keys()
+    assert np.allclose(
+        list_one.data_per_streamline[dps_key_2][0],
+        expected_mock_2d_dps[0])
 
     # Accessing by slice
     list_4 = sft_data.as_sft(slice(0, 4))
     assert len(list_4) == 4
-    assert dps_key in list_4.data_per_streamline.keys()
+
+    # Same as above, but accessing by slice: both
+    # dps keys should be present and the values
+    # should match the expected rows.
+    assert dps_key_1 in list_4.data_per_streamline.keys()
+    assert dps_key_2 in list_4.data_per_streamline.keys()
+    assert np.allclose(
+        list_4.data_per_streamline[dps_key_2],
+        expected_mock_2d_dps[0:4])
 
 
 def _non_lazy_version(hdf5_filename):
diff --git a/scripts_python/tests/test_create_hdf5_dataset.py b/scripts_python/tests/test_create_hdf5_dataset.py
index 36196e5d..661d9cf0 100644
--- a/scripts_python/tests/test_create_hdf5_dataset.py
+++ b/scripts_python/tests/test_create_hdf5_dataset.py
@@ -9,92 +9,40 @@
 data_dir = fetch_testing_data()
 tmp_dir = tempfile.TemporaryDirectory()
 
+# Note. Our test config file is:
+# {
+#     "input": {
+#         "type": "volume",
+#         "files": ["anat/t1.nii.gz", "dwi/fa.nii.gz"],
+#         "standardization": "per_file",
+#         "std_mask": ["masks/wm.nii.gz"]
+#     },
+#     "wm_mask": {
+#         "type": "volume",
+#         "files": ["masks/wm.nii.gz"],
+#         "standardization": "none"
+#     },
+#     "streamlines": {
+#         "type": "streamlines",
+#         "files": ["example_bundle/Fornix.trk"],
+#         "dps_keys": ["mean_color_dps", "mock_2d_dps"]
+#     }
+# }
 
 
 def test_help_option(script_runner):
     ret = script_runner.run('dwiml_create_hdf5_dataset.py', '--help')
     assert ret.success
 
 
-def _tmp_add_dps(script_runner):
-    import json
-
-    # Fake-adding dps to our tractogram.
-    # ToDo Add dps in the data on the google drive. Currently, I need to do
-    # this ugly trick.
-    dwi_ml_folder = os.path.join(data_dir, 'dwi_ml_ready')
-    subj_folder = os.path.join(dwi_ml_folder, 'subjX', )
-    in_trk = os.path.join(subj_folder, 'example_bundle', 'Fornix.trk')
-    in_anat = os.path.join(subj_folder, 'anat', 't1.nii.gz')
-    script_runner.run('scil_tractogram_assign_custom_color.py', in_trk,
-                      in_trk, '--from_anatomy', in_anat, '-f')
-    script_runner.run('scil_tractogram_dpp_math.py', 'mean', in_trk, in_trk,
-                      '-f', '--mode', 'dpp', '--in_dpp_name', 'color',
-                      '--out_keys', 'mean_color')
-    ret = script_runner.run(
-        'scil_tractogram_dpp_math.py', 'mean', in_trk, in_trk,
-        '-f', '--mode', 'dps', '--in_dpp_name', 'mean_color',
-        '--out_keys', 'mean_color_dps')
-    assert ret.success
-
-    # toDo. Add DPS to our config file
-    config = {
-        "input": {
-            "type": "volume",
-            "files": ["anat/t1.nii.gz", "dwi/fa.nii.gz"],
-            "standardization": "per_file",
-            "std_mask": ["masks/wm.nii.gz"]
-        },
-        "wm_mask": {
-            "type": "volume",
-            "files": ["masks/wm.nii.gz"],
-            "standardization": "none"
-        },
-        "streamlines": {
-            "type": "streamlines",
-            "files": ["example_bundle/Fornix.trk"],
-            "dps_keys": 'mean_color_dps'
-        }
-    }
-    config_file = os.path.join(data_dir, 'code_creation/config_file.json')
-    os.remove(config_file)
-    with open(config_file, 'w') as json_file:
-        json.dump(config, json_file)
-
-
 def test_execution(script_runner):
     os.chdir(os.path.expanduser(tmp_dir.name))
-
-    _tmp_add_dps(script_runner)
-
-    # hdf5_output = 'test.hdf5'
-    # Overwriting current hdf5!!
-    hdf5_output = os.path.join(data_dir, 'hdf5_file.hdf5')
-
-    # Note. Our test config file is:
-    # {
-    #     "input": {
-    #         "type": "volume",
-    #         "files": ["anat/t1.nii.gz", "dwi/fa.nii.gz"],
-    #         "standardization": "per_file",
-    #         "std_mask": ["masks/wm.nii.gz"]
-    #     },
-    #     "wm_mask": {
-    #         "type": "volume",
-    #         "files": ["masks/wm.nii.gz"],
-    #         "standardization": "none"
-    #     },
-    #     "streamlines": {
-    #         "type": "streamlines",
-    #         "files": ["example_bundle/Fornix.trk"],
-    #         "dps_keys": 'mean_color_dps'
-    #     }
-    # }
     dwi_ml_folder = os.path.join(data_dir, 'dwi_ml_ready')
     config_file = os.path.join(data_dir, 'code_creation/config_file.json')
     training_subjs = os.path.join(data_dir, 'code_creation/subjs_list.txt')
     validation_subjs = os.path.join(data_dir,
                                     'code_creation/empty_subjs_list.txt')
     testing_subjs = validation_subjs
-
+    hdf5_output = 'test.hdf5'
    ret = script_runner.run('dwiml_create_hdf5_dataset.py', '-f',
                            dwi_ml_folder, hdf5_output, config_file,
                            training_subjs, validation_subjs, testing_subjs)
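
Note (not part of the patch): the deleted _tmp_add_dps used to write the config file at test time; with this change, the config is expected to ship inside the data zip on DVC. A minimal sketch of how that committed config could be regenerated, reusing the exact structure from the comment in the patch (paths and keys all come from the patch itself, with dps_keys now a proper list):

```python
import json
import os

from dwi_ml.unit_tests.utils.data_and_models_for_tests import \
    fetch_testing_data

data_dir = fetch_testing_data()
config = {
    "input": {
        "type": "volume",
        "files": ["anat/t1.nii.gz", "dwi/fa.nii.gz"],
        "standardization": "per_file",
        "std_mask": ["masks/wm.nii.gz"]
    },
    "wm_mask": {
        "type": "volume",
        "files": ["masks/wm.nii.gz"],
        "standardization": "none"
    },
    "streamlines": {
        "type": "streamlines",
        "files": ["example_bundle/Fornix.trk"],
        # Now a list of keys: test_dataset.py checks both.
        "dps_keys": ["mean_color_dps", "mock_2d_dps"]
    }
}
config_file = os.path.join(data_dir, 'code_creation/config_file.json')
with open(config_file, 'w') as json_file:
    json.dump(config, json_file, indent=4)
```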
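test_dataset.py now rebuilds the expected array with np.random.RandomState(42).rand(expected_nb, 42), which implies the Fornix.trk shipped in the zip carries a 'mock_2d_dps' entry generated with that same seed and shape. A hedged sketch of how that tagging could be done with dipy (mean_color_dps, by contrast, was produced with the scilpy scripts shown in the deleted _tmp_add_dps):

```python
import os

import numpy as np
from dipy.io.streamline import load_tractogram, save_tractogram

from dwi_ml.unit_tests.utils.data_and_models_for_tests import \
    fetch_testing_data

data_dir = fetch_testing_data()
in_trk = os.path.join(data_dir, 'dwi_ml_ready', 'subjX',
                      'example_bundle', 'Fornix.trk')

# 'same' works for trk files: the header is its own spatial reference.
sft = load_tractogram(in_trk, 'same')

# One row of 42 values per streamline, seeded exactly like the
# expected array reconstructed in test_dataset.py.
mock = np.random.RandomState(42).rand(len(sft), 42)
sft.data_per_streamline['mock_2d_dps'] = mock

save_tractogram(sft, in_trk)
```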
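Finally, to sanity-check that the regenerated hdf5 in the zip actually contains both dps keys, one can simply walk the file with h5py; this prints every stored path and assumes nothing about dwi_ml's internal layout:

```python
import h5py

# Print every group and dataset path in the file; grep the output
# for 'mean_color_dps' and 'mock_2d_dps'.
with h5py.File('hdf5_file.hdf5', 'r') as f:
    f.visit(print)
```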