From e1bc3e5f1316a6e4b2db37c2b9a8f06660ded855 Mon Sep 17 00:00:00 2001
From: Jeremi Levesque
Date: Tue, 29 Oct 2024 16:09:26 -0400
Subject: [PATCH] Add dps to data zip on dvc

---
 dwi_ml/unit_tests/test_dataset.py        | 31 +++++--
 .../tests/test_create_hdf5_dataset.py    | 92 ++++---------------
 2 files changed, 42 insertions(+), 81 deletions(-)

diff --git a/dwi_ml/unit_tests/test_dataset.py b/dwi_ml/unit_tests/test_dataset.py
index b6591700..b65b8b33 100755
--- a/dwi_ml/unit_tests/test_dataset.py
+++ b/dwi_ml/unit_tests/test_dataset.py
@@ -5,6 +5,7 @@
 import h5py
 import torch
+import numpy as np
 
 from dipy.io.stateful_tractogram import StatefulTractogram
 
 from dwi_ml.data.dataset.multi_subject_containers import \
@@ -24,18 +25,13 @@ from dwi_ml.unit_tests.utils.data_and_models_for_tests import \
     fetch_testing_data
 
-dps_key = 'mean_color_dps'
+dps_key_1 = 'mean_color_dps'
+dps_key_2 = 'mock_2d_dps'
 
 
 def test_multisubjectdataset(script_runner):
     data_dir = fetch_testing_data()
 
-    # Adding dps in the data to test better!
-    # toDO manage our test data!!
-    ret = script_runner.run(
-        'pytest', 'scripts_python/tests/test_create_hdf5_dataset.py')
-    assert ret.success
-
     hdf5_filename = os.path.join(data_dir, 'hdf5_file.hdf5')
 
     _non_lazy_version(hdf5_filename)
@@ -95,6 +91,7 @@ def _verify_mri(mri_data, training_set, group_number):
 def _verify_sft_data(sft_data, group_number):
     expected_nb = TEST_EXPECTED_NB_STREAMLINES[group_number]
     assert len(sft_data.as_sft()) == expected_nb
+    expected_mock_2d_dps = np.random.RandomState(42).rand(expected_nb, 42)
 
     # First streamline's first coordinate:
     # Also verifying accessing by index
@@ -102,12 +99,28 @@ def _verify_sft_data(sft_data, group_number):
     assert type(list_one) == StatefulTractogram
     assert len(list_one) == 1
     assert len(list_one.streamlines[0][0, :]) == 3  # a x, y, z coordinate
-    assert dps_key in list_one.data_per_streamline.keys()
+
+    # Both dps keys should be present in the sft's
+    # data_per_streamline, and the stored values
+    # should match the expected data.
+    assert dps_key_1 in list_one.data_per_streamline.keys()
+    assert dps_key_2 in list_one.data_per_streamline.keys()
+    assert np.allclose(
+        list_one.data_per_streamline[dps_key_2][0],
+        expected_mock_2d_dps[0])
 
     # Accessing by slice
     list_4 = sft_data.as_sft(slice(0, 4))
     assert len(list_4) == 4
-    assert dps_key in list_4.data_per_streamline.keys()
+
+    # Same as above, but accessing by slice: both
+    # dps keys should be present and the values
+    # should match the expected rows.
+    assert dps_key_1 in list_4.data_per_streamline.keys()
+    assert dps_key_2 in list_4.data_per_streamline.keys()
+    assert np.allclose(
+        list_4.data_per_streamline[dps_key_2],
+        expected_mock_2d_dps[0:4])
 
 
 def _non_lazy_version(hdf5_filename):
diff --git a/scripts_python/tests/test_create_hdf5_dataset.py b/scripts_python/tests/test_create_hdf5_dataset.py
index 36196e5d..661d9cf0 100644
--- a/scripts_python/tests/test_create_hdf5_dataset.py
+++ b/scripts_python/tests/test_create_hdf5_dataset.py
@@ -9,92 +9,40 @@
 data_dir = fetch_testing_data()
 tmp_dir = tempfile.TemporaryDirectory()
 
+# Note. Our test config file is:
+# {
+#     "input": {
+#         "type": "volume",
+#         "files": ["anat/t1.nii.gz", "dwi/fa.nii.gz"],
+#         "standardization": "per_file",
+#         "std_mask": ["masks/wm.nii.gz"]
+#     },
+#     "wm_mask": {
+#         "type": "volume",
+#         "files": ["masks/wm.nii.gz"],
+#         "standardization": "none"
+#     },
+#     "streamlines": {
+#         "type": "streamlines",
+#         "files": ["example_bundle/Fornix.trk"],
+#         "dps_keys": ["mean_color_dps", "mock_2d_dps"]
+#     }
+# }
 
 
 def test_help_option(script_runner):
     ret = script_runner.run('dwiml_create_hdf5_dataset.py', '--help')
     assert ret.success
 
 
-def _tmp_add_dps(script_runner):
-    import json
-
-    # Fake-adding dps to our tractogram.
-    # ToDo Add dps in the data on the google drive. Currently, I need to do
-    # this ugly trick.
-    dwi_ml_folder = os.path.join(data_dir, 'dwi_ml_ready')
-    subj_folder = os.path.join(dwi_ml_folder, 'subjX', )
-    in_trk = os.path.join(subj_folder, 'example_bundle', 'Fornix.trk')
-    in_anat = os.path.join(subj_folder, 'anat', 't1.nii.gz')
-    script_runner.run('scil_tractogram_assign_custom_color.py', in_trk,
-                      in_trk, '--from_anatomy', in_anat, '-f')
-    script_runner.run('scil_tractogram_dpp_math.py', 'mean', in_trk, in_trk,
-                      '-f', '--mode', 'dpp', '--in_dpp_name', 'color',
-                      '--out_keys', 'mean_color')
-    ret = script_runner.run(
-        'scil_tractogram_dpp_math.py', 'mean', in_trk, in_trk,
-        '-f', '--mode', 'dps', '--in_dpp_name', 'mean_color',
-        '--out_keys', 'mean_color_dps')
-    assert ret.success
-
-    # toDo. Add DPS to our config file
-    config = {
-        "input": {
-            "type": "volume",
-            "files": ["anat/t1.nii.gz", "dwi/fa.nii.gz"],
-            "standardization": "per_file",
-            "std_mask": ["masks/wm.nii.gz"]
-        },
-        "wm_mask": {
-            "type": "volume",
-            "files": ["masks/wm.nii.gz"],
-            "standardization": "none"
-        },
-        "streamlines": {
-            "type": "streamlines",
-            "files": ["example_bundle/Fornix.trk"],
-            "dps_keys": 'mean_color_dps'
-        }
-    }
-    config_file = os.path.join(data_dir, 'code_creation/config_file.json')
-    os.remove(config_file)
-    with open(config_file, 'w') as json_file:
-        json.dump(config, json_file)
-
-
 def test_execution(script_runner):
     os.chdir(os.path.expanduser(tmp_dir.name))
-
-    _tmp_add_dps(script_runner)
-
-    # hdf5_output = 'test.hdf5'
-    # Overwriting current hdf5!!
-    hdf5_output = os.path.join(data_dir, 'hdf5_file.hdf5')
-
-    # Note. Our test config file is:
-    # {
-    #     "input": {
-    #         "type": "volume",
-    #         "files": ["anat/t1.nii.gz", "dwi/fa.nii.gz"],
-    #         "standardization": "per_file",
-    #         "std_mask": ["masks/wm.nii.gz"]
-    #     },
-    #     "wm_mask": {
-    #         "type": "volume",
-    #         "files": ["masks/wm.nii.gz"],
-    #         "standardization": "none"
-    #     },
-    #     "streamlines": {
-    #         "type": "streamlines",
-    #         "files": ["example_bundle/Fornix.trk"],
-    #         "dps_keys": 'mean_color_dps'
-    #     }
-    # }
     dwi_ml_folder = os.path.join(data_dir, 'dwi_ml_ready')
     config_file = os.path.join(data_dir, 'code_creation/config_file.json')
     training_subjs = os.path.join(data_dir, 'code_creation/subjs_list.txt')
     validation_subjs = os.path.join(data_dir,
                                     'code_creation/empty_subjs_list.txt')
     testing_subjs = validation_subjs
-
+    hdf5_output = 'test.hdf5'
    ret = script_runner.run('dwiml_create_hdf5_dataset.py', '-f',
                            dwi_ml_folder, hdf5_output, config_file,
                            training_subjs, validation_subjs, testing_subjs)
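
Note (not part of the patch): the deleted _tmp_add_dps used to write the config file at test time; with this change, the config is expected to ship inside the data zip on DVC. A minimal sketch of how that committed config could be regenerated, reusing the exact structure from the comment in the patch (paths and keys all come from the patch itself, with dps_keys now a proper list):

```python
import json
import os

from dwi_ml.unit_tests.utils.data_and_models_for_tests import \
    fetch_testing_data

data_dir = fetch_testing_data()
config = {
    "input": {
        "type": "volume",
        "files": ["anat/t1.nii.gz", "dwi/fa.nii.gz"],
        "standardization": "per_file",
        "std_mask": ["masks/wm.nii.gz"]
    },
    "wm_mask": {
        "type": "volume",
        "files": ["masks/wm.nii.gz"],
        "standardization": "none"
    },
    "streamlines": {
        "type": "streamlines",
        "files": ["example_bundle/Fornix.trk"],
        # Now a list of keys: test_dataset.py checks both.
        "dps_keys": ["mean_color_dps", "mock_2d_dps"]
    }
}
config_file = os.path.join(data_dir, 'code_creation/config_file.json')
with open(config_file, 'w') as json_file:
    json.dump(config, json_file, indent=4)
```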
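test_dataset.py now rebuilds the expected array with np.random.RandomState(42).rand(expected_nb, 42), which implies the Fornix.trk shipped in the zip carries a 'mock_2d_dps' entry generated with that same seed and shape. A hedged sketch of how that tagging could be done with dipy (mean_color_dps, by contrast, was produced with the scilpy scripts shown in the deleted _tmp_add_dps):

```python
import os

import numpy as np
from dipy.io.streamline import load_tractogram, save_tractogram

from dwi_ml.unit_tests.utils.data_and_models_for_tests import \
    fetch_testing_data

data_dir = fetch_testing_data()
in_trk = os.path.join(data_dir, 'dwi_ml_ready', 'subjX',
                      'example_bundle', 'Fornix.trk')

# 'same' works for trk files: the header is its own spatial reference.
sft = load_tractogram(in_trk, 'same')

# One row of 42 values per streamline, seeded exactly like the
# expected array reconstructed in test_dataset.py.
mock = np.random.RandomState(42).rand(len(sft), 42)
sft.data_per_streamline['mock_2d_dps'] = mock

save_tractogram(sft, in_trk)
```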
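Finally, to sanity-check that the regenerated hdf5 in the zip actually contains both dps keys, one can simply walk the file with h5py; this prints every stored path and assumes nothing about dwi_ml's internal layout:

```python
import h5py

# Print every group and dataset path in the file; grep the output
# for 'mean_color_dps' and 'mock_2d_dps'.
with h5py.File('hdf5_file.hdf5', 'r') as f:
    f.visit(print)
```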