From d1df261665a8de53b23df20ea45ed4d5d8e19727 Mon Sep 17 00:00:00 2001 From: Hang Jung Ling <106228386+HangJung97@users.noreply.github.com> Date: Tue, 20 Feb 2024 09:05:24 +0100 Subject: [PATCH] Add `verbose` parameter to preprocessing classes and config (#154) --- ascent/configs/preprocess_and_plan.yaml | 3 + ascent/configs/preprocessor/dealias_du.yaml | 1 + ascent/configs/preprocessor/regression.yaml | 1 + ascent/configs/preprocessor/segmentation.yaml | 1 + ascent/preprocessing/preprocessing.py | 152 +++++++++++------- 5 files changed, 99 insertions(+), 59 deletions(-) diff --git a/ascent/configs/preprocess_and_plan.yaml b/ascent/configs/preprocess_and_plan.yaml index e1b5bfe..c4a4c62 100644 --- a/ascent/configs/preprocess_and_plan.yaml +++ b/ascent/configs/preprocess_and_plan.yaml @@ -24,6 +24,9 @@ pl2d: True # set true to plan 3D experiment pl3d: True +# set true to print preprocessing message +verbose: False + hydra: output_subdir: null run: diff --git a/ascent/configs/preprocessor/dealias_du.yaml b/ascent/configs/preprocessor/dealias_du.yaml index b65a22b..0df6935 100644 --- a/ascent/configs/preprocessor/dealias_du.yaml +++ b/ascent/configs/preprocessor/dealias_du.yaml @@ -4,3 +4,4 @@ do_resample: ${resample} do_normalize: ${normalize} num_workers: 12 overwrite_existing: ${overwrite} +verbose: ${verbose} diff --git a/ascent/configs/preprocessor/regression.yaml b/ascent/configs/preprocessor/regression.yaml index c6c8751..f713fcf 100644 --- a/ascent/configs/preprocessor/regression.yaml +++ b/ascent/configs/preprocessor/regression.yaml @@ -4,3 +4,4 @@ do_resample: ${resample} do_normalize: ${normalize} num_workers: 12 overwrite_existing: ${overwrite} +verbose: ${verbose} diff --git a/ascent/configs/preprocessor/segmentation.yaml b/ascent/configs/preprocessor/segmentation.yaml index 54c65c2..984325d 100644 --- a/ascent/configs/preprocessor/segmentation.yaml +++ b/ascent/configs/preprocessor/segmentation.yaml @@ -4,3 +4,4 @@ do_resample: ${resample} do_normalize: ${normalize} num_workers: 12 overwrite_existing: ${overwrite} +verbose: ${verbose} diff --git a/ascent/preprocessing/preprocessing.py b/ascent/preprocessing/preprocessing.py index eaf05b1..b1922f4 100644 --- a/ascent/preprocessing/preprocessing.py +++ b/ascent/preprocessing/preprocessing.py @@ -78,6 +78,7 @@ def resample_image( lowres_axis: Optional[np.ndarray] = None, interp_order: int = 3, order_z: int = 0, + verbose: bool = False, ) -> np.ndarray: """Resample an image. @@ -88,6 +89,7 @@ def resample_image( lowres_axis: Axis of lowest resolution. interp_order: Interpolation order of skimage.transform.resize. order_z: Interpolation order for the lowest resolution axis in case of anisotropic image. + verbose: Whether to log the resampling message. Returns: Resampled image. @@ -98,7 +100,8 @@ def resample_image( image = image.astype(float) resized_channels = [] if anisotropy_flag: - print("Anisotropic image, using separate z resampling") + if verbose: + print("Anisotropic image, using separate z resampling") axis = lowres_axis[0] if axis == 0: new_shape_2d = new_shape[1:] @@ -138,7 +141,8 @@ def resample_image( ) resized_channels.append(resized.astype(dtype_data)) else: - print("Not using separate z resampling") + if verbose: + print("Not using separate z resampling") for image_c in image: resized = resize( image_c, @@ -153,7 +157,8 @@ def resample_image( reshaped = np.stack(resized_channels, axis=0) return reshaped.astype(dtype_data) else: - print("No resampling necessary") + if verbose: + print("No resampling necessary") return image @@ -164,6 +169,7 @@ def resample_label( lowres_axis: Optional[np.ndarray] = None, interp_order: int = 1, order_z: int = 0, + verbose: bool = False, ) -> np.ndarray: """Resample a label. @@ -174,6 +180,7 @@ def resample_label( lowres_axis: Axis of lowest resolution. interp_order: Interpolation order of skimage.transform.resize. order_z: Interpolation order for the lowest resolution axis in case of anisotropic label. + verbose: Whether to log the resampling message. Returns: Resampled label. @@ -183,7 +190,8 @@ def resample_label( reshaped = np.zeros(new_shape, dtype=np.uint8) n_class = np.max(label) if anisotropy_flag: - print("Anisotropic label, using separate z resampling") + if verbose: + print("Anisotropic label, using separate z resampling") axis = lowres_axis[0] depth = shape[axis] if axis == 0: @@ -236,7 +244,8 @@ def resample_label( else: reshaped = reshaped_2d.astype(np.uint8) else: - print("Not using separate z resampling") + if verbose: + print("Not using separate z resampling") for class_ in range(1, int(n_class) + 1): mask = label[0] == class_ resized = resize( @@ -253,7 +262,8 @@ def resample_label( reshaped = np.expand_dims(reshaped, 0) return reshaped else: - print("No resampling necessary") + if verbose: + print("No resampling necessary") return label @@ -275,6 +285,7 @@ def __init__( do_normalize: bool = True, num_workers: int = 12, overwrite_existing: bool = False, + verbose: bool = False, ) -> None: """Initialize class instance. @@ -284,6 +295,7 @@ def __init__( do_normalize: Whether to normalize data. num_workers: Number of workers to run the preprocessing. overwrite_existing: Whether to overwrite the preprocessed data if it exists. + verbose: Whether to log the preprocessing message. """ self.dataset_path = os.path.join(dataset_path, "raw") self.cropped_folder = os.path.join(dataset_path, "cropped") @@ -295,6 +307,7 @@ def __init__( self.do_normalize = do_normalize self.num_workers = num_workers self.overwrite_existing = overwrite_existing + self.verbose = verbose self.target_spacing = None self.intensity_properties = OrderedDict() self.all_size_reductions = [] @@ -490,7 +503,8 @@ def _crop( properties["original_spacing"] = np.array(data["image"].meta["pixdim"][1:4].tolist()) box_start, box_end = generate_spatial_bounding_box(data["image"]) properties["crop_bbox"] = np.vstack([box_start, box_end]) - print("\nCropping %s..." % properties["case_identifier"]) + if self.verbose: + print("\nCropping %s..." % properties["case_identifier"]) data = SpatialCropd( keys=["image", "label"], roi_start=box_start, @@ -500,15 +514,16 @@ def _crop( properties["cropping_size_reduction"] = np.prod( properties["shape_after_cropping"] ) / np.prod(properties["original_shape"]) - print( - "before crop:", - tuple([data["image"].shape[0], *properties["original_shape"].tolist()]), - "after crop:", - tuple([data["image"].shape[0], *properties["shape_after_cropping"].tolist()]), - "spacing:", - properties["original_spacing"], - "\n", - ) + if self.verbose: + print( + "before crop:", + tuple([data["image"].shape[0], *properties["original_shape"].tolist()]), + "after crop:", + tuple([data["image"].shape[0], *properties["shape_after_cropping"].tolist()]), + "spacing:", + properties["original_spacing"], + "\n", + ) cropped_filename = os.path.join( self.cropped_folder, "%s.npz" % properties["case_identifier"] @@ -517,7 +532,8 @@ def _crop( self.cropped_folder, "%s.pkl" % properties["case_identifier"] ) all_data = np.vstack([data["image"].array, data["label"].array]) - print("\nSaving to", cropped_filename) + if self.verbose: + print("\nSaving to", cropped_filename) np.savez_compressed(cropped_filename, data=all_data) with open(properties_name, "wb") as f: pickle.dump(properties, f) # nosec B301 @@ -643,10 +659,12 @@ def _determine_whether_to_use_mask_for_norm(self) -> dict[int, bool]: use_nonzero_mask_for_norm[i] = False else: if np.median(self.all_size_reductions) < 3 / 4.0: - print("Using nonzero mask for normalization") + if self.verbose: + print("Using nonzero mask for normalization") use_nonzero_mask_for_norm[i] = True else: - print("Not using nonzero mask for normalization") + if self.verbose: + print("Not using nonzero mask for normalization") use_nonzero_mask_for_norm[i] = False use_nonzero_mask_for_normalization = use_nonzero_mask_for_norm @@ -673,7 +691,8 @@ def _resample_and_normalize(self, case_identifier: str) -> None: """ data, seg, properties = self._load_cropped(case_identifier) if not self.do_resample: - print("\n", "Skip resampling...") + if self.verbose: + print("\n", "Skip resampling...") properties["resampling_flag"] = False properties["shape_after_resampling"] = np.array(data[0].shape) properties["spacing_after_resampling"] = properties["original_spacing"] @@ -703,8 +722,8 @@ def _resample_and_normalize(self, case_identifier: str) -> None: properties["shape_after_cropping"], self.target_spacing, ) - data = resample_image(data, new_shape, anisotropy_flag, axis, 3, 0) - seg = resample_label(seg, new_shape, anisotropy_flag, axis, 1, 0) + data = resample_image(data, new_shape, anisotropy_flag, axis, 3, 0, self.verbose) + seg = resample_label(seg, new_shape, anisotropy_flag, axis, 1, 0, self.verbose) properties["anisotropy_flag"] = anisotropy_flag properties["shape_after_resampling"] = np.array(data[0].shape) properties["spacing_after_resampling"] = np.array(self.target_spacing) @@ -713,11 +732,12 @@ def _resample_and_normalize(self, case_identifier: str) -> None: "spacing": properties["spacing_after_resampling"], "data.shape (data is resampled)": data.shape, } - - print("before:", before, "\nafter: ", after, "\n") + if self.verbose: + print("before:", before, "\nafter: ", after, "\n") if not self.do_normalize: - print("\nSkip normalization...") + if self.verbose: + print("\nSkip normalization...") properties["normalization_flag"] = False else: properties["normalization_flag"] = True @@ -725,13 +745,14 @@ def _resample_and_normalize(self, case_identifier: str) -> None: data, seg = self._normalize(data, seg) all_data = np.vstack((data, seg)).astype(np.float32) - print( - "Saving: ", - os.path.join( - self.preprocessed_folder, "data_and_properties", "%s.npz" % case_identifier - ), - "\n", - ) + if self.verbose: + print( + "Saving: ", + os.path.join( + self.preprocessed_folder, "data_and_properties", "%s.npz" % case_identifier + ), + "\n", + ) np.savez_compressed( os.path.join( self.preprocessed_folder, "data_and_properties", "%s.npz" % case_identifier @@ -763,7 +784,8 @@ def _normalize(self, data: np.ndarray, seg: np.ndarray) -> tuple[np.ndarray, np. """ if not len(self.use_nonzero_mask) == len(data): raise ValueError("use_nonzero_mask flags should have the same length as data") - print("Normalization...") + if self.verbose: + print("Normalization...") for c in range(len(data)): scheme = self.modalities[c] if scheme == "CT": @@ -788,7 +810,8 @@ def _normalize(self, data: np.ndarray, seg: np.ndarray) -> tuple[np.ndarray, np. data[c][mask].std() + 1e-8 ) data[c][mask == 0] = 0 - print("Normalization done") + if self.verbose: + print("Normalization done") return data, seg def _get_all_shapes_after_resampling( @@ -992,7 +1015,8 @@ def _crop( properties["original_spacing"] = np.array(data["image"].meta["pixdim"][1:4].tolist()) properties["crop_bbox"] = [] - print("\nSkip cropping %s..." % properties["case_identifier"]) + if self.verbose: + print("\nSkip cropping %s..." % properties["case_identifier"]) properties["shape_after_cropping"] = properties["original_shape"] properties["cropping_size_reduction"] = 1 @@ -1003,7 +1027,8 @@ def _crop( self.cropped_folder, "%s.pkl" % properties["case_identifier"] ) all_data = np.vstack([data["image"].array, data["label"].array]) - print("\nSaving to", cropped_filename) + if self.verbose: + print("\nSaving to", cropped_filename) np.savez_compressed(cropped_filename, data=all_data) with open(properties_name, "wb") as f: pickle.dump(properties, f) # nosec B301 @@ -1063,7 +1088,8 @@ def _resample_and_normalize(self, case_identifier: str) -> None: """ data, seg, properties = self._load_cropped(case_identifier) if not self.do_resample: - print("\n", "Skip resampling...") + if self.verbose: + print("\n", "Skip resampling...") properties["resampling_flag"] = False properties["shape_after_resampling"] = np.array(data[0].shape) properties["spacing_after_resampling"] = properties["original_spacing"] @@ -1093,8 +1119,10 @@ def _resample_and_normalize(self, case_identifier: str) -> None: properties["shape_after_cropping"], self.target_spacing, ) - data = resample_image(data, new_shape, anisotropy_flag, axis, 3, 0) - seg = resample_image(seg.astype(np.float32), new_shape, anisotropy_flag, axis, 3, 0) + data = resample_image(data, new_shape, anisotropy_flag, axis, 3, 0, self.verbose) + seg = resample_image( + seg.astype(np.float32), new_shape, anisotropy_flag, axis, 3, 0, self.verbose + ) properties["anisotropy_flag"] = anisotropy_flag properties["shape_after_resampling"] = np.array(data[0].shape) @@ -1104,11 +1132,12 @@ def _resample_and_normalize(self, case_identifier: str) -> None: "spacing": properties["spacing_after_resampling"], "data.shape (data is resampled)": data.shape, } - - print("before:", before, "\nafter: ", after, "\n") + if self.verbose: + print("before:", before, "\nafter: ", after, "\n") if not self.do_normalize: - print("\nSkip normalization...") + if self.verbose: + print("\nSkip normalization...") properties["normalization_flag"] = False else: properties["normalization_flag"] = True @@ -1116,13 +1145,14 @@ def _resample_and_normalize(self, case_identifier: str) -> None: data, seg = self._normalize(data, seg) all_data = np.vstack((data, seg.astype(np.float32))).astype(np.float32) - print( - "Saving: ", - os.path.join( - self.preprocessed_folder, "data_and_properties", "%s.npz" % case_identifier - ), - "\n", - ) + if self.verbose: + print( + "Saving: ", + os.path.join( + self.preprocessed_folder, "data_and_properties", "%s.npz" % case_identifier + ), + "\n", + ) np.savez_compressed( os.path.join( self.preprocessed_folder, "data_and_properties", "%s.npz" % case_identifier @@ -1257,7 +1287,8 @@ def _resample_and_normalize(self, case_identifier: str) -> None: # contains the segmentation of aliased pixels data, seg, properties = self._load_cropped(case_identifier) if not self.do_resample: - print("\n", "Skip resampling...") + if self.verbose: + print("\n", "Skip resampling...") properties["resampling_flag"] = False properties["shape_after_resampling"] = np.array(data[0].shape) properties["spacing_after_resampling"] = properties["original_spacing"] @@ -1304,10 +1335,12 @@ def _resample_and_normalize(self, case_identifier: str) -> None: "data.shape (data is resampled)": data.shape, } - print("before:", before, "\nafter: ", after, "\n") + if self.verbose: + print("before:", before, "\nafter: ", after, "\n") if not self.do_normalize: - print("\nSkip normalization...") + if self.verbose: + print("\nSkip normalization...") properties["normalization_flag"] = False else: properties["normalization_flag"] = True @@ -1315,13 +1348,14 @@ def _resample_and_normalize(self, case_identifier: str) -> None: data, seg = self._normalize(data, seg) all_data = np.vstack((data, seg)).astype(np.float32) - print( - "Saving: ", - os.path.join( - self.preprocessed_folder, "data_and_properties", "%s.npz" % case_identifier - ), - "\n", - ) + if self.verbose: + print( + "Saving: ", + os.path.join( + self.preprocessed_folder, "data_and_properties", "%s.npz" % case_identifier + ), + "\n", + ) np.savez_compressed( os.path.join( self.preprocessed_folder, "data_and_properties", "%s.npz" % case_identifier