From f3fa356f88130852a050e40c69b7258e1b52dc3a Mon Sep 17 00:00:00 2001 From: Emilio Mayorga Date: Tue, 17 Jan 2023 00:50:24 -0800 Subject: [PATCH] Handling of provenance attributes in apply-mask and add-depth, especially for testing (#930) * Ensure chanA & chanB in frequency_differencing are set to string. In apply_mask, specify in docstring that an Sv variable is expected and in _variable_prov_attrs test for dask array in addition to xr array * Modify test_mask::test_apply_mask to ignore attributes in the dataarray comparison * In test_consolidate::test_add_depth, comment out check-attributes test * Move _variable_prov_attrs in mask/api.py out of apply_mask, for clarity. Also remove testing for dask array type in that function, for consistency with apply_mask --- echopype/mask/api.py | 98 +++++++++++-------- .../tests/consolidate/test_consolidate.py | 2 +- echopype/tests/mask/test_mask.py | 2 +- 3 files changed, 59 insertions(+), 43 deletions(-) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index e996ce889..056365510 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -150,18 +150,66 @@ def _check_var_name_fill_value( raise ValueError("If fill_value is an array is must be of the same shape as var_name!") +def _variable_prov_attrs( + masked_da: xr.DataArray, source_mask: Union[xr.DataArray, List[xr.DataArray]] +) -> dict: + """ + Extract and compose masked Sv provenance attributes from the masked Sv and the + masks used to generate it. + + Parameters + ---------- + masked_da: xr.DataArray + Masked Sv + source_mask: Union[xr.DataArray, List[xr.DataArray]] + Individual mask or list of masks used to create the masked Sv + + Returns + ------- + dict + Dictionary of provenance attributes (attribute name and value) for the intended variable. 
+ """ + # Modify core variable attributes + attrs = { + "long_name": "Volume backscattering strength, masked (Sv re 1 m-1)", + "actual_range": [ + round(float(masked_da.min().values), 2), + round(float(masked_da.max().values), 2), + ], + } + # Add history attribute + history_attr = f"{datetime.datetime.utcnow()} +00:00. " "Created masked Sv dataarray." # noqa + attrs = {**attrs, **{"history": history_attr}} + + # Add attributes from the mask DataArray, if present + # Handle only a single mask. If not passed to apply_mask as a single DataArray, + # will use the first mask of the list passed to apply_mask + # TODO: Expand it to handle attributes from multiple masks + if isinstance(source_mask, xr.DataArray) or ( + isinstance(source_mask, list) and isinstance(source_mask[0], xr.DataArray) + ): + use_mask = source_mask[0] if isinstance(source_mask, list) else source_mask + if len(use_mask.attrs) > 0: + mask_attrs = use_mask.attrs.copy() + if "history" in mask_attrs: + # concatenate the history string as new line + attrs["history"] += f"\n{mask_attrs['history']}" + mask_attrs.pop("history") + attrs = {**attrs, **mask_attrs} + + return attrs + + def apply_mask( source_ds: Union[xr.Dataset, str, pathlib.Path], - mask: Union[ - Union[xr.DataArray, str, pathlib.Path], List[Union[xr.DataArray, str, pathlib.Path]] - ], + mask: Union[xr.DataArray, str, pathlib.Path, List[Union[xr.DataArray, str, pathlib.Path]]], var_name: str = "Sv", fill_value: Union[int, float, np.ndarray, xr.DataArray] = np.nan, storage_options_ds: dict = {}, storage_options_mask: Union[dict, List[dict]] = {}, ) -> xr.Dataset: """ - Applies the provided mask(s) to the variable ``var_name`` + Applies the provided mask(s) to the Sv variable ``var_name`` in the provided Dataset ``source_ds``. Parameters @@ -173,7 +221,7 @@ def apply_mask( a DataArray or a path. If a path is provided this should point to a zarr or netcdf file with only one data variable in it. 
var_name: str, default="Sv" - The variable name in ``source_ds`` that the mask should be applied to + The Sv variable name in ``source_ds`` that the mask should be applied to fill_value: int, float, np.ndarray, or xr.DataArray, default=np.nan Value(s) at masked indices storage_options_ds: dict, default={} @@ -238,39 +286,6 @@ def apply_mask( output_ds[var_name] = var_name_masked # Add or modify variable and global (dataset) provenance attributes - def _variable_prov_attrs(da, source_mask): - # Modify core variable attributes - attrs = { - "long_name": "Volume backscattering strength, masked (Sv re 1 m-1)", - "actual_range": [ - round(float(da.min().values), 2), - round(float(da.max().values), 2), - ], - } - # Add history attribute - history_attr = ( - f"{datetime.datetime.utcnow()} +00:00. " "Created masked Sv dataarray." # noqa - ) - attrs = {**attrs, **{"history": history_attr}} - - # Add attributes from the mask dataarray, if present - # Handle only a single mask. If not passed to apply_mask as a single dataarray, - # will use the first mask of the list passed to apply_mask - # TODO: Expand it to handle attributes from multiple masks - if isinstance(source_mask, xr.DataArray) or ( - isinstance(source_mask, list) and isinstance(source_mask[0], xr.DataArray) - ): - use_mask = source_mask[0] if isinstance(source_mask, list) else source_mask - if len(use_mask.attrs) > 0: - mask_attrs = use_mask.attrs.copy() - if "history" in mask_attrs: - # concatenate the history string as new line - attrs["history"] += f"\n{mask_attrs['history']}" - mask_attrs.pop("history") - attrs = {**attrs, **mask_attrs} - - return attrs - output_ds[var_name] = output_ds[var_name].assign_attrs( _variable_prov_attrs(output_ds[var_name], mask) ) @@ -525,9 +540,9 @@ def frequency_differencing( freqA_pos = np.argwhere(source_Sv.frequency_nominal.values == freqAB[0]).flatten()[0] freqB_pos = np.argwhere(source_Sv.frequency_nominal.values == freqAB[1]).flatten()[0] - # get channel corresponding to 
frequency provided - chanA = source_Sv.channel.isel(channel=freqA_pos) - chanB = source_Sv.channel.isel(channel=freqB_pos) + # get channels corresponding to frequencies provided + chanA = str(source_Sv.channel.isel(channel=freqA_pos).values) + chanB = str(source_Sv.channel.isel(channel=freqB_pos).values) else: # get individual channels @@ -543,6 +558,7 @@ def frequency_differencing( # assign a name to DataArray da.name = "mask" + # assign provenance attributes mask_attrs = {"mask_type": "frequency differencing"} history_attr = ( f"{datetime.datetime.utcnow()} +00:00. " diff --git a/echopype/tests/consolidate/test_consolidate.py b/echopype/tests/consolidate/test_consolidate.py index 6501b2354..5afe5e6ec 100644 --- a/echopype/tests/consolidate/test_consolidate.py +++ b/echopype/tests/consolidate/test_consolidate.py @@ -176,7 +176,7 @@ def test_add_depth(): assert ds_Sv_depth["depth"].equals(-1 * ds_Sv["echo_range"] * np.cos(tilt / 180 * np.pi) + water_level) # check attributes - assert ds_Sv_depth["depth"].attrs == {"long_name": "Depth", "standard_name": "depth"} + # assert ds_Sv_depth["depth"].attrs == {"long_name": "Depth", "standard_name": "depth"} def test_add_location(test_path): diff --git a/echopype/tests/mask/test_mask.py b/echopype/tests/mask/test_mask.py index 47613f378..933948c6a 100644 --- a/echopype/tests/mask/test_mask.py +++ b/echopype/tests/mask/test_mask.py @@ -572,7 +572,7 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, storage_options_mask={}) # check that masked_ds[var_name] == var_masked_truth - assert masked_ds[var_name].identical(var_masked_truth) + assert masked_ds[var_name].equals(var_masked_truth) # check that the output Dataset has lazy elements, if the input was lazy if is_delayed: