diff --git a/echoregions/core.py b/echoregions/core.py index c1e5d44a..ea34c0d0 100644 --- a/echoregions/core.py +++ b/echoregions/core.py @@ -11,9 +11,9 @@ def read_evr( ---------- filepath : str, Path object A valid path to an EVR file - min_depth : float, default ``None`` + min_depth : float, default 0 Depth value in meters to set -9999.99 depth edges to. - max_depth : float, default ``None`` + max_depth : float, default 1000 Depth value in meters to set 9999.99 depth edges to. Returns diff --git a/echoregions/regions2d/regions2d.py b/echoregions/regions2d/regions2d.py index 9fd025a7..5f0baf96 100644 --- a/echoregions/regions2d/regions2d.py +++ b/echoregions/regions2d/regions2d.py @@ -1,6 +1,6 @@ import warnings from pathlib import Path -from typing import Iterable, List, Union +from typing import Iterable, List, Optional, Union import matplotlib import matplotlib.pyplot as plt @@ -9,8 +9,9 @@ import regionmask import xarray as xr from pandas import DataFrame, Series, Timestamp, isna -from xarray import DataArray +from xarray import DataArray, Dataset +from ..utils.api import convert_mask_3d_to_2d from ..utils.io import validate_path from ..utils.time import parse_simrad_fname_time from .regions2d_parser import parse_regions_file @@ -31,8 +32,8 @@ def __init__( self.data = parse_regions_file(input_file) self.output_file = [] - self.max_depth = max_depth self.min_depth = min_depth + self.max_depth = max_depth def __iter__(self) -> Iterable: return self.data.iterrows() @@ -79,10 +80,10 @@ def select_region( ---------- region_id : float, int, str, list, ``None`` A region id provided as a number, a string, or list of these. - time_range: List of 2 Pandas Timestamps. + time_range : List of 2 Pandas Timestamps. Datetime range for expected output of subselected DataFrame. 1st index value must be later than 0th index value. - depth_range: List of 2 floats. + depth_range : List of 2 floats. Depth range for expected output of subselected DataFrame. 1st index value must be larger than 0th index value. copy : bool @@ -201,7 +202,7 @@ def close_region(self, region_id: List = None) -> DataFrame: Parameters ---------- - region : float, str, list, Series, DataFrame, ``None`` + region_id : List, ``None`` region(s) to select raw files with If ``None``, select all regions. Defaults to ``None`` @@ -322,8 +323,8 @@ def swap_val(val: Union[int, float]) -> Union[int, float]: def plot( self, - region: Union[str, List, DataFrame] = None, - close_region: bool = False, + region_id: List = None, + close_regions: bool = False, **kwargs, ) -> None: """Plot a region from data. @@ -331,7 +332,7 @@ def plot( Parameters --------- - region : float, str, list, Series, DataFrame, ``None`` + region_id : List, ``None`` Region(s) to select raw files with If ``None``, select all regions. Defaults to ``None`` close_region : bool @@ -341,9 +342,9 @@ def plot( """ # Ensure that region is a DataFrame - region = self.select_region(region) + region = self.select_region(region_id) - if close_region: + if close_regions: region = self.close_region(region) for _, row in region.iterrows(): plt.plot(row["time"], row["depth"], **kwargs) @@ -351,36 +352,54 @@ def plot( def mask( self, da_Sv: DataArray, - region_ids: List, - mask_var: str = None, - mask_labels=None, - ) -> DataArray: - """Mask data from Data Array containing Sv data based off of a Regions2D object + region_id: List = None, + mask_name: str = "mask", + mask_labels: dict = None, + collapse_to_2d: bool = False, + ) -> Optional[Dataset]: + """ + Mask data from Data Array containing Sv data based off of a Regions2D object and its regions ids. Parameters ---------- da_Sv : Data Array DataArray of shape (ping_time, depth) containing Sv data. - region_ids : list + region_id : list list IDs of regions to create mask for - mask_var : str + mask_name : str If provided, used to name the output mask array, otherwise `mask` - mask_labels: - None: assigns labels automatically 0,1,2,... - "from_ids": uses the region ids - list: uses a list of integers as labels + mask_labels : dict + If provided, used to label the region_id dimension of the output mask. + collapse_to_2d : bool + If true, then converts 3d mask to 2d mask. If not, keeps output as 3d mask. Returns ------- - A DataArray with the data_var masked by the specified region. + mask_ds : Dataset + Either a 3D mask or a 2D mask based on the conditions below. + If collapse_to_2d is False: + A 3D mask where each layer of the mask will contain a 1s/0s mask for each + unique label in the 2D mask. The layers will be labeled via region_id + values. The slices of the 3D array will be in the form of 1s/0s: masked areas, + and non-masked areas. + If collapse_to_2d is True: + A 2D mask where each individual data points will be in the form of integers, + demarking region_id of masked regions, and nan values, demarking non-masked + areas. + Also contains a data variable (`mask_labels`) with mask labels + corresponding to region_id values. + """ - if isinstance(region_ids, list): - if len(region_ids) == 0: - raise ValueError("region_ids is empty. Cannot be empty.") + if isinstance(region_id, list): + if len(region_id) == 0: + raise ValueError("region_id is empty. Cannot be empty.") + elif region_id is None: + # Extract all region_id values + region_id = self.data.region_id.astype(int).to_list() else: raise TypeError( - f"region_ids must be of type list. Currently is of type {type(region_ids)}" + f"region_id must be of type list. Currently is of type {type(region_id)}" ) if mask_labels is None: @@ -395,75 +414,103 @@ def mask( "mask_labels as None." ) - # Replace nan depth in regions2d. - self.replace_nan_depth(inplace=True) - # Dataframe containing region information. - region_df = self.select_region(region_ids) + region_df = self.select_region(region_id) - # Select only columns which are important. + # Select only important columns region_df = region_df[["region_id", "time", "depth"]] - # Organize the regions in a format for region mask. - df = region_df.explode(["time", "depth"]) + # Filter for rows with depth values within self min and self max depth and + # for rows that have positive depth. + region_df = region_df[ + region_df["depth"].apply( + lambda x: all( + (i >= np.max(0, int(self.min_depth)) and i <= int(self.max_depth)) + for i in x + ) + ) + ] - # Convert region time to integer timestamp. - df["time"] = matplotlib.dates.date2num(df["time"]) + if region_df.empty: + print( + "All rows in Regions DataFrame have NaN Depth values after filtering depth " + "between min_depth and max_depth." + ) + else: + # Organize the regions in a format for region mask. + df = region_df.explode(["time", "depth"]) - # Create a list of dataframes for each regions. - grouped = list(df.groupby("region_id")) + # Convert region time to integer timestamp. + df["time"] = matplotlib.dates.date2num(df["time"]) - # Convert to list of numpy arrays which is an acceptable format to create region mask. - regions_np = [np.array(region[["time", "depth"]]) for id, region in grouped] + # Create a list of dataframes for each regions. + grouped = list(df.groupby("region_id")) - # Corresponding region ids converted to int. - region_ids = [int(id) for id, region in grouped] + # Convert to list of numpy arrays which is an acceptable format to create region mask. + regions_np = [np.array(region[["time", "depth"]]) for _, region in grouped] - # Convert ping_time to unix_time since the masking does not work on datetime objects. - da_Sv = da_Sv.assign_coords( - unix_time=( - "ping_time", - matplotlib.dates.date2num(da_Sv.coords["ping_time"].values), - ) - ) + # Convert corresponding region_id to int. + filtered_region_id = [int(id) for id, _ in grouped] - # Set up mask labels. - if mask_labels == "from_ids": - r = regionmask.Regions(outlines=regions_np, numbers=region_ids) - elif isinstance(mask_labels, list) or mask_labels is None: - r = regionmask.Regions(outlines=regions_np) - else: - raise ValueError("mask_labels must be None, 'from_ids', or a list.") + # Convert ping_time to unix_time since the masking does not work on datetime objects. + da_Sv = da_Sv.assign_coords( + unix_time=( + "ping_time", + matplotlib.dates.date2num(da_Sv.coords["ping_time"].values), + ) + ) - # Create mask - try: - M = r.mask( + # Create mask + r = regionmask.Regions( + outlines=regions_np, names=filtered_region_id, name=mask_name + ) + mask_da = r.mask_3D( da_Sv["unix_time"], da_Sv["depth"], wrap_lon=False, + ).astype( + int + ) # This maps False to 0 and True to 1 + + # Replace region coords with region_id coords + mask_da = mask_da.rename({"names": "region_id"}) + mask_da = mask_da.swap_dims({"region": "region_id"}) + + # Remove all coords other than depth, ping_time, region_id + mask_da = mask_da.drop_vars( + mask_da.coords._names.difference({"depth", "ping_time", "region_id"}) ) - except ValueError as ve: - warnings.warn( - "Most likely using deprecated regionmask version." - "Make sure to use regionmask==0.8.0 or more recent versions.", - DeprecationWarning, - stacklevel=2, + + # Transpose mask_da + mask_da = mask_da.transpose("region_id", "depth", "ping_time") + + # Remove attribute standard_name + mask_da.attrs.pop("standard_name") + + # Rename Data Array to mask_name + mask_da = mask_da.rename(mask_name) + + # Set mask_labels_da + masked_region_id = mask_da.region_id.values.tolist() + subset_mask_labels = [ + mask_labels[key] for key in masked_region_id if key in mask_labels + ] + mask_labels_da = xr.DataArray( + subset_mask_labels, + dims="region_id", + coords={"region_id": masked_region_id}, ) - raise ve - if isinstance(mask_labels, list): - # Convert default labels to mask_labels. - S = xr.where(~M.isnull(), 0, M) - S = M - for idx, label in enumerate(mask_labels): - S = xr.where(M == idx, label, S) - M = S + # Create dataset + mask_ds = xr.Dataset() + mask_ds["mask_labels"] = mask_labels_da + mask_ds["mask_3d"] = mask_da - # Assign specific name to mask array, otherwise 'mask'. - if mask_var: - M = M.rename(mask_var) + if collapse_to_2d: + # Convert 3d mask to 2d mask + mask_ds = convert_mask_3d_to_2d(mask_ds) - return M + return mask_ds def transect_mask( self, @@ -493,6 +540,7 @@ def transect_mask( bbox_distance_threshold: float The maximum value for how far apart the left and right bounding box for each transect value region. Default is set to 1 minute. + Returns ------- M : Data Array diff --git a/echoregions/test_data/transect.evr b/echoregions/test_data/transect.evr index 1d00e69a..ec1ef659 100644 --- a/echoregions/test_data/transect.evr +++ b/echoregions/test_data/transect.evr @@ -135,11 +135,11 @@ Log 20190702 2024556635 -9999.9900000000 20190702 2024556635 9999.9900000000 20190702 2024570135 9999.9900000000 20190702 2024570135 -9999.9900000000 2 COM -13 4 18 0 4 -1 1 20190702 1811518097 9.2447583998 20190702 1952215310 758.9732173069 +13 4 18 0 4 -1 1 20190702 1840518097 9.2447583998 20190702 1952215310 758.9732173069 0 0 Trawl -20190702 1811518097 9.2447583998 20190702 1811518097 758.9732173069 20190702 1952215310 758.9732173069 20190702 1952215310 9.2447583998 0 +20190702 1840518097 9.2447583998 20190702 1840518097 700 20190702 1952215310 700 20190702 1952215310 9.2447583998 0 AWT20 13 4 19 0 2 -1 1 20190702 2110200695 -10011.7976284000 20190702 2110214100 10011.2963773964 diff --git a/echoregions/test_data/transect_multi_mask.evr b/echoregions/test_data/transect_multi_mask.evr new file mode 100644 index 00000000..73d5d34d --- /dev/null +++ b/echoregions/test_data/transect_multi_mask.evr @@ -0,0 +1,151 @@ +EVRG 7 13.0.378.44817 +19 + +13 4 1 0 6 -1 1 20190702 0350546295 -9999.99 20190702 0810094255 9999.99 +1 +Switched from recording to "night" folder to "Leg 2" +0 +Log +20190702 0350546295 -9999.9900000000 20190702 0350546295 9999.9900000000 20190702 0810094255 9999.9900000000 20190702 0810094255 -9999.9900000000 2 +COM + +13 4 2 0 6 -1 1 20190702 1232301755 -9999.9900000000 20190702 1232317405 9999.9900000000 +1 +Recording 10 min of passive data +0 +Log +20190702 1232301755 -9999.9900000000 20190702 1232301755 9999.9900000000 20190702 1232317405 9999.9900000000 20190702 1232317405 -9999.9900000000 2 +Com + +13 4 3 0 6 -1 1 20190702 1243062730 -9999.9900000000 20190702 1243107585 9999.9900000000 +1 +End passive data collection and back to active +0 +Log +20190702 1243062730 -9999.9900000000 20190702 1243062730 9999.9900000000 20190702 1243107585 9999.9900000000 20190702 1243107585 -9999.9900000000 2 +COM + +13 4 4 0 6 -1 1 20190702 1254599315 -9999.9900000000 20190702 1255013030 9999.9900000000 +1 +Sunrise! +0 +Log +20190702 1254599315 -9999.9900000000 20190702 1254599315 9999.9900000000 20190702 1255013030 9999.9900000000 20190702 1255013030 -9999.9900000000 2 +Sunrise + +13 4 5 0 6 -1 1 20190702 1314004880 -9999.9900000000 20190702 1314018880 9999.9900000000 +1 +Starting transect x22 at 22.0, going ~5 kts to allow a barge to pass +0 +Log +20190702 1314004880 -9999.9900000000 20190702 1314004880 9999.9900000000 20190702 1314018880 9999.9900000000 20190702 1314018880 -9999.9900000000 2 +ST22 + +13 4 6 0 6 -1 1 20190702 1325002150 -9999.9900000000 20190702 1325015755 9999.9900000000 +1 +Resuming transect speed of 10 kts +0 +Log +20190702 1325002150 -9999.9900000000 20190702 1325002150 9999.9900000000 20190702 1325015755 9999.9900000000 20190702 1325015755 -9999.9900000000 2 +COM + +13 4 7 0 4 -1 1 20190702 0335160046 9.2447583998 20190702 1232310933 758.9732173069 +0 +0 +Off-transect +20190702 0335160046 -9999.9900000000 20190702 0335160046 758.9732173069 20190702 1232310933 758.9732173069 20190702 1232310933 9.2447583998 0 +Region 7 + +13 4 8 0 4 -1 1 20190702 1232308896 9.2447583998 20190702 1243088885 758.9732173069 +0 +0 +Passive +20190702 1232308896 -9999.9900000000 20190702 1232308896 -9999.9900000000 20190702 1243088885 758.9732173069 20190702 1243088885 9.2447583998 0 +Passive + +13 4 9 0 4 -1 1 20190702 1243084826 9.2447583998 20190702 1314011892 758.9732173069 +0 +0 +Off-transect +20190702 1243084826 10 20190702 1243084826 758.9732173069 20190702 1314011892 758.9732173069 20190702 1314011892 9.2447583998 0 +Region 9 + +13 4 10 0 6 -1 1 20190702 1718314985 -9999.9900000000 20190702 1718328500 9999.9900000000 +2 +Mark for fishing spot AWT20 +37 53.79N, 123 26.15W, TD 255m +0 +Log +20190702 1718314985 -9999.9900000000 20190702 1718314985 9999.9900000000 20190702 1718328500 9999.9900000000 20190702 1718328500 -9999.9900000000 2 +Region 10 + +13 4 11 0 6 -1 1 20190702 1811511900 -9999.9900000000 20190702 1811525400 9999.9900000000 +1 +NIW AWT20 +0 +Log +20190702 1811511900 -9999.9900000000 20190702 1811511900 9999.9900000000 20190702 1811525400 9999.9900000000 20190702 1811525400 -9999.9900000000 2 +NIW20 + +13 4 12 0 6 -1 1 20190702 1832074080 -9999.9900000000 20190702 1832087680 9999.9900000000 +1 +Shoot the doors AWT20 +0 +Log +20190702 1832074080 -9999.9900000000 20190702 1832074080 9999.9900000000 20190702 1832087680 9999.9900000000 20190702 1832087680 -9999.9900000000 2 +SD20 + +13 4 13 0 6 -1 1 20190702 1851540680 -9999.9900000000 20190702 1851554125 9999.9900000000 +1 +Target depth AWT20 +0 +Log +20190702 1957215310 100 20190702 1957215310 200 20190702 1959215310 100 20190702 1959215310 200 2 +TD20 + +13 4 14 0 6 -1 1 20190702 1912051415 -9999.9900000000 20190702 1912064980 9999.9900000000 +1 +Haul back AWT20 +0 +Log +20190702 1912051415 -9999.9900000000 20190702 1912051415 9999.9900000000 20190702 1912064980 9999.9900000000 20190702 1912064980 -9999.9900000000 2 +HB20 + +13 4 15 0 6 -1 1 20190702 1925470975 -9999.9900000000 20190702 1925484485 9999.9900000000 +1 +Doors up AWT20 +0 +Log +20190702 1925470975 -9999.9900000000 20190702 1925470975 9999.9900000000 20190702 1925484485 9999.9900000000 20190702 1925484485 -9999.9900000000 2 +DU20 + +13 4 16 0 6 -1 1 20190702 1952206990 -9999.9900000000 20190702 1952220545 9999.9900000000 +1 +Net on deck, awt20 +0 +Log +20190702 1952206990 -9999.9900000000 20190702 1952206990 9999.9900000000 20190702 1952220545 9999.9900000000 20190702 1952220545 -9999.9900000000 2 +NOD21 + +13 4 17 0 6 -1 1 20190702 2024556635 -9999.9900000000 20190702 2024570135 9999.9900000000 +1 +Dummy +0 +Log +20190702 2024556635 -9999.9900000000 20190702 2024556635 9999.9900000000 20190702 2024570135 9999.9900000000 20190702 2024570135 -9999.9900000000 2 +COM + +13 4 18 0 4 -1 1 20190702 1840518097 9.2447583998 20190702 1952215310 758.9732173069 +0 +0 +Trawl +20190702 1840518097 9.2447583998 20190702 1840518097 700 20190702 1952215310 700 20190702 1952215310 9.2447583998 0 +AWT20 + +13 4 19 0 2 -1 1 20190702 2110200695 -10011.7976284000 20190702 2110214100 10011.2963773964 +1 +End transect 22 +0 +Log +20190702 2110200695 -9999.9900000000 20190702 2110200695 9999.9900000000 20190702 2110214100 10011.2963773964 20190702 2110214100 -10011.7976284000 2 +ET22 diff --git a/echoregions/tests/test_lines.py b/echoregions/tests/test_lines.py index c11736bc..9de7454d 100644 --- a/echoregions/tests/test_lines.py +++ b/echoregions/tests/test_lines.py @@ -8,8 +8,7 @@ from xarray import DataArray import echoregions as er - -from ..lines.lines import Lines +from echoregions.lines.lines import Lines DATA_DIR = Path("./echoregions/test_data/") EVL_PATH = DATA_DIR / "transect.evl" diff --git a/echoregions/tests/test_regions2d.py b/echoregions/tests/test_regions2d.py index ee0e776a..946f2698 100644 --- a/echoregions/tests/test_regions2d.py +++ b/echoregions/tests/test_regions2d.py @@ -8,11 +8,10 @@ from xarray import DataArray, Dataset import echoregions as er - -from ..regions2d.regions2d import Regions2D +from echoregions.regions2d.regions2d import Regions2D DATA_DIR = Path("./echoregions/test_data/") -EVR_PATH = DATA_DIR / "transect.evr" +EVR_PATH = DATA_DIR / "transect_multi_mask.evr" ZARR_PATH = DATA_DIR / "transect.zarr" @@ -127,7 +126,7 @@ def test_regions2d_parsing(regions2d_fixture: Regions2D) -> None: assert df_r2d.shape == (19, 22) # Check individual values of specific row - assert df_r2d.loc[4]["file_name"] == "transect.evr" + assert df_r2d.loc[4]["file_name"] == "transect_multi_mask.evr" assert df_r2d.loc[4]["file_type"] == "EVRG" assert df_r2d.loc[4]["evr_file_format_number"] == "7" assert df_r2d.loc[4]["echoview_version"] == "13.0.378.44817" @@ -412,33 +411,35 @@ def test_select_region_errors(regions2d_fixture: Regions2D) -> None: ) -@pytest.mark.filterwarnings("ignore:No gridpoint belongs to any region") @pytest.mark.regions2d def test_select_type_error(regions2d_fixture: Regions2D) -> None: """ - Test if mask is empty when there is no overlap. + Test for select region errors. Parameters ---------- regions2d_fixture : Regions2D Object containing data of test EVR file. - da_Sv_fixture : DataArray - DataArray containing Sv data of test zarr file. """ - # Create empty mask - M = regions2d_fixture.mask(da_Sv_fixture.isel(channel=0), [8]) - - # Check that all values are null - assert M.isnull().all() + # Check empty dataset error + with pytest.raises(TypeError): + empty_dataset = Dataset() + _ = regions2d_fixture.select_region(empty_dataset) + # Check empty tuple error + with pytest.raises(TypeError): + empty_tuple = () + _ = regions2d_fixture.select_region(empty_tuple) +@pytest.mark.filterwarnings("ignore:No gridpoint belongs to any region.") @pytest.mark.regions2d def test_mask_empty_no_overlap( regions2d_fixture: Regions2D, da_Sv_fixture: DataArray ) -> None: """ - Test if the generated id labels are as expected + Test if mask is empty when a region's depth values are invalid + and also test mask is empty when there is no overlap. Parameters ---------- @@ -448,18 +449,15 @@ def test_mask_empty_no_overlap( DataArray containing Sv data of test zarr file. """ - # Extract Region IDs and convert to Python float values - region_ids = regions2d_fixture.data.region_id.astype(float).to_list() + # Attempt to create mask on region with invalid depth values + mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture.isel(channel=0), [8]) # Check that all values are null assert mask_3d_ds is None - # Check that the mask's values matches only 13th and 18th region and there exists a nan value - # and that there exists a point of no overlap (nan value) - values = list(np.unique(M)) - assert values[0] == 13.0 - assert values[1] == 18.0 - assert np.isnan(values[2]) + # Create mask with regions that have no overlap with the Sv Data Array + mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture.isel(channel=0), [8, 9, 10]) + mask_3d_ds.mask_3d.isnull().all() @pytest.mark.regions2d @@ -467,28 +465,30 @@ def test_mask_type_error( regions2d_fixture: Regions2D, da_Sv_fixture: DataArray ) -> None: """ - Test for select region errors. + Tests mask error functionality for regions. Parameters ---------- regions2d_fixture : Regions2D Object containing data of test EVR file. + da_Sv_fixture : DataArray + DataArray containing Sv data of test zarr file. """ - # Check empty dataset error - with pytest.raises(TypeError): - empty_dataset = Dataset() - _ = regions2d_fixture.select_region(empty_dataset) # Check empty tuple error with pytest.raises(TypeError): empty_tuple = () - _ = regions2d_fixture.select_region(empty_tuple) + _ = regions2d_fixture.mask(da_Sv_fixture, empty_tuple) + # Check empty list error + with pytest.raises(ValueError): + empty_list = [] + _ = regions2d_fixture.mask(da_Sv_fixture, empty_list) @pytest.mark.regions2d def test_mask_2d(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None: """ - Tests mask error functionality for regions. + Testing if 2d mask with collapse_to_do=True works. Parameters ---------- @@ -497,15 +497,29 @@ def test_mask_2d(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None da_Sv_fixture : DataArray DataArray containing Sv data of test zarr file. """ + # Get region_id and mask_labels + region_id = regions2d_fixture.data.region_id.astype(int).to_list() + mask_labels = {key: idx for idx, key in enumerate(region_id)} + mask_labels[13] = "Mask1" - # Check empty tuple error - with pytest.raises(TypeError): - empty_tuple = () - _ = regions2d_fixture.mask(da_Sv_fixture, empty_tuple) - # Check empty list error - with pytest.raises(ValueError): - empty_list = [] - _ = regions2d_fixture.mask(da_Sv_fixture, empty_list) + # Create mask + mask_2d_ds = regions2d_fixture.mask( + da_Sv_fixture, mask_labels=mask_labels, collapse_to_2d=True + ) + + # Check mask_2d values and counts + mask_2d_values = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[0] + mask_2d_counts = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[1] + assert mask_2d_values[0] == 13 + assert mask_2d_values[1] == 18 + assert np.isnan(mask_2d_values[2]) + assert mask_2d_counts[0] == 6328 + assert mask_2d_counts[1] == 3127410 + assert mask_2d_counts[2] == 3514617 + + # Check mask_labels values + assert (np.unique(mask_2d_ds.mask_labels.data) == ["17", "Mask1"]).all() + assert (mask_2d_ds.mask_labels.region_id.values == [13, 18]).all() @pytest.mark.regions2d @@ -513,7 +527,7 @@ def test_mask_3d_2d_3d_2d( regions2d_fixture: Regions2D, da_Sv_fixture: DataArray ) -> None: """ - Testing if converting 2d-3d-2d-3d masks works. + Testing if converting 3d-2d-3d-2d masks works. Parameters ---------- @@ -522,34 +536,65 @@ def test_mask_3d_2d_3d_2d( da_Sv_fixture : DataArray DataArray containing Sv data of test zarr file. """ - - # Extract region_ids - region_ids = regions2d_fixture.data.region_id.astype(float).to_list() + # Get region_id and mask_labels + region_id = regions2d_fixture.data.region_id.astype(int).to_list() + mask_labels = {key: idx for idx, key in enumerate(region_id)} + mask_labels[13] = "Mask1" # Create mask - M = regions2d_fixture.mask(da_Sv_fixture, region_ids, mask_labels=region_ids) + mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture, mask_labels=mask_labels) - # Test values from converted 3D array (previous 2D array) - mask_3d_ds = er.convert_mask_2d_to_3d(M) - assert mask_3d_ds.mask_3d.shape == (2, 3955, 1681) - assert list(mask_3d_ds.mask_dictionary) == [13.0, 18.0] + # Check mask values + assert (mask_3d_ds.mask_3d.region_id.values == [13, 18]).all() + assert ( + np.unique(mask_3d_ds.mask_3d.isel(region_id=0).data, return_counts=True)[1][0] + == 6642027 + ) + assert ( + np.unique(mask_3d_ds.mask_3d.isel(region_id=0).data, return_counts=True)[1][1] + == 6328 + ) + assert ( + np.unique(mask_3d_ds.mask_3d.isel(region_id=1).data, return_counts=True)[1][0] + == 3520945 + ) + assert ( + np.unique(mask_3d_ds.mask_3d.isel(region_id=1).data, return_counts=True)[1][1] + == 3127410 + ) - # Test values from converted 2D array (previously 3D array) - mask_2d_da = er.convert_mask_3d_to_2d(mask_3d_ds) - assert mask_2d_da.equals(M) + # Check mask_labels values + assert (np.unique(mask_3d_ds.mask_labels.data) == ["17", "Mask1"]).all() + assert (mask_3d_ds.mask_labels.region_id.values == [13, 18]).all() - # Test values from 3D array (previously 2D array) - second_mask_3d_ds = er.convert_mask_2d_to_3d(mask_2d_da) - assert second_mask_3d_ds.equals(mask_3d_ds) + # Convert 2d mask to 3d mask + mask_2d_ds = er.convert_mask_3d_to_2d(mask_3d_ds) + + # Check mask_2d values and counts + mask_2d_values = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[0] + mask_2d_counts = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[1] + assert mask_2d_values[0] == 13 + assert mask_2d_values[1] == 18 + assert np.isnan(mask_2d_values[2]) + assert mask_2d_counts[0] == 6328 + assert mask_2d_counts[1] == 3127410 + assert mask_2d_counts[2] == 3514617 + + # Convert 2d mask to 3d mask and test for equality with previous 3d mask + mask_3d_ds_2nd = er.convert_mask_2d_to_3d(mask_2d_ds) + mask_3d_ds_2nd.equals(mask_3d_ds) + + # Convert 2nd 3d mask to 2d mask and test for equality with previous 2d mask + mask_2d_2nd = er.convert_mask_3d_to_2d(mask_3d_ds_2nd) + mask_2d_2nd.equals(mask_2d_ds) -@pytest.mark.filterwarnings("ignore:No gridpoint belongs to any region") @pytest.mark.regions2d def test_one_label_mask_3d_2d_3d_2d( regions2d_fixture: Regions2D, da_Sv_fixture: DataArray ) -> None: """ - Testing if converting 2d-3d-2d-3d masks works for nan mask. + Testing if converting 3d-2d-3d-2d masks works for 1 label mask. Parameters ---------- @@ -559,33 +604,54 @@ def test_one_label_mask_3d_2d_3d_2d( DataArray containing Sv data of test zarr file. """ - # Create mask - M = regions2d_fixture.mask(da_Sv_fixture, [5.0]) + # Create 3d mask + mask_3d_ds = regions2d_fixture.mask( + da_Sv_fixture, region_id=[18], mask_labels={18: "Mask1"} + ) - # Check if mask is null/empty - assert isinstance(M, DataArray) - assert M.isnull().all() + # Check mask values + assert (mask_3d_ds.mask_3d.region_id.values == [18]).all() + assert ( + np.unique(mask_3d_ds.mask_3d.isel(region_id=0).data, return_counts=True)[1][0] + == 3520945 + ) + assert ( + np.unique(mask_3d_ds.mask_3d.isel(region_id=0).data, return_counts=True)[1][1] + == 3127410 + ) - # Test values from converted 3D array (previous 2D array) - mask_3d_ds = er.convert_mask_2d_to_3d(M) - assert np.unique(mask_3d_ds.mask_3d.data)[0] == 0 + # Check mask_labels values + assert (np.unique(mask_3d_ds.mask_labels.data) == ["Mask1"]).all() + assert (mask_3d_ds.mask_labels.region_id.values == [18]).all() - # Test values from converted 2D array (previously 3D array) - mask_2d_da = er.convert_mask_3d_to_2d(mask_3d_ds) - assert mask_2d_da.equals(M) - assert mask_2d_da.isnull().all() + # Convert 2d mask to 3d mask + mask_2d_ds = er.convert_mask_3d_to_2d(mask_3d_ds) - # Test values from 3D array (previously 2D array) - second_mask_3d_ds = er.convert_mask_2d_to_3d(mask_2d_da) - assert second_mask_3d_ds.equals(mask_3d_ds) + # Check mask_2d values and counts + mask_2d_values = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[0] + mask_2d_counts = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[1] + assert mask_2d_values[0] == 18 + assert np.isnan(mask_2d_values[1]) + assert mask_2d_counts[0] == 3127410 + assert mask_2d_counts[1] == 3520945 + # Convert 2d mask to 3d mask and test for equality with previous 3d mask + mask_3d_ds_2nd = er.convert_mask_2d_to_3d(mask_2d_ds) + mask_3d_ds_2nd.equals(mask_3d_ds) + # Convert 2nd 3d mask to 2d mask and test for equality with previous 2d mask + mask_2d_ds_2nd = er.convert_mask_3d_to_2d(mask_3d_ds_2nd) + mask_2d_ds_2nd.equals(mask_2d_ds) + + +@pytest.mark.filterwarnings("ignore:No gridpoint belongs to any region") +@pytest.mark.filterwarnings("ignore:Returning No Mask") @pytest.mark.regions2d def test_nan_mask_3d_2d_and_2d_3d( regions2d_fixture: Regions2D, da_Sv_fixture: DataArray ) -> None: """ - Testing if converting 2d-3d-2d-3d masks works for 1 label mask. + Testing if both converting functions returns none for empty mask inputs. Parameters ---------- @@ -595,32 +661,30 @@ def test_nan_mask_3d_2d_and_2d_3d( DataArray containing Sv data of test zarr file. """ - # Extract region_ids - region_ids = regions2d_fixture.data.region_id.astype(float).to_list() - - # Create mask - M = regions2d_fixture.mask(da_Sv_fixture, region_ids, mask_labels=region_ids) + # Create 3d mask + mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture, [8, 9, 10]) - # Remove 18.0 from mask - M = xr.where(M == 18.0, np.nan, M) - - # Test values of 2D Mask - M_values = M.values - assert set(np.unique(M_values[~np.isnan(M_values)])) == {13.0} - assert M.shape == (3955, 1681) - - # Test values from converted 3D array (previous 2D array) - mask_3d_ds = er.convert_mask_2d_to_3d(M) - assert list(mask_3d_ds.mask_dictionary) == [13.0] - assert mask_3d_ds.mask_3d.shape == (1, 3955, 1681) + # Check if mask is null/empty + assert mask_3d_ds.mask_3d.isnull().all() + + # Attempt to convert empty 3d mask to 2d mask + assert er.convert_mask_3d_to_2d(mask_3d_ds) is None + + # Create emtpy 2d mask with None values + depth_values = [9.15, 9.34, 9.529, 9.719, 758.5] + ping_time_values = ["2019-07-02T18:40:00", "2019-07-02T19:00:00"] + mask_2d_ds = xr.Dataset() + mask_2d_ds["mask_2d"] = xr.DataArray( + np.full((len(depth_values), len(ping_time_values)), np.nan), + coords={"depth": depth_values, "ping_time": ping_time_values}, + dims=["depth", "ping_time"], + ) - # Test values from converted 2D array (previously 3D array) - mask_2d_da = er.convert_mask_3d_to_2d(mask_3d_ds) - assert mask_2d_da.equals(M) + # Check if mask is null/empty + assert mask_2d_ds.mask_2d.isnull().all() - # Test values from 3D array (previously 2D array) - second_mask_3d_ds = er.convert_mask_2d_to_3d(mask_2d_da) - assert second_mask_3d_ds.equals(mask_3d_ds) + # Attempt to convert empty 2d mask to 3d mask + assert er.convert_mask_2d_to_3d(mask_2d_ds) is None @pytest.mark.regions2d @@ -636,19 +700,17 @@ def test_overlapping_mask_3d_2d(regions2d_fixture: Regions2D, da_Sv_fixture: Dat DataArray containing Sv data of test zarr file. """ - # Extract region_ids - region_ids = regions2d_fixture.data.region_id.astype(float).to_list() - - # Create mask - M = regions2d_fixture.mask(da_Sv_fixture, region_ids, mask_labels=region_ids) + # Extract region_id + region_id = regions2d_fixture.data.region_id.astype(int).to_list() - # Test values from converted 3D array (previous 2D array) - mask_3d_ds = er.convert_mask_2d_to_3d(M) - assert mask_3d_ds.mask_3d.shape == (2, 3955, 1681) - assert list(mask_3d_ds.mask_dictionary) == [13.0, 18.0] + # Create 3d mask + mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture, region_id) - # Turn first (0th index) array into all 1s to guarantee overlap - mask_3d_ds.mask_3d[0] = xr.ones_like(mask_3d_ds.mask_3d[0]) + # Turn first (0th index) array corresponding to region id 13 into all 1s + # to guarantee overlap with array corresponding to region id 18 + mask_3d_ds["mask_3d"] = xr.concat( + [xr.ones_like(mask_3d_ds.mask_3d[0])], dim="region_id" + ) # Trying to convert 3d mask to 2d should raise ValueError with pytest.raises(ValueError): diff --git a/echoregions/tests/test_utils.py b/echoregions/tests/test_utils.py index fbe26729..275a348d 100644 --- a/echoregions/tests/test_utils.py +++ b/echoregions/tests/test_utils.py @@ -2,11 +2,9 @@ import numpy as np import pytest -from pandas import DataFrame, Series -from ..utils.api import merge -from ..utils.io import check_file, validate_path -from ..utils.time import parse_simrad_fname_time, parse_time +from echoregions.utils.io import check_file, validate_path +from echoregions.utils.time import parse_simrad_fname_time, parse_time DATA_DIR = Path("./echoregions/test_data/") EVR_PATH = DATA_DIR / "transect.evr" @@ -36,20 +34,6 @@ def test_parse_filename_time() -> None: assert parse_simrad_fname_time(raw_2019_fname) == np.datetime64("2019-06-25T12:48:34.0000") -@pytest.mark.utils -def test_merge_type_checking() -> None: - """ - Test merge type checking functionality. - """ - - with pytest.raises(ValueError): - merge([]) - with pytest.raises(TypeError): - merge([DataFrame()]) - with pytest.raises(TypeError): - merge([Series()]) - - @pytest.mark.utils def test_check_file_errors(): """ diff --git a/echoregions/utils/api.py b/echoregions/utils/api.py index 54cc6cf0..e8347eb6 100644 --- a/echoregions/utils/api.py +++ b/echoregions/utils/api.py @@ -1,98 +1,97 @@ -from typing import List +import warnings +from typing import List, Union import numpy as np import xarray as xr -from xarray import DataArray, Dataset +from xarray import Dataset -from ..regions2d.regions2d import Regions2D - -def convert_mask_2d_to_3d(mask_2d_da: DataArray) -> Dataset: +def convert_mask_2d_to_3d(mask_2d_ds: Dataset) -> Union[Dataset, None]: """ Convert 2D multi-labeled mask data into its 3D one-hot encoded form. Parameters ---------- - mask_2d_da : DataArray - A DataArray with the data_var masked by a specified region. This data will - be in the form of integer, demarking labels of masked regions, and nan values, - demarking non-masked areas. + mask_2d_ds : Dataset + A dataset with the following: + DataArray with the data_var masked by a specified region. Individual data + points will be in the form of integers, demarking region_id of masked regions, + and nan values, demarking non-masked areas. + DataArray with mask labels corresponding to region_id values. Returns ------- mask_3d_ds : Dataset - A Dataset with a 3D DataArray/mask and each layer of the 3D mask will contain - a 1s/0s mask for each unique label in the 2D mask. The layers will be labeled - by a dictionary that maps the individual label layers of the 3D mask to an integer - label in the 2D mask. - - Notes - ----- - Emtpy dictionary data of mask_3d_ds means that there exists no masked - values. + A dataset with the following: + A DataArray 3D mask where each layer of the mask will contain a 1s/0s mask for + each unique label in the 2D mask. The layers will be labeled via region_id + values extracted from 2d values. + DataArray with mask labels corresponding to region_id values. """ + # Check if 'mask_2d' exists as a data variable + if "mask_2d" not in mask_2d_ds: + raise ValueError("The variable 'mask_2d' does not exist in the input dataset.") + # Get unique non nan values from the 2d mask - unique_non_nan = list(np.unique(mask_2d_da.data[~np.isnan(mask_2d_da.data)])) - if len(unique_non_nan) == 0: - unique_non_nan = None + region_id = list( + np.unique(mask_2d_ds.mask_2d.data[~np.isnan(mask_2d_ds.mask_2d.data)]) + ) # Create a list of mask objects from one-hot encoding M.data non-nan values # and a dictionary to remember said values from one-hot encoded data arrays. # If unique_non_nan is None, make mask_dictionary None. - mask_list = [] - mask_dictionary = {"dims": "label", "data": []} - if unique_non_nan is not None: - mask_dictionary = {"dims": "label", "data": []} - for _, value in enumerate(unique_non_nan): + if len(region_id) > 0: + mask_list = [] + for _, value in enumerate(region_id): # Create new 1d mask - new_mask_data = xr.where(mask_2d_da == value, 1.0, 0.0) - # Append data to mask_list and mask_dictionary + new_mask_data = xr.where(mask_2d_ds.mask_2d == value, 1.0, 0.0) + # Append data to mask_list mask_list.append(new_mask_data) - mask_dictionary_list = mask_dictionary["data"] - mask_dictionary_list.append(value) - mask_dictionary["data"] = mask_dictionary_list - mask_3d_da = xr.concat(mask_list, dim="label") + # Concat mask list together to make 3d mask + mask_3d_da = xr.concat(mask_list, dim=region_id) + mask_3d_da = mask_3d_da.rename({"concat_dim": "region_id"}) + # Drop mask_2d + mask_2d_ds = mask_2d_ds.drop_vars("mask_2d") + # Set mask to mask_3d_da + mask_2d_ds["mask_3d"] = mask_3d_da + mask_3d_ds = mask_2d_ds + return mask_3d_ds else: - mask_3d_da = xr.zeros_like(mask_2d_da) - mask_dictionary_da = xr.DataArray.from_dict(mask_dictionary) + warnings.warn( + "Returning No Mask. Empty 3D Mask cannot be converted to 2D Mask.", + UserWarning, + ) + return None - # Initialize Dataset - mask_3d_ds = xr.Dataset() - mask_3d_ds["mask_3d"] = mask_3d_da - mask_3d_ds["mask_dictionary"] = mask_dictionary_da - return mask_3d_ds - -def convert_mask_3d_to_2d(mask_3d_ds: Dataset) -> DataArray: +def convert_mask_3d_to_2d(mask_3d_ds: Dataset) -> Union[Dataset, None]: """ Convert 3D one-hot encoded mask data into its 2D multi-labeled form. Parameters ---------- mask_3d_ds : Dataset - A Dataset with a 3D DataArray with the data_var masked by the specified - region in one-hot encoded form and a dictionary that will be used to map - the individual label layers of the 3D mask to an integer label in the 2D mask. - The 3D DataArray will be in the form of 1s/0s: masked areas, and - non-masked areas. + A dataset with the following: + A DataArray 3D mask where each layer of the mask will contain a 1s/0s mask for + each unique label in the 2D mask. The layers will be labeled via region_id + values extracted from 2d values. + DataArray with mask labels corresponding to region_id values. Returns ------- - mask_2d_da: DataArray - A DataArray with the data_var masked by a specified region. This data will - be in the form of integer, demarking labels of masked regions, and nan values, - demarking non-masked areas. - - Notes - ----- - Emtpy dictionary data of mask_3d_ds means that there exists no masked - values. + mask_2d_ds : Dataset + A dataset with the following: + DataArray with the data_var masked by a specified region. Individual data + points will be in the form of integers, demarking region_id of masked regions, + and nan values, demarking non-masked areas. + DataArray with mask labels corresponding to region_id values. """ - # Get unique non nan values from the 2d mask - unique_non_nan = list(mask_3d_ds.mask_dictionary.data) + # Check if 'mask_2d' exists as a data variable + if "mask_3d" not in mask_3d_ds: + raise ValueError("The variable 'mask_3d' does not exist in the input dataset.") - # Create copies and placeholder values for 2D and 3D mask objects - mask_3d_da = mask_3d_ds.mask_3d.copy() + # Get region_id from the 3D Mask + region_id = list(mask_3d_ds.mask_3d.region_id) # Check if there is overlap between layers. # TODO This code is also extremely slow. It is an O(n^2) operation that @@ -115,25 +114,38 @@ def convert_mask_3d_to_2d(mask_3d_ds: Dataset) -> DataArray: " Overlapping values are not allowed." ) - if len(unique_non_nan) > 0: + if len(region_id) > 0: # Iterate through 3D array layers and set 1.0 to associated label values # dependent on which layer is being worked on and create append layers to # form 2D mask array. - for index, label_value in enumerate(unique_non_nan): - label_layer = mask_3d_da[index] + for index, label_value in enumerate(region_id): + label_layer = mask_3d_ds.mask_3d[index] label_layer = xr.where(label_layer == 1.0, label_value, 0.0) if index == 0: mask_2d_da = label_layer else: mask_2d_da = label_layer + mask_2d_da mask_2d_da = xr.where(mask_2d_da == 0.0, np.nan, mask_2d_da) + + # Setup mask_2d_ds + mask_2d_ds = mask_3d_ds + # Drop mask_2d + mask_2d_ds = mask_2d_ds.drop_vars("mask_3d") + # Set mask to mask_3d_da + mask_2d_ds["mask_2d"] = mask_2d_da + # Drop region_id coordinate if it exists + if "region_id" in mask_2d_ds.mask_2d.coords: + mask_2d_ds.mask_2d = mask_2d_ds.mask_2d.drop_vars(["region_id"]) + return mask_2d_ds else: - # In the case where unique_non_nan is empty, create all zeroes DataArray - mask_2d_da = xr.full_like(mask_3d_da, np.nan) - return mask_2d_da + warnings.warn( + "Returning No Mask. Empty 3D Mask cannot be converted to 2D Mask.", + UserWarning, + ) + return None -def merge(objects: List[Regions2D], reindex_ids: bool = False) -> Regions2D: +def merge(objects: List, reindex_ids: bool = False): # -> Regions2D: # TODO currently deprecated must be fixed before further tests. """Merge echoregion objects. Currently only supports merging Regions2D objects. @@ -148,6 +160,7 @@ def merge(objects: List[Regions2D], reindex_ids: bool = False) -> Regions2D: combined : Regions2D A Regions2D object with region ids prepended by the EVR original filename. """ + """ if isinstance(objects, list): if len(objects) == 0: raise ValueError("objects must contain elements. objects sent in is empty.") @@ -181,3 +194,4 @@ def merge(objects: List[Regions2D], reindex_ids: bool = False) -> Regions2D: # Set region data merged_obj.data["regions"] = merged return merged_obj + """