From b7d67d0225c0d6bf02a7cd8a1515e8f08aa132d7 Mon Sep 17 00:00:00 2001
From: Caesar Tuguinay <87830138+ctuguinay@users.noreply.github.com>
Date: Mon, 2 Oct 2023 12:52:53 -0700
Subject: [PATCH] Modification of masking using regionmask 3D instead of 2D (#144)

Modification of the Regions2D masking function to use regionmask's 3D mask
function instead of its 2D function, and subsequent modifications to the
2d-to-3d and 3d-to-2d helper API functions. Additionally, modified setup to
more closely match that of Echopype.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Wu-Jung Lee
Co-authored-by: Don Setiawan
---
 .github/dependabot.yml                        |   7 +
 .github/workflows/pypi.yml                    | 104 ++++++
 .pre-commit-config.yaml                       |   5 -
 _echoregions_version.py                       |   1 +
 echoregions/__init__.py                       |   4 +-
 echoregions/core.py                           |   8 +-
 echoregions/lines/lines.py                    |  19 +-
 echoregions/regions2d/regions2d.py            | 286 +++++++++--------
 echoregions/regions2d/regions2d_parser.py     |   4 +-
 echoregions/test_data/transect.evr            |   4 +-
 echoregions/test_data/transect_multi_mask.evr | 151 +++++++++
 echoregions/tests/test_lines.py               |  11 +-
 echoregions/tests/test_regions2d.py           | 302 +++++++++---------
 echoregions/tests/test_utils.py               |  32 +-
 echoregions/utils/api.py                      | 163 +++++-----
 echoregions/utils/io.py                       |   4 +-
 echoregions/utils/time.py                     |  11 +-
 pyproject.toml                                |  26 ++
 requirements-dev.txt                          |   2 +
 setup.cfg                                     |  50 ++-
 setup.py                                      |  21 +-
 21 files changed, 755 insertions(+), 460 deletions(-)
 create mode 100644 .github/dependabot.yml
 create mode 100644 .github/workflows/pypi.yml
 create mode 100644 _echoregions_version.py
 create mode 100644 echoregions/test_data/transect_multi_mask.evr
 create mode 100644 pyproject.toml

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..bad6ba3f
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,7 @@
+version: 2
+updates:
+  - package-ecosystem: 'github-actions'
+    directory: '/'
+    schedule:
+      # Check for updates once a week
+      interval: 'weekly'
diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
new file mode 100644
index 00000000..0cbd518c
--- /dev/null
+++ b/.github/workflows/pypi.yml
@@ -0,0 +1,104 @@
+# Publish archives to PyPI and TestPyPI using GitHub Actions
+name: Publish to PyPI
+on: [workflow_dispatch]
+jobs:
+
+  build-artifact:
+
+    name: Build echoregions package
+    runs-on: ubuntu-20.04
+    if: github.repository == 'OSOceanAcoustics/echoregions'
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # fetch all history so that setuptools-scm works
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v4.7.0
+        with:
+          python-version: 3.9
+
+      - name: Install dependencies
+        run: python -m pip install setuptools wheel
+
+      # This step is only necessary for testing purposes and for TestPyPI
+      - name: Fix up version string for TestPyPI
+        if: ${{ !startsWith(github.ref, 'refs/tags') }}
+        run: |
+          # Change setuptools-scm local_scheme to "no-local-version" so the
+          # local part of the version isn't included, making the version string
+          # compatible with PyPI.
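+          # For example, a dev version like the one recorded in
+          # _echoregions_version.py, "0.1.dev348+ga480450.d20231002", would be
+          # published as "0.1.dev348" (the "+..." local part is dropped).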
+ sed --in-place "s/node-and-date/no-local-version/g" pyproject.toml + + - name: Build source and wheel distributions + run: | + python setup.py sdist bdist_wheel + echo "" + echo "Generated files:" + ls -lh dist/ + - uses: actions/upload-artifact@v3 + with: + name: releases + path: dist + + test-built-dist: + + name: Test echoregions package + needs: build-artifact + runs-on: ubuntu-20.04 + permissions: + contents: read + id-token: write + steps: + - uses: actions/setup-python@v4.7.0 + name: Install Python + with: + python-version: 3.9 + - uses: actions/download-artifact@v3 + with: + name: releases + path: dist + + - name: List contents of built dist + run: | + ls -ltrh + ls -ltrh dist + + - name: Publish to Test PyPI + uses: pypa/gh-action-pypi-publish@v1.8.10 + with: + repository-url: https://test.pypi.org/legacy/ + verbose: true + skip-existing: true + + - name: Check pypi packages + run: | + sleep 3 + python -m pip install --upgrade pip + + echo "=== Testing wheel file ===" + # Install wheel to get dependencies and check import + python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade --pre echoregions + python -c "import echoregions;" + echo "=== Done testing wheel file ===" + + echo "=== Testing source tar file ===" + # Install tar gz and check import + python -m pip uninstall --yes echoregions + python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade --pre --no-binary=:all: echoregions + python -c "import echoregions;" + echo "=== Done testing source tar file ===" + + publish-pypi: + name: Push echoregions to production pypi + needs: test-built-dist + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v3 + with: + name: releases + path: dist + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@v1.8.10 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ce98cba4..fcdb8a78 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,11 +20,6 @@ repos: hooks: - id: black - - repo: https://github.com/PyCQA/flake8 - rev: 6.1.0 - hooks: - - id: flake8 - - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: diff --git a/_echoregions_version.py b/_echoregions_version.py new file mode 100644 index 00000000..b8f00b5d --- /dev/null +++ b/_echoregions_version.py @@ -0,0 +1 @@ +version = "0.1.dev348+ga480450.d20231002" diff --git a/echoregions/__init__.py b/echoregions/__init__.py index fe883ce8..ba72c525 100644 --- a/echoregions/__init__.py +++ b/echoregions/__init__.py @@ -4,7 +4,9 @@ that contain the majority of the functionality within echoregions. """ +from _echoregions_version import version as __version__ # noqa + from .core import read_evl, read_evr from .utils.api import convert_mask_2d_to_3d, convert_mask_3d_to_2d -__all__ = ["read_evl", "read_evr", "convert_mask_2d_to_3d", "convert_mask_3d_to_2d"] +__all__ = ["read_evl", "read_evr", "convert_mask_2d_to_3d", "convert_mask_3d_to_2d"] # noqa diff --git a/echoregions/core.py b/echoregions/core.py index b6d8d653..c3cb5a7b 100644 --- a/echoregions/core.py +++ b/echoregions/core.py @@ -2,18 +2,16 @@ from .regions2d.regions2d import Regions2D -def read_evr( - filepath: str, min_depth: float = None, max_depth: float = None -) -> Regions2D: +def read_evr(filepath: str, min_depth: float = 0.0, max_depth: float = 1000.0) -> Regions2D: """Read an EVR file into a Regions2D object. 
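+
+    A minimal usage sketch (the file path is illustrative):
+
+    >>> import echoregions as er
+    >>> r2d = er.read_evr("transect.evr", min_depth=0.0, max_depth=1000.0)
+    >>> r2d.data  # parsed regions as a pandas DataFrame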
Parameters ---------- filepath : str, Path object A valid path to an EVR file - min_depth : float, default ``None`` + min_depth : float, default 0 Depth value in meters to set -9999.99 depth edges to. - max_depth : float, default ``None`` + max_depth : float, default 1000 Depth value in meters to set 9999.99 depth edges to. Returns diff --git a/echoregions/lines/lines.py b/echoregions/lines/lines.py index b878a252..286a525e 100644 --- a/echoregions/lines/lines.py +++ b/echoregions/lines/lines.py @@ -23,9 +23,7 @@ def __init__(self, input_file: str, nan_depth_value: float = None): self.data = parse_line_file(input_file) self.output_file = [] - self._nan_depth_value = ( - None # Set to replace -10000.99 depth values with (EVL only) - ) + self._nan_depth_value = None # Set to replace -10000.99 depth values with (EVL only) self._nan_depth_value = nan_depth_value def __iter__(self) -> Iterable: @@ -67,9 +65,7 @@ def to_csv(self, save_path: bool = None) -> None: path to save the CSV file to """ # Check if the save directory is safe - save_path = validate_path( - save_path=save_path, input_file=self.input_file, ext=".csv" - ) + save_path = validate_path(save_path=save_path, input_file=self.input_file, ext=".csv") # Reorder columns and export to csv self.data.to_csv(save_path, index=False) self.output_file.append(save_path) @@ -88,9 +84,7 @@ def to_json(self, save_path: str = None, pretty: bool = True, **kwargs) -> None: keyword arguments passed into `parse_file` """ # Check if the save directory is safe - save_path = validate_path( - save_path=save_path, input_file=self.input_file, ext=".json" - ) + save_path = validate_path(save_path=save_path, input_file=self.input_file, ext=".json") indent = 4 if pretty else None # Save the entire parsed EVR dictionary as a JSON file @@ -176,8 +170,7 @@ def mask(self, da_Sv: DataArray, method: str = "nearest", limit_area: str = None if not isinstance(da_Sv, DataArray): raise TypeError( - "Input da_Sv must be of type DataArray. da_Sv was instead" - f" of type {type(da_Sv)}" + "Input da_Sv must be of type DataArray. 
da_Sv was instead" f" of type {type(da_Sv)}" ) def filter_bottom(bottom, start_date, end_date): @@ -264,9 +257,7 @@ def filter_bottom(bottom, start_date, end_date): print(e) # convert to data array for bottom - bottom_da = bottom_interpolated[ - "depth" - ].to_xarray() # .rename({'index':'ping_time'}) + bottom_da = bottom_interpolated["depth"].to_xarray() # .rename({'index':'ping_time'}) bottom_da = bottom_da.rename({"time": "ping_time"}) # create a data array of depths diff --git a/echoregions/regions2d/regions2d.py b/echoregions/regions2d/regions2d.py index 4b12459f..530adeb9 100644 --- a/echoregions/regions2d/regions2d.py +++ b/echoregions/regions2d/regions2d.py @@ -1,6 +1,6 @@ import warnings from pathlib import Path -from typing import Iterable, List, Union +from typing import Iterable, List, Optional, Union import matplotlib import matplotlib.pyplot as plt @@ -9,8 +9,9 @@ import regionmask import xarray as xr from pandas import DataFrame, Series, Timestamp, isna -from xarray import DataArray +from xarray import DataArray, Dataset +from ..utils.api import convert_mask_3d_to_2d from ..utils.io import validate_path from ..utils.time import parse_simrad_fname_time from .regions2d_parser import parse_regions_file @@ -27,22 +28,12 @@ def __init__( min_depth: Union[int, float] = None, max_depth: Union[int, float] = None, ): - self._min_depth = ( - None # Set to replace -9999.99 depth values which are EVR min range - ) - self._max_depth = ( - None # Set to replace 9999.99 depth values which are EVR max range - ) - self._nan_depth_value = ( - None # Set to replace -10000.99 depth values with (EVL only) - ) - self.input_file = input_file self.data = parse_regions_file(input_file) self.output_file = [] - self.max_depth = max_depth self.min_depth = min_depth + self.max_depth = max_depth def __iter__(self) -> Iterable: return self.data.iterrows() @@ -59,9 +50,7 @@ def to_csv(self, save_path: bool = None) -> None: path to save the CSV file to """ # Check if the save directory is safe - save_path = validate_path( - save_path=save_path, input_file=self.input_file, ext=".csv" - ) + save_path = validate_path(save_path=save_path, input_file=self.input_file, ext=".csv") # Reorder columns and export to csv self.data.to_csv(save_path, index=False) self.output_file.append(save_path) @@ -91,10 +80,10 @@ def select_region( ---------- region_id : float, int, str, list, ``None`` A region id provided as a number, a string, or list of these. - time_range: List of 2 Pandas Timestamps. + time_range : List of 2 Pandas Timestamps. Datetime range for expected output of subselected DataFrame. 1st index value must be later than 0th index value. - depth_range: List of 2 floats. + depth_range : List of 2 floats. Depth range for expected output of subselected DataFrame. 1st index value must be larger than 0th index value. copy : bool @@ -179,8 +168,7 @@ def select_region( region = region[ region["time"].apply( lambda depth_array: all( - depth_range[0] <= float(x) - and depth_range[1] >= float(x) + depth_range[0] <= float(x) and depth_range[1] >= float(x) for x in depth_array ) ) @@ -209,14 +197,12 @@ def select_region( ) return region - def close_region( - self, region: Union[float, str, List, Series, DataFrame] = None - ) -> DataFrame: + def close_region(self, region_id: List = None) -> DataFrame: """Close a region by appending the first point to end of the list of points. 
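+
+        For example, a region whose "time" and "depth" point arrays are
+        [p0, p1, p2] is returned with points [p0, p1, p2, p0], so that the
+        polygon outline is closed.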
Parameters ---------- - region : float, str, list, Series, DataFrame, ``None`` + region_id : List, ``None`` region(s) to select raw files with If ``None``, select all regions. Defaults to ``None`` @@ -225,13 +211,9 @@ def close_region( DataFrame Returns a new DataFrame with closed regions """ - region = self.select_region(region, copy=True) - region["time"] = region.apply( - lambda row: np.append(row["time"], row["time"][0]), axis=1 - ) - region["depth"] = region.apply( - lambda row: np.append(row["depth"], row["depth"][0]), axis=1 - ) + region = self.select_region(region_id, copy=True) + region["time"] = region.apply(lambda row: np.append(row["time"], row["time"][0]), axis=1) + region["depth"] = region.apply(lambda row: np.append(row["depth"], row["depth"][0]), axis=1) return region def select_sonar_file( @@ -286,9 +268,7 @@ def select_sonar_file( if np.all(sonar_file_times < region_times.min()) or np.all( sonar_file_times > region_times.max() ): - print( - "Sonar file times did not overlap at all with region times. Returning empty list" - ) + print("Sonar file times did not overlap at all with region times. Returning empty list") return [] else: # Get lower and upper index of filetimes @@ -339,8 +319,8 @@ def swap_val(val: Union[int, float]) -> Union[int, float]: def plot( self, - region: Union[str, List, DataFrame] = None, - close_region: bool = False, + region_id: List = None, + close_regions: bool = False, **kwargs, ) -> None: """Plot a region from data. @@ -348,7 +328,7 @@ def plot( Parameters --------- - region : float, str, list, Series, DataFrame, ``None`` + region_id : List, ``None`` Region(s) to select raw files with If ``None``, select all regions. Defaults to ``None`` close_region : bool @@ -358,122 +338,172 @@ def plot( """ # Ensure that region is a DataFrame - region = self.select_region(region) + region = self.select_region(region_id) - if close_region: - region = self.Regions2D.close_region(region) + if close_regions: + region = self.close_region(region) for _, row in region.iterrows(): plt.plot(row["time"], row["depth"], **kwargs) def mask( self, da_Sv: DataArray, - region_ids: List, - mask_var: str = None, - mask_labels=None, - ) -> DataArray: - """Mask data from Data Array containing Sv data based off of a Regions2D object + region_id: List = None, + mask_name: str = "mask", + mask_labels: dict = None, + collapse_to_2d: bool = False, + ) -> Optional[Dataset]: + """ + Mask data from Data Array containing Sv data based off of a Regions2D object and its regions ids. Parameters ---------- da_Sv : Data Array DataArray of shape (ping_time, depth) containing Sv data. - region_ids : list + region_id : list list IDs of regions to create mask for - mask_var : str + mask_name : str If provided, used to name the output mask array, otherwise `mask` - mask_labels: - None: assigns labels automatically 0,1,2,... - "from_ids": uses the region ids - list: uses a list of integers as labels + mask_labels : dict + If provided, used to label the region_id dimension of the output mask. + collapse_to_2d : bool + If true, then converts 3d mask to 2d mask. If not, keeps output as 3d mask. Returns ------- - A DataArray with the data_var masked by the specified region. + mask_ds : Dataset + Either a 3D mask or a 2D mask based on the conditions below. + If collapse_to_2d is False: + A 3D mask where each layer of the mask will contain a 1s/0s mask for each + unique label in the 2D mask. The layers will be labeled via region_id + values. 
The slices of the 3D array will be in the form of 1s/0s: 1 for masked areas,
+                and 0 for non-masked areas.
+            If collapse_to_2d is True:
+                A 2D mask where each individual data point will be an integer,
+                demarking the region_id of masked regions, and NaN values,
+                demarking non-masked areas.
+                Also contains a data variable (`mask_labels`) with mask labels
+                corresponding to region_id values.
+
        """
-        if isinstance(region_ids, list):
-            if len(region_ids) == 0:
-                raise ValueError("region_ids is empty. Cannot be empty.")
+        if isinstance(region_id, list):
+            if len(region_id) == 0:
+                raise ValueError("region_id is empty. Cannot be empty.")
+        elif region_id is None:
+            # Extract all region_id values
+            region_id = self.data.region_id.astype(int).to_list()
         else:
             raise TypeError(
-                f"region_ids must be of type list. Currently is of type {type(region_ids)}"
+                f"region_id must be of type list. Currently is of type {type(region_id)}"
             )

-        if isinstance(mask_labels, list) and (len(mask_labels) != len(region_ids)):
+        if mask_labels is None:
+            # Create mask_labels with each region_id as a key and values starting from 0
+            mask_labels = {key: idx for idx, key in enumerate(region_id)}
+
+        # Check that region_id and mask_labels are of the same size
+        if len(set(region_id) - set(mask_labels.keys())) > 0:
             raise ValueError(
-                "If mask_labels is a list, it should be of same length as region_ids."
+                "Each 'region_id' must be a key in 'mask_labels'. "
+                "If you would prefer 0 based indexing as values for mask_labels, leave "
+                "mask_labels as None."
             )

-        # Replace nan depth in regions2d.
-        self.replace_nan_depth(inplace=True)
-
         # Dataframe containing region information.
-        region_df = self.select_region(region_ids)
+        region_df = self.select_region(region_id)

-        # Select only columns which are important.
+        # Select only important columns
         region_df = region_df[["region_id", "time", "depth"]]

-        # Organize the regions in a format for region mask.
-        df = region_df.explode(["time", "depth"])
+        # Filter for rows with depth values within self min and self max depth and
+        # for rows that have positive depth.
+        region_df = region_df[
+            region_df["depth"].apply(
+                lambda x: all(
+                    (i >= max(0, int(self.min_depth)) and i <= int(self.max_depth)) for i in x
+                )
+            )
+        ]
+
+        if region_df.empty:
+            print(
+                "All rows in the Regions DataFrame were filtered out: no depth values "
+                "lie between min_depth and max_depth."
+            )
+        else:
+            # Organize the regions in a format for region mask.
+            df = region_df.explode(["time", "depth"])

-        # Convert region time to integer timestamp.
-        df["time"] = matplotlib.dates.date2num(df["time"])
+            # Convert region time to integer timestamp.
+            df["time"] = matplotlib.dates.date2num(df["time"])

-        # Create a list of dataframes for each regions.
-        grouped = list(df.groupby("region_id"))
+            # Create a list of dataframes for each region.
+            grouped = list(df.groupby("region_id"))

-        # Convert to list of numpy arrays which is an acceptable format to create region mask.
-        regions_np = [np.array(region[["time", "depth"]]) for id, region in grouped]
+            # Convert to list of numpy arrays which is an acceptable format to create region mask.
+            regions_np = [np.array(region[["time", "depth"]]) for _, region in grouped]

-        # Corresponding region ids converted to int.
-        region_ids = [int(id) for id, region in grouped]
+            # Convert corresponding region_id to int.
+            filtered_region_id = [int(id) for id, _ in grouped]

-        # Convert ping_time to unix_time since the masking does not work on datetime objects.
- da_Sv = da_Sv.assign_coords( - unix_time=( - "ping_time", - matplotlib.dates.date2num(da_Sv.coords["ping_time"].values), + # Convert ping_time to unix_time since the masking does not work on datetime objects. + da_Sv = da_Sv.assign_coords( + unix_time=( + "ping_time", + matplotlib.dates.date2num(da_Sv.coords["ping_time"].values), + ) ) - ) - # Set up mask labels. - if mask_labels == "from_ids": - r = regionmask.Regions(outlines=regions_np, numbers=region_ids) - elif isinstance(mask_labels, list) or mask_labels is None: - r = regionmask.Regions(outlines=regions_np) - else: - raise ValueError("mask_labels must be None, 'from_ids', or a list.") - - # Create mask - try: - M = r.mask( + # Create mask + r = regionmask.Regions(outlines=regions_np, names=filtered_region_id, name=mask_name) + mask_da = r.mask_3D( da_Sv["unix_time"], da_Sv["depth"], wrap_lon=False, + ).astype( + int + ) # This maps False to 0 and True to 1 + + # Replace region coords with region_id coords + mask_da = mask_da.rename({"names": "region_id"}) + mask_da = mask_da.swap_dims({"region": "region_id"}) + + # Remove all coords other than depth, ping_time, region_id + mask_da = mask_da.drop_vars( + mask_da.coords._names.difference({"depth", "ping_time", "region_id"}) ) - except ValueError as ve: - warnings.warn( - "Most likely using deprecated regionmask version." - "Make sure to use regionmask==0.8.0 or more recent versions.", - DeprecationWarning, - stacklevel=2, + + # Transpose mask_da + mask_da = mask_da.transpose("region_id", "depth", "ping_time") + + # Remove attribute standard_name + mask_da.attrs.pop("standard_name") + + # Rename Data Array to mask_name + mask_da = mask_da.rename(mask_name) + + # Set mask_labels_da + masked_region_id = mask_da.region_id.values.tolist() + subset_mask_labels = [ + mask_labels[key] for key in masked_region_id if key in mask_labels + ] + mask_labels_da = xr.DataArray( + subset_mask_labels, + dims="region_id", + coords={"region_id": masked_region_id}, ) - raise ve - if isinstance(mask_labels, list): - # Convert default labels to mask_labels. - S = xr.where(~M.isnull(), 0, M) - S = M - for idx, label in enumerate(mask_labels): - S = xr.where(M == idx, label, S) - M = S + # Create dataset + mask_ds = xr.Dataset() + mask_ds["mask_labels"] = mask_labels_da + mask_ds["mask_3d"] = mask_da - # Assign specific name to mask array, otherwise 'mask'. - if mask_var: - M = M.rename(mask_var) + if collapse_to_2d: + # Convert 3d mask to 2d mask + mask_ds = convert_mask_3d_to_2d(mask_ds) - return M + return mask_ds def transect_mask( self, @@ -503,6 +533,7 @@ def transect_mask( bbox_distance_threshold: float The maximum value for how far apart the left and right bounding box for each transect value region. Default is set to 1 minute. + Returns ------- M : Data Array @@ -520,8 +551,7 @@ def transect_mask( # Check that there are 4 unique transect strings if len(transect_strs) != len(set(transect_strs)): raise ValueError( - "There exist duplicate values in transect_dict. " - "All values must be unique." + "There exist duplicate values in transect_dict. " "All values must be unique." 
) for transect_str in transect_strs: if not isinstance(transect_str, str): @@ -540,9 +570,9 @@ def transect_mask( ].copy() # Create a new column which stores the transect_type without the transect number - transect_df.loc[:, "transect_type"] = transect_df.loc[ - :, "region_name" - ].str.extract(rf"({start_str}|{break_str}|{resume_str}|{end_str})") + transect_df.loc[:, "transect_type"] = transect_df.loc[:, "region_name"].str.extract( + rf"({start_str}|{break_str}|{resume_str}|{end_str})" + ) # Check if for all transects, there exists 1 start_str transect type. # If there does not exists a start_str transect, set the first region @@ -586,9 +616,7 @@ def transect_mask( # Checking the maximum width of a transect log region bbox. # If over a minute, throw an error. - max_time = ( - transect_df["region_bbox_right"] - transect_df["region_bbox_left"] - ).max() + max_time = (transect_df["region_bbox_right"] - transect_df["region_bbox_left"]).max() max_time_minutes = max_time.total_seconds() / 60 if max_time_minutes > bbox_distance_threshold: Warning( @@ -605,18 +633,14 @@ def transect_mask( transect_df = transect_df.sort_values(by="region_bbox_left") # Create new shifted columns with the next transect log type and next region # bbox left datetime value. - transect_df.loc[:, "transect_type_next"] = transect_df.loc[ - :, "transect_type" - ].shift(-1) - transect_df.loc[:, "region_bbox_left_next"] = transect_df.loc[ - :, "region_bbox_left" - ].shift(-1) + transect_df.loc[:, "transect_type_next"] = transect_df.loc[:, "transect_type"].shift(-1) + transect_df.loc[:, "region_bbox_left_next"] = transect_df.loc[:, "region_bbox_left"].shift( + -1 + ) # Check if start_str followed by break_str/end_str. start_transect_rows = transect_df[transect_df["transect_type"] == start_str] - start_transect_type_next_list = list( - start_transect_rows["transect_type_next"].values - ) + start_transect_type_next_list = list(start_transect_rows["transect_type_next"].values) for transect_type_next in start_transect_type_next_list: if transect_type_next not in [break_str, end_str]: raise ValueError( @@ -627,9 +651,7 @@ def transect_mask( # Check if break_str followed by resume_str. break_transect_rows = transect_df[transect_df["transect_type"] == break_str] - break_transect_type_next_list = list( - break_transect_rows["transect_type_next"].values - ) + break_transect_type_next_list = list(break_transect_rows["transect_type_next"].values) for transect_type_next in break_transect_type_next_list: if transect_type_next != resume_str: raise ValueError( @@ -639,9 +661,7 @@ def transect_mask( # Check if resume_str followed by break_str/end_str. resume_transect_rows = transect_df[transect_df["transect_type"] == resume_str] - resume_transect_type_next_list = list( - resume_transect_rows["transect_type_next"].values - ) + resume_transect_type_next_list = list(resume_transect_rows["transect_type_next"].values) for transect_type_next in resume_transect_type_next_list: if transect_type_next not in [break_str, end_str]: raise ValueError( @@ -652,9 +672,7 @@ def transect_mask( # Check if end_str followed by start_str or if NA. end_transect_rows = transect_df[transect_df["transect_type"] == end_str] - end_transect_type_next_list = list( - end_transect_rows["transect_type_next"].values - ) + end_transect_type_next_list = list(end_transect_rows["transect_type_next"].values) for transect_type_next in end_transect_type_next_list: # If this value is not NA, check if it is start_str. 
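+            # (For reference, the checks in this section enforce the sequence
+            # start_str -> (break_str -> resume_str)* -> end_str, where end_str
+            # may be followed by the start_str of the next transect or by nothing.)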
if not isna(transect_type_next): diff --git a/echoregions/regions2d/regions2d_parser.py b/echoregions/regions2d/regions2d_parser.py index f0a3c099..0bbf585b 100644 --- a/echoregions/regions2d/regions2d_parser.py +++ b/echoregions/regions2d/regions2d_parser.py @@ -115,9 +115,7 @@ def _parse_points(line: str) -> Tuple[ndarray]: r_notes = [fid.readline().strip() for line in range(n_note_lines)] # Add detection settings to region data n_detection_setting_lines = int(fid.readline().strip()) - r_detection_settings = [ - fid.readline().strip() for line in range(n_detection_setting_lines) - ] + r_detection_settings = [fid.readline().strip() for line in range(n_detection_setting_lines)] # Add class to region data r_metadata["region_class"] = fid.readline().strip() # Add point x and y diff --git a/echoregions/test_data/transect.evr b/echoregions/test_data/transect.evr index 1d00e69a..ec1ef659 100644 --- a/echoregions/test_data/transect.evr +++ b/echoregions/test_data/transect.evr @@ -135,11 +135,11 @@ Log 20190702 2024556635 -9999.9900000000 20190702 2024556635 9999.9900000000 20190702 2024570135 9999.9900000000 20190702 2024570135 -9999.9900000000 2 COM -13 4 18 0 4 -1 1 20190702 1811518097 9.2447583998 20190702 1952215310 758.9732173069 +13 4 18 0 4 -1 1 20190702 1840518097 9.2447583998 20190702 1952215310 758.9732173069 0 0 Trawl -20190702 1811518097 9.2447583998 20190702 1811518097 758.9732173069 20190702 1952215310 758.9732173069 20190702 1952215310 9.2447583998 0 +20190702 1840518097 9.2447583998 20190702 1840518097 700 20190702 1952215310 700 20190702 1952215310 9.2447583998 0 AWT20 13 4 19 0 2 -1 1 20190702 2110200695 -10011.7976284000 20190702 2110214100 10011.2963773964 diff --git a/echoregions/test_data/transect_multi_mask.evr b/echoregions/test_data/transect_multi_mask.evr new file mode 100644 index 00000000..73d5d34d --- /dev/null +++ b/echoregions/test_data/transect_multi_mask.evr @@ -0,0 +1,151 @@ +EVRG 7 13.0.378.44817 +19 + +13 4 1 0 6 -1 1 20190702 0350546295 -9999.99 20190702 0810094255 9999.99 +1 +Switched from recording to "night" folder to "Leg 2" +0 +Log +20190702 0350546295 -9999.9900000000 20190702 0350546295 9999.9900000000 20190702 0810094255 9999.9900000000 20190702 0810094255 -9999.9900000000 2 +COM + +13 4 2 0 6 -1 1 20190702 1232301755 -9999.9900000000 20190702 1232317405 9999.9900000000 +1 +Recording 10 min of passive data +0 +Log +20190702 1232301755 -9999.9900000000 20190702 1232301755 9999.9900000000 20190702 1232317405 9999.9900000000 20190702 1232317405 -9999.9900000000 2 +Com + +13 4 3 0 6 -1 1 20190702 1243062730 -9999.9900000000 20190702 1243107585 9999.9900000000 +1 +End passive data collection and back to active +0 +Log +20190702 1243062730 -9999.9900000000 20190702 1243062730 9999.9900000000 20190702 1243107585 9999.9900000000 20190702 1243107585 -9999.9900000000 2 +COM + +13 4 4 0 6 -1 1 20190702 1254599315 -9999.9900000000 20190702 1255013030 9999.9900000000 +1 +Sunrise! 
+0 +Log +20190702 1254599315 -9999.9900000000 20190702 1254599315 9999.9900000000 20190702 1255013030 9999.9900000000 20190702 1255013030 -9999.9900000000 2 +Sunrise + +13 4 5 0 6 -1 1 20190702 1314004880 -9999.9900000000 20190702 1314018880 9999.9900000000 +1 +Starting transect x22 at 22.0, going ~5 kts to allow a barge to pass +0 +Log +20190702 1314004880 -9999.9900000000 20190702 1314004880 9999.9900000000 20190702 1314018880 9999.9900000000 20190702 1314018880 -9999.9900000000 2 +ST22 + +13 4 6 0 6 -1 1 20190702 1325002150 -9999.9900000000 20190702 1325015755 9999.9900000000 +1 +Resuming transect speed of 10 kts +0 +Log +20190702 1325002150 -9999.9900000000 20190702 1325002150 9999.9900000000 20190702 1325015755 9999.9900000000 20190702 1325015755 -9999.9900000000 2 +COM + +13 4 7 0 4 -1 1 20190702 0335160046 9.2447583998 20190702 1232310933 758.9732173069 +0 +0 +Off-transect +20190702 0335160046 -9999.9900000000 20190702 0335160046 758.9732173069 20190702 1232310933 758.9732173069 20190702 1232310933 9.2447583998 0 +Region 7 + +13 4 8 0 4 -1 1 20190702 1232308896 9.2447583998 20190702 1243088885 758.9732173069 +0 +0 +Passive +20190702 1232308896 -9999.9900000000 20190702 1232308896 -9999.9900000000 20190702 1243088885 758.9732173069 20190702 1243088885 9.2447583998 0 +Passive + +13 4 9 0 4 -1 1 20190702 1243084826 9.2447583998 20190702 1314011892 758.9732173069 +0 +0 +Off-transect +20190702 1243084826 10 20190702 1243084826 758.9732173069 20190702 1314011892 758.9732173069 20190702 1314011892 9.2447583998 0 +Region 9 + +13 4 10 0 6 -1 1 20190702 1718314985 -9999.9900000000 20190702 1718328500 9999.9900000000 +2 +Mark for fishing spot AWT20 +37 53.79N, 123 26.15W, TD 255m +0 +Log +20190702 1718314985 -9999.9900000000 20190702 1718314985 9999.9900000000 20190702 1718328500 9999.9900000000 20190702 1718328500 -9999.9900000000 2 +Region 10 + +13 4 11 0 6 -1 1 20190702 1811511900 -9999.9900000000 20190702 1811525400 9999.9900000000 +1 +NIW AWT20 +0 +Log +20190702 1811511900 -9999.9900000000 20190702 1811511900 9999.9900000000 20190702 1811525400 9999.9900000000 20190702 1811525400 -9999.9900000000 2 +NIW20 + +13 4 12 0 6 -1 1 20190702 1832074080 -9999.9900000000 20190702 1832087680 9999.9900000000 +1 +Shoot the doors AWT20 +0 +Log +20190702 1832074080 -9999.9900000000 20190702 1832074080 9999.9900000000 20190702 1832087680 9999.9900000000 20190702 1832087680 -9999.9900000000 2 +SD20 + +13 4 13 0 6 -1 1 20190702 1851540680 -9999.9900000000 20190702 1851554125 9999.9900000000 +1 +Target depth AWT20 +0 +Log +20190702 1957215310 100 20190702 1957215310 200 20190702 1959215310 100 20190702 1959215310 200 2 +TD20 + +13 4 14 0 6 -1 1 20190702 1912051415 -9999.9900000000 20190702 1912064980 9999.9900000000 +1 +Haul back AWT20 +0 +Log +20190702 1912051415 -9999.9900000000 20190702 1912051415 9999.9900000000 20190702 1912064980 9999.9900000000 20190702 1912064980 -9999.9900000000 2 +HB20 + +13 4 15 0 6 -1 1 20190702 1925470975 -9999.9900000000 20190702 1925484485 9999.9900000000 +1 +Doors up AWT20 +0 +Log +20190702 1925470975 -9999.9900000000 20190702 1925470975 9999.9900000000 20190702 1925484485 9999.9900000000 20190702 1925484485 -9999.9900000000 2 +DU20 + +13 4 16 0 6 -1 1 20190702 1952206990 -9999.9900000000 20190702 1952220545 9999.9900000000 +1 +Net on deck, awt20 +0 +Log +20190702 1952206990 -9999.9900000000 20190702 1952206990 9999.9900000000 20190702 1952220545 9999.9900000000 20190702 1952220545 -9999.9900000000 2 +NOD21 + +13 4 17 0 6 -1 1 20190702 2024556635 -9999.9900000000 20190702 
2024570135 9999.9900000000 +1 +Dummy +0 +Log +20190702 2024556635 -9999.9900000000 20190702 2024556635 9999.9900000000 20190702 2024570135 9999.9900000000 20190702 2024570135 -9999.9900000000 2 +COM + +13 4 18 0 4 -1 1 20190702 1840518097 9.2447583998 20190702 1952215310 758.9732173069 +0 +0 +Trawl +20190702 1840518097 9.2447583998 20190702 1840518097 700 20190702 1952215310 700 20190702 1952215310 9.2447583998 0 +AWT20 + +13 4 19 0 2 -1 1 20190702 2110200695 -10011.7976284000 20190702 2110214100 10011.2963773964 +1 +End transect 22 +0 +Log +20190702 2110200695 -9999.9900000000 20190702 2110200695 9999.9900000000 20190702 2110214100 10011.2963773964 20190702 2110214100 -10011.7976284000 2 +ET22 diff --git a/echoregions/tests/test_lines.py b/echoregions/tests/test_lines.py index a523af4f..9de7454d 100644 --- a/echoregions/tests/test_lines.py +++ b/echoregions/tests/test_lines.py @@ -8,8 +8,7 @@ from xarray import DataArray import echoregions as er - -from ..lines.lines import Lines +from echoregions.lines.lines import Lines DATA_DIR = Path("./echoregions/test_data/") EVL_PATH = DATA_DIR / "transect.evl" @@ -168,16 +167,12 @@ def test_replace_nan_depth() -> None: """ lines_1 = er.read_evl(EVL_PATH, nan_depth_value=20) - lines_1.data.loc[ - 0, "depth" - ] = -10000.99 # Replace a value with the one used for nans + lines_1.data.loc[0, "depth"] = -10000.99 # Replace a value with the one used for nans lines_1.replace_nan_depth(inplace=True) assert lines_1.data.loc[0, "depth"] == 20 lines_2 = er.read_evl(EVL_PATH, nan_depth_value=20) - lines_2.data.loc[ - 0, "depth" - ] = -10000.99 # Replace a value with the one used for nans + lines_2.data.loc[0, "depth"] = -10000.99 # Replace a value with the one used for nans regions = lines_2.replace_nan_depth(inplace=False) assert regions.loc[0, "depth"] == 20 diff --git a/echoregions/tests/test_regions2d.py b/echoregions/tests/test_regions2d.py index 57b75839..7ea10b7d 100644 --- a/echoregions/tests/test_regions2d.py +++ b/echoregions/tests/test_regions2d.py @@ -8,11 +8,10 @@ from xarray import DataArray, Dataset import echoregions as er - -from ..regions2d.regions2d import Regions2D +from echoregions.regions2d.regions2d import Regions2D DATA_DIR = Path("./echoregions/test_data/") -EVR_PATH = DATA_DIR / "transect.evr" +EVR_PATH = DATA_DIR / "transect_multi_mask.evr" ZARR_PATH = DATA_DIR / "transect.zarr" @@ -127,7 +126,7 @@ def test_regions2d_parsing(regions2d_fixture: Regions2D) -> None: assert df_r2d.shape == (19, 22) # Check individual values of specific row - assert df_r2d.loc[4]["file_name"] == "transect.evr" + assert df_r2d.loc[4]["file_name"] == "transect_multi_mask.evr" assert df_r2d.loc[4]["file_type"] == "EVRG" assert df_r2d.loc[4]["evr_file_format_number"] == "7" assert df_r2d.loc[4]["echoview_version"] == "13.0.378.44817" @@ -136,12 +135,8 @@ def test_regions2d_parsing(regions2d_fixture: Regions2D) -> None: assert df_r2d.loc[4]["region_name"] == "ST22" assert df_r2d.loc[4]["region_class"] == "Log" assert df_r2d.loc[4]["region_creation_type"] == "6" - assert df_r2d.loc[4]["region_bbox_left"] == pd.to_datetime( - "2019-07-02 13:14:00.488000" - ) - assert df_r2d.loc[4]["region_bbox_right"] == pd.to_datetime( - "2019-07-02 13:14:01.888000" - ) + assert df_r2d.loc[4]["region_bbox_left"] == pd.to_datetime("2019-07-02 13:14:00.488000") + assert df_r2d.loc[4]["region_bbox_right"] == pd.to_datetime("2019-07-02 13:14:01.888000") assert df_r2d.loc[4]["region_bbox_top"] == -9999.99 assert df_r2d.loc[4]["region_bbox_bottom"] == 9999.99 assert 
df_r2d.loc[4]["region_point_count"] == "4"

@@ -416,35 +411,33 @@ def test_select_region_errors(regions2d_fixture: Regions2D) -> None:
     )


-@pytest.mark.filterwarnings("ignore:No gridpoint belongs to any region")
 @pytest.mark.regions2d
-def test_mask_no_overlap(
-    regions2d_fixture: Regions2D, da_Sv_fixture: DataArray
-) -> None:
+def test_select_type_error(regions2d_fixture: Regions2D) -> None:
     """
-    Test if mask is empty when there is no overlap.
+    Test for select region errors.

     Parameters
     ----------
     regions2d_fixture : Regions2D
         Object containing data of test EVR file.
-    da_Sv_fixture : DataArray
-        DataArray containing Sv data of test zarr file.
     """
-    # Create empty mask
-    M = regions2d_fixture.mask(da_Sv_fixture.isel(channel=0), [8])
-
-    # Check that all values are null
-    assert M.isnull().all()
+    # Check empty dataset error
+    with pytest.raises(TypeError):
+        empty_dataset = Dataset()
+        _ = regions2d_fixture.select_region(empty_dataset)
+    # Check empty tuple error
+    with pytest.raises(TypeError):
+        empty_tuple = ()
+        _ = regions2d_fixture.select_region(empty_tuple)


+@pytest.mark.filterwarnings("ignore:No gridpoint belongs to any region.")
 @pytest.mark.regions2d
-def test_mask_correct_labels(
-    regions2d_fixture: Regions2D, da_Sv_fixture: DataArray
-) -> None:
+def test_mask_empty_no_overlap(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None:
     """
-    Test if the generated id labels are as expected
+    Test that the mask is None when a region's depth values are invalid,
+    and that the mask is empty when there is no overlap.

     Parameters
     ----------
     regions2d_fixture : Regions2D
         Object containing data of test EVR file.
     da_Sv_fixture : DataArray
         DataArray containing Sv data of test zarr file.
     """
-    # Extract Region IDs and convert to Python float values
-    region_ids = regions2d_fixture.data.region_id.astype(float).to_list()
+    # Attempt to create mask on region with invalid depth values
+    mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture.isel(channel=0), [8])

-    # Create mask.
-    M = regions2d_fixture.mask(
-        da_Sv_fixture.isel(channel=0), region_ids, mask_labels=region_ids
-    )
+    # Check that the mask call returned None
+    assert mask_3d_ds is None

-    # Check that the mask's values matches only 13th and 18th region and there exists a nan value
-    # and that there exists a point of no overlap (nan value)
-    values = list(np.unique(M))
-    assert values[0] == 13.0
-    assert values[1] == 18.0
-    assert np.isnan(values[2])
+    # Create mask with regions that have no overlap with the Sv Data Array
+    mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture.isel(channel=0), [8, 9, 10])
+    assert mask_3d_ds.mask_3d.isnull().all()


 @pytest.mark.regions2d
-def test_select_type_error(regions2d_fixture: Regions2D) -> None:
+def test_mask_type_error(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None:
     """
-    Test for select region errors.
+    Tests mask error functionality for regions.

     Parameters
     ----------
     regions2d_fixture : Regions2D
         Object containing data of test EVR file.
+    da_Sv_fixture : DataArray
+        DataArray containing Sv data of test zarr file.
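+
+    A sketch of the two failure modes exercised below: a non-list region_id
+    (e.g. a tuple) raises TypeError, and an empty list raises ValueError.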
""" - # Check empty dataset error - with pytest.raises(TypeError): - empty_dataset = Dataset() - _ = regions2d_fixture.select_region(empty_dataset) # Check empty tuple error with pytest.raises(TypeError): empty_tuple = () - _ = regions2d_fixture.select_region(empty_tuple) + _ = regions2d_fixture.mask(da_Sv_fixture, empty_tuple) + # Check empty list error + with pytest.raises(ValueError): + empty_list = [] + _ = regions2d_fixture.mask(da_Sv_fixture, empty_list) @pytest.mark.regions2d -def test_mask_type_error( - regions2d_fixture: Regions2D, da_Sv_fixture: DataArray -) -> None: +def test_mask_2d(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None: """ - Tests mask error functionality for regions. + Testing if 2d mask with collapse_to_do=True works. Parameters ---------- @@ -505,23 +493,33 @@ def test_mask_type_error( da_Sv_fixture : DataArray DataArray containing Sv data of test zarr file. """ + # Get region_id and mask_labels + region_id = regions2d_fixture.data.region_id.astype(int).to_list() + mask_labels = {key: idx for idx, key in enumerate(region_id)} + mask_labels[13] = "Mask1" - # Check empty tuple error - with pytest.raises(TypeError): - empty_tuple = () - _ = regions2d_fixture.mask(da_Sv_fixture, empty_tuple) - # Check empty list error - with pytest.raises(ValueError): - empty_list = [] - _ = regions2d_fixture.mask(da_Sv_fixture, empty_list) + # Create mask + mask_2d_ds = regions2d_fixture.mask(da_Sv_fixture, mask_labels=mask_labels, collapse_to_2d=True) + + # Check mask_2d values and counts + mask_2d_values = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[0] + mask_2d_counts = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[1] + assert mask_2d_values[0] == 13 + assert mask_2d_values[1] == 18 + assert np.isnan(mask_2d_values[2]) + assert mask_2d_counts[0] == 6328 + assert mask_2d_counts[1] == 3127410 + assert mask_2d_counts[2] == 3514617 + + # Check mask_labels values + assert (np.unique(mask_2d_ds.mask_labels.data) == ["17", "Mask1"]).all() + assert (mask_2d_ds.mask_labels.region_id.values == [13, 18]).all() @pytest.mark.regions2d -def test_mask_2d_3d_2d_3d( - regions2d_fixture: Regions2D, da_Sv_fixture: DataArray -) -> None: +def test_mask_3d_2d_3d_2d(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None: """ - Testing if converting 2d-3d-2d-3d masks works. + Testing if converting 3d-2d-3d-2d masks works. Parameters ---------- @@ -530,34 +528,51 @@ def test_mask_2d_3d_2d_3d( da_Sv_fixture : DataArray DataArray containing Sv data of test zarr file. 
""" - - # Extract region_ids - region_ids = regions2d_fixture.data.region_id.astype(float).to_list() + # Get region_id and mask_labels + region_id = regions2d_fixture.data.region_id.astype(int).to_list() + mask_labels = {key: idx for idx, key in enumerate(region_id)} + mask_labels[13] = "Mask1" # Create mask - M = regions2d_fixture.mask(da_Sv_fixture, region_ids, mask_labels=region_ids) + mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture, mask_labels=mask_labels) - # Test values from converted 3D array (previous 2D array) - mask_3d_ds = er.convert_mask_2d_to_3d(M) - assert mask_3d_ds.mask_3d.shape == (2, 3955, 1681) - assert list(mask_3d_ds.mask_dictionary) == [13.0, 18.0] + # Check mask values + assert (mask_3d_ds.mask_3d.region_id.values == [13, 18]).all() + assert np.unique(mask_3d_ds.mask_3d.isel(region_id=0).data, return_counts=True)[1][0] == 6642027 + assert np.unique(mask_3d_ds.mask_3d.isel(region_id=0).data, return_counts=True)[1][1] == 6328 + assert np.unique(mask_3d_ds.mask_3d.isel(region_id=1).data, return_counts=True)[1][0] == 3520945 + assert np.unique(mask_3d_ds.mask_3d.isel(region_id=1).data, return_counts=True)[1][1] == 3127410 - # Test values from converted 2D array (previously 3D array) - mask_2d_da = er.convert_mask_3d_to_2d(mask_3d_ds) - assert mask_2d_da.equals(M) + # Check mask_labels values + assert (np.unique(mask_3d_ds.mask_labels.data) == ["17", "Mask1"]).all() + assert (mask_3d_ds.mask_labels.region_id.values == [13, 18]).all() - # Test values from 3D array (previously 2D array) - second_mask_3d_ds = er.convert_mask_2d_to_3d(mask_2d_da) - assert second_mask_3d_ds.equals(mask_3d_ds) + # Convert 2d mask to 3d mask + mask_2d_ds = er.convert_mask_3d_to_2d(mask_3d_ds) + + # Check mask_2d values and counts + mask_2d_values = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[0] + mask_2d_counts = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[1] + assert mask_2d_values[0] == 13 + assert mask_2d_values[1] == 18 + assert np.isnan(mask_2d_values[2]) + assert mask_2d_counts[0] == 6328 + assert mask_2d_counts[1] == 3127410 + assert mask_2d_counts[2] == 3514617 + + # Convert 2d mask to 3d mask and test for equality with previous 3d mask + mask_3d_ds_2nd = er.convert_mask_2d_to_3d(mask_2d_ds) + mask_3d_ds_2nd.equals(mask_3d_ds) + + # Convert 2nd 3d mask to 2d mask and test for equality with previous 2d mask + mask_2d_2nd = er.convert_mask_3d_to_2d(mask_3d_ds_2nd) + mask_2d_2nd.equals(mask_2d_ds) -@pytest.mark.filterwarnings("ignore:No gridpoint belongs to any region") @pytest.mark.regions2d -def test_nan_mask_2d_3d_2d_3d( - regions2d_fixture: Regions2D, da_Sv_fixture: DataArray -) -> None: +def test_one_label_mask_3d_2d_3d_2d(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None: """ - Testing if converting 2d-3d-2d-3d masks works for nan mask. + Testing if converting 3d-2d-3d-2d masks works for 1 label mask. Parameters ---------- @@ -567,33 +582,44 @@ def test_nan_mask_2d_3d_2d_3d( DataArray containing Sv data of test zarr file. 
""" - # Create mask - M = regions2d_fixture.mask(da_Sv_fixture, [5.0]) + # Create 3d mask + mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture, region_id=[18], mask_labels={18: "Mask1"}) - # Check if mask is null/empty - assert isinstance(M, DataArray) - assert M.isnull().all() + # Check mask values + assert (mask_3d_ds.mask_3d.region_id.values == [18]).all() + assert np.unique(mask_3d_ds.mask_3d.isel(region_id=0).data, return_counts=True)[1][0] == 3520945 + assert np.unique(mask_3d_ds.mask_3d.isel(region_id=0).data, return_counts=True)[1][1] == 3127410 + + # Check mask_labels values + assert (np.unique(mask_3d_ds.mask_labels.data) == ["Mask1"]).all() + assert (mask_3d_ds.mask_labels.region_id.values == [18]).all() + + # Convert 2d mask to 3d mask + mask_2d_ds = er.convert_mask_3d_to_2d(mask_3d_ds) - # Test values from converted 3D array (previous 2D array) - mask_3d_ds = er.convert_mask_2d_to_3d(M) - assert np.unique(mask_3d_ds.mask_3d.data)[0] == 0 + # Check mask_2d values and counts + mask_2d_values = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[0] + mask_2d_counts = np.unique(mask_2d_ds.mask_2d.data, return_counts=True)[1] + assert mask_2d_values[0] == 18 + assert np.isnan(mask_2d_values[1]) + assert mask_2d_counts[0] == 3127410 + assert mask_2d_counts[1] == 3520945 - # Test values from converted 2D array (previously 3D array) - mask_2d_da = er.convert_mask_3d_to_2d(mask_3d_ds) - assert mask_2d_da.equals(M) - assert mask_2d_da.isnull().all() + # Convert 2d mask to 3d mask and test for equality with previous 3d mask + mask_3d_ds_2nd = er.convert_mask_2d_to_3d(mask_2d_ds) + mask_3d_ds_2nd.equals(mask_3d_ds) - # Test values from 3D array (previously 2D array) - second_mask_3d_ds = er.convert_mask_2d_to_3d(mask_2d_da) - assert second_mask_3d_ds.equals(mask_3d_ds) + # Convert 2nd 3d mask to 2d mask and test for equality with previous 2d mask + mask_2d_ds_2nd = er.convert_mask_3d_to_2d(mask_3d_ds_2nd) + mask_2d_ds_2nd.equals(mask_2d_ds) +@pytest.mark.filterwarnings("ignore:No gridpoint belongs to any region") +@pytest.mark.filterwarnings("ignore:Returning No Mask") @pytest.mark.regions2d -def test_one_label_mask_2d_3d_2d_3d( - regions2d_fixture: Regions2D, da_Sv_fixture: DataArray -) -> None: +def test_nan_mask_3d_2d_and_2d_3d(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None: """ - Testing if converting 2d-3d-2d-3d masks works for 1 label mask. + Testing if both converting functions returns none for empty mask inputs. Parameters ---------- @@ -603,38 +629,34 @@ def test_one_label_mask_2d_3d_2d_3d( DataArray containing Sv data of test zarr file. 
""" - # Extract region_ids - region_ids = regions2d_fixture.data.region_id.astype(float).to_list() - - # Create mask - M = regions2d_fixture.mask(da_Sv_fixture, region_ids, mask_labels=region_ids) - - # Remove 18.0 from mask - M = xr.where(M == 18.0, np.nan, M) + # Create 3d mask + mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture, [8, 9, 10]) - # Test values of 2D Mask - M_values = M.values - assert set(np.unique(M_values[~np.isnan(M_values)])) == {13.0} - assert M.shape == (3955, 1681) - - # Test values from converted 3D array (previous 2D array) - mask_3d_ds = er.convert_mask_2d_to_3d(M) - assert list(mask_3d_ds.mask_dictionary) == [13.0] - assert mask_3d_ds.mask_3d.shape == (1, 3955, 1681) + # Check if mask is null/empty + assert mask_3d_ds.mask_3d.isnull().all() + + # Attempt to convert empty 3d mask to 2d mask + assert er.convert_mask_3d_to_2d(mask_3d_ds) is None + + # Create emtpy 2d mask with None values + depth_values = [9.15, 9.34, 9.529, 9.719, 758.5] + ping_time_values = ["2019-07-02T18:40:00", "2019-07-02T19:00:00"] + mask_2d_ds = xr.Dataset() + mask_2d_ds["mask_2d"] = xr.DataArray( + np.full((len(depth_values), len(ping_time_values)), np.nan), + coords={"depth": depth_values, "ping_time": ping_time_values}, + dims=["depth", "ping_time"], + ) - # Test values from converted 2D array (previously 3D array) - mask_2d_da = er.convert_mask_3d_to_2d(mask_3d_ds) - assert mask_2d_da.equals(M) + # Check if mask is null/empty + assert mask_2d_ds.mask_2d.isnull().all() - # Test values from 3D array (previously 2D array) - second_mask_3d_ds = er.convert_mask_2d_to_3d(mask_2d_da) - assert second_mask_3d_ds.equals(mask_3d_ds) + # Attempt to convert empty 2d mask to 3d mask + assert er.convert_mask_2d_to_3d(mask_2d_ds) is None @pytest.mark.regions2d -def test_overlapping_mask_3d_2d( - regions2d_fixture: Regions2D, da_Sv_fixture: DataArray -) -> None: +def test_overlapping_mask_3d_2d(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None: """ Testing if converting 3d to 2d with overlapping mask produces error. @@ -646,19 +668,15 @@ def test_overlapping_mask_3d_2d( DataArray containing Sv data of test zarr file. """ - # Extract region_ids - region_ids = regions2d_fixture.data.region_id.astype(float).to_list() - - # Create mask - M = regions2d_fixture.mask(da_Sv_fixture, region_ids, mask_labels=region_ids) + # Extract region_id + region_id = regions2d_fixture.data.region_id.astype(int).to_list() - # Test values from converted 3D array (previous 2D array) - mask_3d_ds = er.convert_mask_2d_to_3d(M) - assert mask_3d_ds.mask_3d.shape == (2, 3955, 1681) - assert list(mask_3d_ds.mask_dictionary) == [13.0, 18.0] + # Create 3d mask + mask_3d_ds = regions2d_fixture.mask(da_Sv_fixture, region_id) - # Turn first (0th index) array into all 1s to guarantee overlap - mask_3d_ds.mask_3d[0] = xr.ones_like(mask_3d_ds.mask_3d[0]) + # Turn first (0th index) array corresponding to region id 13 into all 1s + # to guarantee overlap with array corresponding to region id 18 + mask_3d_ds["mask_3d"] = xr.concat([xr.ones_like(mask_3d_ds.mask_3d[0])], dim="region_id") # Trying to convert 3d mask to 2d should raise ValueError with pytest.raises(ValueError): @@ -666,9 +684,7 @@ def test_overlapping_mask_3d_2d( @pytest.mark.regions2d -def test_within_transect( - regions2d_fixture: Regions2D, da_Sv_fixture: DataArray -) -> None: +def test_within_transect(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None: """ Tests functionality for transect_mask. 
@@ -682,9 +698,7 @@ def test_within_transect( # Create transect mask with no errors transect_dict = {"start": "ST", "break": "BT", "resume": "RT", "end": "ET"} - M = regions2d_fixture.transect_mask( - da_Sv=da_Sv_fixture, transect_dict=transect_dict - ).compute() + M = regions2d_fixture.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict).compute() # Check M dimensions assert M.shape == (3955, 1681) @@ -747,9 +761,7 @@ def test_within_transect_bad_dict(da_Sv_fixture: DataArray) -> None: "end": "ET", } with pytest.raises(ValueError): - _ = r2d.transect_mask( - da_Sv=da_Sv_fixture, transect_dict=transect_dict_duplicate - ) + _ = r2d.transect_mask(da_Sv=da_Sv_fixture, transect_dict=transect_dict_duplicate) # Create dictionary with integers transect_dict_int = {"start": "ST", "break": "BT", "resume": "RT", "end": 4} diff --git a/echoregions/tests/test_utils.py b/echoregions/tests/test_utils.py index 64eb0767..275a348d 100644 --- a/echoregions/tests/test_utils.py +++ b/echoregions/tests/test_utils.py @@ -2,11 +2,9 @@ import numpy as np import pytest -from pandas import DataFrame, Series -from ..utils.api import merge -from ..utils.io import check_file, validate_path -from ..utils.time import parse_simrad_fname_time, parse_time +from echoregions.utils.io import check_file, validate_path +from echoregions.utils.time import parse_simrad_fname_time, parse_time DATA_DIR = Path("./echoregions/test_data/") EVR_PATH = DATA_DIR / "transect.evr" @@ -31,27 +29,9 @@ def test_parse_filename_time() -> None: """ Sv_2017_fname = ["TestString2017-D20170625-T124834.Sv"] - assert parse_simrad_fname_time(Sv_2017_fname) == np.datetime64( - "2017-06-25T12:48:34.0000" - ) + assert parse_simrad_fname_time(Sv_2017_fname) == np.datetime64("2017-06-25T12:48:34.0000") raw_2019_fname = ["Summer2019-D20190625-T124834.raw"] - assert parse_simrad_fname_time(raw_2019_fname) == np.datetime64( - "2019-06-25T12:48:34.0000" - ) - - -@pytest.mark.utils -def test_merge_type_checking() -> None: - """ - Test merge type checking functionality. - """ - - with pytest.raises(ValueError): - merge([]) - with pytest.raises(TypeError): - merge([DataFrame()]) - with pytest.raises(TypeError): - merge([Series()]) + assert parse_simrad_fname_time(raw_2019_fname) == np.datetime64("2019-06-25T12:48:34.0000") @pytest.mark.utils @@ -91,6 +71,4 @@ def test_validate_path_errors(): validate_path(save_path=EVL_PATH.__str__(), ext="EVL") with pytest.raises(ValueError): # Check for mismatch of file name and file format - validate_path( - save_path=EVL_PATH.__str__(), input_file=EVL_PATH_DNE.__str__(), ext="EVR" - ) + validate_path(save_path=EVL_PATH.__str__(), input_file=EVL_PATH_DNE.__str__(), ext="EVR") diff --git a/echoregions/utils/api.py b/echoregions/utils/api.py index 6e9f0de9..60a1b8b9 100644 --- a/echoregions/utils/api.py +++ b/echoregions/utils/api.py @@ -1,105 +1,103 @@ -from typing import List +import warnings +from typing import List, Union import numpy as np import xarray as xr -from xarray import DataArray, Dataset +from xarray import Dataset -from ..regions2d.regions2d import Regions2D - -def convert_mask_2d_to_3d(mask_2d_da: DataArray) -> Dataset: +def convert_mask_2d_to_3d(mask_2d_ds: Dataset) -> Union[Dataset, None]: """ Convert 2D multi-labeled mask data into its 3D one-hot encoded form. Parameters ---------- - mask_2d_da : DataArray - A DataArray with the data_var masked by a specified region. This data will - be in the form of integer, demarking labels of masked regions, and nan values, - demarking non-masked areas. 
+    mask_2d_ds : Dataset
+        A dataset with the following:
+            DataArray with the data_var masked by a specified region. Individual data
+            points will be in the form of integers, demarking region_id of masked regions,
+            and nan values, demarking non-masked areas.
+            DataArray with mask labels corresponding to region_id values.

     Returns
     -------
     mask_3d_ds : Dataset
-        A Dataset with a 3D DataArray/mask and each layer of the 3D mask will contain
-        a 1s/0s mask for each unique label in the 2D mask. The layers will be labeled
-        by a dictionary that maps the individual label layers of the 3D mask to an integer
-        label in the 2D mask.
-
-    Notes
-    -----
-    Emtpy dictionary data of mask_3d_ds means that there exists no masked
-    values.
+        A dataset with the following:
+            A DataArray 3D mask where each layer of the mask will contain a 1s/0s mask for
+            each unique label in the 2D mask. The layers will be labeled via region_id
+            values extracted from 2d values.
+            DataArray with mask labels corresponding to region_id values.
     """
+    # Check if 'mask_2d' exists as a data variable
+    if "mask_2d" not in mask_2d_ds:
+        raise ValueError("The variable 'mask_2d' does not exist in the input dataset.")
+
     # Get unique non nan values from the 2d mask
-    unique_non_nan = list(np.unique(mask_2d_da.data[~np.isnan(mask_2d_da.data)]))
-    if len(unique_non_nan) == 0:
-        unique_non_nan = None
+    region_id = list(np.unique(mask_2d_ds.mask_2d.data[~np.isnan(mask_2d_ds.mask_2d.data)]))

     # Create a list of mask objects from one-hot encoding M.data non-nan values
     # and a dictionary to remember said values from one-hot encoded data arrays.
     # If unique_non_nan is None, make mask_dictionary None.
-    mask_list = []
-    mask_dictionary = {"dims": "label", "data": []}
-    if unique_non_nan is not None:
-        mask_dictionary = {"dims": "label", "data": []}
-        for _, value in enumerate(unique_non_nan):
+    if len(region_id) > 0:
+        mask_list = []
+        for _, value in enumerate(region_id):
             # Create new 1d mask
-            new_mask_data = xr.where(mask_2d_da == value, 1.0, 0.0)
-            # Append data to mask_list and mask_dictionary
+            new_mask_data = xr.where(mask_2d_ds.mask_2d == value, 1.0, 0.0)
+            # Append data to mask_list
             mask_list.append(new_mask_data)
-            mask_dictionary_list = mask_dictionary["data"]
-            mask_dictionary_list.append(value)
-            mask_dictionary["data"] = mask_dictionary_list
-        mask_3d_da = xr.concat(mask_list, dim="label")
+        # Concat mask list together to make 3d mask
+        mask_3d_da = xr.concat(mask_list, dim=region_id)
+        mask_3d_da = mask_3d_da.rename({"concat_dim": "region_id"})
+        # Drop mask_2d
+        mask_2d_ds = mask_2d_ds.drop_vars("mask_2d")
+        # Set mask to mask_3d_da
+        mask_2d_ds["mask_3d"] = mask_3d_da
+        mask_3d_ds = mask_2d_ds
+        return mask_3d_ds
     else:
-        mask_3d_da = xr.zeros_like(mask_2d_da)
-    mask_dictionary_da = xr.DataArray.from_dict(mask_dictionary)
-
-    # Initialize Dataset
-    mask_3d_ds = xr.Dataset()
-    mask_3d_ds["mask_3d"] = mask_3d_da
-    mask_3d_ds["mask_dictionary"] = mask_dictionary_da
-    return mask_3d_ds
+        warnings.warn(
+            "Returning No Mask. Empty 2D Mask cannot be converted to 3D Mask.",
+            UserWarning,
+        )
+        return None


-def convert_mask_3d_to_2d(mask_3d_ds: Dataset) -> DataArray:
+def convert_mask_3d_to_2d(mask_3d_ds: Dataset) -> Union[Dataset, None]:
     """
     Convert 3D one-hot encoded mask data into its 2D multi-labeled form.
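+
+    Conceptually (a small sketch with region_id layers 13 and 18): the 1/0
+    layers 13 -> [[1, 0], [0, 0]] and 18 -> [[0, 0], [0, 1]] collapse to
+    [[13., nan], [nan, 18.]]; overlapping 1s across layers raise a ValueError.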
 
     Parameters
     ----------
     mask_3d_ds : Dataset
-        A Dataset with a 3D DataArray with the data_var masked by the specified
-        region in one-hot encoded form and a dictionary that will be used to map
-        the individual label layers of the 3D mask to an integer label in the 2D mask.
-        The 3D DataArray will be in the form of 1s/0s: masked areas, and
-        non-masked areas.
+        A dataset with the following:
+            A DataArray 3D mask where each layer of the mask will contain a 1s/0s mask for
+            each unique label in the 2D mask. The layers will be labeled via region_id
+            values extracted from the 2D mask.
+            DataArray with mask labels corresponding to region_id values.
 
     Returns
     -------
-    mask_2d_da: DataArray
-        A DataArray with the data_var masked by a specified region. This data will
-        be in the form of integer, demarking labels of masked regions, and nan values,
-        demarking non-masked areas.
-
-    Notes
-    -----
-    Emtpy dictionary data of mask_3d_ds means that there exists no masked
-    values.
+    mask_2d_ds : Dataset
+        A dataset with the following:
+            DataArray with the data_var masked by a specified region. Individual data
+            points will be in the form of integers, demarking region_id of masked regions,
+            and nan values, demarking non-masked areas.
+            DataArray with mask labels corresponding to region_id values.
     """
-    # Get unique non nan values from the 2d mask
-    unique_non_nan = list(mask_3d_ds.mask_dictionary.data)
+    # Check if 'mask_3d' exists as a data variable
+    if "mask_3d" not in mask_3d_ds:
+        raise ValueError("The variable 'mask_3d' does not exist in the input dataset.")
 
-    # Create copies and placeholder values for 2D and 3D mask objects
-    mask_3d_da = mask_3d_ds.mask_3d.copy()
+    # Get region_id from the 3D mask
+    region_id = list(mask_3d_ds.mask_3d.region_id)
 
     # Check if there is overlap between layers.
-    # TODO For now, overlap between layers will not be allowed.
-    # Allowing overlapping layers can be explored in later development.
-    if len(unique_non_nan) > 1:
+    # TODO This code is also extremely slow. It is an O(n^2) operation that
+    # can be parallelized due to the index operations being independent of
+    # one another.
+    if len(region_id) > 1:
         non_zero_indices_list = [
-            np.transpose(np.nonzero(np_mask)) for np_mask in mask_3d_da.data
+            np.transpose(np.nonzero(np_mask)) for np_mask in mask_3d_ds.mask_3d.data
         ]
         for index_main, non_zero_indices_main in enumerate(non_zero_indices_list):
             main_set = set([tuple(x) for x in non_zero_indices_main])
@@ -114,25 +112,38 @@ def convert_mask_3d_to_2d(mask_3d_ds: Dataset) -> DataArray:
                     " Overlapping values are not allowed."
                 )
 
-    if len(unique_non_nan) > 0:
+    if len(region_id) > 0:
         # Iterate through 3D array layers and set 1.0 to associated label values
         # dependent on which layer is being worked on and create append layers to
         # form 2D mask array.
-        for index, label_value in enumerate(unique_non_nan):
-            label_layer = mask_3d_da[index]
+        for index, label_value in enumerate(region_id):
+            label_layer = mask_3d_ds.mask_3d[index]
             label_layer = xr.where(label_layer == 1.0, label_value, 0.0)
             if index == 0:
                 mask_2d_da = label_layer
             else:
                 mask_2d_da = label_layer + mask_2d_da
         mask_2d_da = xr.where(mask_2d_da == 0.0, np.nan, mask_2d_da)
+
+        # Setup mask_2d_ds
+        mask_2d_ds = mask_3d_ds
+        # Drop mask_3d
+        mask_2d_ds = mask_2d_ds.drop_vars("mask_3d")
+        # Set mask_2d to mask_2d_da
+        mask_2d_ds["mask_2d"] = mask_2d_da
+        # Drop region_id coordinate if it exists
+        if "region_id" in mask_2d_ds.mask_2d.coords:
+            mask_2d_ds["mask_2d"] = mask_2d_ds.mask_2d.drop_vars(["region_id"])
+        return mask_2d_ds
     else:
-        # In the case where unique_non_nan is empty, create all zeroes DataArray
-        mask_2d_da = xr.full_like(mask_3d_da, np.nan)
-    return mask_2d_da
+        warnings.warn(
+            "Returning No Mask. Empty 3D Mask cannot be converted to 2D Mask.",
+            UserWarning,
+        )
+        return None
 
 
-def merge(objects: List[Regions2D], reindex_ids: bool = False) -> Regions2D:
+def merge(objects: List, reindex_ids: bool = False):  # -> Regions2D:
+    # TODO: currently deprecated; must be fixed before further tests.
     """Merge echoregion objects.
     Currently only supports merging Regions2D objects.
 
@@ -147,15 +158,14 @@
     combined : Regions2D
         A Regions2D object with region ids prepended by the EVR original filename.
     """
+    """
     if isinstance(objects, list):
         if len(objects) == 0:
             raise ValueError("objects must contain elements. objects sent in is empty.")
         if not all(isinstance(o, Regions2D) for o in objects):
             raise TypeError("Invalid elements in objects. Must be of type Regions2D")
     else:
-        raise TypeError(
-            f"Invalid objects Type: {type(objects)}. Must be of type List[DataFrame]"
-        )
+        raise TypeError(f"Invalid objects Type: {type(objects)}. Must be of type List[Regions2D]")
 
     merged_idx = []
     merged_data = []
@@ -168,7 +178,7 @@
         else:
             merged_idx += [
                 f"{regions.data['metadata']['file_name']}_{r}"
-                for r in regions.region_ids
+                for r in regions.region_id
             ]
     # Attach region information to region ids
     merged = dict(zip(merged_idx, merged_data))
@@ -178,9 +188,8 @@
         merged_obj.data["metadata"] = objects[0].data["metadata"]
     # Combine metadata of all Regions2D objects
     for field in objects[0].data["metadata"].keys():
-        merged_obj.data["metadata"][field] = [
-            o.data["metadata"][field] for o in objects
-        ]
+        merged_obj.data["metadata"][field] = [o.data["metadata"][field] for o in objects]
     # Set region data
     merged_obj.data["regions"] = merged
     return merged_obj
+    """
diff --git a/echoregions/utils/io.py b/echoregions/utils/io.py
index 48325394..1ff337a1 100644
--- a/echoregions/utils/io.py
+++ b/echoregions/utils/io.py
@@ -3,9 +3,7 @@
 from typing import List, Union
 
 
-def validate_path(
-    save_path: str = None, input_file: str = None, ext: str = ".json"
-) -> str:
+def validate_path(save_path: str = None, input_file: str = None, ext: str = ".json") -> str:
     """
     Checks if save_path is specified. If not try to create one with
     the input_file and ext.
 
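
A note on the reworked converters above: they now round-trip Dataset-based masks end to end. Below is a minimal sketch of that round trip; the toy data and the "ping_time"/"depth" dimension names are illustrative only, not taken from the package's test files:

    import numpy as np
    import xarray as xr

    from echoregions import convert_mask_2d_to_3d, convert_mask_3d_to_2d

    # Toy 2D mask: two labeled regions (1.0 and 2.0), NaN elsewhere.
    data = np.array([[1.0, 1.0, np.nan], [np.nan, 2.0, 2.0]])
    mask_2d_ds = xr.Dataset({"mask_2d": (("ping_time", "depth"), data)})

    # One-hot encode: mask_3d gains a region_id dimension, one layer per label.
    mask_3d_ds = convert_mask_2d_to_3d(mask_2d_ds)
    print(mask_3d_ds.mask_3d.sizes)  # region_id: 2, ping_time: 2, depth: 3

    # Collapse back to multi-labeled form; layers must not overlap,
    # which always holds for freshly one-hot encoded output.
    mask_2d_roundtrip = convert_mask_3d_to_2d(mask_3d_ds)
    print(mask_2d_roundtrip.mask_2d.values)
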
diff --git a/echoregions/utils/time.py b/echoregions/utils/time.py
index 268b4650..cf74d50b 100644
--- a/echoregions/utils/time.py
+++ b/echoregions/utils/time.py
@@ -33,9 +33,7 @@ def parse_time(
     np.datetime64 or float
         converted input datetime
     """
-    if isinstance(ev_time, np.ndarray) and np.issubdtype(
-        ev_time.dtype, "datetime64[ms]"
-    ):
+    if isinstance(ev_time, np.ndarray) and np.issubdtype(ev_time.dtype, "datetime64[ms]"):
         return ev_time
     elif not isinstance(ev_time, str) and not isinstance(ev_time, list):
         raise ValueError("'ev_time' must be type str or list")
@@ -73,11 +71,8 @@ def parse_simrad_fname_time(filenames: List[str]) -> datetime64:
             )
         else:
             raise TypeError(
-                "Filenames contains non string element."
-                f"Invalid element is of type {type(f)}"
+                "Filenames contains non string element. " f"Invalid element is of type {type(f)}"
             )
     else:
-        raise TypeError(
-            f"Filenames must be type list. Filenames is of type {type(filenames)}"
-        )
+        raise TypeError(f"Filenames must be type list. Filenames is of type {type(filenames)}")
     return parse_time(f_list, "%Y%m%d %H%M%S")
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..77a0b101
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,26 @@
+
+[build-system]
+requires = [
+    "setuptools >= 48",
+    "setuptools_scm[toml] >= 4, <6",
+    "setuptools_scm_git_archive",
+    "wheel >= 0.29.0",
+]
+build-backend = 'setuptools.build_meta'
+
+[tool.setuptools_scm]
+fallback_version = "unknown"
+local_scheme = "node-and-date"
+write_to = "_echoregions_version.py"
+write_to_template = 'version = "{version}"'
+
+[tool.black]
+line-length = 100
+
+[tool.isort]
+known_first_party = "echoregions"
+known_third_party = ["_echoregions_version", "setuptools"]
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+line_length = 100
diff --git a/requirements-dev.txt b/requirements-dev.txt
index f20010a8..ca14aa03 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -5,3 +5,5 @@ pre-commit
 pytest
 xarray>=2023.2.0
 pytest-cov
+setuptools_scm
+wheel
diff --git a/setup.cfg b/setup.cfg
index 847b6c92..40df79ce 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,15 +1,39 @@
-[black]
-line-length = 100
+[metadata]
+name = echoregions
+maintainer = Caesar Tuguinay
+maintainer_email = ctuguina@uw.edu
+description = Parsers and functions for working with EVR and EVL files
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/OSOceanAcoustics/echoregions
+license = Apache License, Version 2.0
+classifiers =
+    Development Status :: 4 - Beta
+    Environment :: Console
+    Intended Audience :: Science/Research
+    License :: OSI Approved :: Apache Software License
+    Operating System :: OS Independent
+    Programming Language :: Python
+    Programming Language :: Python :: 3 :: Only
+    Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
+    Programming Language :: Python :: 3.11
+    Topic :: Scientific/Engineering
+author = Caesar Tuguinay
+author_email = ctuguina@uw.edu
+platforms = OS Independent
 
-[flake8]
-max-line-length = 100
-output-file = flake8_log.txt
-tee = True
-ignore = E722,E203,W503,T001
+[options]
+packages = find:
+platforms = any
+py_modules =
+    _echoregions_version
+include_package_data = True
+python_requires = >=3.9
+setup_requires =
+    setuptools_scm
 
-[isort]
-multi_line_output = 3
-include_trailing_comma = True
-force_grid_wrap = 0
-combine_as_imports = True
-line_length = 100
+[options.extras_require]
+plot =
+    matplotlib>=3.7
+    cmocean
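
Since packaging metadata now lives in setup.cfg and pyproject.toml, the setuptools_scm-derived version and the optional "plot" extra surface through standard package metadata. A short sketch, assuming echoregions has been installed:

    from importlib.metadata import metadata

    meta = metadata("echoregions")
    # setuptools_scm-derived, e.g. "0.1.dev348+ga480450.d20231002"
    print(meta["Version"])
    # Includes entries such as: matplotlib>=3.7; extra == "plot"
    print(meta.get_all("Requires-Dist"))
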
diff --git a/setup.py b/setup.py
index c0864ea1..a64e4ada 100644
--- a/setup.py
+++ b/setup.py
@@ -1,19 +1,10 @@
-from setuptools import find_packages, setup
+from __future__ import absolute_import, division, print_function
 
-# Dependencies
+from setuptools import setup
+
+# Dynamically read dependencies from requirements file
 with open("requirements.txt") as f:
     requirements = f.readlines()
-INSTALL_REQUIRES = [t.strip() for t in requirements]
 
-setup(
-    name="echoregions",
-    version="0.1",
-    author="Caesar Tuguinay",
-    author_email="ctuguina@uw.edu",
-    maintainer="Caesar Tuguinay",
-    maintainer_email="ctuguina@uw.edu",
-    description="Parsers and functions for working with EVR and EVL files",
-    url="https://github.com/OSOceanAcoustics/echoregions",
-    install_requires=INSTALL_REQUIRES,
-    packages=find_packages(),
-)
+if __name__ == "__main__":
+    setup(install_requires=requirements)
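
One caveat with the slimmed-down setup.py: readlines() keeps trailing newlines and passes along any blank or comment lines in requirements.txt. setuptools' requirement parsing generally tolerates this, but a slightly more defensive variant (a sketch, not part of this patch) would filter lines the way the old INSTALL_REQUIRES list did:

    from setuptools import setup

    # Read runtime dependencies, skipping blank lines and comments.
    with open("requirements.txt") as f:
        requirements = [
            line.strip()
            for line in f
            if line.strip() and not line.lstrip().startswith("#")
        ]

    if __name__ == "__main__":
        setup(install_requires=requirements)
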