Commit 6582b84

incorporate valentinas 2nd round review comments

ctuguinay committed Apr 5, 2024
1 parent 411fd44 commit 6582b84
Showing 10 changed files with 234 additions and 235 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -8,10 +8,10 @@
Echoregions is a Python Package that interfaces with annotations of water column sonar data for training machine learning models or doing other downstream processing such as biomass estimation.

The annotations are typically regions indicating the presence of specific animal species or lines delineating ocean boundaries, such as the seafloor or sea surface, in the "echogram" (sonar images formed by echo returns). The interfacing functionalities operate in two directions:
- Annotation to ML: Parsing and organizing manual annotations for preparing training and test datasets for ML developments
- Annotation to ML: Parsing and organizing annotations for preparing training and test datasets for ML developments
- ML to annotation: Generating annotations from ML predictions that can be used for further downstream processing

At present, functionalities in the Annotation to ML direction have been built for interfacing the Echoview software that is widely used in the fisheries acoustics community. We plan to add functionalities in the ML to Annotation direction in the near future.
At present, functionalities in the Annotation to ML direction have been built for interfacing the [Echoview](https://echoview.com/products-and-services/echoview/) software that is widely used in the fisheries acoustics community. We plan to add functionalities in the ML to Annotation direction in the near future.

## Functionalities

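A brief sketch of the Annotation to ML direction described in the README text above (editor's illustration, not part of this commit). It assumes the package's top-level `read_evr`/`read_evl` readers and a `.data` DataFrame attribute on the returned objects; the file names are hypothetical.

```python
import echoregions as er

# Echoview region annotations (.evr) -> Regions2D object wrapping a DataFrame
r2d = er.read_evr("transect_regions.evr")
print(r2d.data.head())

# Echoview line annotations (.evl), e.g. a seafloor line -> Lines object
lines = er.read_evl("transect_bottom.evl")
print(lines.data.head())
```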
169 changes: 83 additions & 86 deletions docs/source/Lines_functionality.ipynb

Large diffs are not rendered by default.

198 changes: 101 additions & 97 deletions docs/source/Regions2D_functionality.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/source/index.md
@@ -3,10 +3,10 @@
Echoregions is a Python Package that interfaces with annotations of water column sonar data for training machine learning models or doing other downstream processing such as biomass estimation.

The annotations are typically regions indicating the presence of specific animal species or lines delineating ocean boundaries, such as the seafloor or sea surface, in the "echogram" (sonar images formed by echo returns). The interfacing functionalities operate in two directions:
- Annotation to ML: Parsing and organizing manual annotations for preparing training and test datasets for ML developments
- Annotation to ML: Parsing and organizing annotations for preparing training and test datasets for ML developments
- ML to Annotation: Generating annotations from ML predictions that can be used for further downstream processing

At present, functionalities in the Annotation to ML direction have been built for interfacing manual annotations from the Echoview software, which is widely used in the fisheries acoustics community. We plan to add functionalities in the ML to Annotation direction in the near future.
At present, functionalities in the Annotation to ML direction have been built for interfacing manual annotations from the [Echoview](https://echoview.com/products-and-services/echoview/) software, which is widely used in the fisheries acoustics community. We plan to add functionalities in the ML to Annotation direction in the near future.

## Functionalities

26 changes: 13 additions & 13 deletions echoregions/lines/lines.py
@@ -163,7 +163,7 @@ def plot(
def mask(self, da_Sv: DataArray, **kwargs):
"""
Subsets a bottom dataset to the range of an Sv dataset. Create a mask of
the same shape as data found in the Sonar object:
the same shape as data found in the Echogram object:
Bottom: True, Otherwise: False.
Parameters
@@ -178,7 +178,7 @@ def mask(self, da_Sv: DataArray, **kwargs):
bottom_mask : Xarray DataArray
Matrix of coordinates (ping_time, depth) with values such that bottom: False,
otherwise: True
bottom_contours : pd.DataFrame
bottom_points : pd.DataFrame
DataFrame containing depth and time.
Notes
@@ -207,7 +207,7 @@ def filter_bottom(bottom, start_date, end_date):
lines_df = self.data

# new index
sonar_ping_time = list(da_Sv.ping_time.data)
echogram_ping_time = list(da_Sv.ping_time.data)

# filter bottom within start and end time
start_time = da_Sv.ping_time.data.min()
@@ -218,25 +218,25 @@ def filter_bottom(bottom, start_date, end_date):
if len(filtered_bottom) > 0:
# create joint index
joint_index = list(
set(list(pd.DataFrame(sonar_ping_time)[0]) + list(filtered_bottom.index))
set(list(pd.DataFrame(echogram_ping_time)[0]) + list(filtered_bottom.index))
)

# Interpolate on the sonar coordinates. Note that some interpolation kwaargs
# Interpolate on the echogram coordinates. Note that some interpolation kwaargs
# will result in some interpolation NaN values. The ffill and bfill lines
# are there to fill in these NaN values.
# TODO There exists a problem where when we use .loc prior to reindexing
# we are hit with a key not found error.
bottom_contours = (
bottom_points = (
filtered_bottom[["depth"]]
.reindex(joint_index)
.interpolate(**kwargs)
.loc[sonar_ping_time]
.loc[echogram_ping_time]
.ffill()
.bfill()
)

# convert to data array for bottom
bottom_da = bottom_contours["depth"].to_xarray()
bottom_da = bottom_points["depth"].to_xarray()
bottom_da = bottom_da.rename({"time": "ping_time"})

# create a data array of depths
@@ -246,17 +246,17 @@ def filter_bottom(bottom, start_date, end_date):
# bottom: True, otherwise: False
bottom_mask = depth_da >= bottom_da

# Reset bottom_contours index so that time index becomes time column
bottom_contours = bottom_contours.reset_index()
# Reset bottom_points index so that time index becomes time column
bottom_points = bottom_points.reset_index()

else:
# Set everything to False
bottom_mask = xr.full_like(da_Sv, False)

# Set bottom contours to empty DataFrame with time and depth columns
bottom_contours = pd.DataFrame(columns=["depth", "time"])
# Set bottom points to empty DataFrame with time and depth columns
bottom_points = pd.DataFrame(columns=["depth", "time"])

# Bottom: True becomes 1, False becomes 0
bottom_mask = bottom_mask.where(True, 1, 0)

return bottom_mask, bottom_contours
return bottom_mask, bottom_points
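The reindex/interpolate/ffill/bfill chain above is the core of `Lines.mask`: bottom depths recorded at arbitrary times are placed on the union of bottom times and echogram ping times, interpolated, restricted to the ping times, and gap-filled before being compared against the depth axis. A minimal, self-contained sketch of that idea (editor's illustration, not part of this commit; all values are made up, and `method="index"` stands in for the interpolation kwargs the real method forwards):

```python
import numpy as np
import pandas as pd
import xarray as xr

# Hypothetical echogram ping times and depth bins
echogram_ping_time = pd.date_range("2019-07-02 18:00:00", periods=6, freq="1s")
depth = np.linspace(0.0, 50.0, 11)

# Hypothetical bottom points at times that do not line up with the pings
bottom = pd.DataFrame(
    {"depth": [30.0, 32.0, 31.0]},
    index=pd.to_datetime(
        ["2019-07-02 18:00:00.5", "2019-07-02 18:00:02.5", "2019-07-02 18:00:04.5"]
    ),
)

# Joint index of ping times and bottom times; interpolate, keep ping times,
# then fill any leading/trailing NaNs left by the interpolation
joint_index = sorted(set(echogram_ping_time) | set(bottom.index))
bottom_points = (
    bottom[["depth"]]
    .reindex(joint_index)
    .interpolate(method="index")
    .loc[echogram_ping_time]
    .ffill()
    .bfill()
)

# Bottom mask: True at and below the interpolated bottom line, False above
bottom_da = xr.DataArray(
    bottom_points["depth"].to_numpy(),
    coords={"ping_time": echogram_ping_time},
    dims="ping_time",
)
depth_da = xr.DataArray(depth, coords={"depth": depth}, dims="depth")
bottom_mask = depth_da >= bottom_da  # dims: (depth, ping_time)
print(bottom_mask.sum("depth").values)  # number of "bottom" bins per ping
```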
2 changes: 1 addition & 1 deletion echoregions/lines/lines_parser.py
@@ -70,7 +70,7 @@ def parse_evl(input_file: str):
def parse_lines_df(input_file: Union[str, pd.DataFrame]) -> pd.DataFrame:
"""
Parses lines dataframe data. This function assumes that the input_file is output
from lines object's to_csv function or the input_file is bottom_contour output
from lines object's to_csv function or the input_file is bottom_points output
from lines object's mask function.
Parameters
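A small sketch of the round trip the docstring above refers to (editor's illustration, not part of this commit): `parse_lines_df` is assumed to accept the two-column time/depth DataFrame that `Lines.mask` returns as `bottom_points`; the values below are made up.

```python
import pandas as pd
from echoregions.lines.lines_parser import parse_lines_df

# Hypothetical bottom_points as returned by Lines.mask (time + depth columns)
bottom_points = pd.DataFrame(
    {
        "time": pd.to_datetime(["2019-07-02 18:00:00", "2019-07-02 18:00:01"]),
        "depth": [102.5, 103.0],
    }
)
lines_df = parse_lines_df(bottom_points)
print(lines_df.dtypes)
```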
8 changes: 4 additions & 4 deletions echoregions/regions2d/regions2d.py
@@ -482,7 +482,7 @@ def mask(
areas.
Also contains a data variable (`mask_labels`) with mask labels
corresponding to region_id values.
region_contours : pd.DataFrame
region_points : pd.DataFrame
DataFrame containing region_id, depth, and time.
"""
if not isinstance(da_Sv, DataArray):
@@ -598,12 +598,12 @@ def mask(
# Convert 3d mask to 2d mask
mask_ds = convert_mask_3d_to_2d(mask_ds)

# Get region_contours
region_contours = region_df[region_df["region_id"].isin(masked_region_id)][
# Get region_points
region_points = region_df[region_df["region_id"].isin(masked_region_id)][
["region_id", "time", "depth"]
]

return mask_ds, region_contours
return mask_ds, region_points

def transect_mask(
self,
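A hedged usage sketch of the workflow the `mask` docstring and code above describe (editor's illustration, not part of this commit): the paths are hypothetical, and `er.read_evr` is assumed to forward the same `min_depth`/`max_depth` arguments that the `Regions2D` constructor takes in the tests.

```python
import echoregions as er
import xarray as xr
from echoregions.regions2d.regions2d import Regions2D

# Hypothetical inputs: a calibrated Sv DataArray and an Echoview .evr file
da_Sv = xr.open_dataset("transect_Sv.zarr", engine="zarr")["Sv"].isel(channel=0)
r2d = er.read_evr("transect_regions.evr", min_depth=0, max_depth=1000)

# mask() returns a 2D mask dataset (with `mask_labels`) and the points
# (region_id, time, depth) of the regions that overlapped the Sv data
mask_ds, region_points = r2d.mask(da_Sv)

# region_points can seed a new Regions2D object; masking again should return
# the same points (this is the round trip test_mask_region_points checks)
r2d_2 = Regions2D(region_points, min_depth=0, max_depth=1000, input_type="CSV")
_, region_points_2 = r2d_2.mask(da_Sv)
assert region_points.equals(region_points_2)
```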
2 changes: 1 addition & 1 deletion echoregions/regions2d/regions2d_parser.py
@@ -150,7 +150,7 @@ def _parse_points(line: str) -> Tuple[ndarray]:
def parse_regions_df(input_file: Union[str, pd.DataFrame]) -> pd.DataFrame:
"""
Parses regions dataframe data. This function assumes that the input_file is output
from regions2d object's to_csv function or the input_file is region_contour output
from regions2d object's to_csv function or the input_file is region_points output
from regions2d object's mask function.
Parameters
26 changes: 13 additions & 13 deletions echoregions/tests/test_lines.py
@@ -223,7 +223,7 @@ def test_lines_mask(lines_fixture: Lines, da_Sv_fixture: DataArray) -> None:
DataArray containing Sv data of test zarr file.
"""

M, bottom_contours = lines_fixture.mask(
M, bottom_points = lines_fixture.mask(
da_Sv_fixture.isel(channel=0),
method="slinear",
limit=5,
@@ -243,13 +243,13 @@ def test_lines_mask(lines_fixture: Lines, da_Sv_fixture: DataArray) -> None:
assert counts[0] > counts[1]

# Assert that time is datetime64
assert pd.api.types.is_datetime64_any_dtype(bottom_contours["time"])
assert pd.api.types.is_datetime64_any_dtype(bottom_points["time"])

# Assert that depth is float64
assert pd.api.types.is_float_dtype(bottom_contours["depth"])
assert pd.api.types.is_float_dtype(bottom_points["depth"])

# Place bottom contours in Lines object
lines_2 = Lines(bottom_contours, None, input_type="CSV")
# Place bottom points in Lines object
lines_2 = Lines(bottom_points, None, input_type="CSV")

# Run lines masking to check if masking runs
_, _ = lines_2.mask(da_Sv_fixture.isel(channel=0))
@@ -270,7 +270,7 @@ def test_lines_mask_empty(lines_fixture: Lines, da_Sv_fixture: DataArray) -> Non
# Create empty lines object
lines_fixture.data = lines_fixture.data[0:0]

M, bottom_contours_1 = lines_fixture.mask(da_Sv_fixture.isel(channel=0))
M, bottom_points_1 = lines_fixture.mask(da_Sv_fixture.isel(channel=0))

# Compute unique values
unique_values = np.unique(M.compute().data, return_counts=True)
@@ -283,17 +283,17 @@ def test_lines_mask_empty(lines_fixture: Lines, da_Sv_fixture: DataArray) -> Non
assert len(values) == 1 and len(counts) == 1
assert values[0] == 0

# Assert that bottom_contours is empty
assert bottom_contours_1.empty
# Assert that bottom_points is empty
assert bottom_points_1.empty

# Use bottom contours to create Lines object
lines_2 = Lines(bottom_contours_1, None, input_type="CSV")
# Use bottom points to create Lines object
lines_2 = Lines(bottom_points_1, None, input_type="CSV")

# Run lines masking to check if masking runs
_, bottom_contours_2 = lines_2.mask(da_Sv_fixture.isel(channel=0))
_, bottom_points_2 = lines_2.mask(da_Sv_fixture.isel(channel=0))

# Assert that bottom_contours is empty
assert bottom_contours_2.empty
# Assert that bottom_points is empty
assert bottom_points_2.empty


@pytest.mark.lines
30 changes: 14 additions & 16 deletions echoregions/tests/test_regions2d.py
@@ -518,18 +518,16 @@ def test_mask_empty_no_overlap(regions2d_fixture: Regions2D, da_Sv_fixture: Data
assert mask_output_1 is None

# Create mask with regions that have no overlap with the Sv Data Array
mask_3d_ds, region_contours_1 = regions2d_fixture.mask(
da_Sv_fixture.isel(channel=0), [8, 9, 10]
)
mask_3d_ds, region_points_1 = regions2d_fixture.mask(da_Sv_fixture.isel(channel=0), [8, 9, 10])

# Check that this mask is empty
assert mask_3d_ds.mask_3d.isnull().all()

# Check that region_contours_1 is empty
assert region_contours_1.empty
# Check that region_points_1 is empty
assert region_points_1.empty

# Use region contours to create Regions2D object
r2d_2 = Regions2D(region_contours_1, min_depth=0, max_depth=1000, input_type="CSV")
# Use region points to create Regions2D object
r2d_2 = Regions2D(region_points_1, min_depth=0, max_depth=1000, input_type="CSV")

# Run Regions2d masking to check if masking runs
mask_output_2 = r2d_2.mask(da_Sv_fixture.isel(channel=0))
@@ -622,10 +620,10 @@ def test_mask_2d(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None


@pytest.mark.regions2d
def test_mask_region_contours(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None:
def test_mask_region_points(regions2d_fixture: Regions2D, da_Sv_fixture: DataArray) -> None:
"""
Testing if masking, saving region contours into new regions2d,
and masking again produces the same region contours.
Testing if masking, saving region points into new regions2d,
and masking again produces the same region points.
Parameters
----------
Expand All @@ -637,16 +635,16 @@ def test_mask_region_contours(regions2d_fixture: Regions2D, da_Sv_fixture: DataA
# Get region_id and mask_labels

# Create mask
_, region_contours_1 = regions2d_fixture.mask(da_Sv_fixture.isel(channel=0))
_, region_points_1 = regions2d_fixture.mask(da_Sv_fixture.isel(channel=0))

# Use region contours to create Regions2D object
r2d_2 = Regions2D(region_contours_1, min_depth=0, max_depth=1000, input_type="CSV")
# Use region points to create Regions2D object
r2d_2 = Regions2D(region_points_1, min_depth=0, max_depth=1000, input_type="CSV")

# Run Regions2D masking to check if masking runs
_, region_contours_2 = r2d_2.mask(da_Sv_fixture.isel(channel=0))
_, region_points_2 = r2d_2.mask(da_Sv_fixture.isel(channel=0))

# Check if the two contours are equal
region_contours_1.equals(region_contours_2)
# Check if the two points are equal
region_points_1.equals(region_points_2)


@pytest.mark.regions2d
