Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initialize testing framework setup #902

Merged
merged 2 commits into from
Dec 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 204 additions & 0 deletions echopype/testing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,208 @@
"""testing.py

Helper module for testing related things.
"""
import json
import os
from pathlib import Path

import fsspec
import xarray as xr

from echopype.echodata.echodata import EchoData

# Names exported when test modules do `from echopype.testing import *`.
# These are private-prefixed helpers intentionally shared across test files.
__all__ = [
    "_check_consolidated",
    "_check_output_files",
    "_create_path_str",
    "_check_and_drop_var",
    "_check_and_drop_attr",
    "_compare_ed_against_tree",
]

# Directory containing this module.
HERE = Path(__file__).parent.absolute()
# Folder where the sample test datasets are staged (pulled in by CI).
TEST_DATA_FOLDER = HERE / "test_data"


def _check_consolidated(echodata: EchoData, zmeta_path: Path) -> None:
"""
Checks for the presence of `.zgroup`
for every group in echodata within the `.zmetadata`
file.

Parameters
----------
echodata : EchoData
The echodata object to be checked.
zmeta_path : pathlib.Path
The path to the .zmetadata for the zarr file.
"""
# Check that every group is in
# the zmetadata if consolidated
expected_zgroups = [
os.path.join(p, ".zgroup") if p != "Top-level" else ".zgroup" for p in echodata.group_paths
]

with open(zmeta_path) as f:
meta_json = json.load(f)

file_groups = [k for k in meta_json["metadata"].keys() if k.endswith(".zgroup")]

for g in expected_zgroups:
assert g in file_groups, f"{g} not Found!"


def _check_file_group(data_file, engine, groups):
    """Open each group of ``data_file`` and check that it is an ``xr.Dataset``."""
    for group_name in groups:
        opened = xr.open_dataset(data_file, engine=engine, group=group_name)
        assert isinstance(opened, xr.Dataset) is True


def _check_output_files(engine, output_files, storage_options):
    """
    Check that converted output file(s) contain all expected groups,
    then delete them.

    Parameters
    ----------
    engine : str
        The xarray backend used to open the files
        ("zarr" or a netcdf engine).
    output_files : str or list of str
        Path(s) or URL(s) of the converted file(s).
    storage_options : dict
        Extra options forwarded to ``fsspec`` (e.g. credentials).
    """
    groups = [
        "Provenance",
        "Environment",
        "Sonar/Beam_group1",
        "Sonar",
        "Vendor_specific",
        "Platform",
    ]
    # Normalize to a list so the single-file and multi-file cases
    # share one code path instead of two duplicated branches.
    if not isinstance(output_files, list):
        output_files = [output_files]

    # All files are assumed to live on the same filesystem, so the
    # filesystem object is resolved once from the first path.
    fs = fsspec.get_mapper(output_files[0], **storage_options).fs
    for f in output_files:
        if engine == "zarr":
            # Zarr stores are directories: open through a mapper and
            # delete recursively.
            _check_file_group(fs.get_mapper(f), engine, groups)
            fs.delete(f, recursive=True)
        else:
            _check_file_group(f, engine, groups)
            fs.delete(f)


def _create_path_str(test_folder, paths):
return str(test_folder.joinpath(*paths).absolute())


def _check_and_drop_var(ed, tree, grp_path, var):
"""
This function performs minimal checks of
a variable contained both in an EchoData object
and a Datatree. It ensures that the dimensions,
attributes, and data types are the same. Once
the checks have passed, it then drops these
variables from both the EchoData object and the
Datatree.

Parameters
----------
ed : EchoData
EchoData object that contains the variable
to check and drop.
tree : Datatree
Datatree object that contains the variable
to check and drop.
grp_path : str
The path to the group that the variable is in.
var : str
The variable to be checked and dropped.

Notes
-----
The Datatree object is created from an EchoData
object written to a netcdf file.
"""

ed_var = ed[grp_path][var]
tree_var = tree[grp_path].ds[var]

# make sure that the dimensions and attributes
# are the same for the variable
assert ed_var.dims == tree_var.dims
assert ed_var.attrs == tree_var.attrs

# make sure that the data types are correct too
assert isinstance(ed_var.values, type(tree_var.values))

# drop variables so we can check that datasets are identical
ed[grp_path] = ed[grp_path].drop(var)
tree[grp_path].ds = tree[grp_path].ds.drop(var)


def _check_and_drop_attr(ed, tree, grp_path, attr, typ):
"""
This function performs minimal checks of
an attribute contained both in an EchoData object
and a Datatree group. This function only works for
a group's attribute, it cannot work on variable
attributes. It ensures that the attribute exists
and that it has the expected data type. Once
the checks have passed, it then drops the
attribute from both the EchoData object and the
Datatree.

Parameters
----------
ed : EchoData
EchoData object that contains the attribute
to check and drop.
tree : Datatree
Datatree object that contains the attribute
to check and drop.
grp_path : str
The path to the group that the attribute is in.
attr : str
The attribute to be checked and dropped.
typ : type
The expected data type of the attribute.

Notes
-----
The Datatree object is created from an EchoData
object written to a netcdf file.
"""

# make sure that the attribute exists
assert attr in ed[grp_path].attrs.keys()
assert attr in tree[grp_path].ds.attrs.keys()

# make sure that the value of the attribute is the right type
assert isinstance(ed[grp_path].attrs[attr], typ)
assert isinstance(tree[grp_path].ds.attrs[attr], typ)

# drop the attribute so we can directly compare datasets
del ed[grp_path].attrs[attr]
del tree[grp_path].ds.attrs[attr]


def _compare_ed_against_tree(ed, tree):
"""
This function compares the Datasets
of ed against tree and makes sure they
are identical.

Parameters
----------
ed : EchoData
EchoData object
tree : Datatree
Datatree object

Notes
-----
The Datatree object is created from an EchoData
object written to a netcdf file.
"""

for grp_path in ed.group_paths:
if grp_path == "Top-level":
assert tree.ds.identical(ed[grp_path])
else:
assert tree[grp_path].ds.identical(ed[grp_path])
35 changes: 35 additions & 0 deletions echopype/tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Echopype tests

This directory contains all the various tests for echopype.

Sample datasets for testing can be found one directory above this one called `test_data`.
The actual datasets are not stored within the repository, but are stored in a Google Drive folder.
When the testing infrastructures are spun up, data are pulled from the Google Drive into the repository to be used for testing.

## Current structure

Currently the tests are separated by modules, mirroring a similar structure of the software modules.
Within each test module, a mix of *integration* and *unit* tests can be found.
The following modules are currently being tested:

- calibrate
- consolidate
- convert
- echodata
- metrics
- preprocess
- utils
- visualize
- core

Throughout each module, there is a special file called `conftest.py`.
This file contains all of the fixtures used within that module.
However, the `conftest.py` within the root of the `tests` directory should contain fixtures that can be used by all tests;
therefore, those fixtures' `scope` must be `"session"`.

Within echopype there's a module called `testing`. This module contains helper functions to be used in testing.

Lastly, the test files begin with the word `test`, such as `test_echodata.py`.
These files contain code for *unit testing*,
and any file that ends with `integration` contains code that performs *integration testing* against actual datasets, for example,
`test_echodata_integration.py`.
Empty file.
107 changes: 105 additions & 2 deletions echopype/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,115 @@
"""``pytest`` configuration."""

import pytest
import numpy as np
import xarray as xr
from datatree import DataTree

import fsspec

from echopype.convert.set_groups_base import SetGroupsBase
from echopype.echodata import EchoData
from echopype.testing import TEST_DATA_FOLDER


class MockSetGroups(SetGroupsBase):
    """
    Minimal concrete ``SetGroupsBase`` subclass used to assemble a mock
    ``EchoData`` object in tests without parsing any raw data file.

    Each ``set_*`` method returns a small (mostly attribute-only)
    ``xr.Dataset`` standing in for the corresponding echodata group.

    Notes
    -----
    The redundant ``__init__`` that only forwarded to
    ``super().__init__`` has been removed; the base-class constructor
    is used directly.
    """

    def set_beam(self) -> xr.Dataset:
        """Return a mock Sonar/Beam_group1 dataset (attributes only)."""
        ds = xr.Dataset(attrs={"beam_mode": "vertical", "conversion_equation_t": "type_3"})
        return ds

    def set_env(self) -> xr.Dataset:
        """Return a mock Environment dataset (attributes only)."""
        # TODO: add mock data
        ds = xr.Dataset()

        env_attr_dict = {"notes": "This is a mock env dataset, hence no data is found!"}
        ds = ds.assign_attrs(env_attr_dict)

        return ds

    def set_platform(self) -> xr.Dataset:
        """Return a mock Platform dataset (attributes only)."""
        # TODO: add mock data
        ds = xr.Dataset(
            attrs={
                "platform_code_ICES": 315,
                "platform_name": "My mock boat",
                "platform_type": "Research vessel",
            }
        )

        return ds

    def set_nmea(self) -> xr.Dataset:
        """Return a mock Platform/NMEA dataset (attributes only)."""
        # TODO: add mock data
        ds = xr.Dataset(
            attrs={
                "description": "All Mock NMEA datagrams",
            }
        )

        return ds

    def set_sonar(self) -> xr.Dataset:
        """Return a mock Sonar dataset with global attributes only."""
        # TODO: add mock data
        ds = xr.Dataset()

        # Assemble sonar group global attribute dictionary
        sonar_attr_dict = {
            "sonar_manufacturer": "Simrad",
            "sonar_model": self.sonar_model,
            # transducer (sonar) serial number is not stored in the EK60 raw data file,
            # so sonar_serial_number can't be populated from the raw datagrams
            "sonar_serial_number": "",
            "sonar_software_name": "",
            "sonar_software_version": "0.1.0",
            "sonar_type": "echosounder",
        }
        ds = ds.assign_attrs(sonar_attr_dict)

        return ds

    def set_vendor(self) -> xr.Dataset:
        """Return a mock Vendor_specific dataset (attributes only)."""
        # TODO: add mock data
        ds = xr.Dataset(attrs={"created_by": "Mock test"})
        return ds


@pytest.fixture(scope="session")
def mock_echodata(
    sonar_model="TEST",
    file_chk="./test.raw",
    xml_chk=None,
):
    """
    Session-scoped fixture building a mock ``EchoData`` object out of
    the ``MockSetGroups`` datasets, without reading any raw file.

    Returns
    -------
    EchoData
        An EchoData object whose tree contains mock Environment,
        Platform, Provenance, Sonar, and Vendor_specific groups.
    """
    setgrouper = MockSetGroups(
        parser_obj=None,
        input_file=file_chk,
        xml_path=xml_chk,
        output_path=None,
        sonar_model=sonar_model,
        params={"survey_name": "mock_survey"},
    )

    # Group path -> dataset mapping. "Sonar" is assigned before
    # "Sonar/Beam_group1" so the parent node keeps its position in
    # the insertion order (the previous placeholder assignment of
    # ``None`` that was immediately overwritten has been removed).
    tree_dict = {
        "/": setgrouper.set_toplevel(sonar_model, date_created=np.datetime64("1970-01-01")),
        "Environment": setgrouper.set_env(),
        "Platform": setgrouper.set_platform(),
        "Platform/NMEA": setgrouper.set_nmea(),
        "Provenance": setgrouper.set_provenance(),
        "Sonar": setgrouper.set_sonar(),
        "Sonar/Beam_group1": setgrouper.set_beam(),
        "Vendor_specific": setgrouper.set_vendor(),
    }

    tree = DataTree.from_dict(tree_dict, name="root")
    echodata = EchoData(source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model)
    echodata._set_tree(tree)
    echodata._load_tree()
    return echodata




@pytest.fixture(scope="session")
def dump_output_dir():
    """Return the path to the ``dump`` folder under the test data directory."""
    return TEST_DATA_FOLDER.joinpath("dump")
Expand Down
Empty file.
Empty file.
Loading