From c95e26b4c64346d0c3008ffbbb7a241d8b3d5b0e Mon Sep 17 00:00:00 2001 From: lsetiawan Date: Wed, 14 Dec 2022 10:33:14 -0800 Subject: [PATCH 1/2] Add README for testing, add init, and small refactor --- echopype/testing.py | 140 +++++++++++++++++ echopype/tests/README.md | 30 ++++ echopype/tests/calibrate/__init__.py | 0 echopype/tests/conftest.py | 2 - echopype/tests/consolidate/__init__.py | 0 echopype/tests/convert/__init__.py | 0 echopype/tests/echodata/__init__.py | 0 echopype/tests/echodata/conftest.py | 8 + echopype/tests/echodata/test_echodata.py | 8 +- echopype/tests/echodata/test_zarr_combine.py | 2 +- echopype/tests/echodata/utils.py | 150 ------------------- echopype/tests/metrics/__init__.py | 0 echopype/tests/preprocess/__init__.py | 0 echopype/tests/utils/__init__.py | 0 echopype/tests/visualize/__init__.py | 0 15 files changed, 180 insertions(+), 160 deletions(-) create mode 100644 echopype/tests/README.md create mode 100644 echopype/tests/calibrate/__init__.py create mode 100644 echopype/tests/consolidate/__init__.py create mode 100644 echopype/tests/convert/__init__.py create mode 100644 echopype/tests/echodata/__init__.py create mode 100644 echopype/tests/echodata/conftest.py delete mode 100644 echopype/tests/echodata/utils.py create mode 100644 echopype/tests/metrics/__init__.py create mode 100644 echopype/tests/preprocess/__init__.py create mode 100644 echopype/tests/utils/__init__.py create mode 100644 echopype/tests/visualize/__init__.py diff --git a/echopype/testing.py b/echopype/testing.py index b871fffd3..f1ddc44d9 100644 --- a/echopype/testing.py +++ b/echopype/testing.py @@ -1,4 +1,144 @@ +"""testing.py + +Helper module for testing related things. 
+""" +import json +import os from pathlib import Path +import numpy as np +import xarray as xr +from datatree import DataTree + +from echopype.convert.set_groups_base import SetGroupsBase +from echopype.echodata.echodata import EchoData + +__all__ = ["get_mock_echodata", "check_consolidated"] + HERE = Path(__file__).parent.absolute() TEST_DATA_FOLDER = HERE / "test_data" + + +class SetGroupsTest(SetGroupsBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def set_beam(self) -> xr.Dataset: + ds = xr.Dataset(attrs={"beam_mode": "vertical", "conversion_equation_t": "type_3"}) + return ds + + def set_env(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset() + + env_attr_dict = {"notes": "This is a mock env dataset, hence no data is found!"} + ds = ds.assign_attrs(env_attr_dict) + + return ds + + def set_platform(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset( + attrs={ + "platform_code_ICES": 315, + "platform_name": "My mock boat", + "platform_type": "Research vessel", + } + ) + + return ds + + def set_nmea(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset( + attrs={ + "description": "All Mock NMEA datagrams", + } + ) + + return ds + + def set_sonar(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset() + + # Assemble sonar group global attribute dictionary + sonar_attr_dict = { + "sonar_manufacturer": "Simrad", + "sonar_model": self.sonar_model, + # transducer (sonar) serial number is not stored in the EK60 raw data file, + # so sonar_serial_number can't be populated from the raw datagrams + "sonar_serial_number": "", + "sonar_software_name": "", + "sonar_software_version": "0.1.0", + "sonar_type": "echosounder", + } + ds = ds.assign_attrs(sonar_attr_dict) + + return ds + + def set_vendor(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset(attrs={"created_by": "Mock test"}) + return ds + + +def get_mock_echodata( + sonar_model="TEST", + file_chk="./test.raw", + xml_chk=None, 
+): + # Setup tree dictionary + tree_dict = {} + + setgrouper = SetGroupsTest( + parser_obj=None, + input_file=file_chk, + xml_path=xml_chk, + output_path=None, + sonar_model=sonar_model, + params={"survey_name": "mock_survey"}, + ) + tree_dict["/"] = setgrouper.set_toplevel(sonar_model, date_created=np.datetime64("1970-01-01")) + tree_dict["Environment"] = setgrouper.set_env() + tree_dict["Platform"] = setgrouper.set_platform() + tree_dict["Platform/NMEA"] = setgrouper.set_nmea() + tree_dict["Provenance"] = setgrouper.set_provenance() + tree_dict["Sonar"] = None + tree_dict["Sonar/Beam_group1"] = setgrouper.set_beam() + tree_dict["Sonar"] = setgrouper.set_sonar() + tree_dict["Vendor_specific"] = setgrouper.set_vendor() + + tree = DataTree.from_dict(tree_dict, name="root") + echodata = EchoData(source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model) + echodata._set_tree(tree) + echodata._load_tree() + return echodata + + +def check_consolidated(echodata: EchoData, zmeta_path: Path) -> None: + """ + Checks for the presence of `.zgroup` + for every group in echodata within the `.zmetadata` + file. + + Parameters + ---------- + echodata : EchoData + The echodata object to be checked. + zmeta_path : pathlib.Path + The path to the .zmetadata for the zarr file. + """ + # Check that every group is in + # the zmetadata if consolidated + expected_zgroups = [ + os.path.join(p, ".zgroup") if p != "Top-level" else ".zgroup" for p in echodata.group_paths + ] + + with open(zmeta_path) as f: + meta_json = json.load(f) + + file_groups = [k for k in meta_json["metadata"].keys() if k.endswith(".zgroup")] + + for g in expected_zgroups: + assert g in file_groups, f"{g} not Found!" diff --git a/echopype/tests/README.md b/echopype/tests/README.md new file mode 100644 index 000000000..fdd9106c1 --- /dev/null +++ b/echopype/tests/README.md @@ -0,0 +1,30 @@ +# Echopype tests + +This directory contains all the various tests for echopype. 
+ +Sample datasets for testing can be found in a directory called `test_data`, one level above this one. +The actual datasets are not stored within the repository, but are stored in a Google Drive folder. +When the testing infrastructure is spun up, data are pulled from the Google Drive into the repository to be used for testing. + +## Current structure + +Currently the tests are separated by module, mirroring the structure of the software modules. +Within each test module, a mix of *integration* and *unit* tests can be found. +The following modules are currently being tested: + +- calibrate +- consolidate +- convert +- echodata +- metrics +- preprocess +- utils +- visualize +- core + +Throughout each module, there is a special file called `conftest.py`. +This file contains all of the fixtures used within that module. +However, the `conftest.py` within the root of the `tests` directory should contain fixtures that can be used by all tests, +therefore, the fixtures `scope` must be `"session"`. + +Within echopype's software module there's a module called `testing`. Within this module contains any helper functions or classes to be used in testing. 
diff --git a/echopype/tests/calibrate/__init__.py b/echopype/tests/calibrate/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/echopype/tests/conftest.py b/echopype/tests/conftest.py index 0d161d57e..245235a40 100644 --- a/echopype/tests/conftest.py +++ b/echopype/tests/conftest.py @@ -2,8 +2,6 @@ import pytest -import fsspec - from echopype.testing import TEST_DATA_FOLDER diff --git a/echopype/tests/consolidate/__init__.py b/echopype/tests/consolidate/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/echopype/tests/convert/__init__.py b/echopype/tests/convert/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/echopype/tests/echodata/__init__.py b/echopype/tests/echodata/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/echopype/tests/echodata/conftest.py b/echopype/tests/echodata/conftest.py new file mode 100644 index 000000000..82164c5fd --- /dev/null +++ b/echopype/tests/echodata/conftest.py @@ -0,0 +1,8 @@ +import pytest + +from echopype.testing import get_mock_echodata + + +@pytest.fixture +def mock_echodata(): + return get_mock_echodata() diff --git a/echopype/tests/echodata/test_echodata.py b/echopype/tests/echodata/test_echodata.py index 4037fca74..8793a1700 100644 --- a/echopype/tests/echodata/test_echodata.py +++ b/echopype/tests/echodata/test_echodata.py @@ -1,6 +1,5 @@ from textwrap import dedent -import os import fsspec from pathlib import Path import shutil @@ -17,7 +16,7 @@ import xarray as xr import numpy as np -from utils import get_mock_echodata, check_consolidated +from echopype.testing import check_consolidated @pytest.fixture(scope="module") @@ -207,10 +206,6 @@ class TestEchoData: 'Vendor_specific', } - @pytest.fixture(scope="class") - def mock_echodata(self): - return get_mock_echodata() - @pytest.fixture(scope="class") def converted_zarr(self, single_ek60_zarr): return single_ek60_zarr @@ -405,7 +400,6 @@ def test_compute_range(compute_range_samples): 
else: raise AssertionError - mobile_env_params = EnvParams( xr.Dataset( data_vars={ diff --git a/echopype/tests/echodata/test_zarr_combine.py b/echopype/tests/echodata/test_zarr_combine.py index 3ae8bb1bf..9380c6c0b 100644 --- a/echopype/tests/echodata/test_zarr_combine.py +++ b/echopype/tests/echodata/test_zarr_combine.py @@ -13,7 +13,7 @@ import pytest import zarr import os.path -from utils import check_consolidated +from echopype.testing import check_consolidated @pytest.fixture(scope="module") diff --git a/echopype/tests/echodata/utils.py b/echopype/tests/echodata/utils.py deleted file mode 100644 index 011dafeeb..000000000 --- a/echopype/tests/echodata/utils.py +++ /dev/null @@ -1,150 +0,0 @@ -import os -import json -from pathlib import Path - -import xarray as xr - -from datatree import DataTree - -import numpy as np - -from echopype.convert.set_groups_base import SetGroupsBase -from echopype.echodata.echodata import EchoData - - -class SetGroupsTest(SetGroupsBase): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def set_beam(self) -> xr.Dataset: - ds = xr.Dataset( - attrs={"beam_mode": "vertical", "conversion_equation_t": "type_3"} - ) - return ds - - def set_env(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset() - - env_attr_dict = { - "notes": "This is a mock env dataset, hence no data is found!" 
- } - ds = ds.assign_attrs(env_attr_dict) - - return ds - - def set_platform(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset( - attrs={ - "platform_code_ICES": 315, - "platform_name": "My mock boat", - "platform_type": "Research vessel", - } - ) - - return ds - - def set_nmea(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset( - attrs={ - "description": "All Mock NMEA datagrams", - } - ) - - return ds - - def set_sonar(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset() - - # Assemble sonar group global attribute dictionary - sonar_attr_dict = { - "sonar_manufacturer": "Simrad", - "sonar_model": self.sonar_model, - # transducer (sonar) serial number is not stored in the EK60 raw data file, - # so sonar_serial_number can't be populated from the raw datagrams - "sonar_serial_number": "", - "sonar_software_name": "", - "sonar_software_version": "0.1.0", - "sonar_type": "echosounder", - } - ds = ds.assign_attrs(sonar_attr_dict) - - return ds - - def set_vendor(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset(attrs={"created_by": "Mock test"}) - return ds - - -def get_mock_echodata( - sonar_model='TEST', - file_chk='./test.raw', - xml_chk=None, -): - # Setup tree dictionary - tree_dict = {} - - setgrouper = SetGroupsTest( - parser_obj=None, - input_file=file_chk, - xml_path=xml_chk, - output_path=None, - sonar_model=sonar_model, - params={"survey_name": "mock_survey"}, - ) - tree_dict["/"] = setgrouper.set_toplevel( - sonar_model, date_created=np.datetime64("1970-01-01") - ) - tree_dict["Environment"] = setgrouper.set_env() - tree_dict["Platform"] = setgrouper.set_platform() - tree_dict["Platform/NMEA"] = setgrouper.set_nmea() - tree_dict["Provenance"] = setgrouper.set_provenance() - tree_dict["Sonar"] = None - tree_dict["Sonar/Beam_group1"] = setgrouper.set_beam() - tree_dict["Sonar"] = setgrouper.set_sonar() - tree_dict["Vendor_specific"] = setgrouper.set_vendor() - - tree = DataTree.from_dict(tree_dict, 
name="root") - echodata = EchoData( - source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model - ) - echodata._set_tree(tree) - echodata._load_tree() - return echodata - - -def check_consolidated(echodata: EchoData, zmeta_path: Path) -> None: - """ - Checks for the presence of `.zgroup` - for every group in echodata within the `.zmetadata` - file. - - Parameters - ---------- - echodata : EchoData - The echodata object to be checked. - zmeta_path : pathlib.Path - The path to the .zmetadata for the zarr file. - """ - # Check that every group is in - # the zmetadata if consolidated - expected_zgroups = [ - os.path.join(p, '.zgroup') if p != 'Top-level' else '.zgroup' - for p in echodata.group_paths - ] - - with open(zmeta_path) as f: - meta_json = json.load(f) - - file_groups = [ - k - for k in meta_json['metadata'].keys() - if k.endswith('.zgroup') - ] - - for g in expected_zgroups: - assert g in file_groups, f"{g} not Found!" diff --git a/echopype/tests/metrics/__init__.py b/echopype/tests/metrics/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/echopype/tests/preprocess/__init__.py b/echopype/tests/preprocess/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/echopype/tests/utils/__init__.py b/echopype/tests/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/echopype/tests/visualize/__init__.py b/echopype/tests/visualize/__init__.py new file mode 100644 index 000000000..e69de29bb From 33cc4f25feb72234cbfcf0d4edb2e47ea05d01b8 Mon Sep 17 00:00:00 2001 From: lsetiawan Date: Wed, 14 Dec 2022 15:09:19 -0800 Subject: [PATCH 2/2] Update echodata module to reflect framework --- echopype/testing.py | 268 ++++++---- echopype/tests/README.md | 7 +- echopype/tests/conftest.py | 105 ++++ .../test_convert_source_target_locs.py | 41 +- echopype/tests/echodata/conftest.py | 344 ++++++++++++- .../tests/echodata/test_api_integration.py | 31 ++ ...combine.py => test_combine_integration.py} | 120 ----- 
echopype/tests/echodata/test_echodata.py | 480 +++--------------- .../echodata/test_echodata_integration.py | 466 +++++++++++++++++ echopype/tests/echodata/test_echodata_misc.py | 49 -- .../tests/echodata/test_echodata_structure.py | 307 ----------- echopype/tests/echodata/test_zarr_combine.py | 46 +- 12 files changed, 1191 insertions(+), 1073 deletions(-) create mode 100644 echopype/tests/echodata/test_api_integration.py rename echopype/tests/echodata/{test_echodata_combine.py => test_combine_integration.py} (68%) create mode 100644 echopype/tests/echodata/test_echodata_integration.py delete mode 100644 echopype/tests/echodata/test_echodata_misc.py delete mode 100644 echopype/tests/echodata/test_echodata_structure.py diff --git a/echopype/testing.py b/echopype/testing.py index f1ddc44d9..16314d9c7 100644 --- a/echopype/testing.py +++ b/echopype/testing.py @@ -6,117 +6,25 @@ import os from pathlib import Path -import numpy as np +import fsspec import xarray as xr -from datatree import DataTree -from echopype.convert.set_groups_base import SetGroupsBase from echopype.echodata.echodata import EchoData -__all__ = ["get_mock_echodata", "check_consolidated"] +__all__ = [ + "_check_consolidated", + "_check_output_files", + "_create_path_str", + "_check_and_drop_var", + "_check_and_drop_attr", + "_compare_ed_against_tree", +] HERE = Path(__file__).parent.absolute() TEST_DATA_FOLDER = HERE / "test_data" -class SetGroupsTest(SetGroupsBase): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def set_beam(self) -> xr.Dataset: - ds = xr.Dataset(attrs={"beam_mode": "vertical", "conversion_equation_t": "type_3"}) - return ds - - def set_env(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset() - - env_attr_dict = {"notes": "This is a mock env dataset, hence no data is found!"} - ds = ds.assign_attrs(env_attr_dict) - - return ds - - def set_platform(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset( - attrs={ - 
"platform_code_ICES": 315, - "platform_name": "My mock boat", - "platform_type": "Research vessel", - } - ) - - return ds - - def set_nmea(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset( - attrs={ - "description": "All Mock NMEA datagrams", - } - ) - - return ds - - def set_sonar(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset() - - # Assemble sonar group global attribute dictionary - sonar_attr_dict = { - "sonar_manufacturer": "Simrad", - "sonar_model": self.sonar_model, - # transducer (sonar) serial number is not stored in the EK60 raw data file, - # so sonar_serial_number can't be populated from the raw datagrams - "sonar_serial_number": "", - "sonar_software_name": "", - "sonar_software_version": "0.1.0", - "sonar_type": "echosounder", - } - ds = ds.assign_attrs(sonar_attr_dict) - - return ds - - def set_vendor(self) -> xr.Dataset: - # TODO: add mock data - ds = xr.Dataset(attrs={"created_by": "Mock test"}) - return ds - - -def get_mock_echodata( - sonar_model="TEST", - file_chk="./test.raw", - xml_chk=None, -): - # Setup tree dictionary - tree_dict = {} - - setgrouper = SetGroupsTest( - parser_obj=None, - input_file=file_chk, - xml_path=xml_chk, - output_path=None, - sonar_model=sonar_model, - params={"survey_name": "mock_survey"}, - ) - tree_dict["/"] = setgrouper.set_toplevel(sonar_model, date_created=np.datetime64("1970-01-01")) - tree_dict["Environment"] = setgrouper.set_env() - tree_dict["Platform"] = setgrouper.set_platform() - tree_dict["Platform/NMEA"] = setgrouper.set_nmea() - tree_dict["Provenance"] = setgrouper.set_provenance() - tree_dict["Sonar"] = None - tree_dict["Sonar/Beam_group1"] = setgrouper.set_beam() - tree_dict["Sonar"] = setgrouper.set_sonar() - tree_dict["Vendor_specific"] = setgrouper.set_vendor() - - tree = DataTree.from_dict(tree_dict, name="root") - echodata = EchoData(source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model) - echodata._set_tree(tree) - echodata._load_tree() - return echodata 
- - -def check_consolidated(echodata: EchoData, zmeta_path: Path) -> None: +def _check_consolidated(echodata: EchoData, zmeta_path: Path) -> None: """ Checks for the presence of `.zgroup` for every group in echodata within the `.zmetadata` @@ -142,3 +50,159 @@ def check_consolidated(echodata: EchoData, zmeta_path: Path) -> None: for g in expected_zgroups: assert g in file_groups, f"{g} not Found!" + + +def _check_file_group(data_file, engine, groups): + for g in groups: + ds = xr.open_dataset(data_file, engine=engine, group=g) + + assert isinstance(ds, xr.Dataset) is True + + +def _check_output_files(engine, output_files, storage_options): + groups = [ + "Provenance", + "Environment", + "Sonar/Beam_group1", + "Sonar", + "Vendor_specific", + "Platform", + ] + if isinstance(output_files, list): + fs = fsspec.get_mapper(output_files[0], **storage_options).fs + for f in output_files: + if engine == "zarr": + _check_file_group(fs.get_mapper(f), engine, groups) + fs.delete(f, recursive=True) + else: + _check_file_group(f, engine, groups) + fs.delete(f) + else: + fs = fsspec.get_mapper(output_files, **storage_options).fs + if engine == "zarr": + _check_file_group(fs.get_mapper(output_files), engine, groups) + fs.delete(output_files, recursive=True) + else: + _check_file_group(output_files, engine, groups) + fs.delete(output_files) + + +def _create_path_str(test_folder, paths): + return str(test_folder.joinpath(*paths).absolute()) + + +def _check_and_drop_var(ed, tree, grp_path, var): + """ + This function performs minimal checks of + a variable contained both in an EchoData object + and a Datatree. It ensures that the dimensions, + attributes, and data types are the same. Once + the checks have passed, it then drops these + variables from both the EchoData object and the + Datatree. + + Parameters + ---------- + ed : EchoData + EchoData object that contains the variable + to check and drop. 
+ tree : Datatree + Datatree object that contains the variable + to check and drop. + grp_path : str + The path to the group that the variable is in. + var : str + The variable to be checked and dropped. + + Notes + ----- + The Datatree object is created from an EchoData + object written to a netcdf file. + """ + + ed_var = ed[grp_path][var] + tree_var = tree[grp_path].ds[var] + + # make sure that the dimensions and attributes + # are the same for the variable + assert ed_var.dims == tree_var.dims + assert ed_var.attrs == tree_var.attrs + + # make sure that the data types are correct too + assert isinstance(ed_var.values, type(tree_var.values)) + + # drop variables so we can check that datasets are identical + ed[grp_path] = ed[grp_path].drop(var) + tree[grp_path].ds = tree[grp_path].ds.drop(var) + + +def _check_and_drop_attr(ed, tree, grp_path, attr, typ): + """ + This function performs minimal checks of + an attribute contained both in an EchoData object + and a Datatree group. This function only works for + a group's attribute, it cannot work on variable + attributes. It ensures that the attribute exists + and that it has the expected data type. Once + the checks have passed, it then drops the + attribute from both the EchoData object and the + Datatree. + + Parameters + ---------- + ed : EchoData + EchoData object that contains the attribute + to check and drop. + tree : Datatree + Datatree object that contains the attribute + to check and drop. + grp_path : str + The path to the group that the attribute is in. + attr : str + The attribute to be checked and dropped. + typ : type + The expected data type of the attribute. + + Notes + ----- + The Datatree object is created from an EchoData + object written to a netcdf file. 
+ """ + + # make sure that the attribute exists + assert attr in ed[grp_path].attrs.keys() + assert attr in tree[grp_path].ds.attrs.keys() + + # make sure that the value of the attribute is the right type + assert isinstance(ed[grp_path].attrs[attr], typ) + assert isinstance(tree[grp_path].ds.attrs[attr], typ) + + # drop the attribute so we can directly compare datasets + del ed[grp_path].attrs[attr] + del tree[grp_path].ds.attrs[attr] + + +def _compare_ed_against_tree(ed, tree): + """ + This function compares the Datasets + of ed against tree and makes sure they + are identical. + + Parameters + ---------- + ed : EchoData + EchoData object + tree : Datatree + Datatree object + + Notes + ----- + The Datatree object is created from an EchoData + object written to a netcdf file. + """ + + for grp_path in ed.group_paths: + if grp_path == "Top-level": + assert tree.ds.identical(ed[grp_path]) + else: + assert tree[grp_path].ds.identical(ed[grp_path]) diff --git a/echopype/tests/README.md b/echopype/tests/README.md index fdd9106c1..223869bd2 100644 --- a/echopype/tests/README.md +++ b/echopype/tests/README.md @@ -27,4 +27,9 @@ This file contains all of the fixtures used within that module. However, the `conftest.py` within the root of the `tests` directory should contain fixtures that can be used by all tests, therefore, the fixtures `scope` must be `"session"`. -Within echopype's software module there's a module called `testing`. Within this module contains any helper functions or classes to be used in testing. +Within echopype there's a module called `testing`. This module contains any helper functions to be used in testing. + +Lastly, the test files begin with the word `test`, such as `test_echodata.py`. +These files contain code for *unit testing*, +and any file that ends with `integration` contains code that performs *integration testing* against actual datasets, for example, +`test_echodata_integration.py`. 
diff --git a/echopype/tests/conftest.py b/echopype/tests/conftest.py index 245235a40..30f22dd90 100644 --- a/echopype/tests/conftest.py +++ b/echopype/tests/conftest.py @@ -1,10 +1,115 @@ """``pytest`` configuration.""" import pytest +import numpy as np +import xarray as xr +from datatree import DataTree +from echopype.convert.set_groups_base import SetGroupsBase +from echopype.echodata import EchoData from echopype.testing import TEST_DATA_FOLDER +class MockSetGroups(SetGroupsBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def set_beam(self) -> xr.Dataset: + ds = xr.Dataset(attrs={"beam_mode": "vertical", "conversion_equation_t": "type_3"}) + return ds + + def set_env(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset() + + env_attr_dict = {"notes": "This is a mock env dataset, hence no data is found!"} + ds = ds.assign_attrs(env_attr_dict) + + return ds + + def set_platform(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset( + attrs={ + "platform_code_ICES": 315, + "platform_name": "My mock boat", + "platform_type": "Research vessel", + } + ) + + return ds + + def set_nmea(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset( + attrs={ + "description": "All Mock NMEA datagrams", + } + ) + + return ds + + def set_sonar(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset() + + # Assemble sonar group global attribute dictionary + sonar_attr_dict = { + "sonar_manufacturer": "Simrad", + "sonar_model": self.sonar_model, + # transducer (sonar) serial number is not stored in the EK60 raw data file, + # so sonar_serial_number can't be populated from the raw datagrams + "sonar_serial_number": "", + "sonar_software_name": "", + "sonar_software_version": "0.1.0", + "sonar_type": "echosounder", + } + ds = ds.assign_attrs(sonar_attr_dict) + + return ds + + def set_vendor(self) -> xr.Dataset: + # TODO: add mock data + ds = xr.Dataset(attrs={"created_by": "Mock test"}) + return ds + + 
+@pytest.fixture(scope="session") +def mock_echodata( + sonar_model="TEST", + file_chk="./test.raw", + xml_chk=None, +): + # Setup tree dictionary + tree_dict = {} + + setgrouper = MockSetGroups( + parser_obj=None, + input_file=file_chk, + xml_path=xml_chk, + output_path=None, + sonar_model=sonar_model, + params={"survey_name": "mock_survey"}, + ) + tree_dict["/"] = setgrouper.set_toplevel(sonar_model, date_created=np.datetime64("1970-01-01")) + tree_dict["Environment"] = setgrouper.set_env() + tree_dict["Platform"] = setgrouper.set_platform() + tree_dict["Platform/NMEA"] = setgrouper.set_nmea() + tree_dict["Provenance"] = setgrouper.set_provenance() + tree_dict["Sonar"] = None + tree_dict["Sonar/Beam_group1"] = setgrouper.set_beam() + tree_dict["Sonar"] = setgrouper.set_sonar() + tree_dict["Vendor_specific"] = setgrouper.set_vendor() + + tree = DataTree.from_dict(tree_dict, name="root") + echodata = EchoData(source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model) + echodata._set_tree(tree) + echodata._load_tree() + return echodata + + + + @pytest.fixture(scope="session") def dump_output_dir(): return TEST_DATA_FOLDER / "dump" diff --git a/echopype/tests/convert/test_convert_source_target_locs.py b/echopype/tests/convert/test_convert_source_target_locs.py index 843f7ad83..254dc5390 100644 --- a/echopype/tests/convert/test_convert_source_target_locs.py +++ b/echopype/tests/convert/test_convert_source_target_locs.py @@ -9,51 +9,12 @@ import os -import fsspec import xarray as xr import pytest from tempfile import TemporaryDirectory from echopype import open_raw from echopype.utils.coding import DEFAULT_ENCODINGS - - -def _check_file_group(data_file, engine, groups): - for g in groups: - ds = xr.open_dataset(data_file, engine=engine, group=g) - - assert isinstance(ds, xr.Dataset) is True - - -def _check_output_files(engine, output_files, storage_options): - groups = [ - "Provenance", - "Environment", - "Sonar/Beam_group1", - "Sonar", - "Vendor_specific", - 
"Platform", - ] - if isinstance(output_files, list): - fs = fsspec.get_mapper(output_files[0], **storage_options).fs - for f in output_files: - if engine == "zarr": - _check_file_group(fs.get_mapper(f), engine, groups) - fs.delete(f, recursive=True) - else: - _check_file_group(f, engine, groups) - fs.delete(f) - else: - fs = fsspec.get_mapper(output_files, **storage_options).fs - if engine == "zarr": - _check_file_group(fs.get_mapper(output_files), engine, groups) - fs.delete(output_files, recursive=True) - else: - _check_file_group(output_files, engine, groups) - fs.delete(output_files) - - -def _create_path_str(test_folder, paths): - return str(test_folder.joinpath(*paths).absolute()) +from echopype.testing import _check_output_files, _create_path_str @pytest.fixture( diff --git a/echopype/tests/echodata/conftest.py b/echopype/tests/echodata/conftest.py index 82164c5fd..f3e46971e 100644 --- a/echopype/tests/echodata/conftest.py +++ b/echopype/tests/echodata/conftest.py @@ -1,8 +1,346 @@ import pytest +import fsspec -from echopype.testing import get_mock_echodata + +@pytest.fixture +def azfp_path(test_path): + return test_path['AZFP'] + + +@pytest.fixture +def ek60_path(test_path): + return test_path['EK60'] + + +@pytest.fixture +def ek80_path(test_path): + return test_path['EK80'] + + +@pytest.fixture(scope="class") +def single_ek60_zarr(test_path): + return ( + test_path['EK60'] + / "ncei-wcsd" + / "Summer2017-D20170615-T190214__NEW.zarr" + ) + + +@pytest.fixture( + params=[ + single_ek60_zarr, + (str, "ncei-wcsd", "Summer2017-D20170615-T190214.zarr"), + (None, "ncei-wcsd", "Summer2017-D20170615-T190214__NEW.nc"), + "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.nc", + "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.zarr", + "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.zarr", + fsspec.get_mapper( + "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.zarr", + **dict( + 
client_kwargs=dict(endpoint_url="http://localhost:9000/"), + key="minioadmin", + secret="minioadmin", + ), + ), + ], + ids=[ + "ek60_zarr_path", + "ek60_zarr_path_string", + "ek60_netcdf_path", + "ek60_netcdf_s3_string", + "ek60_zarr_http_string", + "ek60_zarr_s3_string", + "ek60_zarr_s3_FSMap", + ], +) +def ek60_converted_zarr(request, test_path): + if isinstance(request.param, tuple): + desired_type, *paths = request.param + if desired_type is not None: + return desired_type(test_path['EK60'].joinpath(*paths)) + else: + return test_path['EK60'].joinpath(*paths) + else: + return request.param + + +@pytest.fixture( + params=[ + ( + ("EK60", "ncei-wcsd", "Summer2017-D20170615-T190214.raw"), + "EK60", + None, + None, + "CW", + "power", + ), + ( + ("EK80_NEW", "D20211004-T233354.raw"), + "EK80", + None, + None, + "CW", + "power", + ), + ( + ("EK80_NEW", "echopype-test-D20211004-T235930.raw"), + "EK80", + None, + None, + "BB", + "complex", + ), + ( + ("EK80_NEW", "D20211004-T233115.raw"), + "EK80", + None, + None, + "CW", + "complex", + ), + ( + ("ES70", "D20151202-T020259.raw"), + "ES70", + None, + None, + None, + None, + ), + ( + ("AZFP", "ooi", "17032923.01A"), + "AZFP", + ("AZFP", "ooi", "17032922.XML"), + "Sv", + None, + None, + ), + ( + ("AZFP", "ooi", "17032923.01A"), + "AZFP", + ("AZFP", "ooi", "17032922.XML"), + "TS", + None, + None, + ), + ( + ("AD2CP", "raw", "090", "rawtest.090.00001.ad2cp"), + "AD2CP", + None, + None, + None, + None, + ), + ], + ids=[ + "ek60_cw_power", + "ek80_cw_power", + "ek80_bb_complex", + "ek80_cw_complex", + "es70", + "azfp_sv", + "azfp_sp", + "ad2cp", + ], +) +def compute_range_samples(request, test_path): + ( + filepath, + sonar_model, + azfp_xml_path, + azfp_cal_type, + ek_waveform_mode, + ek_encode_mode, + ) = request.param + if sonar_model.lower() == 'es70': + pytest.xfail( + reason="Not supported at the moment", + ) + path_model, *paths = filepath + filepath = test_path[path_model].joinpath(*paths) + + if azfp_xml_path is not 
None: + path_model, *paths = azfp_xml_path + azfp_xml_path = test_path[path_model].joinpath(*paths) + return ( + filepath, + sonar_model, + azfp_xml_path, + azfp_cal_type, + ek_waveform_mode, + ek_encode_mode, + ) + + +@pytest.fixture( + params=[ + { + "path_model": "EK60", + "raw_path": "Winter2017-D20170115-T150122.raw", + }, + { + "path_model": "EK80", + "raw_path": "D20170912-T234910.raw", + }, + ], + ids=[ + "ek60_winter2017", + "ek80_summer2017", + ], +) +def range_check_files(request, test_path): + return ( + request.param["path_model"], + test_path[request.param["path_model"]].joinpath( + request.param['raw_path'] + ), + ) + + +@pytest.fixture( + params=[ + ( + { + "randint_low": 10, + "randint_high": 5000, + "num_datasets": 20, + "group": "test_group", + "zarr_name": "combined_echodatas.zarr", + "delayed_ds_list": False, + } + ), + ( + { + "randint_low": 10, + "randint_high": 5000, + "num_datasets": 20, + "group": "test_group", + "zarr_name": "combined_echodatas.zarr", + "delayed_ds_list": True, + } + ), + ], + ids=["in-memory-ds_list", "lazy-ds_list"], + scope="module", +) +def append_ds_list_params(request): + return list(request.param.values()) + + +@pytest.fixture +def ek60_test_data(test_path): + files = [ + ("ncei-wcsd", "Summer2017-D20170620-T011027.raw"), + ("ncei-wcsd", "Summer2017-D20170620-T014302.raw"), + ("ncei-wcsd", "Summer2017-D20170620-T021537.raw"), + ] + return [test_path["EK60"].joinpath(*f) for f in files] + + +@pytest.fixture +def ek60_diff_range_sample_test_data(test_path): + files = [ + ("ncei-wcsd", "SH1701", "TEST-D20170114-T202932.raw"), + ("ncei-wcsd", "SH1701", "TEST-D20170114-T203337.raw"), + ("ncei-wcsd", "SH1701", "TEST-D20170114-T203853.raw"), + ] + return [test_path["EK60"].joinpath(*f) for f in files] @pytest.fixture -def mock_echodata(): - return get_mock_echodata() +def ek80_test_data(test_path): + files = [ + ("echopype-test-D20211005-T000706.raw",), + ("echopype-test-D20211005-T000737.raw",), + 
("echopype-test-D20211005-T000810.raw",), + ("echopype-test-D20211005-T000843.raw",), + ] + return [test_path["EK80_NEW"].joinpath(*f) for f in files] + + +@pytest.fixture +def ek80_broadband_same_range_sample_test_data(test_path): + files = [ + ("ncei-wcsd", "SH1707", "Reduced_D20170826-T205615.raw"), + ("ncei-wcsd", "SH1707", "Reduced_D20170826-T205659.raw"), + ("ncei-wcsd", "SH1707", "Reduced_D20170826-T205742.raw"), + ] + return [test_path["EK80"].joinpath(*f) for f in files] + + +@pytest.fixture +def ek80_narrowband_diff_range_sample_test_data(test_path): + files = [ + ("ncei-wcsd", "SH2106", "EK80", "Reduced_Hake-D20210701-T130426.raw"), + ("ncei-wcsd", "SH2106", "EK80", "Reduced_Hake-D20210701-T131325.raw"), + ("ncei-wcsd", "SH2106", "EK80", "Reduced_Hake-D20210701-T131621.raw"), + ] + return [test_path["EK80"].joinpath(*f) for f in files] + + +@pytest.fixture +def azfp_test_data(test_path): + + # TODO: in the future we should replace these files with another set of + # similarly small set of files, for example the files from the location below: + # "https://rawdata.oceanobservatories.org/files/CE01ISSM/R00015/instrmts/dcl37/ZPLSC_sn55076/DATA/202109/*" + # This is because we have lost track of where the current files came from, + # since the filenames does not contain the site identifier. 
+ files = [ + ("ooi", "18100407.01A"), + ("ooi", "18100408.01A"), + ("ooi", "18100409.01A"), + ] + return [test_path["AZFP"].joinpath(*f) for f in files] + + +@pytest.fixture +def azfp_test_xml(test_path): + return test_path["AZFP"].joinpath(*("ooi", "18092920.XML")) + + +@pytest.fixture( + params=[ + {"sonar_model": "EK60", "xml_file": None, "files": "ek60_test_data"}, + { + "sonar_model": "EK60", + "xml_file": None, + "files": "ek60_diff_range_sample_test_data", + }, + { + "sonar_model": "AZFP", + "xml_file": "azfp_test_xml", + "files": "azfp_test_data", + }, + { + "sonar_model": "EK80", + "xml_file": None, + "files": "ek80_broadband_same_range_sample_test_data", + }, + { + "sonar_model": "EK80", + "xml_file": None, + "files": "ek80_narrowband_diff_range_sample_test_data", + }, + ], + ids=[ + "ek60", + "ek60_diff_range_sample", + "azfp", + "ek80_bb_same_range_sample", + "ek80_nb_diff_range_sample", + ], +) +def raw_datasets(request): + files = request.param["files"] + xml_file = request.param["xml_file"] + if xml_file is not None: + xml_file = request.getfixturevalue(xml_file) + + files = request.getfixturevalue(files) + + return ( + files, + request.param['sonar_model'], + xml_file, + request.node.callspec.id, + ) diff --git a/echopype/tests/echodata/test_api_integration.py b/echopype/tests/echodata/test_api_integration.py new file mode 100644 index 000000000..b592102d6 --- /dev/null +++ b/echopype/tests/echodata/test_api_integration.py @@ -0,0 +1,31 @@ +import fsspec +from echopype import open_converted +from echopype.echodata import EchoData + +def test_open_converted(ek60_converted_zarr, minio_bucket): # noqa + def _check_path(zarr_path): + storage_options = {} + if zarr_path.startswith("s3://"): + storage_options = dict( + client_kwargs=dict(endpoint_url="http://localhost:9000/"), + key="minioadmin", + secret="minioadmin", + ) + return storage_options + + storage_options = {} + if not isinstance(ek60_converted_zarr, fsspec.FSMap): + storage_options = 
_check_path(str(ek60_converted_zarr)) + + try: + ed = open_converted( + ek60_converted_zarr, storage_options=storage_options + ) + assert isinstance(ed, EchoData) is True + except Exception as e: + if ( + isinstance(ek60_converted_zarr, str) + and ek60_converted_zarr.startswith("s3://") + and ek60_converted_zarr.endswith(".nc") + ): + assert isinstance(e, ValueError) is True diff --git a/echopype/tests/echodata/test_echodata_combine.py b/echopype/tests/echodata/test_combine_integration.py similarity index 68% rename from echopype/tests/echodata/test_echodata_combine.py rename to echopype/tests/echodata/test_combine_integration.py index aea73e989..dc9528028 100644 --- a/echopype/tests/echodata/test_echodata_combine.py +++ b/echopype/tests/echodata/test_combine_integration.py @@ -2,7 +2,6 @@ from pathlib import Path import numpy as np -import pytest import xarray as xr import echopype @@ -13,125 +12,6 @@ from dask.distributed import Client -@pytest.fixture -def ek60_diff_range_sample_test_data(test_path): - files = [ - ("ncei-wcsd", "SH1701", "TEST-D20170114-T202932.raw"), - ("ncei-wcsd", "SH1701", "TEST-D20170114-T203337.raw"), - ("ncei-wcsd", "SH1701", "TEST-D20170114-T203853.raw"), - ] - return [test_path["EK60"].joinpath(*f) for f in files] - - -@pytest.fixture(scope="module") -def ek60_test_data(test_path): - files = [ - ("ncei-wcsd", "Summer2017-D20170620-T011027.raw"), - ("ncei-wcsd", "Summer2017-D20170620-T014302.raw"), - ("ncei-wcsd", "Summer2017-D20170620-T021537.raw"), - ] - return [test_path["EK60"].joinpath(*f) for f in files] - - -@pytest.fixture -def ek80_test_data(test_path): - files = [ - ("echopype-test-D20211005-T000706.raw",), - ("echopype-test-D20211005-T000737.raw",), - ("echopype-test-D20211005-T000810.raw",), - ("echopype-test-D20211005-T000843.raw",), - ] - return [test_path["EK80_NEW"].joinpath(*f) for f in files] - - -@pytest.fixture -def ek80_broadband_same_range_sample_test_data(test_path): - files = [ - ("ncei-wcsd", "SH1707", 
"Reduced_D20170826-T205615.raw"), - ("ncei-wcsd", "SH1707", "Reduced_D20170826-T205659.raw"), - ("ncei-wcsd", "SH1707", "Reduced_D20170826-T205742.raw"), - ] - return [test_path["EK80"].joinpath(*f) for f in files] - - -@pytest.fixture -def ek80_narrowband_diff_range_sample_test_data(test_path): - files = [ - ("ncei-wcsd", "SH2106", "EK80", "Reduced_Hake-D20210701-T130426.raw"), - ("ncei-wcsd", "SH2106", "EK80", "Reduced_Hake-D20210701-T131325.raw"), - ("ncei-wcsd", "SH2106", "EK80", "Reduced_Hake-D20210701-T131621.raw"), - ] - return [test_path["EK80"].joinpath(*f) for f in files] - - -@pytest.fixture -def azfp_test_data(test_path): - - # TODO: in the future we should replace these files with another set of - # similarly small set of files, for example the files from the location below: - # "https://rawdata.oceanobservatories.org/files/CE01ISSM/R00015/instrmts/dcl37/ZPLSC_sn55076/DATA/202109/*" - # This is because we have lost track of where the current files came from, - # since the filenames does not contain the site identifier. 
- files = [ - ("ooi", "18100407.01A"), - ("ooi", "18100408.01A"), - ("ooi", "18100409.01A"), - ] - return [test_path["AZFP"].joinpath(*f) for f in files] - - -@pytest.fixture -def azfp_test_xml(test_path): - return test_path["AZFP"].joinpath(*("ooi", "18092920.XML")) - - -@pytest.fixture( - params=[ - { - "sonar_model": "EK60", - "xml_file": None, - "files": "ek60_test_data" - }, - { - "sonar_model": "EK60", - "xml_file": None, - "files": "ek60_diff_range_sample_test_data" - }, - { - "sonar_model": "AZFP", - "xml_file": "azfp_test_xml", - "files": "azfp_test_data" - }, - { - "sonar_model": "EK80", - "xml_file": None, - "files": "ek80_broadband_same_range_sample_test_data" - }, - { - "sonar_model": "EK80", - "xml_file": None, - "files": "ek80_narrowband_diff_range_sample_test_data" - } - ], - ids=["ek60", "ek60_diff_range_sample", "azfp", - "ek80_bb_same_range_sample", "ek80_nb_diff_range_sample"] -) -def raw_datasets(request): - files = request.param["files"] - xml_file = request.param["xml_file"] - if xml_file is not None: - xml_file = request.getfixturevalue(xml_file) - - files = request.getfixturevalue(files) - - return ( - files, - request.param['sonar_model'], - xml_file, - request.node.callspec.id - ) - - def test_combine_echodata(raw_datasets): ( files, diff --git a/echopype/tests/echodata/test_echodata.py b/echopype/tests/echodata/test_echodata.py index 8793a1700..73e10e2ad 100644 --- a/echopype/tests/echodata/test_echodata.py +++ b/echopype/tests/echodata/test_echodata.py @@ -1,197 +1,19 @@ from textwrap import dedent - -import fsspec from pathlib import Path import shutil from datatree import DataTree from zarr.errors import GroupNotFoundError -import echopype -from echopype.calibrate.env_params import EnvParams -from echopype.echodata import EchoData -from echopype import open_converted +from echopype.echodata.echodata import EchoData import pytest import xarray as xr import numpy as np -from echopype.testing import check_consolidated - - 
-@pytest.fixture(scope="module") -def single_ek60_zarr(test_path): - return ( - test_path['EK60'] / "ncei-wcsd" / "Summer2017-D20170615-T190214__NEW.zarr" - ) - - -@pytest.fixture( - params=[ - single_ek60_zarr, - (str, "ncei-wcsd", "Summer2017-D20170615-T190214.zarr"), - (None, "ncei-wcsd", "Summer2017-D20170615-T190214__NEW.nc"), - "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.nc", - "http://localhost:8080/data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.zarr", - "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.zarr", - fsspec.get_mapper( - "s3://data/ek60/ncei-wcsd/Summer2017-D20170615-T190214.zarr", - **dict( - client_kwargs=dict(endpoint_url="http://localhost:9000/"), - key="minioadmin", - secret="minioadmin", - ), - ), - ], - ids=[ - "ek60_zarr_path", - "ek60_zarr_path_string", - "ek60_netcdf_path", - "ek60_netcdf_s3_string", - "ek60_zarr_http_string", - "ek60_zarr_s3_string", - "ek60_zarr_s3_FSMap", - ], -) -def ek60_converted_zarr(request, test_path): - if isinstance(request.param, tuple): - desired_type, *paths = request.param - if desired_type is not None: - return desired_type(test_path['EK60'].joinpath(*paths)) - else: - return test_path['EK60'].joinpath(*paths) - else: - return request.param - - -@pytest.fixture( - params=[ - ( - ("EK60", "ncei-wcsd", "Summer2017-D20170615-T190214.raw"), - "EK60", - None, - None, - "CW", - "power", - ), - ( - ("EK80_NEW", "D20211004-T233354.raw"), - "EK80", - None, - None, - "CW", - "power", - ), - ( - ("EK80_NEW", "echopype-test-D20211004-T235930.raw"), - "EK80", - None, - None, - "BB", - "complex", - ), - ( - ("EK80_NEW", "D20211004-T233115.raw"), - "EK80", - None, - None, - "CW", - "complex", - ), - ( - ("ES70", "D20151202-T020259.raw"), - "ES70", - None, - None, - None, - None, - ), - ( - ("AZFP", "ooi", "17032923.01A"), - "AZFP", - ("AZFP", "ooi", "17032922.XML"), - "Sv", - None, - None, - ), - ( - ("AZFP", "ooi", "17032923.01A"), - "AZFP", - ("AZFP", "ooi", "17032922.XML"), - "TS", - None, - None, 
- ), - ( - ("AD2CP", "raw", "090", "rawtest.090.00001.ad2cp"), - "AD2CP", - None, - None, - None, - None, - ), - ], - ids=[ - "ek60_cw_power", - "ek80_cw_power", - "ek80_bb_complex", - "ek80_cw_complex", - "es70", - "azfp_sv", - "azfp_sp", - "ad2cp", - ], +from echopype.testing import ( + _check_consolidated, ) -def compute_range_samples(request, test_path): - ( - filepath, - sonar_model, - azfp_xml_path, - azfp_cal_type, - ek_waveform_mode, - ek_encode_mode, - ) = request.param - if sonar_model.lower() == 'es70': - pytest.xfail( - reason="Not supported at the moment", - ) - path_model, *paths = filepath - filepath = test_path[path_model].joinpath(*paths) - - if azfp_xml_path is not None: - path_model, *paths = azfp_xml_path - azfp_xml_path = test_path[path_model].joinpath(*paths) - return ( - filepath, - sonar_model, - azfp_xml_path, - azfp_cal_type, - ek_waveform_mode, - ek_encode_mode, - ) - - -@pytest.fixture( - params=[ - { - "path_model": "EK60", - "raw_path": "Winter2017-D20170115-T150122.raw", - }, - { - "path_model": "EK80", - "raw_path": "D20170912-T234910.raw", - }, - ], - ids=[ - "ek60_winter2017", - "ek80_summer2017", - ], -) -def range_check_files(request, test_path): - return ( - request.param["path_model"], - test_path[request.param["path_model"]].joinpath(request.param['raw_path']) - ) class TestEchoData: @@ -295,9 +117,16 @@ def test_getitem(self, converted_zarr): def test_setitem(self, converted_zarr): ed = self.create_ed(converted_raw_path=converted_zarr) - ed['Sonar/Beam_group1'] = ed['Sonar/Beam_group1'].rename({'beam': 'beam_newname'}) + ed['Sonar/Beam_group1'] = ed['Sonar/Beam_group1'].rename( + {'beam': 'beam_newname'} + ) - assert sorted(ed['Sonar/Beam_group1'].dims.keys()) == ['beam_newname', 'channel', 'ping_time', 'range_sample'] + assert sorted(ed['Sonar/Beam_group1'].dims.keys()) == [ + 'beam_newname', + 'channel', + 'ping_time', + 'range_sample', + ] try: ed['SomeRandomGroup'] = 'Testing value' @@ -315,15 +144,71 @@ def 
test_get_dataset(self, converted_zarr): assert result is None assert isinstance(ed_result, xr.Dataset) + @staticmethod + def test__harmonize_env_param_time(): + # Scalar + p = 10.05 + assert EchoData._harmonize_env_param_time(p=p) == 10.05 + + # time1 length=1, should return length=1 numpy array + p = xr.DataArray( + data=[1], + coords={ + "time1": np.array( + ["2017-06-20T01:00:00"], dtype="datetime64[ns]" + ) + }, + dims=["time1"], + ) + assert EchoData._harmonize_env_param_time(p=p) == 1 + + # time1 length>1, interpolate to tareget ping_time + p = xr.DataArray( + data=np.array([0, 1]), + coords={ + "time1": np.arange( + "2017-06-20T01:00:00", + "2017-06-20T01:00:31", + np.timedelta64(30, "s"), + dtype="datetime64[ns]", + ) + }, + dims=["time1"], + ) + # ping_time target is identical to time1 + ping_time_target = p["time1"].rename({"time1": "ping_time"}) + p_new = EchoData._harmonize_env_param_time( + p=p, ping_time=ping_time_target + ) + assert (p_new["ping_time"] == ping_time_target).all() + assert (p_new.data == p.data).all() + # ping_time target requires actual interpolation + ping_time_target = xr.DataArray( + data=[1], + coords={ + "ping_time": np.array( + ["2017-06-20T01:00:15"], dtype="datetime64[ns]" + ) + }, + dims=["ping_time"], + ) + p_new = EchoData._harmonize_env_param_time( + p=p, ping_time=ping_time_target["ping_time"] + ) + assert p_new["ping_time"] == ping_time_target["ping_time"] + assert p_new.data == 0.5 + @pytest.mark.parametrize("consolidated", [True, False]) def test_to_zarr_consolidated(self, mock_echodata, consolidated): """ Tests to_zarr consolidation. Currently, this test uses a mock EchoData object that only - has attributes. The consolidated flag provided will be used in every to_zarr call (which - is used to write each EchoData group to zarr_path). + has attributes. The consolidated flag provided will be used in every to_zarr call (which + is used to write each EchoData group to zarr_path). 
""" zarr_path = Path('test.zarr') - mock_echodata.to_zarr(str(zarr_path), consolidated=consolidated, overwrite=True) + mock_echodata.to_zarr( + str(zarr_path), consolidated=consolidated, overwrite=True + ) check = True if consolidated else False zmeta_path = zarr_path / ".zmetadata" @@ -331,228 +216,7 @@ def test_to_zarr_consolidated(self, mock_echodata, consolidated): assert zmeta_path.exists() is check if check is True: - check_consolidated(mock_echodata, zmeta_path) + _check_consolidated(mock_echodata, zmeta_path) # clean up the zarr file shutil.rmtree(zarr_path) - - -def test_open_converted(ek60_converted_zarr, minio_bucket): # noqa - def _check_path(zarr_path): - storage_options = {} - if zarr_path.startswith("s3://"): - storage_options = dict( - client_kwargs=dict(endpoint_url="http://localhost:9000/"), - key="minioadmin", - secret="minioadmin", - ) - return storage_options - - storage_options = {} - if not isinstance(ek60_converted_zarr, fsspec.FSMap): - storage_options = _check_path(str(ek60_converted_zarr)) - - try: - ed = open_converted( - ek60_converted_zarr, storage_options=storage_options - ) - assert isinstance(ed, EchoData) is True - except Exception as e: - if ( - isinstance(ek60_converted_zarr, str) - and ek60_converted_zarr.startswith("s3://") - and ek60_converted_zarr.endswith(".nc") - ): - assert isinstance(e, ValueError) is True - - -def test_compute_range(compute_range_samples): - ( - filepath, - sonar_model, - azfp_xml_path, - azfp_cal_type, - ek_waveform_mode, - ek_encode_mode, - ) = compute_range_samples - ed = echopype.open_raw(filepath, sonar_model, azfp_xml_path) - rng = np.random.default_rng(0) - stationary_env_params = EnvParams( - xr.Dataset( - data_vars={ - "pressure": ("time3", np.arange(50)), - "salinity": ("time3", np.arange(50)), - "temperature": ("time3", np.arange(50)), - }, - coords={ - "time3": np.arange("2017-06-20T01:00", "2017-06-20T01:25", np.timedelta64(30, "s"), dtype="datetime64[ns]") - } - ), - data_kind="stationary" 
- ) - if "time3" in ed["Platform"] and sonar_model != "AD2CP": - ed.compute_range(stationary_env_params, azfp_cal_type, ek_waveform_mode) - else: - try: - ed.compute_range(stationary_env_params, ek_waveform_mode="CW", azfp_cal_type="Sv") - except ValueError: - pass - else: - raise AssertionError - - mobile_env_params = EnvParams( - xr.Dataset( - data_vars={ - "pressure": ("time", np.arange(100)), - "salinity": ("time", np.arange(100)), - "temperature": ("time", np.arange(100)), - }, - coords={ - "latitude": ("time", rng.random(size=100) + 44), - "longitude": ("time", rng.random(size=100) - 125), - } - ), - data_kind="mobile" - ) - if "latitude" in ed["Platform"] and "longitude" in ed["Platform"] and sonar_model != "AD2CP" and not np.isnan(ed["Platform"]["time1"]).all(): - ed.compute_range(mobile_env_params, azfp_cal_type, ek_waveform_mode) - else: - try: - ed.compute_range(mobile_env_params, ek_waveform_mode="CW", azfp_cal_type="Sv") - except ValueError: - pass - else: - raise AssertionError - - env_params = {"sound_speed": 343} - if sonar_model == "AD2CP": - try: - ed.compute_range( - env_params, ek_waveform_mode="CW", azfp_cal_type="Sv" - ) - except ValueError: - pass # AD2CP is not currently supported in ed.compute_range - else: - raise AssertionError - else: - echo_range = ed.compute_range( - env_params, - azfp_cal_type, - ek_waveform_mode, - ) - assert isinstance(echo_range, xr.DataArray) - - -def test_nan_range_entries(range_check_files): - sonar_model, ek_file = range_check_files - echodata = echopype.open_raw(ek_file, sonar_model=sonar_model) - if sonar_model == "EK80": - ds_Sv = echopype.calibrate.compute_Sv(echodata, waveform_mode='BB', encode_mode='complex') - range_output = echodata.compute_range(env_params=[], ek_waveform_mode='BB') - nan_locs_backscatter_r = ~echodata["Sonar/Beam_group1"].backscatter_r.isel(beam=0).drop("beam").isnull() - else: - ds_Sv = echopype.calibrate.compute_Sv(echodata) - range_output = echodata.compute_range(env_params=[]) - 
nan_locs_backscatter_r = ~echodata["Sonar/Beam_group1"].backscatter_r.isel(beam=0).drop("beam").isnull() - - nan_locs_Sv_range = ~ds_Sv.echo_range.isnull() - nan_locs_range = ~range_output.isnull() - assert xr.Dataset.equals(nan_locs_backscatter_r, nan_locs_range) - assert xr.Dataset.equals(nan_locs_backscatter_r, nan_locs_Sv_range) - - -@pytest.mark.parametrize( - ["ext_type", "sonar_model", "updated", "path_model", "raw_path", "platform_data"], - [ - ( - "external-trajectory", - "EK80", - ("pitch", "roll", "longitude", "latitude"), - "EK80", - ( - "saildrone", - "SD2019_WCS_v05-Phase0-D20190617-T125959-0.raw", - ), - ( - "saildrone", - "saildrone-gen_5-fisheries-acoustics-code-sprint-sd1039-20190617T130000-20190618T125959-1_hz-v1.1595357449818.nc", #noqa - ), - ), - ( - "fixed-location", - "EK60", - ("longitude", "latitude"), - "EK60", - ( - "ooi", - "CE02SHBP-MJ01C-07-ZPLSCB101_OOI-D20191201-T000000.raw" - ), - (-100.0, -50.0), - ), - ], -) -def test_update_platform( - ext_type, - sonar_model, - updated, - path_model, - raw_path, - platform_data, - test_path -): - raw_file = test_path[path_model] / raw_path[0] / raw_path[1] - ed = echopype.open_raw(raw_file, sonar_model=sonar_model) - - for variable in updated: - assert np.isnan(ed["Platform"][variable].values).all() - - if ext_type == "external-trajectory": - extra_platform_data_file_name = platform_data[1] - extra_platform_data = xr.open_dataset( - test_path[path_model] / platform_data[0] / extra_platform_data_file_name - ) - elif ext_type == "fixed-location": - extra_platform_data_file_name = None - extra_platform_data = xr.Dataset( - { - "longitude": (["time"], np.array([float(platform_data[0])])), - "latitude": (["time"], np.array([float(platform_data[1])])), - }, - coords={ - "time": (["time"], np.array([ed['Sonar/Beam_group1'].ping_time.values.min()])) - }, - ) - - ed.update_platform( - extra_platform_data, - extra_platform_data_file_name=extra_platform_data_file_name, - ) - - for variable in updated: - 
assert not np.isnan(ed["Platform"][variable].values).all() - - # times have max interval of 2s - # check times are > min(ed["Sonar/Beam_group1"]["ping_time"]) - 2s - assert ( - ed["Platform"]["time1"] - > ed["Sonar/Beam_group1"]["ping_time"].min() - np.timedelta64(2, "s") - ).all() - # check there is only 1 time < min(ed["Sonar/Beam_group1"]["ping_time"]) - assert ( - np.count_nonzero( - ed["Platform"]["time1"] < ed["Sonar/Beam_group1"]["ping_time"].min() - ) - <= 1 - ) - # check times are < max(ed["Sonar/Beam_group1"]["ping_time"]) + 2s - assert ( - ed["Platform"]["time1"] - < ed["Sonar/Beam_group1"]["ping_time"].max() + np.timedelta64(2, "s") - ).all() - # check there is only 1 time > max(ed["Sonar/Beam_group1"]["ping_time"]) - assert ( - np.count_nonzero( - ed["Platform"]["time1"] > ed["Sonar/Beam_group1"]["ping_time"].max() - ) - <= 1 - ) diff --git a/echopype/tests/echodata/test_echodata_integration.py b/echopype/tests/echodata/test_echodata_integration.py new file mode 100644 index 000000000..e2b8abf12 --- /dev/null +++ b/echopype/tests/echodata/test_echodata_integration.py @@ -0,0 +1,466 @@ +from typing import Any, Dict, Optional +from datatree import open_datatree + +import echopype +from echopype.calibrate.env_params import EnvParams +from echopype.echodata.echodata import EchoData, XARRAY_ENGINE_MAP +from echopype import open_converted + +import pytest +import xarray as xr +import numpy as np + +from echopype.testing import ( + _compare_ed_against_tree, + _check_and_drop_attr, + _check_and_drop_var, +) + + +def test_compute_range(compute_range_samples): + ( + filepath, + sonar_model, + azfp_xml_path, + azfp_cal_type, + ek_waveform_mode, + ek_encode_mode, + ) = compute_range_samples + ed = echopype.open_raw(filepath, sonar_model, azfp_xml_path) + rng = np.random.default_rng(0) + stationary_env_params = EnvParams( + xr.Dataset( + data_vars={ + "pressure": ("time3", np.arange(50)), + "salinity": ("time3", np.arange(50)), + "temperature": ("time3", 
np.arange(50)), + }, + coords={ + "time3": np.arange( + "2017-06-20T01:00", + "2017-06-20T01:25", + np.timedelta64(30, "s"), + dtype="datetime64[ns]", + ) + }, + ), + data_kind="stationary", + ) + if "time3" in ed["Platform"] and sonar_model != "AD2CP": + ed.compute_range( + stationary_env_params, azfp_cal_type, ek_waveform_mode + ) + else: + try: + ed.compute_range( + stationary_env_params, + ek_waveform_mode="CW", + azfp_cal_type="Sv", + ) + except ValueError: + pass + else: + raise AssertionError + + mobile_env_params = EnvParams( + xr.Dataset( + data_vars={ + "pressure": ("time", np.arange(100)), + "salinity": ("time", np.arange(100)), + "temperature": ("time", np.arange(100)), + }, + coords={ + "latitude": ("time", rng.random(size=100) + 44), + "longitude": ("time", rng.random(size=100) - 125), + }, + ), + data_kind="mobile", + ) + if ( + "latitude" in ed["Platform"] + and "longitude" in ed["Platform"] + and sonar_model != "AD2CP" + and not np.isnan(ed["Platform"]["time1"]).all() + ): + ed.compute_range(mobile_env_params, azfp_cal_type, ek_waveform_mode) + else: + try: + ed.compute_range( + mobile_env_params, ek_waveform_mode="CW", azfp_cal_type="Sv" + ) + except ValueError: + pass + else: + raise AssertionError + + env_params = {"sound_speed": 343} + if sonar_model == "AD2CP": + try: + ed.compute_range( + env_params, ek_waveform_mode="CW", azfp_cal_type="Sv" + ) + except ValueError: + pass # AD2CP is not currently supported in ed.compute_range + else: + raise AssertionError + else: + echo_range = ed.compute_range( + env_params, + azfp_cal_type, + ek_waveform_mode, + ) + assert isinstance(echo_range, xr.DataArray) + + +def test_nan_range_entries(range_check_files): + sonar_model, ek_file = range_check_files + echodata = echopype.open_raw(ek_file, sonar_model=sonar_model) + if sonar_model == "EK80": + ds_Sv = echopype.calibrate.compute_Sv( + echodata, waveform_mode='BB', encode_mode='complex' + ) + range_output = echodata.compute_range( + env_params=[], 
ek_waveform_mode='BB' + ) + nan_locs_backscatter_r = ( + ~echodata["Sonar/Beam_group1"] + .backscatter_r.isel(beam=0) + .drop("beam") + .isnull() + ) + else: + ds_Sv = echopype.calibrate.compute_Sv(echodata) + range_output = echodata.compute_range(env_params=[]) + nan_locs_backscatter_r = ( + ~echodata["Sonar/Beam_group1"] + .backscatter_r.isel(beam=0) + .drop("beam") + .isnull() + ) + + nan_locs_Sv_range = ~ds_Sv.echo_range.isnull() + nan_locs_range = ~range_output.isnull() + assert xr.Dataset.equals(nan_locs_backscatter_r, nan_locs_range) + assert xr.Dataset.equals(nan_locs_backscatter_r, nan_locs_Sv_range) + + +@pytest.mark.parametrize( + [ + "ext_type", + "sonar_model", + "updated", + "path_model", + "raw_path", + "platform_data", + ], + [ + ( + "external-trajectory", + "EK80", + ("pitch", "roll", "longitude", "latitude"), + "EK80", + ( + "saildrone", + "SD2019_WCS_v05-Phase0-D20190617-T125959-0.raw", + ), + ( + "saildrone", + "saildrone-gen_5-fisheries-acoustics-code-sprint-sd1039-20190617T130000-20190618T125959-1_hz-v1.1595357449818.nc", # noqa + ), + ), + ( + "fixed-location", + "EK60", + ("longitude", "latitude"), + "EK60", + ("ooi", "CE02SHBP-MJ01C-07-ZPLSCB101_OOI-D20191201-T000000.raw"), + (-100.0, -50.0), + ), + ], +) +def test_update_platform( + ext_type, + sonar_model, + updated, + path_model, + raw_path, + platform_data, + test_path, +): + raw_file = test_path[path_model] / raw_path[0] / raw_path[1] + ed = echopype.open_raw(raw_file, sonar_model=sonar_model) + + for variable in updated: + assert np.isnan(ed["Platform"][variable].values).all() + + if ext_type == "external-trajectory": + extra_platform_data_file_name = platform_data[1] + extra_platform_data = xr.open_dataset( + test_path[path_model] + / platform_data[0] + / extra_platform_data_file_name + ) + elif ext_type == "fixed-location": + extra_platform_data_file_name = None + extra_platform_data = xr.Dataset( + { + "longitude": (["time"], np.array([float(platform_data[0])])), + "latitude": 
(["time"], np.array([float(platform_data[1])])), + }, + coords={ + "time": ( + ["time"], + np.array([ed['Sonar/Beam_group1'].ping_time.values.min()]), + ) + }, + ) + + ed.update_platform( + extra_platform_data, + extra_platform_data_file_name=extra_platform_data_file_name, + ) + + for variable in updated: + assert not np.isnan(ed["Platform"][variable].values).all() + + # times have max interval of 2s + # check times are > min(ed["Sonar/Beam_group1"]["ping_time"]) - 2s + assert ( + ed["Platform"]["time1"] + > ed["Sonar/Beam_group1"]["ping_time"].min() - np.timedelta64(2, "s") + ).all() + # check there is only 1 time < min(ed["Sonar/Beam_group1"]["ping_time"]) + assert ( + np.count_nonzero( + ed["Platform"]["time1"] + < ed["Sonar/Beam_group1"]["ping_time"].min() + ) + <= 1 + ) + # check times are < max(ed["Sonar/Beam_group1"]["ping_time"]) + 2s + assert ( + ed["Platform"]["time1"] + < ed["Sonar/Beam_group1"]["ping_time"].max() + np.timedelta64(2, "s") + ).all() + # check there is only 1 time > max(ed["Sonar/Beam_group1"]["ping_time"]) + assert ( + np.count_nonzero( + ed["Platform"]["time1"] + > ed["Sonar/Beam_group1"]["ping_time"].max() + ) + <= 1 + ) + + +def _tree_from_file( + converted_raw_path: str, + ed_storage_options: Optional[Dict[str, Any]] = {}, + open_kwargs: Dict[str, Any] = {}, +): + """ + Checks that converted_raw_path exists, sanitizes the path, + obtains the path's suffix, and lastly opens the file + as a datatree. + + Parameters + ---------- + converted_raw_path : str + path to converted data file + ed_storage_options : dict + options for cloud storage used by EchoData + open_kwargs : dict + optional keyword arguments to be passed + into xr.open_dataset + + Returns + ------- + A Datatree object representing the converted data file. 
+ """ + + # the purpose of this class is so I can use + # functions in EchoData as if they were static + # TODO: There is a better way to do this if + # we change functions in EchoData to static methods + class temp_class(object): + storage_options = ed_storage_options + + EchoData._check_path(temp_class, converted_raw_path) + converted_raw_path = EchoData._sanitize_path( + temp_class, converted_raw_path + ) + suffix = EchoData._check_suffix(temp_class, converted_raw_path) + + tree = open_datatree( + converted_raw_path, + engine=XARRAY_ENGINE_MAP[suffix], + **open_kwargs, + ) + + return tree + + +def _get_conversion_file_lists(azfp_path, ek60_path, ek80_path): + + converted_raw_paths_v06x = [ + ek60_path / "ek60-Summer2017-D20170615-T190214-ep-v06x.nc", + ek60_path / "ek60-combined-ep-v06x.nc", + ek80_path / "ek80-Summer2018--D20180905-T033113-ep-v06x.nc", + ek80_path / "ek80-2018115-D20181213-T094600-ep-v06x.nc", + ek80_path / "ek80-2019118-group2survey-D20191214-T081342-ep-v06x.nc", + ek80_path + / "ek80-Green2-Survey2-FM-short-slow-D20191004-T211557-ep-v06x.nc", + azfp_path / "azfp-17082117_01A_17041823_XML-ep-v06x.nc", + ] + + converted_raw_paths_v05x = [ + ek60_path / "ek60-Summer2017-D20170615-T190214-ep-v05x.nc", + ek60_path / "ek60-combined-ep-v05x.nc", + ek80_path / "ek80-Summer2018--D20180905-T033113-ep-v05x.nc", + ek80_path / "ek80-2018115-D20181213-T094600-ep-v05x.nc", + ek80_path / "ek80-2019118-group2survey-D20191214-T081342-ep-v05x.nc", + ek80_path + / "ek80-Green2-Survey2-FM-short-slow-D20191004-T211557-ep-v05x.nc", + azfp_path / "azfp-17082117_01A_17041823_XML-ep-v05x.nc", + ] + + return converted_raw_paths_v06x, converted_raw_paths_v05x + + +def test_v05x_v06x_conversion_structure(azfp_path, ek60_path, ek80_path): + """ + Tests that version 0.5.x echopype files + have been correctly converted to the + 0.6.x structure. + """ + + pytest.xfail( + "PR #881 has caused these tests to fail for EK80 sonar models. 
While we " + "revise this test structure, these tests will be skipped. Please see issue " + "https://github.com/OSOceanAcoustics/echopype/issues/884 for more information." + ) + + ( + converted_raw_paths_v06x, + converted_raw_paths_v05x, + ) = _get_conversion_file_lists(azfp_path, ek60_path, ek80_path) + + for path_v05x, path_v06x in zip( + converted_raw_paths_v05x, converted_raw_paths_v06x + ): + + ed_v05x = open_converted(path_v05x) + tree_v06x = _tree_from_file(converted_raw_path=path_v06x) + + # dictionary of attributes to drop (from the group only) where + # the group path is the key and the value is a list of tuples + # of the form (attr, type of attr expected) + attrs_to_drop = { + "Provenance": [ + ("conversion_software_version", str), + ("conversion_time", str), + ] + } + + # check and drop attributes that cannot be directly compared + # because their values are not the same + for key, val in attrs_to_drop.items(): + for var in val: + _check_and_drop_attr(ed_v05x, tree_v06x, key, var[0], var[1]) + + _check_and_drop_var( + ed_v05x, tree_v06x, "Provenance", "source_filenames" + ) + + # The following if block is for the case where we have a combined file + # TODO: look into this after v0.6.0 release + if "echodata_filename" in ed_v05x["Provenance"]: + prov_comb_names = [ + "echodata_filename", + "top_attrs", + "environment_attrs", + "platform_attrs", + "nmea_attrs", + "provenance_attrs", + "sonar_attrs", + "beam_attrs", + "vendor_attrs", + "top_attr_key", + "environment_attr_key", + "platform_attr_key", + "nmea_attr_key", + "provenance_attr_key", + "sonar_attr_key", + "beam_attr_key", + "vendor_attr_key", + ] + + for name in prov_comb_names: + _check_and_drop_var(ed_v05x, tree_v06x, "Provenance", name) + + ed_v05x["Provenance"] = ed_v05x["Provenance"].drop("src_filenames") + + # ignore direct comparison of the variables Sonar.sonar_serial_number, + # Platform.drop_keel_offset_is_manual, and Platform.water_level_draft_is_manual + # for EK80, this data is not 
present in v0.5.x + if ed_v05x["Top-level"].attrs["keywords"] == "EK80": + + # dictionary of variables to drop where the group path is the + # key and the variables are the value + vars_to_drop = { + "Sonar": ["sonar_serial_number"], + "Platform": [ + "drop_keel_offset_is_manual", + "water_level_draft_is_manual", + ], + "Environment": [ + "sound_velocity_profile", + "sound_velocity_profile_depth", + "sound_velocity_source", + "transducer_name", + "transducer_sound_speed", + ], + } + + # check and drop variables that cannot be directly compared + # because their values are not the same + for key, val in vars_to_drop.items(): + for var in val: + _check_and_drop_var(ed_v05x, tree_v06x, key, var) + + # sort the beam groups for EK80 according to channel (necessary for comparison) + ed_v05x['Sonar/Beam_group1'] = ed_v05x['Sonar/Beam_group1'].sortby( + "channel" + ) + + if 'Sonar/Beam_group2' in ed_v05x.group_paths: + ed_v05x['Sonar/Beam_group2'] = ed_v05x[ + 'Sonar/Beam_group2' + ].sortby("channel") + + # sort the Platform group by channel for EK80 (necessary for comparison) + tree_v06x['Platform'].ds = tree_v06x['Platform'].ds.sortby( + 'channel' + ) + ed_v05x['Platform'] = ed_v05x['Platform'].sortby('channel') + + # remove all attributes from Vendor_specific (data is missing sometimes) + tree_v06x["Vendor_specific"].ds.attrs = {"blank": 'None'} + ed_v05x["Vendor_specific"].attrs = {"blank": 'None'} + + _compare_ed_against_tree(ed_v05x, tree_v06x) + + +def test_echodata_structure(azfp_path, ek60_path, ek80_path): + """ + Makes sure that all raw files opened + create the expected EchoData structure. + """ + + # TODO: create this test once dev is in its final form. + # check and remove conversion time from attributes + # _check_and_drop_attr(ed_v05x, tree_v06x, "Provenance", "conversion_time", str) + # compare_ed_against_tree(ed_v05x, tree_v06x) + + pytest.xfail( + "Full testing of the EchoData Structure has not been implemented yet." 
+ ) diff --git a/echopype/tests/echodata/test_echodata_misc.py b/echopype/tests/echodata/test_echodata_misc.py deleted file mode 100644 index 49412349f..000000000 --- a/echopype/tests/echodata/test_echodata_misc.py +++ /dev/null @@ -1,49 +0,0 @@ -from echopype.echodata import EchoData - -import xarray as xr -import numpy as np - - -def test_harmonize_env_param_time(): - # Scalar - p = 10.05 - assert EchoData._harmonize_env_param_time(p=p) == 10.05 - - # time1 length=1, should return length=1 numpy array - p = xr.DataArray( - data=[1], - coords={ - "time1": np.array(["2017-06-20T01:00:00"], dtype="datetime64[ns]") - }, - dims=["time1"] - ) - assert EchoData._harmonize_env_param_time(p=p) == 1 - - # time1 length>1, interpolate to tareget ping_time - p = xr.DataArray( - data=np.array([0, 1]), - coords={ - "time1": np.arange("2017-06-20T01:00:00", "2017-06-20T01:00:31", np.timedelta64(30, "s"), dtype="datetime64[ns]") - }, - dims=["time1"] - ) - # ping_time target is identical to time1 - ping_time_target = p["time1"].rename({"time1": "ping_time"}) - p_new = EchoData._harmonize_env_param_time(p=p, ping_time=ping_time_target) - assert (p_new["ping_time"] == ping_time_target).all() - assert (p_new.data == p.data).all() - # ping_time target requires actual interpolation - ping_time_target = xr.DataArray( - data=[1], - coords={ - "ping_time": np.array(["2017-06-20T01:00:15"], dtype="datetime64[ns]") - }, - dims=["ping_time"] - ) - p_new = EchoData._harmonize_env_param_time(p=p, ping_time=ping_time_target["ping_time"]) - assert p_new["ping_time"] == ping_time_target["ping_time"] - assert p_new.data == 0.5 - - - - \ No newline at end of file diff --git a/echopype/tests/echodata/test_echodata_structure.py b/echopype/tests/echodata/test_echodata_structure.py deleted file mode 100644 index ec335572a..000000000 --- a/echopype/tests/echodata/test_echodata_structure.py +++ /dev/null @@ -1,307 +0,0 @@ -from typing import Any, Dict, Optional -from datatree import open_datatree 
-import pytest -from echopype.echodata.echodata import EchoData, XARRAY_ENGINE_MAP -from echopype.echodata.api import open_converted - - -@pytest.fixture -def azfp_path(test_path): - return test_path['AZFP'] - - -@pytest.fixture -def ek60_path(test_path): - return test_path['EK60'] - - -@pytest.fixture -def ek80_path(test_path): - return test_path['EK80'] - - -def _tree_from_file(converted_raw_path: str, - ed_storage_options: Optional[Dict[str, Any]] = {}, - open_kwargs: Dict[str, Any] = {}): - """ - Checks that converted_raw_path exists, sanitizes the path, - obtains the path's suffix, and lastly opens the file - as a datatree. - - Parameters - ---------- - converted_raw_path : str - path to converted data file - ed_storage_options : dict - options for cloud storage used by EchoData - open_kwargs : dict - optional keyword arguments to be passed - into xr.open_dataset - - Returns - ------- - A Datatree object representing the converted data file. - """ - - # the purpose of this class is so I can use - # functions in EchoData as if they were static - # TODO: There is a better way to do this if - # we change functions in EchoData to static methods - class temp_class(object): - storage_options = ed_storage_options - - EchoData._check_path(temp_class, converted_raw_path) - converted_raw_path = EchoData._sanitize_path(temp_class, - converted_raw_path) - suffix = EchoData._check_suffix(temp_class, - converted_raw_path) - - tree = open_datatree( - converted_raw_path, - engine=XARRAY_ENGINE_MAP[suffix], - **open_kwargs, - ) - - return tree - - -def _check_and_drop_var(ed, tree, grp_path, var): - """ - This function performs minimal checks of - a variable contained both in an EchoData object - and a Datatree. It ensures that the dimensions, - attributes, and data types are the same. Once - the checks have passed, it then drops these - variables from both the EchoData object and the - Datatree. 
- - Parameters - ---------- - ed : EchoData - EchoData object that contains the variable - to check and drop. - tree : Datatree - Datatree object that contains the variable - to check and drop. - grp_path : str - The path to the group that the variable is in. - var : str - The variable to be checked and dropped. - - Notes - ----- - The Datatree object is created from an EchoData - object written to a netcdf file. - """ - - ed_var = ed[grp_path][var] - tree_var = tree[grp_path].ds[var] - - # make sure that the dimensions and attributes - # are the same for the variable - assert ed_var.dims == tree_var.dims - assert ed_var.attrs == tree_var.attrs - - # make sure that the data types are correct too - assert isinstance(ed_var.values, type(tree_var.values)) - - # drop variables so we can check that datasets are identical - ed[grp_path] = ed[grp_path].drop(var) - tree[grp_path].ds = tree[grp_path].ds.drop(var) - - -def _check_and_drop_attr(ed, tree, grp_path, attr, typ): - """ - This function performs minimal checks of - an attribute contained both in an EchoData object - and a Datatree group. This function only works for - a group's attribute, it cannot work on variable - attributes. It ensures that the attribute exists - and that it has the expected data type. Once - the checks have passed, it then drops the - attribute from both the EchoData object and the - Datatree. - - Parameters - ---------- - ed : EchoData - EchoData object that contains the attribute - to check and drop. - tree : Datatree - Datatree object that contains the attribute - to check and drop. - grp_path : str - The path to the group that the attribute is in. - attr : str - The attribute to be checked and dropped. - typ : type - The expected data type of the attribute. - - Notes - ----- - The Datatree object is created from an EchoData - object written to a netcdf file. 
- """ - - # make sure that the attribute exists - assert attr in ed[grp_path].attrs.keys() - assert attr in tree[grp_path].ds.attrs.keys() - - # make sure that the value of the attribute is the right type - assert isinstance(ed[grp_path].attrs[attr], typ) - assert isinstance(tree[grp_path].ds.attrs[attr], typ) - - # drop the attribute so we can directly compare datasets - del ed[grp_path].attrs[attr] - del tree[grp_path].ds.attrs[attr] - - -def compare_ed_against_tree(ed, tree): - """ - This function compares the Datasets - of ed against tree and makes sure they - are identical. - - Parameters - ---------- - ed : EchoData - EchoData object - tree : Datatree - Datatree object - - Notes - ----- - The Datatree object is created from an EchoData - object written to a netcdf file. - """ - - for grp_path in ed.group_paths: - if grp_path == "Top-level": - assert tree.ds.identical(ed[grp_path]) - else: - assert tree[grp_path].ds.identical(ed[grp_path]) - - -def _get_conversion_file_lists(azfp_path, ek60_path, ek80_path): - - converted_raw_paths_v06x = [ek60_path / "ek60-Summer2017-D20170615-T190214-ep-v06x.nc", - ek60_path / "ek60-combined-ep-v06x.nc", - ek80_path / "ek80-Summer2018--D20180905-T033113-ep-v06x.nc", - ek80_path / "ek80-2018115-D20181213-T094600-ep-v06x.nc", - ek80_path / "ek80-2019118-group2survey-D20191214-T081342-ep-v06x.nc", - ek80_path / "ek80-Green2-Survey2-FM-short-slow-D20191004-T211557-ep-v06x.nc", - azfp_path / "azfp-17082117_01A_17041823_XML-ep-v06x.nc"] - - converted_raw_paths_v05x = [ek60_path / "ek60-Summer2017-D20170615-T190214-ep-v05x.nc", - ek60_path / "ek60-combined-ep-v05x.nc", - ek80_path / "ek80-Summer2018--D20180905-T033113-ep-v05x.nc", - ek80_path / "ek80-2018115-D20181213-T094600-ep-v05x.nc", - ek80_path / "ek80-2019118-group2survey-D20191214-T081342-ep-v05x.nc", - ek80_path / "ek80-Green2-Survey2-FM-short-slow-D20191004-T211557-ep-v05x.nc", - azfp_path / "azfp-17082117_01A_17041823_XML-ep-v05x.nc"] - - return converted_raw_paths_v06x, 
converted_raw_paths_v05x - - -def test_v05x_v06x_conversion_structure(azfp_path, ek60_path, ek80_path): - """ - Tests that version 0.5.x echopype files - have been correctly converted to the - 0.6.x structure. - """ - - pytest.xfail("PR #881 has caused these tests to fail for EK80 sonar models. While we " - "revise this test structure, these tests will be skipped. Please see issue " - "https://github.com/OSOceanAcoustics/echopype/issues/884 for more information.") - - converted_raw_paths_v06x, converted_raw_paths_v05x = \ - _get_conversion_file_lists(azfp_path, ek60_path, ek80_path) - - for path_v05x, path_v06x in zip(converted_raw_paths_v05x, converted_raw_paths_v06x): - - ed_v05x = open_converted(path_v05x) - tree_v06x = _tree_from_file(converted_raw_path=path_v06x) - - # dictionary of attributes to drop (from the group only) where - # the group path is the key and the value is a list of tuples - # of the form (attr, type of attr expected) - attrs_to_drop = { - "Provenance": [("conversion_software_version", str), - ("conversion_time", str)] - } - - # check and drop attributes that cannot be directly compared - # because their values are not the same - for key, val in attrs_to_drop.items(): - for var in val: - _check_and_drop_attr(ed_v05x, tree_v06x, key, var[0], var[1]) - - _check_and_drop_var(ed_v05x, tree_v06x, "Provenance", "source_filenames") - - # The following if block is for the case where we have a combined file - # TODO: look into this after v0.6.0 release - if "echodata_filename" in ed_v05x["Provenance"]: - prov_comb_names = ["echodata_filename", "top_attrs", "environment_attrs", - "platform_attrs", "nmea_attrs", "provenance_attrs", - "sonar_attrs", "beam_attrs", "vendor_attrs", - "top_attr_key", "environment_attr_key", - "platform_attr_key", "nmea_attr_key", "provenance_attr_key", - "sonar_attr_key", "beam_attr_key", "vendor_attr_key"] - - for name in prov_comb_names: - _check_and_drop_var(ed_v05x, tree_v06x, "Provenance", name) - - 
ed_v05x["Provenance"] = ed_v05x["Provenance"].drop("src_filenames") - - # ignore direct comparison of the variables Sonar.sonar_serial_number, - # Platform.drop_keel_offset_is_manual, and Platform.water_level_draft_is_manual - # for EK80, this data is not present in v0.5.x - if ed_v05x["Top-level"].attrs["keywords"] == "EK80": - - # dictionary of variables to drop where the group path is the - # key and the variables are the value - vars_to_drop = {"Sonar": ["sonar_serial_number"], - "Platform": ["drop_keel_offset_is_manual", - "water_level_draft_is_manual"], - "Environment": ["sound_velocity_profile", - "sound_velocity_profile_depth", - "sound_velocity_source", - "transducer_name", - "transducer_sound_speed"] - } - - # check and drop variables that cannot be directly compared - # because their values are not the same - for key, val in vars_to_drop.items(): - for var in val: - _check_and_drop_var(ed_v05x, tree_v06x, key, var) - - # sort the beam groups for EK80 according to channel (necessary for comparison) - ed_v05x['Sonar/Beam_group1'] = ed_v05x['Sonar/Beam_group1'].sortby("channel") - - if 'Sonar/Beam_group2' in ed_v05x.group_paths: - ed_v05x['Sonar/Beam_group2'] = ed_v05x['Sonar/Beam_group2'].sortby("channel") - - # sort the Platform group by channel for EK80 (necessary for comparison) - tree_v06x['Platform'].ds = tree_v06x['Platform'].ds.sortby('channel') - ed_v05x['Platform'] = ed_v05x['Platform'].sortby('channel') - - # remove all attributes from Vendor_specific (data is missing sometimes) - tree_v06x["Vendor_specific"].ds.attrs = {"blank": 'None'} - ed_v05x["Vendor_specific"].attrs = {"blank": 'None'} - - compare_ed_against_tree(ed_v05x, tree_v06x) - - -def test_echodata_structure(azfp_path, ek60_path, ek80_path): - """ - Makes sure that all raw files opened - create the expected EchoData structure. - """ - - # TODO: create this test once dev is in its final form. 
- # check and remove conversion time from attributes - # _check_and_drop_attr(ed_v05x, tree_v06x, "Provenance", "conversion_time", str) - # compare_ed_against_tree(ed_v05x, tree_v06x) - - pytest.xfail("Full testing of the EchoData Structure has not been implemented yet.") diff --git a/echopype/tests/echodata/test_zarr_combine.py b/echopype/tests/echodata/test_zarr_combine.py index 9380c6c0b..044b46fde 100644 --- a/echopype/tests/echodata/test_zarr_combine.py +++ b/echopype/tests/echodata/test_zarr_combine.py @@ -1,7 +1,6 @@ from collections import defaultdict from echopype.echodata.zarr_combine import ZarrCombine from dask.distributed import Client -import shutil import numpy as np import xarray as xr import echopype @@ -13,47 +12,8 @@ import pytest import zarr import os.path -from echopype.testing import check_consolidated - - -@pytest.fixture(scope="module") -def ek60_test_data(test_path): - files = [ - ("ncei-wcsd", "Summer2017-D20170620-T011027.raw"), - ("ncei-wcsd", "Summer2017-D20170620-T014302.raw"), - ("ncei-wcsd", "Summer2017-D20170620-T021537.raw"), - ] - return [test_path["EK60"].joinpath(*f) for f in files] - - -@pytest.fixture( - params=[ - ( - { - "randint_low": 10, - "randint_high": 5000, - "num_datasets": 20, - "group": "test_group", - "zarr_name": "combined_echodatas.zarr", - "delayed_ds_list": False, - } - ), - ( - { - "randint_low": 10, - "randint_high": 5000, - "num_datasets": 20, - "group": "test_group", - "zarr_name": "combined_echodatas.zarr", - "delayed_ds_list": True, - } - ), - ], - ids=["in-memory-ds_list", "lazy-ds_list"], - scope="module", -) -def append_ds_list_params(request): - return list(request.param.values()) + +from echopype.testing import _check_consolidated def get_ranges(lengths: np.ndarray) -> List[Tuple[int, int]]: @@ -419,7 +379,7 @@ def test_combine_consolidated(self, ek60_test_data, consolidated): assert zmeta_path.exists() is check if check is True: - check_consolidated(combined_echodata, zmeta_path) + 
_check_consolidated(combined_echodata, zmeta_path) temp_zarr_dir.cleanup()