Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initialize testing framework setup #902

Merged
merged 2 commits into from
Dec 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 204 additions & 0 deletions echopype/testing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,208 @@
"""testing.py

Helper module for testing related things.
"""
import json
import os
from pathlib import Path

import fsspec
import xarray as xr

from echopype.echodata.echodata import EchoData

# Names exported when test modules do `from echopype.testing import *`.
# These are private-prefixed helpers intentionally shared across test files.
__all__ = [
    "_check_consolidated",
    "_check_output_files",
    "_create_path_str",
    "_check_and_drop_var",
    "_check_and_drop_attr",
    "_compare_ed_against_tree",
]

# Directory containing this module.
HERE = Path(__file__).parent.absolute()
# Folder where the sample test datasets are staged (pulled in by CI).
TEST_DATA_FOLDER = HERE / "test_data"


def _check_consolidated(echodata: EchoData, zmeta_path: Path) -> None:
"""
Checks for the presence of `.zgroup`
for every group in echodata within the `.zmetadata`
file.

Parameters
----------
echodata : EchoData
The echodata object to be checked.
zmeta_path : pathlib.Path
The path to the .zmetadata for the zarr file.
"""
# Check that every group is in
# the zmetadata if consolidated
expected_zgroups = [
os.path.join(p, ".zgroup") if p != "Top-level" else ".zgroup" for p in echodata.group_paths
]

with open(zmeta_path) as f:
meta_json = json.load(f)

file_groups = [k for k in meta_json["metadata"].keys() if k.endswith(".zgroup")]

for g in expected_zgroups:
assert g in file_groups, f"{g} not Found!"


def _check_file_group(data_file, engine, groups):
    """Open each group of ``data_file`` and check that it is an ``xr.Dataset``."""
    for group_name in groups:
        opened = xr.open_dataset(data_file, engine=engine, group=group_name)
        assert isinstance(opened, xr.Dataset) is True


def _check_output_files(engine, output_files, storage_options):
    """
    Check that converted output file(s) contain all expected groups,
    then delete them.

    Parameters
    ----------
    engine : str
        The xarray backend used to open the files
        ("zarr" or a netcdf engine).
    output_files : str or list of str
        Path(s) or URL(s) of the converted file(s).
    storage_options : dict
        Extra options forwarded to ``fsspec`` (e.g. credentials).
    """
    groups = [
        "Provenance",
        "Environment",
        "Sonar/Beam_group1",
        "Sonar",
        "Vendor_specific",
        "Platform",
    ]
    # Normalize to a list so the single-file and multi-file cases
    # share one code path instead of two duplicated branches.
    if not isinstance(output_files, list):
        output_files = [output_files]

    # All files are assumed to live on the same filesystem, so the
    # filesystem object is resolved once from the first path.
    fs = fsspec.get_mapper(output_files[0], **storage_options).fs
    for f in output_files:
        if engine == "zarr":
            # Zarr stores are directories: open through a mapper and
            # delete recursively.
            _check_file_group(fs.get_mapper(f), engine, groups)
            fs.delete(f, recursive=True)
        else:
            _check_file_group(f, engine, groups)
            fs.delete(f)


def _create_path_str(test_folder, paths):
return str(test_folder.joinpath(*paths).absolute())


def _check_and_drop_var(ed, tree, grp_path, var):
"""
This function performs minimal checks of
a variable contained both in an EchoData object
and a Datatree. It ensures that the dimensions,
attributes, and data types are the same. Once
the checks have passed, it then drops these
variables from both the EchoData object and the
Datatree.

Parameters
----------
ed : EchoData
EchoData object that contains the variable
to check and drop.
tree : Datatree
Datatree object that contains the variable
to check and drop.
grp_path : str
The path to the group that the variable is in.
var : str
The variable to be checked and dropped.

Notes
-----
The Datatree object is created from an EchoData
object written to a netcdf file.
"""

ed_var = ed[grp_path][var]
tree_var = tree[grp_path].ds[var]

# make sure that the dimensions and attributes
# are the same for the variable
assert ed_var.dims == tree_var.dims
assert ed_var.attrs == tree_var.attrs

# make sure that the data types are correct too
assert isinstance(ed_var.values, type(tree_var.values))

# drop variables so we can check that datasets are identical
ed[grp_path] = ed[grp_path].drop(var)
tree[grp_path].ds = tree[grp_path].ds.drop(var)


def _check_and_drop_attr(ed, tree, grp_path, attr, typ):
"""
This function performs minimal checks of
an attribute contained both in an EchoData object
and a Datatree group. This function only works for
a group's attribute, it cannot work on variable
attributes. It ensures that the attribute exists
and that it has the expected data type. Once
the checks have passed, it then drops the
attribute from both the EchoData object and the
Datatree.

Parameters
----------
ed : EchoData
EchoData object that contains the attribute
to check and drop.
tree : Datatree
Datatree object that contains the attribute
to check and drop.
grp_path : str
The path to the group that the attribute is in.
attr : str
The attribute to be checked and dropped.
typ : type
The expected data type of the attribute.

Notes
-----
The Datatree object is created from an EchoData
object written to a netcdf file.
"""

# make sure that the attribute exists
assert attr in ed[grp_path].attrs.keys()
assert attr in tree[grp_path].ds.attrs.keys()

# make sure that the value of the attribute is the right type
assert isinstance(ed[grp_path].attrs[attr], typ)
assert isinstance(tree[grp_path].ds.attrs[attr], typ)

# drop the attribute so we can directly compare datasets
del ed[grp_path].attrs[attr]
del tree[grp_path].ds.attrs[attr]


def _compare_ed_against_tree(ed, tree):
"""
This function compares the Datasets
of ed against tree and makes sure they
are identical.

Parameters
----------
ed : EchoData
EchoData object
tree : Datatree
Datatree object

Notes
-----
The Datatree object is created from an EchoData
object written to a netcdf file.
"""

for grp_path in ed.group_paths:
if grp_path == "Top-level":
assert tree.ds.identical(ed[grp_path])
else:
assert tree[grp_path].ds.identical(ed[grp_path])
35 changes: 35 additions & 0 deletions echopype/tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Echopype tests

This directory contains all the various tests for echopype.

Sample datasets for testing can be found one directory above this one called `test_data`.
The actual datasets are not stored within the repository, but are stored in a Google Drive folder.
When the testing infrastructures are spun up, data are pulled from the Google Drive into the repository to be used for testing.

## Current structure

Currently the tests are separated by modules, mirroring a similar structure of the software modules.
Within each test module, a mix of *integration* and *unit* tests can be found.
The following modules are currently being tested:

- calibrate
- consolidate
- convert
- echodata
- metrics
- preprocess
- utils
- visualize
- core

Throughout each module, there is a special file called `conftest.py`.
This file contains all of the fixtures used within that module.
However, the `conftest.py` within the root of the `tests` directory should contain fixtures that can be used by all tests;
therefore, those fixtures' `scope` must be `"session"`.

Within echopype there's a module called `testing`. This module contains helper functions to be used in testing.

Lastly, the test files begin with the word `test`, such as `test_echodata.py`.
These files contain code for *unit testing*,
and any file that ends with `integration` contains code that performs *integration testing* against actual datasets, for example,
`test_echodata_integration.py`.
Empty file.
107 changes: 105 additions & 2 deletions echopype/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,115 @@
"""``pytest`` configuration."""

import pytest
import numpy as np
import xarray as xr
from datatree import DataTree

import fsspec

from echopype.convert.set_groups_base import SetGroupsBase
from echopype.echodata import EchoData
from echopype.testing import TEST_DATA_FOLDER


class MockSetGroups(SetGroupsBase):
    """
    Minimal concrete ``SetGroupsBase`` subclass used to assemble a mock
    ``EchoData`` object in tests without parsing any raw data file.

    Each ``set_*`` method returns a small (mostly attribute-only)
    ``xr.Dataset`` standing in for the corresponding echodata group.

    Notes
    -----
    The redundant ``__init__`` that only forwarded to
    ``super().__init__`` has been removed; the base-class constructor
    is used directly.
    """

    def set_beam(self) -> xr.Dataset:
        """Return a mock Sonar/Beam_group1 dataset (attributes only)."""
        ds = xr.Dataset(attrs={"beam_mode": "vertical", "conversion_equation_t": "type_3"})
        return ds

    def set_env(self) -> xr.Dataset:
        """Return a mock Environment dataset (attributes only)."""
        # TODO: add mock data
        ds = xr.Dataset()

        env_attr_dict = {"notes": "This is a mock env dataset, hence no data is found!"}
        ds = ds.assign_attrs(env_attr_dict)

        return ds

    def set_platform(self) -> xr.Dataset:
        """Return a mock Platform dataset (attributes only)."""
        # TODO: add mock data
        ds = xr.Dataset(
            attrs={
                "platform_code_ICES": 315,
                "platform_name": "My mock boat",
                "platform_type": "Research vessel",
            }
        )

        return ds

    def set_nmea(self) -> xr.Dataset:
        """Return a mock Platform/NMEA dataset (attributes only)."""
        # TODO: add mock data
        ds = xr.Dataset(
            attrs={
                "description": "All Mock NMEA datagrams",
            }
        )

        return ds

    def set_sonar(self) -> xr.Dataset:
        """Return a mock Sonar dataset with global attributes only."""
        # TODO: add mock data
        ds = xr.Dataset()

        # Assemble sonar group global attribute dictionary
        sonar_attr_dict = {
            "sonar_manufacturer": "Simrad",
            "sonar_model": self.sonar_model,
            # transducer (sonar) serial number is not stored in the EK60 raw data file,
            # so sonar_serial_number can't be populated from the raw datagrams
            "sonar_serial_number": "",
            "sonar_software_name": "",
            "sonar_software_version": "0.1.0",
            "sonar_type": "echosounder",
        }
        ds = ds.assign_attrs(sonar_attr_dict)

        return ds

    def set_vendor(self) -> xr.Dataset:
        """Return a mock Vendor_specific dataset (attributes only)."""
        # TODO: add mock data
        ds = xr.Dataset(attrs={"created_by": "Mock test"})
        return ds


@pytest.fixture(scope="session")
def mock_echodata(
    sonar_model="TEST",
    file_chk="./test.raw",
    xml_chk=None,
):
    """
    Session-scoped fixture building a mock ``EchoData`` object out of
    the ``MockSetGroups`` datasets, without reading any raw file.

    Returns
    -------
    EchoData
        An EchoData object whose tree contains mock Environment,
        Platform, Provenance, Sonar, and Vendor_specific groups.
    """
    setgrouper = MockSetGroups(
        parser_obj=None,
        input_file=file_chk,
        xml_path=xml_chk,
        output_path=None,
        sonar_model=sonar_model,
        params={"survey_name": "mock_survey"},
    )

    # Group path -> dataset mapping. "Sonar" is assigned before
    # "Sonar/Beam_group1" so the parent node keeps its position in
    # the insertion order (the previous placeholder assignment of
    # ``None`` that was immediately overwritten has been removed).
    tree_dict = {
        "/": setgrouper.set_toplevel(sonar_model, date_created=np.datetime64("1970-01-01")),
        "Environment": setgrouper.set_env(),
        "Platform": setgrouper.set_platform(),
        "Platform/NMEA": setgrouper.set_nmea(),
        "Provenance": setgrouper.set_provenance(),
        "Sonar": setgrouper.set_sonar(),
        "Sonar/Beam_group1": setgrouper.set_beam(),
        "Vendor_specific": setgrouper.set_vendor(),
    }

    tree = DataTree.from_dict(tree_dict, name="root")
    echodata = EchoData(source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model)
    echodata._set_tree(tree)
    echodata._load_tree()
    return echodata




@pytest.fixture(scope="session")
def dump_output_dir():
    """Return the path to the ``dump`` folder under the test data directory."""
    return TEST_DATA_FOLDER.joinpath("dump")
Expand Down
Empty file.
Empty file.
Loading