Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update software behavior to use the default echopype home directory #954

Merged
merged 14 commits into from
Mar 11, 2023
Merged
2 changes: 1 addition & 1 deletion .ci_helpers/run-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
)
args = parser.parse_args()
if args.local:
temp_path = Path("temp_echopype_output")
temp_path = Path("~/.echopype/temp_output")
dump_path = Path("echopype/test_data/dump")
if temp_path.exists():
shutil.rmtree(temp_path)
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ toolbox
*.mat
ek60/
azfp/
temp_echopype_output/
notebooks/*_dev/
_echopype_version.py
!echopype/test_data
Expand Down
6 changes: 3 additions & 3 deletions docs/source/convert.rst
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ The examples below apply equally to both methods, except as noted.
A destination folder or file path should be specified with the ``save_path``
argument in these methods in order to control the location of the converted files.
If the argument is not specified, the converted ``.nc`` and ``.zarr``
files are saved into a folder called ``temp_echopype_output`` under the
current execution folder. This folder will be created if it doesn't already exist.
files are saved into the directory ``~/.echopype/temp_output``.
This folder will be created if it doesn't already exist.


Specify metadata attributes
Expand Down Expand Up @@ -227,7 +227,7 @@ the platform code from the
.. ``combine`` argument (the default is ``combine=False``). In that case,
.. ``save_path`` must be specified explicitly. If ``save_path`` is only a filename
.. rather than a full file path, the combined output file will be saved to the
.. default ``temp_echopype_output`` folder.
.. default ``~/.echopype/temp_output`` folder.

.. .. code-block:: python

Expand Down
4 changes: 2 additions & 2 deletions docs/source/open-converted.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,11 @@
" ed_list.append(ep.open_converted(converted_file)) # already converted files are lazy-loaded\n",
"```\n",
"\n",
"Finally, we apply `combine_echodata` on this list to combine all the data into a single `EchoData` object. Here, we will store the final combined form in the Zarr path `path_to/combined_echodatas.zarr` and use the client we established above: \n",
"Finally, we apply `combine_echodata` on this list to combine all the data into a single `EchoData` object. Here, we will store the final combined form in the Zarr path `path_to/combined_echodata.zarr` and use the client we established above: \n",
"```python\n",
"combined_ed = ep.combine_echodata(\n",
" ed_list, \n",
" zarr_path='path_to/combined_echodatas.zarr', \n",
" zarr_path='path_to/combined_echodata.zarr', \n",
" client=client\n",
")\n",
"```\n",
Expand Down
3 changes: 1 addition & 2 deletions echopype/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@

from . import calibrate, consolidate, mask, preprocess, utils
from .convert.api import open_raw
from .core import init_ep_dir
from .echodata.api import open_converted
from .echodata.combine import combine_echodata
from .utils.io import init_ep_dir
from .utils.log import verbose

# Turn off verbosity for echopype
verbose(override=True)

# Initialize echopype working directory
init_ep_dir()

__all__ = [
Expand Down
3 changes: 1 addition & 2 deletions echopype/convert/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,7 @@ def open_raw(
options for cloud storage
use_swap: bool
If True, variables with a large memory footprint will be
written to a temporary zarr store called ``parsed2zarr_temp_files``
in the echopype's ``temp_output`` directory
written to a temporary zarr store at ``~/.echopype/temp_output/parsed2zarr_temp_files``
max_mb : int
The maximum data chunk size in Megabytes (MB), when offloading
variables with a large memory footprint to a temporary zarr store
Expand Down
4 changes: 2 additions & 2 deletions echopype/convert/parsed_to_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
import zarr

from ..utils.io import check_file_permissions
from ..utils.io import ECHOPYPE_DIR, check_file_permissions


class Parsed2Zarr:
Expand Down Expand Up @@ -40,7 +40,7 @@ def _create_zarr_info(self):
check_file_permissions(current_dir)

# construct temporary directory that will hold the zarr file
out_dir = current_dir.joinpath(Path("temp_echopype_output") / "parsed2zarr_temp_files")
out_dir = current_dir / ECHOPYPE_DIR / "temp_output" / "parsed2zarr_temp_files"
if not out_dir.exists():
out_dir.mkdir(parents=True)

Expand Down
9 changes: 0 additions & 9 deletions echopype/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import re
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Dict, Union

from fsspec.mapping import FSMap
Expand All @@ -24,14 +23,6 @@
FileFormatHint = Literal[".nc", ".zarr"]
EngineHint = Literal["netcdf4", "zarr"]

ECHOPYPE_DIR = Path(os.path.expanduser("~")) / ".echopype"


def init_ep_dir():
    """Create the hidden echopype directory (``ECHOPYPE_DIR``) if it is missing."""
    if ECHOPYPE_DIR.exists():
        # Something already lives at that path; leave it untouched.
        return
    # exist_ok=True tolerates the directory appearing between the check and this call
    ECHOPYPE_DIR.mkdir(exist_ok=True)


def validate_azfp_ext(test_ext: str):
if not re.fullmatch(r"\.\d{2}[a-zA-Z]", test_ext):
Expand Down
4 changes: 2 additions & 2 deletions echopype/echodata/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def check_zarr_path(
raise ValueError("The provided zarr_path input must have a '.zarr' suffix!")

# set default source_file name (will be used only if zarr_path is None)
source_file = "combined_echodatas.zarr"
source_file = "combined_echodata.zarr"

validated_path = validate_output_path(
source_file=source_file,
Expand Down Expand Up @@ -534,7 +534,7 @@ def combine_echodata(
* The instance attributes ``source_file`` and ``converted_raw_path`` of the combined
``EchoData`` object will be copied from the first ``EchoData`` object in the given list.
* If no ``zarr_path`` is provided, the combined zarr file will be
``'temp_echopype_output/combined_echodatas.zarr'`` under the current working directory.
``'~/.echopype/temp_output/combined_echodata.zarr'``.
* If no ``client`` is provided, then a client with a local scheduler will be used. The
created scheduler and client will be shutdown once computation has finished.
* For each run of this function, we print out the client dashboard link.
Expand Down
8 changes: 4 additions & 4 deletions echopype/tests/echodata/test_echodata_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def test_combine_echodata(raw_datasets):

# create temporary directory for zarr store
temp_zarr_dir = tempfile.TemporaryDirectory()
zarr_file_name = os.path.join(temp_zarr_dir.name, "combined_echodatas.zarr")
zarr_file_name = os.path.join(temp_zarr_dir.name, "combined_echodata.zarr")

# create dask client
client = Client()
Expand Down Expand Up @@ -239,7 +239,7 @@ def test_attr_storage(ek60_test_data):

# create temporary directory for zarr store
temp_zarr_dir = tempfile.TemporaryDirectory()
zarr_file_name = os.path.join(temp_zarr_dir.name, "combined_echodatas.zarr")
zarr_file_name = os.path.join(temp_zarr_dir.name, "combined_echodata.zarr")

# create dask client
client = Client()
Expand Down Expand Up @@ -285,7 +285,7 @@ def test_combined_encodings(ek60_test_data):

# create temporary directory for zarr store
temp_zarr_dir = tempfile.TemporaryDirectory()
zarr_file_name = os.path.join(temp_zarr_dir.name, "combined_echodatas.zarr")
zarr_file_name = os.path.join(temp_zarr_dir.name, "combined_echodata.zarr")

# create dask client
client = Client()
Expand Down Expand Up @@ -332,7 +332,7 @@ def test_combined_echodata_repr(ek60_test_data):

# create temporary directory for zarr store
temp_zarr_dir = tempfile.TemporaryDirectory()
zarr_file_name = os.path.join(temp_zarr_dir.name, "combined_echodatas.zarr")
zarr_file_name = os.path.join(temp_zarr_dir.name, "combined_echodata.zarr")

# create dask client
client = Client()
Expand Down
4 changes: 2 additions & 2 deletions echopype/tests/echodata/test_zarr_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def ek60_test_data(test_path):
"randint_high": 5000,
"num_datasets": 20,
"group": "test_group",
"zarr_name": "combined_echodatas.zarr",
"zarr_name": "combined_echodata.zarr",
"delayed_ds_list": False,
}
),
Expand All @@ -44,7 +44,7 @@ def ek60_test_data(test_path):
"randint_high": 5000,
"num_datasets": 20,
"group": "test_group",
"zarr_name": "combined_echodatas.zarr",
"zarr_name": "combined_echodata.zarr",
"delayed_ds_list": True,
}
),
Expand Down
22 changes: 2 additions & 20 deletions echopype/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import pytest

if TYPE_CHECKING:
from ..core import SonarModelsHint
from ..core import SONAR_MODELS, init_ep_dir
from echopype.core import SonarModelsHint
from echopype.core import SONAR_MODELS
import echopype.core


Expand Down Expand Up @@ -66,21 +66,3 @@ def test_file_extension_validation_should_fail(
raise ValueError(
f"\"{ext}\" should have been rejected for sonar model {sonar_model}"
)


def test_init_ep_dir(monkeypatch):
    """Check that ``init_ep_dir`` creates the echopype directory when it is absent."""
    # The context manager removes the temporary directory even if an
    # assertion below fails, so no stray directories are left behind.
    with tempfile.TemporaryDirectory() as temp_user_dir:
        echopype_dir = Path(temp_user_dir) / ".echopype"

        # Create the .echopype in a temp dir instead of user space, so the
        # test never touches the real directory in the user's home.
        monkeypatch.setattr(echopype.core, "ECHOPYPE_DIR", echopype_dir)

        assert not echopype.core.ECHOPYPE_DIR.exists()

        init_ep_dir()

        assert echopype.core.ECHOPYPE_DIR.exists()
27 changes: 26 additions & 1 deletion echopype/tests/utils/test_utils_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,18 @@
from pathlib import Path
import pytest
from typing import Tuple
import tempfile
import platform
import xarray as xr

from echopype.utils.io import sanitize_file_path, validate_output_path, env_indep_joinpath, validate_source_ds_da
from echopype.utils.io import (
sanitize_file_path,
validate_output_path,
env_indep_joinpath,
validate_source_ds_da,
init_ep_dir
)
import echopype.utils.io


@pytest.mark.parametrize(
Expand Down Expand Up @@ -294,3 +302,20 @@ def test_validate_source_ds_da(source_ds_da_input, storage_options_input, true_f
else:
assert isinstance(source_ds_output, str)
assert file_type_output == true_file_type

def test_init_ep_dir(monkeypatch):
    """Check that ``init_ep_dir`` creates the echopype directory when it is absent."""
    # The context manager removes the temporary directory even if an
    # assertion below fails, so no stray directories are left behind.
    with tempfile.TemporaryDirectory() as temp_user_dir:
        echopype_dir = Path(temp_user_dir) / ".echopype"

        # Create the .echopype in a temp dir instead of user space, so the
        # test never touches the real directory in the user's home.
        monkeypatch.setattr(echopype.utils.io, "ECHOPYPE_DIR", echopype_dir)

        assert not echopype.utils.io.ECHOPYPE_DIR.exists()

        init_ep_dir()

        assert echopype.utils.io.ECHOPYPE_DIR.exists()
61 changes: 52 additions & 9 deletions echopype/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@
logger = _init_logger(__name__)


ECHOPYPE_DIR = Path(os.path.expanduser("~")) / ".echopype"


def init_ep_dir():
    """Create the hidden echopype directory (``ECHOPYPE_DIR``) if it is missing."""
    if ECHOPYPE_DIR.exists():
        # Something already lives at that path; leave it untouched.
        return
    # exist_ok=True tolerates the directory appearing between the check and this call
    ECHOPYPE_DIR.mkdir(exist_ok=True)


def get_files_from_dir(folder):
"""Retrieves all Netcdf and Zarr files from a given folder"""
valid_ext = [".nc", ".zarr"]
Expand Down Expand Up @@ -151,10 +160,33 @@ def validate_output_path(
source_file: str,
engine: str,
output_storage_options: Dict = {},
save_path: Union[None, Path, str] = None,
save_path: Optional[Union[Path, str]] = None,
) -> str:
"""
Assemble output file names and path.
Assembles output file names and path.

The final resulting file will be saved as provided in save path.
If a directory path is provided then the final file name will use
the same name as the source file and saved within the directory
path in `save_path` or echopype's `temp_output` directory.

Example 1.
source_file - test.raw
engine - zarr
save_path - /path/dir/
output is /path/dir/test.zarr

Example 2.
source_file - test.raw
engine - zarr
save_path - None
output is ~/.echopype/temp_output/test.zarr

Example 3.
source_file - test.raw
engine - zarr
save_path - /path/dir/myzarr.zarr
output is /path/dir/myzarr.zarr
leewujung marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
Expand All @@ -165,21 +197,32 @@ def validate_output_path(
output_storage_options : dict
Storage options for remote output path
save_path : str | Path | None
Either a directory or a file. If none then the save path is 'temp_echopype_output/'
in the current working directory.
Either a directory or a file path.
If it's not provided, we will save output file(s)
in the echopype's `temp_output` directory.

Returns
-------
str
The final string path of the resulting file.

Raises
------
ValueError
If engine is not one of the supported output engine of
zarr or netcdf
TypeError
If `save_path` is not of type Path or str
"""
if engine not in SUPPORTED_ENGINES:
ValueError(f"Engine {engine} is not supported for file export.")

file_ext = SUPPORTED_ENGINES[engine]["ext"]

if save_path is None:
logger.warning("save_path is not provided")
logger.warning("A directory or file path is not provided!")

current_dir = Path.cwd()
# Check permission, raise exception if no permission
check_file_permissions(current_dir)
out_dir = current_dir.joinpath(Path("temp_echopype_output"))
out_dir = ECHOPYPE_DIR / "temp_output"
if not out_dir.exists():
out_dir.mkdir(parents=True)

Expand Down