Skip to content

Commit

Permalink
not quite working zarr implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
sneakers-the-rat committed Apr 30, 2024
1 parent a345cc6 commit d884055
Show file tree
Hide file tree
Showing 8 changed files with 297 additions and 7 deletions.
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"linkml-runtime": ("https://linkml.io/linkml/", None),
"dask": ("https://docs.dask.org/en/stable/", None),
"h5py": ("https://docs.h5py.org/en/stable/", None),
"zarr": ("https://zarr.readthedocs.io/en/stable/", None),
}

# -- Options for HTML output -------------------------------------------------
Expand Down
69 changes: 68 additions & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ dask = [
hdf5 = [
"h5py>=3.10.0"
]
zarr = [
"zarr>=2.17.2",
]
arrays = [
"numpydantic[dask,hdf5]"
"numpydantic[dask,hdf5,zarr]"
]
tests = [
"numpydantic[arrays]",
Expand Down
9 changes: 8 additions & 1 deletion src/numpydantic/interface/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,12 @@
from numpydantic.interface.hdf5 import H5Interface
from numpydantic.interface.interface import Interface
from numpydantic.interface.numpy import NumpyInterface
from numpydantic.interface.zarr import ZarrInterface

__all__ = ["Interface", "DaskInterface", "H5Interface", "NumpyInterface"]
__all__ = [
"Interface",
"DaskInterface",
"H5Interface",
"NumpyInterface",
"ZarrInterface",
]
120 changes: 118 additions & 2 deletions src/numpydantic/interface/zarr.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,121 @@
"""
Interface to zarr arrays
(Experimental: implementation in progress)
"""

import contextlib
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional, Union, Sequence

from numpydantic.interface.interface import Interface

# zarr is an optional dependency: when it cannot be imported, every name the
# ``try`` block would have bound is set to ``None`` instead, so this module
# still imports and ``ZarrArray is not None`` can be used as an availability
# check.
try:
    import zarr
    from zarr.core import Array as ZarrArray
    from zarr.storage import StoreLike
except ImportError:
    # ``zarr`` itself must be bound too, otherwise any later ``zarr.open``
    # reference fails with NameError rather than a controlled error.
    zarr = None
    ZarrArray = None
    StoreLike = None
    # Retained for backward compatibility: the previous fallback bound this
    # name (and did not bind ``zarr``).
    storage = None


@dataclass
class ZarrArrayPath:
    """
    Map to an array within a zarr store.

    See :func:`zarr.open`
    """

    file: Union[Path, str]
    """Location of Zarr store file or directory"""
    path: Optional[str] = None
    """Path to array within hierarchical zarr store"""

    # The return annotation is a string so that defining this class does not
    # require the optional ``ZarrArray`` name to be resolvable.
    def open(self, **kwargs: Any) -> "ZarrArray":
        """
        Open the referenced location with :func:`zarr.open`.

        Args:
            **kwargs: forwarded unchanged to :func:`zarr.open` (e.g. ``mode``)

        Returns:
            Whatever :func:`zarr.open` returns for ``file`` (converted to
            ``str``) and ``path``.
        """
        return zarr.open(str(self.file), path=self.path, **kwargs)

    @classmethod
    def from_iterable(cls, spec: Sequence) -> "ZarrArrayPath":
        """
        Build an instance from a ``(file,)`` or ``(file, path)`` sequence.

        Args:
            spec: sequence of length 1 (``file`` only) or 2 (``file``, ``path``)

        Returns:
            A new instance of ``cls``, so subclasses get their own type back.

        Raises:
            ValueError: if ``spec`` does not have length 1 or 2
        """
        count = len(spec)
        if count == 1:
            return cls(file=spec[0])
        if count == 2:
            return cls(file=spec[0], path=spec[1])
        raise ValueError("Only len 1-2 iterables can be used for a ZarrArrayPath")


class ZarrInterface(Interface):
    """
    Interface to in-memory or on-disk zarr arrays
    """

    input_types = (Path, ZarrArray, ZarrArrayPath)
    return_type = ZarrArray

    @classmethod
    def enabled(cls) -> bool:
        """True if zarr is installed"""
        return ZarrArray is not None

    @staticmethod
    def _get_array(
        array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> ZarrArray:
        """
        Resolve any supported input to an opened zarr array.

        Args:
            array: an already-open zarr array (returned as-is), a ``str`` /
                ``Path`` to a store, a ``tuple`` / ``list`` accepted by
                :meth:`.ZarrArrayPath.from_iterable`, or a
                :class:`.ZarrArrayPath`

        Returns:
            The input array itself, or the result of opening the resolved
            :class:`.ZarrArrayPath` with ``mode="a"``.

        Raises:
            TypeError: if ``array`` is none of the supported types
            ValueError: if a ``tuple`` / ``list`` has a length other than 1 or 2
        """
        if isinstance(array, ZarrArray):
            return array

        if isinstance(array, (str, Path)):
            array = ZarrArrayPath(file=array)
        elif isinstance(array, (tuple, list)):
            array = ZarrArrayPath.from_iterable(array)

        # Fail with a clear message instead of an AttributeError on ``.open``
        if not isinstance(array, ZarrArrayPath):
            raise TypeError(
                "Expected a zarr array, a path, a ZarrArrayPath, or a "
                f"len 1-2 tuple/list, got {type(array).__name__}"
            )

        return array.open(mode="a")

    @classmethod
    def check(cls, array: Any) -> bool:
        """
        Check if array is in-memory zarr array,
        a path to a zarr array, or a :class:`.ZarrArrayPath`
        """
        if isinstance(array, ZarrArray):
            return True

        # See if can be coerced to ZarrArrayPath
        if isinstance(array, (Path, str)):
            array = ZarrArrayPath(file=array)

        if isinstance(array, (tuple, list)):
            # something that can be coerced to ZarrArrayPath;
            # a wrong-length sequence is left untouched and rejected below
            with contextlib.suppress(ValueError):
                array = ZarrArrayPath.from_iterable(array)

        if isinstance(array, ZarrArrayPath):
            # Deliberately best-effort: any failure to open the location
            # read-only just means "this is not ours", not an error.
            with contextlib.suppress(Exception):
                arr = array.open(mode="r")
                if isinstance(arr, ZarrArray):
                    return True

        return False

    def before_validation(
        self, array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> ZarrArray:
        """
        Ensure that the zarr array is opened
        """
        return self._get_array(array)

    @classmethod
    def to_json(
        cls, array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> dict:
        """
        Dump just the metadata for an array from :meth:`zarr.core.Array.info_items`
        plus the :meth:`zarr.core.Array.hexdigest`

        Returns:
            dict mapping each info item's name to its value, with an extra
            ``"hexdigest"`` key
        """
        array = cls._get_array(array)
        # info_items() yields (name, value) pairs
        info_dict = dict(array.info_items())
        info_dict["hexdigest"] = array.hexdigest()
        return info_dict
20 changes: 20 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import pytest
from nptyping import Number
from pydantic import BaseModel, Field
import zarr

from numpydantic.interface.hdf5 import H5ArrayPath
from numpydantic.interface.zarr import ZarrArrayPath
from numpydantic import NDArray, Shape
from numpydantic.maps import python_to_nptyping

Expand Down Expand Up @@ -105,3 +107,21 @@ def _hdf5_array(
return H5ArrayPath(Path(hdf5_file.filename), array_path)

return _hdf5_array


@pytest.fixture(scope="function")
def zarr_nested_array(tmp_output_dir_func) -> ZarrArrayPath:
    """Zarr array stored at the nested path ``a/b/c`` inside ``nested.zarr``"""
    file = tmp_output_dir_func / "nested.zarr"
    path = "a/b/c"
    root = zarr.open(str(file), mode="w")
    # Called only to create the array in the store; the fixture returns the
    # location of the array rather than the array object.
    root.zeros(path, shape=(100, 100), chunks=(10, 10))
    return ZarrArrayPath(file=file, path=path)


@pytest.fixture(scope="function")
def zarr_array(tmp_output_dir_func) -> Path:
    """Path to ``array.zarr``, written as a (100, 100) array filled with zeros"""
    store_path = tmp_output_dir_func / "array.zarr"
    zeros = zarr.open(
        str(store_path),
        mode="w",
        shape=(100, 100),
        chunks=(10, 10),
    )
    # Explicitly write zeros into every element
    zeros[:] = 0
    return store_path
16 changes: 14 additions & 2 deletions tests/test_interface/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import numpy as np
import dask.array as da
import zarr

from numpydantic import interface
from tests.fixtures import hdf5_array
from tests.fixtures import hdf5_array, zarr_nested_array, zarr_array


@pytest.fixture(
Expand All @@ -14,8 +15,19 @@
(np.zeros((3, 4)), interface.NumpyInterface),
(hdf5_array, interface.H5Interface),
(da.random.random((10, 10)), interface.DaskInterface),
(zarr.ones((10, 10)), interface.ZarrInterface),
(zarr_nested_array, interface.ZarrInterface),
(zarr_array, interface.ZarrInterface),
],
ids=[
"numpy_list",
"numpy",
"H5ArrayPath",
"dask",
"zarr_memory",
"zarr_nested",
"zarr_array",
],
ids=["numpy_list", "numpy", "H5ArrayPath", "dask"],
)
def interface_type(request):
return request.param
64 changes: 64 additions & 0 deletions tests/test_interface/test_zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pytest
import zarr

from pydantic import ValidationError

from numpydantic.interface import ZarrInterface


@pytest.fixture()
def dir_array(tmp_output_dir_func) -> zarr.DirectoryStore:
    """Directory store located at ``array.zarr`` under the temporary output dir"""
    return zarr.DirectoryStore(tmp_output_dir_func / "array.zarr")


@pytest.fixture()
def zip_array(tmp_output_dir_func) -> zarr.ZipStore:
    """Zip store at ``array.zip`` under the temporary output dir, opened with mode ``w``"""
    zip_path = tmp_output_dir_func / "array.zip"
    return zarr.ZipStore(zip_path, mode="w")


@pytest.fixture()
def nested_dir_array(tmp_output_dir_func) -> zarr.NestedDirectoryStore:
    """Nested directory store located at ``nested`` under the temporary output dir"""
    return zarr.NestedDirectoryStore(tmp_output_dir_func / "nested")


# NOTE(review): these are the fixture functions themselves, not store
# instances — confirm how they are meant to be consumed.
STORES = (dir_array, zip_array)
"""stores for single arrays"""


def test_zarr_enabled():
    """The zarr interface should report itself as enabled in the test environment"""
    is_enabled = ZarrInterface.enabled()
    assert is_enabled


def test_zarr_check(interface_type):
    """
    We should only use the zarr interface for zarr-like things

    ``interface_type`` is an ``(array, expected_interface)`` pair.
    """
    # NOTE(review): some parametrized values look like fixture functions rather
    # than resolved fixture values — confirm they are resolved before this runs.
    expected_interface = interface_type[1]
    candidate = interface_type[0]
    matched = ZarrInterface.check(candidate)
    if expected_interface is not ZarrInterface:
        assert not matched
    else:
        assert matched


@pytest.mark.parametrize(
    "array,passes",
    [
        (zarr.zeros((5, 10)), True),
        (zarr.zeros((5, 10, 3)), True),
        (zarr.zeros((5, 10, 3, 4)), True),
        (zarr.zeros((5, 10, 4)), False),
        (zarr.zeros((5, 10, 3, 6)), False),
        (zarr.zeros((5, 10, 4, 6)), False),
    ],
)
def test_zarr_shape(model_rgb, array, passes):
    """
    In-memory zarr arrays are validated against the shape of ``model_rgb``:
    cases flagged ``passes`` must construct, the rest must raise
    :class:`pydantic.ValidationError`.
    """
    if not passes:
        with pytest.raises(ValidationError):
            model_rgb(array=array)
        return

    model_rgb(array=array)

0 comments on commit d884055

Please sign in to comment.