catalystneuro · weiglszonja · May 30, 2024 · May 30, 2024 · May 30, 2024 · May 30, 2024
diff --git a/docs/source/api/imaging_extractors/bioformatsimagingextractors.rst b/docs/source/api/imaging_extractors/bioformatsimagingextractors.rst
@@ -0,0 +1,7 @@
+BioFormatsImagingExtractor
+--------------------------
+.. automodule:: roiextractors.extractors.bioformatsimagingextractors.bioformatsimagingextractor
+
+CxdImagingExtractor
+-------------------
+.. automodule:: roiextractors.extractors.bioformatsimagingextractors.cxdimagingextractor
diff --git a/docs/source/api/imaging_extractors/index.rst b/docs/source/api/imaging_extractors/index.rst
@@ -15,3 +15,4 @@ ImagingExtractors
   micromanagertiffimagingextractor
   tiffimagingextractor
   scanimagetiffimagingextractors
+  bioformatsimagingextractors
diff --git a/requirements-full.txt b/requirements-full.txt
@@ -3,3 +3,4 @@ scanimage-tiff-reader==1.4.1.4
 neuroconv[video]>=0.4.6  # Uses the VideoCaptureContext class
 natsort>=8.3.1
 isx>=1.0.4
+aicsimageio>=4.14.0
diff --git a/src/roiextractors/extractorlist.py b/src/roiextractors/extractorlist.py
@@ -1,5 +1,6 @@
 """Listing of available formats for extraction."""
 
+from .extractors.bioformatsimagingextractors import CxdImagingExtractor
 from .extractors.caiman import CaimanSegmentationExtractor
 from .extractors.hdf5imagingextractor import Hdf5ImagingExtractor
 from .extractors.numpyextractors import (
@@ -52,6 +53,7 @@
     MemmapImagingExtractor,
     VolumetricImagingExtractor,
     InscopixImagingExtractor,
+    CxdImagingExtractor,
 ]
 
 segmentation_extractor_full_list = [

diff --git a/src/roiextractors/extractors/bioformatsimagingextractors/__init__.py b/src/roiextractors/extractors/bioformatsimagingextractors/__init__.py
@@ -0,0 +1,18 @@
+"""A collection of ImagingExtractors for reading files with Bio-Formats.
+
+Modules
+-------
+bioformatsimagingextractor
+    The base class for Bio-Formats imaging extractors.
+cxdimagingextractor
+    Specialized extractor for CXD files produced via Hamamatsu Photonics.
+
+Classes
+-------
+BioFormatsImagingExtractor
+    The base ImagingExtractor for Bio-Formats.
+CxdImagingExtractor
+    Specialized extractor for reading CXD files produced via Hamamatsu Photonics.
+"""
+
+from .cxdimagingextractor import CxdImagingExtractor
diff --git a/src/roiextractors/extractors/bioformatsimagingextractors/bioformats_utils.py b/src/roiextractors/extractors/bioformatsimagingextractors/bioformats_utils.py
@@ -0,0 +1,87 @@
+from pathlib import Path
+
+
+import numpy as np
+import aicsimageio
+from aicsimageio.formats import FORMAT_IMPLEMENTATIONS
+from ome_types import OME
+
+from ...extraction_tools import PathType
+
+
+def check_file_format_is_supported(file_path: PathType):
+    """
+    Check that the file suffix is one that aicsimageio associates with its BioformatsReader.
+
+    Only the suffix of the path is inspected; the file itself is not opened.
+    The comparison is case-insensitive, so ``.CXD`` is accepted whenever ``.cxd`` is.
+
+    Parameters
+    ----------
+    file_path : PathType
+        Path to the file.
+
+    Raises
+    ------
+    ValueError
+        If the file suffix is not listed for BioformatsReader in
+        ``aicsimageio.formats.FORMAT_IMPLEMENTATIONS``.
+    """
+    bioformats_reader = "aicsimageio.readers.bioformats_reader.BioformatsReader"
+    # Both sides of the comparison are lower-cased so that the check does not depend
+    # on the letter case of the extension on disk.
+    supported_file_suffixes = {
+        suffix_name.lower() for suffix_name, readers in FORMAT_IMPLEMENTATIONS.items() if bioformats_reader in readers
+    }
+
+    # Path.suffix keeps the leading dot, the registered suffix names do not.
+    file_suffix = Path(file_path).suffix.lstrip(".").lower()
+    if file_suffix not in supported_file_suffixes:
+        raise ValueError(f"File '{file_path}' is not supported by BioformatsReader.")
+
+
+def extract_ome_metadata(
+    file_path: PathType,
+) -> OME:
+    """
+    Read the OME metadata of a file through the Bio-Formats reader of aicsimageio.
+
+    The file suffix is validated with check_file_format_is_supported before the file is opened.
+
+    Parameters
+    ----------
+    file_path : PathType
+        Path to the file.
+
+    Returns
+    -------
+    OME
+        The ``ome_metadata`` attribute of the opened ``BioFile``.
+
+    Raises
+    ------
+    ValueError
+        If the file suffix is not supported by BioformatsReader.
+    """
+    check_file_format_is_supported(file_path)
+
+    bio_file_class = aicsimageio.readers.bioformats_reader.BioFile
+    # The context manager closes the underlying reader once the metadata has been read.
+    with bio_file_class(file_path) as bio_file:
+        return bio_file.ome_metadata
+
+
+def parse_ome_metadata(metadata: OME) -> dict:
+    """
+    Parse metadata in OME format and store the relevant values under the standard keys used by ImagingExtractors.
+
+    Only the first image of the OME document (``metadata.images[0]``) is read.
+
+    Parameters
+    ----------
+    metadata : OME
+        The OME metadata, e.g. as returned by extract_ome_metadata.
+
+    Returns
+    -------
+    dict
+        Dictionary with the following keys:
+
+        - num_frames
+        - sampling_frequency (None when the time increment is missing or zero)
+        - num_channels
+        - num_planes
+        - num_rows (height of the image)
+        - num_columns (width of the image)
+        - dtype
+        - channel_names (the ``id`` of each channel)
+    """
+    pixels_metadata = metadata.images[0].pixels
+
+    # A missing or zero time increment cannot be inverted into a rate; report the
+    # sampling frequency as unknown instead of failing with ZeroDivisionError.
+    # NOTE(review): the time increment is used as-is, its unit is not inspected -
+    # presumably seconds; confirm against the files this is used with.
+    time_increment = pixels_metadata.time_increment
+    sampling_frequency = 1 / time_increment if time_increment else None
+
+    channel_names = [channel.id for channel in pixels_metadata.channels]
+
+    metadata_parsed = dict(
+        num_frames=pixels_metadata.size_t,
+        sampling_frequency=sampling_frequency,
+        num_channels=pixels_metadata.size_c,
+        num_planes=pixels_metadata.size_z,
+        num_rows=pixels_metadata.size_y,
+        num_columns=pixels_metadata.size_x,
+        dtype=np.dtype(pixels_metadata.type.numpy_dtype),
+        channel_names=channel_names,
+    )
+
+    return metadata_parsed
diff --git a/src/roiextractors/extractors/bioformatsimagingextractors/bioformatsimagingextractor.py b/src/roiextractors/extractors/bioformatsimagingextractors/bioformatsimagingextractor.py
@@ -0,0 +1,156 @@
+"""ImagingExtractor for reading files supported by Bio-Formats.
+
+Classes
+-------
+BioFormatsImagingExtractor
+    The base ImagingExtractor for Bio-Formats.
+"""
+
+from typing import Tuple
+
+import numpy as np
+
+from ...imagingextractor import ImagingExtractor
+from ...extraction_tools import PathType, DtypeType
+
+
+class BioFormatsImagingExtractor(ImagingExtractor):
+    """Imaging extractor for files supported by Bio-Formats.
+
+    One instance exposes a single channel and a single plane of the file,
+    selected by name at construction time.
+    """
+
+    extractor_name = "BioFormatsImaging"
+
+    def __init__(
+        self,
+        file_path: PathType,
+        channel_name: str,
+        plane_name: str,
+        dimension_order: str,
+        parsed_metadata: dict,
+    ):
+        r"""
+        Create a BioFormatsImagingExtractor instance from a file supported by Bio-Formats.
+
+        Supported file formats: https://bio-formats.readthedocs.io/en/stable/supported-formats.html
+
+        This extractor requires `bioformats_jar` to be installed in the environment,
+        and requires the java executable to be available on the path (or via the JAVA_HOME environment variable),
+        along with the mvn executable.
+
+        If you are using conda, you can install with `conda install -c conda-forge bioformats_jar`.
+        Note: you may need to reactivate your conda environment after installing.
+        If you are still getting a JVMNotFoundException, try:
+        # mac and linux:
+        `export JAVA_HOME=$CONDA_PREFIX`
+
+        # windows:
+        `set JAVA_HOME=%CONDA_PREFIX%\\Library`
+
+        Parameters
+        ----------
+        file_path : PathType
+            Path to the file.
+        channel_name : str
+            The name of the channel for this extractor.
+            Must be one of ``parsed_metadata["channel_names"]``.
+        plane_name : str
+            The name of the plane for this extractor.
+            Plane names are the plane indices as strings ("0", "1", ...).
+        dimension_order : str
+            The order of dimension for reading the frames. For .cxd format it is "TCZYX".
+            See aicsimageio.dimensions.DimensionNames and aicsimageio.dimensions.Dimensions for more information.
+        parsed_metadata: dict
+            Metadata dictionary in the form returned by parse_ome_metadata. The keys read here are
+            num_frames, num_channels, num_planes, num_rows, num_columns, dtype, sampling_frequency
+            and channel_names.
+
+        Raises
+        ------
+        ValueError
+            If the file format is not supported by BioformatsReader, or if `channel_name` or
+            `plane_name` is not among the available names.
+        """
+        # Imported lazily so that aicsimageio is only required when this extractor is used.
+        from .bioformats_utils import check_file_format_is_supported
+        import aicsimageio
+
+        self.file_path = file_path
+        super().__init__()
+
+        check_file_format_is_supported(self.file_path)
+
+        self.dimension_order = dimension_order
+
+        self._num_frames = parsed_metadata["num_frames"]
+        self._num_channels = parsed_metadata["num_channels"]
+        self._num_planes = parsed_metadata["num_planes"]
+        self._num_rows = parsed_metadata["num_rows"]
+        self._num_columns = parsed_metadata["num_columns"]
+        self._dtype = parsed_metadata["dtype"]
+        self._sampling_frequency = parsed_metadata["sampling_frequency"]
+        self._channel_names = parsed_metadata["channel_names"]
+        # Planes have no names in the parsed metadata, so their indices are used as names.
+        self._plane_names = [f"{i}" for i in range(self._num_planes)]
+
+        if channel_name not in self._channel_names:
+            raise ValueError(
+                f"The selected channel '{channel_name}' is not a valid channel name."
+                f" The available channel names are: {self._channel_names}."
+            )
+        self.channel_index = self._channel_names.index(channel_name)
+
+        if plane_name not in self._plane_names:
+            raise ValueError(
+                f"The selected plane '{plane_name}' is not a valid plane name."
+                f" The available plane names are: {self._plane_names}."
+            )
+        self.plane_index = self._plane_names.index(plane_name)
+
+        # NOTE(review): the array is kept after the reader context has exited; presumably the
+        # object returned by to_dask() re-opens the file when it is computed - confirm.
+        with aicsimageio.readers.bioformats_reader.BioFile(self.file_path) as reader:
+            self._video = reader.to_dask()
+
+    def get_channel_names(self) -> list:
+        """Return the names of all channels listed in the parsed metadata."""
+        return self._channel_names
+
+    def get_dtype(self) -> DtypeType:
+        """Return the data type taken from the parsed metadata."""
+        return self._dtype
+
+    def get_image_size(self) -> Tuple[int, int]:
+        """Return the size of a frame as (num_rows, num_columns)."""
+        return self._num_rows, self._num_columns
+
+    def get_num_channels(self) -> int:
+        """Return the number of channels listed in the parsed metadata."""
+        return self._num_channels
+
+    def get_num_frames(self) -> int:
+        """Return the number of frames listed in the parsed metadata."""
+        return self._num_frames
+
+    def get_sampling_frequency(self):
+        """Return the sampling frequency from the parsed metadata (may be None)."""
+        return self._sampling_frequency
+
+    def check_frame_inputs(self, frame) -> None:
+        """Check that the frame index is valid. Raise ValueError if not.
+
+        A value of None is accepted and skips the check.
+
+        Parameters
+        ----------
+        frame : int
+            The index of the frame to retrieve.
+
+        Raises
+        ------
+        ValueError
+            If the frame index is invalid.
+        """
+        if frame is None:
+            return
+        if frame >= self._num_frames:
+            raise ValueError(f"Frame index ({frame}) exceeds number of frames ({self._num_frames}).")
+        if frame < 0:
+            raise ValueError(f"Frame index ({frame}) must be greater than or equal to 0.")
+
+    def get_video(self, start_frame=None, end_frame=None, channel: int = 0) -> np.ndarray:
+        """Return the frames in [start_frame, end_frame) for the selected channel and plane.
+
+        Parameters
+        ----------
+        start_frame : int, optional
+            Index of the first frame to return. None means the first frame.
+        end_frame : int, optional
+            Index one past the last frame to return (exclusive). None means up to the last frame.
+            The total number of frames is a valid value.
+        channel : int, optional
+            Not used by this method; the channel is fixed by `channel_name` at construction.
+
+        Returns
+        -------
+        video : np.ndarray
+            The video with axes ordered as time x height x width.
+
+        Raises
+        ------
+        ValueError
+            If `start_frame` or `end_frame` is outside of the valid range.
+        """
+        self.check_frame_inputs(start_frame)
+        # end_frame is an exclusive bound, so the number of frames itself is valid;
+        # every other value has to satisfy the regular index check.
+        if end_frame != self._num_frames:
+            self.check_frame_inputs(end_frame)
+
+        dimension_dict = {
+            "T": slice(start_frame, end_frame),
+            "C": self.channel_index,
+            "Z": self.plane_index,
+            "Y": slice(None),
+            "X": slice(None),
+        }
+        slices = tuple(dimension_dict[dimension] for dimension in self.dimension_order)
+        video = self._video[slices]
+
+        # C and Z were indexed with integers and are dropped, so the remaining axes are
+        # T, Y and X in the order in which they appear in dimension_order.
+        # transpose() expects, for each output axis, the position of the input axis to
+        # place there, i.e. where each of T, Y, X currently sits.
+        remaining_dimensions = [dimension for dimension in self.dimension_order if dimension in "TYX"]
+        axis_order = tuple(remaining_dimensions.index(dimension) for dimension in "TYX")
+        video = video.transpose(axis_order)
+
+        return video.compute()