Skip to content

Commit

Permalink
Remove usage of PanImgFolder (#93)
Browse files Browse the repository at this point in the history
All folders must be associated with a file (in our only use case, JPEGs
must be associated with a DZI file). This PR removes the `PanImgFolder` model
and replaces it with an optional `directory` attribute on `PanImgFile`.
  • Loading branch information
jmsmkn authored Mar 7, 2023
1 parent 41dafec commit 3e3a485
Show file tree
Hide file tree
Showing 11 changed files with 62 additions and 97 deletions.
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# History

## 0.11.0 (2023-03-07)

* Removes `PanImgFolder` and the `new_folders` outputs; a `directory` attribute is added to `PanImgFile` instead

## 0.10.0 (2023-03-03)

* Removed support for Python 3.7
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ Under the hood we use:

## Usage

`panimg` takes a folder and tries to convert the containing files to MHA or TIFF.
`panimg` takes a directory and tries to convert the files it contains to MHA or TIFF.
By default, it will try to convert files from subdirectories as well.
To only convert files in the top level directory, set `recurse_subdirectories` to `False`.
It will try several strategies for loading the contained files, and if an image is found it will output it to the output folder.
It will try several strategies for loading the contained files, and if an image is found it will output it to the output directory.
It will return a structure containing information about what images were produced, what images were used to form the new images, image metadata, and any errors from any of the strategies.


**NOTE: Alpha software, do not run this on folders you do not have a backup of.**
**NOTE: Alpha software, do not run this on directories you do not have a backup of.**

```python
from pathlib import Path
Expand All @@ -48,7 +48,7 @@ result = convert(
`panimg` is also accessible from the command line.
Install the package from pip as before, then you can use:

**NOTE: Alpha software, do not run this on folders you do not have a backup of.**
**NOTE: Alpha software, do not run this on directories you do not have a backup of.**

```shell
panimg convert /path/to/files/ /where/files/will/go/
Expand Down
2 changes: 1 addition & 1 deletion panimg/image_builders/metaio_mhd_mha.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def detect_mhd_file(headers: Dict[str, str], path: Path) -> bool:
if path not in data_file_path.parents:
raise ValueError(
f"{element_data_file_key} references a file which is not in "
f"the uploaded data folder"
f"the uploaded data directory"
)
if not data_file_path.is_file():
raise ValueError("Data container of mhd file is missing")
Expand Down
9 changes: 1 addition & 8 deletions panimg/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,27 +163,20 @@ class PanImgFile:
image_id: UUID
image_type: ImageType
file: Path


@dataclass(frozen=True)
class PanImgFolder:
image_id: UUID
folder: Path
directory: Optional[Path] = None


@dataclass
class PanImgResult:
new_images: Set[PanImg]
new_image_files: Set[PanImgFile]
new_folders: Set[PanImgFolder]
consumed_files: Set[Path]
file_errors: Dict[Path, List[str]]


@dataclass
class PostProcessorResult:
new_image_files: Set[PanImgFile]
new_folders: Set[PanImgFolder]


class SimpleITKImage(BaseModel):
Expand Down
32 changes: 5 additions & 27 deletions panimg/panimg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,7 @@

from panimg.exceptions import UnconsumedFilesException
from panimg.image_builders import DEFAULT_IMAGE_BUILDERS
from panimg.models import (
PanImg,
PanImgFile,
PanImgFolder,
PanImgResult,
PostProcessorResult,
)
from panimg.models import PanImg, PanImgFile, PanImgResult, PostProcessorResult
from panimg.post_processors import DEFAULT_POST_PROCESSORS
from panimg.types import ImageBuilder, PostProcessor

Expand All @@ -28,7 +22,6 @@ def convert(
) -> PanImgResult:
new_images: Set[PanImg] = set()
new_image_files: Set[PanImgFile] = set()
new_folders: Set[PanImgFolder] = set()
consumed_files: Set[Path] = set()
file_errors: DefaultDict[Path, List[str]] = defaultdict(list)

Expand All @@ -44,7 +37,6 @@ def convert(
consumed_files=consumed_files,
new_images=new_images,
new_image_files=new_image_files,
new_folders=new_folders,
file_errors=file_errors,
recurse_subdirectories=recurse_subdirectories,
)
Expand All @@ -56,12 +48,10 @@ def convert(
else DEFAULT_POST_PROCESSORS,
)
new_image_files |= result.new_image_files
new_folders |= result.new_folders

return PanImgResult(
new_images=new_images,
new_image_files=new_image_files,
new_folders=new_folders,
consumed_files=consumed_files,
file_errors=file_errors,
)
Expand All @@ -75,7 +65,6 @@ def _convert_directory(
consumed_files: Set[Path],
new_images: Set[PanImg],
new_image_files: Set[PanImgFile],
new_folders: Set[PanImgFolder],
file_errors: DefaultDict[Path, List[str]],
recurse_subdirectories: bool = True,
):
Expand All @@ -96,7 +85,6 @@ def _convert_directory(
consumed_files=consumed_files,
new_images=new_images,
new_image_files=new_image_files,
new_folders=new_folders,
file_errors=file_errors,
recurse_subdirectories=recurse_subdirectories,
)
Expand All @@ -121,7 +109,6 @@ def _convert_directory(

new_images |= builder_result.new_images
new_image_files |= builder_result.new_image_files
new_folders |= builder_result.new_folders
consumed_files |= builder_result.consumed_files

if builder_result.consumed_files:
Expand Down Expand Up @@ -160,7 +147,6 @@ def _build_files(
return PanImgResult(
new_images=new_images,
new_image_files=new_image_files,
new_folders=set(),
consumed_files=consumed_files,
file_errors=file_errors,
)
Expand All @@ -172,12 +158,11 @@ def post_process(
"""
Run a set of post processors on a set of image files
Post processors add new files and folders to existing images,
Post processors add new files and directories to existing images,
such as DZI creation for TIFF images, or thumbnail generation.
They do not produce new image entities.
"""
new_image_files: Set[PanImgFile] = set()
new_folders: Set[PanImgFolder] = set()

logger.info(f"Post processing {len(image_files)} image(s)")

Expand All @@ -190,19 +175,12 @@ def post_process(
filtered_files = {
f for f in result.new_image_files if f.image_id in existing_ids
}
filtered_folders = {
f for f in result.new_folders if f.image_id in existing_ids
}

excluded_files = result.new_image_files - filtered_files
excluded_folders = result.new_folders - filtered_folders

if excluded_files or excluded_folders:
logger.warning(f"Ignoring: {excluded_files} {excluded_folders}")
if excluded_files:
logger.warning(f"Ignoring: {excluded_files}")

new_image_files |= filtered_files
new_folders |= filtered_folders

return PostProcessorResult(
new_image_files=new_image_files, new_folders=new_folders
)
return PostProcessorResult(new_image_files=new_image_files)
25 changes: 5 additions & 20 deletions panimg/post_processors/tiff_to_dzi.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
import logging
from typing import Set

from panimg.models import (
ImageType,
PanImgFile,
PanImgFolder,
PostProcessorResult,
)
from panimg.models import ImageType, PanImgFile, PostProcessorResult
from panimg.settings import DZI_TILE_SIZE

try:
Expand All @@ -27,7 +22,6 @@ def tiff_to_dzi(*, image_files: Set[PanImgFile]) -> PostProcessorResult:
)

new_image_files: Set[PanImgFile] = set()
new_folders: Set[PanImgFolder] = set()

for file in image_files:
if file.image_type == ImageType.TIFF:
Expand All @@ -38,15 +32,12 @@ def tiff_to_dzi(*, image_files: Set[PanImgFile]) -> PostProcessorResult:
continue

new_image_files |= result.new_image_files
new_folders |= result.new_folders

return PostProcessorResult(
new_image_files=new_image_files, new_folders=new_folders
)
return PostProcessorResult(new_image_files=new_image_files)


def _create_dzi_image(*, tiff_file: PanImgFile) -> PostProcessorResult:
# Creates a dzi file and corresponding tiles in folder {pk}_files
# Creates a dzi file and corresponding tiles in directory {pk}_files
dzi_output = tiff_file.file.parent / str(tiff_file.image_id)

image = pyvips.Image.new_from_file(
Expand All @@ -59,13 +50,7 @@ def _create_dzi_image(*, tiff_file: PanImgFile) -> PostProcessorResult:
image_id=tiff_file.image_id,
image_type=ImageType.DZI,
file=(dzi_output.parent / f"{dzi_output.name}.dzi").absolute(),
directory=(dzi_output.parent / f"{dzi_output.name}_files").absolute(),
)

new_folder = PanImgFolder(
image_id=tiff_file.image_id,
folder=(dzi_output.parent / f"{dzi_output.name}_files").absolute(),
)

return PostProcessorResult(
new_image_files={new_file}, new_folders={new_folder}
)
return PostProcessorResult(new_image_files={new_file})
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "panimg"
version = "0.10.0"
version = "0.11.0"
description = "Conversion of medical images to MHA and TIFF."
license = "Apache-2.0"
authors = ["James Meakin <[email protected]>"]
Expand Down
6 changes: 3 additions & 3 deletions tests/test_dicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,21 +191,21 @@ def test_image_builder_dicom_4d_enhanced():


@pytest.mark.parametrize(
"folder,element_type",
"directory,element_type",
[
("dicom_4d", "MET_SHORT"),
("dicom_intercept", "MET_FLOAT"),
("dicom_slope", "MET_FLOAT"),
],
)
def test_dicom_rescaling(folder, element_type, tmpdir):
def test_dicom_rescaling(directory, element_type, tmpdir):
"""
2.dcm in dicom_intercept and dicom_slope has been modified to add a
small intercept (0.01) or slope (1.001) respectively.
"""
files = [
Path(d[0]).joinpath(f)
for d in os.walk(RESOURCE_PATH / folder)
for d in os.walk(RESOURCE_PATH / directory)
for f in d[2]
]
result = _build_files(
Expand Down
50 changes: 31 additions & 19 deletions tests/test_post_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,7 @@
import pytest

from panimg import convert, post_process
from panimg.models import (
ImageType,
PanImgFile,
PanImgFolder,
PostProcessorResult,
)
from panimg.models import ImageType, PanImgFile, PostProcessorResult
from tests import RESOURCE_PATH


Expand Down Expand Up @@ -39,10 +34,16 @@ def test_dzi_creation(tmpdir_factory, post_processors):

if post_processors is None:
assert len(result.new_image_files) == 3
assert len(result.new_folders) == 1
assert (
len([f for f in result.new_image_files if f.directory is None])
== 2
)
else:
assert len(result.new_image_files) == 2
assert len(result.new_folders) == 0
assert (
len([f for f in result.new_image_files if f.directory is None])
== 2
)


def bad_post_processor(*, image_files: Set[PanImgFile]) -> PostProcessorResult:
Expand All @@ -57,17 +58,30 @@ def bad_post_processor(*, image_files: Set[PanImgFile]) -> PostProcessorResult:
for f in image_files
}

good_folders = {
PanImgFolder(image_id=f.image_id, folder=Path("foo"))
good_directories = {
PanImgFile(
image_id=f.image_id,
image_type=f.image_type,
file=Path("foo"),
directory=Path("foo_files"),
)
for f in image_files
}
bad_folders = {
PanImgFolder(image_id=uuid4(), folder=Path("foo")) for _ in image_files
bad_directories = {
PanImgFile(
image_id=uuid4(),
image_type=f.image_type,
file=Path("foo"),
directory=Path("foo_files"),
)
for f in image_files
}

return PostProcessorResult(
new_image_files=good_files | bad_files,
new_folders=good_folders | bad_folders,
new_image_files=good_files
| bad_files
| good_directories
| bad_directories,
)


Expand All @@ -82,15 +96,13 @@ def test_post_processors_are_filtered():

# The bad processor should produce twice as many outputs as inputs
assert len(image_files) == 3
assert len(raw_result.new_image_files) == 6
assert len(raw_result.new_folders) == 6
assert len(raw_result.new_image_files) == 12

# The bad results should be filtered out
result = post_process(
image_files=image_files, post_processors=[bad_post_processor]
)

assert len(result.new_image_files) == 3
assert len(result.new_folders) == 3
assert len(result.new_image_files) == 6
assert len([f for f in result.new_image_files if f.directory is None]) == 3
assert {f.image_id for f in result.new_image_files} == existing_ids
assert {f.image_id for f in result.new_folders} == existing_ids
Loading

0 comments on commit 3e3a485

Please sign in to comment.