From 3e3a485b315bd4cca0640d25ee870ff6ebf6a18a Mon Sep 17 00:00:00 2001 From: James Meakin <12661555+jmsmkn@users.noreply.github.com> Date: Tue, 7 Mar 2023 14:37:24 +0100 Subject: [PATCH] Remove usage of `PanImgFolder` (#93) All folders must be associated with a file (in our only case: jpegs must be associated with a DZI file). This PR removes the `PanImgFolder` model and replaces it with an optional `directory` attribute on `PanImgFile`. --- HISTORY.md | 4 ++ README.md | 8 ++-- panimg/image_builders/metaio_mhd_mha.py | 2 +- panimg/models.py | 9 +---- panimg/panimg.py | 32 +++------------- panimg/post_processors/tiff_to_dzi.py | 25 +++---------- pyproject.toml | 2 +- tests/test_dicom.py | 6 +-- tests/test_post_processors.py | 50 +++++++++++++++---------- tests/test_tiff.py | 10 ++--- tests/test_tiff_to_dzi.py | 11 +----- 11 files changed, 62 insertions(+), 97 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 825d441..2c0e496 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # History +## 0.11.0 (2023-03-07) + +* Removes `PanImgFolder` and outputs of `new_folders`, instead `directory` is added to `PanImgFile` + ## 0.10.0 (2023-03-03) * Removed support for Python 3.7 diff --git a/README.md b/README.md index 1600bfc..3a8ea37 100644 --- a/README.md +++ b/README.md @@ -24,14 +24,14 @@ Under the hood we use: ## Usage -`panimg` takes a folder and tries to convert the containing files to MHA or TIFF. +`panimg` takes a directory and tries to convert the containing files to MHA or TIFF. By default, it will try to convert files from subdirectories as well. To only convert files in the top level directory, set `recurse_subdirectories` to `False`. -It will try several strategies for loading the contained files, and if an image is found it will output it to the output folder. +It will try several strategies for loading the contained files, and if an image is found it will output it to the output directory. It will return a structure containing information about what images were produced, what images were used to form the new images, image metadata, and any errors from any of the strategies. -**NOTE: Alpha software, do not run this on folders you do not have a backup of.** +**NOTE: Alpha software, do not run this on directories you do not have a backup of.** ```python from pathlib import Path @@ -48,7 +48,7 @@ result = convert( `panimg` is also accessible from the command line. Install the package from pip as before, then you can use: -**NOTE: Alpha software, do not run this on folders you do not have a backup of.** +**NOTE: Alpha software, do not run this on directories you do not have a backup of.** ```shell panimg convert /path/to/files/ /where/files/will/go/ diff --git a/panimg/image_builders/metaio_mhd_mha.py b/panimg/image_builders/metaio_mhd_mha.py index a2d8cc3..0f148f0 100644 --- a/panimg/image_builders/metaio_mhd_mha.py +++ b/panimg/image_builders/metaio_mhd_mha.py @@ -49,7 +49,7 @@ def detect_mhd_file(headers: Dict[str, str], path: Path) -> bool: if path not in data_file_path.parents: raise ValueError( f"{element_data_file_key} references a file which is not in " - f"the uploaded data folder" + f"the uploaded data directory" ) if not data_file_path.is_file(): raise ValueError("Data container of mhd file is missing") diff --git a/panimg/models.py b/panimg/models.py index b1d7fcb..ffce979 100644 --- a/panimg/models.py +++ b/panimg/models.py @@ -163,19 +163,13 @@ class PanImgFile: image_id: UUID image_type: ImageType file: Path - - -@dataclass(frozen=True) -class PanImgFolder: - image_id: UUID - folder: Path + directory: Optional[Path] = None @dataclass class PanImgResult: new_images: Set[PanImg] new_image_files: Set[PanImgFile] - new_folders: Set[PanImgFolder] consumed_files: Set[Path] file_errors: Dict[Path, List[str]] @@ -183,7 +177,6 @@ class PanImgResult: @dataclass class PostProcessorResult: new_image_files: Set[PanImgFile] - new_folders: Set[PanImgFolder] class SimpleITKImage(BaseModel): diff --git a/panimg/panimg.py b/panimg/panimg.py index c2e6b15..32b58e9 100644 --- a/panimg/panimg.py +++ b/panimg/panimg.py @@ -5,13 +5,7 @@ from panimg.exceptions import UnconsumedFilesException from panimg.image_builders import DEFAULT_IMAGE_BUILDERS -from panimg.models import ( - PanImg, - PanImgFile, - PanImgFolder, - PanImgResult, - PostProcessorResult, -) +from panimg.models import PanImg, PanImgFile, PanImgResult, PostProcessorResult from panimg.post_processors import DEFAULT_POST_PROCESSORS from panimg.types import ImageBuilder, PostProcessor @@ -28,7 +22,6 @@ def convert( ) -> PanImgResult: new_images: Set[PanImg] = set() new_image_files: Set[PanImgFile] = set() - new_folders: Set[PanImgFolder] = set() consumed_files: Set[Path] = set() file_errors: DefaultDict[Path, List[str]] = defaultdict(list) @@ -44,7 +37,6 @@ def convert( consumed_files=consumed_files, new_images=new_images, new_image_files=new_image_files, - new_folders=new_folders, file_errors=file_errors, recurse_subdirectories=recurse_subdirectories, ) @@ -56,12 +48,10 @@ def convert( else DEFAULT_POST_PROCESSORS, ) new_image_files |= result.new_image_files - new_folders |= result.new_folders return PanImgResult( new_images=new_images, new_image_files=new_image_files, - new_folders=new_folders, consumed_files=consumed_files, file_errors=file_errors, ) @@ -75,7 +65,6 @@ def _convert_directory( consumed_files: Set[Path], new_images: Set[PanImg], new_image_files: Set[PanImgFile], - new_folders: Set[PanImgFolder], file_errors: DefaultDict[Path, List[str]], recurse_subdirectories: bool = True, ): @@ -96,7 +85,6 @@ def _convert_directory( consumed_files=consumed_files, new_images=new_images, new_image_files=new_image_files, - new_folders=new_folders, file_errors=file_errors, recurse_subdirectories=recurse_subdirectories, ) @@ -121,7 +109,6 @@ def _convert_directory( new_images |= builder_result.new_images new_image_files |= builder_result.new_image_files - new_folders |= builder_result.new_folders consumed_files |= builder_result.consumed_files if builder_result.consumed_files: @@ -160,7 +147,6 @@ def _build_files( return PanImgResult( new_images=new_images, new_image_files=new_image_files, - new_folders=set(), consumed_files=consumed_files, file_errors=file_errors, ) @@ -172,12 +158,11 @@ def post_process( """ Run a set of post processors on a set of image files - Post processors add new files and folders to existing images, + Post processors add new files and directories to existing images, such as DZI creation for TIFF images, or thumbnail generation. They do not produce new image entities. """ new_image_files: Set[PanImgFile] = set() - new_folders: Set[PanImgFolder] = set() logger.info(f"Post processing {len(image_files)} image(s)") @@ -190,19 +175,12 @@ def post_process( filtered_files = { f for f in result.new_image_files if f.image_id in existing_ids } - filtered_folders = { - f for f in result.new_folders if f.image_id in existing_ids - } excluded_files = result.new_image_files - filtered_files - excluded_folders = result.new_folders - filtered_folders - if excluded_files or excluded_folders: - logger.warning(f"Ignoring: {excluded_files} {excluded_folders}") + if excluded_files: + logger.warning(f"Ignoring: {excluded_files}") new_image_files |= filtered_files - new_folders |= filtered_folders - return PostProcessorResult( - new_image_files=new_image_files, new_folders=new_folders - ) + return PostProcessorResult(new_image_files=new_image_files) diff --git a/panimg/post_processors/tiff_to_dzi.py b/panimg/post_processors/tiff_to_dzi.py index 2815e6c..813a338 100644 --- a/panimg/post_processors/tiff_to_dzi.py +++ b/panimg/post_processors/tiff_to_dzi.py @@ -1,12 +1,7 @@ import logging from typing import Set -from panimg.models import ( - ImageType, - PanImgFile, - PanImgFolder, - PostProcessorResult, -) +from panimg.models import ImageType, PanImgFile, PostProcessorResult from panimg.settings import DZI_TILE_SIZE try: @@ -27,7 +22,6 @@ def tiff_to_dzi(*, image_files: Set[PanImgFile]) -> PostProcessorResult: ) new_image_files: Set[PanImgFile] = set() - new_folders: Set[PanImgFolder] = set() for file in image_files: if file.image_type == ImageType.TIFF: @@ -38,15 +32,12 @@ def tiff_to_dzi(*, image_files: Set[PanImgFile]) -> PostProcessorResult: continue new_image_files |= result.new_image_files - new_folders |= result.new_folders - return PostProcessorResult( - new_image_files=new_image_files, new_folders=new_folders - ) + return PostProcessorResult(new_image_files=new_image_files) def _create_dzi_image(*, tiff_file: PanImgFile) -> PostProcessorResult: - # Creates a dzi file and corresponding tiles in folder {pk}_files + # Creates a dzi file and corresponding tiles in directory {pk}_files dzi_output = tiff_file.file.parent / str(tiff_file.image_id) image = pyvips.Image.new_from_file( @@ -59,13 +50,7 @@ def _create_dzi_image(*, tiff_file: PanImgFile) -> PostProcessorResult: image_id=tiff_file.image_id, image_type=ImageType.DZI, file=(dzi_output.parent / f"{dzi_output.name}.dzi").absolute(), + directory=(dzi_output.parent / f"{dzi_output.name}_files").absolute(), ) - new_folder = PanImgFolder( - image_id=tiff_file.image_id, - folder=(dzi_output.parent / f"{dzi_output.name}_files").absolute(), - ) - - return PostProcessorResult( - new_image_files={new_file}, new_folders={new_folder} - ) + return PostProcessorResult(new_image_files={new_file}) diff --git a/pyproject.toml b/pyproject.toml index fa8b5bb..c5aa001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "panimg" -version = "0.10.0" +version = "0.11.0" description = "Conversion of medical images to MHA and TIFF." license = "Apache-2.0" authors = ["James Meakin "] diff --git a/tests/test_dicom.py b/tests/test_dicom.py index e2820f6..d0348ef 100644 --- a/tests/test_dicom.py +++ b/tests/test_dicom.py @@ -191,21 +191,21 @@ def test_image_builder_dicom_4d_enhanced(): @pytest.mark.parametrize( - "folder,element_type", + "directory,element_type", [ ("dicom_4d", "MET_SHORT"), ("dicom_intercept", "MET_FLOAT"), ("dicom_slope", "MET_FLOAT"), ], ) -def test_dicom_rescaling(folder, element_type, tmpdir): +def test_dicom_rescaling(directory, element_type, tmpdir): """ 2.dcm in dicom_intercept and dicom_slope has been modified to add a small intercept (0.01) or slope (1.001) respectively. """ files = [ Path(d[0]).joinpath(f) - for d in os.walk(RESOURCE_PATH / folder) + for d in os.walk(RESOURCE_PATH / directory) for f in d[2] ] result = _build_files( diff --git a/tests/test_post_processors.py b/tests/test_post_processors.py index ae33ee3..b74a1b6 100644 --- a/tests/test_post_processors.py +++ b/tests/test_post_processors.py @@ -6,12 +6,7 @@ import pytest from panimg import convert, post_process -from panimg.models import ( - ImageType, - PanImgFile, - PanImgFolder, - PostProcessorResult, -) +from panimg.models import ImageType, PanImgFile, PostProcessorResult from tests import RESOURCE_PATH @@ -39,10 +34,16 @@ def test_dzi_creation(tmpdir_factory, post_processors): if post_processors is None: assert len(result.new_image_files) == 3 - assert len(result.new_folders) == 1 + assert ( + len([f for f in result.new_image_files if f.directory is None]) + == 2 + ) else: assert len(result.new_image_files) == 2 - assert len(result.new_folders) == 0 + assert ( + len([f for f in result.new_image_files if f.directory is None]) + == 2 + ) def bad_post_processor(*, image_files: Set[PanImgFile]) -> PostProcessorResult: @@ -57,17 +58,30 @@ def bad_post_processor(*, image_files: Set[PanImgFile]) -> PostProcessorResult: for f in image_files } - good_folders = { - PanImgFolder(image_id=f.image_id, folder=Path("foo")) + good_directories = { + PanImgFile( + image_id=f.image_id, + image_type=f.image_type, + file=Path("foo"), + directory=Path("foo_files"), + ) for f in image_files } - bad_folders = { - PanImgFolder(image_id=uuid4(), folder=Path("foo")) for _ in image_files + bad_directories = { + PanImgFile( + image_id=uuid4(), + image_type=f.image_type, + file=Path("foo"), + directory=Path("foo_files"), + ) + for f in image_files } return PostProcessorResult( - new_image_files=good_files | bad_files, - new_folders=good_folders | bad_folders, + new_image_files=good_files + | bad_files + | good_directories + | bad_directories, ) @@ -82,15 +96,13 @@ def test_post_processors_are_filtered(): # The bad processor should produce twice as many outputs than inputs assert len(image_files) == 3 - assert len(raw_result.new_image_files) == 6 - assert len(raw_result.new_folders) == 6 + assert len(raw_result.new_image_files) == 12 # The bad results should be filtered out result = post_process( image_files=image_files, post_processors=[bad_post_processor] ) - assert len(result.new_image_files) == 3 - assert len(result.new_folders) == 3 + assert len(result.new_image_files) == 6 + assert len([f for f in result.new_image_files if f.directory is None]) == 3 assert {f.image_id for f in result.new_image_files} == existing_ids - assert {f.image_id for f in result.new_folders} == existing_ids diff --git a/tests/test_tiff.py b/tests/test_tiff.py index c679f40..6348fe1 100644 --- a/tests/test_tiff.py +++ b/tests/test_tiff.py @@ -158,7 +158,7 @@ def test_load_with_tiff( source_dir, filename, expected_error_message, tmpdir_factory ): error_message = "" - # Copy resource file to writable temp folder + # Copy resource file to writable temp directory temp_file = Path(tmpdir_factory.mktemp("temp") / filename) shutil.copy(source_dir / filename, temp_file) gc_file = GrandChallengeTiffFile(temp_file) @@ -178,7 +178,7 @@ def test_load_with_tiff( [(RESOURCE_PATH, "valid_tiff.tif"), (RESOURCE_PATH, "no_dzi.tif")], ) def test_load_with_open_slide(source_dir, filename, tmpdir_factory): - # Copy resource file to writable temp folder + # Copy resource file to writable temp directory temp_file = Path(tmpdir_factory.mktemp("temp") / filename) shutil.copy(source_dir / filename, temp_file) gc_file = GrandChallengeTiffFile(temp_file) @@ -231,7 +231,7 @@ def test_tiff_image_entry_creation( # Integration test of all features being accessed through the image builder def test_image_builder_tiff(tmpdir_factory): - # Copy resource files to writable temp folder + # Copy resource files to writable temp directory temp_dir = Path(tmpdir_factory.mktemp("temp") / "resources") output_dir = Path(tmpdir_factory.mktemp("output")) @@ -263,7 +263,7 @@ def test_image_builder_tiff(tmpdir_factory): def test_handle_complex_files(tmpdir_factory): - # Copy resource files to writable temp folder + # Copy resource files to writable temp directory temp_dir = Path(tmpdir_factory.mktemp("temp") / "resources") shutil.copytree(RESOURCE_PATH / "complex_tiff", temp_dir) files = [Path(d[0]).joinpath(f) for d in os.walk(temp_dir) for f in d[2]] @@ -327,7 +327,7 @@ def test_convert_to_tiff(resource, tmpdir_factory): def test_error_handling(tmpdir_factory): - # Copy resource files to writable temp folder + # Copy resource files to writable temp directory # The content files are dummy files and won't compile to tiff. # The point is to test the loading of gc_files and make sure all # related files are associated with the gc_file diff --git a/tests/test_tiff_to_dzi.py b/tests/test_tiff_to_dzi.py index 81d5ad3..59c93f4 100644 --- a/tests/test_tiff_to_dzi.py +++ b/tests/test_tiff_to_dzi.py @@ -30,18 +30,12 @@ def test_dzi_creation(tmpdir_factory): assert ( new_file.file == image_file.file.parent / f"{image_file.image_id}.dzi" ) - - assert len(result.new_folders) == 1 - - new_folder = result.new_folders.pop() - - assert new_folder.image_id == image_file.image_id assert ( - new_folder.folder + new_file.directory == image_file.file.parent / f"{image_file.image_id}_files" ) - assert len(list((new_folder.folder).rglob("*.jpeg"))) == 9 + assert len(list((new_file.directory).rglob("*.jpeg"))) == 9 def test_no_exception_when_failed(tmpdir_factory, caplog): @@ -57,7 +51,6 @@ def test_no_exception_when_failed(tmpdir_factory, caplog): result = tiff_to_dzi(image_files={image_file}) assert len(result.new_image_files) == 0 - assert len(result.new_folders) == 0 # The last warning should be from our logger last_log = caplog.records[-1]