Skip to content

Commit

Permalink
Return generators from builders (#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmsmkn authored Mar 23, 2021
1 parent d57c19e commit 32b8e8b
Show file tree
Hide file tree
Showing 24 changed files with 580 additions and 410 deletions.
5 changes: 5 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# History

## 0.2.0 (2021-03-23)

* Builders now return generators
* Added post processors

## 0.1.0 (2021-03-09)

* Initial version
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,22 @@ To customise the post processors that run you can do this with
```python
result = convert(..., post_processors=[...])
```

#### Using Strategies Directly

If you want to run a particular strategy directly, it returns a generator of images for a set of files. You can do this with:

```python
files = {f for f in Path("/foo/").glob("*.dcm") if f.is_file()}

try:
for result in image_builder_dicom(files=files):
sitk_image = result.image
process(sitk_image) # etc. you can also look at result.name for the name of the file,
# and result.consumed_files to see what files were used for this image
except UnconsumedFilesException as e:
# e.file_errors is keyed with a Path to a file that could not be consumed,
# with a list of all the errors found with loading it,
# the user can then choose what to do with that information
...
```
2 changes: 1 addition & 1 deletion panimg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.1.0"
__version__ = "0.2.0"

from .panimg import convert

Expand Down
16 changes: 16 additions & 0 deletions panimg/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,18 @@
from functools import partial
from pathlib import Path
from typing import Dict, List


class ValidationError(Exception):
    """Generic validation failure; adds nothing to the base ``Exception``."""


class UnconsumedFilesException(Exception):
    """
    Raised when an image builder completes and some files were not consumed.

    The ``file_errors`` attribute maps each unconsumed file to the list of
    error messages that were recorded while trying to load it.
    """

    def __init__(self, *args, file_errors: Dict[Path, List[str]]):
        """
        Parameters
        ----------
        *args
            Positional arguments forwarded unchanged to ``Exception``.
        file_errors
            Mapping of unconsumed file path to the errors found for it.
        """
        super().__init__(*args)
        self.file_errors = file_errors

    def __reduce__(self):
        """
        Support pickling and copying of the exception.

        The default ``BaseException.__reduce__`` rebuilds the instance by
        calling the class with ``self.args`` only, which fails here because
        ``file_errors`` is a required keyword-only argument. Binding it with
        ``partial`` lets the instance be reconstructed, e.g. when it is
        passed between processes.
        """
        rebuild = partial(type(self), file_errors=self.file_errors)
        # The instance dict is passed as state so that any additional
        # attributes set on the exception survive, as with the default.
        return rebuild, self.args, vars(self)
9 changes: 9 additions & 0 deletions panimg/image_builders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,12 @@
image_builder_tiff,
image_builder_fallback,
]

# Names exported by this package on ``from ... import *``: the individual
# image builder callables plus the DEFAULT_IMAGE_BUILDERS list.
__all__ = [
"image_builder_mhd",
"image_builder_nifti",
"image_builder_dicom",
"image_builder_tiff",
"image_builder_fallback",
"DEFAULT_IMAGE_BUILDERS",
]
73 changes: 26 additions & 47 deletions panimg/image_builders/dicom.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from collections import defaultdict, namedtuple
from math import isclose
from pathlib import Path
from typing import Set
from typing import DefaultDict, Iterator, List, Set

import SimpleITK
import numpy as np
import pydicom

from panimg.image_builders.utils import convert_itk_to_internal
from panimg.models import PanImgFile, PanImgResult
from panimg.exceptions import UnconsumedFilesException
from panimg.models import SimpleITKImage

NUMPY_IMAGE_TYPES = {
"character": SimpleITK.sitkUInt8,
Expand Down Expand Up @@ -46,7 +46,7 @@ def pixel_data_reached(tag, vr, length):
return pydicom.datadict.keyword_for_tag(tag) == "PixelData"


def _get_headers_by_study(files):
def _get_headers_by_study(files, file_errors):
"""
Gets all headers from dicom files found in path.
Expand All @@ -62,7 +62,6 @@ def _get_headers_by_study(files):
grouped by study id.
"""
studies = {}
errors = defaultdict(list)
indices = {}

for file in files:
Expand Down Expand Up @@ -91,20 +90,22 @@ def _get_headers_by_study(files):
studies[key]["index"] = index
studies[key]["headers"] = headers
except Exception as e:
errors[file].append(format_error(str(e)))
file_errors[file].append(format_error(str(e)))

for key in studies:
studies[key]["headers"].sort(
key=lambda x: int(x["data"].InstanceNumber)
)
return studies, errors
return studies


def format_error(message: str) -> str:
    """Return *message* prefixed with the DICOM image builder's label."""
    builder_label = "Dicom image builder"
    return f"{builder_label}: {message}"


def _validate_dicom_files(files: Set[Path]):
def _validate_dicom_files(
files: Set[Path], file_errors: DefaultDict[Path, List[str]]
):
"""
Gets the headers for all dicom files on path and validates them.
Expand All @@ -123,7 +124,7 @@ def _validate_dicom_files(files: Set[Path]):
Any study with an inconsistent amount of slices per time point is discarded.
"""
studies, errors = _get_headers_by_study(files)
studies = _get_headers_by_study(files=files, file_errors=file_errors)
result = []
dicom_dataset = namedtuple(
"dicom_dataset", ["headers", "n_time", "n_slices", "index"]
Expand All @@ -149,7 +150,7 @@ def _validate_dicom_files(files: Set[Path]):
continue
if len(headers) % n_time > 0:
for d in headers:
errors[d["file"]].append(
file_errors[d["file"]].append(
format_error("Number of slices per time point differs")
)
continue
Expand All @@ -160,7 +161,7 @@ def _validate_dicom_files(files: Set[Path]):
)
)
del studies
return result, errors
return result


def _extract_direction(dicom_ds, direction):
Expand All @@ -177,9 +178,7 @@ def _extract_direction(dicom_ds, direction):
return direction


def _process_dicom_file( # noqa: C901
*, dicom_ds, created_image_prefix, output_directory
):
def _process_dicom_file(*, dicom_ds): # noqa: C901
ref_file = pydicom.dcmread(str(dicom_ds.headers[0]["file"]))
ref_origin = tuple(
float(i) for i in getattr(ref_file, "ImagePositionPatient", (0, 0, 0))
Expand Down Expand Up @@ -256,15 +255,13 @@ def _process_dicom_file( # noqa: C901
if getattr(ref_file, f, False):
img.SetMetaData(f, str(getattr(ref_file, f)))

# Convert the SimpleITK image to our internal representation
return convert_itk_to_internal(
simple_itk_image=img,
return SimpleITKImage(
image=img,
name=(
f"{created_image_prefix}"
f"-{dicom_ds.headers[0]['data'].StudyInstanceUID}"
f"-{dicom_ds.index}"
f"{dicom_ds.headers[0]['data'].StudyInstanceUID}-{dicom_ds.index}"
),
output_directory=output_directory,
consumed_files={d["file"] for d in dicom_ds.headers},
spacing_valid=True,
)


Expand Down Expand Up @@ -337,13 +334,7 @@ def _create_itk_from_dcm(
return img


def image_builder_dicom(
*,
files: Set[Path],
output_directory: Path,
created_image_prefix: str = "",
**_,
) -> PanImgResult:
def image_builder_dicom(*, files: Set[Path]) -> Iterator[SimpleITKImage]:
"""
Constructs image objects by inspecting files in a directory.
Expand All @@ -361,28 +352,16 @@ def image_builder_dicom(
- a list files associated with the detected images
- path->error message map describing what is wrong with a given file
"""
studies, file_errors = _validate_dicom_files(files)
new_images = set()
new_image_files: Set[PanImgFile] = set()
consumed_files: Set[Path] = set()
file_errors: DefaultDict[Path, List[str]] = defaultdict(list)

studies = _validate_dicom_files(files=files, file_errors=file_errors)

for dicom_ds in studies:
try:
n_image, n_image_files = _process_dicom_file(
dicom_ds=dicom_ds,
created_image_prefix=created_image_prefix,
output_directory=output_directory,
)
new_images.add(n_image)
new_image_files |= set(n_image_files)
consumed_files |= {d["file"] for d in dicom_ds.headers}
yield _process_dicom_file(dicom_ds=dicom_ds)
except Exception as e:
for d in dicom_ds.headers:
file_errors[d["file"]].append(format_error(str(e)))

return PanImgResult(
consumed_files=consumed_files,
file_errors=file_errors,
new_images=new_images,
new_image_files=new_image_files,
new_folders=set(),
)
if file_errors:
raise UnconsumedFilesException(file_errors=file_errors)
40 changes: 14 additions & 26 deletions panimg/image_builders/fallback.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,21 @@
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Set
from typing import DefaultDict, Iterator, List, Set

import SimpleITK
import numpy as np
from PIL import Image
from PIL.Image import DecompressionBombError

from panimg.exceptions import ValidationError
from panimg.image_builders.utils import convert_itk_to_internal
from panimg.models import PanImgFile, PanImgResult
from panimg.exceptions import UnconsumedFilesException, ValidationError
from panimg.models import SimpleITKImage


def format_error(message: str) -> str:
    """Return *message* prefixed with the fallback image builder's label."""
    builder_label = "Fallback image builder"
    return f"{builder_label}: {message}"


def image_builder_fallback(
*, files: Set[Path], output_directory: Path, **_
) -> PanImgResult:
def image_builder_fallback(*, files: Set[Path]) -> Iterator[SimpleITKImage]:
"""
Constructs image objects by inspecting files in a directory.
Expand All @@ -36,10 +33,8 @@ def image_builder_fallback(
- a list files associated with the detected images
- path->error message map describing what is wrong with a given file
"""
errors: Dict[Path, List[str]] = defaultdict(list)
new_images = set()
new_image_files: Set[PanImgFile] = set()
consumed_files = set()
file_errors: DefaultDict[Path, List[str]] = defaultdict(list)

for file in files:
try:
img = Image.open(file)
Expand All @@ -52,22 +47,15 @@ def image_builder_fallback(
img_array = np.array(img)
is_vector = img.mode != "L"
img = SimpleITK.GetImageFromArray(img_array, isVector=is_vector)
n_image, n_image_files = convert_itk_to_internal(
simple_itk_image=img,

yield SimpleITKImage(
image=img,
name=file.name,
use_spacing=False,
output_directory=output_directory,
consumed_files={file},
spacing_valid=False,
)
new_images.add(n_image)
new_image_files |= set(n_image_files)
consumed_files.add(file)
except (OSError, ValidationError, DecompressionBombError):
errors[file].append(format_error("Not a valid image file"))
file_errors[file].append(format_error("Not a valid image file"))

return PanImgResult(
consumed_files=consumed_files,
file_errors=errors,
new_images=new_images,
new_image_files=new_image_files,
new_folders=set(),
)
if file_errors:
raise UnconsumedFilesException(file_errors=file_errors)
Loading

0 comments on commit 32b8e8b

Please sign in to comment.