Skip to content

Commit

Permalink
Move DICOM file sorting into DicomDataset class (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
nlessmann authored Jun 22, 2022
1 parent 98dc2b2 commit 9d5e20f
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 43 deletions.
84 changes: 43 additions & 41 deletions panimg/image_builders/dicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,30 @@ def _iter_origins(self):
else:
yield np.array(file_origin, dtype=float)

def _sort_slices_by_instance_number(self):
    """
    Sort the slices of this study in place according to InstanceNumber.

    Studies with fewer than two files are left untouched; in that case
    the InstanceNumber values are not inspected at all.

    Raises
    ------
    ValueError
        If for any slice the InstanceNumber value is missing or
        not a number.
    """
    if len(self.headers) <= 1:
        return  # nothing to order with fewer than two files

    try:
        self.headers.sort(key=lambda x: int(x["data"].InstanceNumber))
    except (TypeError, ValueError, AttributeError) as e:
        # InstanceNumber is needed to sort the slices (could also sort by
        # coordinates, but that is a lot more complicated so currently not
        # implemented). The three exception types cover the ways the
        # lookup/conversion can fail:
        #   AttributeError - the attribute is missing altogether
        #   TypeError      - the value is None (int(None))
        #   ValueError     - the value is an empty or non-numeric string
        raise ValueError(
            "Could not determine slice order "
            "due to missing or invalid InstanceNumber"
        ) from e

def _determine_slice_order(self):
# Compute coordinate differences between successive slices
origin = None
Expand Down Expand Up @@ -323,6 +347,12 @@ def _add_temporal_metadata(self, img: SimpleITK.Image, z_order: int):
img.SetMetaData("Exposures", " ".join(exposures))

def read(self) -> SimpleITKImage:
# Sort slices by instance number, which might result in an order
# in which slices are ordered from low to high coordinates according
# to the default DICOM coordinate system - or the other way around.
# First sort the slices, then find out which of these two options it
# is (z_order will be 1 or -1, or 0 if there is a single slice)
self._sort_slices_by_instance_number()
origin, spacing, z_order = self._determine_slice_order()

# Create ITK image from DICOM
Expand All @@ -344,41 +374,6 @@ def read(self) -> SimpleITKImage:
)


def _sort_headers_per_study(studies, file_errors):
    """
    For each study, sorts the headers according to InstanceNumber.

    If for any header this value is missing or not a number, the
    corresponding study is removed as reading the pixel data
    would then not be reliable. Studies consisting of a single file
    are kept as they are, without inspecting InstanceNumber.

    Parameters
    ----------
    studies
        Dictionary of DICOM headers grouped by study (items are
        modified in place)
    file_errors
        Dictionary in which reading errors are recorded per file
    """
    ignored_studies = []
    for key, study in studies.items():
        headers = study["headers"]
        if len(headers) == 1:
            continue  # no need to sort if there is only a single file

        try:
            headers.sort(key=lambda x: int(x["data"].InstanceNumber))
        except (TypeError, ValueError, AttributeError) as e:
            # InstanceNumber is missing (AttributeError), None (TypeError)
            # or an empty/non-numeric string (ValueError) but is needed to
            # sort the slices (could also sort by coordinates, but that is
            # a lot more complicated). Record the problem for every file
            # of the study rather than aborting the whole run.
            for header in headers:
                file_errors[header["file"]].append(format_error(str(e)))
            ignored_studies.append(key)

    # Deletion happens after the loop so that the dictionary is not
    # modified while it is being iterated.
    for key in ignored_studies:
        # Remove studies that would be read with messed up slice order
        del studies[key]


def _get_headers_by_study(
files: Set[Path], file_errors: DefaultDict[Path, List[str]]
):
Expand All @@ -395,7 +390,7 @@ def _get_headers_by_study(
Returns
-------
A dictionary of sorted headers for all dicom image files found within path,
A dictionary of headers for all dicom image files found within path,
grouped by study id.
"""
study_key_type = Tuple[str, ...]
Expand Down Expand Up @@ -444,7 +439,6 @@ def _get_headers_by_study(
except Exception as e:
file_errors[file].append(format_error(str(e)))

_sort_headers_per_study(studies, file_errors)
return studies


Expand Down Expand Up @@ -476,13 +470,21 @@ def _find_valid_dicom_files(
if not headers:
continue

data = headers[-1]["data"]
n_files = len(headers)
n_time = int(getattr(data, "TemporalPositionIndex", 0))
n_time = max(
int(getattr(header["data"], "TemporalPositionIndex", 0))
for header in headers
)

arbitrary_header = headers[0]["data"]
try:
n_slices_per_file = len(data.PerFrameFunctionalGroupsSequence)
n_slices_per_file = len(
arbitrary_header.PerFrameFunctionalGroupsSequence
)
except AttributeError:
n_slices_per_file = int(getattr(data, "NumberOfFrames", 1))
n_slices_per_file = int(
getattr(arbitrary_header, "NumberOfFrames", 1)
)
n_slices = n_files * n_slices_per_file

if n_time < 1:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_dicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ def test_get_headers_by_study():
studies = _get_headers_by_study(files, defaultdict(list))
assert len(studies) == 1
for key in studies:
assert [x["file"] for x in studies[key]["headers"]] == [
assert {x["file"] for x in studies[key]["headers"]} == {
DICOM_DIR / f"{x}.dcm" for x in range(1, 77)
]
}

for root, _, files in os.walk(RESOURCE_PATH):
files = [Path(root).joinpath(f) for f in files]
Expand Down

0 comments on commit 9d5e20f

Please sign in to comment.