From d25b3988f26964fd0aa451680667ded274b6bb43 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sun, 27 Jun 2021 19:10:11 +0100 Subject: [PATCH 01/12] Add lazy::record module Impl detail for recording the positions of all elements in advance. --- object/src/lazy/mod.rs | 1 + object/src/lazy/record.rs | 471 ++++++++++++++++++++++++++++++++++++++ object/src/lib.rs | 1 + 3 files changed, 473 insertions(+) create mode 100644 object/src/lazy/mod.rs create mode 100644 object/src/lazy/record.rs diff --git a/object/src/lazy/mod.rs b/object/src/lazy/mod.rs new file mode 100644 index 000000000..1f46c7356 --- /dev/null +++ b/object/src/lazy/mod.rs @@ -0,0 +1 @@ +mod record; diff --git a/object/src/lazy/record.rs b/object/src/lazy/record.rs new file mode 100644 index 000000000..68855172b --- /dev/null +++ b/object/src/lazy/record.rs @@ -0,0 +1,471 @@ +//! Data structures and algorithms for DICOM data set record tables. +//! +//! A complete table of element records +//! (with some meta-information and byte positions) +//! can be obtained from a parser +//! by creating a [`DataSetTableBuilder`] +//! and invoking [`update`] on each token. +//! +//! [`update`]: DataSetTableBuilder::update +//! + +use std::{collections::BTreeMap, iter::FromIterator}; + +use dicom_core::{value::C, DataDictionary, DataElementHeader, Length, Tag}; +use dicom_parser::{ + dataset::{lazy_read::LazyDataSetReader, LazyDataToken}, + StatefulDecode, +}; + +#[derive(Debug, Default, Clone, PartialEq)] +pub struct DataSetTable { + table: BTreeMap, +} + +impl FromIterator for DataSetTable { + fn from_iter>(iter: T) -> Self { + DataSetTable { + table: iter + .into_iter() + .map(|record| (record.tag(), record)) + .collect(), + } + } +} + +impl DataSetTable { + pub fn new() -> Self { + Self::default() + } + + pub fn by_tag(&self, tag: Tag) -> Option<&DataSetRecord> { + self.table.get(&tag) + } +} + +#[derive(Debug, Default, Clone, PartialEq)] +pub struct DataSetTableBuilder { + records: Vec, + /// current amount of data set nesting. + /// 0 means push new elements to `table`, + /// 1 or more means push them to last record at the given depth + depth: u32, + last_header: Option, +} + +impl DataSetTableBuilder { + pub fn new() -> Self { + Self::default() + } + + pub fn update(&mut self, token: &LazyDataToken) + where + D: StatefulDecode, + { + match token { + LazyDataToken::ElementHeader(..) => { + // no-op + } + LazyDataToken::LazyValue { header, decoder } => { + // record element header and position into table + let records = self.records_at(self.depth); + records.push(DataSetRecordBuilder::Element { + header: *header, + position: decoder.position(), + }) + } + LazyDataToken::SequenceStart { tag, len } => { + // add depth, create empty sequence record + let records = self.records_at(self.depth); + records.push(DataSetRecordBuilder::Sequence { + tag: *tag, + length: *len, + items: vec![], + }); + self.depth += 1; + } + LazyDataToken::ItemStart { len } => { + // create new item at record + match self.last_record_at(self.depth) { + DataSetRecordBuilder::Sequence { items, .. } => { + items.push(Default::default()); + } + DataSetRecordBuilder::PixelSequence { fragment_positions } => { + // record position if length is 0 + // (because then we have no LazyItemValue + // and the position must be recorded anyway) + if *len == Length(0) { + // Note: because the position cannot be identified from here, + // we place an arbitrary value with the assumption + // that the zero length will be checked beforehand + // and that no read is actually attempted. + fragment_positions.push(None); + } + } + _ => unreachable!("Unexpected record type"), + } + } + LazyDataToken::SequenceEnd => { + // remove depth + self.depth -= 1; + } + LazyDataToken::PixelSequenceStart => { + // create new empty pixel sequence record + let records = self.records_at(self.depth); + records.push(DataSetRecordBuilder::PixelSequence { + fragment_positions: Default::default(), + }); + self.depth += 1; + } + LazyDataToken::LazyItemValue { len: _, decoder } => { + // update pixel sequence record + match self.last_record_at(self.depth) { + DataSetRecordBuilder::PixelSequence { fragment_positions } => { + // record and push position + fragment_positions.push(Some(decoder.position())); + } + _ => unreachable!("Unexpected record type"), + } + } + LazyDataToken::ItemEnd => { + // no-op + } + _ => unreachable!("unsupported token variant"), + } + } + + pub fn build(self) -> DataSetTable { + DataSetTable::from_iter(self.records.into_iter().map(DataSetRecordBuilder::build)) + } + + fn records_at(&mut self, depth: u32) -> &mut Vec { + let mut records = &mut self.records; + + for i in 0..depth { + // go in self.depth times + if let Some(DataSetRecordBuilder::Sequence { items, .. }) = records.last_mut() { + if let Some(item) = items.last_mut() { + records = &mut item.records; + } else { + unreachable!("last record at depth {} does not have any items", i); + } + } else { + unreachable!("last record at depth {} is not a sequence", i); + } + } + records + } + + fn last_record_at(&mut self, depth: u32) -> &mut DataSetRecordBuilder { + let mut records = &mut self.records; + + for _ in 1..depth { + match records.last_mut().expect("missing record") { + DataSetRecordBuilder::Sequence { items, .. } => { + let item = items.last_mut().unwrap(); + records = &mut item.records; + } + _ => unreachable!(), + } + } + + records.last_mut().expect("missing last record") + } +} + +/// A record of value positions on a persisted DICOM data set. +#[derive(Debug, Clone, PartialEq)] +pub enum DataSetRecord { + /// Primitive data element + Element { + /// data element header + header: DataElementHeader, + /// the byte position of the value + position: u64, + }, + /// Data element sequence + Sequence { + /// sequence element tag + tag: Tag, + /// the length according to the persisted data set + length: Length, + items: Vec, + }, + /// Encapsulated pixel sequence + PixelSequence { + /// the byte positions of each fragment in order + /// (the first fragment is the offset table), + /// `None` if the fragment is empty + fragment_positions: C>, + }, +} + +impl DataSetRecord { + pub fn tag(&self) -> Tag { + match self { + DataSetRecord::Element { header, .. } => header.tag, + DataSetRecord::Sequence { tag, .. } => *tag, + DataSetRecord::PixelSequence { .. } => Tag(0x7FE0, 0x0010), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum DataSetRecordBuilder { + /// Primitive data element + Element { + /// data element header + header: DataElementHeader, + /// the byte position of the value + position: u64, + }, + /// Data element sequence + Sequence { + /// sequence element tag + tag: Tag, + /// the length according to the persisted data set + length: Length, + items: Vec, + }, + /// Encapsulated pixel sequence + PixelSequence { + /// the byte positions of each fragment in order, + /// `None` if the fragment is empty. + fragment_positions: C>, + }, +} + +impl DataSetRecordBuilder { + pub fn build(self) -> DataSetRecord { + match self { + DataSetRecordBuilder::Element { header, position } => { + DataSetRecord::Element { header, position } + } + DataSetRecordBuilder::Sequence { tag, length, items } => DataSetRecord::Sequence { + tag, + length, + items: items.into_iter().map(DataSetTableBuilder::build).collect(), + }, + DataSetRecordBuilder::PixelSequence { fragment_positions } => { + DataSetRecord::PixelSequence { fragment_positions } + } + } + } +} + +/// A lazy data set reader which updates a data set table builder +/// as it fetches new tokens. +/// +/// It still uses [`LazyDataSetReader`][1] as its underlying implementation. +/// +/// [1]: dicom_parser::dataset::lazy_read::LazyDataSetReader +#[derive(Debug)] +pub struct RecordBuildingDataSetReader<'a, S, D> { + builder: &'a mut DataSetTableBuilder, + reader: LazyDataSetReader, +} + +impl<'a, S, D> RecordBuildingDataSetReader<'a, S, D> +where + S: StatefulDecode, + D: DataDictionary, +{ + pub fn new(reader: LazyDataSetReader, builder: &'a mut DataSetTableBuilder) -> Self { + RecordBuildingDataSetReader { builder, reader } + } + + /** Advance and retrieve the next DICOM data token. + * + * If a token is obtained, + * the referenced builder is automatically updated. + * + * **Note:** For the data set to be successfully parsed, + * the resulting data tokens needs to be consumed + * if they are of a value type. + */ + pub fn next( + &mut self, + ) -> Option>> { + match self.reader.next() { + Some(Ok(token)) => { + self.builder.update(&token); + Some(Ok(token)) + } + e @ Some(Err(_)) => e, + None => None, + } + } +} + +#[cfg(test)] +mod tests { + use std::io::Read; + + use dicom_core::{DataElementHeader, Length, Tag, VR}; + use dicom_encoding::{ + decode::{basic::LittleEndianBasicDecoder, explicit_le::ExplicitVRLittleEndianDecoder}, + text::DefaultCharacterSetCodec, + }; + use dicom_parser::{dataset::lazy_read::LazyDataSetReader, StatefulDecoder}; + + use crate::lazy::record::{DataSetRecord, DataSetTable}; + + use super::DataSetTableBuilder; + + fn validate_create_table_explicit_vr(source: R, gt: &DataSetTable) + where + R: Read, + { + let stateful_decoder = StatefulDecoder::new( + source, + ExplicitVRLittleEndianDecoder::default(), + LittleEndianBasicDecoder::default(), + Box::new(DefaultCharacterSetCodec::default()) as Box<_>, + ); + + let mut dataset_reader = LazyDataSetReader::new(stateful_decoder); + + let mut b = DataSetTableBuilder::new(); + + while let Some(token) = dataset_reader.next() { + let token = token.unwrap(); + b.update(&token); + token.skip().unwrap(); + } + + let table = b.build(); + + assert_eq!(&table, gt); + } + + #[test] + fn lazy_record_from_sequence_explicit() { + #[rustfmt::skip] + static DATA: &[u8] = &[ + 0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions + b'S', b'Q', // VR + 0x00, 0x00, // reserved + 0x2e, 0x00, 0x00, 0x00, // length: 28 + 18 = 46 (#= 2) + // -- 12 -- + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x14, 0x00, 0x00, 0x00, // item length: 20 (#= 2) + // -- 20 -- + 0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 1 + // -- 30 -- + 0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018, 6012) RegionDataType, len = 2, value = 2 + // -- 40 -- + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x0a, 0x00, 0x00, 0x00, // item length: 10 (#= 1) + // -- 48 -- + 0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 4 + // -- 58 -- + 0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4 + b'T', b'E', b'S', b'T', // value = "TEST" + ]; + + let sequence_record: DataSetRecord = DataSetRecord::Sequence { + tag: Tag(0x0018, 0x6011), + length: Length(46), + items: vec![ + vec![ + DataSetRecord::Element { + header: DataElementHeader { + tag: Tag(0x0018, 0x6012), + vr: VR::US, + len: Length(2), + }, + position: 28, + }, + DataSetRecord::Element { + header: DataElementHeader { + tag: Tag(0x0018, 0x6014), + vr: VR::US, + len: Length(2), + }, + position: 38, + }, + ] + .into_iter() + .collect(), + vec![DataSetRecord::Element { + header: DataElementHeader { + tag: Tag(0x0018, 0x6012), + vr: VR::US, + len: Length(2), + }, + position: 56, + }] + .into_iter() + .collect(), + ], + }; + + let ground_truth: DataSetTable = vec![ + sequence_record, + DataSetRecord::Element { + header: DataElementHeader { + tag: Tag(0x0020, 0x4000), + vr: VR::LT, + len: Length(4), + }, + position: 66, + }, + ] + .into_iter() + .collect(); + + validate_create_table_explicit_vr(DATA, &ground_truth); + } + + #[test] + fn lazy_record_from_encapsulated_pixel_data() { + #[rustfmt::skip] + static DATA: &[u8] = &[ + 0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData + b'O', b'B', // VR + 0x00, 0x00, // reserved + 0xff, 0xff, 0xff, 0xff, // length: undefined + // -- 12 -- Pixel Item 0: empty offset table + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x00, 0x00, 0x00, 0x00, // item length: 0 + // -- 20 -- First fragment of pixel data + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x20, 0x00, 0x00, 0x00, // item length: 32 + // -- 28 -- Pixel Item 1: Compressed Fragment + 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, + 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, + 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, + 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, + // -- 60 -- Second fragment of pixel data + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x10, 0x00, 0x00, 0x00, // item length: 16 + // -- 68 -- Pixel Item 2: Compressed Fragment + 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, + 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, + // -- 84 -- End of pixel data + 0xfe, 0xff, 0xdd, 0xe0, // sequence end tag + 0x00, 0x00, 0x00, 0x00, + // -- 92 -- padding + 0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding + b'O', b'B', // VR + 0x00, 0x00, // reserved + 0x08, 0x00, 0x00, 0x00, // length: 8 + // -- 104 -- + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + + let ground_truth = vec![ + DataSetRecord::PixelSequence { + fragment_positions: smallvec::smallvec![None, Some(28), Some(68)], + }, + DataSetRecord::Element { + header: DataElementHeader::new(Tag(0xFFFC, 0xFFFC), VR::OB, Length(8)), + position: 104, + }, + ] + .into_iter() + .collect(); + + validate_create_table_explicit_vr(DATA, &ground_truth); + } +} diff --git a/object/src/lib.rs b/object/src/lib.rs index 4b003a921..9e916da1a 100644 --- a/object/src/lib.rs +++ b/object/src/lib.rs @@ -108,6 +108,7 @@ //! # run().unwrap(); //! ``` pub mod file; +pub mod lazy; pub mod mem; pub mod meta; #[deprecated( From ee7b4e3b0dfef21257e6b090c5f30d010d334828 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sun, 27 Jun 2021 19:10:30 +0100 Subject: [PATCH 02/12] [parser] simplify DataSetReader::skip --- parser/src/dataset/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parser/src/dataset/mod.rs b/parser/src/dataset/mod.rs index 6808217f0..828a01f03 100644 --- a/parser/src/dataset/mod.rs +++ b/parser/src/dataset/mod.rs @@ -200,14 +200,14 @@ impl LazyDataToken where D: decode::StatefulDecode, { - pub fn skip(self) -> Result<()> { + pub fn skip(self) -> crate::stateful::decode::Result<()> { match self { LazyDataToken::LazyValue { header, mut decoder, - } => decoder.skip_bytes(header.len.0).context(SkipValueSnafu), + } => decoder.skip_bytes(header.len.0), LazyDataToken::LazyItemValue { len, mut decoder } => { - decoder.skip_bytes(len).context(SkipValueSnafu) + decoder.skip_bytes(len) } _ => Ok(()), // do nothing } From bac008784624e87f1c649e8e4c11e65e355b2cda Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sun, 27 Jun 2021 19:11:09 +0100 Subject: [PATCH 03/12] Extend test case in dataset::lazy_read - include more pixel data fragments --- parser/src/dataset/lazy_read.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/parser/src/dataset/lazy_read.rs b/parser/src/dataset/lazy_read.rs index f7fc56d38..d2c81b76d 100644 --- a/parser/src/dataset/lazy_read.rs +++ b/parser/src/dataset/lazy_read.rs @@ -910,14 +910,21 @@ mod tests { 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, - // -- 60 -- End of pixel data + // -- 64 -- Second fragment of pixel data + 0xfe, 0xff, 0x00, 0xe0, // item start tag + 0x10, 0x00, 0x00, 0x00, // item length: 16 + // -- 72 -- Compressed Fragment + 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, + 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, + // -- 88 -- End of pixel data 0xfe, 0xff, 0xdd, 0xe0, // sequence end tag 0x00, 0x00, 0x00, 0x00, - // -- 68 -- padding + // -- 96 -- padding 0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding b'O', b'B', // VR 0x00, 0x00, // reserved 0x08, 0x00, 0x00, 0x00, // length: 8 + // -- 108 -- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ]; @@ -929,6 +936,9 @@ mod tests { DataToken::ItemStart { len: Length(32) }, DataToken::ItemValue(vec![0x99; 32]), DataToken::ItemEnd, + DataToken::ItemStart { len: Length(16) }, + DataToken::ItemValue(vec![0xbb; 16]), + DataToken::ItemEnd, DataToken::SequenceEnd, DataToken::ElementHeader(DataElementHeader::new( Tag(0xfffc, 0xfffc), From 4d24e40116669cc50d8f7e76379f5b11c791688b Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sun, 18 Jul 2021 10:58:35 +0100 Subject: [PATCH 04/12] [parser] add LazyDataSetReader.into_token - tweak docs --- parser/src/dataset/lazy_read.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/parser/src/dataset/lazy_read.rs b/parser/src/dataset/lazy_read.rs index d2c81b76d..b895a6bd7 100644 --- a/parser/src/dataset/lazy_read.rs +++ b/parser/src/dataset/lazy_read.rs @@ -210,12 +210,16 @@ where }) } - /** Advance and retrieve the next DICOM data token. - * - * **Note:** For the data set to be successfully parsed, - * the resulting data tokens needs to be consumed - * if they are of a value type. - */ + /// Retrieve the inner stateful decoder from this data set reader. + pub fn into_decoder(self) -> S { + self.parser + } + + /// Advance and retrieve the next DICOM data token. + /// + /// **Note:** For the data set to be successfully parsed, + /// the resulting data tokens needs to be consumed + /// if they are of a value type. pub fn next(&mut self) -> Option>> { if self.hard_break { return None; From 04cccffa4a21c7be43c8f4bdb6e8683eb4c9c070 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sun, 18 Jul 2021 10:59:10 +0100 Subject: [PATCH 05/12] [object] aadd LazyDataSetReader.into_inner --- object/src/lazy/record.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/object/src/lazy/record.rs b/object/src/lazy/record.rs index 68855172b..6e1e71735 100644 --- a/object/src/lazy/record.rs +++ b/object/src/lazy/record.rs @@ -274,6 +274,10 @@ where RecordBuildingDataSetReader { builder, reader } } + pub fn into_inner(self) -> LazyDataSetReader { + self.reader + } + /** Advance and retrieve the next DICOM data token. * * If a token is obtained, From 1c80177bbd808886e9ab451e7b2c4c45b4b642bc Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sun, 25 Jul 2021 11:28:13 +0100 Subject: [PATCH 06/12] [parser] rename LazyDataSetReader.next to advance - [object] rename accordingly at lazy::record --- object/src/lazy/record.rs | 6 +++--- parser/src/dataset/lazy_read.rs | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/object/src/lazy/record.rs b/object/src/lazy/record.rs index 6e1e71735..2f48b6290 100644 --- a/object/src/lazy/record.rs +++ b/object/src/lazy/record.rs @@ -287,10 +287,10 @@ where * the resulting data tokens needs to be consumed * if they are of a value type. */ - pub fn next( + pub fn advance( &mut self, ) -> Option>> { - match self.reader.next() { + match self.reader.advance() { Some(Ok(token)) => { self.builder.update(&token); Some(Ok(token)) @@ -331,7 +331,7 @@ mod tests { let mut b = DataSetTableBuilder::new(); - while let Some(token) = dataset_reader.next() { + while let Some(token) = dataset_reader.advance() { let token = token.unwrap(); b.update(&token); token.skip().unwrap(); diff --git a/parser/src/dataset/lazy_read.rs b/parser/src/dataset/lazy_read.rs index b895a6bd7..c92cbbcd1 100644 --- a/parser/src/dataset/lazy_read.rs +++ b/parser/src/dataset/lazy_read.rs @@ -220,7 +220,7 @@ where /// **Note:** For the data set to be successfully parsed, /// the resulting data tokens needs to be consumed /// if they are of a value type. - pub fn next(&mut self) -> Option>> { + pub fn advance(&mut self) -> Option>> { if self.hard_break { return None; } @@ -482,7 +482,7 @@ mod tests { let mut dset_reader = LazyDataSetReader::new(parser); let mut gt_iter = ground_truth.into_iter(); - while let Some(res) = dset_reader.next() { + while let Some(res) = dset_reader.advance() { let gt_token = gt_iter.next().expect("ground truth is shorter"); let token = res.expect("should parse without an error"); let token = token.into_owned().unwrap(); @@ -1039,7 +1039,7 @@ mod tests { let mut dset_reader = LazyDataSetReader::new(parser); let mut gt_iter = ground_truth.into_iter(); - while let Some(res) = dset_reader.next() { + while let Some(res) = dset_reader.advance() { let token = res.expect("should parse without an error"); let gt_token = gt_iter.next().expect("ground truth is shorter"); match token { @@ -1092,7 +1092,7 @@ mod tests { let mut dset_reader = LazyDataSetReader::new(parser); let token = dset_reader - .next() + .advance() .expect("Expected token 1") .expect("Failed to read token 1"); @@ -1104,7 +1104,7 @@ mod tests { }; let token = dset_reader - .next() + .advance() .expect("Expected token 2") .expect("Failed to read token 2"); @@ -1124,7 +1124,7 @@ mod tests { ); let token = dset_reader - .next() + .advance() .expect("Expected token 3") .expect("Failed to read token 3"); @@ -1136,7 +1136,7 @@ mod tests { }; let token = dset_reader - .next() + .advance() .expect("Expected token 4") .expect("Failed to read token 4"); @@ -1156,7 +1156,7 @@ mod tests { ); assert!( - dset_reader.next().is_none(), + dset_reader.advance().is_none(), "unexpected number of tokens remaining" ); } From d26cdf64479e6fe288567081daddc3ca24b28e7e Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 30 Jul 2021 14:39:21 +0100 Subject: [PATCH 07/12] [object] lazy element module base impl Ongoing implementation of lazy DICOM data elements --- object/src/lazy/element.rs | 254 ++++++++++++++++ object/src/lazy/mod.rs | 575 ++++++++++++++++++++++++++++++++++++- 2 files changed, 828 insertions(+), 1 deletion(-) create mode 100644 object/src/lazy/element.rs diff --git a/object/src/lazy/element.rs b/object/src/lazy/element.rs new file mode 100644 index 000000000..1bd0642c9 --- /dev/null +++ b/object/src/lazy/element.rs @@ -0,0 +1,254 @@ +use std::collections::BTreeMap; + +use super::{PositionToValue as PositionToValueSnafu, ReadValue as ReadValueSnafu}; +use dicom_core::{header::HasLength, DataElementHeader, DicomValue, Length, Tag}; +use dicom_dictionary_std::StandardDataDictionary; +use dicom_parser::StatefulDecode; +use snafu::ResultExt; + +use crate::{InMemDicomObject, mem::InMemFragment, util::ReadSeek}; + +type Result = std::result::Result; + +/// A lazy element, which may be loaded in memory or not. +#[derive(Debug, Clone)] +pub struct LazyElement { + header: DataElementHeader, + position: u64, + value: MaybeValue, +} + +impl LazyElement { + /// Create a new lazy element with the given properties, + /// without loading its value in memory. + pub fn new_unloaded(header: DataElementHeader, position: u64) -> Self { + LazyElement { + header, + position, + value: MaybeValue::Unloaded, + } + } + + /// Create a new lazy element with the given properties, + /// already loaded with an in-memory value. + pub fn new_loaded(header: DataElementHeader, position: u64, value: LoadedValue) -> Self { + LazyElement { + header, + position, + value: MaybeValue::Loaded { + value, + dirty: false, + }, + } + } + + /// Ensure that the value is loaded in memory, + /// fetching it from the given source if necessary. + /// + /// The operation is a no-op if the value is already loaded. + pub fn load(&mut self, mut source: S) -> Result<()> + where + S: StatefulDecode, + ::Reader: ReadSeek, + { + match self.value { + MaybeValue::Loaded { .. } => Ok(()), + MaybeValue::Unloaded => { + source.seek(self.position).context(PositionToValueSnafu)?; + let value = source + .read_value_preserved(&self.header) + .context(ReadValueSnafu)?; + self.value = MaybeValue::Loaded { + value: DicomValue::from(value), + dirty: false, + }; + Ok(()) + } + } + } +} + +/// A DICOM value which may be loaded in memory or not. +/// +/// Loading the value can only be done through the respective [`LazyElement`]. +/// +#[derive(Debug, Clone)] +pub enum MaybeValue { + Loaded { value: LoadedValue, dirty: bool }, + Unloaded, +} + +impl MaybeValue { + /// Return a reference to the loaded value, + /// or `None` if the value is not loaded. + pub fn value(&self) -> Option<&LoadedValue> { + match self { + MaybeValue::Loaded { value, .. } => Some(value), + MaybeValue::Unloaded => None, + } + } + + pub fn is_loaded(&self) -> bool { + match self { + MaybeValue::Loaded { .. } => true, + MaybeValue::Unloaded => false, + } + } +} + +pub type LoadedValue = DicomValue, InMemFragment>; + +/// A DICOM object nested in a lazy DICOM object. +/// +/// The type parameter `S` represents the borrowed stateful reader, +/// implementing `StatefulDecode`. +/// `D` is for the element dictionary. +#[derive(Debug, Clone)] +pub struct LazyNestedObject { + /// the element dictionary + entries: BTreeMap>, + /// the data attribute dictionary + dict: D, + /// The length of the DICOM object in bytes. + /// It is usually undefined, unless it is part of an item + /// in a sequence with a specified length in its item header. + len: Length, +} + +impl HasLength for LazyNestedObject { + fn length(&self) -> Length { + self.len + } +} + +impl LazyNestedObject { + /// Load each element in the object. + pub fn load(&mut self, mut source: S) -> Result<()> + where + S: StatefulDecode, + ::Reader: ReadSeek, + { + for (_tag, elem) in &mut self.entries { + elem.load(&mut source)?; + } + Ok(()) + } + + /// Load each element in the object and turn it into an. + pub fn into_mem(self, mut source: S) -> Result> { + todo!() + } +} + +#[cfg(test)] +mod tests { + use byteordered::Endianness; + use dicom_core::DataElementHeader; + use dicom_core::DicomValue; + use dicom_core::Length; + use dicom_core::PrimitiveValue; + use dicom_core::Tag; + use dicom_core::VR; + use dicom_dictionary_std::StandardDataDictionary; + use dicom_encoding::decode::basic::BasicDecoder; + use dicom_encoding::decode::explicit_le::ExplicitVRLittleEndianDecoder; + use dicom_encoding::decode::implicit_le::ImplicitVRLittleEndianDecoder; + use dicom_encoding::text::DefaultCharacterSetCodec; + use dicom_parser::StatefulDecode; + use dicom_parser::StatefulDecoder; + + use crate::InMemDicomObject; + use crate::mem::InMemElement; + + use super::LazyElement; + use super::LazyNestedObject; + use super::MaybeValue; + + #[test] + fn lazy_element_single() { + let data_in = [ + 0x10, 0x00, 0x10, 0x00, // Tag(0x0010, 0x0010) + 0x08, 0x00, 0x00, 0x00, // Length: 8 + b'D', b'o', b'e', b'^', b'J', b'o', b'h', b'n', + ]; + + // Create a stateful reader for the data + let decoder = ImplicitVRLittleEndianDecoder::default(); + let text = Box::new(DefaultCharacterSetCodec) as Box<_>; + let mut cursor = std::io::Cursor::new(data_in); + let mut parser = StatefulDecoder::new( + &mut cursor, + decoder, + BasicDecoder::new(Endianness::Little), + text, + ); + + // Create an unloaded lazy element (actual value starts at 8) + let mut lazy_element: LazyElement = LazyElement { + header: DataElementHeader::new(Tag(0x0010, 0x0010), VR::PN, Length(8)), + position: 8, + value: MaybeValue::Unloaded, + }; + + // Load the lazy element + lazy_element + .load(&mut parser) + .expect("Failed to load lazy element"); + match lazy_element.value { + MaybeValue::Unloaded => panic!("element should be loaded"), + MaybeValue::Loaded { value, dirty } => { + assert_eq!(value.to_clean_str().unwrap(), "Doe^John"); + assert_eq!(dirty, false); + } + } + } + + #[test] + fn lazy_element_somewhere_in_middle() { + let data_in = [ + // 30 bytes of irrelevant data + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30 + // actual element is here + 0x10, 0x00, 0x10, 0x00, // Tag(0x0010, 0x0010) + 0x08, 0x00, 0x00, 0x00, // Length: 8 + b'D', b'o', b'e', b'^', b'J', b'o', b'h', b'n', + // 10 more bytes of irrelevant data (@ 46) + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 66 + ]; + + // Create a stateful reader for the data + let decoder = ImplicitVRLittleEndianDecoder::default(); + let text = Box::new(DefaultCharacterSetCodec) as Box<_>; + let mut cursor = std::io::Cursor::new(data_in); + let mut parser = StatefulDecoder::new( + &mut cursor, + decoder, + BasicDecoder::new(Endianness::Little), + text, + ); + + // move cursor to the end (simulating a full file read) + parser.seek(66).expect("Failed to seek to end of file"); + + // Create an unloaded lazy element + let mut lazy_element: LazyElement = LazyElement { + header: DataElementHeader::new(Tag(0x0010, 0x0010), VR::PN, Length(8)), + position: 38, + value: MaybeValue::Unloaded, + }; + + // Load the lazy element + lazy_element + .load(&mut parser) + .expect("Failed to load lazy element"); + match lazy_element.value { + MaybeValue::Unloaded => panic!("element should be loaded"), + MaybeValue::Loaded { value, dirty } => { + assert_eq!(value.to_clean_str().unwrap(), "Doe^John"); + assert_eq!(dirty, false); + } + } + } +} diff --git a/object/src/lazy/mod.rs b/object/src/lazy/mod.rs index 1f46c7356..23ce02b1b 100644 --- a/object/src/lazy/mod.rs +++ b/object/src/lazy/mod.rs @@ -1 +1,574 @@ -mod record; +//! This module contains the implementation for a lazily evaluated DICOM object. +//! +//! In a lazy DICOM object, larger DICOM elements +//! may be skipped during the decoding process, +//! and thus not be immediately available in memory. +//! A pointer to the original data source is kept for future access, +//! so that the element is fetched and its value is decoded on demand. + +use dicom_transfer_syntax_registry::TransferSyntaxRegistry; +use smallvec::SmallVec; +use std::fs::File; +use std::io::{BufReader, Read}; +use std::path::Path; +use std::{collections::BTreeMap, io::Seek, io::SeekFrom}; + +use crate::DicomObject; +use crate::lazy::record::{DataSetRecord, DataSetRecordBuilder, DataSetTableBuilder}; +use crate::{meta::FileMetaTable, util::ReadSeek, FileDicomObject}; +use dicom_core::header::{HasLength, Header}; +use dicom_core::value::{Value, C}; +use dicom_core::{ + dictionary::{DataDictionary, DictionaryEntry}, + DataElementHeader, DicomValue, +}; +use dicom_core::{DataElement, Length, Tag, VR}; +use dicom_encoding::text::{SpecificCharacterSet, TextCodec}; +use dicom_encoding::transfer_syntax::TransferSyntaxIndex; +use dicom_parser::{ + dataset::lazy_read::LazyDataSetReader, stateful::decode::Error as StatefulDecodeError, +}; +use dicom_parser::{dataset::read::Error as ParserError, StatefulDecode}; +use dicom_parser::{ + dataset::{DataToken, LazyDataToken}, + DynStatefulDecoder, +}; +use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; + +pub use self::element::{LazyElement, LazyNestedObject, MaybeValue}; +use self::record::{DataSetTable, RecordBuildingDataSetReader}; + +pub(crate) mod element; +pub mod record; + +/// The type of a pixel data fragment. +pub type InMemFragment = Vec; + +type ParserResult = std::result::Result; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Could not open file '{}'", filename.display()))] + OpenFile { + filename: std::path::PathBuf, + backtrace: Backtrace, + source: std::io::Error, + }, + #[snafu(display("Could not read from file '{}'", filename.display()))] + ReadFile { + filename: std::path::PathBuf, + backtrace: Backtrace, + source: std::io::Error, + }, + /// Could not parse meta group data set + ParseMetaDataSet { + #[snafu(backtrace)] + source: crate::meta::Error, + }, + /// Could not create data set parser + CreateParser { + #[snafu(backtrace)] + source: dicom_parser::dataset::lazy_read::Error, + }, + /// Could not read data set token + ReadToken { + #[snafu(backtrace)] + source: dicom_parser::dataset::lazy_read::Error, + }, + #[snafu(display("Could not write to file '{}'", filename.display()))] + WriteFile { + filename: std::path::PathBuf, + backtrace: Backtrace, + source: std::io::Error, + }, + /// Could not write object preamble + WritePreamble { + backtrace: Backtrace, + source: std::io::Error, + }, + #[snafu(display("Unknown data attribute named `{}`", name))] + NoSuchAttributeName { name: String, backtrace: Backtrace }, + #[snafu(display("Missing element value"))] + MissingElementValue { backtrace: Backtrace }, + #[snafu(display("Unsupported transfer syntax `{}`", uid))] + UnsupportedTransferSyntax { uid: String, backtrace: Backtrace }, + /// Could not position data source to value + PositionToValue { source: StatefulDecodeError }, + /// Could not read value from data source + ReadValue { source: StatefulDecodeError }, + /// Could not read pixel data offset table + ReadOffsetTable { source: StatefulDecodeError }, + #[snafu(display("Unexpected token {:?}", token))] + UnexpectedToken { + token: dicom_parser::dataset::LazyDataTokenRepr, + backtrace: Backtrace, + }, + /// Premature data set end + PrematureEnd { backtrace: Backtrace }, +} + +pub type Result = std::result::Result; + +#[derive(Debug, Default, Clone, PartialEq)] +pub struct OpenFileOptions { + pub dictionary: D, + pub ts_index: T, +} + +/// A DICOM object which fetches elements from a data source on demand. +#[derive(Debug, Clone)] +pub struct LazyDicomObject { + /// the binary source to fetch DICOM data from + source: S, + /// the element dictionary + entries: BTreeMap>, + + records: DataSetTable, + + /// the data element dictionary + dict: D, + /// The length of the DICOM object in bytes. + /// It is usually undefined, unless it is part of an item + /// in a sequence with a specified length in its item header. + len: Length, +} + +type LazyFileDicomObject = FileDicomObject, D>>; + +/* +impl LazyFileDicomObject { + /// Load a new lazy DICOM object from a file + pub fn from_file

(path: P) -> Result + where + P: AsRef, + D: DataDictionary, + D: Clone, + D: Default, + { + Self::from_file_with( + path, + OpenFileOptions::<_, TransferSyntaxRegistry>::default(), + ) + } + + /// Load a new lazy DICOM object from a file, + /// using the given options. + pub fn from_file_with(path: P, options: OpenFileOptions) -> Result + where + P: AsRef, + T: TransferSyntaxIndex, + D: DataDictionary, + D: Clone, + { + let OpenFileOptions { + dictionary, + ts_index, + } = options; + + let path = path.as_ref(); + let mut file = File::open(path).with_context(|| OpenFile { filename: path })?; + + // skip preamble + { + let mut buf = [0u8; 128]; + // skip the preamble + file.read_exact(&mut buf) + .with_context(|| ReadFile { filename: path })?; + } + + // read metadata header + let meta = FileMetaTable::from_reader(&mut file).context(ParseMetaDataSet)?; + + // read rest of data according to metadata, feed it to object + if let Some(ts) = options.ts_index.get(&meta.transfer_syntax) { + let cs = SpecificCharacterSet::Default; + let mut dataset = + LazyDataSetReader::new_with_dictionary(file, dictionary.clone(), ts, cs) + .context(CreateParser)?; + + let mut builder = DataSetTableBuilder::new(); + let mut entries = BTreeMap::new(); + + let mut dataset = RecordBuildingDataSetReader::new(dataset, &mut builder); + + LazyDicomObject::build_object( + &mut dataset, + &mut entries, + dictionary, + false, + Length::UNDEFINED, + )?; + + Ok(FileDicomObject { + meta, + obj: LazyDicomObject { + source: dataset.into_inner().into_decoder(), + entries, + records: builder.build(), + dict: dictionary, + len: Length::UNDEFINED, + }, + }) + } else { + UnsupportedTransferSyntax { + uid: meta.transfer_syntax, + } + .fail() + } + } +} + +impl HasLength for LazyDicomObject +where + S: StatefulDecode, + D: DataDictionary, +{ + fn length(&self) -> Length { + Length::UNDEFINED + } + + fn is_empty(&self) -> bool { + self.entries.is_empty() + } +} + +impl LazyDicomObject +where + S: StatefulDecode, + D: DataDictionary, +{ + + fn build_primitive_element() { + + } + + /// Build an object by consuming a data set parser. + fn build_object( + dataset: &mut RecordBuildingDataSetReader, + entries: &mut BTreeMap>, + dict: D, + in_item: bool, + len: Length, + ) -> Result<()> { + let mut pixel_sequence_record = None; + + // perform a structured parsing of incoming tokens + while let Some(token) = dataset.next() { + let token = token.context(ReadToken)?; + + let elem = match token { + LazyDataToken::PixelSequenceStart => { + pixel_sequence_record = Some(LazyDicomObject::build_encapsulated_data(&mut *dataset)?); + continue; + } + LazyDataToken::ElementHeader(header) => { + // fetch respective value, place it in the entries + let next_token = dataset.next().context(MissingElementValue)?; + match next_token.context(ReadToken)? { + t @ LazyDataToken::LazyValue { header, decoder } => LazyElement { + header, + position: decoder.position(), + value: LazyValue::Unloaded {}, + }, + token => { + return UnexpectedToken { token }.fail(); + } + } + } + LazyDataToken::SequenceStart { tag, len } => { + // delegate sequence building to another function + let items = Self::build_sequence(tag, len, &mut *dataset, &dict)?; + LazyElement::new(tag, VR::SQ, Value::Sequence { items, size: len }) + } + LazyDataToken::ItemEnd if in_item => { + // end of item, leave now + return Ok(()); + } + token => return UnexpectedToken { token }.fail(), + }; + entries.insert(elem.header.tag(), elem); + } + + Ok(()) + } + + /// Construct a lazy record of pixel data fragment positions + /// and its offset table. + fn build_encapsulated_data( + dataset: &mut RecordBuildingDataSetReader, + ) -> Result { + // continue fetching tokens to retrieve: + // - the offset table + // - the positions of the various compressed fragments + let mut offset_table = None; + + let mut fragment_positions = C::new(); + + while let Some(token) = dataset.next() { + match token.context(ReadToken)? { + LazyDataToken::LazyItemValue { len, decoder } => { + if offset_table.is_none() { + // retrieve the data into the offset table + let mut data = Vec::new(); + decoder.read_to_vec(len, &mut data).context(ReadOffsetTable)?; + offset_table = Some(data.into()); + } else { + fragment_positions.push(decoder.position()); + } + } + LazyDataToken::ItemEnd => { + // at the end of the first item ensure the presence of + // an empty offset_table here, so that the next items + // are seen as compressed fragments + if offset_table.is_none() { + offset_table = Some(C::new()) + } + } + LazyDataToken::ItemStart { len: _ } => { /* no-op */ } + LazyDataToken::SequenceEnd => { + // end of pixel data + break; + } + // the following variants are unexpected + token @ LazyDataToken::ElementHeader(_) + | token @ LazyDataToken::PixelSequenceStart + | token @ LazyDataToken::SequenceStart { .. } + | token @ LazyDataToken::LazyValue { .. } => { + return UnexpectedToken { token }.fail(); + } + } + } + + Ok(PixelSequenceRecord { + offset_table: offset_table.unwrap_or_default(), + fragment_positions, + }) + } + + /// Build a DICOM sequence by consuming a data set parser. + fn build_sequence( + _tag: Tag, + _len: Length, + dataset: &mut I, + dict: &D, + ) -> Result>> + where + I: Iterator>, + { + let mut items: C<_> = SmallVec::new(); + while let Some(token) = dataset.next() { + match token.context(ReadToken)? { + DataToken::ItemStart { len } => { + items.push(Self::build_nested_object( + &mut *dataset, + dict.clone(), + true, + len, + )?); + } + DataToken::SequenceEnd => { + return Ok(items); + } + token => return UnexpectedToken { token }.fail(), + }; + } + + // iterator fully consumed without a sequence delimiter + PrematureEnd.fail() + } + + /// Build a nested object by consuming a data set parser. + fn build_nested_object( + dataset: &mut LazyDataSetReader, + dict: D, + in_item: bool, + len: Length, + ) -> Result> { + let mut entries: BTreeMap> = BTreeMap::new(); + // perform a structured parsing of incoming tokens + while let Some(token) = dataset.next() { + let elem = match token.context(ReadToken)? { + LazyDataToken::PixelSequenceStart => { + let value = LazyDicomObject::build_encapsulated_data(&mut *dataset)?; + LazyElement::new( + DataElementHeader::new(Tag(0x7fe0, 0x0010), VR::OB, todo!()), + todo!(), + value, + ) + } + LazyDataToken::ElementHeader(header) => { + // fetch respective value, place it in the entries + let next_token = dataset.next().context(MissingElementValue)?; + match next_token.context(ReadToken)? { + t @ LazyDataToken::LazyValue { header, decoder } => { + // TODO choose whether to eagerly fetch the elemet or keep it unloaded + LazyElement { + header, + position: decoder.position(), + value: LazyValue::Unloaded, + } + }, + token => { + return UnexpectedToken { token }.fail(); + } + } + } + LazyDataToken::SequenceStart { tag, len } => { + // delegate sequence building to another function + let items = Self::build_sequence(tag, len, dataset, &dict)?; + + // !!! Lazy Element does not fit the sequence system + todo!() + //LazyElement::new(tag, VR::SQ, Value::Sequence { items, size: len }) + } + LazyDataToken::ItemEnd if in_item => { + // end of item, leave now + return Ok(LazyNestedObject { entries, dict, len }); + } + token => return UnexpectedToken { token }.fail(), + }; + entries.insert(elem.header.tag(), elem); + } + + Ok(LazyNestedObject { entries, dict, len }) + } +} + + +impl LazyElement +where + S: StatefulDecode, + ::Reader: Seek, +{ + fn new(header: DataElementHeader, position: u64, value: LazyValue) -> Self { + LazyElement { + header, + position, + value, + } + } + + /// Ensure that the value is loaded in memory. + pub fn load(&mut self, source: &mut S) -> Result<&mut Self> { + match &mut self.value { + LazyValue::Unloaded => { + let value = self.fetch(source)?; + self.value = LazyValue::Loaded { + value: Some(value), + dirty: false, + }; + Ok(self) + } + LazyValue::Loaded { .. } => Ok(self), + } + } + + /// Retrieve an independent copy of the value from the original source, + /// without saving it in the element. + fn fetch(&mut self, source: &mut S) -> Result> { + source.seek(self.position).context(PositionToValue)?; + + if self.header.is_non_primitive() { + todo!("non primitive value retrieval not implemented yet") + } else { + let prim = source.read_value(&self.header).context(ReadValue)?; + Ok(prim.into()) + } + } + + /// Take the copy of the element in memory. + /// + /// + pub fn take(&mut self) -> Option> { + match &mut self.value { + LazyValue::Loaded { value, .. } => { + let out = value.take(); + self.value = LazyValue::Unloaded; + out + } + LazyValue::Unloaded { .. } => return None, + } + } +} + +impl LazyValue { + fn inner(&self) -> Option<&LoadedValue> { + match self { + LazyValue::Loaded { value, .. } => value.as_ref(), + LazyValue::Unloaded => None, + } + } + + fn inner_mut(&mut self) -> Option<&mut LoadedValue> { + match self { + LazyValue::Loaded { value, .. } => value.as_mut(), + LazyValue::Unloaded => None, + } + } +} + +*/ +#[cfg(test)] +mod tests { + + use super::*; + use crate::InMemDicomObject; + use crate::{meta::FileMetaTableBuilder, open_file, Error}; + use byteordered::Endianness; + use dicom_core::value::PrimitiveValue; + use dicom_core::{ + dicom_value, + header::{DataElementHeader, Length, VR}, + }; + use dicom_encoding::{ + decode::{basic::BasicDecoder, implicit_le::ImplicitVRLittleEndianDecoder}, + encode::EncoderFor, + text::DefaultCharacterSetCodec, + transfer_syntax::implicit_le::ImplicitVRLittleEndianEncoder, + }; + use dicom_parser::{dataset::IntoTokens, StatefulDecoder}; + use tempfile; + + fn assert_obj_eq(obj1: &InMemDicomObject, obj2: &InMemDicomObject) + where + D: std::fmt::Debug, + { + // debug representation because it makes a stricter comparison and + // assumes that Undefined lengths are equal. + assert_eq!(format!("{:?}", obj1), format!("{:?}", obj2)) + } + + #[test] + #[ignore] + fn inmem_object_read_dataset() { + let data_in = [ + 0x10, 0x00, 0x10, 0x00, // Tag(0x0010, 0x0010) + 0x08, 0x00, 0x00, 0x00, // Length: 8 + b'D', b'o', b'e', b'^', b'J', b'o', b'h', b'n', + ]; + + let decoder = ImplicitVRLittleEndianDecoder::default(); + let text = Box::new(DefaultCharacterSetCodec) as Box<_>; + let mut cursor = &data_in[..]; + let parser = StatefulDecoder::new( + &mut cursor, + decoder, + BasicDecoder::new(Endianness::Little), + text, + ); + + let obj = todo!(); // LazyDicomObject::read_dataset(parser).unwrap(); + + let mut gt = InMemDicomObject::create_empty(); + + let patient_name = DataElement::new( + Tag(0x0010, 0x0010), + VR::PN, + dicom_value!(Strs, ["Doe^John"]), + ); + gt.put(patient_name); + + //assert_eq!(obj, gt); + } + +} From 46708f7a1119deafdceaa1bbaf1076dc8cb8de91 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 30 Jul 2021 18:32:02 +0100 Subject: [PATCH 08/12] [object] tweak lazy::element impl - also add crate visible `InMemoryDicomObject::from_parts` --- object/src/lazy/element.rs | 230 ++++++++++++++++++++++++++++++++++--- object/src/mem.rs | 20 +++- 2 files changed, 232 insertions(+), 18 deletions(-) diff --git a/object/src/lazy/element.rs b/object/src/lazy/element.rs index 1bd0642c9..2627bd178 100644 --- a/object/src/lazy/element.rs +++ b/object/src/lazy/element.rs @@ -1,12 +1,16 @@ use std::collections::BTreeMap; use super::{PositionToValue as PositionToValueSnafu, ReadValue as ReadValueSnafu}; -use dicom_core::{header::HasLength, DataElementHeader, DicomValue, Length, Tag}; +use dicom_core::{DataDictionary, DataElementHeader, DicomValue, Length, Tag, header::HasLength}; use dicom_dictionary_std::StandardDataDictionary; use dicom_parser::StatefulDecode; use snafu::ResultExt; -use crate::{InMemDicomObject, mem::InMemFragment, util::ReadSeek}; +use crate::{ + mem::{InMemElement, InMemFragment}, + util::ReadSeek, + InMemDicomObject, +}; type Result = std::result::Result; @@ -18,7 +22,11 @@ pub struct LazyElement { value: MaybeValue, } -impl LazyElement { +impl LazyElement +where + D: DataDictionary, + D: Clone, +{ /// Create a new lazy element with the given properties, /// without loading its value in memory. pub fn new_unloaded(header: DataElementHeader, position: u64) -> Self { @@ -46,7 +54,7 @@ impl LazyElement { /// fetching it from the given source if necessary. /// /// The operation is a no-op if the value is already loaded. - pub fn load(&mut self, mut source: S) -> Result<()> + pub fn load(&mut self, source: &mut S) -> Result<()> where S: StatefulDecode, ::Reader: ReadSeek, @@ -66,6 +74,20 @@ impl LazyElement { } } } + + /// Convert the lazy element into an in-memory element, + /// loading it from the given source if necessary. + pub fn into_mem(mut self, source: &mut S) -> Result> + where + S: StatefulDecode, + ::Reader: ReadSeek, + { + self.load(source)?; + + let value = self.value.into_mem(source)?; + + Ok(InMemElement::new(self.header.tag, self.header.vr, value)) + } } /// A DICOM value which may be loaded in memory or not. @@ -78,7 +100,11 @@ pub enum MaybeValue { Unloaded, } -impl MaybeValue { +impl MaybeValue +where + D: DataDictionary, + D: Clone, +{ /// Return a reference to the loaded value, /// or `None` if the value is not loaded. pub fn value(&self) -> Option<&LoadedValue> { @@ -94,6 +120,44 @@ impl MaybeValue { MaybeValue::Unloaded => false, } } + + /// **Pre-condition:** the value must be loaded. + fn into_mem(self, source: &mut S) -> Result, InMemFragment>> + where + S: StatefulDecode, + ::Reader: ReadSeek, + { + match self { + MaybeValue::Loaded { value, .. } => { + match value { + DicomValue::Primitive(primitive) => { + // accept primitive value as is + Ok(DicomValue::from(primitive)) + } + DicomValue::PixelSequence { + offset_table, + fragments, + } => { + // accept pixel sequence as is + Ok(DicomValue::PixelSequence { + offset_table, + fragments, + }) + } + DicomValue::Sequence { items, size } => { + // recursively turn each item into memory + let items: Result<_> = items + .into_iter() + .map(|item| item.into_mem(source)) + .collect(); + let items = items?; + Ok(DicomValue::Sequence { items, size }) + } + } + } + _ => panic!("Value should be loaded"), + } + } } pub type LoadedValue = DicomValue, InMemFragment>; @@ -121,22 +185,38 @@ impl HasLength for LazyNestedObject { } } -impl LazyNestedObject { +impl LazyNestedObject +where + D: DataDictionary, + D: Clone, +{ /// Load each element in the object. - pub fn load(&mut self, mut source: S) -> Result<()> + pub fn load(&mut self, source: &mut S) -> Result<()> where S: StatefulDecode, ::Reader: ReadSeek, { - for (_tag, elem) in &mut self.entries { - elem.load(&mut source)?; + for elem in &mut self.entries.values_mut() { + elem.load(&mut *source)?; } Ok(()) } - /// Load each element in the object and turn it into an. - pub fn into_mem(self, mut source: S) -> Result> { - todo!() + /// Load each element in the object and turn it into an in-memory object. + pub fn into_mem(mut self, source: &mut S) -> Result> + where + S: StatefulDecode, + ::Reader: ReadSeek, + D: DataDictionary, + D: Clone, + { + self.load(&mut *source)?; + + let entries: Result<_> = self.entries.into_values() + .map(|elem| elem.into_mem(&mut *source).map(|elem| (elem.header().tag, elem))) + .collect(); + + Ok(InMemDicomObject::from_parts(entries?, self.dict, self.len)) } } @@ -144,11 +224,10 @@ impl LazyNestedObject { mod tests { use byteordered::Endianness; use dicom_core::DataElementHeader; - use dicom_core::DicomValue; use dicom_core::Length; - use dicom_core::PrimitiveValue; use dicom_core::Tag; use dicom_core::VR; + use dicom_core::dicom_value; use dicom_dictionary_std::StandardDataDictionary; use dicom_encoding::decode::basic::BasicDecoder; use dicom_encoding::decode::explicit_le::ExplicitVRLittleEndianDecoder; @@ -157,8 +236,8 @@ mod tests { use dicom_parser::StatefulDecode; use dicom_parser::StatefulDecoder; - use crate::InMemDicomObject; use crate::mem::InMemElement; + use crate::InMemDicomObject; use super::LazyElement; use super::LazyNestedObject; @@ -251,4 +330,125 @@ mod tests { } } } + #[test] + fn lazy_nested_object() { + static DATA_IN: &[u8] = &[ + // SequenceStart: (0008,2218) ; len = 54 (#=3) + 0x08, 0x00, 0x18, 0x22, b'S', b'Q', 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, + // -- 12, -- + // ItemStart: len = 46 + 0xfe, 0xff, 0x00, 0xe0, 0x2e, 0x00, 0x00, 0x00, + // -- 20, -- + // ElementHeader: (0008,0100) CodeValue; len = 8 + 0x08, 0x00, 0x00, 0x01, b'S', b'H', 0x08, 0x00, // PrimitiveValue + b'T', b'-', b'D', b'1', b'2', b'1', b'3', b' ', + // -- 36, -- + // ElementHeader: (0008,0102) CodingSchemeDesignator; len = 4 + 0x08, 0x00, 0x02, 0x01, b'S', b'H', 0x04, 0x00, // PrimitiveValue + b'S', b'R', b'T', b' ', + // -- 48, -- + // (0008,0104) CodeMeaning; len = 10 + 0x08, 0x00, 0x04, 0x01, b'L', b'O', 0x0a, 0x00, // PrimitiveValue + b'J', b'a', b'w', b' ', b'r', b'e', b'g', b'i', b'o', b'n', + // -- 66 -- + // SequenceStart: (0040,0555) AcquisitionContextSequence; len = 0 + 0x40, 0x00, 0x55, 0x05, b'S', b'Q', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // ElementHeader: (2050,0020) PresentationLUTShape; len = 8 + 0x50, 0x20, 0x20, 0x00, b'C', b'S', 0x08, 0x00, // PrimitiveValue + b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y', + ]; + + // Create a stateful reader for the data + let decoder = ExplicitVRLittleEndianDecoder::default(); + let text = Box::new(DefaultCharacterSetCodec) as Box<_>; + let mut cursor = std::io::Cursor::new(DATA_IN); + let mut parser = StatefulDecoder::new( + &mut cursor, + decoder, + BasicDecoder::new(Endianness::Little), + text, + ); + + // move cursor to the end (simulating a full file read) + parser.seek(94).expect("Failed to seek to end of file"); + + // construct accurate nested object, unloaded + let mut nested_object: LazyNestedObject = LazyNestedObject { + entries: vec![ + // CodeValue element + ( + Tag(0x0008, 0x0100), + LazyElement::new_unloaded( + DataElementHeader::new(Tag(0x0008, 0x0100), VR::SH, Length(8)), + 28, + ), + ), + // CodingSchemeDesignator element + ( + Tag(0x0008, 0x0102), + LazyElement::new_unloaded( + DataElementHeader::new(Tag(0x0008, 0x0102), VR::SH, Length(4)), + 44, + ), + ), + // CodeMeaning element + ( + Tag(0x0008, 0x0104), + LazyElement::new_unloaded( + DataElementHeader::new(Tag(0x0008, 0x0104), VR::LO, Length(10)), + 56, + ), + ), + ] + .into_iter() + .collect(), + dict: Default::default(), + len: Length(46), + }; + + // load nested object + nested_object + .load(&mut parser) + .expect("Failed to load nested object"); + + for e in nested_object.entries.values() { + assert!(e.value.is_loaded()); + } + + // turn it into an in-memory DICOM object, + // test with ground truth + let inmem = nested_object + .into_mem(&mut parser) + .expect("Failed to load all object into memory"); + + let gt: InMemDicomObject = InMemDicomObject::from_element_iter(vec![ + InMemElement::new( + Tag(0x0008, 0x0100), + VR::SH, + dicom_value!(Strs, ["T-D1213 "]), + ), + InMemElement::new(Tag(0x0008, 0x0102), VR::SH, dicom_value!(Strs, ["SRT "])), + InMemElement::new( + Tag(0x0008, 0x0104), + VR::LO, + dicom_value!(Strs, ["Jaw region"]), + ), + ]); + + assert_eq_elements(&inmem, >); + } + + /// Assert that two objects are equal + /// by traversing their elements in sequence + /// and checking that those are equal. + fn assert_eq_elements(obj1: &InMemDicomObject, obj2: &InMemDicomObject) + where + D: std::fmt::Debug, + { + // iterate through all elements in both objects + // and check that they are equal + for (e1, e2) in std::iter::Iterator::zip(obj1.into_iter(), obj2) { + assert_eq!(e1, e2); + } + } } diff --git a/object/src/mem.rs b/object/src/mem.rs index 07ebcd441..71883be9d 100644 --- a/object/src/mem.rs +++ b/object/src/mem.rs @@ -378,6 +378,20 @@ where D: DataDictionary, D: Clone, { + /// Create an in-memory DICOM object from its constituent parts. + /// + /// This is currently crate-only because + /// it is useful for converting between DICOM object implementations, + /// but can produce inconsistent objects + /// if used with incoherent parameters. + pub(crate) fn from_parts(entries: BTreeMap>, dict: D, len: Length) -> Self { + InMemDicomObject { + entries, + dict, + len, + } + } + /// Create a new empty object, using the given dictionary for name lookup. pub fn new_empty_with_dict(dict: D) -> Self { InMemDicomObject { @@ -388,11 +402,11 @@ where } /// Construct a DICOM object from an iterator of structured elements. - pub fn from_element_source_with_dict(iter: I, dict: D) -> Result + pub fn from_element_source_with_dict(iter: I, dict: D) -> Result where - I: IntoIterator>>, + I: IntoIterator, E>>, { - let entries: Result<_> = iter.into_iter().map_ok(|e| (e.tag(), e)).collect(); + let entries: Result<_, E> = iter.into_iter().map_ok(|e| (e.tag(), e)).collect(); Ok(InMemDicomObject { entries: entries?, dict, From 2ecb5c67d8b33b2836afc181db68deb25e67e264 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 30 Jul 2021 21:09:26 +0100 Subject: [PATCH 09/12] [object] clean lazy module WIP --- object/src/lazy/mod.rs | 122 ++++++++--------------------------------- 1 file changed, 22 insertions(+), 100 deletions(-) diff --git a/object/src/lazy/mod.rs b/object/src/lazy/mod.rs index 23ce02b1b..5dfebd971 100644 --- a/object/src/lazy/mod.rs +++ b/object/src/lazy/mod.rs @@ -120,11 +120,10 @@ pub struct OpenFileOptions { pub struct LazyDicomObject { /// the binary source to fetch DICOM data from source: S, - /// the element dictionary + /// the element dictionary at this level entries: BTreeMap>, - + /// the full record table records: DataSetTable, - /// the data element dictionary dict: D, /// The length of the DICOM object in bytes. @@ -133,7 +132,7 @@ pub struct LazyDicomObject { len: Length, } -type LazyFileDicomObject = FileDicomObject, D>>; +pub type LazyFileDicomObject = FileDicomObject, D>>; /* impl LazyFileDicomObject { @@ -220,7 +219,8 @@ impl LazyFileDicomObject { impl HasLength for LazyDicomObject where - S: StatefulDecode, + S: StatefulDecode, + ::Reader: ReadSeek, D: DataDictionary, { fn length(&self) -> Length { @@ -234,18 +234,15 @@ where impl LazyDicomObject where - S: StatefulDecode, + S: StatefulDecode, + ::Reader: ReadSeek, D: DataDictionary, { - fn build_primitive_element() { - - } - /// Build an object by consuming a data set parser. fn build_object( dataset: &mut RecordBuildingDataSetReader, - entries: &mut BTreeMap>, + entries: &mut BTreeMap>, dict: D, in_item: bool, len: Length, @@ -253,7 +250,7 @@ where let mut pixel_sequence_record = None; // perform a structured parsing of incoming tokens - while let Some(token) = dataset.next() { + while let Some(token) = dataset.advance() { let token = token.context(ReadToken)?; let elem = match token { @@ -263,13 +260,9 @@ where } LazyDataToken::ElementHeader(header) => { // fetch respective value, place it in the entries - let next_token = dataset.next().context(MissingElementValue)?; + let next_token = dataset.advance().context(MissingElementValue)?; match next_token.context(ReadToken)? { - t @ LazyDataToken::LazyValue { header, decoder } => LazyElement { - header, - position: decoder.position(), - value: LazyValue::Unloaded {}, - }, + t @ LazyDataToken::LazyValue { header, decoder } => LazyElement::new_unloaded(header, decoder.position()), token => { return UnexpectedToken { token }.fail(); } @@ -278,7 +271,8 @@ where LazyDataToken::SequenceStart { tag, len } => { // delegate sequence building to another function let items = Self::build_sequence(tag, len, &mut *dataset, &dict)?; - LazyElement::new(tag, VR::SQ, Value::Sequence { items, size: len }) + let position = 0; + LazyElement::new_loaded(DataElementHeader::new(tag, VR::SQ, len), 0, Value::Sequence { items, size: len }) } LazyDataToken::ItemEnd if in_item => { // end of item, leave now @@ -304,7 +298,7 @@ where let mut fragment_positions = C::new(); - while let Some(token) = dataset.next() { + while let Some(token) = dataset.advance() { match token.context(ReadToken)? { LazyDataToken::LazyItemValue { len, decoder } => { if offset_table.is_none() { @@ -361,7 +355,7 @@ where DataToken::ItemStart { len } => { items.push(Self::build_nested_object( &mut *dataset, - dict.clone(), + *dict.clone(), true, len, )?); @@ -383,14 +377,14 @@ where dict: D, in_item: bool, len: Length, - ) -> Result> { - let mut entries: BTreeMap> = BTreeMap::new(); + ) -> Result { + let mut entries: BTreeMap> = BTreeMap::new(); // perform a structured parsing of incoming tokens - while let Some(token) = dataset.next() { + while let Some(token) = dataset.advance() { let elem = match token.context(ReadToken)? { LazyDataToken::PixelSequenceStart => { let value = LazyDicomObject::build_encapsulated_data(&mut *dataset)?; - LazyElement::new( + LazyElement::new_loaded( DataElementHeader::new(Tag(0x7fe0, 0x0010), VR::OB, todo!()), todo!(), value, @@ -398,14 +392,14 @@ where } LazyDataToken::ElementHeader(header) => { // fetch respective value, place it in the entries - let next_token = dataset.next().context(MissingElementValue)?; + let next_token = dataset.advance().context(MissingElementValue)?; match next_token.context(ReadToken)? { t @ LazyDataToken::LazyValue { header, decoder } => { // TODO choose whether to eagerly fetch the elemet or keep it unloaded LazyElement { header, position: decoder.position(), - value: LazyValue::Unloaded, + value: MaybeValue::Unloaded, } }, token => { @@ -434,80 +428,8 @@ where } } - -impl LazyElement -where - S: StatefulDecode, - ::Reader: Seek, -{ - fn new(header: DataElementHeader, position: u64, value: LazyValue) -> Self { - LazyElement { - header, - position, - value, - } - } - - /// Ensure that the value is loaded in memory. - pub fn load(&mut self, source: &mut S) -> Result<&mut Self> { - match &mut self.value { - LazyValue::Unloaded => { - let value = self.fetch(source)?; - self.value = LazyValue::Loaded { - value: Some(value), - dirty: false, - }; - Ok(self) - } - LazyValue::Loaded { .. } => Ok(self), - } - } - - /// Retrieve an independent copy of the value from the original source, - /// without saving it in the element. - fn fetch(&mut self, source: &mut S) -> Result> { - source.seek(self.position).context(PositionToValue)?; - - if self.header.is_non_primitive() { - todo!("non primitive value retrieval not implemented yet") - } else { - let prim = source.read_value(&self.header).context(ReadValue)?; - Ok(prim.into()) - } - } - - /// Take the copy of the element in memory. - /// - /// - pub fn take(&mut self) -> Option> { - match &mut self.value { - LazyValue::Loaded { value, .. } => { - let out = value.take(); - self.value = LazyValue::Unloaded; - out - } - LazyValue::Unloaded { .. } => return None, - } - } -} - -impl LazyValue { - fn inner(&self) -> Option<&LoadedValue> { - match self { - LazyValue::Loaded { value, .. } => value.as_ref(), - LazyValue::Unloaded => None, - } - } - - fn inner_mut(&mut self) -> Option<&mut LoadedValue> { - match self { - LazyValue::Loaded { value, .. } => value.as_mut(), - LazyValue::Unloaded => None, - } - } -} - */ + #[cfg(test)] mod tests { From 701b8fc2a01eff17304bc1a68e1dabacf8c688dc Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Wed, 4 Aug 2021 11:47:13 +0100 Subject: [PATCH 10/12] [object] Extend lazy element to cover partially loaded pixel sequences - [core] tweak documentation of `P` type parameter in `Value` --- core/src/value/mod.rs | 4 +- object/src/lazy/element.rs | 90 +++++++++++++++++++++++++++++++++----- object/src/lazy/mod.rs | 7 +++ 3 files changed, 89 insertions(+), 12 deletions(-) diff --git a/core/src/value/mod.rs b/core/src/value/mod.rs index 9eec4e498..49667346b 100644 --- a/core/src/value/mod.rs +++ b/core/src/value/mod.rs @@ -46,8 +46,8 @@ pub trait DicomValueType: HasLength { /// /// `I` is the complex type for nest data set items, which should usually /// implement [`HasLength`]. -/// `P` is the encapsulated pixel data provider, which should usually -/// implement `AsRef<[u8]>`. +/// `P` is the encapsulated pixel data fragment type, +/// which should usually implement `AsRef<[u8]>`. /// /// [`HasLength`]: ../header/trait.HasLength.html #[derive(Debug, Clone, PartialEq)] diff --git a/object/src/lazy/element.rs b/object/src/lazy/element.rs index 2627bd178..548dc8348 100644 --- a/object/src/lazy/element.rs +++ b/object/src/lazy/element.rs @@ -1,10 +1,11 @@ use std::collections::BTreeMap; -use super::{PositionToValue as PositionToValueSnafu, ReadValue as ReadValueSnafu}; +use super::{PositionToValue as PositionToValueSnafu, ReadValue as ReadValueSnafu, ReadFragment as ReadFragmentSnafu, UnloadedFragment as UnloadedFragmentSnafu}; use dicom_core::{DataDictionary, DataElementHeader, DicomValue, Length, Tag, header::HasLength}; use dicom_dictionary_std::StandardDataDictionary; use dicom_parser::StatefulDecode; -use snafu::ResultExt; +use snafu::{OptionExt, ResultExt}; +use smallvec::SmallVec; use crate::{ mem::{InMemElement, InMemFragment}, @@ -59,8 +60,21 @@ where S: StatefulDecode, ::Reader: ReadSeek, { - match self.value { + match &mut self.value { MaybeValue::Loaded { .. } => Ok(()), + MaybeValue::PixelSequence { fragments, .. } => { + // load each fragment individually + for fragment in fragments { + if fragment.data.is_some() { + continue; + } + source.seek(fragment.position).context(PositionToValueSnafu)?; + let mut data = Vec::with_capacity(fragment.length as usize); + source.read_to_vec(fragment.length, &mut data).context(ReadFragmentSnafu)?; + fragment.data = Some(data); + } + Ok(()) + }, MaybeValue::Unloaded => { source.seek(self.position).context(PositionToValueSnafu)?; let value = source @@ -96,7 +110,26 @@ where /// #[derive(Debug, Clone)] pub enum MaybeValue { - Loaded { value: LoadedValue, dirty: bool }, + /// A DICOM value that is at least partially loaded in memory. + /// + /// + /// Its nested DICOM data sets or fragments might not be all loaded + /// in the case of sequences. + Loaded { + /// the value proper + value: LoadedValue, + dirty: bool, + }, + /// a DICOM value that is a pixel sequence, + /// where each fragment can be loaded independently + PixelSequence { + /// the offset table for each pixel data frame + offset_table: SmallVec<[u32; 2]>, + /// the sequence of fragments + fragments: SmallVec<[MaybeFragment; 2]>, + }, + /// a DICOM value which is not loaded, + /// and so is unreachable from here Unloaded, } @@ -110,18 +143,26 @@ where pub fn value(&self) -> Option<&LoadedValue> { match self { MaybeValue::Loaded { value, .. } => Some(value), + MaybeValue::PixelSequence { fragments, .. } => todo!("retrieving pixel sequences"), MaybeValue::Unloaded => None, } } + /// Check whether the element is loaded at this level. + /// + /// **Note:** + /// this method does not check + /// whether nested data sets or any pixel data fragments + /// are fully loaded. pub fn is_loaded(&self) -> bool { match self { MaybeValue::Loaded { .. } => true, + MaybeValue::PixelSequence { .. } => true, MaybeValue::Unloaded => false, } } - /// **Pre-condition:** the value must be loaded. + /// **Pre-condition:** the value must be fully loaded. fn into_mem(self, source: &mut S) -> Result, InMemFragment>> where S: StatefulDecode, @@ -138,10 +179,14 @@ where offset_table, fragments, } => { + let fragments: Result> = fragments.into_iter() + .enumerate() + .map(|(i, f)| f.data.context(UnloadedFragmentSnafu { index: i as u32 })) + .collect(); // accept pixel sequence as is Ok(DicomValue::PixelSequence { offset_table, - fragments, + fragments: fragments?, }) } DicomValue::Sequence { items, size } => { @@ -160,7 +205,30 @@ where } } -pub type LoadedValue = DicomValue, InMemFragment>; +/// A fragment of a pixel sequence, +/// which may be loaded in memory or not. +#[derive(Debug, Clone)] +pub struct MaybeFragment { + /// The offset of the fragment data relative to the original source + position: u64, + /// The number of data bytes in this fragment + length: u32, + /// The actual data proper, + /// which might not be loaded. + data: Option>, +} + +/// Type definition for a value which has been loaded into memory, +/// at least partially, +/// at one level. +/// +/// If it is a primitive value, +/// then is sure to be all in memory. +/// In the case of a sequence, +/// the nested objects may or may not be loaded. +/// In the case of a pixel sequence, +/// each fragments may be loaded in memory or not. +pub type LoadedValue = DicomValue, MaybeFragment>; /// A DICOM object nested in a lazy DICOM object. /// @@ -191,7 +259,7 @@ where D: Clone, { /// Load each element in the object. - pub fn load(&mut self, source: &mut S) -> Result<()> + pub fn load_all(&mut self, source: &mut S) -> Result<()> where S: StatefulDecode, ::Reader: ReadSeek, @@ -210,7 +278,7 @@ where D: DataDictionary, D: Clone, { - self.load(&mut *source)?; + self.load_all(&mut *source)?; let entries: Result<_> = self.entries.into_values() .map(|elem| elem.into_mem(&mut *source).map(|elem| (elem.header().tag, elem))) @@ -275,6 +343,7 @@ mod tests { .expect("Failed to load lazy element"); match lazy_element.value { MaybeValue::Unloaded => panic!("element should be loaded"), + MaybeValue::PixelSequence { .. } => unreachable!("element is not a pixel sequence"), MaybeValue::Loaded { value, dirty } => { assert_eq!(value.to_clean_str().unwrap(), "Doe^John"); assert_eq!(dirty, false); @@ -324,6 +393,7 @@ mod tests { .expect("Failed to load lazy element"); match lazy_element.value { MaybeValue::Unloaded => panic!("element should be loaded"), + MaybeValue::PixelSequence { .. } => unreachable!("element is not a pixel sequence"), MaybeValue::Loaded { value, dirty } => { assert_eq!(value.to_clean_str().unwrap(), "Doe^John"); assert_eq!(dirty, false); @@ -408,7 +478,7 @@ mod tests { // load nested object nested_object - .load(&mut parser) + .load_all(&mut parser) .expect("Failed to load nested object"); for e in nested_object.entries.values() { diff --git a/object/src/lazy/mod.rs b/object/src/lazy/mod.rs index 5dfebd971..9cab20e17 100644 --- a/object/src/lazy/mod.rs +++ b/object/src/lazy/mod.rs @@ -96,6 +96,8 @@ pub enum Error { PositionToValue { source: StatefulDecodeError }, /// Could not read value from data source ReadValue { source: StatefulDecodeError }, + /// Could not read fragment from data source + ReadFragment { source: StatefulDecodeError }, /// Could not read pixel data offset table ReadOffsetTable { source: StatefulDecodeError }, #[snafu(display("Unexpected token {:?}", token))] @@ -103,6 +105,11 @@ pub enum Error { token: dicom_parser::dataset::LazyDataTokenRepr, backtrace: Backtrace, }, + #[snafu(display("Pixel data fragment #{} was expected to be loaded, but was not", index))] + UnloadedFragment { + index: u32, + backtrace: Backtrace, + }, /// Premature data set end PrematureEnd { backtrace: Backtrace }, } From a3401202063c27805e5e6822872f1c5623ab9f11 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Thu, 30 Sep 2021 08:52:52 +0100 Subject: [PATCH 11/12] [lazy] WIP - Add more core, remove commented out code --- object/src/lazy/element.rs | 27 ++-- object/src/lazy/mod.rs | 315 ++++++++++++------------------------- object/src/lazy/record.rs | 11 +- 3 files changed, 119 insertions(+), 234 deletions(-) diff --git a/object/src/lazy/element.rs b/object/src/lazy/element.rs index 548dc8348..705aa16b3 100644 --- a/object/src/lazy/element.rs +++ b/object/src/lazy/element.rs @@ -15,15 +15,18 @@ use crate::{ type Result = std::result::Result; -/// A lazy element, which may be loaded in memory or not. +/// A DICOM element, which may be loaded in memory or not. +/// +/// This type alone does not have the means to load the element's value. +/// A byte source must be provided whenever a load is attempted. #[derive(Debug, Clone)] -pub struct LazyElement { +pub struct MaybeElement { header: DataElementHeader, position: u64, value: MaybeValue, } -impl LazyElement +impl MaybeElement where D: DataDictionary, D: Clone, @@ -31,7 +34,7 @@ where /// Create a new lazy element with the given properties, /// without loading its value in memory. pub fn new_unloaded(header: DataElementHeader, position: u64) -> Self { - LazyElement { + MaybeElement { header, position, value: MaybeValue::Unloaded, @@ -41,7 +44,7 @@ where /// Create a new lazy element with the given properties, /// already loaded with an in-memory value. pub fn new_loaded(header: DataElementHeader, position: u64, value: LoadedValue) -> Self { - LazyElement { + MaybeElement { header, position, value: MaybeValue::Loaded { @@ -238,7 +241,7 @@ pub type LoadedValue = DicomValue, MaybeFragment>; #[derive(Debug, Clone)] pub struct LazyNestedObject { /// the element dictionary - entries: BTreeMap>, + entries: BTreeMap>, /// the data attribute dictionary dict: D, /// The length of the DICOM object in bytes. @@ -307,7 +310,7 @@ mod tests { use crate::mem::InMemElement; use crate::InMemDicomObject; - use super::LazyElement; + use super::MaybeElement; use super::LazyNestedObject; use super::MaybeValue; @@ -331,7 +334,7 @@ mod tests { ); // Create an unloaded lazy element (actual value starts at 8) - let mut lazy_element: LazyElement = LazyElement { + let mut lazy_element: MaybeElement = MaybeElement { header: DataElementHeader::new(Tag(0x0010, 0x0010), VR::PN, Length(8)), position: 8, value: MaybeValue::Unloaded, @@ -381,7 +384,7 @@ mod tests { parser.seek(66).expect("Failed to seek to end of file"); // Create an unloaded lazy element - let mut lazy_element: LazyElement = LazyElement { + let mut lazy_element: MaybeElement = MaybeElement { header: DataElementHeader::new(Tag(0x0010, 0x0010), VR::PN, Length(8)), position: 38, value: MaybeValue::Unloaded, @@ -448,7 +451,7 @@ mod tests { // CodeValue element ( Tag(0x0008, 0x0100), - LazyElement::new_unloaded( + MaybeElement::new_unloaded( DataElementHeader::new(Tag(0x0008, 0x0100), VR::SH, Length(8)), 28, ), @@ -456,7 +459,7 @@ mod tests { // CodingSchemeDesignator element ( Tag(0x0008, 0x0102), - LazyElement::new_unloaded( + MaybeElement::new_unloaded( DataElementHeader::new(Tag(0x0008, 0x0102), VR::SH, Length(4)), 44, ), @@ -464,7 +467,7 @@ mod tests { // CodeMeaning element ( Tag(0x0008, 0x0104), - LazyElement::new_unloaded( + MaybeElement::new_unloaded( DataElementHeader::new(Tag(0x0008, 0x0104), VR::LO, Length(10)), 56, ), diff --git a/object/src/lazy/mod.rs b/object/src/lazy/mod.rs index 9cab20e17..325bebfba 100644 --- a/object/src/lazy/mod.rs +++ b/object/src/lazy/mod.rs @@ -6,11 +6,13 @@ //! A pointer to the original data source is kept for future access, //! so that the element is fetched and its value is decoded on demand. +use dicom_dictionary_std::StandardDataDictionary; use dicom_transfer_syntax_registry::TransferSyntaxRegistry; use smallvec::SmallVec; use std::fs::File; use std::io::{BufReader, Read}; use std::path::Path; +use std::cell::RefCell; use std::{collections::BTreeMap, io::Seek, io::SeekFrom}; use crate::DicomObject; @@ -35,7 +37,8 @@ use dicom_parser::{ }; use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; -pub use self::element::{LazyElement, LazyNestedObject, MaybeValue}; +use self::element::LoadedValue; +pub use self::element::{MaybeElement, LazyNestedObject, MaybeValue}; use self::record::{DataSetTable, RecordBuildingDataSetReader}; pub(crate) mod element; @@ -112,13 +115,20 @@ pub enum Error { }, /// Premature data set end PrematureEnd { backtrace: Backtrace }, + #[snafu(display("No such data element with tag {}", tag))] + NoSuchDataElementTag { tag: Tag, backtrace: Backtrace }, } pub type Result = std::result::Result; +/// The options for opening a DICOM file +/// as a lazily evaluated object. #[derive(Debug, Default, Clone, PartialEq)] -pub struct OpenFileOptions { +#[non_exhaustive] +pub struct OpenFileOptions { + /// the data dictionary to use pub dictionary: D, + /// the transfer syntax registry to use pub ts_index: T, } @@ -128,7 +138,7 @@ pub struct LazyDicomObject { /// the binary source to fetch DICOM data from source: S, /// the element dictionary at this level - entries: BTreeMap>, + entries: BTreeMap>, /// the full record table records: DataSetTable, /// the data element dictionary @@ -141,22 +151,41 @@ pub struct LazyDicomObject { pub type LazyFileDicomObject = FileDicomObject, D>>; -/* -impl LazyFileDicomObject { +/// A temporary reference to a DICOM element which fetches its value on demand. +#[derive(Debug)] +pub struct LazyElement<'a, S: 'a, D> { + source: &'a mut S, + elem: &'a mut MaybeElement, +} + +impl<'a, S, D> LazyElement<'a, S, D> +where + S: StatefulDecode, + ::Reader: ReadSeek, + D: Clone + DataDictionary, +{ + + pub fn to_value(self) -> Result> { + self.elem.load(self.source)?; + + todo!() + } +} + +impl LazyFileDicomObject { /// Load a new lazy DICOM object from a file pub fn from_file

(path: P) -> Result where P: AsRef, - D: DataDictionary, - D: Clone, - D: Default, { Self::from_file_with( path, OpenFileOptions::<_, TransferSyntaxRegistry>::default(), ) } +} +impl LazyFileDicomObject { /// Load a new lazy DICOM object from a file, /// using the given options. pub fn from_file_with(path: P, options: OpenFileOptions) -> Result @@ -186,9 +215,9 @@ impl LazyFileDicomObject { let meta = FileMetaTable::from_reader(&mut file).context(ParseMetaDataSet)?; // read rest of data according to metadata, feed it to object - if let Some(ts) = options.ts_index.get(&meta.transfer_syntax) { + if let Some(ts) = ts_index.get(&meta.transfer_syntax) { let cs = SpecificCharacterSet::Default; - let mut dataset = + let dataset = LazyDataSetReader::new_with_dictionary(file, dictionary.clone(), ts, cs) .context(CreateParser)?; @@ -200,7 +229,7 @@ impl LazyFileDicomObject { LazyDicomObject::build_object( &mut dataset, &mut entries, - dictionary, + dictionary.clone(), false, Length::UNDEFINED, )?; @@ -224,18 +253,49 @@ impl LazyFileDicomObject { } } -impl HasLength for LazyDicomObject +impl LazyDicomObject +where + S: StatefulDecode, + ::Reader: ReadSeek, +{ + + pub fn read_dataset(reader: LazyDataSetReader) -> Result { + Self::read_dataset_with(reader, StandardDataDictionary) + } +} + + +impl LazyDicomObject where S: StatefulDecode, ::Reader: ReadSeek, D: DataDictionary, { - fn length(&self) -> Length { - Length::UNDEFINED + + pub fn read_dataset_with(reader: LazyDataSetReader, dict: D) -> Result { + todo!() } - fn is_empty(&self) -> bool { - self.entries.is_empty() + pub fn element<'a>(&'a mut self, tag: Tag) -> Result> { + let source = &mut self.source; + self.entries + .get_mut(&tag) + .ok_or_else(|| NoSuchDataElementTag { tag }.build()) + .map(move |elem| LazyElement { + source, + elem, + }) + } + + pub fn element_mut<'a>(&'a mut self, tag: Tag) -> Result> { + let source = &mut self.source; + self.entries + .get_mut(&tag) + .ok_or_else(|| NoSuchDataElementTag { tag }.build()) + .map(move |elem| LazyElement { + source, + elem, + }) } } @@ -249,223 +309,46 @@ where /// Build an object by consuming a data set parser. fn build_object( dataset: &mut RecordBuildingDataSetReader, - entries: &mut BTreeMap>, + entries: &mut BTreeMap>, dict: D, in_item: bool, len: Length, ) -> Result<()> { - let mut pixel_sequence_record = None; - - // perform a structured parsing of incoming tokens - while let Some(token) = dataset.advance() { - let token = token.context(ReadToken)?; - - let elem = match token { - LazyDataToken::PixelSequenceStart => { - pixel_sequence_record = Some(LazyDicomObject::build_encapsulated_data(&mut *dataset)?); - continue; - } - LazyDataToken::ElementHeader(header) => { - // fetch respective value, place it in the entries - let next_token = dataset.advance().context(MissingElementValue)?; - match next_token.context(ReadToken)? { - t @ LazyDataToken::LazyValue { header, decoder } => LazyElement::new_unloaded(header, decoder.position()), - token => { - return UnexpectedToken { token }.fail(); - } - } - } - LazyDataToken::SequenceStart { tag, len } => { - // delegate sequence building to another function - let items = Self::build_sequence(tag, len, &mut *dataset, &dict)?; - let position = 0; - LazyElement::new_loaded(DataElementHeader::new(tag, VR::SQ, len), 0, Value::Sequence { items, size: len }) - } - LazyDataToken::ItemEnd if in_item => { - // end of item, leave now - return Ok(()); - } - token => return UnexpectedToken { token }.fail(), - }; - entries.insert(elem.header.tag(), elem); - } - - Ok(()) + todo!() } +} - /// Construct a lazy record of pixel data fragment positions - /// and its offset table. - fn build_encapsulated_data( - dataset: &mut RecordBuildingDataSetReader, - ) -> Result { - // continue fetching tokens to retrieve: - // - the offset table - // - the positions of the various compressed fragments - let mut offset_table = None; - - let mut fragment_positions = C::new(); - - while let Some(token) = dataset.advance() { - match token.context(ReadToken)? { - LazyDataToken::LazyItemValue { len, decoder } => { - if offset_table.is_none() { - // retrieve the data into the offset table - let mut data = Vec::new(); - decoder.read_to_vec(len, &mut data).context(ReadOffsetTable)?; - offset_table = Some(data.into()); - } else { - fragment_positions.push(decoder.position()); - } - } - LazyDataToken::ItemEnd => { - // at the end of the first item ensure the presence of - // an empty offset_table here, so that the next items - // are seen as compressed fragments - if offset_table.is_none() { - offset_table = Some(C::new()) - } - } - LazyDataToken::ItemStart { len: _ } => { /* no-op */ } - LazyDataToken::SequenceEnd => { - // end of pixel data - break; - } - // the following variants are unexpected - token @ LazyDataToken::ElementHeader(_) - | token @ LazyDataToken::PixelSequenceStart - | token @ LazyDataToken::SequenceStart { .. } - | token @ LazyDataToken::LazyValue { .. } => { - return UnexpectedToken { token }.fail(); - } - } - } - - Ok(PixelSequenceRecord { - offset_table: offset_table.unwrap_or_default(), - fragment_positions, - }) - } - - /// Build a DICOM sequence by consuming a data set parser. - fn build_sequence( - _tag: Tag, - _len: Length, - dataset: &mut I, - dict: &D, - ) -> Result>> - where - I: Iterator>, - { - let mut items: C<_> = SmallVec::new(); - while let Some(token) = dataset.next() { - match token.context(ReadToken)? { - DataToken::ItemStart { len } => { - items.push(Self::build_nested_object( - &mut *dataset, - *dict.clone(), - true, - len, - )?); - } - DataToken::SequenceEnd => { - return Ok(items); - } - token => return UnexpectedToken { token }.fail(), - }; - } - - // iterator fully consumed without a sequence delimiter - PrematureEnd.fail() +impl HasLength for LazyDicomObject +where + S: StatefulDecode, + ::Reader: ReadSeek, + D: DataDictionary, +{ + fn length(&self) -> Length { + Length::UNDEFINED } - /// Build a nested object by consuming a data set parser. - fn build_nested_object( - dataset: &mut LazyDataSetReader, - dict: D, - in_item: bool, - len: Length, - ) -> Result { - let mut entries: BTreeMap> = BTreeMap::new(); - // perform a structured parsing of incoming tokens - while let Some(token) = dataset.advance() { - let elem = match token.context(ReadToken)? { - LazyDataToken::PixelSequenceStart => { - let value = LazyDicomObject::build_encapsulated_data(&mut *dataset)?; - LazyElement::new_loaded( - DataElementHeader::new(Tag(0x7fe0, 0x0010), VR::OB, todo!()), - todo!(), - value, - ) - } - LazyDataToken::ElementHeader(header) => { - // fetch respective value, place it in the entries - let next_token = dataset.advance().context(MissingElementValue)?; - match next_token.context(ReadToken)? { - t @ LazyDataToken::LazyValue { header, decoder } => { - // TODO choose whether to eagerly fetch the elemet or keep it unloaded - LazyElement { - header, - position: decoder.position(), - value: MaybeValue::Unloaded, - } - }, - token => { - return UnexpectedToken { token }.fail(); - } - } - } - LazyDataToken::SequenceStart { tag, len } => { - // delegate sequence building to another function - let items = Self::build_sequence(tag, len, dataset, &dict)?; - - // !!! Lazy Element does not fit the sequence system - todo!() - //LazyElement::new(tag, VR::SQ, Value::Sequence { items, size: len }) - } - LazyDataToken::ItemEnd if in_item => { - // end of item, leave now - return Ok(LazyNestedObject { entries, dict, len }); - } - token => return UnexpectedToken { token }.fail(), - }; - entries.insert(elem.header.tag(), elem); - } - - Ok(LazyNestedObject { entries, dict, len }) + fn is_empty(&self) -> bool { + self.entries.is_empty() } } -*/ - #[cfg(test)] mod tests { + use std::io::Cursor; + use super::*; - use crate::InMemDicomObject; - use crate::{meta::FileMetaTableBuilder, open_file, Error}; use byteordered::Endianness; - use dicom_core::value::PrimitiveValue; use dicom_core::{ dicom_value, header::{DataElementHeader, Length, VR}, }; use dicom_encoding::{ decode::{basic::BasicDecoder, implicit_le::ImplicitVRLittleEndianDecoder}, - encode::EncoderFor, text::DefaultCharacterSetCodec, - transfer_syntax::implicit_le::ImplicitVRLittleEndianEncoder, }; - use dicom_parser::{dataset::IntoTokens, StatefulDecoder}; - use tempfile; - - fn assert_obj_eq(obj1: &InMemDicomObject, obj2: &InMemDicomObject) - where - D: std::fmt::Debug, - { - // debug representation because it makes a stricter comparison and - // assumes that Undefined lengths are equal. - assert_eq!(format!("{:?}", obj1), format!("{:?}", obj2)) - } + use dicom_parser::StatefulDecoder; #[test] #[ignore] @@ -478,26 +361,26 @@ mod tests { let decoder = ImplicitVRLittleEndianDecoder::default(); let text = Box::new(DefaultCharacterSetCodec) as Box<_>; - let mut cursor = &data_in[..]; + let mut cursor = Cursor::new(&data_in[..]); let parser = StatefulDecoder::new( &mut cursor, decoder, BasicDecoder::new(Endianness::Little), text, ); + let dataset = LazyDataSetReader::new(parser); - let obj = todo!(); // LazyDicomObject::read_dataset(parser).unwrap(); - - let mut gt = InMemDicomObject::create_empty(); + let mut obj: LazyDicomObject<_, _> = LazyDicomObject::read_dataset(dataset).unwrap(); let patient_name = DataElement::new( Tag(0x0010, 0x0010), VR::PN, - dicom_value!(Strs, ["Doe^John"]), + DicomValue::new(dicom_value!(Strs, ["Doe^John"])), ); - gt.put(patient_name); - //assert_eq!(obj, gt); + let lazy_patient_name = obj.element(Tag(0x0010, 0x0010)).expect("Failed to retrieve element"); + + } } diff --git a/object/src/lazy/record.rs b/object/src/lazy/record.rs index 2f48b6290..7227244bb 100644 --- a/object/src/lazy/record.rs +++ b/object/src/lazy/record.rs @@ -260,21 +260,20 @@ impl DataSetRecordBuilder { /// /// [1]: dicom_parser::dataset::lazy_read::LazyDataSetReader #[derive(Debug)] -pub struct RecordBuildingDataSetReader<'a, S, D> { +pub struct RecordBuildingDataSetReader<'a, S> { builder: &'a mut DataSetTableBuilder, - reader: LazyDataSetReader, + reader: LazyDataSetReader, } -impl<'a, S, D> RecordBuildingDataSetReader<'a, S, D> +impl<'a, S, D> RecordBuildingDataSetReader<'a, S> where S: StatefulDecode, - D: DataDictionary, { - pub fn new(reader: LazyDataSetReader, builder: &'a mut DataSetTableBuilder) -> Self { + pub fn new(reader: LazyDataSetReader, builder: &'a mut DataSetTableBuilder) -> Self { RecordBuildingDataSetReader { builder, reader } } - pub fn into_inner(self) -> LazyDataSetReader { + pub fn into_inner(self) -> LazyDataSetReader { self.reader } From 751f258c99a60ffe98ef504f26dfb6b820d17fd5 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Fri, 23 Sep 2022 10:52:01 +0100 Subject: [PATCH 12/12] [object] fix lazy object against latest version - update snafu usage - drop type parameter `D` where it was dropped - replace use of `to_clean_str` - fix other things - clean up imports --- object/src/lazy/element.rs | 14 +++--- object/src/lazy/mod.rs | 93 +++++++++++++------------------------- object/src/lazy/record.rs | 10 ++-- 3 files changed, 44 insertions(+), 73 deletions(-) diff --git a/object/src/lazy/element.rs b/object/src/lazy/element.rs index 705aa16b3..8b60e7692 100644 --- a/object/src/lazy/element.rs +++ b/object/src/lazy/element.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use super::{PositionToValue as PositionToValueSnafu, ReadValue as ReadValueSnafu, ReadFragment as ReadFragmentSnafu, UnloadedFragment as UnloadedFragmentSnafu}; +use super::{PositionToValueSnafu, ReadValueSnafu, ReadFragmentSnafu, UnloadedFragmentSnafu}; use dicom_core::{DataDictionary, DataElementHeader, DicomValue, Length, Tag, header::HasLength}; use dicom_dictionary_std::StandardDataDictionary; use dicom_parser::StatefulDecode; @@ -303,7 +303,7 @@ mod tests { use dicom_encoding::decode::basic::BasicDecoder; use dicom_encoding::decode::explicit_le::ExplicitVRLittleEndianDecoder; use dicom_encoding::decode::implicit_le::ImplicitVRLittleEndianDecoder; - use dicom_encoding::text::DefaultCharacterSetCodec; + use dicom_encoding::text::SpecificCharacterSet; use dicom_parser::StatefulDecode; use dicom_parser::StatefulDecoder; @@ -324,7 +324,7 @@ mod tests { // Create a stateful reader for the data let decoder = ImplicitVRLittleEndianDecoder::default(); - let text = Box::new(DefaultCharacterSetCodec) as Box<_>; + let text = SpecificCharacterSet::Default; let mut cursor = std::io::Cursor::new(data_in); let mut parser = StatefulDecoder::new( &mut cursor, @@ -348,7 +348,7 @@ mod tests { MaybeValue::Unloaded => panic!("element should be loaded"), MaybeValue::PixelSequence { .. } => unreachable!("element is not a pixel sequence"), MaybeValue::Loaded { value, dirty } => { - assert_eq!(value.to_clean_str().unwrap(), "Doe^John"); + assert_eq!(value.to_str().unwrap(), "Doe^John"); assert_eq!(dirty, false); } } @@ -371,7 +371,7 @@ mod tests { // Create a stateful reader for the data let decoder = ImplicitVRLittleEndianDecoder::default(); - let text = Box::new(DefaultCharacterSetCodec) as Box<_>; + let text = SpecificCharacterSet::Default; let mut cursor = std::io::Cursor::new(data_in); let mut parser = StatefulDecoder::new( &mut cursor, @@ -398,7 +398,7 @@ mod tests { MaybeValue::Unloaded => panic!("element should be loaded"), MaybeValue::PixelSequence { .. } => unreachable!("element is not a pixel sequence"), MaybeValue::Loaded { value, dirty } => { - assert_eq!(value.to_clean_str().unwrap(), "Doe^John"); + assert_eq!(value.to_str().unwrap(), "Doe^John"); assert_eq!(dirty, false); } } @@ -433,7 +433,7 @@ mod tests { // Create a stateful reader for the data let decoder = ExplicitVRLittleEndianDecoder::default(); - let text = Box::new(DefaultCharacterSetCodec) as Box<_>; + let text = SpecificCharacterSet::Default; let mut cursor = std::io::Cursor::new(DATA_IN); let mut parser = StatefulDecoder::new( &mut cursor, diff --git a/object/src/lazy/mod.rs b/object/src/lazy/mod.rs index 325bebfba..25c1926a4 100644 --- a/object/src/lazy/mod.rs +++ b/object/src/lazy/mod.rs @@ -8,37 +8,27 @@ use dicom_dictionary_std::StandardDataDictionary; use dicom_transfer_syntax_registry::TransferSyntaxRegistry; -use smallvec::SmallVec; +use std::collections::BTreeMap; use std::fs::File; -use std::io::{BufReader, Read}; +use std::io::Read; use std::path::Path; -use std::cell::RefCell; -use std::{collections::BTreeMap, io::Seek, io::SeekFrom}; -use crate::DicomObject; -use crate::lazy::record::{DataSetRecord, DataSetRecordBuilder, DataSetTableBuilder}; +use crate::lazy::record::DataSetTableBuilder; use crate::{meta::FileMetaTable, util::ReadSeek, FileDicomObject}; -use dicom_core::header::{HasLength, Header}; -use dicom_core::value::{Value, C}; -use dicom_core::{ - dictionary::{DataDictionary, DictionaryEntry}, - DataElementHeader, DicomValue, -}; -use dicom_core::{DataElement, Length, Tag, VR}; -use dicom_encoding::text::{SpecificCharacterSet, TextCodec}; +use dicom_core::dictionary::DataDictionary; +use dicom_core::header::HasLength; +use dicom_core::{Length, Tag}; +use dicom_encoding::text::SpecificCharacterSet; use dicom_encoding::transfer_syntax::TransferSyntaxIndex; +use dicom_parser::DynStatefulDecoder; use dicom_parser::{ dataset::lazy_read::LazyDataSetReader, stateful::decode::Error as StatefulDecodeError, }; use dicom_parser::{dataset::read::Error as ParserError, StatefulDecode}; -use dicom_parser::{ - dataset::{DataToken, LazyDataToken}, - DynStatefulDecoder, -}; -use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; +use snafu::{Backtrace, ResultExt, Snafu}; use self::element::LoadedValue; -pub use self::element::{MaybeElement, LazyNestedObject, MaybeValue}; +pub use self::element::{LazyNestedObject, MaybeElement, MaybeValue}; use self::record::{DataSetTable, RecordBuildingDataSetReader}; pub(crate) mod element; @@ -108,11 +98,11 @@ pub enum Error { token: dicom_parser::dataset::LazyDataTokenRepr, backtrace: Backtrace, }, - #[snafu(display("Pixel data fragment #{} was expected to be loaded, but was not", index))] - UnloadedFragment { - index: u32, - backtrace: Backtrace, - }, + #[snafu(display( + "Pixel data fragment #{} was expected to be loaded, but was not", + index + ))] + UnloadedFragment { index: u32, backtrace: Backtrace }, /// Premature data set end PrematureEnd { backtrace: Backtrace }, #[snafu(display("No such data element with tag {}", tag))] @@ -164,7 +154,6 @@ where ::Reader: ReadSeek, D: Clone + DataDictionary, { - pub fn to_value(self) -> Result> { self.elem.load(self.source)?; @@ -201,25 +190,24 @@ impl LazyFileDicomObject { } = options; let path = path.as_ref(); - let mut file = File::open(path).with_context(|| OpenFile { filename: path })?; + let mut file = File::open(path).with_context(|_| OpenFileSnafu { filename: path })?; // skip preamble { let mut buf = [0u8; 128]; // skip the preamble file.read_exact(&mut buf) - .with_context(|| ReadFile { filename: path })?; + .with_context(|_| ReadFileSnafu { filename: path })?; } // read metadata header - let meta = FileMetaTable::from_reader(&mut file).context(ParseMetaDataSet)?; + let meta = FileMetaTable::from_reader(&mut file).context(ParseMetaDataSetSnafu)?; // read rest of data according to metadata, feed it to object if let Some(ts) = ts_index.get(&meta.transfer_syntax) { let cs = SpecificCharacterSet::Default; let dataset = - LazyDataSetReader::new_with_dictionary(file, dictionary.clone(), ts, cs) - .context(CreateParser)?; + LazyDataSetReader::new_with_ts_cs(file, ts, cs).context(CreateParserSnafu)?; let mut builder = DataSetTableBuilder::new(); let mut entries = BTreeMap::new(); @@ -245,7 +233,7 @@ impl LazyFileDicomObject { }, }) } else { - UnsupportedTransferSyntax { + UnsupportedTransferSyntaxSnafu { uid: meta.transfer_syntax, } .fail() @@ -258,21 +246,18 @@ where S: StatefulDecode, ::Reader: ReadSeek, { - pub fn read_dataset(reader: LazyDataSetReader) -> Result { Self::read_dataset_with(reader, StandardDataDictionary) } } - impl LazyDicomObject where S: StatefulDecode, ::Reader: ReadSeek, D: DataDictionary, { - - pub fn read_dataset_with(reader: LazyDataSetReader, dict: D) -> Result { + pub fn read_dataset_with(reader: LazyDataSetReader, dict: D) -> Result { todo!() } @@ -280,22 +265,16 @@ where let source = &mut self.source; self.entries .get_mut(&tag) - .ok_or_else(|| NoSuchDataElementTag { tag }.build()) - .map(move |elem| LazyElement { - source, - elem, - }) + .ok_or_else(|| NoSuchDataElementTagSnafu { tag }.build()) + .map(move |elem| LazyElement { source, elem }) } pub fn element_mut<'a>(&'a mut self, tag: Tag) -> Result> { let source = &mut self.source; self.entries .get_mut(&tag) - .ok_or_else(|| NoSuchDataElementTag { tag }.build()) - .map(move |elem| LazyElement { - source, - elem, - }) + .ok_or_else(|| NoSuchDataElementTagSnafu { tag }.build()) + .map(move |elem| LazyElement { source, elem }) } } @@ -305,10 +284,9 @@ where ::Reader: ReadSeek, D: DataDictionary, { - /// Build an object by consuming a data set parser. fn build_object( - dataset: &mut RecordBuildingDataSetReader, + dataset: &mut RecordBuildingDataSetReader, entries: &mut BTreeMap>, dict: D, in_item: bool, @@ -340,14 +318,8 @@ mod tests { use super::*; use byteordered::Endianness; - use dicom_core::{ - dicom_value, - header::{DataElementHeader, Length, VR}, - }; - use dicom_encoding::{ - decode::{basic::BasicDecoder, implicit_le::ImplicitVRLittleEndianDecoder}, - text::DefaultCharacterSetCodec, - }; + use dicom_core::{dicom_value, header::VR, DataElement, DicomValue}; + use dicom_encoding::decode::{basic::BasicDecoder, implicit_le::ImplicitVRLittleEndianDecoder}; use dicom_parser::StatefulDecoder; #[test] @@ -360,7 +332,7 @@ mod tests { ]; let decoder = ImplicitVRLittleEndianDecoder::default(); - let text = Box::new(DefaultCharacterSetCodec) as Box<_>; + let text = SpecificCharacterSet::Default; let mut cursor = Cursor::new(&data_in[..]); let parser = StatefulDecoder::new( &mut cursor, @@ -378,9 +350,8 @@ mod tests { DicomValue::new(dicom_value!(Strs, ["Doe^John"])), ); - let lazy_patient_name = obj.element(Tag(0x0010, 0x0010)).expect("Failed to retrieve element"); - - + let lazy_patient_name = obj + .element(Tag(0x0010, 0x0010)) + .expect("Failed to retrieve element"); } - } diff --git a/object/src/lazy/record.rs b/object/src/lazy/record.rs index 7227244bb..9a6afb965 100644 --- a/object/src/lazy/record.rs +++ b/object/src/lazy/record.rs @@ -5,13 +5,13 @@ //! can be obtained from a parser //! by creating a [`DataSetTableBuilder`] //! and invoking [`update`] on each token. -//! +//! //! [`update`]: DataSetTableBuilder::update //! use std::{collections::BTreeMap, iter::FromIterator}; -use dicom_core::{value::C, DataDictionary, DataElementHeader, Length, Tag}; +use dicom_core::{value::C, DataElementHeader, Length, Tag}; use dicom_parser::{ dataset::{lazy_read::LazyDataSetReader, LazyDataToken}, StatefulDecode, @@ -265,7 +265,7 @@ pub struct RecordBuildingDataSetReader<'a, S> { reader: LazyDataSetReader, } -impl<'a, S, D> RecordBuildingDataSetReader<'a, S> +impl<'a, S> RecordBuildingDataSetReader<'a, S> where S: StatefulDecode, { @@ -307,7 +307,7 @@ mod tests { use dicom_core::{DataElementHeader, Length, Tag, VR}; use dicom_encoding::{ decode::{basic::LittleEndianBasicDecoder, explicit_le::ExplicitVRLittleEndianDecoder}, - text::DefaultCharacterSetCodec, + text::SpecificCharacterSet, }; use dicom_parser::{dataset::lazy_read::LazyDataSetReader, StatefulDecoder}; @@ -323,7 +323,7 @@ mod tests { source, ExplicitVRLittleEndianDecoder::default(), LittleEndianBasicDecoder::default(), - Box::new(DefaultCharacterSetCodec::default()) as Box<_>, + SpecificCharacterSet::Default, ); let mut dataset_reader = LazyDataSetReader::new(stateful_decoder);