Handle loop blocks that declare columns but contain no data rows.

An empty loop buffer makes _parse_loop_data return None, and
_parse_loop_block then stores a one-row frame of None values keyed by the
loop header.

NOTE(review): the copy under review had its line breaks collapsed, so the
leading whitespace of the context lines below is reconstructed from Python
block structure - regenerate the diff against the working tree before
applying. TextBuffer.is_empty no longer carries the 100-line cutoff, so the
index hash recorded for starfile/utils.py no longer describes the post-image.

diff --git a/starfile/parser.py b/starfile/parser.py
index dd79654..e1a3c65 100644
--- a/starfile/parser.py
+++ b/starfile/parser.py
@@ -54,7 +54,8 @@ def _parse_data_block(self):
                 self._parse_loop_block()
                 return
 
-            elif line.startswith('data_') or self.crawler.current_line_number == self.n_lines:
+            elif line.startswith(
+                    'data_') or self.crawler.current_line_number == self.n_lines:
                 self._parse_simple_block_from_buffer()
                 return
 
@@ -74,6 +75,8 @@ def _parse_loop_block(self):
         self.crawler.increment_line_number()
         header = self._parse_loop_header()
         df = self._parse_loop_data()
+        if df is None:
+            df = pd.DataFrame({h: None for h in header}, index=[0])
         df.columns = header
         df.name = self._current_data_block_name
         self._add_dataframe(df)
@@ -157,7 +160,7 @@ def _parse_loop_header(self) -> List[str]:
 
         return self.text_buffer.buffer
 
-    def _parse_loop_data(self) -> pd.DataFrame:
+    def _parse_loop_data(self) -> Union[pd.DataFrame, None]:
         self.text_buffer.clear()
 
         while self.crawler.current_line_number <= self.n_lines:
@@ -167,8 +170,16 @@ def _parse_loop_data(self) -> pd.DataFrame:
             self.text_buffer.add_line(current_line)
             self.crawler.increment_line_number()
 
-        df = pd.read_csv(StringIO(self.text_buffer.as_str()), delim_whitespace=True, header=None,
-                         comment='#')
+        # check whether the buffer is empty
+        if self.text_buffer.is_empty:
+            return None
+
+        df = pd.read_csv(
+            StringIO(self.text_buffer.as_str()),
+            delim_whitespace=True,
+            header=None,
+            comment='#'
+        )
         return df
 
     def dataframes_to_numeric(self):
@@ -204,7 +215,3 @@ def dataframe_at_index(self, idx: int):
 
     def dataframes_as_list(self):
         return list(self.dataframes.values())
-
-
-
-
diff --git a/starfile/utils.py b/starfile/utils.py
index ef17325..736847b 100644
--- a/starfile/utils.py
+++ b/starfile/utils.py
@@ -12,6 +12,14 @@ class TextBuffer:
     def __init__(self):
         self.buffer = deque()
 
+    @property
+    def is_empty(self) -> bool:
+        """Return True if the buffer holds no lines or only blank lines."""
+        # all() over a generator stops at the first non-blank line, so a
+        # large populated buffer is never fully scanned and no size cutoff
+        # is needed; all() of an empty buffer is True.
+        return all(not item.strip() for item in self.buffer)
+
     def clear(self):
         self.buffer = deque()
 
diff --git a/tests/constants.py b/tests/constants.py
index 25333a7..e75ca1c 100644
--- a/tests/constants.py
+++ b/tests/constants.py
@@ -20,6 +20,7 @@
 non_existant_file = test_data_directory / 'non_existant_file.star'
 two_single_line_loop_blocks = test_data_directory / 'two_single_line_loop_blocks.star'
 two_basic_blocks = test_data_directory / 'two_basic_blocks.star'
+empty_loop = test_data_directory / 'empty_loop.star'
 
 # Example DataFrame for testing
 cars = {'Brand': ['Honda_Civic', 'Toyota_Corolla', 'Ford_Focus', 'Audi_A4'],
diff --git a/tests/data/empty_loop.star b/tests/data/empty_loop.star
new file mode 100644
index 0000000..ff80f9f
--- /dev/null
+++ b/tests/data/empty_loop.star
@@ -0,0 +1,6 @@
+
+data_
+
+loop_
+_rlnCoordinateX #1
+
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index 31f245a..c5f0232 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -5,10 +5,23 @@
 import pytest
 
 from starfile.parser import StarParser
-from .constants import loop_simple, postprocess, pipeline, rln31_style, optimiser_2d, optimiser_3d, \
-    sampling_2d, \
-    sampling_3d, single_line_middle_of_multiblock, single_line_end_of_multiblock, non_existant_file, \
-    loop_simple_columns, two_single_line_loop_blocks, two_basic_blocks
+from .constants import (
+    loop_simple,
+    postprocess,
+    pipeline,
+    rln31_style,
+    optimiser_2d,
+    optimiser_3d,
+    sampling_2d,
+    sampling_3d,
+    single_line_middle_of_multiblock,
+    single_line_end_of_multiblock,
+    non_existant_file,
+    loop_simple_columns,
+    two_single_line_loop_blocks,
+    two_basic_blocks,
+    empty_loop,
+)
 from .utils import generate_large_star_file, remove_large_star_file, million_row_file
 
 
@@ -59,9 +72,10 @@ def test_read_multiblock_file():
         assert isinstance(df, pd.DataFrame)
 
     assert s.dataframes['general'].shape == (1, 6)
-    assert all(['rlnFinalResolution', 'rlnBfactorUsedForSharpening', 'rlnUnfilteredMapHalf1',
-                'rlnUnfilteredMapHalf2', 'rlnMaskName', 'rlnRandomiseFrom']
-               == s.dataframes['general'].columns)
+    assert all(
+        ['rlnFinalResolution', 'rlnBfactorUsedForSharpening', 'rlnUnfilteredMapHalf1',
+         'rlnUnfilteredMapHalf2', 'rlnMaskName', 'rlnRandomiseFrom']
+        == s.dataframes['general'].columns)
 
     assert s.dataframes['fsc'].shape == (49, 7)
     assert s.dataframes['guinier'].shape == (49, 3)
@@ -197,3 +211,9 @@ def test_two_basic_blocks():
     assert len(parser.dataframes) == 2
     for df in parser.dataframes.values():
         assert df.shape == (1, 3)
+
+
+def test_empty_loop_block():
+    """Parsing an empty loop block should still yield exactly one dataframe."""
+    parser = StarParser(empty_loop)
+    assert len(parser.dataframes) == 1