Skip to content

Commit

Permalink
Merge pull request #859 from deepmodeling/zjgemi
Browse files: browse the repository at this point in the history
fix: optimize memory usage in HDF5Dataset
  • Loading branch information
zjgemi authored Sep 11, 2024
2 parents 7edb39a + 3fa4046 commit 0f2bcf5
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
9 changes: 7 additions & 2 deletions src/dflow/python/opio.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,8 +22,13 @@ class NestedDictPath(NestedDictBase):


class HDF5Dataset:
def __init__(self, dataset):
self.dataset = dataset
def __init__(self, file, key):
self.file = file
self.key = key

@property
def dataset(self):
return self.file[self.key]

def __deepcopy__(self, memo=None):
return self
Expand Down
2 changes: 1 addition & 1 deletion src/dflow/python/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,7 +101,7 @@ def handle_input_artifact(name, sign, slices=None, data_root="/tmp",
res = None
for path in path_object:
f = h5py.File(path, "r")
datasets = {k: HDF5Dataset(f[k]) for k in f.keys()}
datasets = {k: HDF5Dataset(f, k) for k in f.keys()}
datasets = expand(datasets)
if isinstance(datasets, list):
if res is None:
Expand Down

0 comments on commit 0f2bcf5

Please sign in to comment.