Commit: Fixed Group by and regex for last modified time

leewujung committed Sep 11, 2024
1 parent e3a5dc5 commit acd01ea

Showing 2 changed files with 18 additions and 7 deletions.
18 changes: 13 additions & 5 deletions echodataflow/stages/subflows/initialization_flow.py

@@ -23,6 +23,7 @@
 import scipy
 import torch
 import xarray as xr
+import re
 from distributed import Client, LocalCluster
 from fastapi.encoders import jsonable_encoder
 from prefect import flow
@@ -604,13 +605,20 @@ def process_store_folder(config: Dataset, store: str, end_time: datetime):
     for file in files:
         try:
             basename = os.path.basename(file)
-            date_time_str = basename.split('-')[1].split('_')[0][1:] + basename.split('-')[2].split('_')[0]
-            file_time = datetime.strptime(date_time_str, "%Y%m%dT%H%M%S").replace(tzinfo=timezone.utc)
-            relevant_files[file_time] = file
-            timestamps.append(file_time)
+            match = re.search(r'D(\d{8})-T(\d{6})', basename)
+            if match:
+                date_str = match.group(1)
+                time_str = match.group(2)
+                file_time = datetime.strptime(date_str + time_str, "%Y%m%d%H%M%S").replace(tzinfo=timezone.utc)
+                relevant_files[file_time] = file
+                timestamps.append(file_time)
 
         except ValueError:
             continue
 
+    if len(timestamps) == 0:
+        raise ValueError('No files detected at source')
+
     if config.args.time_rounding_flag:
         end_time = floor_time(end_time, config.args.window_mins)
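For context, here is a minimal sketch (not part of the commit) of what the new parsing does, using a hypothetical raw-file name that follows the D<YYYYMMDD>-T<HHMMSS> convention. Unlike the old split-based parsing, which depended on the exact positions of hyphens and underscores in the name, the regex finds the timestamp wherever it appears, and files without one are simply skipped:

import re
from datetime import datetime, timezone

# Hypothetical filename; only the D<YYYYMMDD>-T<HHMMSS> stamp matters
basename = "vessel_survey-D20240911-T235959.raw"

match = re.search(r'D(\d{8})-T(\d{6})', basename)
if match:
    # Concatenate the date and time digits, then parse as UTC
    file_time = datetime.strptime(
        match.group(1) + match.group(2), "%Y%m%d%H%M%S"
    ).replace(tzinfo=timezone.utc)
    print(file_time)  # 2024-09-11 23:59:59+00:00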

@@ -619,7 +627,7 @@ def process_store_folder(config: Dataset, store: str, end_time: datetime):
     start_time = start_time.replace(second=0, microsecond=0)
 
     start_index = 0
-
+    i = 1
     for i, ts in enumerate(timestamps):
         if ts >= start_time:
             start_index += i - 1
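A note on the i = 1 initialization in the last hunk: in Python, the loop variable keeps its final value after a for loop, but it is never bound at all when the iterable is empty, so any later read of i would raise NameError. A tiny illustration (not from the commit):

# The loop variable survives the loop, but only if the loop ran at least once
timestamps = []
i = 1  # without this default, reading i after the loop would raise NameError
for i, ts in enumerate(timestamps):
    pass
print(i)  # 1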
7 changes: 5 additions & 2 deletions echodataflow/utils/file_utils.py

@@ -763,8 +763,11 @@ def fetch_slice_from_store(edf_group: Group, config: Dataset, options: Dict[str,
 
     store_slice = store_slice.sortby('ping_time')
 
-    # Group by ping_time and take the mean to handle overlaps
-    store_slice = store_slice.groupby('ping_time').mean()
+    try:
+        # Group by ping_time and take the mean to handle overlaps
+        store_slice = store_slice.groupby('ping_time').mean()
+    except Exception as e:
+        print('Failed to group the data')
 
     del store
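The try/except added here wraps the existing de-duplication step. A minimal sketch with a toy dataset (not from the commit) of what groupby('ping_time').mean() does to overlapping slices: rows sharing the same ping_time collapse into one row holding their mean.

import pandas as pd
import xarray as xr

# Toy dataset with a duplicated ping_time standing in for overlapping store slices
ds = xr.Dataset(
    {"Sv": ("ping_time", [1.0, 3.0, 5.0])},
    coords={"ping_time": pd.to_datetime([
        "2024-09-11 00:00:00",
        "2024-09-11 00:00:00",
        "2024-09-11 00:00:01",
    ])},
)

ds = ds.sortby("ping_time")
# Duplicate ping_time values collapse to their mean: Sv becomes [2.0, 5.0]
ds = ds.groupby("ping_time").mean()
print(ds["Sv"].values)  # [2. 5.]

Note that the except clause catches Exception broadly and only prints, so a failure here leaves store_slice un-deduplicated rather than aborting the flow.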
