
Commit

Merge branch 'release/2.6.0' into feature/issue-189
sliu008 authored Sep 15, 2023
2 parents cf35bbe + 5a336d6 commit ce49371
Showing 9 changed files with 117 additions and 31 deletions.
51 changes: 49 additions & 2 deletions .github/workflows/build-pipeline.yml
@@ -86,6 +86,29 @@ jobs:
run: |
poetry run pytest --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
- name: SonarCloud Scan
id: sonarcloud
uses: sonarsource/sonarcloud-github-action@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
with:
args: >
-Dsonar.organization=${{ github.repository_owner }}
-Dsonar.projectKey=${{ github.repository_owner }}_l2ss-py
-Dsonar.python.coverage.reportPaths=build/reports/coverage.xml
-Dsonar.sources=podaac/
-Dsonar.tests=tests/
-Dsonar.projectName=l2ss-py
-Dsonar.projectVersion=${{ env.software_version }}
-Dsonar.python.version=3.8,3.9,3.10
continue-on-error: true
- name: Wait to retry sonarcloud scan
if: steps.sonarcloud.outcome == 'failure'
run: |
sleep 40
- name: SonarCloud Scan Retry
id: sonarcloud-retry
if: steps.sonarcloud.outcome == 'failure'
uses: sonarsource/sonarcloud-github-action@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
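The two SonarCloud steps above form a manual retry: the first attempt is marked `continue-on-error: true`, and the retry step is gated on `steps.sonarcloud.outcome == 'failure'` after a fixed 40-second wait. A minimal Python sketch of the same retry shape (the `step` callable and the default back-off are illustrative, not part of the workflow):

```python
import time

def run_with_single_retry(step, backoff_seconds=40):
    """Run `step` once; on failure, wait a fixed back-off, then retry once."""
    try:
        return step()
    except Exception:
        time.sleep(backoff_seconds)
        return step()
```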
@@ -142,19 +165,43 @@ jobs:
git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}"
git push origin "${{ env.software_version }}"
- name: Publish UMM-S with new version
id: publish-umm-s
uses: podaac/cmr-umm-updater@0.5.0
if: |
github.ref == 'refs/heads/main' ||
startsWith(github.ref, 'refs/heads/release')
with:
umm-s-json: 'cmr/l2ss_cmr_umm_s.json'
umm-json: 'cmr/l2ss_cmr_umm_s.json'
provider: 'POCLOUD'
env: ${{ env.venue }}
version: ${{ env.software_version }}
timeout: 60
disable_removal: 'true'
umm_type: 'umm-s'
use_associations: 'false'
env:
LAUNCHPAD_TOKEN_SIT: ${{secrets.LAUNCHPAD_TOKEN_SIT}}
LAUNCHPAD_TOKEN_UAT: ${{secrets.LAUNCHPAD_TOKEN_UAT}}
LAUNCHPAD_TOKEN_OPS: ${{secrets.LAUNCHPAD_TOKEN_OPS}}
continue-on-error: true
- name: Wait to retry publishing umm-s
if: steps.publish-umm-s.outcome == 'failure'
run: |
sleep 120
- name: Publish UMM-S with new version retry
id: publish-umm-s-retry
uses: podaac/cmr-umm-updater@0.5.0
if: |
steps.publish-umm-s.outcome == 'failure'
with:
umm-json: 'cmr/l2ss_cmr_umm_s.json'
provider: 'POCLOUD'
env: ${{ env.venue }}
version: ${{ env.software_version }}
timeout: 60
disable_removal: 'true'
umm_type: 'umm-s'
use_associations: 'false'
env:
LAUNCHPAD_TOKEN_SIT: ${{secrets.LAUNCHPAD_TOKEN_SIT}}
LAUNCHPAD_TOKEN_UAT: ${{secrets.LAUNCHPAD_TOKEN_UAT}}
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -19,12 +19,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [issue/182](https://github.com/podaac/l2ss-py/issues/182): Update code so it doesn't remove '/' from attribute values.
- [issue/178](https://github.com/podaac/l2ss-py/issues/178): Add a function to make sure dimensions in the subset are the same as in the original file
- Update github action umm updater to 0.5.0
- [issue/172](https://github.com/podaac/l2ss-py/issues/172): Fix shapefile subsetting by passing the correct variable to the subset function.
### Deprecated
### Removed
### Fixed
- [issue/119](https://github.com/podaac/l2ss-py/issues/119): GPM variable dimensions are renamed from "phony_dim" to the dimension names in the variable attribute "DimensionNames" (a minimal sketch follows this list)
- [issue/184](https://github.com/podaac/l2ss-py/issues/184): bounding box argument at the command line is changed to allow decimal numbers (i.e., floats) for coordinates
- [issue/189](https://github.com/podaac/l2ss-py/issues/189): Fix temporal subsetting for SWOT collections: use the mask_and_scale argument when opening a granule file whose time fill value overflows, and use the original dataset encoding when writing the output file.
- [issue/194](https://github.com/podaac/l2ss-py/issues/194): Return coordinate variables if requested in a variable subset
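A minimal sketch of the issue/119 rename described above, assuming GPM granules carry a comma-separated `DimensionNames` attribute on each variable (illustrative helper, not the project's code):

```python
import xarray as xr

def rename_phony_dims(ds: xr.Dataset) -> xr.Dataset:
    """Rename 'phony_dim_N' dimensions using each variable's
    DimensionNames attribute, e.g. 'nscan,npixel' -> real names."""
    mapping = {}
    for var in ds.variables.values():
        dim_names = var.attrs.get("DimensionNames")
        if not dim_names:
            continue
        for dim, real_name in zip(var.dims, dim_names.split(",")):
            if str(dim).startswith("phony_dim"):
                mapping[dim] = real_name
    return ds.rename(mapping)
```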
### Security


9 changes: 8 additions & 1 deletion podaac/subsetter/group_handling.py
@@ -242,11 +242,18 @@ def walk_h5py(data_new, group):
walk_h5py(data_new, data_new.name)

# Get the instrument name from the file attributes
instrument = data_new['__HDFEOS__ADDITIONAL__FILE_ATTRIBUTES'].attrs['InstrumentName'].decode("utf-8")

additional_file_attributes = data_new.get('__HDFEOS__ADDITIONAL__FILE_ATTRIBUTES')
instrument = ""

if additional_file_attributes:
instrument = additional_file_attributes.attrs['InstrumentName'].decode("utf-8")
if 'OMI' in instrument:
hdf_type = 'OMI'
elif 'MLS' in instrument:
hdf_type = 'MLS'
else:
hdf_type = None

for del_group in del_group_list:
del data_new[del_group]
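The guard above replaces a direct attribute access that raised a KeyError for files lacking the HDF-EOS additional-attributes group. A standalone sketch of the same pattern, using the standard HDF-EOS path rather than the flattened '__HDFEOS__ADDITIONAL__FILE_ATTRIBUTES' name the real code sees:

```python
import h5py

def detect_hdf_type(path: str):
    """Return 'OMI', 'MLS', or None from the InstrumentName file
    attribute, tolerating files without the attributes group."""
    with h5py.File(path, "r") as data:
        attrs_group = data.get("HDFEOS/ADDITIONAL/FILE_ATTRIBUTES")
        instrument = ""
        if attrs_group is not None:
            raw = attrs_group.attrs.get("InstrumentName", b"")
            instrument = raw.decode("utf-8") if isinstance(raw, bytes) else str(raw)
    if "OMI" in instrument:
        return "OMI"
    if "MLS" in instrument:
        return "MLS"
    return None
```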
2 changes: 1 addition & 1 deletion podaac/subsetter/run_subsetter.py
@@ -35,7 +35,7 @@ def parse_args(args: list) -> tuple:
)
parser.add_argument(
'--bbox',
type=int,
type=float,
default=[-180, -90, 180, 90],
nargs=4,
action='store',
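That one-word type change is what allows fractional coordinates on the command line. A self-contained sketch of the fixed option (help text is illustrative):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--bbox',
    type=float,  # was type=int, which rejected decimal coordinates
    default=[-180, -90, 180, 90],
    nargs=4,
    action='store',
    help='west south east north',  # illustrative
)

args = parser.parse_args(['--bbox', '-170.5', '-10.25', '170.5', '10.25'])
print(args.bbox)  # [-170.5, -10.25, 170.5, 10.25]
```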
72 changes: 50 additions & 22 deletions podaac/subsetter/subset.py
@@ -24,7 +24,8 @@
import operator
import os
from itertools import zip_longest
from typing import List, Tuple, Union
from typing import List, Optional, Tuple, Union
import traceback
import dateutil
from dateutil import parser

@@ -744,7 +745,7 @@ def build_cond(str_timestamp, compare):
return temporal_cond


def get_base_group_names(lats): # pylint: disable=too-many-branches
def get_base_group_names(lats: List[str]) -> Tuple[List[str], List[Union[int, str]]]: # pylint: disable=too-many-branches
"""Latitude groups may be at different depths. This function gets the level
number that makes each latitude group unique from the other latitude names"""
unique_groups = []
@@ -756,7 +757,7 @@ def get_base_group_names(lats): # pylint: disable=too-many-branches
# put the groups in the same levels in the same list
group_list_transpose = np.array(group_list).T.tolist()

diff_count = ['' for i in range(len(group_list))]
diff_count = ['' for _ in range(len(group_list))]
group_count = 0
# loop through each group level
for my_list in group_list_transpose:
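A toy illustration of what the docstring describes, assuming the '__' group delimiter used elsewhere in subset.py (paths are hypothetical):

```python
# Two latitude variables living in different groups:
lats = ['__data_01__lat', '__data_02__lat']

# Split each path into its group levels, dropping the leading delimiter:
group_list = [lat.strip('_').split('__') for lat in lats]
print(group_list)  # [['data_01', 'lat'], ['data_02', 'lat']]

# Transposing lines up same-depth levels; the first level already differs
# ('data_01' vs 'data_02'), so that level number is what makes each
# latitude group name unique.
```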
@@ -788,7 +789,7 @@ def subset_with_bbox(dataset: xr.Dataset, # pylint: disable=too-many-branches
lat_var_names: list,
lon_var_names: list,
time_var_names: list,
variables=None,
variables: Optional[List[str]] = None,
bbox: np.ndarray = None,
cut: bool = True,
min_time: str = None,
@@ -806,6 +807,8 @@
Name of the longitude variables in the given dataset
time_var_names : list
Name of the time variables in the given dataset
variables : list[str]
List of variables to include in the result
bbox : np.array
Spatial bounding box to subset Dataset with.
cut : bool
@@ -836,13 +839,6 @@
else:
unique_groups = [f'{GROUP_DELIM}{GROUP_DELIM.join(x.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1])}' for x in lat_var_names]

# get unique group names for latitude coordinates
diff_count = [-1]
if len(lat_var_names) > 1:
unique_groups, diff_count = get_base_group_names(lat_var_names)
else:
unique_groups = [f'{GROUP_DELIM}{GROUP_DELIM.join(x.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1])}' for x in lat_var_names]

datasets = []
total_list = [] # don't include repeated variables
for lat_var_name, lon_var_name, time_var_name, diffs in zip( # pylint: disable=too-many-nested-blocks
@@ -853,6 +849,11 @@

if diffs == -1: # if the lat name is in the root group: take only the root group vars
group_vars = list(dataset.data_vars.keys())
# include the coordinate variables if the user asks for them
group_vars.extend([
var for var in list(dataset.coords.keys())
if var in variables and var not in group_vars
])
else:
group_vars = [
var for var in dataset.data_vars.keys()
@@ -861,13 +862,20 @@
# include variables that aren't in a latitude group
if variables:
group_vars.extend([
var for var in dataset.data_vars.keys()
if var in variables and var not in group_vars and var not in total_list and not var.startswith(tuple(unique_groups))
var for var in dataset.variables.keys()
if (var in variables and
var not in group_vars and
var not in total_list and
not var.startswith(tuple(unique_groups))
)
])
else:
group_vars.extend([
var for var in dataset.data_vars.keys()
if var not in group_vars and var not in total_list and not var.startswith(tuple(unique_groups))
if (var not in group_vars and
var not in total_list and
not var.startswith(tuple(unique_groups))
)
])

# group dimensions do not get carried over if unused by data variables (MLS nTotalTimes var)
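The `group_vars.extend` over the dataset coordinates above is the issue/194 change: coordinate variables the user explicitly requests are now kept rather than dropped because they are not data variables. A toy xarray illustration (hypothetical dataset):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"sst": ("time", np.array([280.1, 281.3]))},
    coords={"time": [0, 1], "lat": 10.0},
)
requested = ["sst", "lat"]

group_vars = list(ds.data_vars)  # ['sst']; coordinates are not data_vars
group_vars.extend(
    var for var in ds.coords
    if var in requested and var not in group_vars
)
print(group_vars)  # ['sst', 'lat']
```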
@@ -1065,6 +1073,33 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None):
xarray.coding.times.decode_cf_datetime = decode_cf_datetime


def open_dataset_test(file, args):
"""
Open a NetCDF dataset using xarray, handling specific exceptions.
This function attempts to open a NetCDF dataset using the provided arguments.
If an OverflowError with a specific message is encountered, it modifies the
'mask_and_scale' argument to True and retries opening the dataset.
Args:
file (str): Path to the NetCDF file.
args (dict): Dictionary of arguments to pass to xr.open_dataset.
Returns:
None: The function modifies the 'args' dictionary in place.
"""
try:
test_xr_open = xr.open_dataset(file, **args)
test_xr_open.close()
except Exception: # pylint: disable=broad-except
traceback_str = traceback.format_exc()

# Check for the specific OverflowError message
if "Python int too large to convert to C long" in traceback_str and "Failed to decode variable 'time': unable to decode time units" in traceback_str:
args["mask_and_scale"] = True
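A short usage sketch of the probe-then-open flow this function supports (file name and argument values are illustrative):

```python
import xarray as xr

args = {"decode_times": True}
open_dataset_test("granule.nc", args)  # may set args["mask_and_scale"] = True
dataset = xr.open_dataset("granule.nc", **args)
```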


def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
variables: Union[List[str], str, None] = (),
# pylint: disable=too-many-branches, disable=too-many-statements
@@ -1162,14 +1197,7 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,

if min_time or max_time:
args['decode_times'] = True
# check fill value and dtype, we know that this will cause an integer Overflow with xarray
if 'time' in nc_dataset.variables.keys():
try:
if nc_dataset['time'].getncattr('_FillValue') == nc.default_fillvals.get('f8') and \
nc_dataset['time'].dtype == 'float64':
args['mask_and_scale'] = True
except AttributeError:
pass
open_dataset_test(file_to_subset, args)

with xr.open_dataset(
xr.backends.NetCDF4DataStore(nc_dataset),
2 changes: 1 addition & 1 deletion podaac/subsetter/subset_harmony.py
@@ -136,7 +136,7 @@ def process_item(self, item: pystac.Item, source: harmony.message.Source) -> pystac.Item:
harmony_bbox = message.subset.bbox

if message.subset and message.subset.shape:
subset_params['shapefile_path'] = download(
subset_params['shapefile'] = download(
message.subset.shape.href,
temp_dir,
logger=self.logger,
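The key rename matters because the params dict is ultimately splatted into the subsetter call, so the dict key has to match the keyword parameter name. A minimal sketch, assuming the subsetter exposes a `shapefile` keyword as the changelog's issue/172 entry implies (the function body is a stand-in):

```python
def subset(file_to_subset, bbox, output_file, shapefile=None, **kwargs):
    """Stand-in for the real subsetter entry point."""
    print("shapefile:", shapefile)

subset_params = {"shapefile": "/tmp/aoi.shp"}  # was 'shapefile_path'
subset("in.nc", None, "out.nc", **subset_params)
```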
2 changes: 1 addition & 1 deletion podaac/subsetter/xarray_enhancements.py
@@ -54,7 +54,7 @@ def get_indexers_from_1d(cond: xr.Dataset) -> dict:

def get_indexers_from_nd(cond: xr.Dataset, cut: bool) -> dict:
"""
Get indexers from a dataset with more than 1 dimensions.
Get indexers from a dataset with more than one dimension.
Parameters
----------
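For reference, a toy version of the N-D case the corrected docstring refers to: collapse a 2-D boolean condition into per-dimension index arrays (dimension names are hypothetical; the real helper's result also depends on its cut flag):

```python
import numpy as np

cond = np.array([[False, True, False],
                 [False, True, True]])

rows = np.any(cond, axis=1).nonzero()[0]  # row indices with any match
cols = np.any(cond, axis=0).nonzero()[0]  # column indices with any match
indexers = {"y": rows, "x": cols}
print(indexers)  # {'y': array([0, 1]), 'x': array([1, 2])}
```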
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -12,7 +12,7 @@

[tool.poetry]
name = "l2ss-py"
version = "2.6.0rc2"
version = "2.6.0rc11"
description = "L2 Subsetter Service"
authors = ["podaac-tva <[email protected]>"]
license = "Apache-2.0"
4 changes: 3 additions & 1 deletion tests/test_subset.py
@@ -2141,7 +2141,7 @@ def test_tropomi_utc_time(data_dir, subset_output_dir, request):
"""Verify that the time UTC values are conserved in S5P files"""
trop_dir = join(data_dir, 'tropomi')
trop_file = 'S5P_OFFL_L2__CH4____20190319T110835_20190319T125006_07407_01_010202_20190325T125810_subset.nc4'
variable = ['/PRODUCT/time_utc']
variable = ['/PRODUCT/time_utc', '/PRODUCT/corner']
bbox = np.array(((-180, 180), (-90, 90)))
output_file = "{}_{}".format(request.node.name, trop_file)
shutil.copyfile(
@@ -2161,6 +2161,8 @@
assert in_nc_dataset.groups['PRODUCT'].variables['time_utc'][:].squeeze()[0] ==\
out_nc_dataset.groups['PRODUCT'].variables['time_utc'][:].squeeze()[0]

assert out_nc_dataset.groups['PRODUCT'].variables['corner']

def test_bad_time_unit(subset_output_dir):
"""TODO: give this function a description
"""
