From c99cbb5dc40c18be9c49e106f31855d2df2df155 Mon Sep 17 00:00:00 2001 From: sliu008 <69875423+sliu008@users.noreply.github.com> Date: Mon, 28 Aug 2023 11:38:30 -0700 Subject: [PATCH 01/24] Feature/update umm updater (#188) * /version 2.7.0a0 * update umm updater * remove conflicts * minor fix to umm updater parameters --------- Co-authored-by: l2ss-py bot --- .github/workflows/build-pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-pipeline.yml b/.github/workflows/build-pipeline.yml index afadcb94..76d54454 100644 --- a/.github/workflows/build-pipeline.yml +++ b/.github/workflows/build-pipeline.yml @@ -147,7 +147,7 @@ jobs: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') with: - umm-s-json: 'cmr/l2ss_cmr_umm_s.json' + umm-json: 'cmr/l2ss_cmr_umm_s.json' provider: 'POCLOUD' env: ${{ env.venue }} version: ${{ env.software_version }} From f378c799de99c6b93dc20084a7a085923def8574 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Mon, 28 Aug 2023 18:54:02 +0000 Subject: [PATCH 02/24] /version 2.6.0rc3 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 013f9b79..b9025c10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.6.0rc2" +version = "2.6.0rc3" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From b95d543ecdc2d70d3a4365bb9fba214c81b29b1c Mon Sep 17 00:00:00 2001 From: Curtis Banh <30607061+cqbanh@users.noreply.github.com> Date: Mon, 28 Aug 2023 19:07:45 -0700 Subject: [PATCH 03/24] added retry duplicate step - publish-umm-s --- .github/workflows/build-pipeline.yml | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-pipeline.yml b/.github/workflows/build-pipeline.yml index 76d54454..eb6a5f0f 100644 --- a/.github/workflows/build-pipeline.yml +++ 
b/.github/workflows/build-pipeline.yml @@ -142,6 +142,7 @@ jobs: git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}" git push origin "${{ env.software_version }}" - name: Publish UMM-S with new version + id: publish-umm-s uses: podaac/cmr-umm-updater@0.5.0 if: | github.ref == 'refs/heads/main' || @@ -154,7 +155,26 @@ jobs: timeout: 60 disable_removal: 'true' umm_type: 'umm-s' - use_associations: 'false' + use_associations: 'false' + env: + LAUNCHPAD_TOKEN_SIT: ${{secrets.LAUNCHPAD_TOKEN_SIT}} + LAUNCHPAD_TOKEN_UAT: ${{secrets.LAUNCHPAD_TOKEN_UAT}} + LAUNCHPAD_TOKEN_OPS: ${{secrets.LAUNCHPAD_TOKEN_OPS}} + continue-on-error: true + - name: Publish UMM-S with new version retry + id: publish-umm-s-retry + uses: podaac/cmr-umm-updater@0.5.0 + if: | + steps.publish-umm-s.outcome == 'failure' + with: + umm-json: 'cmr/l2ss_cmr_umm_s.json' + provider: 'POCLOUD' + env: ${{ env.venue }} + version: ${{ env.software_version }} + timeout: 60 + disable_removal: 'true' + umm_type: 'umm-s' + use_associations: 'false' env: LAUNCHPAD_TOKEN_SIT: ${{secrets.LAUNCHPAD_TOKEN_SIT}} LAUNCHPAD_TOKEN_UAT: ${{secrets.LAUNCHPAD_TOKEN_UAT}} From 9ef5bfa58c9704f60c6b97935ae140de5c1fd269 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 29 Aug 2023 02:46:13 +0000 Subject: [PATCH 04/24] /version 2.6.0rc4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b9025c10..8fa580e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.6.0rc3" +version = "2.6.0rc4" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From 0a1029c36b0cb45839bb64465b755f0201fa5a41 Mon Sep 17 00:00:00 2001 From: Curtis Banh <30607061+cqbanh@users.noreply.github.com> Date: Mon, 28 Aug 2023 22:51:54 -0700 Subject: [PATCH 05/24] added retry step for sonarcloud --- .github/workflows/build-pipeline.yml | 19 +++++++++++++++++++ 1 file 
changed, 19 insertions(+) diff --git a/.github/workflows/build-pipeline.yml b/.github/workflows/build-pipeline.yml index eb6a5f0f..ea8a7b62 100644 --- a/.github/workflows/build-pipeline.yml +++ b/.github/workflows/build-pipeline.yml @@ -86,6 +86,25 @@ jobs: run: | poetry run pytest --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/ - name: SonarCloud Scan + id: sonarcloud + uses: sonarsource/sonarcloud-github-action@master + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + with: + args: > + -Dsonar.organization=${{ github.repository_owner }} + -Dsonar.projectKey=${{ github.repository_owner }}_l2ss-py + -Dsonar.python.coverage.reportPaths=build/reports/coverage.xml + -Dsonar.sources=podaac/ + -Dsonar.tests=tests/ + -Dsonar.projectName=l2ss-py + -Dsonar.projectVersion=${{ env.software_version }} + -Dsonar.python.version=3.8,3.9,3.10 + continue-on-error: true + - name: SonarCloud Scan Retry + id: sonarcloud-retry + if: steps.sonarcloud.outcome == 'failure' uses: sonarsource/sonarcloud-github-action@master env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 06b3a8e50dafd160b16ae49089c8c3c865e3c0de Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 29 Aug 2023 06:08:38 +0000 Subject: [PATCH 06/24] /version 2.6.0rc5 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8fa580e5..65d87e4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.6.0rc4" +version = "2.6.0rc5" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From 8383e999f16e52bf1c1b2ef638a689731ebbf758 Mon Sep 17 00:00:00 2001 From: Curtis Banh <30607061+cqbanh@users.noreply.github.com> Date: Mon, 28 Aug 2023 23:28:31 -0700 Subject: [PATCH 07/24] added wait time before retrying failed steps --- 
.github/workflows/build-pipeline.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build-pipeline.yml b/.github/workflows/build-pipeline.yml index ea8a7b62..02c39db4 100644 --- a/.github/workflows/build-pipeline.yml +++ b/.github/workflows/build-pipeline.yml @@ -102,6 +102,10 @@ jobs: -Dsonar.projectVersion=${{ env.software_version }} -Dsonar.python.version=3.8,3.9,3.10 continue-on-error: true + - name: Wait to retry sonarcloud scan + if: steps.sonarcloud.outcome == 'failure' + run: | + sleep 20 - name: SonarCloud Scan Retry id: sonarcloud-retry if: steps.sonarcloud.outcome == 'failure' @@ -180,6 +184,10 @@ jobs: LAUNCHPAD_TOKEN_UAT: ${{secrets.LAUNCHPAD_TOKEN_UAT}} LAUNCHPAD_TOKEN_OPS: ${{secrets.LAUNCHPAD_TOKEN_OPS}} continue-on-error: true + - name: Wait to retry publishing umm-s + if: steps.publish-umm-s.outcome == 'failure' + run: | + sleep 20 - name: Publish UMM-S with new version retry id: publish-umm-s-retry uses: podaac/cmr-umm-updater@0.5.0 From 54f833f8970961ff1261d33b52f157511503330a Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 29 Aug 2023 06:42:23 +0000 Subject: [PATCH 08/24] /version 2.6.0rc6 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 65d87e4e..88e52766 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.6.0rc5" +version = "2.6.0rc6" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From abc793107c32c9da380638a2a7e0731848a39b96 Mon Sep 17 00:00:00 2001 From: sliu008 <69875423+sliu008@users.noreply.github.com> Date: Tue, 5 Sep 2023 11:37:54 -0700 Subject: [PATCH 09/24] Feature/issue 172 (#193) * fix shapefile subsetting * update changelog --- CHANGELOG.md | 1 + podaac/subsetter/subset_harmony.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c7c881c..10cb9396 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - [issue/182](https://github.com/podaac/l2ss-py/issues/182): Update code so doesn't remove '/' on attribute values. - [issue/178](https://github.com/podaac/l2ss-py/issues/178): Add function to make sure dimension in subset is same as original file +- [issue/172](https://github.com/podaac/l2ss-py/issues/172): Fix shapefile subsetting by passing correct variable to subset function. ### Deprecated ### Removed ### Fixed diff --git a/podaac/subsetter/subset_harmony.py b/podaac/subsetter/subset_harmony.py index d120f1cd..7dfc84ef 100644 --- a/podaac/subsetter/subset_harmony.py +++ b/podaac/subsetter/subset_harmony.py @@ -136,7 +136,7 @@ def process_item(self, item: pystac.Item, source: harmony.message.Source) -> pys harmony_bbox = message.subset.bbox if message.subset and message.subset.shape: - subset_params['shapefile_path'] = download( + subset_params['shapefile'] = download( message.subset.shape.href, temp_dir, logger=self.logger, From 4687f9888f82c35a4793b9f23fbba29d400cd8af Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 5 Sep 2023 18:56:34 +0000 Subject: [PATCH 10/24] /version 2.7.0a2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4c382252..f0301e8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.7.0a1" +version = "2.7.0a2" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From 87dd39a1d52b7d745fecb47066035a83d3d90231 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 5 Sep 2023 19:49:55 +0000 Subject: [PATCH 11/24] /version 2.6.0rc7 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 88e52766..f9f08f54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name
= "l2ss-py" -version = "2.6.0rc6" +version = "2.6.0rc7" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From a9d0229584020bb20122420e8f4189519f1398ce Mon Sep 17 00:00:00 2001 From: Curtis Banh <30607061+cqbanh@users.noreply.github.com> Date: Tue, 5 Sep 2023 13:34:57 -0700 Subject: [PATCH 12/24] longer wait time before retry in build --- .github/workflows/build-pipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-pipeline.yml b/.github/workflows/build-pipeline.yml index 02c39db4..7ca4903f 100644 --- a/.github/workflows/build-pipeline.yml +++ b/.github/workflows/build-pipeline.yml @@ -105,7 +105,7 @@ jobs: - name: Wait to retry sonarcloud scan if: steps.sonarcloud.outcome == 'failure' run: | - sleep 20 + sleep 40 - name: SonarCloud Scan Retry id: sonarcloud-retry if: steps.sonarcloud.outcome == 'failure' @@ -187,7 +187,7 @@ jobs: - name: Wait to retry publishing umm-s if: steps.publish-umm-s.outcome == 'failure' run: | - sleep 20 + sleep 120 - name: Publish UMM-S with new version retry id: publish-umm-s-retry uses: podaac/cmr-umm-updater@0.5.0 From 4f2cf238fcd646a7b64759584c85e280d77efcb3 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 5 Sep 2023 20:49:26 +0000 Subject: [PATCH 13/24] /version 2.6.0rc8 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f9f08f54..420f2f74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.6.0rc7" +version = "2.6.0rc8" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From 2a08f971919753ac8dd68ff0246872b8c8a45bed Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Wed, 6 Sep 2023 23:06:52 +0000 Subject: [PATCH 14/24] /version 2.6.0rc9 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 420f2f74..f417c39c 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.6.0rc8" +version = "2.6.0rc9" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From 6b9c9578b33c6291343e7792763f2a9a397d920a Mon Sep 17 00:00:00 2001 From: sliu008 <69875423+sliu008@users.noreply.github.com> Date: Tue, 12 Sep 2023 10:36:48 -0700 Subject: [PATCH 15/24] Feature/issue 189 (#190) * add function that test opening of granule file, and change fill value encoding * update changelog, and change exception catching when testing opening granule file * improve on keeping original encoding * fix pylint * revert encoding --- CHANGELOG.md | 2 ++ podaac/subsetter/subset.py | 57 ++++++++++++++++++++++++++------------ tests/test_subset.py | 18 ++++++++++++ 3 files changed, 60 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10cb9396..ffd028d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed ### Fixed - [issue/119](https://github.com/podaac/l2ss-py/issues/119): GPM variable dimensions are renamed from "phony_dim" to the dimension names in the variable attribute "DimensionNames" +- [issue/189](https://github.com/podaac/l2ss-py/issues/189): Fix temporal subsetting for SWOT collections, use mask_and_scale args for opening granule file if we have an overflow in time fill value, use original dataset encoding when writing file. 
+ ### Security diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index fc4c4459..9c82a947 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -25,6 +25,7 @@ import os from itertools import zip_longest from typing import List, Tuple, Union +import traceback import dateutil from dateutil import parser @@ -1065,6 +1066,33 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): xarray.coding.times.decode_cf_datetime = decode_cf_datetime +def open_dataset_test(file, args): + """ + Open a NetCDF dataset using xarray, handling specific exceptions. + + This function attempts to open a NetCDF dataset using the provided arguments. + If an OverflowError with a specific message is encountered, it modifies the + 'mask_and_scale' argument to True and retries opening the dataset. + + Args: + file (str): Path to the NetCDF file. + args (dict): Dictionary of arguments to pass to xr.open_dataset. + + Returns: + None: The function modifies the 'args' dictionary in place. 
+ + """ + try: + test_xr_open = xr.open_dataset(file, **args) + test_xr_open.close() + except Exception: # pylint: disable=broad-except + traceback_str = traceback.format_exc() + + # Check for the specific OverflowError message + if "Python int too large to convert to C long" in traceback_str and "Failed to decode variable 'time': unable to decode time units" in traceback_str: + args["mask_and_scale"] = True + + def subset(file_to_subset: str, bbox: np.ndarray, output_file: str, variables: Union[List[str], str, None] = (), # pylint: disable=too-many-branches, disable=too-many-statements @@ -1162,10 +1190,15 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str, if min_time or max_time: args['decode_times'] = True + open_dataset_test(file_to_subset, args) + with xr.open_dataset( xr.backends.NetCDF4DataStore(nc_dataset), **args ) as dataset: + + original_dataset = dataset + lat_var_names, lon_var_names, time_var_names = get_coordinate_variable_names( dataset=dataset, lat_var_names=lat_var_names, @@ -1224,28 +1257,18 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str, lon_var_names=lon_var_names )) else: - encoding = {} - compression = {"zlib": True, "complevel": 5, "_FillValue": None} - - if (min_time or max_time) and not all( - dim_size == 1 for dim_size in dataset.dims.values()): - encoding = { - var_name: { - 'units': nc_dataset.variables[var_name].__dict__['units'], - 'zlib': True, - "complevel": 5, - "_FillValue": None - } for var_name in time_var_names - if 'units' in nc_dataset.variables[var_name].__dict__ - } for var in dataset.data_vars: - if var not in encoding: - encoding[var] = compression if dataset[var].dtype == 'S1' and isinstance(dataset[var].attrs.get('_FillValue'), bytes): dataset[var].attrs['_FillValue'] = dataset[var].attrs['_FillValue'].decode('UTF-8') + var_encoding = { + "zlib": True, + "complevel": 5, + "_FillValue": original_dataset[var].encoding.get('_FillValue') + } + data_var = dataset[var].copy() - 
data_var.load().to_netcdf(output_file, 'a', encoding={var: encoding.get(var)}) + data_var.load().to_netcdf(output_file, 'a', encoding={var: var_encoding}) del data_var with nc.Dataset(output_file, 'a') as dataset_attr: diff --git a/tests/test_subset.py b/tests/test_subset.py index 66c508b0..f318cfdc 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -158,6 +158,24 @@ def test_subset_variables(test_file, data_dir, subset_output_dir, request): decode_times=False, decode_coords=False) + + nc_in_ds = nc.Dataset(join(data_dir, test_file)) + nc_out_ds = nc.Dataset(join(subset_output_dir, output_file)) + + time_var_name = None + try: + lat_var_name = subset.compute_coordinate_variable_names(in_ds)[0][0] + time_var_name = subset.compute_time_variable_name(in_ds, in_ds[lat_var_name]) + except ValueError: + # unable to determine lon lat vars + pass + + if time_var_name: + assert nc_in_ds[time_var_name].units == nc_out_ds[time_var_name].units + + nc_in_ds.close() + nc_out_ds.close() + for in_var, out_var in zip(in_ds.data_vars.items(), out_ds.data_vars.items()): # compare names assert in_var[0] == out_var[0] From 8ad6d4e91f5b4fa1ea6e48d47658b549bce73266 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 12 Sep 2023 17:51:02 +0000 Subject: [PATCH 16/24] /version 2.7.0a3 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f0301e8c..caa1cbc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.7.0a2" +version = "2.7.0a3" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From f573e1e37416f842bd3d7998b5af817edb02cfa6 Mon Sep 17 00:00:00 2001 From: Daniel Kaufman <114174502+danielfromearth@users.noreply.github.com> Date: Tue, 12 Sep 2023 14:07:55 -0400 Subject: [PATCH 17/24] Feature/issue 184 (#196) * grammar typo * fix parser arg type from int to float * update CHANGELOG.md --- CHANGELOG.md | 2 +- 
podaac/subsetter/run_subsetter.py | 2 +- podaac/subsetter/xarray_enhancements.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffd028d5..de9ed421 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,8 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed ### Fixed - [issue/119](https://github.com/podaac/l2ss-py/issues/119): GPM variable dimensions are renamed from "phony_dim" to the dimension names in the variable attribute "DimensionNames" +- [issue/184](https://github.com/podaac/l2ss-py/issues/184): boundary box argument at the command line is changed to allow decimal numbers (i.e., floats) for coordinates - [issue/189](https://github.com/podaac/l2ss-py/issues/189): Fix temporal subsetting for SWOT collections, use mask_and_scale args for opening granule file if we have an overflow in time fill value, use original dataset encoding when writing file. - ### Security diff --git a/podaac/subsetter/run_subsetter.py b/podaac/subsetter/run_subsetter.py index d3a499f4..45f341ce 100644 --- a/podaac/subsetter/run_subsetter.py +++ b/podaac/subsetter/run_subsetter.py @@ -35,7 +35,7 @@ def parse_args(args: list) -> tuple: ) parser.add_argument( '--bbox', - type=int, + type=float, default=[-180, -90, 180, 90], nargs=4, action='store', diff --git a/podaac/subsetter/xarray_enhancements.py b/podaac/subsetter/xarray_enhancements.py index 37028794..5930b482 100644 --- a/podaac/subsetter/xarray_enhancements.py +++ b/podaac/subsetter/xarray_enhancements.py @@ -54,7 +54,7 @@ def get_indexers_from_1d(cond: xr.Dataset) -> dict: def get_indexers_from_nd(cond: xr.Dataset, cut: bool) -> dict: """ - Get indexers from a dataset with more than 1 dimensions. + Get indexers from a dataset with more than one dimension. 
Parameters ---------- From 8787a22007e6ff0a9f2d84aeaef8c7709166a087 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 12 Sep 2023 18:26:55 +0000 Subject: [PATCH 18/24] /version 2.7.0a4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index caa1cbc1..026749a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.7.0a3" +version = "2.7.0a4" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From 0993668ca2e2bd7164c8a63ad3b391d0ce2608ba Mon Sep 17 00:00:00 2001 From: Nick Lenssen Date: Tue, 12 Sep 2023 14:59:17 -0400 Subject: [PATCH 19/24] Feature/issue 194 (#195) * Include variables in coordinates if variable subset is true * remove unneccessary comments * remove extra whitespace * Update changelog and add a return line in group handling * add space * add/remove lines like previous PR * typo * remove duplicate code block * add type hints * remove unused indexer * add newlines for readability * use imported `List[str]` rather than `list[str]` for python 3.8 compatibility * update reading of hdf5 files * Add optional import in typing library --------- Co-authored-by: nlensse1 Co-authored-by: danielfromearth Co-authored-by: Daniel Kaufman <114174502+danielfromearth@users.noreply.github.com> Co-authored-by: Simon Liu --- CHANGELOG.md | 1 + podaac/subsetter/group_handling.py | 9 +++++++- podaac/subsetter/subset.py | 35 ++++++++++++++++++------------ tests/test_subset.py | 4 +++- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de9ed421..cc01cb38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [issue/119](https://github.com/podaac/l2ss-py/issues/119): GPM variable dimensions are renamed from "phony_dim" to the dimension names in the variable attribute 
"DimensionNames" - [issue/184](https://github.com/podaac/l2ss-py/issues/184): boundary box argument at the command line is changed to allow decimal numbers (i.e., floats) for coordinates - [issue/189](https://github.com/podaac/l2ss-py/issues/189): Fix temporal subsetting for SWOT collections, use mask_and_scale args for opening granule file if we have an overflow in time fill value, use original dataset encoding when writing file. +- [issue/194](https://github.com/podaac/l2ss-py/issues/194): Return coordinate variables if requested in a variable subset ### Security diff --git a/podaac/subsetter/group_handling.py b/podaac/subsetter/group_handling.py index 63806c72..8fa78c76 100644 --- a/podaac/subsetter/group_handling.py +++ b/podaac/subsetter/group_handling.py @@ -242,11 +242,18 @@ def walk_h5py(data_new, group): walk_h5py(data_new, data_new.name) # Get the instrument name from the file attributes - instrument = data_new['__HDFEOS__ADDITIONAL__FILE_ATTRIBUTES'].attrs['InstrumentName'].decode("utf-8") + + additional_file_attributes = data_new.get('__HDFEOS__ADDITIONAL__FILE_ATTRIBUTES') + instrument = "" + + if additional_file_attributes: + instrument = additional_file_attributes.attrs['InstrumentName'].decode("utf-8") if 'OMI' in instrument: hdf_type = 'OMI' elif 'MLS' in instrument: hdf_type = 'MLS' + else: + hdf_type = None for del_group in del_group_list: del data_new[del_group] diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index 9c82a947..fe530ca1 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -24,7 +24,7 @@ import operator import os from itertools import zip_longest -from typing import List, Tuple, Union +from typing import List, Optional, Tuple, Union import traceback import dateutil from dateutil import parser @@ -745,7 +745,7 @@ def build_cond(str_timestamp, compare): return temporal_cond -def get_base_group_names(lats): # pylint: disable=too-many-branches +def get_base_group_names(lats: List[str]) -> 
Tuple[List[str], List[Union[int, str]]]: # pylint: disable=too-many-branches """Latitude groups may be at different depths. This function gets the level number that makes each latitude group unique from the other latitude names""" unique_groups = [] @@ -757,7 +757,7 @@ def get_base_group_names(lats): # pylint: disable=too-many-branches # put the groups in the same levels in the same list group_list_transpose = np.array(group_list).T.tolist() - diff_count = ['' for i in range(len(group_list))] + diff_count = ['' for _ in range(len(group_list))] group_count = 0 # loop through each group level for my_list in group_list_transpose: @@ -789,7 +789,7 @@ def subset_with_bbox(dataset: xr.Dataset, # pylint: disable=too-many-branches lat_var_names: list, lon_var_names: list, time_var_names: list, - variables=None, + variables: Optional[List[str]] = None, bbox: np.ndarray = None, cut: bool = True, min_time: str = None, @@ -807,6 +807,8 @@ def subset_with_bbox(dataset: xr.Dataset, # pylint: disable=too-many-branches Name of the longitude variables in the given dataset time_var_names : list Name of the time variables in the given dataset + variables : list[str] + List of variables to include in the result bbox : np.array Spatial bounding box to subset Dataset with. 
cut : bool @@ -837,13 +839,6 @@ def subset_with_bbox(dataset: xr.Dataset, # pylint: disable=too-many-branches else: unique_groups = [f'{GROUP_DELIM}{GROUP_DELIM.join(x.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1])}' for x in lat_var_names] - # get unique group names for latitude coordinates - diff_count = [-1] - if len(lat_var_names) > 1: - unique_groups, diff_count = get_base_group_names(lat_var_names) - else: - unique_groups = [f'{GROUP_DELIM}{GROUP_DELIM.join(x.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1])}' for x in lat_var_names] - datasets = [] total_list = [] # don't include repeated variables for lat_var_name, lon_var_name, time_var_name, diffs in zip( # pylint: disable=too-many-nested-blocks @@ -854,6 +849,11 @@ def subset_with_bbox(dataset: xr.Dataset, # pylint: disable=too-many-branches if diffs == -1: # if the lat name is in the root group: take only the root group vars group_vars = list(dataset.data_vars.keys()) + # include the coordinate variables if user asks for + group_vars.extend([ + var for var in list(dataset.coords.keys()) + if var in variables and var not in group_vars + ]) else: group_vars = [ var for var in dataset.data_vars.keys() @@ -862,13 +862,20 @@ def subset_with_bbox(dataset: xr.Dataset, # pylint: disable=too-many-branches # include variables that aren't in a latitude group if variables: group_vars.extend([ - var for var in dataset.data_vars.keys() - if var in variables and var not in group_vars and var not in total_list and not var.startswith(tuple(unique_groups)) + var for var in dataset.variables.keys() + if (var in variables and + var not in group_vars and + var not in total_list and + not var.startswith(tuple(unique_groups)) + ) ]) else: group_vars.extend([ var for var in dataset.data_vars.keys() - if var not in group_vars and var not in total_list and not var.startswith(tuple(unique_groups)) + if (var not in group_vars and + var not in total_list and + not var.startswith(tuple(unique_groups)) + ) ]) # group dimensions do not get 
carried over if unused by data variables (MLS nTotalTimes var) diff --git a/tests/test_subset.py b/tests/test_subset.py index f318cfdc..b3f71ee4 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -2141,7 +2141,7 @@ def test_tropomi_utc_time(data_dir, subset_output_dir, request): """Verify that the time UTC values are conserved in S5P files""" trop_dir = join(data_dir, 'tropomi') trop_file = 'S5P_OFFL_L2__CH4____20190319T110835_20190319T125006_07407_01_010202_20190325T125810_subset.nc4' - variable = ['/PRODUCT/time_utc'] + variable = ['/PRODUCT/time_utc', '/PRODUCT/corner'] bbox = np.array(((-180, 180), (-90, 90))) output_file = "{}_{}".format(request.node.name, trop_file) shutil.copyfile( @@ -2161,6 +2161,8 @@ def test_tropomi_utc_time(data_dir, subset_output_dir, request): assert in_nc_dataset.groups['PRODUCT'].variables['time_utc'][:].squeeze()[0] ==\ out_nc_dataset.groups['PRODUCT'].variables['time_utc'][:].squeeze()[0] + assert out_nc_dataset.groups['PRODUCT'].variables['corner'] + def test_bad_time_unit(subset_output_dir): """TODO: give this function a description """ From 49abc37782c95adce2d83011d685670cf977671c Mon Sep 17 00:00:00 2001 From: sliu008 <69875423+sliu008@users.noreply.github.com> Date: Tue, 12 Sep 2023 12:00:21 -0700 Subject: [PATCH 20/24] update reading of hdf5 files (#198) From 6f9a20e6b0ca98722dfe3b6f32cebbdf42eea1c1 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 12 Sep 2023 19:17:13 +0000 Subject: [PATCH 21/24] /version 2.7.0a5 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 026749a0..d736c7ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.7.0a4" +version = "2.7.0a5" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From eabe177a0fcbfdfe43b4df0ca16c7d7a8ac7f4d1 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 12 Sep 2023 20:04:05 +0000 Subject: 
[PATCH 22/24] /version 2.6.0rc10 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f417c39c..879b1a48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.6.0rc9" +version = "2.6.0rc10" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From 170302ded6a6fa26d86f397d62b1451fcfb21f9b Mon Sep 17 00:00:00 2001 From: James Wood Date: Tue, 12 Sep 2023 16:10:56 -0700 Subject: [PATCH 23/24] deploy From 5a336d6fafd36d4984fa9479612804b49c4b2dd4 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 12 Sep 2023 23:24:52 +0000 Subject: [PATCH 24/24] /version 2.6.0rc11 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 879b1a48..3fc33f22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.6.0rc10" +version = "2.6.0rc11" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0"