From 1ca86cd01d841dba9e5a71e47bc8878bfe1988f1 Mon Sep 17 00:00:00 2001 From: Adam Theisen Date: Wed, 12 Apr 2023 10:34:01 -0500 Subject: [PATCH] Doi (#657) * ENH: AirNow API must have updated some algorithms. What it's displaying now is what I would have assumed would be the closest reported station. * ENH: Adding feature for users to be able to get the citation for a datastream. Also adds text after data are done downloading from ARM to please reference the citation of the datastream * ENH: Changing doi.org to just doi --- act/discovery/__init__.py | 2 +- act/discovery/get_armfiles.py | 56 +++++++++++++++++++++++++++++++++++ act/tests/test_discovery.py | 15 ++++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/act/discovery/__init__.py b/act/discovery/__init__.py index 9268ec1faa..f3318ab0ce 100644 --- a/act/discovery/__init__.py +++ b/act/discovery/__init__.py @@ -10,7 +10,7 @@ __name__, submodules=['get_armfiles', 'get_cropscape', 'get_airnow', 'get_noaa_psl', 'get_neon'], submod_attrs={ - 'get_armfiles': ['download_data'], + 'get_armfiles': ['download_data', 'get_arm_doi'], 'get_asos': ['get_asos'], 'get_airnow': ['get_airnow_bounded_obs', 'get_airnow_obs', 'get_airnow_forecast'], 'get_cropscape': ['croptype'], diff --git a/act/discovery/get_armfiles.py b/act/discovery/get_armfiles.py index 1bbae4c373..3d730212e1 100644 --- a/act/discovery/get_armfiles.py +++ b/act/discovery/get_armfiles.py @@ -8,6 +8,8 @@ import os import sys from datetime import timedelta +import requests +import textwrap try: from urllib.request import urlopen @@ -166,4 +168,58 @@ def download_data(username, token, datastream, startdate, enddate, time=None, ou 'No files returned or url status error.\n' 'Check datastream name, start, and end date.' 
def get_arm_doi(datastream, startdate, enddate, timeout=30):
    """
    This function will return a citation with DOI, if available, for specified
    datastream and date range

    Parameters
    ----------
    datastream : str
        The name of the datastream to get a DOI for. This must be ARM standard names
    startdate : str
        Start date for the citation in the format YYYY-MM-DD
    enddate : str
        End date for the citation in the format YYYY-MM-DD
    timeout : float
        Number of seconds to wait on each web request before giving up.
        Passed straight through to ``requests.get``.

    Returns
    -------
    doi : str
        Returns the citation as a string, or 'N/A' when the citation
        service answers with an empty body.

    Raises
    ------
    ValueError
        If ``datastream`` is not a non-empty string, or if the metadata
        query returns no documents for it.

    """

    # Reject unusable names up front rather than after a network round trip.
    if not isinstance(datastream, str) or not datastream.strip():
        raise ValueError('Check parameters')

    # The first three characters are used as the site code and the text after
    # the last '.' as the data level.
    site = datastream[0:3]
    level = datastream.split('.')[-1]

    # Get the instrument class code from the datastream name.  Letting
    # requests build the query string keeps caller-supplied text properly
    # percent-encoded.
    metadata_url = 'https://adc.arm.gov/solr8/metadata/select'
    metadata = requests.get(
        url=metadata_url,
        params={'q': 'datastream:' + datastream},
        timeout=timeout,
    )
    docs = metadata.json()['response']['docs']
    if not docs:
        raise ValueError('Check parameters')
    inst_class = docs[0]['instrument_class_code']

    # Get the DOI information
    citation_url = 'https://adc.arm.gov/citationservice/citation/inst-class'
    citation_params = {
        'id': inst_class,
        'citationType': 'apa',
        'site': site,
        'dataLevel': level,
        'startDate': startdate,
        'endDate': enddate,
    }
    citation = requests.get(url=citation_url, params=citation_params, timeout=timeout)

    # An empty body is treated as "no citation available".
    if not citation.text:
        return 'N/A'
    return citation.json()['citation']
def test_arm_doi():
    """Request a citation for one datastream and check the returned text."""
    name = 'sgpmetE13.b1'
    first_day, last_day = '2022-01-01', '2022-12-31'
    citation = act.discovery.get_arm_doi(name, first_day, last_day)

    assert len(citation) > 10
    assert isinstance(citation, str)
    for expected in ('doi', 'Kyrouac'):
        assert expected in citation

    # The datastream name 'test' is expected to raise ValueError.
    np.testing.assert_raises(
        ValueError, act.discovery.get_arm_doi, 'test', first_day, last_day
    )