Skip to content

Commit

Permalink
Make use of AUTOMATION env var consistent across demo2 notebooks (#167)
Browse files Browse the repository at this point in the history
* These changes make use of AUTOMATION env var consistent across notebooks.
* Restore previous contents so we can fix CL1->CL2 references and Demo3 refactor as separate PRs.
* Run notebooks preserving output cells (using samples_1 instead of samples_145 S3 pipeline_run folder).
  • Loading branch information
MichaelTiemannOSC authored Jul 5, 2022
1 parent 00e8734 commit 8690c44
Show file tree
Hide file tree
Showing 6 changed files with 1,413 additions and 633 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ The following demos provide examples of how to use the tools available with [Ope
* [Ingest raw data from S3 as tables on Trino](notebooks/demo1/demo1-create-tables.ipynb)
* [Run SQL queries from a Jupyter Notebook environment](notebooks/demo1/demo1-join-tables.ipynb)
* [Demo 1 Elyra Pipeline](https://github.com/os-climate/aicoe-osc-demo/blob/master/notebooks/demo1/demo1.pipeline)
* [Results visualized on a Superset Dashboard](https://superset-secure-odh-superset.apps.odh-cl1.apps.os-climate.org/superset/dashboard/3/)
* [Results visualized on a Superset Dashboard](https://superset-secure-odh-superset.apps.odh-cl1.apps.os-climate.org/superset/dashboard/3)
* [Video on creating Elyra Pipelines and Superset Dashboard](https://youtu.be/TFgsR7UlcHA)


Expand Down
2 changes: 1 addition & 1 deletion notebooks/demo2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
BASE_INFER_KPI_FOLDER = DATA_FOLDER / "infer_KPI"

CHECKPOINT_S3_PREFIX = "aicoe-osc-demo/saved_models"
DATA_S3_PREFIX = "aicoe-osc-demo/pipeline_run/samples_145"
DATA_S3_PREFIX = "aicoe-osc-demo/pipeline_run/samples_1"
BASE_PDF_S3_PREFIX = f"{DATA_S3_PREFIX}/pdfs"
BASE_ANNOTATION_S3_PREFIX = f"{DATA_S3_PREFIX}/annotations"
BASE_EXTRACTION_S3_PREFIX = f"{DATA_S3_PREFIX}/extraction"
Expand Down
157 changes: 75 additions & 82 deletions notebooks/demo2/create_results_table.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@
"outputs": [],
"source": [
"# Load credentials\n",
"dotenv_dir = \"/opt/app-root/src/aicoe-osc-demo\"\n",
"dotenv_dir = os.environ.get(\n",
" \"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\")\n",
")\n",
"dotenv_path = pathlib.Path(dotenv_dir) / \"credentials.env\"\n",
"if os.path.exists(dotenv_path):\n",
" load_dotenv(dotenv_path=dotenv_path, override=True)"
Expand Down Expand Up @@ -106,7 +108,13 @@
"source": [
"if os.getenv(\"AUTOMATION\"):\n",
" if not os.path.exists(config.BASE_INFER_KPI_FOLDER):\n",
" pathlib.Path(config.BASE_INFER_KPI_FOLDER).mkdir(parents=True, exist_ok=True)"
" pathlib.Path(config.BASE_INFER_KPI_FOLDER).mkdir(parents=True, exist_ok=True)\n",
"\n",
" # Download a sample dataset file from s3\n",
" s3c.download_files_in_prefix_to_dir(\n",
" s3_prefix=config.BASE_INFER_KPI_S3_PREFIX,\n",
" destination_dir=config.BASE_INFER_KPI_FOLDER\n",
" )"
]
},
{
Expand Down Expand Up @@ -151,66 +159,66 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>413749035_Eversource Energy_2019-12-31</td>\n",
" <td>sustainability-report-2019</td>\n",
" <td>In which year was the annual report or the sus...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2019</td>\n",
" <td>7</td>\n",
" <td>• Our core utility operations performed very w...</td>\n",
" <td>3</td>\n",
" <td>This report focuses on the sustainability topi...</td>\n",
" <td>Text</td>\n",
" <td>13.372849</td>\n",
" <td>-10.76948</td>\n",
" <td>-25.76948</td>\n",
" <td>12.819071</td>\n",
" <td>-11.384018</td>\n",
" <td>-26.384018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>413749035_Eversource Energy_2019-12-31</td>\n",
" <td>sustainability-report-2019</td>\n",
" <td>In which year was the annual report or the sus...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2019</td>\n",
" <td>34</td>\n",
" <td>The American Council for an Energy-Efficient E...</td>\n",
" <td>2018</td>\n",
" <td>7</td>\n",
" <td>According to IPCC’s 1.5 C report from 2018 and...</td>\n",
" <td>Text</td>\n",
" <td>12.66205</td>\n",
" <td>-9.417558</td>\n",
" <td>-24.417558</td>\n",
" <td>12.50875</td>\n",
" <td>-6.967497</td>\n",
" <td>-21.967497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>413749035_Eversource Energy_2019-12-31</td>\n",
" <td>sustainability-report-2019</td>\n",
" <td>In which year was the annual report or the sus...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2019</td>\n",
" <td>12</td>\n",
" <td>The Eversource Internal Audit Department perfo...</td>\n",
" <td>26</td>\n",
" <td>Equinor Sustainability report 2019 High value ...</td>\n",
" <td>Text</td>\n",
" <td>12.373636</td>\n",
" <td>-10.899869</td>\n",
" <td>-25.899869</td>\n",
" <td>12.427496</td>\n",
" <td>-9.680325</td>\n",
" <td>-24.680325</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>413749035_Eversource Energy_2019-12-31</td>\n",
" <td>sustainability-report-2019</td>\n",
" <td>In which year was the annual report or the sus...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2019</td>\n",
" <td>118</td>\n",
" <td>These are referenced throughout our 2019 Susta...</td>\n",
" <td>8</td>\n",
" <td>Equinor Sustainability report 2019Low carbon —...</td>\n",
" <td>Text</td>\n",
" <td>12.245757</td>\n",
" <td>-10.556628</td>\n",
" <td>-25.556628</td>\n",
" <td>12.356202</td>\n",
" <td>-8.748007</td>\n",
" <td>-23.748007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>413749035_Eversource Energy_2019-12-31</td>\n",
" <td>sustainability-report-2019</td>\n",
" <td>What is the annual total production from coal?</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>no_answer</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>Text</td>\n",
" <td>2.720188</td>\n",
" <td>2.840454</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
Expand All @@ -219,32 +227,32 @@
"</div>"
],
"text/plain": [
" pdf_name \\\n",
"0 413749035_Eversource Energy_2019-12-31 \n",
"1 413749035_Eversource Energy_2019-12-31 \n",
"2 413749035_Eversource Energy_2019-12-31 \n",
"3 413749035_Eversource Energy_2019-12-31 \n",
"4 413749035_Eversource Energy_2019-12-31 \n",
" pdf_name \\\n",
"0 sustainability-report-2019 \n",
"1 sustainability-report-2019 \n",
"2 sustainability-report-2019 \n",
"3 sustainability-report-2019 \n",
"4 sustainability-report-2019 \n",
"\n",
" kpi kpi_id answer page \\\n",
"0 In which year was the annual report or the sus... <NA> 2019 7 \n",
"1 In which year was the annual report or the sus... <NA> 2019 34 \n",
"2 In which year was the annual report or the sus... <NA> 2019 12 \n",
"3 In which year was the annual report or the sus... <NA> 2019 118 \n",
"0 In which year was the annual report or the sus... <NA> 2019 3 \n",
"1 In which year was the annual report or the sus... <NA> 2018 7 \n",
"2 In which year was the annual report or the sus... <NA> 2019 26 \n",
"3 In which year was the annual report or the sus... <NA> 2019 8 \n",
"4 What is the annual total production from coal? <NA> no_answer <NA> \n",
"\n",
" paragraph source score \\\n",
"0 • Our core utility operations performed very w... Text 13.372849 \n",
"1 The American Council for an Energy-Efficient E... Text 12.66205 \n",
"2 The Eversource Internal Audit Department perfo... Text 12.373636 \n",
"3 These are referenced throughout our 2019 Susta... Text 12.245757 \n",
"4 <NA> Text 2.720188 \n",
"0 This report focuses on the sustainability topi... Text 12.819071 \n",
"1 According to IPCC’s 1.5 C report from 2018 and... Text 12.50875 \n",
"2 Equinor Sustainability report 2019 High value ... Text 12.427496 \n",
"3 Equinor Sustainability report 2019Low carbon —... Text 12.356202 \n",
"4 <NA> Text 2.840454 \n",
"\n",
" no_ans_score no_answer_score_plus_boost \n",
"0 -10.76948 -25.76948 \n",
"1 -9.417558 -24.417558 \n",
"2 -10.899869 -25.899869 \n",
"3 -10.556628 -25.556628 \n",
"0 -11.384018 -26.384018 \n",
"1 -6.967497 -21.967497 \n",
"2 -9.680325 -24.680325 \n",
"3 -8.748007 -23.748007 \n",
"4 <NA> <NA> "
]
},
Expand All @@ -254,12 +262,6 @@
}
],
"source": [
"# Download a sample dataset file from s3\n",
"s3c.download_files_in_prefix_to_dir(\n",
" s3_prefix=config.BASE_INFER_KPI_S3_PREFIX,\n",
" destination_dir=config.BASE_INFER_KPI_FOLDER\n",
")\n",
"\n",
"all_files = glob.glob(str(config.BASE_INFER_KPI_FOLDER / \"*.csv\"))\n",
"list_of_files = []\n",
"\n",
Expand Down Expand Up @@ -315,22 +317,22 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 689 entries, 0 to 688\n",
"RangeIndex: 96 entries, 0 to 95\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 pdf_name 689 non-null string \n",
" 1 kpi 689 non-null string \n",
" 0 pdf_name 96 non-null string \n",
" 1 kpi 96 non-null string \n",
" 2 kpi_id 0 non-null Int64 \n",
" 3 answer 689 non-null string \n",
" 4 page 555 non-null Int64 \n",
" 5 paragraph 555 non-null string \n",
" 6 source 689 non-null string \n",
" 7 score 689 non-null Float64\n",
" 8 no_ans_score 555 non-null Float64\n",
" 9 no_answer_score_plus_boost 555 non-null Float64\n",
" 3 answer 96 non-null string \n",
" 4 page 79 non-null Int64 \n",
" 5 paragraph 79 non-null string \n",
" 6 source 96 non-null string \n",
" 7 score 96 non-null Float64\n",
" 8 no_ans_score 79 non-null Float64\n",
" 9 no_answer_score_plus_boost 79 non-null Float64\n",
"dtypes: Float64(3), Int64(2), string(5)\n",
"memory usage: 57.3 KB\n"
"memory usage: 8.1 KB\n"
]
}
],
Expand Down Expand Up @@ -359,15 +361,6 @@
"name": "stdout",
"output_type": "stream",
"text": [
"200\n",
"200\n",
"200\n",
"200\n",
"200\n",
"200\n",
"200\n",
"200\n",
"200\n",
"200\n"
]
}
Expand Down Expand Up @@ -464,16 +457,16 @@
{
"data": {
"text/plain": [
"['sustainability-report-2019',\n",
"['90044053_Fisher & Paykel Hl_2017-11-07',\n",
" 'In which year was the annual report or the sustainability report published?',\n",
" None,\n",
" '2019',\n",
" 26,\n",
" 'Equinor Sustainability report 2019 High value — creating shared value',\n",
" '2017',\n",
" 2,\n",
" 'Corporate Responsibility and Sustainability Report 2017Fisher & Paykel Healthcare Corporation Limited',\n",
" 'Text',\n",
" 12.427505493164062,\n",
" -9.680328369140623,\n",
" -24.680328369140625]"
" 11.549626350402832,\n",
" -8.787019729614258,\n",
" -23.787019729614254]"
]
},
"execution_count": 11,
Expand Down Expand Up @@ -514,7 +507,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"version": "3.8.8"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 8690c44

Please sign in to comment.