# Reference year; also used as the label of the year column selected from the
# Eurostat tables.
year = 2020

# Eurostat `src_crf` codes to keep. Meanings follow the notebook's own notes.
area_filter = [
    'TOTX4_MEMONIA',  # total emissions
    'CRF1A1',         # energy
    'CRF1A3',         # transport (combined with CRF1D1A)
    'CRF1D1A',        # transport (combined with CRF1A3)
    'CRF2',           # industrial processes
    'CRF1A4',         # households and institutions
    'CRF1A2',         # combustion in industry
    'CRF3',           # agriculture
    'CRF5',           # waste management
]

# Czech country name -> two-letter geo code used to filter the `geo` column.
# NOTE(review): 26 entries; Greece is not listed -- confirm this is intentional.
states_dict = dict([
    ('Rakousko', 'AT'),
    ('Belgie', 'BE'),
    ('Bulharsko', 'BG'),
    ('Kypr', 'CY'),
    ('Česká Republika', 'CZ'),
    ('Německo', 'DE'),
    ('Dánsko', 'DK'),
    ('Estonsko', 'EE'),
    ('Španělsko', 'ES'),
    ('Finsko', 'FI'),
    ('Francie', 'FR'),
    ('Chorvatsko', 'HR'),
    ('Maďarsko', 'HU'),
    ('Irsko', 'IE'),
    ('Itálie', 'IT'),
    ('Litva', 'LT'),
    ('Lucembursko', 'LU'),
    ('Lotyšsko', 'LV'),
    ('Malta', 'MT'),
    ('Nizozemsko', 'NL'),
    ('Polsko', 'PL'),
    ('Portugalsko', 'PT'),
    ('Rumunsko', 'RO'),
    ('Švédsko', 'SE'),
    ('Slovinsko', 'SI'),
    ('Slovensko', 'SK'),
])
"metadata": {}, + "outputs": [], + "source": [ + "# tohle už dělá to co od toho potřebuju :)\n", + "def _get_basic_emis_data(year, states_dict, area_filter):\n", + " df_sorce_emis = eurostat.get_data_df('env_air_gge')\n", + " df_basic_emis = df_sorce_emis.rename(columns={'geo\\\\time': 'geo'})\n", + " df_basic_emis = df_basic_emis.query(\"src_crf == @area_filter and unit == 'MIO_T' and airpol == 'GHG'\") # používám tady ten area_filter dobře?\n", + " df_basic_emis = df_basic_emis.loc[df_basic_emis['geo'].isin(states_dict.values())] # add states according to states_dict\n", + " df_basic_emis = df_basic_emis[[\"src_crf\",\"geo\", year]]\n", + " return df_basic_emis" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "eb81d455", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "df_basic_emis = _get_basic_emis_data(year, states_dict, area_filter)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "4de2db54", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
src_crfgeo2020
13369CRF1A1AT8.80730
13370CRF1A1BE19.01476
13371CRF1A1BG18.24960
13373CRF1A1CY3.03325
13374CRF1A1CZ41.60333
\n", + "
" + ], + "text/plain": [ + " src_crf geo 2020\n", + "13369 CRF1A1 AT 8.80730\n", + "13370 CRF1A1 BE 19.01476\n", + "13371 CRF1A1 BG 18.24960\n", + "13373 CRF1A1 CY 3.03325\n", + "13374 CRF1A1 CZ 41.60333" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_basic_emis.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f8aaa42b", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
src_crfgeo2020
13369CRF1A1AT8.80730
13513CRF1A2AT10.54950
13801CRF1A3AT21.18303
14193CRF1A4AT9.02680
14624CRF1D1AAT1.05298
14765CRF2AT15.48929
16259CRF3AT6.96425
18408CRF5AT1.20924
19196TOTX4_MEMONIAAT74.64500
\n", + "
" + ], + "text/plain": [ + " src_crf geo 2020\n", + "13369 CRF1A1 AT 8.80730\n", + "13513 CRF1A2 AT 10.54950\n", + "13801 CRF1A3 AT 21.18303\n", + "14193 CRF1A4 AT 9.02680\n", + "14624 CRF1D1A AT 1.05298\n", + "14765 CRF2 AT 15.48929\n", + "16259 CRF3 AT 6.96425\n", + "18408 CRF5 AT 1.20924\n", + "19196 TOTX4_MEMONIA AT 74.64500" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_basic_emis.query(\"geo == 'AT'\")" + ] + }, + { + "cell_type": "markdown", + "id": "6b9bc9ed", + "metadata": {}, + "source": [ + "POSTUP\n", + "- spoj řádky [CRF1A3 + CRF1D1A] pro kolonku doprava\n", + "- přidat kolonku \"jině\" = dopočíst rozdíl do celku" + ] + }, + { + "cell_type": "markdown", + "id": "8390c02b", + "metadata": {}, + "source": [ + "ODTUD zatím jen tupě přejaté" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0d8c1f4a", + "metadata": {}, + "outputs": [], + "source": [ + "# from EU_emissions_ntb\n", + "# get demography data\n", + "def _get_demo_data(year, states_dict):\n", + " \"\"\"Import data from Eurostat,\n", + " rename columns, select certain data.\n", + " \"\"\"\n", + " df_sorce_demo = eurostat.get_data_df('demo_pjan')\n", + " df_demo = df_sorce_demo.rename(columns={'geo\\\\time': 'geo'})\n", + " df_demo = df_demo.query(\"age == 'TOTAL' and sex == 'T'\")\n", + " df_demo = df_demo.loc[df_demo['geo'].isin(states_dict.values())] # add states according to states_dict\n", + " df_demo = df_demo[[\"geo\", year]]\n", + " return df_demo" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1f2ea5db", + "metadata": {}, + "outputs": [], + "source": [ + "# from EU_emissions_ntb\n", + "# merge dataframes\n", + "def _merge_df(year, df_emis, df_demo, states_dict):\n", + " \"\"\"merge dataframes,\n", + " adjust dataframe.\n", + " \"\"\"\n", + " # merge df\n", + " df_merged = df_emis.merge(df_demo, on='geo', suffixes=('_emis', '_popul')) # \"suffixes\" rename columns \n", + " \n", + " 
# adjust df\n", + " df_merged['em_per_capita'] = (df_merged[f'{year}_emis'] * 1000000) / df_merged[f'{year}_popul']\n", + " df_merged['country'] = states_dict.keys()\n", + " df_merged = df_merged.set_index('country')\n", + " df_merged = df_merged.sort_values(f'{year}_emis', ascending = False)\n", + " return df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9608923c", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name '_get_emis_data' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mC:\\Users\\MATJ~1\\AppData\\Local\\Temp/ipykernel_19408/1988500784.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf_emis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_get_emis_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0myear\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstates_dict\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# tohle už tu ted není - nahrad \"df_basic_emis\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mdf_demo\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_get_demo_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0myear\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstates_dict\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mdf_merged\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_merge_df\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0myear\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf_emis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf_demo\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstates_dict\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m 
# Total emissions per state. `_get_emis_data` is not defined in this notebook
# (the cell raised NameError), so the totals are taken from `df_basic_emis`
# instead: only the 'TOTX4_MEMONIA' (total emissions) rows are kept, which is
# presumably one row per geo code -- the shape `_merge_df` works with.
df_emis = df_basic_emis.loc[df_basic_emis['src_crf'] == 'TOTX4_MEMONIA', ['geo', year]]
df_demo = _get_demo_data(year, states_dict)
df_merged = _merge_df(year, df_emis, df_demo, states_dict)

df_merged

# Possibly useful elsewhere: the sector codes present in the filtered data.
df_basic_emis['src_crf'].unique()