diff --git a/notebooks/states_selected_emmisions_areas_per_person.ipynb b/notebooks/states_selected_emmisions_areas_per_person.ipynb
new file mode 100644
index 0000000..0ab1303
--- /dev/null
+++ b/notebooks/states_selected_emmisions_areas_per_person.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "bfd2060f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib\n",
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "import eurostat"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "624bbaeb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Reference year; also used as the column label when selecting Eurostat data.\n",
+ "year = 2020\n",
+ "# Eurostat `src_crf` codes kept by the emissions query: the overall total plus the individual sectors.\n",
+ "area_filter = ['TOTX4_MEMONIA','CRF1A1', 'CRF1A3', 'CRF1D1A', 'CRF2', 'CRF1A4', 'CRF1A2', 'CRF3', 'CRF5']\n",
+ "# Czech country name -> Eurostat `geo` code, ordered by code.\n",
+ "# Greece is coded 'EL' by Eurostat (not 'GR'); it was missing from the list of EU member states.\n",
+ "states_dict = {\n",
+ "    'Rakousko': 'AT',\n",
+ "    'Belgie': 'BE',\n",
+ "    'Bulharsko': 'BG',\n",
+ "    'Kypr': 'CY',\n",
+ "    'Česká Republika': 'CZ',\n",
+ "    'Německo': 'DE',\n",
+ "    'Dánsko': 'DK',\n",
+ "    'Estonsko': 'EE',\n",
+ "    'Řecko': 'EL',\n",
+ "    'Španělsko': 'ES',\n",
+ "    'Finsko': 'FI',\n",
+ "    'Francie': 'FR',\n",
+ "    'Chorvatsko': 'HR',\n",
+ "    'Maďarsko': 'HU',\n",
+ "    'Irsko': 'IE',\n",
+ "    'Itálie': 'IT',\n",
+ "    'Litva': 'LT',\n",
+ "    'Lucembursko': 'LU',\n",
+ "    'Lotyšsko': 'LV',\n",
+ "    'Malta': 'MT',\n",
+ "    'Nizozemsko': 'NL',\n",
+ "    'Polsko': 'PL',\n",
+ "    'Portugalsko': 'PT',\n",
+ "    'Rumunsko': 'RO',\n",
+ "    'Švédsko': 'SE',\n",
+ "    'Slovinsko': 'SI',\n",
+ "    'Slovensko': 'SK',\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2fe39d6",
+ "metadata": {},
+ "source": [
+ "Potřebuješ přidat ty sektory, ze kterých pak tvoříš ty obdélníčky \n",
+ "- celkové emise: TOTX4_MEMONIA\n",
+ "- Energetika: CRF1A1\n",
+ "- Doprava: [CRF1A3 + CRF1D1A]\n",
+ "- Průmyslové procesy: CRF2\n",
+ "- Domácnosti a instituce: CRF1A4\n",
+ "- Spalování v průmyslu: CRF1A2\n",
+ "- Zemědělství: CRF3\n",
+ "- Odpadové hospodářství: CRF5\n",
+ "- Jiné: rozdíl do celku\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "611b4bbe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _get_basic_emis_data(year, states_dict, area_filter):\n",
+ "    \"\"\"Download Eurostat table 'env_air_gge' and return the selected emissions.\n",
+ "\n",
+ "    Keeps the rows whose `src_crf` is listed in `area_filter`, whose `unit` is\n",
+ "    'MIO_T', whose `airpol` is 'GHG' and whose `geo` is one of the codes in\n",
+ "    `states_dict.values()`.\n",
+ "\n",
+ "    Returns a DataFrame with the columns `src_crf`, `geo` and `year`.\n",
+ "    \"\"\"\n",
+ "    emis_raw = eurostat.get_data_df('env_air_gge').rename(columns={'geo\\\\time': 'geo'})\n",
+ "    # In DataFrame.query, `==` against a list behaves like `in`, so every code in area_filter is kept.\n",
+ "    emis_selected = emis_raw.query(\"src_crf == @area_filter and unit == 'MIO_T' and airpol == 'GHG'\")\n",
+ "    # Restrict to the states listed in states_dict.\n",
+ "    is_listed_state = emis_selected['geo'].isin(states_dict.values())\n",
+ "    return emis_selected.loc[is_listed_state, [\"src_crf\", \"geo\", year]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "eb81d455",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Fetch the filtered emissions table once; the cells below reuse `df_basic_emis`.\n",
+ "df_basic_emis = _get_basic_emis_data(year, states_dict, area_filter)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "4de2db54",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " src_crf | \n",
+ " geo | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 13369 | \n",
+ " CRF1A1 | \n",
+ " AT | \n",
+ " 8.80730 | \n",
+ "
\n",
+ " \n",
+ " 13370 | \n",
+ " CRF1A1 | \n",
+ " BE | \n",
+ " 19.01476 | \n",
+ "
\n",
+ " \n",
+ " 13371 | \n",
+ " CRF1A1 | \n",
+ " BG | \n",
+ " 18.24960 | \n",
+ "
\n",
+ " \n",
+ " 13373 | \n",
+ " CRF1A1 | \n",
+ " CY | \n",
+ " 3.03325 | \n",
+ "
\n",
+ " \n",
+ " 13374 | \n",
+ " CRF1A1 | \n",
+ " CZ | \n",
+ " 41.60333 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " src_crf geo 2020\n",
+ "13369 CRF1A1 AT 8.80730\n",
+ "13370 CRF1A1 BE 19.01476\n",
+ "13371 CRF1A1 BG 18.24960\n",
+ "13373 CRF1A1 CY 3.03325\n",
+ "13374 CRF1A1 CZ 41.60333"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the first rows of the filtered emissions table.\n",
+ "df_basic_emis.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "f8aaa42b",
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " src_crf | \n",
+ " geo | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 13369 | \n",
+ " CRF1A1 | \n",
+ " AT | \n",
+ " 8.80730 | \n",
+ "
\n",
+ " \n",
+ " 13513 | \n",
+ " CRF1A2 | \n",
+ " AT | \n",
+ " 10.54950 | \n",
+ "
\n",
+ " \n",
+ " 13801 | \n",
+ " CRF1A3 | \n",
+ " AT | \n",
+ " 21.18303 | \n",
+ "
\n",
+ " \n",
+ " 14193 | \n",
+ " CRF1A4 | \n",
+ " AT | \n",
+ " 9.02680 | \n",
+ "
\n",
+ " \n",
+ " 14624 | \n",
+ " CRF1D1A | \n",
+ " AT | \n",
+ " 1.05298 | \n",
+ "
\n",
+ " \n",
+ " 14765 | \n",
+ " CRF2 | \n",
+ " AT | \n",
+ " 15.48929 | \n",
+ "
\n",
+ " \n",
+ " 16259 | \n",
+ " CRF3 | \n",
+ " AT | \n",
+ " 6.96425 | \n",
+ "
\n",
+ " \n",
+ " 18408 | \n",
+ " CRF5 | \n",
+ " AT | \n",
+ " 1.20924 | \n",
+ "
\n",
+ " \n",
+ " 19196 | \n",
+ " TOTX4_MEMONIA | \n",
+ " AT | \n",
+ " 74.64500 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " src_crf geo 2020\n",
+ "13369 CRF1A1 AT 8.80730\n",
+ "13513 CRF1A2 AT 10.54950\n",
+ "13801 CRF1A3 AT 21.18303\n",
+ "14193 CRF1A4 AT 9.02680\n",
+ "14624 CRF1D1A AT 1.05298\n",
+ "14765 CRF2 AT 15.48929\n",
+ "16259 CRF3 AT 6.96425\n",
+ "18408 CRF5 AT 1.20924\n",
+ "19196 TOTX4_MEMONIA AT 74.64500"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Spot-check a single state: all selected `src_crf` rows for Austria.\n",
+ "df_basic_emis.query(\"geo == 'AT'\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b9bc9ed",
+ "metadata": {},
+ "source": [
+ "POSTUP\n",
+ "- spoj řádky [CRF1A3 + CRF1D1A] pro kolonku doprava\n",
+ "- přidat kolonku \"jiné\" = dopočíst rozdíl do celku"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8390c02b",
+ "metadata": {},
+ "source": [
+ "ODTUD zatím jen tupě převzaté"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "0d8c1f4a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Taken over from EU_emissions_ntb: population data.\n",
+ "def _get_demo_data(year, states_dict):\n",
+ "    \"\"\"Download Eurostat table 'demo_pjan' and return the selected population rows.\n",
+ "\n",
+ "    Keeps the rows with `age == 'TOTAL'` and `sex == 'T'` whose `geo` is one of\n",
+ "    the codes in `states_dict.values()`.\n",
+ "\n",
+ "    Returns a DataFrame with the columns `geo` and `year`.\n",
+ "    \"\"\"\n",
+ "    demo_raw = eurostat.get_data_df('demo_pjan').rename(columns={'geo\\\\time': 'geo'})\n",
+ "    demo_total = demo_raw.query(\"age == 'TOTAL' and sex == 'T'\")\n",
+ "    # Restrict to the states listed in states_dict.\n",
+ "    is_listed_state = demo_total['geo'].isin(states_dict.values())\n",
+ "    return demo_total.loc[is_listed_state, [\"geo\", year]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "1f2ea5db",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Taken over from EU_emissions_ntb: join emissions with population.\n",
+ "def _merge_df(year, df_emis, df_demo, states_dict):\n",
+ "    \"\"\"Join emissions with population on `geo` and add per-capita emissions.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    year : label of the value column present in both input frames.\n",
+ "    df_emis : emissions frame with a `geo` column and a `year` column.\n",
+ "    df_demo : population frame with a `geo` column and a `year` column.\n",
+ "    states_dict : mapping of country name -> `geo` code.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    DataFrame indexed by country name and sorted by emissions, descending,\n",
+ "    with the extra column `em_per_capita`.\n",
+ "    \"\"\"\n",
+ "    # The shared `year` column gets a suffix on each side of the merge.\n",
+ "    df_merged = df_emis.merge(df_demo, on='geo', suffixes=('_emis', '_popul'))\n",
+ "    emis_col = f'{year}_emis'\n",
+ "    popul_col = f'{year}_popul'\n",
+ "\n",
+ "    # Scale by 1e6 before dividing; assumes emissions arrive in millions of tonnes - TODO confirm against the caller.\n",
+ "    df_merged['em_per_capita'] = (df_merged[emis_col] * 1000000) / df_merged[popul_col]\n",
+ "\n",
+ "    # Label rows by looking up each `geo` code instead of assigning the dict keys by position:\n",
+ "    # positional assignment fails when a state is missing or appears in several rows (one per\n",
+ "    # sector), and mislabels rows whenever their order differs from the dict order.\n",
+ "    name_by_code = {code: name for name, code in states_dict.items()}\n",
+ "    df_merged['country'] = df_merged['geo'].map(name_by_code)\n",
+ "\n",
+ "    df_merged = df_merged.set_index('country')\n",
+ "    return df_merged.sort_values(emis_col, ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9608923c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# `_get_emis_data` does not exist in this notebook; reuse the already downloaded `df_basic_emis`.\n",
+ "# Keep only the total-emissions rows (one per state) so each country gets a single per-capita figure.\n",
+ "df_emis = df_basic_emis.query(\"src_crf == 'TOTX4_MEMONIA'\")\n",
+ "df_demo = _get_demo_data(year, states_dict)\n",
+ "df_merged = _merge_df(year, df_emis, df_demo, states_dict)\n",
+ "\n",
+ "df_merged"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ae3e3aef",
+ "metadata": {},
+ "source": [
+ "Jiné možná k něčemu použitelné"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "135fad48",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# List the distinct `src_crf` codes present in the filtered emissions table.\n",
+ "df_basic_emis['src_crf'].unique()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}