From 0bfa202efebb0ee01962d346f8871e0413136208 Mon Sep 17 00:00:00 2001 From: Clayton O'Dell Date: Fri, 6 Dec 2024 13:52:46 -0500 Subject: [PATCH] Updated 12 example notebooks to run on Colab. Fixes #385 Signed-off-by: Clayton O'Dell --- examples/demo_FACTS.ipynb | 1566 +++--- examples/demo_gerryfair.ipynb | 1746 +++--- examples/demo_json_explainers.ipynb | 554 +- examples/demo_lfr.ipynb | 919 ++-- examples/demo_mdss_classifier_metric.ipynb | 2533 ++++----- examples/demo_mdss_detector.ipynb | 3256 ++++++------ examples/demo_meta_classifier.ipynb | 937 ++-- examples/demo_ot_metric.ipynb | 2798 +++++----- .../demo_reject_option_classification.ipynb | 1582 +++--- examples/demo_reweighing_preproc.ipynb | 1915 +++---- examples/demo_short_gerryfair_test.ipynb | 369 +- examples/tutorial_medical_expenditure.ipynb | 4667 +++++++++-------- 12 files changed, 12046 insertions(+), 10796 deletions(-) diff --git a/examples/demo_FACTS.ipynb b/examples/demo_FACTS.ipynb index a99d26ef..7541cc86 100644 --- a/examples/demo_FACTS.ipynb +++ b/examples/demo_FACTS.ipynb @@ -1,775 +1,847 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Fairness auditing for subgroups using Fairness Aware Counterfactuals for Subgroups (FACTS).\n", - "\n", - "[FACTS](https://arxiv.org/abs/2306.14978) is an efficient, model-agnostic, highly parameterizable, and explainable framework for auditing subgroup fairness through counterfactual explanations. FACTS focuses on identifying a specific type of bias, i.e. the *difficulty in achieving recourse*. 
In short, it focuses on the population that has obtained the unfavorable outcome (*affected population*) by a ML model and tries to identify differences in the difficulty of changing the ML model's decision to obtain the favorable outcome, between affected subpopulations.\n", - "\n", - "In this notebook, we will see how to use this algorithm for discovering subgroups where the bias of a model (logistic regression for simplicity) between Males and Females is high.\n", - "\n", - "We will use the Adult dataset from UCI ([reference](https://archive.ics.uci.edu/ml/datasets/adult))." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Preliminaries" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import dependencies\n", - "\n", - "As usual in python, the first step is to import all necessary packages." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ + "cells": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:No module named 'tempeh': fetch_lawschool_gpa will be unavailable. 
To install, run:\n", - "pip install 'aif360[LawSchoolGPA]'\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import train_test_split\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "\n", - "from aif360.sklearn.datasets.openml_datasets import fetch_adult\n", - "from aif360.sklearn.detectors.facts.clean import clean_dataset\n", - "from aif360.sklearn.detectors.facts import FACTS, FACTS_bias_scan\n", - "\n", - "from IPython.display import display\n", - "\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below, you can change the `random_seed` variable to `None` if you would like for the pseudo-random parts to actually change between runs. We have set it to a specific value for reproducibility." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "random_seed = 131313 # for reproducibility" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_FACTS.ipynb)\n" + ], + "metadata": { + "id": "3VmYWXk6gRbj" + } + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ageworkclasseducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countryincome
0(16.999, 26.0]Private7.0Never-marriedMachine-op-inspctOwn-childBlackMale0.00.0FullTimeUnited-States0
1(34.0, 41.0]Private9.0Married-civ-spouseFarming-fishingMarriedWhiteMale0.00.0OverTimeUnited-States0
2(26.0, 34.0]Local-gov12.0Married-civ-spouseProtective-servMarriedWhiteMale0.00.0FullTimeUnited-States1
3(41.0, 50.0]Private10.0Married-civ-spouseMachine-op-inspctMarriedBlackMale7688.00.0FullTimeUnited-States1
4(26.0, 34.0]Private6.0Never-marriedOther-serviceNot-in-familyWhiteMale0.00.0MidTimeUnited-States0
\n", - "
" + "cell_type": "markdown", + "metadata": { + "id": "qHiNaF-VgPB4" + }, + "source": [ + "# Fairness auditing for subgroups using Fairness Aware Counterfactuals for Subgroups (FACTS).\n", + "\n", + "[FACTS](https://arxiv.org/abs/2306.14978) is an efficient, model-agnostic, highly parameterizable, and explainable framework for auditing subgroup fairness through counterfactual explanations. FACTS focuses on identifying a specific type of bias, i.e. the *difficulty in achieving recourse*. In short, it focuses on the population that has obtained the unfavorable outcome (*affected population*) by a ML model and tries to identify differences in the difficulty of changing the ML model's decision to obtain the favorable outcome, between affected subpopulations.\n", + "\n", + "In this notebook, we will see how to use this algorithm for discovering subgroups where the bias of a model (logistic regression for simplicity) between Males and Females is high.\n", + "\n", + "We will use the Adult dataset from UCI ([reference](https://archive.ics.uci.edu/ml/datasets/adult))." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4cavLAfZgPB5" + }, + "source": [ + "# Preliminaries" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1FLtToZmgPB6" + }, + "source": [ + "## Import dependencies\n", + "\n", + "As usual in python, the first step is to import all necessary packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qz_p2JzegPB6", + "outputId": "dc1206fb-14b9-4ac7-ce74-0f2d83ce6a61" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:No module named 'tempeh': fetch_lawschool_gpa will be unavailable. 
To install, run:\n", + "pip install 'aif360[LawSchoolGPA]'\n" + ] + } ], - "text/plain": [ - " age workclass education-num marital-status \\\n", - "0 (16.999, 26.0] Private 7.0 Never-married \n", - "1 (34.0, 41.0] Private 9.0 Married-civ-spouse \n", - "2 (26.0, 34.0] Local-gov 12.0 Married-civ-spouse \n", - "3 (41.0, 50.0] Private 10.0 Married-civ-spouse \n", - "4 (26.0, 34.0] Private 6.0 Never-married \n", - "\n", - " occupation relationship race sex capital-gain capital-loss \\\n", - "0 Machine-op-inspct Own-child Black Male 0.0 0.0 \n", - "1 Farming-fishing Married White Male 0.0 0.0 \n", - "2 Protective-serv Married White Male 0.0 0.0 \n", - "3 Machine-op-inspct Married Black Male 7688.0 0.0 \n", - "4 Other-service Not-in-family White Male 0.0 0.0 \n", - "\n", - " hours-per-week native-country income \n", - "0 FullTime United-States 0 \n", - "1 OverTime United-States 0 \n", - "2 FullTime United-States 1 \n", - "3 FullTime United-States 1 \n", - "4 MidTime United-States 0 " + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "\n", + "from aif360.sklearn.datasets.openml_datasets import fetch_adult\n", + "from aif360.sklearn.detectors.facts.clean import clean_dataset\n", + "from aif360.sklearn.detectors.facts import FACTS, FACTS_bias_scan\n", + "\n", + "from IPython.display import display\n", + "\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# load the adult dataset and perform some simple preprocessing steps\n", - "# See output for a glimpse of the final dataset's characteristics\n", - "X, y, sample_weight = fetch_adult()\n", - "data = clean_dataset(X.assign(income=y), \"adult\")\n", - "display(data.head())\n", - "\n", - "# split 
into train-test data\n", - "y = data['income']\n", - "X = data.drop('income', axis=1)\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=random_seed, stratify=y)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example Model to be used for Auditing\n", - "\n", - "We use the train set to train a simple logistic regression model. This will serve as the demonstrative model, which we will then treat as a black box and apply our algorithm.\n", - "\n", - "Of course, any model can be used in its place. Our purpose here is not to produce a good model, but to audit the fairness of an existing one." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "#### here, we incrementally build the example model. It consists of one preprocessing step,\n", - "#### which is to turn categorical features into the respective one-hot encodings, and\n", - "#### a simple scikit-learn logistic regressor.\n", - "categorical_features = X.select_dtypes(include=[\"object\", \"category\"]).columns.to_list()\n", - "categorical_features_onehot_transformer = ColumnTransformer(\n", - " transformers=[\n", - " (\"one-hot-encoder\", OneHotEncoder(), categorical_features)\n", - " ],\n", - " remainder=\"passthrough\"\n", - ")\n", - "model = Pipeline([\n", - " (\"one-hot-encoder\", categorical_features_onehot_transformer),\n", - " (\"clf\", LogisticRegression(max_iter=1500))\n", - "])\n", - "\n", - "#### train the model\n", - "model = model.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy = 85.16%\n" - ] - } - ], - "source": [ - "# showcase model's accuracy\n", - "y_pred = model.predict(X_test)\n", - "print(f\"Accuracy = {(y_test.values == y_pred).sum() / y_test.shape[0]:.2%}\")" - ] - }, - { - "attachments": {}, - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "# A Practical Example of FACTS\n", - "\n", - "The real essence of our work starts here. Specifically, we showcase the generation of candidate subpopulation groups and counterfactuals and the detection of those groups that exhibit the greatest unfairness, with respect to one of several metrics." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load and Fit FACTS" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# load FACTS framework with:\n", - "# - the model to be audited\n", - "# - protected attribute \"sex\" and\n", - "# - assigning equal, unit weights to all features for cost computation.\n", - "# - no features forbidden from changing, i.e. user can specify any features that cannot change at all.\n", - "detector = FACTS(\n", - " clf=model,\n", - " prot_attr=\"sex\",\n", - " feature_weights={f: 1 for f in X.columns},\n", - " feats_not_allowed_to_change=[]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "m1Ye20legPB8" + }, + "source": [ + "Below, you can change the `random_seed` variable to `None` if you would like for the pseudo-random parts to actually change between runs. We have set it to a specific value for reproducibility." 
+ ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Computing candidate subgroups.\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t8KJD-ICgPB8" + }, + "outputs": [], + "source": [ + "random_seed = 131313 # for reproducibility" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████| 1046/1046 [00:00<00:00, 523287.45it/s]" - ] + "cell_type": "markdown", + "metadata": { + "id": "Tq79M4GxgPB8" + }, + "source": [ + "## Load Dataset" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of subgroups: 563\n", - "Computing candidate recourses for all subgroups.\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HwnmXFJ_gPB8", + "outputId": "3cddbc05-73e7-44c5-8482-af3b942f5c6f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclasseducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countryincome
0(16.999, 26.0]Private7.0Never-marriedMachine-op-inspctOwn-childBlackMale0.00.0FullTimeUnited-States0
1(34.0, 41.0]Private9.0Married-civ-spouseFarming-fishingMarriedWhiteMale0.00.0OverTimeUnited-States0
2(26.0, 34.0]Local-gov12.0Married-civ-spouseProtective-servMarriedWhiteMale0.00.0FullTimeUnited-States1
3(41.0, 50.0]Private10.0Married-civ-spouseMachine-op-inspctMarriedBlackMale7688.00.0FullTimeUnited-States1
4(26.0, 34.0]Private6.0Never-marriedOther-serviceNot-in-familyWhiteMale0.00.0MidTimeUnited-States0
\n", + "
" + ], + "text/plain": [ + " age workclass education-num marital-status \\\n", + "0 (16.999, 26.0] Private 7.0 Never-married \n", + "1 (34.0, 41.0] Private 9.0 Married-civ-spouse \n", + "2 (26.0, 34.0] Local-gov 12.0 Married-civ-spouse \n", + "3 (41.0, 50.0] Private 10.0 Married-civ-spouse \n", + "4 (26.0, 34.0] Private 6.0 Never-married \n", + "\n", + " occupation relationship race sex capital-gain capital-loss \\\n", + "0 Machine-op-inspct Own-child Black Male 0.0 0.0 \n", + "1 Farming-fishing Married White Male 0.0 0.0 \n", + "2 Protective-serv Married White Male 0.0 0.0 \n", + "3 Machine-op-inspct Married Black Male 7688.0 0.0 \n", + "4 Other-service Not-in-family White Male 0.0 0.0 \n", + "\n", + " hours-per-week native-country income \n", + "0 FullTime United-States 0 \n", + "1 OverTime United-States 0 \n", + "2 FullTime United-States 1 \n", + "3 FullTime United-States 1 \n", + "4 MidTime United-States 0 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# load the adult dataset and perform some simple preprocessing steps\n", + "# See output for a glimpse of the final dataset's characteristics\n", + "X, y, sample_weight = fetch_adult()\n", + "data = clean_dataset(X.assign(income=y), \"adult\")\n", + "display(data.head())\n", + "\n", + "# split into train-test data\n", + "y = data['income']\n", + "X = data.drop('income', axis=1)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=random_seed, stratify=y)" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "100%|█████████████████████████████████████████████████████████████████████████████| 563/563 [00:00<00:00, 50669.32it/s]" - ] + "cell_type": "markdown", + "metadata": { + "id": "WdckzZdwgPB8" + }, + "source": [ + "## Example Model to be used for Auditing\n", + "\n", + "We use the train set to train a simple logistic regression model. 
This will serve as the demonstrative model, which we will then treat as a black box and apply our algorithm.\n", + "\n", + "Of course, any model can be used in its place. Our purpose here is not to produce a good model, but to audit the fairness of an existing one." + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Computing percentages of individuals flipped by each action independently.\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9NuWT4LMgPB9" + }, + "outputs": [], + "source": [ + "#### here, we incrementally build the example model. It consists of one preprocessing step,\n", + "#### which is to turn categorical features into the respective one-hot encodings, and\n", + "#### a simple scikit-learn logistic regressor.\n", + "categorical_features = X.select_dtypes(include=[\"object\", \"category\"]).columns.to_list()\n", + "categorical_features_onehot_transformer = ColumnTransformer(\n", + " transformers=[\n", + " (\"one-hot-encoder\", OneHotEncoder(), categorical_features)\n", + " ],\n", + " remainder=\"passthrough\"\n", + ")\n", + "model = Pipeline([\n", + " (\"one-hot-encoder\", categorical_features_onehot_transformer),\n", + " (\"clf\", LogisticRegression(max_iter=1500))\n", + "])\n", + "\n", + "#### train the model\n", + "model = model.fit(X_train, y_train)" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "100%|████████████████████████████████████████████████████████████████████████████████| 590/590 [00:13<00:00, 43.37it/s]" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iHQwJo_PgPB9", + "outputId": "88e849b7-f24a-4b87-fc48-256612ed3626" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy = 85.16%\n" + ] + } + ], + "source": [ + "# showcase model's accuracy\n", + "y_pred = model.predict(X_test)\n", + "print(f\"Accuracy = {(y_test.values == y_pred).sum() / y_test.shape[0]:.2%}\")" + ] }, { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Computing percentages of individuals flipped by any action with cost up to c, for every c\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "avuD6xf-gPB9" + }, + "source": [ + "# A Practical Example of FACTS\n", + "\n", + "The real essence of our work starts here. Specifically, we showcase the generation of candidate subpopulation groups and counterfactuals and the detection of those groups that exhibit the greatest unfairness, with respect to one of several metrics." + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "100%|████████████████████████████████████████████████████████████████████████████████| 416/416 [00:12<00:00, 32.57it/s]\n" - ] - } - ], - "source": [ - "# generates candidate subpopulation groups for bias and candidate actions\n", - "detector = detector.fit(X_test)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Detect Groups with Unfairness in Protected Subgroups (using \"Equal Choice for Recourse\" metric)\n", - "\n", - "Here we demonstrate the `bias_scan` method of our detector, which ranks subpopulation groups from most to least unfair, with respect to the chosen metric and, of course, the protected attribute.\n", - "\n", - "For the purposes of the demo, we use the \"Equal Choice for Recourse\" definition / metric. This posits that the classifier acts fairly for the group in question if the protected subgroups can choose among the same number of sufficiently effective actions to achieve recourse. 
By sufficiently effective we mean those actions (out of all candidates) which work for at least $100\\phi \\%$ (for some $\\phi \\in [0,1]$) of the subgroup.\n", - "\n", - "Given this definition, the respective unfairness *metric* is defined to be the difference in the number of sufficiently effective actions between the two protected subgroups.\n", - "\n", - "**Suggestion**: this metric may find utility in scenarios where the aim is to guarantee that protected subgroups have a similar range of options available to them when it comes to making adjustments in order to attain a favorable outcome. For example, when evaluating job candidates, the employer may wish to ensure that applicants from different backgrounds (that currently fail to meet expectations) have an equal array of career / retraining opportunities that may land them the job, so as to ensure diversity in all sectors of the company, which employ individuals with a plethora of roles." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# Detects the top `top_count` most biased groups based on the given metric\n", - "# available metrics are:\n", - "# - equal-effectiveness\n", - "# - equal-choice-for-recourse\n", - "# - equal-effectiveness-within-budget\n", - "# - equal-cost-of-effectiveness\n", - "# - equal-mean-recourse\n", - "# - fair-tradeoff\n", - "# a short description for each metric is given below\n", - "detector.bias_scan(\n", - " metric=\"equal-choice-for-recourse\",\n", - " phi=0.1,\n", - " top_count=3\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "35vYtSBFgPB9" + }, + "source": [ + "## Load and Fit FACTS" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "If \u001b[1mage = (26.0, 34.0], hours-per-week = FullTime\u001b[0m:\n", - "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m10.59%\u001b[39m 
covered\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m7.73%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m3.98%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m5.39%\u001b[39m.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", - "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m13.78%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m19.66%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m10.63%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m13.39%\u001b[39m.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-3.00\u001b[39m\n", - "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse. 
Unfairness score = 3.\u001b[39m\n", - "If \u001b[1mage = (26.0, 34.0], capital-loss = 0.0, hours-per-week = FullTime\u001b[0m:\n", - "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m10.34%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m7.67%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m4.08%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m5.28%\u001b[39m.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", - "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m13.27%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m18.43%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m9.27%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m11.92%\u001b[39m.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-2.00\u001b[39m\n", - "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse. 
Unfairness score = 2.\u001b[39m\n", - "If \u001b[1mhours-per-week = FullTime, native-country = United-States\u001b[0m:\n", - "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m41.66%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m2.62%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mhours-per-week = BrainDrain\u001b[39m\u001b[0m with effectiveness \u001b[32m1.79%\u001b[39m.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", - "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m46.78%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m10.08%\u001b[39m.\n", - "\t\tMake \u001b[1m\u001b[31mhours-per-week = BrainDrain\u001b[39m\u001b[0m with effectiveness \u001b[32m8.70%\u001b[39m.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-1.00\u001b[39m\n", - "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse. Unfairness score = 1.\u001b[39m\n" - ] - } - ], - "source": [ - "# prints the result into a nicely formatted report\n", - "detector.print_recourse_report(\n", - " show_action_costs=False,\n", - " show_subgroup_costs=True,\n", - " show_unbiased_subgroups=False,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Example Output Breakdown" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let us now disect the above example and the output we see, one step at a time.\n", - "\n", - "#### Prelude: $\\phi = 0.1$\n", - "\n", - "As we mentioned in the general description of this metric, this is the parameter that determines whether we consider an action sufficiently effective or not. 
So, here, we consider an action effective if it manages to flip the prediction for at least 10% of the individuals under study, and ineffective otherwise.\n", - "\n", - "#### **age = (26.0, 34.0], hours-per-week = FullTime**\n", - "\n", - "This is the first (hence, most biased) group. The group description is mostly self-explanatory: everything inside this block concerns all those (affected) individuals that are from 26 (not inclusive) to 34 years old and have a fulltime job. Now, since the output has the same structure for all groups, let us consider this group as an example and further disect the output we see in this block.\n", - "\n", - "#### *Protected subgroups 'Male' / 'Female'*\n", - "\n", - "We split the population of this group, according to the protected attribute. Hence, we distinguish between males that are 26-34 years old and have a fulltime job and females that are 26-34 years old and have a fulltime job.\n", - "\n", - "The \"covered\" percentage reported here in blue signifies that out of all affected females, 10.59% are 26-34 years old and have a fulltime job, while the respective percentage for males is 13.78%.\n", - "\n", - "#### *Make age = (41.0, 50.0], hours-per-week = OverTime*\n", - "\n", - "This is one of the 3 actions we have tried to apply on the individuals in the current subpopulation group. We report the action, along with its effectiveness and, optionally, the cost; here we omit the action cost because the \"Equal Choice for Recourse\" metric does not take it into account.\n", - "\n", - "At this point, let us give a more direct interpretation for the **effectiveness**. In this case, for example, the interpretation could be the following: if all females aged 26-34 with fulltime jobs change their age group to 41-50 years old and their working hours to overtime, then 7.73% of them will actually manage to receive the positive prediction from the model. 
The rest will still receive the negative prediction.\n", - "\n", - "#### *Protected Subgroups' Aggregate Cost*\n", - "\n", - "The \"aggregate cost of the above recourses\" message shows how we quantify the *cost of recourse* for all actions in each protected subgroup.\n", - "\n", - "This is derived directly from the definition of each metric. Here, for example, we use the \"Equal Choice for Recourse\" metric, which counts the number of effective actions available to each of the protected subgroups. In this group, females have no (sufficiently) effective actions, and as such we say that they gain 0 units. Males have 3 effective actions, so they gain 3 units.\n", - "\n", - "Finally, to keep the formalization of having costs everywhere, we rephrase this instead into males having a recourse cost of -3 and females having a recourse cost of 0.\n", - "\n", - "As we also mention in the next paragraph, the final bias score of the subgroup is nothing more than the absolute difference of these 2 costs.\n", - "\n", - "#### *Bias Deduction / Metric Application*\n", - "\n", - "Given the above, one can see that the (same) actions, if applied to females of the subpopulation group, cannot yield more than 10% effectiveness, while in males they achieve up to 19.66%! This is why we argue that, in the terms of bias of recourse, this group exhibits bias against females.\n", - "\n", - "This is, of course, with respect to the \"Equal Choice for Recourse\" metric, which posits that the 2 protected subgroups should have the same number of effective actions. Since none of the 3 actions are sufficiently effective for females, and all 3 of them are sufficiently effective for males, we score this group as having a bias measure of $|0 - 3| = 3$." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Example without Bias of Recourse\n", - "\n", - "For completeness, we also demonstrate how, for some choices of metrics and parameters, FACTS may fail to find any subpopulation groups that exhibit bias between the protected populations, and thus deduce that in this case there is no recourse related bias." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "detector.bias_scan(\n", - " metric=\"equal-choice-for-recourse\",\n", - " phi=0.7,\n", - " top_count=3\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3FEEyse9gPB9" + }, + "outputs": [], + "source": [ + "# load FACTS framework with:\n", + "# - the model to be audited\n", + "# - protected attribute \"sex\" and\n", + "# - assigning equal, unit weights to all features for cost computation.\n", + "# - no features forbidden from changing, i.e. user can specify any features that cannot change at all.\n", + "detector = FACTS(\n", + " clf=model,\n", + " prot_attr=\"sex\",\n", + " feature_weights={f: 1 for f in X.columns},\n", + " feats_not_allowed_to_change=[]\n", + ")" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1mWith the given parameters, no recourses showing unfairness have been found!\u001b[0m\n" - ] - } - ], - "source": [ - "# prints the result into a nicely formatted report\n", - "detector.print_recourse_report(\n", - " show_action_costs=False,\n", - " show_subgroup_costs=True,\n", - " show_unbiased_subgroups=False,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Aternative API\n", - "\n", - "We also provide a more succinct API in the form of a wrapper function. 
This is closer in style to the API of existing `aif360` detectors.\n", - "\n", - "The previous example could be run equivalently with the following." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "duEkwMgngPB-", + "outputId": "ddcb45a9-fa3e-466c-e492-bf6a8b26b2dc" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computing candidate subgroups.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████████████████████████████████████████████████████████████████████| 1046/1046 [00:00<00:00, 523287.45it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of subgroups: 563\n", + "Computing candidate recourses for all subgroups.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "100%|█████████████████████████████████████████████████████████████████████████████| 563/563 [00:00<00:00, 50669.32it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computing percentages of individuals flipped by each action independently.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "100%|████████████████████████████████████████████████████████████████████████████████| 590/590 [00:13<00:00, 43.37it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computing percentages of individuals flipped by any action with cost up to c, for every c\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "100%|████████████████████████████████████████████████████████████████████████████████| 416/416 [00:12<00:00, 32.57it/s]\n" + ] + } + ], + "source": [ + "# generates candidate subpopulation groups for bias and candidate actions\n", + "detector = detector.fit(X_test)" + ] + }, { - "name": "stdout", - "output_type": "stream", 
- "text": [ - "If \u001b[1mage = (26.0, 34.0], hours-per-week = FullTime\u001b[0m:\n", - "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m10.59%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m7.73%\u001b[39m and counterfactual cost = 2.0.\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m3.98%\u001b[39m and counterfactual cost = 1.0.\n", - "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m5.39%\u001b[39m and counterfactual cost = 2.0.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", - "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m13.78%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m19.66%\u001b[39m and counterfactual cost = 2.0.\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m10.63%\u001b[39m and counterfactual cost = 1.0.\n", - "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m13.39%\u001b[39m and counterfactual cost = 2.0.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-3.00\u001b[39m\n", - "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse.. 
Unfairness score = 3.\u001b[39m\n", - "If \u001b[1mage = (26.0, 34.0], capital-loss = 0.0, hours-per-week = FullTime\u001b[0m:\n", - "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m10.34%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m7.67%\u001b[39m and counterfactual cost = 2.0.\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m4.08%\u001b[39m and counterfactual cost = 1.0.\n", - "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m5.28%\u001b[39m and counterfactual cost = 2.0.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", - "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m13.27%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m18.43%\u001b[39m and counterfactual cost = 2.0.\n", - "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m9.27%\u001b[39m and counterfactual cost = 1.0.\n", - "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m11.92%\u001b[39m and counterfactual cost = 2.0.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-2.00\u001b[39m\n", - "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse.. 
Unfairness score = 2.\u001b[39m\n", - "If \u001b[1mhours-per-week = FullTime, native-country = United-States\u001b[0m:\n", - "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m41.66%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m2.62%\u001b[39m and counterfactual cost = 1.0.\n", - "\t\tMake \u001b[1m\u001b[31mhours-per-week = BrainDrain\u001b[39m\u001b[0m with effectiveness \u001b[32m1.79%\u001b[39m and counterfactual cost = 1.0.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", - "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m46.78%\u001b[39m covered\n", - "\t\tMake \u001b[1m\u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m10.08%\u001b[39m and counterfactual cost = 1.0.\n", - "\t\tMake \u001b[1m\u001b[31mhours-per-week = BrainDrain\u001b[39m\u001b[0m with effectiveness \u001b[32m8.70%\u001b[39m and counterfactual cost = 1.0.\n", - "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-1.00\u001b[39m\n", - "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse.. 
Unfairness score = 1.\u001b[39m\n" - ] - } - ], - "source": [ - "most_biased_subgroups = FACTS_bias_scan(\n", - " X=X_test,\n", - " clf=model,\n", - " prot_attr=\"sex\",\n", - " feature_weights={f: 1 for f in X.columns},\n", - " feats_not_allowed_to_change=[],\n", - " metric=\"equal-choice-for-recourse\",\n", - " phi=0.1,\n", - " top_count=3,\n", - " verbose=False, # hides progress bars\n", - " print_recourse_report=True,\n", - " show_action_costs=True,\n", - " show_subgroup_costs=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "cFPOLhkEgPB-" + }, + "source": [ + "## Detect Groups with Unfairness in Protected Subgroups (using \"Equal Choice for Recourse\" metric)\n", + "\n", + "Here we demonstrate the `bias_scan` method of our detector, which ranks subpopulation groups from most to least unfair, with respect to the chosen metric and, of course, the protected attribute.\n", + "\n", + "For the purposes of the demo, we use the \"Equal Choice for Recourse\" definition / metric. This posits that the classifier acts fairly for the group in question if the protected subgroups can choose among the same number of sufficiently effective actions to achieve recourse. By sufficiently effective we mean those actions (out of all candidates) which work for at least $100\\phi \\%$ (for some $\\phi \\in [0,1]$) of the subgroup.\n", + "\n", + "Given this definition, the respective unfairness *metric* is defined to be the difference in the number of sufficiently effective actions between the two protected subgroups.\n", + "\n", + "**Suggestion**: this metric may find utility in scenarios where the aim is to guarantee that protected subgroups have a similar range of options available to them when it comes to making adjustments in order to attain a favorable outcome. 
For example, when evaluating job candidates, the employer may wish to ensure that applicants from different backgrounds (that currently fail to meet expectations) have an equal array of career / retraining opportunities that may land them the job, so as to ensure diversity in all sectors of the company, which employ individuals with a plethora of roles." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lApb3FmVgPB-" + }, + "outputs": [], + "source": [ + "# Detects the top `top_count` most biased groups based on the given metric\n", + "# available metrics are:\n", + "# - equal-effectiveness\n", + "# - equal-choice-for-recourse\n", + "# - equal-effectiveness-within-budget\n", + "# - equal-cost-of-effectiveness\n", + "# - equal-mean-recourse\n", + "# - fair-tradeoff\n", + "# a short description for each metric is given below\n", + "detector.bias_scan(\n", + " metric=\"equal-choice-for-recourse\",\n", + " phi=0.1,\n", + " top_count=3\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QbuX_rC1gPB-", + "outputId": "8292d5e3-b86e-4f3a-fcb5-4eac0093e3ba" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "If \u001b[1mage = (26.0, 34.0], hours-per-week = FullTime\u001b[0m:\n", + "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m10.59%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m7.73%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m3.98%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m5.39%\u001b[39m.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", + "\tProtected Subgroup '\u001b[1mMale\u001b[0m', 
\u001b[34m13.78%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m19.66%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m10.63%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m13.39%\u001b[39m.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-3.00\u001b[39m\n", + "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse. Unfairness score = 3.\u001b[39m\n", + "If \u001b[1mage = (26.0, 34.0], capital-loss = 0.0, hours-per-week = FullTime\u001b[0m:\n", + "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m10.34%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m7.67%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m4.08%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m5.28%\u001b[39m.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", + "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m13.27%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m18.43%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m9.27%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m11.92%\u001b[39m.\n", + 
"\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-2.00\u001b[39m\n", + "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse. Unfairness score = 2.\u001b[39m\n", + "If \u001b[1mhours-per-week = FullTime, native-country = United-States\u001b[0m:\n", + "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m41.66%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m2.62%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mhours-per-week = BrainDrain\u001b[39m\u001b[0m with effectiveness \u001b[32m1.79%\u001b[39m.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", + "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m46.78%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m10.08%\u001b[39m.\n", + "\t\tMake \u001b[1m\u001b[31mhours-per-week = BrainDrain\u001b[39m\u001b[0m with effectiveness \u001b[32m8.70%\u001b[39m.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-1.00\u001b[39m\n", + "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse. 
Unfairness score = 1.\u001b[39m\n" + ] + } + ], + "source": [ + "# prints the result into a nicely formatted report\n", + "detector.print_recourse_report(\n", + " show_action_costs=False,\n", + " show_subgroup_costs=True,\n", + " show_unbiased_subgroups=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HK5JNhNwgPB-" + }, + "source": [ + "### Example Output Breakdown" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xf-Qbq6agPB-" + }, + "source": [ + "Let us now dissect the above example and the output we see, one step at a time.\n", + "\n", + "#### Prelude: $\\phi = 0.1$\n", + "\n", + "As we mentioned in the general description of this metric, this is the parameter that determines whether we consider an action sufficiently effective or not. So, here, we consider an action effective if it manages to flip the prediction for at least 10% of the individuals under study, and ineffective otherwise.\n", + "\n", + "#### **age = (26.0, 34.0], hours-per-week = FullTime**\n", + "\n", + "This is the first (hence, most biased) group. The group description is mostly self-explanatory: everything inside this block concerns all those (affected) individuals that are from 26 (not inclusive) to 34 years old and have a fulltime job. Now, since the output has the same structure for all groups, let us consider this group as an example and further dissect the output we see in this block.\n", + "\n", + "#### *Protected subgroups 'Male' / 'Female'*\n", + "\n", + "We split the population of this group, according to the protected attribute. 
Hence, we distinguish between males that are 26-34 years old and have a fulltime job and females that are 26-34 years old and have a fulltime job.\n", + "\n", + "The \"covered\" percentage reported here in blue signifies that out of all affected females, 10.59% are 26-34 years old and have a fulltime job, while the respective percentage for males is 13.78%.\n", + "\n", + "#### *Make age = (41.0, 50.0], hours-per-week = OverTime*\n", + "\n", + "This is one of the 3 actions we have tried to apply on the individuals in the current subpopulation group. We report the action, along with its effectiveness and, optionally, the cost; here we omit the action cost because the \"Equal Choice for Recourse\" metric does not take it into account.\n", + "\n", + "At this point, let us give a more direct interpretation for the **effectiveness**. In this case, for example, the interpretation could be the following: if all females aged 26-34 with fulltime jobs change their age group to 41-50 years old and their working hours to overtime, then 7.73% of them will actually manage to receive the positive prediction from the model. The rest will still receive the negative prediction.\n", + "\n", + "#### *Protected Subgroups' Aggregate Cost*\n", + "\n", + "The \"aggregate cost of the above recourses\" message shows how we quantify the *cost of recourse* for all actions in each protected subgroup.\n", + "\n", + "This is derived directly from the definition of each metric. Here, for example, we use the \"Equal Choice for Recourse\" metric, which counts the number of effective actions available to each of the protected subgroups. In this group, females have no (sufficiently) effective actions, and as such we say that they gain 0 units. 
Males have 3 effective actions, so they gain 3 units.\n", + "\n", + "Finally, to keep the formalization of having costs everywhere, we rephrase this instead into males having a recourse cost of -3 and females having a recourse cost of 0.\n", + "\n", + "As we also mention in the next paragraph, the final bias score of the subgroup is nothing more than the absolute difference of these 2 costs.\n", + "\n", + "#### *Bias Deduction / Metric Application*\n", + "\n", + "Given the above, one can see that the (same) actions, if applied to females of the subpopulation group, cannot yield more than 10% effectiveness, while in males they achieve up to 19.66%! This is why we argue that, in terms of bias of recourse, this group exhibits bias against females.\n", + "\n", + "This is, of course, with respect to the \"Equal Choice for Recourse\" metric, which posits that the 2 protected subgroups should have the same number of effective actions. Since none of the 3 actions are sufficiently effective for females, and all 3 of them are sufficiently effective for males, we score this group as having a bias measure of $|0 - 3| = 3$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qba9HsyEgPB-" + }, + "source": [ + "### Example without Bias of Recourse\n", + "\n", + "For completeness, we also demonstrate how, for some choices of metrics and parameters, FACTS may fail to find any subpopulation groups that exhibit bias between the protected populations, and thus deduce that in this case there is no recourse-related bias." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9ZazYqQ5gPB_" + }, + "outputs": [], + "source": [ + "detector.bias_scan(\n", + " metric=\"equal-choice-for-recourse\",\n", + " phi=0.7,\n", + " top_count=3\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LNuBKASZgPB_", + "outputId": "f2c75748-e4af-4719-b774-c8904bb5f6f6" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1mWith the given parameters, no recourses showing unfairness have been found!\u001b[0m\n" + ] + } + ], + "source": [ + "# prints the result into a nicely formatted report\n", + "detector.print_recourse_report(\n", + " show_action_costs=False,\n", + " show_subgroup_costs=True,\n", + " show_unbiased_subgroups=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HIp_hVhLgPB_" + }, + "source": [ + "## Alternative API\n", + "\n", + "We also provide a more succinct API in the form of a wrapper function. This is closer in style to the API of existing `aif360` detectors.\n", + "\n", + "The first example above (the scan with $\\phi = 0.1$) could be run equivalently with the following." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RyIvVWtKgPB_", + "outputId": "cfc9da11-5120-48f1-fbad-63c5018f4b95" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "If \u001b[1mage = (26.0, 34.0], hours-per-week = FullTime\u001b[0m:\n", + "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m10.59%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m7.73%\u001b[39m and counterfactual cost = 2.0.\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m3.98%\u001b[39m and counterfactual cost = 1.0.\n", + "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m5.39%\u001b[39m and counterfactual cost = 2.0.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", + "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m13.78%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m19.66%\u001b[39m and counterfactual cost = 2.0.\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m10.63%\u001b[39m and counterfactual cost = 1.0.\n", + "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m13.39%\u001b[39m and counterfactual cost = 2.0.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-3.00\u001b[39m\n", + "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse.. 
Unfairness score = 3.\u001b[39m\n", + "If \u001b[1mage = (26.0, 34.0], capital-loss = 0.0, hours-per-week = FullTime\u001b[0m:\n", + "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m10.34%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m7.67%\u001b[39m and counterfactual cost = 2.0.\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m4.08%\u001b[39m and counterfactual cost = 1.0.\n", + "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m5.28%\u001b[39m and counterfactual cost = 2.0.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", + "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m13.27%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m18.43%\u001b[39m and counterfactual cost = 2.0.\n", + "\t\tMake \u001b[1m\u001b[31mage = (41.0, 50.0]\u001b[39m\u001b[0m with effectiveness \u001b[32m9.27%\u001b[39m and counterfactual cost = 1.0.\n", + "\t\tMake \u001b[1m\u001b[31mage = (34.0, 41.0]\u001b[39m, \u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m11.92%\u001b[39m and counterfactual cost = 2.0.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-2.00\u001b[39m\n", + "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse.. 
Unfairness score = 2.\u001b[39m\n", + "If \u001b[1mhours-per-week = FullTime, native-country = United-States\u001b[0m:\n", + "\tProtected Subgroup '\u001b[1mFemale\u001b[0m', \u001b[34m41.66%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m2.62%\u001b[39m and counterfactual cost = 1.0.\n", + "\t\tMake \u001b[1m\u001b[31mhours-per-week = BrainDrain\u001b[39m\u001b[0m with effectiveness \u001b[32m1.79%\u001b[39m and counterfactual cost = 1.0.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m0.00\u001b[39m\n", + "\tProtected Subgroup '\u001b[1mMale\u001b[0m', \u001b[34m46.78%\u001b[39m covered\n", + "\t\tMake \u001b[1m\u001b[31mhours-per-week = OverTime\u001b[39m\u001b[0m with effectiveness \u001b[32m10.08%\u001b[39m and counterfactual cost = 1.0.\n", + "\t\tMake \u001b[1m\u001b[31mhours-per-week = BrainDrain\u001b[39m\u001b[0m with effectiveness \u001b[32m8.70%\u001b[39m and counterfactual cost = 1.0.\n", + "\t\t\u001b[1mAggregate cost\u001b[0m of the above recourses = \u001b[35m-1.00\u001b[39m\n", + "\t\u001b[35mBias against Female with respect to equal-choice-for-recourse.. 
Unfairness score = 1.\u001b[39m\n" + ] + } + ], + "source": [ + "most_biased_subgroups = FACTS_bias_scan(\n", + " X=X_test,\n", + " clf=model,\n", + " prot_attr=\"sex\",\n", + " feature_weights={f: 1 for f in X.columns},\n", + " feats_not_allowed_to_change=[],\n", + " metric=\"equal-choice-for-recourse\",\n", + " phi=0.1,\n", + " top_count=3,\n", + " verbose=False, # hides progress bars\n", + " print_recourse_report=True,\n", + " show_action_costs=True,\n", + " show_subgroup_costs=True,\n", + ")" + ] + }, { - "data": { - "text/plain": [ - "[({'hours-per-week': 'FullTime', 'native-country': 'United-States'}, 1),\n", - " ({'age': Interval(26.0, 34.0, closed='right'), 'hours-per-week': 'FullTime'},\n", - " 3),\n", - " ({'age': Interval(26.0, 34.0, closed='right'),\n", - " 'capital-loss': 0.0,\n", - " 'hours-per-week': 'FullTime'},\n", - " 2)]" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IGbEBY7NgPB_", + "outputId": "5796d312-5ef6-4b0f-c2ec-8888e3376552" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[({'hours-per-week': 'FullTime', 'native-country': 'United-States'}, 1),\n", + " ({'age': Interval(26.0, 34.0, closed='right'), 'hours-per-week': 'FullTime'},\n", + " 3),\n", + " ({'age': Interval(26.0, 34.0, closed='right'),\n", + " 'capital-loss': 0.0,\n", + " 'hours-per-week': 'FullTime'},\n", + " 2)]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "most_biased_subgroups" ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3po1Pz8KgPB_" + }, + "source": [ + "# Short Description of all Definitions / Metrics of Subgroup Recourse Fairness\n", + "\n", + "Here we give a brief description of each of the metrics available in our framework apart from \"Equal Choice for Recourse\".\n", + "\n", + "## Equal Effectiveness\n", + "\n", + "The classifier is considered to act fairly for a 
population group if the same proportion of individuals in the protected subgroups can achieve recourse.\n", + "\n", + "**Suggestion**: this metric ignores costs altogether and compares only the percentage of males VS females that can cross the model's decision boundary by the same actions. We would use it in applications where the goal is equal impact, in the sense that a change (or a set thereof) affects the same proportion of individuals in the protected subgroups. For example, in a hiring scenario, a similar proportion of males and females are expected to benefit from the same change.\n", + "\n", + "## Equal Effectiveness within Budget\n", + "\n", + "The classifier is considered to act fairly for a population group if the same proportion of individuals in the protected subgroups can achieve recourse with a cost at most $c$, where $c$ is some user-provided cost budget.\n", + "\n", + "**Suggestion**: this metric is similar to the above, but puts a bound on how large the cost of an action can be. Could be used to limit changes with undesirably large cost, e.g., salary changes up to 10K.\n", + "\n", + "## Equal Cost of Effectiveness\n", + "\n", + "The classifier is considered to act fairly for a population group if the minimum cost required to be sufficiently effective in the protected subgroups is equal. Again, as in \"Equal Choice for Recourse\", by \"sufficiently effective\" we refer to those actions that successfully flip the model's decision for at least $100\\phi \\%$ (for $\\phi \\in [0,1]$) of the subgroup.\n", + "\n", + "**Suggestion**: this metric could be useful when an external factor imposes a specific threshold, e.g. 
in credit risk assessment, a guideline which states that the effort required to be 80% certain that you will have your loan accepted should be the same for males and females.\n", + "\n", + "## Equal (Conditional) Mean Recourse\n", + "\n", + "This definition extends the notion of *burden* from the literature ([reference](https://dl.acm.org/doi/10.1145/3375627.3375812)) to the case where not all individuals may achieve recourse. Omitting some details, given any set of individuals, the **conditional mean recourse cost** is the mean recourse cost among the subset of individuals that can actually achieve recourse, i.e. by at least one of the available actions.\n", + "\n", + "Given the above, this definition considers the classifier to act fairly for a population group if the (conditional) mean recourse cost for the protected subgroups is the same.\n", + "\n", + "**Suggestion**: this metric compares the mean cost required to achieve recourse for the protected subgroups. It could be useful in a scenario like loan approval, where one needs to ensure that the cost of changes needed to receive the loan is the same for males and females on average.\n", + "\n", + "## Fair Effectiveness-Cost Trade-Off\n", + "\n", + "This is the strictest definition, which considers the classifier to act fairly for a population group only if the protected subgroups have the same effectiveness-cost distribution (checked in the implementation via a statistical test).\n", + "\n", + "Equivalently, Equal Effectiveness within Budget must hold for *every* value of the cost budget $c$.\n", + "\n", + "**Suggestion**: this metric considers all available actions and compares all their possible trade-offs between effectiveness and cost among the protected subgroups. 
This could be useful for cases where the protected attribute should have absolutely no impact on the available options to achieve recourse, such as in high-risk situations like estimating the risk of a convicted individual to act unlawfully in the future (as in the well known [COMPAS dataset](https://www.propublica.org/datastore/dataset/compas-recidivism-risk-score-data-and-analysis))." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5sGwLyBNgPCA" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + }, + "colab": { + "provenance": [] } - ], - "source": [ - "most_biased_subgroups" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Short Description of all Definitions / Metrics of Subgroup Recourse Fairness\n", - "\n", - "Here we give a brief description of each of the metrics available in our framework apart from \"Equal Choice for Recourse\".\n", - "\n", - "## Equal Effectiveness\n", - "\n", - "The classifier is considered to act fairly for a population group if the same proportion of individuals in the protected subgroups can achieve recourse.\n", - "\n", - "**Suggestion**: this metric ignores costs altogether and compares only the percentage of males VS females that can cross the model's decision boundary by the same actions. We would use it in applications where the goal is equal impact, in the sense that a change (or a set thereof) affects the same proportion of individuals in the protected subgroups. 
For example, in a hiring scenario, a similar proportion of males and females are expected to benefit from the same change.\n", - "\n", - "## Equal Effectiveness within Budget\n", - "\n", - "The classifier is considered to act fairly for a population group if the same proportion of individuals in the protected subgroups can achieve recourse with a cost at most $c$, where $c$ is some user-provided cost budget.\n", - "\n", - "**Suggestion**: this metric is similar to the above, but puts a bound on how large the cost of an action can be. Could be used to limit changes with undesirably large cost, e.g., salary changes up to 10K.\n", - "\n", - "## Equal Cost of Effectiveness\n", - "\n", - "The classifier is considered to act fairly for a population group if the minimum cost required to be sufficiently effective in the protected subgroups is equal. Again, as in \"Equal Choice for Recourse\", by \"sufficiently effective\" we refer to those actions that successfully flip the model's decision for at least $100\\phi \\%$ (for $\\phi \\in [0,1]$) of the subgroup.\n", - "\n", - "**Suggestion**: this metric could be useful when an external factor imposes a specific threshold, e.g. in credit risk assessment, a guideline which states that the effort required to be 80% certain that you will have your loan accepted should be the same for males and females.\n", - "\n", - "## Equal (Conditional) Mean Recourse\n", - "\n", - "This definition extends the notion of *burden* from literature ([reference](https://dl.acm.org/doi/10.1145/3375627.3375812)) to the case where not all individuals may achieve recourse. Omitting some details, given any set of individuals, the **conditional mean recourse cost** is the mean recourse cost among the subset of individuals that can actually achieve recourse, i.e. 
by at least one of the available actions.\n", - "\n", - "Given the above, this definition considers the classifier to act fairly for a population group if the (conditional) mean recourse cost for the protected subgroups is the same.\n", - "\n", - "**Suggestion**: this metric compares the mean cost required to achieve recourse for the protected subgroups. It could be useful in a scenario like loan approval, where one needs to ensure that the cost of changes needed to receive the loan are the same for males and females on average.\n", - "\n", - "## Fair Effectiveness-Cost Trade-Off\n", - "\n", - "This is the strictest definition, which considers the classifier to act fairly for a population group only if the protected subgroups have the same effectiveness-cost distribution (checked in the implementation via a statistical test).\n", - "\n", - "Equivalently, Equal Effectiveness within Budget must hold for *every* value of the cost budget $c$.\n", - "\n", - "**Suggestion**: this metric considers all available actions and compares all their possible trade-offs between effectiveness and cost among the protected subgroups. This could be useful for cases where the protected attribute should have absolutely no impact on the available options to achieve recourse, such as in high-risk situations like estimating the risk of a convicted individual to act unlawfully in the future (as in the well known [COMPAS dataset](https://www.propublica.org/datastore/dataset/compas-recidivism-risk-score-data-and-analysis))." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.18" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/demo_gerryfair.ipynb b/examples/demo_gerryfair.ipynb index efdb5983..6b6352f5 100644 --- a/examples/demo_gerryfair.ipynb +++ b/examples/demo_gerryfair.ipynb @@ -1,884 +1,918 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "pycharm": { - "is_executing": false - } - }, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\")\n", - "import sys\n", - "sys.path.append(\"../\")\n", - "from aif360.algorithms.inprocessing import GerryFairClassifier\n", - "from aif360.algorithms.inprocessing.gerryfair.clean import array_to_tuple\n", - "from aif360.algorithms.inprocessing.gerryfair.auditor import Auditor\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", - "from sklearn import svm\n", - "from sklearn import tree\n", - "from sklearn.kernel_ridge import KernelRidge\n", - "from sklearn import linear_model\n", - "from aif360.metrics import BinaryLabelDatasetMetric\n", - "from IPython.display import Image\n", - "import pickle\n", - "import matplotlib.pyplot as plt\n", - "\n", - "# load data set\n", - "data_set = load_preproc_data_adult(sub_samp=1000, balance=True)\n", - "max_iterations = 500" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**instantiate, 
fit, and predict** \n", - "\n", - "\n", - "We first demonstrate how to instantiate a `GerryFairClassifier`, `train` it with respect to rich subgroup fairness, and `predict` the label of a new example. We remark that when we set the `print_flag = True` at each iteration of the algorithm we print the error, fairness violation, and violated group size of most recent model. The error is the classification error of the classifier. At each round the Learner tries to find a classifier that minimizes the classification error plus a weighted sum of the fairness disparities on all the groups that the Auditor has found up until that point. By contrast the Auditor tries to find the group at each round with the greatest rich subgroup disparity with respect to the Learner's model. We define `violated group size` as the size (as a fraction of the dataset size) of this group, and the `fairness violation` as the `violated group size` times the difference in the statistical rate (FP or FN rate) on the group vs. the whole population. \n", - "\n", - "In the example below we set `max_iterations=500` which is an order of magnitude less than the time to convergence observed in [the rich subgroup fairness empirical paper](https://arxiv.org/abs/1808.08166), but advise that this can be highly dataset dependent. Our target $\\gamma$-disparity is $\\gamma = .005$, our statistical rate is false positive rate or `FP`, and our cost-sensitive classification oracle is linear regression (more on that below). 
\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "iteration: 1, error: 0.263, fairness violation: 0.028780000000000007, violated group size: 0.217\n", - "iteration: 2, error: 0.3815, fairness violation: 0.014390000000000003, violated group size: 0.217\n", - "iteration: 3, error: 0.42099999999999993, fairness violation: 0.009593333333333339, violated group size: 0.283\n", - "iteration: 4, error: 0.44075, fairness violation: 0.007195000000000002, violated group size: 0.217\n", - "iteration: 5, error: 0.45260000000000006, fairness violation: 0.005756000000000001, violated group size: 0.217\n", - "iteration: 6, error: 0.4605000000000001, fairness violation: 0.004796666666666668, violated group size: 0.283\n", - "iteration: 7, error: 0.4661428571428572, fairness violation: 0.004111428571428572, violated group size: 0.217\n", - "iteration: 8, error: 0.470375, fairness violation: 0.0035975000000000017, violated group size: 0.217\n", - "iteration: 9, error: 0.4691111111111112, fairness violation: 0.0033906666666666677, violated group size: 0.283\n", - "iteration: 10, error: 0.4681, fairness violation: 0.003225200000000001, violated group size: 0.283\n", - "iteration: 11, error: 0.4672727272727271, fairness violation: 0.0030898181818181836, violated group size: 0.283\n", - "iteration: 12, error: 0.4665833333333333, fairness violation: 0.0029769999999999996, violated group size: 0.217\n", - "iteration: 13, error: 0.466, fairness violation: 0.0028815384615384627, violated group size: 0.283\n", - "iteration: 14, error: 0.4655000000000001, fairness violation: 0.0027997142857142865, violated group size: 0.217\n", - "iteration: 15, error: 0.46506666666666674, fairness violation: 0.002728800000000001, violated group size: 0.217\n", - "iteration: 16, error: 0.4646875, fairness violation: 0.0026667500000000007, 
violated group size: 0.217\n", - "iteration: 17, error: 0.4643529411764707, fairness violation: 0.002612000000000001, violated group size: 0.283\n", - "iteration: 18, error: 0.46405555555555567, fairness violation: 0.002563333333333334, violated group size: 0.217\n", - "iteration: 19, error: 0.4637894736842106, fairness violation: 0.0025197894736842096, violated group size: 0.217\n", - "iteration: 20, error: 0.46354999999999996, fairness violation: 0.0024806000000000008, violated group size: 0.283\n", - "iteration: 21, error: 0.4633333333333334, fairness violation: 0.0024451428571428584, violated group size: 0.217\n", - "iteration: 22, error: 0.4631363636363638, fairness violation: 0.0024129090909090914, violated group size: 0.283\n", - "iteration: 23, error: 0.46295652173913054, fairness violation: 0.002383478260869566, violated group size: 0.217\n", - "iteration: 24, error: 0.4627916666666667, fairness violation: 0.002356500000000001, violated group size: 0.283\n", - "iteration: 25, error: 0.4626400000000001, fairness violation: 0.0023316800000000018, violated group size: 0.283\n", - "iteration: 26, error: 0.4625000000000001, fairness violation: 0.0023087692307692314, violated group size: 0.217\n", - "iteration: 27, error: 0.4623703703703705, fairness violation: 0.0022875555555555557, violated group size: 0.217\n", - "iteration: 28, error: 0.46224999999999994, fairness violation: 0.0022678571428571426, violated group size: 0.217\n", - "iteration: 29, error: 0.46213793103448264, fairness violation: 0.0022495172413793106, violated group size: 0.217\n", - "iteration: 30, error: 0.46203333333333335, fairness violation: 0.0022324000000000003, violated group size: 0.217\n", - "iteration: 31, error: 0.46193548387096783, fairness violation: 0.0022163870967741935, violated group size: 0.217\n", - "iteration: 32, error: 0.46184375, fairness violation: 0.0022013749999999993, violated group size: 0.217\n", - "iteration: 33, error: 0.459969696969697, fairness violation: 
0.0023319393939393944, violated group size: 0.283\n", - "iteration: 34, error: 0.4582058823529412, fairness violation: 0.002454823529411765, violated group size: 0.217\n", - "iteration: 35, error: 0.45654285714285714, fairness violation: 0.0025706857142857144, violated group size: 0.217\n", - "iteration: 36, error: 0.4549722222222221, fairness violation: 0.0026801111111111114, violated group size: 0.283\n", - "iteration: 37, error: 0.4534864864864866, fairness violation: 0.0027836216216216214, violated group size: 0.283\n", - "iteration: 38, error: 0.45207894736842097, fairness violation: 0.0028816842105263162, violated group size: 0.283\n", - "iteration: 39, error: 0.4507435897435898, fairness violation: 0.0029747179487179492, violated group size: 0.217\n", - "iteration: 40, error: 0.44947499999999996, fairness violation: 0.0030631000000000005, violated group size: 0.217\n", - "iteration: 41, error: 0.44826829268292684, fairness violation: 0.0031471707317073175, violated group size: 0.283\n", - "iteration: 42, error: 0.4471190476190476, fairness violation: 0.0032272380952380955, violated group size: 0.217\n", - "iteration: 43, error: 0.44602325581395347, fairness violation: 0.0033035813953488386, violated group size: 0.283\n", - "iteration: 44, error: 0.44497727272727267, fairness violation: 0.0033764545454545453, violated group size: 0.283\n", - "iteration: 45, error: 0.4439777777777778, fairness violation: 0.003446088888888888, violated group size: 0.217\n", - "iteration: 46, error: 0.44302173913043474, fairness violation: 0.0035126956521739122, violated group size: 0.217\n", - "iteration: 47, error: 0.44210638297872346, fairness violation: 0.0035764680851063826, violated group size: 0.217\n", - "iteration: 48, error: 0.4412291666666666, fairness violation: 0.003637583333333332, violated group size: 0.217\n", - "iteration: 49, error: 0.4403877551020407, fairness violation: 0.0036962040816326523, violated group size: 0.217\n", - "iteration: 50, error: 
0.4395600000000001, fairness violation: 0.0037524800000000003, violated group size: 0.217\n", - "iteration: 51, error: 0.43876470588235295, fairness violation: 0.0038065490196078425, violated group size: 0.217\n", - "iteration: 52, error: 0.438, fairness violation: 0.003858538461538461, violated group size: 0.283\n", - "iteration: 53, error: 0.4372641509433963, fairness violation: 0.003908566037735848, violated group size: 0.217\n", - "iteration: 54, error: 0.4365555555555556, fairness violation: 0.003956740740740741, violated group size: 0.283\n", - "iteration: 55, error: 0.4358181818181819, fairness violation: 0.004003163636363636, violated group size: 0.217\n", - "iteration: 56, error: 0.4351071428571429, fairness violation: 0.004047928571428571, violated group size: 0.217\n", - "iteration: 57, error: 0.4344736842105262, fairness violation: 0.004091122807017543, violated group size: 0.217\n", - "iteration: 58, error: 0.43381034482758624, fairness violation: 0.004132827586206895, violated group size: 0.217\n", - "iteration: 59, error: 0.4331694915254237, fairness violation: 0.0041731186440677965, violated group size: 0.283\n", - "iteration: 60, error: 0.43254999999999993, fairness violation: 0.004212066666666666, violated group size: 0.217\n", - "iteration: 61, error: 0.4319508196721312, fairness violation: 0.004249737704918031, violated group size: 0.217\n", - "iteration: 62, error: 0.4313709677419356, fairness violation: 0.004286193548387096, violated group size: 0.217\n", - "iteration: 63, error: 0.43080952380952386, fairness violation: 0.004321492063492062, violated group size: 0.283\n", - "iteration: 64, error: 0.430265625, fairness violation: 0.004355687499999999, violated group size: 0.283\n", - "iteration: 65, error: 0.4297384615384615, fairness violation: 0.004388830769230769, violated group size: 0.283\n", - "iteration: 66, error: 0.42922727272727274, fairness violation: 0.004420969696969697, violated group size: 0.217\n", - "iteration: 67, error: 
0.42873134328358203, fairness violation: 0.004452149253731343, violated group size: 0.217\n", - "iteration: 68, error: 0.42824999999999996, fairness violation: 0.0044824117647058815, violated group size: 0.283\n", - "iteration: 69, error: 0.42778260869565227, fairness violation: 0.004511797101449274, violated group size: 0.217\n", - "iteration: 70, error: 0.42732857142857145, fairness violation: 0.004540342857142856, violated group size: 0.283\n", - "iteration: 71, error: 0.42688732394366197, fairness violation: 0.004568084507042252, violated group size: 0.217\n", - "iteration: 72, error: 0.4264583333333332, fairness violation: 0.004595055555555555, violated group size: 0.283\n", - "iteration: 73, error: 0.42604109589041106, fairness violation: 0.004621287671232876, violated group size: 0.217\n", - "iteration: 74, error: 0.4256351351351351, fairness violation: 0.0046468108108108095, violated group size: 0.283\n", - "iteration: 75, error: 0.42524, fairness violation: 0.004671653333333331, violated group size: 0.217\n", - "iteration: 76, error: 0.4248552631578947, fairness violation: 0.004695842105263155, violated group size: 0.217\n", - "iteration: 77, error: 0.42448051948051946, fairness violation: 0.004719402597402596, violated group size: 0.217\n", - "iteration: 78, error: 0.4239871794871795, fairness violation: 0.00475905128205128, violated group size: 0.217\n", - "iteration: 79, error: 0.42363291139240505, fairness violation: 0.004781215189873418, violated group size: 0.283\n", - "iteration: 80, error: 0.42328750000000015, fairness violation: 0.004802824999999999, violated group size: 0.283\n" - ] + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_gerryfair.ipynb)" + ], + "metadata": { + "id": "_Y3xm4CthJnm" + } }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "iteration: 81, error: 
0.4229506172839506, fairness violation: 0.004823901234567901, violated group size: 0.283\n", - "iteration: 82, error: 0.4226219512195123, fairness violation: 0.004844463414634145, violated group size: 0.217\n", - "iteration: 83, error: 0.4221807228915662, fairness violation: 0.004880216867469879, violated group size: 0.217\n", - "iteration: 84, error: 0.42175, fairness violation: 0.004915119047619047, violated group size: 0.217\n", - "iteration: 85, error: 0.4214470588235294, fairness violation: 0.004933882352941174, violated group size: 0.217\n", - "iteration: 86, error: 0.4210348837209302, fairness violation: 0.004967348837209301, violated group size: 0.217\n", - "iteration: 87, error: 0.420632183908046, fairness violation: 0.005000045977011494, violated group size: 0.283\n", - "iteration: 88, error: 0.42035227272727277, fairness violation: 0.0050172045454545434, violated group size: 0.217\n", - "iteration: 89, error: 0.4200786516853933, fairness violation: 0.005033977528089887, violated group size: 0.217\n", - "iteration: 90, error: 0.4198111111111112, fairness violation: 0.005050377777777776, violated group size: 0.283\n", - "iteration: 91, error: 0.4195824175824176, fairness violation: 0.0050664175824175805, violated group size: 0.217\n", - "iteration: 92, error: 0.4193695652173913, fairness violation: 0.005082108695652173, violated group size: 0.217\n", - "iteration: 93, error: 0.41916129032258065, fairness violation: 0.005097462365591397, violated group size: 0.217\n", - "iteration: 94, error: 0.41895744680851066, fairness violation: 0.005112489361702126, violated group size: 0.217\n", - "iteration: 95, error: 0.41875789473684216, fairness violation: 0.005127199999999998, violated group size: 0.217\n", - "iteration: 96, error: 0.41856250000000006, fairness violation: 0.005141604166666665, violated group size: 0.283\n", - "iteration: 97, error: 0.418979381443299, fairness violation: 0.005106494845360823, violated group size: 0.217\n", - "iteration: 98, error: 
0.41938775510204085, fairness violation: 0.005072102040816325, violated group size: 0.217\n", - "iteration: 99, error: 0.4197878787878788, fairness violation: 0.0050384040404040376, violated group size: 0.217\n", - "iteration: 100, error: 0.42018000000000005, fairness violation: 0.0050053799999999985, violated group size: 0.217\n", - "iteration: 101, error: 0.42056435643564366, fairness violation: 0.004973009900990098, violated group size: 0.217\n", - "iteration: 102, error: 0.42094117647058826, fairness violation: 0.00494127450980392, violated group size: 0.217\n", - "iteration: 103, error: 0.4213106796116506, fairness violation: 0.004910155339805824, violated group size: 0.217\n", - "iteration: 104, error: 0.4216730769230769, fairness violation: 0.004879634615384614, violated group size: 0.217\n", - "iteration: 105, error: 0.4220285714285715, fairness violation: 0.004849695238095237, violated group size: 0.217\n", - "iteration: 106, error: 0.4223773584905662, fairness violation: 0.004820320754716981, violated group size: 0.283\n", - "iteration: 107, error: 0.42271962616822434, fairness violation: 0.004791495327102803, violated group size: 0.217\n", - "iteration: 108, error: 0.4230555555555556, fairness violation: 0.0047632037037037035, violated group size: 0.217\n", - "iteration: 109, error: 0.4233853211009175, fairness violation: 0.00473543119266055, violated group size: 0.217\n", - "iteration: 110, error: 0.4237090909090908, fairness violation: 0.004708163636363636, violated group size: 0.217\n", - "iteration: 111, error: 0.424027027027027, fairness violation: 0.004681387387387387, violated group size: 0.283\n", - "iteration: 112, error: 0.42433928571428586, fairness violation: 0.004655089285714286, violated group size: 0.283\n", - "iteration: 113, error: 0.4241238938053097, fairness violation: 0.004671504424778761, violated group size: 0.217\n", - "iteration: 114, error: 0.42442982456140343, fairness violation: 0.004645754385964912, violated group size: 
0.283\n", - "iteration: 115, error: 0.42473043478260875, fairness violation: 0.0046204521739130425, violated group size: 0.283\n", - "iteration: 116, error: 0.42502586206896553, fairness violation: 0.0045955862068965524, violated group size: 0.283\n", - "iteration: 117, error: 0.42481196581196584, fairness violation: 0.004611948717948717, violated group size: 0.217\n", - "iteration: 118, error: 0.4251016949152542, fairness violation: 0.004587576271186439, violated group size: 0.217\n", - "iteration: 119, error: 0.42489075630252104, fairness violation: 0.004603731092436974, violated group size: 0.217\n", - "iteration: 120, error: 0.4251750000000001, fairness violation: 0.0045798333333333325, violated group size: 0.217\n", - "iteration: 121, error: 0.4249669421487604, fairness violation: 0.004595785123966942, violated group size: 0.283\n", - "iteration: 122, error: 0.4247622950819671, fairness violation: 0.0046114754098360656, violated group size: 0.217\n", - "iteration: 123, error: 0.42456097560975614, fairness violation: 0.00462691056910569, violated group size: 0.217\n", - "iteration: 124, error: 0.42436290322580644, fairness violation: 0.004642096774193548, violated group size: 0.217\n", - "iteration: 125, error: 0.4241680000000001, fairness violation: 0.00465704, violated group size: 0.217\n", - "iteration: 126, error: 0.4239761904761905, fairness violation: 0.004671746031746031, violated group size: 0.217\n", - "iteration: 127, error: 0.42425196850393704, fairness violation: 0.004648629921259842, violated group size: 0.217\n", - "iteration: 128, error: 0.4240625, fairness violation: 0.004663171874999999, violated group size: 0.217\n", - "iteration: 129, error: 0.4238759689922481, fairness violation: 0.004677488372093024, violated group size: 0.283\n", - "iteration: 130, error: 0.42369230769230776, fairness violation: 0.004691584615384614, violated group size: 0.217\n", - "iteration: 131, error: 0.42351145038167937, fairness violation: 0.004705465648854962, 
violated group size: 0.217\n", - "iteration: 132, error: 0.4233333333333333, fairness violation: 0.004719136363636364, violated group size: 0.283\n", - "iteration: 133, error: 0.423157894736842, fairness violation: 0.0047326015037594, violated group size: 0.217\n", - "iteration: 134, error: 0.4229850746268656, fairness violation: 0.004745865671641791, violated group size: 0.217\n", - "iteration: 135, error: 0.42281481481481475, fairness violation: 0.004758933333333335, violated group size: 0.283\n", - "iteration: 136, error: 0.4226470588235294, fairness violation: 0.004771808823529411, violated group size: 0.217\n", - "iteration: 137, error: 0.42248175182481745, fairness violation: 0.004784496350364964, violated group size: 0.283\n", - "iteration: 138, error: 0.42231884057971014, fairness violation: 0.004797000000000002, violated group size: 0.283\n", - "iteration: 139, error: 0.42215827338129497, fairness violation: 0.004809323741007196, violated group size: 0.283\n", - "iteration: 140, error: 0.42200000000000004, fairness violation: 0.004821471428571429, violated group size: 0.217\n", - "iteration: 141, error: 0.4218439716312057, fairness violation: 0.0048334468085106394, violated group size: 0.217\n", - "iteration: 142, error: 0.42169014084507045, fairness violation: 0.004845253521126761, violated group size: 0.283\n", - "iteration: 143, error: 0.4215384615384616, fairness violation: 0.004856895104895106, violated group size: 0.283\n", - "iteration: 144, error: 0.4213888888888888, fairness violation: 0.004868375, violated group size: 0.217\n", - "iteration: 145, error: 0.42124137931034483, fairness violation: 0.004879696551724138, violated group size: 0.217\n", - "iteration: 146, error: 0.4210958904109589, fairness violation: 0.00489086301369863, violated group size: 0.217\n", - "iteration: 147, error: 0.4209523809523809, fairness violation: 0.004901877551020409, violated group size: 0.217\n", - "iteration: 148, error: 0.42081081081081084, fairness violation: 
0.004912743243243244, violated group size: 0.217\n", - "iteration: 149, error: 0.42067114093959734, fairness violation: 0.004923463087248323, violated group size: 0.283\n", - "iteration: 150, error: 0.4205333333333334, fairness violation: 0.004934040000000001, violated group size: 0.217\n", - "iteration: 151, error: 0.4203973509933776, fairness violation: 0.004944476821192053, violated group size: 0.217\n", - "iteration: 152, error: 0.4202631578947368, fairness violation: 0.0049547763157894754, violated group size: 0.283\n", - "iteration: 153, error: 0.4201307189542483, fairness violation: 0.00496494117647059, violated group size: 0.283\n", - "iteration: 154, error: 0.42, fairness violation: 0.004974974025974027, violated group size: 0.283\n", - "iteration: 155, error: 0.4198709677419355, fairness violation: 0.0049848774193548395, violated group size: 0.217\n", - "iteration: 156, error: 0.4197435897435898, fairness violation: 0.004994653846153847, violated group size: 0.217\n", - "iteration: 157, error: 0.4196178343949045, fairness violation: 0.0050043057324840766, violated group size: 0.217\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": false + }, + "id": "0xQSRpo5hI5H" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "import sys\n", + "sys.path.append(\"../\")\n", + "from aif360.algorithms.inprocessing import GerryFairClassifier\n", + "from aif360.algorithms.inprocessing.gerryfair.clean import array_to_tuple\n", + "from aif360.algorithms.inprocessing.gerryfair.auditor import Auditor\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", + "from sklearn import svm\n", + "from sklearn import tree\n", + "from sklearn.kernel_ridge import KernelRidge\n", + "from sklearn import linear_model\n", + "from aif360.metrics import BinaryLabelDatasetMetric\n", + "from 
IPython.display import Image\n", + "import pickle\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# load data set\n", + "data_set = load_preproc_data_adult(sub_samp=1000, balance=True)\n", + "max_iterations = 500" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "iteration: 158, error: 0.4194936708860761, fairness violation: 0.005013835443037975, violated group size: 0.217\n", - "iteration: 159, error: 0.41937106918239, fairness violation: 0.005023245283018869, violated group size: 0.283\n", - "iteration: 160, error: 0.41925000000000007, fairness violation: 0.0050325375, violated group size: 0.217\n", - "iteration: 161, error: 0.4191304347826087, fairness violation: 0.005041714285714285, violated group size: 0.217\n", - "iteration: 162, error: 0.41901234567901235, fairness violation: 0.005050777777777778, violated group size: 0.283\n", - "iteration: 163, error: 0.41889570552147243, fairness violation: 0.005059730061349694, violated group size: 0.283\n", - "iteration: 164, error: 0.4191402439024391, fairness violation: 0.005039463414634148, violated group size: 0.283\n", - "iteration: 165, error: 0.41938181818181824, fairness violation: 0.005019442424242424, violated group size: 0.217\n", - "iteration: 166, error: 0.4192650602409639, fairness violation: 0.005028421686746988, violated group size: 0.217\n", - "iteration: 167, error: 0.41950299401197605, fairness violation: 0.005008706586826348, violated group size: 0.217\n", - "iteration: 168, error: 0.41973809523809513, fairness violation: 0.004989226190476189, violated group size: 0.217\n", - "iteration: 169, error: 0.41997041420118336, fairness violation: 0.0049699763313609474, violated group size: 0.283\n", - "iteration: 170, error: 0.4202, fairness violation: 0.00495095294117647, violated group size: 0.283\n", - "iteration: 171, error: 0.4204269005847953, fairness violation: 0.004932152046783625, violated group size: 0.217\n", - "iteration: 172, error: 0.42065116279069764, fairness 
violation: 0.00491356976744186, violated group size: 0.217\n", - "iteration: 173, error: 0.4208728323699421, fairness violation: 0.004895202312138728, violated group size: 0.217\n", - "iteration: 174, error: 0.42109195402298855, fairness violation: 0.004877045977011494, violated group size: 0.217\n", - "iteration: 175, error: 0.4213085714285715, fairness violation: 0.004859097142857142, violated group size: 0.217\n", - "iteration: 176, error: 0.42152272727272727, fairness violation: 0.0048413522727272715, violated group size: 0.217\n", - "iteration: 177, error: 0.42173446327683617, fairness violation: 0.00482380790960452, violated group size: 0.217\n", - "iteration: 178, error: 0.42161235955056187, fairness violation: 0.004833280898876404, violated group size: 0.217\n", - "iteration: 179, error: 0.42182122905027924, fairness violation: 0.004815977653631285, violated group size: 0.217\n", - "iteration: 180, error: 0.4220277777777778, fairness violation: 0.004798866666666665, violated group size: 0.217\n", - "iteration: 181, error: 0.42223204419889504, fairness violation: 0.004781944751381214, violated group size: 0.283\n", - "iteration: 182, error: 0.4224340659340659, fairness violation: 0.004765208791208789, violated group size: 0.217\n", - "iteration: 183, error: 0.4226338797814208, fairness violation: 0.004748655737704917, violated group size: 0.217\n", - "iteration: 184, error: 0.4228315217391304, fairness violation: 0.004732282608695651, violated group size: 0.217\n", - "iteration: 185, error: 0.4230270270270271, fairness violation: 0.004716086486486487, violated group size: 0.217\n", - "iteration: 186, error: 0.4229032258064515, fairness violation: 0.0047257311827957, violated group size: 0.283\n", - "iteration: 187, error: 0.4230962566844919, fairness violation: 0.0047097433155080205, violated group size: 0.217\n", - "iteration: 188, error: 0.4229734042553191, fairness violation: 0.00471931914893617, violated group size: 0.283\n", - "iteration: 189, error: 
0.4231640211640213, fairness violation: 0.004703534391534391, violated group size: 0.217\n", - "iteration: 190, error: 0.42304210526315794, fairness violation: 0.004713042105263158, violated group size: 0.217\n", - "iteration: 191, error: 0.4232303664921467, fairness violation: 0.004697455497382198, violated group size: 0.217\n", - "iteration: 192, error: 0.42310937499999995, fairness violation: 0.004706895833333333, violated group size: 0.217\n", - "iteration: 193, error: 0.42329533678756476, fairness violation: 0.004691502590673575, violated group size: 0.283\n", - "iteration: 194, error: 0.4231752577319588, fairness violation: 0.004700876288659792, violated group size: 0.217\n", - "iteration: 195, error: 0.4230564102564103, fairness violation: 0.004710153846153845, violated group size: 0.217\n", - "iteration: 196, error: 0.4229387755102041, fairness violation: 0.004719336734693878, violated group size: 0.283\n", - "iteration: 197, error: 0.4228223350253807, fairness violation: 0.004728426395939086, violated group size: 0.283\n", - "iteration: 198, error: 0.4227070707070707, fairness violation: 0.004737424242424242, violated group size: 0.217\n", - "iteration: 199, error: 0.4228894472361809, fairness violation: 0.004722341708542713, violated group size: 0.217\n", - "iteration: 200, error: 0.42277499999999996, fairness violation: 0.004731279999999999, violated group size: 0.217\n", - "iteration: 201, error: 0.4226616915422886, fairness violation: 0.004740129353233829, violated group size: 0.217\n", - "iteration: 202, error: 0.4225495049504951, fairness violation: 0.00474889108910891, violated group size: 0.217\n", - "iteration: 203, error: 0.42243842364532025, fairness violation: 0.004757566502463053, violated group size: 0.283\n", - "iteration: 204, error: 0.42232843137254905, fairness violation: 0.004766156862745097, violated group size: 0.283\n", - "iteration: 205, error: 0.422219512195122, fairness violation: 0.004774663414634145, violated group size: 
0.217\n", - "iteration: 206, error: 0.422111650485437, fairness violation: 0.004783087378640775, violated group size: 0.217\n", - "iteration: 207, error: 0.4220048309178744, fairness violation: 0.00479142995169082, violated group size: 0.217\n", - "iteration: 208, error: 0.4218990384615385, fairness violation: 0.004799692307692306, violated group size: 0.217\n", - "iteration: 209, error: 0.42179425837320567, fairness violation: 0.004807875598086124, violated group size: 0.217\n", - "iteration: 210, error: 0.4216904761904762, fairness violation: 0.004815980952380952, violated group size: 0.217\n", - "iteration: 211, error: 0.42158767772511846, fairness violation: 0.0048240094786729856, violated group size: 0.217\n", - "iteration: 212, error: 0.42148584905660386, fairness violation: 0.004831962264150944, violated group size: 0.217\n", - "iteration: 213, error: 0.4213849765258215, fairness violation: 0.004839840375586855, violated group size: 0.283\n", - "iteration: 214, error: 0.421285046728972, fairness violation: 0.0048476448598130835, violated group size: 0.217\n", - "iteration: 215, error: 0.42118604651162794, fairness violation: 0.004855376744186045, violated group size: 0.217\n", - "iteration: 216, error: 0.4210879629629629, fairness violation: 0.004863037037037037, violated group size: 0.283\n", - "iteration: 217, error: 0.42099078341013824, fairness violation: 0.004870626728110601, violated group size: 0.283\n", - "iteration: 218, error: 0.42089449541284396, fairness violation: 0.004878146788990825, violated group size: 0.217\n", - "iteration: 219, error: 0.42079908675799094, fairness violation: 0.004885598173515983, violated group size: 0.283\n", - "iteration: 220, error: 0.4207045454545455, fairness violation: 0.004892981818181818, violated group size: 0.283\n", - "iteration: 221, error: 0.4206108597285068, fairness violation: 0.004900298642533936, violated group size: 0.283\n", - "iteration: 222, error: 0.4205180180180179, fairness violation: 
0.004907549549549549, violated group size: 0.217\n", - "iteration: 223, error: 0.4204260089686098, fairness violation: 0.004914735426008968, violated group size: 0.217\n", - "iteration: 224, error: 0.4203348214285714, fairness violation: 0.004921857142857143, violated group size: 0.283\n", - "iteration: 225, error: 0.4202444444444444, fairness violation: 0.004928915555555555, violated group size: 0.217\n", - "iteration: 226, error: 0.42015486725663725, fairness violation: 0.004935911504424777, violated group size: 0.217\n", - "iteration: 227, error: 0.4200660792951542, fairness violation: 0.004942845814977973, violated group size: 0.217\n", - "iteration: 228, error: 0.4199780701754386, fairness violation: 0.004949719298245614, violated group size: 0.217\n", - "iteration: 229, error: 0.4198908296943231, fairness violation: 0.004956532751091703, violated group size: 0.283\n", - "iteration: 230, error: 0.419804347826087, fairness violation: 0.004963286956521739, violated group size: 0.283\n", - "iteration: 231, error: 0.4197186147186147, fairness violation: 0.004969982683982686, violated group size: 0.283\n", - "iteration: 232, error: 0.4196336206896552, fairness violation: 0.004976620689655175, violated group size: 0.283\n", - "iteration: 233, error: 0.419549356223176, fairness violation: 0.004983201716738197, violated group size: 0.283\n", - "iteration: 234, error: 0.419465811965812, fairness violation: 0.0049897264957264945, violated group size: 0.217\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "iO2ueydohI5J" + }, + "source": [ + "**instantiate, fit, and predict**\n", + "\n", + "\n", + "We first demonstrate how to instantiate a `GerryFairClassifier`, `train` it with respect to rich subgroup fairness, and `predict` the label of a new example. We remark that when we set `print_flag = True`, at each iteration of the algorithm we print the error, fairness violation, and violated group size of the most recent model. 
The error is the classification error of the classifier. At each round, the Learner tries to find a classifier that minimizes the classification error plus a weighted sum of the fairness disparities on all the groups that the Auditor has found up until that point. By contrast, at each round the Auditor tries to find the group with the greatest rich subgroup disparity with respect to the Learner's model. We define `violated group size` as the size (as a fraction of the dataset size) of this group, and the `fairness violation` as the `violated group size` times the difference in the statistical rate (FP or FN rate) on the group vs. the whole population.\n", + "\n", + "In the example below we set `max_iterations=500`, which is an order of magnitude less than the time to convergence observed in [the rich subgroup fairness empirical paper](https://arxiv.org/abs/1808.08166), but we advise that this can be highly dataset-dependent. Our target $\\gamma$-disparity is $\\gamma = .005$, our statistical rate is the false positive rate (`FP`), and our cost-sensitive classification oracle is linear regression (more on that below).\n" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "iteration: 235, error: 0.4193829787234044, fairness violation: 0.00499619574468085, violated group size: 0.217\n", - "iteration: 236, error: 0.41930084745762713, fairness violation: 0.005002610169491525, violated group size: 0.283\n", - "iteration: 237, error: 0.4192194092827004, fairness violation: 0.005008970464135021, violated group size: 0.283\n", - "iteration: 238, error: 0.41913865546218493, fairness violation: 0.00501527731092437, violated group size: 0.283\n", - "iteration: 239, error: 0.4190585774058578, fairness violation: 0.005021531380753138, violated group size: 0.283\n", - "iteration: 240, error: 0.4189791666666667, fairness violation: 0.005027733333333333, violated group size: 0.217\n", - "iteration: 241, error: 0.41890041493775937, fairness violation: 0.005033883817427385, 
violated group size: 0.217\n", - "iteration: 242, error: 0.4188223140495868, fairness violation: 0.0050399834710743805, violated group size: 0.217\n", - "iteration: 243, error: 0.4187448559670781, fairness violation: 0.005046032921810699, violated group size: 0.217\n", - "iteration: 244, error: 0.41890983606557386, fairness violation: 0.005032467213114753, violated group size: 0.217\n", - "iteration: 245, error: 0.419073469387755, fairness violation: 0.005019012244897959, violated group size: 0.217\n", - "iteration: 246, error: 0.41923577235772364, fairness violation: 0.005005666666666667, violated group size: 0.283\n", - "iteration: 247, error: 0.4191578947368422, fairness violation: 0.005011757085020243, violated group size: 0.217\n", - "iteration: 248, error: 0.41908064516129034, fairness violation: 0.005017798387096774, violated group size: 0.283\n", - "iteration: 249, error: 0.4192409638554217, fairness violation: 0.005004618473895581, violated group size: 0.217\n", - "iteration: 250, error: 0.4194000000000001, fairness violation: 0.004991544, violated group size: 0.217\n", - "iteration: 251, error: 0.4195577689243028, fairness violation: 0.004978573705179282, violated group size: 0.283\n", - "iteration: 252, error: 0.4197142857142857, fairness violation: 0.004965706349206349, violated group size: 0.217\n", - "iteration: 253, error: 0.4198695652173912, fairness violation: 0.00495294071146245, violated group size: 0.217\n", - "iteration: 254, error: 0.42002362204724414, fairness violation: 0.00494027559055118, violated group size: 0.217\n", - "iteration: 255, error: 0.4201764705882353, fairness violation: 0.004927709803921568, violated group size: 0.217\n", - "iteration: 256, error: 0.420328125, fairness violation: 0.004915242187499999, violated group size: 0.217\n", - "iteration: 257, error: 0.42047859922178993, fairness violation: 0.004902871595330739, violated group size: 0.217\n", - "iteration: 258, error: 0.42062790697674424, fairness violation: 
0.004890596899224807, violated group size: 0.217\n", - "iteration: 259, error: 0.4207760617760617, fairness violation: 0.004878416988416988, violated group size: 0.283\n", - "iteration: 260, error: 0.42069615384615383, fairness violation: 0.004884692307692307, violated group size: 0.217\n", - "iteration: 261, error: 0.42061685823754785, fairness violation: 0.004890919540229886, violated group size: 0.283\n", - "iteration: 262, error: 0.42076335877862603, fairness violation: 0.004878877862595419, violated group size: 0.217\n", - "iteration: 263, error: 0.4209087452471483, fairness violation: 0.004866927756653992, violated group size: 0.217\n", - "iteration: 264, error: 0.4208295454545454, fairness violation: 0.004873151515151516, violated group size: 0.283\n", - "iteration: 265, error: 0.4209735849056603, fairness violation: 0.004861313207547171, violated group size: 0.283\n", - "iteration: 266, error: 0.4211165413533834, fairness violation: 0.004849563909774436, violated group size: 0.217\n", - "iteration: 267, error: 0.42125842696629223, fairness violation: 0.0048379026217228475, violated group size: 0.217\n", - "iteration: 268, error: 0.42139925373134324, fairness violation: 0.004826328358208956, violated group size: 0.283\n", - "iteration: 269, error: 0.42131970260223045, fairness violation: 0.0048325873605947955, violated group size: 0.217\n", - "iteration: 270, error: 0.42145925925925937, fairness violation: 0.00482111851851852, violated group size: 0.283\n", - "iteration: 271, error: 0.42159778597785963, fairness violation: 0.004809734317343174, violated group size: 0.217\n", - "iteration: 272, error: 0.42173529411764704, fairness violation: 0.004798433823529413, violated group size: 0.283\n", - "iteration: 273, error: 0.4216556776556777, fairness violation: 0.004804703296703296, violated group size: 0.217\n", - "iteration: 274, error: 0.42157664233576647, fairness violation: 0.00481092700729927, violated group size: 0.217\n", - "iteration: 275, error: 
0.42171272727272724, fairness violation: 0.004799745454545455, violated group size: 0.217\n", - "iteration: 276, error: 0.42184782608695653, fairness violation: 0.004788644927536233, violated group size: 0.217\n", - "iteration: 277, error: 0.4219819494584837, fairness violation: 0.004777624548736462, violated group size: 0.217\n", - "iteration: 278, error: 0.42190287769784157, fairness violation: 0.004783856115107913, violated group size: 0.217\n", - "iteration: 279, error: 0.42182437275985674, fairness violation: 0.004790043010752689, violated group size: 0.217\n", - "iteration: 280, error: 0.42174642857142863, fairness violation: 0.004796185714285715, violated group size: 0.283\n", - "iteration: 281, error: 0.42166903914590753, fairness violation: 0.0048022846975088965, violated group size: 0.283\n", - "iteration: 282, error: 0.4218014184397163, fairness violation: 0.00479141134751773, violated group size: 0.217\n", - "iteration: 283, error: 0.42172438162544174, fairness violation: 0.0047974840989399295, violated group size: 0.217\n", - "iteration: 284, error: 0.4216478873239437, fairness violation: 0.004803514084507042, violated group size: 0.217\n", - "iteration: 285, error: 0.42157192982456126, fairness violation: 0.004809501754385964, violated group size: 0.217\n", - "iteration: 286, error: 0.4217027972027972, fairness violation: 0.004798755244755245, violated group size: 0.217\n", - "iteration: 287, error: 0.4218327526132404, fairness violation: 0.004788083623693379, violated group size: 0.283\n", - "iteration: 288, error: 0.4219618055555556, fairness violation: 0.004777486111111113, violated group size: 0.283\n", - "iteration: 289, error: 0.4218858131487888, fairness violation: 0.004783480968858131, violated group size: 0.217\n", - "iteration: 290, error: 0.4218103448275861, fairness violation: 0.004789434482758621, violated group size: 0.217\n", - "iteration: 291, error: 0.42193814432989696, fairness violation: 0.004778941580756014, violated group size: 
0.283\n", - "iteration: 292, error: 0.42186301369863016, fairness violation: 0.0047848698630136985, violated group size: 0.217\n", - "iteration: 293, error: 0.4217883959044368, fairness violation: 0.004790757679180888, violated group size: 0.217\n", - "iteration: 294, error: 0.42171428571428576, fairness violation: 0.004796605442176871, violated group size: 0.217\n", - "iteration: 295, error: 0.4216406779661017, fairness violation: 0.004802413559322035, violated group size: 0.217\n", - "iteration: 296, error: 0.4215675675675675, fairness violation: 0.004808182432432432, violated group size: 0.217\n", - "iteration: 297, error: 0.4214949494949495, fairness violation: 0.00481391245791246, violated group size: 0.283\n", - "iteration: 298, error: 0.4214228187919464, fairness violation: 0.0048196040268456385, violated group size: 0.217\n", - "iteration: 299, error: 0.42135117056856186, fairness violation: 0.0048252575250836115, violated group size: 0.217\n", - "iteration: 300, error: 0.42128, fairness violation: 0.004830873333333335, violated group size: 0.283\n", - "iteration: 301, error: 0.42120930232558146, fairness violation: 0.004836451827242525, violated group size: 0.217\n", - "iteration: 302, error: 0.42113907284768215, fairness violation: 0.004841993377483444, violated group size: 0.217\n", - "iteration: 303, error: 0.42106930693069305, fairness violation: 0.004847498349834984, violated group size: 0.217\n", - "iteration: 304, error: 0.4211940789473684, fairness violation: 0.004837263157894738, violated group size: 0.283\n", - "iteration: 305, error: 0.4211245901639345, fairness violation: 0.004842747540983607, violated group size: 0.283\n", - "iteration: 306, error: 0.4210555555555555, fairness violation: 0.004848196078431373, violated group size: 0.217\n", - "iteration: 307, error: 0.42098697068403895, fairness violation: 0.004853609120521175, violated group size: 0.283\n", - "iteration: 308, error: 0.4209188311688312, fairness violation: 0.004858987012987015, 
violated group size: 0.283\n", - "iteration: 309, error: 0.42085113268608415, fairness violation: 0.00486433009708738, violated group size: 0.283\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": true + }, + "id": "sgydei4GhI5K", + "outputId": "5ec1d33f-dce5-4bee-8f91-3f2901f5a15a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iteration: 1, error: 0.263, fairness violation: 0.028780000000000007, violated group size: 0.217\n", + "iteration: 2, error: 0.3815, fairness violation: 0.014390000000000003, violated group size: 0.217\n", + "iteration: 3, error: 0.42099999999999993, fairness violation: 0.009593333333333339, violated group size: 0.283\n", + "iteration: 4, error: 0.44075, fairness violation: 0.007195000000000002, violated group size: 0.217\n", + "iteration: 5, error: 0.45260000000000006, fairness violation: 0.005756000000000001, violated group size: 0.217\n", + "iteration: 6, error: 0.4605000000000001, fairness violation: 0.004796666666666668, violated group size: 0.283\n", + "iteration: 7, error: 0.4661428571428572, fairness violation: 0.004111428571428572, violated group size: 0.217\n", + "iteration: 8, error: 0.470375, fairness violation: 0.0035975000000000017, violated group size: 0.217\n", + "iteration: 9, error: 0.4691111111111112, fairness violation: 0.0033906666666666677, violated group size: 0.283\n", + "iteration: 10, error: 0.4681, fairness violation: 0.003225200000000001, violated group size: 0.283\n", + "iteration: 11, error: 0.4672727272727271, fairness violation: 0.0030898181818181836, violated group size: 0.283\n", + "iteration: 12, error: 0.4665833333333333, fairness violation: 0.0029769999999999996, violated group size: 0.217\n", + "iteration: 13, error: 0.466, fairness violation: 0.0028815384615384627, violated group size: 0.283\n", + "iteration: 14, error: 0.4655000000000001, fairness violation: 0.0027997142857142865, violated group size: 
0.217\n", + "iteration: 15, error: 0.46506666666666674, fairness violation: 0.002728800000000001, violated group size: 0.217\n", + "iteration: 16, error: 0.4646875, fairness violation: 0.0026667500000000007, violated group size: 0.217\n", + "iteration: 17, error: 0.4643529411764707, fairness violation: 0.002612000000000001, violated group size: 0.283\n", + "iteration: 18, error: 0.46405555555555567, fairness violation: 0.002563333333333334, violated group size: 0.217\n", + "iteration: 19, error: 0.4637894736842106, fairness violation: 0.0025197894736842096, violated group size: 0.217\n", + "iteration: 20, error: 0.46354999999999996, fairness violation: 0.0024806000000000008, violated group size: 0.283\n", + "iteration: 21, error: 0.4633333333333334, fairness violation: 0.0024451428571428584, violated group size: 0.217\n", + "iteration: 22, error: 0.4631363636363638, fairness violation: 0.0024129090909090914, violated group size: 0.283\n", + "iteration: 23, error: 0.46295652173913054, fairness violation: 0.002383478260869566, violated group size: 0.217\n", + "iteration: 24, error: 0.4627916666666667, fairness violation: 0.002356500000000001, violated group size: 0.283\n", + "iteration: 25, error: 0.4626400000000001, fairness violation: 0.0023316800000000018, violated group size: 0.283\n", + "iteration: 26, error: 0.4625000000000001, fairness violation: 0.0023087692307692314, violated group size: 0.217\n", + "iteration: 27, error: 0.4623703703703705, fairness violation: 0.0022875555555555557, violated group size: 0.217\n", + "iteration: 28, error: 0.46224999999999994, fairness violation: 0.0022678571428571426, violated group size: 0.217\n", + "iteration: 29, error: 0.46213793103448264, fairness violation: 0.0022495172413793106, violated group size: 0.217\n", + "iteration: 30, error: 0.46203333333333335, fairness violation: 0.0022324000000000003, violated group size: 0.217\n", + "iteration: 31, error: 0.46193548387096783, fairness violation: 0.0022163870967741935, 
violated group size: 0.217\n", + "iteration: 32, error: 0.46184375, fairness violation: 0.0022013749999999993, violated group size: 0.217\n", + "iteration: 33, error: 0.459969696969697, fairness violation: 0.0023319393939393944, violated group size: 0.283\n", + "iteration: 34, error: 0.4582058823529412, fairness violation: 0.002454823529411765, violated group size: 0.217\n", + "iteration: 35, error: 0.45654285714285714, fairness violation: 0.0025706857142857144, violated group size: 0.217\n", + "iteration: 36, error: 0.4549722222222221, fairness violation: 0.0026801111111111114, violated group size: 0.283\n", + "iteration: 37, error: 0.4534864864864866, fairness violation: 0.0027836216216216214, violated group size: 0.283\n", + "iteration: 38, error: 0.45207894736842097, fairness violation: 0.0028816842105263162, violated group size: 0.283\n", + "iteration: 39, error: 0.4507435897435898, fairness violation: 0.0029747179487179492, violated group size: 0.217\n", + "iteration: 40, error: 0.44947499999999996, fairness violation: 0.0030631000000000005, violated group size: 0.217\n", + "iteration: 41, error: 0.44826829268292684, fairness violation: 0.0031471707317073175, violated group size: 0.283\n", + "iteration: 42, error: 0.4471190476190476, fairness violation: 0.0032272380952380955, violated group size: 0.217\n", + "iteration: 43, error: 0.44602325581395347, fairness violation: 0.0033035813953488386, violated group size: 0.283\n", + "iteration: 44, error: 0.44497727272727267, fairness violation: 0.0033764545454545453, violated group size: 0.283\n", + "iteration: 45, error: 0.4439777777777778, fairness violation: 0.003446088888888888, violated group size: 0.217\n", + "iteration: 46, error: 0.44302173913043474, fairness violation: 0.0035126956521739122, violated group size: 0.217\n", + "iteration: 47, error: 0.44210638297872346, fairness violation: 0.0035764680851063826, violated group size: 0.217\n", + "iteration: 48, error: 0.4412291666666666, fairness violation: 
0.003637583333333332, violated group size: 0.217\n", + "iteration: 49, error: 0.4403877551020407, fairness violation: 0.0036962040816326523, violated group size: 0.217\n", + "iteration: 50, error: 0.4395600000000001, fairness violation: 0.0037524800000000003, violated group size: 0.217\n", + "iteration: 51, error: 0.43876470588235295, fairness violation: 0.0038065490196078425, violated group size: 0.217\n", + "iteration: 52, error: 0.438, fairness violation: 0.003858538461538461, violated group size: 0.283\n", + "iteration: 53, error: 0.4372641509433963, fairness violation: 0.003908566037735848, violated group size: 0.217\n", + "iteration: 54, error: 0.4365555555555556, fairness violation: 0.003956740740740741, violated group size: 0.283\n", + "iteration: 55, error: 0.4358181818181819, fairness violation: 0.004003163636363636, violated group size: 0.217\n", + "iteration: 56, error: 0.4351071428571429, fairness violation: 0.004047928571428571, violated group size: 0.217\n", + "iteration: 57, error: 0.4344736842105262, fairness violation: 0.004091122807017543, violated group size: 0.217\n", + "iteration: 58, error: 0.43381034482758624, fairness violation: 0.004132827586206895, violated group size: 0.217\n", + "iteration: 59, error: 0.4331694915254237, fairness violation: 0.0041731186440677965, violated group size: 0.283\n", + "iteration: 60, error: 0.43254999999999993, fairness violation: 0.004212066666666666, violated group size: 0.217\n", + "iteration: 61, error: 0.4319508196721312, fairness violation: 0.004249737704918031, violated group size: 0.217\n", + "iteration: 62, error: 0.4313709677419356, fairness violation: 0.004286193548387096, violated group size: 0.217\n", + "iteration: 63, error: 0.43080952380952386, fairness violation: 0.004321492063492062, violated group size: 0.283\n", + "iteration: 64, error: 0.430265625, fairness violation: 0.004355687499999999, violated group size: 0.283\n", + "iteration: 65, error: 0.4297384615384615, fairness violation: 
0.004388830769230769, violated group size: 0.283\n", + "iteration: 66, error: 0.42922727272727274, fairness violation: 0.004420969696969697, violated group size: 0.217\n", + "iteration: 67, error: 0.42873134328358203, fairness violation: 0.004452149253731343, violated group size: 0.217\n", + "iteration: 68, error: 0.42824999999999996, fairness violation: 0.0044824117647058815, violated group size: 0.283\n", + "iteration: 69, error: 0.42778260869565227, fairness violation: 0.004511797101449274, violated group size: 0.217\n", + "iteration: 70, error: 0.42732857142857145, fairness violation: 0.004540342857142856, violated group size: 0.283\n", + "iteration: 71, error: 0.42688732394366197, fairness violation: 0.004568084507042252, violated group size: 0.217\n", + "iteration: 72, error: 0.4264583333333332, fairness violation: 0.004595055555555555, violated group size: 0.283\n", + "iteration: 73, error: 0.42604109589041106, fairness violation: 0.004621287671232876, violated group size: 0.217\n", + "iteration: 74, error: 0.4256351351351351, fairness violation: 0.0046468108108108095, violated group size: 0.283\n", + "iteration: 75, error: 0.42524, fairness violation: 0.004671653333333331, violated group size: 0.217\n", + "iteration: 76, error: 0.4248552631578947, fairness violation: 0.004695842105263155, violated group size: 0.217\n", + "iteration: 77, error: 0.42448051948051946, fairness violation: 0.004719402597402596, violated group size: 0.217\n", + "iteration: 78, error: 0.4239871794871795, fairness violation: 0.00475905128205128, violated group size: 0.217\n", + "iteration: 79, error: 0.42363291139240505, fairness violation: 0.004781215189873418, violated group size: 0.283\n", + "iteration: 80, error: 0.42328750000000015, fairness violation: 0.004802824999999999, violated group size: 0.283\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iteration: 81, error: 0.4229506172839506, fairness violation: 0.004823901234567901, violated group 
size: 0.283\n", + "iteration: 82, error: 0.4226219512195123, fairness violation: 0.004844463414634145, violated group size: 0.217\n", + "iteration: 83, error: 0.4221807228915662, fairness violation: 0.004880216867469879, violated group size: 0.217\n", + "iteration: 84, error: 0.42175, fairness violation: 0.004915119047619047, violated group size: 0.217\n", + "iteration: 85, error: 0.4214470588235294, fairness violation: 0.004933882352941174, violated group size: 0.217\n", + "iteration: 86, error: 0.4210348837209302, fairness violation: 0.004967348837209301, violated group size: 0.217\n", + "iteration: 87, error: 0.420632183908046, fairness violation: 0.005000045977011494, violated group size: 0.283\n", + "iteration: 88, error: 0.42035227272727277, fairness violation: 0.0050172045454545434, violated group size: 0.217\n", + "iteration: 89, error: 0.4200786516853933, fairness violation: 0.005033977528089887, violated group size: 0.217\n", + "iteration: 90, error: 0.4198111111111112, fairness violation: 0.005050377777777776, violated group size: 0.283\n", + "iteration: 91, error: 0.4195824175824176, fairness violation: 0.0050664175824175805, violated group size: 0.217\n", + "iteration: 92, error: 0.4193695652173913, fairness violation: 0.005082108695652173, violated group size: 0.217\n", + "iteration: 93, error: 0.41916129032258065, fairness violation: 0.005097462365591397, violated group size: 0.217\n", + "iteration: 94, error: 0.41895744680851066, fairness violation: 0.005112489361702126, violated group size: 0.217\n", + "iteration: 95, error: 0.41875789473684216, fairness violation: 0.005127199999999998, violated group size: 0.217\n", + "iteration: 96, error: 0.41856250000000006, fairness violation: 0.005141604166666665, violated group size: 0.283\n", + "iteration: 97, error: 0.418979381443299, fairness violation: 0.005106494845360823, violated group size: 0.217\n", + "iteration: 98, error: 0.41938775510204085, fairness violation: 0.005072102040816325, violated 
group size: 0.217\n", + "iteration: 99, error: 0.4197878787878788, fairness violation: 0.0050384040404040376, violated group size: 0.217\n", + "iteration: 100, error: 0.42018000000000005, fairness violation: 0.0050053799999999985, violated group size: 0.217\n", + "iteration: 101, error: 0.42056435643564366, fairness violation: 0.004973009900990098, violated group size: 0.217\n", + "iteration: 102, error: 0.42094117647058826, fairness violation: 0.00494127450980392, violated group size: 0.217\n", + "iteration: 103, error: 0.4213106796116506, fairness violation: 0.004910155339805824, violated group size: 0.217\n", + "iteration: 104, error: 0.4216730769230769, fairness violation: 0.004879634615384614, violated group size: 0.217\n", + "iteration: 105, error: 0.4220285714285715, fairness violation: 0.004849695238095237, violated group size: 0.217\n", + "iteration: 106, error: 0.4223773584905662, fairness violation: 0.004820320754716981, violated group size: 0.283\n", + "iteration: 107, error: 0.42271962616822434, fairness violation: 0.004791495327102803, violated group size: 0.217\n", + "iteration: 108, error: 0.4230555555555556, fairness violation: 0.0047632037037037035, violated group size: 0.217\n", + "iteration: 109, error: 0.4233853211009175, fairness violation: 0.00473543119266055, violated group size: 0.217\n", + "iteration: 110, error: 0.4237090909090908, fairness violation: 0.004708163636363636, violated group size: 0.217\n", + "iteration: 111, error: 0.424027027027027, fairness violation: 0.004681387387387387, violated group size: 0.283\n", + "iteration: 112, error: 0.42433928571428586, fairness violation: 0.004655089285714286, violated group size: 0.283\n", + "iteration: 113, error: 0.4241238938053097, fairness violation: 0.004671504424778761, violated group size: 0.217\n", + "iteration: 114, error: 0.42442982456140343, fairness violation: 0.004645754385964912, violated group size: 0.283\n", + "iteration: 115, error: 0.42473043478260875, fairness violation: 
0.0046204521739130425, violated group size: 0.283\n", + "iteration: 116, error: 0.42502586206896553, fairness violation: 0.0045955862068965524, violated group size: 0.283\n", + "iteration: 117, error: 0.42481196581196584, fairness violation: 0.004611948717948717, violated group size: 0.217\n", + "iteration: 118, error: 0.4251016949152542, fairness violation: 0.004587576271186439, violated group size: 0.217\n", + "iteration: 119, error: 0.42489075630252104, fairness violation: 0.004603731092436974, violated group size: 0.217\n", + "iteration: 120, error: 0.4251750000000001, fairness violation: 0.0045798333333333325, violated group size: 0.217\n", + "iteration: 121, error: 0.4249669421487604, fairness violation: 0.004595785123966942, violated group size: 0.283\n", + "iteration: 122, error: 0.4247622950819671, fairness violation: 0.0046114754098360656, violated group size: 0.217\n", + "iteration: 123, error: 0.42456097560975614, fairness violation: 0.00462691056910569, violated group size: 0.217\n", + "iteration: 124, error: 0.42436290322580644, fairness violation: 0.004642096774193548, violated group size: 0.217\n", + "iteration: 125, error: 0.4241680000000001, fairness violation: 0.00465704, violated group size: 0.217\n", + "iteration: 126, error: 0.4239761904761905, fairness violation: 0.004671746031746031, violated group size: 0.217\n", + "iteration: 127, error: 0.42425196850393704, fairness violation: 0.004648629921259842, violated group size: 0.217\n", + "iteration: 128, error: 0.4240625, fairness violation: 0.004663171874999999, violated group size: 0.217\n", + "iteration: 129, error: 0.4238759689922481, fairness violation: 0.004677488372093024, violated group size: 0.283\n", + "iteration: 130, error: 0.42369230769230776, fairness violation: 0.004691584615384614, violated group size: 0.217\n", + "iteration: 131, error: 0.42351145038167937, fairness violation: 0.004705465648854962, violated group size: 0.217\n", + "iteration: 132, error: 0.4233333333333333, 
fairness violation: 0.004719136363636364, violated group size: 0.283\n", + "iteration: 133, error: 0.423157894736842, fairness violation: 0.0047326015037594, violated group size: 0.217\n", + "iteration: 134, error: 0.4229850746268656, fairness violation: 0.004745865671641791, violated group size: 0.217\n", + "iteration: 135, error: 0.42281481481481475, fairness violation: 0.004758933333333335, violated group size: 0.283\n", + "iteration: 136, error: 0.4226470588235294, fairness violation: 0.004771808823529411, violated group size: 0.217\n", + "iteration: 137, error: 0.42248175182481745, fairness violation: 0.004784496350364964, violated group size: 0.283\n", + "iteration: 138, error: 0.42231884057971014, fairness violation: 0.004797000000000002, violated group size: 0.283\n", + "iteration: 139, error: 0.42215827338129497, fairness violation: 0.004809323741007196, violated group size: 0.283\n", + "iteration: 140, error: 0.42200000000000004, fairness violation: 0.004821471428571429, violated group size: 0.217\n", + "iteration: 141, error: 0.4218439716312057, fairness violation: 0.0048334468085106394, violated group size: 0.217\n", + "iteration: 142, error: 0.42169014084507045, fairness violation: 0.004845253521126761, violated group size: 0.283\n", + "iteration: 143, error: 0.4215384615384616, fairness violation: 0.004856895104895106, violated group size: 0.283\n", + "iteration: 144, error: 0.4213888888888888, fairness violation: 0.004868375, violated group size: 0.217\n", + "iteration: 145, error: 0.42124137931034483, fairness violation: 0.004879696551724138, violated group size: 0.217\n", + "iteration: 146, error: 0.4210958904109589, fairness violation: 0.00489086301369863, violated group size: 0.217\n", + "iteration: 147, error: 0.4209523809523809, fairness violation: 0.004901877551020409, violated group size: 0.217\n", + "iteration: 148, error: 0.42081081081081084, fairness violation: 0.004912743243243244, violated group size: 0.217\n", + "iteration: 149, error: 
0.42067114093959734, fairness violation: 0.004923463087248323, violated group size: 0.283\n", + "iteration: 150, error: 0.4205333333333334, fairness violation: 0.004934040000000001, violated group size: 0.217\n", + "iteration: 151, error: 0.4203973509933776, fairness violation: 0.004944476821192053, violated group size: 0.217\n", + "iteration: 152, error: 0.4202631578947368, fairness violation: 0.0049547763157894754, violated group size: 0.283\n", + "iteration: 153, error: 0.4201307189542483, fairness violation: 0.00496494117647059, violated group size: 0.283\n", + "iteration: 154, error: 0.42, fairness violation: 0.004974974025974027, violated group size: 0.283\n", + "iteration: 155, error: 0.4198709677419355, fairness violation: 0.0049848774193548395, violated group size: 0.217\n", + "iteration: 156, error: 0.4197435897435898, fairness violation: 0.004994653846153847, violated group size: 0.217\n", + "iteration: 157, error: 0.4196178343949045, fairness violation: 0.0050043057324840766, violated group size: 0.217\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iteration: 158, error: 0.4194936708860761, fairness violation: 0.005013835443037975, violated group size: 0.217\n", + "iteration: 159, error: 0.41937106918239, fairness violation: 0.005023245283018869, violated group size: 0.283\n", + "iteration: 160, error: 0.41925000000000007, fairness violation: 0.0050325375, violated group size: 0.217\n", + "iteration: 161, error: 0.4191304347826087, fairness violation: 0.005041714285714285, violated group size: 0.217\n", + "iteration: 162, error: 0.41901234567901235, fairness violation: 0.005050777777777778, violated group size: 0.283\n", + "iteration: 163, error: 0.41889570552147243, fairness violation: 0.005059730061349694, violated group size: 0.283\n", + "iteration: 164, error: 0.4191402439024391, fairness violation: 0.005039463414634148, violated group size: 0.283\n", + "iteration: 165, error: 0.41938181818181824, fairness violation: 
0.005019442424242424, violated group size: 0.217\n", + "iteration: 166, error: 0.4192650602409639, fairness violation: 0.005028421686746988, violated group size: 0.217\n", + "iteration: 167, error: 0.41950299401197605, fairness violation: 0.005008706586826348, violated group size: 0.217\n", + "iteration: 168, error: 0.41973809523809513, fairness violation: 0.004989226190476189, violated group size: 0.217\n", + "iteration: 169, error: 0.41997041420118336, fairness violation: 0.0049699763313609474, violated group size: 0.283\n", + "iteration: 170, error: 0.4202, fairness violation: 0.00495095294117647, violated group size: 0.283\n", + "iteration: 171, error: 0.4204269005847953, fairness violation: 0.004932152046783625, violated group size: 0.217\n", + "iteration: 172, error: 0.42065116279069764, fairness violation: 0.00491356976744186, violated group size: 0.217\n", + "iteration: 173, error: 0.4208728323699421, fairness violation: 0.004895202312138728, violated group size: 0.217\n", + "iteration: 174, error: 0.42109195402298855, fairness violation: 0.004877045977011494, violated group size: 0.217\n", + "iteration: 175, error: 0.4213085714285715, fairness violation: 0.004859097142857142, violated group size: 0.217\n", + "iteration: 176, error: 0.42152272727272727, fairness violation: 0.0048413522727272715, violated group size: 0.217\n", + "iteration: 177, error: 0.42173446327683617, fairness violation: 0.00482380790960452, violated group size: 0.217\n", + "iteration: 178, error: 0.42161235955056187, fairness violation: 0.004833280898876404, violated group size: 0.217\n", + "iteration: 179, error: 0.42182122905027924, fairness violation: 0.004815977653631285, violated group size: 0.217\n", + "iteration: 180, error: 0.4220277777777778, fairness violation: 0.004798866666666665, violated group size: 0.217\n", + "iteration: 181, error: 0.42223204419889504, fairness violation: 0.004781944751381214, violated group size: 0.283\n", + "iteration: 182, error: 0.4224340659340659, 
fairness violation: 0.004765208791208789, violated group size: 0.217\n", + "iteration: 183, error: 0.4226338797814208, fairness violation: 0.004748655737704917, violated group size: 0.217\n", + "iteration: 184, error: 0.4228315217391304, fairness violation: 0.004732282608695651, violated group size: 0.217\n", + "iteration: 185, error: 0.4230270270270271, fairness violation: 0.004716086486486487, violated group size: 0.217\n", + "iteration: 186, error: 0.4229032258064515, fairness violation: 0.0047257311827957, violated group size: 0.283\n", + "iteration: 187, error: 0.4230962566844919, fairness violation: 0.0047097433155080205, violated group size: 0.217\n", + "iteration: 188, error: 0.4229734042553191, fairness violation: 0.00471931914893617, violated group size: 0.283\n", + "iteration: 189, error: 0.4231640211640213, fairness violation: 0.004703534391534391, violated group size: 0.217\n", + "iteration: 190, error: 0.42304210526315794, fairness violation: 0.004713042105263158, violated group size: 0.217\n", + "iteration: 191, error: 0.4232303664921467, fairness violation: 0.004697455497382198, violated group size: 0.217\n", + "iteration: 192, error: 0.42310937499999995, fairness violation: 0.004706895833333333, violated group size: 0.217\n", + "iteration: 193, error: 0.42329533678756476, fairness violation: 0.004691502590673575, violated group size: 0.283\n", + "iteration: 194, error: 0.4231752577319588, fairness violation: 0.004700876288659792, violated group size: 0.217\n", + "iteration: 195, error: 0.4230564102564103, fairness violation: 0.004710153846153845, violated group size: 0.217\n", + "iteration: 196, error: 0.4229387755102041, fairness violation: 0.004719336734693878, violated group size: 0.283\n", + "iteration: 197, error: 0.4228223350253807, fairness violation: 0.004728426395939086, violated group size: 0.283\n", + "iteration: 198, error: 0.4227070707070707, fairness violation: 0.004737424242424242, violated group size: 0.217\n", + "iteration: 199, 
error: 0.4228894472361809, fairness violation: 0.004722341708542713, violated group size: 0.217\n", + "iteration: 200, error: 0.42277499999999996, fairness violation: 0.004731279999999999, violated group size: 0.217\n", + "iteration: 201, error: 0.4226616915422886, fairness violation: 0.004740129353233829, violated group size: 0.217\n", + "iteration: 202, error: 0.4225495049504951, fairness violation: 0.00474889108910891, violated group size: 0.217\n", + "iteration: 203, error: 0.42243842364532025, fairness violation: 0.004757566502463053, violated group size: 0.283\n", + "iteration: 204, error: 0.42232843137254905, fairness violation: 0.004766156862745097, violated group size: 0.283\n", + "iteration: 205, error: 0.422219512195122, fairness violation: 0.004774663414634145, violated group size: 0.217\n", + "iteration: 206, error: 0.422111650485437, fairness violation: 0.004783087378640775, violated group size: 0.217\n", + "iteration: 207, error: 0.4220048309178744, fairness violation: 0.00479142995169082, violated group size: 0.217\n", + "iteration: 208, error: 0.4218990384615385, fairness violation: 0.004799692307692306, violated group size: 0.217\n", + "iteration: 209, error: 0.42179425837320567, fairness violation: 0.004807875598086124, violated group size: 0.217\n", + "iteration: 210, error: 0.4216904761904762, fairness violation: 0.004815980952380952, violated group size: 0.217\n", + "iteration: 211, error: 0.42158767772511846, fairness violation: 0.0048240094786729856, violated group size: 0.217\n", + "iteration: 212, error: 0.42148584905660386, fairness violation: 0.004831962264150944, violated group size: 0.217\n", + "iteration: 213, error: 0.4213849765258215, fairness violation: 0.004839840375586855, violated group size: 0.283\n", + "iteration: 214, error: 0.421285046728972, fairness violation: 0.0048476448598130835, violated group size: 0.217\n", + "iteration: 215, error: 0.42118604651162794, fairness violation: 0.004855376744186045, violated group size: 
0.217\n", + "iteration: 216, error: 0.4210879629629629, fairness violation: 0.004863037037037037, violated group size: 0.283\n", + "iteration: 217, error: 0.42099078341013824, fairness violation: 0.004870626728110601, violated group size: 0.283\n", + "iteration: 218, error: 0.42089449541284396, fairness violation: 0.004878146788990825, violated group size: 0.217\n", + "iteration: 219, error: 0.42079908675799094, fairness violation: 0.004885598173515983, violated group size: 0.283\n", + "iteration: 220, error: 0.4207045454545455, fairness violation: 0.004892981818181818, violated group size: 0.283\n", + "iteration: 221, error: 0.4206108597285068, fairness violation: 0.004900298642533936, violated group size: 0.283\n", + "iteration: 222, error: 0.4205180180180179, fairness violation: 0.004907549549549549, violated group size: 0.217\n", + "iteration: 223, error: 0.4204260089686098, fairness violation: 0.004914735426008968, violated group size: 0.217\n", + "iteration: 224, error: 0.4203348214285714, fairness violation: 0.004921857142857143, violated group size: 0.283\n", + "iteration: 225, error: 0.4202444444444444, fairness violation: 0.004928915555555555, violated group size: 0.217\n", + "iteration: 226, error: 0.42015486725663725, fairness violation: 0.004935911504424777, violated group size: 0.217\n", + "iteration: 227, error: 0.4200660792951542, fairness violation: 0.004942845814977973, violated group size: 0.217\n", + "iteration: 228, error: 0.4199780701754386, fairness violation: 0.004949719298245614, violated group size: 0.217\n", + "iteration: 229, error: 0.4198908296943231, fairness violation: 0.004956532751091703, violated group size: 0.283\n", + "iteration: 230, error: 0.419804347826087, fairness violation: 0.004963286956521739, violated group size: 0.283\n", + "iteration: 231, error: 0.4197186147186147, fairness violation: 0.004969982683982686, violated group size: 0.283\n", + "iteration: 232, error: 0.4196336206896552, fairness violation: 
0.004976620689655175, violated group size: 0.283\n", + "iteration: 233, error: 0.419549356223176, fairness violation: 0.004983201716738197, violated group size: 0.283\n", + "iteration: 234, error: 0.419465811965812, fairness violation: 0.0049897264957264945, violated group size: 0.217\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iteration: 235, error: 0.4193829787234044, fairness violation: 0.00499619574468085, violated group size: 0.217\n", + "iteration: 236, error: 0.41930084745762713, fairness violation: 0.005002610169491525, violated group size: 0.283\n", + "iteration: 237, error: 0.4192194092827004, fairness violation: 0.005008970464135021, violated group size: 0.283\n", + "iteration: 238, error: 0.41913865546218493, fairness violation: 0.00501527731092437, violated group size: 0.283\n", + "iteration: 239, error: 0.4190585774058578, fairness violation: 0.005021531380753138, violated group size: 0.283\n", + "iteration: 240, error: 0.4189791666666667, fairness violation: 0.005027733333333333, violated group size: 0.217\n", + "iteration: 241, error: 0.41890041493775937, fairness violation: 0.005033883817427385, violated group size: 0.217\n", + "iteration: 242, error: 0.4188223140495868, fairness violation: 0.0050399834710743805, violated group size: 0.217\n", + "iteration: 243, error: 0.4187448559670781, fairness violation: 0.005046032921810699, violated group size: 0.217\n", + "iteration: 244, error: 0.41890983606557386, fairness violation: 0.005032467213114753, violated group size: 0.217\n", + "iteration: 245, error: 0.419073469387755, fairness violation: 0.005019012244897959, violated group size: 0.217\n", + "iteration: 246, error: 0.41923577235772364, fairness violation: 0.005005666666666667, violated group size: 0.283\n", + "iteration: 247, error: 0.4191578947368422, fairness violation: 0.005011757085020243, violated group size: 0.217\n", + "iteration: 248, error: 0.41908064516129034, fairness violation: 0.005017798387096774, 
violated group size: 0.283\n", + "iteration: 249, error: 0.4192409638554217, fairness violation: 0.005004618473895581, violated group size: 0.217\n", + "iteration: 250, error: 0.4194000000000001, fairness violation: 0.004991544, violated group size: 0.217\n", + "iteration: 251, error: 0.4195577689243028, fairness violation: 0.004978573705179282, violated group size: 0.283\n", + "iteration: 252, error: 0.4197142857142857, fairness violation: 0.004965706349206349, violated group size: 0.217\n", + "iteration: 253, error: 0.4198695652173912, fairness violation: 0.00495294071146245, violated group size: 0.217\n", + "iteration: 254, error: 0.42002362204724414, fairness violation: 0.00494027559055118, violated group size: 0.217\n", + "iteration: 255, error: 0.4201764705882353, fairness violation: 0.004927709803921568, violated group size: 0.217\n", + "iteration: 256, error: 0.420328125, fairness violation: 0.004915242187499999, violated group size: 0.217\n", + "iteration: 257, error: 0.42047859922178993, fairness violation: 0.004902871595330739, violated group size: 0.217\n", + "iteration: 258, error: 0.42062790697674424, fairness violation: 0.004890596899224807, violated group size: 0.217\n", + "iteration: 259, error: 0.4207760617760617, fairness violation: 0.004878416988416988, violated group size: 0.283\n", + "iteration: 260, error: 0.42069615384615383, fairness violation: 0.004884692307692307, violated group size: 0.217\n", + "iteration: 261, error: 0.42061685823754785, fairness violation: 0.004890919540229886, violated group size: 0.283\n", + "iteration: 262, error: 0.42076335877862603, fairness violation: 0.004878877862595419, violated group size: 0.217\n", + "iteration: 263, error: 0.4209087452471483, fairness violation: 0.004866927756653992, violated group size: 0.217\n", + "iteration: 264, error: 0.4208295454545454, fairness violation: 0.004873151515151516, violated group size: 0.283\n", + "iteration: 265, error: 0.4209735849056603, fairness violation: 
0.004861313207547171, violated group size: 0.283\n", + "iteration: 266, error: 0.4211165413533834, fairness violation: 0.004849563909774436, violated group size: 0.217\n", + "iteration: 267, error: 0.42125842696629223, fairness violation: 0.0048379026217228475, violated group size: 0.217\n", + "iteration: 268, error: 0.42139925373134324, fairness violation: 0.004826328358208956, violated group size: 0.283\n", + "iteration: 269, error: 0.42131970260223045, fairness violation: 0.0048325873605947955, violated group size: 0.217\n", + "iteration: 270, error: 0.42145925925925937, fairness violation: 0.00482111851851852, violated group size: 0.283\n", + "iteration: 271, error: 0.42159778597785963, fairness violation: 0.004809734317343174, violated group size: 0.217\n", + "iteration: 272, error: 0.42173529411764704, fairness violation: 0.004798433823529413, violated group size: 0.283\n", + "iteration: 273, error: 0.4216556776556777, fairness violation: 0.004804703296703296, violated group size: 0.217\n", + "iteration: 274, error: 0.42157664233576647, fairness violation: 0.00481092700729927, violated group size: 0.217\n", + "iteration: 275, error: 0.42171272727272724, fairness violation: 0.004799745454545455, violated group size: 0.217\n", + "iteration: 276, error: 0.42184782608695653, fairness violation: 0.004788644927536233, violated group size: 0.217\n", + "iteration: 277, error: 0.4219819494584837, fairness violation: 0.004777624548736462, violated group size: 0.217\n", + "iteration: 278, error: 0.42190287769784157, fairness violation: 0.004783856115107913, violated group size: 0.217\n", + "iteration: 279, error: 0.42182437275985674, fairness violation: 0.004790043010752689, violated group size: 0.217\n", + "iteration: 280, error: 0.42174642857142863, fairness violation: 0.004796185714285715, violated group size: 0.283\n", + "iteration: 281, error: 0.42166903914590753, fairness violation: 0.0048022846975088965, violated group size: 0.283\n", + "iteration: 282, error: 
0.4218014184397163, fairness violation: 0.00479141134751773, violated group size: 0.217\n", + "iteration: 283, error: 0.42172438162544174, fairness violation: 0.0047974840989399295, violated group size: 0.217\n", + "iteration: 284, error: 0.4216478873239437, fairness violation: 0.004803514084507042, violated group size: 0.217\n", + "iteration: 285, error: 0.42157192982456126, fairness violation: 0.004809501754385964, violated group size: 0.217\n", + "iteration: 286, error: 0.4217027972027972, fairness violation: 0.004798755244755245, violated group size: 0.217\n", + "iteration: 287, error: 0.4218327526132404, fairness violation: 0.004788083623693379, violated group size: 0.283\n", + "iteration: 288, error: 0.4219618055555556, fairness violation: 0.004777486111111113, violated group size: 0.283\n", + "iteration: 289, error: 0.4218858131487888, fairness violation: 0.004783480968858131, violated group size: 0.217\n", + "iteration: 290, error: 0.4218103448275861, fairness violation: 0.004789434482758621, violated group size: 0.217\n", + "iteration: 291, error: 0.42193814432989696, fairness violation: 0.004778941580756014, violated group size: 0.283\n", + "iteration: 292, error: 0.42186301369863016, fairness violation: 0.0047848698630136985, violated group size: 0.217\n", + "iteration: 293, error: 0.4217883959044368, fairness violation: 0.004790757679180888, violated group size: 0.217\n", + "iteration: 294, error: 0.42171428571428576, fairness violation: 0.004796605442176871, violated group size: 0.217\n", + "iteration: 295, error: 0.4216406779661017, fairness violation: 0.004802413559322035, violated group size: 0.217\n", + "iteration: 296, error: 0.4215675675675675, fairness violation: 0.004808182432432432, violated group size: 0.217\n", + "iteration: 297, error: 0.4214949494949495, fairness violation: 0.00481391245791246, violated group size: 0.283\n", + "iteration: 298, error: 0.4214228187919464, fairness violation: 0.0048196040268456385, violated group size: 
0.217\n", + "iteration: 299, error: 0.42135117056856186, fairness violation: 0.0048252575250836115, violated group size: 0.217\n", + "iteration: 300, error: 0.42128, fairness violation: 0.004830873333333335, violated group size: 0.283\n", + "iteration: 301, error: 0.42120930232558146, fairness violation: 0.004836451827242525, violated group size: 0.217\n", + "iteration: 302, error: 0.42113907284768215, fairness violation: 0.004841993377483444, violated group size: 0.217\n", + "iteration: 303, error: 0.42106930693069305, fairness violation: 0.004847498349834984, violated group size: 0.217\n", + "iteration: 304, error: 0.4211940789473684, fairness violation: 0.004837263157894738, violated group size: 0.283\n", + "iteration: 305, error: 0.4211245901639345, fairness violation: 0.004842747540983607, violated group size: 0.283\n", + "iteration: 306, error: 0.4210555555555555, fairness violation: 0.004848196078431373, violated group size: 0.217\n", + "iteration: 307, error: 0.42098697068403895, fairness violation: 0.004853609120521175, violated group size: 0.283\n", + "iteration: 308, error: 0.4209188311688312, fairness violation: 0.004858987012987015, violated group size: 0.283\n", + "iteration: 309, error: 0.42085113268608415, fairness violation: 0.00486433009708738, violated group size: 0.283\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iteration: 310, error: 0.4207838709677419, fairness violation: 0.00486963870967742, violated group size: 0.283\n", + "iteration: 311, error: 0.420717041800643, fairness violation: 0.004874913183279744, violated group size: 0.217\n", + "iteration: 312, error: 0.42065064102564104, fairness violation: 0.0048801538461538466, violated group size: 0.217\n", + "iteration: 313, error: 0.42058466453674115, fairness violation: 0.0048853610223642185, violated group size: 0.217\n", + "iteration: 314, error: 0.42051910828025474, fairness violation: 0.004890535031847135, violated group size: 0.217\n", + "iteration: 
315, error: 0.4204539682539683, fairness violation: 0.004895676190476191, violated group size: 0.217\n", + "iteration: 316, error: 0.4203892405063292, fairness violation: 0.004900784810126583, violated group size: 0.217\n", + "iteration: 317, error: 0.4203249211356468, fairness violation: 0.004905861198738172, violated group size: 0.283\n", + "iteration: 318, error: 0.4202610062893082, fairness violation: 0.00491090566037736, violated group size: 0.217\n", + "iteration: 319, error: 0.4201974921630094, fairness violation: 0.004915918495297806, violated group size: 0.217\n", + "iteration: 320, error: 0.4201343749999999, fairness violation: 0.004920900000000002, violated group size: 0.217\n", + "iteration: 321, error: 0.4200716510903427, fairness violation: 0.004925850467289721, violated group size: 0.217\n", + "iteration: 322, error: 0.4200093167701862, fairness violation: 0.0049307701863354056, violated group size: 0.283\n", + "iteration: 323, error: 0.4199473684210526, fairness violation: 0.00493565944272446, violated group size: 0.283\n", + "iteration: 324, error: 0.41988580246913576, fairness violation: 0.004940518518518519, violated group size: 0.217\n", + "iteration: 325, error: 0.41982461538461535, fairness violation: 0.004945347692307694, violated group size: 0.217\n", + "iteration: 326, error: 0.4197638036809816, fairness violation: 0.004950147239263805, violated group size: 0.283\n", + "iteration: 327, error: 0.4197033639143731, fairness violation: 0.004954917431192661, violated group size: 0.283\n", + "iteration: 328, error: 0.4196432926829268, fairness violation: 0.004959658536585366, violated group size: 0.217\n", + "iteration: 329, error: 0.41958358662613987, fairness violation: 0.004964370820668694, violated group size: 0.283\n", + "iteration: 330, error: 0.41952424242424236, fairness violation: 0.004969054545454545, violated group size: 0.217\n", + "iteration: 331, error: 0.41946525679758306, fairness violation: 0.00497370996978852, violated group 
size: 0.217\n", + "iteration: 332, error: 0.41940662650602417, fairness violation: 0.004978337349397591, violated group size: 0.217\n", + "iteration: 333, error: 0.4193483483483482, fairness violation: 0.004982936936936937, violated group size: 0.217\n", + "iteration: 334, error: 0.4192904191616766, fairness violation: 0.004987508982035928, violated group size: 0.217\n", + "iteration: 335, error: 0.4192328358208956, fairness violation: 0.004992053731343284, violated group size: 0.283\n", + "iteration: 336, error: 0.4191755952380953, fairness violation: 0.00499657142857143, violated group size: 0.283\n", + "iteration: 337, error: 0.4191186943620178, fairness violation: 0.0050010623145400595, violated group size: 0.217\n", + "iteration: 338, error: 0.41906213017751476, fairness violation: 0.005005526627218935, violated group size: 0.217\n", + "iteration: 339, error: 0.4190058997050148, fairness violation: 0.005009964601769911, violated group size: 0.217\n", + "iteration: 340, error: 0.41894999999999993, fairness violation: 0.005014376470588236, violated group size: 0.283\n", + "iteration: 341, error: 0.41889442815249267, fairness violation: 0.005018762463343108, violated group size: 0.217\n", + "iteration: 342, error: 0.41883918128654973, fairness violation: 0.005023122807017544, violated group size: 0.217\n", + "iteration: 343, error: 0.41878425655976675, fairness violation: 0.0050274577259475225, violated group size: 0.283\n", + "iteration: 344, error: 0.4187296511627907, fairness violation: 0.005031767441860465, violated group size: 0.217\n", + "iteration: 345, error: 0.4186753623188406, fairness violation: 0.005036052173913045, violated group size: 0.283\n", + "iteration: 346, error: 0.4186213872832369, fairness violation: 0.005040312138728323, violated group size: 0.217\n", + "iteration: 347, error: 0.41856772334293946, fairness violation: 0.005044547550432276, violated group size: 0.283\n", + "iteration: 348, error: 0.41851436781609197, fairness violation: 
0.005048758620689655, violated group size: 0.217\n", + "iteration: 349, error: 0.418461318051576, fairness violation: 0.005052945558739255, violated group size: 0.283\n", + "iteration: 350, error: 0.4185771428571428, fairness violation: 0.005043468571428572, violated group size: 0.283\n", + "iteration: 351, error: 0.4186923076923077, fairness violation: 0.005034045584045584, violated group size: 0.217\n", + "iteration: 352, error: 0.4188068181818182, fairness violation: 0.005024676136363637, violated group size: 0.283\n", + "iteration: 353, error: 0.4189206798866855, fairness violation: 0.005015359773371105, violated group size: 0.217\n", + "iteration: 354, error: 0.41903389830508475, fairness violation: 0.005006096045197741, violated group size: 0.283\n", + "iteration: 355, error: 0.41914647887323936, fairness violation: 0.004996884507042254, violated group size: 0.283\n", + "iteration: 356, error: 0.4192584269662922, fairness violation: 0.004987724719101122, violated group size: 0.217\n", + "iteration: 357, error: 0.41936974789915965, fairness violation: 0.0049786162464986, violated group size: 0.217\n", + "iteration: 358, error: 0.41948044692737424, fairness violation: 0.004969558659217878, violated group size: 0.217\n", + "iteration: 359, error: 0.41959052924791085, fairness violation: 0.004960551532033426, violated group size: 0.283\n", + "iteration: 360, error: 0.4195361111111111, fairness violation: 0.004964855555555557, violated group size: 0.283\n", + "iteration: 361, error: 0.4196454293628808, fairness violation: 0.004955911357340723, violated group size: 0.283\n", + "iteration: 362, error: 0.4197541436464089, fairness violation: 0.004947016574585636, violated group size: 0.217\n", + "iteration: 363, error: 0.4198622589531681, fairness violation: 0.004938170798898072, violated group size: 0.283\n", + "iteration: 364, error: 0.41996978021978026, fairness violation: 0.004929373626373626, violated group size: 0.217\n", + "iteration: 365, error: 
0.42007671232876714, fairness violation: 0.004920624657534246, violated group size: 0.217\n", + "iteration: 366, error: 0.42018306010928963, fairness violation: 0.004911923497267759, violated group size: 0.217\n", + "iteration: 367, error: 0.4202888283378746, fairness violation: 0.004903269754768393, violated group size: 0.217\n", + "iteration: 368, error: 0.42039402173913043, fairness violation: 0.00489466304347826, violated group size: 0.217\n", + "iteration: 369, error: 0.4204986449864499, fairness violation: 0.00488610298102981, violated group size: 0.283\n", + "iteration: 370, error: 0.4206027027027027, fairness violation: 0.0048775891891891885, violated group size: 0.217\n", + "iteration: 371, error: 0.4207061994609164, fairness violation: 0.004869121293800538, violated group size: 0.217\n", + "iteration: 372, error: 0.4208091397849463, fairness violation: 0.004860698924731182, violated group size: 0.217\n", + "iteration: 373, error: 0.420911528150134, fairness violation: 0.004852321715817694, violated group size: 0.217\n", + "iteration: 374, error: 0.420855614973262, fairness violation: 0.004856754010695187, violated group size: 0.217\n", + "iteration: 375, error: 0.4209573333333334, fairness violation: 0.004848432, violated group size: 0.217\n", + "iteration: 376, error: 0.42105851063829786, fairness violation: 0.004840154255319148, violated group size: 0.217\n", + "iteration: 377, error: 0.4211591511936339, fairness violation: 0.004831920424403182, violated group size: 0.217\n", + "iteration: 378, error: 0.4211031746031746, fairness violation: 0.004836359788359788, violated group size: 0.217\n", + "iteration: 379, error: 0.42120316622691284, fairness violation: 0.004828179419525066, violated group size: 0.217\n", + "iteration: 380, error: 0.42130263157894726, fairness violation: 0.004820042105263157, violated group size: 0.217\n", + "iteration: 381, error: 0.42124671916010503, fairness violation: 0.004824477690288715, violated group size: 0.283\n", + 
"iteration: 382, error: 0.42134554973821986, fairness violation: 0.004816392670157068, violated group size: 0.283\n", + "iteration: 383, error: 0.42144386422976504, fairness violation: 0.004808349869451696, violated group size: 0.217\n", + "iteration: 384, error: 0.42154166666666676, fairness violation: 0.004800348958333333, violated group size: 0.283\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iteration: 385, error: 0.4214857142857143, fairness violation: 0.00480478961038961, violated group size: 0.217\n", + "iteration: 386, error: 0.4215829015544041, fairness violation: 0.0047968393782383415, violated group size: 0.217\n", + "iteration: 387, error: 0.42152713178294576, fairness violation: 0.004801266149870801, violated group size: 0.283\n", + "iteration: 388, error: 0.4216237113402061, fairness violation: 0.004793365979381444, violated group size: 0.283\n", + "iteration: 389, error: 0.42171979434447304, fairness violation: 0.004785506426735219, violated group size: 0.217\n", + "iteration: 390, error: 0.4218153846153847, fairness violation: 0.00477768717948718, violated group size: 0.217\n", + "iteration: 391, error: 0.4219104859335038, fairness violation: 0.004769907928388747, violated group size: 0.217\n", + "iteration: 392, error: 0.4218545918367348, fairness violation: 0.00477434693877551, violated group size: 0.217\n", + "iteration: 393, error: 0.42194910941475816, fairness violation: 0.004766615776081424, violated group size: 0.217\n", + "iteration: 394, error: 0.42204314720812186, fairness violation: 0.004758923857868021, violated group size: 0.283\n", + "iteration: 395, error: 0.4221367088607595, fairness violation: 0.00475127088607595, violated group size: 0.217\n", + "iteration: 396, error: 0.42208080808080806, fairness violation: 0.004755712121212122, violated group size: 0.283\n", + "iteration: 397, error: 0.42202518891687657, fairness violation: 0.004760130982367758, violated group size: 0.217\n", + "iteration: 398, 
error: 0.42196984924623115, fairness violation: 0.004764527638190955, violated group size: 0.283\n", + "iteration: 399, error: 0.42191478696741846, fairness violation: 0.004768902255639098, violated group size: 0.217\n", + "iteration: 400, error: 0.42186, fairness violation: 0.004773255, violated group size: 0.217\n", + "iteration: 401, error: 0.4218054862842893, fairness violation: 0.004777586034912718, violated group size: 0.283\n", + "iteration: 402, error: 0.4217512437810945, fairness violation: 0.00478189552238806, violated group size: 0.217\n", + "iteration: 403, error: 0.4218436724565757, fairness violation: 0.0047743374689826305, violated group size: 0.283\n", + "iteration: 404, error: 0.42178960396039605, fairness violation: 0.004778633663366336, violated group size: 0.217\n", + "iteration: 405, error: 0.4217358024691357, fairness violation: 0.004782908641975308, violated group size: 0.217\n", + "iteration: 406, error: 0.4216822660098523, fairness violation: 0.004787162561576355, violated group size: 0.217\n", + "iteration: 407, error: 0.4216289926289926, fairness violation: 0.004791395577395577, violated group size: 0.283\n", + "iteration: 408, error: 0.421575980392157, fairness violation: 0.004795607843137254, violated group size: 0.217\n", + "iteration: 409, error: 0.4215232273838631, fairness violation: 0.004799799511002444, violated group size: 0.217\n", + "iteration: 410, error: 0.42147073170731714, fairness violation: 0.004803970731707317, violated group size: 0.283\n", + "iteration: 411, error: 0.4214184914841849, fairness violation: 0.0048081216545012165, violated group size: 0.217\n", + "iteration: 412, error: 0.4213665048543689, fairness violation: 0.004812252427184466, violated group size: 0.283\n", + "iteration: 413, error: 0.42131476997578693, fairness violation: 0.004816363196125908, violated group size: 0.217\n", + "iteration: 414, error: 0.42126328502415455, fairness violation: 0.004820454106280194, violated group size: 0.217\n", + 
"iteration: 415, error: 0.4212120481927711, fairness violation: 0.004824525301204821, violated group size: 0.283\n", + "iteration: 416, error: 0.42116105769230766, fairness violation: 0.004828576923076923, violated group size: 0.217\n", + "iteration: 417, error: 0.4211103117505996, fairness violation: 0.004832609112709832, violated group size: 0.283\n", + "iteration: 418, error: 0.42105980861244025, fairness violation: 0.004836622009569378, violated group size: 0.283\n", + "iteration: 419, error: 0.42100954653937933, fairness violation: 0.004840615751789977, violated group size: 0.217\n", + "iteration: 420, error: 0.42110000000000003, fairness violation: 0.0048332238095238084, violated group size: 0.217\n", + "iteration: 421, error: 0.42104988123515436, fairness violation: 0.004837206650831354, violated group size: 0.217\n", + "iteration: 422, error: 0.42100000000000004, fairness violation: 0.004841170616113744, violated group size: 0.217\n", + "iteration: 423, error: 0.420950354609929, fairness violation: 0.004845115839243499, violated group size: 0.217\n", + "iteration: 424, error: 0.42104009433962253, fairness violation: 0.004837783018867924, violated group size: 0.217\n", + "iteration: 425, error: 0.4209905882352941, fairness violation: 0.004841717647058822, violated group size: 0.217\n", + "iteration: 426, error: 0.42094131455399053, fairness violation: 0.004845633802816901, violated group size: 0.217\n", + "iteration: 427, error: 0.42089227166276344, fairness violation: 0.004849531615925057, violated group size: 0.217\n", + "iteration: 428, error: 0.4208434579439252, fairness violation: 0.004853411214953271, violated group size: 0.217\n", + "iteration: 429, error: 0.4207948717948717, fairness violation: 0.0048572727272727274, violated group size: 0.283\n", + "iteration: 430, error: 0.42074651162790694, fairness violation: 0.004861116279069767, violated group size: 0.217\n", + "iteration: 431, error: 0.4206983758700697, fairness violation: 
0.004864941995359629, violated group size: 0.283\n", + "iteration: 432, error: 0.420650462962963, fairness violation: 0.00486875, violated group size: 0.217\n", + "iteration: 433, error: 0.42060277136258656, fairness violation: 0.0048725404157043874, violated group size: 0.217\n", + "iteration: 434, error: 0.42055529953917054, fairness violation: 0.0048763133640553, violated group size: 0.283\n", + "iteration: 435, error: 0.42050804597701147, fairness violation: 0.004880068965517241, violated group size: 0.217\n", + "iteration: 436, error: 0.4204610091743119, fairness violation: 0.004883807339449542, violated group size: 0.217\n", + "iteration: 437, error: 0.4204141876430207, fairness violation: 0.004887528604118992, violated group size: 0.217\n", + "iteration: 438, error: 0.42036757990867574, fairness violation: 0.004891232876712329, violated group size: 0.217\n", + "iteration: 439, error: 0.4203211845102506, fairness violation: 0.0048949202733485206, violated group size: 0.283\n", + "iteration: 440, error: 0.42027499999999995, fairness violation: 0.00489859090909091, violated group size: 0.283\n", + "iteration: 441, error: 0.42022902494331066, fairness violation: 0.004902244897959184, violated group size: 0.283\n", + "iteration: 442, error: 0.42018325791855204, fairness violation: 0.004905882352941177, violated group size: 0.217\n", + "iteration: 443, error: 0.42013769751693003, fairness violation: 0.004909503386004516, violated group size: 0.283\n", + "iteration: 444, error: 0.42009234234234244, fairness violation: 0.004913108108108108, violated group size: 0.217\n", + "iteration: 445, error: 0.420047191011236, fairness violation: 0.004916696629213483, violated group size: 0.217\n", + "iteration: 446, error: 0.42000224215246645, fairness violation: 0.004920269058295964, violated group size: 0.217\n", + "iteration: 447, error: 0.4199574944071588, fairness violation: 0.004923825503355704, violated group size: 0.217\n", + "iteration: 448, error: 
0.41991294642857147, fairness violation: 0.004927366071428571, violated group size: 0.217\n", + "iteration: 449, error: 0.41986859688195993, fairness violation: 0.004930890868596881, violated group size: 0.217\n", + "iteration: 450, error: 0.41982444444444433, fairness violation: 0.004934399999999999, violated group size: 0.217\n", + "iteration: 451, error: 0.41978048780487814, fairness violation: 0.004937893569844789, violated group size: 0.217\n", + "iteration: 452, error: 0.41973672566371684, fairness violation: 0.004941371681415928, violated group size: 0.217\n", + "iteration: 453, error: 0.41969315673289187, fairness violation: 0.0049448344370860925, violated group size: 0.217\n", + "iteration: 454, error: 0.41964977973568285, fairness violation: 0.004948281938325991, violated group size: 0.283\n", + "iteration: 455, error: 0.41960659340659345, fairness violation: 0.004951714285714286, violated group size: 0.283\n", + "iteration: 456, error: 0.41956359649122804, fairness violation: 0.004955131578947367, violated group size: 0.217\n", + "iteration: 457, error: 0.41952078774617063, fairness violation: 0.0049585339168490145, violated group size: 0.217\n", + "iteration: 458, error: 0.41947816593886456, fairness violation: 0.004961921397379911, violated group size: 0.217\n", + "iteration: 459, error: 0.4194357298474945, fairness violation: 0.00496529411764706, violated group size: 0.283\n", + "iteration: 460, error: 0.4193934782608696, fairness violation: 0.004968652173913044, violated group size: 0.283\n", + "iteration: 461, error: 0.41935140997830805, fairness violation: 0.004971995661605205, violated group size: 0.283\n", + "iteration: 462, error: 0.41930952380952374, fairness violation: 0.0049753246753246735, violated group size: 0.217\n", + "iteration: 463, error: 0.41926781857451406, fairness violation: 0.004978639308855291, violated group size: 0.217\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iteration: 464, error: 
0.41922629310344817, fairness violation: 0.004981939655172413, violated group size: 0.217\n", + "iteration: 465, error: 0.41918494623655916, fairness violation: 0.004985225806451612, violated group size: 0.217\n", + "iteration: 466, error: 0.4191437768240344, fairness violation: 0.004988497854077254, violated group size: 0.283\n", + "iteration: 467, error: 0.41910278372591, fairness violation: 0.004991755888650964, violated group size: 0.283\n", + "iteration: 468, error: 0.4190619658119658, fairness violation: 0.004994999999999998, violated group size: 0.217\n", + "iteration: 469, error: 0.4190213219616205, fairness violation: 0.0049982302771855, violated group size: 0.217\n", + "iteration: 470, error: 0.41898085106382993, fairness violation: 0.005001446808510636, violated group size: 0.283\n", + "iteration: 471, error: 0.41894055201698516, fairness violation: 0.005004649681528661, violated group size: 0.217\n", + "iteration: 472, error: 0.41890042372881364, fairness violation: 0.005007838983050847, violated group size: 0.283\n", + "iteration: 473, error: 0.4188604651162791, fairness violation: 0.005011014799154333, violated group size: 0.217\n", + "iteration: 474, error: 0.41882067510548526, fairness violation: 0.005014177215189871, violated group size: 0.217\n", + "iteration: 475, error: 0.41878105263157905, fairness violation: 0.0050173263157894735, violated group size: 0.283\n", + "iteration: 476, error: 0.41874159663865557, fairness violation: 0.0050204621848739485, violated group size: 0.217\n", + "iteration: 477, error: 0.4188259958071279, fairness violation: 0.005013576519916141, violated group size: 0.217\n", + "iteration: 478, error: 0.41878661087866115, fairness violation: 0.005016707112970709, violated group size: 0.217\n", + "iteration: 479, error: 0.4188705636743216, fairness violation: 0.005009858037578285, violated group size: 0.217\n", + "iteration: 480, error: 0.41883125000000004, fairness violation: 0.005012983333333334, violated group size: 
0.283\n", + "iteration: 481, error: 0.4187920997920998, fairness violation: 0.005016095634095634, violated group size: 0.283\n", + "iteration: 482, error: 0.4187531120331951, fairness violation: 0.0050191950207468874, violated group size: 0.283\n", + "iteration: 483, error: 0.4188364389233955, fairness violation: 0.00501239751552795, violated group size: 0.217\n", + "iteration: 484, error: 0.41891942148760325, fairness violation: 0.005005628099173555, violated group size: 0.283\n", + "iteration: 485, error: 0.4190020618556701, fairness violation: 0.004998886597938144, violated group size: 0.283\n", + "iteration: 486, error: 0.41896296296296304, fairness violation: 0.005001995884773661, violated group size: 0.217\n", + "iteration: 487, error: 0.4190451745379877, fairness violation: 0.004995289527720739, violated group size: 0.283\n", + "iteration: 488, error: 0.4191270491803279, fairness violation: 0.004988610655737704, violated group size: 0.283\n", + "iteration: 489, error: 0.4192085889570552, fairness violation: 0.004981959100204497, violated group size: 0.217\n", + "iteration: 490, error: 0.41916938775510204, fairness violation: 0.004985077551020407, violated group size: 0.283\n", + "iteration: 491, error: 0.4192505091649695, fairness violation: 0.004978460285132381, violated group size: 0.217\n", + "iteration: 492, error: 0.4192113821138212, fairness violation: 0.004981573170731706, violated group size: 0.217\n", + "iteration: 493, error: 0.41917241379310355, fairness violation: 0.004984673427991887, violated group size: 0.283\n", + "iteration: 494, error: 0.41913360323886634, fairness violation: 0.004987761133603237, violated group size: 0.217\n", + "iteration: 495, error: 0.4192141414141415, fairness violation: 0.004981191919191918, violated group size: 0.217\n", + "iteration: 496, error: 0.4192943548387097, fairness violation: 0.004974649193548386, violated group size: 0.217\n", + "iteration: 497, error: 0.419374245472837, fairness violation: 
0.004968132796780683, violated group size: 0.217\n", + "iteration: 498, error: 0.4194538152610441, fairness violation: 0.004961642570281124, violated group size: 0.217\n", + "iteration: 499, error: 0.41953306613226454, fairness violation: 0.0049551783567134255, violated group size: 0.217\n" + ] + } + ], + "source": [ + "C = 100\n", + "print_flag = True\n", + "gamma = .005\n", + "\n", + "\n", + "fair_model = GerryFairClassifier(C=C, printflag=print_flag, gamma=gamma, fairness_def='FP',\n", + " max_iters=max_iterations, heatmapflag=False)\n", + "\n", + "# fit method\n", + "fair_model.fit(data_set, early_termination=True)\n", + "\n", + "# predict method. If threshold in (0, 1) produces binary predictions\n", + "\n", + "dataset_yhat = fair_model.predict(data_set, threshold=False)\n", + "\n", + "\n" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "iteration: 310, error: 0.4207838709677419, fairness violation: 0.00486963870967742, violated group size: 0.283\n", - "iteration: 311, error: 0.420717041800643, fairness violation: 0.004874913183279744, violated group size: 0.217\n", - "iteration: 312, error: 0.42065064102564104, fairness violation: 0.0048801538461538466, violated group size: 0.217\n", - "iteration: 313, error: 0.42058466453674115, fairness violation: 0.0048853610223642185, violated group size: 0.217\n", - "iteration: 314, error: 0.42051910828025474, fairness violation: 0.004890535031847135, violated group size: 0.217\n", - "iteration: 315, error: 0.4204539682539683, fairness violation: 0.004895676190476191, violated group size: 0.217\n", - "iteration: 316, error: 0.4203892405063292, fairness violation: 0.004900784810126583, violated group size: 0.217\n", - "iteration: 317, error: 0.4203249211356468, fairness violation: 0.004905861198738172, violated group size: 0.283\n", - "iteration: 318, error: 0.4202610062893082, fairness violation: 0.00491090566037736, violated group size: 0.217\n", - "iteration: 319, error: 0.4201974921630094, 
fairness violation: 0.004915918495297806, violated group size: 0.217\n", - "iteration: 320, error: 0.4201343749999999, fairness violation: 0.004920900000000002, violated group size: 0.217\n", - "iteration: 321, error: 0.4200716510903427, fairness violation: 0.004925850467289721, violated group size: 0.217\n", - "iteration: 322, error: 0.4200093167701862, fairness violation: 0.0049307701863354056, violated group size: 0.283\n", - "iteration: 323, error: 0.4199473684210526, fairness violation: 0.00493565944272446, violated group size: 0.283\n", - "iteration: 324, error: 0.41988580246913576, fairness violation: 0.004940518518518519, violated group size: 0.217\n", - "iteration: 325, error: 0.41982461538461535, fairness violation: 0.004945347692307694, violated group size: 0.217\n", - "iteration: 326, error: 0.4197638036809816, fairness violation: 0.004950147239263805, violated group size: 0.283\n", - "iteration: 327, error: 0.4197033639143731, fairness violation: 0.004954917431192661, violated group size: 0.283\n", - "iteration: 328, error: 0.4196432926829268, fairness violation: 0.004959658536585366, violated group size: 0.217\n", - "iteration: 329, error: 0.41958358662613987, fairness violation: 0.004964370820668694, violated group size: 0.283\n", - "iteration: 330, error: 0.41952424242424236, fairness violation: 0.004969054545454545, violated group size: 0.217\n", - "iteration: 331, error: 0.41946525679758306, fairness violation: 0.00497370996978852, violated group size: 0.217\n", - "iteration: 332, error: 0.41940662650602417, fairness violation: 0.004978337349397591, violated group size: 0.217\n", - "iteration: 333, error: 0.4193483483483482, fairness violation: 0.004982936936936937, violated group size: 0.217\n", - "iteration: 334, error: 0.4192904191616766, fairness violation: 0.004987508982035928, violated group size: 0.217\n", - "iteration: 335, error: 0.4192328358208956, fairness violation: 0.004992053731343284, violated group size: 0.283\n", - "iteration: 
336, error: 0.4191755952380953, fairness violation: 0.00499657142857143, violated group size: 0.283\n", - "iteration: 337, error: 0.4191186943620178, fairness violation: 0.0050010623145400595, violated group size: 0.217\n", - "iteration: 338, error: 0.41906213017751476, fairness violation: 0.005005526627218935, violated group size: 0.217\n", - "iteration: 339, error: 0.4190058997050148, fairness violation: 0.005009964601769911, violated group size: 0.217\n", - "iteration: 340, error: 0.41894999999999993, fairness violation: 0.005014376470588236, violated group size: 0.283\n", - "iteration: 341, error: 0.41889442815249267, fairness violation: 0.005018762463343108, violated group size: 0.217\n", - "iteration: 342, error: 0.41883918128654973, fairness violation: 0.005023122807017544, violated group size: 0.217\n", - "iteration: 343, error: 0.41878425655976675, fairness violation: 0.0050274577259475225, violated group size: 0.283\n", - "iteration: 344, error: 0.4187296511627907, fairness violation: 0.005031767441860465, violated group size: 0.217\n", - "iteration: 345, error: 0.4186753623188406, fairness violation: 0.005036052173913045, violated group size: 0.283\n", - "iteration: 346, error: 0.4186213872832369, fairness violation: 0.005040312138728323, violated group size: 0.217\n", - "iteration: 347, error: 0.41856772334293946, fairness violation: 0.005044547550432276, violated group size: 0.283\n", - "iteration: 348, error: 0.41851436781609197, fairness violation: 0.005048758620689655, violated group size: 0.217\n", - "iteration: 349, error: 0.418461318051576, fairness violation: 0.005052945558739255, violated group size: 0.283\n", - "iteration: 350, error: 0.4185771428571428, fairness violation: 0.005043468571428572, violated group size: 0.283\n", - "iteration: 351, error: 0.4186923076923077, fairness violation: 0.005034045584045584, violated group size: 0.217\n", - "iteration: 352, error: 0.4188068181818182, fairness violation: 0.005024676136363637, violated group 
size: 0.283\n", - "iteration: 353, error: 0.4189206798866855, fairness violation: 0.005015359773371105, violated group size: 0.217\n", - "iteration: 354, error: 0.41903389830508475, fairness violation: 0.005006096045197741, violated group size: 0.283\n", - "iteration: 355, error: 0.41914647887323936, fairness violation: 0.004996884507042254, violated group size: 0.283\n", - "iteration: 356, error: 0.4192584269662922, fairness violation: 0.004987724719101122, violated group size: 0.217\n", - "iteration: 357, error: 0.41936974789915965, fairness violation: 0.0049786162464986, violated group size: 0.217\n", - "iteration: 358, error: 0.41948044692737424, fairness violation: 0.004969558659217878, violated group size: 0.217\n", - "iteration: 359, error: 0.41959052924791085, fairness violation: 0.004960551532033426, violated group size: 0.283\n", - "iteration: 360, error: 0.4195361111111111, fairness violation: 0.004964855555555557, violated group size: 0.283\n", - "iteration: 361, error: 0.4196454293628808, fairness violation: 0.004955911357340723, violated group size: 0.283\n", - "iteration: 362, error: 0.4197541436464089, fairness violation: 0.004947016574585636, violated group size: 0.217\n", - "iteration: 363, error: 0.4198622589531681, fairness violation: 0.004938170798898072, violated group size: 0.283\n", - "iteration: 364, error: 0.41996978021978026, fairness violation: 0.004929373626373626, violated group size: 0.217\n", - "iteration: 365, error: 0.42007671232876714, fairness violation: 0.004920624657534246, violated group size: 0.217\n", - "iteration: 366, error: 0.42018306010928963, fairness violation: 0.004911923497267759, violated group size: 0.217\n", - "iteration: 367, error: 0.4202888283378746, fairness violation: 0.004903269754768393, violated group size: 0.217\n", - "iteration: 368, error: 0.42039402173913043, fairness violation: 0.00489466304347826, violated group size: 0.217\n", - "iteration: 369, error: 0.4204986449864499, fairness violation: 
0.00488610298102981, violated group size: 0.283\n", - "iteration: 370, error: 0.4206027027027027, fairness violation: 0.0048775891891891885, violated group size: 0.217\n", - "iteration: 371, error: 0.4207061994609164, fairness violation: 0.004869121293800538, violated group size: 0.217\n", - "iteration: 372, error: 0.4208091397849463, fairness violation: 0.004860698924731182, violated group size: 0.217\n", - "iteration: 373, error: 0.420911528150134, fairness violation: 0.004852321715817694, violated group size: 0.217\n", - "iteration: 374, error: 0.420855614973262, fairness violation: 0.004856754010695187, violated group size: 0.217\n", - "iteration: 375, error: 0.4209573333333334, fairness violation: 0.004848432, violated group size: 0.217\n", - "iteration: 376, error: 0.42105851063829786, fairness violation: 0.004840154255319148, violated group size: 0.217\n", - "iteration: 377, error: 0.4211591511936339, fairness violation: 0.004831920424403182, violated group size: 0.217\n", - "iteration: 378, error: 0.4211031746031746, fairness violation: 0.004836359788359788, violated group size: 0.217\n", - "iteration: 379, error: 0.42120316622691284, fairness violation: 0.004828179419525066, violated group size: 0.217\n", - "iteration: 380, error: 0.42130263157894726, fairness violation: 0.004820042105263157, violated group size: 0.217\n", - "iteration: 381, error: 0.42124671916010503, fairness violation: 0.004824477690288715, violated group size: 0.283\n", - "iteration: 382, error: 0.42134554973821986, fairness violation: 0.004816392670157068, violated group size: 0.283\n", - "iteration: 383, error: 0.42144386422976504, fairness violation: 0.004808349869451696, violated group size: 0.217\n", - "iteration: 384, error: 0.42154166666666676, fairness violation: 0.004800348958333333, violated group size: 0.283\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "ak4FlXHmhI5L" + }, + "source": [ + "**3-d heatmaps**\n", + "\n", + "We now show to generate a 3d-heatmap of 
unfairness using the `generate_heatmap` method. The $X-Y$ axes in the plot represent the coefficients of the linear threshold function that defines a protected subgroup with respect to the first two sensitive attributes. Which $2$ attributes are considered sensitive can be overridden with the `col_index` argument. The $Z$-axis is the $\\gamma$-disparity (FP) of the corresponding subgroup defined by the linear threshold function. This is important because it allows us to (1) visualize convergence as the heatmap flattens and (2) brute-force check the fairness in low dimensions without relying on a heuristic auditor. See [the rich subgroup fairness empirical paper](https://arxiv.org/abs/1808.08166) for a discussion of these plots. Note that in the plot below no group has a $\\gamma$-disparity greater than $.005$, which we would expect since the set of linear threshold functions on two attributes is a subset of the set of linear threshold functions on all protected attributes, and the final model is $\\gamma$-fair."
+ ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "iteration: 385, error: 0.4214857142857143, fairness violation: 0.00480478961038961, violated group size: 0.217\n", - "iteration: 386, error: 0.4215829015544041, fairness violation: 0.0047968393782383415, violated group size: 0.217\n", - "iteration: 387, error: 0.42152713178294576, fairness violation: 0.004801266149870801, violated group size: 0.283\n", - "iteration: 388, error: 0.4216237113402061, fairness violation: 0.004793365979381444, violated group size: 0.283\n", - "iteration: 389, error: 0.42171979434447304, fairness violation: 0.004785506426735219, violated group size: 0.217\n", - "iteration: 390, error: 0.4218153846153847, fairness violation: 0.00477768717948718, violated group size: 0.217\n", - "iteration: 391, error: 0.4219104859335038, fairness violation: 0.004769907928388747, violated group size: 0.217\n", - "iteration: 392, error: 0.4218545918367348, fairness violation: 0.00477434693877551, violated group size: 0.217\n", - "iteration: 393, error: 0.42194910941475816, fairness violation: 0.004766615776081424, violated group size: 0.217\n", - "iteration: 394, error: 0.42204314720812186, fairness violation: 0.004758923857868021, violated group size: 0.283\n", - "iteration: 395, error: 0.4221367088607595, fairness violation: 0.00475127088607595, violated group size: 0.217\n", - "iteration: 396, error: 0.42208080808080806, fairness violation: 0.004755712121212122, violated group size: 0.283\n", - "iteration: 397, error: 0.42202518891687657, fairness violation: 0.004760130982367758, violated group size: 0.217\n", - "iteration: 398, error: 0.42196984924623115, fairness violation: 0.004764527638190955, violated group size: 0.283\n", - "iteration: 399, error: 0.42191478696741846, fairness violation: 0.004768902255639098, violated group size: 0.217\n", - "iteration: 400, error: 0.42186, fairness violation: 0.004773255, violated group size: 0.217\n", - "iteration: 401, error: 
0.4218054862842893, fairness violation: 0.004777586034912718, violated group size: 0.283\n", - "iteration: 402, error: 0.4217512437810945, fairness violation: 0.00478189552238806, violated group size: 0.217\n", - "iteration: 403, error: 0.4218436724565757, fairness violation: 0.0047743374689826305, violated group size: 0.283\n", - "iteration: 404, error: 0.42178960396039605, fairness violation: 0.004778633663366336, violated group size: 0.217\n", - "iteration: 405, error: 0.4217358024691357, fairness violation: 0.004782908641975308, violated group size: 0.217\n", - "iteration: 406, error: 0.4216822660098523, fairness violation: 0.004787162561576355, violated group size: 0.217\n", - "iteration: 407, error: 0.4216289926289926, fairness violation: 0.004791395577395577, violated group size: 0.283\n", - "iteration: 408, error: 0.421575980392157, fairness violation: 0.004795607843137254, violated group size: 0.217\n", - "iteration: 409, error: 0.4215232273838631, fairness violation: 0.004799799511002444, violated group size: 0.217\n", - "iteration: 410, error: 0.42147073170731714, fairness violation: 0.004803970731707317, violated group size: 0.283\n", - "iteration: 411, error: 0.4214184914841849, fairness violation: 0.0048081216545012165, violated group size: 0.217\n", - "iteration: 412, error: 0.4213665048543689, fairness violation: 0.004812252427184466, violated group size: 0.283\n", - "iteration: 413, error: 0.42131476997578693, fairness violation: 0.004816363196125908, violated group size: 0.217\n", - "iteration: 414, error: 0.42126328502415455, fairness violation: 0.004820454106280194, violated group size: 0.217\n", - "iteration: 415, error: 0.4212120481927711, fairness violation: 0.004824525301204821, violated group size: 0.283\n", - "iteration: 416, error: 0.42116105769230766, fairness violation: 0.004828576923076923, violated group size: 0.217\n", - "iteration: 417, error: 0.4211103117505996, fairness violation: 0.004832609112709832, violated group size: 
0.283\n", - "iteration: 418, error: 0.42105980861244025, fairness violation: 0.004836622009569378, violated group size: 0.283\n", - "iteration: 419, error: 0.42100954653937933, fairness violation: 0.004840615751789977, violated group size: 0.217\n", - "iteration: 420, error: 0.42110000000000003, fairness violation: 0.0048332238095238084, violated group size: 0.217\n", - "iteration: 421, error: 0.42104988123515436, fairness violation: 0.004837206650831354, violated group size: 0.217\n", - "iteration: 422, error: 0.42100000000000004, fairness violation: 0.004841170616113744, violated group size: 0.217\n", - "iteration: 423, error: 0.420950354609929, fairness violation: 0.004845115839243499, violated group size: 0.217\n", - "iteration: 424, error: 0.42104009433962253, fairness violation: 0.004837783018867924, violated group size: 0.217\n", - "iteration: 425, error: 0.4209905882352941, fairness violation: 0.004841717647058822, violated group size: 0.217\n", - "iteration: 426, error: 0.42094131455399053, fairness violation: 0.004845633802816901, violated group size: 0.217\n", - "iteration: 427, error: 0.42089227166276344, fairness violation: 0.004849531615925057, violated group size: 0.217\n", - "iteration: 428, error: 0.4208434579439252, fairness violation: 0.004853411214953271, violated group size: 0.217\n", - "iteration: 429, error: 0.4207948717948717, fairness violation: 0.0048572727272727274, violated group size: 0.283\n", - "iteration: 430, error: 0.42074651162790694, fairness violation: 0.004861116279069767, violated group size: 0.217\n", - "iteration: 431, error: 0.4206983758700697, fairness violation: 0.004864941995359629, violated group size: 0.283\n", - "iteration: 432, error: 0.420650462962963, fairness violation: 0.00486875, violated group size: 0.217\n", - "iteration: 433, error: 0.42060277136258656, fairness violation: 0.0048725404157043874, violated group size: 0.217\n", - "iteration: 434, error: 0.42055529953917054, fairness violation: 
0.0048763133640553, violated group size: 0.283\n", - "iteration: 435, error: 0.42050804597701147, fairness violation: 0.004880068965517241, violated group size: 0.217\n", - "iteration: 436, error: 0.4204610091743119, fairness violation: 0.004883807339449542, violated group size: 0.217\n", - "iteration: 437, error: 0.4204141876430207, fairness violation: 0.004887528604118992, violated group size: 0.217\n", - "iteration: 438, error: 0.42036757990867574, fairness violation: 0.004891232876712329, violated group size: 0.217\n", - "iteration: 439, error: 0.4203211845102506, fairness violation: 0.0048949202733485206, violated group size: 0.283\n", - "iteration: 440, error: 0.42027499999999995, fairness violation: 0.00489859090909091, violated group size: 0.283\n", - "iteration: 441, error: 0.42022902494331066, fairness violation: 0.004902244897959184, violated group size: 0.283\n", - "iteration: 442, error: 0.42018325791855204, fairness violation: 0.004905882352941177, violated group size: 0.217\n", - "iteration: 443, error: 0.42013769751693003, fairness violation: 0.004909503386004516, violated group size: 0.283\n", - "iteration: 444, error: 0.42009234234234244, fairness violation: 0.004913108108108108, violated group size: 0.217\n", - "iteration: 445, error: 0.420047191011236, fairness violation: 0.004916696629213483, violated group size: 0.217\n", - "iteration: 446, error: 0.42000224215246645, fairness violation: 0.004920269058295964, violated group size: 0.217\n", - "iteration: 447, error: 0.4199574944071588, fairness violation: 0.004923825503355704, violated group size: 0.217\n", - "iteration: 448, error: 0.41991294642857147, fairness violation: 0.004927366071428571, violated group size: 0.217\n", - "iteration: 449, error: 0.41986859688195993, fairness violation: 0.004930890868596881, violated group size: 0.217\n", - "iteration: 450, error: 0.41982444444444433, fairness violation: 0.004934399999999999, violated group size: 0.217\n", - "iteration: 451, error: 
0.41978048780487814, fairness violation: 0.004937893569844789, violated group size: 0.217\n", - "iteration: 452, error: 0.41973672566371684, fairness violation: 0.004941371681415928, violated group size: 0.217\n", - "iteration: 453, error: 0.41969315673289187, fairness violation: 0.0049448344370860925, violated group size: 0.217\n", - "iteration: 454, error: 0.41964977973568285, fairness violation: 0.004948281938325991, violated group size: 0.283\n", - "iteration: 455, error: 0.41960659340659345, fairness violation: 0.004951714285714286, violated group size: 0.283\n", - "iteration: 456, error: 0.41956359649122804, fairness violation: 0.004955131578947367, violated group size: 0.217\n", - "iteration: 457, error: 0.41952078774617063, fairness violation: 0.0049585339168490145, violated group size: 0.217\n", - "iteration: 458, error: 0.41947816593886456, fairness violation: 0.004961921397379911, violated group size: 0.217\n", - "iteration: 459, error: 0.4194357298474945, fairness violation: 0.00496529411764706, violated group size: 0.283\n", - "iteration: 460, error: 0.4193934782608696, fairness violation: 0.004968652173913044, violated group size: 0.283\n", - "iteration: 461, error: 0.41935140997830805, fairness violation: 0.004971995661605205, violated group size: 0.283\n", - "iteration: 462, error: 0.41930952380952374, fairness violation: 0.0049753246753246735, violated group size: 0.217\n", - "iteration: 463, error: 0.41926781857451406, fairness violation: 0.004978639308855291, violated group size: 0.217\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": true + }, + "id": "u364ULCthI5L", + "outputId": "24fa9897-8b4a-4d42-afb3-2832bd76bb99" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "# output heatmap (brute force)\n", + "# replace None with the relative path 
if you want to save the plot\n", + "fair_model.heatmapflag = True\n", + "fair_model.heatmap_path = 'heatmap'\n", + "fair_model.generate_heatmap(data_set, dataset_yhat.labels)\n", + "Image(filename='{}.png'.format(fair_model.heatmap_path))\n", + "\n", + "\n", + "\n" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "iteration: 464, error: 0.41922629310344817, fairness violation: 0.004981939655172413, violated group size: 0.217\n", - "iteration: 465, error: 0.41918494623655916, fairness violation: 0.004985225806451612, violated group size: 0.217\n", - "iteration: 466, error: 0.4191437768240344, fairness violation: 0.004988497854077254, violated group size: 0.283\n", - "iteration: 467, error: 0.41910278372591, fairness violation: 0.004991755888650964, violated group size: 0.283\n", - "iteration: 468, error: 0.4190619658119658, fairness violation: 0.004994999999999998, violated group size: 0.217\n", - "iteration: 469, error: 0.4190213219616205, fairness violation: 0.0049982302771855, violated group size: 0.217\n", - "iteration: 470, error: 0.41898085106382993, fairness violation: 0.005001446808510636, violated group size: 0.283\n", - "iteration: 471, error: 0.41894055201698516, fairness violation: 0.005004649681528661, violated group size: 0.217\n", - "iteration: 472, error: 0.41890042372881364, fairness violation: 0.005007838983050847, violated group size: 0.283\n", - "iteration: 473, error: 0.4188604651162791, fairness violation: 0.005011014799154333, violated group size: 0.217\n", - "iteration: 474, error: 0.41882067510548526, fairness violation: 0.005014177215189871, violated group size: 0.217\n", - "iteration: 475, error: 0.41878105263157905, fairness violation: 0.0050173263157894735, violated group size: 0.283\n", - "iteration: 476, error: 0.41874159663865557, fairness violation: 0.0050204621848739485, violated group size: 0.217\n", - "iteration: 477, error: 0.4188259958071279, fairness violation: 0.005013576519916141, violated group size: 
0.217\n", - "iteration: 478, error: 0.41878661087866115, fairness violation: 0.005016707112970709, violated group size: 0.217\n", - "iteration: 479, error: 0.4188705636743216, fairness violation: 0.005009858037578285, violated group size: 0.217\n", - "iteration: 480, error: 0.41883125000000004, fairness violation: 0.005012983333333334, violated group size: 0.283\n", - "iteration: 481, error: 0.4187920997920998, fairness violation: 0.005016095634095634, violated group size: 0.283\n", - "iteration: 482, error: 0.4187531120331951, fairness violation: 0.0050191950207468874, violated group size: 0.283\n", - "iteration: 483, error: 0.4188364389233955, fairness violation: 0.00501239751552795, violated group size: 0.217\n", - "iteration: 484, error: 0.41891942148760325, fairness violation: 0.005005628099173555, violated group size: 0.283\n", - "iteration: 485, error: 0.4190020618556701, fairness violation: 0.004998886597938144, violated group size: 0.283\n", - "iteration: 486, error: 0.41896296296296304, fairness violation: 0.005001995884773661, violated group size: 0.217\n", - "iteration: 487, error: 0.4190451745379877, fairness violation: 0.004995289527720739, violated group size: 0.283\n", - "iteration: 488, error: 0.4191270491803279, fairness violation: 0.004988610655737704, violated group size: 0.283\n", - "iteration: 489, error: 0.4192085889570552, fairness violation: 0.004981959100204497, violated group size: 0.217\n", - "iteration: 490, error: 0.41916938775510204, fairness violation: 0.004985077551020407, violated group size: 0.283\n", - "iteration: 491, error: 0.4192505091649695, fairness violation: 0.004978460285132381, violated group size: 0.217\n", - "iteration: 492, error: 0.4192113821138212, fairness violation: 0.004981573170731706, violated group size: 0.217\n", - "iteration: 493, error: 0.41917241379310355, fairness violation: 0.004984673427991887, violated group size: 0.283\n", - "iteration: 494, error: 0.41913360323886634, fairness violation: 
0.004987761133603237, violated group size: 0.217\n", - "iteration: 495, error: 0.4192141414141415, fairness violation: 0.004981191919191918, violated group size: 0.217\n", - "iteration: 496, error: 0.4192943548387097, fairness violation: 0.004974649193548386, violated group size: 0.217\n", - "iteration: 497, error: 0.419374245472837, fairness violation: 0.004968132796780683, violated group size: 0.217\n", - "iteration: 498, error: 0.4194538152610441, fairness violation: 0.004961642570281124, violated group size: 0.217\n", - "iteration: 499, error: 0.41953306613226454, fairness violation: 0.0049551783567134255, violated group size: 0.217\n" - ] - } - ], - "source": [ - "C = 100\n", - "print_flag = True\n", - "gamma = .005\n", - "\n", - "\n", - "fair_model = GerryFairClassifier(C=C, printflag=print_flag, gamma=gamma, fairness_def='FP',\n", - " max_iters=max_iterations, heatmapflag=False)\n", - "\n", - "# fit method\n", - "fair_model.fit(data_set, early_termination=True)\n", - "\n", - "# predict method. If threshold in (0, 1) produces binary predictions\n", - "\n", - "dataset_yhat = fair_model.predict(data_set, threshold=False)\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**3-d heatmaps**\n", - "\n", - "We now show to generate a 3d-heatmap of unfairness using the `generate_heatmap` method. The $X-Y$ axes in the plot represent the coefficients of the linear threshold function that defines a protected subgroup with respect to the first two sensitive attributes. Which $2$ attributes are considered sensitive can be overwritten with the `col_index` argument. The $Z$-axes is the $\\gamma$-disparity (FP) of the corresponding subgroup defined by the linear threshold function. This is important because it allows us to (1) visualize convergence as the heatmap flattens and (2) brute force check the fairness in low-dimensions without relying on a heuristic auditor. 
See the [the rich subgroup fairness empirical paper](https://arxiv.org/abs/1808.08166) for a discussion of these plots. Note that in the below plot no group has a $\\gamma$-disparity of greater than $.005$, which we would expect since the set of linear threshold functions on two attributes is a subset of the set of linear threshold functions on all protected attributes, and the final model is $\\gamma$-fair. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "SwMw4smGhI5M" + }, + "source": [ + "**black-box auditing**\n", + "\n", + "We now show how to audit any black box classifier with respect to rich subgroup fairness under either FP or FN rate. Note that the auditing procedure below would work for any set of (soft) predictions $\\hat{y}$, and makes no assumptions about the structure of the predictor. We note that as expected the disparity of the group found is the same as the disparity printed out in the last iteration of the `fit` method.\n", + " " ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "# output heatmap (brute force)\n", - "# replace None with the relative path if you want to save the plot\n", - "fair_model.heatmapflag = True\n", - "fair_model.heatmap_path = 'heatmap'\n", - "fair_model.generate_heatmap(data_set, dataset_yhat.labels)\n", - "Image(filename='{}.png'.format(fair_model.heatmap_path)) \n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**black-box auditing**\n", - "\n", - "We now show to audit any black box classifier with respect to rich subgroup fairness under either FP or FN rate.
Note the below auditing procedure would work for any set of (soft) predictions $\\hat{y}$, and need make no assumptions about the structure of the predictor. We note that as expected the disparity of the group found is the same as the disparity printed out in the last iteration of the `fit` method.\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.004955178356713431\n" - ] - } - ], - "source": [ - "\n", - "\n", - "gerry_metric = BinaryLabelDatasetMetric(data_set)\n", - "gamma_disparity = gerry_metric.rich_subgroup(array_to_tuple(dataset_yhat.labels), 'FP')\n", - "print(gamma_disparity)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**pareto curves**\n", - "\n", - "The `FairFictPlay` algorithm implemented in the `fit` method converges given access to perfect oracles for solving cost-sensitive classification (CSC) problems. A cost-sensitive classification problem over a hypothesis class $\\mathcal{H}$ is $$\\min_{h}\\sum_{i = 1}^{n}(1-h(x_i))c_0 + h(x_i)c_1$$\n", - "By default in this package, and in the companion [empirical](https://arxiv.org/abs/1808.08166) and [theory](https://arxiv.org/pdf/1711.05144.pdf) papers, the hypothesis class of the learner and the of the subgroups are hyperplanes. The corresponding heuristic oracle for solving the CSC problem first forms two regression problems $(x_i, c_0)$ and $(x_i, c_1)$. Then in the case of hyperplanes, trains two regressions $r_i: \\mathcal{X} \\to R$ which predict the costs of classifying a given point $x$ $0,1$ respectively. Finally the binary classifier output by the oracle is defined as $\\hat{r}(x) = \\arg\\min_{j \\in \\{0,1\\}}r_j(x)$. But of course if we are interesting in different hypothesis classes for the learner, we simply need different regressors. 
In this package in addition to linear regression, we've added support for regression trees, kernelized ridge regression, and support vector regression. Below we trace out Pareto curves of $\\gamma$-disparity vs. error for each of these different heuristic oracles. " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": true + }, + "id": "YsozvR3hhI5M", + "outputId": "6ef397ab-120c-49a0-a767-5201a047ee3d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.004955178356713431\n" + ] + } + ], + "source": [ + "\n", + "\n", + "gerry_metric = BinaryLabelDatasetMetric(data_set)\n", + "gamma_disparity = gerry_metric.rich_subgroup(array_to_tuple(dataset_yhat.labels), 'FP')\n", + "print(gamma_disparity)\n" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Curr Predictor: Linear\n", - "Curr Predictor: SVR\n", - "Curr Predictor: Tree\n", - "Curr Predictor: Kernel\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "1qK4jg_PhI5M" + }, + "source": [ + "**pareto curves**\n", + "\n", + "The `FairFictPlay` algorithm implemented in the `fit` method converges given access to perfect oracles for solving cost-sensitive classification (CSC) problems. A cost-sensitive classification problem over a hypothesis class $\\mathcal{H}$ is $$\\min_{h}\\sum_{i = 1}^{n}(1-h(x_i))c_0 + h(x_i)c_1$$\n", + "By default in this package, and in the companion [empirical](https://arxiv.org/abs/1808.08166) and [theory](https://arxiv.org/pdf/1711.05144.pdf) papers, the hypothesis class of the learner and the of the subgroups are hyperplanes. The corresponding heuristic oracle for solving the CSC problem first forms two regression problems $(x_i, c_0)$ and $(x_i, c_1)$. 
Then in the case of hyperplanes, trains two regressions $r_i: \\mathcal{X} \\to R$ which predict the costs of classifying a given point $x$ $0,1$ respectively. Finally the binary classifier output by the oracle is defined as $\\hat{r}(x) = \\arg\\min_{j \\in \\{0,1\\}}r_j(x)$. But of course if we are interested in different hypothesis classes for the learner, we simply need different regressors. In this package in addition to linear regression, we've added support for regression trees, kernelized ridge regression, and support vector regression. Below we trace out Pareto curves of $\\gamma$-disparity vs. error for each of these different heuristic oracles." + ] }, { - "data": { - "image/png": "\n", - "text/plain": [ - "" + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": true + }, + "id": "WJspqrmEhI5M", + "outputId": "d1478011-d530-45e8-a32f-3862db281721" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Curr Predictor: Linear\n", + "Curr Predictor: SVR\n", + "Curr Predictor: Tree\n", + "Curr Predictor: Kernel\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# set to 50 iterations for fast running of notebook - set >= 1000 when running real experiments\n", + "pareto_iters = 50\n", + "def multiple_classifiers_pareto(dataset, gamma_list=[0.002, 0.005, 0.01, 0.02, 0.05, 0.1], save_results=False, iters=pareto_iters):\n", + "\n", + " ln_predictor = linear_model.LinearRegression()\n", + " svm_predictor = svm.LinearSVR()\n", + " tree_predictor = tree.DecisionTreeRegressor(max_depth=3)\n", + " kernel_predictor = KernelRidge(alpha=1.0, gamma=1.0, kernel='rbf')\n", + " predictor_dict = {'Linear': {'predictor': ln_predictor, 'iters': iters},\n", + " 'SVR': {'predictor': svm_predictor, 'iters': iters},\n", + " 'Tree': {'predictor': tree_predictor, 'iters': 
iters},\n", + " 'Kernel': {'predictor': kernel_predictor, 'iters': iters}}\n", + "\n", + " results_dict = {}\n", + "\n", + " for pred in predictor_dict:\n", + " print('Curr Predictor: {}'.format(pred))\n", + " predictor = predictor_dict[pred]['predictor']\n", + " max_iters = predictor_dict[pred]['iters']\n", + " fair_clf = GerryFairClassifier(C=100, printflag=True, gamma=1, predictor=predictor, max_iters=max_iters)\n", + " fair_clf.printflag = False\n", + " fair_clf.max_iters=max_iters\n", + " errors, fp_violations, fn_violations = fair_clf.pareto(dataset, gamma_list)\n", + " results_dict[pred] = {'errors': errors, 'fp_violations': fp_violations, 'fn_violations': fn_violations}\n", + " plt.plot(errors, fp_violations, label=pred)\n", + "\n", + " if save_results:\n", + " pickle.dump(results_dict, open('results_dict_' + str(gamma_list) + '_gammas' + str(gamma_list) + '.pkl', 'wb'))\n", + "\n", + " plt.xlabel('Error')\n", + " plt.ylabel('Unfairness')\n", + " plt.legend()\n", + " plt.title('Error vs. 
Unfairness\\n(Adult Dataset)')\n", + " plt.savefig('gerryfair_pareto.png')\n", + " plt.close()\n", + "multiple_classifiers_pareto(data_set)\n", + "Image(filename='gerryfair_pareto.png')" ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# set to 50 iterations for fast running of notebook - set >= 1000 when running real experiments\n", - "pareto_iters = 50\n", - "def multiple_classifiers_pareto(dataset, gamma_list=[0.002, 0.005, 0.01, 0.02, 0.05, 0.1], save_results=False, iters=pareto_iters):\n", - "\n", - " ln_predictor = linear_model.LinearRegression()\n", - " svm_predictor = svm.LinearSVR()\n", - " tree_predictor = tree.DecisionTreeRegressor(max_depth=3)\n", - " kernel_predictor = KernelRidge(alpha=1.0, gamma=1.0, kernel='rbf')\n", - " predictor_dict = {'Linear': {'predictor': ln_predictor, 'iters': iters},\n", - " 'SVR': {'predictor': svm_predictor, 'iters': iters},\n", - " 'Tree': {'predictor': tree_predictor, 'iters': iters},\n", - " 'Kernel': {'predictor': kernel_predictor, 'iters': iters}}\n", - "\n", - " results_dict = {}\n", - "\n", - " for pred in predictor_dict:\n", - " print('Curr Predictor: {}'.format(pred))\n", - " predictor = predictor_dict[pred]['predictor']\n", - " max_iters = predictor_dict[pred]['iters']\n", - " fair_clf = GerryFairClassifier(C=100, printflag=True, gamma=1, predictor=predictor, max_iters=max_iters)\n", - " fair_clf.printflag = False\n", - " fair_clf.max_iters=max_iters\n", - " errors, fp_violations, fn_violations = fair_clf.pareto(dataset, gamma_list)\n", - " results_dict[pred] = {'errors': errors, 'fp_violations': fp_violations, 'fn_violations': fn_violations}\n", - " plt.plot(errors, fp_violations, label=pred)\n", - "\n", - " if save_results:\n", - " pickle.dump(results_dict, open('results_dict_' + str(gamma_list) + '_gammas' + str(gamma_list) + '.pkl', 'wb'))\n", - "\n", - " plt.xlabel('Error')\n", - " plt.ylabel('Unfairness')\n", - " plt.legend()\n", - " 
plt.title('Error vs. Unfairness\\n(Adult Dataset)')\n", - " plt.savefig('gerryfair_pareto.png')\n", - " plt.close()\n", - "multiple_classifiers_pareto(data_set)\n", - "Image(filename='gerryfair_pareto.png') " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A natural question one might ask is, suppose we fix a statistical fairness definition for rich subgroup fairness like equality of false positive rates, `FP`. Does learning a classifier that is fair with respect to `FP` increase or decrease fairness with respect to false negative rates `FN`? One could see this relationship going in either direction - and indeed we submit that it is dataset dependent. In some cases, if enforcing `FP` fairness pushes the classifier towards the constant classifier, then it will also satisify `FN` rate fairness, since the constant classifier is perfectly fair. However, if the hypothesis class is sufficiently rich, then one would expect that ceteris paribus since we are optimizing for error in addition to `FP` rate fairness, the algorithm would increase `FN` rate unfairness in order to decrease error. Below we trace the FN vs. FP rate tradeoff across a range of input $\\gamma$, where the classifier is optimized only for `FP` rate fairness. " - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "gamma: 0.001 gamma: 0.002 gamma: 0.003 gamma: 0.004 gamma: 0.005 gamma: 0.0075 gamma: 0.01 gamma: 0.02 gamma: 0.03 gamma: 0.05 " - ] + "cell_type": "markdown", + "metadata": { + "id": "N_ghZDh6hI5N" + }, + "source": [ + "A natural question one might ask is, suppose we fix a statistical fairness definition for rich subgroup fairness like equality of false positive rates, `FP`. Does learning a classifier that is fair with respect to `FP` increase or decrease fairness with respect to false negative rates `FN`? 
One could see this relationship going in either direction - and indeed we submit that it is dataset dependent. In some cases, if enforcing `FP` fairness pushes the classifier towards the constant classifier, then it will also satisfy `FN` rate fairness, since the constant classifier is perfectly fair. However, if the hypothesis class is sufficiently rich, then one would expect that ceteris paribus since we are optimizing for error in addition to `FP` rate fairness, the algorithm would increase `FN` rate unfairness in order to decrease error. Below we trace the FN vs. FP rate tradeoff across a range of input $\\gamma$, where the classifier is optimized only for `FP` rate fairness." + ] }, { - "data": { - "image/png": "\n", - "text/plain": [ - "" + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": true + }, + "id": "0xLIr5QdhI5N", + "outputId": "33fb403b-09c0-44a7-9c55-260319380020" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gamma: 0.001 gamma: 0.002 gamma: 0.003 gamma: 0.004 gamma: 0.005 gamma: 0.0075 gamma: 0.01 gamma: 0.02 gamma: 0.03 gamma: 0.05 " + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def fp_vs_fn(dataset, gamma_list, iters):\n", + " fp_auditor = Auditor(dataset, 'FP')\n", + " fn_auditor = Auditor(dataset, 'FN')\n", + " fp_violations = []\n", + " fn_violations = []\n", + " for g in gamma_list:\n", + " print('gamma: {} '.format(g), end =\" \")\n", + " fair_model = GerryFairClassifier(C=100, printflag=False, gamma=g, max_iters=iters)\n", + " fair_model.gamma=g\n", + " fair_model.fit(dataset)\n", + " predictions = array_to_tuple((fair_model.predict(dataset)).labels)\n", + " _, fp_diff = fp_auditor.audit(predictions)\n", + " _, fn_diff = fn_auditor.audit(predictions)\n", + " fp_violations.append(fp_diff)\n", + " 
fn_violations.append(fn_diff)\n", + "\n", + " plt.plot(fp_violations, fn_violations, label='adult')\n", + " plt.xlabel('False Positive Disparity')\n", + " plt.ylabel('False Negative Disparity')\n", + " plt.legend()\n", + " plt.title('FP vs FN Unfairness')\n", + " plt.savefig('gerryfair_fp_fn.png')\n", + " plt.close()\n", + "\n", + "gamma_list = [0.001, 0.002, 0.003, 0.004, 0.005, 0.0075, 0.01, 0.02, 0.03, 0.05]\n", + "fp_vs_fn(data_set, gamma_list, pareto_iters)\n", + "Image(filename='gerryfair_fp_fn.png')" ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": true + }, + "id": "wn6BQLiFhI5N" + }, + "outputs": [], + "source": [] } - ], - "source": [ - "def fp_vs_fn(dataset, gamma_list, iters):\n", - " fp_auditor = Auditor(dataset, 'FP')\n", - " fn_auditor = Auditor(dataset, 'FN')\n", - " fp_violations = []\n", - " fn_violations = []\n", - " for g in gamma_list:\n", - " print('gamma: {} '.format(g), end =\" \")\n", - " fair_model = GerryFairClassifier(C=100, printflag=False, gamma=g, max_iters=iters)\n", - " fair_model.gamma=g\n", - " fair_model.fit(dataset)\n", - " predictions = array_to_tuple((fair_model.predict(dataset)).labels)\n", - " _, fp_diff = fp_auditor.audit(predictions)\n", - " _, fn_diff = fn_auditor.audit(predictions)\n", - " fp_violations.append(fp_diff)\n", - " fn_violations.append(fn_diff)\n", - "\n", - " plt.plot(fp_violations, fn_violations, label='adult')\n", - " plt.xlabel('False Positive Disparity')\n", - " plt.ylabel('False Negative Disparity')\n", - " plt.legend()\n", - " plt.title('FP vs FN Unfairness')\n", - " plt.savefig('gerryfair_fp_fn.png')\n", - " plt.close()\n", - "\n", - "gamma_list = [0.001, 0.002, 0.003, 0.004, 0.005, 0.0075, 0.01, 0.02, 0.03, 0.05]\n", - "fp_vs_fn(data_set, gamma_list, pareto_iters)\n", - "Image(filename='gerryfair_fp_fn.png')" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": { + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, "pycharm": { - "is_executing": true + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + }, + "colab": { + "provenance": [] } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - }, - "pycharm": { - "stem_cell": { - "cell_type": "raw", - "metadata": { - "collapsed": false - }, - "source": [] - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/demo_json_explainers.ipynb b/examples/demo_json_explainers.ipynb index da23274c..fe8e8ddb 100644 --- a/examples/demo_json_explainers.ipynb +++ b/examples/demo_json_explainers.ipynb @@ -1,263 +1,309 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load all necessary packages\n", - "import sys\n", - "sys.path.append(\"../\")\n", - "from collections import OrderedDict\n", - "import json\n", - "from pprint import pprint\n", - "from aif360.datasets import GermanDataset\n", - "from aif360.metrics import BinaryLabelDatasetMetric\n", - "from aif360.explainers import MetricTextExplainer, MetricJSONExplainer\n", - "from IPython.display import 
JSON, display_json" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Load dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "gd = GermanDataset()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Create metrics" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "priv = [{'sex': 1}]\n", - "unpriv = [{'sex': 0}]\n", - "bldm = BinaryLabelDatasetMetric(gd, unprivileged_groups=unpriv, privileged_groups=priv)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Create explainers" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "text_expl = MetricTextExplainer(bldm)\n", - "json_expl = MetricJSONExplainer(bldm)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Text explanations" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of positive-outcome instances: 700.0\n" - ] - } - ], - "source": [ - "print(text_expl.num_positives())" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_json_explainers.ipynb)\n" + ], + "metadata": { + "id": "PXISC-e0l2JJ" + } + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean difference (mean label value on privileged instances - mean label value on unprivileged instances): -0.0748013090229\n" - ] - } - ], - "source": [ - "print(text_expl.mean_difference())" - ] - }, - { - 
"cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "s-b2u2Rvl013" + }, + "outputs": [], + "source": [ + "# Load all necessary packages\n", + "import sys\n", + "sys.path.append(\"../\")\n", + "from collections import OrderedDict\n", + "import json\n", + "from pprint import pprint\n", + "from aif360.datasets import GermanDataset\n", + "from aif360.metrics import BinaryLabelDatasetMetric\n", + "from aif360.explainers import MetricTextExplainer, MetricJSONExplainer\n", + "from IPython.display import JSON, display_json" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.896567328205\n" - ] - } - ], - "source": [ - "print(text_expl.disparate_impact())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### JSON Explanations" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def format_json(json_str):\n", - " return json.dumps(json.loads(json_str, object_pairs_hook=OrderedDict), indent=2)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "3HohcQsBl016" + }, + "source": [ + "##### Load dataset" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"metric\": \"num_positives\", \n", - " \"message\": \"Number of positive-outcome instances: 700.0\", \n", - " \"numPositives\": 700.0, \n", - " \"description\": \"Computed as the number of positive instances for the given (privileged or unprivileged) group.\", \n", - " \"ideal\": \"The ideal value of this metric lies in the total number of positive instances made available\"\n", - "}\n" - ] - } - ], - "source": [ - 
"print(format_json(json_expl.num_positives()))" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "vHsL60ZSl017" + }, + "outputs": [], + "source": [ + "gd = GermanDataset()" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"metric\": \"mean_difference\", \n", - " \"message\": \"Mean difference (mean label value on privileged instances - mean label value on unprivileged instances): -0.0748013090229\", \n", - " \"numPositivesUnprivileged\": 201.0, \n", - " \"numInstancesUnprivileged\": 310.0, \n", - " \"numPositivesPrivileged\": 499.0, \n", - " \"numInstancesPrivileged\": 690.0, \n", - " \"description\": \"Computed as the difference of the rate of favorable outcomes received by the unprivileged group to the privileged group.\", \n", - " \"ideal\": \"The ideal value of this metric is 0.0\"\n", - "}\n" - ] - } - ], - "source": [ - "print(format_json(json_expl.mean_difference()))" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "Fy7C2Lttl017" + }, + "source": [ + "##### Create metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "BWIQjCUdl018" + }, + "outputs": [], + "source": [ + "priv = [{'sex': 1}]\n", + "unpriv = [{'sex': 0}]\n", + "bldm = BinaryLabelDatasetMetric(gd, unprivileged_groups=unpriv, privileged_groups=priv)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eUsxBOxVl018" + }, + "source": [ + "##### Create explainers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "7agLzgCCl018" + }, + "outputs": [], + "source": [ + "text_expl = MetricTextExplainer(bldm)\n", + "json_expl = MetricJSONExplainer(bldm)" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{ + "id": "hEKcHC3Fl019" + }, + "source": [ + "##### Text explanations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4yfddrtAl019", + "outputId": "680af6c9-53c0-4b51-d4b6-d09207f00ccd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of positive-outcome instances: 700.0\n" + ] + } + ], + "source": [ + "print(text_expl.num_positives())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xg91poCjl01-", + "outputId": "e310a8ce-3730-4e08-dafd-6f9ecd26945a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean difference (mean label value on privileged instances - mean label value on unprivileged instances): -0.0748013090229\n" + ] + } + ], + "source": [ + "print(text_expl.mean_difference())" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"metric\": \"disparate_impact\", \n", - " \"message\": \"Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.896567328205\", \n", - " \"numPositivePredictionsUnprivileged\": 201.0, \n", - " \"numUnprivileged\": 310.0, \n", - " \"numPositivePredictionsPrivileged\": 499.0, \n", - " \"numPrivileged\": 690.0, \n", - " \"description\": \"Computed as the ratio of likelihood of favorable outcome for the unprivileged group to that of the privileged group.\", \n", - " \"ideal\": \"The ideal value of this metric is 1.0\"\n", - "}\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W9HdX1wUl01-", + "outputId": "aff81d6b-f850-4a9c-e25a-60e77c69b093" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.896567328205\n" + ] + } + ], + "source": [ + 
"print(text_expl.disparate_impact())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LA2uVnjPl01_" + }, + "source": [ + "##### JSON Explanations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "JCosLnQbl01_" + }, + "outputs": [], + "source": [ + "def format_json(json_str):\n", + " return json.dumps(json.loads(json_str, object_pairs_hook=OrderedDict), indent=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F1nemIIel01_", + "outputId": "7c03e9aa-4aee-4862-ddd5-e7abd20d5941" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"metric\": \"num_positives\", \n", + " \"message\": \"Number of positive-outcome instances: 700.0\", \n", + " \"numPositives\": 700.0, \n", + " \"description\": \"Computed as the number of positive instances for the given (privileged or unprivileged) group.\", \n", + " \"ideal\": \"The ideal value of this metric lies in the total number of positive instances made available\"\n", + "}\n" + ] + } + ], + "source": [ + "print(format_json(json_expl.num_positives()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vaBapB4xl01_", + "outputId": "381346be-c9e9-4f41-dbaf-2c18cdd06ff6" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"metric\": \"mean_difference\", \n", + " \"message\": \"Mean difference (mean label value on privileged instances - mean label value on unprivileged instances): -0.0748013090229\", \n", + " \"numPositivesUnprivileged\": 201.0, \n", + " \"numInstancesUnprivileged\": 310.0, \n", + " \"numPositivesPrivileged\": 499.0, \n", + " \"numInstancesPrivileged\": 690.0, \n", + " \"description\": \"Computed as the difference of the rate of favorable outcomes received by the unprivileged group to the privileged group.\", \n", + " \"ideal\": \"The ideal value of this metric is 0.0\"\n", + 
"}\n" + ] + } + ], + "source": [ + "print(format_json(json_expl.mean_difference()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qExRlmw8l02A", + "outputId": "11b63f8e-4cc6-4192-e600-de0231ae33cc" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"metric\": \"disparate_impact\", \n", + " \"message\": \"Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.896567328205\", \n", + " \"numPositivePredictionsUnprivileged\": 201.0, \n", + " \"numUnprivileged\": 310.0, \n", + " \"numPositivePredictionsPrivileged\": 499.0, \n", + " \"numPrivileged\": 690.0, \n", + " \"description\": \"Computed as the ratio of likelihood of favorable outcome for the unprivileged group to that of the privileged group.\", \n", + " \"ideal\": \"The ideal value of this metric is 1.0\"\n", + "}\n" + ] + } + ], + "source": [ + "print(format_json(json_expl.disparate_impact()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "mu6tkv_xl02A" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + }, + "colab": { + "provenance": [] } - ], - "source": [ - "print(format_json(json_expl.disparate_impact()))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" }, - "language_info": { - "codemirror_mode": { - "name": 
"ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/demo_lfr.ipynb b/examples/demo_lfr.ipynb index 134a2729..9ed91c43 100644 --- a/examples/demo_lfr.ipynb +++ b/examples/demo_lfr.ipynb @@ -1,473 +1,528 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### This notebook demonstrates the use of the learning fair representations algorithm for bias mitigation\n", - "Learning fair representations [1] is a pre-processing technique that finds a latent representation which encodes the data well but obfuscates information about protected attributes. We will see how to use this algorithm for learning representations that encourage individual fairness and apply them on the Adult dataset.\n", - "\n", - "References:\n", - "\n", - "[1] R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork, \"Learning Fair Representations.\" \n", - "International Conference on Machine Learning, 2013." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "# Load all necessary packages\n", - "import sys\n", - "sys.path.append(\"../\")\n", - "from aif360.datasets import BinaryLabelDataset\n", - "from aif360.datasets import AdultDataset\n", - "from aif360.metrics import BinaryLabelDatasetMetric\n", - "from aif360.metrics import ClassificationMetric\n", - "from aif360.metrics.utils import compute_boolean_conditioning_vector\n", - "\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", - "from aif360.algorithms.preprocessing.lfr import LFR\n", - "\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.metrics import accuracy_score\n", - "from sklearn.metrics import classification_report\n", - "\n", - "from IPython.display import Markdown, display\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "from common_utils import compute_metrics" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Load dataset and set options" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the dataset and split into train and test\n", - "dataset_orig = load_preproc_data_adult()\n", - "dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Clean up training data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/markdown": [ - "#### Training Dataset shape" + "cell_type": "markdown", + "source": [ + "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_lfr.ipynb)\n" ], - "text/plain": [ - "" + "metadata": { + "id": "tVXbNnjsmVIz" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BKhA67dOmMcG" + }, + "source": [ + "#### This notebook demonstrates the use of the learning fair representations algorithm for bias mitigation\n", + "Learning fair representations [1] is a pre-processing technique that finds a latent representation which encodes the data well but obfuscates information about protected attributes. We will see how to use this algorithm for learning representations that encourage individual fairness and apply them on the Adult dataset.\n", + "\n", + "References:\n", + "\n", + "[1] R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork, \"Learning Fair Representations.\"\n", + "International Conference on Machine Learning, 2013." ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "(34189, 18)\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wG_aksDfmMcJ" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "# Load all necessary packages\n", + "import sys\n", + "sys.path.append(\"../\")\n", + "from aif360.datasets import BinaryLabelDataset\n", + "from aif360.datasets import AdultDataset\n", + "from aif360.metrics import BinaryLabelDatasetMetric\n", + "from aif360.metrics import ClassificationMetric\n", + "from aif360.metrics.utils import compute_boolean_conditioning_vector\n", + "\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", + "from aif360.algorithms.preprocessing.lfr import LFR\n", + "\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.metrics import accuracy_score\n", + 
"from sklearn.metrics import classification_report\n", + "\n", + "from IPython.display import Markdown, display\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "from common_utils import compute_metrics" + ] }, { - "data": { - "text/markdown": [ - "#### Favorable and unfavorable labels" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "AtUDRcFzmMcK" + }, + "source": [ + "#### Load dataset and set options" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.0 0.0\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UeELy5k1mMcL" + }, + "outputs": [], + "source": [ + "# Get the dataset and split into train and test\n", + "dataset_orig = load_preproc_data_adult()\n", + "dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)" + ] }, { - "data": { - "text/markdown": [ - "#### Protected attribute names" + "cell_type": "markdown", + "metadata": { + "id": "rKE1eMYemMcL" + }, + "source": [ + "#### Clean up training data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rBcLsI-mMcL", + "outputId": "027ce352-06e6-478c-ceb6-1b810401643d" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Training Dataset shape" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(34189, 18)\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Favorable and unfavorable labels" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.0 0.0\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Protected attribute names" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "['sex', 'race']\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Privileged and unprivileged protected attribute values" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[array([1.]), array([1.])] [array([0.]), array([0.])]\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Dataset feature names" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# print out some labels, names, etc.\n", + "display(Markdown(\"#### Training Dataset shape\"))\n", + "print(dataset_orig_train.features.shape)\n", + "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", + "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", + "display(Markdown(\"#### Protected attribute names\"))\n", + "print(dataset_orig_train.protected_attribute_names)\n", + "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", + "print(dataset_orig_train.privileged_protected_attributes,\n", + " dataset_orig_train.unprivileged_protected_attributes)\n", + "display(Markdown(\"#### Dataset feature names\"))\n", + "print(dataset_orig_train.feature_names)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "['sex', 'race']\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "4pY8KI0QmMcM" + }, + "source": [ + "#### 
Metric for original training data" + ] }, { - "data": { - "text/markdown": [ - "#### Privileged and unprivileged protected attribute values" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n5qIXMbemMcM", + "outputId": "ad97d8eb-3381-4388-982f-5c1f070d9504" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Original training dataset" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Difference in mean outcomes between unprivileged and privileged groups = -0.193139\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Original test dataset" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Difference in mean outcomes between unprivileged and privileged groups = -0.197697\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# Metric for the original dataset\n", + "privileged_groups = [{'sex': 1.0}]\n", + "unprivileged_groups = [{'sex': 0.0}]\n", + "\n", + "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Original training dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_train.mean_difference())\n", + "metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Original test dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_test.mean_difference())\n" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "[array([1.]), array([1.])] [array([0.]), 
array([0.])]\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "xvap7BOvmMcN" + }, + "source": [ + "#### Train with and transform the original training data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HelNehJ4mMcN" + }, + "outputs": [], + "source": [ + "scale_orig = StandardScaler()\n", + "dataset_orig_train.features = scale_orig.fit_transform(dataset_orig_train.features)\n", + "dataset_orig_test.features = scale_orig.transform(dataset_orig_test.features)" + ] }, { - "data": { - "text/markdown": [ - "#### Dataset feature names" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V3qCK0q-mMcN", + "outputId": "76a156cf-7de7-40ef-87a1-c89add05e108" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "step: 0, loss: 1.0939550595829053, L_x: 2.531834521858599, L_y: 0.8200826015334493, L_z: 0.010344502931797964\n", + "step: 250, loss: 0.9162820270109503, L_x: 2.529109218043187, L_y: 0.6432961063010657, L_z: 0.010037499452782905\n", + "step: 500, loss: 0.8207071510514392, L_x: 2.5204911168067197, L_y: 0.5500397646035967, L_z: 0.00930913738358528\n", + "step: 750, loss: 0.8102771268166408, L_x: 2.511873834704061, L_y: 0.5427956868742799, L_z: 0.008147028235977415\n", + "step: 1000, loss: 0.7996570283329768, L_x: 2.480828451323288, L_y: 0.5399446552800813, L_z: 0.00581476396028337\n", + "step: 1250, loss: 0.7844631169970814, L_x: 2.4242508289183613, L_y: 0.5304307199052671, L_z: 0.005803657099989009\n", + "step: 1500, loss: 0.7653305722023572, L_x: 2.3297047767431986, L_y: 0.5176248867874912, L_z: 0.007367603870273078\n", + "step: 1750, loss: 0.7154304631442515, L_x: 2.085955877234543, L_y: 0.48081670080967953, L_z: 0.013009087305558827\n", + "step: 2000, loss: 0.6906420918886886, L_x: 1.896344106091722, L_y: 0.4646651544564373, L_z: 0.018171263411539594\n", + "step: 2250, loss: 0.6783680937630076, L_x: 1.7895665853948028, L_y: 0.4587714378849466, L_z: 
0.020319998669290275\n", + "step: 2500, loss: 0.6725576747654705, L_x: 1.742061633693402, L_y: 0.4577729094336143, L_z: 0.020289300981257967\n", + "step: 2750, loss: 0.6694103860159343, L_x: 1.7548885984309939, L_y: 0.4545867175857845, L_z: 0.019667404293525217\n", + "step: 3000, loss: 0.6658207636894926, L_x: 1.7515234617350093, L_y: 0.4539151313299769, L_z: 0.018376643093007367\n", + "step: 3250, loss: 0.6481415219979564, L_x: 1.7252276686316934, L_y: 0.4491717858033674, L_z: 0.013223484665709846\n", + "step: 3500, loss: 0.645366243737316, L_x: 1.7196207136719521, L_y: 0.4482843307446003, L_z: 0.012559920812760247\n", + "step: 3750, loss: 0.6425278186287126, L_x: 1.7117758355776211, L_y: 0.4473063883366716, L_z: 0.012021923367139413\n", + "step: 4000, loss: 0.6419409673076768, L_x: 1.7092609385556714, L_y: 0.44744616781598634, L_z: 0.011784352818061686\n", + "step: 4250, loss: 0.6377801462539607, L_x: 1.6917081956472533, L_y: 0.4496335370425122, L_z: 0.009487894823361622\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# Input reconstruction quality - Ax\n", + "# Fairness constraint - Az\n", + "# Output prediction error - Ay\n", + "\n", + "privileged_groups = [{'sex': 1}]\n", + "unprivileged_groups = [{'sex': 0}]\n", + "\n", + "TR = LFR(unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups,\n", + " k=10, Ax=0.1, Ay=1.0, Az=2.0,\n", + " verbose=1\n", + " )\n", + "TR = TR.fit(dataset_orig_train, maxiter=5000, maxfun=5000)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" - ] - } - ], - "source": [ - "# print out 
some labels, names, etc.\n", - "display(Markdown(\"#### Training Dataset shape\"))\n", - "print(dataset_orig_train.features.shape)\n", - "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", - "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", - "display(Markdown(\"#### Protected attribute names\"))\n", - "print(dataset_orig_train.protected_attribute_names)\n", - "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", - "print(dataset_orig_train.privileged_protected_attributes, \n", - " dataset_orig_train.unprivileged_protected_attributes)\n", - "display(Markdown(\"#### Dataset feature names\"))\n", - "print(dataset_orig_train.feature_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Metric for original training data" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sO4LI_hbmMcO" + }, + "outputs": [], + "source": [ + "# Transform training data and align features\n", + "dataset_transf_train = TR.transform(dataset_orig_train)\n", + "dataset_transf_test = TR.transform(dataset_orig_test)" + ] + }, { - "data": { - "text/markdown": [ - "#### Original training dataset" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZNXZ4AlUmMcO" + }, + "outputs": [], + "source": [ + "print(classification_report(dataset_orig_test.labels, dataset_transf_test.labels))" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Difference in mean outcomes between unprivileged and privileged groups = -0.193139\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UA_fxUSemMcO" + }, + "outputs": [], + "source": [ + "metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train,\n", + " 
unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Transformed training dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_transf_train.mean_difference())\n", + "metric_transf_test = BinaryLabelDatasetMetric(dataset_transf_test,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Transformed test dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_transf_test.mean_difference())\n" + ] }, { - "data": { - "text/markdown": [ - "#### Original test dataset" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PRATChL3mMcO" + }, + "outputs": [], + "source": [ + "from common_utils import compute_metrics\n", + "\n", + "display(Markdown(\"#### Predictions from transformed testing data\"))\n", + "bal_acc_arr_transf = []\n", + "disp_imp_arr_transf = []\n", + "\n", + "class_thresh_arr = np.linspace(0.01, 0.99, 100)\n", + "\n", + "dataset_transf_test_new = dataset_orig_test.copy(deepcopy=True)\n", + "dataset_transf_test_new.scores = dataset_transf_test.scores\n", + "\n", + "\n", + "for thresh in class_thresh_arr:\n", + "\n", + " fav_inds = dataset_transf_test_new.scores > thresh\n", + " dataset_transf_test_new.labels[fav_inds] = 1.0\n", + " dataset_transf_test_new.labels[~fav_inds] = 0.0\n", + "\n", + " metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_new,\n", + " unprivileged_groups, privileged_groups,\n", + " disp = False)\n", + "\n", + " bal_acc_arr_transf.append(metric_test_aft[\"Balanced accuracy\"])\n", + " disp_imp_arr_transf.append(metric_test_aft[\"Disparate impact\"])" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Difference in mean outcomes between 
unprivileged and privileged groups = -0.197697\n" - ] - } - ], - "source": [ - "# Metric for the original dataset\n", - "privileged_groups = [{'sex': 1.0}]\n", - "unprivileged_groups = [{'sex': 0.0}]\n", - "\n", - "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Original training dataset\"))\n", - "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_train.mean_difference())\n", - "metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Original test dataset\"))\n", - "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_test.mean_difference())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Train with and transform the original training data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "scale_orig = StandardScaler()\n", - "dataset_orig_train.features = scale_orig.fit_transform(dataset_orig_train.features)\n", - "dataset_orig_test.features = scale_orig.transform(dataset_orig_test.features)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HGsipNVamMcO" + }, + "outputs": [], + "source": [ + "fig, ax1 = plt.subplots(figsize=(10,7))\n", + "ax1.plot(class_thresh_arr, bal_acc_arr_transf)\n", + "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", + "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", + "ax1.xaxis.set_tick_params(labelsize=14)\n", + "ax1.yaxis.set_tick_params(labelsize=14)\n", + "\n", + "\n", + "ax2 = 
ax1.twinx()\n", + "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_transf)), color='r')\n", + "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", + "ax2.yaxis.set_tick_params(labelsize=14)\n", + "ax2.grid(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "biB1DXjKmMcP" + }, + "source": [ + "abs(1-disparate impact) must be small (close to 0) for classifier predictions to be fair." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DxtBqemwmMcP" + }, + "outputs": [], + "source": [ + "display(Markdown(\"#### Individual fairness metrics\"))\n", + "print(\"Consistency of labels in transformed training dataset= %f\" %metric_transf_train.consistency())\n", + "print(\"Consistency of labels in original training dataset= %f\" %metric_orig_train.consistency())\n", + "print(\"Consistency of labels in transformed test dataset= %f\" %metric_transf_test.consistency())\n", + "print(\"Consistency of labels in original test dataset= %f\" %metric_orig_test.consistency())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "90gI7jtwmMcP" + }, + "outputs": [], + "source": [ + "def check_algorithm_success():\n", + " \"\"\"Transformed dataset consistency should be greater than original dataset.\"\"\"\n", + " assert metric_transf_test.consistency() > metric_orig_test.consistency(), \"Transformed dataset consistency should be greater than original dataset.\"\n", + "\n", + "check_algorithm_success()" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "step: 0, loss: 1.0939550595829053, L_x: 2.531834521858599, L_y: 0.8200826015334493, L_z: 0.010344502931797964\n", - "step: 250, loss: 0.9162820270109503, L_x: 2.529109218043187, L_y: 0.6432961063010657, L_z: 0.010037499452782905\n", - "step: 500, loss: 0.8207071510514392, L_x: 2.5204911168067197, L_y: 0.5500397646035967, L_z: 0.00930913738358528\n", - "step: 750, loss: 
0.8102771268166408, L_x: 2.511873834704061, L_y: 0.5427956868742799, L_z: 0.008147028235977415\n", - "step: 1000, loss: 0.7996570283329768, L_x: 2.480828451323288, L_y: 0.5399446552800813, L_z: 0.00581476396028337\n", - "step: 1250, loss: 0.7844631169970814, L_x: 2.4242508289183613, L_y: 0.5304307199052671, L_z: 0.005803657099989009\n", - "step: 1500, loss: 0.7653305722023572, L_x: 2.3297047767431986, L_y: 0.5176248867874912, L_z: 0.007367603870273078\n", - "step: 1750, loss: 0.7154304631442515, L_x: 2.085955877234543, L_y: 0.48081670080967953, L_z: 0.013009087305558827\n", - "step: 2000, loss: 0.6906420918886886, L_x: 1.896344106091722, L_y: 0.4646651544564373, L_z: 0.018171263411539594\n", - "step: 2250, loss: 0.6783680937630076, L_x: 1.7895665853948028, L_y: 0.4587714378849466, L_z: 0.020319998669290275\n", - "step: 2500, loss: 0.6725576747654705, L_x: 1.742061633693402, L_y: 0.4577729094336143, L_z: 0.020289300981257967\n", - "step: 2750, loss: 0.6694103860159343, L_x: 1.7548885984309939, L_y: 0.4545867175857845, L_z: 0.019667404293525217\n", - "step: 3000, loss: 0.6658207636894926, L_x: 1.7515234617350093, L_y: 0.4539151313299769, L_z: 0.018376643093007367\n", - "step: 3250, loss: 0.6481415219979564, L_x: 1.7252276686316934, L_y: 0.4491717858033674, L_z: 0.013223484665709846\n", - "step: 3500, loss: 0.645366243737316, L_x: 1.7196207136719521, L_y: 0.4482843307446003, L_z: 0.012559920812760247\n", - "step: 3750, loss: 0.6425278186287126, L_x: 1.7117758355776211, L_y: 0.4473063883366716, L_z: 0.012021923367139413\n", - "step: 4000, loss: 0.6419409673076768, L_x: 1.7092609385556714, L_y: 0.44744616781598634, L_z: 0.011784352818061686\n", - "step: 4250, loss: 0.6377801462539607, L_x: 1.6917081956472533, L_y: 0.4496335370425122, L_z: 0.009487894823361622\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GkkBIqXgmMcP" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + }, + "colab": { + "provenance": [] } - ], - "source": [ - "# Input recontruction quality - Ax\n", - "# Fairness constraint - Az\n", - "# Output prediction error - Ay\n", - "\n", - "privileged_groups = [{'sex': 1}]\n", - "unprivileged_groups = [{'sex': 0}]\n", - " \n", - "TR = LFR(unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups,\n", - " k=10, Ax=0.1, Ay=1.0, Az=2.0,\n", - " verbose=1\n", - " )\n", - "TR = TR.fit(dataset_orig_train, maxiter=5000, maxfun=5000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Transform training data and align features\n", - "dataset_transf_train = TR.transform(dataset_orig_train)\n", - "dataset_transf_test = TR.transform(dataset_orig_test)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(classification_report(dataset_orig_test.labels, dataset_transf_test.labels))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Transformed training dataset\"))\n", - "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_transf_train.mean_difference())\n", - "metric_transf_test = BinaryLabelDatasetMetric(dataset_transf_test, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Transformed test dataset\"))\n", - 
"print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_transf_test.mean_difference())\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from common_utils import compute_metrics\n", - "\n", - "display(Markdown(\"#### Predictions from transformed testing data\"))\n", - "bal_acc_arr_transf = []\n", - "disp_imp_arr_transf = []\n", - "\n", - "class_thresh_arr = np.linspace(0.01, 0.99, 100)\n", - "\n", - "dataset_transf_test_new = dataset_orig_test.copy(deepcopy=True)\n", - "dataset_transf_test_new.scores = dataset_transf_test.scores\n", - "\n", - "\n", - "for thresh in class_thresh_arr:\n", - " \n", - " fav_inds = dataset_transf_test_new.scores > thresh\n", - " dataset_transf_test_new.labels[fav_inds] = 1.0\n", - " dataset_transf_test_new.labels[~fav_inds] = 0.0\n", - " \n", - " metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_new, \n", - " unprivileged_groups, privileged_groups,\n", - " disp = False)\n", - "\n", - " bal_acc_arr_transf.append(metric_test_aft[\"Balanced accuracy\"])\n", - " disp_imp_arr_transf.append(metric_test_aft[\"Disparate impact\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig, ax1 = plt.subplots(figsize=(10,7))\n", - "ax1.plot(class_thresh_arr, bal_acc_arr_transf)\n", - "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", - "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", - "ax1.xaxis.set_tick_params(labelsize=14)\n", - "ax1.yaxis.set_tick_params(labelsize=14)\n", - "\n", - "\n", - "ax2 = ax1.twinx()\n", - "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_transf)), color='r')\n", - "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", - "ax2.yaxis.set_tick_params(labelsize=14)\n", - "ax2.grid(True)" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "abs(1-disparate impact) must be small (close to 0) for classifier predictions to be fair." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display(Markdown(\"#### Individual fairness metrics\"))\n", - "print(\"Consistency of labels in transformed training dataset= %f\" %metric_transf_train.consistency())\n", - "print(\"Consistency of labels in original training dataset= %f\" %metric_orig_train.consistency())\n", - "print(\"Consistency of labels in transformed test dataset= %f\" %metric_transf_test.consistency())\n", - "print(\"Consistency of labels in original test dataset= %f\" %metric_orig_test.consistency())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def check_algorithm_success():\n", - " \"\"\"Transformed dataset consistency should be greater than original dataset.\"\"\"\n", - " assert metric_transf_test.consistency() > metric_orig_test.consistency(), \"Transformed dataset consistency should be greater than original dataset.\"\n", - "\n", - "check_algorithm_success() " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/demo_mdss_classifier_metric.ipynb b/examples/demo_mdss_classifier_metric.ipynb index 004d2956..13f94437 100644 --- a/examples/demo_mdss_classifier_metric.ipynb +++ 
b/examples/demo_mdss_classifier_metric.ipynb @@ -1,1230 +1,1381 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bias scan using Multi-Dimensional Subset Scan (MDSS)\n", - "\n", - "\"Identifying Significant Predictive Bias in Classifiers\" https://arxiv.org/abs/1611.08292\n", - "\n", - "The goal of bias scan is to identify a subgroup(s) that has significantly more predictive bias than would be expected from an unbiased classifier. There are $\\prod_{m=1}^{M}\\left(2^{|X_{m}|}-1\\right)$ unique subgroups from a dataset with $M$ features, with each feature having $|X_{m}|$ discretized values, where a subgroup is any $M$-dimension\n", - "Cartesian set product, between subsets of feature-values from each feature --- excluding the empty set. Bias scan mitigates this computational hurdle by approximately identifing the most statistically biased subgroup in linear time (rather than exponential).\n", - "\n", - "\n", - "We define the statistical measure of predictive bias function, $score_{bias}(S)$ as a likelihood ratio score and a function of a given subgroup $S$. The null hypothesis is that the given prediction's odds are correct for all subgroups in $\\mathcal{D}$:\n", - "\n", - "$$H_{0}:odds(y_{i})=\\frac{\\hat{p}_{i}}{1-\\hat{p}_{i}}\\ \\forall i\\in\\mathcal{D}.$$\n", - "\n", - "The alternative hypothesis assumes some constant multiplicative bias in the odds for some given subgroup $S$:\n", - "\n", - "$$H_{1}:\\ odds(y_{i})=q\\frac{\\hat{p}_{i}}{1-\\hat{p}_{i}},\\ \\text{where}\\ q>1\\ \\forall i\\in S\\ \\mathrm{and}\\ q=1\\ \\forall i\\notin S.$$\n", - "\n", - "In the classification setting, each observation's likelihood is Bernoulli distributed and assumed independent. 
This results in the following scoring function for a subgroup $S$:\n", - "\n", - "\\begin{align*}\n", - "score_{bias}(S)= & \\max_{q}\\log\\prod_{i\\in S}\\frac{Bernoulli(\\frac{q\\hat{p}_{i}}{1-\\hat{p}_{i}+q\\hat{p}_{i}})}{Bernoulli(\\hat{p}_{i})}\\\\\n", - "= & \\max_{q}\\log(q)\\sum_{i\\in S}y_{i}-\\sum_{i\\in S}\\log(1-\\hat{p}_{i}+q\\hat{p}_{i}).\n", - "\\end{align*}\n", - "Our bias scan is thus represented as: $S^{*}=FSS(\\mathcal{D},\\mathcal{E},F_{score})=MDSS(\\mathcal{D},\\hat{p},score_{bias})$.\n", - "\n", - "where $S^{*}$ is the detected most anomalous subgroup, $FSS$ is one of several subset scan algorithms for different problem settings, $\\mathcal{D}$ is a dataset with outcomes $Y$ and discretized features $\\mathcal{X}$, $\\mathcal{E}$ are a set of expectations or 'normal' values for $Y$, and $F_{score}$ is an expectation-based scoring statistic that measures the amount of anomalousness between subgroup observations and their expectations.\n", - "\n", - "Predictive bias emphasizes comparable predictions for a subgroup and its observations and Bias scan provides a more general method that can detect and characterize such bias, or poor classifier fit, in the larger space of all possible subgroups, without a priori specification." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import itertools\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from aif360.metrics import BinaryLabelDatasetMetric, MDSSClassificationMetric\n", - "from aif360.detectors import bias_scan\n", - "\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We'll demonstrate scoring a subset and finding the most anomalous subset with bias scan using the compas dataset.\n", - "\n", - "We can specify subgroups to be scored or scan for the most anomalous subgroup. Bias scan allows us to decide if we aim to identify bias as `higher` than expected probabilities or `lower` than expected probabilities. Depending on the favourable label, the corresponding subgroup may be categorized as priviledged or unprivileged." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig = load_preproc_data_compas()\n", - "\n", - "female_group = [{'sex': 1}]\n", - "male_group = [{'sex': 0}]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The dataset has the categorical features one-hot encoded so we'll modify the dataset to convert them back \n", - "to the categorical featues because scanning one-hot encoded features may find subgroups that are not meaningful e.g., a subgroup with 2 race values. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig_df = pd.DataFrame(dataset_orig.features, columns=dataset_orig.feature_names)\n", - "\n", - "age_cat = np.argmax(dataset_orig_df[['age_cat=Less than 25', 'age_cat=25 to 45',\n", - " 'age_cat=Greater than 45']].values, axis=1).reshape(-1, 1)\n", - "priors_count = np.argmax(dataset_orig_df[['priors_count=0', 'priors_count=1 to 3',\n", - " 'priors_count=More than 3']].values, axis=1).reshape(-1, 1)\n", - "c_charge_degree = np.argmax(dataset_orig_df[['c_charge_degree=M', 'c_charge_degree=F']].values, axis=1).reshape(-1, 1)\n", - "\n", - "features = np.concatenate((dataset_orig_df[['sex', 'race']].values, age_cat, priors_count,\n", - " c_charge_degree, dataset_orig.labels), axis=1)\n", - "feature_names = ['sex', 'race', 'age_cat', 'priors_count', 'c_charge_degree']" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "scrolled": true - }, - "outputs": [ + "cells": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sexraceage_catpriors_countc_charge_degreetwo_year_recid
00.00.01.00.01.01.0
10.00.00.02.01.01.0
20.01.01.02.01.01.0
31.01.01.00.00.00.0
40.01.01.00.01.00.0
\n", - "
" + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_mdss_classifier_metric.ipynb)" ], - "text/plain": [ - " sex race age_cat priors_count c_charge_degree two_year_recid\n", - "0 0.0 0.0 1.0 0.0 1.0 1.0\n", - "1 0.0 0.0 0.0 2.0 1.0 1.0\n", - "2 0.0 1.0 1.0 2.0 1.0 1.0\n", - "3 1.0 1.0 1.0 0.0 0.0 0.0\n", - "4 0.0 1.0 1.0 0.0 1.0 0.0" + "metadata": { + "id": "zu6QM0J8CN0q" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kci10l16B90B" + }, + "source": [ + "## Bias scan using Multi-Dimensional Subset Scan (MDSS)\n", + "\n", + "\"Identifying Significant Predictive Bias in Classifiers\" https://arxiv.org/abs/1611.08292\n", + "\n", + "The goal of bias scan is to identify a subgroup(s) that has significantly more predictive bias than would be expected from an unbiased classifier. There are $\\prod_{m=1}^{M}\\left(2^{|X_{m}|}-1\\right)$ unique subgroups from a dataset with $M$ features, with each feature having $|X_{m}|$ discretized values, where a subgroup is any $M$-dimension\n", + "Cartesian set product, between subsets of feature-values from each feature --- excluding the empty set. Bias scan mitigates this computational hurdle by approximately identifying the most statistically biased subgroup in linear time (rather than exponential).\n", + "\n", + "\n", + "We define the statistical measure of predictive bias function, $score_{bias}(S)$ as a likelihood ratio score and a function of a given subgroup $S$. 
The null hypothesis is that the given prediction's odds are correct for all subgroups in $\\mathcal{D}$:\n", + "\n", + "$$H_{0}:odds(y_{i})=\\frac{\\hat{p}_{i}}{1-\\hat{p}_{i}}\\ \\forall i\\in\\mathcal{D}.$$\n", + "\n", + "The alternative hypothesis assumes some constant multiplicative bias in the odds for some given subgroup $S$:\n", + "\n", + "$$H_{1}:\\ odds(y_{i})=q\\frac{\\hat{p}_{i}}{1-\\hat{p}_{i}},\\ \\text{where}\\ q>1\\ \\forall i\\in S\\ \\mathrm{and}\\ q=1\\ \\forall i\\notin S.$$\n", + "\n", + "In the classification setting, each observation's likelihood is Bernoulli distributed and assumed independent. This results in the following scoring function for a subgroup $S$:\n", + "\n", + "\\begin{align*}\n", + "score_{bias}(S)= & \\max_{q}\\log\\prod_{i\\in S}\\frac{Bernoulli(\\frac{q\\hat{p}_{i}}{1-\\hat{p}_{i}+q\\hat{p}_{i}})}{Bernoulli(\\hat{p}_{i})}\\\\\n", + "= & \\max_{q}\\log(q)\\sum_{i\\in S}y_{i}-\\sum_{i\\in S}\\log(1-\\hat{p}_{i}+q\\hat{p}_{i}).\n", + "\\end{align*}\n", + "Our bias scan is thus represented as: $S^{*}=FSS(\\mathcal{D},\\mathcal{E},F_{score})=MDSS(\\mathcal{D},\\hat{p},score_{bias})$.\n", + "\n", + "where $S^{*}$ is the detected most anomalous subgroup, $FSS$ is one of several subset scan algorithms for different problem settings, $\\mathcal{D}$ is a dataset with outcomes $Y$ and discretized features $\\mathcal{X}$, $\\mathcal{E}$ is a set of expectations or 'normal' values for $Y$, and $F_{score}$ is an expectation-based scoring statistic that measures the amount of anomalousness between subgroup observations and their expectations.\n", + "\n", + "Predictive bias emphasizes comparable predictions for a subgroup and its observations, and bias scan provides a more general method that can detect and characterize such bias, or poor classifier fit, in the larger space of all possible subgroups, without a priori specification." 
] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame(features, columns=feature_names + ['two_year_recid'])\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Training\n", - "We'll create a structured dataset and then train a simple classifier to predict the probability of the outcome" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from aif360.datasets import StandardDataset\n", - "dataset = StandardDataset(df, label_name='two_year_recid', favorable_classes=[0],\n", - " protected_attribute_names=['sex', 'race'],\n", - " privileged_classes=[[1], [1]],\n", - " instance_weights_name=None)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig_train, dataset_orig_test = dataset.split([0.7], shuffle=True, seed=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.124496\n", - "Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.159410\n" - ] - } - ], - "source": [ - "metric_train = BinaryLabelDatasetMetric(dataset_orig_train,\n", - " unprivileged_groups=male_group,\n", - " privileged_groups=female_group)\n", - "\n", - "print(\"Train set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_train.mean_difference())\n", - "metric_test = BinaryLabelDatasetMetric(dataset_orig_test,\n", - " unprivileged_groups=male_group,\n", - " privileged_groups=female_group)\n", - "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_test.mean_difference())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "It shows that overall Females in the dataset have a lower observed recidivism them Males." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we train a classifier, the model is likely to pick up this bias in the dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2DYOd2UsB90E" + }, + "outputs": [], + "source": [ + "import itertools\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from aif360.metrics import BinaryLabelDatasetMetric, MDSSClassificationMetric\n", + "from aif360.detectors import bias_scan\n", + "\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas" + ] + }, { - "data": { - "text/plain": [ - "LogisticRegression(random_state=0)" + "cell_type": "markdown", + "metadata": { + "id": "SqG1-54_B90F" + }, + "source": [ + "We'll demonstrate scoring a subset and finding the most anomalous subset with bias scan using the compas dataset.\n", + "\n", + "We can specify subgroups to be scored or scan for the most anomalous subgroup. Bias scan allows us to decide if we aim to identify bias as `higher` than expected probabilities or `lower` than expected probabilities. Depending on the favourable label, the corresponding subgroup may be categorized as privileged or unprivileged." ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "clf = LogisticRegression(solver='lbfgs', C=1.0, penalty='l2', random_state=0)\n", - "clf.fit(dataset_orig_train.features, dataset_orig_train.labels.flatten())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that the probability scores we use are the probabilities of the favorable label, which is 0 in this case." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "array([0., 1.])" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CeaDWPaEB90G" + }, + "outputs": [], + "source": [ + "dataset_orig = load_preproc_data_compas()\n", + "\n", + "female_group = [{'sex': 1}]\n", + "male_group = [{'sex': 0}]" ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clf.classes_" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "predictions should reflect the probability of a favorable outcome (i.e. no recidivism)." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_bias_test_prob = clf.predict_proba(dataset_orig_test.features)[:, 0]" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sexraceage_catpriors_countc_charge_degreeobservedprobabilities
01.01.02.02.01.01.00.552951
11.00.01.00.01.00.00.740959
20.01.00.01.01.00.00.374728
30.00.02.02.01.01.00.444487
40.01.01.01.00.01.00.584908
\n", - "
" + "cell_type": "markdown", + "metadata": { + "id": "UbcjVVhAB90G" + }, + "source": [ + "The dataset has the categorical features one-hot encoded so we'll modify the dataset to convert them back\n", + "to the categorical features because scanning one-hot encoded features may find subgroups that are not meaningful e.g., a subgroup with 2 race values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NzbzKqmfB90G" + }, + "outputs": [], + "source": [ + "dataset_orig_df = pd.DataFrame(dataset_orig.features, columns=dataset_orig.feature_names)\n", + "\n", + "age_cat = np.argmax(dataset_orig_df[['age_cat=Less than 25', 'age_cat=25 to 45',\n", + " 'age_cat=Greater than 45']].values, axis=1).reshape(-1, 1)\n", + "priors_count = np.argmax(dataset_orig_df[['priors_count=0', 'priors_count=1 to 3',\n", + " 'priors_count=More than 3']].values, axis=1).reshape(-1, 1)\n", + "c_charge_degree = np.argmax(dataset_orig_df[['c_charge_degree=M', 'c_charge_degree=F']].values, axis=1).reshape(-1, 1)\n", + "\n", + "features = np.concatenate((dataset_orig_df[['sex', 'race']].values, age_cat, priors_count,\n", + " c_charge_degree, dataset_orig.labels), axis=1)\n", + "feature_names = ['sex', 'race', 'age_cat', 'priors_count', 'c_charge_degree']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "id": "QxeSBzMVB90H", + "outputId": "a64c16ec-f5e1-40fc-81f9-1c0a4e96d521" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sexraceage_catpriors_countc_charge_degreetwo_year_recid
00.00.01.00.01.01.0
10.00.00.02.01.01.0
20.01.01.02.01.01.0
31.01.01.00.00.00.0
40.01.01.00.01.00.0
\n", + "
" + ], + "text/plain": [ + " sex race age_cat priors_count c_charge_degree two_year_recid\n", + "0 0.0 0.0 1.0 0.0 1.0 1.0\n", + "1 0.0 0.0 0.0 2.0 1.0 1.0\n", + "2 0.0 1.0 1.0 2.0 1.0 1.0\n", + "3 1.0 1.0 1.0 0.0 0.0 0.0\n", + "4 0.0 1.0 1.0 0.0 1.0 0.0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } ], - "text/plain": [ - " sex race age_cat priors_count c_charge_degree observed probabilities\n", - "0 1.0 1.0 2.0 2.0 1.0 1.0 0.552951\n", - "1 1.0 0.0 1.0 0.0 1.0 0.0 0.740959\n", - "2 0.0 1.0 0.0 1.0 1.0 0.0 0.374728\n", - "3 0.0 0.0 2.0 2.0 1.0 1.0 0.444487\n", - "4 0.0 1.0 1.0 1.0 0.0 1.0 0.584908" + "source": [ + "df = pd.DataFrame(features, columns=feature_names + ['two_year_recid'])\n", + "df.head()" ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame(dataset_orig_test.features, columns=dataset_orig_test.feature_names)\n", - "df['observed'] = pd.Series(dataset_orig_test.labels.flatten(), index=df.index)\n", - "df['probabilities'] = pd.Series(dataset_bias_test_prob, index=df.index)\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We'll the create another structured dataset as the classified dataset by assigning the predicted probabilities to the scores attribute" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_bias_test = dataset_orig_test.copy()\n", - "dataset_bias_test.scores = dataset_bias_test_prob\n", - "dataset_bias_test.labels = dataset_orig_test.labels" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Bias scoring" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, we try to observe the difference between the model prediction and the actual observations of the favorable label, which in this case is 0. We create a new test_df for this computation. 
\n", - "\n", - "If the model's average prediction of the favorable label is higher than the actual observations average, then the group is said to be privileged. In the converse case, the group is said to be unprivileged.\n", - "\n", - "We would check for whether the male and female groups are privileged or not using mdss score" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9MBbYHoyB90I" + }, + "source": [ + "### Training\n", + "We'll create a structured dataset and then train a simple classifier to predict the probability of the outcome" + ] + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sexraceage_catpriors_countc_charge_degreetwo_year_recidmodel_not_recidobserved_not_recid
24791.01.02.02.01.01.00.5529510.0
35741.00.01.00.01.00.00.7409591.0
5130.01.00.01.01.00.00.3747281.0
17250.00.02.02.01.01.00.4444870.0
960.01.01.01.00.01.00.5849080.0
...........................
49310.01.00.01.01.00.00.3747281.0
32640.00.00.00.01.01.00.5357530.0
16530.00.01.01.01.00.00.4900371.0
26071.01.01.00.01.01.00.7691400.0
27320.01.00.02.00.01.00.2517260.0
\n", - "

1584 rows × 8 columns

\n", - "
" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mr3tEubCB90I" + }, + "outputs": [], + "source": [ + "from aif360.datasets import StandardDataset\n", + "dataset = StandardDataset(df, label_name='two_year_recid', favorable_classes=[0],\n", + " protected_attribute_names=['sex', 'race'],\n", + " privileged_classes=[[1], [1]],\n", + " instance_weights_name=None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ub0RsE6_B90I" + }, + "outputs": [], + "source": [ + "dataset_orig_train, dataset_orig_test = dataset.split([0.7], shuffle=True, seed=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ozr0_L3AB90I", + "outputId": "b4a747cc-8121-452b-dc23-effcd43b89d7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.124496\n", + "Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.159410\n" + ] + } ], - "text/plain": [ - " sex race age_cat priors_count c_charge_degree two_year_recid \\\n", - "2479 1.0 1.0 2.0 2.0 1.0 1.0 \n", - "3574 1.0 0.0 1.0 0.0 1.0 0.0 \n", - "513 0.0 1.0 0.0 1.0 1.0 0.0 \n", - "1725 0.0 0.0 2.0 2.0 1.0 1.0 \n", - "96 0.0 1.0 1.0 1.0 0.0 1.0 \n", - "... ... ... ... ... ... ... \n", - "4931 0.0 1.0 0.0 1.0 1.0 0.0 \n", - "3264 0.0 0.0 0.0 0.0 1.0 1.0 \n", - "1653 0.0 0.0 1.0 1.0 1.0 0.0 \n", - "2607 1.0 1.0 1.0 0.0 1.0 1.0 \n", - "2732 0.0 1.0 0.0 2.0 0.0 1.0 \n", - "\n", - " model_not_recid observed_not_recid \n", - "2479 0.552951 0.0 \n", - "3574 0.740959 1.0 \n", - "513 0.374728 1.0 \n", - "1725 0.444487 0.0 \n", - "96 0.584908 0.0 \n", - "... ... ... 
\n", - "4931 0.374728 1.0 \n", - "3264 0.535753 0.0 \n", - "1653 0.490037 1.0 \n", - "2607 0.769140 0.0 \n", - "2732 0.251726 0.0 \n", - "\n", - "[1584 rows x 8 columns]" + "source": [ + "metric_train = BinaryLabelDatasetMetric(dataset_orig_train,\n", + " unprivileged_groups=male_group,\n", + " privileged_groups=female_group)\n", + "\n", + "print(\"Train set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_train.mean_difference())\n", + "metric_test = BinaryLabelDatasetMetric(dataset_orig_test,\n", + " unprivileged_groups=male_group,\n", + " privileged_groups=female_group)\n", + "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_test.mean_difference())\n" ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_df = dataset_bias_test.convert_to_dataframe()[0]\n", - "test_df['model_not_recid'] = dataset_bias_test.scores.flatten()\n", - "test_df['observed_not_recid'] = 1 - test_df['two_year_recid']\n", - "test_df" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "model_not_recid 0.617561\n", - "observed_not_recid 0.657051\n", - "dtype: float64" + "cell_type": "markdown", + "metadata": { + "id": "uhDQDm_5B90J" + }, + "source": [ + "It shows that overall Females in the dataset have a lower observed recidivism than Males." ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Females actual vs predicted rates of positive label\n", - "test_df[test_df.sex == 1][['model_not_recid','observed_not_recid']].mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since model average predictions for the positive label is lower than the observed average by a substantial amount (about 4%), the female group is most likely unprivileged." 
- ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "model_not_recid 0.512443\n", - "observed_not_recid 0.497642\n", - "dtype: float64" + "cell_type": "markdown", + "metadata": { + "id": "VNo-Ix97B90J" + }, + "source": [ + "If we train a classifier, the model is likely to pick up this bias in the dataset" ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Males actual vs predicted rates of positive label\n", - "test_df[test_df.sex == 0][['model_not_recid','observed_not_recid']].mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since model average predictions for the positive label is greater than the observed average by a small amount (about 1.5%), the male group could be privileged." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, we'll create an instance of the MDSS Classification Metric and assess the apriori defined privileged and unprivileged groups; females and males respectively. 
\n", - "\n", - "By apriori defining the male group as unprivileged, we are saying we expect that the model's predictions is systematically lower than the actual observation.\n", - "\n", - "By apriori defining the female group as privileged, we are saying we expect that the model's predictions is systematically higher than the actual observation.\n", - "\n", - "From our mini-analysis above, we know that these hypothesis are unlikely to be true " - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "mdss_classified = MDSSClassificationMetric(dataset_orig_test, dataset_bias_test,\n", - " unprivileged_groups=male_group,\n", - " privileged_groups=female_group)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "-0.0" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Hj1KWwaOB90J", + "outputId": "abf6f0c4-ec8c-4033-a88a-bbb11fa47900" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(random_state=0)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "clf = LogisticRegression(solver='lbfgs', C=1.0, penalty='l2', random_state=0)\n", + "clf.fit(dataset_orig_train.features, dataset_orig_train.labels.flatten())" ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# We are asking the question:\n", - "# Is there evidence that the hypothesized privileged group is actually privileged?\n", - "\n", - "female_privileged_score = mdss_classified.score_groups(privileged=True)\n", - "female_privileged_score" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By having a score very close to zero, mdss bias score is informing us that there is no evidence from the data that our hypothesis of the female 
group being privileged is true." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "-0.0" + "cell_type": "markdown", + "metadata": { + "id": "8bC5aDp4B90K" + }, + "source": [ + "Note that the probability scores we use are the probabilities of the favorable label, which is 0 in this case." ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# We are asking the question:\n", - "# Is there evidence that the hypothesized unprivileged group is actually unprivileged?\n", - "\n", - "male_unprivileged_score = mdss_classified.score_groups(privileged=False)\n", - "male_unprivileged_score" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By having a score very close zero, mdss bias score is informing us that there is no evidence from the data to support our hypothesis of the male group being unprivileged is true." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can flip our initial hypothesis and check if the male group is privileged or the female group is unprivileged." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "mdss_classified = MDSSClassificationMetric(dataset_orig_test, dataset_bias_test,\n", - " unprivileged_groups=female_group,\n", - " privileged_groups=male_group)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "0.63" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tQU_CQtOB90K", + "outputId": "f9beec78-31e0-48e7-bfcf-7f276b09af16" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0., 1.])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clf.classes_" ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "male_privileged_score = mdss_classified.score_groups(privileged=True)\n", - "male_privileged_score" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By having a positive score, mdss bias score is informing us that there is evidence from the data that our hypothesis of the male group being privileged is true." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "1.1769" + "cell_type": "markdown", + "metadata": { + "id": "UyhqLqhNB90K" + }, + "source": [ + "predictions should reflect the probability of a favorable outcome (i.e. no recidivism)." ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "female_unprivileged_score = mdss_classified.score_groups(privileged=False)\n", - "female_unprivileged_score" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By having a positive score, mdss bias score is informing us that there is evidence from the data to support our hypothesis of the female group being unprivileged is true." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By taking into account the size of the group and the magnitude of the deviation, mdss bias core has been able to tell us the following about the male and female groups:\n", - "- There is no evidence that the female group is privileged.\n", - "- There is no evidence that the male group is unprivileged.\n", - "- There is evidence that the male group is privileged.\n", - "- There is evidence that the female is unprivileged." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Bias scan\n", - "We get the bias score for the apriori defined subgroup but assuming we had no prior knowledge \n", - "about the predictive bias and wanted to find the subgroups with the most bias, we can apply bias scan to identify the priviledged and unpriviledged groups. The privileged argument is not a reference to a group but the direction for which to scan for bias." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "privileged_subset = bias_scan(df.iloc[:, :-2], df.observed, df.probabilities,\n", - " favorable_value=dataset_orig_test.favorable_label,\n", - " penalty=0.5, overpredicted=True)\n", - "unprivileged_subset = bias_scan(df.iloc[:, :-2], df.observed, df.probabilities,\n", - " favorable_value=dataset_orig_test.favorable_label,\n", - " penalty=0.5, overpredicted=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "({'race': [0.0], 'age_cat': [0.0], 'sex': [0.0]}, 3.1526)\n", - "({'sex': [1.0], 'race': [0.0]}, 3.3036)\n" - ] - } - ], - "source": [ - "print(privileged_subset)\n", - "print(unprivileged_subset)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "assert privileged_subset[0]\n", - "assert unprivileged_subset[0]" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "We can observe that the bias score is higher than the score of the prior groups. These subgroups are guaranteed to be the highest scoring subgroup among the exponentially many subgroups.\n", - "\n", - "For the purposes of this example, the logistic regression model systematically underestimates the recidivism risk of individuals in the `Non-caucasian`, `less than 25`, `Male` subgroup whereas individuals belonging to the `Non-caucasian`, `Female` are assigned a higher risk than is actually observed. We refer to these subgroups as the `detected privileged group` and `detected unprivileged group` respectively." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can create another srtuctured dataset using the new groups to compute other dataset metrics. " - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "protected_attr_names = set(privileged_subset[0].keys()).union(set(unprivileged_subset[0].keys()))\n", - "dataset_orig_test.protected_attribute_names = list(protected_attr_names)\n", - "dataset_bias_test.protected_attribute_names = list(protected_attr_names)\n", - "\n", - "protected_attr = np.where(np.isin(dataset_orig_test.feature_names, list(protected_attr_names)))[0]\n", - "\n", - "dataset_orig_test.protected_attributes = dataset_orig_test.features[:, protected_attr]\n", - "dataset_bias_test.protected_attributes = dataset_bias_test.features[:, protected_attr]" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "# converts from dictionary of lists to list of dictionaries\n", - "a = list(privileged_subset[0].values())\n", - "subset_values = list(itertools.product(*a))\n", - "\n", - "detected_privileged_groups = []\n", - "for vals in subset_values:\n", - " detected_privileged_groups.append((dict(zip(privileged_subset[0].keys(), vals))))\n", - "\n", - "a = 
list(unprivileged_subset[0].values())\n", - "subset_values = list(itertools.product(*a))\n", - "\n", - "detected_unprivileged_groups = []\n", - "for vals in subset_values:\n", - " detected_unprivileged_groups.append((dict(zip(unprivileged_subset[0].keys(), vals))))" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "20HUpT98B90K" + }, + "outputs": [], + "source": [ + "dataset_bias_test_prob = clf.predict_proba(dataset_orig_test.features)[:, 0]" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Test set: Difference in mean outcomes between unprivileged and privileged groups = 0.275836\n" - ] - } - ], - "source": [ - "metric_bias_test = BinaryLabelDatasetMetric(dataset_bias_test,\n", - " unprivileged_groups=detected_unprivileged_groups,\n", - " privileged_groups=detected_privileged_groups)\n", - "\n", - "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\"\n", - " % metric_bias_test.mean_difference())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It appears the detected privileged group have a higher risk of recidivism than the unprivileged group." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As noted in the paper, predictive bias is different from predictive fairness so there's no the emphasis in the subgroups having comparable predictions between them. \n", - "We can investigate the difference in what the model predicts vs what we actually observed as well as the multiplicative difference in the odds of the subgroups." 
- ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "to_choose = df[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", - "temp_df = df.loc[to_choose]" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ryABk3w5B90K", + "outputId": "234f8d7b-f69a-42ad-9544-bc151e367245" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sexraceage_catpriors_countc_charge_degreeobservedprobabilities
01.01.02.02.01.01.00.552951
11.00.01.00.01.00.00.740959
20.01.00.01.01.00.00.374728
30.00.02.02.01.01.00.444487
40.01.01.01.00.01.00.584908
\n", + "
" + ], + "text/plain": [ + " sex race age_cat priors_count c_charge_degree observed probabilities\n", + "0 1.0 1.0 2.0 2.0 1.0 1.0 0.552951\n", + "1 1.0 0.0 1.0 0.0 1.0 0.0 0.740959\n", + "2 0.0 1.0 0.0 1.0 1.0 0.0 0.374728\n", + "3 0.0 0.0 2.0 2.0 1.0 1.0 0.444487\n", + "4 0.0 1.0 1.0 1.0 0.0 1.0 0.584908" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(dataset_orig_test.features, columns=dataset_orig_test.feature_names)\n", + "df['observed'] = pd.Series(dataset_orig_test.labels.flatten(), index=df.index)\n", + "df['probabilities'] = pd.Series(dataset_bias_test_prob, index=df.index)\n", + "df.head()" + ] + }, { - "data": { - "text/plain": [ - "'Our detected priviledged group has a size of 192, we observe 67.71% as the average risk of recidivism, but our model predicts 57.30%'" + "cell_type": "markdown", + "metadata": { + "id": "8cH_NmUuB90L" + }, + "source": [ + "We'll the create another structured dataset as the classified dataset by assigning the predicted probabilities to the scores attribute" ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "group_obs = temp_df['observed'].mean()\n", - "group_prob = 1-temp_df['probabilities'].mean()\n", - "\n", - "\"Our detected priviledged group has a size of {}, we observe {:.2%} as the average risk of recidivism, but our model predicts {:.2%}\"\\\n", - ".format(len(temp_df), group_obs, group_prob)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "'This is a multiplicative increase in the odds by 1.562'" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kctU9DX1B90L" + }, + "outputs": [], + "source": [ + "dataset_bias_test = dataset_orig_test.copy()\n", + "dataset_bias_test.scores = dataset_bias_test_prob\n", + "dataset_bias_test.labels = dataset_orig_test.labels" ] - }, - 
"execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "odds_mul = (group_obs / (1 - group_obs)) / (group_prob /(1 - group_prob))\n", - "\"This is a multiplicative increase in the odds by {:.3f}\".format(odds_mul)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "assert odds_mul > 1" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "to_choose = df[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", - "temp_df = df.loc[to_choose]" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "'Our detected unpriviledged group has a size of 169, we observe 33.14% as the average risk of recidivism, but our model predicts 43.65%'" + "cell_type": "markdown", + "metadata": { + "id": "OB_6l47PB90L" + }, + "source": [ + "### Bias scoring" ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "group_obs = temp_df['observed'].mean()\n", - "group_prob = 1-temp_df['probabilities'].mean()\n", - "\n", - "\"Our detected unpriviledged group has a size of {}, we observe {:.2%} as the average risk of recidivism, but our model predicts {:.2%}\"\\\n", - ".format(len(temp_df), group_obs, group_prob)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "'This is a multiplicative decrease in the odds by 0.640'" + "cell_type": "markdown", + "metadata": { + "id": "ykJRqEA3B90L" + }, + "source": [ + "First, we try to observe the difference between the model prediction and the actual observations of the favorable label, which in this case is 0. 
We create a new test_df for this computation.\n", + "\n", + "If the model's average prediction of the favorable label is higher than the average of the actual observations, then the group is said to be privileged. In the converse case, the group is said to be unprivileged.\n", + "\n", + "We will check whether the male and female groups are privileged or not using the mdss score." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PV5F2zMwB90L", + "outputId": "32bd9359-b7a1-4ee5-89fe-7f838dd50e15" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sexraceage_catpriors_countc_charge_degreetwo_year_recidmodel_not_recidobserved_not_recid
24791.01.02.02.01.01.00.5529510.0
35741.00.01.00.01.00.00.7409591.0
5130.01.00.01.01.00.00.3747281.0
17250.00.02.02.01.01.00.4444870.0
960.01.01.01.00.01.00.5849080.0
...........................
49310.01.00.01.01.00.00.3747281.0
32640.00.00.00.01.01.00.5357530.0
16530.00.01.01.01.00.00.4900371.0
26071.01.01.00.01.01.00.7691400.0
27320.01.00.02.00.01.00.2517260.0
\n", + "

1584 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " sex race age_cat priors_count c_charge_degree two_year_recid \\\n", + "2479 1.0 1.0 2.0 2.0 1.0 1.0 \n", + "3574 1.0 0.0 1.0 0.0 1.0 0.0 \n", + "513 0.0 1.0 0.0 1.0 1.0 0.0 \n", + "1725 0.0 0.0 2.0 2.0 1.0 1.0 \n", + "96 0.0 1.0 1.0 1.0 0.0 1.0 \n", + "... ... ... ... ... ... ... \n", + "4931 0.0 1.0 0.0 1.0 1.0 0.0 \n", + "3264 0.0 0.0 0.0 0.0 1.0 1.0 \n", + "1653 0.0 0.0 1.0 1.0 1.0 0.0 \n", + "2607 1.0 1.0 1.0 0.0 1.0 1.0 \n", + "2732 0.0 1.0 0.0 2.0 0.0 1.0 \n", + "\n", + " model_not_recid observed_not_recid \n", + "2479 0.552951 0.0 \n", + "3574 0.740959 1.0 \n", + "513 0.374728 1.0 \n", + "1725 0.444487 0.0 \n", + "96 0.584908 0.0 \n", + "... ... ... \n", + "4931 0.374728 1.0 \n", + "3264 0.535753 0.0 \n", + "1653 0.490037 1.0 \n", + "2607 0.769140 0.0 \n", + "2732 0.251726 0.0 \n", + "\n", + "[1584 rows x 8 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_df = dataset_bias_test.convert_to_dataframe()[0]\n", + "test_df['model_not_recid'] = dataset_bias_test.scores.flatten()\n", + "test_df['observed_not_recid'] = 1 - test_df['two_year_recid']\n", + "test_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qMjfpXO4B90M", + "outputId": "78697715-5600-4c49-ee4d-a747ded2606d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "model_not_recid 0.617561\n", + "observed_not_recid 0.657051\n", + "dtype: float64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Females actual vs predicted rates of positive label\n", + "test_df[test_df.sex == 1][['model_not_recid','observed_not_recid']].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HKaZKOgNB90M" + }, + "source": [ + "Since model average predictions for the positive label is lower than the observed average by a substantial amount (about 4%), the female group is most likely 
unprivileged." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_tKQJ0FUB90M", + "outputId": "1b6bbce7-e1b7-44c9-8803-59241e5cdc27" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "model_not_recid 0.512443\n", + "observed_not_recid 0.497642\n", + "dtype: float64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Males actual vs predicted rates of positive label\n", + "test_df[test_df.sex == 0][['model_not_recid','observed_not_recid']].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y-wvPZH3B90M" + }, + "source": [ + "Since model average predictions for the positive label is greater than the observed average by a small amount (about 1.5%), the male group could be privileged." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6fQGJP-2B90M" + }, + "source": [ + "Now, we'll create an instance of the MDSS Classification Metric and assess the apriori defined privileged and unprivileged groups; females and males respectively.\n", + "\n", + "By apriori defining the male group as unprivileged, we are saying we expect that the model's predictions is systematically lower than the actual observation.\n", + "\n", + "By apriori defining the female group as privileged, we are saying we expect that the model's predictions is systematically higher than the actual observation.\n", + "\n", + "From our mini-analysis above, we know that these hypothesis are unlikely to be true" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yIqKBRPbB90M" + }, + "outputs": [], + "source": [ + "mdss_classified = MDSSClassificationMetric(dataset_orig_test, dataset_bias_test,\n", + " unprivileged_groups=male_group,\n", + " privileged_groups=female_group)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SI_TOyu0B90N", + "outputId": "9c118292-8368-4e0b-fdee-5ef094b6e9a1" + }, + 
"outputs": [ + { + "data": { + "text/plain": [ + "-0.0" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We are asking the question:\n", + "# Is there evidence that the hypothesized privileged group is actually privileged?\n", + "\n", + "female_privileged_score = mdss_classified.score_groups(privileged=True)\n", + "female_privileged_score" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OOKIOvxUB90N" + }, + "source": [ + "By having a score very close to zero, mdss bias score is informing us that there is no evidence from the data that our hypothesis of the female group being privileged is true." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B8dsK90PB90N", + "outputId": "4cf06af8-6ca3-49d6-a9d7-895e7ce69478" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.0" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We are asking the question:\n", + "# Is there evidence that the hypothesized unprivileged group is actually unprivileged?\n", + "\n", + "male_unprivileged_score = mdss_classified.score_groups(privileged=False)\n", + "male_unprivileged_score" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Anh6JvDAB90N" + }, + "source": [ + "By having a score very close to zero, mdss bias score is informing us that there is no evidence from the data to support our hypothesis that the male group is unprivileged." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2QuN7nxrB90N" + }, + "source": [ + "We can flip our initial hypothesis and check if the male group is privileged or the female group is unprivileged."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Om4DTlYtB90O" + }, + "outputs": [], + "source": [ + "mdss_classified = MDSSClassificationMetric(dataset_orig_test, dataset_bias_test,\n", + " unprivileged_groups=female_group,\n", + " privileged_groups=male_group)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "baP4caqSB90O", + "outputId": "a892284c-1b54-411b-c839-45925cf17bc6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.63" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "male_privileged_score = mdss_classified.score_groups(privileged=True)\n", + "male_privileged_score" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vZWkojHvB90O" + }, + "source": [ + "By having a positive score, mdss bias score is informing us that there is evidence from the data that our hypothesis of the male group being privileged is true." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bp8kBr9nB90O", + "outputId": "d689519f-775d-4a5f-e372-43de18e122ae" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1.1769" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "female_unprivileged_score = mdss_classified.score_groups(privileged=False)\n", + "female_unprivileged_score" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v7LHafDqB90P" + }, + "source": [ + "By having a positive score, mdss bias score is informing us that there is evidence from the data to support our hypothesis of the female group being unprivileged is true." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fIO7ig11B90P" + }, + "source": [ + "By taking into account the size of the group and the magnitude of the deviation, mdss bias score has been able to tell us the following about the male and female groups:\n", + "- There is no evidence that the female group is privileged.\n", + "- There is no evidence that the male group is unprivileged.\n", + "- There is evidence that the male group is privileged.\n", + "- There is evidence that the female group is unprivileged." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XlYgT7krB90Q" + }, + "source": [ + "### Bias scan\n", + "We get the bias score for the a priori defined subgroup but assuming we had no prior knowledge\n", + "about the predictive bias and wanted to find the subgroups with the most bias, we can apply bias scan to identify the privileged and unprivileged groups. The `overpredicted` argument is not a reference to a group but the direction in which to scan for bias."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JDyYYrMsB90Q" + }, + "outputs": [], + "source": [ + "privileged_subset = bias_scan(df.iloc[:, :-2], df.observed, df.probabilities,\n", + " favorable_value=dataset_orig_test.favorable_label,\n", + " penalty=0.5, overpredicted=True)\n", + "unprivileged_subset = bias_scan(df.iloc[:, :-2], df.observed, df.probabilities,\n", + " favorable_value=dataset_orig_test.favorable_label,\n", + " penalty=0.5, overpredicted=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SmDnrJb_B90Q", + "outputId": "325ae712-7d15-4a19-9732-7afe72ab3e41" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "({'race': [0.0], 'age_cat': [0.0], 'sex': [0.0]}, 3.1526)\n", + "({'sex': [1.0], 'race': [0.0]}, 3.3036)\n" + ] + } + ], + "source": [ + "print(privileged_subset)\n", + "print(unprivileged_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k3i-WdwLB90Q" + }, + "outputs": [], + "source": [ + "assert privileged_subset[0]\n", + "assert unprivileged_subset[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LEkybEddB90Q" + }, + "source": [ + "We can observe that the bias score is higher than the score of the prior groups. These subgroups are guaranteed to be the highest scoring subgroup among the exponentially many subgroups.\n", + "\n", + "For the purposes of this example, the logistic regression model systematically underestimates the recidivism risk of individuals in the `Non-caucasian`, `less than 25`, `Male` subgroup whereas individuals belonging to the `Non-caucasian`, `Female` are assigned a higher risk than is actually observed. We refer to these subgroups as the `detected privileged group` and `detected unprivileged group` respectively." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I_YxruutB90Q" + }, + "source": [ + "We can create another structured dataset using the new groups to compute other dataset metrics. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xdVrmsmtB90R" + }, + "outputs": [], + "source": [ + "protected_attr_names = set(privileged_subset[0].keys()).union(set(unprivileged_subset[0].keys()))\n", + "dataset_orig_test.protected_attribute_names = list(protected_attr_names)\n", + "dataset_bias_test.protected_attribute_names = list(protected_attr_names)\n", + "\n", + "protected_attr = np.where(np.isin(dataset_orig_test.feature_names, list(protected_attr_names)))[0]\n", + "\n", + "dataset_orig_test.protected_attributes = dataset_orig_test.features[:, protected_attr]\n", + "dataset_bias_test.protected_attributes = dataset_bias_test.features[:, protected_attr]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q2W84utLB90R" + }, + "outputs": [], + "source": [ + "# converts from dictionary of lists to list of dictionaries\n", + "a = list(privileged_subset[0].values())\n", + "subset_values = list(itertools.product(*a))\n", + "\n", + "detected_privileged_groups = []\n", + "for vals in subset_values:\n", + " detected_privileged_groups.append((dict(zip(privileged_subset[0].keys(), vals))))\n", + "\n", + "a = list(unprivileged_subset[0].values())\n", + "subset_values = list(itertools.product(*a))\n", + "\n", + "detected_unprivileged_groups = []\n", + "for vals in subset_values:\n", + " detected_unprivileged_groups.append((dict(zip(unprivileged_subset[0].keys(), vals))))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5CQLNeLMB90R", + "outputId": "fdf5c829-9610-43a1-b46c-6d6e7409f52c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test set: Difference in mean outcomes between unprivileged and privileged groups = 
0.275836\n" + ] + } + ], + "source": [ + "metric_bias_test = BinaryLabelDatasetMetric(dataset_bias_test,\n", + " unprivileged_groups=detected_unprivileged_groups,\n", + " privileged_groups=detected_privileged_groups)\n", + "\n", + "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\"\n", + " % metric_bias_test.mean_difference())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FpsKHVQOB90R" + }, + "source": [ + "It appears the detected privileged group has a higher risk of recidivism than the unprivileged group." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aM5Xi7wYB90R" + }, + "source": [ + "As noted in the paper, predictive bias is different from predictive fairness so there's no emphasis on the subgroups having comparable predictions between them.\n", + "We can investigate the difference in what the model predicts vs what we actually observed as well as the multiplicative difference in the odds of the subgroups."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-S80czQ6B90R" + }, + "outputs": [], + "source": [ + "to_choose = df[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", + "temp_df = df.loc[to_choose]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Tv-pOahHB90S", + "outputId": "b4b11f9c-8ec7-4cff-c4fb-fb1dbc17f383" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected priviledged group has a size of 192, we observe 67.71% as the average risk of recidivism, but our model predicts 57.30%'" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "group_obs = temp_df['observed'].mean()\n", + "group_prob = 1-temp_df['probabilities'].mean()\n", + "\n", + "\"Our detected priviledged group has a size of {}, we observe {:.2%} as the average risk of recidivism, but our model predicts {:.2%}\"\\\n", + ".format(len(temp_df), group_obs, group_prob)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mhzLh_ZWB90S", + "outputId": "6507496a-8086-4605-a135-e031dde9de69" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'This is a multiplicative increase in the odds by 1.562'" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "odds_mul = (group_obs / (1 - group_obs)) / (group_prob /(1 - group_prob))\n", + "\"This is a multiplicative increase in the odds by {:.3f}\".format(odds_mul)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4CBlc07aB90S" + }, + "outputs": [], + "source": [ + "assert odds_mul > 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u-e4oDAwB90S" + }, + "outputs": [], + "source": [ + "to_choose = df[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", + "temp_df = df.loc[to_choose]" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hOwgUMAsB90S", + "outputId": "d45edf71-6256-4ae8-8c21-60c1a5275de4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected unpriviledged group has a size of 169, we observe 33.14% as the average risk of recidivism, but our model predicts 43.65%'" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "group_obs = temp_df['observed'].mean()\n", + "group_prob = 1-temp_df['probabilities'].mean()\n", + "\n", + "\"Our detected unpriviledged group has a size of {}, we observe {:.2%} as the average risk of recidivism, but our model predicts {:.2%}\"\\\n", + ".format(len(temp_df), group_obs, group_prob)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DHMHVUyOB90T", + "outputId": "705cf23c-7d1c-41e4-a2a7-cb8b39a5b498" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'This is a multiplicative decrease in the odds by 0.640'" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "odds_mul = (group_obs / (1 - group_obs)) / (group_prob /(1 - group_prob))\n", + "\"This is a multiplicative decrease in the odds by {:.3f}\".format(odds_mul)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RComzQQWB90T" + }, + "outputs": [], + "source": [ + "assert odds_mul < 1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pak55cFNB90T" + }, + "source": [ + "In summary, this notebook demonstrates the use of bias scan to identify subgroups with significant predictive bias, as quantified by a likelihood ratio score, using subset scanning. 
This allows consideration of not just subgroups of a priori interest or small dimensions, but the space of all possible subgroups of features.\n", + "It also presents opportunity for a kind of bias mitigation technique that uses the multiplicative odds in the over-or-under estimated subgroups to adjust for predictive fairness." ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" } - ], - "source": [ - "odds_mul = (group_obs / (1 - group_obs)) / (group_prob /(1 - group_prob))\n", - "\"This is a multiplicative decrease in the odds by {:.3f}\".format(odds_mul)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "assert odds_mul < 1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In summary, this notebook demonstrates the use of bias scan to identify subgroups with significant predictive bias, as quantified by a likelihood ratio score, using subset scanning. This allows consideration of not just subgroups of a priori interest or small dimensions, but the space of all possible subgroups of features.\n", - "It also presents opportunity for a kind of bias mitigation technique that uses the multiplicative odds in the over-or-under estimated subgroups to adjust for predictive fairness." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.7 ('aif360')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.7 ('aif360')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "d0c5ced7753e77a483fec8ff7063075635521cce6e0bd54998c8f174742209dd" + } + }, + "colab": { + "provenance": [] + } }, - "vscode": { - "interpreter": { - "hash": "d0c5ced7753e77a483fec8ff7063075635521cce6e0bd54998c8f174742209dd" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/demo_mdss_detector.ipynb b/examples/demo_mdss_detector.ipynb index 6ce0ed8d..1cf24f3e 100644 --- a/examples/demo_mdss_detector.ipynb +++ b/examples/demo_mdss_detector.ipynb @@ -1,1542 +1,1730 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bias scan using Multi-Dimensional Subset Scan (MDSS)\n", - "\n", - "\"Identifying Significant Predictive Bias in Classifiers\" https://arxiv.org/abs/1611.08292\n", - "\n", - "The goal of bias scan is to identify a subgroup(s) that has significantly more predictive bias than would be expected from an unbiased classifier. 
There are $\\prod_{m=1}^{M}\\left(2^{|X_{m}|}-1\\right)$ unique subgroups from a dataset with $M$ features, with each feature having $|X_{m}|$ discretized values, where a subgroup is any $M$-dimension\n", - "Cartesian set product, between subsets of feature-values from each feature --- excluding the empty set. Bias scan mitigates this computational hurdle by approximately identifing the most statistically biased subgroup in linear time (rather than exponential).\n", - "\n", - "\n", - "We define the statistical measure of predictive bias function, $score_{bias}(S)$ as a likelihood ratio score and a function of a given subgroup $S$. The null hypothesis is that the given prediction's odds are correct for all subgroups in $\\mathcal{D}$:\n", - "\n", - "$$H_{0}:odds(y_{i})=\\frac{\\hat{p}_{i}}{1-\\hat{p}_{i}}\\ \\forall i\\in\\mathcal{D}.$$\n", - "\n", - "The alternative hypothesis assumes some constant multiplicative bias in the odds for some given subgroup $S$:\n", - "\n", - "$$H_{1}:\\ odds(y_{i})=q\\frac{\\hat{p}_{i}}{1-\\hat{p}_{i}},\\ \\text{where}\\ q>1\\ \\forall i\\in S\\ \\mathrm{and}\\ q=1\\ \\forall i\\notin S.$$\n", - "\n", - "In the classification setting, each observation's likelihood is Bernoulli distributed and assumed independent. 
This results in the following scoring function for a subgroup $S$:\n", - "\n", - "\\begin{align*}\n", - "score_{bias}(S)= & \\max_{q}\\log\\prod_{i\\in S}\\frac{Bernoulli(\\frac{q\\hat{p}_{i}}{1-\\hat{p}_{i}+q\\hat{p}_{i}})}{Bernoulli(\\hat{p}_{i})}\\\\\n", - "= & \\max_{q}\\log(q)\\sum_{i\\in S}y_{i}-\\sum_{i\\in S}\\log(1-\\hat{p}_{i}+q\\hat{p}_{i}).\n", - "\\end{align*}\n", - "Our bias scan is thus represented as: $S^{*}=FSS(\\mathcal{D},\\mathcal{E},F_{score})=MDSS(\\mathcal{D},\\hat{p},score_{bias})$.\n", - "\n", - "where $S^{*}$ is the detected most anomalous subgroup, $FSS$ is one of several subset scan algorithms for different problem settings, $\\mathcal{D}$ is a dataset with outcomes $Y$ and discretized features $\\mathcal{X}$, $\\mathcal{E}$ are a set of expectations or 'normal' values for $Y$, and $F_{score}$ is an expectation-based scoring statistic that measures the amount of anomalousness between subgroup observations and their expectations.\n", - "\n", - "Predictive bias emphasizes comparable predictions for a subgroup and its observations and Bias scan provides a more general method that can detect and characterize such bias, or poor classifier fit, in the larger space of all possible subgroups, without a priori specification." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Usage\n", - "\n", - "MDScan currently supports three scoring functions. These scoring functions usage are described below:\n", - "- *BerkJones*: Non-parametric scoring function. To be used for all of the four types of outcomes supported - binary, continuous, nominal, ordinal.\n", - "- *Bernoulli*: Parametric scoring function. To used for two of the four types of outcomes supported - binary and nominal.\n", - "- *Guassian*: Parametric scoring function. To used for one of the four types of outcomes supported - continuous.\n", - "- *Poisson*: Parametric scoring function. 
To be used for three of the four types of outcomes supported - binary, continuous, and ordinal.\n", - "\n", - "Note, non-parametric scoring functions can only be used for datasets where the expectations are constant or none.\n", - "\n", - "The type of outcomes must be provided using the mode keyword argument. The definition for the four types of outcomes supported are provided below:\n", - "- Binary: Yes/no outcomes. Outcomes must 0 or 1.\n", - "- Continuous: Continuous outcomes. Outcomes could be any real number.\n", - "- Nominal: Multiclass outcomes with no rank or order between them. Outcomes must be a finite set of integers with dimensionality <= 10.\n", - "- Ordinal: Multiclass outcomes that are ranked in a specific order. Outcomes must be positive integers.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from aif360.detectors.mdss_detector import bias_scan\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas\n", - "\n", - "import numpy as np\n", - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We'll demonstrate finding the most anomalous subset with bias scan using the compas dataset. We can specify subgroups to be scored or scan for the most anomalous subgroup. Bias scan allows us to decide if we aim to identify bias as `higher` than expected probabilities or `lower` than expected probabilities." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Compas Dataset\n", - "This is a binary classification use case where the favorable label is 0 and the scoring function is the default bernoulli." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(0)\n", - "\n", - "dataset_orig = load_preproc_data_compas()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The dataset has the categorical features one-hot encoded so we'll modify the dataset to convert them back \n", - "to the categorical featues because scanning one-hot encoded features may find subgroups that are not meaningful eg. a subgroup with 2 race values. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig_df = pd.DataFrame(dataset_orig.features, columns=dataset_orig.feature_names)\n", - "\n", - "age_cat = np.argmax(dataset_orig_df[['age_cat=Less than 25', 'age_cat=25 to 45', \n", - " 'age_cat=Greater than 45']].values, axis=1).reshape(-1, 1)\n", - "priors_count = np.argmax(dataset_orig_df[['priors_count=0', 'priors_count=1 to 3', \n", - " 'priors_count=More than 3']].values, axis=1).reshape(-1, 1)\n", - "c_charge_degree = np.argmax(dataset_orig_df[['c_charge_degree=F', 'c_charge_degree=M']].values, axis=1).reshape(-1, 1)\n", - "\n", - "features = np.concatenate((dataset_orig_df[['sex', 'race']].values, age_cat, priors_count, \\\n", - " c_charge_degree, dataset_orig.labels), axis=1)\n", - "feature_names = ['sex', 'race', 'age_cat', 'priors_count', 'c_charge_degree']" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame(features, columns=feature_names + ['two_year_recid'])" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sexraceage_catpriors_countc_charge_degreetwo_year_recid
00.00.01.00.00.01.0
10.00.00.02.00.01.0
20.01.01.02.00.01.0
31.01.01.00.01.00.0
40.01.01.00.00.00.0
\n", - "
" + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_mdss_detector.ipynb)" ], - "text/plain": [ - " sex race age_cat priors_count c_charge_degree two_year_recid\n", - "0 0.0 0.0 1.0 0.0 0.0 1.0\n", - "1 0.0 0.0 0.0 2.0 0.0 1.0\n", - "2 0.0 1.0 1.0 2.0 0.0 1.0\n", - "3 1.0 1.0 1.0 0.0 1.0 0.0\n", - "4 0.0 1.0 1.0 0.0 0.0 0.0" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### training\n", - "We'll train a simple classifier to predict the probability of the outcome" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LogisticRegression()" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "X = df.drop('two_year_recid', axis = 1)\n", - "y = df['two_year_recid']\n", - "clf = LogisticRegression(solver='lbfgs', C=1.0, penalty='l2')\n", - "clf.fit(X, y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that the probability scores we use are the probabilities of the favorable label, which is 0 in this case." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "probs = pd.Series(clf.predict_proba(X)[:,0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### bias scan\n", - "We can scan for a privileged and unprivileged subset using bias scan" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "privileged_subset = bias_scan(data=X,observations=y,expectations=probs,favorable_value=0, overpredicted=True)\n", - "unprivileged_subset = bias_scan(data=X,observations=y,expectations=probs,favorable_value=0,overpredicted=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "({'age_cat': [1.0], 'priors_count': [0.0, 1.0, 2.0], 'sex': [1.0], 'race': [1.0], 'c_charge_degree': [0.0]}, 7.9086)\n", - "({'race': [0.0], 'age_cat': [1.0, 2.0], 'priors_count': [1.0], 'c_charge_degree': [0.0, 1.0]}, 7.0227)\n" - ] - } - ], - "source": [ - "print(privileged_subset)\n", - "print(unprivileged_subset)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "dff = X.copy()\n", - "dff['observed'] = y \n", - "dff['probabilities'] = 1 - probs" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "to_choose = dff[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", - "temp_df = dff.loc[to_choose]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected priviledged group has a size of 147, we observe 0.5374149659863946 as the average risk of recidivism, but our model predicts 0.38278159716895366'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\"Our detected 
priviledged group has a size of {}, we observe {} as the average risk of recidivism, but our model predicts {}\"\\\n", - ".format(len(temp_df), temp_df['observed'].mean(), temp_df['probabilities'].mean())" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "to_choose = dff[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", - "temp_df = dff.loc[to_choose]" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected priviledged group has a size of 732, we observe 0.3770491803278688 as the average risk of recidivism, but our model predicts 0.4447038821779929'" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\"Our detected priviledged group has a size of {}, we observe {} as the average risk of recidivism, but our model predicts {}\"\\\n", - ".format(len(temp_df), temp_df['observed'].mean(), temp_df['probabilities'].mean())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Adult Dataset\n", - "This is a binary classification use case where the favorable label is 1 and the scoring function is the berk jones." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
workclasseducationmarital_statusoccupationrelationshipracesexnative_countryage_bineducation_num_binhours_per_week_bincapital_gain_bincapital_loss_binobservedexpectation
0Private11thNever-marriedMachine-op-inspctOwn-childBlackMaleUnited-States17-271-840-440000.236226
1PrivateHS-gradMarried-civ-spouseFarming-fishingHusbandWhiteMaleUnited-States37-47945-990000.236226
2Local-govAssoc-acdmMarried-civ-spouseProtective-servHusbandWhiteMaleUnited-States28-3612-1640-440010.236226
3PrivateSome-collegeMarried-civ-spouseMachine-op-inspctHusbandBlackMaleUnited-States37-4710-1140-447298-7978010.236226
4?Some-collegeNever-married?Own-childWhiteFemaleUnited-States17-2710-111-390000.236226
\n", - "
" + "metadata": { + "id": "cot8Opn7Ck5r" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xYzhBYSCChiJ" + }, + "source": [ + "## Bias scan using Multi-Dimensional Subset Scan (MDSS)\n", + "\n", + "\"Identifying Significant Predictive Bias in Classifiers\" https://arxiv.org/abs/1611.08292\n", + "\n", + "The goal of bias scan is to identify a subgroup(s) that has significantly more predictive bias than would be expected from an unbiased classifier. There are $\\prod_{m=1}^{M}\\left(2^{|X_{m}|}-1\\right)$ unique subgroups from a dataset with $M$ features, with each feature having $|X_{m}|$ discretized values, where a subgroup is any $M$-dimension\n", + "Cartesian set product, between subsets of feature-values from each feature --- excluding the empty set. Bias scan mitigates this computational hurdle by approximately identifing the most statistically biased subgroup in linear time (rather than exponential).\n", + "\n", + "\n", + "We define the statistical measure of predictive bias function, $score_{bias}(S)$ as a likelihood ratio score and a function of a given subgroup $S$. The null hypothesis is that the given prediction's odds are correct for all subgroups in $\\mathcal{D}$:\n", + "\n", + "$$H_{0}:odds(y_{i})=\\frac{\\hat{p}_{i}}{1-\\hat{p}_{i}}\\ \\forall i\\in\\mathcal{D}.$$\n", + "\n", + "The alternative hypothesis assumes some constant multiplicative bias in the odds for some given subgroup $S$:\n", + "\n", + "$$H_{1}:\\ odds(y_{i})=q\\frac{\\hat{p}_{i}}{1-\\hat{p}_{i}},\\ \\text{where}\\ q>1\\ \\forall i\\in S\\ \\mathrm{and}\\ q=1\\ \\forall i\\notin S.$$\n", + "\n", + "In the classification setting, each observation's likelihood is Bernoulli distributed and assumed independent. 
This results in the following scoring function for a subgroup $S$:\n", + "\n", + "\\begin{align*}\n", + "score_{bias}(S)= & \\max_{q}\\log\\prod_{i\\in S}\\frac{Bernoulli(\\frac{q\\hat{p}_{i}}{1-\\hat{p}_{i}+q\\hat{p}_{i}})}{Bernoulli(\\hat{p}_{i})}\\\\\n", + "= & \\max_{q}\\log(q)\\sum_{i\\in S}y_{i}-\\sum_{i\\in S}\\log(1-\\hat{p}_{i}+q\\hat{p}_{i}).\n", + "\\end{align*}\n", + "Our bias scan is thus represented as: $S^{*}=FSS(\\mathcal{D},\\mathcal{E},F_{score})=MDSS(\\mathcal{D},\\hat{p},score_{bias})$.\n", + "\n", + "where $S^{*}$ is the detected most anomalous subgroup, $FSS$ is one of several subset scan algorithms for different problem settings, $\\mathcal{D}$ is a dataset with outcomes $Y$ and discretized features $\\mathcal{X}$, $\\mathcal{E}$ is a set of expectations or 'normal' values for $Y$, and $F_{score}$ is an expectation-based scoring statistic that measures the amount of anomalousness between subgroup observations and their expectations.\n", + "\n", + "Predictive bias emphasizes comparable predictions for a subgroup and its observations and Bias scan provides a more general method that can detect and characterize such bias, or poor classifier fit, in the larger space of all possible subgroups, without a priori specification." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_StPscBKChiM" + }, + "source": [ + "### Usage\n", + "\n", + "MDScan currently supports four scoring functions. Their usage is described below:\n", + "- *BerkJones*: Non-parametric scoring function. To be used for all of the four types of outcomes supported - binary, continuous, nominal, ordinal.\n", + "- *Bernoulli*: Parametric scoring function. To be used for two of the four types of outcomes supported - binary and nominal.\n", + "- *Gaussian*: Parametric scoring function. To be used for one of the four types of outcomes supported - continuous.\n", + "- *Poisson*: Parametric scoring function.
To be used for three of the four types of outcomes supported - binary, continuous, and ordinal.\n", + "\n", + "Note, non-parametric scoring functions can only be used for datasets where the expectations are constant or none.\n", + "\n", + "The type of outcomes must be provided using the `mode` keyword argument. The definitions of the four supported outcome types are provided below:\n", + "- Binary: Yes/no outcomes. Outcomes must be 0 or 1.\n", + "- Continuous: Continuous outcomes. Outcomes could be any real number.\n", + "- Nominal: Multiclass outcomes with no rank or order between them. Outcomes must be a finite set of integers with dimensionality <= 10.\n", + "- Ordinal: Multiclass outcomes that are ranked in a specific order. Outcomes must be positive integers.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pEw-BVOaChiM" + }, + "outputs": [], + "source": [ + "from aif360.detectors.mdss_detector import bias_scan\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas\n", + "\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WBaUrHfAChiN" + }, + "source": [ + "We'll demonstrate finding the most anomalous subset with bias scan using the compas dataset. We can specify subgroups to be scored or scan for the most anomalous subgroup. Bias scan allows us to decide if we aim to identify bias as `higher` than expected probabilities or `lower` than expected probabilities." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b-4Qb66XChiN" + }, + "source": [ + "# Compas Dataset\n", + "This is a binary classification use case where the favorable label is 0 and the scoring function is the default Bernoulli."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Yzhak7pCChiO" + }, + "outputs": [], + "source": [ + "np.random.seed(0)\n", + "\n", + "dataset_orig = load_preproc_data_compas()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_aaSrX5ZChiO" + }, + "source": [ + "The dataset has the categorical features one-hot encoded so we'll modify the dataset to convert them back\n", + "to the categorical features because scanning one-hot encoded features may find subgroups that are not meaningful, e.g. a subgroup with 2 race values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EsV3GU1TChiO" + }, + "outputs": [], + "source": [ + "dataset_orig_df = pd.DataFrame(dataset_orig.features, columns=dataset_orig.feature_names)\n", + "\n", + "age_cat = np.argmax(dataset_orig_df[['age_cat=Less than 25', 'age_cat=25 to 45',\n", + " 'age_cat=Greater than 45']].values, axis=1).reshape(-1, 1)\n", + "priors_count = np.argmax(dataset_orig_df[['priors_count=0', 'priors_count=1 to 3',\n", + " 'priors_count=More than 3']].values, axis=1).reshape(-1, 1)\n", + "c_charge_degree = np.argmax(dataset_orig_df[['c_charge_degree=F', 'c_charge_degree=M']].values, axis=1).reshape(-1, 1)\n", + "\n", + "features = np.concatenate((dataset_orig_df[['sex', 'race']].values, age_cat, priors_count, \\\n", + " c_charge_degree, dataset_orig.labels), axis=1)\n", + "feature_names = ['sex', 'race', 'age_cat', 'priors_count', 'c_charge_degree']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jWLqSK0pChiO" + }, + "outputs": [], + "source": [ + "df = pd.DataFrame(features, columns=feature_names + ['two_year_recid'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "id": "EfTmpJKMChiP", + "outputId": "b3287ae8-bee7-4d60-bfac-a201c384849f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sexraceage_catpriors_countc_charge_degreetwo_year_recid
00.00.01.00.00.01.0
10.00.00.02.00.01.0
20.01.01.02.00.01.0
31.01.01.00.01.00.0
40.01.01.00.00.00.0
\n", + "
" + ], + "text/plain": [ + " sex race age_cat priors_count c_charge_degree two_year_recid\n", + "0 0.0 0.0 1.0 0.0 0.0 1.0\n", + "1 0.0 0.0 0.0 2.0 0.0 1.0\n", + "2 0.0 1.0 1.0 2.0 0.0 1.0\n", + "3 1.0 1.0 1.0 0.0 1.0 0.0\n", + "4 0.0 1.0 1.0 0.0 0.0 0.0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } ], - "text/plain": [ - " workclass education marital_status occupation \\\n", - "0 Private 11th Never-married Machine-op-inspct \n", - "1 Private HS-grad Married-civ-spouse Farming-fishing \n", - "2 Local-gov Assoc-acdm Married-civ-spouse Protective-serv \n", - "3 Private Some-college Married-civ-spouse Machine-op-inspct \n", - "4 ? Some-college Never-married ? \n", - "\n", - " relationship race sex native_country age_bin education_num_bin \\\n", - "0 Own-child Black Male United-States 17-27 1-8 \n", - "1 Husband White Male United-States 37-47 9 \n", - "2 Husband White Male United-States 28-36 12-16 \n", - "3 Husband Black Male United-States 37-47 10-11 \n", - "4 Own-child White Female United-States 17-27 10-11 \n", - "\n", - " hours_per_week_bin capital_gain_bin capital_loss_bin observed expectation \n", - "0 40-44 0 0 0 0.236226 \n", - "1 45-99 0 0 0 0.236226 \n", - "2 40-44 0 0 1 0.236226 \n", - "3 40-44 7298-7978 0 1 0.236226 \n", - "4 1-39 0 0 0 0.236226 " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_csv('https://gist.githubusercontent.com/Viktour19/b690679802c431646d36f7e2dd117b9e/raw/d8f17bf25664bd2d9fa010750b9e451c4155dd61/adult_autostrat.csv')\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that for the adult dataset, the positive label is 1 and thus the expectations provided is the probability of the earning >50k i.e label 1 and the favorable label is 1 which is the default for binary classification tasks. 
Since we would be using scoring function BerkJones, we also need to pass in an alpha value. Alpha can be interpreted as what proportion of the data you expect to have the favorable value" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "X = data.drop(['observed','expectation'], axis = 1)\n", - "probs = data['expectation']\n", - "y = data['observed']" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "privileged_subset = bias_scan(data=X, observations=y, scoring='BerkJones', expectations=probs, overpredicted=True,penalty=50, alpha = .24)\n", - "unprivileged_subset = bias_scan(data=X,observations=y, scoring='BerkJones', expectations=probs, overpredicted=False,penalty=50, alpha = .24)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "({'relationship': [' Not-in-family', ' Other-relative', ' Own-child', ' Unmarried'], 'capital_gain_bin': ['0']}, 932.4812)\n", - "({'education_num_bin': ['12-16'], 'marital_status': [' Married-civ-spouse']}, 1041.1901)\n" - ] - } - ], - "source": [ - "print(privileged_subset)\n", - "print(unprivileged_subset)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "dff = X.copy()\n", - "dff['observed'] = y \n", - "dff['probabilities'] = probs" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected privileged group has a size of 8532, we observe 0.0472 as the average probability of earning >50k, but our model predicts 0.2362'" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = dff[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", - "temp_df = dff.loc[to_choose]\n", - 
"\n", - "\"Our detected privileged group has a size of {}, we observe {} as the average probability of earning >50k, but our model predicts {}\"\\\n", - ".format(len(temp_df), np.round(temp_df['observed'].mean(),4), np.round(temp_df['probabilities'].mean(),4))" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected unprivileged group has a size of 2430, we observe 0.6996 as the average probability of earning >50k, but our model predicts 0.2362'" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = dff[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", - "temp_df = dff.loc[to_choose]\n", - "\n", - "\"Our detected unprivileged group has a size of {}, we observe {} as the average probability of earning >50k, but our model predicts {}\"\\\n", - ".format(len(temp_df), np.round(temp_df['observed'].mean(),4), np.round(temp_df['probabilities'].mean(),4))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Insurance Costs\n", - "This is a regression use case where the favorable value is 0 and the scoring function is Gaussian." 
- ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1338, 7)" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_csv('https://raw.githubusercontent.com/Adebayo-Oshingbesan/data/main/insurance.csv')\n", - "data.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "for col in ['bmi','age']:\n", - " data[col] = pd.qcut(data[col], 10, duplicates='drop')\n", - " data[col] = data[col].apply(lambda x: str(round(x.left, 2)) + ' - ' + str(round(x.right,2)))" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "features = data.drop('charges', axis = 1)\n", - "X = features.copy()\n", - "\n", - "for feature in X.columns:\n", - " X[feature] = X[feature].astype('category').cat.codes\n", - "\n", - "y = data['charges']" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.linear_model import LinearRegression\n", - "reg = LinearRegression()\n", - "reg.fit(X, y)\n", - "y_pred = pd.Series(reg.predict(X))" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "privileged_subset = bias_scan(data=features, observations=y, expectations=y_pred, scoring = 'Gaussian', \n", - " overpredicted=True, penalty=1e10, mode ='continuous', favorable_value='low')\n", - "\n", - "unprivileged_subset = bias_scan(data=features, observations=y, expectations=y_pred, scoring = 'Gaussian', \n", - " overpredicted=False, penalty=1e10, mode ='continuous', favorable_value='low')" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "({'bmi': ['15.96 - 22.99', '22.99 - 25.33', '25.33 - 27.36'], 'smoker': 
['no']}, 2384.5786)\n", - "({'bmi': ['15.96 - 22.99', '22.99 - 25.33', '25.33 - 27.36', '27.36 - 28.8'], 'smoker': ['yes']}, 3927.8765)\n" - ] - } - ], - "source": [ - "print(privileged_subset)\n", - "print(unprivileged_subset)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected privileged group has a size of 321, we observe 7844.8402958566985 as the mean insurance costs, but our model predicts 5420.493262774548'" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = data[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", - "temp_df = data.loc[to_choose].copy()\n", - "temp_y = y_pred.loc[to_choose].copy()\n", - "\n", - "\"Our detected privileged group has a size of {}, we observe {} as the mean insurance costs, but our model predicts {}\"\\\n", - ".format(len(temp_df), temp_df['charges'].mean(), temp_y.mean())" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected privileged group has a size of 115, we observe 21148.373896173915 as the mean insurance costs, but our model predicts 29694.035319112845'" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = data[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", - "temp_df = data.loc[to_choose].copy()\n", - "temp_y = y_pred.loc[to_choose].copy()\n", - "\n", - "\"Our detected privileged group has a size of {}, we observe {} as the mean insurance costs, but our model predicts {}\"\\\n", - ".format(len(temp_df), temp_df['charges'].mean(), temp_y.mean())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Hospitalization Time\n", - "This is an ordinal, multiclass classification use case where the favorable value is 1 and the scoring 
function is Poisson." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(29980, 22)" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_csv('https://raw.githubusercontent.com/Adebayo-Oshingbesan/data/main/hospital.csv')\n", - "data = data[data['Length of Stay'] != '120 +'].fillna('Unknown')\n", - "data.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "X = data.drop(['Length of Stay'], axis = 1)\n", - "y = pd.to_numeric(data['Length of Stay'])" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "privileged_subset = bias_scan(data=X, observations=y, scoring = 'Poisson', favorable_value = 'low', overpredicted=True, penalty=50, mode ='ordinal')\n", - "unprivileged_subset = bias_scan(data=X, observations=y, scoring = 'Poisson', favorable_value = 'low', overpredicted=False, penalty=50, mode ='ordinal')" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "({'APR Severity of Illness Description': ['Extreme']}, 11180.5386)\n", - "({'Patient Disposition': ['Home or Self Care', 'Left Against Medical Advice', 'Short-term Hospital'], 'APR Severity of Illness Description': ['Minor', 'Moderate'], 'APR MDC Code': [1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 21]}, 9950.881)\n" - ] - } - ], - "source": [ - "print(privileged_subset)\n", - "print(unprivileged_subset)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "dff = X.copy()\n", - "dff['observed'] = y \n", - "dff['predicted'] = y.mean()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"'Our detected privileged group has a size of 1900, we observe 15.2216 as the average number of days spent in the hospital, but our model predicts 5.4231'" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = dff[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", - "temp_df = dff.loc[to_choose]\n", - "\n", - "\"Our detected privileged group has a size of {}, we observe {} as the average number of days spent in the hospital, but our model predicts {}\"\\\n", - ".format(len(temp_df), np.round(temp_df['observed'].mean(),4), np.round(temp_df['predicted'].mean(),4))" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected unprivileged group has a size of 14620, we observe 2.8301 as the average number of days spent in the hospital, but our model predicts 5.4231'" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = dff[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", - "temp_df = dff.loc[to_choose]\n", - "\n", - "\"Our detected unprivileged group has a size of {}, we observe {} as the average number of days spent in the hospital, but our model predicts {}\"\\\n", - ".format(len(temp_df), np.round(temp_df['observed'].mean(),4), np.round(temp_df['predicted'].mean(),4))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Temperature Dataset\n", - "This is a regression use case where the favorable value is the higher temperatures and the scoring function is Berk Jones." - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SummaryPrecipTypeHumidityWindSpeedVisibilityPressureDailySummaryTemperature
0Partly Cloudyrain0.8914.119715.82631015.13Partly cloudy throughout the day.9.472222
1Partly Cloudyrain0.8614.264615.82631015.63Partly cloudy throughout the day.9.355556
2Mostly Cloudyrain0.893.928414.95691015.94Partly cloudy throughout the day.9.377778
3Partly Cloudyrain0.8314.103615.82631016.41Partly cloudy throughout the day.8.288889
4Mostly Cloudyrain0.8311.044615.82631016.51Partly cloudy throughout the day.8.755556
\n", - "
" + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wXtPQiNOChiQ" + }, + "source": [ + "### training\n", + "We'll train a simple classifier to predict the probability of the outcome" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "djiP9uJfChiQ", + "outputId": "092e5ab3-1060-4ba9-cb2e-c05aa9b6aecd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } ], - "text/plain": [ - " Summary PrecipType Humidity WindSpeed Visibility Pressure \\\n", - "0 Partly Cloudy rain 0.89 14.1197 15.8263 1015.13 \n", - "1 Partly Cloudy rain 0.86 14.2646 15.8263 1015.63 \n", - "2 Mostly Cloudy rain 0.89 3.9284 14.9569 1015.94 \n", - "3 Partly Cloudy rain 0.83 14.1036 15.8263 1016.41 \n", - "4 Mostly Cloudy rain 0.83 11.0446 15.8263 1016.51 \n", - "\n", - " DailySummary Temperature \n", - "0 Partly cloudy throughout the day. 9.472222 \n", - "1 Partly cloudy throughout the day. 9.355556 \n", - "2 Partly cloudy throughout the day. 9.377778 \n", - "3 Partly cloudy throughout the day. 8.288889 \n", - "4 Partly cloudy throughout the day. 8.755556 " - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_csv('https://raw.githubusercontent.com/Adebayo-Oshingbesan/data/main/weatherHistory.csv')\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Binning the continuous features since bias scan support only categorical features." 
- ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "for col in ['Humidity','WindSpeed','Visibility','Pressure']:\n", - " data[col] = pd.qcut(data[col], 10, duplicates='drop')\n", - " data[col] = data[col].apply(lambda x: str(round(x.left, 2)) + ' - ' + str(round(x.right,2)))" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "features = data.drop('Temperature', axis = 1)\n", - "y = data['Temperature']" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "privileged_subset = bias_scan(data=features, observations=y, favorable_value = 'high',\n", - " scoring = 'BerkJones', overpredicted=True, penalty=50, mode ='continuous', alpha = .4)\n", - "\n", - "unprivileged_subset = bias_scan(data=features, observations=y, favorable_value = 'high',\n", - " scoring = 'BerkJones', overpredicted=False, penalty=50, mode ='continuous', alpha = .4)" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "({'Pressure': ['-0.0 - 1007.07', '1018.17 - 1020.0', '1020.0 - 1022.42', '1022.42 - 1026.61', '1026.61 - 1046.38'], 'Humidity': ['0.72 - 0.78', '0.78 - 0.83', '0.83 - 0.87', '0.87 - 0.92', '0.92 - 0.95', '0.95 - 1.0']}, 6907.8227)\n", - "({'Visibility': ['9.9 - 9.98', '9.98 - 10.05', '10.05 - 11.04', '11.04 - 11.45', '11.45 - 15.15', '15.15 - 15.83', '15.83 - 16.1'], 'PrecipType': ['rain'], 'Pressure': ['-0.0 - 1007.07', '1007.07 - 1010.68', '1010.68 - 1012.95', '1012.95 - 1014.8', '1014.8 - 1016.45', '1016.45 - 1018.17', '1018.17 - 1020.0', '1020.0 - 1022.42']}, 19962.4291)\n" - ] - } - ], - "source": [ - "print(privileged_subset)\n", - "print(unprivileged_subset)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our 
detected privileged group has a size of 31607, we observe 5.155584909121934 as the mean temperature, but our model predicts 11.93267843751985'" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = data[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", - "temp_df = data.loc[to_choose].copy()\n", - "\n", - "\"Our detected privileged group has a size of {}, we observe {} as the mean temperature, but our model predicts {}\"\\\n", - ".format(len(temp_df), temp_df['Temperature'].mean(), y.mean())" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected unprivileged group has a size of 55642, we observe 16.773802762911078 as the mean temperature, but our model predicts 11.93267843751985'" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = data[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", - "temp_df = data.loc[to_choose].copy()\n", - "\n", - "\"Our detected unprivileged group has a size of {}, we observe {} as the mean temperature, but our model predicts {}\"\\\n", - ".format(len(temp_df), temp_df['Temperature'].mean(), y.mean())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Iris Dataset\n", - "This is an nominal, multiclass classification use case where the favorable value is a flower specie and the scoring function is Bernoulli." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCmSpecies
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
\n", - "
" + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "X = df.drop('two_year_recid', axis = 1)\n", + "y = df['two_year_recid']\n", + "clf = LogisticRegression(solver='lbfgs', C=1.0, penalty='l2')\n", + "clf.fit(X, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xg10wgXrChiQ" + }, + "source": [ + "Note that the probability scores we use are the probabilities of the favorable label, which is 0 in this case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6DftyJMwChiQ" + }, + "outputs": [], + "source": [ + "probs = pd.Series(clf.predict_proba(X)[:,0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hg8G2sw1ChiR" + }, + "source": [ + "### bias scan\n", + "We can scan for a privileged and unprivileged subset using bias scan" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s5DAn7t9ChiR" + }, + "outputs": [], + "source": [ + "privileged_subset = bias_scan(data=X,observations=y,expectations=probs,favorable_value=0, overpredicted=True)\n", + "unprivileged_subset = bias_scan(data=X,observations=y,expectations=probs,favorable_value=0,overpredicted=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XEcSO-DWChiR", + "outputId": "2c0ed17b-0ef5-49e6-b06a-e8ee63c1806e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "({'age_cat': [1.0], 'priors_count': [0.0, 1.0, 2.0], 'sex': [1.0], 'race': [1.0], 'c_charge_degree': [0.0]}, 7.9086)\n", + "({'race': [0.0], 'age_cat': [1.0, 2.0], 'priors_count': [1.0], 'c_charge_degree': [0.0, 1.0]}, 7.0227)\n" + ] + } ], - "text/plain": [ - " SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species\n", - "0 5.1 3.5 1.4 0.2 Iris-setosa\n", - "1 4.9 3.0 1.4 0.2 Iris-setosa\n", - "2 4.7 3.2 1.3 0.2 Iris-setosa\n", - "3 4.6 3.1 1.5 0.2 Iris-setosa\n", - "4 5.0 3.6 1.4 0.2 Iris-setosa" - ] - }, - "execution_count": 44, - 
"metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "iris_data = pd.read_csv('https://raw.githubusercontent.com/Adebayo-Oshingbesan/data/main/Iris.csv').drop('Id', axis = 1)\n", - "iris_data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [], - "source": [ - "for col in iris_data.columns:\n", - " if col != 'Species':\n", - " iris_data[col] = pd.qcut(iris_data[col], 10, duplicates='drop')\n", - " iris_data[col] = iris_data[col].apply(lambda x: str(round(x.left, 2)) + ' - ' + str(round(x.right,2)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " Training simple model on data" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [], - "source": [ - "X = iris_data.drop('Species', axis = 1)\n", - "for col in X.columns:\n", - " X[col] = X[col].cat.codes\n", - "\n", - "y = iris_data['Species']" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "clf_2 = LogisticRegression(C=1e-3)\n", - "clf_2.fit(X, y)\n", - "iris_data['Prediction'] = clf_2.predict(X)" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [], - "source": [ - "features = iris_data.drop(['Species','Prediction'], axis = 1)\n", - "expectations = pd.DataFrame(clf_2.predict_proba(X), columns=clf_2.classes_)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Bias scan" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [], - "source": [ - "privileged_subset = bias_scan(data=features, observations=y, expectations=expectations, scoring = 'Bernoulli', \n", - " favorable_value = 'Iris-virginica', overpredicted=True, penalty=.05, mode ='nominal')\n", - "unprivileged_subset = bias_scan(data=features, observations=y, expectations=expectations, scoring = 
'Bernoulli', \n", - " favorable_value = 'Iris-virginica', overpredicted=False, penalty=.005, mode ='nominal')" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "({'PetalLengthCm': ['1.0 - 1.4', '1.4 - 1.5', '1.5 - 1.7', '1.7 - 3.9', '3.9 - 4.35', '4.35 - 4.64'], 'PetalWidthCm': ['0.1 - 0.2', '0.2 - 0.4', '0.4 - 1.16', '1.16 - 1.3', '1.3 - 1.5']}, 20.0508)\n", - "({'SepalLengthCm': ['4.8 - 5.0', '5.6 - 5.8', '6.1 - 6.3', '6.3 - 6.52', '6.52 - 6.9', '6.9 - 7.9'], 'PetalWidthCm': ['1.5 - 1.8', '1.8 - 1.9', '1.9 - 2.2', '2.2 - 2.5'], 'PetalLengthCm': ['4.35 - 4.64', '5.0 - 5.32', '5.32 - 5.8', '5.8 - 6.9']}, 22.101)\n" - ] - } - ], - "source": [ - "print(privileged_subset)\n", - "print(unprivileged_subset)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected privileged group has a size of 88, we observe 0 as the count of Iris-virginica, but our model predicts 50'" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = iris_data[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", - "temp_df = iris_data.loc[to_choose].copy()\n", - "\n", - "\"Our detected privileged group has a size of {}, we observe {} as the count of Iris-virginica, but our model predicts {}\"\\\n", - ".format(len(temp_df), (temp_df['Species'] == 'Iris-virginica').sum(), (temp_df['Prediction'] == 'Iris-setosa').sum())" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected unprivileged group has a size of 39, we observe 39 as the count of Iris-virginica, but our model predicts 38'" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "to_choose = 
iris_data[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", - "temp_df = iris_data.loc[to_choose].copy()\n", - "\n", - "\"Our detected unprivileged group has a size of {}, we observe {} as the count of Iris-virginica, but our model predicts {}\"\\\n", - ".format(len(temp_df), (temp_df['Species'] == 'Iris-virginica').sum(), (temp_df['Prediction'] == 'Iris-virginica').sum())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Assuming we want to scan for the second most privileged group, we can remove the records that belongs to the most privileged_subset and then rescan." - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "to_choose = iris_data[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", - "X_filtered = iris_data[~to_choose]\n", - "y_filtered = y[~to_choose]" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [], - "source": [ - "privileged_subset = bias_scan(data=X_filtered.drop(['Species','Prediction'], axis = 1), observations=y_filtered, \n", - " favorable_value = 'Iris-virginica', scoring = 'Bernoulli', overpredicted=True, penalty=1e-6, mode = 'nominal')" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "({'PetalLengthCm': ['1.0 - 1.4', '1.4 - 1.5', '1.5 - 1.7', '1.7 - 3.9', '3.9 - 4.35', '4.35 - 4.64']}, 36.0207)\n" - ] + "source": [ + "print(privileged_subset)\n", + "print(unprivileged_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F-0Po3Z0ChiR" + }, + "outputs": [], + "source": [ + "dff = X.copy()\n", + "dff['observed'] = y\n", + "dff['probabilities'] = 1 - probs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BiAQLZTyChiR" + }, + "outputs": [], + "source": [ + "to_choose = 
dff[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", + "temp_df = dff.loc[to_choose]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sSKlQJ5GChiR", + "outputId": "a35f78f8-8590-46be-d8e5-7d2326a8c125" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected priviledged group has a size of 147, we observe 0.5374149659863946 as the average risk of recidivism, but our model predicts 0.38278159716895366'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"Our detected priviledged group has a size of {}, we observe {} as the average risk of recidivism, but our model predicts {}\"\\\n", + ".format(len(temp_df), temp_df['observed'].mean(), temp_df['probabilities'].mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mXzojdkmChiS" + }, + "outputs": [], + "source": [ + "to_choose = dff[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", + "temp_df = dff.loc[to_choose]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GYXXRRboChiS", + "outputId": "b2203dbe-33b6-41a8-da39-005684236ac7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected priviledged group has a size of 732, we observe 0.3770491803278688 as the average risk of recidivism, but our model predicts 0.4447038821779929'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"Our detected priviledged group has a size of {}, we observe {} as the average risk of recidivism, but our model predicts {}\"\\\n", + ".format(len(temp_df), temp_df['observed'].mean(), temp_df['probabilities'].mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SzLEtrNVChiS" + }, + "source": [ + "# Adult Dataset\n", + "This is a binary classification use case where the favorable label is 1 and the scoring 
function is Berk Jones." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-keubU1FChiS", + "outputId": "2e83deac-8a79-4c74-e4bf-f2b0663345c6" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
workclasseducationmarital_statusoccupationrelationshipracesexnative_countryage_bineducation_num_binhours_per_week_bincapital_gain_bincapital_loss_binobservedexpectation
0Private11thNever-marriedMachine-op-inspctOwn-childBlackMaleUnited-States17-271-840-440000.236226
1PrivateHS-gradMarried-civ-spouseFarming-fishingHusbandWhiteMaleUnited-States37-47945-990000.236226
2Local-govAssoc-acdmMarried-civ-spouseProtective-servHusbandWhiteMaleUnited-States28-3612-1640-440010.236226
3PrivateSome-collegeMarried-civ-spouseMachine-op-inspctHusbandBlackMaleUnited-States37-4710-1140-447298-7978010.236226
4?Some-collegeNever-married?Own-childWhiteFemaleUnited-States17-2710-111-390000.236226
\n", + "
" + ], + "text/plain": [ + " workclass education marital_status occupation \\\n", + "0 Private 11th Never-married Machine-op-inspct \n", + "1 Private HS-grad Married-civ-spouse Farming-fishing \n", + "2 Local-gov Assoc-acdm Married-civ-spouse Protective-serv \n", + "3 Private Some-college Married-civ-spouse Machine-op-inspct \n", + "4 ? Some-college Never-married ? \n", + "\n", + " relationship race sex native_country age_bin education_num_bin \\\n", + "0 Own-child Black Male United-States 17-27 1-8 \n", + "1 Husband White Male United-States 37-47 9 \n", + "2 Husband White Male United-States 28-36 12-16 \n", + "3 Husband Black Male United-States 37-47 10-11 \n", + "4 Own-child White Female United-States 17-27 10-11 \n", + "\n", + " hours_per_week_bin capital_gain_bin capital_loss_bin observed expectation \n", + "0 40-44 0 0 0 0.236226 \n", + "1 45-99 0 0 0 0.236226 \n", + "2 40-44 0 0 1 0.236226 \n", + "3 40-44 7298-7978 0 1 0.236226 \n", + "4 1-39 0 0 0 0.236226 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('https://gist.githubusercontent.com/Viktour19/b690679802c431646d36f7e2dd117b9e/raw/d8f17bf25664bd2d9fa010750b9e451c4155dd61/adult_autostrat.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J7Cgj8FqChiS" + }, + "source": [ + "Note that for the adult dataset, the positive label is 1 and thus the expectations provided is the probability of the earning >50k i.e label 1 and the favorable label is 1 which is the default for binary classification tasks. Since we would be using scoring function BerkJones, we also need to pass in an alpha value. 
Alpha can be interpreted as what proportion of the data you expect to have the favorable value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O1SjEJe5ChiS" + }, + "outputs": [], + "source": [ + "X = data.drop(['observed','expectation'], axis = 1)\n", + "probs = data['expectation']\n", + "y = data['observed']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QN7OywqMChiS" + }, + "outputs": [], + "source": [ + "privileged_subset = bias_scan(data=X, observations=y, scoring='BerkJones', expectations=probs, overpredicted=True,penalty=50, alpha = .24)\n", + "unprivileged_subset = bias_scan(data=X,observations=y, scoring='BerkJones', expectations=probs, overpredicted=False,penalty=50, alpha = .24)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w_iAu1aYChiT", + "outputId": "8fd30d41-0357-4396-d2c6-0e1c537c5a18" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "({'relationship': [' Not-in-family', ' Other-relative', ' Own-child', ' Unmarried'], 'capital_gain_bin': ['0']}, 932.4812)\n", + "({'education_num_bin': ['12-16'], 'marital_status': [' Married-civ-spouse']}, 1041.1901)\n" + ] + } + ], + "source": [ + "print(privileged_subset)\n", + "print(unprivileged_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "peWzovx9ChiT" + }, + "outputs": [], + "source": [ + "dff = X.copy()\n", + "dff['observed'] = y\n", + "dff['probabilities'] = probs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EpY65LofChiT", + "outputId": "de4642f3-3ffb-4076-dc72-bbdc3a400c6f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected privileged group has a size of 8532, we observe 0.0472 as the average probability of earning >50k, but our model predicts 0.2362'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" 
+ } + ], + "source": [ + "to_choose = dff[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", + "temp_df = dff.loc[to_choose]\n", + "\n", + "\"Our detected privileged group has a size of {}, we observe {} as the average probability of earning >50k, but our model predicts {}\"\\\n", + ".format(len(temp_df), np.round(temp_df['observed'].mean(),4), np.round(temp_df['probabilities'].mean(),4))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PwgDaz0pChiT", + "outputId": "0a5f3580-803e-4bde-edbd-de3d94de2d65" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected unprivileged group has a size of 2430, we observe 0.6996 as the average probability of earning >50k, but our model predicts 0.2362'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = dff[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", + "temp_df = dff.loc[to_choose]\n", + "\n", + "\"Our detected unprivileged group has a size of {}, we observe {} as the average probability of earning >50k, but our model predicts {}\"\\\n", + ".format(len(temp_df), np.round(temp_df['observed'].mean(),4), np.round(temp_df['probabilities'].mean(),4))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LXTW6plLChiT" + }, + "source": [ + "# Insurance Costs\n", + "This is a regression use case where the favorable value is 0 and the scoring function is Gaussian." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZjCIbdjHChiT", + "outputId": "152e0c16-7036-405f-b3e5-b1b6e544bba7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1338, 7)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('https://raw.githubusercontent.com/Adebayo-Oshingbesan/data/main/insurance.csv')\n", + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xvUrgNm0ChiU" + }, + "outputs": [], + "source": [ + "for col in ['bmi','age']:\n", + " data[col] = pd.qcut(data[col], 10, duplicates='drop')\n", + " data[col] = data[col].apply(lambda x: str(round(x.left, 2)) + ' - ' + str(round(x.right,2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UUG30SyKChiU" + }, + "outputs": [], + "source": [ + "features = data.drop('charges', axis = 1)\n", + "X = features.copy()\n", + "\n", + "for feature in X.columns:\n", + " X[feature] = X[feature].astype('category').cat.codes\n", + "\n", + "y = data['charges']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yHnX7-OFChiU" + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "reg = LinearRegression()\n", + "reg.fit(X, y)\n", + "y_pred = pd.Series(reg.predict(X))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lOfpbRoOChiU" + }, + "outputs": [], + "source": [ + "privileged_subset = bias_scan(data=features, observations=y, expectations=y_pred, scoring = 'Gaussian',\n", + " overpredicted=True, penalty=1e10, mode ='continuous', favorable_value='low')\n", + "\n", + "unprivileged_subset = bias_scan(data=features, observations=y, expectations=y_pred, scoring = 'Gaussian',\n", + " overpredicted=False, penalty=1e10, mode ='continuous', favorable_value='low')" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "l33QGsytChiU", + "outputId": "3a17f919-fffa-404d-b0bd-ca50a06050bf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "({'bmi': ['15.96 - 22.99', '22.99 - 25.33', '25.33 - 27.36'], 'smoker': ['no']}, 2384.5786)\n", + "({'bmi': ['15.96 - 22.99', '22.99 - 25.33', '25.33 - 27.36', '27.36 - 28.8'], 'smoker': ['yes']}, 3927.8765)\n" + ] + } + ], + "source": [ + "print(privileged_subset)\n", + "print(unprivileged_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FWQXeaXCChiU", + "outputId": "62a22a5b-6801-4e52-b163-a9662aba90aa" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected privileged group has a size of 321, we observe 7844.8402958566985 as the mean insurance costs, but our model predicts 5420.493262774548'" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = data[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", + "temp_df = data.loc[to_choose].copy()\n", + "temp_y = y_pred.loc[to_choose].copy()\n", + "\n", + "\"Our detected privileged group has a size of {}, we observe {} as the mean insurance costs, but our model predicts {}\"\\\n", + ".format(len(temp_df), temp_df['charges'].mean(), temp_y.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tZTmBX6AChia", + "outputId": "00d084db-6be3-443a-c88d-3be5bd0aa93a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected privileged group has a size of 115, we observe 21148.373896173915 as the mean insurance costs, but our model predicts 29694.035319112845'" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = data[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", + "temp_df = data.loc[to_choose].copy()\n", + "temp_y = 
y_pred.loc[to_choose].copy()\n", + "\n", + "\"Our detected privileged group has a size of {}, we observe {} as the mean insurance costs, but our model predicts {}\"\\\n", + ".format(len(temp_df), temp_df['charges'].mean(), temp_y.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V-eFnnGoChib" + }, + "source": [ + "# Hospitalization Time\n", + "This is an ordinal, multiclass classification use case where the favorable value is 1 and the scoring function is Poisson." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CQhDUxmLChib", + "outputId": "5667e6b9-48e1-4e6c-ba3f-ee1e51e6dd59" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(29980, 22)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('https://raw.githubusercontent.com/Adebayo-Oshingbesan/data/main/hospital.csv')\n", + "data = data[data['Length of Stay'] != '120 +'].fillna('Unknown')\n", + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y8f8KIbBChib" + }, + "outputs": [], + "source": [ + "X = data.drop(['Length of Stay'], axis = 1)\n", + "y = pd.to_numeric(data['Length of Stay'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GujHJ6dxChib" + }, + "outputs": [], + "source": [ + "privileged_subset = bias_scan(data=X, observations=y, scoring = 'Poisson', favorable_value = 'low', overpredicted=True, penalty=50, mode ='ordinal')\n", + "unprivileged_subset = bias_scan(data=X, observations=y, scoring = 'Poisson', favorable_value = 'low', overpredicted=False, penalty=50, mode ='ordinal')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ijEm55FYChib", + "outputId": "c59cf22a-8989-45b5-d7fa-90d17cfbf571" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "({'APR Severity of Illness Description': 
['Extreme']}, 11180.5386)\n", + "({'Patient Disposition': ['Home or Self Care', 'Left Against Medical Advice', 'Short-term Hospital'], 'APR Severity of Illness Description': ['Minor', 'Moderate'], 'APR MDC Code': [1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 21]}, 9950.881)\n" + ] + } + ], + "source": [ + "print(privileged_subset)\n", + "print(unprivileged_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sW0U5lJSChib" + }, + "outputs": [], + "source": [ + "dff = X.copy()\n", + "dff['observed'] = y\n", + "dff['predicted'] = y.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mCE1XmynChib", + "outputId": "855049ca-dca0-43a9-ccf0-cb878d45626a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected privileged group has a size of 1900, we observe 15.2216 as the average number of days spent in the hospital, but our model predicts 5.4231'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = dff[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", + "temp_df = dff.loc[to_choose]\n", + "\n", + "\"Our detected privileged group has a size of {}, we observe {} as the average number of days spent in the hospital, but our model predicts {}\"\\\n", + ".format(len(temp_df), np.round(temp_df['observed'].mean(),4), np.round(temp_df['predicted'].mean(),4))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6_sJJ7ytChic", + "outputId": "7b94ce1f-71f5-4e20-eb64-f5a322aec649" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected unprivileged group has a size of 14620, we observe 2.8301 as the average number of days spent in the hospital, but our model predicts 5.4231'" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = 
dff[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", + "temp_df = dff.loc[to_choose]\n", + "\n", + "\"Our detected unprivileged group has a size of {}, we observe {} as the average number of days spent in the hospital, but our model predicts {}\"\\\n", + ".format(len(temp_df), np.round(temp_df['observed'].mean(),4), np.round(temp_df['predicted'].mean(),4))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OurrfXisChic" + }, + "source": [ + "# Temperature Dataset\n", + "This is a regression use case where the favorable value is the higher temperatures and the scoring function is Berk Jones." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YijsudnQChic", + "outputId": "13ce0ef1-e6a5-4a9a-f550-537e7c0d7f15" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SummaryPrecipTypeHumidityWindSpeedVisibilityPressureDailySummaryTemperature
0Partly Cloudyrain0.8914.119715.82631015.13Partly cloudy throughout the day.9.472222
1Partly Cloudyrain0.8614.264615.82631015.63Partly cloudy throughout the day.9.355556
2Mostly Cloudyrain0.893.928414.95691015.94Partly cloudy throughout the day.9.377778
3Partly Cloudyrain0.8314.103615.82631016.41Partly cloudy throughout the day.8.288889
4Mostly Cloudyrain0.8311.044615.82631016.51Partly cloudy throughout the day.8.755556
\n", + "
" + ], + "text/plain": [ + " Summary PrecipType Humidity WindSpeed Visibility Pressure \\\n", + "0 Partly Cloudy rain 0.89 14.1197 15.8263 1015.13 \n", + "1 Partly Cloudy rain 0.86 14.2646 15.8263 1015.63 \n", + "2 Mostly Cloudy rain 0.89 3.9284 14.9569 1015.94 \n", + "3 Partly Cloudy rain 0.83 14.1036 15.8263 1016.41 \n", + "4 Mostly Cloudy rain 0.83 11.0446 15.8263 1016.51 \n", + "\n", + " DailySummary Temperature \n", + "0 Partly cloudy throughout the day. 9.472222 \n", + "1 Partly cloudy throughout the day. 9.355556 \n", + "2 Partly cloudy throughout the day. 9.377778 \n", + "3 Partly cloudy throughout the day. 8.288889 \n", + "4 Partly cloudy throughout the day. 8.755556 " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('https://raw.githubusercontent.com/Adebayo-Oshingbesan/data/main/weatherHistory.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nWsgeaAoChic" + }, + "source": [ + "Binning the continuous features since bias scan support only categorical features." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yBU4ApF_Chic" + }, + "outputs": [], + "source": [ + "for col in ['Humidity','WindSpeed','Visibility','Pressure']:\n", + " data[col] = pd.qcut(data[col], 10, duplicates='drop')\n", + " data[col] = data[col].apply(lambda x: str(round(x.left, 2)) + ' - ' + str(round(x.right,2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L9ZDBb05Chic" + }, + "outputs": [], + "source": [ + "features = data.drop('Temperature', axis = 1)\n", + "y = data['Temperature']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OErDGbP7Chic" + }, + "outputs": [], + "source": [ + "privileged_subset = bias_scan(data=features, observations=y, favorable_value = 'high',\n", + " scoring = 'BerkJones', overpredicted=True, penalty=50, mode ='continuous', alpha = .4)\n", + "\n", + "unprivileged_subset = bias_scan(data=features, observations=y, favorable_value = 'high',\n", + " scoring = 'BerkJones', overpredicted=False, penalty=50, mode ='continuous', alpha = .4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qHcrvsRrChid", + "outputId": "4e8e8b4c-2b39-4aed-a497-aaa71a068305" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "({'Pressure': ['-0.0 - 1007.07', '1018.17 - 1020.0', '1020.0 - 1022.42', '1022.42 - 1026.61', '1026.61 - 1046.38'], 'Humidity': ['0.72 - 0.78', '0.78 - 0.83', '0.83 - 0.87', '0.87 - 0.92', '0.92 - 0.95', '0.95 - 1.0']}, 6907.8227)\n", + "({'Visibility': ['9.9 - 9.98', '9.98 - 10.05', '10.05 - 11.04', '11.04 - 11.45', '11.45 - 15.15', '15.15 - 15.83', '15.83 - 16.1'], 'PrecipType': ['rain'], 'Pressure': ['-0.0 - 1007.07', '1007.07 - 1010.68', '1010.68 - 1012.95', '1012.95 - 1014.8', '1014.8 - 1016.45', '1016.45 - 1018.17', '1018.17 - 1020.0', '1020.0 - 1022.42']}, 19962.4291)\n" + ] + } + ], + "source": [ + "print(privileged_subset)\n", +
"print(unprivileged_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qSXQ5YCrChid", + "outputId": "8e120487-1088-4c20-fdd0-445e3a6c87ff" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected privileged group has a size of 31607, we observe 5.155584909121934 as the mean temperature, but our model predicts 11.93267843751985'" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = data[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", + "temp_df = data.loc[to_choose].copy()\n", + "\n", + "\"Our detected privileged group has a size of {}, we observe {} as the mean temperature, but our model predicts {}\"\\\n", + ".format(len(temp_df), temp_df['Temperature'].mean(), y.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nEVOD0hRChid", + "outputId": "7aa5f93c-1b13-4cdf-d280-599caf98f4a1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected unprivileged group has a size of 55642, we observe 16.773802762911078 as the mean temperature, but our model predicts 11.93267843751985'" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = data[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", + "temp_df = data.loc[to_choose].copy()\n", + "\n", + "\"Our detected unprivileged group has a size of {}, we observe {} as the mean temperature, but our model predicts {}\"\\\n", + ".format(len(temp_df), temp_df['Temperature'].mean(), y.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K5gEwJeeChid" + }, + "source": [ + "# Iris Dataset\n", + "This is a nominal, multiclass classification use case where the favorable value is a flower species and the scoring function is Bernoulli."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2r6LHOaEChid", + "outputId": "a3de6e37-70a5-4d5e-c963-a73542660785" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCmSpecies
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
\n", + "
" + ], + "text/plain": [ + " SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_data = pd.read_csv('https://raw.githubusercontent.com/Adebayo-Oshingbesan/data/main/Iris.csv').drop('Id', axis = 1)\n", + "iris_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LcrAOo8_Chid" + }, + "outputs": [], + "source": [ + "for col in iris_data.columns:\n", + " if col != 'Species':\n", + " iris_data[col] = pd.qcut(iris_data[col], 10, duplicates='drop')\n", + " iris_data[col] = iris_data[col].apply(lambda x: str(round(x.left, 2)) + ' - ' + str(round(x.right,2)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-YMP9Vn4Chie" + }, + "source": [ + " Training simple model on data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UAmGe08iChie" + }, + "outputs": [], + "source": [ + "X = iris_data.drop('Species', axis = 1)\n", + "for col in X.columns:\n", + " X[col] = X[col].cat.codes\n", + "\n", + "y = iris_data['Species']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KwiBCBS2Chie" + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "clf_2 = LogisticRegression(C=1e-3)\n", + "clf_2.fit(X, y)\n", + "iris_data['Prediction'] = clf_2.predict(X)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xmILWHzJChie" + }, + "outputs": [], + "source": [ + "features = iris_data.drop(['Species','Prediction'], axis = 1)\n", + "expectations = pd.DataFrame(clf_2.predict_proba(X), columns=clf_2.classes_)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"4PFjV1M5Chie" + }, + "source": [ + "Bias scan" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6aNKTPWnChie" + }, + "outputs": [], + "source": [ + "privileged_subset = bias_scan(data=features, observations=y, expectations=expectations, scoring = 'Bernoulli',\n", + " favorable_value = 'Iris-virginica', overpredicted=True, penalty=.05, mode ='nominal')\n", + "unprivileged_subset = bias_scan(data=features, observations=y, expectations=expectations, scoring = 'Bernoulli',\n", + " favorable_value = 'Iris-virginica', overpredicted=False, penalty=.005, mode ='nominal')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qsS4-RavChie", + "outputId": "753bbe0d-ea3b-42bd-a0e0-ff0b16f6f097" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "({'PetalLengthCm': ['1.0 - 1.4', '1.4 - 1.5', '1.5 - 1.7', '1.7 - 3.9', '3.9 - 4.35', '4.35 - 4.64'], 'PetalWidthCm': ['0.1 - 0.2', '0.2 - 0.4', '0.4 - 1.16', '1.16 - 1.3', '1.3 - 1.5']}, 20.0508)\n", + "({'SepalLengthCm': ['4.8 - 5.0', '5.6 - 5.8', '6.1 - 6.3', '6.3 - 6.52', '6.52 - 6.9', '6.9 - 7.9'], 'PetalWidthCm': ['1.5 - 1.8', '1.8 - 1.9', '1.9 - 2.2', '2.2 - 2.5'], 'PetalLengthCm': ['4.35 - 4.64', '5.0 - 5.32', '5.32 - 5.8', '5.8 - 6.9']}, 22.101)\n" + ] + } + ], + "source": [ + "print(privileged_subset)\n", + "print(unprivileged_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bnaSm1EZChie", + "outputId": "5dfa7e86-309a-44fa-c49d-b3a9b0044d65" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected privileged group has a size of 88, we observe 0 as the count of Iris-virginica, but our model predicts 50'" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = iris_data[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", + "temp_df = iris_data.loc[to_choose].copy()\n", + 
"\n", + "\"Our detected privileged group has a size of {}, we observe {} as the count of Iris-virginica, but our model predicts {}\"\\\n", + ".format(len(temp_df), (temp_df['Species'] == 'Iris-virginica').sum(), (temp_df['Prediction'] == 'Iris-setosa').sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9h6-a6P0Chif", + "outputId": "99ab2c4c-f3a9-45df-8be9-4f32c58fcaf2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected unprivileged group has a size of 39, we observe 39 as the count of Iris-virginica, but our model predicts 38'" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = iris_data[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", + "temp_df = iris_data.loc[to_choose].copy()\n", + "\n", + "\"Our detected unprivileged group has a size of {}, we observe {} as the count of Iris-virginica, but our model predicts {}\"\\\n", + ".format(len(temp_df), (temp_df['Species'] == 'Iris-virginica').sum(), (temp_df['Prediction'] == 'Iris-virginica').sum())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MZttznF3Chif" + }, + "source": [ + "Assuming we want to scan for the second most privileged group, we can remove the records that belongs to the most privileged_subset and then rescan." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EiDMjpAUChif" + }, + "outputs": [], + "source": [ + "to_choose = iris_data[unprivileged_subset[0].keys()].isin(unprivileged_subset[0]).all(axis=1)\n", + "X_filtered = iris_data[~to_choose]\n", + "y_filtered = y[~to_choose]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FGRZ11QAChif" + }, + "outputs": [], + "source": [ + "privileged_subset = bias_scan(data=X_filtered.drop(['Species','Prediction'], axis = 1), observations=y_filtered,\n", + " favorable_value = 'Iris-virginica', scoring = 'Bernoulli', overpredicted=True, penalty=1e-6, mode = 'nominal')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "45vgxTdEChif", + "outputId": "40ba98f2-a25a-4065-abff-91ca794acfe3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "({'PetalLengthCm': ['1.0 - 1.4', '1.4 - 1.5', '1.5 - 1.7', '1.7 - 3.9', '3.9 - 4.35', '4.35 - 4.64']}, 36.0207)\n" + ] + } + ], + "source": [ + "print(privileged_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7rcDITN6Chif", + "outputId": "e1fedb40-179f-46b2-bcc9-639743db19c7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Our detected privileged group has a size of 89, we observe 0 as the count of Iris-virginica, but our model predicts 4'" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_choose = X_filtered[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", + "temp_df = X_filtered.loc[to_choose]\n", + "\n", + "\"Our detected privileged group has a size of {}, we observe {} as the count of Iris-virginica, but our model predicts {}\"\\\n", + ".format(len(temp_df), (temp_df['Species'] == 'Iris-virginica').sum(), (temp_df['Prediction'] == 'Iris-virginica').sum())" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "7eR5OiuoChif" + }, + "source": [ + "In summary, this notebook explains how to use the new mdss bias scan interface in aif360.detectors to scan for bias, even for tasks beyond binary classification, using the concepts of over-predictions and under-predictions." + ] } - ], - "source": [ - "print(privileged_subset)" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Our detected privileged group has a size of 89, we observe 0 as the count of Iris-virginica, but our model predicts 4'" - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.7 ('aif360')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "d0c5ced7753e77a483fec8ff7063075635521cce6e0bd54998c8f174742209dd" + } + }, + "colab": { + "provenance": [] } - ], - "source": [ - "to_choose = X_filtered[privileged_subset[0].keys()].isin(privileged_subset[0]).all(axis=1)\n", - "temp_df = X_filtered.loc[to_choose]\n", - "\n", - "\"Our detected privileged group has a size of {}, we observe {} as the count of Iris-virginica, but our model predicts {}\"\\\n", - ".format(len(temp_df), (temp_df['Species'] == 'Iris-virginica').sum(), (temp_df['Prediction'] == 'Iris-virginica').sum())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In summary, this notebook explains how to use the new mdss bias scan interface in aif360.detectors to scan for bias, even for tasks beyond binary classification, using the concepts of over-predictions and under-predictions." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.7 ('aif360')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" }, - "vscode": { - "interpreter": { - "hash": "d0c5ced7753e77a483fec8ff7063075635521cce6e0bd54998c8f174742209dd" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/demo_meta_classifier.ipynb b/examples/demo_meta_classifier.ipynb index 5b62ef56..22709920 100644 --- a/examples/demo_meta_classifier.ipynb +++ b/examples/demo_meta_classifier.ipynb @@ -1,467 +1,528 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "scrolled": true - }, - "source": [ - "# Meta-Algorithm for fair classification.\n", - "The fairness metrics to be optimized have to specified as \"input\". 
Currently we can handle the following fairness metrics:\n", - "Statistical Rate, False Positive Rate, True Positive Rate, False Negative Rate, True Negative Rate,\n", - "Accuracy Rate, False Discovery Rate, False Omission Rate, Positive Predictive Rate, Negative Predictive Rate.\n", - "\n", - "-----------------------------\n", - "\n", - "The example below considers the cases of False Discovery Parity and Statistical Rate (disparate impact).\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import Markdown, display\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from sklearn.preprocessing import MaxAbsScaler\n", - "from tqdm import tqdm\n", - "\n", - "from aif360.metrics import BinaryLabelDatasetMetric\n", - "from aif360.metrics import ClassificationMetric\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", - "from aif360.algorithms.inprocessing import MetaFairClassifier\n", - "\n", - "np.random.seed(12345)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Original Training dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig = load_preproc_data_adult()\n", - "\n", - "privileged_groups = [{'sex': 1}]\n", - "unprivileged_groups = [{'sex': 0}]\n", - "\n", - "dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "min_max_scaler = MaxAbsScaler()\n", - "dataset_orig_train.features = min_max_scaler.fit_transform(dataset_orig_train.features)\n", - "dataset_orig_test.features = min_max_scaler.transform(dataset_orig_test.features)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "scrolled": true, - "tags": [] - }, - 
"outputs": [ + "cells": [ { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Training Dataset shape" - }, - "metadata": {} + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_meta_classifier.ipynb)" + ], + "metadata": { + "id": "sjDIfgLTC20Y" + } }, { - "output_type": "stream", - "name": "stdout", - "text": "(34189, 18)\n" + "cell_type": "markdown", + "metadata": { + "scrolled": true, + "id": "h5NsmisDC1uI" + }, + "source": [ + "# Meta-Algorithm for fair classification.\n", + "The fairness metrics to be optimized have to specified as \"input\". Currently we can handle the following fairness metrics:\n", + "Statistical Rate, False Positive Rate, True Positive Rate, False Negative Rate, True Negative Rate,\n", + "Accuracy Rate, False Discovery Rate, False Omission Rate, Positive Predictive Rate, Negative Predictive Rate.\n", + "\n", + "-----------------------------\n", + "\n", + "The example below considers the cases of False Discovery Parity and Statistical Rate (disparate impact).\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Favorable and unfavorable labels" - }, - "metadata": {} + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MjbL8vrIC1uL" + }, + "outputs": [], + "source": [ + "from IPython.display import Markdown, display\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn.preprocessing import MaxAbsScaler\n", + "from tqdm import tqdm\n", + "\n", + "from aif360.metrics import BinaryLabelDatasetMetric\n", + "from aif360.metrics import ClassificationMetric\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", + "from aif360.algorithms.inprocessing import MetaFairClassifier\n", + "\n", + 
"np.random.seed(12345)" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "1.0 0.0\n" + "cell_type": "markdown", + "metadata": { + "id": "XCVx_M0PC1uM" + }, + "source": [ + "## Original Training dataset" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Protected attribute names" - }, - "metadata": {} + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xYJhjuEqC1uM" + }, + "outputs": [], + "source": [ + "dataset_orig = load_preproc_data_adult()\n", + "\n", + "privileged_groups = [{'sex': 1}]\n", + "unprivileged_groups = [{'sex': 0}]\n", + "\n", + "dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "['sex', 'race']\n" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8W4CWxEvC1uM" + }, + "outputs": [], + "source": [ + "min_max_scaler = MaxAbsScaler()\n", + "dataset_orig_train.features = min_max_scaler.fit_transform(dataset_orig_train.features)\n", + "dataset_orig_test.features = min_max_scaler.transform(dataset_orig_test.features)" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Privileged and unprivileged protected attribute values" - }, - "metadata": {} + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [], + "id": "jA7WAHWsC1uN", + "outputId": "5dde451b-0d61-4123-d0f8-af0d27aea497" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Training Dataset shape" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "(34189, 18)\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Favorable and unfavorable labels" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "1.0 0.0\n" + }, + { + 
"output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Protected attribute names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "['sex', 'race']\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Privileged and unprivileged protected attribute values" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "[array([1.]), array([1.])] [array([0.]), array([0.])]\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Dataset feature names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" + } + ], + "source": [ + "display(Markdown(\"#### Training Dataset shape\"))\n", + "print(dataset_orig_train.features.shape)\n", + "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", + "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", + "display(Markdown(\"#### Protected attribute names\"))\n", + "print(dataset_orig_train.protected_attribute_names)\n", + "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", + "print(dataset_orig_train.privileged_protected_attributes,\n", + " dataset_orig_train.unprivileged_protected_attributes)\n", + "display(Markdown(\"#### Dataset feature names\"))\n", + "print(dataset_orig_train.feature_names)" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "[array([1.]), array([1.])] [array([0.]), array([0.])]\n" + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"tags": [], + "id": "aFuh5g4pC1uO", + "outputId": "57906c3d-53bc-4e5c-a91a-1ca9805e045f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.193\nTest set: Difference in mean outcomes between unprivileged and privileged groups = -0.199\n" + } + ], + "source": [ + "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "print(\"Train set: Difference in mean outcomes between unprivileged and privileged groups = {:.3f}\".format(metric_orig_train.mean_difference()))\n", + "metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = {:.3f}\".format(metric_orig_test.mean_difference()))" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Dataset feature names" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "YtI7xAQpC1uO" + }, + "source": [ + "## Algorithm without debiasing\n", + "\n", + "Get classifier without fairness constraints" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" - } - ], - "source": [ - "display(Markdown(\"#### Training Dataset shape\"))\n", - "print(dataset_orig_train.features.shape)\n", - "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", - "print(dataset_orig_train.favorable_label, 
dataset_orig_train.unfavorable_label)\n", - "display(Markdown(\"#### Protected attribute names\"))\n", - "print(dataset_orig_train.protected_attribute_names)\n", - "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", - "print(dataset_orig_train.privileged_protected_attributes, \n", - " dataset_orig_train.unprivileged_protected_attributes)\n", - "display(Markdown(\"#### Dataset feature names\"))\n", - "print(dataset_orig_train.feature_names)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "tags": [] - }, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false, + "id": "Ps2Sawo6C1uO" + }, + "outputs": [], + "source": [ + "biased_model = MetaFairClassifier(tau=0, sensitive_attr=\"sex\", type=\"fdr\").fit(dataset_orig_train)" + ] + }, { - "output_type": "stream", - "name": "stdout", - "text": "Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.193\nTest set: Difference in mean outcomes between unprivileged and privileged groups = -0.199\n" - } - ], - "source": [ - "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "print(\"Train set: Difference in mean outcomes between unprivileged and privileged groups = {:.3f}\".format(metric_orig_train.mean_difference()))\n", - "metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = {:.3f}\".format(metric_orig_test.mean_difference()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Algorithm without debiasing\n", - "\n", - "Get classifier without fairness constraints" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - 
"scrolled": false - }, - "outputs": [], - "source": [ - "biased_model = MetaFairClassifier(tau=0, sensitive_attr=\"sex\", type=\"fdr\").fit(dataset_orig_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Apply the unconstrained model to test data" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "dataset_bias_test = biased_model.predict(dataset_orig_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "tags": [] - }, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "G44H-9PpC1uP" + }, + "source": [ + "Apply the unconstrained model to test data" + ] + }, { - "output_type": "stream", - "name": "stdout", - "text": "Test set: Classification accuracy = 0.787\nTest set: Balanced classification accuracy = 0.619\nTest set: Disparate impact = 0.433\nTest set: False discovery rate ratio = 0.492\n" - } - ], - "source": [ - "classified_metric_bias_test = ClassificationMetric(dataset_orig_test, dataset_bias_test,\n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "print(\"Test set: Classification accuracy = {:.3f}\".format(classified_metric_bias_test.accuracy()))\n", - "TPR = classified_metric_bias_test.true_positive_rate()\n", - "TNR = classified_metric_bias_test.true_negative_rate()\n", - "bal_acc_bias_test = 0.5*(TPR+TNR)\n", - "print(\"Test set: Balanced classification accuracy = {:.3f}\".format(bal_acc_bias_test))\n", - "print(\"Test set: Disparate impact = {:.3f}\".format(classified_metric_bias_test.disparate_impact()))\n", - "fdr = classified_metric_bias_test.false_discovery_rate_ratio()\n", - "fdr = min(fdr, 1/fdr)\n", - "print(\"Test set: False discovery rate ratio = {:.3f}\".format(fdr))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Debiasing with FDR objective\n", - "\n", - "Learn a debiased classifier" - ] - }, - { - "cell_type": 
"code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "debiased_model = MetaFairClassifier(tau=0.7, sensitive_attr=\"sex\", type=\"fdr\").fit(dataset_orig_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Apply the debiased model to test data" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_debiasing_test = debiased_model.predict(dataset_orig_test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Model - with debiasing - dataset metrics" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "tags": [] - }, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "83DjcoK2C1uP" + }, + "outputs": [], + "source": [ + "dataset_bias_test = biased_model.predict(dataset_orig_test)" + ] + }, { - "output_type": "stream", - "name": "stdout", - "text": "Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.126\n" - } - ], - "source": [ - "metric_dataset_debiasing_test = BinaryLabelDatasetMetric(dataset_debiasing_test, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "\n", - "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = {:.3f}\".format(metric_dataset_debiasing_test.mean_difference()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Model - with debiasing - classification metrics" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "tags": [] - }, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "_8386WfuC1uP", + "outputId": "2e62132e-b01f-433e-bf0b-d4a185e574a3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Test set: Classification accuracy = 0.787\nTest set: Balanced 
classification accuracy = 0.619\nTest set: Disparate impact = 0.433\nTest set: False discovery rate ratio = 0.492\n" + } + ], + "source": [ + "classified_metric_bias_test = ClassificationMetric(dataset_orig_test, dataset_bias_test,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "print(\"Test set: Classification accuracy = {:.3f}\".format(classified_metric_bias_test.accuracy()))\n", + "TPR = classified_metric_bias_test.true_positive_rate()\n", + "TNR = classified_metric_bias_test.true_negative_rate()\n", + "bal_acc_bias_test = 0.5*(TPR+TNR)\n", + "print(\"Test set: Balanced classification accuracy = {:.3f}\".format(bal_acc_bias_test))\n", + "print(\"Test set: Disparate impact = {:.3f}\".format(classified_metric_bias_test.disparate_impact()))\n", + "fdr = classified_metric_bias_test.false_discovery_rate_ratio()\n", + "fdr = min(fdr, 1/fdr)\n", + "print(\"Test set: False discovery rate ratio = {:.3f}\".format(fdr))" + ] + }, { - "output_type": "stream", - "name": "stdout", - "text": "Test set: Classification accuracy = 0.694\nTest set: Balanced classification accuracy = 0.712\nTest set: Disparate impact = 0.730\nTest set: False discovery rate ratio = 0.643\n" - } - ], - "source": [ - "classified_metric_debiasing_test = ClassificationMetric(dataset_orig_test, \n", - " dataset_debiasing_test,\n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "print(\"Test set: Classification accuracy = {:.3f}\".format(classified_metric_debiasing_test.accuracy()))\n", - "TPR = classified_metric_debiasing_test.true_positive_rate()\n", - "TNR = classified_metric_debiasing_test.true_negative_rate()\n", - "bal_acc_debiasing_test = 0.5*(TPR+TNR)\n", - "print(\"Test set: Balanced classification accuracy = {:.3f}\".format(bal_acc_debiasing_test))\n", - "print(\"Test set: Disparate impact = {:.3f}\".format(classified_metric_debiasing_test.disparate_impact()))\n", - "fdr = 
classified_metric_debiasing_test.false_discovery_rate_ratio()\n", - "fdr = min(fdr, 1/fdr)\n", - "print(\"Test set: False discovery rate ratio = {:.3f}\".format(fdr))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "We see that the FDR ratio has increased meaning it is now closer to parity." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running the algorithm for different tau values" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "tags": [] - }, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "4Hk_4pw_C1uP" + }, + "source": [ + "## Debiasing with FDR objective\n", + "\n", + "Learn a debiased classifier" + ] + }, { - "output_type": "stream", - "name": "stderr", - "text": "100%|██████████| 10/10 [00:16<00:00, 1.65s/it]\n" - } - ], - "source": [ - "accuracies, statistical_rates = [], []\n", - "s_attr = \"race\"\n", - "\n", - "all_tau = np.linspace(0, 0.9, 10)\n", - "for tau in tqdm(all_tau):\n", - " debiased_model = MetaFairClassifier(tau=tau, sensitive_attr=s_attr, type='sr')\n", - " debiased_model.fit(dataset_orig_train)\n", - "\n", - " dataset_debiasing_test = debiased_model.predict(dataset_orig_test)\n", - " metric = ClassificationMetric(dataset_orig_test, dataset_debiasing_test,\n", - " unprivileged_groups=[{s_attr: 0}],\n", - " privileged_groups=[{s_attr: 1}])\n", - "\n", - " accuracies.append(metric.accuracy())\n", - " sr = metric.disparate_impact()\n", - " statistical_rates.append(min(sr, 1/sr))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Output fairness is represented by $\\gamma_{sr}$, which is the disparate impact ratio of different sensitive attribute values." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fv8PFKShC1uQ" + }, + "outputs": [], + "source": [ + "debiased_model = MetaFairClassifier(tau=0.7, sensitive_attr=\"sex\", type=\"fdr\").fit(dataset_orig_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FS95cQ6tC1uQ" + }, + "source": [ + "Apply the debiased model to test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2jHJD1r7C1uQ" + }, + "outputs": [], + "source": [ + "dataset_debiasing_test = debiased_model.predict(dataset_orig_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zPDB86PeC1uQ" + }, + "source": [ + "### Model - with debiasing - dataset metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "m-TZSduuC1uQ", + "outputId": "b0e4303b-befd-47e8-e338-ca20755eea4f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.126\n" + } + ], + "source": [ + "metric_dataset_debiasing_test = BinaryLabelDatasetMetric(dataset_debiasing_test,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "\n", + "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = {:.3f}\".format(metric_dataset_debiasing_test.mean_difference()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9dT_7mArC1uQ" + }, + "source": [ + "### Model - with debiasing - classification metrics" + ] + }, { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "33CRw9eGC1uR", + "outputId": "b5d9c3ea-cb2e-4d1b-f50b-3936705c3c3a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Test set: Classification accuracy = 0.694\nTest set: Balanced classification accuracy = 0.712\nTest set: Disparate impact = 0.730\nTest set: False discovery rate ratio = 0.643\n" + } + ], + "source": [ + 
"classified_metric_debiasing_test = ClassificationMetric(dataset_orig_test,\n", + " dataset_debiasing_test,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "print(\"Test set: Classification accuracy = {:.3f}\".format(classified_metric_debiasing_test.accuracy()))\n", + "TPR = classified_metric_debiasing_test.true_positive_rate()\n", + "TNR = classified_metric_debiasing_test.true_negative_rate()\n", + "bal_acc_debiasing_test = 0.5*(TPR+TNR)\n", + "print(\"Test set: Balanced classification accuracy = {:.3f}\".format(bal_acc_debiasing_test))\n", + "print(\"Test set: Disparate impact = {:.3f}\".format(classified_metric_debiasing_test.disparate_impact()))\n", + "fdr = classified_metric_debiasing_test.false_discovery_rate_ratio()\n", + "fdr = min(fdr, 1/fdr)\n", + "print(\"Test set: False discovery rate ratio = {:.3f}\".format(fdr))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [], + "id": "0EWkP2m2C1uR" + }, + "source": [ + "We see that the FDR ratio has increased meaning it is now closer to parity." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fun9czlMC1uR" + }, + "source": [ + "## Running the algorithm for different tau values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "PrMarZ_kC1uR", + "outputId": "5c405344-4dd0-4f52-f115-f7ae9d474af4" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": "100%|██████████| 10/10 [00:16<00:00, 1.65s/it]\n" + } + ], + "source": [ + "accuracies, statistical_rates = [], []\n", + "s_attr = \"race\"\n", + "\n", + "all_tau = np.linspace(0, 0.9, 10)\n", + "for tau in tqdm(all_tau):\n", + " debiased_model = MetaFairClassifier(tau=tau, sensitive_attr=s_attr, type='sr')\n", + " debiased_model.fit(dataset_orig_train)\n", + "\n", + " dataset_debiasing_test = debiased_model.predict(dataset_orig_test)\n", + " metric = ClassificationMetric(dataset_orig_test, dataset_debiasing_test,\n", + " unprivileged_groups=[{s_attr: 0}],\n", + " privileged_groups=[{s_attr: 1}])\n", + "\n", + " accuracies.append(metric.accuracy())\n", + " sr = metric.disparate_impact()\n", + " statistical_rates.append(min(sr, 1/sr))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dWSiLvXEC1uS" + }, + "source": [ + "Output fairness is represented by $\\gamma_{sr}$, which is the disparate impact ratio of different sensitive attribute values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BX7YH5rnC1uS", + "outputId": "b831f896-cdea-4610-f803-d73b5f7b13dd" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "fig, ax1 = plt.subplots(figsize=(13,7))\n", + "ax1.plot(all_tau, accuracies, color='r')\n", + "ax1.set_title('Accuracy and $\\gamma_{sr}$ vs Tau', fontsize=16, fontweight='bold')\n", + "ax1.set_xlabel('Input Tau', fontsize=16, fontweight='bold')\n", + "ax1.set_ylabel('Accuracy', color='r', fontsize=16, fontweight='bold')\n", + "ax1.xaxis.set_tick_params(labelsize=14)\n", + "ax1.yaxis.set_tick_params(labelsize=14)\n", + "\n", + 
"ax2 = ax1.twinx()\n", + "ax2.plot(all_tau, statistical_rates, color='b')\n", + "ax2.set_ylabel('$\\gamma_{sr}$', color='b', fontsize=16, fontweight='bold')\n", + "ax2.yaxis.set_tick_params(labelsize=14)\n", + "ax2.grid(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HRHHvcuQC1uS" + }, + "source": [ + "References:\n", + "\n", + " Celis, L. E., Huang, L., Keswani, V., & Vishnoi, N. K. (2018).\n", + " \"Classification with Fairness Constraints: A Meta-Algorithm with Provable Guarantees.\"\"\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.6.9 64-bit", + "language": "python", + "name": "python_defaultSpec_1596663900877" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.15" + }, + "colab": { + "provenance": [] } - ], - "source": [ - "fig, ax1 = plt.subplots(figsize=(13,7))\n", - "ax1.plot(all_tau, accuracies, color='r')\n", - "ax1.set_title('Accuracy and $\\gamma_{sr}$ vs Tau', fontsize=16, fontweight='bold')\n", - "ax1.set_xlabel('Input Tau', fontsize=16, fontweight='bold')\n", - "ax1.set_ylabel('Accuracy', color='r', fontsize=16, fontweight='bold')\n", - "ax1.xaxis.set_tick_params(labelsize=14)\n", - "ax1.yaxis.set_tick_params(labelsize=14)\n", - "\n", - "ax2 = ax1.twinx()\n", - "ax2.plot(all_tau, statistical_rates, color='b')\n", - "ax2.set_ylabel('$\\gamma_{sr}$', color='b', fontsize=16, fontweight='bold')\n", - "ax2.yaxis.set_tick_params(labelsize=14)\n", - "ax2.grid(True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "References:\n", - "\n", - " Celis, L. E., Huang, L., Keswani, V., & Vishnoi, N. K. (2018). 
\n", - " \"Classification with Fairness Constraints: A Meta-Algorithm with Provable Guarantees.\"\"\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.6.9 64-bit", - "language": "python", - "name": "python_defaultSpec_1596663900877" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file diff --git a/examples/demo_ot_metric.ipynb b/examples/demo_ot_metric.ipynb index 9c8bc904..dd8217d8 100644 --- a/examples/demo_ot_metric.ipynb +++ b/examples/demo_ot_metric.ipynb @@ -1,1379 +1,1485 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Measuring bias with Optimal Transport by calculating the Wasserstein distance" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Table of contents\n", - "\n", - "- Introduction\n", - "- General Optimal Transport examples\n", - "- Usage\n", - "- Application to Compas Dataset\n", - "- Application to Adult Dataset\n", - "- More details\n", - " - OT for mapping estimation\n", - " - Kantorovich optimal transport problem\n", - " - Solving optimal transport\n", - " - Necessity and priority of usage" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## References\n", - "\n", - "\"FlipTest: fairness testing via optimal transport\" https://dl.acm.org/doi/abs/10.1145/3351095.3372845\n", - "\n", - "\"Obtaining Fairness using Optimal Transport Theory\" http://proceedings.mlr.press/v97/gordaliza19a.html\n", - "\n", - "\"Computational Optimal Transport\" https://arxiv.org/abs/1803.00567\n", - "\n", - "\"POT: Python Optimal Transport\" 
https://jmlr.org/papers/v22/20-451.html" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Introduction\n", - "\n", - "Optimal Transport (OT) is a field of mathematics which studies the geometry of probability spaces. Among its many contributions, OT provides a principled way to compare and align probability distributions by taking into account the underlying geometry of the\n", - "considered metric space.\n", - "\n", - "Optimal Transport (OT) is a mathematical problem that was first introduced by Gaspard Monge in 1781. It addresses the task of determining the most efficient method for transporting mass from one distribution to another. In this problem, the cost associated with moving a unit of mass from one position to another is referred to as the ground cost. The primary objective of OT is to minimize the total cost incurred when moving one mass distribution onto another. The optimization problem can be expressed for two distributions $\\mu_s$ and $\\mu_t$ as\n", - "\n", - "$$\n", - "\\min_{m, m_{\\#} \\mu_s=\\mu_t} \\int c(x, m(x)) d \\mu_s(x)\n", - "$$\n", - "in the continuous case, and\n", - "$$\n", - "\\min_{\\sigma \\in \\text{Perm}(n)} \\frac{1}{n} \\sum_{i=1}^n \\textbf{C}_{i,\\sigma(i)}\n", - "$$\n", - "in the discrete case, where $\\textbf{C}_{\\cdot, \\cdot}$ is the ground cost and the constraint $m_{\\#} \\mu_s=\\mu_t$ ensures that $\\mu_s$ is completely transported to $\\mu_t$. Where $T_{\\#} \\mu_s = \\mu_s(T^{-1}(B)) = u_{t}(B)$ with $T$ as a trasportation matrix between $\\mu_s$ and $\\mu_t$ at point $B$. " - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "OT can be used to detect **model-induced bias** by calculating the above cost (also known as **Earth Mover's distance** or **Wasserstein distance**) between the distribution of ground truth labels and model predictions for each of the **protected groups**. 
If its value is close to 1, the model is **biased** towards this group." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## General Optimal Transport examples" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let us start with some simple examples of calculating the Earth Mover's distance between two distributions - the basis of Optimal Transport for bias detection. We do this using the `earth_movers_distance` function.\n", - "\n", - "For concrete examples of bias detection on real datasets, skip to the next chapter." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. General Optimal Transport\n", - "\n", - "Suppose we have two distributions $a$ and $b$ (as shown in the picture below), and we need to calculate the Wasserstein distance between these two distributions." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "# Initial distribution\n", - "a = np.array([0., 0.01547988, 0.03095975, 0.04643963, 0.05727554, 0.05417957, 0.04643963, 0.07739938, \n", - " 0.10835913, 0.12383901, 0.11764706, 0.10526316, 0.09287926, 0.07739938, 0.04643962, 0. ])\n", - "# Required distribution\n", - "b = np.array([0., 0.01829787, 0.02702128, 0.04106383, 0.07, 0.10829787, 0.14212766, 0.14468085, \n", - " 0.13, 0.10808511, 0.08255319, 0.05170213, 0.03361702, 0.02702128, 0.01553191, 0. ])" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_ot_metric.ipynb)" + ], + "metadata": { + "id": "vGqYImdfDCyy" + } + }, { - "data": { - "image/png": "", - "text/plain": [ - "
" + "cell_type": "markdown", + "metadata": { + "id": "Fp89L1E4DCXe" + }, + "source": [ + "# Measuring bias with Optimal Transport by calculating the Wasserstein distance" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "from scipy.interpolate import make_interp_spline\n", - "\n", - "# Drawing both of them\n", - "figure, axis = plt.subplots(1, 2)\n", - "figure.set_figheight(4)\n", - "figure.set_figwidth(12)\n", - "figure.tight_layout(w_pad = 5)\n", - "\n", - "def draw(y, id):\n", - " x = np.array(range(0, np.size(y)))\n", - " XYSpline = make_interp_spline(x, y) \n", - " X = np.linspace(x.min(), x.max(), 500)\n", - " Y = XYSpline(X)\n", - " axis[id].bar(x, y, color=\"lightgreen\", ec='black')\n", - " axis[id].scatter(x, y, color=\"orange\")\n", - " axis[id].plot(X, Y, color='blue')\n", - "\n", - "axis[0].title.set_text(\"Initial distribution\")\n", - "axis[1].title.set_text(\"Required distribution\")\n", - "draw(a, 0)\n", - "draw(b, 1)\n", - "\n", - "plt.show()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to better understand how Optimal Transport works, below is presented the code considering the case when the matrix cost distance is presented and defined as the absolute difference between positions of each part of the distribution. That is $\\text{distance}[i][j] = abs(i - j)$." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "_a = pd.Series(a)\n", - "_b = pd.Series(b)\n", - "distance = np.zeros((np.size(a), np.size(b)))\n", - "for i in range(np.size(a)):\n", - " for j in range(np.size(b)):\n", - " distance[i][j] = abs(i - j)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Wasserstein distance is equal to 1.3773703499999999.\n" - ] - } - ], - "source": [ - "from aif360.sklearn.metrics import ot_distance\n", - "c0 = ot_distance(y_true=_a, y_pred=_b, cost_matrix=distance, mode='continuous')\n", - "\n", - "print(\"Wasserstein distance is equal to \", c0, \".\", sep=\"\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Randomly distributed samples\n", - "\n", - "Suppose we have two distributions $a$ and $b$ with length $N$, that are generated randomly, and we need to calculate earth_movers_distance for them." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "N = 1000\n", - "np.random.seed(seed=1)\n", - "\n", - "# Initial distribution\n", - "a = np.random.rand(N)\n", - "a /= np.sum(a)\n", - "\n", - "# Required distribution\n", - "b = np.random.rand(N)\n", - "b /= np.sum(b)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "1Fk66iQtDCXg" + }, + "source": [ + "## Table of contents\n", + "\n", + "- Introduction\n", + "- General Optimal Transport examples\n", + "- Usage\n", + "- Application to Compas Dataset\n", + "- Application to Adult Dataset\n", + "- More details\n", + " - OT for mapping estimation\n", + " - Kantorovich optimal transport problem\n", + " - Solving optimal transport\n", + " - Necessity and priority of usage" + ] + }, { - "data": { - "image/png": "", - "text/plain": [ - "
" + "cell_type": "markdown", + "metadata": { + "id": "l6BtF35fDCXg" + }, + "source": [ + "## References\n", + "\n", + "\"FlipTest: fairness testing via optimal transport\" https://dl.acm.org/doi/abs/10.1145/3351095.3372845\n", + "\n", + "\"Obtaining Fairness using Optimal Transport Theory\" http://proceedings.mlr.press/v97/gordaliza19a.html\n", + "\n", + "\"Computational Optimal Transport\" https://arxiv.org/abs/1803.00567\n", + "\n", + "\"POT: Python Optimal Transport\" https://jmlr.org/papers/v22/20-451.html" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "# Drawing both of them\n", - "figure, axis = plt.subplots(1, 2)\n", - "figure.set_figheight(4)\n", - "figure.set_figwidth(15)\n", - "figure.tight_layout(w_pad = 5)\n", - "\n", - "def draw(y, id):\n", - " axis[id].hist(y, color='lightgreen', ec='black', bins=10)\n", - "\n", - "axis[0].title.set_text(\"Initial distribution\")\n", - "axis[1].title.set_text(\"Required distribution\")\n", - "draw(a, 0)\n", - "draw(b, 1)\n", - "\n", - "plt.show()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this case the Wasserstein distance tends to zero as the size of the samples increase." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Wasserstein distance is: 2.003382269162742e-05.\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "from aif360.sklearn.metrics import ot_distance\n", - "\n", - "_a = pd.Series(a)\n", - "_b = pd.Series(b)\n", - "c = ot_distance(y_true=_a, y_pred=_b, mode='continuous')\n", - "\n", - "print(\"Wasserstein distance is: \", c, \".\", sep=\"\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. 
Permutations\n", - "\n", - "Another example that shows clearly what the permutations in the first formula refer to is the one presented below." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "# Initial distribution\n", - "a = np.array([0., 0.1, 0.1, 0.1, 0.08, 0., 0.1, 0.1, 0.08, 0.08, 0., 0.1, 0.08, 0.08, 0.08, 0.])\n", - "# Required distribution\n", - "b = np.array([0., 0.08, 0.08, 0.08, 0.1, 0., 0.08, 0.08, 0.1, 0.1, 0., 0.08, 0.1, 0.1, 0.1, 0.])" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "iy6Q5v0gDCXg" + }, + "source": [ + "## Introduction\n", + "\n", + "Optimal Transport (OT) is a field of mathematics which studies the geometry of probability spaces. Among its many contributions, OT provides a principled way to compare and align probability distributions by taking into account the underlying geometry of the\n", + "considered metric space.\n", + "\n", + "Optimal Transport (OT) is a mathematical problem that was first introduced by Gaspard Monge in 1781. It addresses the task of determining the most efficient method for transporting mass from one distribution to another. In this problem, the cost associated with moving a unit of mass from one position to another is referred to as the ground cost. The primary objective of OT is to minimize the total cost incurred when moving one mass distribution onto another. 
The optimization problem can be expressed for two distributions $\\mu_s$ and $\\mu_t$ as\n", + "\n", + "$$\n", + "\\min_{m, m_{\\#} \\mu_s=\\mu_t} \\int c(x, m(x)) d \\mu_s(x)\n", + "$$\n", + "in the continuous case, and\n", + "$$\n", + "\\min_{\\sigma \\in \\text{Perm}(n)} \\frac{1}{n} \\sum_{i=1}^n \\textbf{C}_{i,\\sigma(i)}\n", + "$$\n", + "in the discrete case, where $\\textbf{C}_{\\cdot, \\cdot}$ is the ground cost and the constraint $m_{\\#} \\mu_s=\\mu_t$ ensures that $\\mu_s$ is completely transported to $\\mu_t$. Here the push-forward is defined by $(m_{\\#} \\mu_s)(B) = \\mu_s(m^{-1}(B)) = \\mu_t(B)$ for every measurable set $B$, with $m$ the transportation map between $\\mu_s$ and $\\mu_t$." + ] + }, { - "data": { - "image/png": "", - "text/plain": [ - "
" + "cell_type": "markdown", + "metadata": { + "id": "euPQoGR9DCXh" + }, + "source": [ + "OT can be used to detect **model-induced bias** by calculating the above cost (also known as **Earth Mover's distance** or **Wasserstein distance**) between the distribution of ground truth labels and model predictions for each of the **protected groups**. If its value is close to 1, the model is **biased** towards this group." ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "# Drawing both of them\n", - "figure, axis = plt.subplots(1, 2)\n", - "figure.set_figheight(4)\n", - "figure.set_figwidth(12)\n", - "figure.tight_layout(w_pad = 5)\n", - "\n", - "def draw(y, id):\n", - " x = np.array(range(0, np.size(y)))\n", - " axis[id].bar(x, y, color=\"lightgreen\", ec='black')\n", - " axis[id].scatter(x, y, color=\"orange\")\n", - "\n", - "axis[0].title.set_text(\"Initial distribution\")\n", - "axis[1].title.set_text(\"Required distribution\")\n", - "draw(a, 0)\n", - "draw(b, 1)\n", - "\n", - "plt.show()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There, since we can go from the initial distribution to the desired one just using permutations, the Wasserstein distance is zero." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.0\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "from aif360.sklearn.metrics import ot_distance\n", - "\n", - "_a = pd.Series(a)\n", - "_b = pd.Series(b)\n", - "c = ot_distance(_a, _b, mode='continuous')\n", - "\n", - "print(c)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Extreme case\n", - "\n", - "One more example that is closer to our case is \"normalization\". 
It's an explanation of why the maximum Wasserstein distance we can get in our case is approaching 1 (with increasing the size of the sample), that is, it is normalized. We get this in the case that all our population has a value 0 of the 2-year recidivism (which is presented in the paragraph \"Compas Dataset\") and the classifier fails massively in all the cases labeling all with a 1. That would be the worst-case scenario." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "# Initial distribution\n", - "a = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.001])\n", - "# Required distribution\n", - "b = np.array([0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "e1VAdG5wDCXh" + }, + "source": [ + "## General Optimal Transport examples" + ] + }, { - "data": { - "image/png": "", - "text/plain": [ - "
" + "cell_type": "markdown", + "metadata": { + "id": "Jl5mlU65DCXh" + }, + "source": [ + "Let us start with some simple examples of calculating the Earth Mover's distance between two distributions - the basis of Optimal Transport for bias detection. We do this using the `ot_distance` function from `aif360.sklearn.metrics`.\n", + "\n", + "For concrete examples of bias detection on real datasets, skip to the next chapter." ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "# Drawing both of them\n", - "figure, axis = plt.subplots(1, 2)\n", - "figure.set_figheight(4)\n", - "figure.set_figwidth(12)\n", - "figure.tight_layout(w_pad = 5)\n", - "\n", - "def draw(y, id):\n", - " x = np.array(range(0, np.size(y)))\n", - " axis[id].bar(x, y, color=\"lightgreen\", ec='black')\n", - " axis[id].scatter(x, y, color=\"orange\")\n", - "\n", - "axis[0].title.set_text(\"Initial distribution\")\n", - "axis[1].title.set_text(\"Required distribution\")\n", - "draw(a, 0)\n", - "draw(b, 1)\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9375\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "from aif360.sklearn.metrics import ot_distance\n", - "\n", - "_a = pd.Series(a)\n", - "_b = pd.Series(b)\n", - "c = ot_distance(_a, _b)\n", - "\n", - "print(c)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Usage\n", - "\n", - "The type of outcomes must be provided using the `mode` keyword argument. The definition for the four types of outcomes supported are provided below:\n", - "- Binary: Yes/no outcomes. Outcomes must 0 or 1.\n", - "- Continuous: Continuous outcomes. Outcomes could be any real number.\n", - "- Nominal: Multiclass outcomes with no rank or order between them.
Outcomes must be a finite set of integers.\n", - "- Ordinal: Multiclass outcomes that are ranked in a specific order. Outcomes must be positive integers." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Compas Dataset" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We'll demonstrate finding the scanning for bias with earth_movers_distance using the Compas dataset. We scan for bias in the predictions of an `sklearn` logistic regression model with respect to different groups." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "np.random.seed(0)\n", - "dataset_orig = load_preproc_data_compas()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We scan for bias at first with respect to `sex`, and then `age`.\n", - "\n", - "To scan for bias with respect for a feature that is one-hot encoded - in this case, age category - we need to convert it to nominal or ordinal format." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "Zxqm3r4bDCXh" + }, + "source": [ + "### 1. General Optimal Transport\n", + "\n", + "Suppose we have two distributions $a$ and $b$ (as shown in the picture below), and we need to calculate the Wasserstein distance between these two distributions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mf1QLAOSDCXh" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "# Initial distribution\n", + "a = np.array([0., 0.01547988, 0.03095975, 0.04643963, 0.05727554, 0.05417957, 0.04643963, 0.07739938,\n", + " 0.10835913, 0.12383901, 0.11764706, 0.10526316, 0.09287926, 0.07739938, 0.04643962, 0. ])\n", + "# Required distribution\n", + "b = np.array([0., 0.01829787, 0.02702128, 0.04106383, 0.07, 0.10829787, 0.14212766, 0.14468085,\n", + " 0.13, 0.10808511, 0.08255319, 0.05170213, 0.03361702, 0.02702128, 0.01553191, 0. ])" + ] + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sexracepriors_count=0priors_count=1 to 3priors_count=More than 3c_charge_degree=Fc_charge_degree=Mage_cattwo_year_recid
00.00.01.00.00.01.00.011.0
10.00.00.00.01.01.00.001.0
20.01.00.00.01.01.00.011.0
31.01.01.00.00.00.01.010.0
40.01.01.00.00.01.00.010.0
\n", - "
" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LSbUK_LVDCXi", + "outputId": "621856d3-65c9-4fa4-e82a-4d9ba29c50ee" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - " sex race priors_count=0 priors_count=1 to 3 priors_count=More than 3 \\\n", - "0 0.0 0.0 1.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 0.0 1.0 \n", - "2 0.0 1.0 0.0 0.0 1.0 \n", - "3 1.0 1.0 1.0 0.0 0.0 \n", - "4 0.0 1.0 1.0 0.0 0.0 \n", - "\n", - " c_charge_degree=F c_charge_degree=M age_cat two_year_recid \n", - "0 1.0 0.0 1 1.0 \n", - "1 1.0 0.0 0 1.0 \n", - "2 1.0 0.0 1 1.0 \n", - "3 0.0 1.0 1 0.0 \n", - "4 1.0 0.0 1 0.0 " + "source": [ + "import matplotlib.pyplot as plt\n", + "from scipy.interpolate import make_interp_spline\n", + "\n", + "# Drawing both of them\n", + "figure, axis = plt.subplots(1, 2)\n", + "figure.set_figheight(4)\n", + "figure.set_figwidth(12)\n", + "figure.tight_layout(w_pad = 5)\n", + "\n", + "def draw(y, id):\n", + " x = np.array(range(0, np.size(y)))\n", + " XYSpline = make_interp_spline(x, y)\n", + " X = np.linspace(x.min(), x.max(), 500)\n", + " Y = XYSpline(X)\n", + " axis[id].bar(x, y, color=\"lightgreen\", ec='black')\n", + " axis[id].scatter(x, y, color=\"orange\")\n", + " axis[id].plot(X, Y, color='blue')\n", + "\n", + "axis[0].title.set_text(\"Initial distribution\")\n", + "axis[1].title.set_text(\"Required distribution\")\n", + "draw(a, 0)\n", + "draw(b, 1)\n", + "\n", + "plt.show()" ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset_orig_df = pd.DataFrame(dataset_orig.features, columns=dataset_orig.feature_names)\n", - "# Binning the features corresponding to age ('reshaping' them into one ordinal column)\n", - "age_cat_cols = ['age_cat=Less than 25', 'age_cat=25 to 45', 'age_cat=Greater than 45']\n", - "age_cat = np.argmax(dataset_orig_df[age_cat_cols].values, axis=1).reshape(-1, 1)\n", - "df = dataset_orig_df.drop(age_cat_cols, axis=1)\n", - "df['age_cat'] = age_cat\n", - "df['two_year_recid'] = dataset_orig.labels\n", - "df.head()" - ] - }, - { - "attachments": {}, - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "### Measuring bias with respect to `sex`\n", - "\n", - "\n", - "We train a linear regression model on the dataset, and scan its results for bias with respect to `sex` using `earth_movers_distance`.\n", - "\n", - "The arguments are as follows:\n", - "- `ground_truth`: ground truth labels;\n", - "- `classifier`: predicted labels;\n", - "- `prot_attr`: the values of the sensitive attributes (with respect to which the classifier may be introducing bias);\n", - "- `num_iters`: maximum number of iterations performed when calculating the Earth Mover's Distance;\n", - "- `mode`: mode of the labels, one of binary, nominal, ordinal and continious; in our case the labels are binary." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "from aif360.sklearn.metrics import ot_distance\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "X = df.drop('two_year_recid', axis=1)\n", - "y = df['two_year_recid']\n", - "clf = LogisticRegression(solver='lbfgs', max_iter=10000, C=1.0, penalty='l2')\n", - "clf.fit(X, y)\n", - "preds = pd.Series(clf.predict_proba(X)[:,0])\n", - "\n", - "ot_val1 = ot_distance(y_true=y, y_pred=preds, prot_attr=df['sex'])" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sexot_val
00.00.000209
11.00.001647
\n", - "
" + "cell_type": "markdown", + "metadata": { + "id": "_w9qXdzUDCXj" + }, + "source": [ + "In order to better understand how Optimal Transport works, below is presented the code considering the case when the matrix cost distance is presented and defined as the absolute difference between positions of each part of the distribution. That is $\\text{distance}[i][j] = abs(i - j)$." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3Y0nUhvcDCXj" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "_a = pd.Series(a)\n", + "_b = pd.Series(b)\n", + "distance = np.zeros((np.size(a), np.size(b)))\n", + "for i in range(np.size(a)):\n", + " for j in range(np.size(b)):\n", + " distance[i][j] = abs(i - j)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P2i9hj8VDCXj", + "outputId": "d2affef6-0656-40f3-f5ad-1be39352a547" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Wasserstein distance is equal to 1.3773703499999999.\n" + ] + } ], - "text/plain": [ - " sex ot_val\n", - "0 0.0 0.000209\n", - "1 1.0 0.001647" + "source": [ + "from aif360.sklearn.metrics import ot_distance\n", + "c0 = ot_distance(y_true=_a, y_pred=_b, cost_matrix=distance, mode='continuous')\n", + "\n", + "print(\"Wasserstein distance is equal to \", c0, \".\", sep=\"\")" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "bs1 = pd.DataFrame({\"sex\": ot_val1.keys(), \"ot_val\": ot_val1.values()})\n", - "display(bs1)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We get the bias value for each each of the protected groups - in this case, Male (`0`) and Female (`1`). 
\n", - "\n", - "These values range from 0 to 1 and can be interpreted as the difference in percent between the ground truth distribution and the distribution of the protected group: for example, a value of 0.3 would mean a 30% difference." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Measuring bias with respect to `age_cat`\n", - "\n", - "Now we measure the bias of the same classifier with respect to the age category." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "ot_val2 = ot_distance(y_true=y, y_pred=preds, prot_attr=df['age_cat'])" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iXt221UKDCXk" + }, + "source": [ + "### 2. Randomly distributed samples\n", + "\n", + "Suppose we have two distributions $a$ and $b$ with length $N$, that are generated randomly, and we need to calculate earth_movers_distance for them." + ] + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
age_catot_val
000.000578
110.000313
220.001800
\n", - "
" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uM1m8MkYDCXk" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "N = 1000\n", + "np.random.seed(seed=1)\n", + "\n", + "# Initial distribution\n", + "a = np.random.rand(N)\n", + "a /= np.sum(a)\n", + "\n", + "# Required distribution\n", + "b = np.random.rand(N)\n", + "b /= np.sum(b)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KvR1F56qDCXk", + "outputId": "977b8774-b5cb-43e1-87ac-801dd4a91018" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - " age_cat ot_val\n", - "0 0 0.000578\n", - "1 1 0.000313\n", - "2 2 0.001800" + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Drawing both of them\n", + "figure, axis = plt.subplots(1, 2)\n", + "figure.set_figheight(4)\n", + "figure.set_figwidth(15)\n", + "figure.tight_layout(w_pad = 5)\n", + "\n", + "def draw(y, id):\n", + " axis[id].hist(y, color='lightgreen', ec='black', bins=10)\n", + "\n", + "axis[0].title.set_text(\"Initial distribution\")\n", + "axis[1].title.set_text(\"Required distribution\")\n", + "draw(a, 0)\n", + "draw(b, 1)\n", + "\n", + "plt.show()" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "bs2 = pd.DataFrame({\"age_cat\": ot_val2.keys(), \"ot_val\": ot_val2.values()})\n", - "display(bs2)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Adult Dataset" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let us consider the Adult Dataset. It has two protected categories: `sex` and `race`." - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", - "\n", - "import numpy as np\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
racesexAge (decade)=10Age (decade)=20Age (decade)=30Age (decade)=40Age (decade)=50Age (decade)=60Age (decade)=>=70Education Years=6Education Years=7Education Years=8Education Years=9Education Years=10Education Years=11Education Years=12Education Years=<6Education Years=>12Income Binary
00.01.00.01.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.0
11.01.00.00.01.00.00.00.00.00.00.00.01.00.00.00.00.00.00.0
21.01.00.01.00.00.00.00.00.00.00.00.00.00.00.01.00.00.01.0
30.01.00.00.00.01.00.00.00.00.00.00.00.01.00.00.00.00.01.0
41.00.01.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.0
\n", - "
" + "cell_type": "markdown", + "metadata": { + "id": "8cjTSciaDCXk" + }, + "source": [ + "In this case the Wasserstein distance tends to zero as the size of the samples increase." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bL9oZwWPDCXk", + "outputId": "3d78349e-b7cc-4d27-c86c-47fbcf79a921" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Wasserstein distance is: 2.003382269162742e-05.\n" + ] + } ], - "text/plain": [ - " race sex Age (decade)=10 Age (decade)=20 Age (decade)=30 \\\n", - "0 0.0 1.0 0.0 1.0 0.0 \n", - "1 1.0 1.0 0.0 0.0 1.0 \n", - "2 1.0 1.0 0.0 1.0 0.0 \n", - "3 0.0 1.0 0.0 0.0 0.0 \n", - "4 1.0 0.0 1.0 0.0 0.0 \n", - "\n", - " Age (decade)=40 Age (decade)=50 Age (decade)=60 Age (decade)=>=70 \\\n", - "0 0.0 0.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 0.0 \n", - "2 0.0 0.0 0.0 0.0 \n", - "3 1.0 0.0 0.0 0.0 \n", - "4 0.0 0.0 0.0 0.0 \n", - "\n", - " Education Years=6 Education Years=7 Education Years=8 Education Years=9 \\\n", - "0 0.0 1.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 1.0 \n", - "2 0.0 0.0 0.0 0.0 \n", - "3 0.0 0.0 0.0 0.0 \n", - "4 0.0 0.0 0.0 0.0 \n", - "\n", - " Education Years=10 Education Years=11 Education Years=12 \\\n", - "0 0.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 \n", - "2 0.0 0.0 1.0 \n", - "3 1.0 0.0 0.0 \n", - "4 1.0 0.0 0.0 \n", - "\n", - " Education Years=<6 Education Years=>12 Income Binary \n", - "0 0.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 \n", - "2 0.0 0.0 1.0 \n", - "3 0.0 0.0 1.0 \n", - "4 0.0 0.0 0.0 " + "source": [ + "import pandas as pd\n", + "from aif360.sklearn.metrics import ot_distance\n", + "\n", + "_a = pd.Series(a)\n", + "_b = pd.Series(b)\n", + "c = ot_distance(y_true=_a, y_pred=_b, mode='continuous')\n", + "\n", + "print(\"Wasserstein distance is: \", c, \".\", sep=\"\")" ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data_raw = load_preproc_data_adult()\n", - "data = 
data_raw.convert_to_dataframe()[0]\n", - "data.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Measuring bias with respect to `sex`" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "clf = LogisticRegression(solver='lbfgs', max_iter=10000, C=1.0, penalty='l2')\n", - "X = data.drop('Income Binary',axis=1)\n", - "y = data['Income Binary']\n", - "\n", - "clf.fit(X, y)\n", - "preds = pd.Series(clf.predict_proba(X)[:,0])\n", - "\n", - "ot_val1 = ot_distance(y_true=y, y_pred=preds, prot_attr=data['sex'])" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OBbhxn09DCXk" + }, + "source": [ + "### 3. Permutations\n", + "\n", + "Another example that shows clearly what the permutations in the first formula refer to is the one presented below." + ] + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sexot_val
00.00.000503
11.00.000067
\n", - "
" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B1CTcaWsDCXk" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "# Initial distribution\n", + "a = np.array([0., 0.1, 0.1, 0.1, 0.08, 0., 0.1, 0.1, 0.08, 0.08, 0., 0.1, 0.08, 0.08, 0.08, 0.])\n", + "# Required distribution\n", + "b = np.array([0., 0.08, 0.08, 0.08, 0.1, 0., 0.08, 0.08, 0.1, 0.1, 0., 0.08, 0.1, 0.1, 0.1, 0.])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mdhX4s8pDCXl", + "outputId": "763d21bc-502e-4442-eea5-63ee36f41d3e" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - " sex ot_val\n", - "0 0.0 0.000503\n", - "1 1.0 0.000067" + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Drawing both of them\n", + "figure, axis = plt.subplots(1, 2)\n", + "figure.set_figheight(4)\n", + "figure.set_figwidth(12)\n", + "figure.tight_layout(w_pad = 5)\n", + "\n", + "def draw(y, id):\n", + " x = np.array(range(0, np.size(y)))\n", + " axis[id].bar(x, y, color=\"lightgreen\", ec='black')\n", + " axis[id].scatter(x, y, color=\"orange\")\n", + "\n", + "axis[0].title.set_text(\"Initial distribution\")\n", + "axis[1].title.set_text(\"Required distribution\")\n", + "draw(a, 0)\n", + "draw(b, 1)\n", + "\n", + "plt.show()" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "bs1 = pd.DataFrame({\"sex\": ot_val1.keys(), \"ot_val\": ot_val1.values()})\n", - "display(bs1)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Measuring bias with respect to `race`" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "ot_val2 = ot_distance(y_true=y, y_pred=preds, prot_attr=data['race'])" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2b8iTs6CDCXl" + }, + "source": [ + "There, since we can go from the initial distribution to the desired one just using permutations, the Wasserstein distance is zero." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PeRPqgPyDCXl", + "outputId": "bd589c43-78e5-4232-a8bb-674d8368066c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from aif360.sklearn.metrics import ot_distance\n", + "\n", + "_a = pd.Series(a)\n", + "_b = pd.Series(b)\n", + "c = ot_distance(_a, _b, mode='continuous')\n", + "\n", + "print(c)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V5JjvRvJDCXl" + }, + "source": [ + "### 4. Extreme case\n", + "\n", + "One more example that is closer to our case is \"normalization\". It's an explanation of why the maximum Wasserstein distance we can get in our case is approaching 1 (with increasing the size of the sample), that is, it is normalized. We get this in the case that all our population has a value 0 of the 2-year recidivism (which is presented in the paragraph \"Compas Dataset\") and the classifier fails massively in all the cases labeling all with a 1. That would be the worst-case scenario." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C-XRy_ZNDCXl" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "# Initial distribution\n", + "a = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.001])\n", + "# Required distribution\n", + "b = np.array([0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625, 0.0625])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Wr7ugRoUDCXl", + "outputId": "73b33e31-cbe6-4ab1-994c-2922c8b51be2" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABLgAAAGUCAYAAAA285u8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVNklEQVR4nO3de1yUdd7/8TdxmNFSSlxBChDdCsxSg1ahEL1NTC076Eq1YaVW3NQqkKVopnlXpLkuax74WZrrr1La0LKNNbGUtZXa5GAnN6slMYM1rEQzOV6/P/wxd+MMyCA6Xszr+Xhcj73nO5/r+ny/w0if+8N18DIMwxAAAAAAAABgUue5ewIAAAAAAADA6aDBBQAAAAAAAFOjwQUAAAAAAABTo8EFAAAAAAAAU6PBBQAAAAAAAFOjwQUAAAAAAABTo8EFAAAAAAAAU6PBBQAAAAAAAFOjwQUAAAAAAABTo8EFeIA1a9bIy8tLu3btatP+Xl5emjdvnu31Z599pnnz5unrr792iL3nnnvUq1evNuU5nX0laejQoRo6dKjt9ddffy0vLy+tWbPGpeO88sorysrKcmkfZ7nmzZsnLy8vVVVVuXSslpypzx4AALSPprqrafPx8VHPnj11++2364svvnD39Gy2b98uLy8vbd++/azk69Wrl+6555427dtSneWKY8eOad68eS6v2VmuXr166cYbb3TpOKfSUg16cj0OwBENLgCnVFhYqClTpthef/bZZ3riiSecNlnmzJmjjRs3nsXZNa9nz54qLCzUmDFjXNqvLQ2utuZylVk+ewAAPN2LL76owsJCbd26VQ899JA2bdqk6667Tj/88IO7pyZJuvrqq1VYWKirr77a3VNpkylTpqiwsNClfY4dO6YnnnjC5QZXW3K1RUs16Mn1OABHPu6eAIBz3+DBg1sd26dPnzM4E9dYLBaX5t4WDQ0Nqq+vPyu5TuVc+uwBAPB0/fr1U3R0tKQTZ5k3NDRo7ty5ev3113Xvvfe6eXZS165dW1W7HDt2TJ07dz4LM3LNJZdcoksuueSM5mha+9nIdSrurjMBM+AMLsBD3XPPPbrgggv05ZdfavTo0brgggsUEhKihx9+WDU1NXaxvzwles2aNfrtb38rSRo2bJjt9PumU8adXSa3bNkyDRkyRD169ND555+vK6+8UgsXLlRdXV2b5m4YhhYuXKiwsDBZrVZdffXV+tvf/uYQ5+x09u+++07333+/QkJCZLFY9Ktf/UrXXnuttm7dKulEAfrWW29p3759dpcX/PJ4Cxcu1JNPPqnw8HBZLBZt27atxcsh9+/fr9t
uu01du3aVv7+/7rrrLn333Xd2Mc2ddv7L0/nb8tkfP35cGRkZCg8Pl5+fny6++GI9+OCD+vHHHx3y3Hjjjdq8ebOuvvpqderUSREREVq9enUzPwUAAOCKpmbXf/7zH7vxXbt2aezYserWrZusVqsGDhyoV1991WH/999/X9dee62sVquCg4OVkZGh559/Xl5eXnZndremppCcX6LYVB9+/PHHSkhIUJcuXTR8+HBJUm1trZ588klFRETYaqh7773Xoaapq6vTo48+qqCgIHXu3FnXXXed/vnPf7b6c/r22281YcIEdenSRf7+/kpMTFRlZaVDnLPLBt99910NHTpUAQEB6tSpk0JDQzVu3DgdO3ZMX3/9tX71q19Jkp544glbHdX0mTQdr7i4WOPHj9dFF11k++NhS5dDbty4UVdddZWsVqt69+6tJUuW2L3fdMnqyWffn/z5t1SDSs5/rp988oluvvlmXXTRRbJarRowYID+/Oc/O82zbt06zZ49W8HBweratauuv/56ff75507XBJgVZ3ABHqyurk5jx47V5MmT9fDDD+vvf/+7/ud//kf+/v56/PHHne4zZswYPf3005o1a5aWLVtmO629pbOHvvrqK9155522Jsvu3bv11FNP6V//+lebGihPPPGEnnjiCU2ePFnjx4/X/v37dd9996mhoUGXX355i/smJSWpuLhYTz31lC677DL9+OOPKi4u1qFDhyRJy5cv1/3336+vvvqq2cv9lixZossuu0yLFi1S165ddemll7aY89Zbb9WECROUnJysTz/9VHPmzNFnn32mDz74QL6+vq1et6ufvWEYuuWWW/TOO+8oIyNDcXFx+uijjzR37lwVFhaqsLBQFovFFr979249/PDDmjlzpgIDA/XCCy9o8uTJ+vWvf60hQ4a0ep4AAMBRWVmZJOmyyy6zjW3btk033HCDBg0apOzsbPn7+2v9+vVKTEzUsWPHbM2Xzz77TMOHD1evXr20Zs0ade7cWcuXL9crr7zS7vOsra3V2LFj9cADD2jmzJmqr69XY2Ojbr75Zu3YsUOPPvqoYmNjtW/fPs2dO1dDhw7Vrl271KlTJ0nSfffdp7Vr12r69OkaMWKEPvnkE9122206cuTIKXP//PPPuv766/Xtt98qMzNTl112md566y0lJiaect+vv/5aY8aMUVxcnFavXq0LL7xQBw4c0ObNm1VbW6uePXtq8+bNuuGGGzR58mTb5X5NTa8mt912m26//XYlJyfrp59+ajFnaWmpUlNTNW/ePAUFBenll1/WtGnTVFtbq+nTp59yzr/Umhr0lz7//HPFxsaqR48eWrJkiQICAvTSSy/pnnvu0X/+8x89+uijdvGzZs3StddeqxdeeEHV1dWaMWOGbrrpJu3Zs0fe3t4uzRU4ZxkAOrwXX3zRkGR8+OGHtrG7777bkGS8+uqrdrGjR482Lr/8crsxScbcuXNtr//yl78Ykoxt27Y55Lr77ruNsLCwZufS0NBg1NXVGWvXrjW8vb2N77//vtX7GoZh/PDDD4bVajVuvfVWu/F//OMfhiQjPj7eNlZWVmZIMl588UXb2AUXXGCkpqa2mGPMmDFO59F0vD59+hi1tbVO3/tlrrlz5xqSjLS0NLvYl19+2ZBkvPTSS7axkz/jJmFhYcbdd99te+3KZ79582ZDkrFw4UK7uJycHEOSsXLlSrs8VqvV2Ldvn23s559/Nrp162Y88MADDrkAAIBzTXXX+++/b9TV1RlHjhwxNm/ebAQFBRlDhgwx6urqbLERERHGwIED7cYMwzBuvPFGo2fPnkZDQ4NhGIaRmJhodOrUyaisrLTF1NfXGxEREYYko6yszDbe2ppi27ZtDjVFU324evVqu33XrVtnSDJyc3Ptxj/88ENDkrF8+XLDMAxjz549LdY+v8zvzIoVKwxJxhtvvGE3ft999zVbZzV57bXXDElGaWlps8f/7rvvmv18mo73+OOPN/veL4W
FhRleXl4O+UaMGGF07drV+OmnnwzD+N/vwy9/Robh/PNvrgY1DMef6+23325YLBajvLzcLm7UqFFG586djR9//NEuz+jRo+3iXn31VUOSUVhY6DQfYEZcogh4MC8vL9100012Y1dddZX27dvXrnlKSko0duxYBQQEyNvbW76+vpo4caIaGhq0d+9el45VWFio48eP63e/+53deGxsrMLCwk65/29+8xutWbNGTz75pN5///02XSY5duxYl868OnmuEyZMkI+Pj7Zt2+Zyble8++67kuTwxKLf/va3Ov/88/XOO+/YjQ8YMEChoaG211arVZdddlm7fx8AAPAEgwcPlq+vr7p06aIbbrhBF110kd544w35+Jy4iObLL7/Uv/71L1udUF9fb9tGjx6tiooK2yVk27Zt0/DhwxUYGGg7vre3d6vObGqLcePG2b3+61//qgsvvFA33XST3TwHDBigoKAg22V2TbVNc7XPqWzbtk1dunTR2LFj7cbvvPPOU+47YMAA+fn56f7779ef//xn/fvf/z7lPs6cvPaWXHHFFerfv7/d2J133qnq6moVFxe3KX9rvfvuuxo+fLhCQkLsxu+55x4dO3bM4ab4J3+mV111lSRR56FDocEFeLDOnTvLarXajVksFh0/frzdcpSXlysuLk4HDhzQn/70J+3YsUMffvihli1bJunEqeiuaLqUMCgoyOE9Z2Mny8nJ0d13360XXnhBMTEx6tatmyZOnOj03g7N6dmzZ+sn7GRePj4+CggIsK3lTDl06JB8fHwcTr338vJSUFCQQ/6AgACHY1gsFpd/RgAAQFq7dq0+/PBDvfvuu3rggQe0Z88e3XHHHbb3m+7FNX36dPn6+tptKSkpkqSqqipJJ/6b3tbax1WdO3dW165d7cb+85//6Mcff5Sfn5/DXCsrK+3m6WxeTbXPqRw6dMiuidekNevs06ePtm7dqh49eujBBx9Unz591KdPH/3pT3865b6/5Eqd19LP5GzUec7mGhwc7DT/yZ9/020qqPPQkXAPLgBn1Ouvv66ffvpJGzZssDvDqrS0tE3Ha/qPs7OGVGVlpcNN1k/WvXt3ZWVlKSsrS+Xl5dq0aZNmzpypgwcPavPmza2aQ3M3GW1OZWWlLr74Ytvr+vp6HTp0yK7QsFgsDjf3l06vOAoICFB9fb2+++47uyaXYRiqrKzUNddc0+ZjAwCAlkVGRtpuLD9s2DA1NDTohRde0Guvvabx48ere/fukqSMjAzddtttTo/RdG/RgICAZmufk51uTeGszunevbsCAgKarZW6dOlim2fTvJzVPqcSEBDg9Ib0rf1DZFxcnOLi4tTQ0KBdu3bpueeeU2pqqgIDA3X77be36hiu1Hkt/UyaPoumPyaf/DNpagq2VUBAgCoqKhzGv/32W0myfb8AT8IZXABc5spffJqKhF/ezNwwDD3//PNtyj148GBZrVa9/PLLduM7d+50+RTr0NBQPfTQQxoxYoTdaeTtfdbSyXN99dVXVV9fr6FDh9rGevXqpY8++sgu7t1339XRo0ftxlz57JueevTSSy/Zjefm5uqnn36yvQ8AAM68hQsX6qKLLtLjjz+uxsZGXX755br00ku1e/duRUdHO92aGkfDhg3TO++8Y/cExoaGBuXk5DjkaW1N4Yobb7xRhw4dUkNDg9N5NjXimmqb5mqfUxk2bJiOHDmiTZs22Y27ejN9b29vDRo0yHbFQFOd195nLX366afavXu33dgrr7yiLl262B4G1PTH15N/JievsWl+rZ3b8OHD9e6779oaWk3Wrl2rzp07a/Dgwa1dBtBhcAYXAJf169dPkrRy5Up16dJFVqtV4eHhTk89HzFihPz8/HTHHXfo0Ucf1fHjx7VixQr98MMPbcp90UUXafr06XryySc1ZcoU/fa3v9X+/fttT69pyeHDhzVs2DDdeeedioiIUJcuXfThhx9q8+bNdn85vfLKK7VhwwatWLFCUVFROu+
882x/gW2LDRs2yMfHRyNGjLA9RbF///6aMGGCLSYpKUlz5szR448/rvj4eH322WdaunSp/P397Y7l6mc/cuRIzZgxQ9XV1br22mttT1EcOHCgkpKS2rwmAADgmosuukgZGRl69NFH9corr+iuu+7S//k//0ejRo3SyJEjdc899+jiiy/W999/rz179qi4uFh/+ctfJEmPPfaYNm3apP/6r//S448/rs6dO2vZsmVOn/LX2prCFbfffrtefvlljR49WtOmTdNvfvMb+fr66ptvvtG2bdt0880369Zbb1VkZKTuuusuZWVlydfXV9dff70++eQT25OnT2XixIn64x//qIkTJ+qpp57SpZdeqry8PL399tun3Dc7O1vvvvuuxowZo9DQUB0/ftz2tO7rr79e0okzzcLCwvTGG29o+PDh6tatm7p3737KKwCaExwcrLFjx2revHnq2bOnXnrpJeXn52vBggXq3LmzJOmaa67R5ZdfrunTp6u+vl4XXXSRNm7cqPfee8/heK7UoHPnztVf//pXDRs2TI8//ri6deuml19+WW+99ZYWLlx4Wj9vwLTcfZd7AGdec09RPP/88x1inT0lRk6eNpOVlWWEh4cb3t7edk+1cfYkxDfffNPo37+/YbVajYsvvth45JFHjL/97W9On9xzqqcoGoZhNDY2GpmZmUZISIjh5+dnXHXVVcabb75pxMfHt/gUxePHjxvJycnGVVddZXTt2tXo1KmTcfnllxtz5861PenGMAzj+++/N8aPH29ceOGFhpeXl+3zaDres88+6zCnlp6iWFRUZNx0003GBRdcYHTp0sW44447jP/85z92+9fU1BiPPvqoERISYnTq1MmIj483SktLHZ545Opn//PPPxszZswwwsLCDF9fX6Nnz57Gf//3fxs//PCDXVxYWJgxZswYh3Wd/JkCAICWOau7mvz8889GaGiocemllxr19fWGYRjG7t27jQkTJhg9evQwfH19jaCgIOO//uu/jOzsbLt9//GPfxiDBw82LBaLERQUZDzyyCPGypUrHZ7Q19qaormnKDqrDw3DMOrq6oxFixbZaroLLrjAiIiIMB544AHjiy++sMv/8MMPGz169DCsVqsxePBgo7Cw0GlN48w333xjjBs3zlY3jRs3zti5c+cpn6JYWFho3HrrrUZYWJhhsViMgIAAIz4+3ti0aZPd8bdu3WoMHDjQsFgsdk92bDred9995zCn5p6iOGbMGOO1114zrrjiCsPPz8/o1auXsXjxYof99+7dayQkJBhdu3Y1fvWrXxm///3vjbfeesvh82+uBjUM5/X4xx9/bNx0002Gv7+/4efnZ/Tv39/uMzKM//05/+Uvf7Ebd1a7AmbnZRiGcfbaaQAAAACA9rBmzRrde++9Kisra/NZSADQUXAPLgAAAAAAAJgaDS4AAAAAAACYGpcoAgAAAAAAwNQ4gwsAAAAAAACmRoMLAAAAAAAApkaDCwAAAAAAAKbm4+4JnEsaGxv17bffqkuXLvLy8nL3dAAAwBlgGIaOHDmi4OBgnXcef+szG+o1AAA6vrbUazS4fuHbb79VSEiIu6cBAADOgv379+uSSy5x9zTgIuo1AAA8hyv1Gg2uX+jSpYukEx9g165d3TwbAABwJlRXVyskJMT2332YC/UaAAAdX1vqNRpcv9B0mnvXrl0pmAAA6OC4vM2cqNcAAPAcrtRr3HgCAAAAAAAApkaDCwAAAAAAAKZGgwsAAAAAAACmRoMLAAAAAAAApkaDCwAAAAAAAKZGgwsAAAAAAACmRoMLAAAAAAAApkaDCwAAAAAAAKbm4+4JAACADq6xQfpuh/RzhdSpp/SrOOk8b3fPCjg97vhee0pOd+UlJznNmpecHSunu/J2gHqtTQ2u5cuX69lnn1VFRYWuuOIKZWVlKS4urtn4goICpaen69NPP1VwcLAeffRRJScn28Xk5uZqzpw5+uqrr9SnTx899dRTuvXWW23v//3
vf9ezzz6roqIiVVRUaOPGjbrlllvsjmEYhp544gmtXLlSP/zwgwYNGqRly5bpiiuuaMsyAQDA6dq/QSqaJh375n/HOl8iRf1JCrnNffMCToc7vteektNdeclJTrPmJWfHyumuvB2kXnP5EsWcnBylpqZq9uzZKikpUVxcnEaNGqXy8nKn8WVlZRo9erTi4uJUUlKiWbNmaerUqcrNzbXFFBYWKjExUUlJSdq9e7eSkpI0YcIEffDBB7aYn376Sf3799fSpUubndvChQu1ePFiLV26VB9++KGCgoI0YsQIHTlyxNVlAgCA07V/g7RjvH2xJEnHDpwY37/BPfMCToc7vteektNdeclJTrPmJWfHyumuvB2oXvMyDMNwZYdBgwbp6quv1ooVK2xjkZGRuuWWW5SZmekQP2PGDG3atEl79uyxjSUnJ2v37t0qLCyUJCUmJqq6ulp/+9vfbDE33HCDLrroIq1bt85x0l5eDmdwGYah4OBgpaamasaMGZKkmpoaBQYGasGCBXrggQdOubbq6mr5+/vr8OHD6tq166k/DAAA4Fxjg7Spl2OxZON14i+DY8vO+unv/Pfe3Nz683PH99pTcrorLznJada85OxYOd2Vt4PVay5dolhbW6uioiLNnDnTbjwhIUE7d+50uk9hYaESEhLsxkaOHKlVq1aprq5Ovr6+KiwsVFpamkNMVlZWq+dWVlamyspKu1wWi0Xx8fHauXOn0wZXTU2NampqbK+rq6tbnQ8AAE9XXl6uqqoqp+9d8NMuXdZssSRJhnRsv/b+Y5WOnh/tNKJ79+4KDQ1th5kCreeO77W7/i01l9cdOdsjr6fkbC4v3yO+u67kbC4v3yO+R67kbCmvO7jU4KqqqlJDQ4MCAwPtxgMDA1VZWel0n8rKSqfx9fX1qqqqUs+ePZuNae6YzeVp2u/k4+zbt8/pPpmZmXriiSdanQMAAJxQXl6uiMgI/XzsZ6fv3x4jrXvo1MeZO+MBrS90/l6nzp30rz3/OmeKJnR87vheu+vfUkt53ZGzPfJ6Sk5nefkeuS9ne+Tle3Ru/Uz5HrU+Z3N53aVNN5n38vKye20YhsPYqeJPHnf1mO0xt4yMDKWnp9teV1dXKyQkxOWcAAB4mqqqKv187Gfd9X/uUuBlgQ7v96nZLx169ZTHGTJ3gi62OP639z97/6OXHnhJVVVV50TBBM/gju+1u/4ttZTXHTlPN6+n5GwuL98j9+U83bx8j869nynfo9blbCmvu7jU4Orevbu8vb0dzqw6ePCgw5lTTYKCgpzG+/j4KCAgoMWY5o7ZXB7pxJlcPXv2bNVxLBaLLBZLq3MAAAB7gZcFKqS/Y8FTZ1ysI9u36ILjP8rZn5kMSUetF6rumsEK8XL5mTfAGeWO77W7/i05y+uOnGc6Lzk943vEd9e8OZvL6yk5z3ReT6nXXJqhn5+foqKilJ+fbzeen5+v2NhYp/vExMQ4xG/ZskXR0dHy9fVtMaa5YzoTHh6uoKAgu+PU1taqoKDApeMAAIDTZ3idp4LIW0/83ye/9///tyDyVhkmKJaAJu74XntKTnflJSc5zZqXnB0rp7vydrR6zeVZpqen64UXXtDq1au1Z88epaWlqby8XMnJyZJOXPY3ceJEW3xycrL27dun9PR07dmzR6tXr9aqVas0ffp0W8y0adO0ZcsWLViwQP/617+0YMECbd26VampqbaYo0ePqrS0VKWlpZJO3FS+tLRU5eXlkk5cmpiamqqnn35aGzdu1CeffKJ77rlHnTt31p133tmWzwYAAJyGr4L6662B9+qo9UK78aPWC/XWwHv1VVB/90wMOA3u+F57Sk535SUnOc2al5wdK6e78nakes3le3AlJibq0KFDmj9/vioqKtSvXz/l5eUpLCxMklRRUWFrOkknzqzKy8tTWlqali1bpuDgYC1ZskTjxo2zxcTGxmr9+vV67LHHNGfOHPXp00c5OTkaNGiQLWbXrl0aNmyY7XX
TvbPuvvturVmzRpL06KOP6ueff1ZKSop++OEHDRo0SFu2bFGXLl1cXSYAAGgHXwX1178Dr1Tw91/p/Jpq/WTpqm+79THNXwIBZ9zxvfaUnO7KS05ymjUvOTtWTnfl7Sj1WptuMp+SkqKUlBSn7zU1m34pPj5excXFLR5z/PjxGj9+fLPvDx061HZz+uZ4eXlp3rx5mjdvXotxAADg7DG8ztOBgEvdPQ2gXbnje+0pOd2Vl5zkNGtecnasnO7K2xHqNXO14wAAAAAAAICT0OACAAAAAACAqdHgAgAAgFPLly9XeHi4rFaroqKitGPHjhbjCwoKFBUVJavVqt69eys7O9sh5scff9SDDz6onj17ymq1KjIyUnl5eWdqCQAAwEPQ4AIAAICDnJwcpaamavbs2SopKVFcXJxGjRpl9zChXyorK9Po0aMVFxenkpISzZo1S1OnTlVubq4tpra2ViNGjNDXX3+t1157TZ9//rmef/55XXzxxWdrWQAAoINq003mAQAA0LEtXrxYkydP1pQpUyRJWVlZevvtt7VixQplZmY6xGdnZys0NFRZWVmSpMjISO3atUuLFi2yPT179erV+v7777Vz5075+vpKku1J3AAAAKeDM7gAAABgp7a2VkVFRUpISLAbT0hI0M6dO53uU1hY6BA/cuRI7dq1S3V1dZKkTZs2KSYmRg8++KACAwPVr18/Pf3002poaGh2LjU1NaqurrbbAAAATkaDCwAAAHaqqqrU0NCgwMBAu/HAwEBVVlY63aeystJpfH19vaqqqiRJ//73v/Xaa6+poaFBeXl5euyxx/SHP/xBTz31VLNzyczMlL+/v20LCQk5zdUBAICOiAYXAAAAnPLy8rJ7bRiGw9ip4n853tjYqB49emjlypWKiorS7bffrtmzZ2vFihXNHjMjI0OHDx+2bfv372/rcgAAQAfGPbgAAABgp3v37vL29nY4W+vgwYMOZ2k1CQoKchrv4+OjgIAASVLPnj3l6+srb29vW0xkZKQqKytVW1srPz8/h+NaLBZZLJbTXRIAAOjgOIMLAAAAdvz8/BQVFaX8/Hy78fz8fMXGxjrdJyYmxiF+y5Ytio6Ott1Q/tprr9WXX36pxsZGW8zevXvVs2dPp80tAACA1qLBBQAAAAfp6el64YUXtHr1au3Zs0dpaWkqLy9XcnKypBOXDk6cONEWn5ycrH379ik9PV179uzR6tWrtWrVKk2fPt0W89///d86dOiQpk2bpr179+qtt97S008/rQcffPCsrw8AAHQsXKIIAAAAB4mJiTp06JDmz5+viooK9evXT3l5eQoLC5MkVVRUqLy83BYfHh6uvLw8paWladmyZQoODtaSJUs0btw4W0xISIi2bNmitLQ0XXXVVbr44os1bdo0zZgx46yvDwAAdCw0uAAAAOBUSkqKUlJSnL63Zs0ah7H4+HgVFxe3eMyYmBi9//777TE9AAAAGy5RBAAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4AIAAAAAAICp0eACAAAAAACAqdH
gAgAAAAAAgKnR4AIAAAAAAICptanBtXz5coWHh8tqtSoqKko7duxoMb6goEBRUVGyWq3q3bu3srOzHWJyc3PVt29fWSwW9e3bVxs3bnQ579GjR/XQQw/pkksuUadOnRQZGakVK1a0ZYkAAAAer71rvjVr1sjLy8thO378+JlcBgAA8AAuN7hycnKUmpqq2bNnq6SkRHFxcRo1apTKy8udxpeVlWn06NGKi4tTSUmJZs2apalTpyo3N9cWU1hYqMTERCUlJWn37t1KSkrShAkT9MEHH7iUNy0tTZs3b9ZLL72kPXv2KC0tTb///e/1xhtvuLpMAAAAj3Ymaj5J6tq1qyoqKuw2q9V6NpYEAAA6MJcbXIsXL9bkyZM1ZcoURUZGKisrSyEhIc2eKZWdna3Q0FBlZWUpMjJSU6ZM0aRJk7Ro0SJbTFZWlkaMGKGMjAxFREQoIyNDw4cPV1ZWlkt5CwsLdffdd2vo0KHq1auX7r//fvXv31+7du1ydZkAAAAe7UzUfJLk5eWloKAguw0AAOB0udTgqq2tVVFRkRISEuzGExIStHPnTqf7FBYWOsSPHDlSu3btUl1dXYsxTcdsbd7rrrtOmzZt0oEDB2QYhrZt26a9e/dq5MiRTudWU1Oj6upquw0AAMDTnamaTzpxS4mwsDBdcskluvHGG1VSUtLiXKjXAABAa7jU4KqqqlJDQ4MCAwPtxgMDA1VZWel0n8rKSqfx9fX1qqqqajGm6ZitzbtkyRL17dtXl1xyifz8/HTDDTdo+fLluu6665zOLTMzU/7+/rYtJCSkFZ8CAABAx3amar6IiAitWbNGmzZt0rp162S1WnXttdfqiy++aHYu1GsAAKA12nSTeS8vL7vXhmE4jJ0q/uTx1hzzVDFLlizR+++/r02bNqmoqEh/+MMflJKSoq1btzqdV0ZGhg4fPmzb9u/f3+waAAAAPE1713yDBw/WXXfdpf79+ysuLk6vvvqqLrvsMj333HPNHpN6DQAAtIaPK8Hdu3eXt7e3w1/uDh486PAXuyZBQUFO4318fBQQENBiTNMxW5P3559/1qxZs7Rx40aNGTNGknTVVVeptLRUixYt0vXXX+8wN4vFIovF0trlAwAAeIQzVfOd7LzzztM111zT4hlc1GsAAKA1XDqDy8/PT1FRUcrPz7cbz8/PV2xsrNN9YmJiHOK3bNmi6Oho+fr6thjTdMzW5K2rq1NdXZ3OO89+Sd7e3mpsbHRlmQAAAB7tTNV8JzMMQ6WlperZs2f7TBwAAHgsl87gkqT09HQlJSUpOjpaMTExWrlypcrLy5WcnCzpxGnkBw4c0Nq1ayVJycnJWrp0qdLT03XfffepsLBQq1at0rp162zHnDZtmoYMGaIFCxbo5ptv1htvvKGtW7fqvffea3Xerl27Kj4+Xo888og6deqksLAwFRQUaO3atVq8ePFpfUgAAACe5kzUfE888YQGDx6sSy+9VNXV1VqyZIlKS0u1bNkyt6wRAAB0HC43uBITE3Xo0CHNnz9fFRUV6tevn/Ly8hQWFiZJqqioUHl5uS0+PDxceXl5SktL07JlyxQcHKwlS5Zo3LhxtpjY2FitX79ejz32mObMmaM+ffooJydHgwYNanVeSVq/fr0yMjL0u9/9Tt9//73CwsL01FNP2QoxAAAAtM6ZqPl+/PFH3X///aqsrJS/v78GDhyov//97/rNb35z1tcHAAA6FpcbXJKUkpKilJQUp++tWbPGYSw+Pl7FxcUtHnP8+PEaP358m/NKJ+798OKLL7Z4DAAAALROe9d8f/zjH/XHP/6xvaYHAABg06anKAIAAAAAAADnChpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRp
cAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAHBq+fLlCg8Pl9VqVVRUlHbs2NFifEFBgaKiomS1WtW7d29lZ2c3G7t+/Xp5eXnplltuaedZAwAAT0SDCwAAAA5ycnKUmpqq2bNnq6SkRHFxcRo1apTKy8udxpeVlWn06NGKi4tTSUmJZs2apalTpyo3N9chdt++fZo+fbri4uLO9DIAAICHoMEFAAAAB4sXL9bkyZM1ZcoURUZGKisrSyEhIVqxYoXT+OzsbIWGhiorK0uRkZGaMmWKJk2apEWLFtnFNTQ06He/+52eeOIJ9e7d+2wsBQAAeAAaXAAAALBTW1uroqIiJSQk2I0nJCRo586dTvcpLCx0iB85cqR27dqluro629j8+fP1q1/9SpMnT27VXGpqalRdXW23AQAAnIwGFwAAAOxUVVWpoaFBgYGBduOBgYGqrKx0uk9lZaXT+Pr6elVVVUmS/vGPf2jVqlV6/vnnWz2XzMxM+fv727aQkBAXVwMAADwBDS4AAAA45eXlZffaMAyHsVPFN40fOXJEd911l55//nl179691XPIyMjQ4cOHbdv+/ftdWAEAAPAUPu6eAAAAAM4t3bt3l7e3t8PZWgcPHnQ4S6tJUFCQ03gfHx8FBATo008/1ddff62bbrrJ9n5jY6MkycfHR59//rn69OnjcFyLxSKLxXK6SwIAAB0cZ3ABAADAjp+fn6KiopSfn283np+fr9jYWKf7xMTEOMRv2bJF0dHR8vX1VUREhD7++GOVlpbatrFjx2rYsGEqLS3l0kMAAHBaOIMLAAAADtLT05WUlKTo6GjFxMRo5cqVKi8vV3JysqQTlw4eOHBAa9eulSQlJydr6dKlSk9P13333afCwkKtWrVK69atkyRZrVb169fPLseFF14oSQ7jAAAArqLBBQAAAAeJiYk6dOiQ5s+fr4qKCvXr1095eXkKCwuTJFVUVKi8vNwWHx4erry8PKWlpWnZsmUKDg7WkiVLNG7cOHctAQAAeBAaXAAAAHAqJSVFKSkpTt9bs2aNw1h8fLyKi4tbfXxnxwAAAGgL7sEFAAAAAAAAU6PBBQAAAAAAAFNrU4Nr+fLlCg8Pl9VqVVRUlHbs2NFifEFBgaKiomS1WtW7d29lZ2c7xOTm5qpv376yWCzq27evNm7c2Ka8e/bs0dixY+Xv768uXbpo8ODBdveHAAAAAAAAQMficoMrJydHqampmj17tkpKShQXF6dRo0Y120QqKyvT6NGjFRcXp5KSEs2aNUtTp05Vbm6uLaawsFCJiYlKSkrS7t27lZSUpAkTJuiDDz5wKe9XX32l6667ThEREdq+fbt2796tOXPmyGq1urpMAAAAAAAAmITLDa7Fixdr8uTJmjJliiIjI5WVlaWQkBCtWLHCaXx2drZCQ0OVlZWlyMhITZkyRZMmTdKiRYtsMVlZWRoxYoQyMjIUERGhjIwMDR8+XFlZWS7lnT17tkaPHq2FCxdq4MCB6t27t8aMGaMePXq4ukwAAAAAAACYhEsNrtraWhUVFSkhIcFuPCEhQTt37nS6T2FhoUP8yJEjtWvXLtXV1bUY03TM1uRtbGzUW2+9pcsuu0wjR45Ujx49NGjQIL3++uvNrqempkbV1dV2GwAAAAAAAMzFpQZ
XVVWVGhoaFBgYaDceGBioyspKp/tUVlY6ja+vr1dVVVWLMU3HbE3egwcP6ujRo3rmmWd0ww03aMuWLbr11lt12223qaCgwOncMjMz5e/vb9tCQkJa+UkAAAAAAADgXNGmm8x7eXnZvTYMw2HsVPEnj7fmmC3FNDY2SpJuvvlmpaWlacCAAZo5c6ZuvPFGpze1l6SMjAwdPnzYtu3fv7/ZNQAAAAAAAODc5ONKcPfu3eXt7e1wttbBgwcdzq5qEhQU5DTex8dHAQEBLcY0HbM1ebt37y4fHx/17dvXLiYyMlLvvfee07lZLBZZLJaWlgwAAAAAAIBznEtncPn5+SkqKkr5+fl24/n5+YqNjXW6T0xMjEP8li1bFB0dLV9f3xZjmo7Zmrx+fn665ppr9Pnnn9vF7N27V2FhYa4sEwAAAAAAACbi0hlckpSenq6kpCRFR0crJiZGK1euVHl5uZKTkyWduOzvwIEDWrt2rSQpOTlZS5cuVXp6uu677z4VFhZq1apVWrdune2Y06ZN05AhQ7RgwQLdfPPNeuONN7R161a7M69OlVeSHnnkESUmJmrIkCEaNmyYNm/erDfffFPbt29v6+cDAAAAAACAc5zLDa7ExEQdOnRI8+fPV0VFhfr166e8vDzbWVIVFRUqLy+3xYeHhysvL09paWlatmyZgoODtWTJEo0bN84WExsbq/Xr1+uxxx7TnDlz1KdPH+Xk5GjQoEGtzitJt956q7Kzs5WZmampU6fq8ssvV25urq677ro2fTgAAAAAAAA497nc4JKklJQUpaSkOH1vzZo1DmPx8fEqLi5u8Zjjx4/X+PHj25y3yaRJkzRp0qQWYwAAAAAAANBxtOkpigAAAAAAAMC5ggYXAAAAAAAATI0GFwAAAAAAAEyNBhcAAAAAAABMjQYXAAAAAAAATI0GFwAAAAAAAEyNBhcAAAAAAABMjQYXAAAAAAAATI0GFwAAAAAAAEyNBhcAAAAAAABMjQYXAAAAAAAATI0GFwAAAAAAAEyNBhcAAAAAAABMjQYXAAAAAAAATI0GFwAAAAAAAEyNBhcAAAAAAABMjQYXAAAAAAAATI0GFwAAAAAAAEyNBhcAAAAAAABMjQYXAAAAAAAATI0GFwAAAAAAAEyNBhcAAAAAAABMjQYXAAAAAAAATI0GFwAAAAAAAEyNBhcAAAAAAABMjQYXAAAAAAAATI0GFwAAAAAAAEyNBhcAAAAAAABMjQYXAAAAnFq+fLnCw8NltVoVFRWlHTt2tBhfUFCgqKgoWa1W9e7dW9nZ2Xbvb9iwQdHR0brwwgt1/vnna8CAAfq///f/nsklAAAAD0GDCwAAAA5ycnKUmpqq2bNnq6SkRHFxcRo1apTKy8udxpeVlWn06NGKi4tTSUmJZs2apalTpyo3N9cW061bN82ePVuFhYX66KOPdO+99+ree+/V22+/fbaWBQAAOigfd08AAAAA557Fixdr8uTJmjJliiQpKytLb7/9tlasWKHMzEyH+OzsbIWGhiorK0uSFBkZqV27dmnRokUaN26cJGno0KF2+0ybNk1//vOf9d5772nkyJFO51FTU6Oamhrb6+rq6nZYHQAA6Gg4gwsAAAB2amtrVVRUpISEBLvxhIQE7dy50+k+hYWFDvEjR47Url27VFdX5xBvGIbeeecdff755xoyZEizc8nMzJS/v79tCwkJacOKAABAR0eDCwAAAHaqqqrU0NCgwMBAu/HAwEBVVlY63aeystJpfH19vaqqqmxjhw8f1gUXXCA/Pz+NGTNGzz33nEaMGNHsXDIyMnT48GHbtn///tNYGQAA6Ki4RBEAAABOeXl52b02DMNh7FTxJ4936dJFpaWlOnr0qN555x2lp6erd+/eDpcvNrFYLLJYLG1cAQAA8BQ0uAAAAGCne/fu8vb2djhb6+DBgw5naTUJCgpyGu/j46OAgADb2Hnnnadf//rXkqQBAwZoz549yszMbLbBBQAA0Bpcogg
AAAA7fn5+ioqKUn5+vt14fn6+YmNjne4TExPjEL9lyxZFR0fL19e32VyGYdjdRB4AAKAtOIMLAAAADtLT05WUlKTo6GjFxMRo5cqVKi8vV3JysqQT98Y6cOCA1q5dK0lKTk7W0qVLlZ6ervvuu0+FhYVatWqV1q1bZztmZmamoqOj1adPH9XW1iovL09r167VihUr3LJGAADQcdDgAgAAgIPExEQdOnRI8+fPV0VFhfr166e8vDyFhYVJkioqKlReXm6LDw8PV15entLS0rRs2TIFBwdryZIlGjdunC3mp59+UkpKir755ht16tRJEREReumll5SYmHjW1wcAADoWGlwAAABwKiUlRSkpKU7fW7NmjcNYfHy8iouLmz3ek08+qSeffLK9pgcAAGDDPbgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqbWpwLV++XOHh4bJarYqKitKOHTtajC8oKFBUVJSsVqt69+6t7Oxsh5jc3Fz17dtXFotFffv21caNG08r7wMPPCAvLy9lZWW5vD4AAAAAAACYh8sNrpycHKWmpmr27NkqKSlRXFycRo0apfLycqfxZWVlGj16tOLi4lRSUqJZs2Zp6tSpys3NtcUUFhYqMTFRSUlJ2r17t5KSkjRhwgR98MEHbcr7+uuv64MPPlBwcLCrywMAAAAAAIDJuNzgWrx4sSZPnqwpU6YoMjJSWVlZCgkJ0YoVK5zGZ2dnKzQ0VFlZWYqMjNSUKVM0adIkLVq0yBaTlZWlESNGKCMjQxEREcrIyNDw4cPtzr5qbd4DBw7ooYce0ssvvyxfX98W11JTU6Pq6mq7DQAAAAAAAObiUoOrtrZWRUVFSkhIsBtPSEjQzp07ne5TWFjoED9y5Ejt2rVLdXV1LcY0HbO1eRsbG5WUlKRHHnlEV1xxxSnXk5mZKX9/f9sWEhJyyn0AAAAAAABwbnGpwVVVVaWGhgYFBgbajQcGBqqystLpPpWVlU7j6+vrVVVV1WJM0zFbm3fBggXy8fHR1KlTW7WejIwMHT582Lbt37+/VfsBAAAAAADg3OHTlp28vLzsXhuG4TB2qviTx1tzzJZiioqK9Kc//UnFxcUtzuWXLBaLLBZLq2IBAAAAAABwbnLpDK7u3bvL29vb4WytgwcPOpxd1SQoKMhpvI+PjwICAlqMaTpma/Lu2LFDBw8eVGhoqHx8fOTj46N9+/bp4YcfVq9evVxZJgAAAAAAAEzEpQaXn5+foqKilJ+fbzeen5+v2NhYp/vExMQ4xG/ZskXR0dG2m8A3F9N0zNbkTUpK0kcffaTS0lLbFhwcrEceeURvv/22K8sEAAAAAACAibh8iWJ6erqSkpIUHR2tmJgYrVy5UuXl5UpOTpZ04r5WBw4c0Nq1ayVJycnJWrp0qdLT03XfffepsLBQq1at0rp162zHnDZtmoYMGaIFCxbo5ptv1htvvKGtW7fqvffea3XegIAA2xlhTXx9fRUUFKTLL7/c9U8GAAAAAAAApuBygysxMVGHDh3S/PnzVVFRoX79+ikvL09hYWGSpIqKCpWXl9viw8PDlZeXp7S0NC1btkzBwcFasmSJxo0bZ4uJjY3V+vXr9dhjj2nOnDnq06ePcnJyNGjQoFbnBQAAAAAAgGdq003mU1JSlJKS4vS9NWvWOIzFx8eruLi4xWOOHz9e48ePb3NeZ77++utWxwIAAAAAAMCcXLoHFwAAAAAAAHCuocEFAAAAAAAAU6PBBQAAAAAAAFOjwQUAAAAAAABTo8EFAAAAAAAAU6PBBQAAAAAAAFOjwQUAAAAAAABTo8EFAAAAAAAAU6PBBQAAAAAAAFO
jwQUAAAAAAABTo8EFAAAAAAAAU6PBBQAAAAAAAFOjwQUAAAAAAABTo8EFAAAAAAAAU6PBBQAAAAAAAFOjwQUAAACnli9frvDwcFmtVkVFRWnHjh0txhcUFCgqKkpWq1W9e/dWdna23fvPP/+84uLidNFFF+miiy7S9ddfr3/+859ncgkAAMBD0OACAACAg5ycHKWmpmr27NkqKSlRXFycRo0apfLycqfxZWVlGj16tOLi4lRSUqJZs2Zp6tSpys3NtcVs375dd9xxh7Zt26bCwkKFhoYqISFBBw4cOFvLAgAAHRQNLgAAADhYvHixJk+erClTpigyMlJZWVkKCQnRihUrnMZnZ2crNDRUWVlZioyM1JQpUzRp0iQtWrTIFvPyyy8rJSVFAwYMUEREhJ5//nk1NjbqnXfeaXYeNTU1qq6uttsAAABORoMLAAAAdmpra1VUVKSEhAS78YSEBO3cudPpPoWFhQ7xI0eO1K5du1RXV+d0n2PHjqmurk7dunVrdi6ZmZny9/e3bSEhIS6uBgAAeAIaXAAAALBTVVWlhoYGBQYG2o0HBgaqsrLS6T6VlZVO4+vr61VVVeV0n5kzZ+riiy/W9ddf3+xcMjIydPjwYdu2f/9+F1cDAAA8gY+7JwAAAIBzk5eXl91rwzAcxk4V72xckhYuXKh169Zp+/btslqtzR7TYrHIYrG4Mm0AAOCBaHABAADATvfu3eXt7e1wttbBgwcdztJqEhQU5DTex8dHAQEBduOLFi3S008/ra1bt+qqq65q38kDAACPxCWKAAAAsOPn56eoqCjl5+fbjefn5ys2NtbpPjExMQ7xW7ZsUXR0tHx9fW1jzz77rP7nf/5HmzdvVnR0dPtPHgAAeCQaXAAAAHCQnp6uF154QatXr9aePXuUlpam8vJyJScnSzpxb6yJEyfa4pOTk7Vv3z6lp6drz549Wr16tVatWqXp06fbYhYuXKjHHntMq1evVq9evVRZWanKykodPXr0rK8PAAB0LFyiCAAAAAeJiYk6dOiQ5s+fr4qKCvXr1095eXkKCwuTJFVUVKi8vNwWHx4erry8PKWlpWnZsmUKDg7WkiVLNG7cOFvM8uXLVVtbq/Hjx9vlmjt3rubNm3dW1gUAADomGlwAAABwKiUlRSkpKU7fW7NmjcNYfHy8iouLmz3e119/3U4zAwAAsMcligAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADA1GlwAAAAAAAAwNRpcAAAAAAAAMDUaXAAAAAAAADC1NjW4li9frvDwcFmtVkVFRWnHjh0txhcUFCgqKkpWq1W9e/dWdna2Q0xubq769u0ri8Wivn37auPGjS7lraur04wZM3TllVfq/PPPV3BwsCZOnKhvv/22LUsEAAAAAACASbjc4MrJyVFqaqpmz56tkpISxcXFadSoUSovL3caX1ZWptGjRysuLk4lJSWaNWuWpk6dqtzcXFtMYWGhEhMTlZSUpN27dyspKUkTJkzQBx980Oq8x44dU3FxsebMmaPi4mJt2LBBe/fu1dixY11dIgAAAAAAAEzE5QbX4sWLNXnyZE2ZMkWRkZHKyspSSEiIVqxY4TQ+OztboaGhysrKUmRkpKZMmaJJkyZp0aJFtpisrCyNGDFCGRkZioiIUEZGhoYPH66srKxW5/X391d+fr4mTJigyy+/XIMHD9Zzzz2noqK
iZptvNTU1qq6uttsAAAAAAABgLi41uGpra1VUVKSEhAS78YSEBO3cudPpPoWFhQ7xI0eO1K5du1RXV9diTNMx25JXkg4fPiwvLy9deOGFTt/PzMyUv7+/bQsJCWn2WAAAAAAAADg3udTgqqqqUkNDgwIDA+3GAwMDVVlZ6XSfyspKp/H19fWqqqpqMabpmG3Je/z4cc2cOVN33nmnunbt6jQmIyNDhw8ftm379+9vZuUAAAAAAAA4V/m0ZScvLy+714ZhOIydKv7k8dYcs7V56+rqdPvtt6uxsVHLly9vdl4Wi0UWi6XZ9wEAAAAAAHDuc6nB1b17d3l7ezucNXXw4EGHs6uaBAUFOY338fFRQEBAizFNx3Qlb11dnSZMmKCysjK9++67zZ69BQAAAAAAgI7BpUsU/fz8FBUVpfz8fLvx/Px8xcbGOt0nJibGIX7Lli2Kjo6Wr69vizFNx2xt3qbm1hdffKGtW7faGmgAAAAAAADouFy+RDE9PV1JSUmKjo5WTEyMVq5cqfLyciUnJ0s6cV+rAwcOaO3atZKk5ORkLV26VOnp6brvvvtUWFioVatWad26dbZjTps2TUOGDNGCBQt0880364033tDWrVv13nvvtTpvfX29xo8fr+LiYv31r39VQ0OD7Yyvbt26yc/Pr+2fEgAAAAAAAM5ZLje4EhMTdejQIc2fP18VFRXq16+f8vLyFBYWJkmqqKhQeXm5LT48PFx5eXlKS0vTsmXLFBwcrCVLlmjcuHG2mNjYWK1fv16PPfaY5syZoz59+ignJ0eDBg1qdd5vvvlGmzZtkiQNGDDAbs7btm3T0KFDXV0qAAAAAAAATKBNN5lPSUlRSkqK0/fWrFnjMBYfH6/i4uIWjzl+/HiNHz++zXl79eplu3k9AAAAAAAAPIdL9+ACAAAAAAAAzjU0uAAAAAAAAGBqNLgAAADg1PLlyxUeHi6r1aqoqCjt2LGjxfiCggJFRUXJarWqd+/eys7Otnv/008/1bhx49SrVy95eXkpKyvrDM4eAAB4EhpcAAAAcJCTk6PU1FTNnj1bJSUliouL06hRo+weJvRLZWVlGj16tOLi4lRSUqJZs2Zp6tSpys3NtcUcO3ZMvXv31jPPPKOgoKCztRQAAOAB2nSTeQAAAHRsixcv1uTJkzVlyhRJUlZWlt5++22tWLFCmZmZDvHZ2dkKDQ21nZUVGRmpXbt2adGiRbanZ19zzTW65pprJEkzZ85s1TxqampUU1Nje11dXX06ywIAAB0UZ3ABAADATm1trYqKipSQkGA3npCQoJ07dzrdp7Cw0CF+5MiR2rVrl+rq6to8l8zMTPn7+9u2kJCQNh8LAAB0XDS4AAAAYKeqqkoNDQ0KDAy0Gw8MDFRlZaXTfSorK53G19fXq6qqqs1zycjI0OHDh23b/v3723wsAADQcXGJIgAAAJzy8vKye20YhsPYqeKdjbvCYrHIYrG0eX8AAOAZOIMLAAAAdrp37y5vb2+Hs7UOHjzocJZWk6CgIKfxPj4+CggIOGNzBQAAkGhwAQAA4CR+fn6KiopSfn6+3Xh+fr5iY2Od7hMTE+MQv2XLFkVHR8vX1/eMzRUAAECiwQUAAAAn0tPT9cILL2j16tXas2eP0tLSVF5eruTkZEkn7o01ceJEW3xycrL27dun9PR07dmzR6tXr9aqVas0ffp0W0xtba1KS0tVWlqq2tpaHThwQKWlpfryyy/P+voAAEDHwj24AAAA4CAxMVGHDh3S/PnzVVFRoX79+ikvL09hYWGSpIqKCpWXl9viw8PDlZeXp7S0NC1btkzBwcFasmSJxo0bZ4v59ttvNXDgQNvrRYsWadGiRYqPj9f27dvP2toAAEDHQ4MLAAAATqWkpCglJcXpe2vWrHEYi4+PV3FxcbPH69Wrl+3G8wAAAO2JSxQBAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAA
AAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqNLgAAAAAAABgajS4AAAAAAAAYGo0uAAAAAAAAGBqPu6eAJxobJC+2yH9XCF16in9Kk46z7vj5XRXXnJ2rJzuykvOjpXTXXk9JScAAABwhtHgOtfs3yAVTZOOffO/Y50vkaL+JIXc1nFyuisvOTtWTnflJWfHyumuvJ6SEwAAADgL2nSJ4vLlyxUeHi6r1aqoqCjt2LGjxfiCggJFRUXJarWqd+/eys7OdojJzc1V3759ZbFY1LdvX23cuNHlvIZhaN68eQoODlanTp00dOhQffrpp21Zonvs3yDtGG///3hI0rEDJ8b3b+gYOd2Vl5wdK6e78pKzY+V0V15PyQnTc1fNBwAA4CqXG1w5OTlKTU3V7NmzVVJSori4OI0aNUrl5eVO48vKyjR69GjFxcWppKREs2bN0tSpU5Wbm2uLKSwsVGJiopKSkrR7924lJSVpwoQJ+uCDD1zKu3DhQi1evFhLly7Vhx9+qKCgII0YMUJHjhxxdZlnX2PDib+qy3Dy5v8fK0o9EWfmnO7KS86OldNdecnZsXK6K6+n5ITpuavmAwAAaAuXL1FcvHixJk+erClTpkiSsrKy9Pbbb2vFihXKzMx0iM/OzlZoaKiysrIkSZGRkdq1a5cWLVqkcePG2Y4xYsQIZWRkSJIyMjJUUFCgrKwsrVu3rlV5DcNQVlaWZs+erdtuO3GZxZ///GcFBgbqlVde0QMPPOAwt5qaGtXU1NheHz58WJJUXV3t6sfSapWVlaqsrHQYv+BYiX5d9Y2TPZoY0rH9+nLLUh3tPNBpRFBQkIKCgs7pnO2R11NyNpfXHTlbysv36NzO2Vxevkftk9dTcjrLe/ToUUnS/t37VfNTjdN9TsfBLw/a8rT3f5ebjmcYzhp+aOKumu9kZ7Nec8f32l3/ls5kXnLyPSLnuZ2zubx8j8jZXnnbQ5vqNcMFNTU1hre3t7Fhwwa78alTpxpDhgxxuk9cXJwxdepUu7ENGzYYPj4+Rm1trWEYhhESEmIsXrzYLmbx4sVGaGhoq/N+9dVXhiSjuLjYLmbs2LHGxIkTnc5t7ty5hk786ZqNjY2NjY3Nw7b9+/e3VPZ4NHfVfM5Qr7GxsbGxsXnu5kq95tIZXFVVVWpoaFBgYKDdeGBgYLN/Na6srHQaX19fr6qqKvXs2bPZmKZjtiZv0/86i9m3b5/TuWVkZCg9Pd32urGxUd9//70CAgLk5eXldJ+zpbq6WiEhIdq/f7+6du3q1rmcaZ6yVtbZ8XjKWj1lnZLnrNXT12kYho4cOaLg4GA3zu7c5q6azxnqNffzlHVKnrNW1tnxeMpaPWWdkuestT3rtTY9RfHkYsIwjBYLDGfxJ4+35pjtFdPEYrHIYrHYjV144YXNrMI9unbt2qG/zL/kKWtlnR2Pp6zVU9Ypec5aPXmd/v7+bpqNubir5vsl6rVzh6esU/KctbLOjsdT1uop65Q8Z63tUa+5dJP57t27y9vb2+GvbAcPHnT4a1yToKAgp/E+Pj4KCAhoMabpmK3J23RfEFfmBgAAAEfuqvkAAADayqUGl5+fn6KiopSfn283np+fr9jYWKf7xMTEOMRv2bJ
F0dHR8vX1bTGm6ZityRseHq6goCC7mNraWhUUFDQ7NwAAADhyV80HAADQZq2+W9f/t379esPX19dYtWqV8dlnnxmpqanG+eefb3z99deGYRjGzJkzjaSkJFv8v//9b6Nz585GWlqa8dlnnxmrVq0yfH19jddee80W849//MPw9vY2nnnmGWPPnj3GM888Y/j4+Bjvv/9+q/MahmE888wzhr+/v7Fhwwbj448/Nu644w6jZ8+eRnV1tavLdLvjx48bc+fONY4fP+7uqZxxnrJW1tnxeMpaPWWdhuE5a2WdaA131Xxm4infMU9Zp2F4zlpZZ8fjKWv1lHUahuestT3X6XKDyzAMY9myZUZYWJjh5+dnXH311UZBQYHtvbvvvtuIj4+3i9++fbsxcOBAw8/Pz+jVq5exYsUKh2P+5S9/MS6//HLD19fXiIiIMHJzc13KaxiG0djYaMydO9cICgoyLBaLMWTIEOPjjz9uyxIBAAA8nrtqPgAAAFd5Gcb/v/snAAAAAAAAYEIu3YMLAAAAAAAAONfQ4AIAAAAAAICp0eACAAAAAACAqdHgAgAAAAAAgKnR4DpHLV++XOHh4bJarYqKitKOHTvcPaV2lZmZqWuuuUZdunRRjx49dMstt+jzzz9397TOuMzMTHl5eSk1NdXdUzkjDhw4oLvuuksBAQHq3LmzBgwYoKKiIndPq13V19frscceU3h4uDp16qTevXtr/vz5amxsdPfUTtvf//533XTTTQoODpaXl5def/11u/cNw9C8efMUHBysTp06aejQofr000/dM9nT0NI66+rqNGPGDF155ZU6//zzFRwcrIkTJ+rbb79134RPw6l+pr/0wAMPyMvLS1lZWWdtfu2lNevcs2ePxo4dK39/f3Xp0kWDBw9WeXn52Z8sOoyOXqtJ1GvUa+ZFvUa9ZhaeUqtJZ6deo8F1DsrJyVFqaqpmz56tkpISxcXFadSoUR2qEC8oKNCDDz6o999/X/n5+aqvr1dCQoJ++uknd0/tjPnwww+1cuVKXXXVVe6eyhnxww8/6Nprr5Wvr6/+9re/6bPPPtMf/vAHXXjhhe6eWrtasGCBsrOztXTpUu3Zs0cLFy7Us88+q+eee87dUzttP/30k/r376+lS5c6fX/hwoVavHixli5dqg8//FBBQUEaMWKEjhw5cpZnenpaWuexY8dUXFysOXPmqLi4WBs2bNDevXs1duxYN8z09J3qZ9rk9ddf1wcffKDg4OCzNLP2dap1fvXVV7ruuusUERGh7du3a/fu3ZozZ46sVutZnik6Ck+o1STqtY6Ieo16zSw8pV7zlFpNOkv1moFzzm9+8xsjOTnZbiwiIsKYOXOmm2Z05h08eNCQZBQUFLh7KmfEkSNHjEsvvdTIz8834uPjjWnTprl7Su1uxowZxnXXXefuaZxxY8aMMSZNmmQ3dttttxl33XWXm2Z0ZkgyNm7caHvd2NhoBAUFGc8884xt7Pjx44a/v7+RnZ3thhm2j5PX6cw///lPQ5Kxb9++szOpM6S5tX7zzTfGxRdfbHzyySdGWFiY8cc//vGsz609OVtnYmJih/s3CvfyxFrNMKjXOgLqtY713wLqtf/VEeo1T6nVDOPM1WucwXWOqa2tVVFRkRISEuzGExIStHPnTjfN6sw7fPiwJKlbt25unsmZ8eCDD2rMmDG6/vrr3T2VM2bTpk2Kjo7Wb3/7W/Xo0UMDBw7U888/7+5ptbvrrrtO77zzjvbu3StJ2r17t9577z2NHj3azTM7s8rKylRZWWn3u8lisSg+Pr5D/26STvx+8vLy6nB/3ZakxsZGJSUl6ZFHHtEVV1zh7umcEY2NjXrrrbd02WWXaeTIkerRo4cGDRrU4iUAQEs8tVaTqNc6Auo16rWOqqPWa55Qq0ntV6/R4DrHVFVVqaGhQYGBgXbjgYGBqqysdNOszizDMJSenq7rrrtO/fr1c/d02t369etVVFSkzMxMd0/ljPr3v/+
tFStW6NJLL9Xbb7+t5ORkTZ06VWvXrnX31NrVjBkzdMcddygiIkK+vr4aOHCgUlNTdccdd7h7amdU0+8fT/rdJEnHjx/XzJkzdeedd6pr167unk67W7BggXx8fDR16lR3T+WMOXjwoI4ePapnnnlGN9xwg7Zs2aJbb71Vt912mwoKCtw9PZiQJ9ZqEvVaR0G9Rr3WEXXkes0TajWp/eo1nzM4R5wGLy8vu9eGYTiMdRQPPfSQPvroI7333nvunkq7279/v6ZNm6YtW7Z0+Hu9NDY2Kjo6Wk8//bQkaeDAgfr000+1YsUKTZw40c2zaz85OTl66aWX9Morr+iKK65QaWmpUlNTFRwcrLvvvtvd0zvjPOl3U11dnW6//XY1NjZq+fLl7p5OuysqKtKf/vQnFRcXd9ifoSTbDYVvvvlmpaWlSZIGDBignTt3Kjs7W/Hx8e6cHkzMk34fStRrHQX1GvVaR9OR6zVPqdWk9qvXOIPrHNO9e3d5e3s7dNgPHjzo0InvCH7/+99r06ZN2rZtmy655BJ3T6fdFRUV6eDBg4qKipKPj498fHxUUFCgJUuWyMfHRw0NDe6eYrvp2bOn+vbtazcWGRnZ4W64+8gjj2jmzJm6/fbbdeWVVyopKUlpaWkd/i++QUFBkuQxv5vq6uo0YcIElZWVKT8/v8P9NVCSduzYoYMHDyo0NNT2+2nfvn16+OGH1atXL3dPr910795dPj4+HvH7CWeHp9VqEvUa9Zr5UK95xu+njl6veUqtJrVfvUaD6xzj5+enqKgo5efn243n5+crNjbWTbNqf4Zh6KGHHtKGDRv07rvvKjw83N1TOiOGDx+ujz/+WKWlpbYtOjpav/vd71RaWipvb293T7HdXHvttQ6PDt+7d6/CwsLcNKMz49ixYzrvPPtfnd7e3h3isdMtCQ8PV1BQkN3vptraWhUUFHSo303S/xZLX3zxhbZu3aqAgAB3T+mMSEpK0kcffWT3+yk4OFiPPPKI3n77bXdPr934+fnpmmuu8YjfTzg7PKVWk6jXqNfMi3qNeq0j8JRaTWq/eo1LFM9B6enpSkpKUnR0tGJiYrRy5UqVl5crOTnZ3VNrNw8++KBeeeUVvfHGG+rSpYvtrwz+/v7q1KmTm2fXfrp06eJwn4rzzz9fAQEBHe7+FWlpaYqNjdXTTz+tCRMm6J///KdWrlyplStXuntq7eqmm27SU089pdDQUF1xxRUqKSnR4sWLNWnSJHdP7bQdPXpUX375pe11WVmZSktL1a1bN4WGhio1NVVPP/20Lr30Ul166aV6+umn1blzZ915551unLXrWlpncHCwxo8fr+LiYv31r39VQ0OD7fdTt27d5Ofn565pt8mpfqYnF4O+vr4KCgrS5ZdffranelpOtc5HHnlEiYmJGjJkiIYNG6bNmzfrzTff1Pbt2903aZiaJ9RqEvUa9Zp5Ua9Rr5mFp9Rq0lmq107rGYw4Y5YtW2aEhYUZfn5+xtVXX93hHscsyen24osvuntqZ1xHfey0YRjGm2++afTr18+wWCxGRESEsXLlSndPqd1VV1cb06ZNM0JDQw2r1Wr07t3bmD17tlFTU+PuqZ22bdu2Of13effddxuGceLR03PnzjWCgoIMi8ViDBkyxPj444/dO+k2aGmdZWVlzf5+2rZtm7un7rJT/UxPZtZHT7dmnatWrTJ+/etfG1ar1ejfv7/x+uuvu2/C6BA6eq1mGNRr1GvmRb1GvWYWnlKrGcbZqde8DMMwWt8OAwAAAAAAAM4t3IMLAAAAAAAApkaDCwAAAAAAAKZGgwsAAAAAAACmRoMLAAAAAAAApkaDCwAAAAAAAKZGgwsAAAAAAACmRoMLAAAAAAAApkaDCwAAAAAAAKZGgwsAAAAAAACmRoMLAAAAAAAApkaDCwAAAAAAAKb2/wAst3qZIyioRQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Drawing both of them\n", + "figure, axis = plt.subplots(1, 2)\n", + "figure.set_figheight(4)\n", + "figure.set_figwidth(12)\n", + "figure.tight_layout(w_pad = 5)\n", + "\n", + "def draw(y, id):\n", + " x = np.array(range(0, np.size(y)))\n", + " axis[id].bar(x, y, color=\"lightgreen\", ec='black')\n", + " axis[id].scatter(x, y, color=\"orange\")\n", + "\n", + "axis[0].title.set_text(\"Initial distribution\")\n", + "axis[1].title.set_text(\"Required distribution\")\n", + "draw(a, 0)\n", + "draw(b, 1)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RuuNFQUcDCXl", + "outputId": "b0dc6795-09a7-4160-b8b5-fbef7b182f11" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9375\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from aif360.sklearn.metrics import ot_distance\n", + "\n", + "_a = pd.Series(a)\n", + "_b = pd.Series(b)\n", + "c = ot_distance(_a, _b)\n", + "\n", + "print(c)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "982f8APNDCXm" + }, + "source": [ + "## Usage\n", + "\n", + "The type of outcomes must be provided using the `mode` keyword argument. The definition for the four types of outcomes supported are provided below:\n", + "- Binary: Yes/no outcomes. Outcomes must 0 or 1.\n", + "- Continuous: Continuous outcomes. Outcomes could be any real number.\n", + "- Nominal: Multiclass outcomes with no rank or order between them. Outcomes must be a finite set of integers.\n", + "- Ordinal: Multiclass outcomes that are ranked in a specific order. Outcomes must be positive integers." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AXZLAng1DCXm" + }, + "source": [ + "## Compas Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JbzgBkZ6DCXm" + }, + "source": [ + "We'll demonstrate scanning for bias with earth_movers_distance using the Compas dataset. We scan for bias in the predictions of an `sklearn` logistic regression model with respect to different groups." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S_a5CpBmDCXm" + }, + "outputs": [], + "source": [ + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "np.random.seed(0)\n", + "dataset_orig = load_preproc_data_compas()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MetVVGgEDCXm" + }, + "source": [ + "We scan for bias first with respect to `sex`, and then `age`.\n", + "\n", + "To scan for bias with respect to a feature that is one-hot encoded - in this case, age category - we need to convert it to nominal or ordinal format." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nKjqZpvrDCXm", + "outputId": "eb2d18a6-3d40-4a89-f9de-89b7b82441cd" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sexracepriors_count=0priors_count=1 to 3priors_count=More than 3c_charge_degree=Fc_charge_degree=Mage_cattwo_year_recid
00.00.01.00.00.01.00.011.0
10.00.00.00.01.01.00.001.0
20.01.00.00.01.01.00.011.0
31.01.01.00.00.00.01.010.0
40.01.01.00.00.01.00.010.0
\n", + "
" + ], + "text/plain": [ + " sex race priors_count=0 priors_count=1 to 3 priors_count=More than 3 \\\n", + "0 0.0 0.0 1.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 1.0 \n", + "2 0.0 1.0 0.0 0.0 1.0 \n", + "3 1.0 1.0 1.0 0.0 0.0 \n", + "4 0.0 1.0 1.0 0.0 0.0 \n", + "\n", + " c_charge_degree=F c_charge_degree=M age_cat two_year_recid \n", + "0 1.0 0.0 1 1.0 \n", + "1 1.0 0.0 0 1.0 \n", + "2 1.0 0.0 1 1.0 \n", + "3 0.0 1.0 1 0.0 \n", + "4 1.0 0.0 1 0.0 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset_orig_df = pd.DataFrame(dataset_orig.features, columns=dataset_orig.feature_names)\n", + "# Binning the features corresponding to age ('reshaping' them into one ordinal column)\n", + "age_cat_cols = ['age_cat=Less than 25', 'age_cat=25 to 45', 'age_cat=Greater than 45']\n", + "age_cat = np.argmax(dataset_orig_df[age_cat_cols].values, axis=1).reshape(-1, 1)\n", + "df = dataset_orig_df.drop(age_cat_cols, axis=1)\n", + "df['age_cat'] = age_cat\n", + "df['two_year_recid'] = dataset_orig.labels\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gdQM2Pf0DCXm" + }, + "source": [ + "### Measuring bias with respect to `sex`\n", + "\n", + "\n", + "We train a logistic regression model on the dataset, and scan its results for bias with respect to `sex` using `earth_movers_distance`.\n", + "\n", + "The arguments are as follows:\n", + "- `ground_truth`: ground truth labels;\n", + "- `classifier`: predicted labels;\n", + "- `prot_attr`: the values of the sensitive attributes (with respect to which the classifier may be introducing bias);\n", + "- `num_iters`: maximum number of iterations performed when calculating the Earth Mover's Distance;\n", + "- `mode`: mode of the labels, one of binary, nominal, ordinal and continuous; in our case the labels are binary."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1UMII6BdDCXn" + }, + "outputs": [], + "source": [ + "from aif360.sklearn.metrics import ot_distance\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "X = df.drop('two_year_recid', axis=1)\n", + "y = df['two_year_recid']\n", + "clf = LogisticRegression(solver='lbfgs', max_iter=10000, C=1.0, penalty='l2')\n", + "clf.fit(X, y)\n", + "preds = pd.Series(clf.predict_proba(X)[:,0])\n", + "\n", + "ot_val1 = ot_distance(y_true=y, y_pred=preds, prot_attr=df['sex'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dJ9XucBpDCXn", + "outputId": "14f8db55-0698-4582-f0ab-a0d5b51d32c0" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sexot_val
00.00.000209
11.00.001647
\n", + "
" + ], + "text/plain": [ + " sex ot_val\n", + "0 0.0 0.000209\n", + "1 1.0 0.001647" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "bs1 = pd.DataFrame({\"sex\": ot_val1.keys(), \"ot_val\": ot_val1.values()})\n", + "display(bs1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qMuMdXc9DCXn" + }, + "source": [ + "We get the bias value for each of the protected groups - in this case, Male (`0`) and Female (`1`).\n", + "\n", + "These values range from 0 to 1 and can be interpreted as the difference in percent between the ground truth distribution and the distribution of the protected group: for example, a value of 0.3 would mean a 30% difference." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AZHqN9JTDCXn" + }, + "source": [ + "### Measuring bias with respect to `age_cat`\n", + "\n", + "Now we measure the bias of the same classifier with respect to the age category." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "usxcylLxDCXn" + }, + "outputs": [], + "source": [ + "ot_val2 = ot_distance(y_true=y, y_pred=preds, prot_attr=df['age_cat'])" + ] + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
raceot_val
00.00.000779
11.00.000068
\n", - "
" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KLAS5o-DDCXs", + "outputId": "6e4d8bdc-21e7-4e3e-f57c-04302bf53dc2" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
age_catot_val
000.000578
110.000313
220.001800
\n", + "
" + ], + "text/plain": [ + " age_cat ot_val\n", + "0 0 0.000578\n", + "1 1 0.000313\n", + "2 2 0.001800" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - " race ot_val\n", - "0 0.0 0.000779\n", - "1 1.0 0.000068" + "source": [ + "bs2 = pd.DataFrame({\"age_cat\": ot_val2.keys(), \"ot_val\": ot_val2.values()})\n", + "display(bs2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xU8-Sjz_DCXs" + }, + "source": [ + "## Adult Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "clHgbhiCDCXt" + }, + "source": [ + "Let us consider the Adult Dataset. It has two protected categories: `sex` and `race`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XhiCZSmCDCXt" + }, + "outputs": [], + "source": [ + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", + "\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v6SiGFSTDCXt", + "outputId": "05ed49ca-30d7-4d3b-d706-296b3cbcb60a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
racesexAge (decade)=10Age (decade)=20Age (decade)=30Age (decade)=40Age (decade)=50Age (decade)=60Age (decade)=>=70Education Years=6Education Years=7Education Years=8Education Years=9Education Years=10Education Years=11Education Years=12Education Years=<6Education Years=>12Income Binary
00.01.00.01.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.0
11.01.00.00.01.00.00.00.00.00.00.00.01.00.00.00.00.00.00.0
21.01.00.01.00.00.00.00.00.00.00.00.00.00.00.01.00.00.01.0
30.01.00.00.00.01.00.00.00.00.00.00.00.01.00.00.00.00.01.0
41.00.01.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.0
\n", + "
" + ], + "text/plain": [ + " race sex Age (decade)=10 Age (decade)=20 Age (decade)=30 \\\n", + "0 0.0 1.0 0.0 1.0 0.0 \n", + "1 1.0 1.0 0.0 0.0 1.0 \n", + "2 1.0 1.0 0.0 1.0 0.0 \n", + "3 0.0 1.0 0.0 0.0 0.0 \n", + "4 1.0 0.0 1.0 0.0 0.0 \n", + "\n", + " Age (decade)=40 Age (decade)=50 Age (decade)=60 Age (decade)=>=70 \\\n", + "0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 \n", + "3 1.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 \n", + "\n", + " Education Years=6 Education Years=7 Education Years=8 Education Years=9 \\\n", + "0 0.0 1.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 1.0 \n", + "2 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 \n", + "\n", + " Education Years=10 Education Years=11 Education Years=12 \\\n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 1.0 \n", + "3 1.0 0.0 0.0 \n", + "4 1.0 0.0 0.0 \n", + "\n", + " Education Years=<6 Education Years=>12 Income Binary \n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 1.0 \n", + "3 0.0 0.0 1.0 \n", + "4 0.0 0.0 0.0 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_raw = load_preproc_data_adult()\n", + "data = data_raw.convert_to_dataframe()[0]\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UrUBEyKeDCXt" + }, + "source": [ + "### Measuring bias with respect to `sex`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WyItlB-4DCXt" + }, + "outputs": [], + "source": [ + "clf = LogisticRegression(solver='lbfgs', max_iter=10000, C=1.0, penalty='l2')\n", + "X = data.drop('Income Binary',axis=1)\n", + "y = data['Income Binary']\n", + "\n", + "clf.fit(X, y)\n", + "preds = pd.Series(clf.predict_proba(X)[:,0])\n", + "\n", + "ot_val1 = ot_distance(y_true=y, y_pred=preds, prot_attr=data['sex'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iz35JSsaDCXt", + "outputId": 
"7b759101-d6b5-4579-f7f9-4667c21bad55" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sexot_val
00.00.000503
11.00.000067
\n", + "
" + ], + "text/plain": [ + " sex ot_val\n", + "0 0.0 0.000503\n", + "1 1.0 0.000067" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "bs1 = pd.DataFrame({\"sex\": ot_val1.keys(), \"ot_val\": ot_val1.values()})\n", + "display(bs1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z1JSCRJ4DCXt" + }, + "source": [ + "### Measuring bias with respect to `race`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3T0756nuDCXt" + }, + "outputs": [], + "source": [ + "ot_val2 = ot_distance(y_true=y, y_pred=preds, prot_attr=data['race'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BXcbZhdIDCXu", + "outputId": "2f21c706-9625-448d-d713-f5b60be57d4a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
raceot_val
00.00.000779
11.00.000068
\n", + "
" + ], + "text/plain": [ + " race ot_val\n", + "0 0.0 0.000779\n", + "1 1.0 0.000068" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "bs2 = pd.DataFrame({\"race\": ot_val2.keys(), \"ot_val\": ot_val2.values()})\n", + "display(bs2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b_txzvFjDCXu" + }, + "source": [ + "## More details\n", + "\n", + "Optimal transport (OT) is commonly employed as a means to quantify the similarity between distributions, particularly when the distributions have distinct supports. In scenarios where the supports of the distributions are disjoint, OT-based Wasserstein distances compare favorably with well-known f-divergences such as the Kullback-Leibler divergence, Jensen-Shannon divergence, and Total Variation distance.\n", + "\n", + "One notable aspect that makes OT valuable for data science applications is its ability to compute meaningful sub-gradients of the Wasserstein distance. This feature enhances its efficiency as a tool for measuring and optimizing similarity between empirical distributions.\n", + "\n", + "The machine learning (ML) literature has seen numerous contributions utilizing OT as an approach. For instance, in the training of Generative Adversarial Networks (GANs), OT has been utilized to tackle the issue of vanishing gradients, which can hinder the learning process. Additionally, OT has been employed to identify discriminant or robust subspaces within datasets, offering useful insights. Moreover, the Wasserstein distance has found application in measuring similarity between word embeddings of documents, as well as comparing signals or spectra.\n", + "\n", + "Due to the inherent constraint in the problem, solving Optimal Transport (OT) can be quite challenging. As a result, in practical applications dealing with discrete distributions, a more manageable approach known as a linear program has been employed as a substitute.
This approach corresponds to the Kantorovich formulation, where the original Monge mapping, denoted as $m$, is replaced by a joint distribution represented by an OT matrix.\n", + "From the optimization problem described above, we can identify two primary components of the OT solution that have practical applications:\n", + "- The optimal value (Wasserstein distance): This quantifies the similarity between distributions. It is used to measure the dissimilarity or similarity between datasets or distributions. The Wasserstein distance represents the optimal value obtained from solving the OT problem.\n", + "- The optimal mapping (Monge mapping or OT matrix): This determines the correspondences between the distributions. It describes how the mass is transported between the source and target distributions. The optimal mapping can be utilized to transfer knowledge or information between distributions.\n", + "\n", + "In the first case, OT is employed to assess the similarity between distributions or datasets. Here, the Wasserstein distance, which is the optimal value obtained from solving the OT problem, is used as a measure of similarity. In the second case, the focus lies on understanding the specific manner in which mass is transferred between distributions, represented by the mapping. This mapping can be leveraged to facilitate the transfer of knowledge or information between the distributions.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZIPRdM1wDCXu" + }, + "source": [ + "### OT for mapping estimation\n", + "\n", + "One fascinating aspect of the Optimal Transport (OT) problem is the inherent OT mapping. When computing the optimal transport between discrete distributions, one of the outputs is the OT matrix, which provides correspondences between the samples in each distribution.\n", + "\n", + "This correspondence is estimated based on the OT criterion and is obtained in an unsupervised manner.
This characteristic makes it particularly intriguing for problems involving dataset transfer. OT has been employed, for instance, in performing color transfer between images or in the context of domain adaptation, where knowledge or information is transferred between different datasets.\n", + "\n", + "Furthermore, more recent applications have explored the extension of OT, known as Gromov-Wasserstein, to establish correspondences between languages using word embeddings. This utilization of OT enables the identification of connections or similarities between languages based on the distributional properties of word embeddings.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9tM6dploDCXu" + }, + "source": [ + "### Kantorovich optimal transport problem\n", + "\n", + "This is the most typical OT problem. It seeks an optimal coupling $\\boldsymbol{T}$ which minimizes the displacement cost of a discrete measure $\\boldsymbol{a}$ to a discrete measure $\\boldsymbol{b}$ with respect to a ground cost $\\boldsymbol{M} \\in \\mathbb{R}^{n_{1} \\times n_{2}}$. In order to be a transport plan, $\\boldsymbol{T}$ must be part of the set $\\Pi(\\mathbf{a}, \\mathbf{b})=\\left\\{\\boldsymbol{T} \\geq \\mathbf{0}, \\boldsymbol{T} \\mathbf{1}_{n_{2}}=\\boldsymbol{a}, \\boldsymbol{T}^{\\top} \\mathbf{1}_{n_{1}}=\\boldsymbol{b}\\right\\}$. When the ground cost is a metric, the optimal value of the OT problem is also a metric (Rubner et al., 2000; Cuturi and Avis, 2014) and is called the Wasserstein distance. In this discrete case, the OT problem is defined as\n", + "\n", + "$$\n", + "W_{M}(\\boldsymbol{a}, \\boldsymbol{b})=\\min _{\\boldsymbol{T} \\in \\Pi(\\mathbf{a}, \\mathbf{b})}\\langle\\boldsymbol{T}, \\boldsymbol{M}\\rangle\n", + "$$\n", + "\n", + "which is a linear program. 
The optimization problem above is often adapted to include a regularization term for the transport plan $\\boldsymbol{T}$, such as entropic regularization (Cuturi, 2013) or squared L2. For the entropic regularized OT problem, one may use the Sinkhorn-Knopp algorithm (or variants), or stochastic optimization algorithms. POT has a simple syntax to solve these problems." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DDlqt0TnDCXu" + }, + "source": [ + "### Solving optimal transport\n", + "\n", + "The optimal transport problem between discrete distributions is often expressed as\n", + "$$\n", + "\\begin{array}{r}\n", + "\\gamma^*=\\arg \\min _{\\gamma \\in \\mathbb{R}_{+}^{m \\times n}} \\sum_{i, j} \\gamma_{i, j} M_{i, j} \\\\\n", + "\\text { s.t. } \\gamma 1=a ; \\gamma^T 1=b ; \\gamma \\geq 0\n", + "\\end{array}\n", + "$$\n", + "where:\n", + "- $M \\in \\mathbb{R}_{+}^{m \\times n}$ is the metric cost matrix defining the cost to move mass from bin $a_i$ to bin $b_j$.\n", + "- $a$ and $b$ are histograms on the simplex (positive, sum to 1) that represent the weights of each sample in the source and target distributions.\n", + "Solving the linear program above can be done using the function ot.emd, which returns the optimal transport matrix $\\gamma^*$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TFNEAAplDCXu" + }, + "source": [ + "### The necessity and priority of usage\n", + "\n", + "The main difference between the MDSS and OT detectors is the range of applicability. The MDSS scanner can decide in linear time what the most anomalous subset is, while the OT detector has a narrower but more accurate field of use. The method used can only account for 1-dimensional histograms as precisely described in \"POT: Python Optimal Transport\" https://jmlr.org/papers/v22/20-451.html. This means there is no possibility yet to handle datasets of more than 1 feature.
For more dimensions, there is a prospect for the Sinkhorn algorithm, which will be implemented in the near future. As compared to MDSS, the user needs to specify the sensitive attribute for which they want to know the bias.\n", + "\n", + "So if the user is interested in obtaining a certain bias on a particular parameter, it is better to use earth_movers_distance. But if it is more important to see the general picture for all parameters on bias presence, then it is better to use MDSS.\n", + "\n", + "The matrix that is obtained from the emd method is called a transport plan in the OT framework (the gamma matrix in https://pythonot.github.io/all.html?highlight=emd#ot.emd). It is the matrix which minimizes the transportation cost, and can be used to evaluate the actual Wasserstein distance by taking the Frobenius product (see here for instance: https://en.wikipedia.org/wiki/Frobenius_inner_product) with the metric cost matrix, M." ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "bs2 = pd.DataFrame({\"race\": ot_val2.keys(), \"ot_val\": ot_val2.values()})\n", - "display(bs2)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## More details\n", - "\n", - "It is commonly employed as a means to quantify the similarity between distributions, particularly when the distributions have distinct supports. In scenarios where the supports of the distributions are disjoint, OT-based Wasserstein distances offer favorable comparisons to well-known f-divergences such as the Kullback-Leibler divergence, Jensen-Shannon divergence, and Total Variation distance.\n", - "\n", - "One notable aspect that makes OT valuable for data science applications is its ability to compute meaningful sub-gradients of the Wasserstein distance.
This feature enhances its efficiency as a tool for measuring and optimizing similarity between empirical distributions.\n", - "\n", - "The machine learning (ML) literature has seen numerous contributions utilizing OT as an approach. For instance, in the training of Generative Adversarial Networks (GANs), OT has been utilized to tackle the issue of vanishing gradients, which can hinder the learning process. Additionally, OT has been employed to identify discriminant or robust subspaces within datasets, offering useful insights. Moreover, the Wasserstein distance has found application in measuring similarity between word embeddings of documents, as well as comparing signals or spectra.\n", - "\n", - "Due to the inherent constraint in the problem, solving Optimal Transport (OT) can be quite challenging. As a result, in practical applications dealing with discrete distributions, a more manageable approach known as a linear program has been employed as a substitute. This approach corresponds to the Kantorovitch formulation, where the original Monge mapping, denoted as $m$ is replaced by a joint distribution represented by an OT matrix.\n", - "From the optimization problem described above, we can identify two primary components of the OT solution that have practical applications:\n", - "- The optimal value (Wasserstein distance): This quantifies the similarity between distributions. It is used to measure the dissimilarity or similarity between datasets or distributions. The Wasserstein distance represents the optimal value obtained from solving the OT problem.\n", - "- The optimal mapping (Monge mapping or OT matrix): This determines the correspondences between the distributions. It describes how the mass is transported between the source and target distributions. The optimal mapping can be utilized to transfer knowledge or information between distributions.\n", - "\n", - "In the first case, OT is employed to assess the similarity between distributions or datasets. 
Here, the Wasserstein distance, which is the optimal value obtained from solving the OT problem, is used as a measure of similarity. In the second case, the focus lies on understanding the specific manner in which mass is transferred between distributions, represented by the mapping. This mapping can be leveraged to facilitate the transfer of knowledge or information between the distributions.\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### OT for mapping estimation\n", - "\n", - "One fascinating aspect of the Optimal Transport (OT) problem is the inherent OT mapping. When computing the optimal transport between discrete distributions, one of the outputs is the OT matrix, which provides correspondences between the samples in each distribution.\n", - "\n", - "This correspondence is estimated based on the OT criterion and is obtained in a non-supervised manner. This characteristic makes it particularly intriguing for problems involving dataset transfer. OT has been employed, for instance, in performing color transfer between images or in the context of domain adaptation, where knowledge or information is transferred between different datasets.\n", - "\n", - "Furthermore, more recent applications have explored the extension of OT, known as Gromov-Wasserstein, to establish correspondences between languages using word embeddings. This utilization of OT enables the identification of connections or similarities between languages based on the distributional properties of word embeddings.\n", - "\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Kantorovich optimal transport problem\n", - "\n", - "This is the most typical OT problem. 
It seeks an optimal coupling $\\boldsymbol{T}$ which minimizes the displacement cost of a discrete measure $\\boldsymbol{a}$ to a discrete measure $\\boldsymbol{b}$ with respect to a ground cost $\\boldsymbol{M} \\in \\mathbb{R}^{n_{1} \\times n_{2}}$. In order to be a transport plan, $\\boldsymbol{T}$ must be part of the set $\\Pi(\\mathbf{a}, \\mathbf{b})=\\left\\{\\boldsymbol{T} \\geq \\mathbf{0}, \\boldsymbol{T} \\mathbf{1}_{n_{2}}=\\boldsymbol{a}, \\boldsymbol{T}^{\\top} \\mathbf{1}_{n_{1}}=\\boldsymbol{b}\\right\\}$. When the ground cost is a metric, the optimal value of the OT problem is also a metric (Rubner et al., 2000; Cuturi and Avis, 2014) and is called the Wasserstein distance. In this discrete case, the OT problem is defined as\n", - "\n", - "$$\n", - "W_{M}(\\boldsymbol{a}, \\boldsymbol{b})=\\min _{\\boldsymbol{T} \\in \\Pi(\\mathbf{a}, \\mathbf{b})}\\langle\\boldsymbol{T}, \\boldsymbol{M}\\rangle\n", - "$$\n", - "\n", - "which is a linear program. The optimization problem above is often adapted to include a regularization term for the transport plan $\\boldsymbol{T}$, such as entropic regularization (Cuturi, 2013) or squared L2. For the entropic regularized OT problem, one may use the Sinkhorn Knopp algorithm (or variants), or stochastic optimization algorithms. POT has a simple syntax to solve these problems." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Solving optimal transport\n", - "\n", - "The optimal transport problem between discrete distributions is often expressed as\n", - "$$\n", - "\\begin{array}{r}\n", - "\\gamma^*=\\arg \\min _{\\gamma \\in \\mathbb{R}_{+}^{m \\times n}} \\sum_{i, j} \\gamma_{i, j} M_{i, j} \\\\\n", - "\\text { s.t. 
} \\gamma 1=a ; \\gamma^T 1=b ; \\gamma \\geq 0\n", - "\\end{array}\n", - "$$\n", - "where:\n", - "- $M \\in \\mathbb{R}_{+}^{m \\times n}$ is the metric cost matrix defining the cost to move mass from bin $a_i$ to bin $b_j$.\n", - "- $a$ and $b$ are histograms on the simplex (positive, sum to 1) that represent the weights of each samples in the source an target distributions.\n", - "Solving the linear program above can be done using the function ot.emd that will return the optimal transport matrix $\\gamma^*$ :" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The necessity and priority of usage\n", - "\n", - "The main difference between the MDSS and OT detectors is the range of applicability. MDSS scanner can in linear time decide, what the most anomalous subset is, while in the case of OT detector has a narrower but more accurate field of use. The method used can only account for 1-dimensional histograms as precisely described in \"POT: Python Optimal Transport\" https://jmlr.org/papers/v22/20-451.html. This means there is no possibility yet to handle datasets of more than 1 feature. For more dimensions, there is a prospect for the Sinkhorn algorithm, which will be implemented in the nearest future. As compared to MDSS, the user needs to specify the sensitive attribute for which wants to know the bias. \n", - "\n", - "So if the user is interested in obtaining a certain bias on a particular parameter, it is better to use earth_movers_distance. But if it is more important to see the general picture for all parameters on bias presence, then it is better to use MDSS.\n", - "\n", - "The matrix that is obtained from the emd method is called a transport plan in the OT framework (the gamma matrix in https://pythonot.github.io/all.html?highlight=emd#ot.emd). 
It is the matrix which minimizes the transportation cost, and workable to evaluate the actual Wasserstein distance by taking the Frobenius product (see here for instance: https://en.wikipedia.org/wiki/Frobenius_inner_product) with the metric cost matrix, M." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "e3a8dcdfdfc9ccb9b75f5bdad7d0512468824af451b906a037b07d69b0e56c16" + } + }, + "colab": { + "provenance": [] + } }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "e3a8dcdfdfc9ccb9b75f5bdad7d0512468824af451b906a037b07d69b0e56c16" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/demo_reject_option_classification.ipynb b/examples/demo_reject_option_classification.ipynb index 004b0ec1..47724ac2 100644 --- a/examples/demo_reject_option_classification.ipynb +++ b/examples/demo_reject_option_classification.ipynb @@ -1,804 +1,888 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### This notebook demonstrates the use of the Reject Option Classification (ROC) post-processing algorithm for bias mitigation.\n", - "- The debiasing function used is implemented in the 
`RejectOptionClassification` class.\n", - "- Divide the dataset into training, validation, and testing partitions.\n", - "- Train classifier on original training data.\n", - "- Estimate the optimal classification threshold, that maximizes balanced accuracy without fairness constraints.\n", - "- Estimate the optimal classification threshold, and the critical region boundary (ROC margin) using a validation set for the desired constraint on fairness. The best parameters are those that maximize the classification threshold while satisfying the fairness constraints.\n", - "- The constraints can be used on the following fairness measures:\n", - " * Statistical parity difference on the predictions of the classifier\n", - " * Average odds difference for the classifier\n", - " * Equal opportunity difference for the classifier\n", - "- Determine the prediction scores for testing data. Using the estimated optimal classification threshold, compute accuracy and fairness metrics.\n", - "- Using the determined optimal classification threshold and the ROC margin, adjust the predictions. Report accuracy and fairness metric on the new predictions." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "# Load all necessary packages\n", - "import sys\n", - "sys.path.append(\"../\")\n", - "import numpy as np\n", - "from tqdm import tqdm\n", - "from warnings import warn\n", - "\n", - "from aif360.datasets import BinaryLabelDataset\n", - "from aif360.datasets import AdultDataset, GermanDataset, CompasDataset\n", - "from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric\n", - "from aif360.metrics.utils import compute_boolean_conditioning_vector\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\\\n", - " import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas\n", - "from aif360.algorithms.postprocessing.reject_option_classification\\\n", - " import RejectOptionClassification\n", - "from common_utils import compute_metrics\n", - "\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.metrics import accuracy_score\n", - "\n", - "from IPython.display import Markdown, display\n", - "import matplotlib.pyplot as plt\n", - "from ipywidgets import interactive, FloatSlider" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Load dataset and specify options" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "## import dataset\n", - "dataset_used = \"adult\" # \"adult\", \"german\", \"compas\"\n", - "protected_attribute_used = 1 # 1, 2\n", - "\n", - "if dataset_used == \"adult\":\n", - "# dataset_orig = AdultDataset()\n", - " if protected_attribute_used == 1:\n", - " privileged_groups = [{'sex': 1}]\n", - " unprivileged_groups = [{'sex': 0}]\n", - " dataset_orig = load_preproc_data_adult(['sex'])\n", - " else:\n", - " privileged_groups = [{'race': 1}]\n", - " unprivileged_groups = [{'race': 
0}]\n", - " dataset_orig = load_preproc_data_adult(['race'])\n", - " \n", - "elif dataset_used == \"german\":\n", - "# dataset_orig = GermanDataset()\n", - " if protected_attribute_used == 1:\n", - " privileged_groups = [{'sex': 1}]\n", - " unprivileged_groups = [{'sex': 0}]\n", - " dataset_orig = load_preproc_data_german(['sex'])\n", - " else:\n", - " privileged_groups = [{'age': 1}]\n", - " unprivileged_groups = [{'age': 0}]\n", - " dataset_orig = load_preproc_data_german(['age'])\n", - " \n", - "elif dataset_used == \"compas\":\n", - "# dataset_orig = CompasDataset()\n", - " if protected_attribute_used == 1:\n", - " privileged_groups = [{'sex': 1}]\n", - " unprivileged_groups = [{'sex': 0}]\n", - " dataset_orig = load_preproc_data_compas(['sex'])\n", - " else:\n", - " privileged_groups = [{'race': 1}]\n", - " unprivileged_groups = [{'race': 0}] \n", - " dataset_orig = load_preproc_data_compas(['race'])\n", - "\n", - " \n", - "# Metric used (should be one of allowed_metrics)\n", - "metric_name = \"Statistical parity difference\"\n", - "\n", - "# Upper and lower bound on the fairness metric used\n", - "metric_ub = 0.05\n", - "metric_lb = -0.05\n", - " \n", - "#random seed for calibrated equal odds prediction\n", - "np.random.seed(1)\n", - "\n", - "# Verify metric name\n", - "allowed_metrics = [\"Statistical parity difference\",\n", - " \"Average odds difference\",\n", - " \"Equal opportunity difference\"]\n", - "if metric_name not in allowed_metrics:\n", - " raise ValueError(\"Metric name should be one of allowed metrics\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Split into train, test and validation" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the dataset and split into train and test\n", - "dataset_orig_train, dataset_orig_vt = dataset_orig.split([0.7], shuffle=True)\n", - "dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], 
shuffle=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Clean up training data and display properties of the data" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Training Dataset shape" + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_reject_option_classification.ipynb)" ], - "text/plain": [ - "" + "metadata": { + "id": "d7UMvwuqELCz" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s2106jSjEKag" + }, + "source": [ + "#### This notebook demonstrates the use of the Reject Option Classification (ROC) post-processing algorithm for bias mitigation.\n", + "- The debiasing function used is implemented in the `RejectOptionClassification` class.\n", + "- Divide the dataset into training, validation, and testing partitions.\n", + "- Train classifier on original training data.\n", + "- Estimate the optimal classification threshold, that maximizes balanced accuracy without fairness constraints.\n", + "- Estimate the optimal classification threshold, and the critical region boundary (ROC margin) using a validation set for the desired constraint on fairness. The best parameters are those that maximize the classification threshold while satisfying the fairness constraints.\n", + "- The constraints can be used on the following fairness measures:\n", + " * Statistical parity difference on the predictions of the classifier\n", + " * Average odds difference for the classifier\n", + " * Equal opportunity difference for the classifier\n", + "- Determine the prediction scores for testing data. 
Using the estimated optimal classification threshold, compute accuracy and fairness metrics.\n", + "- Using the determined optimal classification threshold and the ROC margin, adjust the predictions. Report accuracy and fairness metric on the new predictions." ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "(34189, 18)\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AEdfTuQQEKai" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "# Load all necessary packages\n", + "import sys\n", + "sys.path.append(\"../\")\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "from warnings import warn\n", + "\n", + "from aif360.datasets import BinaryLabelDataset\n", + "from aif360.datasets import AdultDataset, GermanDataset, CompasDataset\n", + "from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric\n", + "from aif360.metrics.utils import compute_boolean_conditioning_vector\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\\\n", + " import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas\n", + "from aif360.algorithms.postprocessing.reject_option_classification\\\n", + " import RejectOptionClassification\n", + "from common_utils import compute_metrics\n", + "\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "from IPython.display import Markdown, display\n", + "import matplotlib.pyplot as plt\n", + "from ipywidgets import interactive, FloatSlider" + ] }, { - "data": { - "text/markdown": [ - "#### Favorable and unfavorable labels" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "CJ5WlRxlEKak" + }, + "source": [ + "#### Load dataset and specify options" ] - }, - "metadata": {}, - "output_type": "display_data" }, 
{ - "name": "stdout", - "output_type": "stream", - "text": [ - "(1.0, 0.0)\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vGB2s9vKEKak" + }, + "outputs": [], + "source": [ + "## import dataset\n", + "dataset_used = \"adult\" # \"adult\", \"german\", \"compas\"\n", + "protected_attribute_used = 1 # 1, 2\n", + "\n", + "if dataset_used == \"adult\":\n", + "# dataset_orig = AdultDataset()\n", + " if protected_attribute_used == 1:\n", + " privileged_groups = [{'sex': 1}]\n", + " unprivileged_groups = [{'sex': 0}]\n", + " dataset_orig = load_preproc_data_adult(['sex'])\n", + " else:\n", + " privileged_groups = [{'race': 1}]\n", + " unprivileged_groups = [{'race': 0}]\n", + " dataset_orig = load_preproc_data_adult(['race'])\n", + "\n", + "elif dataset_used == \"german\":\n", + "# dataset_orig = GermanDataset()\n", + " if protected_attribute_used == 1:\n", + " privileged_groups = [{'sex': 1}]\n", + " unprivileged_groups = [{'sex': 0}]\n", + " dataset_orig = load_preproc_data_german(['sex'])\n", + " else:\n", + " privileged_groups = [{'age': 1}]\n", + " unprivileged_groups = [{'age': 0}]\n", + " dataset_orig = load_preproc_data_german(['age'])\n", + "\n", + "elif dataset_used == \"compas\":\n", + "# dataset_orig = CompasDataset()\n", + " if protected_attribute_used == 1:\n", + " privileged_groups = [{'sex': 1}]\n", + " unprivileged_groups = [{'sex': 0}]\n", + " dataset_orig = load_preproc_data_compas(['sex'])\n", + " else:\n", + " privileged_groups = [{'race': 1}]\n", + " unprivileged_groups = [{'race': 0}]\n", + " dataset_orig = load_preproc_data_compas(['race'])\n", + "\n", + "\n", + "# Metric used (should be one of allowed_metrics)\n", + "metric_name = \"Statistical parity difference\"\n", + "\n", + "# Upper and lower bound on the fairness metric used\n", + "metric_ub = 0.05\n", + "metric_lb = -0.05\n", + "\n", + "#random seed for calibrated equal odds prediction\n", + "np.random.seed(1)\n", + "\n", + "# Verify metric name\n", + 
"allowed_metrics = [\"Statistical parity difference\",\n", + " \"Average odds difference\",\n", + " \"Equal opportunity difference\"]\n", + "if metric_name not in allowed_metrics:\n", + " raise ValueError(\"Metric name should be one of allowed metrics\")" + ] }, { - "data": { - "text/markdown": [ - "#### Protected attribute names" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "MVg1w8UREKak" + }, + "source": [ + "#### Split into train, test and validation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TYG56PPqEKal" + }, + "outputs": [], + "source": [ + "# Get the dataset and split into train and test\n", + "dataset_orig_train, dataset_orig_vt = dataset_orig.split([0.7], shuffle=True)\n", + "dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "['sex']\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "4MCJ47rEEKal" + }, + "source": [ + "#### Clean up training data and display properties of the data" + ] }, { - "data": { - "text/markdown": [ - "#### Privileged and unprivileged protected attribute values" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XyFgyqgyEKal", + "outputId": "0a6f20cc-ab05-4bb3-c6fe-75497f1e558e" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Training Dataset shape" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(34189, 18)\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Favorable and unfavorable labels" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1.0, 0.0)\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Protected attribute 
names" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['sex']\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Privileged and unprivileged protected attribute values" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "([array([1.])], [array([0.])])\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Dataset feature names" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# print out some labels, names, etc.\n", + "display(Markdown(\"#### Training Dataset shape\"))\n", + "print(dataset_orig_train.features.shape)\n", + "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", + "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", + "display(Markdown(\"#### Protected attribute names\"))\n", + "print(dataset_orig_train.protected_attribute_names)\n", + "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", + "print(dataset_orig_train.privileged_protected_attributes,\n", + " dataset_orig_train.unprivileged_protected_attributes)\n", + "display(Markdown(\"#### Dataset feature names\"))\n", + "print(dataset_orig_train.feature_names)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "([array([1.])], 
[array([0.])])\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "komnLuKdEKam" + }, + "source": [ + "#### Metric for original training data" + ] }, { - "data": { - "text/markdown": [ - "#### Dataset feature names" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5o0lE46IEKam", + "outputId": "00af823c-3549-4def-f501-ab9501a91321" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Original training dataset" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Difference in mean outcomes between unprivileged and privileged groups = -0.190698\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Original training dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_train.mean_difference())" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" - ] - } - ], - "source": [ - "# print out some labels, names, etc.\n", - "display(Markdown(\"#### Training Dataset shape\"))\n", - "print(dataset_orig_train.features.shape)\n", - "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", - "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", - "display(Markdown(\"#### Protected attribute names\"))\n", - 
"print(dataset_orig_train.protected_attribute_names)\n", - "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", - "print(dataset_orig_train.privileged_protected_attributes, \n", - " dataset_orig_train.unprivileged_protected_attributes)\n", - "display(Markdown(\"#### Dataset feature names\"))\n", - "print(dataset_orig_train.feature_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Metric for original training data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Original training dataset" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "i_E0KBPAEKan" + }, + "source": [ + "### Train classifier on original data" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Difference in mean outcomes between unprivileged and privileged groups = -0.190698\n" - ] - } - ], - "source": [ - "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Original training dataset\"))\n", - "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_train.mean_difference())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Train classifier on original data" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Logistic regression classifier and predictions\n", - "scale_orig = StandardScaler()\n", - "X_train = scale_orig.fit_transform(dataset_orig_train.features)\n", - "y_train = dataset_orig_train.labels.ravel()\n", - "\n", - "lmod = LogisticRegression()\n", - "lmod.fit(X_train, y_train)\n", - "y_train_pred = lmod.predict(X_train)\n", - "\n", - "# positive class 
index\n", - "pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]\n", - "\n", - "dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)\n", - "dataset_orig_train_pred.labels = y_train_pred" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Obtain scores for validation and test sets" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)\n", - "X_valid = scale_orig.transform(dataset_orig_valid_pred.features)\n", - "y_valid = dataset_orig_valid_pred.labels\n", - "dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)\n", - "\n", - "dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)\n", - "X_test = scale_orig.transform(dataset_orig_test_pred.features)\n", - "y_test = dataset_orig_test_pred.labels\n", - "dataset_orig_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Find the optimal parameters from the validation set" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Best threshold for classification only (no fairness)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Best balanced accuracy (no fairness constraints) = 0.7473\n", - "Optimal classification threshold (no fairness constraints) = 0.2674\n" - ] - } - ], - "source": [ - "num_thresh = 100\n", - "ba_arr = np.zeros(num_thresh)\n", - "class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)\n", - "for idx, class_thresh in enumerate(class_thresh_arr):\n", - " \n", - " fav_inds = dataset_orig_valid_pred.scores > class_thresh\n", - " dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label\n", - " 
dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label\n", - " \n", - " classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,\n", - " dataset_orig_valid_pred, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - " \n", - " ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\\\n", - " +classified_metric_orig_valid.true_negative_rate())\n", - "\n", - "best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]\n", - "best_class_thresh = class_thresh_arr[best_ind]\n", - "\n", - "print(\"Best balanced accuracy (no fairness constraints) = %.4f\" % np.max(ba_arr))\n", - "print(\"Optimal classification threshold (no fairness constraints) = %.4f\" % best_class_thresh)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Estimate optimal parameters for the ROC method" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "ROC = RejectOptionClassification(unprivileged_groups=unprivileged_groups, \n", - " privileged_groups=privileged_groups, \n", - " low_class_thresh=0.01, high_class_thresh=0.99,\n", - " num_class_thresh=100, num_ROC_margin=50,\n", - " metric_name=metric_name,\n", - " metric_ub=metric_ub, metric_lb=metric_lb)\n", - "ROC = ROC.fit(dataset_orig_valid, dataset_orig_valid_pred)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Optimal classification threshold (with fairness constraints) = 0.5049\n", - "Optimal ROC margin = 0.1819\n" - ] - } - ], - "source": [ - "print(\"Optimal classification threshold (with fairness constraints) = %.4f\" % ROC.classification_threshold)\n", - "print(\"Optimal ROC margin = %.4f\" % ROC.ROC_margin)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Predictions from Validation Set" - ] - }, - { - 
"cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Validation set" - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5lY_98bAEKan" + }, + "outputs": [], + "source": [ + "# Logistic regression classifier and predictions\n", + "scale_orig = StandardScaler()\n", + "X_train = scale_orig.fit_transform(dataset_orig_train.features)\n", + "y_train = dataset_orig_train.labels.ravel()\n", + "\n", + "lmod = LogisticRegression()\n", + "lmod.fit(X_train, y_train)\n", + "y_train_pred = lmod.predict(X_train)\n", + "\n", + "# positive class index\n", + "pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]\n", + "\n", + "dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)\n", + "dataset_orig_train_pred.labels = y_train_pred" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "##### Raw predictions - No fairness constraints, only maximizing balanced accuracy" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "BDMk25uDEKan" + }, + "source": [ + "#### Obtain scores for validation and test sets" ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Balanced accuracy = 0.7473\n", - "Statistical parity difference = -0.3703\n", - "Disparate impact = 0.2687\n", - "Average odds difference = -0.2910\n", - "Equal opportunity difference = -0.3066\n", - "Theil index = 0.1123\n" - ] - } - ], - "source": [ - "# Metrics for the test set\n", - "fav_inds = dataset_orig_valid_pred.scores > best_class_thresh\n", - "dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label\n", - "dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label\n", - "\n", - "display(Markdown(\"#### Validation set\"))\n", - "display(Markdown(\"##### Raw predictions - 
No fairness constraints, only maximizing balanced accuracy\"))\n", - "\n", - "metric_valid_bef = compute_metrics(dataset_orig_valid, dataset_orig_valid_pred, \n", - " unprivileged_groups, privileged_groups)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Validation set" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V5VmAEHaEKan" + }, + "outputs": [], + "source": [ + "dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)\n", + "X_valid = scale_orig.transform(dataset_orig_valid_pred.features)\n", + "y_valid = dataset_orig_valid_pred.labels\n", + "dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)\n", + "\n", + "dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)\n", + "X_test = scale_orig.transform(dataset_orig_test_pred.features)\n", + "y_test = dataset_orig_test_pred.labels\n", + "dataset_orig_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nT335R3VEKan" + }, + "source": [ + "### Find the optimal parameters from the validation set" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0QP_Z4FfEKan" + }, + "source": [ + "#### Best threshold for classification only (no fairness)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TBCG4ciYEKao", + "outputId": "9e521331-9e23-4a65-8854-11f317c532dd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best balanced accuracy (no fairness constraints) = 0.7473\n", + "Optimal classification threshold (no fairness constraints) = 0.2674\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "num_thresh = 100\n", + "ba_arr = np.zeros(num_thresh)\n", + "class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)\n", + "for idx, class_thresh in enumerate(class_thresh_arr):\n", + "\n", 
+ " fav_inds = dataset_orig_valid_pred.scores > class_thresh\n", + " dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label\n", + " dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label\n", + "\n", + " classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,\n", + " dataset_orig_valid_pred,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "\n", + " ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\\\n", + " +classified_metric_orig_valid.true_negative_rate())\n", + "\n", + "best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]\n", + "best_class_thresh = class_thresh_arr[best_ind]\n", + "\n", + "print(\"Best balanced accuracy (no fairness constraints) = %.4f\" % np.max(ba_arr))\n", + "print(\"Optimal classification threshold (no fairness constraints) = %.4f\" % best_class_thresh)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "##### Transformed predictions - With fairness constraints" + "cell_type": "markdown", + "metadata": { + "id": "uLSLLz9LEKao" + }, + "source": [ + "#### Estimate optimal parameters for the ROC method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CHnwHA7MEKao" + }, + "outputs": [], + "source": [ + "ROC = RejectOptionClassification(unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups,\n", + " low_class_thresh=0.01, high_class_thresh=0.99,\n", + " num_class_thresh=100, num_ROC_margin=50,\n", + " metric_name=metric_name,\n", + " metric_ub=metric_ub, metric_lb=metric_lb)\n", + "ROC = ROC.fit(dataset_orig_valid, dataset_orig_valid_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V_mjRe-MEKao", + "outputId": "45c22432-37ac-4653-cd78-18a4d0d2e8c3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimal 
classification threshold (with fairness constraints) = 0.5049\n", + "Optimal ROC margin = 0.1819\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "print(\"Optimal classification threshold (with fairness constraints) = %.4f\" % ROC.classification_threshold)\n", + "print(\"Optimal ROC margin = %.4f\" % ROC.ROC_margin)" ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Balanced accuracy = 0.6051\n", - "Statistical parity difference = -0.0436\n", - "Disparate impact = 0.6107\n", - "Average odds difference = -0.0049\n", - "Equal opportunity difference = -0.0136\n", - "Theil index = 0.2184\n" - ] - } - ], - "source": [ - "# Transform the validation set\n", - "dataset_transf_valid_pred = ROC.predict(dataset_orig_valid_pred)\n", - "\n", - "display(Markdown(\"#### Validation set\"))\n", - "display(Markdown(\"##### Transformed predictions - With fairness constraints\"))\n", - "metric_valid_aft = compute_metrics(dataset_orig_valid, dataset_transf_valid_pred, \n", - " unprivileged_groups, privileged_groups)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "# Testing: Check if the metric optimized has not become worse\n", - "assert np.abs(metric_valid_aft[metric_name]) <= np.abs(metric_valid_bef[metric_name])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Predictions from Test Set" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Test set" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xrKBT_JtEKao" + }, + "source": [ + "### Predictions from Validation Set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jaNwAJVjEKap", + "outputId": "05ff8be0-0c2d-40c3-fb0f-422014a0c6f8" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Validation set" + ], + 
"text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "##### Raw predictions - No fairness constraints, only maximizing balanced accuracy" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Balanced accuracy = 0.7473\n", + "Statistical parity difference = -0.3703\n", + "Disparate impact = 0.2687\n", + "Average odds difference = -0.2910\n", + "Equal opportunity difference = -0.3066\n", + "Theil index = 0.1123\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# Metrics for the validation set\n", + "fav_inds = dataset_orig_valid_pred.scores > best_class_thresh\n", + "dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label\n", + "dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label\n", + "\n", + "display(Markdown(\"#### Validation set\"))\n", + "display(Markdown(\"##### Raw predictions - No fairness constraints, only maximizing balanced accuracy\"))\n", + "\n", + "metric_valid_bef = compute_metrics(dataset_orig_valid, dataset_orig_valid_pred,\n", + " unprivileged_groups, privileged_groups)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "##### Raw predictions - No fairness constraints, only maximizing balanced accuracy" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Cah4xxAXEKap", + "outputId": "3b2f3e3b-6970-4222-b404-852c55df3f52" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Validation set" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "##### Transformed predictions - With fairness constraints" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ +
"Balanced accuracy = 0.6051\n", + "Statistical parity difference = -0.0436\n", + "Disparate impact = 0.6107\n", + "Average odds difference = -0.0049\n", + "Equal opportunity difference = -0.0136\n", + "Theil index = 0.2184\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# Transform the validation set\n", + "dataset_transf_valid_pred = ROC.predict(dataset_orig_valid_pred)\n", + "\n", + "display(Markdown(\"#### Validation set\"))\n", + "display(Markdown(\"##### Transformed predictions - With fairness constraints\"))\n", + "metric_valid_aft = compute_metrics(dataset_orig_valid, dataset_transf_valid_pred,\n", + " unprivileged_groups, privileged_groups)" ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Balanced accuracy = 0.7417\n", - "Statistical parity difference = -0.3576\n", - "Disparate impact = 0.2774\n", - "Average odds difference = -0.3281\n", - "Equal opportunity difference = -0.4001\n", - "Theil index = 0.1128\n" - ] - } - ], - "source": [ - "# Metrics for the test set\n", - "fav_inds = dataset_orig_test_pred.scores > best_class_thresh\n", - "dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label\n", - "dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label\n", - "\n", - "display(Markdown(\"#### Test set\"))\n", - "display(Markdown(\"##### Raw predictions - No fairness constraints, only maximizing balanced accuracy\"))\n", - "\n", - "metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, \n", - " unprivileged_groups, privileged_groups)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Test set" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "URUEY2y5EKap" + }, + "outputs": [], + "source": [ + "# Testing: Check if the metric optimized has not become worse\n", + "assert 
np.abs(metric_valid_aft[metric_name]) <= np.abs(metric_valid_bef[metric_name])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5ZtFhlFIEKap" + }, + "source": [ + "### Predictions from Test Set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ylh6DYw6EKap", + "outputId": "0adb3168-8b07-40fd-8ab6-0cedf6f3e93d" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Test set" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "##### Raw predictions - No fairness constraints, only maximizing balanced accuracy" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Balanced accuracy = 0.7417\n", + "Statistical parity difference = -0.3576\n", + "Disparate impact = 0.2774\n", + "Average odds difference = -0.3281\n", + "Equal opportunity difference = -0.4001\n", + "Theil index = 0.1128\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# Metrics for the test set\n", + "fav_inds = dataset_orig_test_pred.scores > best_class_thresh\n", + "dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label\n", + "dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label\n", + "\n", + "display(Markdown(\"#### Test set\"))\n", + "display(Markdown(\"##### Raw predictions - No fairness constraints, only maximizing balanced accuracy\"))\n", + "\n", + "metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred,\n", + " unprivileged_groups, privileged_groups)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/markdown": [ - "##### Transformed predictions - With fairness constraints" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0tf5iP1qEKap", + "outputId": "21b8acdc-8806-47bd-8b2b-d13aacc7a75a" + }, + "outputs": [ + { + 
"data": { + "text/markdown": [ + "#### Test set" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "##### Transformed predictions - With fairness constraints" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Balanced accuracy = 0.5968\n", + "Statistical parity difference = -0.0340\n", + "Disparate impact = 0.6932\n", + "Average odds difference = -0.0151\n", + "Equal opportunity difference = -0.0415\n", + "Theil index = 0.2133\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# Metrics for the transformed test set\n", + "dataset_transf_test_pred = ROC.predict(dataset_orig_test_pred)\n", + "\n", + "display(Markdown(\"#### Test set\"))\n", + "display(Markdown(\"##### Transformed predictions - With fairness constraints\"))\n", + "metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_pred,\n", + " unprivileged_groups, privileged_groups)" ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Balanced accuracy = 0.5968\n", - "Statistical parity difference = -0.0340\n", - "Disparate impact = 0.6932\n", - "Average odds difference = -0.0151\n", - "Equal opportunity difference = -0.0415\n", - "Theil index = 0.2133\n" - ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aAHHyobOEKaq" + }, + "source": [ + "# Summary of Optimal Parameters\n", + "We show the optimal parameters for all combinations of metrics optimized, datasets, and protected attributes below." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SdxWpeoLEKaq" + }, + "source": [ + "### Fairness Metric: Statistical parity difference, Accuracy Metric: Balanced accuracy\n", + "\n", + "#### Performance\n", + "\n", + "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", + "|-|-|-|-|-|-|-|-|-|\n", + "|Adult (Valid)|0.7473|0.6051|-0.3703|-0.0436|0.7473|0.6198|-0.2226|-0.0007|\n", + "|Adult (Test)|0.7417|0.5968|-0.3576|-0.0340|0.7417|0.6202|-0.2279|0.0006|\n", + "|German (Valid)|0.6930|0.6991|-0.0613|0.0429|0.6930|0.6607|-0.2525|-0.0328|\n", + "|German (Test)|0.6524|0.6460|-0.0025|0.0410|0.6524|0.6317|-0.3231|-0.1038|\n", + "|Compas (Valid)|0.6599|0.6400|-0.2802|0.0234|0.6599|0.6646|-0.3225|-0.0471|\n", + "|Compas (Test)|0.6774|0.6746|-0.2724|-0.0313|0.6774|0.6512|-0.2494|0.0578|\n", + "\n", + "#### Optimal Parameters\n", + "\n", + "| Dataset |Sex (Class. thresh.)|Sex (Class. thresh. - fairness)|Sex (ROC margin - fairness)| Race/Age (Class. thresh.)|Race/Age (Class. thresh. 
- fairness)|Race/Age (ROC margin - fairness)|\n", + "|-|-|-|-|-|-|-|\n", + "|Adult|0.2674|0.5049|0.1819|0.2674|0.5049|0.0808|\n", + "|German|0.6732|0.6237|0.0538|0.6732|0.7029|0.0728|\n", + "|Compas|0.5148|0.5841|0.0679|0.5148|0.5841|0.0679|" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gyGBhpfDEKaq" + }, + "source": [ + "### Fairness Metric: Average odds difference, Accuracy Metric: Balanced accuracy\n", + "\n", + "#### Performance\n", + "\n", + "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", + "|-|-|-|-|-|-|-|-|-|\n", + "|Adult (Valid)|0.7473|0.6058|-0.2910|-0.0385|0.7473|0.6593|-0.1947|-0.0444|\n", + "|Adult (Test)|0.7417|0.6024|-0.3281|-0.0438|0.7417|0.6611|-0.1991|-0.0121|\n", + "|German (Valid)|0.6930|0.6930|-0.0039|-0.0039|0.6930|0.6807|-0.0919|-0.0193|\n", + "|German (Test)|0.6524|0.6571|0.0071|0.0237|0.6524|0.6587|-0.3278|-0.2708|\n", + "|Compas (Valid)|0.6599|0.6416|-0.2285|-0.0332|0.6599|0.6646|-0.2918|-0.0105|\n", + "|Compas (Test)|0.6774|0.6721|-0.2439|-0.0716|0.6774|0.6512|-0.1927|0.1145|\n", + "\n", + "#### Optimal Parameters\n", + "\n", + "| Dataset |Sex (Class. thresh.)|Sex (Class. thresh. - fairness)|Sex (ROC margin - fairness)| Race/Age (Class. thresh.)|Race/Age (Class. thresh. 
- fairness)|Race/Age (ROC margin - fairness)|\n", + "|-|-|-|-|-|-|-|\n", + "|Adult|0.2674|0.5049|0.1212|0.2674|0.5049|0.0505|\n", + "|German|0.6732|0.6633|0.0137|0.6732|0.6732|0.0467|\n", + "|Compas|0.5148|0.5742|0.0608|0.5148|0.5841|0.0679|\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uU7vi8qpEKaq" + }, + "source": [ + "### Fairness Metric: Equal opportunity difference, Accuracy Metric: Balanced accuracy\n", + "\n", + "#### Performance\n", + "\n", + "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", + "|-|-|-|-|-|-|-|-|-|\n", + "|Adult (Valid)|0.7473|0.6051|-0.3066|-0.0136|0.7473|0.6198|-0.2285|0.0287|\n", + "|Adult (Test)|0.7417|0.5968|-0.4001|-0.0415|0.7417|0.6202|-0.2165|0.1193|\n", + "|German (Valid)|0.6930|0.6930|-0.0347|-0.0347|0.6930|0.6597|0.1162|-0.0210|\n", + "|German (Test)|0.6524|0.6571|0.0400|0.0733|0.6524|0.6190|-0.3556|-0.4333|\n", + "|Compas (Valid)|0.6599|0.6416|-0.1938|0.0244|0.6599|0.6646|-0.2315|0.0002|\n", + "|Compas (Test)|0.6774|0.6721|-0.1392|0.0236|0.6774|0.6512|-0.1877|0.1196|\n", + "\n", + "#### Optimal Parameters\n", + "\n", + "| Dataset |Sex (Class. thresh.)|Sex (Class. thresh. - fairness)|Sex (ROC margin - fairness)| Race/Age (Class. thresh.)|Race/Age (Class. thresh. 
- fairness)|Race/Age (ROC margin - fairness)|\n", + "|-|-|-|-|-|-|-|\n", + "|Adult|0.2674|0.5049|0.1819|0.2674|0.5049|0.0808|\n", + "|German|0.6732|0.6633|0.0137|0.6732|0.6039|0.0000|\n", + "|Compas|0.5148|0.5742|0.0608|0.5148|0.5841|0.0679|\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zq-eTEOlEKaq" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.10" + }, + "colab": { + "provenance": [] } - ], - "source": [ - "# Metrics for the transformed test set\n", - "dataset_transf_test_pred = ROC.predict(dataset_orig_test_pred)\n", - "\n", - "display(Markdown(\"#### Test set\"))\n", - "display(Markdown(\"##### Transformed predictions - With fairness constraints\"))\n", - "metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_pred, \n", - " unprivileged_groups, privileged_groups)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Summary of Optimal Parameters\n", - "We show the optimal parameters for all combinations of metrics optimized, datasets, and protected attributes below." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fairness Metric: Statistical parity difference, Accuracy Metric: Balanced accuracy\n", - "\n", - "#### Performance\n", - "\n", - "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", - "|-|-|-|-|-|-|-|-|-|\n", - "|Adult (Valid)|0.7473|0.6051|-0.3703|-0.0436|0.7473|0.6198|-0.2226|-0.0007|\n", - "|Adult (Test)|0.7417|0.5968|-0.3576|-0.0340|0.7417|0.6202|-0.2279|0.0006|\n", - "|German (Valid)|0.6930|0.6991|-0.0613|0.0429|0.6930|0.6607|-0.2525|-0.0328|\n", - "|German (Test)|0.6524|0.6460|-0.0025|0.0410|0.6524|0.6317|-0.3231|-0.1038|\n", - "|Compas (Valid)|0.6599|0.6400|-0.2802|0.0234|0.6599|0.6646|-0.3225|-0.0471|\n", - "|Compas (Test)|0.6774|0.6746|-0.2724|-0.0313|0.6774|0.6512|-0.2494|0.0578|\n", - "\n", - "#### Optimal Parameters\n", - "\n", - "| Dataset |Sex (Class. thresh.)|Sex (Class. thresh. - fairness)|Sex (ROC margin - fairness)| Race/Age (Class. thresh.)|Race/Age (Class. thresh. 
- fairness)|Race/Age (ROC margin - fairness)|\n", - "|-|-|-|-|-|-|-|\n", - "|Adult|0.2674|0.5049|0.1819|0.2674|0.5049|0.0808|\n", - "|German|0.6732|0.6237|0.0538|0.6732|0.7029|0.0728|\n", - "|Compas|0.5148|0.5841|0.0679|0.5148|0.5841|0.0679|" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fairness Metric: Average odds difference, Accuracy Metric: Balanced accuracy\n", - "\n", - "#### Performance\n", - "\n", - "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", - "|-|-|-|-|-|-|-|-|-|\n", - "|Adult (Valid)|0.7473|0.6058|-0.2910|-0.0385|0.7473|0.6593|-0.1947|-0.0444|\n", - "|Adult (Test)|0.7417|0.6024|-0.3281|-0.0438|0.7417|0.6611|-0.1991|-0.0121|\n", - "|German (Valid)|0.6930|0.6930|-0.0039|-0.0039|0.6930|0.6807|-0.0919|-0.0193|\n", - "|German (Test)|0.6524|0.6571|0.0071|0.0237|0.6524|0.6587|-0.3278|-0.2708|\n", - "|Compas (Valid)|0.6599|0.6416|-0.2285|-0.0332|0.6599|0.6646|-0.2918|-0.0105|\n", - "|Compas (Test)|0.6774|0.6721|-0.2439|-0.0716|0.6774|0.6512|-0.1927|0.1145|\n", - "\n", - "#### Optimal Parameters\n", - "\n", - "| Dataset |Sex (Class. thresh.)|Sex (Class. thresh. - fairness)|Sex (ROC margin - fairness)| Race/Age (Class. thresh.)|Race/Age (Class. thresh. 
- fairness)|Race/Age (ROC margin - fairness)|\n", - "|-|-|-|-|-|-|-|\n", - "|Adult|0.2674|0.5049|0.1212|0.2674|0.5049|0.0505|\n", - "|German|0.6732|0.6633|0.0137|0.6732|0.6732|0.0467|\n", - "|Compas|0.5148|0.5742|0.0608|0.5148|0.5841|0.0679|\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fairness Metric: Equal opportunity difference, Accuracy Metric: Balanced accuracy\n", - "\n", - "#### Performance\n", - "\n", - "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", - "|-|-|-|-|-|-|-|-|-|\n", - "|Adult (Valid)|0.7473|0.6051|-0.3066|-0.0136|0.7473|0.6198|-0.2285|0.0287|\n", - "|Adult (Test)|0.7417|0.5968|-0.4001|-0.0415|0.7417|0.6202|-0.2165|0.1193|\n", - "|German (Valid)|0.6930|0.6930|-0.0347|-0.0347|0.6930|0.6597|0.1162|-0.0210|\n", - "|German (Test)|0.6524|0.6571|0.0400|0.0733|0.6524|0.6190|-0.3556|-0.4333|\n", - "|Compas (Valid)|0.6599|0.6416|-0.1938|0.0244|0.6599|0.6646|-0.2315|0.0002|\n", - "|Compas (Test)|0.6774|0.6721|-0.1392|0.0236|0.6774|0.6512|-0.1877|0.1196|\n", - "\n", - "#### Optimal Parameters\n", - "\n", - "| Dataset |Sex (Class. thresh.)|Sex (Class. thresh. - fairness)|Sex (ROC margin - fairness)| Race/Age (Class. thresh.)|Race/Age (Class. thresh. 
- fairness)|Race/Age (ROC margin - fairness)|\n", - "|-|-|-|-|-|-|-|\n", - "|Adult|0.2674|0.5049|0.1819|0.2674|0.5049|0.0808|\n", - "|German|0.6732|0.6633|0.0137|0.6732|0.6039|0.0000|\n", - "|Compas|0.5148|0.5742|0.0608|0.5148|0.5841|0.0679|\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file diff --git a/examples/demo_reweighing_preproc.ipynb b/examples/demo_reweighing_preproc.ipynb index 25b3a9af..3af0fb92 100644 --- a/examples/demo_reweighing_preproc.ipynb +++ b/examples/demo_reweighing_preproc.ipynb @@ -1,956 +1,1067 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### This notebook demonstrates the use of a reweighing pre-processing algorithm for bias mitigation\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "# Load all necessary packages\n", - "import sys\n", - "sys.path.append(\"../\")\n", - "import numpy as np\n", - "from tqdm import tqdm\n", - "\n", - "from aif360.datasets import BinaryLabelDataset\n", - "from aif360.datasets import AdultDataset, GermanDataset, CompasDataset\n", - "from aif360.metrics import BinaryLabelDatasetMetric\n", - "from aif360.metrics import ClassificationMetric\n", - "from aif360.algorithms.preprocessing.reweighing import Reweighing\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\\\n", - " import load_preproc_data_adult, 
load_preproc_data_german, load_preproc_data_compas\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.metrics import accuracy_score\n", - "\n", - "from IPython.display import Markdown, display\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from common_utils import compute_metrics" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Load dataset and set options" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "## import dataset\n", - "dataset_used = \"adult\" # \"adult\", \"german\", \"compas\"\n", - "protected_attribute_used = 1 # 1, 2\n", - "\n", - "\n", - "if dataset_used == \"adult\":\n", - "# dataset_orig = AdultDataset()\n", - " if protected_attribute_used == 1:\n", - " privileged_groups = [{'sex': 1}]\n", - " unprivileged_groups = [{'sex': 0}]\n", - " dataset_orig = load_preproc_data_adult(['sex'])\n", - " else:\n", - " privileged_groups = [{'race': 1}]\n", - " unprivileged_groups = [{'race': 0}]\n", - " dataset_orig = load_preproc_data_adult(['race'])\n", - " \n", - "elif dataset_used == \"german\":\n", - "# dataset_orig = GermanDataset()\n", - " if protected_attribute_used == 1:\n", - " privileged_groups = [{'sex': 1}]\n", - " unprivileged_groups = [{'sex': 0}]\n", - " dataset_orig = load_preproc_data_german(['sex'])\n", - " else:\n", - " privileged_groups = [{'age': 1}]\n", - " unprivileged_groups = [{'age': 0}]\n", - " dataset_orig = load_preproc_data_german(['age'])\n", - " \n", - "elif dataset_used == \"compas\":\n", - "# dataset_orig = CompasDataset()\n", - " if protected_attribute_used == 1:\n", - " privileged_groups = [{'sex': 1}]\n", - " unprivileged_groups = [{'sex': 0}]\n", - " dataset_orig = load_preproc_data_compas(['sex'])\n", - " else:\n", - " privileged_groups = [{'race': 1}]\n", - " unprivileged_groups = [{'race': 0}]\n", - " dataset_orig = 
load_preproc_data_compas(['race'])\n", - "\n", - "all_metrics = [\"Statistical parity difference\",\n", - " \"Average odds difference\",\n", - " \"Equal opportunity difference\"]\n", - "\n", - "#random seed for calibrated equal odds prediction\n", - "np.random.seed(1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Split into train, and test" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the dataset and split into train and test\n", - "dataset_orig_train, dataset_orig_vt = dataset_orig.split([0.7], shuffle=True)\n", - "dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Clean up training data" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Training Dataset shape" + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_reweighing_preproc.ipynb)" ], - "text/plain": [ - "" + "metadata": { + "id": "kld2ROMiEqjG" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2wa7Lj9UEqAS" + }, + "source": [ + "#### This notebook demonstrates the use of a reweighing pre-processing algorithm for bias mitigation\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Hb5z2N-nEqAU" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "# Load all necessary packages\n", + "import sys\n", + "sys.path.append(\"../\")\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "\n", + "from aif360.datasets import BinaryLabelDataset\n", + "from aif360.datasets import AdultDataset, GermanDataset, CompasDataset\n", + "from aif360.metrics import 
BinaryLabelDatasetMetric\n", + "from aif360.metrics import ClassificationMetric\n", + "from aif360.algorithms.preprocessing.reweighing import Reweighing\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\\\n", + " import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "from IPython.display import Markdown, display\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from common_utils import compute_metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z1Q8uhveEqAV" + }, + "source": [ + "#### Load dataset and set options" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aITNAPt7EqAV" + }, + "outputs": [], + "source": [ + "## import dataset\n", + "dataset_used = \"adult\" # \"adult\", \"german\", \"compas\"\n", + "protected_attribute_used = 1 # 1, 2\n", + "\n", + "\n", + "if dataset_used == \"adult\":\n", + "# dataset_orig = AdultDataset()\n", + " if protected_attribute_used == 1:\n", + " privileged_groups = [{'sex': 1}]\n", + " unprivileged_groups = [{'sex': 0}]\n", + " dataset_orig = load_preproc_data_adult(['sex'])\n", + " else:\n", + " privileged_groups = [{'race': 1}]\n", + " unprivileged_groups = [{'race': 0}]\n", + " dataset_orig = load_preproc_data_adult(['race'])\n", + "\n", + "elif dataset_used == \"german\":\n", + "# dataset_orig = GermanDataset()\n", + " if protected_attribute_used == 1:\n", + " privileged_groups = [{'sex': 1}]\n", + " unprivileged_groups = [{'sex': 0}]\n", + " dataset_orig = load_preproc_data_german(['sex'])\n", + " else:\n", + " privileged_groups = [{'age': 1}]\n", + " unprivileged_groups = [{'age': 0}]\n", + " dataset_orig = load_preproc_data_german(['age'])\n", + "\n", + "elif dataset_used == \"compas\":\n", + "# dataset_orig = 
CompasDataset()\n", + " if protected_attribute_used == 1:\n", + " privileged_groups = [{'sex': 1}]\n", + " unprivileged_groups = [{'sex': 0}]\n", + " dataset_orig = load_preproc_data_compas(['sex'])\n", + " else:\n", + " privileged_groups = [{'race': 1}]\n", + " unprivileged_groups = [{'race': 0}]\n", + " dataset_orig = load_preproc_data_compas(['race'])\n", + "\n", + "all_metrics = [\"Statistical parity difference\",\n", + " \"Average odds difference\",\n", + " \"Equal opportunity difference\"]\n", + "\n", + "# random seed for reproducibility\n", + "np.random.seed(1)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "(34189, 18)\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "1gdWRyHiEqAV" + }, + "source": [ + "#### Split into train, validation, and test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-y91WRx8EqAW" + }, + "outputs": [], + "source": [ + "# Get the dataset and split into train, validation, and test\n", + "dataset_orig_train, dataset_orig_vt = dataset_orig.split([0.7], shuffle=True)\n", + "dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jS5e2yI1EqAW" + }, + "source": [ + "#### Clean up training data" + ] }, { - "data": { - "text/markdown": [ - "#### Favorable and unfavorable labels" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hNo6RAZVEqAW", + "outputId": "9048b4d5-2c26-45c4-884b-48b503483891" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Training Dataset shape" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(34189, 18)\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Favorable and unfavorable labels" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": 
"display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1.0, 0.0)\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Protected attribute names" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['sex']\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Privileged and unprivileged protected attribute values" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "([array([1.])], [array([0.])])\n" + ] + }, + { + "data": { + "text/markdown": [ + "#### Dataset feature names" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# print out some labels, names, etc.\n", + "display(Markdown(\"#### Training Dataset shape\"))\n", + "print(dataset_orig_train.features.shape)\n", + "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", + "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", + "display(Markdown(\"#### Protected attribute names\"))\n", + "print(dataset_orig_train.protected_attribute_names)\n", + "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", + "print(dataset_orig_train.privileged_protected_attributes,\n", + " dataset_orig_train.unprivileged_protected_attributes)\n", + "display(Markdown(\"#### Dataset feature names\"))\n", + 
"print(dataset_orig_train.feature_names)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1.0, 0.0)\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "2BpxVxkREqAX" + }, + "source": [ + "#### Metric for original training data" + ] }, { - "data": { - "text/markdown": [ - "#### Protected attribute names" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LXkJXK9uEqAX", + "outputId": "60fc518a-f5c8-462b-d1df-d9415104cb3f" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Original training dataset" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Difference in mean outcomes between unprivileged and privileged groups = -0.190698\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# Metric for the original dataset\n", + "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Original training dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_train.mean_difference())" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "['sex']\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "SYH2uoveEqAX" + }, + "source": [ + "#### Train with and transform the original training data" + ] }, { - "data": { - "text/markdown": [ - "#### Privileged and unprivileged protected attribute values" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oriR9WsVEqAX" + }, + "outputs": [], + "source": [ + "RW = Reweighing(unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "RW.fit(dataset_orig_train)\n", + "dataset_transf_train = 
RW.transform(dataset_orig_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KzmPD_aFEqAX" + }, + "outputs": [], + "source": [ + "### Testing\n", + "assert np.abs(dataset_transf_train.instance_weights.sum()-dataset_orig_train.instance_weights.sum())<1e-6" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1jvdO21CEqAX" + }, + "source": [ + "#### Metric with the transformed training data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1k_NnwKHEqAY", + "outputId": "8b43d1c5-cd8c-4855-f3bb-138e84fa2708" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Transformed training dataset" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Difference in mean outcomes between unprivileged and privileged groups = -0.000000\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Transformed training dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_transf_train.mean_difference())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vkbk8fMhEqAY" + }, + "outputs": [], + "source": [ + "### Testing\n", + "assert np.abs(metric_transf_train.mean_difference()) < 1e-6" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5BtkCp43EqAY" + }, + "source": [ + "### Train classifier on original data" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "([array([1.])], [array([0.])])\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i0GS7CqmEqAY" + }, + "outputs": [], + 
"source": [ + "# Logistic regression classifier and predictions\n", + "scale_orig = StandardScaler()\n", + "X_train = scale_orig.fit_transform(dataset_orig_train.features)\n", + "y_train = dataset_orig_train.labels.ravel()\n", + "w_train = dataset_orig_train.instance_weights.ravel()\n", + "\n", + "lmod = LogisticRegression()\n", + "lmod.fit(X_train, y_train,\n", + " sample_weight=dataset_orig_train.instance_weights)\n", + "y_train_pred = lmod.predict(X_train)\n", + "\n", + "# positive class index\n", + "pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]\n", + "\n", + "dataset_orig_train_pred = dataset_orig_train.copy()\n", + "dataset_orig_train_pred.labels = y_train_pred" + ] }, { - "data": { - "text/markdown": [ - "#### Dataset feature names" + "cell_type": "markdown", + "metadata": { + "id": "jkx3ZxtuEqAY" + }, + "source": [ + "#### Obtain scores for original validation and test sets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t91rkccAEqAY" + }, + "outputs": [], + "source": [ + "dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)\n", + "X_valid = scale_orig.transform(dataset_orig_valid_pred.features)\n", + "y_valid = dataset_orig_valid_pred.labels\n", + "dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)\n", + "\n", + "dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)\n", + "X_test = scale_orig.transform(dataset_orig_test_pred.features)\n", + "y_test = dataset_orig_test_pred.labels\n", + "dataset_orig_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xYPWUCrLEqAY" + }, + "source": [ + "### Find the optimal classification threshold from the validation set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sltBSivLEqAY", + "outputId": "02ae46c7-41a9-494d-bf76-a9743a828482" + }, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Best balanced accuracy (no reweighing) = 0.7473\n", + "Optimal classification threshold (no reweighing) = 0.2674\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "num_thresh = 100\n", + "ba_arr = np.zeros(num_thresh)\n", + "class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)\n", + "for idx, class_thresh in enumerate(class_thresh_arr):\n", + "\n", + " fav_inds = dataset_orig_valid_pred.scores > class_thresh\n", + " dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label\n", + " dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label\n", + "\n", + " classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,\n", + " dataset_orig_valid_pred,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "\n", + " ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\\\n", + " +classified_metric_orig_valid.true_negative_rate())\n", + "\n", + "best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]\n", + "best_class_thresh = class_thresh_arr[best_ind]\n", + "\n", + "print(\"Best balanced accuracy (no reweighing) = %.4f\" % np.max(ba_arr))\n", + "print(\"Optimal classification threshold (no reweighing) = %.4f\" % best_class_thresh)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" - ] - } - ], - "source": [ - "# print out some labels, names, etc.\n", - "display(Markdown(\"#### Training Dataset shape\"))\n", - "print(dataset_orig_train.features.shape)\n", - "display(Markdown(\"#### 
Favorable and unfavorable labels\"))\n", - "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", - "display(Markdown(\"#### Protected attribute names\"))\n", - "print(dataset_orig_train.protected_attribute_names)\n", - "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", - "print(dataset_orig_train.privileged_protected_attributes, \n", - " dataset_orig_train.unprivileged_protected_attributes)\n", - "display(Markdown(\"#### Dataset feature names\"))\n", - "print(dataset_orig_train.feature_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Metric for original training data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Original training dataset" + "cell_type": "markdown", + "metadata": { + "id": "1E85P7JpEqAY" + }, + "source": [ + "### Predictions from the original test set at the optimal classification threshold" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8MQxVbSuEqAZ", + "outputId": "304dfde5-5248-43a4-f725-8277c110e9d9" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Predictions from original testing data" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 14%|█▍ | 14/100 [00:00<00:00, 134.43it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification threshold used = 0.2674\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 41%|████ | 41/100 [00:00<00:00, 131.04it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Balanced accuracy = 0.7417\n", + "Statistical parity difference = -0.3576\n", + "Disparate impact = 0.2774\n", + "Average odds difference = -0.3281\n", + "Equal opportunity difference = -0.4001\n", + 
"Theil index = 0.1128\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 67%|██████▋ | 67/100 [00:00<00:00, 127.49it/s]../aif360/metrics/dataset_metric.py:94: RuntimeWarning: invalid value encountered in double_scalars\n", + " return metric_fun(privileged=False) / metric_fun(privileged=True)\n", + "100%|██████████| 100/100 [00:00<00:00, 121.44it/s]\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "display(Markdown(\"#### Predictions from original testing data\"))\n", + "bal_acc_arr_orig = []\n", + "disp_imp_arr_orig = []\n", + "avg_odds_diff_arr_orig = []\n", + "\n", + "print(\"Classification threshold used = %.4f\" % best_class_thresh)\n", + "for thresh in tqdm(class_thresh_arr):\n", + "\n", + " if thresh == best_class_thresh:\n", + " disp = True\n", + " else:\n", + " disp = False\n", + "\n", + " fav_inds = dataset_orig_test_pred.scores > thresh\n", + " dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label\n", + " dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label\n", + "\n", + " metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred,\n", + " unprivileged_groups, privileged_groups,\n", + " disp = disp)\n", + "\n", + " bal_acc_arr_orig.append(metric_test_bef[\"Balanced accuracy\"])\n", + " avg_odds_diff_arr_orig.append(metric_test_bef[\"Average odds difference\"])\n", + " disp_imp_arr_orig.append(metric_test_bef[\"Disparate impact\"])" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Difference in mean outcomes between unprivileged and privileged groups = -0.190698\n" - ] - } - ], - "source": [ - "# Metric for the original dataset\n", - "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Original training dataset\"))\n", - "print(\"Difference 
in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_train.mean_difference())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Train with and transform the original training data" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "RW = Reweighing(unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "RW.fit(dataset_orig_train)\n", - "dataset_transf_train = RW.transform(dataset_orig_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "### Testing \n", - "assert np.abs(dataset_transf_train.instance_weights.sum()-dataset_orig_train.instance_weights.sum())<1e-6" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Metric with the transformed training data" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Transformed training dataset" + "cell_type": "markdown", + "metadata": { + "id": "RXJvR_iGEqAZ" + }, + "source": [ + "#### Display results for all thresholds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cnCff_lAEqAZ", + "outputId": "b5c2884a-f512-4d87-c049-45e9743ba5d1" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" + "source": [ + "fig, ax1 = plt.subplots(figsize=(10,7))\n", + "ax1.plot(class_thresh_arr, bal_acc_arr_orig)\n", + "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", + "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", + "ax1.xaxis.set_tick_params(labelsize=14)\n", + "ax1.yaxis.set_tick_params(labelsize=14)\n", + "\n", + "\n", + "ax2 = ax1.twinx()\n", + "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_orig)), color='r')\n", + "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", + "ax2.axvline(best_class_thresh, color='k', linestyle=':')\n", + "ax2.yaxis.set_tick_params(labelsize=14)\n", + "ax2.grid(True)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Difference in mean outcomes between unprivileged and privileged groups = -0.000000\n" - ] - } - ], - "source": [ - "metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Transformed training dataset\"))\n", - "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_transf_train.mean_difference())" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "### Testing \n", - "assert np.abs(metric_transf_train.mean_difference()) < 1e-6" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Train classifier on original data" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# Logistic regression classifier and predictions\n", - "scale_orig = StandardScaler()\n", - "X_train = scale_orig.fit_transform(dataset_orig_train.features)\n", 
- "y_train = dataset_orig_train.labels.ravel()\n", - "w_train = dataset_orig_train.instance_weights.ravel()\n", - "\n", - "lmod = LogisticRegression()\n", - "lmod.fit(X_train, y_train, \n", - " sample_weight=dataset_orig_train.instance_weights)\n", - "y_train_pred = lmod.predict(X_train)\n", - "\n", - "# positive class index\n", - "pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]\n", - "\n", - "dataset_orig_train_pred = dataset_orig_train.copy()\n", - "dataset_orig_train_pred.labels = y_train_pred" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Obtain scores for original validation and test sets" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)\n", - "X_valid = scale_orig.transform(dataset_orig_valid_pred.features)\n", - "y_valid = dataset_orig_valid_pred.labels\n", - "dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)\n", - "\n", - "dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)\n", - "X_test = scale_orig.transform(dataset_orig_test_pred.features)\n", - "y_test = dataset_orig_test_pred.labels\n", - "dataset_orig_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Find the optimal classification threshold from the validation set" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Best balanced accuracy (no reweighing) = 0.7473\n", - "Optimal classification threshold (no reweighing) = 0.2674\n" - ] - } - ], - "source": [ - "num_thresh = 100\n", - "ba_arr = np.zeros(num_thresh)\n", - "class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)\n", - "for idx, class_thresh in enumerate(class_thresh_arr):\n", - " \n", - " fav_inds = 
dataset_orig_valid_pred.scores > class_thresh\n", - " dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label\n", - " dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label\n", - " \n", - " classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,\n", - " dataset_orig_valid_pred, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - " \n", - " ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\\\n", - " +classified_metric_orig_valid.true_negative_rate())\n", - "\n", - "best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]\n", - "best_class_thresh = class_thresh_arr[best_ind]\n", - "\n", - "print(\"Best balanced accuracy (no reweighing) = %.4f\" % np.max(ba_arr))\n", - "print(\"Optimal classification threshold (no reweighing) = %.4f\" % best_class_thresh)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Predictions from the original test set at the optimal classification threshold" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Predictions from original testing data" + "cell_type": "markdown", + "metadata": { + "id": "SVS2_UFuEqAZ" + }, + "source": [ + "```abs(1-disparate impact)``` must be small (close to 0) for classifier predictions to be fair.\n", + "\n", + "However, for a classifier trained with original training data, at the optimal classification threshold, this is quite high. This implies unfairness." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XQXKx9LKEqAZ", + "outputId": "2cc1dfc3-0576-4183-f4e1-b6414ae5eb63" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 14%|█▍ | 14/100 [00:00<00:00, 134.43it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Classification threshold used = 0.2674\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 41%|████ | 41/100 [00:00<00:00, 131.04it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Balanced accuracy = 0.7417\n", - "Statistical parity difference = -0.3576\n", - "Disparate impact = 0.2774\n", - "Average odds difference = -0.3281\n", - "Equal opportunity difference = -0.4001\n", - "Theil index = 0.1128\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 67%|██████▋ | 67/100 [00:00<00:00, 127.49it/s]../aif360/metrics/dataset_metric.py:94: RuntimeWarning: invalid value encountered in double_scalars\n", - " return metric_fun(privileged=False) / metric_fun(privileged=True)\n", - "100%|██████████| 100/100 [00:00<00:00, 121.44it/s]\n" - ] - } - ], - "source": [ - "display(Markdown(\"#### Predictions from original testing data\"))\n", - "bal_acc_arr_orig = []\n", - "disp_imp_arr_orig = []\n", - "avg_odds_diff_arr_orig = []\n", - "\n", - "print(\"Classification threshold used = %.4f\" % best_class_thresh)\n", - "for thresh in tqdm(class_thresh_arr):\n", - " \n", - " if thresh == best_class_thresh:\n", - " disp = True\n", - " else:\n", - " disp = False\n", - " \n", - " fav_inds = dataset_orig_test_pred.scores > thresh\n", - " dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label\n", - " dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label\n", - " \n", - " metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, \n", - " unprivileged_groups, privileged_groups,\n", - " disp = disp)\n", 
- "\n", - " bal_acc_arr_orig.append(metric_test_bef[\"Balanced accuracy\"])\n", - " avg_odds_diff_arr_orig.append(metric_test_bef[\"Average odds difference\"])\n", - " disp_imp_arr_orig.append(metric_test_bef[\"Disparate impact\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Display results for all thresholds" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax1 = plt.subplots(figsize=(10,7))\n", - "ax1.plot(class_thresh_arr, bal_acc_arr_orig)\n", - "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", - "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", - "ax1.xaxis.set_tick_params(labelsize=14)\n", - "ax1.yaxis.set_tick_params(labelsize=14)\n", - "\n", - "\n", - "ax2 = ax1.twinx()\n", - "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_orig)), color='r')\n", - "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", - "ax2.axvline(best_class_thresh, color='k', linestyle=':')\n", - "ax2.yaxis.set_tick_params(labelsize=14)\n", - "ax2.grid(True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```abs(1-disparate impact)``` must be small (close to 0) for classifier predictions to be fair.\n", - "\n", - "However, for a classifier trained with original training data, at the best classification rate, this is quite high. This implies unfairness." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax1 = plt.subplots(figsize=(10,7))\n", - "ax1.plot(class_thresh_arr, bal_acc_arr_orig)\n", - "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", - "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", - "ax1.xaxis.set_tick_params(labelsize=14)\n", - "ax1.yaxis.set_tick_params(labelsize=14)\n", - "\n", - "\n", - "ax2 = ax1.twinx()\n", - "ax2.plot(class_thresh_arr, avg_odds_diff_arr_orig, color='r')\n", - "ax2.set_ylabel('avg. odds diff.', color='r', fontsize=16, fontweight='bold')\n", - "ax2.axvline(best_class_thresh, color='k', linestyle=':')\n", - "ax2.yaxis.set_tick_params(labelsize=14)\n", - "ax2.grid(True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```average odds difference = 0.5((FPR_unpriv-FPR_priv)+(TPR_unpriv-TPR_priv))``` must be close to zero for the classifier to be fair.\n", - "\n", - "However, for a classifier trained with original training data, at the best classification rate, this is quite high. This implies unfairness." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Train classifier on transformed data" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "scale_transf = StandardScaler()\n", - "X_train = scale_transf.fit_transform(dataset_transf_train.features)\n", - "y_train = dataset_transf_train.labels.ravel()\n", - "\n", - "lmod = LogisticRegression()\n", - "lmod.fit(X_train, y_train,\n", - " sample_weight=dataset_transf_train.instance_weights)\n", - "y_train_pred = lmod.predict(X_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Obtain scores for transformed test set" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_transf_test_pred = dataset_orig_test.copy(deepcopy=True)\n", - "X_test = scale_transf.fit_transform(dataset_transf_test_pred.features)\n", - "y_test = dataset_transf_test_pred.labels\n", - "dataset_transf_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Predictions from the transformed test set at the optimal classification threshold" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Predictions from transformed testing data" + "source": [ + "fig, ax1 = plt.subplots(figsize=(10,7))\n", + "ax1.plot(class_thresh_arr, bal_acc_arr_orig)\n", + "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", + "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", + "ax1.xaxis.set_tick_params(labelsize=14)\n", + "ax1.yaxis.set_tick_params(labelsize=14)\n", + "\n", + "\n", + "ax2 = ax1.twinx()\n", + "ax2.plot(class_thresh_arr, avg_odds_diff_arr_orig, color='r')\n", + "ax2.set_ylabel('avg. 
odds diff.', color='r', fontsize=16, fontweight='bold')\n", + "ax2.axvline(best_class_thresh, color='k', linestyle=':')\n", + "ax2.yaxis.set_tick_params(labelsize=14)\n", + "ax2.grid(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dIWP-qLnEqAZ" + }, + "source": [ + "```average odds difference = 0.5((FPR_unpriv-FPR_priv)+(TPR_unpriv-TPR_priv))``` must be close to zero for the classifier to be fair.\n", + "\n", + "However, for a classifier trained with original training data, at the best classification rate, this is quite high. This implies unfairness." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Af7QD7isEqAZ" + }, + "source": [ + "### Train classifier on transformed data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GQMmlGUhEqAZ" + }, + "outputs": [], + "source": [ + "scale_transf = StandardScaler()\n", + "X_train = scale_transf.fit_transform(dataset_transf_train.features)\n", + "y_train = dataset_transf_train.labels.ravel()\n", + "\n", + "lmod = LogisticRegression()\n", + "lmod.fit(X_train, y_train,\n", + " sample_weight=dataset_transf_train.instance_weights)\n", + "y_train_pred = lmod.predict(X_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PwAsGmBjEqAZ" + }, + "source": [ + "#### Obtain scores for transformed test set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hBUntbk8EqAa" + }, + "outputs": [], + "source": [ + "dataset_transf_test_pred = dataset_orig_test.copy(deepcopy=True)\n", + "X_test = scale_transf.fit_transform(dataset_transf_test_pred.features)\n", + "y_test = dataset_transf_test_pred.labels\n", + "dataset_transf_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JqtLDy-qEqAa" + }, + "source": [ + "### Predictions from the transformed test set at the optimal classification threshold" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": { + "id": "amJP3jbrEqAa", + "outputId": "bacea04d-b4e1-4f69-d25e-9f9a69557199" + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Predictions from transformed testing data" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 13%|█▎ | 13/100 [00:00<00:00, 127.35it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification threshold used = 0.2674\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 41%|████ | 41/100 [00:00<00:00, 98.54it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Balanced accuracy = 0.7128\n", + "Statistical parity difference = -0.0906\n", + "Disparate impact = 0.7625\n", + "Average odds difference = -0.0266\n", + "Equal opportunity difference = -0.0518\n", + "Theil index = 0.1294\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 100/100 [00:01<00:00, 97.34it/s]\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "display(Markdown(\"#### Predictions from transformed testing data\"))\n", + "bal_acc_arr_transf = []\n", + "disp_imp_arr_transf = []\n", + "avg_odds_diff_arr_transf = []\n", + "\n", + "print(\"Classification threshold used = %.4f\" % best_class_thresh)\n", + "for thresh in tqdm(class_thresh_arr):\n", + "\n", + " if thresh == best_class_thresh:\n", + " disp = True\n", + " else:\n", + " disp = False\n", + "\n", + " fav_inds = dataset_transf_test_pred.scores > thresh\n", + " dataset_transf_test_pred.labels[fav_inds] = dataset_transf_test_pred.favorable_label\n", + " dataset_transf_test_pred.labels[~fav_inds] = dataset_transf_test_pred.unfavorable_label\n", + "\n", + " metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_pred,\n", + " unprivileged_groups, privileged_groups,\n", + " disp = disp)\n", + "\n", + " 
bal_acc_arr_transf.append(metric_test_aft[\"Balanced accuracy\"])\n", + " avg_odds_diff_arr_transf.append(metric_test_aft[\"Average odds difference\"])\n", + " disp_imp_arr_transf.append(metric_test_aft[\"Disparate impact\"])" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 13%|█▎ | 13/100 [00:00<00:00, 127.35it/s]" - ] + "cell_type": "markdown", + "metadata": { + "id": "66W_Hk_JEqAa" + }, + "source": [ + "#### Display results for all thresholds" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Classification threshold used = 0.2674\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0drHEcZoEqAa", + "outputId": "da8232fe-15c3-4c70-cd4a-3dee9a0f92d9" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax1 = plt.subplots(figsize=(10,7))\n", + "ax1.plot(class_thresh_arr, bal_acc_arr_transf)\n", + "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", + "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", + "ax1.xaxis.set_tick_params(labelsize=14)\n", + "ax1.yaxis.set_tick_params(labelsize=14)\n", + "\n", + "\n", + "ax2 = ax1.twinx()\n", + "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_transf)), color='r')\n", + "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", + "ax2.axvline(best_class_thresh, color='k', linestyle=':')\n", + "ax2.yaxis.set_tick_params(labelsize=14)\n", + "ax2.grid(True)" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - " 41%|████ | 41/100 [00:00<00:00, 98.54it/s] " - ] + "cell_type": "markdown", + "metadata": { + "id": "iDSrQeV8EqAa" + }, + "source": [ + "```abs(1-disparate impact)``` must be small (close to 0) for classifier predictions to be fair.\n", + "\n", + "For a classifier trained with reweighted training data, at the best classification rate, this is indeed the case.\n", + "This implies fairness." + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Balanced accuracy = 0.7128\n", - "Statistical parity difference = -0.0906\n", - "Disparate impact = 0.7625\n", - "Average odds difference = -0.0266\n", - "Equal opportunity difference = -0.0518\n", - "Theil index = 0.1294\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sd_ijaPuEqAa", + "outputId": "11b66e0c-57e2-437c-eb2a-47320d354e41" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax1 = plt.subplots(figsize=(10,7))\n", + "ax1.plot(class_thresh_arr, bal_acc_arr_transf)\n", + "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", + "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", + "ax1.xaxis.set_tick_params(labelsize=14)\n", + "ax1.yaxis.set_tick_params(labelsize=14)\n", + "\n", + "\n", + "ax2 = ax1.twinx()\n", + "ax2.plot(class_thresh_arr, avg_odds_diff_arr_transf, color='r')\n", + "ax2.set_ylabel('avg. odds diff.', color='r', fontsize=16, fontweight='bold')\n", + "ax2.axvline(best_class_thresh, color='k', linestyle=':')\n", + "ax2.yaxis.set_tick_params(labelsize=14)\n", + "ax2.grid(True)" + ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 100/100 [00:01<00:00, 97.34it/s]\n" - ] - } - ], - "source": [ - "display(Markdown(\"#### Predictions from transformed testing data\"))\n", - "bal_acc_arr_transf = []\n", - "disp_imp_arr_transf = []\n", - "avg_odds_diff_arr_transf = []\n", - "\n", - "print(\"Classification threshold used = %.4f\" % best_class_thresh)\n", - "for thresh in tqdm(class_thresh_arr):\n", - " \n", - " if thresh == best_class_thresh:\n", - " disp = True\n", - " else:\n", - " disp = False\n", - " \n", - " fav_inds = dataset_transf_test_pred.scores > thresh\n", - " dataset_transf_test_pred.labels[fav_inds] = dataset_transf_test_pred.favorable_label\n", - " dataset_transf_test_pred.labels[~fav_inds] = dataset_transf_test_pred.unfavorable_label\n", - " \n", - " metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_pred, \n", - " unprivileged_groups, privileged_groups,\n", - " disp = disp)\n", - "\n", - " bal_acc_arr_transf.append(metric_test_aft[\"Balanced accuracy\"])\n", - " avg_odds_diff_arr_transf.append(metric_test_aft[\"Average odds difference\"])\n", - " disp_imp_arr_transf.append(metric_test_aft[\"Disparate impact\"])" - 
] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Display results for all thresholds" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" + "cell_type": "markdown", + "metadata": { + "id": "Gl4oFd-LEqAf" + }, + "source": [ + "```average odds difference = 0.5((FPR_unpriv-FPR_priv)+(TPR_unpriv-TPR_priv))``` must be close to zero for the classifier to be fair.\n", + "\n", + "For a classifier trained with reweighted training data, at the best classification rate, this is indeed the case.\n", + "This implies fairness." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "id": "mC9VED1KEqAf" + }, + "source": [ + "# Summary of Results\n", + "We show the optimal classification thresholds, and the fairness and accuracy metrics." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I93c6CY4EqAf" + }, + "source": [ + "### Classification Thresholds\n", + "\n", + "| Dataset |Classification threshold|\n", + "|-|-|\n", + "|Adult||0.2674|\n", + "|German|0.6732|\n", + "|Compas|0.5148|" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oT0aNe0OEqAf" + }, + "source": [ + "### Fairness Metric: Disparate impact, Accuracy Metric: Balanced accuracy\n", + "\n", + "#### Performance\n", + "\n", + "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", + "|-|-|-|-|-|-|-|-|-|\n", + "|Adult (Test)|0.7417|0.7128|0.2774|0.7625|0.7417|0.7443|0.4423|0.7430|\n", + "|German (Test)|0.6524|0.6460|0.9948|1.0852|0.6524|0.6460|0.3824|0.5735|\n", + "|Compas (Test)|0.6774|0.6562|0.6631|0.8342|0.6774|0.6342|0.6600|1.1062|\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VlTe6r2ZEqAf" + }, + "source": [ + "### Fairness Metric: Average odds difference, Accuracy Metric: Balanced accuracy\n", + "\n", + "#### Performance\n", + "\n", + "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", + "|-|-|-|-|-|-|-|-|-|\n", + 
"|Adult (Test)|0.7417|0.7128|-0.3281|-0.0266|0.7417|0.7443|-0.1991|-0.0395|\n", + "|German (Test)|0.6524|0.6460|0.0071|0.0550|0.6524|0.6460|-0.3278|-0.1944|\n", + "|Compas (Test)|0.6774|0.6562|-0.2439|-0.0946|0.6774|0.6342|-0.1927|0.1042|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uDz4X4JXEqAf" + }, + "outputs": [], + "source": [] } - ], - "source": [ - "fig, ax1 = plt.subplots(figsize=(10,7))\n", - "ax1.plot(class_thresh_arr, bal_acc_arr_transf)\n", - "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", - "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", - "ax1.xaxis.set_tick_params(labelsize=14)\n", - "ax1.yaxis.set_tick_params(labelsize=14)\n", - "\n", - "\n", - "ax2 = ax1.twinx()\n", - "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_transf)), color='r')\n", - "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", - "ax2.axvline(best_class_thresh, color='k', linestyle=':')\n", - "ax2.yaxis.set_tick_params(labelsize=14)\n", - "ax2.grid(True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```abs(1-disparate impact)``` must be small (close to 0) for classifier predictions to be fair.\n", - "\n", - "For a classifier trained with reweighted training data, at the best classification rate, this is indeed the case.\n", - "This implies fairness." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.10" + }, + "colab": { + "provenance": [] } - ], - "source": [ - "fig, ax1 = plt.subplots(figsize=(10,7))\n", - "ax1.plot(class_thresh_arr, bal_acc_arr_transf)\n", - "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", - "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", - "ax1.xaxis.set_tick_params(labelsize=14)\n", - "ax1.yaxis.set_tick_params(labelsize=14)\n", - "\n", - "\n", - "ax2 = ax1.twinx()\n", - "ax2.plot(class_thresh_arr, avg_odds_diff_arr_transf, color='r')\n", - "ax2.set_ylabel('avg. odds diff.', color='r', fontsize=16, fontweight='bold')\n", - "ax2.axvline(best_class_thresh, color='k', linestyle=':')\n", - "ax2.yaxis.set_tick_params(labelsize=14)\n", - "ax2.grid(True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```average odds difference = 0.5((FPR_unpriv-FPR_priv)+(TPR_unpriv-TPR_priv))``` must be close to zero for the classifier to be fair.\n", - "\n", - "For a classifier trained with reweighted training data, at the best classification rate, this is indeed the case.\n", - "This implies fairness." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# Summary of Results\n", - "We show the optimal classification thresholds, and the fairness and accuracy metrics." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Classification Thresholds\n", - "\n", - "| Dataset |Classification threshold|\n", - "|-|-|\n", - "|Adult||0.2674|\n", - "|German|0.6732|\n", - "|Compas|0.5148|" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fairness Metric: Disparate impact, Accuracy Metric: Balanced accuracy\n", - "\n", - "#### Performance\n", - "\n", - "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", - "|-|-|-|-|-|-|-|-|-|\n", - "|Adult (Test)|0.7417|0.7128|0.2774|0.7625|0.7417|0.7443|0.4423|0.7430|\n", - "|German (Test)|0.6524|0.6460|0.9948|1.0852|0.6524|0.6460|0.3824|0.5735|\n", - "|Compas (Test)|0.6774|0.6562|0.6631|0.8342|0.6774|0.6342|0.6600|1.1062|\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fairness Metric: Average odds difference, Accuracy Metric: Balanced accuracy\n", - "\n", - "#### Performance\n", - "\n", - "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", - "|-|-|-|-|-|-|-|-|-|\n", - "|Adult (Test)|0.7417|0.7128|-0.3281|-0.0266|0.7417|0.7443|-0.1991|-0.0395|\n", - "|German (Test)|0.6524|0.6460|0.0071|0.0550|0.6524|0.6460|-0.3278|-0.1944|\n", - "|Compas (Test)|0.6774|0.6562|-0.2439|-0.0946|0.6774|0.6342|-0.1927|0.1042|" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - } - }, - "nbformat": 4, - 
"nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file diff --git a/examples/demo_short_gerryfair_test.ipynb b/examples/demo_short_gerryfair_test.ipynb index 281dee28..e7dca201 100644 --- a/examples/demo_short_gerryfair_test.ipynb +++ b/examples/demo_short_gerryfair_test.ipynb @@ -1,183 +1,202 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "pycharm": { - "is_executing": false - } - }, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "iteration: 1, error: 0.229, fairness violation: 0.05428400000000001, violated group size: 0.249\n", - "iteration: 2, error: 0.3645, fairness violation: 0.027142000000000006, violated group size: 0.249\n", - "iteration: 3, error: 0.4096666666666666, fairness violation: 0.01809466666666667, violated group size: 0.251\n", - "iteration: 4, error: 0.43225, fairness violation: 0.013571000000000003, violated group size: 0.249\n", - "iteration: 5, error: 0.44580000000000014, fairness violation: 0.0108568, violated group size: 0.251\n", - "iteration: 6, error: 0.4548333333333334, fairness violation: 0.009047333333333338, violated group size: 0.251\n", - "iteration: 7, error: 0.46128571428571435, fairness violation: 0.007754857142857144, violated group size: 0.251\n", - "iteration: 8, error: 0.466125, fairness violation: 0.006785500000000003, violated group size: 0.251\n", - "iteration: 9, error: 0.469888888888889, fairness violation: 0.006031555555555558, violated group size: 0.249\n" - ] - } - ], - "source": [ - "%matplotlib inline\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\")\n", - "import sys\n", - "sys.path.append(\"../\")\n", - "from aif360.algorithms.inprocessing import GerryFairClassifier\n", - "from aif360.algorithms.inprocessing.gerryfair.clean import array_to_tuple\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", - "from 
sklearn import svm\n", - "from sklearn import tree\n", - "from sklearn.kernel_ridge import KernelRidge\n", - "from sklearn import linear_model\n", - "from aif360.metrics import BinaryLabelDatasetMetric\n", - "from IPython.display import Image\n", - "import pickle\n", - "import matplotlib.pyplot as plt\n", - "\n", - "# load data set\n", - "data_set = load_preproc_data_adult(sub_samp=1000, balance=True)\n", - "max_iterations = 10\n", - "C = 100\n", - "print_flag = True\n", - "gamma = .005\n", - "\n", - "fair_model = GerryFairClassifier(C=C, printflag=print_flag, gamma=gamma, fairness_def='FP',\n", - " max_iters=max_iterations, heatmapflag=False)\n", - "# fit method\n", - "fair_model.fit(data_set, early_termination=True)\n", - "\n", - "# predict method. If threshold in (0, 1) produces binary predictions\n", - "dataset_yhat = fair_model.predict(data_set, threshold=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "pycharm": { - "is_executing": false - } - }, - "outputs": [ + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/demo_short_gerryfair_test.ipynb)" + ], + "metadata": { + "id": "qsaaqV7jGtYV" + } + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.0060315555555555565\n" - ] - } - ], - "source": [ - "# auditing \n", - "\n", - "gerry_metric = BinaryLabelDatasetMetric(data_set)\n", - "gamma_disparity = gerry_metric.rich_subgroup(array_to_tuple(dataset_yhat.labels), 'FP')\n", - "print(gamma_disparity)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "pycharm": { - "is_executing": false - } - }, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": false + }, + "id": "9Cb8bfPXGUa0", + "outputId": "165bfd3e-1281-4b34-d21d-137182075fab" + }, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "iteration: 1, error: 0.229, fairness violation: 0.05428400000000001, violated group size: 0.249\n", + "iteration: 2, error: 0.3645, fairness violation: 0.027142000000000006, violated group size: 0.249\n", + "iteration: 3, error: 0.4096666666666666, fairness violation: 0.01809466666666667, violated group size: 0.251\n", + "iteration: 4, error: 0.43225, fairness violation: 0.013571000000000003, violated group size: 0.249\n", + "iteration: 5, error: 0.44580000000000014, fairness violation: 0.0108568, violated group size: 0.251\n", + "iteration: 6, error: 0.4548333333333334, fairness violation: 0.009047333333333338, violated group size: 0.251\n", + "iteration: 7, error: 0.46128571428571435, fairness violation: 0.007754857142857144, violated group size: 0.251\n", + "iteration: 8, error: 0.466125, fairness violation: 0.006785500000000003, violated group size: 0.251\n", + "iteration: 9, error: 0.469888888888889, fairness violation: 0.006031555555555558, violated group size: 0.249\n" + ] + } + ], + "source": [ + "%matplotlib inline\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "import sys\n", + "sys.path.append(\"../\")\n", + "from aif360.algorithms.inprocessing import GerryFairClassifier\n", + "from aif360.algorithms.inprocessing.gerryfair.clean import array_to_tuple\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult\n", + "from sklearn import svm\n", + "from sklearn import tree\n", + "from sklearn.kernel_ridge import KernelRidge\n", + "from sklearn import linear_model\n", + "from aif360.metrics import BinaryLabelDatasetMetric\n", + "from IPython.display import Image\n", + "import pickle\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# load data set\n", + "data_set = load_preproc_data_adult(sub_samp=1000, balance=True)\n", + "max_iterations = 10\n", + "C = 100\n", + "print_flag = True\n", + "gamma = .005\n", + "\n", + 
"fair_model = GerryFairClassifier(C=C, printflag=print_flag, gamma=gamma, fairness_def='FP',\n", + " max_iters=max_iterations, heatmapflag=False)\n", + "# fit method\n", + "fair_model.fit(data_set, early_termination=True)\n", + "\n", + "# predict method. If threshold in (0, 1) produces binary predictions\n", + "dataset_yhat = fair_model.predict(data_set, threshold=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": false + }, + "id": "cCdltbMWGUa2", + "outputId": "0ab1dd98-6cae-45a1-e51c-41ef9b7193df" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0060315555555555565\n" + ] + } + ], + "source": [ + "# auditing\n", + "\n", + "gerry_metric = BinaryLabelDatasetMetric(data_set)\n", + "gamma_disparity = gerry_metric.rich_subgroup(array_to_tuple(dataset_yhat.labels), 'FP')\n", + "print(gamma_disparity)\n", + "\n" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Curr Predictor: Linear\n", - "Curr Predictor: SVR\n", - "Curr Predictor: Tree\n", - "Curr Predictor: Kernel\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": false + }, + "id": "on2jaBgsGUa3", + "outputId": "ed58df2b-2288-491b-bf07-ad5d783baa06" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Curr Predictor: Linear\n", + "Curr Predictor: SVR\n", + "Curr Predictor: Tree\n", + "Curr Predictor: Kernel\n" + ] + } + ], + "source": [ + "# set to 10 iterations for fast running of notebook - set >= 1000 when running real experiments\n", + "# tests learning with different hypothesis classes\n", + "pareto_iters = 10\n", + "def multiple_classifiers_pareto(dataset, gamma_list=[0.002, 0.005, 0.01], save_results=False, iters=pareto_iters):\n", + "\n", + " ln_predictor = linear_model.LinearRegression()\n", + " svm_predictor = svm.LinearSVR()\n", + " tree_predictor = 
tree.DecisionTreeRegressor(max_depth=3)\n", + " kernel_predictor = KernelRidge(alpha=1.0, gamma=1.0, kernel='rbf')\n", + " predictor_dict = {'Linear': {'predictor': ln_predictor, 'iters': iters},\n", + " 'SVR': {'predictor': svm_predictor, 'iters': iters},\n", + " 'Tree': {'predictor': tree_predictor, 'iters': iters},\n", + " 'Kernel': {'predictor': kernel_predictor, 'iters': iters}}\n", + "\n", + " results_dict = {}\n", + "\n", + " for pred in predictor_dict:\n", + " print('Curr Predictor: {}'.format(pred))\n", + " predictor = predictor_dict[pred]['predictor']\n", + " max_iters = predictor_dict[pred]['iters']\n", + " fair_clf = GerryFairClassifier(C=100, printflag=True, gamma=1, predictor=predictor, max_iters=max_iters)\n", + " fair_clf.printflag = False\n", + " fair_clf.max_iters=max_iters\n", + " errors, fp_violations, fn_violations = fair_clf.pareto(dataset, gamma_list)\n", + " results_dict[pred] = {'errors': errors, 'fp_violations': fp_violations, 'fn_violations': fn_violations}\n", + " if save_results:\n", + " pickle.dump(results_dict, open('results_dict_' + str(gamma_list) + '_gammas' + str(gamma_list) + '.pkl', 'wb'))\n", + "\n", + "multiple_classifiers_pareto(data_set)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": false + }, + "id": "tgGHGbQZGUa3" + }, + "outputs": [], + "source": [] } - ], - "source": [ - "# set to 10 iterations for fast running of notebook - set >= 1000 when running real experiments\n", - "# tests learning with different hypothesis classes\n", - "pareto_iters = 10\n", - "def multiple_classifiers_pareto(dataset, gamma_list=[0.002, 0.005, 0.01], save_results=False, iters=pareto_iters):\n", - "\n", - " ln_predictor = linear_model.LinearRegression()\n", - " svm_predictor = svm.LinearSVR()\n", - " tree_predictor = tree.DecisionTreeRegressor(max_depth=3)\n", - " kernel_predictor = KernelRidge(alpha=1.0, gamma=1.0, kernel='rbf')\n", - " predictor_dict = {'Linear': 
{'predictor': ln_predictor, 'iters': iters},\n", - " 'SVR': {'predictor': svm_predictor, 'iters': iters},\n", - " 'Tree': {'predictor': tree_predictor, 'iters': iters},\n", - " 'Kernel': {'predictor': kernel_predictor, 'iters': iters}}\n", - "\n", - " results_dict = {}\n", - "\n", - " for pred in predictor_dict:\n", - " print('Curr Predictor: {}'.format(pred))\n", - " predictor = predictor_dict[pred]['predictor']\n", - " max_iters = predictor_dict[pred]['iters']\n", - " fair_clf = GerryFairClassifier(C=100, printflag=True, gamma=1, predictor=predictor, max_iters=max_iters)\n", - " fair_clf.printflag = False\n", - " fair_clf.max_iters=max_iters\n", - " errors, fp_violations, fn_violations = fair_clf.pareto(dataset, gamma_list)\n", - " results_dict[pred] = {'errors': errors, 'fp_violations': fp_violations, 'fn_violations': fn_violations}\n", - " if save_results:\n", - " pickle.dump(results_dict, open('results_dict_' + str(gamma_list) + '_gammas' + str(gamma_list) + '.pkl', 'wb'))\n", - "\n", - "multiple_classifiers_pareto(data_set)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, "pycharm": { - "is_executing": false + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + }, + "colab": { + "provenance": [] } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": 
"python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - }, - "pycharm": { - "stem_cell": { - "cell_type": "raw", - "metadata": { - "collapsed": false - }, - "source": [] - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/tutorial_medical_expenditure.ipynb b/examples/tutorial_medical_expenditure.ipynb index 8f8d7186..96d823eb 100644 --- a/examples/tutorial_medical_expenditure.ipynb +++ b/examples/tutorial_medical_expenditure.ipynb @@ -1,2242 +1,2565 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Medical Expenditure Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## This tutorial demonstrates classification model learning with bias mitigation as a part of a Care Management use case using Medical Expenditure data." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The notebook demonstrates how the AIF 360 toolkit can be used to detect and reduce bias when learning classifiers using a variety of fairness metrics and algorithms . It also demonstrates how explanations can be generated for predictions made by models learnt with the toolkit using LIME.\n", - "\n", - "Classifiers are built using Logistic Regression as well as Random Forests.\n", - "\n", - "Bias detection is demonstrated using several metrics, including disparate impact, average odds difference, statistical parity difference, equal opportunity difference, and Theil index.\n", - "\n", - "Bias alleviation is explored via a variety of methods, including reweighing (pre-processing algorithm), prejudice remover (in-processing algorithm), and disparate impact remover (pre-processing technique).\n", - "\n", - "Data from the [Medical Expenditure Panel Survey](https://meps.ahrq.gov/mepsweb/) is used in this tutorial. 
See [Section 2](#2.-Data-used) below for more details.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Table of Contents" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To return to the table of contents, click on the number at any major section heading.\n", - "\n", - "[1. Use case](#1.-Use-case)\n", - "\n", - "[2. Data used](#2.-Data-used)\n", - "\n", - "[3. Training models without debiasing](#3.-Training-models-on-original-2015-Panel-19-data)\n", - "\n", - "[4. Reweighing (pre-processing bias mitigation)](#4.-Bias-mitigation-using-pre-processing-technique---Reweighing)\n", - "\n", - "[5. Prejudice Remover (in-processing bias mitigation)](#5.-Bias-mitigation-using-in-processing-technique---Prejudice-Remover-(PR))\n", - "\n", - "[6. Summary of results](#6.-Summary-of-Model-Learning-Results)\n", - "\n", - "[7. Deploying model](#7.-Deploying-model)\n", - "\n", - "[8. Generating explanations for model predictions using LIME](#8.-Generating-explanations-for-model-predictions-using-LIME)\n", - "\n", - "[9. Re-deploying Model](#9.-Re-deploying-Model)\n", - "\n", - "[10. Overall Summary](#10.-SUMMARY)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## [1.](#Table-of-Contents) Use case" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to demonstrate how AIF 360 can be used to detect and mitigate bias in classfier models, we adopt the following use case:\n", - "\n", - "1. a data scientist develops a 'fair' healthcare utilization scoring model with respect to defined protected classes. Fairness may be dictated by legal or government regulations, such as a requirement that additional care decisions be not predicated on factors such as race of the patient.\n", - "\n", - "\n", - "2. developer takes the model AND performance characteristics / specs of the model (e.g. accuracy, fairness tests, etc. 
basically the model factsheet) and deploys the model in an enterprise app that prioritizes cases for care management.\n", - "\n", - "\n", - "3. the app is put into production and starts scoring people and making recommendations. \n", - "\n", - "\n", - "4. explanations are generated for each recommendation\n", - "\n", - "\n", - "5. both recommendations and associated explanations are given to nurses as a part of the care management process. The nurses can evaluate the recommendations for quality and correctness and provide feedback.\n", - "\n", - "\n", - "6. nurse feedback as well as analysis of usage data with respect to specs of the model w.r.t accuracy and fairness is communicated to AI Ops specialist and LOB user periodically.\n", - "\n", - "\n", - "7. when significant drift in model specs relative to the model factsheet is observed, the model is sent back for retraining." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## [2.](#Table-of-Contents) Data used" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The specific data used is the [2015 Full Year Consolidated Data File](https://meps.ahrq.gov/mepsweb/data_stats/download_data_files_detail.jsp?cboPufNumber=HC-181) as well as the [2016 Full Year Consolidated Data File](https://meps.ahrq.gov/mepsweb/data_stats/download_data_files_detail.jsp?cboPufNumber=HC-192)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The 2015 file contains data from rounds 3,4,5 of panel 19 (2014) and rounds 1,2,3 of panel 20 (2015). The 2016 file contains data from rounds 3,4,5 of panel 20 (2015) and rounds 1,2,3 of panel 21 (2016).\n", - "\n", - "For this demonstration, three datasets were constructed: one from panel 19, round 5 (used for learning models), one from panel 20, round 3 (used for deployment/testing of model - steps); the other from panel 21, round 3 (used for re-training and deployment/testing of updated model)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For each dataset, the sensitive attribute is 'RACE' constructed as follows: 'Whites' (privileged class) defined by the features RACEV2X = 1 (White) and HISPANX = 2 (non Hispanic); 'Non-Whites' that included everyone else. \n", - "\n", - "Along with race as the sensitive feature, other features used for modeling include demographics (such as age, gender, active duty status), physical/mental health assessments, diagnosis codes (such as history of diagnosis of cancer, or diabetes), and limitations (such as cognitive or hearing or vision limitation).\n", - "\n", - "To measure utilization, a composite feature, 'UTILIZATION', was created to measure the total number of trips requiring some sort of medical care by summing up the following features: OBTOTV15(16), the number of office based visits; OPTOTV15(16), the number of outpatient visits; ERTOT15(16), the number of ER visits; IPNGTD15(16), the number of inpatient nights, and + HHTOTD16, the number of home health visits.\n", - "\n", - "The model classification task is to predict whether a person would have 'high' utilization (defined as UTILIZATION >= 10, roughly the average utilization for the considered population). High utilization respondents constituted around 17% of each dataset.\n", - "\n", - "To simulate the scenario, each dataset is split into 3 parts: a train, a validation, and a test/deployment part.\n", - "\n", - "We assume that the model is initially built and tuned using the 2015 Panel 19 train/test data. (Use case steps 1-2.)\n", - "It is then put into practice and used to score people to identify potential candidates for care management (Use case steps 3-5). Initial deployment is simulated to 2015 Panel 20 deployment data. To show change in performance and/or fairness over time, (use case steps 6-7), the 2016 Panel 21 deployment data is used. 
Finally, if drift is observed, the 2015 train/validation data is used to learn a new model and evaluated again on the 2016 deployment data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## [3.](#Table-of-Contents) Training models on original 2015 Panel 19 data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, load all necessary packages" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.insert(0, '../')\n", - "\n", - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from IPython.display import Markdown, display\n", - "\n", - "# Datasets\n", - "from aif360.datasets import MEPSDataset19\n", - "from aif360.datasets import MEPSDataset20\n", - "from aif360.datasets import MEPSDataset21\n", - "\n", - "# Fairness metrics\n", - "from aif360.metrics import BinaryLabelDatasetMetric\n", - "from aif360.metrics import ClassificationMetric\n", - "\n", - "# Explainers\n", - "from aif360.explainers import MetricTextExplainer\n", - "\n", - "# Scalers\n", - "from sklearn.preprocessing import StandardScaler\n", - "\n", - "# Classifiers\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.pipeline import make_pipeline\n", - "\n", - "# Bias mitigation techniques\n", - "from aif360.algorithms.preprocessing import Reweighing\n", - "from aif360.algorithms.inprocessing import PrejudiceRemover\n", - "\n", - "# LIME\n", - "from aif360.datasets.lime_encoder import LimeEncoder\n", - "import lime\n", - "from lime.lime_tabular import LimeTabularExplainer\n", - "\n", - "np.random.seed(1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3.1. 
Load data & create splits for learning/validating/testing model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get the dataset and split into train (50%), validate (30%), and test (20%)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "(dataset_orig_panel19_train,\n", - " dataset_orig_panel19_val,\n", - " dataset_orig_panel19_test) = MEPSDataset19().split([0.5, 0.8], shuffle=True)\n", - "\n", - "sens_ind = 0\n", - "sens_attr = dataset_orig_panel19_train.protected_attribute_names[sens_ind]\n", - "\n", - "unprivileged_groups = [{sens_attr: v} for v in\n", - " dataset_orig_panel19_train.unprivileged_protected_attributes[sens_ind]]\n", - "privileged_groups = [{sens_attr: v} for v in\n", - " dataset_orig_panel19_train.privileged_protected_attributes[sens_ind]]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function will be used throughout the notebook to print out some labels, names, etc." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "def describe(train=None, val=None, test=None):\n", - " if train is not None:\n", - " display(Markdown(\"#### Training Dataset shape\"))\n", - " print(train.features.shape)\n", - " if val is not None:\n", - " display(Markdown(\"#### Validation Dataset shape\"))\n", - " print(val.features.shape)\n", - " display(Markdown(\"#### Test Dataset shape\"))\n", - " print(test.features.shape)\n", - " display(Markdown(\"#### Favorable and unfavorable labels\"))\n", - " print(test.favorable_label, test.unfavorable_label)\n", - " display(Markdown(\"#### Protected attribute names\"))\n", - " print(test.protected_attribute_names)\n", - " display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", - " print(test.privileged_protected_attributes, \n", - " test.unprivileged_protected_attributes)\n", - " display(Markdown(\"#### Dataset feature names\"))\n", - " print(test.feature_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Show 2015 dataset details" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "tags": [] - }, - "outputs": [ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/main/examples/tutorial_medical_expenditure.ipynb)" + ], + "metadata": { + "id": "vEJtd4TZHluk" + } + }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Training Dataset shape" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "L4KzQLdJHHV3" + }, + "source": [ + "# Medical Expenditure Tutorial" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "(7915, 138)\n" + "cell_type": "markdown", + "metadata": { + "id": "F92_6AkpHHV5" + }, + "source": [ + "## This 
tutorial demonstrates classification model learning with bias mitigation as a part of a Care Management use case using Medical Expenditure data." + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Validation Dataset shape" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "i-MYlxfEHHV5" + }, + "source": [ + "The notebook demonstrates how the AIF 360 toolkit can be used to detect and reduce bias when learning classifiers using a variety of fairness metrics and algorithms. It also demonstrates how explanations can be generated for predictions made by models learnt with the toolkit using LIME.\n", + "\n", + "Classifiers are built using Logistic Regression as well as Random Forests.\n", + "\n", + "Bias detection is demonstrated using several metrics, including disparate impact, average odds difference, statistical parity difference, equal opportunity difference, and Theil index.\n", + "\n", + "Bias alleviation is explored via a variety of methods, including reweighing (pre-processing algorithm), prejudice remover (in-processing algorithm), and disparate impact remover (pre-processing technique).\n", + "\n", + "Data from the [Medical Expenditure Panel Survey](https://meps.ahrq.gov/mepsweb/) is used in this tutorial. See [Section 2](#2.-Data-used) below for more details.\n" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "(4749, 138)\n" + "cell_type": "markdown", + "metadata": { + "id": "52e2nvCaHHV6" + }, + "source": [ + "## Table of Contents" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Test Dataset shape" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "oTpXyrKpHHV6" + }, + "source": [ + "To return to the table of contents, click on the number at any major section heading.\n", + "\n", + "[1. Use case](#1.-Use-case)\n", + "\n", + "[2. Data used](#2.-Data-used)\n", + "\n", + "[3.
Training models without debiasing](#3.-Training-models-on-original-2015-Panel-19-data)\n", + "\n", + "[4. Reweighing (pre-processing bias mitigation)](#4.-Bias-mitigation-using-pre-processing-technique---Reweighing)\n", + "\n", + "[5. Prejudice Remover (in-processing bias mitigation)](#5.-Bias-mitigation-using-in-processing-technique---Prejudice-Remover-(PR))\n", + "\n", + "[6. Summary of results](#6.-Summary-of-Model-Learning-Results)\n", + "\n", + "[7. Deploying model](#7.-Deploying-model)\n", + "\n", + "[8. Generating explanations for model predictions using LIME](#8.-Generating-explanations-for-model-predictions-using-LIME)\n", + "\n", + "[9. Re-deploying Model](#9.-Re-deploying-Model)\n", + "\n", + "[10. Overall Summary](#10.-SUMMARY)" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "(3166, 138)\n" + "cell_type": "markdown", + "metadata": { + "id": "OD5cMJOHHHV6" + }, + "source": [ + "## [1.](#Table-of-Contents) Use case" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Favorable and unfavorable labels" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "C_X0hQkEHHV6" + }, + "source": [ + "In order to demonstrate how AIF 360 can be used to detect and mitigate bias in classifier models, we adopt the following use case:\n", + "\n", + "1. a data scientist develops a 'fair' healthcare utilization scoring model with respect to defined protected classes. Fairness may be dictated by legal or government regulations, such as a requirement that additional care decisions be not predicated on factors such as race of the patient.\n", + "\n", + "\n", + "2. developer takes the model AND performance characteristics / specs of the model (e.g. accuracy, fairness tests, etc. basically the model factsheet) and deploys the model in an enterprise app that prioritizes cases for care management.\n", + "\n", + "\n", + "3.
the app is put into production and starts scoring people and making recommendations.\n", + "\n", + "\n", + "4. explanations are generated for each recommendation\n", + "\n", + "\n", + "5. both recommendations and associated explanations are given to nurses as a part of the care management process. The nurses can evaluate the recommendations for quality and correctness and provide feedback.\n", + "\n", + "\n", + "6. nurse feedback as well as analysis of usage data with respect to specs of the model w.r.t accuracy and fairness is communicated to AI Ops specialist and LOB user periodically.\n", + "\n", + "\n", + "7. when significant drift in model specs relative to the model factsheet is observed, the model is sent back for retraining." + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "1.0 0.0\n" + "cell_type": "markdown", + "metadata": { + "id": "aM9yDCerHHV6" + }, + "source": [ + "## [2.](#Table-of-Contents) Data used" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Protected attribute names" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "JQwdXTlNHHV6" + }, + "source": [ + "The specific data used is the [2015 Full Year Consolidated Data File](https://meps.ahrq.gov/mepsweb/data_stats/download_data_files_detail.jsp?cboPufNumber=HC-181) as well as the [2016 Full Year Consolidated Data File](https://meps.ahrq.gov/mepsweb/data_stats/download_data_files_detail.jsp?cboPufNumber=HC-192)." + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "['RACE']\n" + "cell_type": "markdown", + "metadata": { + "id": "39NlmjCVHHV7" + }, + "source": [ + "The 2015 file contains data from rounds 3,4,5 of panel 19 (2014) and rounds 1,2,3 of panel 20 (2015). 
The 2016 file contains data from rounds 3,4,5 of panel 20 (2015) and rounds 1,2,3 of panel 21 (2016).\n", + "\n", + "For this demonstration, three datasets were constructed: one from panel 19, round 5 (used for learning models), one from panel 20, round 3 (used for deployment/testing of model - steps); the other from panel 21, round 3 (used for re-training and deployment/testing of updated model)." + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Privileged and unprivileged protected attribute values" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "k8QrrHJvHHV7" + }, + "source": [ + "For each dataset, the sensitive attribute is 'RACE' constructed as follows: 'Whites' (privileged class) defined by the features RACEV2X = 1 (White) and HISPANX = 2 (non Hispanic); 'Non-Whites' that included everyone else. \n", + "\n", + "Along with race as the sensitive feature, other features used for modeling include demographics (such as age, gender, active duty status), physical/mental health assessments, diagnosis codes (such as history of diagnosis of cancer, or diabetes), and limitations (such as cognitive or hearing or vision limitation).\n", + "\n", + "To measure utilization, a composite feature, 'UTILIZATION', was created to measure the total number of trips requiring some sort of medical care by summing up the following features: OBTOTV15(16), the number of office based visits; OPTOTV15(16), the number of outpatient visits; ERTOT15(16), the number of ER visits; IPNGTD15(16), the number of inpatient nights, and HHTOTD15(16), the number of home health visits.\n", + "\n", + "The model classification task is to predict whether a person would have 'high' utilization (defined as UTILIZATION >= 10, roughly the average utilization for the considered population).
High utilization respondents constituted around 17% of each dataset.\n", + "\n", + "To simulate the scenario, each dataset is split into 3 parts: a train, a validation, and a test/deployment part.\n", + "\n", + "We assume that the model is initially built and tuned using the 2015 Panel 19 train/test data. (Use case steps 1-2.)\n", + "It is then put into practice and used to score people to identify potential candidates for care management (Use case steps 3-5). Initial deployment is simulated to 2015 Panel 20 deployment data. To show change in performance and/or fairness over time, (use case steps 6-7), the 2016 Panel 21 deployment data is used. Finally, if drift is observed, the 2015 train/validation data is used to learn a new model and evaluated again on the 2016 deployment data" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "[array([1.])] [array([0.])]\n" + "cell_type": "markdown", + "metadata": { + "id": "TMCLPCFaHHV7" + }, + "source": [ + "## [3.](#Table-of-Contents) Training models on original 2015 Panel 19 data" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Dataset feature names" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "iylHAkh8HHV7" + }, + "source": [ + "First, load all necessary packages" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "['AGE', 'RACE', 'PCS42', 'MCS42', 'K6SUM42', 'REGION=1', 'REGION=2', 'REGION=3', 'REGION=4', 'SEX=1', 'SEX=2', 'MARRY=1', 'MARRY=2', 'MARRY=3', 'MARRY=4', 'MARRY=5', 'MARRY=6', 'MARRY=7', 'MARRY=8', 'MARRY=9', 'MARRY=10', 'FTSTU=-1', 'FTSTU=1', 'FTSTU=2', 'FTSTU=3', 'ACTDTY=1', 'ACTDTY=2', 'ACTDTY=3', 'ACTDTY=4', 'HONRDC=1', 'HONRDC=2', 'HONRDC=3', 'HONRDC=4', 'RTHLTH=-1', 'RTHLTH=1', 'RTHLTH=2', 'RTHLTH=3', 'RTHLTH=4', 'RTHLTH=5', 'MNHLTH=-1', 'MNHLTH=1', 'MNHLTH=2', 'MNHLTH=3', 'MNHLTH=4', 'MNHLTH=5', 'HIBPDX=-1', 'HIBPDX=1', 'HIBPDX=2', 'CHDDX=-1', 'CHDDX=1', 'CHDDX=2', 'ANGIDX=-1', 'ANGIDX=1', 
'ANGIDX=2', 'MIDX=-1', 'MIDX=1', 'MIDX=2', 'OHRTDX=-1', 'OHRTDX=1', 'OHRTDX=2', 'STRKDX=-1', 'STRKDX=1', 'STRKDX=2', 'EMPHDX=-1', 'EMPHDX=1', 'EMPHDX=2', 'CHBRON=-1', 'CHBRON=1', 'CHBRON=2', 'CHOLDX=-1', 'CHOLDX=1', 'CHOLDX=2', 'CANCERDX=-1', 'CANCERDX=1', 'CANCERDX=2', 'DIABDX=-1', 'DIABDX=1', 'DIABDX=2', 'JTPAIN=-1', 'JTPAIN=1', 'JTPAIN=2', 'ARTHDX=-1', 'ARTHDX=1', 'ARTHDX=2', 'ARTHTYPE=-1', 'ARTHTYPE=1', 'ARTHTYPE=2', 'ARTHTYPE=3', 'ASTHDX=1', 'ASTHDX=2', 'ADHDADDX=-1', 'ADHDADDX=1', 'ADHDADDX=2', 'PREGNT=-1', 'PREGNT=1', 'PREGNT=2', 'WLKLIM=-1', 'WLKLIM=1', 'WLKLIM=2', 'ACTLIM=-1', 'ACTLIM=1', 'ACTLIM=2', 'SOCLIM=-1', 'SOCLIM=1', 'SOCLIM=2', 'COGLIM=-1', 'COGLIM=1', 'COGLIM=2', 'DFHEAR42=-1', 'DFHEAR42=1', 'DFHEAR42=2', 'DFSEE42=-1', 'DFSEE42=1', 'DFSEE42=2', 'ADSMOK42=-1', 'ADSMOK42=1', 'ADSMOK42=2', 'PHQ242=-1', 'PHQ242=0', 'PHQ242=1', 'PHQ242=2', 'PHQ242=3', 'PHQ242=4', 'PHQ242=5', 'PHQ242=6', 'EMPST=-1', 'EMPST=1', 'EMPST=2', 'EMPST=3', 'EMPST=4', 'POVCAT=1', 'POVCAT=2', 'POVCAT=3', 'POVCAT=4', 'POVCAT=5', 'INSCOV=1', 'INSCOV=2', 'INSCOV=3']\n" - } - ], - "source": [ - "describe(dataset_orig_panel19_train, dataset_orig_panel19_val, dataset_orig_panel19_test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Metrics for original data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.48230522996275893\n" - } - ], - "source": [ - "metric_orig_panel19_train = BinaryLabelDatasetMetric(\n", - " dataset_orig_panel19_train,\n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "explainer_orig_panel19_train = MetricTextExplainer(metric_orig_panel19_train)\n", - "\n", - "print(explainer_orig_panel19_train.disparate_impact())" - ] - }, - 
{ - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3.2. Learning a Logistic Regression (LR) classifier on original data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3.2.1. Training LR model on original data" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = dataset_orig_panel19_train\n", - "model = make_pipeline(StandardScaler(),\n", - " LogisticRegression(solver='liblinear', random_state=1))\n", - "fit_params = {'logisticregression__sample_weight': dataset.instance_weights}\n", - "\n", - "lr_orig_panel19 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3.2.2. Validating LR model on original data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function will be used throughout the tutorial to find best threshold using a validation set" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import defaultdict\n", - "\n", - "def test(dataset, model, thresh_arr):\n", - " try:\n", - " # sklearn classifier\n", - " y_val_pred_prob = model.predict_proba(dataset.features)\n", - " pos_ind = np.where(model.classes_ == dataset.favorable_label)[0][0]\n", - " except AttributeError:\n", - " # aif360 inprocessing algorithm\n", - " y_val_pred_prob = model.predict(dataset).scores\n", - " pos_ind = 0\n", - " \n", - " metric_arrs = defaultdict(list)\n", - " for thresh in thresh_arr:\n", - " y_val_pred = (y_val_pred_prob[:, pos_ind] > thresh).astype(np.float64)\n", - "\n", - " dataset_pred = dataset.copy()\n", - " dataset_pred.labels = y_val_pred\n", - " metric = ClassificationMetric(\n", - " dataset, dataset_pred,\n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "\n", - " 
metric_arrs['bal_acc'].append((metric.true_positive_rate()\n", - " + metric.true_negative_rate()) / 2)\n", - " metric_arrs['avg_odds_diff'].append(metric.average_odds_difference())\n", - " metric_arrs['disp_imp'].append(metric.disparate_impact())\n", - " metric_arrs['stat_par_diff'].append(metric.statistical_parity_difference())\n", - " metric_arrs['eq_opp_diff'].append(metric.equal_opportunity_difference())\n", - " metric_arrs['theil_ind'].append(metric.theil_index())\n", - " \n", - " return metric_arrs" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "thresh_arr = np.linspace(0.01, 0.5, 50)\n", - "val_metrics = test(dataset=dataset_orig_panel19_val,\n", - " model=lr_orig_panel19,\n", - " thresh_arr=thresh_arr)\n", - "lr_orig_best_ind = np.argmax(val_metrics['bal_acc'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Plot metrics with twin x-axes" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "def plot(x, x_name, y_left, y_left_name, y_right, y_right_name):\n", - " fig, ax1 = plt.subplots(figsize=(10,7))\n", - " ax1.plot(x, y_left)\n", - " ax1.set_xlabel(x_name, fontsize=16, fontweight='bold')\n", - " ax1.set_ylabel(y_left_name, color='b', fontsize=16, fontweight='bold')\n", - " ax1.xaxis.set_tick_params(labelsize=14)\n", - " ax1.yaxis.set_tick_params(labelsize=14)\n", - " ax1.set_ylim(0.5, 0.8)\n", - "\n", - " ax2 = ax1.twinx()\n", - " ax2.plot(x, y_right, color='r')\n", - " ax2.set_ylabel(y_right_name, color='r', fontsize=16, fontweight='bold')\n", - " if 'DI' in y_right_name:\n", - " ax2.set_ylim(0., 0.7)\n", - " else:\n", - " ax2.set_ylim(-0.25, 0.1)\n", - "\n", - " best_ind = np.argmax(y_left)\n", - " ax2.axvline(np.array(x)[best_ind], color='k', linestyle=':')\n", - " ax2.yaxis.set_tick_params(labelsize=14)\n", - " ax2.grid(True)" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "Here we plot $1 - \\min(\\text{disparate impact}, 1/\\text{disparate impact})$ since it's possible to overcorrect and end up with a value greater than 1, implying unfairness for the original privileged group. For shorthand, we simply call this 1-min(DI, 1/DI) from now on. We want the plotted metric to be less than 0.2." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "disp_imp = np.array(val_metrics['disp_imp'])\n", - "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", - " disp_imp_err, '1 - min(DI, 1/DI)')" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced Accuracy',\n", - " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make a function to print out accuracy and fairness metrics. This will be used throughout the tutorial." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "def describe_metrics(metrics, thresh_arr):\n", - " best_ind = np.argmax(metrics['bal_acc'])\n", - " print(\"Threshold corresponding to Best balanced accuracy: {:6.4f}\".format(thresh_arr[best_ind]))\n", - " print(\"Best balanced accuracy: {:6.4f}\".format(metrics['bal_acc'][best_ind]))\n", - "# disp_imp_at_best_ind = np.abs(1 - np.array(metrics['disp_imp']))[best_ind]\n", - " disp_imp_at_best_ind = 1 - min(metrics['disp_imp'][best_ind], 1/metrics['disp_imp'][best_ind])\n", - " print(\"Corresponding 1-min(DI, 1/DI) value: {:6.4f}\".format(disp_imp_at_best_ind))\n", - " print(\"Corresponding average odds difference value: {:6.4f}\".format(metrics['avg_odds_diff'][best_ind]))\n", - " print(\"Corresponding statistical parity difference value: {:6.4f}\".format(metrics['stat_par_diff'][best_ind]))\n", - " print(\"Corresponding equal opportunity difference value: {:6.4f}\".format(metrics['eq_opp_diff'][best_ind]))\n", - " print(\"Corresponding Theil index value: {:6.4f}\".format(metrics['theil_ind'][best_ind]))" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7627\nCorresponding 1-min(DI, 1/DI) value: 0.6066\nCorresponding average odds difference value: -0.1831\nCorresponding statistical parity difference value: -0.2643\nCorresponding equal opportunity difference value: -0.1608\nCorresponding Theil index value: 0.0936\n" - } - ], - "source": [ - "describe_metrics(val_metrics, thresh_arr)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 
3.2.3. Testing LR model on original data" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "lr_orig_metrics = test(dataset=dataset_orig_panel19_test,\n", - " model=lr_orig_panel19,\n", - " thresh_arr=[thresh_arr[lr_orig_best_ind]])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7759\nCorresponding 1-min(DI, 1/DI) value: 0.5738\nCorresponding average odds difference value: -0.2057\nCorresponding statistical parity difference value: -0.2612\nCorresponding equal opportunity difference value: -0.2228\nCorresponding Theil index value: 0.0921\n" - } - ], - "source": [ - "describe_metrics(lr_orig_metrics, [thresh_arr[lr_orig_best_ind]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For all the fairness metrics displayed above, the value should be close to '0' for fairness.\n", - "\n", - "1-min(DI, 1/DI) < 0.2 is typically desired for classifier predictions to be fair.\n", - "\n", - "However, for a logistic regression classifier trained with original training data, at the best classification rate, the 1-min(DI, 1/DI) value is well above this threshold. This implies unfairness.\n", - "\n", - "Similarly, $\\text{average odds difference} = \\frac{(FPR_{unpriv}-FPR_{priv})+(TPR_{unpriv}-TPR_{priv})}{2}$ must be close to zero for the classifier to be fair.\n", - "\n", - "Again, the magnitude of the average odds difference for this classifier-data combination is still far from zero. This still implies unfairness." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3.3. Learning a Random Forest (RF) classifier on original data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3.3.1. 
Training RF model on original data" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = dataset_orig_panel19_train\n", - "model = make_pipeline(StandardScaler(),\n", - " RandomForestClassifier(n_estimators=500, min_samples_leaf=25))\n", - "fit_params = {'randomforestclassifier__sample_weight': dataset.instance_weights}\n", - "rf_orig_panel19 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3.3.2. Validating RF model on original data" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "thresh_arr = np.linspace(0.01, 0.5, 50)\n", - "val_metrics = test(dataset=dataset_orig_panel19_val,\n", - " model=rf_orig_panel19,\n", - " thresh_arr=thresh_arr)\n", - "rf_orig_best_ind = np.argmax(val_metrics['bal_acc'])" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAqUAAAG4CAYAAAB8eJ7qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd1yV9fvH8dfNVBygIuDAbe5tbnNkpZWWWWlpmpWW7WFlv29772ll2jBt2dDUyjQ1c8+cuAeIIiiiICDzXL8/PpBkgAcF7nMO1/PxOA/g5j73uU6mvLnuz7BEBKWUUkoppezkZXcBSimllFJKaShVSimllFK201CqlFJKKaVsp6FUKaWUUkrZTkOpUkoppZSynYZSpZRSSillOw2lSimllFLKdraEUsuy7rYs64BlWWmWZW2wLKvnOc6/2bKsTZZlpVqWFWtZ1leWZYWddc4Qy7K2W5aVnvNxcMm+C6WUUkop11SUrGVZ1lTLsiSfR0pp1lzqodSyrKHAe8DLQDtgJTDPsqw6BZzfHZgOfAm0AK4FmgNf5zmnKzAj51jbnI8/WJbVueTeiVJKKaWU6ylq1gIeAGqc9dgPfF/y1Z5hlfaOTpZlrQG2iMiYPMf2AD+KyBP5nD8euE9E6uY5Nhr4QEQq5nw9A6gqIpflOWchcExEbiq5d6OUUkop5VqKmrXyeX53YDnQXURWllyl/1aqnVLLsvyADsCCs761AOhWwNNWADUsyxpoGcHAMOC3POd0zeea8wu5plJKKaWUxznPrHW2MUBEaQZSAJ/SfDEgGPAG4s46Hgf0y+8JIrLKsqxhmFvy5TE1/wGMynNaWAHXDCMflmWNBcYCBAQEdAgODi7au1BKKaWUssHBgwcF+DvPockiMjnP10XOWnlZlhUI3Aics6Na3Eo7lBaZZVnNgQ+AFzDdzxrAG8AnwMjzuWbOH95kgAoVKkhUVFTxFKuUcmmxsbEAhIXl+/uqUkq5PMuyTotIxxJ8iRGYO+nTS/A18lXaoTQeyAZCzzoeCsQW8JwngLUi8kbO11tyZoMtsyzr/0TkUM5zi3JNpVQZNGzYMACWLFlibyFKKVVyzidr5TUG+ElEEoq7sHMp1VAqIhmWZW0ALgN+yPOty4CfCnhaAOY/bl65X+eOiV2Vc4038pxzGWa2mVJKATBhwgS7S1BKqRJ1nlkLAMuyOgFtgAdLrsKC2XH7/m1gumVZazGTmO4CagKTACzLmgYgIrm35ucCUyzLGseZ2/fvAn+LyMGcc94DllqWNQH4GRgM9AF6lMo7Ukq5hf79+9tdglJKlYaiZq1cY4E9IrKk9Eo9o9RDqYjMsCyrGvAkJmBuA64UkdyBnXXOOn+qZVmVgHuBt4BEYDHweJ5zVuZMhnoReB7YBwwVkTUl/X6UUu4jOjoagPDwcJsrUUqpklPUrAWQk7WGYXKULUp9nVJXU6FCBUlJKdUNC5RSNunduzegY0qVUu7LsqxUEalgdx0lweVn3yulVHF58skn7S5BKaVUAbRTqp1SpZRSSrkJT+6UluqOTkopZaf9+/ezf/9+u8tQSimVD+2UaqdUqTJDx5QqpdydJ3dKdUypUqrMeO655+wuQSmlVAG0U6qdUqWUUkq5CU/ulOqYUqVUmbFr1y527dpldxlKKaXyoZ1S7ZQqVWbomFKllLvz5E6pjilVSpUZL7/8st0lKKWUKoB2SrVTqpRSSik34cmdUh1TqpQqM7Zt28a2bdvsLkMppVQ+tFOqnVKlygwdU6qUcnee3CnVMaVKqTLjjTfesLsEpZRSBdBOqXZKlVJKKeUmPLlTqmNKlVJlxqZNm9i0aZPdZSillMqHdkq1U6pUmaFjSpVS7s6TO6U6plQpVWa8++67dpeglFKqANop1U6pUkoppdyEJ3dKdUypUqrMWLduHevWrbO7DKWUUvnQTql2SpUqM3RMqVLK3Xlyp1THlCqlyoyJEyfaXYJSSqk
CaKdUO6VKKaWUchOe3CnVMaVKqTJj5cqVrFy50u4ylFJK5UM7pdopVarM0DGlSil358mdUh1TqpQqMz755BO7S1BKKVUA7ZRqp1QppZRSbsKTO6U6plQpVWb89ddf/PXXX3aXoZRSKh/aKdVOqVJlho4pVUq5O0/ulOqYUqWURxMR4pLS2Xs0mQF3P8eJ1Ay2HDpJ69pBdpemlFIqD+2UaqdUKY/gcAiHT55m77Fk9sYls+foKfYcNZ+fSs/65zzLAhFoVSuQ4Z3rMLBNTSr46+/nSin34MmdUg2lGkqVcksnUzPYEHWCdZEnWB+ZQERMEqczs//5fnBFfxqFVKBxSCUah1akUUhFYiLW4uvjRWKVpny9+iC74k5R0d+Hwe1qMbxLHZqGVbbxHSml1LlpKPVgGkqVcn0ipgu6PvIE6yITWB95gl1xpwDw9bZoWSuQNrWDuCg0J4BWr0iVCn7/uU7eMaUiwt8HT/D16oP8svUIGVkOOtStwvDOdbiyVQ3K+XqX5ltUSimnaCj1YBpKlXJNIsLPmw6zeOcx1kcmcCQxDYBK/j60r1uFi+tVoWO9qrSpHUR5P+cCZHR0NADh4eH/On4iJYOf/j7EN2sOsj8+haAAX65vX5u+TUNoVqNyvgFXKaXsoKHUg2koVcr1OBzC879sZ+rKSEIr+3Nxvar/PJqEVcLbyyqR1xURVu0/ztdrDjJ/WyxZDvPvY1jlcjSrUYmmNSrTrEZlmteoRL1qFfDx1lX1lFKlS0OpB9NQqpRryXYIE37awg8bDnF7j/o8eVUzLKt4Qujvv/8OQP/+/c957omUDLbFJLLjSBI7jpxix5Ek9h5N/ieo+vt4cVFoJZrVqESr2kH0aBRMvWoBxVarUkrlR0OpB9NQqpTryMhy8NCMTfy69QgP9mvMA5c2LtaQd6HrlGZkOdh7NDknqCaxI9YE1oSUDABqVylPz8bB9GhUne6NqhEUoLf9lVLFS0OpB9NQqpRrOJ2RzbivN7Bk1zGevKoZd/RsUOyvERsbC0BYWFixXVNEiDyeyvI9x1i2J55V+45zKj0Ly4LWtQLp2bg6PRoH075OFfx89Ha/UurCaCj1YBpKlbLfqbRMbp+6nnVRCbwyuBXDOtWxu6TzlpXtYPOhkyzdHc/yvfFsij5JtkMI8POma4Nq3Ny5Dn2ahOBVQuNilVKeTUOpB9NQqpS9TqRkMOqLtWyPSeKdoW0Z2KZmib3W3LlzARg4cGCJvcbZktIyWbXvOMv3xPPH9jhik9JoWL0CY3o24Np2tXTpKaVUkWgo9WAaSpWyT1xSGiM+XUNUQiqTRrSnb9PQEn29Cx1TeqEysx38tvUIk5fuJyImieCKfozsWo8RXepSVZedUko5QUOpB9NQqpQ9ohNSGf7pGo4npzNlVEe6NQwu8deMj48HIDi45F+rMLlLT01Zup8/dx2jnK8XN3QI5/Ye9akX7JE/a5RSxURDqQfTUKpU6dt7NJkRn67hdGY2X97WibbhQXaXZJs9caf4dNkBZm08TKbDweXNQxl7SQM61K1qd2lKKRekodSDaShVqviICGmZDk6kZnAyNZOTqRmcSM3k5Gnz9YmUDE6ezmTxzqN4WRZf3dGpVPebnzlzJgDXXXddqb2ms46eSmPayii+WhPFydRM6gdXoEejYLo3CqZrw2oElve1u0SllAvQUOrBNJQqdWHSMrP5besRvllzkC2HE8nIchR4bnlfb6oE+FK7agCvDWlN/VK+VW33mFJnpGZkMfPvwyzeeZTV+4+TmpGNlwWtagfRMyektq8bhL+PTpBSqizSUOrBNJQqdX4OxKfwzZoofthwiJOpmTQIrkDfpiFUrehHlQA/qgT4EljejyoVfKkS4EdgeV/bZ5onJiYCEBgYaGsdzsrIMstLLdsTz4o8y0uV8/WiU/1q9GhUje6NgmkWVlmXmFKqjHA2lFqWdTfwKFADiAAeFJFlhZzvBzwJ3ALUBOKAN0Xk/WIp3Ak
aSjWUKuW0zGwHC7fH8dWaKFbsPY6Pl8UVLcIY3rkOXRtW0y02S9iptEzW7E9g+V6zBureo8kAVAnwpWvDanRtGEz3htWoH1xB/yyU8lDOhFLLsoYCXwF3A8tzPo4GmovIwQKeMxOoDfwP2AOEAuVFZEnxVV84DaUaSpU6p5iTp/l27UG+WxfNsVPp1Aoqz02dwrnx4nBCKpWzuzynzZgxA4ChQ4faXEnxiE1MY+W+eFbsPc7KffEcSUwDIKxyObo1rEa3RsF0a1iNmkHlba5UKVVcnAyla4AtIjImz7E9wI8i8kQ+518O/AA0FJH44q7ZWRpKNZQqla/E1EwW7Yzjly1HWLLrKAL0aRLC8M516N0kBG83vF3sDmNKz1fudqcr98Wzcu9xVu0/TkJKBgD1gyvQvVE17u3TmLBA9/klQin1X+cKpTm34VOBm0TkhzzHPwRaikivfJ7zEXARsBYYCZwG5gH/JyLJxfwWClTmQ2l4eLhMnz7d7jKUcgnZDiExLZOk01kkp2UhCL7eXlQJ8KVqBT98vd177/a0NNNJLFeubASztEwHyelZpKRnkZyehZeXRd2qAQT46SQppdxVnz59MoCteQ5NFpHJuV9YllUTOAz0EpGleY4/DQwXkSZnX9OyrN+B3sAi4HkgCPgA0229viTeR358SuuFXFVCQsI/3ROlyqLYxDR+33aEedtiWReZgEOgTtUABrQMo3/LMNrUDtJJNB5gd9wpxk5bz+Gtp3l2UAuGd65rd0lKqfOTJSIdi/maXoAAN4tIIoBlWfcC8y3LChWRuGJ+vXyV+VCqVFl09FQaP288zLxtsWw8eBKAxiEVubdPI/q3rEGzGpU8cqLMV199BcCIESNsrqT0XRRaidn39OCBGRv536xtbDucxLODmuvSUkp5nnggGzNRKa9QILaA5xwBDucG0hw7cj7WwczEL3Fl/va9jilVZcmWQyf5YkUkv2yJITNbaFmrMgNa1uCKFmE0Cqlod3klzpPHlDor2yG8tWAXHy3ZR4e6Vfh4eHtCKpeN4QxKeYIiTHTaLCJj8xzbDfxUwESnscC7QEjuGFLLsi4FFgKhInK0ON9DgXVrKNVQqjxbVraD+RFxfLHiAOujTlDBz5sbOoYzsmtdGlT3/CCaV2ZmJgC+vro70q9bjjD+h81ULu/DxyM60L5OFbtLUko5oQhLQk3HLAW1ArgLuB1oISJRlmVNAxCRkTnnV8R0RlcDz2LGlH4C7BCRG0rorfy3bjtCaVEWdLUsayowKp9v/fOHYllWb+DPfM5pJiI7C6tFQ6nyVCdTM/h2bTTTV0USk5hGnaoBjOpWjxs61qZyOQ1lCnbGJjF22gZiE9N44doWDL24jt0lKaXOoYiL5z+GyVrbgIdyJz5ZlrUEQER65zm/CWZyUw/gBPAzMEFEThXzWyi45tIOpUVd0NWyrEDg7EX2VgBLRWR0zjm9MaG0BZCQ57xjIpJdWD0aSpWn2RN3ii9WRjLz70OkZTro2qAao7vX49JmoW65jFNxmjp1KgC33nqrrXW4kpOpGdz37UaW7Ynnli51eerq5vj5uPcqC0p5Mt1mtDhfsIgLuubz/O6YMNtdRFbmHOuNCaXVi7roq4ZS5Snik9N56udtzNsWi5+PF9e2rcno7vVpVqOy3aW5DB1Tmr9sh/D6/J188td+Lq5XhYk3tydUx5kq5ZI0lBbXi53Hgq75XGMq0FFEWuY51hsTSqMAf2A78KKI5HdL/180lCpPsCAilidmbuVUehbjejVkZNe6VKvob3dZys3M2RzDYz9uxuGAK1uFcUvXerSvE+SRKzEo5a48OZSW9pJQwYA3/11aIA7od64n59zKvxE4u6N6BBgHrAP8gFuARZZl9cpvrGrOLLOxAH5+fkV8C0q5juT0LJ6fG8H36w/RvEZlvh3WlotCK9ldlnJTg9rUpHWtQKaujOSnDYf4eVMMLWpW5pYudbmmbS3K66L7SqkSVNqd0iLvMnD
W8+8B3gJqikjCOc79DbPA7KDCztNOqXJX6yITePj7TRw+cZpxvRvywKUX6VjAc5gyZQoAY8aMOceZKiU9i1kbDzN9VRS74k5RuZwPN3QMZ0SXutQP9sgmjVJuwZM7pW51+96yrE1AhIgMd+K1ngGGiUizws7TUKrcTXpWNm//sZvJS/cTXiWAd4a2oUPdqnaX5Rb69TM3ZBYuXGhzJe5DRFh7IIFpq6OYvy2WLIdwyUXVGdmlLn2ahpT5yXNKlTYNpcX5gkVc0DXPOZ2ANUAfEVnixOvMAgJFpG9h52koVe5kZ2wSD363iZ2xp7ipUzhPXtWcCv66MZsqHUeT0vh2bTTfrI0iLimdRiEV+W5sF4J1/LJSpUZDaXG+YBEXdM3zvE+BS0Tkonyu+SAQiVnz1A8YAUwAhojIzMLq0VCq3EG2Q/hs+X7enL+byuV9eG1Iay5tdvYOckqVjsxsB/O2xfLoD5tpXTuQr+/ookNHlColnhxKS73FIiIzLMuqBjzJmQVdrxSRqJxT/rN6s2VZlYBhwPMFXNYPeAOoDZzGhNOrROS3Yi5fqVK3OfokL/26g7WRCVzRIpSXB7fSmfXn6aOPPgLg7rvvtrkS9+br7cWgNjURER74bhNPz97GK9e10ln6SqkLotuMaqdUuahthxN5d+FuFu44SlCAL/+7shnXd6itP/gvwIABAwCYN2+ezZV4jtd/38lHS/bx3KAWjOpWz+5ylPJ4ntwp1VCqoVS5mB1Hknh34W7mR8QRWN6XsZc0YFS3elTUsaPKBTkcwtjp6/lz1zGm3daJ7o2C7S5JKY+modSDaShVrmJX7CneW7Sb37bGUqmcD3f0aMDoHvV0n3rl8k6lZTLk45XEJaUz597u1K3mkT8vlXIJGko9mIZSZbe9R0/x3qK9/LIlhgp+PtzWoz6396hPYHkNo8XtvffeA+CBBx6wuRLPE3U8hWs+XEH1iv7MvLsblfSXKaVKhIZSD6ahVNlBRNh8KJGpKw4we3MM5X29Gd29HmN6NiAoQHcZKymDBpm9NObMmWNzJZ5p5d54bvl8Lb0vqs7kkR11DVOlSoCGUg+moVSVpj1xp5izOYY5m2OIOp5KeV9vRnary9ieDXRGvfII01ZF8vTsCO7u3ZDH+je1uxylPI4nh1KdOaFUCTt0IpW5m48wZ3MMO44k4WVBt4bB3NO7EVe0DNPb9Mqj3NKlLjuOnOKjJftoElaJa9rWsrskpZSb0E6pdkpVCYhPTue3rUeYvSmGDVEnAGhXJ4hBbWpyVesahFQqZ3OFZdObb74JwPjx422uxLNlZDkY8ekaNh86yQ93daV17SC7S1LKY3hyp1RDqYZSVYxEhP+btZXv1x8i2yE0Ca3EoLY1Gdi6JnWqBdhdXpk3ZMgQAH766SebK/F8x5PTGTRxBdkOYc693QmprL+IKVUcNJR6MA2lqjhNXXGAZ+du56ZOdRjVrS5NwyrbXZJSttkek8SQj1fSJKwS347pQnk/b7tLUsrtaSj1YBpKVXGJiElk8Icr6dE4mM9GddSdl5QC5m09wriv/yascjkeuqwxQ9rXxsfby+6ylHJbGko9mIZSVRxSM7K4+oPlJKdlMe+BnjqT3kW9+uqrAEyYMMHmSsqWtQcSePm3HWyKPknjkIo81r8p/ZqF6C9uSp0HTw6l+uuqUsXguTnbORCfwrtD22ogdWGbNm1i06ZNdpdR5nSqX5VZd3fj4+HtyXYIY6at58ZPVv0zCVAppUA7pdopVRds7uYY7vt2I/f0acijV+i6jEoVJjPbwffro3l34R6OnUrnihahPHpFUxqFVLS7NKXcgid3SjWUaihVFyA6IZUr31tG49CKzLizK746Vk4pp6RmZPHZsgN8snQ/pzOzubFjOA/2a0yoztJXqlAaSj2YhlJ1vjKzHdwwaRX7jibz2wM9Ca+qSz65uhdeeAGAp556yuZKVK7jyel8sHgvX6+JwtvLYlCbmrSrU4W24UE0Dqm
ok6KUOosnh1Ld0Ump8/TOH7vZFH2SiTe300DqJnbt2mV3Ceos1Sr68+ygFozuXo93F+5hwfY4vl9/CIDyvt60qhVIm/BA2oZXoU14ILWCyusEKaU8lHZKtVOqzsOKvfGM+GwNQzuG8+qQ1naXo5THEBGijqey+dBJNkWbR0RMEhlZDgCCK/rTNjyQlrUCaRRSkfrBFagfXIEAP+2xqLLBkzulGko1lKoiOp6czoD3llGpnA9z7+uhPwyVKmEZWQ52xZ5iU/QJNkUnsvnQSfYeTf7XOTUDy1G/egUaBJug2qB6BRpWr0jNoPJ4e2lnVXkODaUeTEOpKgoR4bap61ix7zg/392d5jV1xyZ38vTTTwPw/PPP21yJulCnM7I5EJ/CgfgU9h9L5kB8CvtyPj+VlvXPeeV9vXlnaBv6t6xhY7VKFR9PDqXa4lGqCL5YEcmfu47x3KAWGkjdUHR0tN0lFB8R2LgRIiOhb18ICrK7olJV3s+b5jUr/+fvoYhwPCWD/cdSOBCfzNSVUTz58za6NgwmsLyvTdUqpZyhnVLtlConbTucyHUfreSSi4KZMlK3EVWFSEuD2Fg4cuTfj8RE6NoV+veHqlXP79oxMfD11/DllxARYY75+ECfPnDttXDNNVCrVvG9Fze37XAiAycuZ1TXejw7qIXd5Sh1wTy5U6qhVEOpcsKptEyumbiClIws5j1wCVUr+NldknIVS5fClCkmLOYG0RP57FTk7Q3lykFKCnh5mXB61VXm0aoVFPZLzunT8PPPJoj+8Qc4HOb5I0dCixbwyy8waxbs2WPOv/hiE1CvvRaaNSv82mXAUz9v4+s1UfxyX0+9w6HcnoZSD6ahVJ1LRpaD26auY/X+40y/vTNdG1azuyR1np544gkAXnnlleK54Jo1pkNZoQI0bgw1ahT8CA42z1m/Hn791Tz+/tscCw+HK680AfXSSyEgwNyeX74cpk2D77+HpCSoUwduucWE0Ysu+nctIrBzpwmvP/8Ma9ea440bm3A6cCDUrm1qrVgRypcvM2E1MTWTPm8toUFwBb6/syteOvFJuTENpR5MQ6kqjMMhPPz9Jn7eFMObN7Th+g617S5JXYCxY8cCMHny5Au/2O7d0L07VK4MK1dCaGjRrxETA/PmmYD6xx+QnAz+/tCrF+zdC/v3mxB5/fUwapQ57uXkYvKHD8OcOSagLl4MWVn//r5lnQmouR9zP69dG5o3N13Y5s1NaHbzAPv9+mge+3GL/j1Wbk9DqQfTUKoK8+q8nUz6ax/jL7+Ie/s2trsc5Sri4szt81OnTCBtXAz/b6Snw7JlJqAuWGC6q6NGweDBJixeiJMnzTCDhAQTfFNSCv546hRERZn3mKtixX+H1NzPw8OdD8k2cziE6yetJOp4KovH99ZJT8ptaSj1YBpKVUG+XBnJM3MiGN65Di9e21InNinj1Cno3dvcKv/zT+jUye6KSsbx47B9u3lERJz5GBt75pxatWDqVOjXz7Yyi2Lb4UQGTVzOLV3q8tw1Le0uR6nzoqHUg2koVfn5fdsRxn39N/2ahTJpRAddfNtDjB8/HoA333zz/C6QmWnGZi5cCLNnmzGgZU1CwpmA+v77sGMHPPEEPPss+Lp+9/GZ2duYvjqKOff2oGWtQLvLUarIPDmUusd9F6VK0brIBO7/bhPtwoN4f1g7DaQe5PTp05w+ffr8niwCd9wB8+fD5MllM5CCWcqqRw+4805Yt878N3n5ZTPeNSrK7urO6eHLm1AlwI+nZ2/D4SjbTRmlXI12SrVTqvLYE3eK6yetolpFP366qxtVdOknlet//zPh6/nn4amn7K7GtcyYAWPHmvGln34KQ4aU7Ovt2mVeJzUVMjLMIz39zOd5HyJwzz0wYsQ/T/9xwyHG/7CZ169vzY0dw0u2VqWKmSd3SjWUaihVOeKS0rjuo5VkZDuYOa4b4VUD7C5JuYqPPjLBZuxYmDTJ7Weil4j9+2HYMNM9HTcO3nr
LLDtV3ObNM69z+rRZ+cDPz6xY4OeX/+dHjsDWraamd94Bf38cDuHGT1axPz6FPx/pTWCA6w87UCqXhlIPpqFUASSlZXLjpFVEJ6Qy486uOtbMQz344IMAvPvuu84/adYs0/kbOBB++snsnqTyl5EBTz4Jb7xhNgSYMcMs3l8cRODdd2H8eHPtOXPMuq3nkpVlutyvv24mpf3wA9Spw/aYJK7+YBnDO9flhWt10pNyH54cSnVMqSrzMrIc3DV9A3uPJjPplg4aSNUZy5fDTTdB587w7bcaSM/Fz8+Ev3nzzCz9Dh3gs89MoLwQ6elw++3w8MNmI4AVK5wLpGD+zF57DWbONJOy2reHBQtoXrMyo7rV46s1UWw9lHhh9SmlioV2SrVTWqY5HMJD329i9qYY3r6xDde110W1VY7t282EnurVTQjK3ZFJOefIEbP71KJF5nb7W29BzZpFv87Ro3DddebP4Omn4Zlnzn9t1N27Tdc7IgKef56khx+l79vLqF2lPDPHddOdnpRb0E6pUh7qoyV7mb0phsf6N9FAqs44cgQGDDBjEn//XQPp+ahRw6xU8PLL5pZ53bqm67xypfOd0y1b4OKLYcMG+O47eO65C1us/6KLYPVquPlmeOopKt9wHc/2CGVT9Em+Xx99/tdVxSsz03Tat241u5HNmGHWw03Ujran006pdkrLrL1HT3Hle8u5vEUoH9zUThfHLwPuueceAD788MOCT0pOhksuMV21pUvN7V51Yfbtgw8/hM8/N8GifXu4914TUsuVy/85P/9sZswHBZk1YTt0KL56RODjj+HBB5FatZhw0zMsKFeTxY/0LnTFDREhM1uwLPD1diIcZ2ebEO3K/7aImI0S4uJMEMz9mPfz3I+pqebP7LHHoGHDC3vdzEwzpGLRItMNP3bMPOLj4cSJ/J9TuzZMmQL9+1/Ya7s5ZzullmXdDTwK1AAigAdFZFkB5/YG/sznW81EZOcFlFskGko1lJZJDocwbPJqdh89xcKHexFc0d/uklQpOOfi+VlZcM01psM3d67plqrik5wMX30FH3xghkdUqwZjxpiZ8bljREXglVfM5KROnUw4rVGjZOpZvRpuuAHHsWP834WmVKQAACAASURBVKV3sqbPYCqX8yE9y0FGloP0fx7Z/3wN4O/jxQP9GjOmZ4P8w2lionkP774LLVvCCy+YIOUq4TQ52YTB334zj0OH/nuOvz+EhUFo6JmP6emma5mZCTfeCI8/Dm3bFu21jx0z6/x+9BHExJh1b2vWNMNkCnscPw533WXGBd92mxkOEhRUPP893IwzodSyrKHAV8DdwPKcj6OB5iJyMJ/ze2NCaQsgIc+3jolIdjGVfk5OhVLLorMIa0qhnlKnobRs+mbNQf5v1lbeuL41N+g6hQpMGBo3Dj75xDzGjrW7Is8lYrZo/eADM4sezASmceNMN/Xbb2H4cNMVK4llpfI6dszczl+4kDXte/P7gFs4fFFr/H298fP2wt/X65+P/j7e+Pt4sTn6JAu2x9GiZmVeG9L6zOTIzEwTuJ591nT8rr/eDD04cAC6dYMXX4Q+fUr2/eRHxHT+c0Po0qVmpYRKleDyy83Y6Zo1zwTQsDCz3FZ+IfrIERO2P/7YbLk7YIDZ0atnz8Jr2LTJ7AD2zTcm3F5+Odx/v3m+s0My0tLMEI7XXze/qEyZUiZ/cXQylK4BtojImDzH9gA/isgT+ZzfGxNKq4tIfDGX7DwROecDxAGyGeQ+kCrOPMddHgEBAaLKlrjE09Lymd9l2CerxOFw2F2OchWvvioCIhMm2F1J2RIZKfL44yJVq5r//pYl8sorIqX5dzMrS+S550QqVTI1XHyxyPTpImlpBT7lty0x0uGFP6TBE7/Ka79tl/SfZok0aWKe37u3yIYN5sSMDJFPPhGpXdt8r08fkRUrSv49paaKzJsnct99Ig0amNcGkebNRcaPF1m8WCQ9/fyvf+KEyIsvigQHm+t27y7
yyy///nPLzBT58UeRSy4x5wQEiIwbJxIRcWHvbe1a8z5AZPRoU4srSEkROXiwxF8GSJFCMxt+QBZww1nHPwT+KuA5vQEBIoEjwCKgT2GvUxIPZzuljpxiAdKBWcCnIvmOP3Ar4eHhMn36dLvLUKXoYEIqSWlZXBRSET8fnetXluTets+9jZ8rZNEimr/4InF9+7Ljf/+7sMk06rx4padTfckSMqpV40THjrbU4J2aStj8+dSaNYuA6GgyqlQhZuBAYgYNIqNatf+cn+0QMjZsodXnU6i9M4Lk8DocGHcXx7t0+U+X0Ssjgxpz51L366/xO3GC4507c2D0aJKbNCmW2v2PHaNyRIR5bN9OpT178MrMJNvfnxPt25PQuTMJnTuTFhZWLK+XyystjRrz5hE+Ywbl4uJIbtCA6GHD8IuPp9bs2ZSLi+N0WBiHBw8mdsAAsipVKpbXtTIyqDdtGnW+/ZaMqlXZ9cgjJHTpUizXdlp2NpX27KHK+vVU+ftvArdtI+Hii9n20ksl+rJ9+vTJALbmOTRZRCbnfmFZVk3gMNBLRJbmOf40MFxE/vM/nWVZTYA+wDpMqL0FuCvnGvmOQy0RziRXkDdBDuZ0TB0g2TmPPSATQMJKO00X10M7pWXLgohYqfv4LzJx8R67S1E2mDBhgkw4uxP6118ifn6mm1NIZ0yVIdnZIr//LnLVVaZz6+MjctNNIitXnukERkaKDB8uApJeNVjeuPYBafDobHly1lZJOp1R8LWTk0Vee+1MZ3jwYJEtW4pWX1qayKpVIm+/LXLDDWe6sCBSrpxIjx4ijz5qOqWpqef/36EoMjJEvvxSpFmzM7X07i0ya5bpRJeUdetEWrQwr3frrSXbNXU4RPbsEfnoI5HrrhMJCjrzXtu0EXnkEZFFi0ru9XNw7k5pTUwj8ZKzjj8N7CrsuWed/xswx9nzi+NRpIlOlkUP4CZgCBCSm2uBbGA28JIIm4ojLJcWHVNadpxKy+Syt5cSFODL3Pt6ODd7Vnm2nTvNWL+QELNUUdWqdlekXM3evWdWDkhKgo4dzQSszz4z3dCHH4bHHye1XABvzt/NFysPUKNyOV4a3Io+TUMKvm5Skhmb+dZbZmxmx47g62uumdtlzf0877GUFNi82YwJBbPUVteuZx5t2phNDOzicJjxwiEhZuet0pCebiaTvfqqGRf7+utmIpZvMWwfm5ZmJj3Onw8LF0JUlDkeHg6XXWYeffua91tKzjWm1LIsPyAVuElEfshz/EOgpYj0cvJ1ngGGiUgxbcvmxGsWJZT+8ySLcGAa0AsTSq2cj1nAjSLMLs4iS5KG0rLjmdnbmLY6ipnjutGuThW7y1F2i4uDLl3MMjerV0P9+nZXpFxZcjJMm2YmZ+3caTYGeOklE07y+PvgCR7/cQt7jiZzbduaPDuoBUEBhYTEhAR4+21Ysya353ZmHdf8vvbzg3btzoTQklqZwB1t2GB2/tq82UzcGjcO7rzTzN4vqgMHYNIk88vH8eMQGGjCZ79+Jog2amTbagpFmOi0WUTG5jm2G/hJ8pnoVMA1ZgGBItL3ggougqJ2Si/DjDG4GvDBhFGAjUBloCGwXQS32UhYQ2nZ8PfBEwz5eCWjutbj2UEt7C5H2WT06NEAfDFxopkFHREBS5aYBdqVcoaI6XIGFrwdcXpWNh/+uY+P/txL7Srl+ezWi2lYvWIpFlmGORymq/nee+ajv79ZXeGBB0wXuTDZ2eY5H31kVimwLLNE3Lhx5t8LF9lmuAhLQk3HLAW1ApPdbgdaiEiUZVnTAERkZM75D2ImOUVgxpSOACYAQ0RkZgm9lf/W7UwotSweBcYCDXIPAQ5gDvCOCMssiwqYgbUBIth476BoNJR6vowsBwM/WE5SWiZ/PNyLiv6u8Q+LKn1PP/00OBw8v2UL/PorzJoFgwbZXZbyUBuiEhgzbQPZDmHSiA50bfjfyVKqBO3YYTrbX35p7oj06mXC6aBB4O195rz
4eDM8Y9Ik0yENCzPr544daxbsdzFFXDz/Mczi+duAhyRn4pNlWUsARKR3ztePAWOA2sBpTDh9RUR+K4G3UHDNTobS3Nn3FpAEfA68L0LkWeftBBqL4P2fi7goDaWe78M/9/LG/F18OrIj/ZqH2l2OslN2Njz4IEycaB45OzwpVVIOHk/lti/XERmfwsvXteJGXRe59J04YW7DT5xoxoTWq2d2FOvQwYTR778341J79YK77zZr5to5LvccnA2l7qgooXQ/8AHwmQjJBZxXE/AVIapYqyxBGko92/5jyfR/bxn9moXw0fBi3KZQub60NNi2DTZuNAt3b9xoxpqlpsIjj0BBuzopVcwST2dyz9d/s3xvPON6N+TRy5vg5eUiuzuVJVlZZrOG994zGwiA2UBg5Ehzi76Fewzt0lBqcQ0wR4Siz4pycRpKPZeIcNOU1UTEJLHo4V6EVC5gj23l3hwOs3f2rl0meOY+duwwP4TA/OBp25YRR49CcDBfLV2qa5GqUpWZ7eCZORF8s+YgA1qG8faNbSnv5zY3FT3Ppk3m34irrzb/PrgRTw6lzg6uWwKEWxapIvyz/ZRlEQwEAIkiJJZAfUqdtx/WH2L1/gReHtxKA6m7cjjMNpCHDkF09JmPeT8/fNhs75grLMzMTh440Hxs187MrPfyoskLL5hzNJCqUubr7cVL17akQXAFXvptBzGTVzFlZEf9t8kubduah3IpznZKfwKuBR4S4f08x+8F3gNmiXB9iVVZgrRT6pmOnUqn39t/0SS0Et+N7aK3ylxddjbs3w/bt595RESYpXdOn/73uX5+ZvJB7dpmOZ7wcPN5w4bmh0wx71ijVHH7Y3sc93+7kSoBvnx268U0q1HZ7pKUG/HkTqmzofQQZvZWHREO5zleEzgEHBbBLUdvayj1TA98t5F5W2P57YGeNArRpVhcgsiZrmdk5L8D6M6dZqJBrjp1oHlzaNYMGjQ4EzzDwyE4WDudyu1tO5zI7V+uIzkti4k3ty98oX2l8nDZUGq2Nw3DTIqPReTwOZ7x30s4GUrTMbf6K4uQkud4BeAUkCGCW96D0FDqeSLjU+jz1hLu6tWQx/s3tbucsiMtzYzRyu8W+6FD5pE3eIKZBduihQmguY9mzUpsjNewYcMA+O6770rk+koVRWxiGrd/uY4dR5K4t08jBrevTf1g18sayrW4VCi1rE6YpaT6Y7Y3zSsOmA98isgKZy7n7JjSU0AV4HJgVp7jl+d8zHc2vlJ2mLYqCm/LYnS3enaXUjaIwNdfw+OPQ0zMmeM+PlCrluluduoE11135nZ7nTrQpAlUKN1/V9vqGDLlQsICy/H9nV0Z/8Nm3l+8l/cX76VxSEUuax7K5S3CaF0rUIceKddkWRcDbwI9co/kc1YYMBIYiWWtBB5GZF2hl3WyU7oA6AckAm8BO4BmwMNAILBQhCuceyf/LOj6KGZIQATwoIgsK+DcqcCofL71r98ULMvqBbwNtABigNdFZNK5atFOqWdJSc+iy8uL6NM0hPdvamd3OZ5v/Xq4/35Ytcrs3T1+vJlUFB5u9qDW2+xKOSU6IZU/tsfxx/Y41kYmkO0QQir50695KJc3D6Vrw2r4++hsfeUinVLLyrt+fSxm16jN8M9k+GCgDdAdE04BHIgU2gx1NpReB/yYU8C/vpVz7HqRf3VQC7mWNRT4CrP11fKcj6OB5iJyMJ/zA4HyZx1eASwVkdE559TH7FbwOfARJrl/BAwTkZ8Kq0dDqWeZviqSp2ZHMPPubrTX/e1LztGj8H//Zxaerl4dXn0VRo3SEKpUMTiZmsGfu46yICKOv3YfIzUjm4r+PvS6qDpXtAxjQMswfL3171pZ5SKhNAuYAUwB/qKgMGlZFtALsyvoDYj4FnpZZ0KpuS5vYjqjZ3tThMecuoipbw2wRUTG5Dm2B/hRRJ5w4vndMWG2u4iszDn2GnCdiDTOc96nmD1euxZ2PQ2lnsPhEC575y8q+vv
w8z3dc/4uqGKVmWl2RXn2WbMI/QMPwFNPFboPuCsZMmQIAD/9VOjvqkq5jLTMbFbtO86CnC5qfHI69aoF8PDlTbi6VQ29vV8GuUgobYTI3uJ+jtObgIsw3rKYAQwCQjEDWOeIUOj4gH/XY/kBHTDjEPJaAHRz8jJjgIjcQJqja8418poPjLIsy1dEMlEeb/neePYdS+GdoW00kJaE+fPNFp07d0L//vDuu2ZcqBvp2rXQ31GVcjnlfL3p0zSEPk1DeOnalizeeZQ3F+zi/m83MmnJPh7t34TeF1XXf/NU6SpqIHXyOU6HUnM91oHzITQfwYA3JtDmFYcZs1qonFv5NwJnd1TDgIX5XNMn5zWPnHWdsZhWMn4uvL+tKpovV0YSXNGPK1vVsLsUz7JnjxkrOmcONGoEc+fCVVeBG/4QHD9+vN0lKHXevLws+jUPpW/TEOZuieGtBbsZ/cU6OtWrymP9m9CxXlW7S1TqgjgdSi0LH+BKoAn/HeOJCM8XY10FGQF4AdMv5CIiMhmYDOb2fTHUpWwWGZ/C4l1Hua9vY50McL6ysmD3brM//ObNsGWL+RgTAxUrwmuvmdv1/v52V6pUmeblZXFN21oMaFmDGeujeX/RHq6ftIpLm4Yw/oomuhi/KnmWtb8IZwsiDZ050alQalmEYLYaLexenTOhNB7Ixtz+zysUM3vrXMYAP4lIwlnHYwu4ZhZnZoIpD5a7DNSIznXsLsU9OBywbJnZIz43fEZEnFlH1NfXrBd66aXQpg3cfDPUcP8O9KBBgwCYM2eOzZUodeH8fLy4pUtdhrSvxdSVkUxaso8r31/GNW1q8vBlTahTLcDuEpXnqsd/J7/nJ3dCvFOc7ZQ+BxS2CrlTLygiGZZlbQAuA37I863LgEJnHlhmgdY2wIP5fHsVMPisY5cB63U8qedLSc/ih/XRXNmqhu4j7QwRGDPGzJwHs3RT69Zw330mgLZuDU2bmu08Pcyll15qdwlKFbsAPx/u7t2I4Z3qMmnpPr5YcYBfthyhc4OqtKwVSOtaQbSqFUh41fI69lQVp2L/n8nZJaH2YVLxVMzyTQI8ANyX8/mrIkx16gXNklDTMUtBrQDuAm7HzJSPsixrGoCIjDzreZ8Cl4jIRflcM3dJqCnAJ5h1sT4CbtIloTyfLgNVRG+/DY88YsaJjh9vQqlSymMcTUpjyrL9rN6fwM7YJDKzzc/5wPK+tKoVSKvageZjrUBqV9Gg6m5cZPZ93SKdLxLl1GWdDKVpgC9mQlEcICJ4WxYtgK3A0yK86GxtOYvnP4ZZPH8b8JCILM353hJTv/TOc34lzGSl50Xk9QKu2Qt4hzOL57+mi+d7Pl0Gqoh++w0GDoTBg+H773VdUaU8XHpWNrtjk9l6OJGth0+y9XAiu2JP/RNUgwJ8aVM7iE71q9K5flVa1Q7UcfkuziVCaQlxNpSmAOUwwfQ05rZ/WM7nScAhEdxyMJ+GUve2dPcxRn6+lneGtmFwu9p2l+PaIiKga1czg37ZslLf4tMVDBgwAIB58+bZXIlS9knPymZX7Cm2HEpk66FENkafYHec2S3c38eL9nWqmJDaoCrtwqtQ3k9DqitxuVBqWUHApZg76gAHgEWIJBb1Us6OKT0O1MJsKRoL1Aa+BtJyvq/3TJUtdBkoJ8XHw6BBEBAAs2eXyUAKMHDgQLtLUMp2/j7etK4dROvaQf8cS0jJYO2BBPOIPM4Hi/fw3iLw9bZondNJ7dqgGj0aBeuC/eoMy3oSeBw4e1ZdKpb1CiIvF+lyTnZK/wD6Ap0xY0mH8+/JTctF6FWUF3YV2il1X5HxKfR5awn39W3Mw5f9Z6ixypWRAZdfDqtXw19/QefOdleklHJxSWmZbIg8wZoDCaw9cJwthxLJcghtagfyzKAWOn7fRi7TKbWsdzFziyD/SU8CvIdIfruB5n9JJ0PpjUAfTHc0FjNBqXrOt48B/UXY6OyLuhINpe7r+bnbmbYqkpUT+uq
s+4KIwJ13wpQp8NVXMHy43RUppdxQakYW87bG8vr8ncQlpTO4XS0mDGhKqP7bW+pcIpRaVjfMlu+CCaS7gZ2YteSbAo1yzhSgOyKrnbqsM6H0v7VQGRNSs4AVIpws8kVchIZS95SSnkWXlxfRt1kI7w1rZ3c5ruv9982C9088AS8X6S6KR+rXz2wct3Dh2RvAKaWckZKexYd/7uXTZQfw8ba4p08jbu9Rn3K+Ou60tLhIKP0CGAWcBEYjMvus7w8BPgMqAV8icpszlz3nmFLLwh/YnvPlVSLsFCEJmF3I05QqUTP/PsSp9CxGdatndymua/58eOghuOYaeNHpxTE82tChQ+0uQSm3VsHfh8f6N2XoxeG89OsO3pi/ixnrovnfVc24vHmoroBSdlyM6YI+/p9ACiDyE5ZVHbM858XOXtTZ2/cnMWm3vAgZzl7cHWin1P3oMlBO2LkTunSBunVhxQqzTahSShWz5XvieW5uBHuOJtOjUTDPDGxO49BKdpfl0VykU5qAmfxeA5GjBZwTghnyeQKRas5c1tlFCnPvdbVx8nylSszyvfHsO5bCrd3raSDNT0KCWYvUzw/mzNFAqpQqMT0aB/PbAz15ZmBzthw6Sf/3lvHsnAiOJ6fbXZoqWeYHS0GB9N/fc/q3FGc7pT2AWUAi8D9gE2aN0jyvzUFnX9SVaKfU/dw+dR2bD51kxYS+usjz2TIzoX9/WL4cFi+G7t3trsil9O7dG4AlS5bYWodSnighJYO3Fuzi27UH8fay6NcslBs7htOzcTA+3rpRR3FxkU6pA3P7/rlznPksIIg49cPa2XVKl+a8eFXgm3y+L0W4llLnLTI+hcW7jnJf38YaSM+2cyf83/+ZMDp1qgbSfNx66612l6CUx6pawY+XBrdidPd6fLs2mlkbDzNvWyyhlf0Z0r42N3QMp36w/SsZqWL1THFezNlOqeMcp4gIbpkQtFPqXnQZqLOImDGjb7xhbtWXKwdPP21m2yullI0yshws3nmUH9ZH8+euozgELq5XhRs6hnNVqxpU8Nde1vlwoU6ps5zulDobSr845ysKo515QVejodR96DJQeWRnw88/w5tvmkXxq1WDe+4xj5AQu6tzWZmZmQD4+vraXIlSZcvRpDRmbjzM9+uj2X8shQA/b65qVYMRXerSJjzo3BdQ/3CRUPos/95EqXAi57rNby57PuuUehINpe7jy5WRPDMngpl3dyu7u4mcPm1uzb/9NuzdCw0awCOPwK23mi1EVaF0TKlS9hIR/j54gh/WH2Lu5hhSM7O5p3cjHuzXWMedOsklQmkJ0d65cgsOh/DFigO0qxNUNgNpfDx8+CFMnGg+79QJfvgBBg8Gb7ccOWOLO+64w+4SlCrTLMuiQ92qdKhblSevbs4Lc7cz8c+9rDlwnPdvakeNwPJ2l6icYVkbgJnALES2n+t0py/r5O37z89xiohwe/GUVLq0U+oe/tgex5hp65l4czuubl3T7nJKz+HD5hb9J5+YLunVV8Ojj0LPnqDLYSmlPMDPGw/zv1lb8fPx4q0b29C3aajdJbk0l+iUWlYcZrt5AfZhAurPzm4nWuBlizDRqaATLXSikyphwyav4uDxVJY+1qds3OKJjITXXoPPPzfjR0eMgMceg+bN7a7MraWmpgIQoEMdlHIp+48lc883G9lxJIkxPevz6BVN8fMpA//WnwcXCaUW0B24DrgWqIfJibHAzzmPxYhkF+myOvteQ6mri4hJ5Kr3l/PEgKbc2auh3eWUrN274ZVX4KuvwMsLbrvNhNH69e2uzCPomFKlXFdaZjYv/bqD6aujaBsexAc3tSO8qv4CeTaXCKVns6w2nAmorTABNRH4BbPO/XxEUs95GSdDad2zDvkADYCngHbA1SL8VZT6XYWGUtf38Peb+H1bLKueuJTA8h46a3rrVnj5Zfj+e/D3h7FjzW36WrXsrsyjzJgxA4ChQ4faXIlSqiC/bT3C4z9uAQveuL4
1/VvWsLskl+KSoTQvy2qACaiDgc6Y3UNTETnn9oIXNPvesqgIxAM/izDsvC9kIw2lru1oUhrdX1vMzZ3q8Nw1Le0up/ht2AAvvmiWd6pY0Szp9NBDEKpjqpRSZdfB46nc9+3fbD6UyKiudXniymaU83XLG7LFzuVDaV6WFYrpnl6LyIBznn6BoTQIOAKki+CWC41pKHVtby3YxcQ/9/LnI72p50k7gRw9Cg8+CN9+C0FB8MADcP/9ULWq3ZV5tMTERAACAwNtrkQpdS4ZWQ5e/30nny4/QLMalbmpUzgd6lahaVhlvL3K7kRPtwqlRXQhs+/LYQa5hgNHRQgr5tpKhYZS15WWmU23VxfTvk4VPh3V0e5yiocITJsGDz8MyckwYYJZZ7RyZbsrKxN0TKlS7mfRjjienh3B4ZOnAajg5027OlXoUNc82tUJolI5Dx3alQ+3CaWWVQ04BjgQcWoJUmfXKb2V/Gff5/6q8puT11HKabM2HiYhJYPbe3jIJJ/9++HOO2HhQrMv/ZQp0KyZ3VWVKffff7/dJSiliujSZqH0bRrC4ZOn2RB1gvWRJ9gQdYIPFu/BIeBlQZOwynSoG0THulXp2TiYahX97S5bneF0W/tCZ9+nA98CD4qQ5OyLuhLtlLomEeHyd5bi6+3Fr/f3yFl9wk1lZcF778FTT4GPj1nq6c47zex6pZRS5+VUWiabok+yIcqE1I0HT5KcnoWfjxdD2tfi9h4NaBRyzrk1bsclOqWWdbcTZ1UAXgMEEacGBJ/v7Hsw40hjnXkRV6ah1DUt3X2MkZ+v5a0b2jCkQ227yzl/mzbBHXeYCU2DBpldmWq78ftxc/Hx8QAEBwfbXIlSqrhlO4TtMUl8s/YgM/8+RHqWg75NQ7ijZ326Nqjm3s2NPFwklBa2fv2/zqS4Q6kn01DqmkZ9vpaImCRWTOiDv48bzrg8fRqee87sxhQcbLYHHTJEd2GymY4pVapsOJ6czvTVUUxfFcXxlAxa1qrMHT0acFXrGvi6+QYsLhZKnfmhVuyd0v5AJ2CjCHPzHB8EtAXWivC7My/oajSUup69R0/R7+2lPHzZRdx/aWO7yym6ZcvMovd798Ltt8Mbb0CVKnZXpYC5c80/XwMHDrS5EqVUaUjLzGbWxsN8umw/+46lUCOwHLd2q8dNnetQ2U0nR7lIKE0DfIFPgLgCzgoAHqUEQulKzAKoA0RYkOd4X2AhsEqE7s68oKvRUOp6npi5lZ/+PsSqCX3db7D67t3Qvj2EhcHkydC3r90VKaVUmedwCEt2H2XK0gOs2n+cCn7e3HhxOLd0qUuD6u417tTZUGqZcZ+PAjWACOBBEVnmxPN6AEuAnSKS/wLhlrUauBgYhsgPBZyTO/ve6VDq7Oz7pjkfV511fG3OR51CrIpFQkoGM/8+xHXtarlfIM3IgJtvNjsyLVmiY0ddUGysGQYfFuaWK9gppc6Tl5dF36ah9G0ayrbDiXy6bD/TV0XxxYpIejYOZnjnuvRrFoKPm9/az2VZ1lDgPeBuYHnOx3mWZTUXkYOFPK8KMA1YBBS2peAazB30zkD+ofR86nayU5rbpq0twpE8x2sAh4EMEcoVV1GlSTulrmXi4j28uWA3Cx66hItCK9ldTtE8/ji8/jrMnAmDB9tdjcqHjilVSuU6eiqNGWuj+WbtQY4kphFWuRw3d67DsIvDCansupHGmU6pZVlrgC0iMibPsT3AjyLyRCHPmwlsxowVvb6QTmkwJrSeRCSqoIsBdQAKPOfspzgZSg/kXPhjEe7Nc/wD4B4gUoQGzrygq9FQ6joyshz0eG0xTcIqMf32znaXUzSLFsFll8GYMfDJJ3ZXowrw++9m6Hv//v1trkQp5Sqysh0s3nmU6aujWLYnHh8viytahnFLl7p0rl/V5WbtnyuUWpblB6QCN0meW+uWZX0ItBSRXgU8725gOHAJ8BSFhdIS4mwonQLcjplptQ/YBTQBGuac8pkIY0uqyJI
UHh4u06dPt7sMBZxMzST6RCr1gytQ0d/ZkSX2801MpOMdd5AVEMCGSZNwRLy+GAAAIABJREFUlC9vd0lKKaXOQ0aWg+MpGZxIzSDbIZTz8aZqRT+qBPjhKjub9unTJwPYmufQZBGZnPuFZVk1MXexe4nI0jzHnwaGi0iTs69pWVYrzByhLiJywLKsZ7EhlDr7k/9VYChmIdSGnAmjFpCc8323lJCQ8M8tPWUfEeHqD5aTnuXHHzdf4nK/mRZIxNyqT0rCf8ECLmnXzu6KVCGio6MBCA8Pt7kSpZQrS8vMZu7mGL5aHcXmjYlUrWBxR8/6jOxazxWaJlkiUmx7b1uW5Q/MAMaLyIHiuu75cGpErwj7gMuBnZggmvvYDlwuwv4Sq1CVCWsOJBARk8Rt3eu7TyAFc6t+9mx45RXQQOrybrnlFm655Ra7y1BKubhyvt7c0DGc2ff24KdxXWldO5DXf99F91cX8/6iPSSezrS7xMLEA9lA6FnHQyHfTY9qYCasf2FZVpZlWVnA00CLnK8vL9Fq8yjy4vmWRUPMG4vLCatuTceUuoYx09azLjKBVRMupbyfmyyWv307dOgAl1wC8+bptqFuYOHChQD069fP5kqUUu5mc/RJPli8l4U74qhUzofR3etzW/d6BAX4lWodRZjotFlExuY5thv46eyJTpZl+WKGZOZ1N3AZMBiIFJHkYin+HHRHJw2ltos6nkLvN5dwd++GPHpF03M/wRWkpUGXLhATA1u2mHVJlVJKebxthxOZuHgvv0fEUtHfh5Fd63JHzwZUrVA64dTJUDoUmI4JlyuAuzBzg1qISJRlWdMARGRkAc9/FhvGlDrV2rEsvrYssi2Lp846/lTOcZ0ppM7bN2sO4m1ZjOxaz+5SnPfEE7B5M3z+uQZSN7J//37279fRRkqp89eyViCTbunA7w/2pHeT6nz81z56vLaYl3/b8f/s3XeYFGXW9/HvIUeJElQQUcSAigFR1oAKPOq+uGZYRRd91oRxVVx1jatreAxrWhUMoGBAMeKCARQTLGlNGBCFQVHyIJLDzHn/qB5pmumZ7pnurg6/z3X11d1Vd1Wd7pphDndkycr1YYcHgLuPAi4DrgM+BQ4BjvXNUzO1p2y6piyS7JRQu7gzN2p7B2AOMM+dndIVZDqppjR8fR/8iIZ1a/L8uQeHHUpixo2DY4+Fiy6CBx8MOxpJguYpFZFU+27xSh569zte/+xn9m3fjJcu6JHW62XFMqNpkuzk+Y3cWRu1vT6wGljvTk7Og6OkNFyr1m9i75ve4qIjduHyPlvNUpF9Fi2CvfeGVq1g6lTQ9E855f333wfg8MPLnaZPRKTK5i5dza9rN7JPu6ZpvU7OJKVm70ZeTQOux31DZYckOq9BWVJ6MPBu1PaDo/aLJO3TH36h1OGADs3DDqVy7nDWWbBiRTBZvhLSnKNkVETSZaeW2Z8nZlhPgvntDwf6YHYa7l9XdECiw4W/IJgCargZA8zY34wBwLDIBb+o8GiROKYWFVPDYN/26f2fZUo8+GDQdH/33dAlo32/JUVmzZrFrFmzwg5DRKRQlE0hug9BjWmFEq0pHQ78jmCd06diLuaR/SJJm15UzO5tt6Fxvdphh1Kxb76BwYPh97+HCy8MOxqpovPOOw9Qn1IRkQwoG2vUEjg68qhQQkmpO0+YcTRwUjm7R7vzZMIhikRsLCnlkx9+oV+3LF9dxz0Y1FS/PjzxBOTS5P6yhdtuuy3sEERECsPmkf7zgBnAPyo7JOG1stw5xYxTgb5EJs8HXnfnxSqEKsJXP//K2o0lHNChWdihVOzFF4M+pA89BK1jF8iQXNKjR3pHxYqISNUltYCrOy8AL0RvM6MRcJL7Fs36IpWaVlQMwAE7ZvEgp1Wr4PLLoWtXOP/8sKORapo5cyYAXdQnWESk6jaPrE+E435UIgWTSko3x0INgr4BZxDUnNYDJaWSnOlFy2nXvD5tmtQLO5T4brkFfvoJXng
BaubI8qcS10UXXQSoT6mISDX1JBhTVJmysUcJSSopNaMbQSLaj6DjatIXFAFwd6bPK+awXbcNO5T4vv4a7r03mAZKzb554a677go7BBGRfJHyARaVJqVm7AQMAE4HOpUTyFrg1VQHJvmtaNkalq7aQLdsnZ+0bHBTo0Zwxx1hRyMp0q1bt7BDEBHJB0ek46Rxk1IzzidIRqPXfozNih1o7c6qNMQmeWza3KA/abdsHeT04ovw7rvB4KZWrcKORlLk008/BaBr164hRyIiksPc30/HaeMuM2pGKUHSWZaIbgDGAy8B3wMTCVphc7qjnZYZDcfgFz9j/NeL+O/1vbFsm2Jp1SrYbbcgGZ02TX1J80jPnj0B9SkVkdyVdcuMBn/E9wM6RLbMxf2/VTlVIn1KHXgSGOzOL8H12bMqFxMpM33ecvbfsXn2JaSweXDTiy8qIc0z9913X9ghiIjkD7MBwO3AdjHbfwKuxv3ZZE6X6ECns4G+ZrxCUFO6NJmLiERbsnI9c5eupn82TpofPbjp4IMrLy85Rc32IiIpYjYYKBt0EVvDtAMwArO2uN+T6ClrVLDvDuBHNq9b2go4F3gL+CjRC4jEmjEvMj9ptg1y0uCmvDdt2jSmTat0+WUREamIWReCGlIIcsTVwOfATGBN1PY7MNsj0dPGTUrdudadDgRzUT0OrGBzgtqAyDRQZsw3Q3/BJWHTipZTt1YN9tq+SdihbKlscNM//qHBTXlq8ODBDB48OOwwRERy3QUEOeR64C9AU9y74r430BQYTDAWqUakbEIqqikFwJ0P3DmXYGnRU4DXgI1sTlC3i1w8YWY2yMzmmtk6M5thZodWUr6Omf09csx6M/vBzC6J2j/QzLycRxbPyl64phcV07VdU+rUqvTHL3NWroS//AX23RfOOy/saCRNHnroIR566KGwwxARyXU9CConb8L9ftxLftvjvinSZH8TQZ74u0RPmvDk+e5sIOhP+pIZzYD+BHOXJjWruJn1A+4HBhF0AxgEjDOzPdz9hziHPU/QP+FcYDZBglw/pswaYOctY/Z1ycQm6bd6/SZm/vwrFxy+c+WFM+mWW+Dnn2H0aA1uymNaXlREJCV2iDxXNJBpJHAbkPAAkiotM+rOcuAR4BEzOhIkp4m6HBju7o9F3l9sZkcTVO9eE1vYzPoARwE7u3vZAKuicsNyX5hEHBKCT3/8hZJS54Bsmp/066/hn/+Es8/W4KY8N2nSJAB6aIUuEZHq2Cby/FMFZcr2bVNBmS1UKSmN5s4c4JZEyppZHWB/4O6YXW8Tv8b1eGAacLmZnUmwgtQ44Fp3j560v76ZzQNqAp8C17v7Jwl/EMmIaUXFmMF+O2ZJUqrBTQXl2muvBTRPqYhINdUmaL4/g8qndkw416x2UpqklgRJ46KY7YuAXnGO6QgcQtCZ9iSCDrQPEvRlPTlSZhbBtFWfAY2BS4GPzWwfd5+dyg8g1TO9aDm7tdmGberVDjuUwKhRweCmf/0Ltt027GgkzYYMGRJ2CCIi+WRYKk+W6aS0KmoQZOOnufsKADO7CHjLzFq7+yJ3nwxMLjvAzCYR1JZeDFwSe0IzO5egfyp16tRJ/ycQADaVlPLfH5Zz8v47VF44ExYsgAsvhG7dNLipQHTu3DnsEERE8kXKV7/JdFK6FCghGKgUrTUQrz/oAuCnsoQ04uvIc3u2rnXF3UvMbDrQqbwTuvtQYCgEy4wmHL1Uy9cLVrJmQwndsmF+Unf43/+FtWthxAgNbioQ778fLNd8+OGHhxyJiEhOeyodJ81oUuruG8xsBtAbeDFqV2+Ckf3l+Rg4xcwaRfUh3TXyPK+8AyxYu3JvguZ8yRLTisomzc+C/qRDhsC4cfDQQ6Das4Jx4403AupTKiJSLe5npeO0YTTf3wuMMLOpBAnn+QT9Qx8FMLOnAdz9zEj5Z4HrgWFmdhNBn9L7gdHuvjhyzI3Afwimi9qGoMl+b5KYsFXSb/q8YnZoVp+
2TWJn88qwb7+FK66APn1g0KBwY5GMevLJJ8MOQURE4oiblJpxWDIncueDxMr5KDNrAVwHtCVYkupYdy+r9WwfU36VmfUiGNw0DVgOvApcHVWsKUFzfBuClac+AQ5z96nJfAZJH3dn6tzlHNqpZbiBbNoEZ5wBdevCk0+SwKhBySMdO3YMOwQRkdxn9idgBO6lCZavAZyBe4XN/uZefpdKM0qJLCWaAHfPiUFTW2nYsKGvXr067DDyXtHS1fS8eyL/OKELp3ffMbxAbrkFbrgBnn8e+vULLw4Jxfjx4wHo1SveZB8iItnNzNa4e8OQgyglmDN+GDAK92/jlNuVYLGlgcCOuFc4gKOyRFLVSJISZf1JQx3kNG0a3HwznHaaEtICdeuttwJKSkVEqmkJ0IFgKdGbMFsKfE4woB2CKUD3jjxDkE8uruykFSWlsVWsfQiaxz8G5hMsMfW7SAD/TuADSAGbXrScJvVrs8u2jcIJYM2aoNm+bdtgcJMUpBEjRoQdgohIPtiZYBXOQUATYFvgyJgyZRWbK4F/AbdXdtK4Sak7v42sMuN04Eygnzujo7afCjxHkKiKxDVtXjHdOjSjRo2QKt+vvhpmzYLx46FZFoz+l1C0a5fwEswiIhJPMBvS3zC7FegH/A/QjaDyEoLpOqcBbwEv4J5QP8lE+4FeF3l+M2b7WIJMeDDwRILnkgKzbNV65ixZzakHhJQQvPMOPPggXHopHHVUODFIVnjzzeCfsKOPPjrkSERE8oD7WmB45FFtiSalHSLPg4D/i9p+YeQ5xJErku2mz1sOQLcw5ictLoaBA2H33eH2SlsOJM/dcccdgJJSEZFslGhS+i3QBbjdjCsIVllqS9CB1SP7Rco1vaiYOrVq0GX7Jpm/+IUXwuLFMGYM1A95flQJ3fPPPx92CCIi+SdYtOhAgkrKelvtd386kdMkmpT+DXgFqEmQiEaPpioFrk3wPFKAphYtp+sOTalbK8NLeT73XDD10623wn77ZfbakpXatGlTeSEREUmc2S7A60C85REdSCgprZFIIXfeAI4GpkRObpHn/wB93DX6Xsq3ZsMmvvxpReaXFp0/P1it6aCD4K9/zey1JWuNGTOGMWPGhB2GiEg++RewG0FuGO+RkIQnvHdnAjDBjAZAM2C5O2uSCFoK0Kc//sKmUs/s/KQbNsCZZwbPI0ZArZxc10HS4J577gGgb9++IUciIpI3uhNUVH5DMAB+NYkvvrSFpP5am1GLoG9pC3fGVeWCUlimFy3HDPbbMUM1pSUlQUL63nvw1FOwyy6Zua7khNGjR1deSEREkrEOaAwcifui6pwooeZ7ADNOAX4CJgNjItsmmDHHjD7VCULy17SiYjq3bkyT+rXTfzH3YGDTqFFw551BcioSpWXLlrRs2bLygiIikqiXIs/bVvdECSWlZhxKMEl+S7bsH/BvgumiTq5uIJJ/NpWU8t95yzPXdH/ddTBkSNCH9KqrMnNNySkvv/wyL7/8cthhiIjkk7eAFcDrmA3C7CjMDtvikaBEm++vIUhgvyHozFrm38DdwMGJXlAKxzcLV7J6Q0lmBjndcw/cdhucc47mI5W4HnjgAQBOPPHEkCMREckbrxL0IW0CPFjOfifBfDPRpPSgyEn7ArOjts+JPG+f4HmkgEwvKgZIf03pk0/ClVfCKafAI4+AhbSUqWS91157LewQRETyUUr+8CaalDaMPP8Qs71p5FmzkstWphUtZ/um9dmuaRp/PF5+Oagd7dMnGGlfM8NzoUpOadIkhAUcRETy282pOlGiSelPBLP0xzbTXxl5np+qgCQ/uDvTioo5qGOL9F1kwgT44x/hwAOD5LRu3fRdS/LCqFGjAOjXr1/IkYiI5An3jCelbwHnEfQbAMCMb4BOBM36b6UqIMkP3y5axeKV6/ndLmlKSqdMgT/8AXbdFf79b2jYsPJjpOA98sgjgJJSEZFslGhSeivBCPsWbJ4QtRNBH4JlgEaWyBY
+nL0EgEM7VXuGiK19+SUceyy0bg1vvw3NMzgxv+S0sWPHhh2CiEhGmNkgYDDQFvgSuMzdP4xT9nCCXK4z0ACYBzzu7nfHOfm7gON+VOR1RYJyCUgoKXXnJzN+BzwAHAXUBEqACcBl7vyUyHmkcLz/7RJ2adUo9f1Ji4qC/qN16sA770Dbtqk9v+S1Bg0ahB2CiEjamVk/4H5gEPBR5Hmcme3h7rHjgwBWEeR4XwBrgN8BQ8xsjbs/XE75nkBp1Ot4KzhZBfu2Luye3EpQZtQDmgPF7qxL6uAs1LBhQ1+9enXYYeSVdRtL2Ofmtzm9+47c0HeP1J24uBi6d4elS+GDD2CvvVJ3bikII0eOBGDAgAEhRyIiUjWRRLHCPmtmNgX43N3Pido2Gxjt7tckeJ2XgfXu/sdydpYS1IDWjLyuSFAuAQnVlJrRhGD+qTXuLAV+jmxvSVDNu8KdFYmcS/Lf1LnFrN9UyqG7pnjlnDvugO+/hw8/VEIqVfL4448DSkpFJH+ZWR1gf4J55KO9DfRI8Bz7RsreVG4B9xrlvq6mhGpKzXgJOB74izsPRG2/iKB6+BX33FzVqV27dj5ixIiww8grC1asY9nqDezZdpuUTRlaZ9kyup9+OksOO4xvrr02NSeVgrNp0yYAatVKtDu9iEh2OeKIIzYQNLOXGeruQ8vemNl2BLMmHe7uH0RtvwE43d07xzu3mc0nWC60FnCzu/891fFXJNF/mbtHnl+K2f4yQR+E7uSo4uJievbsGXYYeaXPP9+nVeNmXHRECn8sLroISkpo88gjtNl559SdV0REJLdscvcD0nTuQ4FGBIsm3Wlmc9298po7sxoEuWB7YOv5Gd2fTuTiiSalZUOof4nZviJmvxS4hSvW8e2iVZy8/w6pO2lREQwdCmefDUpIpRqGDx8OwMCBA0ONQ0QkjZYSDEZvHbO9NbCwogPdfW7k5Rdm1pqg+b7ipNRsd+A1IN4faAcSSkoT7QewMvLcJ2Z72ftVCZ5H8twH6ZgK6uaboUYNuP761J1TCtLw4cN/S0xFRPKRu28AZgC9Y3b1BiYlcaoalFfrubWHgV0IRtrHeyQk0ZrS/wK9gCfN2BP4GtgduJwgA56R6AUlv33w7RJaNa7Lbm0ap+aE33wDTz8Nl14KO6Sw9lUK0sSJE8MOQUQkE+4FRpjZVOBj4HxgO+BRADN7GsDdz4y8vxiYC8yKHH8Ywaqd5U0HFWt/glzwVeBNYENVg040KX2UICndhi3XOC2bf+rRqgYg+aOk1Pnou6UctVtrLFUjnG68EerXh6uvTs35RERE8py7jzKzFsB1BJPnzwSOdfd5kSLtYw6pCdwJdAA2Ad8DV5NYfrcI6AgMxH1lZYUrklDzvTsvE2Td5VXH3uPOK9UJQvLDzJ9W8MuajRyWqqmgPv0UXngBLrsMWrVKzTmloD322GM89thjYYchIpJ27v6wu3dw97ruvn/0SHx37+nuPaPe3+fue7p7Q3dv4u77RY6vbA5SgNsIcsIrMUukuT+upCbPN6MbcBxBZ9lFwOvuTKtOAGHT5Pmp8+CE2dw7/lum/60XLRpV6+cy0LcvfPQRzJ0LTZtW/3xS8Hr16gXA+PHjQ45ERKRqEpk8P+PMXgX6AhuBxQS1rWUc94RGKSc1WV8kAc3pJFTS54PZS+iyXZPUJKSTJ8Mbb8A//qGEVFJGyaiISIqZXUNQYelAHWD76L0kscxowkmpGY2BY4EdgXqx+93J6ASrkl1WrtvIf3/4hfMP75iaE/7tb0GT/SWXpOZ8IiIikg4XR54t5jlpiS4z2g0YS7DmfTxKSgvYpO+XUVLqqZkKasIEeO89uO8+aNSo+ucTiXj44WAg6aBBg0KOREQkbzQiqA09EXgL93VVPVGi85TeB7SgmvNPSf764NslNKxTk/3aN6veidyDWtIddoDzzktNcCIRY8aMYcyYMWGHISKST16PPE+rTkIKiTff702QBb9PsNToapLoIyD
578PZSzl455bUqZXo/3PieOMNmDIlWMGp3la9RESqZdy4cWGHICKSb0YTLKY0DrP7gSK2HOgEUSP/K5LQ6HszfiSYdLWF+1ZLjeY0jb6vvqKlq+l590Ru+cOenHFwh6qfqLQU9t0XVq+Gr7+G2rVTFqOIiEg+yLrR92alVFxR6bgnVAmaaLVW2ZqlXRIsLwUkZUuLvvgifP55sKyoElJJg/vvv5/7778/7DBERPJNRUuMpnyZ0SJgBfCaGU8QLEO1MbqA+2+JqxSYD75dQvvmDejQshr/cdu0CW64Abp0gf79UxecSJQJEyYAcOmll4YciYhI3ngqVSdKNCkdwuaq2SvK2e+gpLQQbdhUyuTvl3HCfttXXrgiI0bAt9/CK69AzZqpCU4kxuuvv155IRERSZz7Wak6VTKT52uUvWzlvz8sZ/WGkuo13a9fDzfdBN26wR/+kLLYREREJHckmpSmLAuW/PLBt0uoVcPosXOLqp9kyBD44Qd47DEw/d9H0ufuu+8G4Morrww5EhERiZVQUuqeuv4Ckl8+nL2U/do3o3G9Kg5MmjcvmJe0Vy/o3Tu1wYnEmDx5ctghiIhIHMk034tsYdmq9cz8eQVX9N61aidwhz//OXitWlLJgJdeeinsEEREJI6EZzo3Y4AZ/zVjtRklMY9NlZ9B8s1H3y3FvRpTQQ0dCuPHw113QYcOKY1NREREcktCNaVmnEowut7RgCeJ+ODbpTRrUJsu2zdJ/uCiIrjySjjqKC0nKhlzxx13AHD11VeHHImIiMRKtPn+wsjzWqABQXJaDLQAfok8pIC4Ox/OXsIhnbalZo0k/58S3Wz/xBNqtpeM+fTTT8MOQUQkv5ndQLCK0y3JHppoUro3QSLaC5gE4M62ZlwPXAT0TfbCktu+WbiSxSvXc2inlskfPGQITJgAjz4KO+6Y+uBE4nj++efDDkFEJN/dRJAzJp2UJtqntGypnv9GLoQZNYF7gG2BB5K9sOS2D74NlhY9LNn+pEVFMHhwMNr+3HNTH5iIiIjkpERrSn8FmhH0J10JNAaOIVh6FKB76kOTbPbh7KV0bt2YNk3qJX5QaSn87/8GzfWPP65me8m4W24J/uN+/fXXhxyJiIjESjQp/ZkgKW0FfA0cCLwWtb84xXFJFlu7oYSpRcX86eAkm96HDIF33w2e1WwvIZg1a1bYIYiISByJJqWfAF0IakSfZuuaUU2uX0D+M3cZGzaVJjcV1Ny5QbN9795wzjnpC06kAiNHjgw7BBGRfFflVUATTUoHAVcBK91ZY0YToB+wCXgFuLOqAUju+fDbpdStVYMDd2qe2AFlzfY1aqjZXkREJJ+5V7miMtFlRlcDq6Pe3wHcUdWLSm77YPYSundsQb3aNRM74NFH4b33gsny27dPb3AiFbjhhhsA+Pvf/x5yJCIiEituUmpGUtmDOz9UPxzJdj//spbvFq+if7d2iR0wdy5cdRX06bN5blKRkPz4449hhyAiInFUVFNaRGT6pwR4JefagpkNAgYDbYEvgcvc/cMKytcBrgPOALYDFgF3u/sDUWVOIpgTa2fge+Bv7v5KojFJYiZ8sxiAw3dNoD9paSmcfXbQbK+17SULDBs2LOwQREQkjsrmKbUkHgkxs37A/cBtwL4Ek/GPM7OKamafB44GzgU6A6cAn0ed82BgFPAM0DXy/KKZaaqqFBv3xQJ23rYhu7RqVHnhRx6BiRPh3nvVbC8iIiIVMvfyK0PNSKpKwT2x0VZmNgX43N3Pido2Gxjt7teUU74P8CKws7svjXPOUUBzd+8dtW08sMTd/1hRPA0bNvTVq1dXVEQilq1aT7d/jOfCI3bhij6dKy68eDHsvDP06AFvvqlaUskK11wT/BNz++23hxyJiEjVmNkad29YecncE7fJPdEkMxmRZvj9gbtjdr0N9Ihz2PHANOByMzsTWAuMA65191WRMgcDD8Yc9xbBEqiSIm9/tYhSh2O6tK288J13wpo1cP/
9SkglayxbtizsEEREJI6E+4GmSEugJkGf0GiLgF5xjukIHAKsB04CmhIkoNsBJ0fKtIlzzjblndDMziXoCkCdOnWS+gCFbOwXC+jQogG7t21cccGffoJ//QvOOAN22y0zwYkkYOjQoWGHICJSGMxaAEuAUtwTyjeTGJxEZ+A8gj6d9WN2uztHJXquJNUgGEh1mruvCGKxi4C3zKy1u8cmo5Vy96HAUAia71MZbL5avnoDk75fxrmHdcQqq/m89dZgkNONN2YmOBEREclWCTeXJpSUmrE/MBFoEOdiiSZ2S4ESoHXM9tbAwjjHLAB+KktII76OPLcnqBFdmOQ5JUnvfL2IklLn2Mqa7ufMCSbIP+cc2GmnzAQnkqArr7wSgLvvju1BJCIiCQtmUapM0v1eE60pvbYqJ4/l7hvMbAbQm2DwUpnewEtxDvsYOMXMGkX1Id018jwv8jw5co67Ys45qboxS2DcFwvYoVl9umy/TcUFb74ZatWC667LTGAiSVi7dm3YIYiI5IOHSLxCMmGJJqU9IhcfBDwSeb0PcCuwG8GSo4m6FxhhZlMJEs7zCfqHPgpgZk8DuPuZkfLPAtcDw8zsJoI+pfcTjNZfHClzP/CBmV0NvAqcABxB0BdVqmnF2o189N1SzvrdThU33X/9NYwcCX/5C2y3XeYCFEnQv/71r7BDEBHJJykdyZxoUtoi8vwMQVKKOzPNOJegifwvwMBETuTuoyzo/HodweT5M4Fj3b2s1rN9TPlVZtaLYHDTNGA5QeJ5dVSZSWbWnyBJ/jvB5Pn93H1Kgp9PKjDh60VsLHGO6VLuuLHNbrgBGjSAq6+uuJyIiIjksg1AbYIKxXhjexoQLJSUsLjzlG5RyFgBNALqAiuAesAewCrgR+AXd5onc+FsoXlKK/fnp6bz1c8r+PjqI+PXlH7yCey3H1x/PWhdcclSl112GQD33XdfyJGIiFRNVsxTavYfoBvQH/cX45QpG33vuNfM4DtoAAAgAElEQVRM5LSVrehUpqyZvDnB8qMA7xH05QQoTfA8kmNWrd/EB7OXcHSXthU33V93HTRrBldckbngREREJAxTCJruU7pyZqLN918QzBe6N/AGsDubR7s7weT3kofe/WYxGzaVcuxeFTTdT5oEY8fC7bdDkyaZC04kSaohFRFJiVuAJ4FfKihTDCQ1DU+izff7EAxomkHQd+BFoA9BQjoBON2dJclcOFuo+b5iF4ycwYx5y/nPNUdRo0Y5NaXucOSRwSCn77+Hhnm58pmIiEhWyIrm+zRJqKbUnc+Az6I2HW1GU2CTO6viHCY5bs2GTbw3azH9DmhXfkIKMGECTJwYLCeqhFSy3IUXXghoFL6ISDaqzjKjdQBVMeaxibOWsG5jKcfsFWfCfHf429+gXTs477zMBidSBfXrxy5GJyIi2aLCpNSM/YD+BKPtX3XnXTP+DNxOMOhpvRkPu3Nl+kOVTBv7xQJaNqpDtw5xJlYYMwamToWhQ6Fu3cwGJ1IFWslJRCR7xe1TasYhBP1FoxPXu4CrCPqSlrXnOnChezD5fa5Rn9LyrdtYwn63vMMJ+27PP07Ya+sCpaWw776wenXQn7R27cwHKSIiUmDyuU9pRVNCDSaYGNWiHmWToBrBOvZlr89IV4ASjve/XcKaDSUcG6/p/oUX4PPPg2VFlZBKjjj33HM599xzww5DRCTtzGyQmc01s3VmNsPMDq2g7Ilm9raZLTGzlWY2xcyOy2S8UHFSegBBLehbBMuLjiNIQB34ozutgNMjZfdIZ5CSeeO+WECzBrXpvlM5TfebNsGNN8Kee0L//pkPTqSKWrRoQYsWLSovKCKSw8ysH8ES7LcB+wKTgHFm1j7OIYcD7wK/j5QfC7xSUSKbDhU1368naLpv5s6vZjQhWOLTgXrubDSjDrAOKHWv1qCp0Kj5fmvrN5Ww/y3j+f1ebbnz5L23LjBsGJx9Nrz8MpxwQuYDFBERKVCJNN+b2RTgc3c
/J2rbbGC0u1+T4HWmAh+6e8ZWxamoprQ2gDu/Rp5XlO1wZ2PkeUNkUwVL/Uiu+Wj2Ulat38Qx5U2Yv3590GR/wAFw/PGZD05ERETiMrM6wP5svbDR20CPJE7VmKAyMmMqrd0044ZEtuWq5s2bM3HixLDDyCoLlq/lqn1K8AVfMXHBV1vs2+6119h13jw+GzSI5e+/H1KEIlVz5513AvDXv/415EhERKqslplNj3o/1N2HRr1vCdQkWOwo2iKgVyIXMLMLgR2AEdUJNFmJNLnfGPXay9mW04qLi+nZs2fYYWSNDZtKOeDWd+i9x44M6rnPljvXr4czzoAePdhn8GAwVZBLbnn33XcB9DsvIrlsk7sfkK6Tm9lJBLMt9XP3eem6TnkqS0qVdRSYSd8v5dd1m8pf6/7JJ2H+/OBZCankoL///e9hhyAikm5LgRKgdcz21sDCig40s5OBp4Ez3X1MesKLr6Kk9OaMRSFZY9wXC2lUtxaHdGq55Y716+G226BHD+iVUO2/iIiIZJi7bzCzGUBv4MWoXb2Bl+IdZ2anAk8Bf3L30emNsnxxk1J3JaWFZmNJKW99tZBeu7eibq2aW+5ULankgQEDBgAwcuTIkCMREUmre4ERkRH0HwPnA9tBsNCRmT0N4O5nRt73J+g/eiXwgZmVNZducPfiTAWdk9M4SXpMmVPML2s2br3WvWpJJU907tw57BBERNLO3UeZWQvgOqAtMBM4NqqPaOx8pecT5IT3RR5l3gd6pjfazZSUym/GzlxAgzo1OXzXbbfcoVpSyRPXX3992CGIiGSEuz8MPBxnX8+K3oelonlKpYCUlDpvzVzIkbu1ol7tqKZ71ZKKiIhIBqimVACYOreYZas3bL3WfVkt6bBhqiWVnNc/sizu888/H3IkIiISS0mpAPDmzAXUq12Dnp2jmu7Lakl/9zs46qjwghNJka5du4YdgoiIxKGkVAD4+PtlHNSxBQ3qRP1IPPGEakklr1x99dVhhyAiInGoT6mwdNV6vlu8iu47tdi8UbWkIiIikkGqKRWmzQ2mIDtwp+abNz7xBPz0EwwfrlpSyRsnnXQSAC+9FHf+aBERCYmSUmHK3GLq167JXts3CTaollTy1MEHHxx2CCIiEoeSUmHK3GL227EpdWpFenOollTy1JVXXhl2CCIiEof6lBa4FWs28s3CXzf3J1UtqYiIiIRANaUFblpRMe5R/UlVSyp57LjjjgPg9ddfDzkSERGJpaS0wE2Zu4w6tWrQtV1T1ZJK3jtKP9ciIllLSWmBmzq3mK7tmgZLiz48RLWkktcuvfTSsEMQEZE41Ke0gK1av4mZP/9K952aq5ZUREREQqWa0gI2Y95ySko9GOT0+OOqJZW8d8wxxwAwbty4kCMREZFYSkoL2JQ5y6hVw9iveU245RY45BDVkkpe69u3b9ghiIhIHEpKC9jUucXstUMTGjxwHyxaBK+9plpSyWuDBg0KOwQREYlDfUoL1NoNJXw2/xeObLwR7roL+vWD7t3DDktEREQKlJLSAvXJD8vZWOKc9OpQKCmB228POySRtOvVqxe9evUKOwwRESmHmu8L1JS5xeyxZC5tX3keLr8cdtop7JBE0q5fv35hhyAiInGYu4cdQ6gaNmzoq1evDjuMjOs/dDJX/fNS9lv0HXz/PTRrFnZIIiIiUgkzW+PuDcOOIx1UU1qA1m8qoeF7E9jvm2lw771KSEVERCR0qiktwJrSad8vofFB3WhfDxp8Nwvq1g07JJGM6NmzJwATJ04MNQ4RkapSTanklZVDHqfb0nmsevpZJaRSUAYOHBh2CCIiEodqSgutpnT1apZv34EFTVuxx9yZmpdUREQkh+RzTammhCowJXfdTbMVS/n4gmuUkErB2bhxIxs3bgw7DBERKYea7wvJggVw112M3bUHbY85MuxoRDKud+/egPqUiohkIyWlheTGG2H9eu7sOZAXd2oedjQiGffnP/857BBERCQOJaW
FYuZMeOIJ3j3qVGp26kSrxvXCjkgk4wYMGBB2CCIiEof6lBaKq67CGzfm5q4n0r2jakmlMK1Zs4Y1a9aEHYaIiJRDNaWFYPx4GDeORdffwvwNDThQTfdSoI499lhAfUpFRLKRktJ8V1ICV14JHTrw1uEnwzvf032nFmFHJRKKCy64IOwQREQkDiWl+W7kSPjsM3juOSb9vIp2zeuzXdP6YUclEop+/fqFHYKIiMShPqX5bONGuO466NYNP/VUps4t5sAOqiWVwrVixQpWrFgRdhgiIlIO1ZTms7FjYf58ePhhZi9ZzfI1GzXISQraH/7wB0B9SkVEslEoNaVmNsjM5prZOjObYWaHVlC2p5l5OY/dosoMjFOmsOc9evJJaNMGjjmGKXOLAeiuQU5SwC655BIuueSSsMMQEZFyZLym1Mz6AfcDg4CPIs/jzGwPd/+hgkP3BIqj3i+J2b8G2Dl6g7uvq37EOWrhQvj3v+GKK6BWLabMWUabberRvnmDsCMTCc2JJ54YdggiIhJHGM33lwPD3f2xyPuLzexo4ALgmgqOW+zuSyvY7+6+MFVB5ryRI4OR92edhbszdW4xB3VsgWm9eylgS5cG/4S0bNky5EhERCRWRpvvzawOsD/wdsyut4EelRw+3cwWmNkEMzuinP31zWyemc03szfMbN9UxJyT3IOm+x49YLfdKFq2hsUr16s/qRS8k08+mZNPPjnsMEREpByZriltCdQEFsVsXwT0inPMAoJa1GlAHeAMYIKZHe7uH0bKzALOBj4DGgOXAh+b2T7uPjv2hGZ2LnAuQJ06dar1gbLSlCnw9dfwWFAZPWXOMkD9SUWuuOKKsEMQEZE4sn70vbvPIkg6y0w2sw7AYODDSJnJwOSyAmY2CfgUuBjYalSDuw8FhgI0bNjQ0xR6eIYNgwYN4NRTAZg6t5iWjeqw87aNQg5MJFx9+/YNOwQREYkj06PvlwIlQOuY7a2BZPqDTgE6xdvp7iXA9IrK5K01a+C55+CUU2CbbQCYMreYA3dqrv6kUvAWLlzIwoXqei4iko0ympS6+wZgBtA7ZldvYFISp+pK0KxfLguyr70rKpO3XnoJVq6Es84CYP7yNfz0y1oO7KCme5H+/fvTv3//sMMQEZFyhNF8fy8wwsymAh8D5wPbAY8CmNnTAO5+ZuT9ZUAR8CVBn9IBwPHASWUnNLMbgf8As4FtCJrs9yboi1pYhg2DnXeGww4DYMqcyPykHbWSk8jVV18ddggiIhJHxpNSdx9lZi2A64C2wEzgWHefFynSPuaQOsBdwA7AWoLk9PfuPjaqTFOCPqJtgBXAJ8Bh7j41bR8kG82ZA++9B7feCpGm+qlzi2lSvzadWzcOOTiR8B199NFhhyAikhFmNohg/E1bgtzpsqgB4rFl2wL3APsRdH0c4e4DMxTqb0IZ6OTuDwMPx9nXM+b9/wH/V8n5/gL8JVXx5aynngqS0TPP/G3TlLnL6NahOTVqqD+pyI8//ghAu3btQo5ERCR9qrBQUV2CcT93EJmdKAyhLDMqaVBSEjTd9+kDkT+4M39aQdGyNRzaSROFiwCcccYZnHHGGWGHISKSbr8tVOTuX7v7xWyeYnMr7l7k7pe4+3C2XD0zo7J+SihJ0Lvvwo8/wt13/7bpmSnzqFe7Bsfvu32IgYlkj+uuuy7sEERE0ipqoaK7Y3YlslBRqAo+KW3evDkTJ04MO4xq2/2OO2jeuDGTmzaldOJEStzZYd1Krt+/Np9M+Tjs8ESyQq1awT95+fA7LyIFq5aZTY96PzQy/3qZqixUlBUKPiktLi6mZ8+eYYdRPcuXw8cfwznncFifPgA8+dFc7vr8K964uAddtm8ScoAi2WHOnDkAdOzYMeRIRESqbJO7HxB2EOlQ8ElpXnjuOVi/Hs4+GwB3Z+R/5rFv+6ZKSEWinB35HVFNqYjksVQtVJRxSkrzwbBhsM8+sO++AEz6fhlzlq7mn/32CTkwkexy880
3hx2CiEhaufsGMytbqOjFqF29gZfCiSoxSkpz3eefw/TpcP/9v20aMXkezRvW4ZgubUMMTCT7HH744WGHICKSCUktVBTZ1jXychugNPJ+g7t/lamglZTmumHDoE4dOP10ABasWMs7Xy/inEM7Uq92zZCDE8kus2bNAqBz584hRyIikj5VWKgIgoWHovUF5gEd0hVnLCWluWzDBhg5Eo47DloEy4g+N+UHSt05vXt5P28ihe28884D1KdURPJfMgsVRbaFvsqOktJc9sYbsHTpbwOcNmwq5blpP3JE51a0a94g5OBEss9tt90WdggiIhKHktJcNmwYbL99sIoT8PZXC1mycj1nHLRjyIGJZKcePbJ63mgRkYKmZUZz1c8/w9ixwTr3NYO+oyMmz6Nd8/ocvuu2IQcnkp1mzpzJzJkzww5DRETKoZrSXDViBJSWwllnAfDtopVMmVvMNcfsRo0aoXcLEclKF110EaA+pSIi2UhJaS5yD5ruDz0UOnUCglrSOrVqcMoB7UIOTiR73XXXXWGHICIicSgpzUWTJ8OsWfDXvwKwav0mXv7vfP7f3m1p3rBOyMGJZK9u3bqFHYKIiMShPqW5aORIaNAATj4ZgFc++YnVG0o0wEmkEp9++imffvpp2GGIiEg5VFOaazZuhBdegD/8ARo3Dta5nzyPLttvQ9d2TcOOTiSrXXbZZYD6lIqIZCMlpbnmrbdg2TI47TQAphUtZ9ailfzfSXtjpgFOIhW57777wg5BRETiUFKaa559Nli96X/+B4CnJxexTb1a9N1nu3DjEskBXbt2rbyQiIiEQn1Kc8mqVfDaa3DqqVC7NotXruPNmQs55YB21K+jde5FKjNt2jSmTZsWdhgiIlIO1ZTmkldfhTVrfmu6HzX1RzaVap17kUQNHjwYUJ9SEZFspKQ0lzz7LOy4I/TowaaSUp6d+gOHdmpJx20bhR2ZSE546KGHwg5BRETiUFKaKxYvhrffhquugho1mPDlQhasWMdNx+0ZdmQiOaNLly5hhyAiInGoT2mueOEFKCn5rel+xOR5bNekHkft1irkwERyx6RJk5g0aVLYYYiISDlUU5ornnkG9t4bunRhzpJVfPTdUq7ssyu1aur/FSKJuvbaawH1KRURyUZKSnPBnDnwn//AnXcC8PTkedSuaZzaTevciyRjyJAhYYcgIiJxKCnNBc8+Gzz378/KdRsZPWM+/2/v7WjVuF64cYnkmM6dO4cdgoiIxKG232znHjTdH3YYtG/PSzPms2r9Jgb26BB2ZCI55/333+f9998POwwRESmHakqz3aefwjffwGWXUVrqPDV5Hvu2b8o+WudeJGk33ngjoD6lIiLZSElptnvmGahdG045hfdnL2Hu0tXc319LJYpUxZNPPhl2CCIiEoeS0mxWUgLPPQfHHAPNm/PUa1PZtnFdjunSNuzIRHJSx44dww5BRETiUJ/SbPbBB/Dzz3DaacxZsoqJs5YwoPuO1Kml2yZSFePHj2f8+PFhhyEiIuVQTWk2e+YZaNQI+vbl6fFzqV3TOE3r3ItU2a233gpAr169Qo5ERERiKSnNVuvWwejRcOKJrKxRmxen/0jfvbdj28Z1w45MJGeNGDEi7BBERCQOJaXZauxYWLECTjuN0TPms3pDCX/SNFAi1dKunRacEBHJVuqcmK2efRZataL0iCN5alIR+2kaKJFqe/PNN3nzzTfDDkNERMqhmtJstGIFvPEGnHce789ZTtGyNVzeRyvRiFTXHXfcAcDRRx8dciQiIhJLSWk2euklWL8eTjuN4ZOKaNW4Lsd0aRN2VCI57/nnnw87BBERiUPN99no2Wdh5535fqc9eP/bJQw4aEdq19StEqmuNm3a0KaN/oMnIpKNlOlkm59/hnffhdNP5+nJ86hTswZ/PFDTQImkwpgxYxgzZkzYYYiISDnUfJ9tnn8e3Fl10imMHj2f/7dPW00DJZIi99xzDwB9+/YNORIREYmlpDT
bPPss7L8/L/zakNUbSjirx05hRySSN0aPHh12CCIiEoeS0mwyaxbMmEHpPffw9OQi9t+xGXvt0CTsqETyRsuWLcMOQURE4lCf0mwyciSYMfmAXhQtW6PJ8kVS7OWXX+bll18OOwwRESmHakqzxbRpcNdd0LcvQ75bR+ttNA2USKo98MADAJx44okhRyIiIrHM3cOOIVQNGzb01atXhxvEzz/DAQdA3brMGfseRz71JVf03pWLj+oUblwieWbFihUANGmibjEikpvMbI27Nww7jnRQTWnY1q6F44+HX3+FyZMZ/u2qYBqo7poGSiTVlIyKiGQv9SkNkzucc07QdD9yJL922o3RM+bTd5/taNlI00CJpNqoUaMYNWpU2GGIiEg5VFMaprvugmeegVtugeOP5/kPvmfNhhIGaoCTSFo88sgjAPTr1y/kSEREJJb6lIbVp/SNN+C44+DUU9k48hnumzCbhyd+T4+dW/DMnw/KfDwiBWDNmjUANGjQIORIRESqJp/7lCopDSMp/eorOOgg6NSJ+a+9xcWvz+KTH36h3wHtuPG4PWhQRxXYIiIisjUlpXks40npsmXQvTusWsXbT43hiknFANx24l703We7zMUhUoBGjhwJwIABA0KORESkapSU5rGMJqUbN8LRR+MffcRDNzzGPStbsF/7ptzff1/aNVdzoki69ezZE4CJEyeGGoeISFXlc1Iayuh7MxtkZnPNbJ2ZzTCzQyso29PMvJzHbjHlTjKzr8xsfeT5hPR/kiRdfjm8+y53nvAX7l3VgouP3IUXzjtYCalIhrzzzju88847YYchIpJ2yeRakfKHR8qtM7M5ZnZ+pmItk/Gk1Mz6AfcDtwH7ApOAcWZW2cScewJtox6zo855MDAKeAboGnl+0cy6p/wDVFHpo0PgoYd44sATeHXv3jz754O4ok9natXUrFwimVK7dm1q164ddhgiImmVbK5lZjsBYyPl9gVuBx40s5MyE3Ekjkw335vZFOBzdz8nattsYLS7X1NO+Z7Ae8C27r40zjlHAc3dvXfUtvHAEnf/Y0XxZKL5/pc3x9Po/x3NR+278uwN/+LOU/alWcM6ab2miGxt+PDhAAwcODDUOEREqiqR5vsq5Fp3Aie6e6eobY8De7r7wamLvmIZraYzszrA/sDbMbveBnpUcvh0M1tgZhPM7IiYfQeXc863Ejhn2v30+bf4SSfzQ5M2LHz0SYb86UAlpCIhGT58+G+JqYhIPqpirhUvjzrAzDLWvJTpuYdaAjWBRTHbFwG94hyzALgAmAbUAc4AJpjZ4e7+YaRMmzjnbFPeCc3sXODcyFs3s7XJfIgotYBNCZVcswL+Zx8qrLaVVEv8/kimhXpvzCysS+cK/e5kL92b7JaJ+1PfzKZHvR/q7kOj3lcl12oDjC+nfK3I+RZUPdzEZf2EmO4+C5gVtWmymXUABgMflndMAuccCgyttGAlzGy6ux9Q3fNIeuj+ZC/dm+ym+5O9dG+ym+5P9WR6lM1SoARoHbO9NbAwifNMATpFvV+YgnOKiIiI5Lqq5Frx8qhNkfNlREaTUnffAMwAesfs6k0w4itRXdmyKnlyCs4pIiIiktOqmGvFy6Omu/vG1EYYXxjN9/cCI8xsKvAxcD6wHfAogJk9DeDuZ0beXwYUAV8S9CkdABwPRE9TcD/wgZldDbwKnAAcARyS5s9S7S4Akla6P9lL9ya76f5kL92b7JYt9yepXCuy/SIzuw8YAvwOGAiZHQoTyopOZjYIuIpgvtGZwF/c/YPIvokA7t4z8v4q4BxgB2AtQXJ6u7uPjTnnycCtQEfge+Bv7v5yBj6OiIiISFZJJteKbDsc+CfBvPA/A3e6+6MZjbnQlxkVERERkfBpOSERERERCZ2S0grk4rqxhSSZ+2Nmbc3sWTP7xsxKzGx4BkMtOEnemxPN7G0zW2JmK81sipkdl8l4C02S9+dwM5tkZsvMbG3kd+jKTMZbSJL9uxN13CF
mtsnMZqY7xkKW5O9OTzPzch67ZTLmXKKkNI5cXTe2UCR7f4C6BNNa3EEwpZikSRXuzeHAu8DvI+XHAq8k+sdYklOF+7MKeAA4DNiDoO/+zZH+apJCVbg3Zcc1A54GJqQ9yAJW1ftD0EezbdRjdjrjzGXqUxpHrq4bWyiSvT8xx74BLHX3gemNsjBV595ElZ8KfOjuV6QpzIKVovvzMrDe3bVIXQpV9d5E7sdngAEnu3uXtAdbgKqQF/QE3gO2dfeMzfWZy1RTWo5cXje2EFTx/kgGpPDeNAaWpyouCaTi/pjZvpGy76c2usJW1XsTqbFuTVCDLWlSzd+d6Wa2wMwmmNkRaQkwTygpLV9F68a2iXNMmzjly9aNldSpyv2RzKj2vTGzCwmmgBuR2tCEatwfM5tvZuuB6cDDmZ4qpgAkfW/MbC/gRmCAu5ekN7yCV5XfnQXABQTzqp9IsGT6BHVNii+MyfNFRMoV6YN9F9DP3eeFHY9s4VCgEXAQcKeZzXV3/cchJGZWFxgFXOnuc8OOR7bm7rMIEtEyk82sAzAY+DCMmLKdktLy5ey6sQWiKvdHMqPK9yayAMbTwJnuPiY94RW8Kt+fqMTnCzNrDdyEarNTKdl70xbYHRhmZsMi22oAZmabgGPdPbapWaouVX93pgD9UxVUvlHzfTlyed3YQlDF+yMZUNV7Y2anEiQ4A919dPoiLGwp/N2pQTCjhaRIFe7NT8BeQNeox6PAd5HX+rcwhVL4u9OVoFlfyqGa0vhyct3YApLs/cHMukZebgOURt5vcPevMhl4AUjq3phZf4KE9ErgAzMr65+1wd2LMxx7IUj2/lwMzGVzM+RhBPfq4cyGXRASvjeRyo4t5iQ1s8UEsyJortL0SPZ35zKgiGB59DrAAOB4gj6mUg4lpXG4+ygzawFcx+Z1Y4+N6ufWPqb8XDM7lmDd2AsI1o29xN1fymDYBSPZ+xPxScz7vsA8oEO64ixEVbg35xP8W3Rf5FHmfaBneqMtPFW4PzWBOwl+TzYB3wNXE/lDLKlTxX/XJEOqcH/qEPSR3wFYS5Cc/t7dx2Yo5JyjeUpFREREJHTqUyoiIiIioVNSKiIiIiKhU1IqIiIiIqFTUioiIiIioVNSKiIiIiKhU1IqIiIiIqFTUiqS5cysk5k9ZGZfm9kqM1tpZt+Y2WNmdlBUuSIzczMrCjHcsliGR2LxyFrPZdtbm9kzZrbAzEoi++8zsw5R5YenMa6mZnZT5HF8onFnipn1jLp+ZY+bIseUvZ+Y6Xgrk877msy9ivleUxqHiKSOJs8XyWJmdhbwCFsv6dg58tiWYIWQXHE/0C/E6zcFboy8fgp4NcRYREQkipJSkSxlZkcCjxO0aDjwD4IlbBcDOwInA7uGFmAF3H0gwTK7sfaPPP8C7OTuv0TtszSHVakK4s7U9ScS9T2Y2UBgWOTtU5H4Us7M6rn7unScW0QkUWq+F8let7P5d/QBd7/e3ee7+wZ3n+3utwPnVHQCM+tqZi+b2Xdm9quZbTSzhZFtB8SU3cnMnjazH8xsnZn9YmYzI82kraLKnWNm082s2MzWm9lPZvaOmf0pqswWTatlzafALpEiTYHlkf0DK2rmNbP9zOy5yHU2mNlSM3vPzA6M7G9kZk+Z2RdmtizyGX8xsw/MrF/UeW4iWMO9zJ9ir1lBt4OGZnazmX1pZmvNbI2ZfWJml5tZrahyW3wOMzsz8h2utaD7xZ9IIzM70sz+E7ne92Z2lZlFJ7k3RcV3gpk9YWZLCZZALCuzu5mNiPq+F5vZaDPbO+ZaCf28xBxzqpl9XtH3YWaHmtnrZrYk6uf1+djrV/AdbBeJd1Xk5+ERoHGcskl/BhFJI3fXQw89suwBtCKoHS17bJ/AMUWRskVR2/rHnCf6sRrYParslxWU7RIpc0oFZUZHnWt41GOYYCMAAAanSURBVPYOBGvYxztuYKRM2fvhUec5AdgY77hImTYVnNuBMyPlbqqgzPD
y4o5sawjMqODYsUCNSNnoz7E8TvlDkvg5GFje9xJTpmz/0jjf1YCosjfFlP+tXGT/IcCaOHGvBQ5N8ucl+vtYWNn3AQwASuKUWwf0jPczFtlWH/i6nGN/Lu97TOQz6KGHHpl7qKZUJDt1iHr9q7v/VMXz/Bf4H6AtQb/UbYALIvsaAOcBmFkLYI/I9gcIErHmQDfgemBFZN9hkedVBH1a6xJ0JTgVeDNeEO4+0d0NmBfZNM/dLfIYXt4xZlYfeIzN3YxuAFoDLQmS4zmR7SsJ+ql2iHymekAPguQK4PJIDDcBO0Vd4qmoGAbGix24DNgv8votgu+yI8F3C3AMQfIfqykwCGgC3Bm1/YwKrlUdLYD/A5oBFyVwPQOOJvjO9opse4wgsZtH0NWiLrAvsITge/0XJPXzEq01FXwfZtYQeJCgdWATwX9ItgHOj5SrS9B9pSJnArtFXv8H2IGgdv6X2IJV/AwikkbqUyqS3xYC/wvcR5C01Y/Z3znyvJzgD3dTgiRrJUGN02fufmtU+bmR54bAdQQ1iF8Db7t7qv+I/44g0QKY6O63RO0bHfV6DUGiOgrYnaCpNrp/ameq5/dRr69x94UAZvZ3Ng+UOhZ4Nua4Ge7+SKTsSOCvke07VjOeeBYBN7h7iZk9BTxUyfXucfe3Iq9nmlknNid0OxLc21h7mVkbgn7Nify8RKvs+/hd5HwAY9297LsdYmbnA12BXc1sF3f/Ls41jox6fXvZf+bM7B6C/tnREv2ZF5EMUU2pSHYqinq9jZltV8XzvABcRZCsxSaklG1z91KCGqv5QCfgb8BI4ItIX812kfIPAy8CZeXvI6g9XGRmV1cxxnhaR73+qoJyfyWowetOULMWO2CqXjXj2Dbq9Q9Rr+dFvS6v/+GsqNerUxhPPN+7e0kS1/sk5n2ifShbJPHzEq2y7yPe9wyVf9e/xRb1en6c10BSP/MikiFKSkWykLsvBqZGbRpcXrnoQTbl7GtG0HQPQS3ankBNoNwBI+7+BtCeoGbxOODvBP37uhDUiuLu69z9VIJmzkOAs4EpBE2rt5nZ9ol9woQsinq9ewXlopvOjwfqRroKLCunrFchjsVRr9vHeR1dpszGal43Wb9dz90Tud7amPfRn2F8VNeG3x4EfWe/jFyj0p+XePFR/vcR73uOfV/ed11madTrHeK83hxE8p9BRNJISalI9vobQY0kwCWRkdPbmf3/9u4etKkoDOP4/x0Krg5OBRHExcVNcBKti0sRXBVcLAgqgosUVFQqdZJScBBdikIRVARFQYtW1FHaTla7ieikxbaE2shxeE5IAr1p+sVt5flB4YacfNxyQ56c+77nRkdoQf1eVANYpEr9y78K/Eanua8tNjgiBoEuVC/6AngIzOe7t+cxRyPiNNAJjKNZ0/HaU1Dw5b9C76kHywMR0RsR2yJia0QciYhafWu14THTQEdEXKR51qymMajuynWMS3nWsN0XugDADlTjutiYTSml9AX4nG92RcS50MUGtkTEnoi4BAzXxrdzvCzTB3RKHeBwRHSHVlY4iepaASZbnLoHeN2wfSEiOiNiJ3B+scHrsA9mtgoOpWYbVErpFWpEWkCf1cvAN+APCg99qKml6PEzwEi+2Ql8RbOPuwsecgp42fAa46gJBnSKHjRjOYhOp8/kv55833dgYhm72FJKqYKWvKqFzj40S/YTeIyajcjbNW9QwDjLIs0tKaVZ1HENaoaazcsjnWjxVgZobmr6gWpra2uuPkf1rP+DHtTlHsBNFBIrwBhwheaSinaOl7allOaAM+iHWAfwBB1ft/OQeepNT0WGgE95ex86NT9Fc2lAozXdBzNbHYdSsw0spXQHnW6/hYJoBdXjTQJ3gf4lnuIYCky/UDfxPYqvqNQPvEPBr4oaiD6igDeQx4yghp4pFP7+ojA6DOzPQXLNpJQeo1rRYbSsTxWF0lHqdaY3gOsoWFTyfQcp7p4+DrxFM8ftvIc5tOrAVdQ
IM4+C2xiagevO9YmbXkppFIXtIRToFtD/ewL9GOltGN7O8bLc17+Plg97ima1q+iH1ANgb9LFBVo9vgIcAh6hz8k0uvhA0Xq+a74PZrZy0V7pkZmZmZnZ+vFMqZmZmZmVzqHUzMzMzErnUGpmZmZmpXMoNTMzM7PSOZSamZmZWekcSs3MzMysdA6lZmZmZlY6h1IzMzMzK51DqZmZmZmV7h914gZ0QJtbRwAAAABJRU5ErkJggg==\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "disp_imp = np.array(val_metrics['disp_imp'])\n", - "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced Accuracy',\n", - " disp_imp_err, '1 - min(DI, 1/DI)')" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced Accuracy',\n", - " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.2300\nBest balanced accuracy: 0.7717\nCorresponding 1-min(DI, 1/DI) value: 0.4860\nCorresponding average odds difference value: -0.1157\nCorresponding statistical parity difference value: -0.1929\nCorresponding equal opportunity difference value: -0.1063\nCorresponding Theil index value: 0.0896\n" - } - ], - "source": [ - "describe_metrics(val_metrics, thresh_arr)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3.3.3. Testing RF model on original data" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "rf_orig_metrics = test(dataset=dataset_orig_panel19_test,\n", - " model=rf_orig_panel19,\n", - " thresh_arr=[thresh_arr[rf_orig_best_ind]])" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.2300\nBest balanced accuracy: 0.7638\nCorresponding 1-min(DI, 1/DI) value: 0.5141\nCorresponding average odds difference value: -0.1388\nCorresponding statistical parity difference value: -0.2190\nCorresponding equal opportunity difference value: -0.1135\nCorresponding Theil index value: 0.0936\n" - } - ], - "source": [ - "describe_metrics(rf_orig_metrics, [thresh_arr[rf_orig_best_ind]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As in the case of the logistic regression classifier learned on the original data, the fairness metrics for the random forest classifier have values that are quite far from 0.\n", - "\n", - "For example, 1 - min(DI, 1/DI) has a value of over 0.5 as opposed to the desired value of < 0.2.\n", - "\n", - "This indicates that 
the random forest classifier learned on the original data is also unfair." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## [4.](#Table-of-Contents) Bias mitigation using pre-processing technique - Reweighing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.1. Transform data" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "RW = Reweighing(unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "dataset_transf_panel19_train = RW.fit_transform(dataset_orig_panel19_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Metrics for transformed data" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 1.0000000000000002\n" - } - ], - "source": [ - "metric_transf_panel19_train = BinaryLabelDatasetMetric(\n", - " dataset_transf_panel19_train,\n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "explainer_transf_panel19_train = MetricTextExplainer(metric_transf_panel19_train)\n", - "\n", - "print(explainer_transf_panel19_train.disparate_impact())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.2. Learning a Logistic Regression (LR) classifier on data transformed by reweighing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4.2.1. 
Training LR model after reweighing" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = dataset_transf_panel19_train\n", - "model = make_pipeline(StandardScaler(),\n", - " LogisticRegression(solver='liblinear', random_state=1))\n", - "fit_params = {'logisticregression__sample_weight': dataset.instance_weights}\n", - "lr_transf_panel19 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4.2.2. Validating LR model after reweighing" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "thresh_arr = np.linspace(0.01, 0.5, 50)\n", - "val_metrics = test(dataset=dataset_orig_panel19_val,\n", - " model=lr_transf_panel19,\n", - " thresh_arr=thresh_arr)\n", - "lr_transf_best_ind = np.argmax(val_metrics['bal_acc'])" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "disp_imp = np.array(val_metrics['disp_imp'])\n", - "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", - " disp_imp_err, '1 - min(DI, 1/DI)')" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced Accuracy',\n", - " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nBest balanced accuracy: 0.7581\nCorresponding 1-min(DI, 1/DI) value: 0.2939\nCorresponding average odds difference value: -0.0084\nCorresponding statistical parity difference value: -0.0992\nCorresponding equal opportunity difference value: 0.0242\nCorresponding Theil index value: 0.0938\n" - } - ], - "source": [ - "describe_metrics(val_metrics, thresh_arr)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4.2.3. Testing LR model after reweighing" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "lr_transf_metrics = test(dataset=dataset_orig_panel19_test,\n", - " model=lr_transf_panel19,\n", - " thresh_arr=[thresh_arr[lr_transf_best_ind]])" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nBest balanced accuracy: 0.7539\nCorresponding 1-min(DI, 1/DI) value: 0.2482\nCorresponding average odds difference value: -0.0151\nCorresponding statistical parity difference value: -0.0872\nCorresponding equal opportunity difference value: -0.0035\nCorresponding Theil index value: 0.0966\n" - } - ], - "source": [ - "describe_metrics(lr_transf_metrics, [thresh_arr[lr_transf_best_ind]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The fairness metrics for the logistic regression model learned after reweighing are well improved, and thus the model is much more fair relative to the logistic regression model learned from the original data." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.3. 
Learning a Random Forest (RF) classifier on data transformed by reweighing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4.3.1. Training RF model after reweighing" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = dataset_transf_panel19_train\n", - "model = make_pipeline(StandardScaler(),\n", - " RandomForestClassifier(n_estimators=500, min_samples_leaf=25))\n", - "fit_params = {'randomforestclassifier__sample_weight': dataset.instance_weights}\n", - "rf_transf_panel19 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4.3.2. Validating RF model after reweighing" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "thresh_arr = np.linspace(0.01, 0.5, 50)\n", - "val_metrics = test(dataset=dataset_orig_panel19_val,\n", - " model=rf_transf_panel19,\n", - " thresh_arr=thresh_arr)\n", - "rf_transf_best_ind = np.argmax(val_metrics['bal_acc'])" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "disp_imp = np.array(val_metrics['disp_imp'])\n", - "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", - " disp_imp_err, '1 - min(DI, 1/DI)')" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced Accuracy',\n", - " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.2500\nBest balanced accuracy: 0.7703\nCorresponding 1-min(DI, 1/DI) value: 0.4516\nCorresponding average odds difference value: -0.0876\nCorresponding statistical parity difference value: -0.1668\nCorresponding equal opportunity difference value: -0.0758\nCorresponding Theil index value: 0.0906\n" - } - ], - "source": [ - "describe_metrics(val_metrics, thresh_arr)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4.3.3. Testing RF model after reweighing" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "rf_transf_metrics = test(dataset=dataset_orig_panel19_test,\n", - " model=rf_transf_panel19,\n", - " thresh_arr=[thresh_arr[rf_transf_best_ind]])" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.2500\nBest balanced accuracy: 0.7586\nCorresponding 1-min(DI, 1/DI) value: 0.4307\nCorresponding average odds difference value: -0.0843\nCorresponding statistical parity difference value: -0.1632\nCorresponding equal opportunity difference value: -0.0611\nCorresponding Theil index value: 0.0963\n" - } - ], - "source": [ - "describe_metrics(rf_transf_metrics, [thresh_arr[rf_transf_best_ind]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once again, the model learned from the transformed data is fairer than that learned from the original data. However, the random forest model learned from the transformed data is still relatively unfair as compared to the logistic regression model learned from the transformed data." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "## [5.](#Table-of-Contents) Bias mitigation using in-processing technique - Prejudice Remover (PR)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.1. Learning a Prejudice Remover (PR) model on original data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 5.1.1. Training a PR model" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "model = PrejudiceRemover(sensitive_attr=sens_attr, eta=25.0)\n", - "pr_orig_scaler = StandardScaler()\n", - "\n", - "dataset = dataset_orig_panel19_train.copy()\n", - "dataset.features = pr_orig_scaler.fit_transform(dataset.features)\n", - "\n", - "pr_orig_panel19 = model.fit(dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 5.1.2. Validating PR model" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "thresh_arr = np.linspace(0.01, 0.50, 50)\n", - "\n", - "dataset = dataset_orig_panel19_val.copy()\n", - "dataset.features = pr_orig_scaler.transform(dataset.features)\n", - "\n", - "val_metrics = test(dataset=dataset,\n", - " model=pr_orig_panel19,\n", - " thresh_arr=thresh_arr)\n", - "pr_orig_best_ind = np.argmax(val_metrics['bal_acc'])" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "disp_imp = np.array(val_metrics['disp_imp'])\n", - "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", - " disp_imp_err, '1 - min(DI, 1/DI)')" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced Accuracy',\n", - " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.1200\nBest balanced accuracy: 0.6836\nCorresponding 1-min(DI, 1/DI) value: 0.2268\nCorresponding average odds difference value: 0.0254\nCorresponding statistical parity difference value: -0.0830\nCorresponding equal opportunity difference value: 0.1172\nCorresponding Theil index value: 0.1119\n" - } - ], - "source": [ - "describe_metrics(val_metrics, thresh_arr)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 5.1.3. Testing PR model" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = dataset_orig_panel19_test.copy()\n", - "dataset.features = pr_orig_scaler.transform(dataset.features)\n", - "\n", - "pr_orig_metrics = test(dataset=dataset,\n", - " model=pr_orig_panel19,\n", - " thresh_arr=[thresh_arr[pr_orig_best_ind]])" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.1200\nBest balanced accuracy: 0.6880\nCorresponding 1-min(DI, 1/DI) value: 0.1588\nCorresponding average odds difference value: 0.0523\nCorresponding statistical parity difference value: -0.0566\nCorresponding equal opportunity difference value: 0.1479\nCorresponding Theil index value: 0.1108\n" - } - ], - "source": [ - "describe_metrics(pr_orig_metrics, [thresh_arr[pr_orig_best_ind]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As in the case of reweighing, prejudice remover results in a fair model. However, it has come at the expense of relatively lower balanced accuracy." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## [6.](#Table-of-Contents) Summary of Model Learning Results" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": " bal_acc avg_odds_diff disp_imp \\\nBias Mitigator Classifier \n Logistic Regression 0.775935 -0.205706 0.426176 \n Random Forest 0.763772 -0.138763 0.485869 \nReweighing Logistic Regression 0.753893 -0.015104 0.751755 \nReweighing Random Forest 0.758565 -0.084303 0.569260 \nPrejudice Remover 0.688028 0.052286 0.841229 \n\n stat_par_diff eq_opp_diff theil_ind \nBias Mitigator Classifier \n Logistic Regression -0.261207 -0.222779 0.092122 \n Random Forest -0.218998 -0.113503 0.093575 \nReweighing Logistic Regression -0.087196 -0.003518 0.096575 \nReweighing Random Forest -0.163191 -0.061108 0.096345 \nPrejudice Remover -0.056631 0.147869 0.110774 ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
bal_accavg_odds_diffdisp_impstat_par_diffeq_opp_difftheil_ind
Bias MitigatorClassifier
Logistic Regression0.775935-0.2057060.426176-0.261207-0.2227790.092122
Random Forest0.763772-0.1387630.485869-0.218998-0.1135030.093575
ReweighingLogistic Regression0.753893-0.0151040.751755-0.087196-0.0035180.096575
ReweighingRandom Forest0.758565-0.0843030.569260-0.163191-0.0611080.096345
Prejudice Remover0.6880280.0522860.841229-0.0566310.1478690.110774
\n
" - }, - "metadata": {}, - "execution_count": 46 - } - ], - "source": [ - "import pandas as pd\n", - "pd.set_option('display.multi_sparse', False)\n", - "results = [lr_orig_metrics, rf_orig_metrics, lr_transf_metrics,\n", - " rf_transf_metrics, pr_orig_metrics]\n", - "debias = pd.Series(['']*2 + ['Reweighing']*2\n", - " + ['Prejudice Remover'],\n", - " name='Bias Mitigator')\n", - "clf = pd.Series(['Logistic Regression', 'Random Forest']*2 + [''],\n", - " name='Classifier')\n", - "pd.concat([pd.DataFrame(metrics) for metrics in results], axis=0).set_index([debias, clf])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Of all the models, the logistic regression model gives the best balance in terms of balanced accuracy and fairness. While the model learnt by prejudice remover is slightly fairer, it has much lower accuracy. All other models are quite unfair compared to the logistic model. Hence, we take the logistic regression model learnt from data transformed by re-weighing and 'deploy' it." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## [7.](#Table-of-Contents) Deploying model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7.1. 
Testing model learned on 2014 (Panel 19) on 2015 (Panel 20) deployment data" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig_panel20_deploy = MEPSDataset20()\n", - "\n", - "# now align it with the 2014 dataset\n", - "dataset_orig_panel20_deploy = dataset_orig_panel19_train.align_datasets(dataset_orig_panel20_deploy)" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "tags": [] - }, - "outputs": [ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QrsoK5hTHHV7" + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.insert(0, '../')\n", + "\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from IPython.display import Markdown, display\n", + "\n", + "# Datasets\n", + "from aif360.datasets import MEPSDataset19\n", + "from aif360.datasets import MEPSDataset20\n", + "from aif360.datasets import MEPSDataset21\n", + "\n", + "# Fairness metrics\n", + "from aif360.metrics import BinaryLabelDatasetMetric\n", + "from aif360.metrics import ClassificationMetric\n", + "\n", + "# Explainers\n", + "from aif360.explainers import MetricTextExplainer\n", + "\n", + "# Scalers\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "# Classifiers\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "# Bias mitigation techniques\n", + "from aif360.algorithms.preprocessing import Reweighing\n", + "from aif360.algorithms.inprocessing import PrejudiceRemover\n", + "\n", + "# LIME\n", + "from aif360.datasets.lime_encoder import LimeEncoder\n", + "import lime\n", + "from lime.lime_tabular import LimeTabularExplainer\n", + "\n", + "np.random.seed(1)" + ] + }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Test Dataset 
shape" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "KBOISv2hHHV8" + }, + "source": [ + "### 3.1. Load data & create splits for learning/validating/testing model" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "(17570, 138)\n" + "cell_type": "markdown", + "metadata": { + "id": "8WYyOIpsHHV8" + }, + "source": [ + "Get the dataset and split into train (50%), validate (30%), and test (20%)" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Favorable and unfavorable labels" - }, - "metadata": {} + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_Xxxpd6LHHV8" + }, + "outputs": [], + "source": [ + "(dataset_orig_panel19_train,\n", + " dataset_orig_panel19_val,\n", + " dataset_orig_panel19_test) = MEPSDataset19().split([0.5, 0.8], shuffle=True)\n", + "\n", + "sens_ind = 0\n", + "sens_attr = dataset_orig_panel19_train.protected_attribute_names[sens_ind]\n", + "\n", + "unprivileged_groups = [{sens_attr: v} for v in\n", + " dataset_orig_panel19_train.unprivileged_protected_attributes[sens_ind]]\n", + "privileged_groups = [{sens_attr: v} for v in\n", + " dataset_orig_panel19_train.privileged_protected_attributes[sens_ind]]" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "1.0 0.0\n" + "cell_type": "markdown", + "metadata": { + "id": "KrANNAJ2HHV8" + }, + "source": [ + "This function will be used throughout the notebook to print out some labels, names, etc." 
+ ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Protected attribute names" - }, - "metadata": {} + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "id": "a11OtzvpHHV8" + }, + "outputs": [], + "source": [ + "def describe(train=None, val=None, test=None):\n", + " if train is not None:\n", + " display(Markdown(\"#### Training Dataset shape\"))\n", + " print(train.features.shape)\n", + " if val is not None:\n", + " display(Markdown(\"#### Validation Dataset shape\"))\n", + " print(val.features.shape)\n", + " display(Markdown(\"#### Test Dataset shape\"))\n", + " print(test.features.shape)\n", + " display(Markdown(\"#### Favorable and unfavorable labels\"))\n", + " print(test.favorable_label, test.unfavorable_label)\n", + " display(Markdown(\"#### Protected attribute names\"))\n", + " print(test.protected_attribute_names)\n", + " display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", + " print(test.privileged_protected_attributes,\n", + " test.unprivileged_protected_attributes)\n", + " display(Markdown(\"#### Dataset feature names\"))\n", + " print(test.feature_names)" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "['RACE']\n" + "cell_type": "markdown", + "metadata": { + "id": "G2ZKAdY4HHV8" + }, + "source": [ + "Show 2015 dataset details" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Privileged and unprivileged protected attribute values" - }, - "metadata": {} + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "3jCL-l67HHV9", + "outputId": "bc17de82-2f69-43bf-ce21-22f69b900bad" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Training Dataset shape" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "(7915, 138)\n" + }, + { + "output_type": 
"display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Validation Dataset shape" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "(4749, 138)\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Test Dataset shape" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "(3166, 138)\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Favorable and unfavorable labels" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "1.0 0.0\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Protected attribute names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "['RACE']\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Privileged and unprivileged protected attribute values" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "[array([1.])] [array([0.])]\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Dataset feature names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "['AGE', 'RACE', 'PCS42', 'MCS42', 'K6SUM42', 'REGION=1', 'REGION=2', 'REGION=3', 'REGION=4', 'SEX=1', 'SEX=2', 'MARRY=1', 'MARRY=2', 'MARRY=3', 'MARRY=4', 'MARRY=5', 'MARRY=6', 'MARRY=7', 'MARRY=8', 'MARRY=9', 'MARRY=10', 'FTSTU=-1', 'FTSTU=1', 'FTSTU=2', 'FTSTU=3', 'ACTDTY=1', 'ACTDTY=2', 'ACTDTY=3', 'ACTDTY=4', 'HONRDC=1', 'HONRDC=2', 'HONRDC=3', 'HONRDC=4', 'RTHLTH=-1', 'RTHLTH=1', 'RTHLTH=2', 'RTHLTH=3', 'RTHLTH=4', 'RTHLTH=5', 'MNHLTH=-1', 'MNHLTH=1', 'MNHLTH=2', 'MNHLTH=3', 'MNHLTH=4', 'MNHLTH=5', 'HIBPDX=-1', 'HIBPDX=1', 'HIBPDX=2', 'CHDDX=-1', 'CHDDX=1', 'CHDDX=2', 'ANGIDX=-1', 'ANGIDX=1', 'ANGIDX=2', 'MIDX=-1', 
'MIDX=1', 'MIDX=2', 'OHRTDX=-1', 'OHRTDX=1', 'OHRTDX=2', 'STRKDX=-1', 'STRKDX=1', 'STRKDX=2', 'EMPHDX=-1', 'EMPHDX=1', 'EMPHDX=2', 'CHBRON=-1', 'CHBRON=1', 'CHBRON=2', 'CHOLDX=-1', 'CHOLDX=1', 'CHOLDX=2', 'CANCERDX=-1', 'CANCERDX=1', 'CANCERDX=2', 'DIABDX=-1', 'DIABDX=1', 'DIABDX=2', 'JTPAIN=-1', 'JTPAIN=1', 'JTPAIN=2', 'ARTHDX=-1', 'ARTHDX=1', 'ARTHDX=2', 'ARTHTYPE=-1', 'ARTHTYPE=1', 'ARTHTYPE=2', 'ARTHTYPE=3', 'ASTHDX=1', 'ASTHDX=2', 'ADHDADDX=-1', 'ADHDADDX=1', 'ADHDADDX=2', 'PREGNT=-1', 'PREGNT=1', 'PREGNT=2', 'WLKLIM=-1', 'WLKLIM=1', 'WLKLIM=2', 'ACTLIM=-1', 'ACTLIM=1', 'ACTLIM=2', 'SOCLIM=-1', 'SOCLIM=1', 'SOCLIM=2', 'COGLIM=-1', 'COGLIM=1', 'COGLIM=2', 'DFHEAR42=-1', 'DFHEAR42=1', 'DFHEAR42=2', 'DFSEE42=-1', 'DFSEE42=1', 'DFSEE42=2', 'ADSMOK42=-1', 'ADSMOK42=1', 'ADSMOK42=2', 'PHQ242=-1', 'PHQ242=0', 'PHQ242=1', 'PHQ242=2', 'PHQ242=3', 'PHQ242=4', 'PHQ242=5', 'PHQ242=6', 'EMPST=-1', 'EMPST=1', 'EMPST=2', 'EMPST=3', 'EMPST=4', 'POVCAT=1', 'POVCAT=2', 'POVCAT=3', 'POVCAT=4', 'POVCAT=5', 'INSCOV=1', 'INSCOV=2', 'INSCOV=3']\n" + } + ], + "source": [ + "describe(dataset_orig_panel19_train, dataset_orig_panel19_val, dataset_orig_panel19_test)" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "[array([1.])] [array([0.])]\n" + "cell_type": "markdown", + "metadata": { + "id": "Z0O8P8wZHHV9" + }, + "source": [ + "Metrics for original data" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Dataset feature names" - }, - "metadata": {} + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "m5IYpllTHHV9", + "outputId": "b6b4808f-dfd0-4dfe-ac8f-3ef935e01537" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.48230522996275893\n" + } + ], + "source": [ + "metric_orig_panel19_train = 
BinaryLabelDatasetMetric(\n", + " dataset_orig_panel19_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "explainer_orig_panel19_train = MetricTextExplainer(metric_orig_panel19_train)\n", + "\n", + "print(explainer_orig_panel19_train.disparate_impact())" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "['AGE', 'RACE', 'PCS42', 'MCS42', 'K6SUM42', 'REGION=1', 'REGION=2', 'REGION=3', 'REGION=4', 'SEX=1', 'SEX=2', 'MARRY=1', 'MARRY=2', 'MARRY=3', 'MARRY=4', 'MARRY=5', 'MARRY=6', 'MARRY=7', 'MARRY=8', 'MARRY=9', 'MARRY=10', 'FTSTU=-1', 'FTSTU=1', 'FTSTU=2', 'FTSTU=3', 'ACTDTY=1', 'ACTDTY=2', 'ACTDTY=3', 'ACTDTY=4', 'HONRDC=1', 'HONRDC=2', 'HONRDC=3', 'HONRDC=4', 'RTHLTH=-1', 'RTHLTH=1', 'RTHLTH=2', 'RTHLTH=3', 'RTHLTH=4', 'RTHLTH=5', 'MNHLTH=-1', 'MNHLTH=1', 'MNHLTH=2', 'MNHLTH=3', 'MNHLTH=4', 'MNHLTH=5', 'HIBPDX=-1', 'HIBPDX=1', 'HIBPDX=2', 'CHDDX=-1', 'CHDDX=1', 'CHDDX=2', 'ANGIDX=-1', 'ANGIDX=1', 'ANGIDX=2', 'MIDX=-1', 'MIDX=1', 'MIDX=2', 'OHRTDX=-1', 'OHRTDX=1', 'OHRTDX=2', 'STRKDX=-1', 'STRKDX=1', 'STRKDX=2', 'EMPHDX=-1', 'EMPHDX=1', 'EMPHDX=2', 'CHBRON=-1', 'CHBRON=1', 'CHBRON=2', 'CHOLDX=-1', 'CHOLDX=1', 'CHOLDX=2', 'CANCERDX=-1', 'CANCERDX=1', 'CANCERDX=2', 'DIABDX=-1', 'DIABDX=1', 'DIABDX=2', 'JTPAIN=-1', 'JTPAIN=1', 'JTPAIN=2', 'ARTHDX=-1', 'ARTHDX=1', 'ARTHDX=2', 'ARTHTYPE=-1', 'ARTHTYPE=1', 'ARTHTYPE=2', 'ARTHTYPE=3', 'ASTHDX=1', 'ASTHDX=2', 'ADHDADDX=-1', 'ADHDADDX=1', 'ADHDADDX=2', 'PREGNT=-1', 'PREGNT=1', 'PREGNT=2', 'WLKLIM=-1', 'WLKLIM=1', 'WLKLIM=2', 'ACTLIM=-1', 'ACTLIM=1', 'ACTLIM=2', 'SOCLIM=-1', 'SOCLIM=1', 'SOCLIM=2', 'COGLIM=-1', 'COGLIM=1', 'COGLIM=2', 'DFHEAR42=-1', 'DFHEAR42=1', 'DFHEAR42=2', 'DFSEE42=-1', 'DFSEE42=1', 'DFSEE42=2', 'ADSMOK42=-1', 'ADSMOK42=1', 'ADSMOK42=2', 'PHQ242=-1', 'PHQ242=0', 'PHQ242=1', 'PHQ242=2', 'PHQ242=3', 'PHQ242=4', 'PHQ242=5', 'PHQ242=6', 'EMPST=-1', 'EMPST=1', 'EMPST=2', 'EMPST=3', 'EMPST=4', 'POVCAT=1', 'POVCAT=2', 'POVCAT=3', 
'POVCAT=4', 'POVCAT=5', 'INSCOV=1', 'INSCOV=2', 'INSCOV=3']\n" - } - ], - "source": [ - "# describe(dataset_orig_panel20_train, dataset_orig_panel20_val, dataset_orig_panel20_deploy)\n", - "describe(test=dataset_orig_panel20_deploy)" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.5456992351196291\n" - } - ], - "source": [ - "metric_orig_panel20_deploy = BinaryLabelDatasetMetric(\n", - " dataset_orig_panel20_deploy, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "explainer_orig_panel20_deploy = MetricTextExplainer(metric_orig_panel20_deploy)\n", - "\n", - "print(explainer_orig_panel20_deploy.disparate_impact())" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "lr_transf_metrics_panel20_deploy = test(\n", - " dataset=dataset_orig_panel20_deploy,\n", - " model=lr_transf_panel19,\n", - " thresh_arr=[thresh_arr[lr_transf_best_ind]])" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nBest balanced accuracy: 0.7311\nCorresponding 1-min(DI, 1/DI) value: 0.1943\nCorresponding average odds difference value: 0.0071\nCorresponding statistical parity difference value: -0.0596\nCorresponding equal opportunity difference value: 0.0303\nCorresponding Theil index value: 0.1019\n" - } - ], - "source": [ - "describe_metrics(lr_transf_metrics_panel20_deploy, [thresh_arr[lr_transf_best_ind]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Deployed model tested on the 2015 Panel 20 data still exhibits 
fairness as well as maintains accuracy." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## [8.](#Table-of-Contents) Generating explanations for model predictions using LIME" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8.1. Generating explanations on 2015 Panel 20 deployment data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This section shows how LIME can be integrated with AIF360 to get explanations for model predictions." - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "train_dataset = dataset_transf_panel19_train # data the deployed model (lr from transformed data)\n", - "test_dataset = dataset_orig_panel20_deploy # the data model is being tested on\n", - "model = lr_transf_panel19 # lr_transf_panel19 is LR model learned from Panel 19 with Reweighing\n", - "thresh_arr = np.linspace(0.01, 0.5, 50)\n", - "best_thresh = thresh_arr[lr_transf_best_ind]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, we need to fit the encoder to the aif360 dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "lime_data = LimeEncoder().fit(train_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `transform()` method is then used to convert aif360 features to LIME-compatible features" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [], - "source": [ - "s_train = lime_data.transform(train_dataset.features)\n", - "s_test = lime_data.transform(test_dataset.features)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `LimeTabularExplainer` takes as input the LIME-compatible data along with various other arguments to create a lime explainer" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - 
"outputs": [], - "source": [ - "explainer = LimeTabularExplainer(\n", - " s_train, class_names=lime_data.s_class_names, \n", - " feature_names=lime_data.s_feature_names,\n", - " categorical_features=lime_data.s_categorical_features, \n", - " categorical_names=lime_data.s_categorical_names, \n", - " kernel_width=3, verbose=False, discretize_continuous=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `inverse_transform()` function is used to transform LIME-compatible data back to aif360-compatible data since that is needed by the model to make predictions. The function below is used to produce the predictions for any perturbed data that is produce by LIME" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "def s_predict_fn(x):\n", - " return model.predict_proba(lime_data.inverse_transform(x))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `explain_instance()` method can then be used to produce explanations for any instance in the test dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "def show_explanation(ind):\n", - " exp = explainer.explain_instance(s_test[ind], s_predict_fn, num_features=10)\n", - " print(\"Actual label: \" + str(test_dataset.labels[ind]))\n", - " exp.as_pyplot_figure()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nActual label: [0.]\n" - }, - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": "Actual label: [0.]\n" - }, - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAbEAAAEICAYAAADRFcoMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de7xtc73/8dc7d7klkrBtNpFbu6zUOVFCJb8cKk52Cp1q53TjlJ1EJUcXnC7HUWmfnEQ6WxFHEsklUqm12S57u+2Nyi2bKLIjvH9/jO9imOZca677HLyfj8d8rDG+4/v9ju8Ya675Wd/vGHN8ZZuIiIgmes5kNyAiImKkEsQiIqKxEsQiIqKxEsQiIqKxEsQiIqKxEsQiIqKxEsSip0mypI0meJ8nSjpyIvdZ2/d8SduPQ71rSbpE0gOSvjTW9bfs61ZJO43nPp6JJG0n6YYu8n1S0rcmok1NkCAWw5IPqLHTLlja3tz2xeOwu5nAPcAqtj82DvU/I0naQtJ5ku6R9LQv1UpaXdIZkv4q6XeS3tGhnr3K345a0peWdLekN9u+1PYmQ7XJ9udtv7eUn1r+0Vt6pMfYdAliEc8O6wMLPIKnG/TyB6SklSWtMI67+DvwfeA9HbZ/DXgEWAvYG/iGpM3b5DsTWA14bUv6zoCBc8ektc9CCWIxZiS9T9JCSX+SdJakF9W2bS7p/LLtj5I+WdK3kfQrSfdLulPScZKW7XJ/q0o6oZS7XdKRkpYq274h6fRa3qMkXaDK9pJuK8My95T/kPfusI/nSTpb0mJJ95XldWvbL5b075IuK0N1P5W0Rm37DyTdJenPZThv85I+k+pD7+OSHpT0o5L+RE9X0nKSvirpjvL6qqTlyraBY/hY+U/+Tknv7nAMJwL71va1U5d1HyzpLuDbHep9n6TrynEvkPTyNnk6/n7L7+Irpf1/kXSNpC3Ktl1KnQ+U3+1B7d8FbAHcIembkl7VIc+I2b7B9gnA/DbH9lzgbcCnbD9o+xfAWcC72tTzN6pguE/Lpn2A79l+dOC81+o/uBz7A5JukLRjST9c0ndLtkvKz/vL7/YfJG0k6eflPXePpFNHeRp6m+288ur6BdwK7NQmfQeq4aqXA8sB/wVcUratDNwJfAxYvqy/smzbGngVsDQwFbgOOLBWr4GNOrTlDOCbwHOBFwC/Ad5ftq0I3AjsB2xX2rZu2bY98Cjw5dLW1wJ/BTYp208EjizLz6f6oFqxtPsHwJm1NlwMLAJeDKxQ1r9Y2/4vpdxywFeBebVtT+yn3fkFjgB+XY5tTeCXwL+3HMMRwDLALsBDwPM6nKun7KvLuo8q7V6hTX17ArcDrwAEbASs3+YYOv5+gTcCc6l6KAJeAqxdtt0JbFeWnwe8fJD35AbA4cDNpf6PD9RTy7MtcP8gr22HeN9vBLgl7WXAQy1pBwE/6lDHq4G/DJxPYFVgCTC9dt5vK8ubAH8AXlTWpwLTyvLhwHdr6QaWru3nf4FDqTopyw91bE1/TXoD8mrWi85B7ATg6Nr6SlRDMVOBGcCVXdZ/IHBGbb1tEKMavnm4/gFb9nNRbf2VwJ+A3wEzaunbU31IP7eW9n2q/6ihTXCp5ZsO3Fdbvxg4rLb+AeDcDmVXK8ezaqf98NQAsAjYpbbtjcCttWNY0vLhdTfwqg77fsq+uqj7EWD5QX5P5wEHDOc90vr7pfrH50aqIPeclny/B95PdQ2v2/emqP4h+R/gPuBsYMoYve/bBbHtgLta0t4HXDxIPTcB76jlvarlfXlbbX93AzsBy7TUcTiDB7GTgNmUf9qe6a8MJ8ZYeRFVsADA9oPAvcA6wHpUH5pPI+nFZYjuLkl/AT4PrNEub4v1qXogd5ahqvupemUvqLXhcqr/zkUVpOrus/3X2vrvyjG0tm/FMlT1u9K+S4DVBoYti7tqyw9RBXAkLSXpi5IWlbK3ljzdHB+0nNM2bbzX9qPt9j0
GdS92NQTWScffad1gv1/bFwLHUV1XulvSbEmrlKJvo+pd/q4Mjf3DUPty9Qm+ALgKuA3YnKqXPl4eBFZpSVsFeGCQMifx5JDiu8r609heSBXwD6c6N3NUG54fwsep3vO/UXW36790Wa6REsRirNxBFViAJ64XPJ9qyOkPwIYdyn0DuB7Y2PYqwCep/gCH8geqntgatlcrr1VsP3FRXdIHqYbD7qD6w657XmnjgCklX6uPUQ3tvLK07zUD1XfRxncAu1H9N70q1X/N9bJD3WTxlHM6SBtHYqi6h2rbH4BpXexn0N+v7WNtbw1sRjUkO6uk/9b2blT/lJzJ0/8JeUK5vrdHua54E9UQ5keADW1fV/JsV64ZdXpt18WxtLoRWFrSxrW0l9Lm+lnNycCOJSi/CjilU0bb37O9LdXvyVTDu0/L1qbcXbbfZ/tFVL3Zr2uCv6YykRLEYiSWkbR87bU01Tj8uyVNLzcIfB643PatVMM6a0s6sHzgrCzplaWulamuEzwoaVPgX7tpgO07gZ8CX5K0iqTnSJom6bVQ9QCAI4F3Uv3H+3FJ01uq+aykZcsH2Juprne1Wplq2O5+SasDn+nuFD1R9mGqHumKVOek7o90Du5QndPDJK2p6maRTwPfHST/cIy27m8BB0nautygsZGk9dvk6/j7lfQKSa+UtAzVNcm/AY+X38nekla1/fdS/vF2jZC0FdX1swOogt16tvexfVHpmQHg6vb1lQZ5XdqhfklaHhi4GWX58v6m9OR/CBwh6bmSXk31T8vJnU5a+Xv4BdX5P9/2Xe3ySdpE0g5lX3+jeg+2OweLS/qGtbJ76smbj+6jCnRtz98zQYJYjMQ5VH9UA6/Dbf8M+BRwOtWHyjRgLwDbDwCvB3alGnq7CXhdqesgqh7LA8B/A8O5k2ofqg+XBVR/rKdRBculqT6Qj7J9le2bqHoAJw98AJV23EfV+zgF2N/29W328VWqGzbuoboRYji3Qp9ENUx3e2njr1u2nwBsVoZDz2xT/kigH7gauAa4oqSNhVHVbfsHwOeA71H97s4EVm+TdbDf7yol7T6q83QvcEzZ9i7g1jIEuT/VnZzt3A1sY3s72yeU99pYWp/qPT7Qu1oC1L+Q/AGq98fdVIHpX20P1hMD+E6pt+1QYrEc8EWq991dVD3SQ1oz2X6I6vdwWXkfvYrqZpvLJT1IdbfkAbZvHqJNjaXaPysRzwqqnojxXdvrDpU3InpbemIREdFYCWIREdFYGU6MiIjGSk8sIiIaq2cf7PlMtcYaa3jq1KmT3YyIiEaZO3fuPbbXbE1PEJtgU6dOpb+/f7KbERHRKJJ+1y49w4kREdFYCWIREdFYCWIREdFYCWIREdFYCWIREdFYCWIREdFYCWIREdFYCWIREdFY+bJzk6ibyYQjYlTyPNlGSU8sIiIaK0EsIiIaK0EsIiIaK0EsIiIaa8KDmKQXSpojaZGkuZLOkfRiSde25Dtc0kFl+URJt0iaJ+l6SZ+p5btY0g2SrpL0W0nTa9tWlXSSpIVlfydJWrVsmyrJkj5cy3+cpP26PI7Xl/ZfU37uMMpTExERwzShQUySgDOAi21Ps701cAiwVhfFZ9meDkwH9pW0QW3b3rZfCnwdOKaWfgJws+2NbE8DbgG+Vdt+N3CApGVHcDj3ALva3hLYFzh5BHVERMQoTHRP7HXA320fP5Bg+yrgD8OoY/ny869ttv0KWAdA0kbA1sC/17YfAfRJmlbWFwMXUAWhYbF9pe07yup8YAVJyw23noiIGLmJDmJbAHM7bJtWhgvnSZoH7N+y/ZiSfhswx/bdberYGTizLG8GzLP92MDGsjwP2LxW5ijgIElL1SuSNKventrr2Db7fRtwhe2H2x2YpJmS+iX1L168uMPhR0TEcPXSl50XleFCoLom1rJ9lu3TJK0EXCDpH23/smw7pQwJrkQ13Ng12zdLuhx4R0v6MTx1aLItSZtTBcI3DLKP2cBsgL6+vny
TMiJijEx0T2w+1RDfiNl+ELgY2LaWvDewIfAd4L9K2gJguqQnjrEsTy/b6j4PHAyolnfInpikdamu8e1je9FojisiIoZvooPYhcBykmYOJEjaCliv2wokLQ28EnhK0LBt4FPAqyRtanshcCVwWC3bYVTDfgtbyl5PFdh2raUdY3t6m9dHSjtWA34MfML2Zd22PyIixs6EBrESaN4C7FRueZ8PfAG4q4viA9fErgauAX7Ypv4lwJeAWSXpPcCLy74WAS8uae18Dlh3GIfzIWAj4NO1XtoLhlE+IiJGSc7DLidUX1+f+/v7R1Y4DwCOGH/5TOxJkuba7mtNzxM7IiKisRLEIiKisXrpFvsYSoY5IiKeIj2xiIhorASxiIhorASxiIhorFwTezbJLfoRQ8u150ZJTywiIhorQSwiIhorQSwiIhorQSwiIhpr0oOYpBdKmlMe0jtX0jmSXixpc0kXSrpB0k2SPiWpPlXKzpJ+I+n68vDdUyVNKdtOlLRHy36mSrq2LG8vyZLeW9s+vaQd1GW7X1/ae035ucPYnJGIiOjWpAaxEpTOAC62Pc321sAhwFrAWcAXbW8CvBT4R+ADpdwWVPOG7Wt70zKZ5inA1GHs/lrgn2vrM4CrhlH+HmBX21sC+wInD6NsRESMgcnuib0O+Lvt4wcSbF9FNWXKZbZ/WtIeopr65BMl28HA521fVyt3lu1LhrHv3wHLS1qrBNOdgZ90W9j2lbbvKKvzgRUkLTeM/UdExChN9vfEtgDmtknfvDXd9iJJK0lapWz/jzHY/2nAnlSTZ14BPDywQdIsqhmjW10yMDFmzduoJtt8uE1+yiSgMwGmTJkyBs2OiAiY/CA2apKeD1wArAjMtj2c4PZ94FRgU+B/qYYsgWpmZ+CYLva/OXAU8IZOeWzPBmZDNZ/YMNoXERGDmOzhxPnA1m3SF7SmS9oQeND2X0q5lwPYvrdcE5sNrDScndu+C/g78HqqQFjf36zajM3117G1POtSXdPbx/ai4ew7IiJGb7KD2IXAcmW4DQBJWwE3ANtK2qmkrQAcCxxdsh0NHCrpJbW6VhxhGz4NHGz7sXqi7WNsT2/z+khp02rAj4FP2L5shPuOiIhRmNQgZtvAW4Cdyi3284EvAHcBuwGHSboBuAb4LXBcKXcNcABwUrkF/zLgJcD3atV/U9Jt5fWrQdrwS9tnjqD5HwI2Aj5d66W9YAT1RETECMl52OWE6uvrc39//+TsPA8AjhhaPhN7kqS5tvta0yd7ODEiImLEEsQiIqKxEsQiIqKxGv89sRiGjPVHxDNMemIREdFYCWIREdFYCWIREdFYuSYWMV7yvbxmyrXjRklPLCIiGitBLCIiGitBLCIiGqung5ikx2oP1/2TpFvK8s8kTZW0pKwvkHS8pOfUyh4o6W+SVq2lbS/p7LK8n6THy1PzB7ZfK2lql237aNnv1ZIukLT+2B15RER0o6eDGLBkYAoU4CxgVlnfqWxfVLZtBWwG7F4rO4PqyfdvHaT+24BDR9i2K4E+21tRzRB99BD5IyJijPV6EOuK7UeBX1JNjYKkaVQTZB5GFcw6ORvYXNImI9jnRbYfKqu/BtYdbh0RETE6z4ggJmlFYEeqeccA9gLmAJcCm0haq0PRx6l6UJ9sU+epHWZ23qdNPe8BfjLqA4mIiGFp+vfEpkmaBxj4P9sDgWQG8Bbbj0s6HdiTMqFmG9+jmiV6g3qi7bd30wBJ7wT6gNcOkmcmMBNgypQp3VQbERFdaHoQG7gm9gRJWwIbA+er+rLpssAtdAhith+V9CXg4JZ6TgXaDTN+2fZJJc9OVNfUXmv74U6NtD0bmA3VpJjdHVpERAyl6UGsnRnA4ba/MJBQ7moc7O7BE4GPAysPJAzVE5P0MuCbwM627x5ViyMiYkR69pqYpKWBjr2bQewFnNGSdkZJb8v2I8CxwAuGsZ9jqG4e+UG5VnbWcBsaERGjI/foc8IkvRT4b9vbTHZbxlJ
fX5/7+/snuxkxEfLsxGbq0c/EZztJc233tab3ZE9M0v7A/1LdIh8REdFWT14Ts308cPxktyMiInpbT/bEIiIiutGTPbGIZ4RcW4kYd+mJRUREYyWIRUREYyWIRUREY+WaWEREjT6b7/eNB39mfK4RpycWERGNlSAWERGNlSAWERGNlSAWERGNNeogJml3SZa0aVmfKmmJpCslXSfpN5L2q+VfS9LZkq6StEDSObVylnRkLe8akv4u6bha2kxJ15fXbyRtW9t2saS+sryBpJskvbG2fYqkByUdVNbXk3RRacd8SQcM47hfL2mupGvKzx1GdAIjImLExuLuxBnAL8rPz5S0RbZfBiBpQ+CHkmT728ARwPm2/7Ns36pW1y3A/+PJB//uCcwf2CjpzcD7gW1t3yPp5cCZkraxfVct37rAucDHbJ9Xq//LwE9q64+WPFdIWhmYK+l82wu6OO57gF1t3yFpC+A8YJ0uykVExBgZVU9M0krAtsB76DBfl+2bgY8CHylJawO31bZfXcv+EHDdQG8KeDvw/dr2g4FZtu8pZa8AvgN8sJZnbeCnwKG2n5jjS9LuVEHyiaBo+85SB7YfAK6jy0Bk+0rbd5TV+cAKkpbrpmxERIyN0Q4n7gaca/tG4F5JW3fIdwWwaVn+GnBCGcY7VNKLWvLOAfaStB7wGHBHbdvmwNyW/P0lfcB3gONsnzaQUILtwcBnOx2IpKnAy4DLy/qsMtll6+vYNsXfBlxhu+0knmUItF9S/+LFizs1ISIihmm0QWwGVdCh/JzRId8T3x4sw3sbAv9NFdiulLRmLe+5wOupenanjqBNPwPeKWnFWtrhwFdsP9i2cVWQOx040PZfSjuPsT29zesjLWU3B46iGuZsy/Zs2322+9Zcc81O2SIiYphGfE1M0urADsCWkgwsBZiqp9XqZVRDdQDY/hPwPeB7ks4GXkPpYdl+RNJc4GPAZsA/1epZAGwNXFhL25raECFwNPAu4AeSdrP9KPBKYA9JRwOrAY9L+pvt4yQtQxXATrH9w9rxzQL2bnMslwwEsnLt7QxgH9uLOp+tiIgYD6O5sWMP4GTbT/RAJP0cWK+eqQzT/QfwX2V9B+DXth8qN1NMA37fUveXgJ/b/pOeOsX70cBRkna2fa+k6cB+VEGq7kCqIHmCpP1sb1drz+HAgyWACTgBuM72l+sV2D4GOKbTwUtaDfgx8Anbl3XKFxER42c0QWwG1TBa3enAIcA0SVcCywMPAMfaPrHk2Ro4TtKjVMOZ37L92xLsALA9n6f2rgbSz5K0DvDL0vt7AHin7Ttb8lnSvsDZVIFvVodjeDVVr+0aSfNK2idtn9PF8X8I2Aj4tKRPl7Q32L67i7IRETEG5EzcN6H6+vrc398/2c2IiA7yAODxMdoHAEuaa7uvNT1P7IiIiMZKEIuIiMbKfGIRETXjNe9VjI/0xCIiorESxCIiorESxCIiorFyTSwi2tOz9FbzfO2oUdITi4iIxkoQi4iIxkoQi4iIxkoQi4iIxprQICbpsTKx5LWSfjAw51dL+o/KE+KRNFXSkpZJKfcp21aS9A1JiyRdIWmupPfVylnSh2v7Pk7SfpK+VupZ0FL3HiM4nj0lzZf0eG026oiImCAT3RNbUiaW3AJ4BNi/TfqfgA/WyixqmZTypJL+LeA+YGPbLwd2BlavlbsbOEDSsvUG2P6g7enALi11n8bwXQu8FbhkBGUjImKUJnM48VKqqUxa/QpYZ7CCkqYB2wCH2X4cwPZi2/WpYRYDFwD7jk1zn872dbZvGK/6IyJicJPyPTFJSwNvAs5tSV8K2JFqosoB02pzfQF8GHgecNVAABvEUcBPJP1Pl+0acjbnkZA0E5gJMGXKlJFWExERLSY6iK1QC0iX8mSwGkhfB7gOOL9WZlEZ/nuCpH9qWT8U2BN4ge0XDaTbvlnS5cA7umncULM5j5Tt2cBsqOYTG+v6IyKerSbrmth02x+2/Ug9HVgfEE+
9JtbOAuClkp4DYPtzpfwqbfJ+Hji41DsoSbNabiIZeB1btn+7rHcz83NERIyznnrslO2HJH0EOFPS1wfJt1BSP3CkpE/ZfkzS8rQJVLavl7QA2BX47RD7H7QnZvvd3R5LRESMv577npjtK4GrgRklaVpLr2jg2tR7gecDAwHtfODjHar9HLDuWLdV0lsk3Qb8A/BjSeeN9T4iIqIzOQ+7nFB9fX3u7++f7GZEDC0PAI4eImmu7ad9H7fnemIRERHdShCLiIjG6qkbOyKih2RYLRogPbGIiGisBLGIiGisBLGIiGisXBOLiKjRZ5+dXy3wZ5p5DTQ9sYiIaKwEsYiIaKwEsYiIaKwEsYiIaKyugpik3SVZ0qZlfaqkJZKulHSdpN9I2q+Wfz9Jx7XUcbGkvrJ8q6RrymuBpCPLU+jr+Q+U9DdJq9bStpf057LfGyRdIunNte2HS7q9PCj4Jkk/lLRZ2baUpLmSXlPL/1NJew7rjD1Z9nOS/iDpwZGUj4iI0eu2JzYD+AVPPlkeqskqX2b7JcBewIGShjNVyetsbwlsA2wIfLPNPn8LvLUl/dKy302AjwDHSdqxtv0rZb6yjYFTgQslrWn7MeADJf8ykmYAj9v+wTDaXPej0vaIiJgkQwYxSSsB2wLvoQpWT2P7ZuCjVEFlWGw/COwP7C5p9bLPacBKwGE8NXC2lp0HHAF8qMP2U4GfUmZ2tn058CvgcKrJMtuW67Ldv7Z950jLR0TE6HXzPbHdgHNt3yjpXklbA/e2yXcFsGlt/e2Stq2tb9RpB7b/IukWYGPgcqpgOQe4FNhE0lq2/9ih+BXArEHa39quQ4A/AF+1vXAgUdKpwCZtyn/Z9kmD1D8kSTOBmQBTpkwZTVUREVHTTRCbAfxnWZ5T1o9rk6/1G4Kn2n6ipyPp4iH2Uy8/A3iL7cclnQ7s2WGf7fY71PbXAH8Gtqgn2n77EPWMmO3ZwGyo5hMbr/1ERDzbDBrEyvDeDsCWkgwsBRj4WpvsLwOuG0kjJK0MTAVulLQlVY/sfFWT8i0L3ELnIDbUfl8G9Jf9PBc4muqYvi1pF9vnlG0de2LAKcDcsn6W7U93fXARETFuhuqJ7QGcbPv9AwmSfg6sV88kaSrwH8B/DbcB5Zrb14Ezbd8naRZwuO0v1PLcImn9NmW3Aj4FvLdD3W8D3gB8rCR9Gvi+7eslfQCYI+lC23/roic2fbjHFhER42uoGztmAGe0pJ1OdV1p2sAt9sD3gWNtf3sY+75I0rXAb4DfAwOBcq82+zyDJ28q2W7gFnuqHuFHbF9Qy/tvA7fYA+8EdrC9WNLmwFuAzwHYvhI4Dzh4GG1+gqSjJd0GrCjpNkmHj6SeiIgYOTkT302ovr4+9/f3T3YzIqKDPAC4N0maa7uvNT1P7IiIiMZKEIuIiMbKfGIRETW9PqwWT5WeWERENFaCWERENFaCWERENFauiUVE1Dwbb7Fv8nXA9MQiIqKxEsQiIqKxEsQiIqKxEsQiIqKxJjWISXqsPKz3Wkk/kLRiSX+wJd9+ko6rrc+UdH159UvavrbtFEk3lDr/R9IyLXW9QtKjkvYo69Ml/UrSfElXS+p6XjFVjpW0sJR9+QhPRUREjMBk98SW2J5uewvgEWD/oQpIejPVE++3tb0p1YzJ35W0TslyCtVMzlsCK1CbpkXSUsBRwE9rVT4E7GN7c2Bn4KuSVuuy/W+imvts49KOb3RZLiIixsBkB7G6S4GNush3MDDL9j0Atq8Avg18sKyf44Jqmpd1a2U/TDWVzN0DCbZvtH1TWb6jbFuzyzbvBpxUdvdrYDVJa3dZNiIiRqknvicmaWmqXs25JWkFSfNqWVYHzirLm/PkLMsD+oF3t9S5DPAu4ICyvg7VfGKvA17RoR3bUM0kvaisf6XkbzXH9heBdYA/1NJvK2l3ttQ7k6qnxpQpU9rtOiIiRmCyg1g9WF0KnFCWl9h+YiZlSfsBT5tHZghfBy6xfWlZ/yp
wsO3Hpad/mbH0oE4G9rX9OIDtfxvmPtuyPRuYDdV8YmNRZ0RETH4Qe0qw6tICYGvgwlra1lS9MQAkfYZqSPD9tTx9wJwSwNYAdpH0qO0zJa0C/Bg4tAwLDtQzVE/sdmC9Wvq6JS0iIibAZAexkTgaOErSzrbvlTSdaphwBwBJ7wXeCOw40KMCsL3BwLKkE4GzSwBbFjiD6trWafUdddETOwv4kKQ5wCuBP9u+c4gyERExRhoXxGyfJelFwGXlWtoLgZfaXlyyHA/8DvhV6XX90PYRg1T5z8BrgOeXYUuA/WzP61zkCecAuwALqe5yfPfg2SMiYiypuomvmUoQ+zbVXZbvdAMOpq+vz/39/UNnjIhJkQcA9yZJc20/7d6IxvXE6mw/SnUHYkREPAv10vfEIiIihqXRPbGIiLHWhKG1eFJ6YhER0VgJYhER0VgJYhER0Vi5JhYR3WvzyLZnnN7/pk7UpCcWERGNlSAWERGNlSAWERGNlSAWERGNNSZBTNLukixp07I+VdISSfMkLZB0kqRlJL2xpM2T9KCkG8rySZK2l3R2S70nStpD0hkl30JJf67VcZGko2r515d0s6TVJF1c6r9K0mWSNil5Lq7td56k0xgBSa+RdIWkRyXtMZrzFxERIzNWPbEZwC/KzwGLylxhW1LNs/XPts+zPb2k9wN7l/V9Bqvc9ltKmfcCl9bq2AXYXdJLStb/BD5l+/6yvrftlwLfAY6pVTmw3+m2RxqAfg/sB3xvhOUjImKURh3EJK0EbAu8B9irdbvtx4DfAOuMdl9t6l4C/BvwNUm7ACvbPqVN1kuAjcZ437favhp4fMjMERExLsbie2K7AefavlHSvZK2Bu4d2ChpeaoJIw/ooq7tJNXn8ZoCnN0pM4DtcyS9h6q3tW2HbLsC19TWT5G0pCyfb3uWpL2BWW3KLhxFby0iIsbRWASxGVTDeABzyvpxwLQSkDYAflx6LUO51PabB1bKDMzd+Bqwgu0bWtIHgtWtwIdr6XvbfsqkXqUH164XN2qSZgIzAaZMmTIeu4iIeFYaVRCTtDqwA7ClJANLAaYKKotsT5e0BtUszP9k+6xRt7i9x2k/rPe0YNXJUD0xSZ8D/ojba6EAAAuPSURBVB9AuR7XNduzgdlQTYo5nLIREdHZaHtiewAn237/QIKknwPrDazbvkfSJ4BDgPEKYqM2VE/M9qHAoRPXooiIGMpob+yYAZzRknY6VcCqOxNYUdJ2o9zfWDmldov9z0ZSgaRXSLoN2BP4pqT5Y9vEiIgYipyHXU6ovr4+9/d3NcIZ0XvyAOCYJJLm2u5rTc8TOyIiorESxCIiorESxCIiorEyKWZEdC/Xi6LHpCcWERGNlSAWERGNlSAWERGNlWtiERE1+uyz4LtwgD/zzLi+mZ5YREQ0VoJYREQ0VoJYREQ0VoJYREQ01oQHMUmP1Z4gP69M04KkiyX9XnryCaOSzpT0YFmeKmlJKbNA0vGSnlPSr23Zx+GSDirLJ0q6RdJVkm6UdJKkdcu2lSUtkrRxWV9G0jWSXtnlsRwj6XpJV0s6Q9JqY3OWIiKiG5PRE1tie3rt9cXatvuBVwOUgLB2S9lFZULKrYDNgN273Ocs2y8FNgGuBC6UtKztB6imjTmu5DsI+KXty7us93xgC9tbATfy9CloIiJiHPXacOIcYK+y/Fbgh+0y2X4U+CWw0XAqd+UrwF3Am0ra9wEkfRzYn2EEIts/LW0B+DWw7nDaExERozMZQWyFluHEt9e2XQC8RtJSVMHs1HYVSFoR2BG4piRNq9dJFYwGcwWwaW39AOAo4Ejbf6rt59KWtg68dmpT578AP+nQ3pmS+iX1L168eIimRUREtybjy85LypBgO48Bv6AKYCvYvlVPnYRvWglSBv7P9k8kTeXJYUaguiY2RBtav824M3AnsEU90XZXM1FLOhR4FDil3Xbbs4HZUE2K2U2dERExtF58Yscc4Azg8DbbFg0SAIfjZVS
9PiS9CPgIsA1wkaQTbF9dtl0KrNym/EG2f1by7Ae8GdjRmSY7ImJC9WIQuxT4AvC/Y11xufPxw1Q3jJxbkr8CfN72bZI+CnxN0mvK9bNBe2KSdgY+DrzW9kNj3d6IiBhcL1wTq9+dOHDzxX/YvmcM93mMpKuo7iB8BfA6249Iej0wBTih7PtHwH3APl3WexxVT+38cizHj2GbIyJiCMoI2MTq6+tzf3//ZDcjIjrIA4B7k6S5tvta03vtFvuIiIiuJYhFRERj9eKNHRERk6Zpw2zPdumJRUREYyWIRUREYyWIRUREY+WaWEREzTP1Fvtn6rW+9MQiIqKxEsQiIqKxEsQiIqKxEsQiIqKxJj2ISdpdkiVtWtanlvUP1/IcV6Y8GVj/qKTrJV0j6SpJX5a0TNl2q6Q1yvJj5cG880u+j0l6Ttn2VkkX1OrctuQd8mYXSetJukjSglL3AWN2QiIiomuTHsSAGVQTYc6opd0NHCBp2dbMkvYH3gC8yvaWVE+lvxtYoU3dS2xPt7058HrgTcBnAGz/EHhY0jtKAPw68AHbj3bR5keBj9neDHgV8EFJm3V3uBERMVYmNYhJWgnYFngP1WzOAxZTTVq5b5tihwL/avt+ANuP2P6i7b8Mti/bdwMzgQ/pyemiPwQcSTUB529t/7Kbdtu+0/YVZfkB4DpgnW7KRkTE2Jns74ntBpxr+0ZJ90raGri3bDsK+Imk/xnILGkVYCXbt4xkZ7ZvlrQU8ALgj2X9VKpgNq22n9dRTZbZ6iHb/1hPkDSVaqboyzvtV9JMqgDKlClTRtL0iIhoY7KHE2cAc8ryHGpDirZvpgoM7+hUWNIby3WsWyX9Y6d8g5RfimqY8UFg/dq+LyrDkK2v1gC2EnA6cOBgPUHbs2332e5bc801h9vMiIjoYNJ6YpJWB3YAtpRkYCnAwNdq2T4PnAb8HMD2XyQ9KGkD27fYPg84T9LZwNOun7XZ54bAY1TX0AA+AFwDHAZ8TdI/2HY3PbFyHe104JRyfS0iIibYZA4n7gGcbPv9AwmSfg6sN7Bu+3pJC4Bdgd+W5C8A35C0l+37y/Wt5YfamaQ1geOB40qgeiHwUWAb24slvQ94L/Dfti8Cpg9Sl4ATgOtsf3l4hx0REWNlMoPYDKrrXnWnA4e0pH0OuLK2/g3gucDlkh6mGgq8rCXPgBUkzQOWobqj8GRgIOh8GTja9uKyfiBwqaTTbf9piLa/GngXcE2pH+CTts8ZolxERIwh2c/Mh0L2qr6+Pvf39092MyKigzwAuDdJmmu7rzV9sm/siIiIGLEEsYiIaKzJ/p5YRERPafqw27NNemIREdFYCWIREdFYCWIREdFYCWIREdFYCWIREdFYCWIREdFYCWIREdFYCWIREdFYCWIREdFYeQDwBJO0GPjdMIutAdwzDs0ZL2nv+Ep7x1faO75G2t71bT9tVuEEsQaQ1N/u6c29Ku0dX2nv+Ep7x9dYtzfDiRER0VgJYhER0VgJYs0we7IbMExp7/hKe8dX2ju+xrS9uSYWERGNlZ5YREQ0VoJYREQ0VoJYj5C0uqTzJd1Ufj6vTZ7pkn4lab6kqyW9vbbtREm3SJpXXtN7vL0bSLpc0kJJp0padrLbW/KdK+l+SWe3pPfc+R2ivb16fvcteW6StG8t/WJJN9TO7wvGqZ07l/0slPSJNtuXK+drYTl/U2vbDinpN0h643i0b6zaK2mqpCW183l8j7T3NZKukPSopD1atrV9bwzJdl498AKOBj5Rlj8BHNUmz4uBjcvyi4A7gdXK+onAHg1q7/eBvcry8cC/TnZ7y7YdgV2Bs1vSe+78DtHenju/wOrAzeXn88ry88q2i4G+cW7jUsAiYENgWeAqYLOWPB8Aji/LewGnluXNSv7lgA1KPUv1cHunAtdO1Pt1GO2dCmwFnFT/exrsvTHUKz2x3rEb8J2y/B1g99YMtm+0fVNZvgO4G3jaN9gnyIjbK0nADsBpg5Wf6Pa
Wdl4APDDObenGiNvbw+f3jcD5tv9k+z7gfGDncW5X3TbAQts3234EmEPV7rr6cZwG7FjO527AHNsP274FWFjq69X2ToYh22v7VttXA4+3lB3xeyNBrHesZfvOsnwXsNZgmSVtQ/XfzqJa8ufKsN1XJC03Tu0cMJr2Ph+43/ajZfNtwDrj1dBiWO3toGfPb4tePb/rAH+orbe269tl6OtT4/RBPNT+n5KnnL8/U53PbsqOtdG0F2ADSVdK+rmk7ca5rU9pSzGcczTiskt3uYMYA5J+BrywzaZD6yu2Lanjdx8krQ2cDOxre+A/mkOoPjyWpfoexsHAEb3Y3vH6R3Gs2ttBz57fiTLO7d3b9u2SVgZOB95FNeQUI3MnMMX2vZK2Bs6UtLntv0x2w8ZagtgEsr1Tp22S/ihpbdt3lg/9uzvkWwX4MXCo7V/X6h74L/hhSd8GDurh9t4LrCZp6fLf47rA7b3Q3kHq7snz20Gvnt/bge1r6+tSXQvD9u3l5wOSvkc1NDXWQex2YL2W/beel4E8t0laGliV6nx2U3asjbi9ri40PQxge66kRVTXqPsnub2Dld2+pezF3RTMcGLvOAsYuCNnX+D/WjOUO8zOAE6yfVrLtrXLT1Fdj7h2XFs7ivaWP7CLgD0GKz/GhmzvYHrx/HbSw+f3POANkp5X7l58A3CepKUlrQEgaRngzYzP+f0tsLGqOzeXpboR4qxBjmMP4MJyPs8C9ip3A24AbAz8ZhzaOCbtlbSmpKUAJG1Y2ntzD7S3k7bvja5KTuTdK3kNemfP84ELgJuAnwGrl/Q+4Ftl+Z3A34F5tdf0su1C4BqqP/7vAiv1eHs3pPoQWAj8AFhusttb1i8FFgNLqMbl39ir53eI9vbq+f2X0qaFwLtL2nOBucDVwHzgPxmnO/+AXYAbqa7NHlrSjgD+qSwvX87XwnL+NqyVPbSUuwF403iez9G2F3hbOZfzgCuAXXukva8o79O/UvVw5w/23ujmlcdORUREY2U4MSIiGitBLCIiGitBLCIiGitBLCIiGitBLCIiGitBLCIiGitBLCIiGuv/A2u8HoaIt3PNAAAAAElFTkSuQmCC\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "print(\"Threshold corresponding to Best balanced accuracy: {:6.4f}\".format(best_thresh))\n", - "show_explanation(0)\n", - "show_explanation(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "See the [LIME documentation](https://github.com/marcotcr/lime) for detailed description of results. In short, the left hand side shows the label predictions made by the model, the middle shows the features that are important to the instance in question and their contributions (weights) to the label prediction, while the right hand side shows the actual values of the features in the particular instance." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## [9.](#Table-of-Contents) Re-deploying Model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9.1. Testing model learned on 2014 (Panel 19) data on 2016 (Panel 21) deployment data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load the Panel 21 data, and split it again into 3 parts: train, validate, and deploy. We test the deployed model against the deployment data. If a new model needs to be learnt, it will be learnt from the train/validate data and then tested again on the deployment data." - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": { - "tags": [] - }, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "3FPzVT6aHHV9" + }, + "source": [ + "### 3.2. Learning a Logistic Regression (LR) classifier on original data" + ] + }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Test Dataset shape" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "B-TWJ25bHHV9" + }, + "source": [ + "#### 3.2.1. Training LR model on original data" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "(15675, 138)\n" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eswfdeoeHHV-" + }, + "outputs": [], + "source": [ + "dataset = dataset_orig_panel19_train\n", + "model = make_pipeline(StandardScaler(),\n", + " LogisticRegression(solver='liblinear', random_state=1))\n", + "fit_params = {'logisticregression__sample_weight': dataset.instance_weights}\n", + "\n", + "lr_orig_panel19 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Favorable and unfavorable labels" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "gliJu1abHHV-" + }, + "source": [ + "#### 3.2.2. 
Validating LR model on original data" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "1.0 0.0\n" + "cell_type": "markdown", + "metadata": { + "id": "Pp1EFVfqHHV-" + }, + "source": [ + "This function will be used throughout the tutorial to find best threshold using a validation set" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Protected attribute names" - }, - "metadata": {} + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ly9Re3JkHHV-" + }, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "def test(dataset, model, thresh_arr):\n", + " try:\n", + " # sklearn classifier\n", + " y_val_pred_prob = model.predict_proba(dataset.features)\n", + " pos_ind = np.where(model.classes_ == dataset.favorable_label)[0][0]\n", + " except AttributeError:\n", + " # aif360 inprocessing algorithm\n", + " y_val_pred_prob = model.predict(dataset).scores\n", + " pos_ind = 0\n", + "\n", + " metric_arrs = defaultdict(list)\n", + " for thresh in thresh_arr:\n", + " y_val_pred = (y_val_pred_prob[:, pos_ind] > thresh).astype(np.float64)\n", + "\n", + " dataset_pred = dataset.copy()\n", + " dataset_pred.labels = y_val_pred\n", + " metric = ClassificationMetric(\n", + " dataset, dataset_pred,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "\n", + " metric_arrs['bal_acc'].append((metric.true_positive_rate()\n", + " + metric.true_negative_rate()) / 2)\n", + " metric_arrs['avg_odds_diff'].append(metric.average_odds_difference())\n", + " metric_arrs['disp_imp'].append(metric.disparate_impact())\n", + " metric_arrs['stat_par_diff'].append(metric.statistical_parity_difference())\n", + " metric_arrs['eq_opp_diff'].append(metric.equal_opportunity_difference())\n", + " metric_arrs['theil_ind'].append(metric.theil_index())\n", + "\n", + " return metric_arrs" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": 
"['RACE']\n" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WXYPRxEuHHV-" + }, + "outputs": [], + "source": [ + "thresh_arr = np.linspace(0.01, 0.5, 50)\n", + "val_metrics = test(dataset=dataset_orig_panel19_val,\n", + " model=lr_orig_panel19,\n", + " thresh_arr=thresh_arr)\n", + "lr_orig_best_ind = np.argmax(val_metrics['bal_acc'])" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Privileged and unprivileged protected attribute values" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "j8y_WX34HHV-" + }, + "source": [ + "Plot metrics with twin x-axes" + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "[array([1.])] [array([0.])]\n" + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false, + "id": "e8LcnRvoHHV-" + }, + "outputs": [], + "source": [ + "def plot(x, x_name, y_left, y_left_name, y_right, y_right_name):\n", + " fig, ax1 = plt.subplots(figsize=(10,7))\n", + " ax1.plot(x, y_left)\n", + " ax1.set_xlabel(x_name, fontsize=16, fontweight='bold')\n", + " ax1.set_ylabel(y_left_name, color='b', fontsize=16, fontweight='bold')\n", + " ax1.xaxis.set_tick_params(labelsize=14)\n", + " ax1.yaxis.set_tick_params(labelsize=14)\n", + " ax1.set_ylim(0.5, 0.8)\n", + "\n", + " ax2 = ax1.twinx()\n", + " ax2.plot(x, y_right, color='r')\n", + " ax2.set_ylabel(y_right_name, color='r', fontsize=16, fontweight='bold')\n", + " if 'DI' in y_right_name:\n", + " ax2.set_ylim(0., 0.7)\n", + " else:\n", + " ax2.set_ylim(-0.25, 0.1)\n", + "\n", + " best_ind = np.argmax(y_left)\n", + " ax2.axvline(np.array(x)[best_ind], color='k', linestyle=':')\n", + " ax2.yaxis.set_tick_params(labelsize=14)\n", + " ax2.grid(True)" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "#### Dataset feature names" - }, - "metadata": {} + "cell_type": "markdown", + "metadata": { + "id": "q2ouoHOKHHV-" + }, + "source": [ + 
"Here we plot $1 - \\min(\\text{disparate impact}, 1/\\text{disparate impact})$ since it's possible to overcorrect and end up with a value greater than 1, implying unfairness for the original privileged group. For shorthand, we simply call this 1-min(DI, 1/DI) from now on. We want the plotted metric to be less than 0.2." + ] }, { - "output_type": "stream", - "name": "stdout", - "text": "['AGE', 'RACE', 'PCS42', 'MCS42', 'K6SUM42', 'REGION=1', 'REGION=2', 'REGION=3', 'REGION=4', 'SEX=1', 'SEX=2', 'MARRY=1', 'MARRY=2', 'MARRY=3', 'MARRY=4', 'MARRY=5', 'MARRY=6', 'MARRY=7', 'MARRY=8', 'MARRY=9', 'MARRY=10', 'FTSTU=-1', 'FTSTU=1', 'FTSTU=2', 'FTSTU=3', 'ACTDTY=1', 'ACTDTY=2', 'ACTDTY=3', 'ACTDTY=4', 'HONRDC=1', 'HONRDC=2', 'HONRDC=3', 'HONRDC=4', 'RTHLTH=-1', 'RTHLTH=1', 'RTHLTH=2', 'RTHLTH=3', 'RTHLTH=4', 'RTHLTH=5', 'MNHLTH=-1', 'MNHLTH=1', 'MNHLTH=2', 'MNHLTH=3', 'MNHLTH=4', 'MNHLTH=5', 'HIBPDX=-1', 'HIBPDX=1', 'HIBPDX=2', 'CHDDX=-1', 'CHDDX=1', 'CHDDX=2', 'ANGIDX=-1', 'ANGIDX=1', 'ANGIDX=2', 'MIDX=-1', 'MIDX=1', 'MIDX=2', 'OHRTDX=-1', 'OHRTDX=1', 'OHRTDX=2', 'STRKDX=-1', 'STRKDX=1', 'STRKDX=2', 'EMPHDX=-1', 'EMPHDX=1', 'EMPHDX=2', 'CHBRON=-1', 'CHBRON=1', 'CHBRON=2', 'CHOLDX=-1', 'CHOLDX=1', 'CHOLDX=2', 'CANCERDX=-1', 'CANCERDX=1', 'CANCERDX=2', 'DIABDX=-1', 'DIABDX=1', 'DIABDX=2', 'JTPAIN=-1', 'JTPAIN=1', 'JTPAIN=2', 'ARTHDX=-1', 'ARTHDX=1', 'ARTHDX=2', 'ARTHTYPE=-1', 'ARTHTYPE=1', 'ARTHTYPE=2', 'ARTHTYPE=3', 'ASTHDX=1', 'ASTHDX=2', 'ADHDADDX=-1', 'ADHDADDX=1', 'ADHDADDX=2', 'PREGNT=-1', 'PREGNT=1', 'PREGNT=2', 'WLKLIM=-1', 'WLKLIM=1', 'WLKLIM=2', 'ACTLIM=-1', 'ACTLIM=1', 'ACTLIM=2', 'SOCLIM=-1', 'SOCLIM=1', 'SOCLIM=2', 'COGLIM=-1', 'COGLIM=1', 'COGLIM=2', 'DFHEAR42=-1', 'DFHEAR42=1', 'DFHEAR42=2', 'DFSEE42=-1', 'DFSEE42=1', 'DFSEE42=2', 'ADSMOK42=-1', 'ADSMOK42=1', 'ADSMOK42=2', 'PHQ242=-1', 'PHQ242=0', 'PHQ242=1', 'PHQ242=2', 'PHQ242=3', 'PHQ242=4', 'PHQ242=5', 'PHQ242=6', 'EMPST=-1', 'EMPST=1', 'EMPST=2', 'EMPST=3', 'EMPST=4', 'POVCAT=1', 'POVCAT=2', 
'POVCAT=3', 'POVCAT=4', 'POVCAT=5', 'INSCOV=1', 'INSCOV=2', 'INSCOV=3']\n" - } - ], - "source": [ - "dataset_orig_panel21_deploy = MEPSDataset21()\n", - "\n", - "# now align it with the panel19 datasets\n", - "dataset_orig_panel21_deploy = dataset_orig_panel19_train.align_datasets(dataset_orig_panel21_deploy)\n", - "\n", - "describe(test=dataset_orig_panel21_deploy)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.48375589333734254\n" - } - ], - "source": [ - "metric_orig_panel21_deploy = BinaryLabelDatasetMetric(\n", - " dataset_orig_panel21_deploy, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "explainer_orig_panel21_deploy = MetricTextExplainer(metric_orig_panel21_deploy)\n", - "\n", - "print(explainer_orig_panel21_deploy.disparate_impact())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, the logistic regression classifier trained on the panel 19 data after reweighing is tested against the panel 21 deployment data." 
- ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "lr_transf_metrics_panel21_deploy = test(\n", - " dataset=dataset_orig_panel21_deploy,\n", - " model=lr_transf_panel19,\n", - " thresh_arr=[thresh_arr[lr_transf_best_ind]])" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nBest balanced accuracy: 0.7379\nCorresponding 1-min(DI, 1/DI) value: 0.2559\nCorresponding average odds difference value: -0.0143\nCorresponding statistical parity difference value: -0.0813\nCorresponding equal opportunity difference value: -0.0044\nCorresponding Theil index value: 0.0994\n" - } - ], - "source": [ - "describe_metrics(lr_transf_metrics_panel21_deploy, [thresh_arr[lr_transf_best_ind]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Compared to the 2015 panel 20 deployment data results, the $|1 - \\text{disparate impact}|$ fairness metric shows a noticable drift upwards. While still within specs, it may be worthwhile to re-learn the model. So even though the model is still relatively fair and accurate, we go ahead and re-learn the model from the 2015 Panel 20 data." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9.2. 
Re-learning model (from 2015 Panel 20 data)" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [], - "source": [ - "(dataset_orig_panel20_train,\n", - " dataset_orig_panel20_val,\n", - " dataset_orig_panel20_test) = MEPSDataset20().split([0.5, 0.8], shuffle=True) \n", - "\n", - "# now align them with the 2014 datasets\n", - "dataset_orig_panel20_train = dataset_orig_panel19_train.align_datasets(dataset_orig_panel20_train)\n", - "dataset_orig_panel20_val = dataset_orig_panel19_train.align_datasets(dataset_orig_panel20_val)\n", - "dataset_orig_panel20_test = dataset_orig_panel19_train.align_datasets(dataset_orig_panel20_test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Train and evaluate new model on 'transformed' 2016 training/test data**" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "RW = Reweighing(unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "RW.fit(dataset_orig_panel20_train)\n", - "dataset_transf_panel20_train = RW.transform(dataset_orig_panel20_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 1.0000000000000002\n" - } - ], - "source": [ - "metric_transf_panel20_train = BinaryLabelDatasetMetric(\n", - " dataset_transf_panel20_train, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "explainer_transf_panel20_train = MetricTextExplainer(metric_transf_panel20_train)\n", - "\n", - "print(explainer_transf_panel20_train.disparate_impact())" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = 
dataset_transf_panel20_train\n", - "model = make_pipeline(StandardScaler(),\n", - " LogisticRegression(solver='liblinear', random_state=1))\n", - "fit_params = {'logisticregression__sample_weight': dataset.instance_weights}\n", - "lr_transf_panel20 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "thresh_arr = np.linspace(0.01, 0.5, 50)\n", - "val_metrics = test(dataset=dataset_orig_panel20_val,\n", - " model=lr_transf_panel20,\n", - " thresh_arr=thresh_arr)\n", - "lr_transf_best_ind_panel20 = np.argmax(val_metrics['bal_acc'])" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "disp_imp = np.array(val_metrics['disp_imp'])\n", - "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", - " disp_imp_err, '1 - min(DI, 1/DI)')" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "plot(thresh_arr, 'Classification Thresholds',\n", - " val_metrics['bal_acc'], 'Balanced Accuracy',\n", - " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7465\nCorresponding 1-min(DI, 1/DI) value: 0.1129\nCorresponding average odds difference value: 0.0036\nCorresponding statistical parity difference value: -0.0414\nCorresponding equal opportunity difference value: -0.0057\nCorresponding Theil index value: 0.0946\n" - } - ], - "source": [ - "describe_metrics(val_metrics, thresh_arr)" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [], - "source": [ - "lr_transf_metrics_panel20_test = test(\n", - " dataset=dataset_orig_panel20_test,\n", - " model=lr_transf_panel20,\n", - " thresh_arr=[thresh_arr[lr_transf_best_ind_panel20]])" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7490\nCorresponding 1-min(DI, 1/DI) value: 0.0533\nCorresponding average odds difference value: 0.0158\nCorresponding statistical parity difference value: -0.0184\nCorresponding equal opportunity difference value: -0.0150\nCorresponding Theil index value: 0.0988\n" - } - ], - "source": [ - "describe_metrics(lr_transf_metrics_panel20_test, [thresh_arr[lr_transf_best_ind_panel20]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The new model is both relatively fair as well as accurate so we deploy and test against the 2016 deployment data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9.3. 
Testing model learned on 2015 (Panel 20) data on 2016 (Panel 21) deployment data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Evaluate new 2015 transformed data model and evaluate again on 2016 deployment data**" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [], - "source": [ - "lr_transf_panel20_metrics_panel21_deploy = test(\n", - " dataset=dataset_orig_panel21_deploy,\n", - " model=lr_transf_panel20,\n", - " thresh_arr=[thresh_arr[lr_transf_best_ind_panel20]])" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7370\nCorresponding 1-min(DI, 1/DI) value: 0.1698\nCorresponding average odds difference value: -0.0021\nCorresponding statistical parity difference value: -0.0648\nCorresponding equal opportunity difference value: 0.0016\nCorresponding Theil index value: 0.0960\n" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4tQlJ4P1HHV-", + "outputId": "1f67dacc-9607-49d3-dd42-47a9003a5cbd" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "disp_imp = np.array(val_metrics['disp_imp'])\n", + "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", + " disp_imp_err, '1 - min(DI, 1/DI)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zMMhQiBtHHV_", + "outputId": "9666d9a4-09df-450b-8df5-274b079de02b" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced Accuracy',\n", + " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_TnIlce5HHV_" + }, + "source": [ + "Make a function to print out accuracy and fairness metrics. This will be used throughout the tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9TkUXqWNHHV_" + }, + "outputs": [], + "source": [ + "def describe_metrics(metrics, thresh_arr):\n", + " best_ind = np.argmax(metrics['bal_acc'])\n", + " print(\"Threshold corresponding to Best balanced accuracy: {:6.4f}\".format(thresh_arr[best_ind]))\n", + " print(\"Best balanced accuracy: {:6.4f}\".format(metrics['bal_acc'][best_ind]))\n", + "# disp_imp_at_best_ind = np.abs(1 - np.array(metrics['disp_imp']))[best_ind]\n", + " disp_imp_at_best_ind = 1 - min(metrics['disp_imp'][best_ind], 1/metrics['disp_imp'][best_ind])\n", + " print(\"Corresponding 1-min(DI, 1/DI) value: {:6.4f}\".format(disp_imp_at_best_ind))\n", + " print(\"Corresponding average odds difference value: {:6.4f}\".format(metrics['avg_odds_diff'][best_ind]))\n", + " print(\"Corresponding statistical parity difference value: {:6.4f}\".format(metrics['stat_par_diff'][best_ind]))\n", + " print(\"Corresponding equal opportunity difference value: {:6.4f}\".format(metrics['eq_opp_diff'][best_ind]))\n", + " print(\"Corresponding Theil index value: {:6.4f}\".format(metrics['theil_ind'][best_ind]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "If3oRVsIHHV_", + "outputId": "50ca70be-1981-4935-9b50-35f537fbbccf" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7627\nCorresponding 1-min(DI, 1/DI) value: 0.6066\nCorresponding average odds difference value: -0.1831\nCorresponding statistical parity difference value: -0.2643\nCorresponding equal opportunity difference value: -0.1608\nCorresponding Theil index value: 0.0936\n" + } + ], + 
"source": [ + "describe_metrics(val_metrics, thresh_arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6d_DyGR6HHV_" + }, + "source": [ + "#### 3.2.3. Testing LR model on original data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "U-poJJj2HHWD" + }, + "outputs": [], + "source": [ + "lr_orig_metrics = test(dataset=dataset_orig_panel19_test,\n", + " model=lr_orig_panel19,\n", + " thresh_arr=[thresh_arr[lr_orig_best_ind]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "8QOwbTfFHHWD", + "outputId": "89bac9e9-6f9d-49f0-e009-ee49c9d67f9b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7759\nCorresponding 1-min(DI, 1/DI) value: 0.5738\nCorresponding average odds difference value: -0.2057\nCorresponding statistical parity difference value: -0.2612\nCorresponding equal opportunity difference value: -0.2228\nCorresponding Theil index value: 0.0921\n" + } + ], + "source": [ + "describe_metrics(lr_orig_metrics, [thresh_arr[lr_orig_best_ind]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kTskH6IFHHWE" + }, + "source": [ + "For all the fairness metrics displayed above, the value should be close to '0' for fairness.\n", + "\n", + "1-min(DI, 1/DI) < 0.2 is typically desired for classifier predictions to be fair.\n", + "\n", + "However, for a logistic regression classifier trained with original training data, at the best classification rate, this is quite high. This implies unfairness.\n", + "\n", + "Similarly, $\\text{average odds difference} = \\frac{(FPR_{unpriv}-FPR_{priv})+(TPR_{unpriv}-TPR_{priv})}{2}$ must be close to zero for the classifier to be fair.\n", + "\n", + "Again, the results for this classifier-data combination are still high. This still implies unfairness." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DmtHiBcRHHWE" + }, + "source": [ + "### 3.3. Learning a Random Forest (RF) classifier on original data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dq0DCzjHHHWE" + }, + "source": [ + "#### 3.3.1. Training RF model on original data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YiiL5wqoHHWE" + }, + "outputs": [], + "source": [ + "dataset = dataset_orig_panel19_train\n", + "model = make_pipeline(StandardScaler(),\n", + " RandomForestClassifier(n_estimators=500, min_samples_leaf=25))\n", + "fit_params = {'randomforestclassifier__sample_weight': dataset.instance_weights}\n", + "rf_orig_panel19 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H7RinWUpHHWE" + }, + "source": [ + "#### 3.3.2. Validating RF model on original data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z3qGN6sqHHWE" + }, + "outputs": [], + "source": [ + "thresh_arr = np.linspace(0.01, 0.5, 50)\n", + "val_metrics = test(dataset=dataset_orig_panel19_val,\n", + " model=rf_orig_panel19,\n", + " thresh_arr=thresh_arr)\n", + "rf_orig_best_ind = np.argmax(val_metrics['bal_acc'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false, + "id": "h2gP_bMdHHWE", + "outputId": "177ffe66-6dd6-4478-a58d-5c1862285cab" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "disp_imp = np.array(val_metrics['disp_imp'])\n", + "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", + " disp_imp_err, '1 - min(DI, 1/DI)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false, + "id": "jvBY8APFHHWE", + "outputId": "a1bba751-3d5b-4dbd-b803-ad2c98a7e34d" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced Accuracy',\n", + " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "hU_k8-v9HHWF", + "outputId": "88f2f7d7-868e-4ca4-89d1-b2617f4d79a3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.2300\nBest balanced accuracy: 0.7717\nCorresponding 1-min(DI, 1/DI) value: 0.4860\nCorresponding average odds difference value: -0.1157\nCorresponding statistical parity difference value: -0.1929\nCorresponding equal opportunity difference value: -0.1063\nCorresponding Theil index value: 0.0896\n" + } + ], + "source": [ + "describe_metrics(val_metrics, thresh_arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uC7NIOsZHHWF" + }, + "source": [ + "#### 3.3.3. Testing RF model on original data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_pydY7xcHHWF" + }, + "outputs": [], + "source": [ + "rf_orig_metrics = test(dataset=dataset_orig_panel19_test,\n", + " model=rf_orig_panel19,\n", + " thresh_arr=[thresh_arr[rf_orig_best_ind]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "luhNCYPfHHWF", + "outputId": "d33d2a3f-4936-48a1-ce26-4583af4ff7b7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.2300\nBest balanced accuracy: 0.7638\nCorresponding 1-min(DI, 1/DI) value: 0.5141\nCorresponding average odds difference value: -0.1388\nCorresponding statistical parity difference value: -0.2190\nCorresponding equal opportunity difference value: -0.1135\nCorresponding Theil index value: 0.0936\n" + } + ], + "source": [ + "describe_metrics(rf_orig_metrics, [thresh_arr[rf_orig_best_ind]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0a2LHpNYHHWF" + }, + "source": [ + "As in the case of the logistic regression classifier learned on the original data, 
the fairness metrics for the random forest classifier have values that are quite far from 0.\n", + "\n", + "For example, 1 - min(DI, 1/DI) has a value of over 0.5 as opposed to the desired value of < 0.2.\n", + "\n", + "This indicates that the random forest classifier learned on the original data is also unfair." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1U0RwwbIHHWF" + }, + "source": [ + "## [4.](#Table-of-Contents) Bias mitigation using pre-processing technique - Reweighing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6HlWdhCTHHWF" + }, + "source": [ + "### 4.1. Transform data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z7mwDFmVHHWF" + }, + "outputs": [], + "source": [ + "RW = Reweighing(unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "dataset_transf_panel19_train = RW.fit_transform(dataset_orig_panel19_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xFh-nxmOHHWG" + }, + "source": [ + "Metrics for transformed data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "zDKYll6wHHWG", + "outputId": "8473c9b8-0dc0-46c0-c798-ab965900b0e3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 1.0000000000000002\n" + } + ], + "source": [ + "metric_transf_panel19_train = BinaryLabelDatasetMetric(\n", + " dataset_transf_panel19_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "explainer_transf_panel19_train = MetricTextExplainer(metric_transf_panel19_train)\n", + "\n", + "print(explainer_transf_panel19_train.disparate_impact())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1R0Vk-3OHHWG" + }, + "source": [ + "### 4.2. 
Learning a Logistic Regression (LR) classifier on data transformed by reweighing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OHLbxcK2HHWG" + }, + "source": [ + "#### 4.2.1. Training LR model after reweighing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FGb3-8VIHHWG" + }, + "outputs": [], + "source": [ + "dataset = dataset_transf_panel19_train\n", + "model = make_pipeline(StandardScaler(),\n", + " LogisticRegression(solver='liblinear', random_state=1))\n", + "fit_params = {'logisticregression__sample_weight': dataset.instance_weights}\n", + "lr_transf_panel19 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "apSeaQhGHHWG" + }, + "source": [ + "#### 4.2.2. Validating LR model after reweighing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rObPE6MGHHWG" + }, + "outputs": [], + "source": [ + "thresh_arr = np.linspace(0.01, 0.5, 50)\n", + "val_metrics = test(dataset=dataset_orig_panel19_val,\n", + " model=lr_transf_panel19,\n", + " thresh_arr=thresh_arr)\n", + "lr_transf_best_ind = np.argmax(val_metrics['bal_acc'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NO_xBTfrHHWG", + "outputId": "85249a47-071b-4198-d370-5b4138611052" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "disp_imp = np.array(val_metrics['disp_imp'])\n", + "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", + " disp_imp_err, '1 - min(DI, 1/DI)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uh4aQ2Z6HHWG", + "outputId": "538172de-f2cc-4c47-8aa7-cfd0d4094b59" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAroAAAG4CAYAAACq3USPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd3hU1dbH8e9OhQQIJbRA6E1AOgKiFBVFFLwiKgh2QcV77dj1XsurXsWGylVARcGCiNhFQKqISG9C6EmAJBACARLSZtb7x56QEEOYCUlmJlmf5znO5MyZM3sw5Tf77L22ERGUUkoppZQqbwK83QCllFJKKaVKgwZdpZRSSilVLmnQVUoppZRS5ZIGXaWUUkopVS5p0FVKKaWUUuWSBl2llFJKKVUuadBVSimllFLlkleCrjFmrDFmtzEmwxiz2hhz4RmOv8EYs84Yk26MSTTGTDfG1CtwzDXGmL+MMZmu26tL910opZRSSvkmT7KWMaa+MeYzY8xWY4zDGDP1NMf5XdYq86BrjLkeeAt4EegM/A78bIxpdJrjewPTgI+BdsA/gLbAp/mO6QXMcO3r5LqdaYzpUXrvRCmllFLK93iatYBQIBl4GVhxmnP6ZdYyZb0ymjFmBbBBREbn27cd+EpEHi/k+IeBf4lI43z7bgXeFpEqrq9nADVFZEC+Y+YDB0VkROm9G6WUUkop3+Jp1irw3B+AZBG5pcB+v8xaZdqja4wJAboCcws8NBc4/zRPWwbUN8YMNlYkMBz4Kd8xvQo55y9FnFMppZRSqtwpZtZyh19mraAyfr1IIBBIKrA/CbiksCeIyHJjzHBsF3llbJvnATfnO6zeac5Zj0IYY8YAYwDCwsK6RkZGevYulFJKKaW8IC4uToA1+XZNEpFJ+b72OGu5yaOs5SvKOuh6zBjTFngbeB77yaE+8CrwPnBTcc7p+oaYBBAeHi6xsbEl01illFckJiYCUK+eT/++VUqps2aMOSEi3bzdDn9R1kE3GXAAdQvsrwsknuY5jwN/isirrq83GGPSgKXGmCdEZK/ruZ6cUylVjgwfPhyARYsWebchSinlfcXJWu7wy6xVpmN0RSQLWA0MKPDQAOyMwMKEYf+H5Zf7dW77l3t4TqVUOfLYY4/x2GOPebsZSinldcXMWu7wy6zljaELrwPTjDF/Yiea3QVEAe8BGGM+ARCR3GEJ3wOTjTF3kzd04U1gjYjEuY55C1hijHkM+Aa4GugPXFAm70gp5VUDBw70dhOUUsqXeJq1MMZ0ct2tBjhdX2eJyF+u/X6Ztco86IrIDGNMLeApbGjdBAwSkdyBso0KHD/VGFMV+CfwGpAKLAAezXfM764Jay8AzwE7getFpNBacEqp8iU+Ph6A6OhoL7dEKaW8z9Os5bK2wNeDgVigieucfpm1yryOrq8JDw+XtLQ0bzdDKXUW+vXrB+gYXaVU+WeMSReRcG+3w1/4fNUFpZQ6k6eeesrbTVBKKeWDtEdXe3SVUkop5Se0R9czZVp1QSmlSsOuXbvYtWuXt5uhlFLKx2iPrvboKuX3dIyuUqqi0B5dz+gYXaWU33v22We93QSllFI+SHt0tUdXKaWUUn5Ce3Q9o2N0lVJ+LyYmhpiYGG83QymllI/RHl3t0VXK7+kYXaVURaE9up7RMbpKKb/34osversJSimlfJD26GqPrlJKKaX8hPboekbH6Cql/N6mTZvYtGmTt5uhlFLKx2iPrvboKuX3dIyuUqqi0B5dz+gYXaWU33v11Ve93QSllFI+SHt0tUdXKaWUUn5Ce3Q9o2N0lVJ+b926daxbt87bzVBKKeVjtEdXe3SV8ns6RlcpVVFoj65ndIyuUsrvvfnmm95uglJKKR+kPbrao6uUUkopP6E9up7RMbpKKb+3cuVKVq5c6e1mKKWU8jHao6s9ukr5PR2jq5SqKLRH1zM6Rlcp5ffeeecdbzdBKaWUD9IeXe3RVUo
ppZSf0B5dz+gYXaWU3/v999/5/fffvd0MpZRSPkZ7dLVHVym/p2N0lVIVhfboekbH6Cql/N7777/v7SYopZTyQdqjqz26SimllPIT2qPrGR2jq5Tye4sXL2bx4sXeboZSSikfoz262qOrlN/TMbpKqYpCe3Q9o2N0lVJ+78MPP/R2E5RSSvkg7dHVHl2llFJK+Qnt0fWMjtFVSvm9+fPnM3/+fG83QymllI/RHl3t0VXK7+kYXaVURaE9up7RoKtBVym/Fx8fD0B0dLSXW6KUUqVLg65ndDKaUsrvacBVSilVGB2jq5Tye3PmzGHOnDneboZSSikfo0MXdOiCUn5Px+gqpSoKHbrgGQ26GnSV8nuJiYkA1KtXz8stUUqp0qVB1zM6Rlcp5fc04CqllCqMjtFVSvm977//nu+//97bzVBKKeVjdOiCDl1Qyu/pGF2lVEWhQxc8o0FXg65Sfi85ORmAyMhIL7dEKaVKlwZdz+gYXaWU39OAq5RSqjA6Rlcp5fe+/vprvv76a283QymllI/RoQs6dEEpv6djdJVSFYUOXfCM9ugqpfzet99+y7fffuvtZiillM8wxow1xuw2xmQYY1YbYy48w/F9XcdlGGN2GWPuKvD4f4wxUmBLLN13cfY06Cql/FZGtoMdB46zJjGT5KxAbzdHKaV8gjHmeuAt4EWgM/A78LMxptFpjm8K/OQ6rjPwEvC2MeaaAofGAPXzbeeWyhsoQTp0QYcuKFUiElMzOHgsk0Y1w4gICy6RczqdwsHjmcSlpBN3KJ34w+nEpaQTn5JOfMoJEo9mAJC2ZQnBgYaf3nqMTtHVS+S1lVLKF7kzdMEYswLYICKj8+3bDnwlIo8Xcvx/gaEi0jLfvilAOxHp5fr6P8AwEWlfMu+kbGjVBaVUsTmdwpLtB5n+RxwLtibhdH1urlYpiEa1wmhUM4zommFE17D3G9UMI6p6ZUKCAsh2ODlwLJPE1Ay7Hc0gMfUEiUczXbcZJKVmkuVwnnw9Y6BetUpE1wijd4tIomtWplHNMJ6/+0V2Jadxx8cXM3tsb6JrhnnpX0QppbzLGBMCdAXGF3hoLnD+aZ7Wy/V4fr8ANxtjgkUk27WvmTFmP5AJrACeEJFdJdPy0lHhe3Sjo6Nl2rRp3m6GUn7F4RQOp2dxKC2LrBwnQQEB1AwPpnJIIFk5QpbDSVaOa3M4yf97xgCBAQHkOJ1/O2+AMQQFGoIDA1ybISQwgJAg+3VIYADG/L09GRkZZOY42Xcsh+CAAJrVCSewsAOVUsrP9e/fPwvYmG/XJBGZlPuFMSYK2Af0FZEl+fY/A4wUkdYFz2mM2QZMF5Hn8u3rAywGokQkwRhzOVAV2ArUAZ4C2mB7fQ+V5HssSRW+RzclJeXkjG2l1OmJCGviDjP9jzh+3JhAVo6T85pEMrJnIwa2r0doUOFjZJ1O4cAx1/AD15Z8PJPaVUKpF1GJetUqnbytHhaMOYuA+vvOZG764E96NKvM1FvPIzhQpyEopcqdHBHpVtYvKiI/5//aGPMHsAu4GXi9rNvjrgofdJVSRUvLzOGbdfuY/kccWxKOUiU0iOHdoxnZozGt61U94/MDAowNshGVOK9pzVJp4/Tp0wEYNWoUL1/TgYdnrufJ2Rv57zUdzio4K6WUH0oGHEDdAvvrAqerkpB4muNzXOf7GxE5bozZDLQs7HFfoUFXqQrou/X7mbRkJxnZTnIcTrIdQo7TicMp9r7DSY5TyHEKDtfA27b1q/Hi1edyVacowkN961fHlClTABt0h3VtSNyhNCYs2EHjWuHc07+Fl1unlFJlR0SyjDGrgQHAzHwPDQBmneZpy4GrC+wbAKzKNz73FMaYStihCwvPrsWly7f+WimlSpWI8M6CHbw2bxtt6lWlVd0qBAUEEBRoCAowBAUGEBxgCAyw42Nzx8te2LI2XRpV99ne0Xnz5p3y9QMDWhGbks6rv8TQqGYYgztGeallSinlFa8D04wxfwL
LgLuAKOA9AGPMJwAicpPr+PeAfxpj3gTeB3oDtwAjck9ojBkPfA/EYcfoPg2EAx+X/tspPq8EXWPMWGActgbbZuB+EVl6mmOnYsd/FHSyvIYxph+Ff6I4R0S2lkSblfJ3WTlOHv96I7PW7OXqzg14+ZpzTzuu1t8EB59azswYwyvDOrD/yAkemrmeqOqV6Nq4dIZNKKWUrxGRGcaYWtgJY/WBTcAgEYl1HdKowPG7jTGDgDeAu4H9wL0ikr8HuCHwORAJHAT+AHrmO6dPKvOqC64ixtOBscBvrttbgbYiElfI8RFA5QK7lwFLRORW1zH9sEG3HZCS77iDIuIoqj1aR1dVBKnp2dw5fRV/7Erh/ktact/FLX22d7Y4pk6dCsAtt9xyyv7DaVkM/d/vpJ7IZvbY82lcq+hVM0WEvxKO8t36/fy65QBdGlXnyUFtS6wusFJKnS1dAtgz3gi6HhUxLuT5vbEBubeI/O7a1w8bdGuLSKGDpk9Hg64q7+IOpXPr1D+JS0nnlWEduLpzQ283qcTlVk5ZtGjR3x7bnZzG1ROXUTMshK/Hnk/1sJC/HbMnOY3v1u/nu/X72XHgOEEBhi6NarA67jC1wkP4v6vPZUDbgvM0lFKq7GnQ9UyZBl1XEeN0YISIzMy3/12gvYj0deMcU4Fu+VfmyBd0Y4FQ4C/gBRE54wBpDbqqPFsde5gxn6zCIcL7o7rSo1ktbzfJK1buSWHk5BV0alSdabefR2hQIAeOZvD9hgS+W7eP9XtTATivaU2GdIxi0Ln1qRkewqZ9qTw8cz1bE4/xj05R/HtwO2qE/z0oK6VUWdGg65myDroeFzEu8PwIIAF4XETeyre/NdAfWAmEADdiB173LWzsrzFmDDAGICQkpGtmZubZvjWlfM4PG/bz4JfriYqoxIe3dKdZ7SrebpJXfbtuH/d9sY5+rWuTleNk+a5DiED7BtUY0jGKKztEEVW94CgpO7b53YU7eHfhDqqHhfDCP9oxsH19L7wDpZTSoOspfwu69wCvYVfpSDnDsT9hiyoPKeo47dFV3pTtcJb4ogYiwv8W7+SVOTF0a1yDSTd1o2Y574WcPHkyAKNHjy7yuHcWbGf83G00iwxncMcohnSKormbHwA2709l3MwN/JVwlCs71OfZIe2oVSX0rNuulFKe0KDrGb8aumCMWQdsFpGRbrzWv4HhInJOUcdp0FXe8tJPW/ho2R76tIrkyg5RXHxOHapWOrtJT9kOJ0/N3sSMVfEM6RjFK8M6UCm4fFRWKMoll1wCwPz58894bGJqBnWrhRZrMl62w8l7i3YyYcF2qlUK5rmr2nNFB+3dVUqVHQ26nvHWZLT1IjIm375twKyiJqMZY84DVgD9RWSRG68zG4gQkYuKOk6DrvKGmaviGffVBno0rUlcSjoJqRmEBAXQv3Xtk6E3LOTM1f9EhN3JaayOPcyauMMs33mIPYfSufeiFjwwoFW5qqzgS7YmHmXczA1s3JfK5e3r8dxV7aldVXt3lVKlT4OuZ7xVXmwatqxYbhHj24F2IhJbSBHj3OdNAfqISKtCznk/sAdbkzcEGAU8BlwjIl8X1R4NuqqsrY5NYcSkFXRvWoOPbz2PAGNYG3+Y79cn8NPGBA4cy6RScAAXt6nLlR3q079NnZO9shnZDjbsTWV17OGT4TYlLQuAiMrBdGlUnWFdo7WXsQzkOJxMWrqLN+dtxxjo37oOgztGcVGbOlQOKf+96Eop79Cg65kyD7pwcsGIR8grYvxA7phdY8wiABHpl+/4qthJaM+JyCuFnO8RYDS2mPEJbOB9SUR+OlNbNOiqsrT/yAmGvLOM8NBAvr2n999KXTmcwqo9KfywIYGfNyWQfDyLsJBALmwZSdLRTDbvTyXbYX9mm9UOp2ujGnRtXINuTWrQLLIKAQEVswd34sSJAIwdO7bMX3vHgeNM/yOWHzcmcPBYJmEhgVx8jv2Q0rdV7QoxdEQpVXY06HrGK0HXl2j
QVWXlRJaDa9//nT3J6cweez4t61Yt8vgch5MVu23oXbLtIA1qVKZr4xp0bVSDLo1rlPsJZp64/PLLAfj555+91gaHU/hzdwrfb9jPnE2JpKRlUTU0iAHt6jK4QxS9W0QSElSyEw+VUhWPBl3PaNDVoKvKgIjwz8/X8tPGBKbc1I2Lz9HFB8qzHIeT33ce4gdX6D2akUNE5WAub1+PkT0ac27DCG83USnlpzToekaDrgZdVQZyy1o9OrANd/dr7u3mqDKUleNk6faD/LAhgV82J5Ke5aBr4xrccn4TBravV+Ll5ZRS5ZsGXc9o0NWgq0rZ3M2JjJm2mn90iuKN6ztpJYRS8NZbdv2Y++67z8stKdqxjGxmrtrLx8v3EHsonXrVKnFjr8YM7x6tNXmVUm7RoOsZDboadFUp2pp4lKETf6dlnSrMuLOXTkwqJUOG2HVhvvvuOy+3xD1Op7Bo2wE+WraHpduTCQkK4KqOUdzSuwntonRYg1Lq9DToekaDrgZdVUpS0rIY8s5vZOU4+e6fF1AvopK3m6R80PakY3y8fA+zVu/jRLaD85rU5JbeTejepCahwQGEBgUQEhigVwKUUoAGXU9p0NWgq0pBtsPJqCkrWBt/hC/v7EWn6OrebpLycaknspm5Kp6Pl+8hPuXE3x4PDbKhNzQ4MO9+UCD1IipxdecGXNquLqFBesVAqfJOg65nNOhq0FWl4MnZG/l0RRxvXN+Rqzs39HZzyr3x48cD8PDDD3u5JWfP4RSWbD/I3pR0MnOcdst25N3PcZKZ4/o628mWhKPsO3KCGmHBXN25Idd3j6Z1vaJL1yml/JcGXc+ceY1RpZRHPv59D5+uiOPOvs005JaR5cuXe7sJJSYwwNC/dR23j3c4hWU7kpmxMp5pf+zhw2W76dyoOsO7R3NlhyjCQ/XXvFKq4tIeXe3RVSXE4RRe+WUr7y/excVt6jDppm4EVtCVypR3HDqeyey1+/hiZTw7DhwnPCSQwR2juK57NJ2jq+s4X6XKAe3R9YwGXQ26qgSkpmdz7xdrWbztIKN6NuKZK9vpKljKa0SENXFHmLEyju/XJ3Ai20GrulUY1rUh/+jUgDrVdGKkUv5Kg65nNOhq0FVnaceBY4z+ZDXxKek8e1U7RvZo7O0mVTgvv/wyAI899piXW+J7jmVk88OGBL5cFc/auCMEGLiwZW2GdmnApW3rUTlEJ7Ap5U806HpGg64GXXUW5v+VxP0z1lEpOID/jepK9yY1vd2kCmn48OEAfPHFF15uiW/bdfA4X6/Zx+y1+9h35ARVQoMYdG49runSkO5NahKgQ22U8nkadD2jQVeDrioGEWHiop2MnxtDu6hqTLqxG1HVK3u7WUq5xekUVuxO4es1e/lpYwJpWQ4a1qjM0M4NuLpLQ5pG6t9QpXyVBl3PaNDVoKs8lJ6Vw7iZG/hxYwJXdYri5aEd9PKv8lvpWTnM3ZzErDV7WbYjGadA1dAgBPuBDnDdx3VfTt6vUy2UZ65sx4C2db3SdqUqIg26ntGgq0FXeSA+JZ3Rn6wiJukYjw1sw5g+zXQmuw94/vnnAXj66ae93BL/lpiawffr95OQmgGAMZD73W0MJ7/Xjes/i2MOsjXxGEM6RvHvwW2pVSXUK+1WqiLRoOsZDboadJWblu88xNhPV5PjFN4e0Zl+HtQ6VaVr1KhRAEyfPt3LLalYsnKcvLd4J28v2E7VSsE8O6QdV3aorx/+lCpFGnQ9o0FXg646AxFhytLdvDxnK01qhTH5pm40q13F281SymfEJB7jka/Ws35vKgPa1uX//tFeS5gpVUo06HpGg64GXVWEoxnZPDJzA3M2J3JZu7q8em1HqlUK9nazlPI5OQ4nHy7bzWtztxEaFMDTV7ZlWNeG2rurVAnToOsZDboadEvPxo2QlARhYXYLD8+7HxYGISF24J+P+mv/UcZ+upr4wyd4/PI23H5BU/2j7aOeeeYZAJ577jkvt0TtOnicx2Z
t5M89KfRpVZuXhp5LA61IolSJ0aDrGV0EXZW85GQYNw6mTi36uICAvNDbtCmMHg0jRtivvWzmqnie+mYTEZWD+WJMT62P6+Pi4+O93QTl0qx2Fb4Y05PpK2J5+eetXPr6Yh4Z2IZO0dXJcTrJcQg5TtfmcJLtEBxOIcdp77epV5X2DSK8/TaUUuWE9uhqj27JEbHhdtw4SE2Fhx6CK66A9PSit7Q0WLYMNm2CGjXg1lvh7ruhRYsyfwsZ2Q7+891mvlgZT69mtZgwojO1q+pMcqWKIz4lnSdmb2Tp9mSPnndhy0ju6d+CHk1r6lUUpQrQHl3PaNDVoFsy/vrLhtMlS6B3b3jvPWjf3v3ni9jnvvsuzJ4NOTkwcCDccw9cfjkEln6d2rhD6dz96Wo27z/KPf2b8+CA1gTqSlFKnRUR4Y9dKaRn5RAUGEBQgLFb7v1AQ3BgAIEBhgBjmLMpkQ9+20Xy8Sy6Na7BPRe1oF+r2hp4lXLRoOsZDboadM/OiRPwwgvw6qtQpQq88grcdpsdllBc+/fD5Mnw/vuQkGCHNdx1F9x+O9SqVXJtz2feX0k8+OU6Aozhjes7clEbLYDvTx5//HEAXnrpJS+3RJWEjGwHM1bG8/7inexPzaB9g2rc068Fl7Wrp8sUqwpPg65nNOhq0C2+X36BsWNh1y648UYYPx7qlGBt2exs+OYb28u7eDGEhsLIkfDGG1CtWom8RI7Dyfi523hv8U7ObRDBxJFdiK7p/THCyjNjxowBYNKkSV5uiSpJWTlOvlm7j/8t3snu5DSa1w5nbL8WDOkURXDgWXyYVu5LS7MTi9etgx07oG5d2/mQu9Wo4dOTissjDbqe0aCrQddzCQnwwAMwYwa0agX/+x9cdFHpvuamTTBxou3lHTPGvmYJeP6Hv/jgt92M7NGIp69sS6VgXcpXKV/jcAo/bkxg4sIdbE08RsMalbmrb3Ou7daQ0CD9mS0xSUk20K5da2/XrYNt2/LWfw4NhczMU59TrdqpwTd3693bhuCS4nSe3ZXCckSDrmc06GrQLZoI7N6d94tv7Vo7ljYrC554Ah591P7yKysPPQSvvw5Ll8IFF5zVqfYkp3HJ64sZ1rUhL1/ToYQaqFQRcnIgSIvdFJeI8OuWA7yzcAfr4o9Qt1ood/ZpzojzGlE5RAOvRzIyYMUKWLQI/vjD/n5PTMx7vEkT6NQpb+vcGaKj4ehR+zehsG3PHjvBGGw5ydGjbadIo0bFb+fatfYq3owZ9mcnMrLwrVatvPtdu0JE+a3coUHXMxp0Nejmyc62k8pyA21uuD161D4eGAht2kD37jbktmxZ9m1MS7OT3CpVsm07i5B9z2drWLDlAIvH9dNVnPzcww8/DMD48eO93JIivPce3HuvnVz50ENw4YV6ybeYRIRlOw4xYcF2/tydQmSVEEZf2IxRPRsTHqofJAqVmWmD7cKFNtwuX273GQPnnmuDbG6g7dCheL2xInDgAMTEwJQp8Pnndv+IEfDII+5PUHY44IcfbMBdvNjO/xg50obn5GS7HTqUdz819dTn164Nb70Fw4eXy58xDbqe0aBbkYNuUpIt6/Xbb/Z23TrbUwtQuTJ07HjqL7/27e1+b/vlF1uR4Zln4Nlni3WKdfFH+Me7y7j3ohY8eGnrEm6gKmv33HMPAO+++66XW3IaL71kPxz26GHHOR46BN26wYMPwrBhEKyr7RXXil2HeHvBDn7bkUyNsGBuv6ApN53fRFcwzA22ixblBduMDBv8OneGfv3sduGFUL166bQhLs5egZs82fb0XnGFDbyn+5B3/Dh89JENqTt32p7ge++1E5GLamNWFqSk2NC7d6/927Bypf1QOXGi7Z0ubTt32r9NVarATTeV6ktp0PWMBt2KEnSdTtiyxQba3G3nTvtYaKjtpe3ZE7p0scG2VasyKelVbKNGwZdf2l7ndu08eqqIMHzSH+w4cJzFj/SnivYAqdIiAo89ZquRjBxp/4h
nZ8O0aba3KiYGGja0f8xHjy69wFEBrIk7zDsLdrBg6wGqVQrilt5Nua13E6qHhXi7aWXL4bDB8qmn7AcqY+zv9PzBtiTHzrrj0CEbOCdMsGG0Z08beK+6yo67jY+Ht9+GSZNs72zPnnbIw9ChxRvq43DYScxPPGF/Bp9/3v6MleSwobQ0+wFizhy77dhh9195JXz/fcm9TiE06HpGg255DroOh71c+vPP8PvvcPiw3V+7tp0okLt16VK242xLwsGDcM450Lq1Ha/rwSSFBVuTuG3qKp67qh039WpSem1UFZvDYauSTJpkb99++9TvU6cTfvrJ9ngtXGh7gm6/He67z07mUcWyaV8qby/Yzi+bkwgPCeTGXk0Y0LYOrepWpWp57+VdtMh+/2zYYEPt/fdDnz5lH2xPJz3dLio0frwd09uqlb1S+O239vFrrrEBt2fPknm9uDhbi/2HH+y43cmTbW92cYjA5s15wXbpUtuTHBYG/fvbq4wDB5bJQkcadD2jQbe8Bt0jR+CGG2zIbd3aTtzKDbYtW5aPcUvTptlLRO++a4OEGxxO4fK3lpDtEOY+0EdLFJUT999/PwBvvvmml1vikpVlvzdnzLC9Si+8UPTP3Jo1tof3iy9sAB46FP7xDxsEWrbUnt5i2Jp4lHcW7ODHjQkniwY0rFGZ9rVC6cZR2p84SLOjiUQmxhOwa5e9wtW9ux1b6m8f/HfvtitSzpoFjRvDa6/Z7yFf/T2fk2Pb+sortu233w7/+tfZTVo7HRH46it7/uRkG6SfffbMS80fP24/MKxbB6tWwdy5sG+ffax9extqL7vM/m2tVLZzPDToekaDbnkMulu32ktCu3bZXqS77vJ2i0qHiP1F88cfdhJdw4ZnfMqXK+N5ZNYGJo7swqBz65dBI1VZ8Kmgm55ux93+/LNdSMU1Uc4te/fCO+/YMnpHjuTtj4zMC725W6tWtveoSpWSfw/lxdGjHH3/A46uXIvs3ElY3G6qH0oiUJwnDzkeUpmk2g3JrF2HtuuWIUOHYnJn+Pu6tDQ7/nv8eDvU7PHH7URHX5hL4WsOH7ZVgiZPtldM3nsPLr3UPpaY+Peyatu355VVq1Urr9f2ssvc+vKiucgAACAASURBVFtTmjToekaDbnkLuj/+aHtyQ0Ptp9g+fbzdotK1a5f9dD1ggF1coogejBNZDvqPX0S9iErMHnu+LimqSl5qKgwebCd4vv++HXdbHJmZtodx+3a7bduWdz+3VylX8+Zw7bX25/7cc8/+PZQHR47YD/lvvGEDTmSk/Xdq0QKaNye7aTP21ojiryq12ZAZQkzScf7af5RBC2fyn18nceya66g64zPfnacgAp99ZoPbvn12/PfLL3s9gPmFJUtsLfaYGDjvPIiNtROzczVtmjcBO7e0WsOGPtU7rkHXMxp0y0vQFbGf7J96yv6Azp5dOpeBfNH48fay3cyZtiftNN5duINXf4lhxpie9GhWOksJqwrs4EHb27NpE0yfDtddVzqvk5ZmJ77kBt/Fi2H+fDsmuF07W8ppxAho1qx0Xt+XHT5sZ+y/+ab90DFkCDz9tK1wcQZOpzBjVTwHnvgP9/36Eesvv5bW33xGpRAf69ldtcpOrFq+3L6vt96C88/3dqv8S0aG/WAwZ46d65EbaDt29IthQhp0PaNBtzwE3bQ0uO02W4VgxAg7xuxM44/Kk5wcW7Zp/347hKGQiRcpaVn0fWUhPZrVZMrN3b3QSFWavF5eLD7eXgaNjYWvv7aXOMvSwYP2g97nn9veZLA/EzfcYAN3vXpFPz872/Zq7d9vL+N26FA2JZlKyqFDNtxOmGDrfl99tQ24xZh4lHw8k7U3/ZMBs6cws/dQ6nzwP/q2LsbS5rmhOzvbdjo0amQXXGjU6MxLmIvYntotW+xQtC1b8u4nJtpleF96CW6+WVcLq4DcDbrGmLHAOKA+sBm4X0SWFnF8X+B1oB2wH3hFRN47m3P6Ag26/h509+y
xk1Y2bID//teOB/ShSyxlZs0aexnqttvsLPcCnv1+Mx//vodf7u9Dy7pVPTt3RoYtFzN/PgwaZC9N6x8Xn+LVBSO2brU9uUeO2KFDZ7li31mLjbWT4D7/3I41DAiwS3QPHWoD1P79dhnv/LfJyXnjEcEOfXr8cXtpvIwn2ngkOdlWrXj7bTt5aNgwe1WrY8ezO68I+2+7m6ip7/NOr+vY+s9HeebKtu4tLCNiOx3uu89+AAkIsB/G84uIyAu/uZvIqYH2+PFTjz/nHLtgT8eO9vfcmcKyKrfcCbrGmOuB6cBY4DfX7a1AWxGJK+T4psAm4ENgInCB63a4iMwqzjl9hVtB1xh6iLCiDNpT5vw66C5aZMfmZWfbP2qXX+7tFnnXuHF2GMOiRdC378ndcYfSufj1RVzTxYOlfkVsreFPPrF/tFJTISTEzqZv396GgOuu848JK6p05OTY2e3/+Q9UrWovg3bp4u1WnWrLFvu74fPP8+p8BgbaHsH69SEqyt7mv1+zpq1k8vnndkzrxIl2DLy3paXZVbcOHrS3S5bYtqWn25/Fp55yf+Utd4jgGHMngVMm83q/m/mozwgevqw1o3o2JjDgNJ0Je/bYCjA//5xXzqpDB9sLGxd36hYfn3f/0CH7/IYNbZg955y8YHvOOfb/V0XswFCFcjPorgA2iMjofPu2A1+JyOOFHP9fYKiItMy3bwrQTkR6FeecPkNEzriBOEHWg/wLpIY7z/GXLSwsTPyO0yny9tsigYEibdqIxMR4u0W+IS1NpFkzkVatRE6cOLn7X5+tkdZP/SSJqSeKeLLL9u0izzxjzwMi4eEiN90kMm+eSEaGyPTpIm3b2seaNxeZPFkkM7MU35TySWvWiHTubL8Prr5aZN8+b7eoaE6n/d5OSBDJyXHvOXPnirRoYd/j8OEi+/eXXtv27rU/YxMmiIwbJ3LzzSKDBol06ybSuLFI5cq2Hfm3gACRG24Q2by5dNolYv+tRo4UAflk+APS+NEfZPDbS2Xj3iOnHpedLTJ+vEhYmP2d8eab7v87i4gcPy5y7FjJtl2VW0CaFJnZCAFygGsL7H8XWHya5ywB3i2w71ogGwguzjl9ZXO3R9cJ5B6YCcwGpoiwsETSthdFR0fLtGnTSvU1TE4OUkI9f8bhoMWECTT47juSzz+fLU88gSNcx6TnqrFqFR3HjWPPjTey57bbOJHtYMeB49SpWom61QqvjRl09Ch1Fi2i7ty5RGzejBjD4a5dSRowgOQLL8RRsFSP00nksmU0nj6dqtu2kVG7NvHXXUfClVfi9OXLvOVY7pCFhz0p5VUMAZmZNJk6legvvySrenW233cfyeW4sklAVhbRn39O408/xRkSwu7bb2ffkCHFq0bgcFA5IYGw2FjC4uIIj4219+PjCcp3Vc0ZHExWjRpkV69OVvXqZEdEkF2jhr3v2rIiIsisV4+smjVL8N0WzjgctH32WWovXcraex/gj579cTiFehGViKwSQtWYGFq99hpVt28n+fzz2X7vvWTWrVvq7VIVV//+/bOAjfl2TRKRk2P2jDFRwD6gr4gsybf/GWCkiPxt3XtjzDZguog8l29fH2AxEAUYT8/pK9wNuuOB64Dc2iW5T9oFfABMFSGxVFpYykp96MLhw9Crly1WPXbs2V1+On4crr/erqY0bpydNapjRf/u5pvhs8+Qt9/mg992c/xgCnd3jiQ0Pc1OVMm/pabay41ZWXbG+k032Qk87pTpEYF58+D//s9eRq1d2xYjHzvWjqlTZebxx+1Vs5deeqn0XmTRIlsubMcOuOMOW+zeV1acKm3bt9sVpubNs5fk33vv9JUMxDUOeMMGu23caLeYGFs2LVdUVN4l+vybL16mz8qycyHmzCH9g494MKg9S9fs4p2ts+k3dwambl07TtiXF2lQ5caZhi5o0D2VR5PRjOECYARwDZA7DVUAB/At8H8irCvpRpamUg+6hw7Z8PTTT3Z5wylTile
+ZP9+u4b2hg12/Nydd5Z8W8uLQ4dsPdGEhLx9AQF28ka1ajaE5t6vVs1OBBkxwpaXKe4fqd9+gxdftGPzIiLgmWds6NU/ev7vyBH7wXLKFFuLddIkO7mrohHXJKv777cVGu6+G5580i5ykRtqc4NtSkre8xo1sj+PbdueGmj97cPgiRNwxRWwZAny+OOkvT+FsINJ/Hj+EDpMm0jjZlHebqGqINwIuiFAOjBCRGbm2/8u0F5E+hbynCXARhG5J9++a4HPgDBs0PXonL6iWFUXjCEa+AToiw26xnWbA1wnwrcl2cjSVCaT0ZxOOzP48cdtT+GMGbZCgLs2brSz/Y8csX9oKvqkMzc4jqRy5yvfcyiwEjMevoyQalXKJnSuXWvLGv34o+0BmjrV//6gqzyzZ9uezAMH7IpT//53xSrdV5jUVPs9/u679ndbripVbKA991w7AatDB3vfD+qSuu34cVtGbvlyaN+etU/9l1tjgnA4hbeGd+KiNjpkQZU+DyajrReRMfn2bQNmyekno10tIq3y7ZsEnCunTkZz+5y+wtMe3QHAXcCVQBA24AKsBaoBzYG/RCjBqa+lq0yrLvzxBwwfbntn//tf2zNypvA1d64tmVO1qg1PnTqVTVv93MxV8Yz7agPv3NCZKzuUcU+LiK2fOW6cXWVn1ixdsaqU3XrrrQB89NFHnj9ZxM7kj409ddu40Q5X6NQJPvjA9yoqeNuaNXYoQ+vWeXV3K8JQqqNH7fsePBhCQohPSefOaavZkniU+y9uxb8uakHA6aoyKFUCPCgvNg1bAmwZNrvdjq2iEGuM+QRARG5yHZ9bXmwy8D7QG1tebIScWl7stOcs6fdZUtwdozsOGAPkLrVjACfwHfCGCEuNIRw7fiNMhJBSam+JK/PyYocP2xqI33xjhyJMnWrX0S7MBx/YIQrt2tmQq8s7uiUzx0H/VxdRu2oo39zT23tL/f72my17dOSILTM0cqR32lEce/faFbcWL4aVK+14zDvusFcifHA4xjPPPAPAc889d/qDsrPth83ffoPdu/MCbVycvSydX9Wq0LgxjBoFDz4IwcGl2Hrl705kOXhy9ka+XruPi9vU4fXrOxFRWb9nVOnwcMGIR7CLO2wCHsgdX2uMWQQgIv3yHd8XeIO8BSP+K4UvGFHoOX2Vp1UXDHAUW1B4ggh7Chy3FWgpgo8uEP53XqmjKwLvvGMXd6hTB774Anr3znvc6bSXBl980Rai//JLLQ7ugU9XxPLk7E18ctt59GlV27uNSUy0EwiXLLGXwF9/3dbj9TWxsXnBdvFi2LnT7o+IsKtL/fmnrVfavr0NvKNGnf4Dmq8QgW3bbO/b3LmwcGFeEf7atW2QPd1WvbpPBnrlu0SET5bH8vwPfxFdM4z3RnWldT0PF6dRyg26BLBnPAm6u4C3gQ9EOH6a46KAYBF8tgu7IK8uGLF6te3xi42F55+3qxBlZ8Ott9pi7XfcYQuia2+S27JynPQfv4g61UL5+u7zvdebm192NjzxhF3MokcP+Oor7/bOHzliKwds3JgXbPfssY/VqAF9+tgFN/r1s5ekAwPt5dovvrATslautGF96FD7Pdq/v+9csj50CH791QbbefNsby3YSWQDBtjxlf37l69xo8qnrNyTwthP15CWmcMrwzqU/dApVe5p0PWMu0H3KuA7ETyfuebjvL4yWmoqjBlje20vvdReQl261K5j/uij2qvkoc//jOPxrzcy9dbu9CvO+vSladYs+yEmNNSGxosvLr3XSkmxYXb7dnubf0tOzjsuMvLUYNu+/ZlD6/r1dljN9Ol2KE7TpnD77XDLLdCgQem9p9NJSWHUJZdAbCzTDx+2PbnVq9vKCJdeagNus2ZnPo9SJSTpaAZ3T1/NmrgjnNe0Jhe1qcNFberQsk4V3/jwrfyaBl3PuBt0I4AIIF2E5Hz7I7FlJ1JFSC21VpYirwddsH+YJ02ya6OLwMcf20lryiPZDtubWys
8xLtjc4sSE2N7QrduhRdesB9mPOkNFbEhdv9+2Lcvb8v/9Z49NoDmMgaio+1yri1aQMuW9rZ1a7sVtzc2I8NWJZgyBRYssOcZOBBuvBGuugoKLrRR0v76CyZMgE8+4fkTJyA6mqdHj7bhtmtXXZ5ZeVVWjpP3F+/kp02JbEk4CkCD6pW5qE0d+repTa9mkVQO8ZtRfsqHaND1jLtBdxbwD+ABESbk2/9P4C1gtgjDSq2Vpcgngm6u7dvzFi5QHpuxMo5HZ23kw1u6+XaZn+PHbS/+55/boQz16tlx2U4nOByF38/OtiWu9u+3AbOg2rVtAf4GDWzd0tww27Kl7XEt7RXbdu6EDz+EadMgPt5O5ho2zIbevn1LbmiD0wlz5tiqFnPn2vc1ahTce69WtlA+KyH1BAu3HmRhzAGW7UgmPctBaFAA5zevRf82dejfug7RNSt42TrlNg26nnE36O7FzrBrJMK+fPujgL3APhGiS62Vpcingq4qtmyHk4teW0SNsBC+9dXe3PxEbB3SKVPs/YAAOxY2IKDw+4GBNsw2aJC35Qbb+vXtcAhf4HTaMb/TptmxyMeO2fHII0fa0FvcD3HHj9srHRMm2AlmUVF2ct+YMXb4hVJ+IjPHwYpdKSzYeoCFMQeIPZQOQLPIcM5rWpPuTWpyXtOaNKxR2fd/jymv0KDrGXeDbia2bm41EdLy7Q8HjgFZIpRyl1Hp0KBbPuTWzZ1yUzcuaevDvbkVSXo6fPedDb2//GJ7qDt3toF36FAID8/rsc7dRE79Oi0NPvnEfiBITYXu3W396WHDTqleMdw11OeLL77w1rtVqlh2HTzOgq0HWL7zECv3pHA0IweA+hGVTobe85rWpEXtKlqfVwEadD3lbtBNBmoAw0SYnW//1cAsIEUEv+xW0aDr/3IcTi5+fTFVQoP44V8XaC+IL0pKshPwpk2z1UY8ERhog+1990HPnoVO0Hz55ZcBeOyxx0qitUp5hdMpxCQdY+WeFP7cbbcDxzIBqB4WTLfGNenVvBbXdmtItUpajaei0qDrGXeD7lzgEiAVeA3YApwDPIidpDZfhMvcflFbcHgcdjjEZuB+EVl6mmOnAjcX8tAp/6NdhY5fJ6/Q8SsFCx0XRoOu/5u1ei8PzVzP+zd25bJ29bzdHHUmW7bYyWtOZ97wjPybMacO2+jTx06mU6qCERHiUtJPht6Ve1LYcyidiMrBjOnTjFt7NyEsRCddVjQadD3jbtAdCnwFfysvZlz7TunpLfpc5npgOnYJud9ct7cCbUUkrpDjI4CC07eXAUtE5FbXMblL132IXbLuAtft8Nyl605Hg65/cziFS15fTKXgQH66V3tzlVLl26Z9qbw+bxsLth4gskoIY/u14IYejagUrBUcKgoNup5xK+gCGMN4bA9uQeNFeMTtFzRmBbBBREbn27cd+EpEHnfj+b2xAbm3iPzu2vdfYKiItMx33BTs+su9ijqfBl3/9s3afdw/Yx3vjerCwPb1vd0c5SXXXHMNALNmFfm5VqlyY3VsCuN/2cbyXYeoH1GJey9uybCuDQkO9JHFW1Sp0aDrGbeveYjwsDHMAIYAdYEk7CISK909hzEmBOgKjC/w0FzgfDdPMxrYnBtyXXq5zpHfL8DNxphgEcl2t43KfzicwoQF22lTryqXttUhCxVZr15Ffp5Vqtzp2rgmn4/pye87knl1bgyPf72R9xbv5P5LWjKkYwMCdeKaUoAHQRfAFWrdDraFiAQCsSE5vyTsGOAiuYYxXAcU7PmtB8wv5JxBrtdMKHCeMcAYgJB8M7eVf/lhw352HUxj4sguOhu5gnv44Ye93QSlvOL8FpF83bwWC2MOMP6XbTwwYz0TF+7kwQGtGNC2LkHaw6sqOLeDrjEEAYOA1vx9zCwiPFeC7TqdUUAAMO1sTiIik4BJYIculEC7VBlzOoW3F+ygVd0qDNQJaEqpCswYw0Vt6tKvVR1+3pTI6/NiuPvTNQQYqFO
1EvUiKlE/wt5GRVQ++XX96pWpUzVUhzso/2CMw3VPEHE7v7p1oDHUARZhQ+7puBN0kwEHduhDfnWBRDeePxqYJSIpBfYnnuacOa7XVOXMT5sS2HHgOG+P6Ky9uYohQ4YA8N1333m5JUp5T0CA4YoO9RnYvh6/bE5ka8JRElIzSEjNYFvSMRZvO0h6luOU5xgDretWZUyfZgzuGKWhV/myYv2xdzcRPwu0KeJxt3pFRSTLGLMaGADMzPfQAGw93tMyxpwHdATuL+Th5cDVBfYNAFbp+Nzyx+kUJvy6nRZ1qjDoXJ2ApuDiiy/2dhOU8hmBAYZB59b/2+9HEeFYZg4JRzJISD1BYmoG+1MzmLs5kQe/XM/r87ZxV9/mDOvaUKs4KF8Uh5t5Mz93y4vtBJoAU7GlwAS4D/iX6/7LIkx16wVtebFp2LJiy4C7gNuxFRJijTGfAIjITQWeNwXoIyKtCjlnbnmxycD7QG9sebERWl6s/PlpYwJjP13DW8M7cVWnBt5ujlJK+TUR4dctB3hn4Q7WxR+hdtVQRl/YlBt6NKZKqNbp9TVadcEz7gbdDCAYO+krCftzEWgM7YCNwDMivOD2i9oFIx7BLhixCXhARJa4HluEfYF++Y6vip1Q9pyIvHKac/YF3iBvwYj/6oIR5Y/TKQyasJQsh5N5D/TVmcVKKVVCRITluw4xceFOftuRTETlYG45vwm39m5C9TCduO0rymXQNeZ17NjbhzDGdnSKfFIip3Yz6KYBlbBh9wR2yEM91/2jwF4RGpVEg8qaBl3/MmdTIndNX80b13fk6s4Nvd0c5SMuv/xyAH7++Wcvt0Sp8mFt3GEmLtrJvL+SCAsJZFTPxtxxQVPqVKvk7aZVeOU06DoBJyJBp9wvAe6e5BDQALvcbyLQEPgUyHA9XqMkGqNUUUTs2NymkeEM7hDl7eYoHzJ48GBvN0GpcqVzoxpMvqkbMYnH+N+iHUxZuovJS3dRNTSIqpWCqVopiGquW7sFn7ytVjmILo1qcE79at5+G8p/OAGDMbnfNCV2udbdHt15wEVAD+zY3JGcOiD4NxH6llSjypL26PqPN+Zt461ft/PatR25pqv25iqlVFmJPZTGN2v3k5KWybGMHI5m5HAsI5tjGTkcy8zmeEYOxzJyyHHmRYMujaozskdjruhQXye3laBy2qO7H1st6wi281SA2NMcLYg0d/vUbgbd64D+2F7cROwkstquhw8CA0VY6+6L+hINuv5h4qIdvDInhmFdG/LKNR20pJhSSvkYESEj20lKehY/b0zgsxVx7EpOI6JyMMO6NuSGHo1oXruKt5vp98pp0P0UGOHm0YKI25+c3Aq6f28P1bDBNwdYJsIRj0/iIzTo+r4pS3fxwo9buKpTFK9f10knoKm/ueQSu7Di/PkFF0hUSnlL7uS2T1fE8cumRHKcQq9mtRjZsxGXtq1HSJDW7C2Ochp06wATgC5AC2yPbtxpjxdp6vapzxR0jSEU+Mv15RUibHX35P5Ag65vm7Z8D09/u5lB59ZjwvDOupylKtTkyZMBGD16tJdbopQqzMFjmXy5Kp7P/4xj7+ETRFYJ4bpu0VzfPZrGtcpXZitt5TLo5mcno3nUa1vk6dwcunAEqApUFiGrJF7YV2jQ9V0zVsbx6KyNXHJOXf43qouu2KOUUn7O4RSWbD/IZyvi+HVLEk6xK7NddE4dLm5Th86NauhVuzMol0H31PJiN7vul2l5sa+wK4/1FGFlSbywr9Cg65u+XrOXh2aup0/L2ky6qSuhQTqRQSmlypP9R07w08YEFmw9wJ+7U8hxCtXDgunfug4XtalDn1a1iagc7O1m+pxyGnRLrbyYu0H3AmA2kAo8CazD1tA9SaSIsRQ+TIOu7/l+/X7u+2ItvZrX4oObu+tsXXVG/fr1A2DRokVebYdSqniOZmSzdFsyv25NYlHMQVLSsggMMHRvUoOL29TlwlaRBAcGkJ7p4HhmDulZOaRlOUj
PtLdpmTmkZeWQnumgelgwLetWpVXdKjSNDC93HSXlNOjmYEuK1cBWXijzoQt2vMTpiYjbNXl9igZd3zJnUyL3fLaGro1qMPW27oSF+OW3lSpjU6dOBeCWW27xajuUUmfP4RTWxR9hwdYkft1ygK2Jx9x6XkhQAGEhgRw9kU1ulbPAAEPjWmG0qlOVlnWrlIsAXE6DrtfLiznPcIiI4JffMRp0fceCrUncOW017RtEMO32HrrGulJKKfYdOcGfuw8RYAzhIUGEhQYSHhJEeGggYSFBhIcGERYSeHIeR2aOg10H09iWdIwdB46zLekY25OOs+dQ2ikBuFHNMKJrhhFdo/LJ+41qhhFdI4yIMN8dMlFOg653y4sZw0dnfFXhVndf1Jdo0PUNS7Yd5I5PVtG6blWm39FDx2Upj2RnZwMQHKzfN0qpwhUMwDsPHic+5QTxh9M5kp59yrFVKwWdDL2NI8O4qVcTGlSv7KWWn6qcBt385cVye2vLprxYeadB1/tWx6YwcsoKmtQK54sxPakeFuLtJik/o2N0lVJn42hGNvEp6Tb4pqQTfzid+JR04lxbWEgQ46/tyIC2db3d1PIZdPMr4fJiem1Yed1bv+6gRlgIn97RQ0OuKpY77rjD201QSvmxapWCaRcVQbuoiL89tic5jXs+W8PoT1Zx+wVNeXRgG13sonT1L8mTuTt04cMzHCIi3F4yTSpb2qPrXcczc+jy3DxuPr8xT17R1tvNUUoppf4mI9vBiz9t4ZPlsXSMrs47IzoTXTPMK20plz26xjQCQCTu5P2iiLhd6askqi4YdDKaKqY5mxK4a/oavhjTk57Nanm7OcpPpaenAxAW5p0/PEqpiuGnjQk8+tUGMPDqsI4MbF+vzNtQToNuwTq6RVb68qTGrid97+Y0m1LFNn/LASIqB9OtcQ1vN0X5sUGDBjFo0CBvN0MpVc4NOrc+P957IU0jw7lr+mr+891mMnMc3m5WeWEK3C9qc5u7ibjg7LYgoBnwNNAZuNKTF1UKbK3EhVsP0K91bYJ0eV91Fu6++25vN0EpVUE0qhXGzLt68fLPW/lo2R5Wxx7mnRs607hW+epkLWOfkNeLm//+WTurqgvGUAVIBr4RYXhJNaos6dAF71kdm8I1/1vOhBGdGdIxytvNUUoppTzyy+ZExs1cjwi8fE0HruhQv9Rfs1wOXShFZ9uNFoRN3QNLoC2qgpm/5QBBAYa+rWp7uynKz6WmppKamurtZiilKpjL2tXjx3svpHmdKtzz2Rr+/e0mKnrZVl/j1tCF01RdqAT0BkIB/QujPPbrliS6N6mpi0Oos3bVVVcBWkdXKVX2omuG8eWdvXj1l61UqxSMMTp9yWPGnKm6V36CiNuVvtwdo3sLhY+XyP2/+ZO7L6gUQHxKOtuSjvPUFdHebooqB+69915vN0EpVYGFBAXw5BVttTe3+G7BvXG5xnVciQfd3JMXlAl8DtzvwXmUYv6WJAAuOcf7q8wo/zd06FBvN0EppbQ39+yUyj9ecasuAGSKkFiSjVEVx69bDtC8djhNInU8vTp7ycnJAERGRnq5JUoppYoh/2poVYH3gSPAa8BeoCHwEBAJjPbkxG4FXRFiPTmpUkU5lpHNit2HuO2Cwj4/KeW5YcOGATpGVyml/JLI4pP3jZkI1AMuQGR3vv2Lge3AYOA7d0/t7mS0gcB5wFoRvs+3fwjQCfhThDnuvqiq2JZsSybbITpsQZWYhx56yNtNUEopVTKuc92eKLA/9+uheNCr6255sWeAf2PH5OZ3HPgPduEIpdzy65YkaoQF06WRroamSsbgwYMZPHiwt5uhlFJ+xxgTaox52xiTbIxJM8Z8Z4xp6MbzxhpjdhtjMowxq40xFxZ4fJExRgpsX7jRpFDX7SyMuRJjOmHMlcCXrv0elWpyN+i2cd0uL7D/T9ftOZ68qKq4chxOFsYcoH/rOgQG6KB9VTISExNJTNQpA0opVQxvAtcAI4ALgWr
AD8aYwNM9wRhzPfAW8CJ2hdzfgZ+NMY0KHPoRUD/fdqcb7fkFOzGtJ/AtbFxilAAAIABJREFUsNp12wtbceEXd98YuB90w1y3VQrsr1rgcaWKtCbuCIfTs7lYhy2oEjR8+HCGD/fLxRmVUsprjDER2FJd40RknoisAW4EOgCXFPHUB4GpIjJZRLaIyL+ABKDgeuzpIpKYb3Nn3YV/ATHYsFtwiwE8qifpbtWFBKAR8CTwz3z7n3Dd7vfkRVXF9euWJIIDDX1a6ex4VXIee+wxbzdBKaX8UVfsUIC5uTtEJN4YswU4n0J6T40xIa7njS/w0FzXc/IbbowZDiQBPwPPisixIlskkoAxnYGbgIuAWkAysBD4BJEMt98d7gfd+djEf7cxXIpN1K2B5thu5PmevKgvqVmzps7ULkO1jh3nic6G1X8s83ZTVDlSqVIlQKsuKKUqhCBjzKp8X08SkUnFPFc9wIENkvkluR4rTCQQ6Dqm4HPy9wJ/BsRiO0PbAS9he4ovPWOrbJid5NrOirtB92XgeiAcG26bu/Yb7IS0l8+2Id6SkpJCv379vN2MCmFPchq3zFnEvwe3pV9vLS2mSk58fDwA0dG60p5SqtzLEZFuRR1gjHkBexW+KP3P8PhZKRC+NxpjdgErjDFdXEMkyoS7dXR3unpyP+DUiWd/AXeIsKs0GqfKF10NTZWWG2+8EdAeXaWUcnkTmH6GY+KwE74Csb20B/M9VhdYeprnJWN7gQv+Ma8LRS4ktsr1vJaAbwVdABH+ANoZQ3Psm0kSYWeptUyVO79uOUCrulWIrqlzF1XJeuqpp7zdBKWU8hkikszfhyP8jTFmNZANDMAONcBVWuwcbCWFws6d5XreAGBmvocGALOKeLlzsaE6wY23UGLcDrq5XOFWA67ySOqJbFbuSWFMn2bebooqhy65pKjJwUoppQojIqnGmA+AV4wxB4BDwOvABvLNvzLGbAXeEZF3XLteB6YZY/4ElgF3AVHAe67jmwMjgZ+wgbstdjnfta7jy4xb5cWM4VNjcBhz6sIQxvC0a/+00mmeKi8WbztIjlO0rJgqFbt27WLXLh1BpZRSxXA/MBuYgQ2hx4HBIuLId0xr7PAGAERkhut5TwHrgAuAQSIS6zokC7gYW7UhBpiArcpwSYHzljojImc+yLAbW16shQi78+1vAuwCYkXwy9lF4eHhkpaW5u1mlHv3fbGW37Yn8+eTl+hCEarE5U4o1TG6SqnyzhiTLiLh3m5HmTLGAJGIHDzjsQW4O3Shvuu24CDj3NISpytBoRQ5DieLYg4yoG1dDbmqVDz77LPeboJSSqmSYMzl2IoQfyDyNcbcCEwEwjBmLTAIkQPuns7dldFyi/P2KrC/V4HHlfqbVbGHST2RzSXn1PF2U1Q51bdvX/r27evtZiillDp7Y4GHgHCMqQy8iy1va7DLDT/nycncDbobXS8w1RhGGUNXYxiFXcNYXI8rVaj5fyUREhjAhS1re7spqpyKiYkhJibG281QSil19jq4bpcC5wFVgC3AD9gsepknJ3N36MJUoDfQAPg4336DDbpTPXlRVbH8uvUAPZvXIjzU4yIfSrnlzjvvBHSMrlJKlQO5vWL7gMtd998EvsJWhYjy5GTuLhjxgTEMBK4p5OGvRPjQkxdVFcfOg//f3p3HSVFd/R//HPZFBQEFFRVQgwsajBo3UDTgT8njboT4qMEkGkWjJtHENS4xLtEkEI0mrqjoA4YYEwxuGHEDEYmouCAqq7INOwzrcH5/3BppmumZ7pmeqeru7/v16ld3V926fbprBs7cPnXvKmaUrea8I7vEHYoUsVtvvTXuEEREJD/WA80JszwcQBhQ/YTNZbLrc+kslwUjvmfGmcCJRAtGAP9y32KyYJEtvBythnbs3qrPlfpzxBFHxB2CiIjkx2xgP+ANwuitA1MJVQUAWV+IBjkuGOHOU8BTqdvM2AY43X2LkgYRAMZ+vJC9O21L5+21Gpr
Un6lTpwLQo0ePmCMREZE6ehK4Fb6etnYs7ksxOzl6ntPywbUqmjSjEXA8cA5hhLcFKNGVLS0rX8/kWUu56Og94g5Fitwll1wCqEZXRKQI3AFUAL2BGWyeZaEJ8BDVLzO8lZwSXTMOISS3A9i8QkblBWkiWxg3bREVm5y++2o1NKlfd955Z9whiIhIPoSVzO6MbqnbHwQezLW7GhNdM7oCZxPWLN6rcnNKkzXAM7m+sBS/sR8vYIdtm3PALm3iDkWK3CGHHBJ3CCIikkAZE10zLiQkuKmLRKQva+VAR3dW1UNsUsDWbqjg1WmL6L//TjTSamhSz6ZMmQJAz549Y45ERERyZlaRQ2vHPeuKhOoa3ktIZCuzlPXAWEJtxOfAOAAluVKV56bOY+W6jZxy4C41Nxapo8svvxxQja6ISIGqtxGxbDJiBx4GrnRnGYAZ+9VXQFIcRrw9hy7tW3FYt3ZxhyIlYMiQIXGHICIitTebLa/3ak9YEW0DYZGI9kBToJwcpxfLdgngHwKfmHGfGX2jFxOp0oyy1UycsYTvHbwrZipbkPrXs2dPlS2IiBQq9y64d8W9K3AGIen9PdAG952BNsAfo9b/m0vX1SW6twNzCMPJBuwIXAC8QJjEV6RKT70zh8aNjDMO6hx3KFIiJk2axKRJk+IOQ0RE6m4IYTT3ZtzDamjh/kagFXBXLp1lTHTducadLkAfwnQOy9mc9LYiGmI2Y64Zt+f2HqRYbazYxKjJczmm+4503K5F3OFIibjyyiu58sor4w5DRETq7qDo/ttp2w+N7g/MpbMaSxfcec2dCwjL/n4P+CehZqIy6d0ZyOl/GDMbbGYzzGytmU02s941tG9mZjdHx6wzs9lmdmnK/kFm5lXclGk1sFemLWLRynUMOGTXuEOREnLPPfdwzz33xB2GiIjU3aLofjRmozAbgtko4F+EQdZFmQ/dWtbTM7iznjDjwt/N2B4YSKiTyGmReTMbAAwFBhNKIAYDz5nZvu4+O8NhI4DOhNKJ6YSku2Vam3JgiyW4vHLIWxrMyEmz2XHb5hzTfYe4Q5ESoqV/RUSKxn2EJYCbA6embK9coOzPuXRWqyWA3VkaBXKfGd3IrTD458Awd38gev5TMzseuAi4Or2xmR0HfAfYw93Los0zqwzLfX4OcUieLVixlv98spCfHL0HTRpne52jSN2NHz8egCOOyOnvbhERSRr32wnfyP8SSP1mfi1wB+6/y6W7WiW6W8bDF8BvsmlrZs0ItRfphcQvknlk+BRgEvBzMzuXsBLbc8A17p46h29LM5sFNAamANe7+7tZvxGps1GT57LJ4cyDVbYgDeuaa64BNI+uiEhRcL8Rsz8SFi1rD5QBb+G+PNeu6pzo5qgDIRFdkLZ9AdA3wzHdgF7AOuB0oC1wN6E2+IyozTTCFGjvAdsClwFvmtk33X16Pt+AVG3TJuepd+ZwWLd2dO3QOu5wpMT89a9/jTsEERHJp5DUPl/Xbho60a2NRoSajLM8yuTN7BLgBTPr6O4L3H0CMKHyADMbTxjV/SlwaXqHZnYBod6XZs2a1f87KAETZyxh1uJyLu+7V9yhSAnq3r173CGIiEhtmf06p/buN2fbtKET3TKggnAxWaqOQKb62nnAl77lcPXH0f1ubD06jLtXmNk7QJVZl7vfD9wP0Lp1a6+qjeRm5KTZbNuiCSf02CnuUKQEvfrqqwAcffTRMUciIiK1cCNbroxWk6wT3Qa9Ysjd1wOTgX5pu/oB4zMc9iaws5ltk7LtG9H9rKoOsLAc1wGEJFnq2fLyDYyZOp9TD9yFFk0bxx2OlKAbbriBG264Ie4wRESk9izLW07iKF34A/C4mb1NSGIvJNTb/gXAzB4DcPdzo/ZPAtcDj5jZjYQa3aHAKHdfGB1zA/AWYeqx7QjlCgcQZnKQevbP975k/cZNughNYvPwww/HHYKIiNTeeSmPmwI3EZLaB4G5hClmf0y4zuu6XDrOmOiacVQ
uHbnzWnbtfKSZtScEuhMwFejv7pWjs7ultV9lZn0JF6BNApYCzwBXpTRrSyhF6ERYwe1d4Ch3fzuX9yC1M+LtOfTYZTt67NIm7lCkRHXr1i3uEEREpLbcH/36sdkthHzuW7i/l7L9H4SqgJwuBjL3qksizNhE9vUS7l4QF7ZtpXXr1r569eq4wyhYU79czv/c/Qa/OXk/zjm8S9zhSIkaO3YsAH37Zpq8RUSkOJhZubsX7/RGZl8Rrt3qgPvSlO3tCNd6LcA96wuCakpOc66FkNIyYtJsmjdpxEk9d4k7FClht9xyC6BEV0SkCLSN7h8glKxWli7cFG3fLpfOqkt0H017fhxhKPnNlBc9kpBd/zuXF5XisGZ9Bf989yu+u/9OtGnZNO5wpIQ9/vjjcYcgIiL58QZhbYVT2XIJYAiVBm/k0lnGRNd9c2GwGf8LnAsMcGdUyvYzgf8jJL9SYp6bOo+V6zZy5iG6CE3iteuu+hkUESkSPwVeA3aoYt9CqlgfoTrZTi9WeYVb+goVYwjlDVfm8qJSHEZMmkOX9q04tGu7uEOREvf888/z/PN1XkBHRETi5j4N6AHcAbwNfA5MBG4H9o/2Zy3bC8i6RPeDgd+lbL84ut89lxeVwvfFolW8PWMJvzy+O2HaYpH43H777QAcf/zxMUciIiJ15r4IuDofXWWb6H5KyK5vM+MXhIUYdgI6EOolPs1HMFI4nnpnLo0bGWd8q3PcoYgwYsSIuEMQEZF8MjsM6A/sSChZeJZaTBubbaJ7LfAPwkS9HaIbhLKFTcA1ub6wFK4NFZsYNXkux+69Iztu1yLucETo1KlT3CGIiEi+mN0HXJC29VrM/oL7xVUdkklWNbruPAscT6iRcEKC64TVyI5z16wLpeSVTxZStmodA7QSmiTE6NGjGT16dNxhiIhIXZkNAn5C1cv/XojZuZkPrqK7TAtGZH59WgHbA0vdKc/p4ATSghG5+9GwSXzw5XLGX3UsTRpnez2jSP3p06cPAOPGjYs1DhGR+lYCC0aMBw4DZgF/jO53A35GuGZsAu5HZttdTquZmdGEUKvb3p3ncjlWisOCFWt5ZdpCLjx6DyW5khijRo2quZGIiBSCHoSqgRNxn/r1VrNXgPej/VnLOlMx43vAl8AEYHS07WUzvjDjuFxeVArXix/OZ5PDaboITRKkQ4cOdOjQoeaGIiKSdM2i+7lp2+em7c9KVomuGb0JC0N0YHOdBIQV0boAZ+TyolK4XpteRuftW7LHDsX7rYkUnqeffpqnn3467jBERKTu5kT3d2EWlgM2awPcmbY/K9mO6F4dtU2fpLfyIrTDc3lRKUwbKzbx1ueL6b3XDpo7VxLlT3/6E3/605/iDkNEROruWcKA6nnAYsyWA0uAHxJKGnK68jjbGt3DqKyXgOkp27+I7nfJ5UWlML03dxkr122k9176iliS5Z///GfcIYiISH7cApxKuAANYNuUfTOB3+bSWbYjupXfU89O2942um+Zy4tKYXrt0zIaGRyxR/u4QxHZQps2bWjTpk3cYYiISF25LwYOBR4iLFC2EfgKeAA4HPcluXSX7Yjul4RlftNLFK6I7tMLhqUIvfFZGft3bkvbVjnVgYvUu5EjRwIwYMCAmCMREZE6c18AnJ+PrrId0X2BUC/xTOUGMz4hJLoe7ZcitmLtBqbMWcZRKluQBLrvvvu477774g5DREQSJttE9xZgMaFUoXKFib0Iye8S4Lb8hyZJMuHzxVRscnrtqURXkmfMmDGMGTMm7jBERAqOmTU3s7vNrMzMVpvZv8ys2jlEzeyoqN2XZuYWVjNLb2NmdqOZfWVma8xsnJntV29vJINslwD+EjgSeBHYREhwN0XPe0f7pYi9Pn0RrZo15sDdto87FJGttGrVilatWsUdhohIIRoCnA58H+gNbAc8a2aNqzlmG2AqcBmwJkObXwK/AH4KHAIsBF4ys20ztK8XWa+M5s6nwPFmtADaAUvcWVt
vkUmivDG9jMO7tadZE62GJskzfPhwAM4+++yYIxERKRwW5qf9EXCeu78UbTuHsOxuXzKUprr7GGBM1H5YFf0acDlwu7v/Pdr2A0Kyexbw13y/l0yyXTCijRm7mdHBnbXufOXOWjM6RNt1uXMRm7OknJmLy+ml+lxJqAcffJAHH3ww7jBERArNQUBTwjf0ALj7HOBj4Ig69NsV6JTW7xrgtTr2m7NsR3QfBk4Bfgakzso+EBgK/IMCXR2tXbt2jBs3Lu4wEm3J6vX8Yv+N7LxmBuPGzYo7HJGtXHfddQD6XRaRUtDEzN5JeX6/u99fy746ARVAWdr2BdG+2qo8dkEV/Tbo2gvZJrqHRvd/T9v+NCHxPZQCtWTJEvr06RN3GIl20fDJTJmzjPFnHaMV0UREROK10d0Prq6Bmd0CXFtDP8fkL6QGYLbb14/d09d1yCjbRHeH6H5Z2vblafulyFRsct78rIzje3RSkiuJNWzYMAAGDRoUaxwiIgkxBBheQ5vZhJVvGwMdgEUp+zoCr9fh9een9JOalHZM2ZermYSZv5wcrjHLtuFKYHvgOEKZQqXjovtV2b6gFJb35y5jxdqN9NpLf8tIcinRFRHZzN3L2LocYStmNhnYAPQDnoy2dQb2AcbXIYQZhIS2HzAp6rcFYVaHK+vQb84jbtkmuv8lXH33sBn7EYqU9wF+TsisJ+f6wlIY3phehhmaP1cSTbW5IiK5c/flZvYQ8DszW0hYM+EPwPvA2Mp2ZvYJcI+73xM93wbYM9rdCNjNzHoCS9x9tru7mQ0BromO/RS4jjAw+mQtw32NzWs5ZC3bRPcvhER3O+CmlO0Wvehfcn1hKQyvTy9jv523o11rLfsrIiJShC4HNgIjgZbAy8C57l6R0qY7obyh0sHAKynPb4pujwKDom2/i/r7M6EqYCJwnLuvrFWU7n1qc5i5Z5ccm3EXYQQ33V3u/LI2L54ErVu39tWrV8cdRiKtWreRnje9yPlHdeNXx+8ddzgiGT3wwAMAnH9+XpZGFxFJLDMrd/fWccdRKHJZMOIKM0YCJxGKiRcA/3IPtRdSfN76fDEbNzm9VbYgCTdy5EhAia6ISMEzO6qavQ4sxv2jbLvLOtEFiJJaJbYl4o3PymjRtBEHddGyv5JsY8eOrbmRiIgUgnHUVItr9iVwEe7/rqmzrBNdM7YF+gO7Ay3S97tzc7Z9SWF4bfoiDu3anuZNqlvuWkRERCSvappdoTPwNGaH4P5+dQ2zSnTNOISwpnG7apop0S0iXy1bwxeLVnPWt3erubFIzO69914ABg8eHHMkIiJSR48SpiXbmTDF2WxgV+BI4CvgXcIECc0I144Nqq6zRlm+6BCgPSHDruomReaN6WH6vd6aP1cKwOjRoxk9enTcYYiISN29DOwEfB/3XrifhXtv4Kxo+0jgVEL+eXRNnWVbunAAoV7iVcIywKupxVxmUjhem76IHbdtzjc6bhN3KCI1eu655+IOQURE8uO66D69/vZZQnJ7De77YrYc6FRTZ9kmusuAVsBp7lstAyxFZlO07O8xe++oZX9FRESkIe0e3V+G2a1sngf3wui+a3S/kizy2GxLFx6L7ntk2V4K2IdfrWBp+QaOUtmCFIihQ4cydOjQuMMQEZG6mxbd3wwsxGwKZguAOwjVBNMwa0yY6varmjrLdkR3JrAc+KcZD0VBbEht4P51MiwF7vXPFgFwpObPlQLx8ssvA3DZZZfFHImIiNTRNcA/gcaESRAqJ0IwwgpuVwPHAk2BN2vqLKuV0czYRPU1ue6e25y8SaGV0bb2/fvfYmn5ep6/vLo5m0VERKShlcTKaGZ9gN8ChxKqDzYBbwHX4v4qZk2A5sA63DdW11UuyamKNUtA+fqNTJ61lEFHdok7FBERESlF7uOAIzFrBWwPLMF9Tcr+jYTR3Rplm+iel2OIUqAmzljC+opN9FLZghSQu+66C4Arrrgi5khERKROzMYBDwGjcC8Hyuv
SXVaJrjuP1uVFpHC8Mb2MZk0a8e2u1a0NIpIsEyZMiDsEERHJj6OA3sDdmI0EHsZ9Ym07y6pGt5ipRndLx/3xVXbctgXDf3xo3KGIiIhImqKv0TVbS1j1DDZfH/Yx8DDwOO6Lcuku2+nFMONsM/5rxmozKtJuWdVJSLItWLGWTxesovdeKlsQERGRWHQEfkRYIW0T4RqxfYE7gbmYPZ1LZ1klumacSZhL95tAS7QMcFF6PVr2t5cSXSkwt99+O7fffnvcYYiISF25L8f9Edz7AZ2By4GJhFyzKXByLt1lezHaxdH9GsIKaQ4sAdoTVk3TamlF4I3pi+iwTTP26bRd3KGI5GTKlClxhyAiIvm3ipBvLgUqCHPr5iTbRPcAQnLbFxgP4M4OZlwPXAKcmOsLS7Js2uS88VkZR+7ZgUaNNEAvhWXEiBFxhyAiIvlg1hToD5wFfJdQSQCbqwdey6W7bGt0K4ue/0tUGGxGY+D3wA7An3J5UUmeT+avpGzVenpr2V8RERGJzwLgaeAMQhWBEZb6vRXYC/djcuks2xHdFYQJew1YCWwLnEBYFhjCyhVSwF6fHi5i1Py5Uoh+85vfAHD99dfHHImIiNRR2+h+PWEp4IeBF6nlNGHZJrpfERLdHQlTPHw7evFKS2rz4pIc//lkId07bkunNi3iDkUkZ9OmTYs7BBERyY8pwCPAE7jXOb/MNtF9F+hBGLl9jK1HcLWgRAFbuHItb89cwqXH7hV3KCK1Mnz48LhDEBGRfHD/Vj67yzbRHQz8EljpTrkZbYABhHWG/wHckc+gpGG9MHU+7vDdA3aKOxQREREpdWZNCBekdWfzxWibud+cdVdaGU0row28fwKLVq5j7M+PxkwzLkjh+fWvfw3AzTdn/W+fiEhBKoGV0XYExhGS3Kq5Zz3NWMYRXTN2yyUud2bn0l6SYdHKdbw9YwmXHLOnklwpWHPmzIk7BBERyY+bgL2r2Z/TCG1104vNBGZkefsilxc1s8FmNsPM1prZZDPrXUP7ZmZ2c3TMOjObbWaXprU53cw+ivZ/ZGan5hJTqXr+w/lscuivsgUpYI888giPPPJI3GGIiEjdHUdIZiv/UXfgUmA68ClheeCs1TSPblVL/Wa6ZcXMBgBDCfOhHUhYgOI5M6tuBHkEcDxwAWEo+3vA+yl9Hg6MBJ4Aekb3fzMzTXtWgzHvz6PbDq3p3nHbuEMRERER2SW6v+rrLe73AKcB3yAsC5y1jDW6ZuQ0POLOeVm9oNlE4H13Pz9l23RglLtfXUX744C/AXu4e1mGPkcC7Tysi1y5bSywyN2/X108pVyjW7ZqHd/+7VguPmZPfnFc5lIYkaS7+urwT8dtt90WcyQiIvWrBGp0VwMtgKbAGkKZbafo8QpgLu5Zl9dmrNHNNnHNhZk1Aw4C7krb9SJwRIbDTgEmAT83s3MJb/Q54Bp3XxW1ORy4O+24FwjLE0sGL1SWLeyvsgUpbIsXL447BBERyY/FhFHdNsB8wgjuE8DaaP/2uXSW7fRi+dIBaExY3i3VAqBvhmO6Ab2AdcDphBUz7gZ2JiwPByHTr6rPTlV1aGYXEMogaNasWU5voJiM+WAe3Tq0Zu9OKluQwnb//ffHHYKIiOTHNEKiuwfwGvC/wHeifQ78N5fOsk50zegO/ISq5zRz96+DyLdGhDd2lrsvD7HYJcALZtbR3dMT3Bq5+/3A/RBKF/IZbKFYvGodEz5fzEV99tBsCyIiIpIUDwCfEcoXbiJcnLZDtG8RcHkunWWV6JpxEGFOs1ZV7Sb7qR7KgAqgY9r2joTh6arMA76sTHIjH0f3uxFGbufn2GfJe+HDBSpbkKJxxRVXAHDXXelVUSIiUlDcnwKe+vq52V7AMYRFyt7EfVku3dU060Kla4DW1GG2BQB3Xw9MBvql7epHmH2hKm8CO5vZNinbvhHdz4ruJ+TYZ8kb88E8urRvxb47bRd3KCJ1tmbNGtasWRN3GCIikm/uK3D
/J+7/zjXJhexLF44gjNoOBu6LHn8TuIUwqe+AHF7zD8DjZvY2IYm9kFBv+xcAM3sMwN3Pjdo/CVwPPGJmNxJqdIcSZmlYGLUZCrxmZlcBzwCnErL/XjnEVTKWrF7PhC8W85OjuqlsQYrCn//857hDEBGRBMp2RLd9dP9E5QZ3phIu6PoG8LNsX9DdRxLqK64DphCS0f7uXjk6u1t0q2y/inChWhvC7AtPAa8CP0xpMx4YCAwizK97LjDA3SdmG1cpeeHD+VRscpUtiIiISFHLOI/uFo2M5cA2QHNgOaFAeF9gFTAHWOZOu3qMs96U4jy65zw0kdlLyhl3RR+N6EpRuPzycG3CkCFDYo5ERKR+Ff08unmW7YhuZYlAO8LSwACvEGpjATblMSapR0tWr2f854vpv/9OSnJFRESkqGVbo/sBYT7bA4BngX3YPMuBExZ8kALwYlS28F2VLUgR0UiuiIhUJdsR3ZuAswijubcQEtvK4cCXgcvyHpnUizFT57Nru5bst7NmWxAREZHiltWIrjvvAe+lbDrejLbARndWZThMEmZZ+XrGf1bGj3p3VdmCFJWLL74Y0OwLIiKypbosAdwMKK2ruArcix8uYKPKFqQItWyZvlijiIhIDYmuGd8iTNvVAnjGnf+Y8WPgNsKFaevMuNedK+o/VKmrf38wj87bt2T/XdrEHYpIXmlFNBERqUrGRNeMXoT628o2F5txJ/BLwgVoRkiAf2bGZ+5hwQdJpmXl63nzszJ+1EtlCyIiIlIaqrsY7UqgKVsu93tltM+AspTH59RXgJIfL34Uyha0SIQUowsuuIALLrgg7jBERAqOmTU3s7vNrMzMVpvZv8yscw3HHBW1+9LM3MwGVdFmWLQv9fZWvb2RDKpLdA8mjNy+QFj69zlCUuvA993ZEfjfqO2+9Rmk1N2YqGzhgM4qW5Di0759e9q3b19zQxERSTcEOB34PtAb2A541swaV3PMNsBUwqxba6pVPBBYAAAgAElEQVRpNxbYKeXWPx8B5yLjymhmrCOULWzvzgoz2gBLCYluC3c2mNEMWAtscq/ThW2xKYWV0ZaXb+Dg377EeUd25Zr++8QdjoiIiNRSPldGM7M2wCLgPHd/Itq2KzALOMHdX8iij1XAJe4+LG37MKCDu/9PPmKtrepGdJsCuLMiul9eucOdDdH9+miTij4T7MWP5rOhwjmhR6e4QxEREZHkOIiQ73298Je7zwE+Bo7IQ/+9zGyhmX1qZg+Y2Y556DMnNY7CmvHrbLYVqnbt2jFu3Li4w6hXqxeXc3XPTSz/4j3GfRF3NCL5d8cddwDwq1/9KuZIRETqXRMzeyfl+f3ufn8t++oEVLD5uqtKC6J9dfE88DQwA+hCWHDsP2Z2kLuvq2PfWcum3OCGlMdexbaCtmTJEvr06RN3GPVm+ZoNnH/LS/zg8D34SR+VUktx+s9//gNQ1L/LIiKRje5+cHUNzOwW4Noa+jkmfyFtzd1HpDz9wMwmE0oivktIgBtETYmuShIK3NiPFrChwul/gGZbkOJ18803xx2CiEiSDAGG19BmNnAY0BjoQKjVrdQReD2fAbn7V2Y2F9grn/3WpLpE96YGi0LqzZgP5rFzmxYcuGvbuEMRERGRBuDuZWxdjrCVaJR1A9APeDLa1hnYBxifz5jMrAOwCzAvn/3WJGOi665Et9CVr9/I69PLOPuw3bVIhBS1s88+G4Dhw2sawBARkUruvtzMHgJ+Z2YLgcXAH4D3CVODAWBmnwD3uPs90fNtgD2j3Y2A3cysJ7DE3WdH+28E/k5IbLsQVtVdCPyjAd7a1wpySjDJzn9nLWN9xSaO+kaHuEMRqVfdu3ePOwQRkUJ1ObARGAm0JKyKe667V6S06U4ob6h0MPBKyvObotujwCDCBW77A+cCbQnJ7ivAme6+sl7eRQYZ59EtFcU8j+5dL0zjvlc/570bjmOb5vqbRkREpNDlcx7dUlDdPLpS4N76YjH779J
GSa6IiIiUJCW6Rap8/Ubem7uMw7ppWVQpfgMHDmTgwIFxhyEiIgmjob4i9d9Zy9hQ4RzWrV3coYjUu549e8YdgoiIJJAS3SL11heLadzIOLiLEl0pfldddVXcIYiISAKpdKFIqT5XRERESp0S3SKk+lwpNaeffjqnn3563GGIiEjCaLivCKk+V0rN4YcfHncIIiKSQEp0i5Dqc6XUXHHFFXGHICIiCaTShSKk+lwRERERJbpFR/W5UopOOukkTjrppLjDEBGRhNGQX5FRfa6Uou985ztxhyAiIgmkRLfIqD5XStFll10WdwgiIpJAKl0oMqrPFREREQmU6BYR1edKqTrhhBM44YQT4g5DREQSRsN+RUT1uVKqTjzxxLhDEBGRBFKiW0RUnyulavDgwXGHICIiCaTShSKi+lwRERGRzZToFgnV50op69u3L3379o07DBERSRgN/RWJyvrcQ1WfKyVowIABcYcgIiIJpES3SHxdn7v79nGHItLgzj///LhDEBGRBFLpQpF464vF9NilDdu2aBp3KCIiIiKJoES3CGyuz1XZgpSmPn360KdPn7jDEBGRhFHpQhHYPH+uLkST0jRo0KC4QxARkQRSolsEVJ8rpU6JroiIVEWlC0VA9blS6jZs2MCGDRviDkNERBJGiW6BU32uCPTr149+/frFHYaIiCSMShcKnOpzReDHP/5x3CGIiEgCKdEtcKrPFYGzzz477hBERCSBVLpQ4FSfKwLl5eWUl5fHHYaIiCSMEt0CpvpckaB///70798/7jBERCRhVLpQwFSfKxJcdNFFcYcgIiIJpES3gE2cofpcEYABAwbEHYKIiCSQShcKmOpzRYLly5ezfPnyuMMQEZGEUaJboNasr2DKHNXnigCcfPLJnHzyyXGHISIiCRNLomtmg81shpmtNbPJZta7mrZ9zMyruO2d0mZQhjYtGuYdNbz/zl6q+lyRyKWXXsqll14adxgiIpIwDV6ja2YDgKHAYOCN6P45M9vX3WdXc+h+wJKU54vS9pcDe6RucPe1dY84mTR/rshmp512WtwhiIhIAsVxMdrPgWHu/kD0/KdmdjxwEXB1NcctdPeyava7u8/PV5BJp/pckc3KysI/DR06dIg5EhERSZIGLV0ws2bAQcCLabteBI6o4fB3zGyemb1sZsdUsb+lmc0ys7lm9qyZHZiPmJNI9bkiWzrjjDM444wz4g5DREQSpqFHdDsAjYEFadsXAH0zHDOPMNo7CWgGnAO8bGZHu/vrUZtpwA+B94BtgcuAN83sm+4+Pb1DM7sAuACgWbNmdXpDcXhn1hLV54qk+MUvfhF3CCIikkCJn0fX3acREtlKE8ysC3Al8HrUZgIwobKBmY0HpgA/Bba6QsXd7wfuB2jdurXXU+j15m/vzGXb5k04tKtGdEUATjzxxLhDEBGRBGroWRfKgAqgY9r2jkAu9bUTgb0y7XT3CuCd6toUqoUr1jLmg3l87+BdadUs8X+niDSI+fPnM39+yZToi4hIlho00XX39cBkoF/arn7A+By66kkoaaiSmRlwQHVtCtUTE2dT4c65h+8edygiiTFw4EAGDhwYdxgiIpIwcQwJ/gF43MzeBt4ELgR2Bv4CYGaPAbj7udHzy4GZwIeEGt2zgVOA0ys7NLMbgLeA6cB2hHKFAwi1vUVj/cZNPPn2bPp8Ywe6dGgddzgiiXHVVVfFHYKIiCRQgye67j7SzNoD1wE7AVOB/u4+K2qyW9ohzYA7gc7AGkLC+113H5PSpi2h5rYTsBx4FzjK3d+utzcSg+emzmPRynX84IgucYcikijHH3983CGIiBQkM2sO3AV8H2gJvAwMdve51RxzNXAa0B1YRxhsvNrdp6a0MeAGwsX/2xPKTi929w/r6a1UHat7wV2LlVetW7f21atXxx1GVk67902Wlm/g5Z8fTaNGFnc4IokxZ84cAHbdddeYIxERqV9mVu7uefta18zuA04GfgAsJnzz3hY
4KLrmqapjXgBGEGbEMuBm4HBgX3dfErX5FWFQcxBhUoFfA72A7u6+Ml/x10SJboEkuh/MXc6J97zBDSfuy3lHdo07HJFE6dOnDwDjxo2LNQ4RkfqWz0TXzNoQVpo9z92fiLbtCswCTnD3F7LsZxvCN+qnuPvoaDT3K+Aed/9t1KYlsBC4wt3/mo/4s6HL9gvEsPEzad2sMWcc1DnuUEQS57rrros7BBGRQnQQ0JSUhbzcfY6ZfUxYyCurRJewhkEjYGn0vCuhnDS13zVm9lrUrxLdhtKuXbvEjwJVbHK6blzJtd9qxuS33ow7HJHEadIk/FOW9N9lEZE8aGJm76Q8vz9aH6A2OhGmfS1L274g2petoYT1CyrXNKg8tqoFwnbJMcY6KflEd8mSJV9/7ZlUf37lM+56fxpjf34ke+64bdzhiCTOF198AUC3bt1ijkREpN5tdPeDq2tgZrcA19bQzzH5CMbM/kCove2VqaY3TiWf6CbdxopNDH9rFr327KAkVySDH/7wh4BGdEVEIkOA4TW0mQ0cBjQGOhBqdSt1JFp9tjpm9kdgIHCMu3+RsqtyBZ+O0euk9tugq/so0U24Fz9awLzla7n55B5xhyKSWDfddFPcIYiIJIa7l7F1OcJWzGwysIGwcNeT0bbOwD7UsJCXmQ0FBhCS3E/Sds8gJLT9CDMzYGYtgN7Albm8l7pSoptww8bPpPP2LTl27x3jDkUksY4++ui4QxARKTjuvtzMHgJ+Z2YL2Ty92PvA2Mp2ZvYJYQaFe6LnfwbOISzgtdTMKmtyV7n7Knd3MxsCXBMd+ylhqrFVRAl1Q1Gim2Afz1vB2zOWcE3/vWmseXNFMpo2bRoA3bt3jzkSEZGCczmwERjJ5gUjzk2rt+1OKG+oNDi6fzmtr5uAG6PHv4v6+zObF4w4riHn0AXNo5voeXSv+vv7PDPlS966+ju0bdUs7nBEEkvz6IpIqcj3ghHFTiO6CbWsfD3PTPmSUw/cRUmuSA1uvfXWuEMQEZEEUqKbUE+9M4e1GzbxgyO6xB2KSOIdccQRcYcgIiIJ1CjuAGRrFZucxybM4tCu7di703ZxhyOSeFOnTmXq1KlxhyEiIgmjEd0E+s8nC5m7dA3X9t8n7lBECsIll1wCqEZXRES2pEQ3gR4dP5Od2rSg374d4w5FpCDceeedcYcgIiIJpEQ3YT5buJI3Pivjyv/XnSaNVVkiko1DDjkk7hBERCSBlEklzKPjZ9GsSSMGHrJr3KGIFIwpU6YwZcqUuMMQEZGE0YhugqxYu4G//3cuJx6wM+23aR53OCIF4/LLLwdUoysiIltSopsgo96ZS/n6CgZpSjGRnAwZMiTuEEREJIGU6CbEpk3O42/N4lu7tWX/zm3iDkekoPTs2TPuEEREJIFUo5sQr01fxIyy1VogQqQWJk2axKRJk+IOQ0REEkYjugnx2IRZ7LBtc07osVPcoYgUnCuvvBJQja6IiGxJiW4CzFq8mlemLeTSY/eiWRMNsovk6p577ok7BBERSSAlugnw+IRZNDbjrEN3izsUkYLUo0ePuEMQEZEE0vBhzMrXb+Spd+ZwfI9OdNyuRdzhiBSk8ePHM378+LjDEBGRhNGIbsyeefcrVqzdqCnFROrgmmuuAVSjKyIiW1KiGyN357EJM9l3p+04aPft4w5HpGD99a9/jTsEERFJICW6MZo4YwmfzF/J704/ADOLOxyRgtW9e/e4QxARkQRSjW6MHpswk7atmnJSz53jDkWkoL366qu8+uqrcYchIiIJoxHdmHy1bA0vfLiAH/fuSoumjeMOR6Sg3XDDDYBqdEVEZEtKdGPy5MTZuDtnH7p73KGIFLyHH3447hBERCSBlOjGYO2GCv7v7dl8Z5+O7NquVdzhiBS8bt26xR2CiIgkkGp0YzDmg3ksXr2eHxzeJe5QRIrC2LFjGTt2bNxhiIhIwmhENwaPTpjFHju05sg928cdikhRuOWWWwDo27dvzJG
IiEiSKNFtYFPmLOO9Ocu4+eT9NKWYSJ48/vjjcYcgIiIJpES3gT02fibbNG/Cad/qHHcoIkVj1113jTsEERFJINXoNqCyVet49v15nHFQZ7Zprr8xRPLl+eef5/nnn487DBERSRhlWw1oxNuzWV+xiXMO15RiIvl0++23A3D88cfHHImIiCSJEt0GsqFiE8Pfmk3vvTqwxw7bxB2OSFEZMWJE3CGIiEgCKdFtIC99tID5K9Zyyyk94g5FpOh06tQp7hBERCSBVKPbQIaNn8mu7VpyzN47xh2KSNEZPXo0o0ePjjsMERFJGI3oNoCP563g7RlLuKb/3jRupCnFRPLt97//PQAnnnhizJGIiEiSKNFtAI9NmEWLpo0482BNgSRSH0aNGhV3CCIikkBKdOvZ8vINPPPul5zScxfatmoWdzgiRalDhw5xhyAiIgmkRLeeWSO4+Jg96Ltvx7hDESlaTz/9NACnnXZazJGIiEiSmLvHHUOsWrdu7atXr447DBGpgz59+gAwbty4WOMQEalvZlbu7q3jjqNQKNFVoitS8JYvXw5AmzZtYo5ERKR+KdHNjUoXRKTgKcEVEZGqaB5dESl4I0eOZOTIkXGHISIiCaPSBZUuiBQ81eiKSKlQ6UJulOgq0RUpeOXl5QC0atUq5khEROqXEt3cqEZXRAqeElwREamKanRFpOANHz6c4cOHxx2GiIgkjEoXVLogUvBUoysipUKlC7mJZUTXzAab2QwzW2tmk82sdzVt+5iZV3HbO63d6Wb2kZmti+5Prf93IiJJ8NJLL/HSSy/FHYaISMExs+ZmdreZlZnZajP7l5l1ruGYq81skpmtMLNFZjbazHqktRlWRe72Vv2+m601eKJrZgOAocCtwIHAeOA5M9uthkP3A3ZKuU1P6fNwYCTwBNAzuv+bmR2a9zcgIonTtGlTmjZtGncYIiKFaAhwOvB9oDewHfCsmTWu5pg+wL3AEcCxwEZgrJm1S2s3li1zt/55jTwLDV66YGYTgffd/fyUbdOBUe5+dRXt+wCvADu4e1mGPkcC7dy9X8q2scAid/9+dfGodEGk8A0bNgyAQYMGxRqHiEh9y2fpgpm1ARYB57n7E9G2XYFZwAnu/kKW/WwDLAdOcffR0bZhQAd3/598xFpbDTqia2bNgIOAF9N2vUj4q6A675jZPDN72cyOSdt3eBV9vpBFnyJSBIYNG/Z1sisiIlk7CGhKSg7l7nOAj8kth9qWkFMuTdvey8wWmtmnZvaAme1Y14Bz1dDTi3UAGgML0rYvAPpmOGYecBEwCWgGnAO8bGZHu/vrUZtOGfrsVFWHZnYBcEH01M1sTS5vIkUTwnC9JJPOT3LVy7kxs3x3War0u5NcOjfJ1hDnp6WZvZPy/H53v7+WfXUCKoD0b8wz5lAZDAWmABNStj0PPA3MALoAtwD/MbOD3H1dLePNWeLn0XX3acC0lE0TzKwLcCXwelXHZNHn/UBtfyi+ZmbvuPvBde1H6ofOT3Lp3CSbzk9y6dwkW1LOj5ndAlxbQ7P0b8dr+1p/AHoBvdy9onK7u49IafaBmU0mlER8l5AAN4iGTnTLCH85dEzb3hGYn0M/E4GBKc/n56FPERERkWIwBKhpcvHZwGGEb9o7EGp1K3Uki8FEM/sjIR87xt2/qK6tu39lZnOBvWrqN58aNNF19/VRRt8P+FvKrn7A33PoqiehpKHShKiPO9P6HF/LUEVEREQKUnTxfpUX8KeKcrINhJzpyWhbZ2AfasihzGwoMICQ5H6SxWt1AHZhy/yt3sVRuvAH4HEzext4E7gQ2Bn4C4CZPQbg7udGzy8HZgIfEmp0zwZOIUyFUWko8JqZXQU8A5xKGJLvVc/vpc7lD1KvdH6SS+cm2XR+kkvnJtkK6vy4+3Izewj4nZktBBYT8rT3CVODAWBmnwD3uPs90fM/E66ZOgVYamaV9byr3H1VNAvDjYRBzHmEGt3bgIXAPxrgrX0tlpXRzGw
w8EvCnGpTgZ+5+2vRvnEA7t4nev5L4HygM7CGkPDe5u5j0vo8g1Do3A34HLjW3RusBkRERESk0JhZc+Au4CygJfAyMDiafaGyjQM3ufuNKc+rcpO732hmLQkDjwcCbQnJ7ivA9an9NoSSXwJYRERERIpTLEsAi4iIiIjUNyW61TCzwWY2w8zWmtlkM+tdQ/ujo3ZrzewLM7uwoWItRbmcHzPbycyeNLNPzKwiWrFF6kmO5+Y0M3sxWi99pZlNNLOTGjLeUpPj+TnazMab2WIzWxP9Dl3RkPGWklz/30k5rpeZbTSzqfUdYynL8Xenj5l5Fbe9GzLmUqdENwMzG0C4yO1WQo3JeOA5M9stQ/uuwJio3YGEouu7zez0qtpL3eR6foDmhCtQbydMTyf1pBbn5mjgP4S5FQ8k/B79I9v/4CU3tTg/q4A/AUcB+xKuhbgputZC8qgW56byuO2Bxwi1lVJPant+gP0I1yRV3qbXZ5yyJdXoZmBmE4H33f38lG3TgVHufnUV7e8ATnP3vVK2PQjs5+6HN0TMpSTX85N27LNAmbsPqt8oS1Ndzk1K+7eB1939F/UUZsnK0/l5Gljn7t+vpzBLUm3PTXQ+3gMMOMPde9R7sCWoFnlBH8IFWDtE031JDDSiWwUza0ZY//nFtF0vknnt58OraP8CcLCZNc1vhKWtludHGkAez822bL1mutRRPs6PmR0YtX01v9GVttqem2hkvSNhpF3qSR1/d94xs3lm9rKZ5WU1MsmeEt2qdSCsFLIgbXt1az93ytC+SdSf5E9tzo80jDqfGzO7mDCd4OP5DU2ow/kxs7lmtg54B7jX3f9SPyGWrJzPjZntD9wAnJ269KrUi9r87swDLiLM+38aMA14WWVZDSuOBSNERKoU1bTfCQxw91lxxyNb6A1sQ1gy9A4zm+Hu+mMkJtHcpyOBK9x9RtzxyNbcfRohua00wcy6AFeSxfK6kh9KdKtWBlQQvg5K1RGYn+GY+RnabySLZfgkJ7U5P9Iwan1uokVfHgPOdffR9RNeyav1+UlJpj4ws46EVY+U6OZPrudmJ8IyrY+Y2SPRtkaAmdlGoL+7p3/NLrWXr/93JgID8xWU1EylC1Vw9/XAZMLaz6n6kXnt5wkZ2r/j7hvyG2Fpq+X5kQZQ23NjZmcSkqZB7j6q/iIsbXn83WlEmMlE8qQW5+ZLYH+gZ8rtL8Bn0WP9W5hHefzd6UkoaZAGohHdzP4APB5d/f0mcCGwM+EfEszsMQB3Pzdq/xfgEjMbAvwVOBIYBOiq5PqR6/nBzHpGD7cDNkXP17v7Rw0ZeAnI6dyY2UBCknsF8JptXjN9vbsvaeDYS0Gu5+enwAw2fwV7FOFc3duwYZeErM9NNICyxZy5ZraQMBuG5tKtH7n+7lwOzAQ+BJoBZwOnEGp2pYEo0c3A3UeaWXvgOsJXRFMJXwVV1g3ultZ+hpn1B/5IKD7/CrjU3f/egGGXjFzPT+TdtOcnArOALvUVZymqxbm5kPBv0ZDoVulVoE/9Rlt6anF+GgN3EH5PNgKfA1cR/ecu+VPLf9ekgdTi/DQjXHPQGVhDSHi/6+5jGihkQfPoioiIiEiRUo2uiIiIiBQlJboiIiIiUpSU6IqIiIhIUVKiKyIiIiJFSYmuiIiIiBQlJboiIiIiUpSU6IoknJntZWb3mNnHZrbKzFaa2Sdm9oCZHZbSbqaZuZnNjDHcyliGRbF4tLZ75faOZvaEmc0zs4po/xAz65LSflg9xtXWzG6MbqdkG3dDMbM+Ka9f0+3G6JjK5+MaOt6a1Od5zeVcpX2ueY1DRJJNC0aIJJiZnQfcx9bLrXaPbjsQVtopFEOBATG+flvghujxo8AzMcYiIiL1TImuSEKZ2bHAg4RvXhz4LWF56YXA7sAZwDdiC7Aa7j6IsAR2uoOi+2VAV3dflrLP6jmsGlUTd0O9/jhSPgczGwQ8Ej19NIov78yshbuvrY++RUTipNIFkeS6jc2
/o39y9+vdfa67r3f36e5+G3B+dR2YWU8ze9rMPjOzFWa2wczmR9sOTmvb1cweM7PZZrbWzJaZ2dToK+IdU9qdb2bvmNkSM1tnZl+a2Utm9oOUNlt8rVz51TGwZ9SkLbA02j+ouq+4zexbZvZ/0eusN7MyM3vFzL4d7d/GzB41sw/MbHH0HpeZ2WtmNiClnxuBGSld/yD9NaspuWhtZjeZ2YdmtsbMys3sXTP7uZk1SWm3xfsws3Ojz3CNhdKTH1CPzOxYM3srer3PzeyXZpaaON+YEt+pZvaQmZURlietbLOPmT2e8nkvNLNRZnZA2mtl9fOSdsyZZvZ+dZ+HmfU2s3+Z2aKUn9cR6a9fzWewcxTvqujn4T5g2wxtc34PIlJg3F033XRL2A3YkTCKW3nbJYtjZkZtZ6ZsG5jWT+ptNbBPStsPq2nbI2rzvWrajErpa1jK9i5An2qOGxS1qXw+LKWfU4ENmY6L2nSqpm8Hzo3a3VhNm2FVxR1taw1MrubYMUCjqG3q+1iaoX2vHH4OBlX1uaS1qdxfluGzOjul7Y1p7b9uF+3vBZRniHsN0DvHn5fUz2N+TZ8HcDZQkaHdWqBPpp+xaFtL4OMqjv2qqs8xm/egm266FfZNI7oiydQl5fEKd/+ylv38F/h/wE6EOt/tgIuifa2AnwCYWXtg32j7nwjJXTvgEOB6YHm076jofhWhRrg5oYziTOD5TEG4+zh3N2BWtGmWu1t0G1bVMWbWEniAzSVWvwY6Ah0ICfcX0faVhLrfLtF7agEcQUjYAH4exXAj0DXlJR5NiWFQptiBy4FvRY9fIHyW3QifLcAJhD8o0rUFBgNtgDtStp9TzWvVRXvgd8D2wCVZvJ4BxxM+s/2jbQ8QksVZhDKT5sCBwCLC5/pnyOnnJVVHqvk8zKw1cDfhW4yNhD9ytgMujNo1J5TuVOdcYO/o8VtAZ8K3CMvSG9byPYhIgVGNrkhxmw/8CBhCSARbpu3vHt0vJSQDbQmJ20rCyNh77n5LSvsZ0X1r4DrCSOfHwIvunu/E4EhC8gYwzt1/k7JvVMrjckLyOxLYh/A1dWq9b3fq5rspj6929/kAZnYzmy9m6w88mXbcZHe/L2o7HPhVtH33OsaTyQLg1+5eYWaPAvfU8Hq/d/cXosdTzWwvNieJuxPObbr9zawToU48m5+XVDV9HkdG/QGMcffKz/avZnYh0BP4hpnt6e6fZXiNY1Me31b5B6KZ/Z5Q754q2595ESlgGtEVSaaZKY+3M7Oda9nPU8AvCQlgepJL5TZ330QYWZsL7AVcCwwHPohqX3eN2t8L/A2obD+EMMq5wMyuqmWMmXRMefxRNe1+RRhpPJQwAph+UVuLOsaxQ8rj2SmPZ6U8rqqec1rK49V5jCeTz929IofXezftebY1qe1z+HlJVdPnkelzhpo/669jS3k8N8NjIKefeREpYEp0RRLI3RcCb6dsurKqdqkXQlWxb3tC2QKE0b79gMZAlRf1uPuzwG6EEdCTgJsJ9ZI9CKO3uPtadz+T8BVvL+CHwETC18q3mtku2b3DrCxIebxPNe1SywZOAZpHZRKLq2jrtYhjYcrj3TI8Tm1TaUMdXzdXX7+eu2fzemvSnqe+h7EpZR1f3wi1yB9Gr1Hjz0um+Kj688j0Oac/r+qzrlSW8rhzhsebg8j9PYhIgVGiK5Jc1xJGTgEuja6Y39nMmlpYROIaQk1lJhvZnFBsBFYQvuL/TVWNzexu4DuE+tvngb8D66Ldu0VtTjezS4BdgPcIo7vvVXZBhoSilt5kc7J6jJldY2Y7mNn2ZnaKmVXWC29MOWYZ0NTMrmfL0b1KqcnvXlFdaE3+nfL4txYWvehCqBmuqk1BcvfpwKfR0++Y2eUWFthoYWbfNLNfAyMq22fz85Kj8YRyAoATzOwkCzNqnE+oEz3x/QMAAAIYSURBVAaYVk3ZAsArKY+vMrNdzGwP4BdVNa6H9yAiCaNEVySh3H0s4WK
xDYTf1RuAL4H1hITkt4QLjzIdvxJ4OXq6CzCHMEq6b4ZDLgJeSnmN9wgXKkEoT4Awsno3oZRgZXS7INo3D3g/h7dYLXdfQ5g+rTKR/S1hNG8J8A/CBWFEjyuNIyQtl1LFBUjuvopwpT2EC9ZWRVNtDaomlKFseeHZfEKtcuWcwM8R6oOLwQWE2Q0M+CMh8VwDTAFuYstykmx+XrLm7quBnxL+uGsK/JPw83V/1GQdmy9My+Qx4JPo8eGEsoTP2LIsIlVe34OIJI8SXZEEc/cHCaUG9xKS2zWE+sZpwEPA7TV0cTYhCVtKuIp8OJlXJrsdeIOQTG4kXOT1X0LSODRq8zLhoqvPCAllBSHBHQEcHSWneePu/yDU3o4gTBG1kZDovsrmut07gFsJycqaaN+xZL5q/hzgNcIIdzYxrCbMNnEz4WKldYRkcAphpPCkqN6z4Ln7q4QE/jFCkriB8Hm/T/gD55qU5tn8vOT6+k8QpqJ7ljD6vpHwx9lTwLc9LKhR3fFrgL7A04Tfk2WEBTcyzTed9/cgIsli2ZVyiYiIiIgUFo3oioiIiEhRUqIrIiIiIkVJia6IiIiIFCUluiIiIiJSlJToioiIiEhRUqIrIiIiIkVJia6IiIiIFCUluiIiIiJSlJToioiIiEhR+v/xOwftdEXSawAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced Accuracy',\n", + " val_metrics['avg_odds_diff'], 'avg. odds diff.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "qKKdkMELHHWH", + "outputId": "280e5812-55ec-47d2-e942-7139f1ce3140" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nBest balanced accuracy: 0.7581\nCorresponding 1-min(DI, 1/DI) value: 0.2939\nCorresponding average odds difference value: -0.0084\nCorresponding statistical parity difference value: -0.0992\nCorresponding equal opportunity difference value: 0.0242\nCorresponding Theil index value: 0.0938\n" + } + ], + "source": [ + "describe_metrics(val_metrics, thresh_arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8ZMdD_WsHHWH" + }, + "source": [ + "#### 4.2.3. 
Testing LR model after reweighing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hFwLENjsHHWH" + }, + "outputs": [], + "source": [ + "lr_transf_metrics = test(dataset=dataset_orig_panel19_test,\n", + " model=lr_transf_panel19,\n", + " thresh_arr=[thresh_arr[lr_transf_best_ind]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "q2N7cXo5HHWH", + "outputId": "0a5abb12-1425-4e56-d4c9-8a7ad4684906" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nBest balanced accuracy: 0.7539\nCorresponding 1-min(DI, 1/DI) value: 0.2482\nCorresponding average odds difference value: -0.0151\nCorresponding statistical parity difference value: -0.0872\nCorresponding equal opportunity difference value: -0.0035\nCorresponding Theil index value: 0.0966\n" + } + ], + "source": [ + "describe_metrics(lr_transf_metrics, [thresh_arr[lr_transf_best_ind]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NhNt40B5HHWH" + }, + "source": [ + "The fairness metrics for the logistic regression model learned after reweighing are markedly improved, and thus the model is much fairer relative to the logistic regression model learned from the original data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4aLi-JrbHHWH" + }, + "source": [ + "### 4.3. Learning a Random Forest (RF) classifier on data transformed by reweighing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xt20MGrvHHWH" + }, + "source": [ + "#### 4.3.1. 
Training RF model after reweighing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JmPkZxCWHHWH" + }, + "outputs": [], + "source": [ + "dataset = dataset_transf_panel19_train\n", + "model = make_pipeline(StandardScaler(),\n", + " RandomForestClassifier(n_estimators=500, min_samples_leaf=25))\n", + "fit_params = {'randomforestclassifier__sample_weight': dataset.instance_weights}\n", + "rf_transf_panel19 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L5PRJkXCHHWH" + }, + "source": [ + "#### 4.3.2. Validating RF model after reweighing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aiMIbrEgHHWI" + }, + "outputs": [], + "source": [ + "thresh_arr = np.linspace(0.01, 0.5, 50)\n", + "val_metrics = test(dataset=dataset_orig_panel19_val,\n", + " model=rf_transf_panel19,\n", + " thresh_arr=thresh_arr)\n", + "rf_transf_best_ind = np.argmax(val_metrics['bal_acc'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false, + "id": "PVXbPb4wHHWI", + "outputId": "58c2c3e1-ecd9-4dc7-f538-802897455410" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "disp_imp = np.array(val_metrics['disp_imp'])\n", + "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", + " disp_imp_err, '1 - min(DI, 1/DI)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OOqXnkZFHHWI", + "outputId": "d4ae6179-58d1-49f2-9adb-84c0cf210c29" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced Accuracy',\n", + " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "41srPkY6HHWI", + "outputId": "0e3d76b6-a67c-4bb8-e313-d91361cf9719" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.2500\nBest balanced accuracy: 0.7703\nCorresponding 1-min(DI, 1/DI) value: 0.4516\nCorresponding average odds difference value: -0.0876\nCorresponding statistical parity difference value: -0.1668\nCorresponding equal opportunity difference value: -0.0758\nCorresponding Theil index value: 0.0906\n" + } + ], + "source": [ + "describe_metrics(val_metrics, thresh_arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IwyaM88yHHWI" + }, + "source": [ + "#### 4.3.3. Testing RF model after reweighing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z10zcyKcHHWI" + }, + "outputs": [], + "source": [ + "rf_transf_metrics = test(dataset=dataset_orig_panel19_test,\n", + " model=rf_transf_panel19,\n", + " thresh_arr=[thresh_arr[rf_transf_best_ind]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "WYxXPLuMHHWI", + "outputId": "ae4de973-33a1-41cc-ba11-451fac6a5451" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.2500\nBest balanced accuracy: 0.7586\nCorresponding 1-min(DI, 1/DI) value: 0.4307\nCorresponding average odds difference value: -0.0843\nCorresponding statistical parity difference value: -0.1632\nCorresponding equal opportunity difference value: -0.0611\nCorresponding Theil index value: 0.0963\n" + } + ], + "source": [ + "describe_metrics(rf_transf_metrics, [thresh_arr[rf_transf_best_ind]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zu7Cz8UNHHWI" + }, + "source": [ + "Once again, the model learned from the transformed data is fairer than 
that learned from the original data. However, the random forest model learned from the transformed data is still relatively unfair as compared to the logistic regression model learned from the transformed data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "id": "Ktz4pZSUHHWJ" + }, + "source": [ + "## [5.](#Table-of-Contents) Bias mitigation using in-processing technique - Prejudice Remover (PR)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "faA3UnViHHWJ" + }, + "source": [ + "### 5.1. Learning a Prejudice Remover (PR) model on original data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nXck0WKmHHWJ" + }, + "source": [ + "#### 5.1.1. Training a PR model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vt67nMosHHWJ" + }, + "outputs": [], + "source": [ + "model = PrejudiceRemover(sensitive_attr=sens_attr, eta=25.0)\n", + "pr_orig_scaler = StandardScaler()\n", + "\n", + "dataset = dataset_orig_panel19_train.copy()\n", + "dataset.features = pr_orig_scaler.fit_transform(dataset.features)\n", + "\n", + "pr_orig_panel19 = model.fit(dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yz8PMbcPHHWJ" + }, + "source": [ + "#### 5.1.2. 
Validating PR model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BF5yFBPiHHWJ" + }, + "outputs": [], + "source": [ + "thresh_arr = np.linspace(0.01, 0.50, 50)\n", + "\n", + "dataset = dataset_orig_panel19_val.copy()\n", + "dataset.features = pr_orig_scaler.transform(dataset.features)\n", + "\n", + "val_metrics = test(dataset=dataset,\n", + " model=pr_orig_panel19,\n", + " thresh_arr=thresh_arr)\n", + "pr_orig_best_ind = np.argmax(val_metrics['bal_acc'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tLxkKhtKHHWJ", + "outputId": "9b6b7bf9-55f9-4348-d45c-7288599b7ac0" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAqUAAAG4CAYAAAB8eJ7qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdeXxVxf3/8deEJBD2JeygCCooLrigAlpQca/WHVREqEKtW9UvttJa11ZtRSt1V1QU1FJXpD9FBAUUEBABBRGRHSRA2CGQ9fP7Yy4SQpYTyM259+b9fDzyuOTcOXM+FwE/mfnMjDMzRERERETClBR2ACIiIiIiSkpFREREJHRKSkVEREQkdEpKRURERCR0SkpFREREJHRKSkVEREQkdEpKRURERCR0oSSlzrmbnHNLnXO7nHOznHOnldH+aufcHOdclnMuwzk30jnXrEiby5xz3zvnsiOvl0T3U4iIiIjEpvLkWs654c45K+ZrR2XGXOlJqXOuFzAUeBg4DpgKfOycO6iE9t2AEcBrQEfgYuBI4I1CbboAoyLXOkVe33bOnRy9TyIiIiISe8qbawF/AJoX+VoC/Df60e7hKvtEJ+fcdOBbMxtQ6Noi4B0zG1xM+0HArWZ2cKFr/YGnzKx25PtRQEMzO6tQm/HAejO7KnqfRkRERCS2lDfXKub+bsCXQDczmxq9SPdWqSOlzrlU4ARgXJG3xgFdS7htCtDcOXeh89KB3sBHhdp0KabPT0rpU0RERCTh7GeuVdQAYH5lJqQAyZX5MCAdqAasLXJ9LdCzuBvMbJpzrjd+Sj4NH/OnwHWFmjUroc9mFMM5NxAYCFCzZs0T0tPTy/cpREREREKwYsUKA74pdOlFM3ux0PflzrUKc87VA64EyhxRrWiVnZSWm3PuSOAp4CH86Gdz4DHgBaDv/vQZ+Y/3IkCtWrVs+fLlFROsJLSMjAwAmjUr9mcdERGRqHPO7TSzE6P4iD74mfQRUXxGsSo7Kc0E8oGmRa43BTJKuGcwMMPMHot8/21kNdgXzrk/m9mqyL3l6VOk3Hr37g3AxIkTww1ERESkZPuTaxU2AHjXzDZWdGBlqdSk1MxynHOzgLOAtwu9dRbwbgm31cT/5ha2+/vdNbHTIn08VqjNWfjVZiIV4u677w47BBERkVLtZ64FgHPuJOBY4PboRViyMKbvnwBGOOdm4Bcx3Qi0AJ4HcM69DmBmu6fmxwAvOed+z57p+yeBb8xsRaTNUGCyc+5u4APgEuB04NRK+URSJZx77rlhhyAiIhJEeXOt3QYCi8xsYuWFukelJ6VmNso51wi4B59gzgPON7PdhZ0HFWk/3DlXB7gFeBzYAnwG/KlQm6mRxVB/Ax4EFgO9zGx6tD+PVB0rV64EoHXr1iFHIiIiUrLy5loAkVyrNz6PCkWl71Maa2rVqmU7dlTqgQUSp3r06AGoplRERMLjnMsys1phxxENMb/6XiRW3HPPPWGHICIikrA0UqqRUhEREYkTiTxSWqknOonEsyVLlrBkyZKwwxAREUlIGinVSKkEpJpSEREJWyKPlKqmVCSgBx54IOwQREREEpZGSjVSKiIiInEikUdKVVMqEtDChQtZuHBh2GGIiIgkJI2UaqRUAlJNqYiIhC2RR0pVUyoS0MMPPxx2CCIiIglLI6UaKRUREZE4kcgjpaopFQlo3rx5zJs3L+wwREREEpJGSjVSKgGpplRERMKWyCOlqikVCeixxx4LOwQREZGEpZFSjZSKiIhInEjkkVLVlIoENGfOHObMmRN2GCIiIglJI6UaKZWAVFMqIiJhS+SRUtWUigT05JNPhh2CiIhIwtJIqUZKRUREJE4k8kipakpFApo5cyYzZ84MOwwREZGEpJFSjZRKQKopFRGRsCXySKlqSkUCevrpp8MOQUREJGFppFQjpSIiIhInEnmkVDWlIgFNnTqVqVOnhh2GiIhIQtJIqUZKJSDVlIq
ISNgSeaRUNaUiAb3wwgthhyAiIpKwNFKqkVIRERGJE4k8UqqaUpGAJk2axKRJk8IOQ0REJCFppFQjpRKQakpFRCRsiTxSqppSkYBeeeWVsEMQERFJWBop1UipiIiIxIlEHilVTalIQOPHj2f8+PFhhyEiIpKQNFKqkVIJSDWlIiIStkQeKVVSqqRUAlq5ciUArVu3DjkSERGpqhI5KdVCJ5GAlIyKiIhEj2pKRQIaO3YsY8eODTsMERGRhKTpe03fS0CqKRURkbAl8vS9klIlpRJQRkYGAM2aNQs5EhERqaoSOSlVTalIQEpGRUREokc1pSIBjRkzhjFjxoQdhoiISELS9L2m7yUg1ZSKiEjYEnn6XkmpklIJKDMzE4D09PSQIxERkaoqkZNS1ZSKBKRkVEREJHpUUyoS0Hvvvcd7770XdhgiIiIJSdP3mr6XgFRTKiIiYUvk6XuNlIoENHr0aEaPHh12GCIiImVyzt3knFvqnNvlnJvlnDutjPapzrkHI/dkO+dWOOduq6x4QTWlIoHVq1cv7BBERETK5JzrBQwFbgK+jLx+7Jw70sxWlHDbf4BWwEBgEdAUSKuEcH+h6XtN30tAo0aNAqBXr14hRyIiIlVVkOl759x04FszG1Do2iLgHTMbXEz7s4G3gXZmllnRMQel6XuRgJ577jmee+65sMMQEREpkXMuFTgBGFfkrXFA1xJuuxiYCdzpnFvlnFvknPu3c652FEPdR5Wfvm/YsKEWrkggd999N6CFTiIiEqpk59zXhb5/0cxeLPR9OlANWFvkvrVAzxL6bAucCmQDlwH1gaeAFsDlFRF0EFU+Kd24ceMvq6pFREREYlyemZ1YwX0mAQZcbWZbAJxztwCfOOeamlnRBDcqNH0vEtDIkSMZOXJk2GGIiIiUJhPIxy9UKqwpkFHCPWuA1bsT0ogFkdeDKja8kikpFQlo2LBhDBs2LOwwRERESmRmOcAs4Kwib50FTC3htilAiyI1pIdHXpdXbIQl0+p7rb6XgHJzcwFISUkJORIREamqAq6+7wWMwG8FNQW4Ebge6Ghmy51zrwOYWd9I+9r4kdGvgPvxNaUvAAvM7IoofZR9hDJSWp4NXZ1zw51zVszXjkJtepTQpkPlfCKpClJSUpSQiohIzDOzUcDtwD3AHPwipvPNbPeo50EUmpY3s+34RVD18Kvw/wtMAn5biWFX/khpJHsfyd4buvYHit3Q1TlXj303b50CTDaz/pE2PYDPgY7AxkLt1ptZfmnxaKRUgho+fDgA/fr1CzUOERGpuhL5mNEwktJybehazP3d8MlsNzObGrnWA5+UNi7vpq9KSiWo3bs0aEsoEREJSyInpZW6JVShDV2HFHmrtA1dixoAzN+dkBbxtXOuOvA98Dcz+3y/gxUpQsmoiIhI9FR2TWlpG7o2K+vmyFT+lcBLRd5aA/wev+HrpcBCYEJJtarOuYHOua+dc1/n5eWV7xOIiIiISIWLt83z++AT6RGFL5rZQnwiuts051wb4C7gi6KdRE4+eBH89H2UYpUE89JL/mehAQMGlNFSREREyquyR0r3Z0PXwgYA75rZxjJbwnTgsPKFJ1KyUaNGMWrUqLDDEBERSUiVOlJqZjnOud0bur5d6K2zgHdLu9c5dxJwLH6LgyA64af1RSrE+PHjww5BREQkYYUxff8EMMI5N4M9G7q2AJ4HKLqhayEDgUVmNrFoh86524FlwHwgFT/NfzG+xlREREREYlylJ6VmNso51wi/oWtzYB77bui6F+dcHaA38GAJ3aYCjwGtgJ345PQCM/uogsOXKuzZZ58F4Kabbgo5EhERkcSjY0a1T6kEdN555wHw8ccfhxyJiIhUVYm8T6mSUiWlIiIiEicSOSmt7NX3IiIiIiL7UFIqEtDQoUMZOnRo2GGIiIgkJCWlIgFNmDCBCRMmhB2GiIhIQlJNqWpKRUREJE6oplREREREJIqUlIoENGTIEIYMGRJ2GCIiIgk
pjBOdROLStGnTwg5BREQkYammVDWlIiIiEidUUyoiIiIiEkVKSkUCevTRR3n00UfDDkNERCQhqaZUJKA5c+aEHYKIiEjCUk2pakpFREQkTqimVEREREQkipSUigT00EMP8dBDD4UdhoiISEJSTalIQAsXLgw7BBERkYSlmlLVlIqIiEicUE2piIiIiEgUKSkVCejee+/l3nvvDTsMERGRhKSaUpGAVq5cGXYIIiIiCUs1paopFRERkTihmlIRERERkShSUioS0ODBgxk8eHDYYYiIiCQk1ZSKBLRhw4awQxAREUlYqilVTamIiIjECdWUioiIiIhEkZJSkYAGDRrEoEGDwg5DREQkIammVCSgnTt3hh2CiIhIwlJNqWpKRUREJE6oplREREREJIqUlIoEdPvtt3P77beHHYaIiEhCUlIqIiIiIqFTTalqSkVERCROqKZURERERCSKlJSKBHTzzTdz8803hx2GiIhImZxzNznnljrndjnnZjnnTiulbQ/nnBXz1aEyY9Y+pSIBpaWlhR2CiIhImZxzvYChwE3Al5HXj51zR5rZilJu7QhsLPT9+uhFua9ANaXOcbIZ0yshnkqnmlIRERGJF0FqSp1z04FvzWxAoWuLgHfMbHAx7XsAnwONzSyzgkMOLOj0/TTnmOsctzpHg6hGJCIiIiL7xTmXCpwAjCvy1jigaxm3f+2cW+Ocm+CcOz0qAZaiPNP3RwFPAv9wjveBYWZ8Hp2wKk/Dhg2ZOHFi2GFIHBgyZAgAgwYNCjkSERGpwpKdc18X+v5FM3ux0PfpQDVgbZH71gI9S+hzDfB7YCaQClwLTHDOdTezLyom7LIFTUqfAK4EWgE1gN5Ab+dYArwMDDcjIzohRtfGjRvp0aNH2GFIHPjkk08A9OdFRETClGdmJ1Zkh2a2EFhY6NI051wb4C6g0pLScu1T6hynAlcBlwFNIpcNyAdGA383Y05FBxlNqikVERGReFFWTWlk+j4LuMrM3i50/RngKDPrHvA59wG9zeyIA405qHJtCWXGl2bcDHQGJhV6Kxm4FJjuHL+pwPhEREREJCAzywFmAWcVeessYGo5uuqEn9avNOXaEso5zgJuBH5d6F4HzAbqAu2Av+NHTUUSSv/+/QF49dVXQ45ERESkVE8AI5xzM4Ap+NytBfA8gHPudQAz6xv5/nZgGTAfX1PaB7gYPzNeaQIlpc5xFzAQaLv7ElCATz7/ZcYXzlELWA0cHo1ARcLWunXrsEMQEREpk5mNcs41Au4BmgPzgPPNbHmkyUFFbkkFHsOvHdqJT04vMLOPKilkIPg+pQX42lEHbAVeAf5txrIi7X4ADjOjWsWHGh2qKRUREZF4EWSf0nhVnun7pcBTwMtmbC+hzRlAygFHJSIiIiJVStCk9BLgQzNKHVY14+cDD0kkNvXp0weAkSNHhhyJiIhI4gmalE4EWjtHlhm/HD/lHOlATWCLGVuiEJ9IzGjfvn3YIYiIiCSsoDWl7+JXYd1hxr8LXb8FGAq8b8blUYsyilRTKiIiIvEikWtKgyalq/Crtw4yY3Wh6y2AVcBqM+JyabKSUhEREYkXMZuUOtcCaIZfFJ+B2eoy7thH0On7xpHXzUWubynyvkjC6t27NwD/+c9/Qo5EREQkBjh3EjAAOBe/D2rh99YCnwDDMJsSpLugJzpti7yeXeT67u9LWo0vkjA6depEp06dwg5DREQkXM51xrlJwDTgt0BL/Ahp4a9mQF9gMs59gXOdy+w24PT9OKAnfmT0cWABcARwJ1APGG/GOcE/i7sJuAtfEjAfuN3Mviih7XDgumLe2mv42jnXHX+CQUfgZ+CfZvZ8WbFo+l5ERETiRUxM3ztXeP/6DPypUXPhl8Xw6cCxQDd8cgpQgFmpM/RBk9JLgXciAez1VuTa5Wa8H+xzuF7ASOAm4MvIa3/gSDNbUUz7ekBakctTgMlm1j/S5hD8aQWvAM8Cp0Zee5vZu6XFo6RURERE4kW
MJKV5wCjgJWASJSWTzjmgO/5U0CswK3Uv+0BJqe+XIfiR0aKGmPHHQJ34+KYD35rZgELXFgHvmNngAPd3wyez3cxsauTaP4BLzeywQu2GAR3NrEtp/SkplaAuu8wfAfzuu6X+nCMiIhI1MZKUHorZTxV9T+ATncwY5ByjgIuApsBa/Ib6M4PH41KBE4AhRd4aB3QN2M0AYP7uhDSiS6SPwj4BrnPOpZhZbtAYRUrSpUupP9+IiIhUDeVNSAPeU55jRokkoIGT0GKkA9XwCW1ha/E1q6WKTOVfCRQdUW0GjC+mz+TIM9cU6WcgfiiZ1NTUgKFLVTdo0KCwQxAREUlYgZNS50gGzgfas2+NJ2Y8WIFxlaQPfseAEQfSiZm9CLwIfvq+AuISERERqRqcW1KO1oZZuyANAyWlztEEf9RoaecsBklKM4F8/PR/YU3xq7fKMgB418w2FrmeUUKfeexZCSZyQC666CIAPvzww5AjERERCVUb9l38XpzdC+IDCTpS+gDQoZT3Az3QzHKcc7OAs4C3C711FlDq6hHnN2g9Fri9mLenAZcUuXYW8LXqSaWinHnmmWGHICIiEitchXcYcEuoxfiseDh++yYD/gDcGvn1o2YMD/RAvyXUCPxWUFOAG4Hr8SvllzvnXgcws75F7hsG/MrMDi+mz91bQr0EvIDfF+tZ4CptCSUiIiKJIkZW3x9crvZmy4M0CzpS2jLyejc+KcWMp53jc+A7oFXwuGyUc64RcA9+8/x5wPm2J+CDit7jnKsD9KaEEgEzW+qcOx/4F/B7/Ob5t5WVkIqIiIhIOQVMMssr6EjpDqAGkALsxCezzSK/3gqsMts3mYwHGimVoM477zwAPv7445AjERGRqiomRkoLc64+cCZ+Rh1gKTABsy3l7SroSOkG/GhpPfyiolbAG8CuyPsNyvtgkXhz4YUXhh2CiIhI7HDuHuBPQM0i72Th3COYPVyu7gKOlH4KnAGcjK8lvYa9Fzd9aUb38jw4VmikVEREROJFzIyUOvckfm0RFL/oyYChmBV3GmjxXQZMSq8ETsePjmbgFyg1jry9HjjXjNlBHxpLlJSKiIhIvIiJpNS5rvgj3w2fkP4I/IDfS74DcGikpQHdMPsqSLeBpu/N+C/w3z2xcBg+Sc0DppixOdinEIlfPXv6Q8fGjy96eJiIiEiVMiDyugXoj9novd517jLgZaAO/gTNiklKnaM68H3k2wvM+MGMrcDoUm4TSTi9evUKOwQREZFY0Bk/CvqnfRJSALN3ca4xfnvOzkE7DTp9vxmf7aaZkRO083ig6XsRERGJFzEyfb8Rv/i9OWbrSmjTBF/yuQmzRkG6TQr4+N3zlccGbC8iIiIiiak2QIkJ6d7v1QnaadAtoZ4EugNvOcdfgDn4PUoLPZsVQR8qEo969OgBwMSJE0ONQ0REJGTJgOHcvQHaVitPp0FMxtcONATeLOZ9K0dfInGpX79+YYcgIiISS+6ryM6C1pQWlNHEzIJnwrFENaUiIiISL2KkprSsvLAwwyxQjhh0dPO1cjxcJCHl5uYCkJKSEnIkIiIioXqQvQ9RqhCBRkoTmUZKJSjVlIqISNhiYqQ0SlQHKhLQDTfcEHYIIiIi4XNuFvAe8D5m35fVPHC3AWtKXymjiZlxfcWEVLk0UipFmRnrt2WzJHMHS9bvYGnmdpas30HG1l30aN+Ya04+mBb108IOU0REqqCYGCl1bi3+uHkDFuMT1A+CHidaYrflWOhUUkOHFjpJnFqzZSczl21iyfrtLP0lCd3B9uy8X9pUT07ikPRa1KyWzzfLN1ItNY2zj2xK3y5tOKVtQ5xzIX4CERGpSmIkKXVAN+BS4GKgDT5PzAA+iHx9hll+ubrV6nslpVXNkvXbGTs/g0/mr2Xuys0AOAct6qXRtnEt2qbXom3j2hySXou2jWvRol4aSUmOHj16kJ1XwEV/eYFRM1eyOSuX9k3r0LfrwVxyXEtqpqoaRkREois
mktKinDuWPQnq0fgEdQvwP+B94BPMssrsJmBSenCRS8lAW+CvwHHAr82YVJ74Y4WS0sRnZsz/eSufzM9g7LwMFq3bDsAxrepxTsdmdD+8MYc2qU2NlNJ/rho1ahQAvXr1YlduPh/O/ZnXpi5j/s9bqVMjmStPbM21pxxMm/TY+rdCREQSR0wmpYU51xafoF4CnIw/PTQLs9pl3nogq++dozaQCXxgRu/97ihESkoTU36B8fWyjXwyfy2fzM9g9eadJDk46ZCGnNOxGWd3bEbLCqgLNTO+WbGJ16Yu56Pv1pBXYPRo35jenVvTo32TMhNdERGR8oj5pLQw55riR08vxuy8MpsfYFJaH1gDZJtRf787CpGS0sSxcUcOk39cz8SF65i8KJONO3JIrZbEaYelc07HZpx5RBMa1a6+3/1v2bIFgHr16hX7/rqtu3hrxkremL6cdduyqZlajTM6NOH8o5vTo31jTe+LiMgBi6uktJwOZPV9DXyRa2tgnRnNKji2SqGkNH4VFBjfrt7CxIXrmLhwPXNXbcYMGtZKpfvhjTmjQxNO79CE2tUrJhkMuk9pXn4B05du5KPv1vDJ/Awyt+dQIyWJ09s34byjm3PGAcZkZuzKLWDrrly27Mxl685ctu7KZevOPLbuyuWolvU4/qAG+92/iIjErrhJSp1rBKwHCjAL9D+9A119v3vZ8XAzfhs0zliipDS+bNqRw+RF65m4cD2Tf1zPhh05OAfHtqpPj/aN6dG+Cce0rEdSUsWviH/vvfcAuPTSSwPfk19gzFzmE9SP52Wwfls2qclJdD+8Mecf3YxT2jYiKyefzVm5bNmZw+asXDZl5bIlK4fNO3PZnJXL5p3++6278n5JQHPzS/576xzccOohDDqnPdWTVT4gIpJI4jApDXzM6IGuvs8G3gJuN2Nr0DhjiZLS+PHZD2u5ccQ35OQX0KBmCt0P90noaYelH9C0fGUpKDBmrdjkE9TvMsjYuqvEts5BvbQU6qWlUD8thbppKdSvmUrdGsnUTUuhbo0U6qYlU++XX6dQt0YyaanVeObznxj51Qo6NKvDv3p14ojmdSvxU4qISDTFRFLq3E0BWtUC/kEUktKiq+/B15FmBHlILFNSGh9+WreNi5+ZysGNavK3i4/imFb1qRaF0dDSZGZmApCenn7AfRUUGLNXbub7NVupWyOZ+jVTqZ+WQv2aKdRPS6VOjeQDGu39/Id13PXOt2zdmcugcw7nhlPbRmX0WEREKleMJKWl7V+/V0sqOilNZEpKY9+WrFwufnYK23blMvqWUytk1fz+CFpTGis2bM9m8HvfMe77tZzStiFDrjiWVg1qhh2WiIgcgBhLSoOMdlT4SOm5wEnAbDPGFLp+EdAJmGHG2CAPjDVKSmNbfoHRf/hMpi3O5M0Bp9C5TcPQYhkzxv/Rv/DCC0OLobzMjLdnreKBD+eT5BwPXtyRizu11ClUIiJxKkaS0l1ACvACsLaEVjWBu4hCUjoVvwHqeWaMK3T9DGA8MM2MbkEeGGuUlMa2hz9awIuTl/DIpUdz1UkHhR1O3Fq5MYs7Rs3h6+WbuODo5vz9kqOoXzM17LBERKScgialztd93gU0B+YDt5vZFwHuOxWYCPxgZkeV0OgroDPQG7O3S2hT7oVOSUEaAR0ir9OKXJ8ReT0iYD8igb0/exUvTl5C3y4Hx0RCmpGRQUZGfJZRt25Yk1G/68Ifz23PuO8zOOfJyXy+cB1VvXxHRCQROed6AUOBh/Enb04FPnbOlfo/U+dcA+B1YEIZj5iOn7o/+cCjLfT8gCOlu4dpW5mxptD15sBqIMeMGhUZWGXRSGlsmrtyM1e8MI3jD6rPiOtPJqVa0J+foifeakpLMm/1Fm4fNYef1m2nbeNaXHpcSy4+rqXqTUVE4kCQkVLn3HTgWzMbUOjaIuAdMxtcyn3vAXPxCeflpYyUpgMtgc2YLS+pM8AnwSW1KXpLwKR0aaTj58y4pdD1p4C
bgWVmtA3ywFijpDT2rNu6iwuf/pLkpCTG3HoqDWvFxjTz2LG+bPrcc88NOZIDtys3nw9mr+a92auZsXQjACcf0pBLj2/JeUc3p26NlKg928zYuiuPtJRqpCaH/8OGiEg8KSspdc6lAlnAVVZoat059wxwlJl1L+G+m4BrgF8Bf6W0pDRKgialLwHX41daLQYWAu2BdpEmL5sxMFpBRlPr1q1txIgRYYchEQYsWb+DXbn5tGtcmxopSlqiLTe/gE1ZfqP+7Lx8kpyjTo1kGtRMpXaN5EBLK4PIysn/5eSp7Lx8AJKco1pS5KvQr5MKfV+/ZgrJ2s5KRASA008/PQf4rtClF83sxd3fOOda4Gexu5vZ5ELX7wWuMbP2Rft0zh2NXyN0ipktdc7dTwhJadCzDh8FeuE3Qm3HnmTUAdsj78eljRs3/jItK+EyM/74zre8PWsHz13TmXOPbh52SHtZuXIlAK1btw45kugwM+as3Mz7s1czbO7PbMrKplEt48JjW3Bimwa0a1ybQ9JrUSMl2ClR2Xn5TF28gXHz1/Lp92vJ3J5NcpKjS7vGdG2XTn5BAZt37jkedUvktKotha6ZQbvGqbxzY1caxMiIuYhIyPLM7MSK6sw5Vx0YBQwys6UV1e9+xRJ0oYNznAK8zN6Lmr4HbjDjqyjEVik0fR87Xp2ylAfGfM9tZxzKnWfv84Nc6BKlpjSInLwCJi5cx/uzVzNhwTpy8v2hbs5By/pptGtcm7aNa/3yemjj2jSuU51t2Xl8/sM6xn2/lok/rGNHTj61UqvRo0MTzj6yKT3aN6FeWrDSgIIC46slG+g3fCYdW9TlzRtOIS1Vx6aKSNVW0dP3zrk2wFIgv9DlJPzAYz5wvpmNoxKUe/N852gHNAXWmrE4KlFVIiWlsWHKT5n0fWUGZ3ZowvN9TojJ04fGjx8PQM+ePUOOpHLtzMlnSeZ2lqzfweL1e7/uzN3zb1id6snsyssnN99Ir53KWUc25eyOzejarhHVk/c/mRw7bw03vfENp7dvwgvXnkByDCx6ExEJSzkWOs01s4GFrv0IvFt0oZNzLgVfklnYTcBZwCXAMjPbXiHBl0EnOikpDd3yDTu46OkpNOob/nkAACAASURBVK1bnfdu6kbt6kGrSiRMBQVGxtZdhZLU7dRIrcbZRzalU+sGFXoM7MivlnPPB/O48sRW/OOyY7T5v4hUWQGT0l7ACHxyOQW4Eb82qKOZLXfOvQ5gZn1LuP9+YrWm1DneAHoD95vxUKHrfwXuB94049qoRCgJzcy4651vAXip74kxnZAuWbIEgLZt43KjiQqXlORoUT+NFvXTOPWw9Kg+q88pB7NuWzb/nrCIxnWqc9c5Hcq+SUSkijKzUc5vXn8PfvP8efhp+N1bM4W/+Xcxyrsl1KFmLC10vQ2wBFhuxiHRCjKaNFIarjFzf+bWt2bz8CVHc/XJMfl35BdVqaY0FpkZf35/Hm/NWMH9Fx5Jv25x+U+OiMgBiYljRqMk6LDU7mXQRY+z2X3eabOKCUeqkp05+Tzy0QI6tqhLr86xv6L9gQceCDuEKs05x0O/6Ujm9mwe+N/3pNepzq+PaRF2WCIiUhznPov8aibwV8xyyrol6IqBXZHXLkWudynyvkhgz01azM9bdnHfhR0rtP4wWrp370737sXuOSyVJLlaEk9ddRwnHtyAO0fNZerizLBDEhGR4vUAugODgOk4V+aR9EGT0u/wWwMMd44+znGCc/QBXsXvd/5dqXeLFLFqUxYvTFrMhce24KRDGoYdTiALFy5k4cKFYYdR5dVIqcawvp1pk16Tga/PYv7PW8IOSUREiuciX8fiR0xLbxywpvR64CV8Alr0YQYMMOOVcocaA1RTGo6b3pjFZz+s47P/60GL+mlhhxOIakpjy5otO7ns2ankFhjv/b4rrRvWDDskEZGoi5uaUucOjvwqHTgXOBez00q9pRyb578NXFbMW2+b0as8ccYSJaWVb+r
iTK5+aTp3nnU4t515WNjhBDZ16lQAunbtGnIksttP67Zx+fPTaFAzlbdv7EJqchJbsvypUFt2+qNTf/n1zhy27swlO6+AY1vVp0u7RhzWpLa2lxKRuBI3Sel+KNc+pc5xJXAhkc3zgQ/NeLv0u2KbktLKlZdfwK+f+pLt2XmMv7N74CMrRUoya/kmrhn2FbtyC0ptl1otiXo1U3DAum3ZADSqlcopbRtxSrtGdGnbkHaNlaSKSGxTUlpaB47awGVmvFYxIVUuJaWVa8S0Zfx19Hyeu+Z4zouxs+3LMm/ePACOOqpS9xKWAL5ZsYnPf1hHvbQU6qalUD8thXppKdSrmUL9tFTqpaVQIyXpl4Rz5cYspi3ZwFeLNzBtyQbWbPFrNdNrV+eUtg3p0q4Rp7RtRHqt6uQWFJCXb+TmF5BXYOTlF5CT76/lFRSQm28kJzka1a5Oo9qp1KmerMRWRKImJpLSPSvrgzDMzgzU7f4kpc6RhK8PuBY/clrDLPD2UjFFSWnl2ZyVQ48hEzmiWV3eHHBy3P2PWzWlicnMWLExi2mRBHXa4g2/jKTuj9RqSTSqnUqj2qmk165Oo1rVSY9836hWdRrUSqF+zVQa1EylQc0U6tZIicljdUUkNsVIUlrAvuuMim2JT0oDTYuWd/q+Mz4R7YUvXP3lgWbE5TysktLKc+/oeYz8ajkf/eE0OjSrG3Y45TZzpl842Llz55AjkWgyM5Zm7mDG0o3syMkntZojuVoSyUmOlGpJJFdzJCclkRK5npLkyMkvYOOOHDZszyFzRzYbtuewYXs2G3bkkLktm8wdOeTkFV9ekOSgXloKDWqmUr/m7tdU6tRIplb1atRMTaZmajVqpSaTllptr2s1U5NpXKc69dJSKvl3SUTCEkNJaVCBk9IyRzed4xCgD3ANsHtVSuEf63cCH5QjOKmCfsjYysivlnPNyQfHZUIKSkarCuccbRvXpm3j2hXWp5mxPTuPDdtz2JSVw+asXDZl5bApK5fNWTl7/XrNll0sWLOVbdl5ZOXkk19Q+sBBSjXHBUc3p2/XNhzXun7czUCISFw6PRqdljhS6hw34pPRwhvmF/3XzoB6ZmyPRnCVQSOl0WdmXP3SdL5fs5WJg3rQoFZq2CHtlzlz5gDQqVOnkCORqsLMyMkvICs7n6zcfLKy89iRk09WTt4v175Zvol3Z61iW3YeR7esR98uB3PhsS20iFAkQcXESGmUlJaU7q4X2J2I5gDjgXeBxcBE4njafjclpdH38Xdr+P0b3/DgbzrSt0ubsMPZb6oplVi1PTuP92ev5vWpy1i0bjsNaqbQq/NB9DnlIFo10P6tIokk5pJSPz1zPNAmcmUpZt/sV1cBklKAV4C7zNgcea8j/hQnJaVSql25+Zz5+CTq1Ejmf7eeSnK1oIeIxR6NlEqsMzOmLdnA61OXM+77DAB6HtGU67q2oWu7RpraF0kAMZWUOtcHeARoUeSd1cDdmL1Zru4CJqUA64H38SOlmcA3KCmVMvx7wiKe+PRH3hxwMl3bpZd9g4hUiNWbd/Lm9OW8NWMlG3fkcGiT2gy54lg6ta4fdmgicgBiJil17i7g0d3fFdPCgD9i9njgLktJSh8GrgYOKvIA8IubaqKkVErx8+adnPH4RE5v34Tn+pwQdjgHTKvvJR7tys3n/327hn+N/5HM7dk8ddXxnHVk07DDEpH9FBNJqXNHAXPYc7b9DnxpZxLQFp8jAuQDx2L2fZBuS5xLNePPZrQBegDDgC2FHl6TSILqHKuc+yVTFgEgv8B4cMz3mMGfzz8i7HAqxF133cVdd90Vdhgi5VIjpRqXndCKD27uRvumdfjdiK8ZMW1Z2GGJSHz7PT6HzAbuAOpj1gmzY4D6wF34tUhJkbaBlFngZ8ZkMwbijxa9AhgN5LInQW0ReXhgzrmbnHNLnXO7nHOznHOnldE+1Tn3YOSebOfcCufcbYXe7+ecs2K+apQnLqkYGVt2cfVLXzF
2fga3nXkYrRsmxkKLp59+mqeffjrsMET2S3rt6rw18BROb9+Ev46ez6Mf/0BBGdtNiYiUoCt+cPJ+zIZilv/LO2Z5kSn7+/F5Yregne7viU4NgN74vUu7Uo5pfOdcL2AkcBPwZeS1P3Ckma0o4Z73gFbAX4BF+AQ5zcwmRt7vBzwDtCt8n5lllBWPpu8r1oQFaxn09lyy8wp46DdHcdkJrcIOSUQKycsv4L4P5/PG9BX8plML/nn5MVRPjusqLJEqJUam79cDDYGDMVtVQptWwApgA2aNA3W7P0np3s+kLXCNGQ8Fa++mA9+a2YBC1xYB75jZ4GLanw28DbQzs8wS+uwHPG1m5d7tWklpxcjOy+cfHy/klSlLObJ5XZ66+jjaVeDm47Fg6tSpAHTt2jXkSEQOjJnx3KTF/HPsQk5p25AXrj1Rp0KJxIkYSUqz8QcwJVPy4iSHrynNxax6oG4PNCktD+dcKpAFXGVmbxe6/gxwlJl1L+aeZ4HDgRlAX/wiq4+BP5vZ9kibfsDLwCqgGr749q9mNrusmJSUHrhlmTu49a3ZfLd6C/26tuHu8zok5Mbd2qdUEs37s1fxx3e+5ZD0Wrza/yRa1k8LOyQRKUOMJKW7d2jqX0bL4VTkMaMVLB2fNK4tcn0t0LOEe9oCp+KLaS/DF9A+ha9lvTzSZiHwW2AuUAf4AzDFOXesmS2qyA8gexs9ZzV/fu87kqsl8eK1J3B2x2ZhhxQ1L7zwQtghiFSoS45rRdM6NfjdiFlc+uwUXu13Eke2iM9jgEUkFK9WZGeVPVLaAr+hanczm1zo+r3ANWbWvph7xgGnAc3MbEvk2tnAJ5FrRRNcnHO7R0s/N7Pbinl/IDAQIDU19YTs7OyK+HhVSlZOHveNns/bs1bRuU0DhvY+jhYaZRGJSz9kbKXfKzPZnp3Hc32O57TDApV/iUgIYmikNKiYHSnNxNcXFN0krylQ0qKkNcDq3QlpxILI60HsO+qKmeU7574GDiuuQzN7EXgR/PR94OgFgAVrtnLLm9+wJHMHt55xKH8487C4PqkpqEmTJgHQvfs+VSYica1Ds7q8f3NX+r86k/6vzuQvFxzBtaccXCX+XovIfnktGp1W6kgp/LLQaa6ZDSx07Ufg3RIWOg0EngSaFKohPRMYDzQ1s3XF3OOAryPP+W1p8aimtHxmLd/EVS99Rf20FJ7s1Ymuh1adU5pUUyqJbuuuXG57azYTF66nQ7M63HdhR7q0axR2WCJSSEyMlEZJGElpL2AEfiuoKcCNwPVARzNb7px7HcDM+kba18aPjH6F3/OqPvACsMDMroi0uS/y/iKgLnAbcC3QzcxmlBaPktLgCgqMi575kg3bcxhz66mk1w60mC5hLFmyBIC2bduGHIlI9JgZH8/L4O//bwGrN+/k/KOb8efzj6BVg8TYb1gk3iVyUlri9L1z/Ko8HZkxuexWYGajnHONgHuA5sA84HwzWx5pclCR9tudcz3xi5tmApuAD4C7CzWrj5+Ob4Y/eWo28KuyElIpn3dmrWLe6q0M7d2pyiWkoGRUqgbnHOcf3ZwzOjThxclLeHbiT0xYsI7f/aotv+9xKGmpibezhoiUk3PXASMwC1Zb6lwScC1mpU77lzhS6hy7l/sHYWaVXp9aITRSGsy2XbmcPmQSBzVM493fd41sP1a1jB8/HoCePUvaKEIk8fy8eSePfPwDY+b+TPN6NRh8/hFceEzzKvlvgEgsiImRUr/QaRl+9f0ozH4sod3h+MOW+uE32i/1p9qyktKgAp/oFGuUlAbz6Mc/8PykxYy+uRvHtq4fdjihUE2pVGUzlm7kgTHzmf/zVk5q05B7LzySo1rWCzsskSonRpLStUBj9gxeZgLfRl7BbwF6TOQV/HGj6zArdd/I0pLSontPnY2fHp+C36S+Ff4800zg/5lR6oKiWKWktGzLN+zgrCcm8+tjm/PElZ3CDic0K1euBKB169YhRyISjvwC479fr+S
xTxayKSuH3p0P4o/ntKdBrdSwQxOpMmIkKa0NDMavD9r902nRhHL3dMo2/FHwj2C2rdRugyx0co5rgNeBXma8U+j6lcBbwEAzXg7wMWKOktKy/W7E13yxKJPPB/Wgad0aYYcjIiHbsjOXoeMX8dq0ZdRLS2HweR24/IRWmtIXqQQxkZTu5lwa0As4B+iMH7wEv13nTPye8v/FLFCiFTQpXYA/6rOeGdsLXa8NbAV+NKNDOT5GzFBSWrqpizO5+qXpDDr7cG45o9htX6uMsWPHAnDuueeGHIlIbFiwZiv3fDCPWcs3cVKbhvztkqM4vGmdsMMSSWgxlZRWsKBJ6U4gFRhsxj8LXf8T8AiQbUZcHuejpLRk+QXGBf/+gm278pjwf90T8jz78lBNqRyQHTvg5ZfhnHOg/T6H18WtggLj7VkreeTjH9i+K48bTmvLbWceSs3UuFz7KhLzlJQ65gJHRb7NxJ+y1Jw9BazzzDg2KhFGmZLSkr0xfTl/eX8ez1x9PBcc0zzscEKXkeEPHWvWrNQ6bZF9mcG118Ibb/jvzz0XbrvNJ6hJlXBq0pIl8PDD8LvfQefOUXnEhu3ZPPLxD7wzaxUt66fxwEUd6Xlk0cP7RORAxWRS6mt3TgIOBvat8zN7PVA3AZPSXwPvA9XYu5DVAQXAb8z4f0EeGGuUlBZvy85cTh8ykUMb12bU705RrZjIgXjpJRg4EP74R6hTB559FtasgcMPh1tvheuu89ej4dNPoVcv2LQJUlLgscd8Qhylv9Mzlm7kng++48e12zn7yKbcd1FHWtaPy4k0kZgUc0mpc4cCHwIlTQEZZoGmTgL9iG7G/4Bzgen4pNRFXr8Czo7XhFRK9tSERWzKyuHeC49UQhoxZswYxowZE3YYEm/mzvWJ51ln+dHKe+6BZcvgzTehYUP/XsuWcPvt8NNPFfdcM3j8cT8q26IFzJgB553nn3PppT5JjYKTDmnI/249jT+d24HJi9bT8/FJvDBpMdl5+VF5noiE7hmgAz43LOkrkHIfM+ocNYEGwCYzssp1cwzSSOm+lqzfztn/msxlx7fiH5cfE3Y4MUM1pVJuW7fCiSf6etLZs6FJk33bzJgB//43/Pe/kJcHF1zgRzJ79tz/0cysLBgwwCe+l14Kr70GtWv7RPXJJ+FPf/KJ6n/+A6eccmCfsRSrNmVx/4ffM37BWprWrc4Np7blqpMPonZ11ZuK7K8YHCndDNQBfgA+AnZQdHsoswcCdVWepNQ5koHjgUZmfBz4xhimpHRf1w+fyfSlG/l8UA8a16l6x4mWJDPT7wmcnp5eRks5IDt3wvjxMHo0fP65H9375z+hZpydvW4GvXvDu+/6z3HaaaW3X7MGnn/ef61bBx07wv/9H1x9NVQvx9/D5cvhkktgzhx46CH485/3TW5nzPBT+qtWwSOPwJ13RrW29YtF63lu4mKmLt5A3RrJ9O3Shn7d2lTJ44pFDlQMJqUZ+I30W2C29oC6CpqUOscVwNP4xU1mRrJzTAAOAW40Y9yBBBIWJaV7m/zjevq+MoO7z+vAjd3bhR2OVBXr18P//ucT0XHjfGJat65flPPZZ772cuRIP+oYL559Fm6+2Sd9d98d/L7sbBg1yk+9f/stNGvmR05vvBEaNCj93okT4YorICfHL6r69a9Lbrt5M/z2t/D++3509rXXoFGj4HHuhzkrN/P8xMV88n0GqdWS6NW5NQNOa0vrhnH2A4dIiGIwKX0GuBE4FrN5B9RVwIVOpwGfs6c2wMyo5hx3AkOAYWYMPJBAwqKkdI+8/ALOG/oFOfkFjLvjV1RPrtpbQBX13nvvAXDppZeGHEmCWLTIJ6GjR8PUqVBQAK1awW9+47+6d4fUVJ+UXncdZGTAAw/4qedqMf5nc9Ys6NrVT8GPGbN/o5BmfsR4yBCfqNeqBddf72tCDzlk37ZPPw133AGHHup/T4NsO7X7vkGDfGnBf/4D3bqVP9ZyWrx+Oy9OWsJ
7s1dRYHDhMc25sUc7OjSrG/Vni8S7GExKLwKGA5vxOeFCIHevNmaTA3UVMCn9CL/Q6Qd8MevupLQ9sACYb8bRwT9B7FBSusdrU5dx34fzeeHaEzino7Y9Kko1pRXk66+hXz+YP99/f+yxexLR444rvo5y0yb4/e/9CGK3bjBixL6JWazYvBmOP97Xh86eXTGjj99+60dO33zTJ++XX+4Tyc6dYdcu/3szfDhceKH/valXzjPpZ82CK6/0U/9/+5vfJaAStqrK2LKLl79cwpvTV7AjJ5/T2zfmzrPac3SrcsYvUoXEYFJawL5HjBYWePV90KR0I/5s08OBRexJSlOAbGCzGQ2DPDDWKCn1Nmfl0GPIRI5sXpc3bjhZK+6LsWXLFgDqlfd/+LLH+vU+YXPOJ1UXXQRt2gS718wnZTfd5H/91FPQt2/UtjbaL2Zw2WV+dHTyZOjSpWL7X7XKf+7nn/eLqH71K7+o6euv4d574b779j+Z3LLFL456+22/Yv/116Fx44qNv6RHZ+Uy4qtlvDplGVt35XLvhR3pc/JB+ndIpBgxmpSWxjALNL0VNCnNBpLxG6JmsycpbYw/31QnOsW5h/73Pa9OWcpHfzhNU2gSHfn5frP4L7/00/XHH79//Sxf7pPRyZP9iOHzz0e9FjKwoUP99PqQIX6RUrRs3epPh3rySdiwwSeQFVFWYuZ/P++4w/+evvWWT3wryZasXG4fNZvPF67n8hNa8beLj6ryJ8mJFBWDSel9ZbapyNX3zrEEv0v/6cBE9iSl/wDuAhabEZcHoysphU07cuj66Gecd3QznriyU9jhxKxRo0YB0KtXr5AjiVN/+Yvfp/Pll/0CmwORn++ns++5x4/mDR/u9wEN0/TpfoX9+ef7xUOVMcqXl+e3m6ro0fs5c/x0/uLFvo538OBKq+MtKDCGTljE0AmLOKplXZ675gQthBIpJOaS0goUNCl9Dvgdvoi1Pr52YBH8kog+Z8Yt0QoympSU+o3yH//0Rz65/Ve0bxalU2USgGpKD8CHH/qa0Rtu8KcbVZTZs+Gaa2DBAujTx9dUnnEGVPa2XRs3+nrYpCT45puyV8nHg23b/LGkb73lF2yNHAlNK+/Y0AkL1nL7qDlUS3L8u/dx/OrwyiklEIl1SkodLYE5QCP2PWZ0A9DJjNVRiTDKqnpSuis3n1P/8RlHtazH8P4nhR1OTMvK8mdF1Iy3/TLD9tNPfiunQw/1U/c19j0W+YDs3On34nzlFT+tDT5B7NnTf516anT3ODXzCffYsTBlStTOlg+FGQwb5rekql/fbzN1xhmV9vhlmTu4ceQsFq7dxqCz23NTj3aqM5UqL2hS6py7CT+b3RyYD9xuZl+U0LY78Aj+qNCawHJgmJkNKaHzz/C1omdGfl0a3y6A8uxTejjwb+BMoBqQD0wAbjfjh0CdxKCqnpS+OX0Ff37/O94ccDJd22lTeKlgWVl+sc+qVX6Fd9BFTfsjL88v+JkwwW+lNGUK5Ob6baW6dfMJ6plnwgknQHIFnShkBvffDw8+6E9luvXWiuk31nz3nZ/OX7jQL6j6618rbTo/KyePu9/9jg/n/szZRzbl8SuPpU6NlEp5tkgsCpKUOud6ASOBm4AvI6/9gSPNbEUx7U/Az35/B2QB3YAXgLvM7NliHlAAFGCWXMbqe38sfUUudNo7DmoADYGNZuwq180xqConpQUFRs8nJlGrejIf3tJNIxBlGDlyJAB9+vQJOZI4Yea3fhoxAj76yK/orkw7dviR2fHj/decOf56errfKP788w+sfzO/ddKQIX4f1Vdfja2dACra9u3+MIDXX4fTT/ejps2bV8qjzYxXpizj4Y8WcHCjmrzQ5wQOa6pSI6maAial04FvzWxAoWuLgHfMbHDA57wHZJvZVcW86RNRs2oVufo+0N4hzlHPOQ5yjnQzdpnxsxm7nCM9cl175MShTxesZUnmDgb+qq0S0gCGDRvGsGHDwg4jfrz
wgk9g7ruv8hNS8JvNn3MOPPaYrz1dt85vDt+qla89feqp/e87P99vnzRkiE/UXnklsRNSgNq1fTL/6qvw1VfQqRN8+mmlPNo5x/WnHsKbN5zM1p15/OaZKXw492fKO6giUhU451KBE2CfkzbHAV0D9nFcpO2kYhuYJf2SaPpfl/YVeFolaE3pu8DFwB1m/LvQ9VuAocD7Zlwe9KGxpHXr1jZixIiwwwjF4vU7yMsv0OKmgPLy8gBIrqip3wRWZ8ECjvvDH9h0/PF89/DDlbIRe1DVdu7kiL/9jfSpU1l98cX8dMstWDmmol1ODkc8/DBNJk1i2bXXsqx//8RPSIuouWwZHe+/n5orV7Jg8GDW9exZac/OyzeWb8wiKyeP6snVaFQ7lQY1U0iKwf8GLj+/XH+2RII4/fTTc/DT7Lu9aGYv7v7GOdcCWA10t0InKTnn7gWuMbMSj3tzzq3Cn2OfDDxgZg9WdPylCZqUrsIXyh5UeEGTc7QAVgGrzWgdtSijqKpO389avpHLnpvG/RceSb9uMXoyjsSnzEy/B2m1ar6OtGEMnquRn++PK338cT+KO2oU1A2wP++OHX4/0HHj/L133hn9WGPV9u3w61/DF1/4EdRKLGvJyStg9JzVjJy+grkrN5OWUo2Lj2vBNScfzFEtY2Ti7ttvfQ3zrbf6OlyRClLW9P0BJqWHALWBU4B/AH8ws7JH7pxLAk4GDgKq7/O+2etl9kH5N8+va8aOQtdrAduAHDMqeElt5aiqSenA179mxrKNTL37DGqmauQviOHDhwPQr1+/UOOIafn5Psn74osD2yC/srz0kj8hqkMH+N//4OCDS267aRNccIHfj/Sllw58r9VEsGOHP5Xr88/9tP5111V6CN+t2sLIr5Yzeu5qduUWcNxB9elz8sFccEzz8DbeX7nSL/D7+Wc/ij5tGpyk3U2kYgRISlPxi5WuMrO3C11/BjjKzLoHfM49QH8za1dGwyOA0UBJ7QIfMxp0Tm1b5PXsItd3f789YD8SA5as386nC9Zy7SkHKyEth+HDh/+SmEoJ7rvPLyp65pnYT0jB14WOHeuTiJNO8rWSxcnIgB49/Or+//5XCelutWr5I1V79oT+/f3BCJXs6Fb1+MflxzB9cE/u/fWRbNmZy/+9PZcuj0zg4Y8WsHxDJQ86bN7sF9Ft2+ZPHWvRwifru+J+XbDECTPLAWYBRU8UOQuYWo6ukihu1HNfzwKH4lfal/QVSNCR0nFAT2AL8DiwADgCuBOoB4w345ygD40lVXGkdPB73/HuN6uY8qczaFwnyJ83kTJs2OCnwG++Ga6/3u9tGU9++MGPgq5e7aeiC5/atWyZPy3q55/hgw/CPzkqFu3c6csaxo71C9wGDgwtFDNj2uINjJy+nE/mryW/wOjRvjH9ux3CaYemk5QUxdrTnBw/U/Dll/Dxx376ftw4v+Durrvgn/+M3rOlyijHllAj8FtBTQFuBK4HOprZcufc6wBm1jfS/lZgKbAw0sWvgH8Bz5rZ3WUEtBWoBXwAjAVy9mlj9lqgzxYwKb0UeId996FykWuXm/F+kAfGmqqWlK7flk23f3zGZce35JFLjwk7HIlXO3f6fUA//dSPjM6e7bdIOvlkmDix4jfIrwyZmXDJJT6heOghfyzqggU+Cc3K8ttadekSdpSxa9cuuOwy//v0zDO+LCJka7fu4s3pK3hj+goyt2fTrnEt+nVtw6XHt6JW9QqeJSoogL59/VZZr78O1167573f/c6XfHz5JXQNtPi5ePPn+z1x09KgWTO/JVfz5nv/unbt0vsw839/t2/f89W0aaWe1iUHppyb5/8RvyZoHnDH7hpT59xEADPrEfn+dmAA0AbIAxYDw4Dnzaz0LZ/8VlNtgfqYbSu1bVkxl2Pz/CH4kdGihpjxxwMJIkxVLSl9YtxCnvr8J8bf2Z12jcv4x0v28lLkeMwBAwaU0TLG5eT46erMTD/9WqeOX+RTp07Jm8r
n5/vEc/een19+CdnZkJLiE7Xdpyd17lxxG9OHITvbH4U6cqRPUCdP9p9xQM+bEQAAIABJREFU3Dg4+uiwo4t92dlwxRV+Sn/oUH8SVAzIySvgo+/W8OqUpcxdtYU6NZK58sTWXNelDQc1qqDTvgYPhkcfhYcf9r8ubNs2/+cnNdXvl7s/J4wtXuxPJ9u50/99zcjwh0MUVavWnkQV9k4+t2/3dcBF/7+fluZ/kOjfv/xxSaWLuWNGnesPvAw8BDyMWfZ+d1Wefd6cozNwEdAUWAt8aMbM/X14LKhKSWlWTh5dH/2Mzm0a8lLfE8MOJ+70jGx7M378+JAjKcO2bbB8efFfK1bAmjX7/k9ptxo19iSou5PV6tV9LeWmTb7N0Uf70cOePeG008oemYk3ZvD3v/tTi9q08aPBhx4adlTxIyfHlz988AE88QTccUfYEe3lmxX/v737jo+qSv84/nkSeq/SEZCigor9J4qggnXtruCKiu7K2kUFxa4oinWxoYsN68oquIoFFZQi2MCCFBFRkBY6CSWQdn5/nIkMMWWSzMydTL7v12te087c+8zcSfLk3HPOs4mxM5fywY+ryXWO4/ZuxsVHtqPHXo3Lvl7z00/7nuF//tPfLmw7U6b4n5nrrvOfS2msXOkT0vxxqvvu67+nGzf6n+e0NH8dfjstzS/FVqdO8ZdatWD0aPj0Uz/29amnfGIrCSvhklIAs/8BpwLZwFp8b2s+R0mTpfI3U9kXH65MSelLs5Zy57vzeeuyIzikXQIu0yPlk5Pje/fee2/3x6tWhbZt/czy/Os994SmTf1p6S1bfM34LVsKv719u09E+/Txdc/ze2CS3TffQPv2vgKUlE52Npx3Howf78dRDh0adER/siZjB69+uYzXv/qdDduy6NysDucc3JpD2zWia8v6VKsS4Tzgd9/1P3cnnwxvv138mYIrr/RJ67Rp/h+6SKxbB716+VK9n33my+RGW24u3Hsv3H23X4nizTeha9fo70eiIuGSUrObgRH44Zz5wzr/eJZYlBk1oy5wMrAn/Hn5J+eI6wKr0VJZktKc3DyOeWQqTetUZ8IVRwYdjsTCsGHwwANwww3+NHp+Atq8eUItXi+VRHa2H1c5bhzcf7//fiagHdm5vDd3NWNn/ca8lRkA1KiawgGtG3Bou0Yc3K4hB7VtSP2aVf/84q+/9qsydOvmE8aSehi3boUDDvA9qT/8UHL79HT/j+CCBfDRR3D00WV7k5GaMgXOP9//M6rT+QkrAZPSVUBxvRXRTUpDp+0/wNe8L3yPjgpZtqKyJKXvzV3FVa9/xzMDDubEbpWkpyvKRo8eDcAVCTCB40/efRdOP92fPnzmmaCjEfFycvwp4ddf90tunX++P7Ufr0k1zvmJd3l5uyYDNWxYZAWutRk7mL1sE7OXbmL2so3MX5VBbp7DDLo0q8sh7RpyyJ6NOLR9I1ptWOXHU9et69fkjfQ9TZ/uez6vuqr4Urfbt/uZ/F98Ae+843ti4yEtzR8nnc5PWAmYlObPvj8L+Ajnyrz+WaRJ6UyguGmnTklp4nLOcfpTM9myI4fJ1/ciNZZLoiSxk046CYAPP/ww4EgK+PVXvyZox45+AlJFnPkuySs31ydfY8f63sHUVD8U5Pzz4YwzfFIXC99956spzZy5++NVq+5KUIu6btkSmjVjuzO+/32zT1SXbeLbZZvYujOHRtvTmfifG2mUtY3VH0yhfY8DSzcedfBgPxHs00/hmGP+/HxWlv9sJk2C//xn9yXK4kGn8xNaAialrwLnAW1wblW5NhVhUroFqAVMA8YD2yiwPJRzRLQGVaKpDEnpF0s2cN6zXzLizG6cf3gxFWuk4tmxwy8x89tv8O23fgykSKKaP98vmfT6637iXc2avof//PP9Wp5VCzlFXlrr18Ntt8GYMX488IgR0Lnz7hOA8q/zb69b9+ftmPnX5y+11KIFeS1asKZ2I1Jef40GixdwXv8RfNtyH9o
3qc3xXZtxYtfmHNC6QclroW7f7k/j5+T4cqThiXluLvztb75Iw7PP+tUggqLT+QkpAZPSM4AxwGrgMWApu090grByp8VuKsKkdDnQEmjsHJtLF21iqwxJ6cUvfs3cFenMHHZscGX3JDYGDfJ/uCZO9HXIRSqCvDx/yvu113zytXEjNG4M557rewWPPLL0S4vl5PiF+2+/3SdRV1/tK4w1aFDya7OzYe3aXTPY8y+rVu1+Py3NJ42pqfDmm6ztcxKfLFjDpHlpfLFkAzl5jub1anBC12ac0LU5h7VvRJXUIsZzz5zpJzvlz9gHP9xg0CBffOLhh/348KClpfkk+bPPoH9/vxZxSoq/pKbuuh1+qVLFr9Cxxx5BR5+UEjApzePP69iHi7jMaKRJ6QhgGNDLOT6PKMgKItmT0p/XbOH4f03n+r6duea4TkGHU6E99thjAFx77bUBRxLy0kswcKBfE/G++4KORqRssrL8JJ7XXvNjozMz/bjPk0+GU0/14yrr1y9+G9On+yR07lw/Mejxx2Nzujk31/fEpqT41SvCpG/P5tNFPkGd9vM6dmTn0aBWVbo0q0uL+jVoVr8GLerVoHn9mjSvX4MW9WvQ9O5bSXn0Ub8Obp8+fpWCRx7xPb333BP9+MsqN9fHc++9/nYkOnXyZXsbaaWXaEvQpLQ4UZ/odCnwAD4Tfh5fhmq3VXud4+VIdphokj0pHfLmD7w/dzWzhh1Lw9rVgg6nQjvttNMAePfddwOOBPjxR99jcfjhfh3NirxgvUi+LVt8gjZxol/abMMG/90++mg47TSfpHbosKv9ihU+kXvjDb/axKOP+nKnZV1vNEoys3KZ9vM6pixcw7IN21mdkcma9J1k5e7+t7tWbhYfvDSY2jk7+LbnKZzw7ov81v9idjzyL9o3rZN4Z7YyM32BhLy8XZfc3N3v5+X5Smhnnukngn38sS8aIFGTgEnpiyW2cS6isR+RJqUlds06R4X8q5jMSWn69mwOGfEJ5x3WluGndws6HImWjAw45BC/vMy331aedUOlcsnN9T1tEyf6y4IF/vF99/XJac2afg3UvDy46Sa48cayVUqKE+ccG7dlsTp9B2npO0jL8NdVZ3/D1bdfRIrLY3y3Yxly8mCcpWAGbRrWouMeddirae3QdR067VGX+rWiMPY21l57DQYMgEsu8cMRAv5HIZkkXFIaRaVJJPWNqmA+WpBGdq7jnINbBx2KFObTT+HJJ/1EgtNOi2ySh3Pw97/7GfeffqqEVJJXaqofW3rkkb5855IluxLURx7xY0jPOsvfbtcu6GhLZGY0rlOdxnWq061V2HCEE7pA/c3w00+c8tAj7LNpJ0vWbeWXtVv/uP78l/Vk5fhe1hSD4/ZpxkVHtOPIjuWoQhVr558Pixb50/5duvh/GkRKEGlP6UUltdHs+8Rz0Qtf8+v6rUwfekzi/uKqQB5++GEAhgwZUv6Nbd0K++zjywc655egGTQILr3U3y7KqFG+TGGCVskRiYvNm2HNGp/sVAK5eY6VmzJZsm4rXy/dyLhvlrNxWxZ7Na3NRT3acdZBralTPQFPVjrnK3v997++uteZZwYdUVJI5p5SlRlN0qR007YsDh0xmX/07MCwk/YOOpykcPbZZwMwfvz48m9s6FA/u3b6dF9TfvRoP9kjNdX/4r7iCl8pJvyfiVmz/KLbf/kLTJig02EildSO7Fzen7ual75YytwV6dSpXoWzD2rFhT3asVfTOkGHt7vMTD/57IcfYMaM2JRJrWSUlCaxZE1Kx33zOzeN/5GJVx3Ffq1LmLkq8TVvHnTv7tf8e/bZXY//8otf0uaFF/wSOXvv7ZPTCy/0M5QPPBCqV4c5cyJb5kZEkt53v2/i5S+W8d7cVWTnOnp2asJFR7TjmL33SJxCKWvW+EmZWVm+NGtrDSkrDyWlgBkDgOuBLkDBkjGa6JRgLnj+K5Zt2M60ob116j6R5OX53s6FC/14q8aN/9wmM9O
f7ho92v8Cr1ULWrWC33/3JQcPPDD+cYtIQlu3ZSfjvvmdV7/8nbSMHbRuWJOzDmrNSd2as3fzusH/HZg3zxf62Gsv32NaJ8F6dCuQSp+UmnEu8AZ+Bn5h32yVGU0gm7ZlcciIyQw6ugM3nahT99EycuRIAIYNG1b2jbz4op+N+vzz/rokc+b4hbXfesuPJx04sOz7FpGkl52bxycL1vDKF8v48rcNOAd7Nq7FiV2bc0K35nSPpOJUrEyaBKecsmsIUmqFTBsCp6TUmAb0BLbjy406YCPQGNgMbHaODkVvIXElY1L6xte/M2zCj7x39VG7z/KUcunfvz8Ab7zxRtk2sGGDn5ix995+LGlKEZVeRESiYN2WnUxe6Bf0n7VkPdm5jmb1qnNC1+acWFLFqVh58klf6OCGG/y4eim1hE9Kze7AL5hf6goQkSalm4B6wJHALEI9o2bcDlwFHOsc80u780SQjEnpBc9/xfKN2/lsiE7dJ5RLL/U9pd99B/vtF3Q0IlKJpGdm89lPa5k0L42pP69lR3YeDWtVpc8+zTi8Q2PqVK9C7eqp1KqWSq1qVXa7rlk1Nbq9q1df7ZPTMWP870UplQqQlPq17SOs4rTbSyNMSrOAVKAmkBl6uBpQHdgKfOYcx5V254kg2ZLSDVt3cth9U7isVweGnqBT9wlj1iy/3uKQIfDQQ0FHIyKVWH7FqY/mpzF54Rq27Mgp8TU1q6bSoFZVTujanP6HtWHv5vXKHkBOjl+b+ZNP/Cn94ypk+hAYJaXGeqAh/tT9GqAucDqQDkwDtjtHhRy1nGxJ6etf/c4tb//IB9f0ZN+W5filIX9yT6gW9e233166F+bk+GVQNm3yVWk0wF9EEkRWTh4rN2eyPSuHzKxctmXlkpmVw7aduWzPzmX7zpw/HluxKZMpC9eSlZtH9zYNOO+wNvxl/5bULssaqRkZ/h/1tDRYtiyhq3ElmmROSiP9Jq3CJ6V7AAuBw4B3wp7fWNodS2y8/+Mq2jepzT4t6gYdStJZtGhR2V74+OMwd64f2K+EVEQSSLUqKbRvEnl+s3FbFhO+XcEb3yznpvE/cs97Czn1gJacd1gb9mtVP/IhY/Xq+UmcPXvCq6/64iFS6UXaU/oScAFwLtAUeKpAkxHOUcruo8SQTD2l67fu5LARk7mid0eGnFA5Kp0kvBUrfOWmXr18eUSN8RWRJOCcY86yTfzn6+W8/+MqdmTnsW+Lepx3WBtOP7AV9WpEWDb54IP9+qU//qjfjxGqAD2lvgqoc6Wu9BlpUlobqANscY7tZgwD+gE5wNvAA86RW9qdJ4JkSkpf+2oZt749jw+v7ck+LXTqPiH89a/w3nv+tH379kFHIyISdemZ2bz7/Ur+8/VyFqzOoEbVFA5t14hOe9Slc7M6dGpWl07N6hSeqI4d6wuJTJ6ssaURSviktBxU0SmJktK/PfslaRk7mHJ9L826j4E77rgDgOHDh0f2gkmT4KST4N574dZbYxiZiEjwnHP8uDKdN2ev4IcVm1m8ZiuZ2bv6q1rUr0GnZnXpvEcdOocS1b0bVKNmx/ZwxBHwzjvFbF3yJXNSWuSYUjPalmZDzvF7+cORslq3ZSdf/rqBq47pqIQ0RpYvXx5548xMuPJKvy7pkCGxC0pEJEGYGfu3bsD+rX0Z5Lw8x4pNmfy8Zgs/r93C4jVb+XnNFl75dQM7c/IAaFirKhPPvZDWox+FJUt8xSeptIqb6LQUv0h+JFwJ29qNmV0BDAVaAPOBwc65GcW0rwbchh/X2hK/AsDDzrnHw9qcDdwD7AUsAW51zr0daUwV3aT5aeQ5OGX/lkGHkrRefPHFyBvffz/8+itMmeLr1YuIVDIpKUbbxrVo27gWffZt9sfjuXmO5Ru3s2jNFh76aBHnrjuAz1NSSXnqKXj00QAjlqCVVMrBSnGJiJn1Ax4D7gMOxC/G/6GZFdcz+wZwIjAI6AL8FZgbts0jgHHAa0D30PWbZnZ4pHF
VdO/PXUXHPerQuZlmdwfu55/hgQfg/PPh2GODjkZEJKGkphjtmtTmhK7NmXBFDzoe2IWJnY9kx7+fJTdjS9DhSYCKHFNqRim6hcA5Lo5oh2ZfAXOdc5eGPbYYeMs5d3Mh7Y8H3gT2cs6tL2Kb44BGzrm+YY9NBtY5584rLp5kGFO6dssODr9vClcf24nr+3YOOpykdfPN/ut5//33F94gNxfefRduv93Pul+0CJo1K7ytiIgAkJObx9h/jeMfQ//GK+cP4Yzn7qNuJLP3K6lKOaY00iSzNEKn4Q8GCha8/RjoUcTLzgC+Aa43swvxFaU+BG5xzm0NtTkCeKLA6z7Cl0BNepPmpeEc/GX/FkGHktQ2bNhQ+BNbt/ryoaNG+VP27dr5dfeUkIqIlKhKagr/GHIe6198gB4fvsHZT57McxcfTtvGWlC/sinp9H20NcGXK11T4PE1QPMiXtMBOAo4ADgbn2ieCIwNa9O8NNs0s0FmNtvMZufklFxeLdG9P3c1nUKzGSV2xowZw5gxY3Y9sGIF3HQTtGkD11wDzZvDW2/BL7/AX/4SXKAiIhVQk1uGstfGFez1/SxOf+pzvvq1iI4AqRjMGmOWh1nEiVbESakZXcx41Iz3zfi0wGVK2SKOOEYH/M0595VzLr8H9GwzK1NXlHNujHPuEOfcIVWqlKE8WgJZm7GDr5du5BT1ksbPnDl+vGj79vDww3D88fDFFzBzJpx9NqSWurKaiIj89a/QvDmPpE2nYe1qDHj+K/77TSlWPZFEFfG8o4gyMjMOBqYChfWlG5HP0l8P5AIFk8lmQFoRr1kNrHTOpYc9tjB03RbfI5pWym0mjQ9Dp+5P2U9JaUw5x5CzzoLZs3l4xQqoW9f3jl59tT9dLyIi5VOtGlx+ObXuvJN3HnmEK2Zv48bxc/ll3VZuOnFvUlO03GHC8KsolaTU414j7Sa8pSwbL8g5l2Vmc4C++MlL+foC44t42Uzgr2ZWJ2wMaf5snmWh6y9C23iowDZnlTfmRPf+3NV0aVaXTjp1HzuTJ8OwYWTOmeNr1z/6KPz97752s4iIRM8//wkjRlD3uX/z4qjHuOe9BYyZ/iuL0rbQq3NTUswvNWVm/nbo2sxINSMlBQ5o3YAOTbUSTYw9SeQdkhGLtMzoamAP4Arg6VAgBwD3AnsD/Zzjh4h26JeEeiW0rZnAZcDfga7OuWVm9jKAc+7CUPs6+J7RL4G7gAbAv4GFzrm/htr0AKbj1zL9H3AmMBw4yjn3VXHxVOTZ92npOzhi5BSu69OZa47rFHQ4yWfOHBg2zCelbdvC8OH+tH0FH/IhIpLQLroIJkzw4/br1+eVL5dxz8QFZOXmRfTyFINzDm7NtX0606pBzRgHG38JMfveLA+fC0bSfe1wLqJxbZEmpVn4CUr1gQx8NbFUM/bAnyJ/2TkGRrJDvz27ArgRv3j+POA659z00HNT8TvoHda+C352/VHAJnziOcw5tyWszTn4JLkDuxbPn1BSLBU5KX1x5m/cPXEBk6/vRcc99F/hbpYtg7w8f2q9tBWuFi+G226D//4XGjf2ty+/XIvgi4jEw5w5cMgh8K9/weDBAOzIzmVHdi55DvKcI8853B+3ffUo52BnTi7/+Xo5r37pT6QO+L89ufKYvWhcJ3l+fydIUroDqIrvJCw40TxfLXyhpKgnpelAHaA6kA7UAPYFtgLLgc3O0SiSHSaaipyUnvP0LLbuzGHS4KODDiVxZGbC3Xf7CUi5uX5m/NFHQ69e/tKpU9FJ6urVvjf0ued8Anr99b5EaOg0/eDQL8dRo0bF692IiFRORx3lfyf//HOZJo+u3JzJY5N/5q05K6hZNZW/9+zApT3bJ8X6pwmSlH4JHAr0x7k3i2jTGFhHKZLSSGffrw1dN8KXHwX4DD+WEyCyPnWJmrT0HcxetkkTnMJ9+inst5+vpnTRRfDUU3DEEf70+6BBvg59y5bQrx+MHg3z5vne1PR0uPVW6Nj
RJ6T//KevwTx8uMaNiogE4Zpr/LrPH3xQppe3alCTB885gI+v60WvLk15fMpijn7wM56b8Ss7snOjHGyl9BX+1H1UK2dG2lM6ATgdOAE/gch3x+4yzjn+Fs3A4qWi9pS+8PlvDH9vAVNu6MVelX1A96ZNvkfzhRd8YjlmDBxzzK7nnfOn5KdN23VZscI/17ixT0w3bYLzzoN77oG99grmfYiIiJedDR06wN57wyeflHtzP65I58GPfmLG4vW0qF+Da4/rxDkHt6ZKaryXay+/BOkpbQK0Ajbj3LIi2hh+lSSKbFPwJREmpQfgJzTNwY8deBM4Hp+YTgHOd451keww0VTUpPTsp2exPSuXD6/tGXQowXEO3nzT/0e9fj0MHQp33AE1SxjY7hwsXeqT0+nTYft2vwj+gQfGJWwREYnA/ffDLbf4s1pdu0Zlk18s2cCDH/3Ed79vpkGtqtStUYWqKSlUTU2hSqpRNTWFqqlGlZQUqlZJoWqKUSXVyHOQm+fIyXPk5uWRk+v+uJ8Tdv+Qdo24/6z9ohJrURIiKY2RiJLSQl9oNABynGNriY0TWEVMSldtzqTHyE8ZekIXrjymY9DhBGPFCrjiCpg4EQ4+2J927949pru88sorAXjqqadiuh8REcF3NrRp44djPfNM1DbrnGPywrVMXrCGrNw8snN9Upmdm0d2niM7J4+cvDyyQ4/l5DpSUowqKUZq+HWqkZqS8sf9qqlG15b1Y/53OZmT0vKsbVMNqFjZXJKY9rPvlD6ha1GVWZNYXp7/5TRsGOTkwCOP+J7SOCzTVLOkHlgREYmeJk38Mnwvv+x7TRs2LLrt2rXw+ee+st6xx8IppxTZ1Mzou28z+u5bpqKQEkPF9pSacRDQHz/b/n/O8akZ/wDux0962gmMdo4h8Qg2FipiT+ktb//IxB9WMffO40NDNiqJOXN8AjprFvTt65PTDh2CjkpERGJl7lw44AB48EE/RCvf77/74VczZvjrn37a9Vzt2vDtt9C585+3V1rOwYcf+gmxdev6ya916+5+qVmz9EsPlkOl7Ck14yj8eNH8Nlea8RB+fdH8BVNrANeZ8YtzRK9vXYo1f2U63VrWrzwJ6W+/+bVCX3/d/+f80ktwwQVx/SUgIiIB2H9/6N0bnnwSGjTwCej06T4pBahf3y8fdfHF0LMntGjhh3T97W++A6NatfLt/5FHdk+GC5Oa6qv91avnO0yef758+6zEijvnORS/MGrBx8AnpOuBJqHbF4CS0njIzs1jYdoWLjpiz6BDib0NG2DECL+0U2qqX7bpxhsDW6Zp0KBBAIwZMyaQ/YuIVErXXgtnnumX9mvWzCefQ4b4Nai7dfvzOqbPP+/b33EHjBxZ9v1+9pmfBHv22X5lli1b/CUjY9ftgpcuXcr3XqMoVKhoKL5Q0XxgsHNuRhFtz8JX2DwQ3+G4ABjhnHs3TuECxSelh+B7RD8G3gFOBU4KPfY35xhnxnnAa/iF9CUOFq/ZSlZOHt1a1Q86lNjJzIQnnoD77vM/5Bdf7BfEb9Uq0LAaN24c6P5FRCql00+HSZOgffviC6DkO+MMn8A++CAcf7wfY1pay5f7Na07d4YXX/Sn6SuQUEn3x/Al3T8PXX9oZvs6534v5CW9gE/x5do3AucDb5tZ76IS2VgockypGTvxSWtD58gwoz6+xKcDajhHthnVgB34Sl8VsiB4RRtT+t/Zy7nxrbkVY33SdesgKwuaN4+sIkduLrz2mj9Vv3y5H6g+cqT/T1hERCRS27b5UqUZGX5camk6FXbu9BUA58+Hb77xa6UmkEjGlJrZV8Bc59ylYY8tBt5yzt0c4X6+BmY4524oV8ClUFwiWRW/ckIGgHOk5/9z4hzZoeus0GMa3Bcn81emU7taKu0bJ/gY5xUrfHWlzZv9zPjWrf3SHm3b+kvB219/7U/N//CDHw/00ku7L4AvIiISqdq1/TyEww+HSy+F8eMjn4cweDB89RW89Vb
CJaSRMLNqwMHAwwWe+hjoUYpN1cV3RsZNib2bZtwRyWMVVaNGjZg6dWrQYUSsbdY2btgPpk+fFnQoRXOO/W65hQaZmfx25ZVU3bSJGmvWUH3dOmpMnky19etJyf1zmbfMFi347fbbWdu7t//lkWDH5YEHHgDgpptuCjgSERGJROt//IOOTz/NoqFDWf2Xv5TYvvmkSez9zDP83q8fvzZunHB/h0KqmNnssPtjnHPhkx2aAKn4Ykfh1gB9ItmBmV0JtAZeKU+gpVXc6fs8di8lWuQ28D2qEZyfTTwV6fR9bp6j250f0e/QNtx1WnSqW8TEa6/BgAHwr3/5/zgLys2FtDQ/e3L5cn9dvz5ceCFUrx7/eCN0xx3+f7Hhw4cHHImIiEQkLw9OPNGvYfrtt8X3fH73HfToAUccAR9/HJf1r8uipNP3ZtYSWAn0cs5ND3v8DuB851yxs7HM7Gx8MtrPOTcxSmFHpKSkNFJKSuPgl7Vb6PPodB7+6wGcc3DroMMp3Jo1sO++fgbijBmRjSUVERGJldWr/dJSbdrAF18U3vmxcaMfOpaT49fE3mOP+McZoQiS0mrAduA859ybYY8/BXRzzvUq5rXnAC8DFzrn3opi2BEp7t+Au+MWhURk3soMALq1CmZJpIhcdZUfYP7CC0pIRUQkeC1a+GWiTj/dLy34cIGhlrm5vnLUqlV+DdQETkgj4ZzLMrM5QF/gzbCn+gLji3qdmZ0LvARcFERCCsUkpc4pKU0081amU71KCh0Tddb9W2/5y/33V8jB4SUZMGAAAK+++mrAkYiISKmcdhpcfrlfDP+EE/wi9/mGD/dLTj3zjJ8YlRweBV4JzaCfiV+DtCWhNeXN7GUA59yFofv98afshwDTzSy/jnmWc25jvIJOzAETUqgfV6azd4t6VElNCTqUP9uwAa68Eg46yC9qnIS6JNCiyCIiUkoPPwzTpsFFF/mVXpo2hffe80npxRf7tU2ThHNunJk829vRAAAgAElEQVQ1xq872gKYB5zsnFsWatK2wEsuw+eEo0KXfNOA3rGNdpcix5RWFhVlTGlenuOAuz/m9ANbcu8Z+wUdzp9deCH85z8we7avUywiIpJo5s6FQw/1vaWPPurXMt1rLz8RqmbNoKOLSCTrlFZUCdjlJoX5feN2tuzMoVvLBKzk9P778MorcMstSkhFRCRx7b8/PPAATJwI//d/fu7D+PEVJiFNdkpKK4h5q9IBEq+8aHo6/POfvurSrbcGHU1M9e/fn/79+wcdhoiIlMc11/ie0o0b/QL77doFHZGEaExpBTFvZQZVU41OzRJsktPQoX65jbffhmrVgo4mprp37x50CCIiUl4pKTBhAvz6q8pYJxglpRXE/FXpdG5Wl+pVEmiZpSlT4NlnfWJ66KFBRxNzw4YNCzoEERGJhlq1lJAmIJ2+rwCcc8xbmZ5Y40m3bvX1hDt1gru1epiIiIiUj3pKK4BV6TvYtD07sRbNv/VWWLrULzRcSQaIn3322QCMH1/k2sMiIiJSRkpKK4AfV/hJTl0TZZLTzJnwxBO+etNRRwUdTdwcccQRQYcgIiKStLROaQVYp/SRjxcxeuoS5t99AjWqBjymdMcO6N4ddu6EH3+EOgk28UpERCSJJfM6peoprQDmrUynY9M6wSekACNHwqJF8PHHSkhFREQkajTRqQKYtyqDrokwnvTXX31Set55u9cNriROO+00TjvttKDDEBERSUrqKU1wazN2sG7LzsSYeX/99VClCjz0UNCRBOK4444LOgQREZGkpaQ0wSVMJacPP4R33vHl2Vq1CjaWgFx77bVBhyAiIpK0dPo+wc1bmQHAvi0DPH2/cydcey107gyDBwcXh4iIiCQt9ZQmuHkr0+nQpDZ1qgd4qB59FBYvho8+SvpSosU56aSTAPjwww8DjkRERCT5KClNcPNXZXDQng2DC2D5crj3XjjzTDj++ODiSACnnnpq0CGIiIgkLSWlCWzjtix
Wbs7kwiP2DC6IIUMgL8/3llZyV1xxRdAhiIiIJC2NKU1g81b6SU77BTXJ6dNP4b//hZtvhnbtgolBREREKgUlpQksf+Z91yCWg8rOhquvhg4d4MYb47//BNSnTx/69OkTdBgiIiJJSafvE9j8lRm0aVST+rWqxn/nTz4JCxbAu+9CjRrx338C6tevX9AhiIiIJC1zzgUdQ6Bq167ttm3bFnQYher10Gfs26IeTw84OL47Tkvzyz/17AnvvQdm8d2/iIiIFMrMtjvnagcdRyzo9H2CSs/MZtmG7cEsmn/jjX5t0lGjlJCKiIhIXCgpTVALVvlF87vGe9H8zz+HV17xs+47dYrvvhNc79696d27d9BhiIiIJCWNKU1Q84OY5JSbC1ddBa1bwy23xG+/FcTAgQODDkFERCRpKSlNUD+uTKd5vRo0rVs9fjv997/hhx/8MlC1k3K4SrkoKRUREYkdnb5PUPNWptOtVRxP3a9bB7feCscdB+ecE7/9ViDZ2dlkZ2cHHYaIiEhSUlKagLbtzOHX9dviO8lp6FDYuhUef1yTm4rQt29f+vbtG3QYIiIiSUmn7xPQwtUZOAfd4jWedMIEeOkl31O6777x2WcF9I9//CPoEERERJKWktIElF9eNC49patXw6BBcPDBcOedsd9fBTZgwICgQxAREUlaOn2fgOatyqBJnWo0qxfjSU7OwSWXwPbt8OqrUDWAylEVyPbt29m+fXvQYYiIiCQl9ZQmoHkr0+nasj4W67Gdo0fDpEnw1FOw996x3VcSOPnkkwGYOnVqsIGIiIgkISWlCWZHdi6L127luH32iO2OFi70C+SfdBJcfnls95UkLtfnJCIiEjNKShPMT2lbyM1zsZ3klJUFAwb4tUiff16z7SPUr1+/oEMQERFJWkpKE0xcJjndfTd8+62fdd+iRez2k2TS0/2xqV8/jkt1iYiIVBJKShPM/FXp1K9ZldYNa8ZmB59/DiNH+glOZ54Zm30kqdNPPx3QmFIREZFYCGT2vZldYWa/mdkOM5tjZj2LadvbzFwhl73D2gwsok2N+Lyj6Jm3MoNurerFZpJTRgZccAG0awejRkV/+0nummuu4Zprrgk6DBERkaQU955SM+sHPAZcAXweuv7QzPZ1zv1ezEu7AhvD7q8r8Px2YK/wB5xzO8ofcfxk5eSxKG0LFx/ZLjY7uPZa+P13mDED6taNzT6S2FlnnRV0CCIiIkkriNP31wNjnXPPhu5fbWYnApcDNxfzurXOufXFPO+cc2nRCjIIi9duISs3j66xGE86YQKMHQu33QY9ekR/+5XA+vX+69ekSZOAIxEREUk+cT19b2bVgIOBjws89TFQUqY028xWm9kUMzumkOdrmtkyM1thZu+Z2YHRiDme5q/MAKBby3rR3fCqVXDppXDIIXDHHdHddiVyzjnncM455wQdhoiISFKKd09pEyAVWFPg8TVAnyJesxrfi/oNUA24AJhiZr2cczNCbRYBlwA/AHWBa4GZZnaAc25xwQ2a2SBgEEC1atXK9Yaiad6qdGpXS6Vd49rR22h+1abMTFVtKqcbbrgh6BBERESSVsLPvnfOLcInnfm+MLN2wFBgRqjNF8AX+Q3MbBbwPXA18KeZKc65McAYgNq1a7sYhV5qs5duYr/W9UlJieIkp6eego8+8tWbunSJ3nYroVNPPTXoEERERJJWvGffrwdygWYFHm8GlGY86FdAp6KedM7lArOLa5No1m3ZyYLVGRzVMYrjFVeuhBtv9FWbLrssetutpNLS0khLq9DDlkVERBJWXJNS51wWMAfoW+CpvsCsUmyqO/60fqHMr6e0f3FtEs2sJX4STc9OTaO30eHDISfH95aqalO59e/fn/79+wcdhoiISFIK4vT9o8ArZvY1MBO4DGgJPANgZi8DOOcuDN0fDCwF5uPHlA4AzgDOzt+gmd0JfAksBurhT9nvjx+LWiFM/3k9DWpVjV4lp8WLfQnRyy+H9u2
js81KbtiwYUGHICIikrTinpQ658aZWWPgNqAFMA842Tm3LNSkbYGXVAMeAloDmfjk9BTn3AdhbRrgx4g2B9KB74CjnXNfx+yNRJFzjhmL13FkxyakRms86R13QPXqfgkoiYoTTzwx6BBEREQiYmZX4OfftMDnToPDJogXbNsCeAQ4CD/08RXn3MA4hfqHQCY6OedGA6OLeK53gfsPAg+WsL3rgOuiFV+8LV67lbVbdtIzWuNJv/8e3ngDbrkFmhUcvitltXz5cgDatGkTcCQiIiJFK0Ohour4eT8jCa1OFIRAyozK7qb/7ItTHdUpSknprbdCw4YwdGh0ticAXHDBBVxwwQVBhyEiIlKSPwoVOecWOueuZtcSm3/inFvqnLvGOTeW3atnxlXCLwlVGcxYvJ4OTWvTumGt8m/s88/hgw9g5Eho0KD825M/3KahECIikuDCChU9XOCpSAoVBarSJ6WNGjVi6tSpge3fOTisRgYndqxW/jic48BrrqFG48Z8dcAB5AX4vpJRlSr+xyXI74uIiFR6Vcxsdtj9MaH11/OVpVBRQqj0SenGjRvp3bt3YPuf9ct6HvroK56/6EB671PO8Z/vvw/z5sHTT3O0JuVE3a+//gpAhw4dAo5EREQqsRzn3CFBBxELlT4pDdr0xeupkmIc3qFx+TaUl+fHku61F/z979EJTnZzySWXAOopFRGRhBatQkVxp6Q0YDMWr+OgPRtSp3o5D8W4cfDDD/Daa6pvHyN333130CGIiIgUyzmXZWb5hYreDHuqLzA+mKgio6Q0QOu37mT+qgyGHN+5fBvKzobbb4f99wdVHIqZXr16BR2CiIhIJEpVqCj0WPfQzXpAXuh+lnNuQbyCVlIaoJm/RKm06AsvwJIlMHEipGiVr1hZtGgRAF26dAk4EhERkaKVoVAR+MJD4U4FlgHtYhVnQeaci9e+ElLt2rXdtm3bAtn3kDd/4JMFa/j29r5lr+SUmQkdO/pSojNmqMZ9DOVPiNOYUhERCYqZbXfO1Q46jlhQT2lA8kuLHlXe0qJPPgmrVvkKTkpIY+q+++4LOgQREZGkpaQ0IIvXbmVNxk56lqeK0+bNcP/9cNJJ0LNn9IKTQvXokdBrDouIiFRoGoAYkBmL/XjScpUWffhh2LQJRoyIUlRSnHnz5jFv3rygwxAREUlK6ikNyIzF68pXWnTNGhg1Cvr1gwMPjG5wUqirrroK0JhSERGRWFBSGoCdObl8+esG+h3SpuwbGTECduyAe+6JXmBSrIceeijoEERERJKWktIAzFm6iR3ZeWVfCmrpUnjmGV+5qVOnqMYmRTv00EODDkFERCRpaUxpAGb84kuL/t9eZSwtOnQoVKkCd9wR3cCkWN9//z3ff/990GGIiIgkJfWUBqBcpUWnTIG33oJ774VWraIfnBRp8ODBgMaUioiIxIKS0jjbsHUn81ZmcEPfMpQWzc6Gq6+GDh3ghhuiH5wUa9SoUUGHICIikrSUlMbZzCUbAOjZuQzjSZ94AhYuhHffhRo1ohyZlKR79+4lNxIREZEy0ZjSOJvx8zrq16zKfq3ql+6FaWlw111w8snwl7/EJDYp3jfffMM333wTdBgiIiJJST2lceRLi64vW2nRm26CnTv92qQqJxqIoUOHAhpTKiIiEgtKSuPol7VbScvYUfrSorNmwcsvw803awmoAD355JNBhyAiIpK0lJTGUZlKi+bmwlVX+Zn2t9wSo8gkEt26dQs6BBERkaSlpDSOZixeR4cmpSwt+txz8N138MYbUKdO7IKTEs2aNQuAHj16BByJiIhI8lFSGie+tOhGzj2kdeQv2rDB94727g3nnhuz2CQyt4R6qjWmVEREJPqUlMbJnGWbyMzOLV1p0dtvh/R0ePxxTW5KAP/+97+DDkFERCRpKSmNk88Xl7K06Hff+fr2V18N++0X2+AkIl26dAk6BBERkaSldUrjZMbi9RzUNsLSos75yU1NmsDdd8c+OIn
ItGnTmDZtWtBhiIiIJCX1lMbBhq07mbcqnev7RFha9NVX/TJQzz8PDRrENjiJ2J133gloTKmIiEgsKCmNg5lLNuBchKVFMzLgxhvhsMNg4MCYxyaRe+GFF4IOQUREJGkpKY2DzxeXorTo8OGwZo2vb5+i0RWJpEOHDkGHICIikrSU9cRYfmnRIzs2Lrm06MKF8NhjcMklcOih8QlQIjZ58mQmT54cdBgiIiJJST2lMbYmYycZmdmRLQV1001Quzbcd1/sA5NSu/feewHo06dPwJGIiIgkH3POBR1DoGrXru22bdsW031k5eSR5xw1qqYW3WjBAujaFe66C0ITaiSxLF++HIA2bdoEHImIiFRWZrbdOVc76DhiQT2lcVCtSgSjJB59FGrWhCuvjH1AUiZKRkVERGJHY0oTQVoavPIKXHyxX5tUEtKkSZOYNGlS0GGIiIgkJfWUJoInnoDsbLjuuqAjkWKMHDkSgBNPPDHgSERERJKPxpTGYUxpsbZuhbZt4ZhjYPz44OKQEqWlpQHQvHnzgCMREZHKSmNKJXZefBE2bYIhQ4KOREqgZFRERCR21FMaZE9pTg507gwtWsDMmcHEIBGbOHEiAKeeemrAkYiISGWlnlKJjbffht9+g0ceCToSicAjoeOkpFRERCT61FMaVE+pc3D44f7U/U8/QWoxa5hKQli/fj0ATbRCgoiIBEQ9pRJ9n38O33wDTz+thLSCUDIqIiISO+opDaqn9PTTYdYsWLYMatWK//6l1CZMmADAWWedFXAkIiJSWamnVKLrp5/g3Xd9OVElpBXG448/DigpFRERiQX1lAbRUzpokK/gtGwZ7LFHfPctZZaeng5A/fr1A45EREQqK/WUSvSsWQMvvwwDByohrWCUjIqIiMROStABVDpPPQVZWSopWgGNGzeOcePGBR2GiIhIUtLp+3ievt++Hdq0gZ494X//i88+JWp69+4NwNSpUwONQ0REKi+dvpfoGDsWNm5USdEK6oMPPgg6BBERkaSlntJ49ZTm5kKXLtC0qV8Kyiz2+xQREZGkksw9pRpTGi/vvANLlvheUiWkFdKrr77Kq6++GnQYIiIiSUk9pfHoKXUOevSAtWvh559VwamC0phSEREJmnpKo8zMrjCz38xsh5nNMbOexbTtbWaukMveBdqdbWYLzGxn6PrM2L+TCM2aBV9+Cddfr4S0Avvkk0/45JNPgg5DRESkRKXJtULte4Xa7TCzX83ssnjFmi/uSamZ9QMeA+4DDgRmAR+aWdsSXtoVaBF2WRy2zSOAccBrQPfQ9ZtmdnjU30BZPPwwNGrk1yaVCqtq1apUrVo16DBERESKVdpcy8zaAx+E2h0I3A88YWZnxyfiUBzxPn1vZl8Bc51zl4Y9thh4yzl3cyHtewOfAU2dc+uL2OY4oJFzrm/YY5OBdc6584qLJ+an75cuhQ4d4NZb4Z57YrcfibmxY8cCMFD/XIiISEAiOX1fhlzrAeAs51ynsMeeA7o6546IXvTFi2tPqZlVAw4GPi7w1MdAjxJePtvMVpvZFDM7psBzRxSyzY8i2Gbs7bknTJsGV18ddCRSTmPHjv0jMRUREUlEZcy1isqjDjGzuJ0ijPc6pU2AVGBNgcfXAH2KeM1q4HLgG6AacAEwxcx6OedmhNo0L2KbzQvboJkNAgaF7jozyyzNmwhTBcgp42sl9mJyfEyrJ0SDfnYSm45P4tKxSWzxOD41zWx22P0xzrkxYffLkms1ByYX0r5KaHuryx5u5BJ+8Xzn3CJgUdhDX5hZO2AoMKOw10SwzTHAmBIblsDMZjvnDinvdiQ2dHwSl45NYtPxSVw6NolNx6d84j3RaT2QCzQr8HgzIK0U2/kK6BR2Py0K2xQRERGp6MqSaxWVR+WEthcXcU1KnXNZwBygb4Gn+uJnfEWqO7t3JX8RhW2KiIiIVGhlzLWKyqNmO+eyoxth0YI4ff8o8IqZfQ3MBC4DWgLPAJjZywDOuQt
D9wcDS4H5+DGlA4AzgPBlCh4DppvZMOB/wJnAMcBRMX4v5R4CIDGl45O4dGwSm45P4tKxSWyJcnxKlWuFHr/KzEYB/waOBAYCxa5gFG2BVHQysyuAG/Hrjc4DrnPOTQ89NxXAOdc7dP9G4FKgNZCJT07vd859UGCb5wD3Ah2AJcCtzrkJcXg7IiIiIgmlNLlW6LFewL/w68KvAh5wzj0T15gre5lREREREQleIGVGRURERETCKSktRkWsG1uZlOb4mFkLM3vdzH4ys1wzGxvHUCudUh6bs8zsYzNbZ2ZbzOwrMzstnvFWNqU8Pr3MbJaZbTCzzNDP0JB4xluZlPbvTtjrjjKzHDObF+sYK7NS/uz0NjNXyGXveMZckSgpLUJFrRtbWZT2+ADV8ctajMQvKSYxUoZj0wv4FDgl1P4D4O1I/xhL6ZTh+GwFHgeOBvbFj92/OzReTaKoDMcm/3UNgZeBKTEPshIr6/HBj9FsEXZZHMs4KzKNKS1CRa0bW1mU9vgUeO17wHrn3MDYRlk5lefYhLX/GpjhnLshRmFWWlE6PhOAnc65uM7MTXZlPTah4/EDYMA5zrluMQ+2EipDXtAb+Axo6pyL21qfFZl6SgtRkevGVgZlPD4SB1E8NnWBTdGKS7xoHB8zOzDUdlp0o6vcynpsQj3WzfA92BIj5fzZmW1mq81sipkdE5MAk4SS0sIVVze2eRGvaV5E+/y6sRI9ZTk+Eh/lPjZmdiV+CbhXohuaUI7jY2YrzGwnMBsYHe+lYiqBUh8bM9sPuBMY4JzLjW14lV5ZfnZWA5fj11U/C18yfYqGJhUtiMXzRUQKFRqD/RDQzzm3LOh4ZDc9gTrA/wEPmNlvzjn94xAQM6sOjAOGOOd+Czoe+TPn3CJ8IprvCzNrBwwFZgQRU6JTUlq4Cls3tpIoy/GR+CjzsQkVwHgZuNA5NzE24VV6ZT4+YYnPj2bWDLgL9WZHU2mPTQtgH+BFM3sx9FgKYGaWA5zsnCt4qlnKLlp/d74C+kcrqGSj0/eFqMh1YyuDMh4fiYOyHhszOxef4Ax0zr0Vuwgrtyj+7KTgV7SQKCnDsVkJ7Ad0D7s8A/wSuq3fhVEUxZ+d7vjT+lII9ZQWrULWja1ESnt8MLPuoZv1gLzQ/Szn3IJ4Bl4JlOrYmFl/fEI6BJhuZvnjs7KccxvjHHtlUNrjczXwG7tOQx6NP1aj4xt2pRDxsQl1duy2JqmZrcWviqC1SmOjtD87g4Gl+PLo1YABwBn4MaZSCCWlRXDOjTOzxsBt7Kobe3LYOLe2Bdr/ZmYn4+vGXo6vG3uNc258HMOuNEp7fEK+K3D/VGAZ0C5WcVZGZTg2l+F/F40KXfJNA3rHNtrKpwzHJxV4AP9zkgMsAYYR+kMs0VPG32sSJ2U4PtXwY+RbA5n45PQU59wHcQq5wtE6pSIiIiISOI0pFREREZHAKSkVERERkcApKRURERGRwCkpFREREZHAKSkVERERkcApKRURERGRwCkpFUlwZtbJzJ40s4VmttXMtpjZT2b2rJn9X1i7pWbmzGxpgOHmxzI2FIsL1XrOf7yZmb1mZqvNLDf0/CgzaxfWfmwM42pgZneFLmdEGne8mFnvsP2XdLkr9Jr8+1PjHW9JYnlcS3OsCnyuUY1DRKJHi+eLJDAzuxh4mj+XdOwSujTFVwipKB4D+gW4/wbAnaHbLwH/CzAWEREJo6RUJEGZ2bHAc/gzGg4YgS9huxbYEzgH6BxYgMVwzg3El9kt6ODQ9WagvXNuc9hzFuOwSlRM3PHa/1TCPgczGwi8GLr7Uii+qDOzGs65HbHYtohIpHT6XiRx3c+un9HHnXO3O+dWOOeynHOLnXP3A5cWtwEz625mE8zsFzPLMLNsM0sLPXZIgbbtzexlM/vdzHaY2WYzmxc6TbpHWLtLzWy2mW00s51mttLMPjGzi8La7HZqNf/0KdAx1KQBsCn0/MDiTvOa2UFm9p/QfrLMbL2ZfWZ
mh4Wer2NmL5nZj2a2IfQeN5vZdDPrF7adu/A13PNdVHCfxQw7qG1md5vZfDPLNLPtZvadmV1vZlXC2u32PszswtBnmGl++MVFxJCZHWtmX4b2t8TMbjSz8CT3rrD4zjSz581sPb4EYn6bfczslbDPe62ZvWVm+xfYV0TflwKvOdfM5hb3eZhZTzN718zWhX1f3yi4/2I+g5aheLeGvg9PA3WLaFvq9yAiMeSc00UXXRLsAuyB7x3Nv7SK4DVLQ22Xhj3Wv8B2wi/bgH3C2s4vpm23UJu/FtPmrbBtjQ17vB2+hn1RrxsYapN/f2zYds4Esot6XahN82K27YALQ+3uKqbN2MLiDj1WG5hTzGs/AFJCbcPfx6Yi2h9Viu/BwMI+lwJt8p9fX8RnNSCs7V0F2v/RLvT8UcD2IuLOBHqW8vsS/nmklfR5AAOA3CLa7QB6F/UdCz1WE1hYyGtXFfY5RvIedNFFl/hd1FMqkpjahd3OcM6tLON2vgVOAFrgx6XWAy4PPVcL+CeAmTUG9g09/jg+EWsEHArcDqSHnjs6dL0VP6a1On4owbnApKKCcM5Ndc4ZsCz00DLnnIUuYwt7jZnVBJ5l1zCjO4BmQBN8cvxr6PEt+HGq7ULvqQbQA59cAVwfiuEuoH3YLl4Ki2FgUbEDg4GDQrc/wn+WHfCfLcBJ+OS/oAbAFUB94IGwxy8oZl/l0Rh4EGgIXBXB/gw4Ef+Z7Rd67Fl8YrcMP9SiOnAgsA7/uT4Fpfq+hGtGMZ+HmdUGnsCfHcjB/0NSD7gs1K46fvhKcS4E9g7d/hJoje+d31ywYRnfg4jEkMaUiiS3NODvwCh80lazwPNdQteb8H+4G+CTrC34HqcfnHP3hrX/LXRdG7gN34O4EPjYORftP+JH4hMtgKnOuXvCnnsr7PZ2fKI6DtgHf6o2fHxqF8rnlLDbNzvn0gDMbDi7JkqdDLxe4HVznHNPh9q+CtwUenzPcsZTlDXAHc65XDN7CXiyhP094pz7KHR7npl1YldCtyf+2Ba0n5k1x49rjuT7Eq6kz+PI0PYAPnDO5X+2/zazy4DuQGcz6+ic+6WIfRwbdvv+/H/mzOwR/PjscJF+50UkTtRTKpKYlobdrmdmLcu4nf8CN+KTtYIJKfmPOefy8D1WK4BOwK3Aq8CPobGabULtRwNvAvntR+F7D9eY2bAyxliUZmG3FxTT7iZ8D97h+J61ghOmapQzjqZht38Pu70s7HZh4w8Xhd3eFsV4irLEOZdbiv19V+B+pGMoG5fi+xKupM+jqM8ZSv6s/4gt7PaKIm4DpfrOi0icKCkVSUDOubXA12EPDS2sXfgkm0Kea4g/dQ++F60rkAoUOmHEOfce0Bbfs3gaMBw/vq8bvlcU59wO59y5+NOcRwGXAF/hT63eZ2atInuHEVkTdnufYtqFnzo/A6geGiqwoZC2rgxxrA273baI2+Ft8mWXc7+l9cf+nHOR7C+zwP3w9zA5bGjDHxf82Nn5oX2U+H0pKj4K/zyK+pwL3i/ss863Pux26yJu7wqi9O9BRGJISalI4roV3yMJcE1o5nRLM6tqfkH9W/BjAIuSw64//jlABv409z2FNTazJ4Dj8ONFJwHjgZ2hp9uG2pxtZlcBrYAf8L2mP+RvgiL++JfRTHYllseY2S1m1tTMGprZGWaWP741J+w1m4GqZnY7u/ea5QtPVDuFxjGW5P2w2yPMFwBohx/jWlibCsk5txj4OXT3ODMbbL7YQA0zO8DM7gDeyG8fyfellGbhT6kDnGRmp5lfWeFS/LhWgEXFnLoH+Czs9jAza2VmewE3FNY4Bu9BRMpBSalIgnLOTcZPRMrG/6zeCawEsvDJwwj8pJaiXr8FmBK62wpYju993LeIl1wOfBK2jx/wk2DAn6IH32P5BP50+pbQZVDoudXA3FK8xTAYB0cAAAGvSURBVGI55zLxS17lJ50j8L1kG4G38ZONCN3ONxWfYFxDIZNbnHNb8TO
uwU+G2hpaHmlgMaE8xu6TmtLwY2vz11z9ED+eNRkMws9yN+Bf+CQxE/geuJvdh1RE8n2JmHNuG3A1/h+xqsA7+O/XmFCTneya9FSUl4GfQrePwJ+a/4XdhwaEi+p7EJHyUVIqksCcc8/hT7ePxieimfjxeIuA54GRJWxiAD5h2oSfTfwqRVdUGgl8jk/8cvATiL7FJ3iPhdpMwU/o+QWf/OXik9E3gF6hRDJqnHNv48eKvoFf1icHn5ROY9c40weA+/CJRWbouWMpevb0BcB0fM9xJDFsw686MBw/EWYnPnH7Ht8Dd1pofGKF55ybhk+2X8YndNn4z3su/p+RW8KaR/J9Ke3+X8MvH/Yevlc7B/+P1H+Bw5wvLlDc6zOBPsAE/M/JZnzxgaLW8436exCRsrPIhh6JiIiIiMSOekpFREREJHBKSkVEREQkcEpKRURERCRwSkpFREREJHBKSkVEREQkcEpKRURERCRwSkpFREREJHBKSkVEREQkcEpKRURERCRw/w9ybBUAXdtKXAAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "disp_imp = np.array(val_metrics['disp_imp'])\n", + "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced Accuracy',\n", + " disp_imp_err, '1 - min(DI, 1/DI)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YNoY9lsYHHWJ", + "outputId": "9264c33b-4c98-468b-f685-9164dc51ef8c" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAroAAAG4CAYAAACq3USPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdeXxU1fnH8c8hC4Qt7DuooIKAijvgAii4VdwrqIiodddqrbbuFrXYurXWrbiiopYfCCpVULGCCoiIgIKKCggIBAh7CGR9fn+ciQwxCXdCkjuTfN+v17xm5s69d56JCN+cee45zswQEREREaluaoVdgIiIiIhIZVDQFREREZFqSUFXRERERKolBV0RERERqZYUdEVERESkWlLQFREREZFqSUFXRERERKqlUIKuc+4a59xS59wO59wc59yxu9n/AufcPOdctnMuwzk32jnXqtg+5zjnvnHO5UTuz6rcTyEiIiISn2LJWs651s6515xz3znnCpxzo0rZL+GyVpUHXefcIOAxYARwCDADmOSc61DK/kcDrwAvAd2AM4GuwKtR+/QCxkS29Yjcj3XOHVV5n0REREQk/sSatYDaQCbwN2BWKedMyKzlqnplNOfcLOArM7s8atsPwDgzu62E/W8GrjezvaK2XQI8bmb1I8/HAE3MbEDUPlOAdWZ2fuV9GhEREZH4EmvWKnbsf4FMMxtWbHtCZq0qHdF1zqUChwHvF3vpfaB3KYdNB1o75wY6rxkwGHg3ap9eJZzzvTLOKSIiIlLtlDNrBZGQWSu5it+vGZAErCm2fQ3Qv6QDzGymc24wfog8DV/zB8DFUbu1KuWcrSiBc+4K4AqAunXrHtasWbPYPoWIiIhICJYvX27Al1GbnjGzZ6Kex5y1Aoopa8WLqg66MXPOdQUeB+7D/+bQGngIGAkMLc85I38gngGoV6+eLVu2rGKKlWotIyMDgFat4vr/aRERqcacc9vN7PCw60gUVR10M4ECoGWx7S2BjFKOuQ343Mweijz/yjm3DfjEOXe7mf0cOTaWc4rEbPDgwQBMnTo13EJERERKV56sFURCZq0q7dE1s1xgDjCg2EsD8FcElqQu/j9YtKLnRfXPjPGcIjG79dZbufXWW8MuQ0REpFTlzFpBJGTWCqN14VHgFefc5/gLza4C2gD/BnDOvQxgZkVtCROBZ51zV7OzdeGfwJdmtjyyz2PAx865W4E3gbOAfsAxVfKJpEY4+eSTwy5BREQkiFizFs65HpGHDYHCyPNcM/smsj0hs1aVB10zG+OcawrciQ+tC4BTzayoUbZDsf1HOecaANcBjwCbgf8Bf47aZ0bkgrX7gXuBxcAgMytxLjiR8lixYgUA7du3D7kSERGR0sWatSLmFns+EFgG7B05Z0JmrSqfRzfe1KtXz7Zt2xZ2GZIA+vbtC6hHV0REwuOcyzazemHXkSjiftYFkXhx5513hl2CiIiIxEAjuhrRFRERkQShEd3YVOmsCyKJbMmSJSxZsiTsMkRERCQgjehqRFcCUo+uiIiETSO6sVGPrkhAw4cPD7sEERERiYFGdDWiKyIiIglCI7qxUY+uSECLFi1i0aJFYZchIiIiAWlEVyO6EpB6dEVEJGwa0Y2NenRFAhoxYkTYJYiIiEgMNKKrEV0RERFJEBrRjY16dEUCWrBgAQsWLAi7DBEREQlII7oa0ZWA1KMrIiJh04hubNSjKxLQQw89FHYJIiIiEgON6GpEV0RERBKERnRjox5dkYDmzZvHvHnzwi5DREREAtKIrkZ0JSD16IqISNg0ohsb9eiKBPTPf/4z7BJEREQkBhrR1YiuiIiIJAiN6MZGPboiAc2ePZvZs2eHXYaIiIgEpBFdjehKQOrRFRGRsGlENzbq0RUJ6Iknngi7BBEREYmBRnQ1oisiIiIJQiO6sVGPrkhAM2bMYMaMGWGXISIiIgFpRFcjuhKQenRFRCRsGtGNjXp0RQI
aOXJk2CWIiIhIDDSiqxFdERERSRAa0Y2NenRFApo2bRrTpk0LuwwREREJSCO6GtGVgNSjKyIiYdOIbmzUoysS0AsvvBB2CSIiIhIDjehqRFdEREQShEZ0Y6MeXZGApkyZwpQpU8IuQ0RERALSiK5GdCUg9eiKiEjYNKIbGwVdBV0JaMWKFQC0b98+5EpERKSmUtCNjS5GEwlIAVdERCSxqEdXJKDJkyczefLksMsQERGRgNS6oNYFCUg9uiIiEja1LsRGQVdBVwLKyMgAoFWrViFXIiIiNZWCbmzUoysSkAKuiIhIYlGPrkhAEydOZOLEiWGXISIiIgGpdUGtCxKQenRFRCRsal2IjYKugq4ElJmZCUCzZs1CrkRERGoqBd3YqEdXJCAFXBERkcSiHl2RgMaPH8/48ePDLkNEREQCUuuCWhckIPXoiohI2NS6EBuN6IoE9NZbb/HWW2+FXYaIiMhuOeeucc4tdc7tcM7Ncc4du5v9+0T22+GcW+Kcu6rY639xzlmxW0blfoo9p6ArElB6ejrp6elhlyEiIlIm59wg4DFgBHAIMAOY5JzrUMr++wDvRvY7BHgAeNw5d06xXRcBraNuB1bKB6hAal1Q64IENGbMGAAGDRoUciUiIlJTBWldcM7NAr4ys8ujtv0AjDOz20rY/+/A2Wa2X9S254BuZtYr8vwvwLlm1r1iPknV0IiuSEBPP/00Tz/9dNhliIiIlMo5lwocBrxf7KX3gd6lHNarhP3fAw53zqVEbevonFsVaYn4j3OuY4UUXYlq/PRiTZo00cVFEsitt94K6GI0EREJVbJz7ouo58+Y2TNRz5sBScCaYsetAfqXcs5WwJQS9k+OnG81MAsYBnwHtADuBGY457qZ2fpyfI4qUeOD7oYNG365ml5EREQkzuWb2eFV/aZmNin6uXPuM2AJcDHwaFXXE5RaF0QCGj16NKNHjw67DBERkbJkAgVAy2LbWwKlzZKQUcr++ZHz/YqZZQELgf1Kej1eKOiKBPTcc8/x3HPPhV2GiIhIqcwsF5gDDCj20gD8rAolmVnK/l+YWV5JBzjn6gBd8G0NcUuzLmjWBQkoL8//v56SkrKbPUVERCpHwFkXBgGvANcA04GrgMvwsygsc869DGBmQyP77wMsAJ4FRgJHA08B55vZG5F9HgYmAsvxPbp3AccBB5rZsor+nBUllBHdWCYxds6NKmGCYnPObYvap28p+3Spmk8kNUFKSopCroiIxD0zGwPciL9gbB5wDHBqVCDtELkV7b8UOBUfXOcBdwC/Lwq5Ee2A1/Fz6Y4HcoCe8RxyIYQR3chvGaPxv2V8Grm/BOhqZstL2D8dSCu2eTrwsZldEtmnL/AR0A3YELXfOjMrKKsejehKUKNGjQJg2LBhodYhIiJ7KCsLVqyALl3AubCriYmWAI5NGCO6NwGjzOxZM/vWzK7H93dcXdLOZrbZzDKKbkAnoCN+eL24tdH77i7kisRi1KhRv4RdERFJQAUF8NxzsO++0LUrHHwwPP00bN0admVSSao06JZzEuPiLgcWmllJDdVfOOdWO+c+dM7124NSRX5l6tSpmkNXRCRR/e9/cNhhcPnl0KkTPPooJCfDNddAmzZw1VUwf37YVUoFq+oR3bImMW61u4MjbQzn8evR3KIR4XOAs/H9Ix+W1vvrnLvCOfeFc+6L/Pz82D6BiIiIJI7vv4czzoATToBNm2DMGPj0U/jDH2DOHPjsMzj3XHjpJejRA3r3hldegR07wq5cKkCV9ug659oAK4E+ZvZx1Pa7gQvNrPNujr8WeARoY2YbdrPvu/hJlU8vaz/16EpQzz7rf7+6/PLLd7OnVAs//QQtWkDdumFXItEyMmDmTFizBgYOhLZtw65I4tWGDXDfffDEE5CWBrffDjfeCHXqlL7/Sy/Bv//tw3GTJnDJJX4EuFEj2LjRB+VNm3Y+Lr7twAPh7rsr9WOpRzc2VR10U4Fs/HQVY6O2Pwl0N7M
+uzl+Hr5t4cIA73UPMNjMDihrPwVdCap/f79y4pQpxVdJlGrn5Zfhssuge3d4/31o3jzsimqmvDz46isfbItuS5fufN05P0o3dCicdRbUrx9erVK5fvwRzKBZM0hPh1plfCGdl+f7bocP9wH0sst84G1ZfD2EUpjBRx/5c7z5Juzum986dXwQbtzY/3l8/PHgn6scFHRjE8asC7OA+WZ2RdS274E3zOy2Mo47Er/Ocj8zmxrgfSYA6WZ2fFn7KeiKyC/M4K9/hbvugqOO8v16HTvClCnQunXY1SWm7dvh7bchOxtSU/0tJWXX+6LHKSl+JL0o1M6e7Y8H30PZq9fOW+PG8J//+K+Yly6FevXg7LN96O3XD5KSQv3YUkHM4J57fFAtkpQETZv60Ft0X3Rr2BBGjYJFi6B/f3jkETjooPK//+rVMH68/6WqcWMfaItCbdHj0kaIK4mCbmzCml4s8CTGUcc9BxxnZvuXcM4bgZ/wS9GlAkOAW4FzzGx8WfUo6IoI4Edtrr7aX5F90UX+fsYMOO00aNUKPvwQ9tor7CoTx44d/mc4YoQPC7FIToZDDvGBtndvf9++fcnTQJnB9Ol+FP7//g82b/aheMgQ/9+xe/eK+TxS9QoLfR/tv/7l/1sOGACZmbB+vb+PvhVty8+Hzp3h4YfhN79JuKnDglDQjU0oK6M5564B/gS0xq/E8Yeinl3n3FQAM+sbtX8D/AVn95rZgyWc70/42RjaAdvxgfcBM3t3d7Uo6EpQTz31FADXXHNNyJVIhcvKgvPOg0mT4I47/OhR0T+Qn30GJ5/sR4o+/BD2i+tl3cOXkwPPP+8D7sqVcOyxvmdx3339V8q5uf5W9Dh6W26u/3r5sMN8T2WsduyAiRN96J00yU8l1aOHDz716pV8q19/5+NOnWCffSr+ZyKxKyjwvbEvvuj7ah95pOx2BfC/9GzZAg0a7H7fBKagGxstAaygKwGdcsopAEyaNCnkSqRCZWT4kZ/58+Gpp+CKK369z9y5cOKJfqRxyhTo1q3q64x3ubnwwgs+4K5YAUcf7Xskjz8+nFG1tWt9a8O4cf5xVhZs2+ZvkeW8S9Stm7/IbeBA376iFoiql5vrR+THjvVtC/fcUy1HZstLQTc2CroKuiI113ffwSmn+CA0diycemrp+37zje/5y831F6gdemjV1RnP8vJ8T+T998Py5b7NYPhw/7OK13CSl7cz9BbdsrLgyy99P/HHH/uvwJs3978EDRzof9HRxW6VLzvbT/U1aZJvP/jjH8OuKO4o6MZGQVdBV6Rm+uQTP7dmSgq88w4cfvjuj/nxR39V9ebN/h/iXr0qv87SbN8OP//sL4Rp377izpuX56dJKiz0Xx9H3xd//NlnPuD+9JMf/Rw+3AfCeA24QW3aBJMn+9A7aZJ/nprqR6cHDoTTT4d27cKusvrZssX3xH/6KYwc6VsX5FcUdGOjoKugKwE99thjANxwww0hVyJ7bOxYf3HL3nv7IBNLX+by5T7srl7t+0H7VcIijIWFfp7Y5ct33las2PX5unV+36QkuOUW3wdbnr7WImb+6vLrr4/t4rHDD/cB95RTEj/gliQvzweviRN98F282LewPPQQ3HBD9fzMYcjM9H+G5s3zM2kMHhx2RXFLQTc2CroKuhLQ6af7tUfefvvtkCuRXRQWwp//DBMm+Cl/mjb1E70X3Uc/btoUpk3z+x99NLz1lt8Wq9Wr/RXgixfDG2+U3fIQi4ICP2H93Xf7C7mi1a/vZ31o3x46dNh5++gjf8HO/vv7WQ6OLXFByLL9/DNce60Pcj16wKWX+pHuWrX8LSmp5MetWvn3qylhz8y3u9x6q/9ZnX8+PPusv5BNym/Vqp3/P40b50d1pVQKurFR0FXQFUlcZn4E8skn/T+USUl+mqENG/xt48aSjzv3XD9qtCfzX2Zmwkknwddf+1kGLrig/BcumfmR5T//GRYs8G0AQ4fuGmzT00sPlFOm+K9
5f/oJrrkGHnjAzxKxOwUFflL822/3PanDh/vpnJKTy/c5aorCQvjb3+DOO/3Fa+PHazaO8lq61Pdzr13rf3mojG9IqhkF3RiZWY2+1a1b10QkARUWmt14oxmY3Xyzf15cfr5ZZqbZokVmM2eavfOOvxUUVEwNGzea9e7ta+jQwezuu82WLo3tHLNnm/Xr58+x775m48aV/Fl2JyvL/zycM2vf3uzdd8ve/6uvzHr29O87YIDZ4sWxv2dN9957Zk2amKWnm739duzHFxSYTZhgdtRRZgccYDZypNmOHRVfZzzKy/M/vzZtzBo3Nvvss7ArShjANouD/JQot9ALCPumoCtBPfTQQ/bQQw+FXYaY+SD4pz/5v8J+//vyBcOKkpNjNmaM2Ykn+pDpnFn//mavv262fXvpxy1ZYnb++f4zNG9u9sQTZrm5e17PzJlmXbv68w4ZYrZu3a6vb99udscdZsnJZs2amb3ySrg/v0S3dKnZoYf6n/ddd/lfrnYnP9//+eje3R/XqZPZ4Yf7x23bmv3jH/4XlzDl55utWmU2Z47Zf/9r9swzZg89ZPb++2Zbt5bvnHl5Zh98YHbFFf7PPJi1bm02f37F1l7NKejGdlPrgloXJKBzzjkHgDfeeCPkSoS77/aLOlx9tW9biJce0WXL/FRbL77oHzdu7OcDvewyOPhgv8/69X6Z4See8C0Cf/yjv5gsSKtBUDk5fj7bESN8DY8/7hfEmDbNzxP8ww++NeKRR/yyqbJntm/3Pc4vvugXF3n1Vd8TXlxenn9txAj/36BrV982MmiQb3uZMsX/2Zg2zf93+cMf/HnT02OrZ8sW+OIL37qTn+9vBQWlP9661fedr1rl71ev9hdDFhaWfP6kJD+93rHH+tsxx5T+5yg/3/eRjx3r++gzM31P88CB8Nvf+p9X3bqxfb4aTq0LsVHQVdAVSSz33w933eXD4zPPxOcKSIWFfhW155/3/7jn5vrVvo47zi+qsHWrv+DrL3+Btm0rr46vv/Y/p9mzfdCePx86dvRTN/XvX3nvWxOZ+T+P11/vpx4bP95f2Ad+xbZRo3xf77JlfnnjO++EM88s+c/v9Ok+8E6a5H8Buu46vzpY8+a/3rew0F8gN3Omn+5t5kw/53Ms/7Y7By1aQOvW/tamTcmP69f3f5Y++cTfZs3yv1QBHHCA//N97LH+Qs8ffvBLMk+Y4H+5Kwq3553nw+2ezBBSwynoxkZBV0FXgti6Fd57Dw46yF/dLuF48EF/wdZFF/ngEI8ht7gNG/wo3vPP+6B52mk+8FTV6moFBfDYY/49L73Uj4ZrBK3yzJoF55zjw90TT/i/Ox56yI+W9uzpf0kLOhXb3Ll+9PeNN3wwvOIKuPJKf9FhUaidNcvP6wx+9L5nTz+/81FH+XCalOS/OUhOLv1xnTrluwAxJ2fX4Dt9uh9NLlK//q4jtwq3FUJBNzYKugq6Upb16/3Xvv/6F3+LXMF/a5cufqGBM8+EI4+s/LD100/+a7/TTvOjJjXVP//pv8odPBhGj068pVnNfCBp1CjsSqSyrV3r2xGmTvXP+/XzI7j9+pWvzebbb/0vKq++6n9xAf/3TvfuPtQWhdv99gv3l7+CAv8twowZfiT4pJMUbiuBgm5sFHQVdKUkK1fCo4/6r3i3bYMzz2Twpk2wZQv/adzY99Dl5/t5RAcO9KH3+OP3bLqq4hYv9qM5L7/s36tOHT+iee218TeSaea/nq1du3Jqe/JJ//XtOefA66/7OV5F4ll+vp/X+MAD/Vf5FWHpUnj3Xf8L7xFHQIMGFXNeSSgKurFR0FXQlWg//ujD5Esv+dGJ88/3k8MX/5p540b/D85bb/k+uqws34N28sk+9J5ySvkWIgD4/nsfcEeP9oHuiitg2DD/lec77/j5Yl98sXJ7O4tkZvqe0p9/9l/Bbtni74tu0c8LCvx8ryNG+J9bRQXeZ5/1P4PTT/cj26mpFXNeEZEEpKAbGwVdBV0
B+Oor/9XgmDE+XF56qb8SPsjSsDk58L//+dD71luQkeG/nuzRw1/wc8IJ/gKN3fVFfvutvwDl9df9yOhVV/kaWrf2r5v50PeHP/jXn37afz1aGdatg4cf9iOp27b5q74bNvQjSNG36G316vlewrlz/bKwDz8MffqUvwYzPyJ25ZX+F4gJE/znFhGpwRR0Y6Ogq6Bbs/34ow+O//2vv3Dimmv881atfrXrfffdB8Bdd91V+vkKC/3FGe+/76+6nzHDTymUmup76E44wYffI47YefHHwoV+JoExY3w/27XX+imnWrYsveaLLvIXo1xwgb/gpXHjPf1JeGvW+ID61FN+yqTBg31vYdeuwY4vLPR9hLff7keBzzgD/v536Nw5eA0rV/oR9Rdf9J+1f3+YOLFi20JERBKUgm5sFHQVdGuu7Gw/5VNGBtx0k+8BLSMwDhkyBIDRo0cHf49t2+DTT33onTIF5s3zI5UNGkDfvj7sTpjgQ/b11/uQXdIUQsXl5/tlXocP9yO+o0b5EF1eGRm+ZePf//Yj1BdcAHfcAV26lO9827f7i8ceeMD/nK+6Cu65p/TPlpvrw+wLL8DkyT4w9+njp8Y67zyN5IqIRCjoxkZBV0G35rr6ah/spkzZs5AYi/Xr/eTpU6b48JuZuXOOzPL09M6e7Ud3Fy3y5xgxIrarnFet8gF35EgfNocM8QG3oqZQW7vWzxX7zDO+teH22+GGG3aOzi5Y4KfdGj3a/yzatvX9yMOGwb77VkwNIiLViIJubBR0FXRrprfe8heN3Xyzn+MyLGZ7vqpXdrafW/aJJ3yLwU037Zx6yy+yuXPy+OjHX33le37z831YvuOOyguX337ra5w40V+wdskl/mK+2bN9T/SZZ/q+6AEDEm/aMBGRKqSgGxsFXQXdmmfVKr/wQ4cOvs814FX8d999NwD33ntvZVZXfu+95wPk6tXB9k9Ohosvhttug06dKre2Ih995H+5+PJL/9/gsst8m4SWoRURCURBNzblWApFJIEVFvpwl50Nr70W01RVK1asqMTCKsBJJ/m5d4uCrnM7byU9b9DAz6ZQlfr186O4q1f7CeX3dDRbRESkDBrR1YhuzfLII35EceRIPzeriIhIAtGIbmwUdBV0a465c/3676ed5ud71WiiiIgkGAXd2CjoKujWDEVTiW3Z4i/CKscMB7fddhsADzzwQEVXJyIiEoiCbmzUoys1w003+Sm4Pvig3Evzrl+/voKLEhERkcqkEV2N6FZ/b74JZ53ll9N98MGwqxERESk3jejGRkFXQbd6K5pKbK+9YObMmGZZEBERiTcKurGpFXYBIpWmsBCGDvXL0cY4lVhJbr75Zm6++eYKKk5EREQqm3p0pfp69FG/zO4zz0Dnznt8uu3bt1dAUSIiIlJV1Lqg1oXq6csvoWdPTSUmIiLViloXYqPWBale1q+HBx6AU06BFi3g2WcVckVERGooBV2pHhYuhCuvhHbt4PbboUcP+O9/yz2VWEluvPFGbrzxxgo7n4iIiFQu9ehK4ioshMmT4Z//9PPj1qnjLz77/e+hW7ewqxMREZGQqUdXPbrh++ILGDsWWrWCtm39qGzbttC6dckzJWRlwcsvw2OPwfffQ5s2cN11cPnl0KxZ1dcvIiJSRdSjGxuN6FaFSZOgXz8/4ii7WrwYBgyATZtKfr1lSx96iwKwc36qsE2b4Igj/ONzz4WUlKqtW0REROKeRnQre0T3xx+hSxfo29ev0FW/fuW9V6LJyoJevfyiDl98AenpsHKlv/38c8mPN2+Gs8+GG2/0sypU4YVm1157LQBPPvlklb2niIhItKAjus65a4BbgNbAQuBGM/ukjP37AI8C3YBVwINm9u89OWc80IhuZdt3X3jhBbjkEjjpJHj3XR/oajozuOwy+OYb32e7zz5+e5MmcOCBpR9XWAi1wrmGMi0tLZT3FRERiYVzbhDwGHAN8GnkfpJzrquZLS9h/32Ad4EXgCH
AMcBTzrl1ZvZGec4ZLwKN6DrHUWbMqoJ6qlyV9ei+8Qacf74Pce+9p17SBx+EP//Z399yS9jViIiIJIQgI7rOuVnAV2Z2edS2H4BxZnZbCfv/HTjbzPaL2vYc0M3MepXnnPEi6NDYTOeY7xzXO0fjSq2oujrnHN+68M030KcPrF4ddkXhef99uO02GDQItKSuiIhIhXHOpQKHAe8Xe+l9oHcph/UqYf/3gMOdcynlPGdciKV1oTvwT+DvzjEBeM6MjyqnrKrTpEkTpk6dWjVvVrcujUaM4MDbbyf38MOZ98gj5LRqVTXvHSfqrFrFYVddRc7ee/PlxRdTOG1a2CUF9vDDDwNws8K5iIiEJ9k590XU82fM7Jmo582AJGBNsePWAP1LOWcrYEoJ+ydHzufKcc64EDToPgqcB7QD6gCDgcHOsQR4HhhlRkbllFi5NmzYQN++favuDfv2hV69SDvlFHr96U8wZQrsv3/VvX+Ytm3zF58lJ5PywQcc17Fj2BXF5L333gOo2j8vIiIiu8o3s8PDLiJRBGpdMONmMzoAxwFPA+vw6b4T8FdguXOMdY4elVZpddKzJ3z0EezYAccdB19/HXZFla/o4rOFC+E//4EEC7kADzzwAA888EDYZYiIiJQlEygAWhbb3hJKHZTMKGX//Mj5ynPOuBDT5etmfGrGtcARQPR3zsnA2cAs5zijAuurvnr0gI8/hqQkP8r7xRe7PSQmZvD553DXXTB+vJ+tIEwPPwxjxsCIEXDiieHWIiIiUk2ZWS4wBxhQ7KUBwIxSDptZyv5fmFleOc8ZF2KaR9c5BgBXAafhw23RJKZzgYb4Ed5vzOhewXVWmtBXRluyBPr3h8xMP/XYMcfs2fm+/RZef90vpLB48c7t3brBnXfCb3/rw3VV+uADOPlkf0HemDFVOvdtRbrkkksAePHFF0OuREREaqqAsy4MAl7BTwE2HZ/dLsPPorDMOfcygJkNjey/D7AAeBYYCRwNPAWcX2x6sVLPWdGfs6IEGtF1jluc4wdgMnAmkAIY8CbQx4zDgB7AFqCGNJxWkI4d/chumzZ+pPOZZ2DOnNJXCivJihXw0ENwyCHQtSv89a9+XtoXXvAB+i/UCjAAACAASURBVNVX/Yju+ef7wPvKK5CfX3mfKdrSpTB4sK/rhRcSNuQCtG/fnvbt24ddhoiISJnMbAxwI3AnMA8/L+6pUYG0Q+RWtP9S4FR8i+o84A7g90UhN+A541LQeXQL8cHW4cPsC8C/zPip2H7fAfuZUcVDhuUX+ohukbVr/YIS8+bt3NakCXTq5Bed6NRp11tqqp+b97XXfFAGOPJIuOACOO88aN161/MXFvr9778fvvrKn+O22+Cii/y5KsO2bdC7NyxfDrNn+88hIiIi5RZ0ZTTxYgm6S4DHgefNyCplvzZAihlxne6jxU3QBcjL860HixfvvP34o79ftqzkPtsuXeDCC/2oaZAgWVgIEyfCfff5keMOHeDWW+HSS6F27fLVbQYbN/qlfIuW6l21Cj78EKZN8y0ZJ59cvnOLiIjILxR0YxM06J4BvG1G8IbeBBFXQbcseXk+7BYF302b4De/gYMPLl87gBlMmuQD72ef+daJ886DlJTdH5uf7xe8KAq0K1f6GSSKa9oU7rkHrr8+9vri0JAhQwAYPXp0yJWIiEhNpaAbm6BBNx1IB7LNyIza3gyoC2w2Y3OlVVmJEiboVhYzP/J6//1+loYgatWCVq2gbVsfkNu2/fXj1q2hTp3Krb2K3XfffQDcddddIVciIiI1lYJubIIG3TfwF6H9wYx/RW2/DngMmGDGuZVWZSWq8UFXREREEoaCbmyCzqN7VOT+jWLbx+MvUDsKEREREZE4EjToNo/cF5/zanOx10WqrcGDBzN48OCwyxAREZGAkgPutxVoDJwITIjaXrTEVYmzMIhUJz16aIVrERGRRBK0R/d9oD9+BPcR4FvgAOAm/EVqU8w4KfCbOncNcAv
QGlgI3Ghmn5Sy7yjg4hJe2qVHxTnXB3gU6AasAh40s3/vrhb16IqIiEiiUI9ubIIG3bOBcfCr6cVcZNu5ZruM9JZxLjcIGI1fQu7TyP0lQFczW17C/ulAWrHN04GPzeySyD5FS9e9gF+y7pjI/eDoVT1KoqArIiIiiUJBNzaBgi6AczyMH8Et7mEz/hT4DZ2bBXxlZpdHbfsBGGdmtwU4/mh8QD7azGZEtv0dONvM9ova7zn8+su9yjqfgq4Edc455wDwxhtl/u4kIiJSaRR0YxO0RxczbnaOMcDpQEtgDX4RidlBz+GcSwUOAx4u9tL7QO+Ap7kcWFgUciN6Rc4R7T3gYudcipnlBa1RpDS9epX5O5OIiIjEmcBBFyASagMH2xI0A5LwITnaGnwPcJkibQznAcVHflsBU0o4Z3LkPVcXO88VwBUAqampAUuXmu7mm28OuwQRERGJQeCg6xzJwKlAZ37dM4sZ91ZgXaUZgp8S7ZU9OYmZPQM8A751oQLqEhEREZHK4lxB5JFhFji/BtrROVoAU/EhtzRBgm4mUIBvfYjWEsgIcPzlwBtmtqHY9oxSzpkfeU+RPXb66acD8Pbbb4dciYiISI3jynNQ0EQ8HOhSxuuBRkXNLNc5NwcYAIyNemkAv151bRfOuSOBg4EbS3h5JnBWsW0DgC/UnysV5YQTTgi7BBERkZpqOQHzZrSg04stBvYGRuGnAjPgBuD6yOO/mTEq0Bv66cVewU8rNh24CrgMP0PCMufcywBmNrTYcc8Bx5nZ/iWcs2h6sWeBkcDR+OnFztf0YiIiIlJdaNaF2AQd0W0bub8VH3Qx4wnn+Aj4GmgX9A3NbIxzrilwJ37BiAXAqWa2LLJLh+LHOOcaAIMppT3CzJY6504F/gFcjV8w4ve7C7kiIiIiEjLnHsX33v4R5/xAp9nLFXLqgCO624A6QAqwHR+QW0UebwF+Nvt1QE0EGtGVoE455RQAJk2aFHIlIiJSU1XLEV3nCoFCzJJ3eVwBgp5kPX5UNx1/4Vc74FVgR+T1xhVRjEg8GzhwYNgliIiIVEeFgMO5hpHn5brwrCRBR3Q/AI4HjsL35l7Irg3Bn5rRp6KKqkoa0RUREZFEUU1HdFfhZ8vahB88NWBZKXsbZp2CnjroiO6zwI/49oXhwIlA88hr6yh5JgQRERERkd35CDifnR0CDj8JQklimnkh0Ijurw5yNAT64eepnW7GpphPEic0oitB9e/vF++bMqX4InwiIiJVo5qO6LYA/gUcCuyLD7PLS93fbJ+gp97tiK5z1Aa+iTz9jRnfmbEFeCvom4hUB4MGDQq7BBERkerHbC1+dq2iC9NiCrNlCdqjuwloAKSZkVsRbxwvNKIrIiIiiaKajuhGTy92ceRxlU4vNg6/8lhPM2ZXxBvHCwVdERERSRTVNOhW2vRitQLu909gA/C6cwxyjs7O0SH6VhHFiMSzvn370rdv37DLEBERqW4qbXqxoGn5Y3xjcBPgtRJetxjOJZKQhg0bFnYJIiIi1dFa/PRiS3/Z4tySUvaNaXqxoK0LhbvZxcxICvqm8UStCyIiIpIoqmnrwqv46cWCMMwCZ86go7AvBT2hSHWVl5cHQEpKSsiViIiIVCt/AJLw04sVjdaWPr1YDMo1j251ohFdCaqoP3fq1Kmh1iEiIjVXtRzRjeYvRotp1LYs6qsVCeh3v/td2CWIiIhUd/0q8mSBgq5zvLCbXcyMyyqgHpHQmRnrtuawJHMbS9ZtY2lmFkvWbSNjy1707dycVZu206ZRWthlioiIVA/O+dm7zJZTdEFa0baS+P2CnTqGi9FK29Ghi9EkQa3evJ3ZP21kyboslv4SbLeRlZP/yz61k2uxT7N61E0q4MtlG0hKTePEri0Z2mtvenZsgnMVNguKiIhImapl68Kv59EtK5xaLHPsatYFBd0aZ8m6LCYvzOC9hWuYv2ITAM5Bm/Q0OjavR8dm9ejYvD77NKtHx+b1aJO
eRq1ajr59+5KTX8jpd4xkzOwVbMrOo3PLBgztvRdnHdKWuqnqBBIRkcpVjYOu78stWgK4dDH17wYNunsV25QMdATuAg4BTjNjWtA3jScKutWfmbFw1RbeW5jB5AUZ/LA2C4CD2qVzUrdW9Nm/Ofu2qE+dlLL/vxkzZgwAgwYNYkdeAW/PX8VLM35i4aotNKiTzHmHt+einnuxd7Pq9fePiIjEj2oadEfhA+wlvzwui9klgU+9J7MuOEd9IBN404zB5T5RiBR0q6eCQuOLnzbw3sI1vLcwg5WbtlPLwZH7NOGkbq04sVsr2lZAn62Z8eXyjbw0Yxnvfr2a/EKjb+fmDD6iPX07t9hteBYREYlFtQy6lWhPg24jYDWQY0ajCquqCinoVh8btuXy8ffrmLpoLR//kMmGbbmkJtXi2P2acVK3VpxwQAua1q9d7vNv3rwZgPT09BJfX7tlB69/voJXZy1j7dYc6qYmcXyXFpx6YGv6dm6u1gYREdljCrqxCdq6UNKsC3WAo4H2wFozWlVwbVVCQTdxFRYaX63czNRFa5m6aB3zf96EGTSpl0qf/ZtzfJcW9OvSgvq1KyZgBp1HN7+gkFlLN/Du16t5b2EGmVm51EmpRb/OLTjlwNYcv4c1mRk78grZsiOPzdvz2LI9jy078tiyPZ8tO/Lo3jadQzs0Lvf5RUQkflXLoOvc7mb3imaYBZ7pa09nXSi63HyUGZcGfdN4oqCbWDZuy+XjH9YxddE6Pv5+Heu35eIcHNyuEX07N6dv5xYc1DadWrUqfiaE8ePHA3D22WcHPqag0Jj9kw+9kxZksG5rDqnJteizf3NOPbAVPTs2JTu3gE3ZeWzensum7Dw2ZuexOTuXTdvz2JSdx6bt/vmWHfm/hNq8gtL/v3UOfnfMPtx8UmdqJ6t1QkSkOqmmQXd3My38sieVdDFaaVfA5QCvAzeasSXom8YTBd3E8b/v1nDVK1+SW1BI47op9NnfB9tj92u2Ry0JVaWw0JizfKMPvV9nkLFlR6n7OgfpaSmkp6XQKC2FhmkpNKqbSsM6yTRMS6FhnRQapiWT/svjFBrWSSYtNYknP/qR0Z8tp0urBvxjUA8OaN2wCj+liIhUpmocdIOqklkXwPflZsRQWFxS0E0MP67dyplPzmCvpnW5/8zuHNSuEUmVMGpblszMTACaNWu2x+cqLDTmrtjEN6u30LBOMo3qptIoLYVGdVNolJZKgzrJezQq/dF3a7ll3Fds2Z7HzSftz++O6Vgpo9wiIlK1qmnQ7RP1rAEwEtgEPAL8DLQD/gg0Ay7H7O3Ap96Ti9GqAwXd+Lc5O48zn5rO1h15vHXdMRUyW0J5BO3RjRfrs3K4bfzXvP/NGnp2bMLDvz2Ydo3rhl2WiIjsgWoZdKM59xRwJbAvZkujtncCfgCex+zywKcLOKJ7MnAkMNeMiVHbTwd6AJ+bMTnom8YTBd34VlBoXDJqNjMXZ/La5T05Yu8modUycaL/oz9w4MDQaoiVmTF2zs8Mf3shtZzj3jO7cWaPtlrNTUQkQdWAoJsJNAbaYpYRtb0NfnR3I2ZNg56uVsD97gbuwffkRssC/oJfOEKkwv198nd8/P067j2je6ghF3zATaSQC+Cc47zD2zP5xuPo3KoBfxgzn+tem8um7NywSxMRkTjgnKvtnHvcOZfpnNvmnHvbOdcuwHHXOOeWOud2OOfmOOeOLfb6VOecFbv9J0BJRRfdvIFzp+FcD5w7Dfi/yPaUmD5fwBHdDUA60MiMrVHb6wNbgE1mhJtCykkjuvFrwtyf+cOY+QzttRf3ntE97HLIyPC/WLZqlZAz6VFQaIz8eDH/+OB7mtRL5W/nHETf/ZtrdFdEJIFU9Iiuc+5p4AzgYmA98CjQCDjMzApKOWYQMBq4Bvg0cn8J0NXMlkf2mQosAW6POnS7mW3eTUHjgLMpfRaG8Zj9Nshng+BBdwc+QbczY3XU9tbASiDXjDpB3zSeKOjGp/k
rNvHbkTM5tEMjXrnsKFKSgn75UHkSrUe3NAtWbubGMfP4cW0WHZvX4+xD2nLmIW3VvysikgAqMug659KBdcAlZvZqZFt7YBlwipm9V8pxs4CvLKpX1jn3AzDOzG6LPJ8KLDCz62IsqjXwP6BzCa9+B5yA2eoSXiv5dAGD7lKgA/C0GddFbX8cuBb4yYyOQd80nijoxp+1W3Yw8IlPSa5Vi4nXH0OTeqlhlwTA5Mm+Df3kk08OuZI9tyOvgDfnrmT83JV8vnQDAEft04SzD23LKQe2pmGdmL4ZiomZsWVHPmkpSaQmh/8LjIhIIqngoHs88CHQwszWRW1fiA+t95RwTCqQDZxvZmOjtj8JdDezPpHnU4Gir2PXAJOA4Wa2ld1xrg4wFDgeaApkAh8BL2NW+tycJZ0qYNB9FrgMP4y8GFiET9qdIrs8b8YVsbxxvGjfvr298sorYZchEQYsWbeNHXkFdGpenzopCkKVLa+gkI3ZfnGKnPwCajlHgzrJNK6bSv06yVRUY0N2bsEvK7jl5Ptvw2o5R1KtyC3qca2o543qppCsqdFERADo169fLvB11KZnzOyZ8pzLOXcB8DKQYlGB0Dn3P+AHM7uyhGPa4L/N72NmH0dtvxu40Mw6R55fgR8ZXgV0Ax6InPPE8tRaXkHXIf0bMAiohw+3RQHX4S9I+1vFl1Y1NmzY8MtX0hIuM+NP475i7JxtPH3hEZx8YOuwS9rFihUrAGjfvn3IlVQOM2Peik1MmLuS5+avYmN2Dk3rGQMPbsPhezemU/P67NOsHnVSgs3TnZNfwIzF63l/4Ro++GYNmVk5JNdy9OrUnN6dmlFQWMim7TuXLt4cWfVtc9Q2M+jUPJVxV/WmcZyM7IuIhCzfzA4vawfn3P3AHbs5T7+KK+nXioXvr51zS4BZzrlDzezLynzvaIHn0XWOnsDzwAFRm78BfmfGZ5VQW5VQ60L8eHH6UoZP/IbfH78vN51YUmtOuKpLj24QufmFTF20lglzV/Lht2vJLfCL1jgHbRul0al5fTo2r/fL/b7N69O8QW225uTz0Xdref+bNUz9bi3bcguol5pE3y4tOLFrS/p2bkF6WrC2iMJC47Ml6xk2ajbd2jTktd/1JC1VSxqLSM0WpHXBOdcMv7hCWZYDPamk1oUSjqsF5OJHfcfsprYKE/OCEc7RCWgJrDFjcaVUVYUUdOPD9B8zGfrC55zQpQX/HnJYXK7iNWXKFAD69+8fciVVa3tuAUsys1iybhuL1+16vz1v5wW5DWonsyO/gLwCo1n9VAZ0bcmJ3VrRu1NTaieXP6BOXrCaa179kn6dWzDyosNIjoMLE0VEwlJJF6MNM7PXItva4UPw7i5Gm29mV0Rt+x54o+hitBKOORiYR7GWh8qmldEUdEO3bP02Tn9iOi0b1mb8NUdTv3bQjhoJU2GhkbFlR1TwzaJOahIndm1Jj/aNK3SJ5tGfLePONxdw3uHt+Ps5B2lKNBGpsSpperGBwDB2Ti/WmKjpxZxz3wFPmNkTkeeDgFfw04pNB67CX8vVzcyWOb+K2YXAu/gLybril/PdDhxR2rRllSFQonCOV4HBwF/MuC9q+134BSNeM+OiSqlQqjUz45ZxXwHw7NDD4zrkLlmyBICOHRNygpEKV6uWo02jNNo0SuOY/Xb3DdmeGdJzL9ZuzeFfH/5A8wa1ueWkLpX6fiIiNciNQD4wBkjDtzIMLRZGOxPVCmFmY5xzTYE7gdbAAuBUM1sW2SUXOAG4AagPrADewc+6UGUhF2KfXmxfM5ZGbd8bPxnwMjP2qawiK5NGdMM1cf4qrn99LiPOOpALjuoQdjllqkk9uvHIzLh9wgJe/3w5fxnYlWFHJ+RfOSIie6TaLwFcEv81XjOi+oiDCjp8VnT5e0ax7Wsi94m5VJSEantuAQ+8+y3d2jRk0BHxP5PB8OHDwy6hRnPOcd8Z3cjMymH4f7+hWYPanHZQm7DLEhGRiuTcKfgZIT7
DbDzOXQQ8BdTFubnAqZitDXq6oFd1FE3O26vY9l7FXhcJ7Olpi1m1eQf3DOxWof2claVPnz706VPixaRSRZKTavH4+Ydw+F6NuWnMfGYszgy7JBERqVjXAH8E6uFcGvAkfnpbBxwC3BvLyYIG3a8jbzDKOYY4x2HOMQR4ET/H/9dlHi1SzM8bsxk5bTEDD27Dkfs0CbucQBYtWsSiRYvCLqPGq5OSxHNDj2DvZnW54uU5LFxV9rLpIiKSUA6K3H8CHInv8f0W+C8+i54Uy8mCBt1Rkfu2wEvA55H79sVeFwlkxLvf4hzcdkriXFR05ZVXcuWVv1okRkKQXjeFly49koZ1khn24mxWbMgOuyQREakYzSP3K/GzNQD8E7g48jimnrVAPbpmPO8cJwPnlPDyODNeiOVNpWabsTiTd7/O4KYB+9OmUVrY5QQ2YsSIsEuQKK3T03j5siM5998zGfrC54y9qhepybXYnO1XV9u83S9r/Mvj7bls2Z5HTn4hB7drRK9OTdmvRX1NVSYiEl9ygdr4WR4OwncOfMfONtncWE4W0zy6znEefq61lvgL0d42Y2zZR8U3zbpQtfILCjnt8U/Jyslnyk19Ai8nK1KaOcs2cuFzn7Ejr7DM/VKTapFeNwUHrN2aA0DTeqn07NiUnp2a0qtjEzo1V/AVkfhW7WddcO4roBvwE370NgU/ytsU+B5YilmnoKeLadJSM/4P+L9d66E+cI4ZL8VyLqmZXv98Od9lbOXpCw9NuJC7YMECALp37x5yJRLtsL0a89rlPfnou7Wkp6XQMC2FRmkppKelkF43hUZpqaSnpVAnpdYvIXbFhmxmLlnPZ4vXM3PJet75ejUAzerXpmfHJvTq1JSeHZvSrF5t8goLyS8w8goKyS808gsKyS3w2/ILC8krMJJrOZrWr03T+qk0qJ2ssCwiUn6vASPgl2lrp2C2EefOiDz/MpaTlWtlNOeoBZwMXIQf4a1jFltojhca0a06m7Jz6fvwVA5o1ZDXLj8q4cKA5tGtnsyM5RuymRkJvTMXr/9lxLc8UpNq0bR+Kk3rp9Ksfm2a1qtNs8jzpvVq07heCo3qptK4biqN66bQsE5KXC55LSLxqQaM6DrgZuBYYClwL2brce53wFHAG5hNDny6GFsXjsCH20HsXCHD4f+tSKzhuQgF3apz91sLGP3ZMt694Vi6tGoYdjkxmz17NgBHHHFEyJVIZTIzlmZu4/OlG9iWW0BqkiM5qRbJtRwpSbVITnIk16pFSmR7Si1HbkEhG7blsj4rl8xtOazPymV9Vg7rt+WSuTWHzG255OaX3FpRy0F6WgqN66bSqG7RfSoN6iRTr3YSdVOTqZuaRL3UZNJSk3bZVjc1meYNapOellLFPyURCUu1D7oVbLejsM6xDzAEv2bxfkWbo3bZDrxZ8aVJdfJdxhZGf7aMC4/aKyFDLijg1hTOOTo2r0/H5vUr7JxmRlZOPuuzctmYncum7Dw2ZueyMTuPTdm5uzxevXkH367ewtacfLJzCygoLHswIiXJ8ZsDWzO0994c0r5Rwn1TIiJSmUod0XWOq/ABN3qRiOJ/gxqQbkZW5ZRX+TSiW/nMjAuencU3q7cw9ea+NK6XGnZJ5TJv3jwAevToEXIlUlOYGbkFhWTnFJCdV0B2Tj7bcgvIzs3/ZduXyzbyxpyf2ZqTz4Ft0xnaay8GHtwm4XrgRSSYajmi61xBDHsbZoHbZcsKuoX4IFsUbnOBKcAbwGJgKgncslBEQbfyTfp6NVe/+iX3ntGNob32DrucclOPrsSrrJx8JsxdycszfuKHtVk0rpvCoCM6MKRnB9o1rht2eSJSgapp0C172pxdGWaBs2eQoAvwAnCLGZsir3XDr4amoCtl2pFXwAmPTKNBnWT+e/0xJCcFXaMk/mhEV+KdmTFzyXpenrGM97/JAKD/AS25uPfe9O7UVG0NItVANQ26P7Ezc4KfSqw+kAesjzxPAbKBtZh1DHz
qgEEXYB0wAT+im4mf3kFBV8r0rw9/4NEPvue1y4+id6dmuz9ARCrEyk3beW3WMl7/fAUbtuWyb4v6PPzbg+nRvlHYpYnIHqiWQTeac4cB/wOeAe7CbAfO1QH+ClwJDMBsZuDTlRF0RwAXAB2iNhftvB2oi4KulGHVpu0c/8hU+nVuwdNDDgu7nD2mWRckEe3IK+Cdr1bzjynfk5mVw+PnH8qAri3DLktEyqkGBN1PgN5AI8y2Rm1vAGwGZmJ2dODT7W56Mec4Dn9R2m+B9KiXig5cDYw249agbxpPFHQrR0Ghce2rX/LRorVMuakP7Zskfp+genQlkWVm5XDZqNl8vXIzw0/vxkUJ3C8vUpPVgKCbjV8C+ETMPoza3h94H9iBWeBQEXgeXedIxS8OMQQ4Fd8rUSSmkV3n3DXALUBrYCFwo5l9Usb+qcCd+Dl82+CXH37YzP4VeX0Y8GIJh6aZ2Y4Stv9CQbfiZWzewQ3/mcuspRu45aTOXNtv37BLqhBaGU0SXXZuPte/NpcPv1vLVX068aeTOmuxCpEEUwOC7jKgHZADvAv8HHl+Kj4A/4zZXoFPV86V0RoDg/Fz6/YmhqDrnBsEjAauAT6N3F8CdDWz5aUcMx7/Ie8AfgBa4kPs1Mjrw4AngV3WPjazjN3Vo6BbsT78dg03j51PTn4h953RnXMOaxd2SSISJb+gkHveXsirs5ZzRo82PHjuQdROTugONJEapQYE3VvxSwAXD6gusu02zB4MfLryBN1d66EjcKEZ9wXb380CvjKzy6O2/QCMM7PbStj/RGAs0MnMMks55zDgCTOLeYZ3Bd2KkZNfwN8nLeKF6Uvp2rohj19wCJ0qcML9eDBjxgwAevfuHXIlInvGzHh62mIenLyInh2bMPKiw7W6mkiCqPZBF8C5vwB/AupEbd0B/B2z4TGdak+Dbkxv5lsQsoHzzWxs1PYnge5m1qeEY54C9gc+B4biL4SbBNxuZlmRfYYBz+OHt5OAecBdZjZ3dzUp6O65nzK3cf3rc/l65WaG9d6bW0/pUi0nq1ePrlQ3E+b+zJ/GfcU+zerx4iVH0rZRWtglichu1IigC+BcOn7Rsqb42b4+w2xzrKcJvLJEBWmGD6Jrim1fA/Qv5ZiOwDH4Xo1zgEbA4/he3XMj+ywCLgXmAw2AG4DpzrmDzeyHivwAsqu35q3k9vFfk5xUi2cuOowTu7UKu6RKM3LkyLBLEKlQZx3SjpYN6nDlK3M4+6npvDjsSLq2ScwlukWkmvGhdvKenqaqR3TbACuBPmb2cdT2u4ELzaxzCce8DxwLtLJIko+0M7wX2VY8NOOcKxrV/cjMfl/C61cAVwCkpqYelpOTUxEfr0bJzs3nnrcWMnbOzxyxd2MeG3wIbTQaJJKQvsvYwrAXZpOVk8/TQw7l2P2ah12SiJSiWo7o+hwYnNm9gU+dAK0LLwFHm9m+UdvaA8uBI81sdinv9SI+CJ9SVk1qXYjdt6u3cN1rX7IkcxvX9duXG07YL6FXPAtq2rRpAPTp86s/piIJb/Xm7Vzy4mx+XJvFHb85gIt67lUj/r8WSTTVNOgWX6SsbDEsAVylf4uZWS4wBxhQ7KUBwIxSDpsOtHHORV/ZtH/kfllJBzi/zuVB+Dl+pQLNWbaRM56cztYd+bx62VH88cTONeYfw3vuuYd77rkn7DJEKkXr9DT+76peHLNfM4ZP/IbTHv+UmYvXh12WiNQcLuAttpNW5Ygu/DK92Cv4acWmA1cBlwHdzGyZc+5lADMbGtm/PvAt8BnwF3yP7kjgWzP7bWSfeyKv/wA0BH6Pn3P3aDP7vKx6NKIbXGGhcfqTn7I+K5eJ1x9Ds/q1wy6pSi1ZsgSAjh0DL7EtknDMjEkLMvjrO9+yctN2Tj2wFbefegDtGif+oi8i1UE1HdG9OOpZCjAcH2qfY+c8ur/DX+d1J2bPBj11qRejRVZEC8yMj3e/F5jZGOdcU/wCEK2BBcCpZlY0Otu
h2P5Zzq+G8TgwG9gIvAm7rMTWCL8mciv88nBzgeN2F3IlNuPm/MyClVt4zFyJ3wAAIABJREFUbHCPGhdyQQFXagbnHKce2Jrju7TgmY+X8NTUH/nw27VceVxHru67L2mp1W9GFREJmdlLvzx27n58njsUs/lR2yfguwL2i+XUpY7oOkcs/RJmVuUzOFQIjegGs3VHHv0enkaHJmm8cXVvfHdIzTJlyhQA+vcvbYIQkepn1abtPDDpOybOX0Xr9DrcduoBDDyodY38O0AkHlTLEd1ozq3CLwzWDLONUdub4KcZW4NZ68Cn203QDSqmJYDjiYJuMH+b9B3/nraYt649moPbNwq7nFBoHl2pyT5fuoHhExeycNUWjty7CXcP7Er3tulhlyVS49SAoJuNX+p3Ar5ltah1YThwFrCdGD5/WUH3xWKbTsQPJU+PetOj8en6HTMujeVzxAsF3d1btn4bAx79mNMObs2j5/UIu5zQrFixAoD27duHXIlIOAoKjf/7YgUPvbeIjdm5DD6iA386qTON66WGXZpIjVEDgu77+LUVSusqmILZSYFPF+RiNOe4EHgZGGTGuKjt5wGvA1eY8XzQN40nCrq7d+UrX/DJD5l8dHNfWjass/sDRKRa27w9j8em/MBLM38iPS2F207pwrmHtVM7g0gVqAFBtzPwMVDShN5rgT6YLQp8uoBB91v8lF7pZmRFba8PbAG+N6NL0DeNJwq6ZZuxOJMLnp3FzSfuz3XHx9T/Xe1MnuwXaDn55JNDrkQkPny7egt3vrmAOcs2cuTeTbj/rO7s37JB2GWJVGvVPugCONccuAnox84lgD8C/oHZuphOFTDobgdSgdvMeDBq+5+BB4AcMxJyWSwF3dIVFBq/+dcnbN2Rz4d/7EOdlIRsw64w6tEV+bXCQmPsnBU8MOk7snbk87tjO/L7E/albmpCXp8sEvdqRNCtQEGD7nyge+RpJn4hhtZAs8i2BWYcXCkVVjIF3dK9OmsZd0xYwJMXHMpvDgp8gWO1lZGRAUCrVq1CrkQk/qzPyuGBSd8xbs7PtG2UxvDTu9G/a8uwyxKpdmpM0HWuJ3Aq0ALfsvBfyjFtbNCgexr+6rckdm0OdkAhcIYZ78T65vFAQbdkm7fn0e/hqezbvD5jruyp3jsRCeTzpRu4882v+X5NFid2bck9p3ejbaOE/MJPJC7ViKDr3NPAFSW88u//b+/O4+wczz+Of66ZTPZ9kUQSIkSQUCRoEKKSWvqjihZtEG0pqqil6GKrtfVrKaqoCsKvaYkWRUhIogmxtESCiCX7Olkm20xmu35/3M8kx5jlnMmZec6Z832/Xud1znme+7nPdc4zk1xzn+u5b9x/nEpXSa3d6s5zwLHALEKia9H9G8DXszXJldrdPWU+67aUcu0J+yjJjTz77LM8++yzcYchktEO3q0rz/1kBFcduxfT569m1P9O4/5pn7K1vCLu0EQkG5iNBX5Ezcv/no/ZWSl1l+oSwGa0BboA69zZktLBGUgjul/22epNfP330znlwL7cfup+cYeTMVSjK5KaJeu2cP0zHzD5w5X07NiKHx4+gDMO2YX2rVS/K9JQzX5E12wm8FVgIfD76H4X4KdAf+B13A9LurtUEl0zWgAHAt3ceSH5qDOXEt0v+8G4t5j1+VpevWIkPTrk3lK/tSksLASge/fu9bQUkUSvzV/NfVM/Zeana+jYugVnDe/P2MP65+RS4iI7KgcS3Q1AO+AruM9J2D4EmA1sxD3p1WqSTnTN+DZwD+ECNHenhRlTgN2A8915Kek3kUGU6H7R9I9Xc9Zf3uTq4/bi/CN3jzscEWlG3l28nj9N/ZRJH6ygZX4epx3Uj3NHDKBf17ZxhyaSNXIg0S0BCoBuuK9P2N4ZWAtsxT3pwv9kL0YbQZi/rKpGwt3JN+My4A7gz+41Fg1nPCW625VXVHLcXa9RWlHJSz89glYtcns6seomTpwIwMknnxxzJCLZ7dPVm3h
g2mdM/O8SKh1O2K8354/cnb16dYw7NJGMlwOJ7nxgAPAwcAXu6zHrRMg3fwB8gvueSXeXZKL7POFitI+Avdie6A4CPgTmurNvym8mAyjR3e6RmQu47pm53H/mUI4ZrCm0qlONrkh6rSgq4aF/f8YTsxaxubSCowb14LLRg9i3b9LfSorknBxIdH8PXML2Wb42Ae0TWtyJ++VJd5dkorsW6ERYHW0+2xPdAmArsN6drsm+aCZRohus31LKyDumsk/vjjz+w0M000INioqKAOjUSf8Ji6RT0ZYyHntjAQ/PWMCGkjKuPWEwYw7ZRf8OidQgBxLdbsA7hAvQqlsADMN9bbLdJTW9GKEoGGBRte2do3tNkpjl7n7lEzYUl2k6sTp06tRJSa5II+jUtoCLvjaQVy4fyeF7dOdX/5jDlU/OpqRMU5KJ5Bz3NcAhwEOEBcrKgWXAg8DwVJJcgGTneFkK7AoMr7b9iuh+SSovKpll3eZSnpi1iJMO6KMauTpMmDABgNNOOy3mSESap05tC3jo7IO4a8p87poyn49WbOC+7w3VxWoiucZ9JXBuOrpKdkR3EuEitH9UbTDjI0Ki69F+yVLj31hIcVkFPzpCsyzU5b777uO+++6LOwyRZi0vz/jp6D156OxhLFyzhRPu+TfTP14dd1gikqWSTXRvAtYQShWqinoHEpLftcCt6Q9NmkJJWQWPvL6AkYN6MKhXh7jDyWjPP/88zz//fNxhiOSEo/fuybMXHU6vjq05++E3uffVT0h1gSMRqZ+ZtTKzu82s0Mw2m9kzZta3nmOOiNotNTO3sJpZ9TZmZteb2TIzKzazqWY2uNHeSC2SXQJ4KXAY8BJQSUhwK6PnI6L9koUm/mcphZtKOe+IAXGHkvHatm1L27b6ClWkqfTv3o6JFx7KCfvtzG8nzeNHj73DxpKyuMMSaW7uBE4BzgBGAB2B58ysrjlG2wNzCLMjFNfS5mfA5cBPgIOAVcDLZtako2oNWQK4NdAVWOtOSaNE1YRyedaFykpn1O+m0a5VC5656DBdhFaP8ePHAzBmzJiYIxHJLe7OX2Ys4JbnP2TXbm25f8xQBvbUN1CSm9I564KF+WlXA+e4++PRtn6EZXePc/d6S1PNbBNwkbuPS9hmhAvI7nH3m6NtbQjJ7hXufn864k9GUiO6ZnQyYxczurtT4s4yd0rM6B5t16XoWejlD1fyWeFmzjtigJLcJPz5z3/mz3/+c9xhiOQcM+MHh+/GEz88hA3F5Xzz3hk8894ylTKI7LihhFXItq1u6+6LCWskHLoD/e4G9KrWbzEwfQf7TVmy8+g+BZwE/NSdPyRsvwi4C3janVMbLcpG1K9fP3/sscfiDiMWn67eTHlFpWpzk1ReXg5AixbJTlYiIulWXuEsXLuFLaXltGqRT7f2LenStoA8/bEuOeKoo44qBd5P2PSAuz/QkL7M7LvAo0CBJySEZvYKMN/df5REHzWN6B4KzAB2dfdFCdv/AvRx92MaEm9DJPs/9iHR/VPVtk8E/pCwP+usXbt224pXueSdhWv59Yuvc/0JQxh52G5xhyMikrTS8kr++e5Sxs9axHv/XU+bgkpOOmBnvnfIrgzpoy8Ypdkrd/dhdTUws5uAX9TTz1HpC6kJmG1fQCIhea5Psoluj+h+fbXtRdX2S5a4f9pndG5bwHcO6hd3KFlj3LhxAIwdOzbWOERyXcsWeXx7WD++Pawf7y8pYvwbC3n6v0v5vzcXc8AunRlzyK58Y7/etC6o61oakWbtTmB8PW0WAV8F8oHuhFrdKj2B13bg9Vck9JOYlPZM2JeqBYSZv5zk89ekpxfbGN1/vdr2quebkn1Bid9nqzfx8ocrOfOru9K2pb6GT9a4ceO2Jbsikhn27duJ20/dj1nXjOLa/9mHouIyLv/7ewy/dQq3PP8hC9fk5sXGktvcvdDdP6rntoWw1G4ZMLrq2Ghqsb2BmTsQwueEhDax39aEWR12pF+Lbsk
fkGSN7kvAKMII7v8SipT3Bi4DOgGT3Wmyeot0ysVZF66Z+D5P/WcJM676Gj06tIo7HBGRtHF3Xv90DeNnLWTS3JVUVDojB/XgnMN2Y8Qe3cnLUy2vZLd0zroQ9XcfcAIwlrBmwu+ALsBQd6+I2nxEmEHhnuh5e2CPqIuZwG3AM8DaqppcM7sK+DlwDvAx8EvgCGCQu1cNoKYS6FSq1nJwT7rsItlE92TgSbYvFrFtV7TtVHeeTvZFM0muJbqrN27lsNtf4ZQD+3DryfvFHY6ISKNZuaGEJ2Yt4vFZiyjctJXde7Rj7KH9OfnAvrRrpW+zJDs1QqLbCrgD+C7QBpgCXBjNvlDVxoEb3P366PlI4NUaunvE3cdGbQy4DvgRIXGeBfzY3eekK/ZkJD2Prhl3EEZwq7vDnZ+lNaomlGuJ7u9emsfdr37C5MuOZPce7eMOJ6s8+OCDAJx7blqW3xaRJlJaXsnz7y/n4Rmf896SIjq0bsF3hvXj7OH92aWbFoGR7JLuRLe5S2nBCDMOAk4kFBOvBJ5x561Giq1J5FKiu6W0nENve4WD+nflwbPqvGBTajBq1CgAJk+eHHMkItJQ/1m0jnEzFvD8+8upcOfovXpyzmH9OXT3bppPXLJCs090zY6oY68Da3D/IOnucn3C7VxKdB+ZuYDrnpnLk+cPZ1j/rnGHIyISm5UbShj/xkKemLWINZtL2bNne04d2peD+ndl8M6daNki2Wu1RZpWDiS6lXy5VLa6pcAFuP+r3u5SKF3oABwP7Aq0rr7fnRuT6ijD5EqiW15RyVH/O5Ue7Vsx8cLD4g5HRCQjlJRV8Nzs5Yyb+Tlzlm4AoHVBHl/p25mD+ndlaP8uHLhLFzq1KYg5UpEgRxLdZJQBB+E+u87ukrwY7SDgeaDWYUB3snLCwlxJdJ+bvYyLnvgvfxozlGOH9Io7nKz0xz/+EYALL7ww5khEpDGs2lDC2wvX8faCdby9cC1zl22gotIxg0E9OzCsfxeG7dqVg3brSp/ObeIOV3JUDiS6DxOmJduZMKPDIqAfcBiwDPgvYSawlsBjRBe/1dpdkonuDGB4HU1ciW7mcne+ee8MNpaUM/myI8nX9DoNctxxxwHwwgsvxByJiDSFLaXlvLtofUh+F67jPwvXsWlrWAp8n94dOXZIL44Z3Is9e7ZXfa80mRxIdMcAjwDfxX1CwvbTgceBs4FCwgDsQtzrXN412UR3I9AWmEZYBngz1eon3HkklfeRKXIh0X390zWc8eAb3PytIXzvkF3jDkdEJCtVVDrzVmxkxieFTJq7gncWrcMdduvejq8P7smxg3vxlb6dNVevNKocSHQ/AgYCnXDflLC9PbAB+Aj3fTBbB7TGvc6vV5JNdBcThpC7uX9pGeCslguJ7jkPv8nsJUXMuPprWhJTRCRNVm0s4eUPVvLinBW8/ukayiudXh1bc8zgnhwzuBcH79aVFvm6qE3SKwcS3WJCWcK1wC1UJapmVwC/Abbi3gazRUAX3DvU2V2Sie7NwNXAke78e8feQWZp7onuxys38vXfT+ey0Xty8dED4w4nq911110AXHLJJTFHIiKZpmhLGa/MC0nvtI9XU1JWSee2BQzq2YHenVrTs1NrendsTa9ObejVqTW9O7Wme/tWKiWTlOVAovsusG/0bC1hhoXeQPdo2/vAUGALsAD3QXV1l+zSMAsIy//+04yHgHmEq922cefRJPuSJvTA9M9oU5DPmV9VycKOmjJlCqBEV0S+rFPbAr51QF++dUBfiksrmPbxaqZ8uJKFa7bwzqJ1rCzaSmnFFy8mz88zdurQip4dW9O3Sxv22Kk9u/dozx47tWe37u30DZzkqp8D/wTyCZMgVE2EYEA5cA3wNaAAmFFfZ8mO6NY3p5m7J500Z5TmPKJbtKWMYTe/zBkH78KN3xwSdzgiIjnL3Vm7uZTlRSWsKCphxYZwv7yohBUbilm0dgtL1hVT9V+yGfTr0jZKftttS4I
H7tSBTm011Vkua/YjugBhieGbgUOAPKASeAP4Be7TMGsBtCKUMZTX1VUqyam+X8kykz5YQVmFc+rQvnGHIiKS08yMbu1b0a19K4b06VRjm5KyCj5bvZlPV2/ik1Wbtt3/+5NCSsvDaHCewdF79+Ts4f05bA+t5ibNlPtU4DDM2gJdgLW4FyfsLyeM7tYr2UT3nBRDlAzwr9nL6de1DfvW8o+qpOaOO+4A4Iorrog5EhFpjloX5LPPzh3ZZ+eOX9heUeksXVfMp6s38eaCtUx4azEvf7CS3Xu04+xD+3PygX1p3yorv1QV+TKzqcBDwJO4byHU4ja8Oy0B3DxLF9ZtLuWgmyfzwxEDuPq4veIOp1k45ZRTAHjqqadijkREcllJWQX/mr2cR15fwOwlRbRv1YJTDuzDWYf2Z/ce7eMOTxpZsy9d2L4E8EZgAvAX3Gc1uDslus0z0Z3w1iKueup9nr3ocPbtqxFdEZHm6L+L1vHo6wt5bvYyyiqcEQO7c/bw/hy1106a0aGZyoFEt4QwvRhsvz7sQ+AvhJXQVqfUXbKJrhljgMuAQUDrart1MVqGOfOhWSxcs4VpV45UDZeISDO3euNWJry1iPFvLGLFhhL6dmnDyQf25bghvdirVwf9P9CM5ECi2wk4GfguMBK2rbzrhLrcf+F+ctLdJTnrwneAv0YvUtNvi5YAziDrNpcy7ObJnHfEAK46VmUL6XLbbbcBcPXVV8cciYhIzcoqKnn5g5U89vpC3vh8De6wa7e2HDu4F8cM6cX+Wrkt6zX7RDeRWU/gNOAMwgwMAI570jlnsqOwP47uiwlLATthEt9uwProJhli0twVVFQ639i3d9yhNCvvvvtu3CGIiNSpID+P4/ftzfH79mb1xq1M/jAsYvGXGZ9z//TP6NmxFccM7sWxWrlNssMmQr65DqiA1AdVkx3RXQd0BA4DZhKN4JrxK+Ai4GvuzE31xTNBcxzRPfOhWSxeu4VXr1DZgoiIQFFxGa9+tIoX56xg6serKCmrpEvbAkbt3ZNDBnSjfasWtGuVT9uW+bRt2eIL920K8jUKnEGa/YiuWQFwPKF04RtAm6o90f003I9KurskE91SQhbdhjCqC6FQuBUh237VnaOTfdFM0twS3TWbtnLwLVM4/8gBXHmMyhZEROSLqlZumzR3BZM/XMnGkvqnI21TkE/ntgUcM7gXpx/cj716daz3GGkcOZDorgWqrqKvSm6XAo8AD+P+aSrdJVu6sIEwYa8RpnvoABxHWBYYttdNSMwmzV0ZlS3sHHcozc6vf/1rAH71q1/FHImISMO1aZnPsUN6ceyQXpSWV7J0fTFbSsspLq1gc2kFxaXlbN5awZayCrZsLd+2bcm6Yp6YtYhxMxewf7/OnHFwP/5nv51ppzl8Jb06R/elhKWA/wK8RAOnCUv2p3MZIdHdiTDFw8HRi1dZ25AXl/T71/vL2K17O/bu3SHuUJqdefPmxR2CiEhatWyRx27dkx8cXLu5lIn/WcJf31rMVU+9z6+f+5ATvrIzZxzcj337dFK5nKTDu8DDwOO473B+mWzpwiPAmcB3gB7AvdWa3OxOVg5zNafShcJNWzn45slcOHIPrjhmUNzhiIhIM+XuvLNwHf/35mL+9f4ySsoq2ad3R844uB/fPKAPHVsXxB1is9XsSxfSLNlEtx3QHtjozhYzriZM91AOPA3c7k5Fo0baSJpTovv4rIX84uk5vHDJCPburfopERFpfEXFZTzz7lL+783FfLB8A60L8jiof1cG7tSBPXu2Z2DPDgzs2V7Jb5rkRKJr1oJwQdogtl+Mtp37jUl3pZXRmk+i+90H32DFhhKmXHakvj5qBNdeey0AN96Y9O+XiEjOcHfeX1rE399ewntL1jN/5SaKy7aPgfXu1JqBPTuw507t2TNKfvfq1ZE2LbNyGv7YNPtE12wnYCohya1ZOubRNWOXVOJyZ1Eq7SW9Vm/cyhufreGio/ZQkttIFi9
eHHcIIiIZy8zYr29n9usbriWqrHSWrCvm45Ub+XjVRuav3MTHKzfy2Gdr2FpeCUCXtgX88XtDGb57tzhDl8xyA1DXtFEpjdDWOqJrRmUKnaW0BLCZXQhcCfQG5gKXuvtrdbRvCfySUCe8M7ASuMPd/5DQ5hTg18DuwKfAL9z96fpiaS4juo+9sZBf/WMOky49gkG9dCGaiIhkpopKZ/HaLcxbuZHfTprHgsLN3HTSEE4/OKXxtZyVAyO6nwL9gXHAOYRc9BLgJ9Hj23Afl2x39S2JYinckozfTgPuAm4BDiAsQPGCmdX1E/5X4FjgPMJQ9reB2Ql9DgcmAI8D+0f3fzeznJn27F+zl7HHTu3Zs2f7uEMRERGpVX6e0b97O44Z3IuJFx7KoXt05+qJ73Pjsx9QUZnb5ZQCQJ/o/uptW9zvAU4G9gT6ptJZXSO6D6fSkTvnJPWCZrOA2e5+bsK2+cCT7n5NDe2/Dvwd2N3dC2vpcwLQ1d1HJ2ybDKx29zPqiqc5jOiu2ljCIbdM4SdfG8hlo/eMO5xm65prwo/nrbfeGnMkIiLNR3lFJTc//yEPz1jAyEE9uPuMA+igC9dqlQMjupuB1kABYZGyFkCv6PEGYAnuSQ//11pukGzimoqoBGEocEe1XS8Bh9Zy2EnAW8BlZnYW4Y2+APzc3TdFbYYDd1c7bhJheeJm78U5K3CH/9mvd9yhNGtr1qyJOwQRkWanRX4e150wmD12as91/5zLyX+cyUNnH8Qu3drGHZrEYw1hVLcTsIIwgvs4UBLt75JKZ029nEl3wlLCK6ttXwmMquWYAcDhwFbgFMKKGXcTanVPjdr0qqXPXjV1aGbnEcogaNmyZUpvIBP9a/ZyBkZXsUrjeeCBB+IOQUSk2freIbuyW7d2XPD4f/jmvf/mT2OGcsgAXaSWg+YREt3dgenA94Cjo30O/CeVzuqr0d3GjEFm/M6Mf5nxSrXblFReNEV5hDf2XXef5e5VI7WnmFnPhnTo7g+4+zB3H9aiRXYvXbhqQwlvLljLNzSaKyIiWe7QPbrzjx8fRpd2LRnz0Cz+9pZmu8lBDwIPEMoXbgBWs/16sELg0lQ6SyrLM2MoYU6zmr5HMJKfnaEQqACqJ6g9CcPTNVkOLHX3ooRtH0b3uxBGblek2Gez8UJUtvCNfZXoNrYrrrgCgDvuqF55IyIi6bJb93Y8feFhXPTEf/jZU7P5ZPUmrjp2L/LzNHVmTnD/G/C3bc/NBgJHERYpm4H7+lS6S3Y48+fADhc+u3upmb0DjCZcYFZlNPBULYfNAL5tZu0TanKrrrhaGN2/HvXx22p9ztzRmDPdv2YvZ1DPDgxU2UKjKy4ujjsEEZGc0KlNAQ+PPYhfP/cBD0z/jHkrNnLknj3IM8jLM8wsPI7uzYx8M/Ly4Ct9OzOgh2YgajbcNwD/bOjhyS4BvBzYCbgQuI8wgvsV4CbCpL6nufNeUi8Yphd7LOprBnA+8ANgsLsvNLNHAdz9rKh9e8II7hvA9YQa3fuBD93921GbQwl1HL8E/gF8C7gRONzdZ9UVTzbPurCiqITht03hp6P25OKjB8YdjoiISNo99sZCfv3sB5RWVCbVPs/g1KF9uWTUnvTp/OXVY7Nds591Ic2STXRLCReRdSJM7eDu5JuxE6E84FF3xib9omHBiJ8RFoyYA/zU3adH+6YSXmBkQvtBhAvQDgfWEZLZq919Y0KbUwmJ9wC2Lxgxsb5YsjnRfXjG59zw7AdMvuxI9thJf72KiEjzVFJWQUlZBZUOle5UuuPbHodV2Nxha3kF//fmYsa/Eb7wHfPVXfnxUbvTrX2rmN9B+ijRTU2yiW4R0B5oBRQRCoT3ATYBi4H17nRtxDgbTTYnuqfeN5NNW8t58dIj4g4lJ1x6aah/v/POO2OORERE6rJ0fTF3Tf6YJ99ZQpuCfH4wYgDnjtitWczPq0Q3NcnOurAquu8KLIgev0qojQVI7vsESZsVRSW8vXC
dLkITERGppk/nNvzm1K/w0k+P5MhBPfjDlPkc8ZtX+fNrn1FSVhF3eNKEkh3RnQh8EziGcJHXlXxxpoUJ7ny3USJsZNk6ovuXf3/Ojc99wJTLj2R3Fd2LiIjU6v0lRfxm0ke8Nr+Q3p1ac8nRAzl1aF9a5Cc9y2rG0IhuapJNdL9CuOjsHcJ0Xn8Hvk5IdqcA33NndSPG2WiyNdE95b6ZbCmt4IVLRsQdioiISFZ4/dM1/GbSR/x30Xo6ty2gQ+sWFOTlUZCfR4t8oyA/j4J8o0VeHgUt8ijIM1rkG5UOFZVOeaVTUVlJeYVve16e8HxY/67cevK+jfoelOimJqnpxaIZFRJnVTjWjM5AuTubajlMGsmy9cW8s3AdVx4zKO5QcsqPf/xjAO69996YIxERkYYYvns3Jl5wKJM/XMXkD1ZSWlFJWUVIVMsqKimrdMrKKymvrKS4uGLbvrw8o0WekZ9w36ogj7Z5edueF+Qbfbs0v1kest2OLAvWEsi+odBmYNrHYfD8mME1rnAsjaRNG/0DJiKS7cyM0fv0ZPQ+DVpcVbJMnYmuGQcCpxNmWfiHO6+Y8UPgVsKFaVvN+KM7VzR+qFLl/aVFdGjdgt176JuLpqQV0URERLJLrYmuGYcT6m+r2vzYjN8S5r91wtK/rYGfmvGJO39q7GAlmLu0iCE7d8JMyyGKiIiI1Kauyw2vBAoICW3V7cponwGFCY/PbKwA5YvKKir5cMVGhvTpGHcoOee8887jvPPOizsMERGRtDGzVmZ2t5kVmtlmM3vGzPrWc8wRUbulZuZmNraGNuOifYm3NxrtjdSirkR3GGHkdhJhud4XCEmtA2e4sxPwvajtPo0ZpGw3f+UmSssrGdKnU9yh5Jxu3brRrVu3uMMQERFJpzuBU4AzgBFAR+A5M8uv45j2hJVtLwGK62g3mbAKbtXt+HQEnIpapxczYyuhbKGLOxvM6ERYfteB1u6UmdESKCGswrcjF7a/5F9/AAAgAElEQVTFJtumF/vb24v52ZOzNX+uiIhIDkrn9GJm1glYDZzj7o9H2/oBC4Hj3H1SEn1sAi5y93HVto8Durv7/6Qj1oaqa0S3AMCdDdF9UdUOd8qi+9Jok4pFm8jcpUW0a5nPbt10IZqIiIjskKGEfO+lqg3uvhj4EDg0Df0fbmarzOxjM3vQzHZKQ58pqXcU1oxrk9mWrbp27crUqVPjDiNpu5Ru5vJ9Yfr0aXGHknNuv/12AK666qqYIxERkRzWwszeTnj+gLs/0MC+egEVbL/uqsrKaN+OeBGYCHwO9AduAl4xs6HuvnUH+05aMuUG1yU89hq2ZbW1a9cycuTIuMNISkWlc8F1kzjtoH58f+TguMPJOa+88gpA1vy8iIhIs1Tu7sPqamBmNwG/qKefo9IX0pe5+18Tnr5vZu8QSiK+QUiAm0R9ia5KEjLI54WbKC6r0IVoMbnxxhvjDkFERCQZdwLj62mzCPgqkA90J9TqVukJvJbOgNx9mZktAQams9/61JXo3tBkUUhS5izdAKCpxURERKRW7l7Il8sRviQaZS0DRgNPRNv6AnsDM9MZk5l1B/oAy9PZb31qTXTdlehmmjlLi2jVIo89NNtCLMaMGQPA+PH1/ZEsIiKS+dy9yMweAn5jZquANcDvgNmEqcEAMLOPgHvc/Z7oeXtgj2h3HrCLme0PrHX3RdH+64GnCIltf8KququAp5vgrW2TlVOC5ar3lxaxV++OtMiva7IMaSyDBg2KOwQREZF0uxQoByYAbQir4p7l7hUJbQYRyhuqDANeTXh+Q3R7BBhLuMBtX+AsoDMh2X0V+I67b2yUd1GLWufRzRXZMo9uZaXzlRte4psH7MxNJ+0bdzgiIiISg3TOo5sLNDSYJRat3cLGreUM2VkXoomIiIgkQ4lulpizLKzXoRkX4nP66adz+umnxx2GiIiIJEk1ulliztINFOQbA3vqQrS47L///nGHICIiIilQopsl5i4
rYs+eHWjVIj/uUHLW1VdfHXcIIiIikgKVLmQBd2fO0iLV54qIiIikQIluFlhWVMK6LWVaKCJmp5xyCqecckrcYYiIiEiSVLqQBd5fEi5EG6wL0WI1fPjwuEMQERGRFCjRzQJzlxWRn2fs01sjunG64oor4g5BREREUqDShSwwZ2kRe/RoT+sCXYgmIiIikiwlullgzrINDFZ9buxOPPFETjzxxLjDEBERkSSpdCHDrdpQwuqNWzXjQgY4+uij4w5BREREUqBEN8NpRbTMcckll8QdgoiIiKRApQsZbs7SDQDss7NKF0RERERSoUQ3w81ZWsSA7u1o30qD73E77rjjOO644+IOQ0RERJKk7CnDzV22gQN37RJ3GAKccMIJcYcgIiIiKVCim8HWbi5l6fpizhq+a9yhCHDhhRfGHYKIiIikQKULGWzO0nAh2r66EE1EREQkZUp0M1jVjAuDNbVYRhg1ahSjRo2KOwwRERFJkkoXMtjcpRvo17UNndoWxB2KAKeddlrcIYiIiEgKlOhmsDnLirRQRAY599xz4w5BREREUqDShQxVVFzGwjVbtFCEiIiISAMp0c1QHywLC0UM1kIRGWPkyJGMHDky7jBEREQkSSpdyFBzdSFaxhk7dmzcIYiIiEgKlOhmqPeXFtGrY2t6dGgVdygSUaIrIiKSXVS6kKHmLC1iSB+VLWSSsrIyysrK4g5DREREkqRENwNt3lrOZ4WbdSFahhk9ejSjR4+OOwwRERFJkkoXMtCHyzfgjqYWyzA//OEP4w5BREREUqBENwNVLf2rEd3MMmbMmLhDEBERkRSodCEDzVm2ge7tW9Kzoy5EyyRbtmxhy5YtcYchIiIiSdKIbgaas7SIwTt3wsziDkUSHH/88QBMnTo13kBEREQkKUp0M0xJWQXzV23i6L13ijsUqeaCCy6IOwQRERFJgRLdDPPRio1UVLouRMtAp512WtwhiIiISApUo5thdCFa5ioqKqKoqCjuMERERCRJGtHNMHOXFdGpTQF9u7SJOxSp5pvf/CagGl0REZFsEcuIrpldaGafm1mJmb1jZiPqaDvSzLyG214JbcbW0qZ107yj9JmzdAND+nTUhWgZ6OKLL+biiy+OOwwRERFJUpOP6JrZacBdwIXAv6P7F8xsH3dfVMehg4G1Cc9XV9u/Bdg9cYO7l+x4xE2ntLySeSs2cs5h/eMORWpw8sknxx2CiIiIpCCO0oXLgHHu/mD0/CdmdixwAXBNHcetcvfCOva7u69IV5BxmL9qI6UVlQxWfW5GKiwMP37du3ePORIRERFJRpOWLphZS2Ao8FK1XS8Bh9Zz+NtmttzMppjZUTXsb2NmC81siZk9Z2YHpCPmpjR36QYAhuzcMeZIpCannnoqp556atxhiIiISJKaekS3O5APrKy2fSUwqpZjlhNGe98CWgJnAlPM7Eh3fy1qMw/4PvAe0AG4BJhhZl9x9/nVOzSz84DzAFq2bLlDbyid5iwrol3LfPp3axd3KFKDyy+/PO4QREREJAXm7k33YmY7A0uBI919esL2a4HvufugJPt5Hih39xNr2Z8PvAu86u51Xj3Url0737x5c7JvoVEdf9drdGzTgr+eNzzuUERERCQDmdkWd9eIWJKaetaFQqAC6Flte08glfraWcDA2na6ewXwdl1tMs3qjVv5YPkGDt9D9Z+ZasWKFaxYkdVl4CIiIjmlSRNddy8F3gFGV9s1GpiZQlf7E0oaamRhbq796mqTaWZ+Gi50GjGwR8yRSG1OP/10Tj/99LjDEBERkSTFMevC74DHzOxNYAZwPrAz8CcAM3sUwN3Pip5fCiwA5hJqdMcAJwGnVHVoZtcBbwDzgY7AxYRE94KmeEPpMP3jQjq3LdCKaBns6quvjjsEERERSUGTJ7ruPsHMugG/BHoDc4Dj3X1h1GSXaoe0BH4L9AWKCQnvN9z9+YQ2nYEHgF5AEfBf4Ah3f7PR3kgauTuvzV/NYXt0Jz9
PC0VkqmOPPTbuEERERNLKzFoBdwBnAG2AKcCF7r6kjmOuAU4GBgFbCYON17j7nIQ2BlxHuPi/C6Hs9MfuPreR3krNsTblxWiZKBMuRvt45Ua+/vvp3Hbyvpx+cPU8XzLF4sWLAejXr1/MkYiISK5K98VoZnYf8E3gbGAN4Zv3zsDQ6Jqnmo6ZBPyVMCOWATcCw4F93H1t1OYqwqDmWMLsWNcChwOD3H1juuKvTxylC1LN9I/DIm+HD9SFaJnszDPPBGDq1KnxBiIiIpIGZtYJ+AFwjru/HG07E1hImPZ1Uk3Hufsx1fo5k/CN+mHAs9Fo7qXAbe7+VNTmbGAV8F3g/kZ5QzVQopsBXptfyIAe7ejbpW3coUgdfvnLX8YdgoiISDoNBQpIWMjL3Reb2YeEhbxqTHRr0IEwwcG66PluhHLSxH6LzWx61K8S3abStWvXWEfo3OHg1hs4do+WGinMcC1ahF8XnScREYlRCzN7O+H5A+7+QAP76kWY9rWw2vaV0b5k3UVYv+D1hH6r+qneb58UY9whOZ/orl27lpEjR8b2+jM/KeS3k2bx0NkHMHLv6tMLSyb57LPPABgwYEDMkYiISA4rd/dhdTUws5uAX9TTz1HpCMbMfkeovT28tpreOOV8ohu36fMLaZFnHDKgW9yhSD2+//3vAxrRFRGRjHcnML6eNouArwL5QHdgdcK+nsBr9b2Imf0eOB04yt0/S9hVtbpSz+h1Evtt0pWXlOjG7LX5qzlw1y60b6VTkeluuOGGuEMQERGpl7sX8uVyhC8xs3eAMsLCXU9E2/oCe1PPQl5mdhdwGiHJ/aja7s8JCe1owswMmFlrYARwZSrvZUcpu4pR4aatzF22gSu+vmfcoUgSjjzyyLhDEBERSRt3LzKzh4DfmNkqtk8vNhuYXNXOzD4C7nH3e6Ln9wJnEhbwWmdmVTW5m9x9k7u7md0J/Dw69mPCVGObiBLqpqJEN0YzPtGyv9lk3rx5AAwaNCjmSERERNLmUqAcmMD2BSPOqlZvO4hQ3lDlwuh+SrW+bgCujx7/JurvXrYvGPH1ppxDF7RgRKwLRlzx9/d4+YOV/OdXo7UiWhaoumhRNboiIhKXdC8Y0dxpRDcmVcv+Hq5lf7PGLbfcEncIIiIikgIlujGZv2oTKzdsZYRWQ8sahx56aNwhiIiISAry4g4gV702P9Tnatnf7DFnzhzmzJkTdxgiIiKSJI3oxuS1+au17G+WueiiiwDV6IqIiGQLJbox2FpewRufreG0Yf3iDkVS8Nvf/jbuEERERCQFSnRj8M6CdZSUVWpasSxz0EEHxR2CiIiIpEA1ujF47ZOw7O9Xd9eyv9nk3Xff5d133407DBEREUmSRnRjoGV/s9Oll14KqEZXREQkWyjTamJrNm1lztINXD5ay/5mmzvvvDPuEERERCQFSnSb2IxP1wAwYk/V52ab/fffP+4QREREJAWq0W1ir328mk5tCti3T6e4Q5EUvfXWW7z11ltxhyEiIiJJ0ohuEwrL/hZq2d8sdeWVVwKq0RUREckWSnSb0CerNrFiQ4mW/c1S99xzT9whiIiISAqU6DYhLfub3YYMGRJ3CCIiIpIC1eg2odfmr2ZAdy37m61mzpzJzJkz4w5DREREkqQR3SYSlv1dy3eG9Y07FGmgn//854BqdEVERLKFEt0m8s7CdRSXVWjZ3yx2//33xx2CiIiIpECJbhP593wt+5vtBg0aFHcIIiIikgLV6DaR1+YXcuAuWvY3m02bNo1p06bFHYaIiIgkSVlXE1izaStzlhVx2Sgt+5vNrrvuOkA1uiIiItlCiW4TmPHpGty17G+2+8tf/hJ3CCIiIpICJbpN4N/ztexvczBgwIC4QxAREZEUqEa3kVUt+3vYHt207G+Wmzx5MpMnT447DBEREUmSRnQb2coNW9lQXKZpxZqBm266CYBRo0bFHImIiIgkw9w97hhi1a5dO9+8eXOjvkZpeSWV7rQuyG/
U15HGtXjxYgD69esXcyQiIpKrzGyLu7eLO45soUS3CRJdERERkXRQopsa1eiKJOnFF1/kxRdfjDsMERERSZJGdDWiK0kaOXIkoHl0RUQkPhrRTY0SXSW6kqQVK1YA0KtXr5gjERGRXKVENzWadUEkSUpwRUREsotqdEWS9Oyzz/Lss8/GHYaIiIgkSaULKl2QJKlGV0RE4qbShdQo0VWiK0kqLCwEoHv37jFHIiIiuUqJbmpUoyuSJCW4IiIi2UU1uiJJmjhxIhMnTow7DBEREUmSShdUuiBJUo2uiIjETaULqVGiq0RXklRUVARAp06dYo5ERERylRLd1KhGVyRJSnBFRESyi2p0RZI0YcIEJkyYEHcYIiIikiSVLqh0QZKkGl0REYmbShdSo0RXia4kacuWLQC0bds25khERCRXKdFNjWp0RZKkBFdERCS7qEZXJEnjx49n/PjxcYchIiIiSVLpgkoXJEmq0RURkbipdCE1sYzomtmFZva5mZWY2TtmNqKOtiPNzGu47VWt3Slm9oGZbY3uv9X470Ryycsvv8zLL78cdxgiIiJpY2atzOxuMys0s81m9oyZ9a3nmGvM7C0z22Bmq83sWTMbUq3NuBpytzca9918WZMnumZ2GnAXcAtwADATeMHMdqnn0MFA74Tb/IQ+hwMTgMeB/aP7v5vZIWl/A5KzCgoKKCgoiDsMERGRdLoTOAU4AxgBdASeM7P8Oo4ZCfwROBT4GlAOTDazrtXaTeaLudvxaY08CU1eumBms4DZ7n5uwrb5wJPufk0N7UcCrwI93L2wlj4nAF3dfXTCtsnAanc/o654VLogyRo3bhwAY8eOjTUOERHJXeksXTCzTsBq4Bx3fzza1g9YCBzn7pOS7Kc9UASc5O7PRtvGAd3d/X/SEWtDNemIrpm1BIYCL1Xb9RLhr4K6vG1my81sipkdVW3f8Br6nJREnyJJGzdu3LZkV0REpBkYChSQkEO5+2LgQ1LLoToQcsp11bYfbmarzOxjM3vQzHba0YBT1dTTi3UH8oGV1bavBEbVcsxy4ALgLaAlcCYwxcyOdPfXoja9aumzV00dmtl5wHnRUzez4lTeRIIWhOF6yUyNcn7MLN1d5iL97mQ2nZ/MpXOT2Zri/LQxs7cTnj/g7g80sK9eQAVQ/RvzWnOoWtwFvAu8nrDtRWAi8DnQH7gJeMXMhrr71gbGm7KMn0fX3ecB8xI2vW5m/YErgddqOiaJPh8AGvpDsY2Zve3uw3a0H2kcOj+ZS+cms+n8ZC6dm8yWKefHzG4CflFPs+rfjjf0tX4HHA4c7u4VVdvd/a8Jzd43s3cIJRHfICTATaKpE91Cwl8OPatt7wmsSKGfWcDpCc9XpKFPERERkebgTqC+id8XAV8lfNPenVCrW6UnSQwmmtnvCfnYUe7+WV1t3X2ZmS0BBtbXbzo1aaLr7qVRRj8a+HvCrtHAUyl0tT+hpKHK61Efv63W58wGhioiIiKSlaKL92u8gD9RlJOVEXKmJ6JtfYG9qSeHMrO7gNMISe5HSbxWd6APX8zfGl0cpQu/Ax4zszeBGcD5wM7AnwDM7FEAdz8ren4psACYS6jRHQOcRJgKo8pdwHQzuxr4B/AtwpD84Y38Xna4/EEalc5P5tK5yWw6P5lL5yazZdX5cfciM3sI+I2ZrQLWEPK02YSpwQAws4+Ae9z9nuj5vYRrpk4C1plZVT3vJnffFM3CcD1hEHM5oUb3VmAV8HQTvLVtYlkZzcwuBH5GmFNtDvBTd58e7ZsK4O4jo+c/A84F+gLFhIT3Vnd/vlqfpxIKnQcAnwK/cPcmqwERERERyTZm1gq4A/gu0AaYAlwYzb5Q1caBG9z9+oTnNbnB3a83szaEgccDgM6EZPdV4FeJ/TaFnF8CWERERESap1iWABYRERERaWxKdOtgZhea2edmVmJm75jZiHraHxm1KzGzz8zs/KaKNRelcn7MrLeZPWFmH5lZRbRiizSSFM/
NyWb2UrRe+kYzm2VmJzZlvLkmxfNzpJnNNLM1ZlYc/Q5d0ZTx5pJU/99JOO5wMys3szmNHWMuS/F3Z6SZeQ23vZoy5lynRLcWZnYa4SK3Wwg1JjOBF8xsl1ra7wY8H7U7gFB0fbeZnVJTe9kxqZ4foBXhCtTbCNPTSSNpwLk5EniFMLfiAYTfo6eT/Q9eUtOA87MJ+ANwBLAP4VqIG6JrLSSNGnBuqo7rAjxKqK2URtLQ8wMMJlyTVHWb35hxyhepRrcWZjYLmO3u5yZsmw886e7X1ND+duBkdx+YsO3PwGB3H94UMeeSVM9PtWOfAwrdfWzjRpmbduTcJLR/E3jN3S9vpDBzVprOz0Rgq7uf0Uhh5qSGnpvofLwHGHCquw9p9GBzUAPygpGEC7B6RNN9SQw0olsDM2tJWP/5pWq7XqL2tZ+H19B+EjDMzArSG2Fua+D5kSaQxnPTgS+vmS47KB3nx8wOiNpOS290ua2h5yYaWe9JGGmXRrKDvztvm9lyM5tiZmlZjUySp0S3Zt0JK4WsrLa9rrWfe9XSvkXUn6RPQ86PNI0dPjdm9mPCdIKPpTc0YQfOj5ktMbOtwNvAH939T40TYs5K+dyY2b7AdcCYxKVXpVE05HdnOXABYd7/k4F5wBSVZTWtOBaMEBGpUVTT/lvgNHdfGHc88gUjgPaEJUNvN7PP3V1/jMQkmvt0AnCFu38edzzyZe4+j5DcVnndzPoDV5LE8rqSHkp0a1YIVBC+DkrUE1hRyzEramlfThLL8ElKGnJ+pGk0+NxEi748Cpzl7s82Tng5r8HnJyGZet/MehJWPVKimz6pnpvehGVaHzazh6NteYCZWTlwvLtX/5pdGi5d/+/MAk5PV1BSP5Uu1MDdS4F3CGs/JxpN7Ws/v15L+7fdvSy9Eea2Bp4faQINPTdm9h1C0jTW3Z9svAhzWxp/d/IIM5lImjTg3CwF9gX2T7j9Cfgkeqx/C9Mojb87+xNKGqSJaES3dr8DHouu/p4BnA/sTPiHBDN7FMDdz4ra/wm4yMzuBO4HDgPGAroquXGken4ws/2jhx2Byuh5qbt/0JSB54CUzo2ZnU5Icq8Aptv2NdNL3X1tE8eeC1I9Pz8BPmf7V7BHEM7VH5s27JyQ9LmJBlC+MGeuma0izIahuXQbR6q/O5cCC4C5QEtgDHASoWZXmogS3Vq4+wQz6wb8kvAV0RzCV0FVdYO7VGv/uZkdD/yeUHy+DLjY3Z9qwrBzRqrnJ/Lfas9PABYC/RsrzlzUgHNzPuHfojujW5VpwMjGjTb3NOD85AO3E35PyoFPgauJ/nOX9Gngv2vSRBpwfloSrjnoCxQTEt5vuPvzTRSyoHl0RURERKSZUo2uiIiIiDRLSnRFREREpFlSoisiIiIizZISXRERERFplpToioiIiEizpERXRERERJolJboiGc7MBprZPWb2oZltMrONZvaRmT1oZl9NaLfAzNzMFsQYblUs46JYPFrbvWp7TzN73MyWm1lFtP9OM+uf0H5cI8bV2cyuj24nJRt3UzGzkQmvX9/t+uiYqudTmzre+jTmeU3lXFX7XNMah4hkNi0YIZLBzOwc4D6+vNzqoOjWg7DSTra4CzgtxtfvDFwXPX4E+EeMsYiISCNToiuSoczsa8CfCd+8OHAzYXnpVcCuwKnAnrEFWAd3H0tYAru6odH9emA3d1+fsM8aOax61RF3U73+VBI+BzMbCzwcPX0kii/tzKy1u5c0Rt8iInFS6YJI5rqV7b+jf3D3X7n7Encvdff57n4rcG5dHZjZ/mY20cw+MbMNZlZmZiuibcOqtd3NzB41s0VmVmJm681sTvQV8U4J7c41s7fNbK2ZbTWzpWb2spmdndDmC18rV311DOwRNekMrIv2j63rK24zO9DM/i96nVIzKzSzV83s4Gh/ezN7xMzeN7M10Xtcb2bTzey0hH6uBz5P6Prs6q9ZR8lFOzO7wczmmlmxmW0xs/+a2WVm1iK
h3Rfeh5mdFX2GxRZKT86mEZnZ18zsjej1PjWzn5lZYuJ8fUJ83zKzh8yskLA8aVWbvc3ssYTPe5WZPWlm+1V7raR+Xqod8x0zm13X52FmI8zsGTNbnfDz+tfqr1/HZ7BzFO+m6OfhPqBDLW1Tfg8ikmXcXTfddMuwG7ATYRS36tYniWMWRG0XJGw7vVo/ibfNwN4JbefW0XZI1ObbdbR5MqGvcQnb+wMj6zhubNSm6vm4hH6+BZTVdlzUplcdfTtwVtTu+jrajKsp7mhbO+CdOo59HsiL2ia+j3W1tD88hZ+DsTV9LtXaVO0vrOWzGpPQ9vpq7be1i/YfDmypJe5iYESKPy+Jn8eK+j4PYAxQUUu7EmBkbT9j0bY2wIc1HLusps8xmfegm266ZfdNI7oimal/wuMN7r60gf38BzgG6E2o8+0IXBDtawv8CMDMugH7RNv/QEjuugIHAb8CiqJ9R0T3mwg1wq0IZRTfAV6sLQh3n+ruBiyMNi10d4tu42o6xszaAA+yvcTqWqAn0J2QcH8Wbd9IqPvtH72n1sChhIQN4LIohuuB3RJe4pGEGMbWFjtwKXBg9HgS4bMcQPhsAY4j/EFRXWfgQqATcHvC9jPreK0d0Q34DdAFuCiJ1zPgWMJntm+07UFCsriQUGbSCjgAWE34XO+FlH5eEvWkjs/DzNoBdxO+xSgn/JHTETg/ateKULpTl7OAvaLHbwB9Cd8irK/esIHvQUSyjGp0RZq3FcAPgDsJiWCbavsHRffrCMlAZ0LitpEwMvaeu9+U0P7z6L4d8EvCSOeHwEvunu7E4DBC8gYw1d1/nbDvyYTHWwjJ7wRgb8LX1In1voPYMd9IeHyNu68AMLMb2X4x2/HAE9WOe8fd74vajgeuirbvuoPx1GYlcK27V5jZI8A99bze/7r7pOjxHDMbyPYkcVfCua1uXzPrRagTT+bnJVF9n8dhUX8Az7t71Wd7v5mdD+wP7Glme7j7J7W8xtcSHt9a9Qeimf0vod49UbI/8yKSxTSiK5KZFiQ87mhmOzewn78BPyMkgNWTXKq2uXslYWRtCTAQ+AUwHng/qn3tF7X/I/B3oKr9nYRRzpVmdnUDY6xNz4THH9TR7irCSOMhhBHA6he1td7BOHokPF6U8HhhwuOa6jnnJTzenMZ4avOpu1ek8Hr/rfY82ZrUbin8vCSq7/Oo7XOG+j/rbbElPF5Sy2MgpZ95EcliSnRFMpC7rwLeTNh0ZU3tEi+EqmFfF0LZAoTRvsFAPlDjRT3u/hywC2EE9ETgRkK95BDC6C3uXuLu3yF8xXs48H1gFuFr5VvMrE9y7zApKxMe711Hu8SygZOAVlGZxJoa2noD4liV8HiXWh4ntqlStoOvm6ptr+fuybxecbXnie9hckJZx7YboRZ5bvQa9f681BYfNX8etX3O1Z/X9FlXKUx43LeWx9uDSP09iEiWUaIrkrl+QRg5Bbg4umJ+ZzMrsLCIxM8JNZW1KWd7QlEObCB8xf/rmhqb2d3A0YT62xeBp4Ct0e5dojanmNlFQB/gPcLo7ntVXVBLQtFAM9ierB5lZj83sx5m1sXMTjKzqnrh8oRj1gMFZvYrvji6VyUx+R0Y1YXW518Jj2+2sOhFf0LNcE1tspK7zwc+jp4ebWaXWlhgo7WZfcXMrgX+WtU+mZ+XFM0klBMAHGdmJ1qYUeNcQp0wwLw6yhYAXk14fLWZ9TGz3YHLa2rcCO9BRDKMEl2RDOXukwkXi5URflevA5YCpYSE5GbChUe1Hb8RmBI97QMsJoyS7lPLIRcALye8xnuEC5UglCdAGFm9m1BKsDG6nRftWw7MTuEt1sndiwnTp1UlsjcTRvPWAk8TLggjelxlKiFpuZgaLkBy902EK+0hXLC2KZpqa2wdodzFFy88W0GoVa6aE/gFQn1wc3AeYY/EsycAAAFtSURBVHYDA35PSDyLgXeBG/hiOUkyPy9Jc/fNwE8If9wVAP8k/Hw9EDX
ZyvYL02rzKPBR9Hg4oSzhE75YFpEore9BRDKPEl2RDObufyaUGvyRkNwWE+ob5wEPAbfV08UYQhK2jnAV+XhqX5nsNuDfhGSynHCR138ISeNdUZsphIuuPiEklBWEBPevwJFRcpo27v40ofb2r4QposoJie40ttft3g7cQkhWiqN9X6P2q+bPBKYTRriTiWEzYbaJGwkXK20lJIPvEkYKT4zqPbOeu08jJPCPEpLEMsLnPZvwB87PE5on8/OS6us/TpiK7jnC6Hs54Y+zvwEHe1hQo67ji4FRwETC78l6woIbtc03nfb3ICKZxZIr5RIRERERyS4a0RURERGRZkmJroiIiIg0S0p0RURERKRZUqIrIiIiIs2SEl0RERERaZaU6IqIiIhIs6REV0RERESaJSW6IiIiItIsKdEVERERkWbp/wEAbgj7omUClQAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced Accuracy',\n", + " val_metrics['avg_odds_diff'], 'avg. odds diff.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "m_x9fO3WHHWJ", + "outputId": "fb841c6c-8f0c-429f-9d76-a6b5e950993f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.1200\nBest balanced accuracy: 0.6836\nCorresponding 1-min(DI, 1/DI) value: 0.2268\nCorresponding average odds difference value: 0.0254\nCorresponding statistical parity difference value: -0.0830\nCorresponding equal opportunity difference value: 0.1172\nCorresponding Theil index value: 0.1119\n" + } + ], + "source": [ + "describe_metrics(val_metrics, thresh_arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0EjCQuShHHWK" + }, + "source": [ + "#### 5.1.3. 
Testing PR model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j3S5tpgYHHWK" + }, + "outputs": [], + "source": [ + "dataset = dataset_orig_panel19_test.copy()\n", + "dataset.features = pr_orig_scaler.transform(dataset.features)\n", + "\n", + "pr_orig_metrics = test(dataset=dataset,\n", + " model=pr_orig_panel19,\n", + " thresh_arr=[thresh_arr[pr_orig_best_ind]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "90e5rVizHHWK", + "outputId": "541007a6-5736-4a83-e9da-5014a3618f73" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.1200\nBest balanced accuracy: 0.6880\nCorresponding 1-min(DI, 1/DI) value: 0.1588\nCorresponding average odds difference value: 0.0523\nCorresponding statistical parity difference value: -0.0566\nCorresponding equal opportunity difference value: 0.1479\nCorresponding Theil index value: 0.1108\n" + } + ], + "source": [ + "describe_metrics(pr_orig_metrics, [thresh_arr[pr_orig_best_ind]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TaTt2mnPHHWK" + }, + "source": [ + "As in the case of reweighing, prejudice remover results in a fair model. However, it has come at the expense of relatively lower balanced accuracy." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yt1djlzTHHWK" + }, + "source": [ + "## [6.](#Table-of-Contents) Summary of Model Learning Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Basw72baHHWK", + "outputId": "71f7dc12-8075-4c91-be11-2239f777ed61" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": " bal_acc avg_odds_diff disp_imp \\\nBias Mitigator Classifier \n Logistic Regression 0.775935 -0.205706 0.426176 \n Random Forest 0.763772 -0.138763 0.485869 \nReweighing Logistic Regression 0.753893 -0.015104 0.751755 \nReweighing Random Forest 0.758565 -0.084303 0.569260 \nPrejudice Remover 0.688028 0.052286 0.841229 \n\n stat_par_diff eq_opp_diff theil_ind \nBias Mitigator Classifier \n Logistic Regression -0.261207 -0.222779 0.092122 \n Random Forest -0.218998 -0.113503 0.093575 \nReweighing Logistic Regression -0.087196 -0.003518 0.096575 \nReweighing Random Forest -0.163191 -0.061108 0.096345 \nPrejudice Remover -0.056631 0.147869 0.110774 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
bal_accavg_odds_diffdisp_impstat_par_diffeq_opp_difftheil_ind
Bias MitigatorClassifier
Logistic Regression0.775935-0.2057060.426176-0.261207-0.2227790.092122
Random Forest0.763772-0.1387630.485869-0.218998-0.1135030.093575
ReweighingLogistic Regression0.753893-0.0151040.751755-0.087196-0.0035180.096575
ReweighingRandom Forest0.758565-0.0843030.569260-0.163191-0.0611080.096345
Prejudice Remover0.6880280.0522860.841229-0.0566310.1478690.110774
\n
" + }, + "metadata": {}, + "execution_count": 46 + } + ], + "source": [ + "import pandas as pd\n", + "pd.set_option('display.multi_sparse', False)\n", + "results = [lr_orig_metrics, rf_orig_metrics, lr_transf_metrics,\n", + " rf_transf_metrics, pr_orig_metrics]\n", + "debias = pd.Series(['']*2 + ['Reweighing']*2\n", + " + ['Prejudice Remover'],\n", + " name='Bias Mitigator')\n", + "clf = pd.Series(['Logistic Regression', 'Random Forest']*2 + [''],\n", + " name='Classifier')\n", + "pd.concat([pd.DataFrame(metrics) for metrics in results], axis=0).set_index([debias, clf])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CJDhooGKHHWK" + }, + "source": [ + "Of all the models, the logistic regression model gives the best balance in terms of balanced accuracy and fairness. While the model learnt by prejudice remover is slightly fairer, it has much lower accuracy. All other models are quite unfair compared to the logistic model. Hence, we take the logistic regression model learnt from data transformed by re-weighing and 'deploy' it." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aEDxFnodHHWK" + }, + "source": [ + "## [7.](#Table-of-Contents) Deploying model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oyqIkRvdHHWK" + }, + "source": [ + "### 7.1. 
Testing model learned on 2014 (Panel 19) on 2015 (Panel 20) deployment data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M3z8wVn5HHWL" + }, + "outputs": [], + "source": [ + "dataset_orig_panel20_deploy = MEPSDataset20()\n", + "\n", + "# now align it with the 2014 dataset\n", + "dataset_orig_panel20_deploy = dataset_orig_panel19_train.align_datasets(dataset_orig_panel20_deploy)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "KdOl-sIsHHWL", + "outputId": "81790d78-d6ee-4719-a350-1745338d47dc" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Test Dataset shape" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "(17570, 138)\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Favorable and unfavorable labels" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "1.0 0.0\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Protected attribute names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "['RACE']\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Privileged and unprivileged protected attribute values" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "[array([1.])] [array([0.])]\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Dataset feature names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "['AGE', 'RACE', 'PCS42', 'MCS42', 'K6SUM42', 'REGION=1', 'REGION=2', 'REGION=3', 'REGION=4', 'SEX=1', 'SEX=2', 'MARRY=1', 'MARRY=2', 'MARRY=3', 'MARRY=4', 'MARRY=5', 'MARRY=6', 'MARRY=7', 'MARRY=8', 'MARRY=9', 
'MARRY=10', 'FTSTU=-1', 'FTSTU=1', 'FTSTU=2', 'FTSTU=3', 'ACTDTY=1', 'ACTDTY=2', 'ACTDTY=3', 'ACTDTY=4', 'HONRDC=1', 'HONRDC=2', 'HONRDC=3', 'HONRDC=4', 'RTHLTH=-1', 'RTHLTH=1', 'RTHLTH=2', 'RTHLTH=3', 'RTHLTH=4', 'RTHLTH=5', 'MNHLTH=-1', 'MNHLTH=1', 'MNHLTH=2', 'MNHLTH=3', 'MNHLTH=4', 'MNHLTH=5', 'HIBPDX=-1', 'HIBPDX=1', 'HIBPDX=2', 'CHDDX=-1', 'CHDDX=1', 'CHDDX=2', 'ANGIDX=-1', 'ANGIDX=1', 'ANGIDX=2', 'MIDX=-1', 'MIDX=1', 'MIDX=2', 'OHRTDX=-1', 'OHRTDX=1', 'OHRTDX=2', 'STRKDX=-1', 'STRKDX=1', 'STRKDX=2', 'EMPHDX=-1', 'EMPHDX=1', 'EMPHDX=2', 'CHBRON=-1', 'CHBRON=1', 'CHBRON=2', 'CHOLDX=-1', 'CHOLDX=1', 'CHOLDX=2', 'CANCERDX=-1', 'CANCERDX=1', 'CANCERDX=2', 'DIABDX=-1', 'DIABDX=1', 'DIABDX=2', 'JTPAIN=-1', 'JTPAIN=1', 'JTPAIN=2', 'ARTHDX=-1', 'ARTHDX=1', 'ARTHDX=2', 'ARTHTYPE=-1', 'ARTHTYPE=1', 'ARTHTYPE=2', 'ARTHTYPE=3', 'ASTHDX=1', 'ASTHDX=2', 'ADHDADDX=-1', 'ADHDADDX=1', 'ADHDADDX=2', 'PREGNT=-1', 'PREGNT=1', 'PREGNT=2', 'WLKLIM=-1', 'WLKLIM=1', 'WLKLIM=2', 'ACTLIM=-1', 'ACTLIM=1', 'ACTLIM=2', 'SOCLIM=-1', 'SOCLIM=1', 'SOCLIM=2', 'COGLIM=-1', 'COGLIM=1', 'COGLIM=2', 'DFHEAR42=-1', 'DFHEAR42=1', 'DFHEAR42=2', 'DFSEE42=-1', 'DFSEE42=1', 'DFSEE42=2', 'ADSMOK42=-1', 'ADSMOK42=1', 'ADSMOK42=2', 'PHQ242=-1', 'PHQ242=0', 'PHQ242=1', 'PHQ242=2', 'PHQ242=3', 'PHQ242=4', 'PHQ242=5', 'PHQ242=6', 'EMPST=-1', 'EMPST=1', 'EMPST=2', 'EMPST=3', 'EMPST=4', 'POVCAT=1', 'POVCAT=2', 'POVCAT=3', 'POVCAT=4', 'POVCAT=5', 'INSCOV=1', 'INSCOV=2', 'INSCOV=3']\n" + } + ], + "source": [ + "# describe(dataset_orig_panel20_train, dataset_orig_panel20_val, dataset_orig_panel20_deploy)\n", + "describe(test=dataset_orig_panel20_deploy)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "E4qSM5rSHHWL", + "outputId": "17959509-a2ff-44d4-eba8-1452bf446554" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome 
for privileged instances): 0.5456992351196291\n" + } + ], + "source": [ + "metric_orig_panel20_deploy = BinaryLabelDatasetMetric(\n", + " dataset_orig_panel20_deploy,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "explainer_orig_panel20_deploy = MetricTextExplainer(metric_orig_panel20_deploy)\n", + "\n", + "print(explainer_orig_panel20_deploy.disparate_impact())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z2-YoHhfHHWL" + }, + "outputs": [], + "source": [ + "lr_transf_metrics_panel20_deploy = test(\n", + " dataset=dataset_orig_panel20_deploy,\n", + " model=lr_transf_panel19,\n", + " thresh_arr=[thresh_arr[lr_transf_best_ind]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "j2Qj7xcLHHWL", + "outputId": "116a14ee-e388-434e-b23d-730e1435f176" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nBest balanced accuracy: 0.7311\nCorresponding 1-min(DI, 1/DI) value: 0.1943\nCorresponding average odds difference value: 0.0071\nCorresponding statistical parity difference value: -0.0596\nCorresponding equal opportunity difference value: 0.0303\nCorresponding Theil index value: 0.1019\n" + } + ], + "source": [ + "describe_metrics(lr_transf_metrics_panel20_deploy, [thresh_arr[lr_transf_best_ind]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VlVmF7LIHHWL" + }, + "source": [ + "The deployed model, tested on the 2015 Panel 20 data, still exhibits fairness and maintains accuracy." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rUHqnJ94HHWL" + }, + "source": [ + "## [8.](#Table-of-Contents) Generating explanations for model predictions using LIME" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_GPgLfoHHWL" + }, + "source": [ + "### 8.1.
Generating explanations on 2015 Panel 20 deployment data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bnp7xZwGHHWL" + }, + "source": [ + "This section shows how LIME can be integrated with AIF360 to get explanations for model predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QZBohMsZHHWM" + }, + "outputs": [], + "source": [ + "train_dataset = dataset_transf_panel19_train # data the deployed model (lr from transformed data) was trained on\n", + "test_dataset = dataset_orig_panel20_deploy # the data the model is being tested on\n", + "model = lr_transf_panel19 # lr_transf_panel19 is LR model learned from Panel 19 with Reweighing\n", + "thresh_arr = np.linspace(0.01, 0.5, 50)\n", + "best_thresh = thresh_arr[lr_transf_best_ind]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G9rEHpH7HHWM" + }, + "source": [ + "First, we need to fit the encoder to the aif360 dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JcB0MdZ-HHWM" + }, + "outputs": [], + "source": [ + "lime_data = LimeEncoder().fit(train_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h0G8A9GTHHWM" + }, + "source": [ + "The `transform()` method is then used to convert aif360 features to LIME-compatible features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-uqsPpZiHHWM" + }, + "outputs": [], + "source": [ + "s_train = lime_data.transform(train_dataset.features)\n", + "s_test = lime_data.transform(test_dataset.features)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ou-JY9c_HHWM" + }, + "source": [ + "The `LimeTabularExplainer` takes as input the LIME-compatible data along with various other arguments to create a lime explainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R8CcGvwKHHWM" + }, + "outputs": [], + "source": [ + "explainer = LimeTabularExplainer(\n", + "
s_train, class_names=lime_data.s_class_names,\n", + " feature_names=lime_data.s_feature_names,\n", + " categorical_features=lime_data.s_categorical_features,\n", + " categorical_names=lime_data.s_categorical_names,\n", + " kernel_width=3, verbose=False, discretize_continuous=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9jBM2wiiHHWM" + }, + "source": [ + "The `inverse_transform()` function is used to transform LIME-compatible data back to aif360-compatible data since that is needed by the model to make predictions. The function below is used to produce the predictions for any perturbed data that is produced by LIME" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2TmjbxoQHHWM" + }, + "outputs": [], + "source": [ + "def s_predict_fn(x):\n", + " return model.predict_proba(lime_data.inverse_transform(x))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SaTs8D_kHHWN" + }, + "source": [ + "The `explain_instance()` method can then be used to produce explanations for any instance in the test dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Dr-0XvwEHHWN" + }, + "outputs": [], + "source": [ + "def show_explanation(ind):\n", + " exp = explainer.explain_instance(s_test[ind], s_predict_fn, num_features=10)\n", + " print(\"Actual label: \" + str(test_dataset.labels[ind]))\n", + " exp.as_pyplot_figure()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "mMRScWYZHHWN", + "outputId": "0340e4fc-6cdf-4f8d-c53e-a82c5a110629" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nActual label: [0.]\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAbEAAAEICAYAAADRFcoMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deZxcVZ3+8c8jO7ILIlsIBIQhgEFadEZARBR0ZNQRRkAFHDU6qMAIiAhqREQBR9SJivmJIogGWQcRWWQRRAQ6YQkJa1iUTQKCgCDr8/vjnoZLUdVd3elO9w3P+/WqV9c92z33dnV9+5x7q45sExER0USvGO0OREREDFWCWERENFaCWERENFaCWERENFaCWERENFaCWERENFaCWIxpkixpvQW8z+MkHbYg91nb92xJ24xAu6tKukTSo5L+Z7jbb9nXHZK2G8l9LIwkbSXppi7KfUHSjxZEn5ogQSwGJW9Qw6ddsLQ90fbFI7C7ycADwHK29xuB9hdKkjaWdK6kByS95EO1klaSdLqkv0u6U9JuHdrZpfztqCV9UUn3S3q37UttbzBQn2wfbvtjpf748o/eokM9xqZLEIt4eVgbmOMhfLvBWH6DlLSspKVGcBdPA78EPtoh/3vAU8CqwAeBH0ia2KbcGcAKwFta0ncADJwzLL19GUoQi2Ej6eOSbpX0V0lnSlq9ljdR0vkl7y+SvlDSt5B0uaSHJd0raaqkxbvc3/KSji317pZ0mKRFSt4PJJ1aK3uEpAtU2UbSXWVa5oHyH/IHO+xjRUlnSZon6aHyfM1a/sWSvirpsjJVd56klWv5J0u6T9LfynTexJI+mepN73OSHpP0q5L+/EhX0hKSvi3pnvL4tqQlSl7fMexX/pO/V9JHOhzDccAetX1t12XbB0q6D/hJh3Y/LumGctxzJL2+TZmOv9/yuzi69P8RSbMkbVzy3lXafLT8bvdv/ypgY+AeST+U9KYOZYbM9k22jwVmtzm2VwLvB75o+zHbvwfOBD7cpp1/UAXD3Vuydgd+bvuZvvNea//AcuyPSrpJ0ttK+hRJPyvFLik/Hy6/23+WtJ6k35XX3AOSTprP0zC22c4jj64fwB3Adm3St6Warno9sATwv8AlJW9Z4F5gP2DJsv3Gkrc58CZgUWA8cAOwb61dA+t16MvpwA+BVwKvBq4EPlHylgZuBvYEtip9W7PkbQM8A3yr9PUtwN+BDUr+ccBh5fmrqN6oli79Phk4o9aHi4G5wGuBpcr2N2r5/1nqLQF8G7imlvf8ftqdX+BQ4I/l2FYB/gB8teUYDgUWA94FPA6s2OFcvWhfXbZ9ROn3Um3a2xm4G3gDIGA9YO02x9Dx9wtsD8ygGqEI+CdgtZJ3L7BVeb4i8Pp+XpPrAFOA20r7n+trp1ZmS+Dhfh5bDvC6Xw9wS9pmwOMtafsDv+rQxpuBR/rOJ7A88AQwqXbe7yrPNwD+DKxetscDE8rzKcDPaukGFq3t5xfAwVSDlCUHOramP0a9A3k060HnIHYscGRtexmqqZjxwK7A1V22vy9wem27bRCjmr55sv4GW/ZzUW37jcBfgTuBXWvp21C9Sb+ylvZLqv+ooU1wqZWbBDxU274YOKS2vRdwToe6K5TjWb7TfnhxAJgLvKuWtz1wR+0Ynmh587ofeFOHfb9oX120/RSwZD+/p3OBfQbzGmn9/VL943MzVZB7RUu5PwGfoLqG1+1rU1T/kPwYeAg4Cxg3TK/7dkFsK+C+lrSPAxf3084twG61ste2vC7vqu3vfmA7YLGWNqbQfxA7HphG+adtYX9kOjGGy+pUwQIA248BDwJrAGtRvWm+hKTXlim6+yQ9AhwOrNyubIu1qUYg95apqoepRmWvrvXhCqr/zkUVpOoesv332vad5Rha+7d0maq6s/TvEmCFvmnL4r7a88epAjiSFpH0DUlzS907Splujg9azmmbPj5o+5l2+x6Gtue5mgL
rpOPvtK6/36/tC4GpVNeV7pc0TdJyper7qUaXd5apsX8eaF+u3sHnANcCdwETqUbpI+UxYLmWtOWAR/upczwvTCl+uGy/hO1bqQL+FKpzM1216fkBfI7qNX+lqrtd/7PLeo2UIBbD5R6qwAI8f73gVVRTTn8G1u1Q7wfAjcD6tpcDvkD1BziQP1ONxFa2vUJ5LGf7+Yvqkj5FNR12D9Ufdt2KpY99xpVyrfajmtp5Y+nf1n3Nd9HH3YD3UP03vTzVf831ugPdZPGic9pPH4dioLYH6tufgQld7Kff36/t79reHNiIakr2gJJ+le33UP1TcgYv/SfkeeX63k7luuItVFOYewPr2r6hlNmqXDPq9Niqi2NpdTOwqKT1a2mvo831s5oTgLeVoPwm4MROBW3/3PaWVL8nU03vvqRYm3r32f647dWpRrPf1wL+mMqClCAWQ7GYpCVrj0Wp5uE/ImlSuUHgcOAK23dQTeusJmnf8oazrKQ3lraWpbpO8JikDYH/6qYDtu8FzgP+R9Jykl4haYKkt0A1AgAOAz5E9R/v5yRNamnmK5IWL29g76a63tVqWappu4clrQR8ubtT9HzdJ6lGpEtTnZO6v9A5uEN1Tg+RtIqqm0W+BPysn/KDMb9t/wjYX9Lm5QaN9SSt3aZcx9+vpDdIeqOkxaiuSf4DeK78Tj4oaXnbT5f6z7XrhKRNqa6f7UMV7Nayvbvti8rIDABXt68v08/j0g7tS9KSQN/NKEuW1zdlJH8acKikV0p6M9U/LSd0Omnl7+H3VOf/fNv3tSsnaQNJ25Z9/YPqNdjuHMwr6evW6u6sF24+eogq0LU9fwuDBLEYirOp/qj6HlNs/xb4InAq1ZvKBGAXANuPAm8HdqSaersFeGtpa3+qEcujwP8DBnMn1e5Uby5zqP5YT6EKlotSvSEfYfta27dQjQBO6HsDKv14iGr0cSLwSds3ttnHt6lu2HiA6kaIwdwKfTzVNN3dpY9/bMk/FtioTIee0ab+YUAvcB0wC5hZ0obDfLVt+2Tga8DPqX53ZwArtSna3+93uZL2ENV5ehA4quR9GLijTEF+kupOznbuB7awvZXtY8trbTitTfUa7xtdPQHUP5C8F9Xr436qwPRftvsbiQH8tLTbdiqxWAL4BtXr7j6qEelBrYVsP071e7isvI7eRHWzzRWSHqO6W3If27cN0KfGUu2flYiXBVXfiPEz22sOVDYixraMxCIiorESxCIiorEynRgREY2VkVhERDTWmP1iz4XVyiuv7PHjx492NyIiGmXGjBkP2F6lNT1BbAEbP348vb29o92NiIhGkXRnu/RMJ0ZERGMliEVERGMliEVERGMliEVERGMliEVERGMliEVERGMliEVERGMliEVERGPlw85Nom4WE46IYZPvlh3zMhKLiIjGShCLiIjGShCLiIjGShCLiIjGWuBBTNJrJE2XNFfSDElnS3qtpOtbyk2RtH95fpyk2yVdI+lGSV+ulbtY0k2SrpV0laRJtbzlJR0v6dayv+MlLV/yxkuypM/Uyk+VtGeXx/H20v9Z5ee283lqIiJikBZoEJMk4HTgYtsTbG8OHASs2kX1A2xPAiYBe0hap5b3QduvA74PHFVLPxa4zfZ6ticAtwM/quXfD+wjafEhHM4DwI62NwH2AE4YQhsRETEfFvRI7K3A07aP6UuwfS3w50G0sWT5+fc2eZcDawBIWg/YHPhqLf9QoEfShLI9D7iAKggNiu2rbd9TNmcDS0laYrDtRETE0C3oILYxMKND3oQyXXiNpGuAT7bkH1XS7wKm276/TRs7AGeU5xsB19h+ti+zPL8GmFircwSwv6RF6g1JOqDen9rju232+35gpu0n2x2YpMmSeiX1zps3r8PhR0TEYI2lDzvPLdOFQHVNrCX/ANunSFoGuEDSv9j+Q8k7sUwJLkM13dg127dJugLYrSX9KF48NdmWpIlUgfAd/exjGjANoKenJ5+ejIgYJgt
6JDabaopvyGw/BlwMbFlL/iCwLvBT4H9L2hxgkqTnj7E8n1Ty6g4HDgRUKzvgSEzSmlTX+Ha3PXd+jisiIgZvQQexC4ElJE3uS5C0KbBWtw1IWhR4I/CioGHbwBeBN0na0PatwNXAIbVih1BN+93aUvdGqsC2Yy3tKNuT2jz2Lv1YAfg18Hnbl3Xb/4iIGD4LNIiVQPM+YLtyy/ts4OvAfV1U77smdh0wCzitTftPAP8DHFCSPgq8tuxrLvDaktbO14A1B3E4nwbWA75UG6W9ehD1IyJiPsn5gssFqqenx729vUOrnC8Ajliw8v44ZkiaYbunNT3f2BEREY2VIBYREY01lm6xj4FkaiMi4kUyEouIiMZKEIuIiMZKEIuIiMbKNbEYutzyHwu7XIce8zISi4iIxkoQi4iIxkoQi4iIxkoQi4iIxhr1ICbpNZKmly/pnSHpbEmvlTRR0oWSbpJ0i6QvSqovlbKDpCsl3Vi+fPckSeNK3nGSdmrZz3hJ15fn20iypI/V8ieVtP277PfbS39nlZ/bDs8ZiYiIbo1qECtB6XTgYtsTbG8OHASsCpwJfMP2BsDrgH8B9ir1NqZaN2wP2xuWxTRPBMYPYvfXA/9R294VuHYQ9R8AdrS9CbAHcMIg6kZExDAY7ZHYW4GnbR/Tl2D7WqolUy6zfV5Je5xq6ZPPl2IHAofbvqFW70zblwxi33cCS0patQTTHYDfdFvZ9tW27ymbs4GlJC0xiP1HRMR8Gu3PiW0MzGiTPrE13fZcSctIWq7kf3MY9n8KsDPV4pkzgSf7MiQdQLVidKtL+hbGrHk/1WKbT7YpT1kEdDLAuHHjhqHbEREBox/E5pukVwEXAEsD02wPJrj9EjgJ2BD4BdWUJVCt7Awc1cX+JwJHAO/oVMb2NGAaVOuJDaJ/ERHRj9GeTpwNbN4mfU5ruqR1gcdsP1LqvR7A9oPlmtg0YJnB7Nz2fcDTwNupAmF9fwfUVmyuP75bK7Mm1TW93W3PHcy+IyJi/o12ELsQWKJMtwEgaVPgJmBLSduVtKWA7wJHlmJHAgdL+qdaW0sPsQ9fAg60/Ww90fZRtie1eexd+rQC8Gvg87YvG+K+IyJiPoxqELNt4H3AduUW+9nA14H7gPcAh0i6CZgFXAVMLfVmAfsAx5db8C8D/gn4ea35H0q6qzwu76cPf7B9xhC6/2lgPeBLtVHaq4fQTkREDJGcL7hcoHp6etzb2zva3Rge+QLgWNjl/XHMkDTDdk9r+mhPJ0ZERAxZglhERDRWglhERDRW4z8nFqMo1wsiYpRlJBYREY2VIBYREY2VIBYREY2Va2IRER3oK/ks5HDxl0fmGnpGYhER0VgJYhER0VgJYhER0VjzHcQkvVeSJW1YtsdLekLS1ZJukHSlpD1r5VeVdJakayXNkXR2rZ4lHVYru7KkpyVNraVNlnRjeVwpacta3sWSesrzdSTdImn7Wv44SY9J2r9sryXpotKP2ZL2GcRxv13SDEmzys9th3QCIyJiyIbjxo5dgd+Xn18uaXNtbwbPrwN2miTZ/glwKHC+7e+U/E1rbd0O/CtwSNnemWrtMErZdwOfALa0/YCk1wNnSNqirA3WV25N4BxgP9vn1tr/FvCb2vYzpcxMScsCMySdb3tOF8f9ALCj7XskbQycC6zRRb2IiBgm8zUSk7QMsCXwUWCXdmVs3wZ8Fti7JK0G3FXLv65W/HHghr7RFPABqtWX+xwIHGD7gVJ3JvBT4FO1MqsB5wEH2z6z1tf3UgXJ54Oi7XtLG9h+FLiBLgOR7att31M2ZwNLSVqim7oRETE85nc68T3AObZvBh6U1G6VZoCZwIbl+feAY8s03sGSVm8pOx3YRdJawLPAPbW8icCMlvK9Jb3PT4Gptk/pSyjB9kDgK50ORNJ4YDPgirI94MrONe8HZtp+slP7EREx/OZ3OnFX4Dvl+fSyPbVNuec/bGH73DLFuAPwTuDqMh3X5xzgq8BfgJO
G0KffAh+SdJztx0vaFOBo24+pzRpYJcidCuxr+5HSz6OAowbamaSJwBHAO/opMxmYDDBu3LhBHUxERHQ25CAmaSVgW2ATSQYWAUw10mq1GdVUHQC2/0q1CvPPJZ0FbE0ZYdl+StIMYD9gI+Dfau3MATYHLqylbU5tihA4EvgwcLKk99h+BngjsJOkI4EVgOck/cP2VEmLUQWwE22fVju+A4APtjmWS2zvXcqsCZwO7G57bqdzZXsaMA2qRTE7lYuIiMGZn5HYTsAJtj/RlyDpd8Ba9UJlmu6bwP+W7W2BP9p+vNxMMQH4U0vb/wP8zvZfW0ZORwJHSNrB9oOSJgF7UgWpun2pguSxkva0vVWtP1OAx0oAE3AscIPtb9UbGGgkJmkF4NfA521f1qlcRESMnPkJYrtSTaPVnQocBEyQdDWwJPAo8F3bx5UymwNTJT1DdU3uR7avKsEOANuzefHoqi/9TElrAH8oo79HgQ/ZvrelnCXtAZxFFfgO6HAMb6Yatc2SdE1J+4Lts7s4/k8D6wFfkvSlkvYO2/d3UTciIoaBnDWhFqienh739vaOdjciogv57sThM7/fnShphu2e1vR8Y0dERDRWglhERDRWglhERDRW1hOLiOhgpNbAiuGTkVhERDRWglhERDRWglhERDRWrolFvNy1+T7RKPI52jEvI7GIiGisBLGIiGisBLGIiGisBLGIiGisBRrEJD1bVke+XtLJkpZuk/6rsswJksZLeqJlZeXdS94ykn4gaa6kmZJmSPp4rZ4lfaa276mS9pT0vdLOnJa2dxrC8ewsabak5yS95IspIyJiZC3okdgTtifZ3hh4Cvhkm/S/Ap+q1Zlb8voex5f0HwEPAevbfj3VStEr1erdD+wjafF6B2x/yvYk4F0tbZ8yhOO5Hvh34JIh1I2IiPk0mtOJl1Ktx9XqcmCN/ipKmgBsARxi+zkA2/Ns19c3mwdcAOwxPN19Kds32L5ppNqPiIj+jcrnxCQtCrwTOKclfRHgbVSrLfeZUFuwEuAzwIrAtX0BrB9HAL+R9OMu+3UA8ME2WZfY3rubNjq0OxmYDDBu3LihNhMRES0WdBBbqhaQLuWFYNWXvgZwA3B+rc7cMv33PEn/1rJ9MLAz8Grbq/el275N0hXAbt10zvZRwFGDOJ6u2J4GTINqUczhbj8i4uVqtK6JTbL9GdtP1dOBtQHx4mti7cwBXifpFQC2v1bqL9em7OHAgaXdfkk6oOUmkr7Hd0v+T8r22d0dbkREjKQx9bVTth+XtDdwhqTv91PuVkm9wGGSvmj7WUlL0iZQ2b5R0hxgR+CqAfbf70jM9ke6PZaIiBh5Y+5zYravBq4Ddi1JE1pGRX3Xpj4GvAroC2jnA5/r0OzXgDWHu6+S3ifpLuCfgV9LOne49xEREZ3J+YLLBaqnp8e9vb2j3Y2IF+QLgDvL++OYIWmG7Zd8HnfMjcQiIiK6lSAWERGNNaZu7IiIUZAps2iwjMQiIqKxEsQiIqKxEsQiIqKxck0sIqIDfWXh+fiBv7xwXvvMSCwiIhorQSwiIhorQSwiIhorQSwiIhprgQcxSc+2fKHv50v6xZL+JL3wRW6SzpD0WHk+XtITpc4cScdIekVJv75lH1Mk7V+eHyfpdknXSrpZ0vGS1ix5y0qaK2n9sr2YpFmS3tjlsRwl6UZJ10k6XdIKw3OWIiKiG6MxEquvKTbJ9jdqeQ8DbwYoAWG1lrp9C2RuCmwEvLfLfR5g+3XABsDVwIWSFrf9KHAQMLWU2x/4g+0rumz3fGBj25sCN5e2IiJiARlr04nTgV3K838HTmtXyPYzwB+A9QbTuCtHA/cB7yxpvwSQ9DngkwwiENk+r/QF4I+MwHIvERHR2WgEsaVaphM/UMu7ANha0iJUweykdg1IWhp4GzCrJL1ozTGqYNSfmcCGte19gCOAw2z/tbafSzus9Lxdmzb/E/hNh/5OltQrqXfevHkDdC0iIro1Gh92fqJ
MCbbzLPB7qgC2lO079OK1jiaUIGXg/2z/RtJ4XphmBKprYgP0ofUTjDsA9wIb1xNtbzVAO337Oxh4BjixXb7tacA0qNYT66bNiIgY2Fj8xo7pwOnAlDZ5c/sJgIOxGdWoD0mrA3sDWwAXSTrW9nUl71Jg2Tb197f921JmT+DdwNucFUYjIhaosRjELgW+DvxiuBsudz5+huqGkXNK8tHA4bbvkvRZ4HuSti7Xz/odiUnaAfgc8Bbbjw93fyMion9j4ZpY/e7Evpsvvmn7gWHc51GSrqW6g/ANwFttPyXp7cA44Niy718BDwG7d9nuVKqR2vnlWI4Zxj5HRMQAlBmwBaunp8e9vb2j3Y2I6EK+AHjskDTDdk9r+li7xT4iIqJrCWIREdFYY/HGjoiIMaHpU3AvBxmJRUREYyWIRUREYyWIRUREY+WaWEREBwvLLfYL87W9jMQiIqKxEsQiIqKxEsQiIqKxEsQiIqKxGhnEJFnSz2rbi0qaJ+mssr2npKnl+RRJd5cv6L1F0mmSNip5i0iaIWnrWlvnSdq5y358VtIcSddJukDS2sN7pBER0Z9GBjHg78DGkpYq228H7u6n/NG2J9len2q16AslrWL7WWAvYKqkxSTtCjxn++Qu+3E10GN7U+AU4MghHU1ERAxJU4MYwNnAv5bnu9Ll+mO2TwLOA3Yr21cAl1Mtwnk48OluO2D7oto6Yn8E1uy2bkREzL8mB7HpwC6SlgQ2Ba4YRN2ZwIa17YOAfYGf2761L1HSSS1rn/U92q039lHgN+12JmmypF5JvfPmzRtENyMioj+N/bCz7eskjacahZ09yOqtn2DcGvgbsHHLPj7QVWPSh4Ae4C0d+joNmAbVemKD7GtERHTQ5JEYwJnAN+lyKrFmM+AGAEmvpLqWtS3waknv6ivUzUhM0nbAwcC/2X5yPo8nIiIGobEjseLHwMO2Z0nappsKkt4PvAPYryR9Cfil7Rsl7QVMl3Sh7X8MNBKTtBnwQ2AH2/cP+SgiImJIGj0Ss32X7e92UfS/+26xBz4EbGt7nqSJwPuAr5X2rgbOBQ7ssgtHAcsAJ5f2zxz8UURExFDJziWaBamnp8e9vb2j3Y2I6EK+AHjskDTDdk9reqNHYhER8fKWIBYREY3V9Bs7IiJGzMIwDbewy0gsIiIaK0EsIiIaK0EsIiIaK9fEIiI6WBhusV/Yr+tlJBYREY2VIBYREY2VIBYREY2VIBYREY016kFM0nslWdKGZXt82f5MrcxUSXvWtj8r6UZJsyRdK+lbkhYreXdIWrk8f7Z8Me/sUm4/Sa8oef8u6YJam1uWsgPe7CJpLUkXSZpT2t5n2E5IRER0bdSDGNWilr8vP/vcD+wjafHWwpI+SbWUyptsbwK8oZRfqk3bT9ieZHsi8HbgncCXAWyfBjwpabcSAL8P7GX7mS76/Aywn+2NgDcBn5K0UXeHGxERw2VUg5ikZYAtgY8Cu9Sy5gEXAHu0qXYw8F+2Hwaw/ZTtb9h+pL99lfW+JgOfltR33+yngcOAKcBVtv/QTb9t32t7Znn+KNUCm2t0UzciIobPaH9O7D3AObZvlvSgpM2BB0veEcBvJP24r7Ck5YBlbN8+lJ3Zvk3SIsCrgb+U7ZOogtmE2n7eChzdponHbf9LPUHSeKqVoq/otF9Jk6kCKOPGjRtK1yMioo3Rnk7cFZhenk+nNqVo+zaqwLBbp8qSti/Xse6Q9C+dyvVTfxGqacbHgLVr+76oTEO2PloD2DLAqcC+/Y0EbU+z3WO7Z5VVVhlsNyMiooNRG4lJWgnYFthEkoFFAAPfqxU7HDgF+B2A7UckPSZpHdu32z4XOFfSWcBLrp+12ee6wLNU19AA9gJmAYcA35P0z7bdzUisXEc7FTixXF+LiIgFbDSnE3cCTrD9ib4ESb8D1urbtn2jpDnAjsBVJfnrwA8k7WL74XJ9a8mBdiZpFeAYYGoJVK8BPgtsYXuepI8DHwP+n+2LgEn
9tCXgWOAG298a3GFHRMRwGc0gtivVda+6U4GDWtK+Blxd2/4B8ErgCklPUk0FXtZSps9Skq4BFqO6o/AEoC/ofAs40va8sr0vcKmkU23/dYC+vxn4MDCrtA/wBdtnD1AvIiKGkeyF+8shx5qenh739vaOdjciogv5AuCxQ9IM2z2t6aN9Y0dERMSQJYhFRERjjfbnxCIixqyFZSpuYZaRWERENFaCWERENFaCWERENFauiUVEdNDkW+xfLtfzMhKLiIjGShCLiIjGShCLiIjGShCLiIjGGlNBTNJrJE2XNFfSDElnS3qtpOtbyk2RtH95fpyk2yVdK+lmScdLWrNW9g5Js8pjjqTDJC1Z8nokzZa0eNmeIOm2svjmQH1dWtKvJd1Y2vjG8J6NiIgYyJgJYmV5k9OBi21PsL051Tfar9pF9QNsvw7YgOrb7C/sC0zFW21vAmwBrAv8EMB2L9VaZfuXct8DDu5vgcsW37S9IdXKzm+W9M4u60VExDAYS7fYvxV42vYxfQm2r5U0vtsGXH0l/9GS3ge8E/i/lvzHJH0S+LOklcqSK18Arpb0DLCo7V90ua/HgYvK86ckzQTW7L9WREQMp7EUxDYGZnTIm1BbtwvgNcA3+2lrJrAhLUEMnl8d+nZgfeCKsrDmN4DvAxv1lZO0AXBSh/a3sf1wrewKVAt3fqefPkVExDAbS0GsP3NtP7/SsqQpA5Qf6BOKrfnvBP5CFcRuArB9E/2s7lzry6LAL4Dv2r6tQ5nJwGSAcePGDdRkRER0acxcEwNmA5sPU1ubATe0y5C0LDAeuLlsvxtYHtgeOErS0iV9A0nXdHisUGtyGnCL7W936oztabZ7bPesssoqw3KAERExtoLYhcASZdQCgKRNgbW6bUCVvYHVgHPa5C9DNW14hu2HJC0FfAv4lO1ZVNOPB0M1ErM9qcPj4dLeYVQBcN+hHnRERAzdmAli5aaM9wHblVvsZwNfB+7rovpRkq6lGl29gepuxKdq+ReV2/SvBP4EfKKkfxE43facsj0F2FXS+gPtsNzGfzDVFOTMMkL7WBd9jYiIYTKmronZvgf4jzZZG7eUm1J7vucAbY7vJ+8LLduPUt2CPyDbd9FDutAAAAt6SURBVDHwtbeIiBhBY2YkFhERMVgJYhER0VgJYhER0Vhj6ppYRMRY8nJZWLLJMhKLiIjGShCLiIjGShCLiIjGyjWxiIgO9JXmfRT05XYdLyOxiIhorASxiIhorASxiIhorASxiIhorFENYpKeLd/+fr2kk2treT3WUm5PSVNr25Ml3VgevZK2qeWdKOmm0uaPJS3W0tYbJD0jaaeyPUnS5ZJmS7pO0gcG0f8NS90nJe0/xNMQERFDNNojsSfK+lwbA08BnxyoQlnE8hPAlrY3pFox+WeS1ihFTgQ2BDYBlgI+Vqu7CHAEcF6tyceB3W1PBHYAvt2y6GV//grsDXyzy/IRETGMRjuI1V0KrNdFuQOBA2w/AGB7JvAT4FNl+2wXVOuHrVmr+xngVOD+vgTbN9u+pTy/p+R1tfyy7fttXwU83U35iIgYXmPic2KSFgXeyQurMS8l6ZpakZWAM8vzicCMliZ6gY+0tLkY8GFgn7K9BtWim2+lWjizXT+2ABYH5pbto0v5VtNtf6ObYyvtTKYaMTJu3Lhuq0VExABGO4jVg9WlwLHl+RO2J/UVkrQn0DPItr8PXGL70rL9beBA289JL/0Ao6TVgBOAPWw/B2D7vwe5z7ZsTwOmAfT09Ly8PokYETGCRjuIvShYdWkOsDlwYS1tc6rRGACSvkw1JfiJWpkeYHoJYCsD75L0jO0zJC0H/Bo42PYfa+0My0gsIiJGxmgHsaE4EjhC0g62H5Q0iWqacFsASR8Dtgfe1jeiArC9Tt9zSccBZ5UAtjhwOnC87VPqOxqukVhERIyMxgUx22dKWh24rFxLew3wOtvzSpFjgDuBy8uo6zTbh/bT5H8AWwOvKtOWAHvavqZ
zlYqk11CNAJcDnpO0L7CR7UeGcGgRETFIqm7ia6YSxH5CdZflh9yAg+np6XFvb+/ABSNi1OULgMcOSTNsv+TeiMaNxOpsP0N1B2JERLwMjaXPiUVERAxKo0diEREjaWGdmluYZCQWERGNlSAWERGNlSAWERGNlWtiEREdNOkW+5fr9buMxCIiorESxCIiorESxCIiorESxCIiorG6CmKS3ivJkjYs2+MlPSHpakk3SLqy9uW5SNpT0tSWNi6W1FOe3yFpVnnMkXSYpCVbyu8r6R+Slq+lbSPpb2W/N0m6RNK7a/lTJN0t6RpJt0g6TdJGJW8RSTMkbV0rf56knQd1xl6o+zVJf5b02FDqR0TE/Ot2JLYr8Pvys89c25vZ/idgF2BfSR9pW7u9t9reBNgCWBf4YZt9XgX8e0v6pWW/GwB7A1Mlva2Wf7TtSbbXB04CLpS0iu1ngb1K+cUk7Qo8Z/vkQfS57lel7xERMUoGDGKSlgG2BD5KFaxewvZtwGepgsqg2H4M+CTwXkkrlX1OAJYBDuHFgbO17jXAocCnO+SfBJwH7Fa2rwAuB6YAh3eq12W//2j73qHWj4iI+dfN58TeA5xj+2ZJD0raHHiwTbmZwIa17Q9I2rK2vV6nHdh+RNLtwPrAFVTBcjpwKbCBpFVt/6VD9ZnAAf30v7VfBwF/Br5t+9a+REknARu0qf8t28f30/6AJE0GJgOMGzdufpqKiIiaboLYrsB3yvPpZXtqm3Ktnwo8yfbzIx1JFw+wn3r9XYH32X5O0qnAzh322W6/A+VvDfwN2LieaPsDA7QzZLanAdOgWk9spPYTEfFy028QK9N72wKbSDKwCGDge22KbwbcMJROSFoWGA/cLGkTqhHZ+WVl5sWB2+kcxAba72ZUqy8j6ZXAkVTH9BNJ77J9dsnrOBIDTgRmlO0zbX+p64OLiIgRM9BIbCfgBNuf6EuQ9DtgrXohSeOBbwL/O9gOlGtu3wfOsP2QpAOAKba/Xitzu6S129TdFPgi8LEObb8feAewX0n6EvBL2zdK2guYLulC2//oYiQ2abDHFhERI2ugGzt2BU5vSTuV6rrShL5b7IFfAt+1/ZNB7PsiSdcDVwJ/AvoC5S5t9nk6L9xUslXfLfZUI8K9bV9QK/vffbfYAx8CtrU9T9JE4H3A1wBsXw2cCxw4iD4/T9KRku4ClpZ0l6QpQ2knIiKGTnYu0SxIPT097u3tHe1uREQX8gXAY4ekGbZ7WtPzjR0REdFYCWIREdFYWU8sIqKDhX2KbmGQkVhERDRWglhERDRWglhERDRWglhERDRWglhERDRWglhERDRWglhERDRWglhERDRWglhERDRWvgB4AZM0D7hzhJpfGXhghNoeTk3pJzSnr03pJ6SvI6Ep/YSh93Vt26u0JiaILUQk9bb7luexpin9hOb0tSn9hPR1JDSlnzD8fc10YkRENFaCWERENFaC2MJl2mh3oEtN6Sc0p69N6SekryOhKf2EYe5rrolFRERjZSQWERGNlSAWERGNlSDWMJJWknS+pFvKzxXblJkk6XJJsyVdJ+kDtbzjJN0u6ZrymDRG+7mOpCsk3SrpJEmLj0Q/u+1rKXeOpIclndWSPmbO6QD9HIvndI9S5hZJe9TSL5Z0U+2cvnqY+7dDaf9WSZ9vk79EOUe3lnM2vpZ3UEm/SdL2w9mv4eyrpPGSnqidw2NGuZ9bS5op6RlJO7XktX0ddMV2Hg16AEcCny/PPw8c0abMa4H1y/PVgXuBFcr2ccBODejnL4FdyvNjgP8azb6WvLcBOwJntaSPmXM6QD/H1DkFVgJuKz9XLM9XLHkXAz0j1LdFgLnAusDiwLXARi1l9gKOKc93AU4qzzcq5ZcA1intLDKC53F++joeuH6kX5eD6Od4YFPg+PrfS3+vg24eGYk1z3uAn5bnPwXe21rA9s22bynP7wHuB17ySfcRNuR+ShKwLXBKf/UXZF9LHy8AHh3BfgxkyP0
co+d0e+B823+1/RBwPrDDCPapzxbArbZvs/0UML30t67e/1OAt5Vz+B5guu0nbd8O3FraG4t9XZAG7KftO2xfBzzXUne+XgcJYs2zqu17y/P7gFX7KyxpC6r/jObWkr9Wpu+OlrTEGOznq4CHbT9Tsu8C1hihfsIg+9rBmDunLcbiOV0D+HNtu7VPPynTYF8c5jflgfb7ojLlnP2N6hx2U3c4zU9fAdaRdLWk30naapT7ORJ1WbTbgrHgSPot8Jo2WQfXN2xbUsfPSEhaDTgB2MN2338/B1G9qSxO9XmNA4FDx1I/R+KfyOHqawdj7pwuCCPc1w/avlvSssCpwIeppqGie/cC42w/KGlz4AxJE20/MtodG04JYmOQ7e065Un6i6TVbN9b3vzv71BuOeDXwMG2/1hru++/4ycl/QTYfwz280FgBUmLlv8s1wTuHmo/h6uv/bQ9ps5pB2PxnN4NbFPbXpPqWhi27y4/H5X0c6rpquEKYncDa7Xst/Vc9JW5S9KiwPJU57CbusNpyH11dcHpSQDbMyTNpboO3TtK/eyv7jYtdS/udseZTmyeM4G+u3f2AP6vtUC56+x04Hjbp7TkrVZ+iuo6xfVjrZ/lj+8iYKf+6i/IvvZnLJ3TTsboOT0XeIekFcvdi+8AzpW0qKSVASQtBryb4T2nVwHrq7pbc3GqmyHO7Kf/OwEXlnN4JrBLuSNwHWB94Mph7Nuw9VXSKpIWAZC0bunrbaPYz07avg663vOCuHMlj2G9C+hVwAXALcBvgZVKeg/wo/L8Q8DTwDW1x6SSdyEwi+pN4WfAMmO0n+tSvTncCpwMLDGa57RsXwrMA56gmrfffqyd0wH6ORbP6X+W/twKfKSkvRKYAVwHzAa+wzDfAQi8C7iZ6hrswSXtUODfyvMlyzm6tZyzdWt1Dy71bgLeOVLncH77Cry/nL9rgJnAjqPczzeU1+PfqUa1s/t7HXT7yNdORUREY2U6MSIiGitBLCIiGitBLCIiGitBLCIiGitBLCIiGitBLCIiGitBLCIiGuv/AwdiDi/FfWL9AAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "stream", + "name": "stdout", + "text": "Actual label: [0.]\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "print(\"Threshold corresponding to Best balanced accuracy: {:6.4f}\".format(best_thresh))\n", + "show_explanation(0)\n", + "show_explanation(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q0-hXW89HHWN" + }, + "source": [ + "See the [LIME 
documentation](https://github.com/marcotcr/lime) for a detailed description of the results. In short, the left hand side shows the label predictions made by the model, the middle shows the features that are important to the instance in question and their contributions (weights) to the label prediction, while the right hand side shows the actual values of the features in the particular instance." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "No8MeAB_HHWN" + }, + "source": [ + "## [9.](#Table-of-Contents) Re-deploying Model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A621roj3HHWN" + }, + "source": [ + "### 9.1. Testing model learned on 2014 (Panel 19) data on 2016 (Panel 21) deployment data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0lV2Yv79HHWN" + }, + "source": [ + "Load the Panel 21 data, and split it again into 3 parts: train, validate, and deploy. We test the deployed model against the deployment data. If a new model needs to be learnt, it will be learnt from the train/validate data and then tested again on the deployment data."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "An4HJoIgHHWO", + "outputId": "088e2fb5-c634-4a23-ad6a-8b53c4406cf2" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Test Dataset shape" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "(15675, 138)\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Favorable and unfavorable labels" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "1.0 0.0\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Protected attribute names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "['RACE']\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Privileged and unprivileged protected attribute values" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "[array([1.])] [array([0.])]\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/markdown": "#### Dataset feature names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "['AGE', 'RACE', 'PCS42', 'MCS42', 'K6SUM42', 'REGION=1', 'REGION=2', 'REGION=3', 'REGION=4', 'SEX=1', 'SEX=2', 'MARRY=1', 'MARRY=2', 'MARRY=3', 'MARRY=4', 'MARRY=5', 'MARRY=6', 'MARRY=7', 'MARRY=8', 'MARRY=9', 'MARRY=10', 'FTSTU=-1', 'FTSTU=1', 'FTSTU=2', 'FTSTU=3', 'ACTDTY=1', 'ACTDTY=2', 'ACTDTY=3', 'ACTDTY=4', 'HONRDC=1', 'HONRDC=2', 'HONRDC=3', 'HONRDC=4', 'RTHLTH=-1', 'RTHLTH=1', 'RTHLTH=2', 'RTHLTH=3', 'RTHLTH=4', 'RTHLTH=5', 'MNHLTH=-1', 'MNHLTH=1', 'MNHLTH=2', 'MNHLTH=3', 'MNHLTH=4', 'MNHLTH=5', 'HIBPDX=-1', 'HIBPDX=1', 'HIBPDX=2', 'CHDDX=-1', 'CHDDX=1', 'CHDDX=2', 'ANGIDX=-1', 'ANGIDX=1', 'ANGIDX=2', 'MIDX=-1', 'MIDX=1', 
'MIDX=2', 'OHRTDX=-1', 'OHRTDX=1', 'OHRTDX=2', 'STRKDX=-1', 'STRKDX=1', 'STRKDX=2', 'EMPHDX=-1', 'EMPHDX=1', 'EMPHDX=2', 'CHBRON=-1', 'CHBRON=1', 'CHBRON=2', 'CHOLDX=-1', 'CHOLDX=1', 'CHOLDX=2', 'CANCERDX=-1', 'CANCERDX=1', 'CANCERDX=2', 'DIABDX=-1', 'DIABDX=1', 'DIABDX=2', 'JTPAIN=-1', 'JTPAIN=1', 'JTPAIN=2', 'ARTHDX=-1', 'ARTHDX=1', 'ARTHDX=2', 'ARTHTYPE=-1', 'ARTHTYPE=1', 'ARTHTYPE=2', 'ARTHTYPE=3', 'ASTHDX=1', 'ASTHDX=2', 'ADHDADDX=-1', 'ADHDADDX=1', 'ADHDADDX=2', 'PREGNT=-1', 'PREGNT=1', 'PREGNT=2', 'WLKLIM=-1', 'WLKLIM=1', 'WLKLIM=2', 'ACTLIM=-1', 'ACTLIM=1', 'ACTLIM=2', 'SOCLIM=-1', 'SOCLIM=1', 'SOCLIM=2', 'COGLIM=-1', 'COGLIM=1', 'COGLIM=2', 'DFHEAR42=-1', 'DFHEAR42=1', 'DFHEAR42=2', 'DFSEE42=-1', 'DFSEE42=1', 'DFSEE42=2', 'ADSMOK42=-1', 'ADSMOK42=1', 'ADSMOK42=2', 'PHQ242=-1', 'PHQ242=0', 'PHQ242=1', 'PHQ242=2', 'PHQ242=3', 'PHQ242=4', 'PHQ242=5', 'PHQ242=6', 'EMPST=-1', 'EMPST=1', 'EMPST=2', 'EMPST=3', 'EMPST=4', 'POVCAT=1', 'POVCAT=2', 'POVCAT=3', 'POVCAT=4', 'POVCAT=5', 'INSCOV=1', 'INSCOV=2', 'INSCOV=3']\n" + } + ], + "source": [ + "dataset_orig_panel21_deploy = MEPSDataset21()\n", + "\n", + "# now align it with the panel19 datasets\n", + "dataset_orig_panel21_deploy = dataset_orig_panel19_train.align_datasets(dataset_orig_panel21_deploy)\n", + "\n", + "describe(test=dataset_orig_panel21_deploy)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "AHoDpe7xHHWO", + "outputId": "d7ba8540-9d6a-4f83-8b03-02b04865bd4c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.48375589333734254\n" + } + ], + "source": [ + "metric_orig_panel21_deploy = BinaryLabelDatasetMetric(\n", + " dataset_orig_panel21_deploy,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "explainer_orig_panel21_deploy = 
MetricTextExplainer(metric_orig_panel21_deploy)\n", + "\n", + "print(explainer_orig_panel21_deploy.disparate_impact())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sMz4MX7zHHWO" + }, + "source": [ + "Now, the logistic regression classifier trained on the panel 19 data after reweighing is tested against the panel 21 deployment data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WMOZML07HHWO" + }, + "outputs": [], + "source": [ + "lr_transf_metrics_panel21_deploy = test(\n", + " dataset=dataset_orig_panel21_deploy,\n", + " model=lr_transf_panel19,\n", + " thresh_arr=[thresh_arr[lr_transf_best_ind]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "RbzA-DCMHHWO", + "outputId": "e0e077ed-939b-4363-c42b-b8febacee519" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.2200\nBest balanced accuracy: 0.7379\nCorresponding 1-min(DI, 1/DI) value: 0.2559\nCorresponding average odds difference value: -0.0143\nCorresponding statistical parity difference value: -0.0813\nCorresponding equal opportunity difference value: -0.0044\nCorresponding Theil index value: 0.0994\n" + } + ], + "source": [ + "describe_metrics(lr_transf_metrics_panel21_deploy, [thresh_arr[lr_transf_best_ind]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_rMgf0GbHHWO" + }, + "source": [ + "Compared to the 2015 panel 20 deployment data results, the $|1 - \\text{disparate impact}|$ fairness metric shows a noticeable drift upwards. While still within specs, it may be worthwhile to re-learn the model. So even though the model is still relatively fair and accurate, we go ahead and re-learn the model from the 2015 Panel 20 data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OXg8g2XFHHWO" + }, + "source": [ + "### 9.2.
Re-learning model (from 2015 Panel 20 data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PUB2J2HzHHWO" + }, + "outputs": [], + "source": [ + "(dataset_orig_panel20_train,\n", + " dataset_orig_panel20_val,\n", + " dataset_orig_panel20_test) = MEPSDataset20().split([0.5, 0.8], shuffle=True)\n", + "\n", + "# now align them with the 2014 datasets\n", + "dataset_orig_panel20_train = dataset_orig_panel19_train.align_datasets(dataset_orig_panel20_train)\n", + "dataset_orig_panel20_val = dataset_orig_panel19_train.align_datasets(dataset_orig_panel20_val)\n", + "dataset_orig_panel20_test = dataset_orig_panel19_train.align_datasets(dataset_orig_panel20_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SbU22z6zHHWO" + }, + "source": [ + "**Train and evaluate new model on 'transformed' 2015 (Panel 20) training/test data**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iYr0J7UeHHWO" + }, + "outputs": [], + "source": [ + "RW = Reweighing(unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "RW.fit(dataset_orig_panel20_train)\n", + "dataset_transf_panel20_train = RW.transform(dataset_orig_panel20_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "ycBpixZiHHWO", + "outputId": "76ca5700-77de-484f-98cb-66473480b633" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 1.0000000000000002\n" + } + ], + "source": [ + "metric_transf_panel20_train = BinaryLabelDatasetMetric(\n", + " dataset_transf_panel20_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "explainer_transf_panel20_train = MetricTextExplainer(metric_transf_panel20_train)\n", + "\n", +
"print(explainer_transf_panel20_train.disparate_impact())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dn80T84LHHWP" + }, + "outputs": [], + "source": [ + "dataset = dataset_transf_panel20_train\n", + "model = make_pipeline(StandardScaler(),\n", + " LogisticRegression(solver='liblinear', random_state=1))\n", + "fit_params = {'logisticregression__sample_weight': dataset.instance_weights}\n", + "lr_transf_panel20 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-m9vMblqHHWP" + }, + "outputs": [], + "source": [ + "thresh_arr = np.linspace(0.01, 0.5, 50)\n", + "val_metrics = test(dataset=dataset_orig_panel20_val,\n", + " model=lr_transf_panel20,\n", + " thresh_arr=thresh_arr)\n", + "lr_transf_best_ind_panel20 = np.argmax(val_metrics['bal_acc'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KZABqErYHHWP", + "outputId": "4a47c4b5-926d-4223-affa-6bf9a36f016b" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "disp_imp = np.array(val_metrics['disp_imp'])\n", + "disp_imp_err = 1 - np.minimum(disp_imp, 1/disp_imp)\n", + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced 
Accuracy',\n", + " disp_imp_err, '1 - min(DI, 1/DI)')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QPY6Jpg8HHWP", + "outputId": "2f63deeb-00f5-4217-d093-bc0519656749" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plot(thresh_arr, 'Classification Thresholds',\n", + " val_metrics['bal_acc'], 'Balanced Accuracy',\n", + " val_metrics['avg_odds_diff'], 'avg. 
odds diff.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "Kzz07t6iHHWP", + "outputId": "29d487fa-d6a2-48ea-ba77-f6e3037c7f8e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7465\nCorresponding 1-min(DI, 1/DI) value: 0.1129\nCorresponding average odds difference value: 0.0036\nCorresponding statistical parity difference value: -0.0414\nCorresponding equal opportunity difference value: -0.0057\nCorresponding Theil index value: 0.0946\n" + } + ], + "source": [ + "describe_metrics(val_metrics, thresh_arr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OtQaMv-kHHWP" + }, + "outputs": [], + "source": [ + "lr_transf_metrics_panel20_test = test(\n", + " dataset=dataset_orig_panel20_test,\n", + " model=lr_transf_panel20,\n", + " thresh_arr=[thresh_arr[lr_transf_best_ind_panel20]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "OZSFd0V1HHWP", + "outputId": "8fc14730-50f4-48d4-95ee-10b447cc0811" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7490\nCorresponding 1-min(DI, 1/DI) value: 0.0533\nCorresponding average odds difference value: 0.0158\nCorresponding statistical parity difference value: -0.0184\nCorresponding equal opportunity difference value: -0.0150\nCorresponding Theil index value: 0.0988\n" + } + ], + "source": [ + "describe_metrics(lr_transf_metrics_panel20_test, [thresh_arr[lr_transf_best_ind_panel20]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nu30oFGsHHWQ" + }, + "source": [ + "The new model is both relatively fair as well as accurate so we deploy and test against the 2016 deployment data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"id": "ELHXc_4eHHWQ" + }, + "source": [ + "### 9.3. Testing model learned on 2015 (Panel 20) data on 2016 (Panel 21) deployment data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "smQov8sGHHWQ" + }, + "source": [ + "**Evaluate new 2015 transformed data model and evaluate again on 2016 deployment data**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GU4e1HSoHHWQ" + }, + "outputs": [], + "source": [ + "lr_transf_panel20_metrics_panel21_deploy = test(\n", + " dataset=dataset_orig_panel21_deploy,\n", + " model=lr_transf_panel20,\n", + " thresh_arr=[thresh_arr[lr_transf_best_ind_panel20]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [], + "id": "_8ipT0CcHHWQ", + "outputId": "bf801b64-147d-46c8-8854-b889e0052aba" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Threshold corresponding to Best balanced accuracy: 0.1900\nBest balanced accuracy: 0.7370\nCorresponding 1-min(DI, 1/DI) value: 0.1698\nCorresponding average odds difference value: -0.0021\nCorresponding statistical parity difference value: -0.0648\nCorresponding equal opportunity difference value: 0.0016\nCorresponding Theil index value: 0.0960\n" + } + ], + "source": [ + "describe_metrics(lr_transf_panel20_metrics_panel21_deploy, [thresh_arr[lr_transf_best_ind_panel20]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9y--XlvYHHWQ" + }, + "source": [ + "The new transformed 2016 data model is again within original accuracy/fairness specs so is deployed" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "48SLB63AHHWQ" + }, + "source": [ + "## [10.](#Table-of-Contents) SUMMARY" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "id": "pvC7jg_PHHWQ", + "outputId": "23be184f-38b5-4ff9-aa7b-1c759542fc30" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": " bal_acc 
\\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 0.775935 \nReweighing Logistic Regression Panel19 Panel19 0.753893 \nReweighing Logistic Regression Panel19 Panel20 0.731136 \nReweighing Logistic Regression Panel19 Panel21 0.737916 \nReweighing Logistic Regression Panel20 Panel20 0.749024 \nReweighing Logistic Regression Panel20 Panel21 0.736958 \n\n avg_odds_diff \\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 -0.205706 \nReweighing Logistic Regression Panel19 Panel19 -0.015104 \nReweighing Logistic Regression Panel19 Panel20 0.007135 \nReweighing Logistic Regression Panel19 Panel21 -0.014340 \nReweighing Logistic Regression Panel20 Panel20 0.015756 \nReweighing Logistic Regression Panel20 Panel21 -0.002077 \n\n disp_imp \\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 0.426176 \nReweighing Logistic Regression Panel19 Panel19 0.751755 \nReweighing Logistic Regression Panel19 Panel20 0.805724 \nReweighing Logistic Regression Panel19 Panel21 0.744126 \nReweighing Logistic Regression Panel20 Panel20 0.946696 \nReweighing Logistic Regression Panel20 Panel21 0.830199 \n\n stat_par_diff \\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 -0.261207 \nReweighing Logistic Regression Panel19 Panel19 -0.087196 \nReweighing Logistic Regression Panel19 Panel20 -0.059602 \nReweighing Logistic Regression Panel19 Panel21 -0.081262 \nReweighing Logistic Regression Panel20 Panel20 -0.018444 \nReweighing Logistic Regression Panel20 Panel21 -0.064846 \n\n eq_opp_diff \\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 -0.222779 \nReweighing Logistic Regression Panel19 Panel19 -0.003518 \nReweighing Logistic Regression Panel19 Panel20 0.030262 \nReweighing Logistic Regression Panel19 Panel21 -0.004405 \nReweighing Logistic Regression Panel20 Panel20 -0.015005 
\nReweighing Logistic Regression Panel20 Panel21 0.001623 \n\n theil_ind \nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 0.092122 \nReweighing Logistic Regression Panel19 Panel19 0.096575 \nReweighing Logistic Regression Panel19 Panel20 0.101910 \nReweighing Logistic Regression Panel19 Panel21 0.099420 \nReweighing Logistic Regression Panel20 Panel20 0.098818 \nReweighing Logistic Regression Panel20 Panel21 0.095961 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
bal_accavg_odds_diffdisp_impstat_par_diffeq_opp_difftheil_ind
Bias MitigatorClassifierTraining setTesting set
Logistic RegressionPanel19Panel190.775935-0.2057060.426176-0.261207-0.2227790.092122
ReweighingLogistic RegressionPanel19Panel190.753893-0.0151040.751755-0.087196-0.0035180.096575
ReweighingLogistic RegressionPanel19Panel200.7311360.0071350.805724-0.0596020.0302620.101910
ReweighingLogistic RegressionPanel19Panel210.737916-0.0143400.744126-0.081262-0.0044050.099420
ReweighingLogistic RegressionPanel20Panel200.7490240.0157560.946696-0.018444-0.0150050.098818
ReweighingLogistic RegressionPanel20Panel210.736958-0.0020770.830199-0.0648460.0016230.095961
\n
" + }, + "metadata": {}, + "execution_count": 75 + } + ], + "source": [ + "results = [lr_orig_metrics, lr_transf_metrics,\n", + " lr_transf_metrics_panel20_deploy,\n", + " lr_transf_metrics_panel21_deploy,\n", + " lr_transf_metrics_panel20_test,\n", + " lr_transf_panel20_metrics_panel21_deploy]\n", + "debias = pd.Series([''] + ['Reweighing']*5, name='Bias Mitigator')\n", + "clf = pd.Series(['Logistic Regression']*6, name='Classifier')\n", + "tr = pd.Series(['Panel19']*4 + ['Panel20']*2, name='Training set')\n", + "te = pd.Series(['Panel19']*2 + ['Panel20', 'Panel21']*2, name='Testing set')\n", + "pd.concat([pd.DataFrame(m) for m in results], axis=0).set_index([debias, clf, tr, te])" + ] } - ], - "source": [ - "describe_metrics(lr_transf_panel20_metrics_panel21_deploy, [thresh_arr[lr_transf_best_ind_panel20]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The new transformed 2016 data model is again within original accuracy/fairness specs so is deployed" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## [10.](#Table-of-Contents) SUMMARY" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": " bal_acc \\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 0.775935 \nReweighing Logistic Regression Panel19 Panel19 0.753893 \nReweighing Logistic Regression Panel19 Panel20 0.731136 \nReweighing Logistic Regression Panel19 Panel21 0.737916 \nReweighing Logistic Regression Panel20 Panel20 0.749024 \nReweighing Logistic Regression Panel20 Panel21 0.736958 \n\n avg_odds_diff \\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 -0.205706 \nReweighing Logistic Regression Panel19 Panel19 -0.015104 \nReweighing Logistic Regression Panel19 Panel20 0.007135 \nReweighing Logistic Regression Panel19 Panel21 -0.014340 
\nReweighing Logistic Regression Panel20 Panel20 0.015756 \nReweighing Logistic Regression Panel20 Panel21 -0.002077 \n\n disp_imp \\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 0.426176 \nReweighing Logistic Regression Panel19 Panel19 0.751755 \nReweighing Logistic Regression Panel19 Panel20 0.805724 \nReweighing Logistic Regression Panel19 Panel21 0.744126 \nReweighing Logistic Regression Panel20 Panel20 0.946696 \nReweighing Logistic Regression Panel20 Panel21 0.830199 \n\n stat_par_diff \\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 -0.261207 \nReweighing Logistic Regression Panel19 Panel19 -0.087196 \nReweighing Logistic Regression Panel19 Panel20 -0.059602 \nReweighing Logistic Regression Panel19 Panel21 -0.081262 \nReweighing Logistic Regression Panel20 Panel20 -0.018444 \nReweighing Logistic Regression Panel20 Panel21 -0.064846 \n\n eq_opp_diff \\\nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 -0.222779 \nReweighing Logistic Regression Panel19 Panel19 -0.003518 \nReweighing Logistic Regression Panel19 Panel20 0.030262 \nReweighing Logistic Regression Panel19 Panel21 -0.004405 \nReweighing Logistic Regression Panel20 Panel20 -0.015005 \nReweighing Logistic Regression Panel20 Panel21 0.001623 \n\n theil_ind \nBias Mitigator Classifier Training set Testing set \n Logistic Regression Panel19 Panel19 0.092122 \nReweighing Logistic Regression Panel19 Panel19 0.096575 \nReweighing Logistic Regression Panel19 Panel20 0.101910 \nReweighing Logistic Regression Panel19 Panel21 0.099420 \nReweighing Logistic Regression Panel20 Panel20 0.098818 \nReweighing Logistic Regression Panel20 Panel21 0.095961 ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
bal_accavg_odds_diffdisp_impstat_par_diffeq_opp_difftheil_ind
Bias MitigatorClassifierTraining setTesting set
Logistic RegressionPanel19Panel190.775935-0.2057060.426176-0.261207-0.2227790.092122
ReweighingLogistic RegressionPanel19Panel190.753893-0.0151040.751755-0.087196-0.0035180.096575
ReweighingLogistic RegressionPanel19Panel200.7311360.0071350.805724-0.0596020.0302620.101910
ReweighingLogistic RegressionPanel19Panel210.737916-0.0143400.744126-0.081262-0.0044050.099420
ReweighingLogistic RegressionPanel20Panel200.7490240.0157560.946696-0.018444-0.0150050.098818
ReweighingLogistic RegressionPanel20Panel210.736958-0.0020770.830199-0.0648460.0016230.095961
\n
" - }, - "metadata": {}, - "execution_count": 75 + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.6" + }, + "colab": { + "provenance": [] } - ], - "source": [ - "results = [lr_orig_metrics, lr_transf_metrics,\n", - " lr_transf_metrics_panel20_deploy,\n", - " lr_transf_metrics_panel21_deploy,\n", - " lr_transf_metrics_panel20_test,\n", - " lr_transf_panel20_metrics_panel21_deploy]\n", - "debias = pd.Series([''] + ['Reweighing']*5, name='Bias Mitigator')\n", - "clf = pd.Series(['Logistic Regression']*6, name='Classifier')\n", - "tr = pd.Series(['Panel19']*4 + ['Panel20']*2, name='Training set')\n", - "te = pd.Series(['Panel19']*2 + ['Panel20', 'Panel21']*2, name='Testing set')\n", - "pd.concat([pd.DataFrame(m) for m in results], axis=0).set_index([debias, clf, tr, te])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file