Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ascending parameter causal validation #220

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions src/fklearn/causal/validation/auc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def area_under_the_cumulative_effect_curve(df: pd.DataFrame,
prediction: str,
min_rows: int = 30,
steps: int = 100,
ascending: bool = False,
effect_fn: EffectFnType = linear_effect) -> float:
"""
Orders the dataset by prediction and computes the area under the cumulative effect curve, according to that
Expand All @@ -38,6 +39,9 @@ def area_under_the_cumulative_effect_curve(df: pd.DataFrame,
steps : Integer
The number of cumulative steps to iterate when accumulating the effect

ascending : Boolean
Indicates if the dataset should be ordered ascending with respect to the prediction column

effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
of the outcome column.
Expand All @@ -55,7 +59,7 @@ def area_under_the_cumulative_effect_curve(df: pd.DataFrame,
step_sizes = [min_rows] + [t - s for s, t in zip(n_rows, n_rows[1:])]

cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
min_rows=min_rows, steps=steps, effect_fn=effect_fn)
min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)

return abs(sum([(effect - ate) * (step_size / size) for effect, step_size in zip(cum_effect, step_sizes)]))

Expand All @@ -67,6 +71,7 @@ def area_under_the_cumulative_gain_curve(df: pd.DataFrame,
prediction: str,
min_rows: int = 30,
steps: int = 100,
ascending: bool = False,
effect_fn: EffectFnType = linear_effect) -> float:
"""
Orders the dataset by prediction and computes the area under the cumulative gain curve, according to that ordering.
Expand All @@ -91,6 +96,9 @@ def area_under_the_cumulative_gain_curve(df: pd.DataFrame,
steps : Integer
The number of cumulative steps to iterate when accumulating the effect

ascending : Boolean
Indicates if the dataset should be ordered ascending with respect to the prediction column

effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
of the outcome column.
Expand All @@ -107,7 +115,7 @@ def area_under_the_cumulative_gain_curve(df: pd.DataFrame,
step_sizes = [min_rows] + [t - s for s, t in zip(n_rows, n_rows[1:])]

cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
min_rows=min_rows, steps=steps, effect_fn=effect_fn)
min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)

return abs(sum([effect * (rows / size) * (step_size / size)
for rows, effect, step_size in zip(n_rows, cum_effect, step_sizes)]))
Expand All @@ -120,6 +128,7 @@ def area_under_the_relative_cumulative_gain_curve(df: pd.DataFrame,
prediction: str,
min_rows: int = 30,
steps: int = 100,
ascending: bool = False,
effect_fn: EffectFnType = linear_effect) -> float:
"""
Orders the dataset by prediction and computes the area under the relative cumulative gain curve, according to that
Expand All @@ -145,6 +154,9 @@ def area_under_the_relative_cumulative_gain_curve(df: pd.DataFrame,
steps : Integer
The number of cumulative steps to iterate when accumulating the effect

ascending : Boolean
Indicates if the dataset should be ordered ascending with respect to the prediction column

effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
of the outcome column.
Expand All @@ -162,7 +174,7 @@ def area_under_the_relative_cumulative_gain_curve(df: pd.DataFrame,
step_sizes = [min_rows] + [t - s for s, t in zip(n_rows, n_rows[1:])]

cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
min_rows=min_rows, steps=steps, effect_fn=effect_fn)
min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)

return abs(sum([(effect - ate) * (rows / size) * (step_size / size)
for rows, effect, step_size in zip(n_rows, cum_effect, step_sizes)]))
23 changes: 20 additions & 3 deletions src/fklearn/causal/validation/curves.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def cumulative_effect_curve(df: pd.DataFrame,
prediction: str,
min_rows: int = 30,
steps: int = 100,
ascending: bool = False,
effect_fn: EffectFnType = linear_effect) -> np.ndarray:
"""
Orders the dataset by prediction and computes the cumulative effect curve according to that ordering
Expand All @@ -83,6 +84,9 @@ def cumulative_effect_curve(df: pd.DataFrame,
steps : Integer
The number of cumulative steps to iterate when accumulating the effect

ascending : Boolean
Indicates if the dataset should be ordered ascending with respect to the prediction column

effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
of the outcome column.
Expand All @@ -95,7 +99,7 @@ def cumulative_effect_curve(df: pd.DataFrame,
"""

size = df.shape[0]
ordered_df = df.sort_values(prediction, ascending=False).reset_index(drop=True)
ordered_df = df.sort_values(prediction, ascending=ascending).reset_index(drop=True)
n_rows = list(range(min_rows, size, size // steps)) + [size]
return np.array([effect_fn(ordered_df.head(rows), treatment, outcome) for rows in n_rows])

Expand All @@ -107,6 +111,7 @@ def cumulative_gain_curve(df: pd.DataFrame,
prediction: str,
min_rows: int = 30,
steps: int = 100,
ascending: bool = False,
effect_fn: EffectFnType = linear_effect) -> np.ndarray:
"""
Orders the dataset by prediction and computes the cumulative gain (effect * proportional sample size) curve
Expand All @@ -132,6 +137,9 @@ def cumulative_gain_curve(df: pd.DataFrame,
steps : Integer
The number of cumulative steps to iterate when accumulating the effect

ascending : Boolean
Indicates if the dataset should be ordered ascending with respect to the prediction column

effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
of the outcome column.
Expand All @@ -147,7 +155,7 @@ def cumulative_gain_curve(df: pd.DataFrame,
n_rows = list(range(min_rows, size, size // steps)) + [size]

cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
min_rows=min_rows, steps=steps, effect_fn=effect_fn)
min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)

return np.array([effect * (rows / size) for rows, effect in zip(n_rows, cum_effect)])

Expand All @@ -159,6 +167,7 @@ def relative_cumulative_gain_curve(df: pd.DataFrame,
prediction: str,
min_rows: int = 30,
steps: int = 100,
ascending: bool = False,
effect_fn: EffectFnType = linear_effect) -> np.ndarray:
"""
Orders the dataset by prediction and computes the relative cumulative gain curve according to that ordering.
Expand All @@ -185,6 +194,9 @@ def relative_cumulative_gain_curve(df: pd.DataFrame,
steps : Integer
The number of cumulative steps to iterate when accumulating the effect

ascending : Boolean
Indicates if the dataset should be ordered ascending with respect to the prediction column

effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
of the outcome column.
Expand All @@ -201,7 +213,7 @@ def relative_cumulative_gain_curve(df: pd.DataFrame,
n_rows = list(range(min_rows, size, size // steps)) + [size]

cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
min_rows=min_rows, steps=steps, effect_fn=effect_fn)
min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)

return np.array([(effect - ate) * (rows / size) for rows, effect in zip(n_rows, cum_effect)])

Expand All @@ -214,6 +226,7 @@ def effect_curves(
prediction: str,
min_rows: int = 30,
steps: int = 100,
ascending: bool = False,
effect_fn: EffectFnType = linear_effect,
) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -243,6 +256,9 @@ def effect_curves(
steps : Integer
The number of cumulative steps to iterate when accumulating the effect

ascending : Boolean
Indicates if the dataset should be ordered ascending with respect to the prediction column

effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
of the outcome column.
Expand All @@ -264,6 +280,7 @@ def effect_curves(
prediction=prediction,
min_rows=min_rows,
steps=steps,
ascending=ascending,
effect_fn=effect_fn,
)
ate: float = cum_effect[-1]
Expand Down
12 changes: 8 additions & 4 deletions tests/causal/validation/test_curves.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,16 @@ def test_cumulative_effect_curve():
y=[1, 1, 1, 2, 3, 4, 3, 5, 7],
))

expected = np.array([3., 3., 2.92857143, 2.5, 2.5, 2.46153846, 2.])
asc_expected = np.array([1., 1., 1.07142857, 1.5, 1.5, 1.53846154, 2.])
desc_expected = np.array([3., 3., 2.92857143, 2.5, 2.5, 2.46153846, 2.])

result = cumulative_effect_curve(df, prediction="x", outcome="y", treatment="t", min_rows=3, steps=df.shape[0],
effect_fn=linear_effect)
asc_result = cumulative_effect_curve(df, prediction="x", outcome="y", treatment="t", min_rows=3, steps=df.shape[0],
ascending=True, effect_fn=linear_effect)
desc_result = cumulative_effect_curve(df, prediction="x", outcome="y", treatment="t", min_rows=3, steps=df.shape[0],
effect_fn=linear_effect)

np.testing.assert_allclose(expected, result, rtol=1e-07)
np.testing.assert_allclose(asc_expected, asc_result, rtol=1e-07)
np.testing.assert_allclose(desc_expected, desc_result, rtol=1e-07)


def test_cumulative_gain_curve():
Expand Down