nubank · MarianaBlaz · Nov 29, 2022 · Dec 20, 2022 · Dec 20, 2022 · Dec 20, 2022
@@ -13,6 +13,7 @@ def area_under_the_cumulative_effect_curve(df: pd.DataFrame,
                                            prediction: str,
                                            min_rows: int = 30,
                                            steps: int = 100,
+                                           ascending: bool = False,
                                            effect_fn: EffectFnType = linear_effect) -> float:
     """
      Orders the dataset by prediction and computes the area under the cumulative effect curve, according to that
@@ -38,6 +39,9 @@ def area_under_the_cumulative_effect_curve(df: pd.DataFrame,
      steps : Integer
          The number of cumulative steps to iterate when accumulating the effect
 
+     ascending : Boolean
+         If True, orders the dataset by ascending prediction; the default (False) orders by descending prediction
+
      effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
          A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
          of the outcome column.
@@ -55,7 +59,7 @@ def area_under_the_cumulative_effect_curve(df: pd.DataFrame,
     step_sizes = [min_rows] + [t - s for s, t in zip(n_rows, n_rows[1:])]
 
     cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
-                                         min_rows=min_rows, steps=steps, effect_fn=effect_fn)
+                                         min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)
 
     return abs(sum([(effect - ate) * (step_size / size) for effect, step_size in zip(cum_effect, step_sizes)]))
 
@@ -67,6 +71,7 @@ def area_under_the_cumulative_gain_curve(df: pd.DataFrame,
                                          prediction: str,
                                          min_rows: int = 30,
                                          steps: int = 100,
+                                         ascending: bool = False,
                                          effect_fn: EffectFnType = linear_effect) -> float:
     """
      Orders the dataset by prediction and computes the area under the cumulative gain curve, according to that ordering.
@@ -91,6 +96,9 @@ def area_under_the_cumulative_gain_curve(df: pd.DataFrame,
      steps : Integer
          The number of cumulative steps to iterate when accumulating the effect
 
+     ascending : Boolean
+         If True, orders the dataset by ascending prediction; the default (False) orders by descending prediction
+
      effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
          A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
          of the outcome column.
@@ -107,7 +115,7 @@ def area_under_the_cumulative_gain_curve(df: pd.DataFrame,
     step_sizes = [min_rows] + [t - s for s, t in zip(n_rows, n_rows[1:])]
 
     cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
-                                         min_rows=min_rows, steps=steps, effect_fn=effect_fn)
+                                         min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)
 
     return abs(sum([effect * (rows / size) * (step_size / size)
                     for rows, effect, step_size in zip(n_rows, cum_effect, step_sizes)]))
@@ -120,6 +128,7 @@ def area_under_the_relative_cumulative_gain_curve(df: pd.DataFrame,
                                                   prediction: str,
                                                   min_rows: int = 30,
                                                   steps: int = 100,
+                                                  ascending: bool = False,
                                                   effect_fn: EffectFnType = linear_effect) -> float:
     """
      Orders the dataset by prediction and computes the area under the relative cumulative gain curve, according to that
@@ -145,6 +154,9 @@ def area_under_the_relative_cumulative_gain_curve(df: pd.DataFrame,
      steps : Integer
          The number of cumulative steps to iterate when accumulating the effect
 
+     ascending : Boolean
+         If True, orders the dataset by ascending prediction; the default (False) orders by descending prediction
+
      effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
          A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
          of the outcome column.
@@ -162,7 +174,7 @@ def area_under_the_relative_cumulative_gain_curve(df: pd.DataFrame,
     step_sizes = [min_rows] + [t - s for s, t in zip(n_rows, n_rows[1:])]
 
     cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
-                                         min_rows=min_rows, steps=steps, effect_fn=effect_fn)
+                                         min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)
 
     return abs(sum([(effect - ate) * (rows / size) * (step_size / size)
                     for rows, effect, step_size in zip(n_rows, cum_effect, step_sizes)]))
@@ -59,6 +59,7 @@ def cumulative_effect_curve(df: pd.DataFrame,
                             prediction: str,
                             min_rows: int = 30,
                             steps: int = 100,
+                            ascending: bool = False,
                             effect_fn: EffectFnType = linear_effect) -> np.ndarray:
     """
     Orders the dataset by prediction and computes the cumulative effect curve according to that ordering
@@ -83,6 +84,9 @@ def cumulative_effect_curve(df: pd.DataFrame,
     steps : Integer
         The number of cumulative steps to iterate when accumulating the effect
 
+    ascending : Boolean
+        If True, orders the dataset by ascending prediction; the default (False) orders by descending prediction
+
     effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
         A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
         of the outcome column.
@@ -95,7 +99,7 @@ def cumulative_effect_curve(df: pd.DataFrame,
     """
 
     size = df.shape[0]
-    ordered_df = df.sort_values(prediction, ascending=False).reset_index(drop=True)
+    ordered_df = df.sort_values(prediction, ascending=ascending).reset_index(drop=True)
     n_rows = list(range(min_rows, size, size // steps)) + [size]
     return np.array([effect_fn(ordered_df.head(rows), treatment, outcome) for rows in n_rows])
 
@@ -107,6 +111,7 @@ def cumulative_gain_curve(df: pd.DataFrame,
                           prediction: str,
                           min_rows: int = 30,
                           steps: int = 100,
+                          ascending: bool = False,
                           effect_fn: EffectFnType = linear_effect) -> np.ndarray:
     """
     Orders the dataset by prediction and computes the cumulative gain (effect * proportional sample size) curve
@@ -132,6 +137,9 @@ def cumulative_gain_curve(df: pd.DataFrame,
     steps : Integer
         The number of cumulative steps to iterate when accumulating the effect
 
+    ascending : Boolean
+        If True, orders the dataset by ascending prediction; the default (False) orders by descending prediction
+
     effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
         A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
         of the outcome column.
@@ -147,7 +155,7 @@ def cumulative_gain_curve(df: pd.DataFrame,
     n_rows = list(range(min_rows, size, size // steps)) + [size]
 
     cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
-                                         min_rows=min_rows, steps=steps, effect_fn=effect_fn)
+                                         min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)
 
     return np.array([effect * (rows / size) for rows, effect in zip(n_rows, cum_effect)])
 
@@ -159,6 +167,7 @@ def relative_cumulative_gain_curve(df: pd.DataFrame,
                                    prediction: str,
                                    min_rows: int = 30,
                                    steps: int = 100,
+                                   ascending: bool = False,
                                    effect_fn: EffectFnType = linear_effect) -> np.ndarray:
     """
      Orders the dataset by prediction and computes the relative cumulative gain curve curve according to that ordering.
@@ -185,6 +194,9 @@ def relative_cumulative_gain_curve(df: pd.DataFrame,
      steps : Integer
          The number of cumulative steps to iterate when accumulating the effect
 
+     ascending : Boolean
+         If True, orders the dataset by ascending prediction; the default (False) orders by descending prediction
+
      effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
          A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
          of the outcome column.
@@ -201,7 +213,7 @@ def relative_cumulative_gain_curve(df: pd.DataFrame,
     n_rows = list(range(min_rows, size, size // steps)) + [size]
 
     cum_effect = cumulative_effect_curve(df=df, treatment=treatment, outcome=outcome, prediction=prediction,
-                                         min_rows=min_rows, steps=steps, effect_fn=effect_fn)
+                                         min_rows=min_rows, steps=steps, ascending=ascending, effect_fn=effect_fn)
 
     return np.array([(effect - ate) * (rows / size) for rows, effect in zip(n_rows, cum_effect)])
 
@@ -214,6 +226,7 @@ def effect_curves(
     prediction: str,
     min_rows: int = 30,
     steps: int = 100,
+    ascending: bool = False,
     effect_fn: EffectFnType = linear_effect,
 ) -> pd.DataFrame:
     """
@@ -243,6 +256,9 @@ def effect_curves(
      steps : Integer
          The number of cumulative steps to iterate when accumulating the effect
 
+     ascending : Boolean
+         If True, orders the dataset by ascending prediction; the default (False) orders by descending prediction
+
      effect_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> int or Array of int
          A function that computes the treatment effect given a dataframe, the name of the treatment column and the name
          of the outcome column.
@@ -264,6 +280,7 @@ def effect_curves(
         prediction=prediction,
         min_rows=min_rows,
         steps=steps,
+        ascending=ascending,
         effect_fn=effect_fn,
     )
     ate: float = cum_effect[-1]

@@ -28,12 +28,16 @@ def test_cumulative_effect_curve():
         y=[1, 1, 1, 2, 3, 4, 3, 5, 7],
     ))
 
-    expected = np.array([3., 3., 2.92857143, 2.5, 2.5, 2.46153846, 2.])
+    asc_expected = np.array([1., 1., 1.07142857, 1.5, 1.5, 1.53846154, 2.])
+    desc_expected = np.array([3., 3., 2.92857143, 2.5, 2.5, 2.46153846, 2.])
 
-    result = cumulative_effect_curve(df, prediction="x", outcome="y", treatment="t", min_rows=3, steps=df.shape[0],
-                                     effect_fn=linear_effect)
+    asc_result = cumulative_effect_curve(df, prediction="x", outcome="y", treatment="t", min_rows=3, steps=df.shape[0],
+                                         ascending=True, effect_fn=linear_effect)
+    desc_result = cumulative_effect_curve(df, prediction="x", outcome="y", treatment="t", min_rows=3, steps=df.shape[0],
+                                          effect_fn=linear_effect)
 
-    np.testing.assert_allclose(expected, result, rtol=1e-07)
+    np.testing.assert_allclose(asc_expected, asc_result, rtol=1e-07)
+    np.testing.assert_allclose(desc_expected, desc_result, rtol=1e-07)
 
 
 def test_cumulative_gain_curve():