conformal GBDT v0.7.0 Pt.2

thierrymoudiki · Sep 2, 2024 · 4d1c79d · 4d1c79d
1 parent 8ed3574
commit 4d1c79d
Show file tree

Hide file tree

Showing 12 changed files with 409 additions and 0 deletions.
diff --git a/examples/conformal-classification.py b/examples/conformal-classification.py
@@ -0,0 +1,59 @@
+import numpy as np
+import os 
+import unifiedbooster as ub
+from sklearn.datasets import load_iris, load_breast_cancer, load_wine
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error
+from time import time
+
+
+# Banner: name of the script that is currently executing.
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+# Every dataset is loaded up front; dataset_names holds the display names
+# in the same order.
+load_datasets = [load_iris(), load_breast_cancer(), load_wine()]
+dataset_names = ["Iris", "Breast Cancer", "Wine"]
+
+for dataset_name, dataset in zip(dataset_names, load_datasets):
+
+    print(f"\n ----- Running: {dataset_name} ----- \n")
+    X, y = dataset.data, dataset.target
+
+    # 80/20 train/test split with a fixed seed so the split is repeatable.
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    # ----- First model: XGBoost backend, pi_method="tcp" -----
+    # NOTE(review): level=95 is presumably a 95% confidence level and "tcp"
+    # presumably transductive conformal prediction -- confirm against the
+    # unifiedbooster documentation.
+    print("\n ---------- Initialize the unified clf (example with XGBoost)")
+    clf1 = ub.GBDTClassifier(
+        model_type="xgboost",
+        level=95,
+        pi_method="tcp",
+    )
+
+    # Only the call to fit() is timed.
+    fit_started = time()
+    clf1.fit(X_train, y_train)
+    print(f"Time taken: {time() - fit_started} seconds")
+
+    y_pred1 = clf1.predict(X_test)
+    print(y_test)
+    # The column index of each row's maximum is used as the predicted label.
+    labels1 = y_pred1.argmax(axis=1)
+    print(labels1)
+    # Accuracy = fraction of test rows whose argmax equals the true label.
+    accuracy1 = (y_test == labels1).mean()
+    print(f"\nAccuracy: {accuracy1:.4f}")
+
+    # ----- Second model: LightGBM backend, pi_method="icp" -----
+    # NOTE(review): "icp" is presumably inductive conformal prediction --
+    # confirm against the unifiedbooster documentation.
+    print("\n ---------- Initialize the unified clf (example with LightGBM)")
+    clf2 = ub.GBDTClassifier(
+        model_type="lightgbm",
+        level=95,
+        pi_method="icp",
+    )
+
+    fit_started = time()
+    clf2.fit(X_train, y_train)
+    print(f"Time taken: {time() - fit_started} seconds")
+
+    # For this model the raw predict() output is printed as well.
+    y_pred2 = clf2.predict(X_test)
+    print(y_pred2)
+
+    print(y_test)
+    labels2 = y_pred2.argmax(axis=1)
+    print(labels2)
+    accuracy2 = (y_test == labels2).mean()
+    print(f"\nAccuracy: {accuracy2:.4f}")
diff --git a/examples/conformal-regression.py b/examples/conformal-regression.py
@@ -0,0 +1,108 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import os 
+import unifiedbooster as ub
+import warnings
+from sklearn.datasets import load_diabetes, fetch_california_housing
+from sklearn.model_selection import train_test_split
+from time import time
+
+
+# Banner: name of the script that is currently executing.
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+# Regression datasets, loaded up front; dataset_names holds the display
+# names in the same order.
+# NOTE(review): fetch_california_housing() may need network access the first
+# time it runs -- confirm against the scikit-learn documentation.
+load_datasets = [fetch_california_housing(), load_diabetes()]
+dataset_names = ["California Housing", "Diabetes"]
+
+# Silence every warning raised from this point on.
+warnings.filterwarnings("ignore")
+
+# Colour names; not referenced anywhere in the visible part of this script.
+split_color, split_color2, local_color = "green", "orange", "gray"
+
+def plot_func(
+    x,
+    y,
+    y_u=None,
+    y_l=None,
+    pred=None,
+    shade_color="lightblue",
+    method_name="",
+    title="",
+):
+    """Draw observations with an optional interval band and prediction line.
+
+    A new matplotlib figure is opened, populated and then displayed with
+    ``plt.show()``. Nothing is returned.
+
+    Parameters
+    ----------
+    x : sequence
+        Horizontal coordinates. Must support ``x[::-1]`` and be accepted by
+        ``np.concatenate``.
+    y : sequence
+        Observed values, drawn as semi-transparent black dots.
+    y_u, y_l : sequence, optional
+        Upper and lower bounds of the band. The band is drawn only when
+        both are given.
+    pred : sequence, optional
+        Predicted values, drawn as a dashed black line when given.
+    shade_color : str
+        Face colour of the band.
+    method_name : str
+        Text placed in front of " Prediction interval" in the legend.
+    title : str
+        Figure title.
+    """
+    plt.figure()
+
+    plt.plot(
+        x,
+        y,
+        "k.",
+        alpha=0.3,
+        markersize=10,
+        fillstyle="full",
+        label="Test set observations",
+    )
+
+    band_requested = y_u is not None and y_l is not None
+    if band_requested:
+        # Walk along the upper bound left-to-right, then back along the
+        # lower bound right-to-left, so the two curves form one outline
+        # that plt.fill can shade.
+        outline_x = np.concatenate([x, x[::-1]])
+        outline_y = np.concatenate([y_u, y_l[::-1]])
+        plt.fill(
+            outline_x,
+            outline_y,
+            alpha=0.3,
+            fc=shade_color,
+            ec="None",
+            label=method_name + " Prediction interval",
+        )
+
+    if pred is not None:
+        plt.plot(x, pred, "k--", lw=2, alpha=0.9, label="Predicted value")
+
+    # Fixed y-limits are left disabled: plt.ylim([-2.5, 7])
+    plt.xlabel("$X$")
+    plt.ylabel("$Y$")
+    plt.legend(loc="upper right")
+    plt.title(title)
+
+    plt.show()
+
+# One row per model to run on every dataset:
+# (backend name shown in the banner, model_type, pi_method, legend prefix)
+# NOTE(review): level=95 below is presumably a 95% prediction level --
+# confirm against the unifiedbooster documentation.
+regressor_setups = [
+    ("XGBoost", "xgboost", "splitconformal", "Split Conformal"),
+    ("LightGBM", "lightgbm", "localconformal", "Local Conformal"),
+]
+
+# Only the first 30 test points are plotted.
+plotted = slice(0, 30)
+
+for dataset_name, dataset in zip(dataset_names, load_datasets):
+
+    print(f"\n ----- Running: {dataset_name} ----- \n")
+    X, y = dataset.data, dataset.target
+
+    # 80/20 train/test split with a fixed seed so the split is repeatable.
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    for backend_name, model_type, pi_method, legend_prefix in regressor_setups:
+
+        print(
+            "\n ---------- Initialize the unified regr "
+            f"(example with {backend_name})"
+        )
+        regr = ub.GBDTRegressor(
+            model_type=model_type,
+            level=95,
+            pi_method=pi_method,
+        )
+
+        # Only the call to fit() is timed.
+        fit_started = time()
+        regr.fit(X_train, y_train)
+        print(f"Time taken: {time() - fit_started} seconds")
+
+        # The prediction object is read through .mean, .lower and .upper.
+        y_pred = regr.predict(X_test)
+
+        # True where the observed value lies inside [lower, upper]; the
+        # mean of this boolean array is the empirical coverage rate.
+        inside_interval = (y_test >= y_pred.lower) & (y_test <= y_pred.upper)
+        print(f"Coverage rate: {inside_interval.mean():.4f}")
+
+        # x is just the position of each point within the test set.
+        plot_func(
+            range(len(y_test))[plotted],
+            y_test[plotted],
+            y_u=y_pred.upper[plotted],
+            y_l=y_pred.lower[plotted],
+            pred=y_pred.mean[plotted],
+            method_name=legend_prefix,
+        )
diff --git a/examples/conformal.py b/examples/conformal.py
@@ -0,0 +1,59 @@
+import numpy as np
+import os 
+import unifiedbooster as ub
+from sklearn.datasets import load_iris, load_breast_cancer, load_wine
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error
+from time import time
+
+
+# Banner: name of the script that is currently executing.
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+# Every dataset is loaded up front; dataset_names holds the display names
+# in the same order.
+load_datasets = [load_iris(), load_breast_cancer(), load_wine()]
+dataset_names = ["Iris", "Breast Cancer", "Wine"]
+
+for dataset_name, dataset in zip(dataset_names, load_datasets):
+
+    print(f"\n ----- Running: {dataset_name} ----- \n")
+    X, y = dataset.data, dataset.target
+
+    # 80/20 train/test split with a fixed seed so the split is repeatable.
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    # ----- First model: XGBoost backend, pi_method="tcp" -----
+    # NOTE(review): level=95 is presumably a 95% confidence level and "tcp"
+    # presumably transductive conformal prediction -- confirm against the
+    # unifiedbooster documentation.
+    print("\n ---------- Initialize the unified clf (example with XGBoost)")
+    clf1 = ub.GBDTClassifier(
+        model_type="xgboost",
+        level=95,
+        pi_method="tcp",
+    )
+
+    # Only the call to fit() is timed.
+    fit_started = time()
+    clf1.fit(X_train, y_train)
+    print(f"Time taken: {time() - fit_started} seconds")
+
+    y_pred1 = clf1.predict(X_test)
+    print(y_test)
+    # The column index of each row's maximum is used as the predicted label.
+    labels1 = y_pred1.argmax(axis=1)
+    print(labels1)
+    # Accuracy = fraction of test rows whose argmax equals the true label.
+    accuracy1 = (y_test == labels1).mean()
+    print(f"\nAccuracy: {accuracy1:.4f}")
+
+    # ----- Second model: LightGBM backend, pi_method="icp" -----
+    # NOTE(review): "icp" is presumably inductive conformal prediction --
+    # confirm against the unifiedbooster documentation.
+    print("\n ---------- Initialize the unified clf (example with LightGBM)")
+    clf2 = ub.GBDTClassifier(
+        model_type="lightgbm",
+        level=95,
+        pi_method="icp",
+    )
+
+    fit_started = time()
+    clf2.fit(X_train, y_train)
+    print(f"Time taken: {time() - fit_started} seconds")
+
+    # For this model the raw predict() output is printed as well.
+    y_pred2 = clf2.predict(X_test)
+    print(y_pred2)
+
+    print(y_test)
+    labels2 = y_pred2.argmax(axis=1)
+    print(labels2)
+    accuracy2 = (y_test == labels2).mean()
+    print(f"\nAccuracy: {accuracy2:.4f}")
diff --git a/unifiedbooster.egg-info/PKG-INFO b/unifiedbooster.egg-info/PKG-INFO
@@ -0,0 +1,29 @@
+Metadata-Version: 2.1
+Name: unifiedbooster
+Version: 0.7.0
+Summary: Unified interface for Gradient Boosted Decision Trees
+Home-page: https://github.com/thierrymoudiki/unifiedbooster
+Author: T. Moudiki
+Author-email: [email protected]
+License: BSD license
+Keywords: unifiedbooster
+Classifier: Development Status :: 2 - Pre-Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Natural Language :: English
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Requires-Python: >=3.6
+License-File: LICENSE
+Requires-Dist: Cython
+Requires-Dist: numpy
+Requires-Dist: scikit-learn
+Requires-Dist: xgboost
+Requires-Dist: lightgbm
+Requires-Dist: catboost
+Requires-Dist: GPopt
+Requires-Dist: nnetsauce
+
+Unified interface for Gradient Boosted Decision Trees
diff --git a/unifiedbooster.egg-info/SOURCES.txt b/unifiedbooster.egg-info/SOURCES.txt
@@ -0,0 +1,27 @@
+LICENSE
+README.md
+setup.py
+unifiedbooster/__init__.py
+unifiedbooster/gbdt.py
+unifiedbooster/gbdt_classification.py
+unifiedbooster/gbdt_regression.py
+unifiedbooster/gpoptimization.py
+unifiedbooster.egg-info/PKG-INFO
+unifiedbooster.egg-info/SOURCES.txt
+unifiedbooster.egg-info/dependency_links.txt
+unifiedbooster.egg-info/entry_points.txt
+unifiedbooster.egg-info/not-zip-safe
+unifiedbooster.egg-info/requires.txt
+unifiedbooster.egg-info/top_level.txt
+unifiedbooster/nonconformist/__init__.py
+unifiedbooster/nonconformist/acp.py
+unifiedbooster/nonconformist/base.py
+unifiedbooster/nonconformist/cp.py
+unifiedbooster/nonconformist/evaluation.py
+unifiedbooster/nonconformist/icp.py
+unifiedbooster/nonconformist/nc.py
+unifiedbooster/nonconformist/util.py
+unifiedbooster/predictioninterval/__init__.py
+unifiedbooster/predictioninterval/predictioninterval.py
+unifiedbooster/predictionset/__init__.py
+unifiedbooster/predictionset/predictionset.py
diff --git a/unifiedbooster.egg-info/dependency_links.txt b/unifiedbooster.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/unifiedbooster.egg-info/entry_points.txt b/unifiedbooster.egg-info/entry_points.txt
@@ -0,0 +1,2 @@
+[console_scripts]
+unifiedbooster = unifiedbooster.cli:main
diff --git a/unifiedbooster.egg-info/not-zip-safe b/unifiedbooster.egg-info/not-zip-safe
@@ -0,0 +1 @@
+
diff --git a/unifiedbooster.egg-info/requires.txt b/unifiedbooster.egg-info/requires.txt
@@ -0,0 +1,8 @@
+Cython
+numpy
+scikit-learn
+xgboost
+lightgbm
+catboost
+GPopt
+nnetsauce
diff --git a/unifiedbooster.egg-info/top_level.txt b/unifiedbooster.egg-info/top_level.txt
@@ -0,0 +1 @@
+unifiedbooster
diff --git a/unifiedbooster/predictionset/__init__.py b/unifiedbooster/predictionset/__init__.py
@@ -0,0 +1,3 @@
+from .predictionset import PredictionSet
+
+__all__ = ["PredictionSet"]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		[console_scripts]
		unifiedbooster = unifiedbooster.cli:main
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .predictionset import PredictionSet

		__all__ = ["PredictionSet"]