Skip to content

Commit

Permalink
conformal GBDT v0.7.0 Pt.2
Browse files Browse the repository at this point in the history
  • Loading branch information
thierrymoudiki committed Sep 2, 2024
1 parent 8ed3574 commit 4d1c79d
Show file tree
Hide file tree
Showing 12 changed files with 409 additions and 0 deletions.
59 changes: 59 additions & 0 deletions examples/conformal-classification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np
import os
import unifiedbooster as ub
from sklearn.datasets import load_iris, load_breast_cancer, load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from time import time


# Announce which example script is running.
print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

# Benchmark datasets and their display names, kept in matching order.
load_datasets = [load_iris(), load_breast_cancer(), load_wine()]
dataset_names = ["Iris", "Breast Cancer", "Wine"]

for dataset_name, dataset in zip(dataset_names, load_datasets):

    print(f"\n ----- Running: {dataset_name} ----- \n")
    X = dataset.data
    y = dataset.target

    # Hold out 20% of the observations for testing; the seed is fixed so
    # every run uses the same split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # ---- First classifier: XGBoost backend, pi_method="tcp" ----
    print("\n ---------- Initialize the unified clf (example with XGBoost)")
    clf1 = ub.GBDTClassifier(model_type="xgboost", level=95, pi_method="tcp")

    # Time the fit only.
    start = time()
    clf1.fit(X_train, y_train)
    elapsed = time() - start
    print(f"Time taken: {elapsed} seconds")

    # Show the true labels next to the column index of each row's maximum.
    y_pred1 = clf1.predict(X_test)
    print(y_test)
    labels1 = y_pred1.argmax(axis=1)
    print(labels1)

    # Fraction of test rows whose argmax column equals the true label.
    accuracy = (y_test == labels1).mean()
    print(f"\nAccuracy: {accuracy:.4f}")

    # ---- Second classifier: LightGBM backend, pi_method="icp" ----
    print("\n ---------- Initialize the unified clf (example with LightGBM)")
    clf2 = ub.GBDTClassifier(model_type="lightgbm", level=95, pi_method="icp")

    # Time the fit only.
    start = time()
    clf2.fit(X_train, y_train)
    elapsed = time() - start
    print(f"Time taken: {elapsed} seconds")

    # This time the raw prediction output is printed as well.
    y_pred2 = clf2.predict(X_test)
    print(y_pred2)

    print(y_test)
    labels2 = y_pred2.argmax(axis=1)
    print(labels2)

    # Fraction of test rows whose argmax column equals the true label.
    accuracy = (y_test == labels2).mean()
    print(f"\nAccuracy: {accuracy:.4f}")
108 changes: 108 additions & 0 deletions examples/conformal-regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import matplotlib.pyplot as plt
import numpy as np
import os
import unifiedbooster as ub
import warnings
from sklearn.datasets import load_diabetes, fetch_california_housing
from sklearn.model_selection import train_test_split
from time import time


# Announce which example script is running.
print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

# Regression datasets and their display names, kept in matching order so
# that position k in one list corresponds to position k in the other.
load_datasets = [fetch_california_housing(), load_diabetes()]
dataset_names = ["California Housing", "Diabetes"]

# Suppress all warnings for the remainder of the script.
warnings.filterwarnings('ignore')

# Color names; NOTE(review): not referenced in the part of the script
# visible here -- confirm whether they are still needed.
split_color = 'green'
split_color2 = 'orange'
local_color = 'gray'

def plot_func(x,
              y,
              y_u=None,
              y_l=None,
              pred=None,
              shade_color="lightblue",
              method_name="",
              title=""):
    """Plot observations with an optional shaded band and predicted values.

    Parameters
    ----------
    x : sequence
        Horizontal coordinates. When the band is drawn, ``x`` must support
        reversal via ``x[::-1]``.
    y : sequence
        Observed values, drawn as semi-transparent black dots.
    y_u, y_l : sequence, optional
        Upper and lower bounds of the band. The band is drawn only when
        both are provided.
    pred : sequence, optional
        Predicted values, drawn as a dashed black line when provided.
    shade_color : str
        Face color of the shaded band.
    method_name : str
        Prefix of the band's legend label.
    title : str
        Title of the figure.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Start a fresh figure so successive calls do not draw onto the same axes.
    plt.figure()

    plt.plot(x, y, 'k.', alpha=.3, markersize=10,
             fillstyle='full', label='Test set observations')

    if (y_u is not None) and (y_l is not None):
        # Build one closed polygon: the upper bound traversed forwards,
        # followed by the lower bound traversed backwards.
        plt.fill(np.concatenate([x, x[::-1]]),
                 np.concatenate([y_u, y_l[::-1]]),
                 alpha=.3, fc=shade_color, ec='None',
                 label=method_name + ' Prediction interval')

    if pred is not None:
        plt.plot(x, pred, 'k--', lw=2, alpha=0.9,
                 label='Predicted value')

    plt.xlabel('$X$')
    plt.ylabel('$Y$')
    plt.legend(loc='upper right')
    plt.title(title)

    plt.show()

for dataset_name, dataset in zip(dataset_names, load_datasets):

    print(f"\n ----- Running: {dataset_name} ----- \n")
    X = dataset.data
    y = dataset.target

    # Hold out 20% of the observations for testing; the seed is fixed so
    # every run uses the same split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Only the first 30 test observations are plotted.
    preview = slice(0, 30)
    x_axis = range(len(y_test))[preview]

    # ---- First regressor: XGBoost backend, pi_method="splitconformal" ----
    print("\n ---------- Initialize the unified regr (example with XGBoost)")
    regr1 = ub.GBDTRegressor(
        model_type="xgboost", level=95, pi_method="splitconformal"
    )

    # Time the fit only.
    start = time()
    regr1.fit(X_train, y_train)
    elapsed = time() - start
    print(f"Time taken: {elapsed} seconds")

    y_pred1 = regr1.predict(X_test)

    # Boolean mask: True where the observed value lies inside
    # [lower, upper]. Its mean is the empirical coverage rate.
    above_lower = y_test >= y_pred1.lower
    below_upper = y_test <= y_pred1.upper
    coverage_error = above_lower & below_upper
    print(f"Coverage rate: {coverage_error.mean():.4f}")

    # Positional arguments of plot_func: x, y, y_u, y_l, pred.
    plot_func(
        x_axis,
        y_test[preview],
        y_pred1.upper[preview],
        y_pred1.lower[preview],
        y_pred1.mean[preview],
        method_name="Split Conformal",
    )

    # ---- Second regressor: LightGBM backend, pi_method="localconformal" ----
    print("\n ---------- Initialize the unified regr (example with LightGBM)")
    regr2 = ub.GBDTRegressor(
        model_type="lightgbm", level=95, pi_method="localconformal"
    )

    # Time the fit only.
    start = time()
    regr2.fit(X_train, y_train)
    elapsed = time() - start
    print(f"Time taken: {elapsed} seconds")

    y_pred2 = regr2.predict(X_test)

    # Same coverage mask as above, for the second model.
    above_lower = y_test >= y_pred2.lower
    below_upper = y_test <= y_pred2.upper
    coverage_error = above_lower & below_upper
    print(f"Coverage rate: {coverage_error.mean():.4f}")

    # Positional arguments of plot_func: x, y, y_u, y_l, pred.
    plot_func(
        x_axis,
        y_test[preview],
        y_pred2.upper[preview],
        y_pred2.lower[preview],
        y_pred2.mean[preview],
        method_name="Local Conformal",
    )
59 changes: 59 additions & 0 deletions examples/conformal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np
import os
import unifiedbooster as ub
from sklearn.datasets import load_iris, load_breast_cancer, load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from time import time


# Announce which example script is running.
print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

# Benchmark datasets and their display names, kept in matching order.
load_datasets = [load_iris(), load_breast_cancer(), load_wine()]
dataset_names = ["Iris", "Breast Cancer", "Wine"]

for dataset_name, dataset in zip(dataset_names, load_datasets):

    print(f"\n ----- Running: {dataset_name} ----- \n")
    X = dataset.data
    y = dataset.target

    # Hold out 20% of the observations for testing; the seed is fixed so
    # every run uses the same split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # ---- First classifier: XGBoost backend, pi_method="tcp" ----
    print("\n ---------- Initialize the unified clf (example with XGBoost)")
    clf1 = ub.GBDTClassifier(model_type="xgboost", level=95, pi_method="tcp")

    # Time the fit only.
    start = time()
    clf1.fit(X_train, y_train)
    elapsed = time() - start
    print(f"Time taken: {elapsed} seconds")

    # Show the true labels next to the column index of each row's maximum.
    y_pred1 = clf1.predict(X_test)
    print(y_test)
    labels1 = y_pred1.argmax(axis=1)
    print(labels1)

    # Fraction of test rows whose argmax column equals the true label.
    accuracy = (y_test == labels1).mean()
    print(f"\nAccuracy: {accuracy:.4f}")

    # ---- Second classifier: LightGBM backend, pi_method="icp" ----
    print("\n ---------- Initialize the unified clf (example with LightGBM)")
    clf2 = ub.GBDTClassifier(model_type="lightgbm", level=95, pi_method="icp")

    # Time the fit only.
    start = time()
    clf2.fit(X_train, y_train)
    elapsed = time() - start
    print(f"Time taken: {elapsed} seconds")

    # This time the raw prediction output is printed as well.
    y_pred2 = clf2.predict(X_test)
    print(y_pred2)

    print(y_test)
    labels2 = y_pred2.argmax(axis=1)
    print(labels2)

    # Fraction of test rows whose argmax column equals the true label.
    accuracy = (y_test == labels2).mean()
    print(f"\nAccuracy: {accuracy:.4f}")
29 changes: 29 additions & 0 deletions unifiedbooster.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Metadata-Version: 2.1
Name: unifiedbooster
Version: 0.7.0
Summary: Unified interface for Gradient Boosted Decision Trees
Home-page: https://github.com/thierrymoudiki/unifiedbooster
Author: T. Moudiki
Author-email: [email protected]
License: BSD license
Keywords: unifiedbooster
Classifier: Development Status :: 2 - Pre-Alpha
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Natural Language :: English
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Requires-Python: >=3.6
License-File: LICENSE
Requires-Dist: Cython
Requires-Dist: numpy
Requires-Dist: scikit-learn
Requires-Dist: xgboost
Requires-Dist: lightgbm
Requires-Dist: catboost
Requires-Dist: GPopt
Requires-Dist: nnetsauce

Unified interface for Gradient Boosted Decision Trees
27 changes: 27 additions & 0 deletions unifiedbooster.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
LICENSE
README.md
setup.py
unifiedbooster/__init__.py
unifiedbooster/gbdt.py
unifiedbooster/gbdt_classification.py
unifiedbooster/gbdt_regression.py
unifiedbooster/gpoptimization.py
unifiedbooster.egg-info/PKG-INFO
unifiedbooster.egg-info/SOURCES.txt
unifiedbooster.egg-info/dependency_links.txt
unifiedbooster.egg-info/entry_points.txt
unifiedbooster.egg-info/not-zip-safe
unifiedbooster.egg-info/requires.txt
unifiedbooster.egg-info/top_level.txt
unifiedbooster/nonconformist/__init__.py
unifiedbooster/nonconformist/acp.py
unifiedbooster/nonconformist/base.py
unifiedbooster/nonconformist/cp.py
unifiedbooster/nonconformist/evaluation.py
unifiedbooster/nonconformist/icp.py
unifiedbooster/nonconformist/nc.py
unifiedbooster/nonconformist/util.py
unifiedbooster/predictioninterval/__init__.py
unifiedbooster/predictioninterval/predictioninterval.py
unifiedbooster/predictionset/__init__.py
unifiedbooster/predictionset/predictionset.py
1 change: 1 addition & 0 deletions unifiedbooster.egg-info/dependency_links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

2 changes: 2 additions & 0 deletions unifiedbooster.egg-info/entry_points.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[console_scripts]
unifiedbooster = unifiedbooster.cli:main
1 change: 1 addition & 0 deletions unifiedbooster.egg-info/not-zip-safe
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

8 changes: 8 additions & 0 deletions unifiedbooster.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Cython
numpy
scikit-learn
xgboost
lightgbm
catboost
GPopt
nnetsauce
1 change: 1 addition & 0 deletions unifiedbooster.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
unifiedbooster
3 changes: 3 additions & 0 deletions unifiedbooster/predictionset/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Re-export PredictionSet from the sibling ``predictionset`` module so that it
# can be imported directly from this package.
from .predictionset import PredictionSet

# Names exported by ``from <this package> import *``.
__all__ = ["PredictionSet"]
Loading

0 comments on commit 4d1c79d

Please sign in to comment.