From 0902e6350278ce57c81daa2b3d44e3fcbbd63945 Mon Sep 17 00:00:00 2001 From: Denise Date: Wed, 11 Sep 2024 10:31:28 -0300 Subject: [PATCH 1/3] Bugfix based on MOA source code in the _detect_change method. The k index should increment from 0 to bucket.current_idx - 1. The previous code "for k in range(bucket.current_idx - 1):" only incremented k up to bucket.current_idx - 2, because range() excludes its stop value. --- river/drift/adwin_c.pyx | 3 ++- river/drift/test_drift_detectors.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/river/drift/adwin_c.pyx b/river/drift/adwin_c.pyx index 322859c780..889145481b 100644 --- a/river/drift/adwin_c.pyx +++ b/river/drift/adwin_c.pyx @@ -241,7 +241,7 @@ cdef class AdaptiveWindowing: break bucket = self.bucket_deque[idx] - for k in range(bucket.current_idx - 1): + for k in range(bucket.current_idx): n2 = self._calculate_bucket_size(idx) # length of window 2 u2 = bucket.get_total_at(k) # total of window 2 # Warning: means are calculated inside the loop to get updated values. @@ -307,6 +307,7 @@ cdef class AdaptiveWindowing: + (1.0 / (n1 - self.min_window_length + 1))) epsilon = (sqrt(2 * m_recip * self.variance_in_window * delta_prime) + 2 / 3 * delta_prime * m_recip) + return fabs(delta_mean) > epsilon diff --git a/river/drift/test_drift_detectors.py b/river/drift/test_drift_detectors.py index 665a3f4eb2..2af897920a 100644 --- a/river/drift/test_drift_detectors.py +++ b/river/drift/test_drift_detectors.py @@ -28,7 +28,8 @@ def test_adwin(): - expected_indices = [1055] + # expected_indices = [1055] + expected_indices = [1023] # DMVS - after fixing as MOA, the expected change point is different detected_indices = perform_test(drift.ADWIN(), data_stream_1) assert detected_indices == expected_indices From 6c422ba4e7528dd7488e25e936af4df5c1067ee8 Mon Sep 17 00:00:00 2001 From: Denise Date: Wed, 11 Sep 2024 10:34:48 -0300 Subject: [PATCH 2/3] Bugfix based on MOA source code in the _detect_change method. 
The k index should increment from 0 to bucket.current_idx - 1. The previous code "for k in range(bucket.current_idx - 1):" only incremented k up to bucket.current_idx - 2, because range() excludes its stop value. --- river/drift/test_drift_detectors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/river/drift/test_drift_detectors.py b/river/drift/test_drift_detectors.py index 2af897920a..cff6f37c2f 100644 --- a/river/drift/test_drift_detectors.py +++ b/river/drift/test_drift_detectors.py @@ -28,8 +28,7 @@ def test_adwin(): - # expected_indices = [1055] - expected_indices = [1023] # DMVS - after fixing as MOA, the expected change point is different + expected_indices = [1023] detected_indices = perform_test(drift.ADWIN(), data_stream_1) assert detected_indices == expected_indices From 3a014cf95545936b9e9050f479ee763d842dadee Mon Sep 17 00:00:00 2001 From: Denise Date: Wed, 11 Sep 2024 11:51:17 -0300 Subject: [PATCH 3/3] Tests updated due to bugfix on the ADWIN change detector - reported on issue #1614 --- river/ensemble/streaming_random_patches.py | 2 +- river/forest/adaptive_random_forest.py | 4 ++-- river/forest/online_extra_trees.py | 2 +- river/imblearn/chebyshev.py | 8 ++++---- river/tree/hoeffding_adaptive_tree_regressor.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/river/ensemble/streaming_random_patches.py b/river/ensemble/streaming_random_patches.py index ac4ee26b68..415b30be10 100644 --- a/river/ensemble/streaming_random_patches.py +++ b/river/ensemble/streaming_random_patches.py @@ -407,7 +407,7 @@ class SRPClassifier(BaseSRPEnsemble, base.Classifier): >>> metric = metrics.Accuracy() >>> evaluate.progressive_val_score(dataset, model, metric) - Accuracy: 71.97% + Accuracy: 72.17% Notes ----- diff --git a/river/forest/adaptive_random_forest.py b/river/forest/adaptive_random_forest.py index 6ec1bada2c..dd9a757f4d 100644 --- a/river/forest/adaptive_random_forest.py +++ b/river/forest/adaptive_random_forest.py @@ -565,7 +565,7 
@@ class ARFClassifier(BaseForest, base.Classifier): >>> metric = metrics.Accuracy() >>> evaluate.progressive_val_score(dataset, model, metric) - Accuracy: 71.17% + Accuracy: 67.97% The total number of warnings and drifts detected, respectively >>> model.n_warnings_detected(), model.n_drifts_detected() @@ -849,7 +849,7 @@ class ARFRegressor(BaseForest, base.Regressor): >>> metric = metrics.MAE() >>> evaluate.progressive_val_score(dataset, model, metric) - MAE: 0.788619 + MAE: 0.772113 """ diff --git a/river/forest/online_extra_trees.py b/river/forest/online_extra_trees.py index a13707bdb0..ee361007eb 100644 --- a/river/forest/online_extra_trees.py +++ b/river/forest/online_extra_trees.py @@ -614,7 +614,7 @@ class OXTRegressor(ExtraTrees, base.Regressor): >>> metric = metrics.RMSE() >>> evaluate.progressive_val_score(dataset, model, metric) - RMSE: 3.127311 + RMSE: 3.16212 References ---------- diff --git a/river/imblearn/chebyshev.py b/river/imblearn/chebyshev.py index 5bc1a769bc..5c9c3baaf7 100644 --- a/river/imblearn/chebyshev.py +++ b/river/imblearn/chebyshev.py @@ -162,10 +162,10 @@ class ChebyshevOverSampler(base.Wrapper, base.Regressor): ... metrics.MAE(), ... print_every=500 ... ) - [500] MAE: 1.673902 - [1,000] MAE: 1.743046 - [1,001] MAE: 1.741335 - MAE: 1.741335 + [500] MAE: 1.629786 + [1,000] MAE: 1.663799 + [1,001] MAE: 1.66253 + MAE: 1.66253 References ---------- diff --git a/river/tree/hoeffding_adaptive_tree_regressor.py b/river/tree/hoeffding_adaptive_tree_regressor.py index 89aeae0d20..6c609cc8f4 100644 --- a/river/tree/hoeffding_adaptive_tree_regressor.py +++ b/river/tree/hoeffding_adaptive_tree_regressor.py @@ -140,7 +140,7 @@ class HoeffdingAdaptiveTreeRegressor(HoeffdingTreeRegressor): >>> metric = metrics.MAE() >>> evaluate.progressive_val_score(dataset, model, metric) - MAE: 0.823026 + MAE: 0.917576 """