modAL-python · TomKingsfordUoA · Jun 28, 2023 · Jun 28, 2023 · Jun 28, 2023
diff --git a/modAL/acquisition.py b/modAL/acquisition.py
@@ -13,16 +13,16 @@
 
 
 def PI(mean, std, max_val, tradeoff):
-    return ndtr((mean - max_val - tradeoff)/std)
+    return ndtr((mean - max_val - tradeoff) / std)
 
 
 def EI(mean, std, max_val, tradeoff):
     z = (mean - max_val - tradeoff) / std
-    return (mean - max_val - tradeoff)*ndtr(z) + std*norm.pdf(z)
+    return (mean - max_val - tradeoff) * ndtr(z) + std * norm.pdf(z)
 
 
 def UCB(mean, std, beta):
-    return mean + beta*std
+    return mean + beta * std
 
 
 """

diff --git a/modAL/batch.py b/modAL/batch.py
@@ -158,15 +158,14 @@ def ranked_batch(classifier: Union[BaseLearner, BaseCommittee],
             classifier.X_training
         ) if classifier.on_transformed else classifier.X_training[:]
         instance_index_ranking = []
-    
+
     # The maximum number of records to sample.
     ceiling = np.minimum(unlabeled.shape[0], n_instances) - len(instance_index_ranking)
 
     # mask for unlabeled initialized as transparent
     mask = np.ones(unlabeled.shape[0], bool)
 
     for _ in range(ceiling):
-
         # Receive the instance and corresponding index from our unlabeled copy that scores highest.
         instance_index, instance, mask = select_instance(X_training=labeled, X_pool=unlabeled,
                                                          X_uncertainty=uncertainty_scores, mask=mask,
@@ -220,5 +219,4 @@ def uncertainty_batch_sampling(classifier: Union[BaseLearner, BaseCommittee],
     """
     uncertainty = classifier_uncertainty(classifier, X, **uncertainty_measure_kwargs)
     return ranked_batch(classifier, unlabeled=X, uncertainty_scores=uncertainty,
-                                 n_instances=n_instances, metric=metric, n_jobs=n_jobs)
-
+                        n_instances=n_instances, metric=metric, n_jobs=n_jobs)
diff --git a/modAL/cluster.py b/modAL/cluster.py
@@ -4,8 +4,6 @@
 ========================================
 """
 
-from sklearn.cluster.hierarchical import AgglomerativeClustering
-
 
 class HierarchicalClustering:
     def __init__(self):

diff --git a/modAL/density.py b/modAL/density.py
@@ -20,8 +20,9 @@ def similarize_distance(distance_measure: Callable) -> Callable:
     Returns:
         The information_density measure obtained from the given distance measure.
     """
+
     def sim(*args, **kwargs):
-        return 1/(1 + distance_measure(*args, **kwargs))
+        return 1 / (1 + distance_measure(*args, **kwargs))
 
     return sim
 
@@ -51,6 +52,6 @@ def information_density(X: modALinput, metric: Union[str, Callable] = 'euclidean
     #
     # return inf_density/X.shape[0]
 
-    similarity_mtx = 1/(1+pairwise_distances(X, X, metric=metric))
+    similarity_mtx = 1 / (1 + pairwise_distances(X, X, metric=metric))
 
     return similarity_mtx.mean(axis=1)
diff --git a/modAL/disagreement.py b/modAL/disagreement.py
@@ -39,7 +39,7 @@ def vote_entropy(committee: BaseCommittee, X: modALinput, **predict_proba_kwargs
         vote_counter = Counter(vote)
 
         for class_idx, class_label in enumerate(committee.classes_):
-            p_vote[vote_idx, class_idx] = vote_counter[class_label]/n_learners
+            p_vote[vote_idx, class_idx] = vote_counter[class_label] / n_learners
 
     entr = entropy(p_vote, axis=1)
     return entr
@@ -184,7 +184,7 @@ def max_disagreement_sampling(committee: BaseCommittee, X: modALinput,
 
 
 def max_std_sampling(regressor: BaseEstimator, X: modALinput,
-                     n_instances: int = 1,  random_tie_break=False,
+                     n_instances: int = 1, random_tie_break=False,
                      **predict_kwargs) -> np.ndarray:
     """
     Regressor standard deviation sampling strategy.

diff --git a/modAL/dropout.py b/modAL/dropout.py
@@ -21,7 +21,7 @@ def mc_dropout_bald(classifier: BaseEstimator, X: modALinput, n_instances: int =
                     num_cycles: int = 50, sample_per_forward_pass: int = 1000,
                     logits_adaptor: Callable[[
                         torch.tensor, modALinput], torch.tensor] = default_logits_adaptor,
-                    **mc_dropout_kwargs,) -> np.ndarray:
+                    **mc_dropout_kwargs, ) -> np.ndarray:
     """
         Mc-Dropout bald query strategy. Returns the indexes of the instances with the largest BALD 
         (Bayesian Active Learning by Disagreement) score calculated through the dropout cycles
@@ -337,8 +337,7 @@ def _entropy(proba: list) -> np.ndarray:
 
     # calculate entropy per class and sum along dropout cycles
     entropy_classes = entropy_sum(proba_stacked, axis=-1)
-    entropy = np.mean(entropy_classes, where=~
-                      np.isnan(entropy_classes), axis=-1)
+    entropy = np.mean(entropy_classes, where=~np.isnan(entropy_classes), axis=-1)
     return entropy
 
 
@@ -381,11 +380,11 @@ def _bald_divergence(proba: list) -> np.ndarray:
 
     # entropy along dropout cycles
     accumulated_entropy = entropy_sum(proba_stacked, axis=-1)
-    f_x = accumulated_entropy/len(proba)
+    f_x = accumulated_entropy / len(proba)
 
     # score sums along dropout cycles
     accumulated_score = np.sum(proba_stacked, axis=-1)
-    average_score = accumulated_score/len(proba)
+    average_score = accumulated_score / len(proba)
     # expand dimension w/o data for entropy calculation
     average_score = np.expand_dims(average_score, axis=-1)
 
@@ -419,9 +418,9 @@ def set_dropout_mode(model, dropout_layer_indexes: list, train_mode: bool):
         for index in dropout_layer_indexes:
             layer = modules[index]
             if layer.__class__.__name__.startswith('Dropout'):
-                if True == train_mode:
+                if train_mode:
                     layer.train()
-                elif False == train_mode:
+                else:
                     layer.eval()
             else:
                 raise KeyError(
@@ -430,7 +429,7 @@ def set_dropout_mode(model, dropout_layer_indexes: list, train_mode: bool):
     else:
         for module in modules:
             if module.__class__.__name__.startswith('Dropout'):
-                if True == train_mode:
+                if train_mode:
                     module.train()
-                elif False == train_mode:
+                else:
                     module.eval()
diff --git a/modAL/expected_error.py b/modAL/expected_error.py
@@ -2,8 +2,6 @@
 Expected error reduction framework for active learning.
 """
 
-from typing import Tuple
-
 import numpy as np
 from sklearn.base import clone
 from sklearn.exceptions import NotFittedError
@@ -16,7 +14,7 @@
 
 
 def expected_error_reduction(learner: ActiveLearner, X: modALinput, loss: str = 'binary',
-                             p_subsample: np.float = 1.0, n_instances: int = 1,
+                             p_subsample: float = 1.0, n_instances: int = 1,
                              random_tie_break: bool = False) -> np.ndarray:
     """
     Expected error reduction query strategy.
@@ -63,16 +61,16 @@ def expected_error_reduction(learner: ActiveLearner, X: modALinput, loss: str =
             # estimate the expected error
             for y_idx, y in enumerate(possible_labels):
                 X_new = add_row(learner.X_training, x)
-                y_new = data_vstack((learner.y_training, np.array(y).reshape(1,)))
+                y_new = data_vstack((learner.y_training, np.array(y).reshape(1, )))
 
                 cloned_estimator.fit(X_new, y_new)
                 refitted_proba = cloned_estimator.predict_proba(X_reduced)
-                if loss is 'binary':
+                if loss == 'binary':
                     nloss = _proba_uncertainty(refitted_proba)
-                elif loss is 'log':
+                elif loss == 'log':
                     nloss = _proba_entropy(refitted_proba)
 
-                expected_error[x_idx] += np.sum(nloss)*X_proba[x_idx, y_idx]
+                expected_error[x_idx] += np.sum(nloss) * X_proba[x_idx, y_idx]
 
         else:
             expected_error[x_idx] = np.inf

diff --git a/modAL/models/base.py b/modAL/models/base.py
@@ -171,19 +171,17 @@ def query(self, X_pool, *query_args, return_metrics: bool = False, **query_kwarg
             query_metrics: returns also the corresponding metrics, if return_metrics == True
         """
 
-        try:
-            query_result, query_metrics = self.query_strategy(
-                self, X_pool, *query_args, **query_kwargs)
-
-        except:
+        _query_strategy_result = self.query_strategy(
+            self, X_pool, *query_args, **query_kwargs)
+        if isinstance(_query_strategy_result, tuple) and len(_query_strategy_result) == 2:
+            query_result, query_metrics = _query_strategy_result
+        else:
+            query_result = _query_strategy_result
             query_metrics = None
-            query_result = self.query_strategy(
-                self, X_pool, *query_args, **query_kwargs)
 
         if return_metrics:
-            if query_metrics is None: 
-                warnings.warn(
-                "The selected query strategy doesn't support return_metrics")
+            if query_metrics is None:
+                warnings.warn("The selected query strategy doesn't support return_metrics")
             return query_result, retrieve_rows(X_pool, query_result), query_metrics
         else:
             return query_result, retrieve_rows(X_pool, query_result)
@@ -216,6 +214,7 @@ class BaseCommittee(ABC, BaseEstimator):
         on_transformed: Whether to transform samples with the pipeline defined by each learner's estimator
             when applying the query strategy.
     """
+
     def __init__(self, learner_list: List[BaseLearner], query_strategy: Callable, on_transformed: bool = False) -> None:
         assert type(learner_list) == list, 'learners must be supplied in a list'
 
@@ -313,19 +312,18 @@ def query(self, X_pool, return_metrics: bool = False, *query_args, **query_kwarg
             query_metrics: returns also the corresponding metrics, if return_metrics == True
         """
 
-        try:
-            query_result, query_metrics = self.query_strategy(
-                self, X_pool, *query_args, **query_kwargs)
-
-        except:
+        _query_strategy_result = self.query_strategy(
+            self, X_pool, *query_args, **query_kwargs)
+        if isinstance(_query_strategy_result, tuple) and len(_query_strategy_result) == 2:
+            query_result, query_metrics = _query_strategy_result
+        else:
+            query_result = _query_strategy_result
             query_metrics = None
-            query_result = self.query_strategy(
-                self, X_pool, *query_args, **query_kwargs)
 
         if return_metrics:
-            if query_metrics is None: 
+            if query_metrics is None:
                 warnings.warn(
-                "The selected query strategy doesn't support return_metrics")
+                    "The selected query strategy doesn't support return_metrics")
             return query_result, retrieve_rows(X_pool, query_result), query_metrics
         else:
             return query_result, retrieve_rows(X_pool, query_result)
@@ -341,7 +339,8 @@ def rebag(self, **fit_kwargs) -> None:
         """
         self._fit_to_known(bootstrap=True, **fit_kwargs)
 
-    def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None:
+    def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False,
+              **fit_kwargs) -> None:
         """
         Adds X and y to the known training data for each learner and retrains learners with the augmented dataset.
         Args:
@@ -364,4 +363,3 @@ def predict(self, X: modALinput) -> Any:
     @abc.abstractmethod
     def vote(self, X: modALinput) -> Any:  # TODO: clarify typing
         pass
-
diff --git a/modAL/models/learners.py b/modAL/models/learners.py
@@ -157,7 +157,8 @@ def fit(self, X: modALinput, y: modALinput, bootstrap: bool = False, **fit_kwarg
         self.X_training, self.y_training = X, y
         return self._fit_to_known(bootstrap=bootstrap, **fit_kwargs)
 
-    def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None:
+    def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False,
+              **fit_kwargs) -> None:
         """
         Adds X and y to the known training data and retrains the predictor with the augmented dataset.
 
@@ -228,7 +229,8 @@ def fit(self, X: modALinput, y: modALinput, bootstrap: bool = False, **fit_kwarg
         """
         return self._fit_on_new(X, y, bootstrap=bootstrap, **fit_kwargs)
 
-    def teach(self, X: modALinput, y: modALinput, warm_start: bool = True, bootstrap: bool = False, **fit_kwargs) -> None:
+    def teach(self, X: modALinput, y: modALinput, warm_start: bool = True, bootstrap: bool = False,
+              **fit_kwargs) -> None:
         """
         Trains the predictor with the passed data (warm_start decides if params are resetted or not). 
 
@@ -404,7 +406,8 @@ def get_max(self) -> Tuple:
         """
         return self.X_max, self.y_max
 
-    def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None:
+    def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False,
+              **fit_kwargs) -> None:
         """
         Adds X and y to the known training data and retrains the predictor with the augmented dataset. This method also
         keeps track of the maximum value encountered in the training data.
@@ -481,6 +484,7 @@ class Committee(BaseCommittee):
         ...     y=iris['target'][query_idx].reshape(1, )
         ... )
     """
+
     def __init__(self, learner_list: List[ActiveLearner], query_strategy: Callable = vote_entropy_sampling,
                  on_transformed: bool = False) -> None:
         super().__init__(learner_list, query_strategy, on_transformed)
@@ -524,7 +528,8 @@ def fit(self, X: modALinput, y: modALinput, **fit_kwargs) -> 'BaseCommittee':
         super().fit(X, y, **fit_kwargs)
         self._set_classes()
 
-    def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None:
+    def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False,
+              **fit_kwargs) -> None:
         """
         Adds X and y to the known training data for each learner and retrains learners with the augmented dataset.
         Args:
@@ -678,6 +683,7 @@ class CommitteeRegressor(BaseCommittee):
         ...     query_idx, query_instance = committee.query(X.reshape(-1, 1))
         ...     committee.teach(X[query_idx].reshape(-1, 1), y[query_idx].reshape(-1, 1))
     """
+
     def __init__(self, learner_list: List[ActiveLearner], query_strategy: Callable = max_std_sampling,
                  on_transformed: bool = False) -> None:
         super().__init__(learner_list, query_strategy, on_transformed)

diff --git a/modAL/multilabel.py b/modAL/multilabel.py
@@ -25,12 +25,12 @@ def _SVM_loss(multiclass_classifier: ActiveLearner,
         np.ndarray of shape (n_instances, ), losses for the instances in X.
 
     """
-    predictions = 2*multiclass_classifier.predict(X)-1
+    predictions = 2 * multiclass_classifier.predict(X) - 1
     n_classes = len(multiclass_classifier.classes_)
 
     if most_certain_classes is None:
-        cls_mtx = 2*np.eye(n_classes, n_classes) - 1
-        loss_mtx = np.maximum(1-np.dot(predictions, cls_mtx), 0)
+        cls_mtx = 2 * np.eye(n_classes, n_classes) - 1
+        loss_mtx = np.maximum(1 - np.dot(predictions, cls_mtx), 0)
         return loss_mtx.mean(axis=1)
     else:
         cls_mtx = -np.ones(shape=(len(X), n_classes))
@@ -74,7 +74,6 @@ def SVM_binary_minimum(classifier: ActiveLearner, X_pool: modALinput,
 
 def max_loss(classifier: OneVsRestClassifier, X_pool: modALinput,
              n_instances: int = 1, random_tie_break: bool = False) -> np.ndarray:
-
     """
     Max Loss query strategy for SVM multilabel classification.
 
@@ -217,13 +216,12 @@ def max_score(classifier: OneVsRestClassifier, X_pool: modALinput,
 
     Returns:
         The index of the instance from X_pool chosen to be labelled.
-        The classwise maximum metric of the chosen instances. 
-
+        The classwise maximum metric of the chosen instances.
     """
 
     classwise_confidence = classifier.predict_proba(X_pool)
     classwise_predictions = classifier.predict(X_pool)
-    classwise_scores = classwise_confidence*(classwise_predictions - 1/2)
+    classwise_scores = classwise_confidence * (classwise_predictions - 1 / 2)
     classwise_max = np.max(classwise_scores, axis=1)
 
     if not random_tie_break:
@@ -255,7 +253,7 @@ def avg_score(classifier: OneVsRestClassifier, X_pool: modALinput,
 
     classwise_confidence = classifier.predict_proba(X_pool)
     classwise_predictions = classifier.predict(X_pool)
-    classwise_scores = classwise_confidence*(classwise_predictions-1/2)
+    classwise_scores = classwise_confidence * (classwise_predictions - 1 / 2)
     classwise_mean = np.mean(classwise_scores, axis=1)
 
     if not random_tie_break:

diff --git a/modAL/uncertainty.py b/modAL/uncertainty.py
@@ -76,7 +76,7 @@ def classifier_uncertainty(classifier: BaseEstimator, X: modALinput, **predict_p
     try:
         classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
     except NotFittedError:
-        return np.ones(shape=(X.shape[0], ))
+        return np.ones(shape=(X.shape[0],))
 
     # for each point, select the maximum uncertainty
     uncertainty = 1 - np.max(classwise_uncertainty, axis=1)
@@ -99,7 +99,7 @@ def classifier_margin(classifier: BaseEstimator, X: modALinput, **predict_proba_
     try:
         classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
     except NotFittedError:
-        return np.zeros(shape=(X.shape[0], ))
+        return np.zeros(shape=(X.shape[0],))
 
     if classwise_uncertainty.shape[1] == 1:
         return np.zeros(shape=(classwise_uncertainty.shape[0],))
@@ -125,7 +125,7 @@ def classifier_entropy(classifier: BaseEstimator, X: modALinput, **predict_proba
     try:
         classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
     except NotFittedError:
-        return np.zeros(shape=(X.shape[0], ))
+        return np.zeros(shape=(X.shape[0],))
 
     return np.transpose(entropy(np.transpose(classwise_uncertainty)))