Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Address flake8 style issues #175

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions modAL/acquisition.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@


def PI(mean, std, max_val, tradeoff):
return ndtr((mean - max_val - tradeoff)/std)
return ndtr((mean - max_val - tradeoff) / std)


def EI(mean, std, max_val, tradeoff):
z = (mean - max_val - tradeoff) / std
return (mean - max_val - tradeoff)*ndtr(z) + std*norm.pdf(z)
return (mean - max_val - tradeoff) * ndtr(z) + std * norm.pdf(z)


def UCB(mean, std, beta):
return mean + beta*std
return mean + beta * std


"""
Expand Down
6 changes: 2 additions & 4 deletions modAL/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,15 +158,14 @@ def ranked_batch(classifier: Union[BaseLearner, BaseCommittee],
classifier.X_training
) if classifier.on_transformed else classifier.X_training[:]
instance_index_ranking = []

# The maximum number of records to sample.
ceiling = np.minimum(unlabeled.shape[0], n_instances) - len(instance_index_ranking)

# mask for unlabeled initialized as transparent
mask = np.ones(unlabeled.shape[0], bool)

for _ in range(ceiling):

# Receive the instance and corresponding index from our unlabeled copy that scores highest.
instance_index, instance, mask = select_instance(X_training=labeled, X_pool=unlabeled,
X_uncertainty=uncertainty_scores, mask=mask,
Expand Down Expand Up @@ -220,5 +219,4 @@ def uncertainty_batch_sampling(classifier: Union[BaseLearner, BaseCommittee],
"""
uncertainty = classifier_uncertainty(classifier, X, **uncertainty_measure_kwargs)
return ranked_batch(classifier, unlabeled=X, uncertainty_scores=uncertainty,
n_instances=n_instances, metric=metric, n_jobs=n_jobs)

n_instances=n_instances, metric=metric, n_jobs=n_jobs)
2 changes: 0 additions & 2 deletions modAL/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
========================================
"""

from sklearn.cluster.hierarchical import AgglomerativeClustering


class HierarchicalClustering:
def __init__(self):
Expand Down
5 changes: 3 additions & 2 deletions modAL/density.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ def similarize_distance(distance_measure: Callable) -> Callable:
Returns:
The information_density measure obtained from the given distance measure.
"""

def sim(*args, **kwargs):
return 1/(1 + distance_measure(*args, **kwargs))
return 1 / (1 + distance_measure(*args, **kwargs))

return sim

Expand Down Expand Up @@ -51,6 +52,6 @@ def information_density(X: modALinput, metric: Union[str, Callable] = 'euclidean
#
# return inf_density/X.shape[0]

similarity_mtx = 1/(1+pairwise_distances(X, X, metric=metric))
similarity_mtx = 1 / (1 + pairwise_distances(X, X, metric=metric))

return similarity_mtx.mean(axis=1)
4 changes: 2 additions & 2 deletions modAL/disagreement.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def vote_entropy(committee: BaseCommittee, X: modALinput, **predict_proba_kwargs
vote_counter = Counter(vote)

for class_idx, class_label in enumerate(committee.classes_):
p_vote[vote_idx, class_idx] = vote_counter[class_label]/n_learners
p_vote[vote_idx, class_idx] = vote_counter[class_label] / n_learners

entr = entropy(p_vote, axis=1)
return entr
Expand Down Expand Up @@ -184,7 +184,7 @@ def max_disagreement_sampling(committee: BaseCommittee, X: modALinput,


def max_std_sampling(regressor: BaseEstimator, X: modALinput,
n_instances: int = 1, random_tie_break=False,
n_instances: int = 1, random_tie_break=False,
**predict_kwargs) -> np.ndarray:
"""
Regressor standard deviation sampling strategy.
Expand Down
17 changes: 8 additions & 9 deletions modAL/dropout.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def mc_dropout_bald(classifier: BaseEstimator, X: modALinput, n_instances: int =
num_cycles: int = 50, sample_per_forward_pass: int = 1000,
logits_adaptor: Callable[[
torch.tensor, modALinput], torch.tensor] = default_logits_adaptor,
**mc_dropout_kwargs,) -> np.ndarray:
**mc_dropout_kwargs, ) -> np.ndarray:
"""
Mc-Dropout bald query strategy. Returns the indexes of the instances with the largest BALD
(Bayesian Active Learning by Disagreement) score calculated through the dropout cycles
Expand Down Expand Up @@ -337,8 +337,7 @@ def _entropy(proba: list) -> np.ndarray:

# calculate entropy per class and sum along dropout cycles
entropy_classes = entropy_sum(proba_stacked, axis=-1)
entropy = np.mean(entropy_classes, where=~
np.isnan(entropy_classes), axis=-1)
entropy = np.mean(entropy_classes, where=~np.isnan(entropy_classes), axis=-1)
return entropy


Expand Down Expand Up @@ -381,11 +380,11 @@ def _bald_divergence(proba: list) -> np.ndarray:

# entropy along dropout cycles
accumulated_entropy = entropy_sum(proba_stacked, axis=-1)
f_x = accumulated_entropy/len(proba)
f_x = accumulated_entropy / len(proba)

# score sums along dropout cycles
accumulated_score = np.sum(proba_stacked, axis=-1)
average_score = accumulated_score/len(proba)
average_score = accumulated_score / len(proba)
# expand dimension w/o data for entropy calculation
average_score = np.expand_dims(average_score, axis=-1)

Expand Down Expand Up @@ -419,9 +418,9 @@ def set_dropout_mode(model, dropout_layer_indexes: list, train_mode: bool):
for index in dropout_layer_indexes:
layer = modules[index]
if layer.__class__.__name__.startswith('Dropout'):
if True == train_mode:
if train_mode:
layer.train()
elif False == train_mode:
else:
layer.eval()
else:
raise KeyError(
Expand All @@ -430,7 +429,7 @@ def set_dropout_mode(model, dropout_layer_indexes: list, train_mode: bool):
else:
for module in modules:
if module.__class__.__name__.startswith('Dropout'):
if True == train_mode:
if train_mode:
module.train()
elif False == train_mode:
else:
module.eval()
12 changes: 5 additions & 7 deletions modAL/expected_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
Expected error reduction framework for active learning.
"""

from typing import Tuple

import numpy as np
from sklearn.base import clone
from sklearn.exceptions import NotFittedError
Expand All @@ -16,7 +14,7 @@


def expected_error_reduction(learner: ActiveLearner, X: modALinput, loss: str = 'binary',
p_subsample: np.float = 1.0, n_instances: int = 1,
p_subsample: float = 1.0, n_instances: int = 1,
random_tie_break: bool = False) -> np.ndarray:
"""
Expected error reduction query strategy.
Expand Down Expand Up @@ -63,16 +61,16 @@ def expected_error_reduction(learner: ActiveLearner, X: modALinput, loss: str =
# estimate the expected error
for y_idx, y in enumerate(possible_labels):
X_new = add_row(learner.X_training, x)
y_new = data_vstack((learner.y_training, np.array(y).reshape(1,)))
y_new = data_vstack((learner.y_training, np.array(y).reshape(1, )))

cloned_estimator.fit(X_new, y_new)
refitted_proba = cloned_estimator.predict_proba(X_reduced)
if loss is 'binary':
if loss == 'binary':
nloss = _proba_uncertainty(refitted_proba)
elif loss is 'log':
elif loss == 'log':
nloss = _proba_entropy(refitted_proba)

expected_error[x_idx] += np.sum(nloss)*X_proba[x_idx, y_idx]
expected_error[x_idx] += np.sum(nloss) * X_proba[x_idx, y_idx]

else:
expected_error[x_idx] = np.inf
Expand Down
40 changes: 19 additions & 21 deletions modAL/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,19 +171,17 @@ def query(self, X_pool, *query_args, return_metrics: bool = False, **query_kwarg
query_metrics: returns also the corresponding metrics, if return_metrics == True
"""

try:
query_result, query_metrics = self.query_strategy(
self, X_pool, *query_args, **query_kwargs)

except:
_query_strategy_result = self.query_strategy(
self, X_pool, *query_args, **query_kwargs)
if isinstance(_query_strategy_result, tuple) and len(_query_strategy_result) == 2:
query_result, query_metrics = _query_strategy_result
else:
query_result = _query_strategy_result
query_metrics = None
query_result = self.query_strategy(
self, X_pool, *query_args, **query_kwargs)

if return_metrics:
if query_metrics is None:
warnings.warn(
"The selected query strategy doesn't support return_metrics")
if query_metrics is None:
warnings.warn("The selected query strategy doesn't support return_metrics")
return query_result, retrieve_rows(X_pool, query_result), query_metrics
else:
return query_result, retrieve_rows(X_pool, query_result)
Expand Down Expand Up @@ -216,6 +214,7 @@ class BaseCommittee(ABC, BaseEstimator):
on_transformed: Whether to transform samples with the pipeline defined by each learner's estimator
when applying the query strategy.
"""

def __init__(self, learner_list: List[BaseLearner], query_strategy: Callable, on_transformed: bool = False) -> None:
assert type(learner_list) == list, 'learners must be supplied in a list'

Expand Down Expand Up @@ -313,19 +312,18 @@ def query(self, X_pool, return_metrics: bool = False, *query_args, **query_kwarg
query_metrics: returns also the corresponding metrics, if return_metrics == True
"""

try:
query_result, query_metrics = self.query_strategy(
self, X_pool, *query_args, **query_kwargs)

except:
_query_strategy_result = self.query_strategy(
self, X_pool, *query_args, **query_kwargs)
if isinstance(_query_strategy_result, tuple) and len(_query_strategy_result) == 2:
query_result, query_metrics = _query_strategy_result
else:
query_result = _query_strategy_result
query_metrics = None
query_result = self.query_strategy(
self, X_pool, *query_args, **query_kwargs)

if return_metrics:
if query_metrics is None:
if query_metrics is None:
warnings.warn(
"The selected query strategy doesn't support return_metrics")
"The selected query strategy doesn't support return_metrics")
return query_result, retrieve_rows(X_pool, query_result), query_metrics
else:
return query_result, retrieve_rows(X_pool, query_result)
Expand All @@ -341,7 +339,8 @@ def rebag(self, **fit_kwargs) -> None:
"""
self._fit_to_known(bootstrap=True, **fit_kwargs)

def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None:
def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False,
**fit_kwargs) -> None:
"""
Adds X and y to the known training data for each learner and retrains learners with the augmented dataset.
Args:
Expand All @@ -364,4 +363,3 @@ def predict(self, X: modALinput) -> Any:
@abc.abstractmethod
def vote(self, X: modALinput) -> Any: # TODO: clarify typing
pass

14 changes: 10 additions & 4 deletions modAL/models/learners.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ def fit(self, X: modALinput, y: modALinput, bootstrap: bool = False, **fit_kwarg
self.X_training, self.y_training = X, y
return self._fit_to_known(bootstrap=bootstrap, **fit_kwargs)

def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None:
def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False,
**fit_kwargs) -> None:
"""
Adds X and y to the known training data and retrains the predictor with the augmented dataset.

Expand Down Expand Up @@ -228,7 +229,8 @@ def fit(self, X: modALinput, y: modALinput, bootstrap: bool = False, **fit_kwarg
"""
return self._fit_on_new(X, y, bootstrap=bootstrap, **fit_kwargs)

def teach(self, X: modALinput, y: modALinput, warm_start: bool = True, bootstrap: bool = False, **fit_kwargs) -> None:
def teach(self, X: modALinput, y: modALinput, warm_start: bool = True, bootstrap: bool = False,
**fit_kwargs) -> None:
"""
Trains the predictor with the passed data (warm_start decides if params are reset or not).

Expand Down Expand Up @@ -404,7 +406,8 @@ def get_max(self) -> Tuple:
"""
return self.X_max, self.y_max

def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None:
def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False,
**fit_kwargs) -> None:
"""
Adds X and y to the known training data and retrains the predictor with the augmented dataset. This method also
keeps track of the maximum value encountered in the training data.
Expand Down Expand Up @@ -481,6 +484,7 @@ class Committee(BaseCommittee):
... y=iris['target'][query_idx].reshape(1, )
... )
"""

def __init__(self, learner_list: List[ActiveLearner], query_strategy: Callable = vote_entropy_sampling,
on_transformed: bool = False) -> None:
super().__init__(learner_list, query_strategy, on_transformed)
Expand Down Expand Up @@ -524,7 +528,8 @@ def fit(self, X: modALinput, y: modALinput, **fit_kwargs) -> 'BaseCommittee':
super().fit(X, y, **fit_kwargs)
self._set_classes()

def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None:
def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False,
**fit_kwargs) -> None:
"""
Adds X and y to the known training data for each learner and retrains learners with the augmented dataset.
Args:
Expand Down Expand Up @@ -678,6 +683,7 @@ class CommitteeRegressor(BaseCommittee):
... query_idx, query_instance = committee.query(X.reshape(-1, 1))
... committee.teach(X[query_idx].reshape(-1, 1), y[query_idx].reshape(-1, 1))
"""

def __init__(self, learner_list: List[ActiveLearner], query_strategy: Callable = max_std_sampling,
on_transformed: bool = False) -> None:
super().__init__(learner_list, query_strategy, on_transformed)
Expand Down
14 changes: 6 additions & 8 deletions modAL/multilabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ def _SVM_loss(multiclass_classifier: ActiveLearner,
np.ndarray of shape (n_instances, ), losses for the instances in X.

"""
predictions = 2*multiclass_classifier.predict(X)-1
predictions = 2 * multiclass_classifier.predict(X) - 1
n_classes = len(multiclass_classifier.classes_)

if most_certain_classes is None:
cls_mtx = 2*np.eye(n_classes, n_classes) - 1
loss_mtx = np.maximum(1-np.dot(predictions, cls_mtx), 0)
cls_mtx = 2 * np.eye(n_classes, n_classes) - 1
loss_mtx = np.maximum(1 - np.dot(predictions, cls_mtx), 0)
return loss_mtx.mean(axis=1)
else:
cls_mtx = -np.ones(shape=(len(X), n_classes))
Expand Down Expand Up @@ -74,7 +74,6 @@ def SVM_binary_minimum(classifier: ActiveLearner, X_pool: modALinput,

def max_loss(classifier: OneVsRestClassifier, X_pool: modALinput,
n_instances: int = 1, random_tie_break: bool = False) -> np.ndarray:

"""
Max Loss query strategy for SVM multilabel classification.

Expand Down Expand Up @@ -217,13 +216,12 @@ def max_score(classifier: OneVsRestClassifier, X_pool: modALinput,

Returns:
The index of the instance from X_pool chosen to be labelled.
The classwise maximum metric of the chosen instances.

The classwise maximum metric of the chosen instances.
"""

classwise_confidence = classifier.predict_proba(X_pool)
classwise_predictions = classifier.predict(X_pool)
classwise_scores = classwise_confidence*(classwise_predictions - 1/2)
classwise_scores = classwise_confidence * (classwise_predictions - 1 / 2)
classwise_max = np.max(classwise_scores, axis=1)

if not random_tie_break:
Expand Down Expand Up @@ -255,7 +253,7 @@ def avg_score(classifier: OneVsRestClassifier, X_pool: modALinput,

classwise_confidence = classifier.predict_proba(X_pool)
classwise_predictions = classifier.predict(X_pool)
classwise_scores = classwise_confidence*(classwise_predictions-1/2)
classwise_scores = classwise_confidence * (classwise_predictions - 1 / 2)
classwise_mean = np.mean(classwise_scores, axis=1)

if not random_tie_break:
Expand Down
6 changes: 3 additions & 3 deletions modAL/uncertainty.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def classifier_uncertainty(classifier: BaseEstimator, X: modALinput, **predict_p
try:
classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
except NotFittedError:
return np.ones(shape=(X.shape[0], ))
return np.ones(shape=(X.shape[0],))

# for each point, select the maximum uncertainty
uncertainty = 1 - np.max(classwise_uncertainty, axis=1)
Expand All @@ -99,7 +99,7 @@ def classifier_margin(classifier: BaseEstimator, X: modALinput, **predict_proba_
try:
classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
except NotFittedError:
return np.zeros(shape=(X.shape[0], ))
return np.zeros(shape=(X.shape[0],))

if classwise_uncertainty.shape[1] == 1:
return np.zeros(shape=(classwise_uncertainty.shape[0],))
Expand All @@ -125,7 +125,7 @@ def classifier_entropy(classifier: BaseEstimator, X: modALinput, **predict_proba
try:
classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
except NotFittedError:
return np.zeros(shape=(X.shape[0], ))
return np.zeros(shape=(X.shape[0],))

return np.transpose(entropy(np.transpose(classwise_uncertainty)))

Expand Down
Loading