diff --git a/modAL/acquisition.py b/modAL/acquisition.py index 8aa1fac..f5001c0 100644 --- a/modAL/acquisition.py +++ b/modAL/acquisition.py @@ -13,16 +13,16 @@ def PI(mean, std, max_val, tradeoff): - return ndtr((mean - max_val - tradeoff)/std) + return ndtr((mean - max_val - tradeoff) / std) def EI(mean, std, max_val, tradeoff): z = (mean - max_val - tradeoff) / std - return (mean - max_val - tradeoff)*ndtr(z) + std*norm.pdf(z) + return (mean - max_val - tradeoff) * ndtr(z) + std * norm.pdf(z) def UCB(mean, std, beta): - return mean + beta*std + return mean + beta * std """ diff --git a/modAL/batch.py b/modAL/batch.py index d85afed..6338b54 100644 --- a/modAL/batch.py +++ b/modAL/batch.py @@ -158,7 +158,7 @@ def ranked_batch(classifier: Union[BaseLearner, BaseCommittee], classifier.X_training ) if classifier.on_transformed else classifier.X_training[:] instance_index_ranking = [] - + # The maximum number of records to sample. ceiling = np.minimum(unlabeled.shape[0], n_instances) - len(instance_index_ranking) @@ -166,7 +166,6 @@ def ranked_batch(classifier: Union[BaseLearner, BaseCommittee], mask = np.ones(unlabeled.shape[0], bool) for _ in range(ceiling): - # Receive the instance and corresponding index from our unlabeled copy that scores highest. instance_index, instance, mask = select_instance(X_training=labeled, X_pool=unlabeled, X_uncertainty=uncertainty_scores, mask=mask, @@ -220,5 +219,4 @@ def uncertainty_batch_sampling(classifier: Union[BaseLearner, BaseCommittee], """ uncertainty = classifier_uncertainty(classifier, X, **uncertainty_measure_kwargs) return ranked_batch(classifier, unlabeled=X, uncertainty_scores=uncertainty, - n_instances=n_instances, metric=metric, n_jobs=n_jobs) - + n_instances=n_instances, metric=metric, n_jobs=n_jobs) diff --git a/modAL/cluster.py b/modAL/cluster.py index 652506d..17ee769 100755 --- a/modAL/cluster.py +++ b/modAL/cluster.py @@ -4,8 +4,6 @@ ======================================== """ -from sklearn.cluster.hierarchical import AgglomerativeClustering - class HierarchicalClustering: def __init__(self): diff --git a/modAL/density.py b/modAL/density.py index 267b478..c4e4394 100644 --- a/modAL/density.py +++ b/modAL/density.py @@ -20,8 +20,9 @@ def similarize_distance(distance_measure: Callable) -> Callable: Returns: The information_density measure obtained from the given distance measure. """ + def sim(*args, **kwargs): - return 1/(1 + distance_measure(*args, **kwargs)) + return 1 / (1 + distance_measure(*args, **kwargs)) return sim @@ -51,6 +52,6 @@ def information_density(X: modALinput, metric: Union[str, Callable] = 'euclidean # # return inf_density/X.shape[0] - similarity_mtx = 1/(1+pairwise_distances(X, X, metric=metric)) + similarity_mtx = 1 / (1 + pairwise_distances(X, X, metric=metric)) return similarity_mtx.mean(axis=1) diff --git a/modAL/disagreement.py b/modAL/disagreement.py index 22430b4..ccdb76e 100644 --- a/modAL/disagreement.py +++ b/modAL/disagreement.py @@ -39,7 +39,7 @@ def vote_entropy(committee: BaseCommittee, X: modALinput, **predict_proba_kwargs vote_counter = Counter(vote) for class_idx, class_label in enumerate(committee.classes_): - p_vote[vote_idx, class_idx] = vote_counter[class_label]/n_learners + p_vote[vote_idx, class_idx] = vote_counter[class_label] / n_learners entr = entropy(p_vote, axis=1) return entr @@ -184,7 +184,7 @@ def max_disagreement_sampling(committee: BaseCommittee, X: modALinput, def max_std_sampling(regressor: BaseEstimator, X: modALinput, - n_instances: int = 1, random_tie_break=False, + n_instances: int = 1, random_tie_break=False, **predict_kwargs) -> np.ndarray: """ Regressor standard deviation sampling strategy. diff --git a/modAL/dropout.py b/modAL/dropout.py index c6c9cb1..89c97ac 100644 --- a/modAL/dropout.py +++ b/modAL/dropout.py @@ -21,7 +21,7 @@ def mc_dropout_bald(classifier: BaseEstimator, X: modALinput, n_instances: int = num_cycles: int = 50, sample_per_forward_pass: int = 1000, logits_adaptor: Callable[[ torch.tensor, modALinput], torch.tensor] = default_logits_adaptor, - **mc_dropout_kwargs,) -> np.ndarray: + **mc_dropout_kwargs, ) -> np.ndarray: """ Mc-Dropout bald query strategy. Returns the indexes of the instances with the largest BALD (Bayesian Active Learning by Disagreement) score calculated through the dropout cycles @@ -337,8 +337,7 @@ def _entropy(proba: list) -> np.ndarray: # calculate entropy per class and sum along dropout cycles entropy_classes = entropy_sum(proba_stacked, axis=-1) - entropy = np.mean(entropy_classes, where=~ - np.isnan(entropy_classes), axis=-1) + entropy = np.mean(entropy_classes, where=~np.isnan(entropy_classes), axis=-1) return entropy @@ -381,11 +380,11 @@ def _bald_divergence(proba: list) -> np.ndarray: # entropy along dropout cycles accumulated_entropy = entropy_sum(proba_stacked, axis=-1) - f_x = accumulated_entropy/len(proba) + f_x = accumulated_entropy / len(proba) # score sums along dropout cycles accumulated_score = np.sum(proba_stacked, axis=-1) - average_score = accumulated_score/len(proba) + average_score = accumulated_score / len(proba) # expand dimension w/o data for entropy calculation average_score = np.expand_dims(average_score, axis=-1) @@ -419,9 +418,9 @@ def set_dropout_mode(model, dropout_layer_indexes: list, train_mode: bool): for index in dropout_layer_indexes: layer = modules[index] if layer.__class__.__name__.startswith('Dropout'): - if True == train_mode: + if train_mode: layer.train() - elif False == train_mode: + else: layer.eval() else: raise KeyError( @@ -430,7 +429,7 @@ def set_dropout_mode(model, dropout_layer_indexes: list, train_mode: bool): else: for module in modules: if module.__class__.__name__.startswith('Dropout'): - if True == train_mode: + if train_mode: module.train() - elif False == train_mode: + else: module.eval() diff --git a/modAL/expected_error.py b/modAL/expected_error.py index d7b3611..98f7643 100644 --- a/modAL/expected_error.py +++ b/modAL/expected_error.py @@ -2,8 +2,6 @@ Expected error reduction framework for active learning. """ -from typing import Tuple - import numpy as np from sklearn.base import clone from sklearn.exceptions import NotFittedError @@ -16,7 +14,7 @@ def expected_error_reduction(learner: ActiveLearner, X: modALinput, loss: str = 'binary', - p_subsample: np.float = 1.0, n_instances: int = 1, + p_subsample: float = 1.0, n_instances: int = 1, random_tie_break: bool = False) -> np.ndarray: """ Expected error reduction query strategy. @@ -63,16 +61,16 @@ def expected_error_reduction(learner: ActiveLearner, X: modALinput, loss: str = # estimate the expected error for y_idx, y in enumerate(possible_labels): X_new = add_row(learner.X_training, x) - y_new = data_vstack((learner.y_training, np.array(y).reshape(1,))) + y_new = data_vstack((learner.y_training, np.array(y).reshape(1, ))) cloned_estimator.fit(X_new, y_new) refitted_proba = cloned_estimator.predict_proba(X_reduced) - if loss is 'binary': + if loss == 'binary': nloss = _proba_uncertainty(refitted_proba) - elif loss is 'log': + elif loss == 'log': nloss = _proba_entropy(refitted_proba) - expected_error[x_idx] += np.sum(nloss)*X_proba[x_idx, y_idx] + expected_error[x_idx] += np.sum(nloss) * X_proba[x_idx, y_idx] else: expected_error[x_idx] = np.inf diff --git a/modAL/models/base.py b/modAL/models/base.py index 57c8b81..d1bf101 100644 --- a/modAL/models/base.py +++ b/modAL/models/base.py @@ -171,19 +171,17 @@ def query(self, X_pool, *query_args, return_metrics: bool = False, **query_kwarg query_metrics: returns also the corresponding metrics, if return_metrics == True """ - try: - query_result, query_metrics = self.query_strategy( - self, X_pool, *query_args, **query_kwargs) - - except: + _query_strategy_result = self.query_strategy( + self, X_pool, *query_args, **query_kwargs) + if isinstance(_query_strategy_result, tuple) and len(_query_strategy_result) == 2: + query_result, query_metrics = _query_strategy_result + else: + query_result = _query_strategy_result query_metrics = None - query_result = self.query_strategy( - self, X_pool, *query_args, **query_kwargs) if return_metrics: - if query_metrics is None: - warnings.warn( - "The selected query strategy doesn't support return_metrics") + if query_metrics is None: + warnings.warn("The selected query strategy doesn't support return_metrics") return query_result, retrieve_rows(X_pool, query_result), query_metrics else: return query_result, retrieve_rows(X_pool, query_result) @@ -216,6 +214,7 @@ class BaseCommittee(ABC, BaseEstimator): on_transformed: Whether to transform samples with the pipeline defined by each learner's estimator when applying the query strategy. """ + def __init__(self, learner_list: List[BaseLearner], query_strategy: Callable, on_transformed: bool = False) -> None: assert type(learner_list) == list, 'learners must be supplied in a list' @@ -313,19 +312,18 @@ def query(self, X_pool, return_metrics: bool = False, *query_args, **query_kwarg query_metrics: returns also the corresponding metrics, if return_metrics == True """ - try: - query_result, query_metrics = self.query_strategy( - self, X_pool, *query_args, **query_kwargs) - - except: + _query_strategy_result = self.query_strategy( + self, X_pool, *query_args, **query_kwargs) + if isinstance(_query_strategy_result, tuple) and len(_query_strategy_result) == 2: + query_result, query_metrics = _query_strategy_result + else: + query_result = _query_strategy_result query_metrics = None - query_result = self.query_strategy( - self, X_pool, *query_args, **query_kwargs) if return_metrics: - if query_metrics is None: + if query_metrics is None: warnings.warn( - "The selected query strategy doesn't support return_metrics") + "The selected query strategy doesn't support return_metrics") return query_result, retrieve_rows(X_pool, query_result), query_metrics else: return query_result, retrieve_rows(X_pool, query_result) @@ -341,7 +339,8 @@ def rebag(self, **fit_kwargs) -> None: """ self._fit_to_known(bootstrap=True, **fit_kwargs) - def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None: + def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, + **fit_kwargs) -> None: """ Adds X and y to the known training data for each learner and retrains learners with the augmented dataset. Args: @@ -364,4 +363,3 @@ def predict(self, X: modALinput) -> Any: @abc.abstractmethod def vote(self, X: modALinput) -> Any: # TODO: clarify typing pass - diff --git a/modAL/models/learners.py b/modAL/models/learners.py index b7dac72..83b4a8c 100644 --- a/modAL/models/learners.py +++ b/modAL/models/learners.py @@ -157,7 +157,8 @@ def fit(self, X: modALinput, y: modALinput, bootstrap: bool = False, **fit_kwarg self.X_training, self.y_training = X, y return self._fit_to_known(bootstrap=bootstrap, **fit_kwargs) - def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None: + def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, + **fit_kwargs) -> None: """ Adds X and y to the known training data and retrains the predictor with the augmented dataset. @@ -228,7 +229,8 @@ def fit(self, X: modALinput, y: modALinput, bootstrap: bool = False, **fit_kwarg """ return self._fit_on_new(X, y, bootstrap=bootstrap, **fit_kwargs) - def teach(self, X: modALinput, y: modALinput, warm_start: bool = True, bootstrap: bool = False, **fit_kwargs) -> None: + def teach(self, X: modALinput, y: modALinput, warm_start: bool = True, bootstrap: bool = False, + **fit_kwargs) -> None: """ Trains the predictor with the passed data (warm_start decides if params are resetted or not). @@ -404,7 +406,8 @@ def get_max(self) -> Tuple: """ return self.X_max, self.y_max - def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None: + def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, + **fit_kwargs) -> None: """ Adds X and y to the known training data and retrains the predictor with the augmented dataset. This method also keeps track of the maximum value encountered in the training data. @@ -481,6 +484,7 @@ class Committee(BaseCommittee): ... y=iris['target'][query_idx].reshape(1, ) ... ) """ + def __init__(self, learner_list: List[ActiveLearner], query_strategy: Callable = vote_entropy_sampling, on_transformed: bool = False) -> None: super().__init__(learner_list, query_strategy, on_transformed) @@ -524,7 +528,8 @@ def fit(self, X: modALinput, y: modALinput, **fit_kwargs) -> 'BaseCommittee': super().fit(X, y, **fit_kwargs) self._set_classes() - def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None: + def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, + **fit_kwargs) -> None: """ Adds X and y to the known training data for each learner and retrains learners with the augmented dataset. Args: @@ -678,6 +683,7 @@ class CommitteeRegressor(BaseCommittee): ... query_idx, query_instance = committee.query(X.reshape(-1, 1)) ... committee.teach(X[query_idx].reshape(-1, 1), y[query_idx].reshape(-1, 1)) """ + def __init__(self, learner_list: List[ActiveLearner], query_strategy: Callable = max_std_sampling, on_transformed: bool = False) -> None: super().__init__(learner_list, query_strategy, on_transformed) diff --git a/modAL/multilabel.py b/modAL/multilabel.py index c908674..5f3a4ce 100644 --- a/modAL/multilabel.py +++ b/modAL/multilabel.py @@ -25,12 +25,12 @@ def _SVM_loss(multiclass_classifier: ActiveLearner, np.ndarray of shape (n_instances, ), losses for the instances in X. """ - predictions = 2*multiclass_classifier.predict(X)-1 + predictions = 2 * multiclass_classifier.predict(X) - 1 n_classes = len(multiclass_classifier.classes_) if most_certain_classes is None: - cls_mtx = 2*np.eye(n_classes, n_classes) - 1 - loss_mtx = np.maximum(1-np.dot(predictions, cls_mtx), 0) + cls_mtx = 2 * np.eye(n_classes, n_classes) - 1 + loss_mtx = np.maximum(1 - np.dot(predictions, cls_mtx), 0) return loss_mtx.mean(axis=1) else: cls_mtx = -np.ones(shape=(len(X), n_classes)) @@ -74,7 +74,6 @@ def SVM_binary_minimum(classifier: ActiveLearner, X_pool: modALinput, def max_loss(classifier: OneVsRestClassifier, X_pool: modALinput, n_instances: int = 1, random_tie_break: bool = False) -> np.ndarray: - """ Max Loss query strategy for SVM multilabel classification. @@ -217,13 +216,12 @@ def max_score(classifier: OneVsRestClassifier, X_pool: modALinput, Returns: The index of the instance from X_pool chosen to be labelled. - The classwise maximum metric of the chosen instances. - + The classwise maximum metric of the chosen instances. """ classwise_confidence = classifier.predict_proba(X_pool) classwise_predictions = classifier.predict(X_pool) - classwise_scores = classwise_confidence*(classwise_predictions - 1/2) + classwise_scores = classwise_confidence * (classwise_predictions - 1 / 2) classwise_max = np.max(classwise_scores, axis=1) if not random_tie_break: @@ -255,7 +253,7 @@ def avg_score(classifier: OneVsRestClassifier, X_pool: modALinput, classwise_confidence = classifier.predict_proba(X_pool) classwise_predictions = classifier.predict(X_pool) - classwise_scores = classwise_confidence*(classwise_predictions-1/2) + classwise_scores = classwise_confidence * (classwise_predictions - 1 / 2) classwise_mean = np.mean(classwise_scores, axis=1) if not random_tie_break: diff --git a/modAL/uncertainty.py b/modAL/uncertainty.py index d0f7b37..8b57d1b 100644 --- a/modAL/uncertainty.py +++ b/modAL/uncertainty.py @@ -76,7 +76,7 @@ def classifier_uncertainty(classifier: BaseEstimator, X: modALinput, **predict_p try: classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs) except NotFittedError: - return np.ones(shape=(X.shape[0], )) + return np.ones(shape=(X.shape[0],)) # for each point, select the maximum uncertainty uncertainty = 1 - np.max(classwise_uncertainty, axis=1) @@ -99,7 +99,7 @@ def classifier_margin(classifier: BaseEstimator, X: modALinput, **predict_proba_ try: classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs) except NotFittedError: - return np.zeros(shape=(X.shape[0], )) + return np.zeros(shape=(X.shape[0],)) if classwise_uncertainty.shape[1] == 1: return np.zeros(shape=(classwise_uncertainty.shape[0],)) @@ -125,7 +125,7 @@ def classifier_entropy(classifier: BaseEstimator, X: modALinput, **predict_proba try: classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs) except NotFittedError: - return np.zeros(shape=(X.shape[0], )) + return np.zeros(shape=(X.shape[0],)) return np.transpose(entropy(np.transpose(classwise_uncertainty))) diff --git a/modAL/utils/combination.py b/modAL/utils/combination.py index eb2b4d2..46277c9 100644 --- a/modAL/utils/combination.py +++ b/modAL/utils/combination.py @@ -29,7 +29,7 @@ def make_linear_combination(*functions: Callable, weights: Optional[Sequence] = 'same as the number of given functions' def linear_combination(*args, **kwargs): - return sum((weights[i]*functions[i](*args, **kwargs) for i in range(len(weights)))) + return sum((weights[i] * functions[i](*args, **kwargs) for i in range(len(weights)))) return linear_combination @@ -55,8 +55,8 @@ def make_product(*functions: Callable, exponents: Optional[Sequence] = None) -> 'same as the number of given functions' def product_function(*args, **kwargs): - return np.prod([functions[i](*args, **kwargs)**exponents[i] - for i in range(len(exponents))], axis=0) + return np.prod([functions[i](*args, **kwargs) ** exponents[i] + for i in range(len(exponents))], axis=0) return product_function @@ -75,6 +75,7 @@ def make_query_strategy(utility_measure: Callable, selector: Callable) -> Callab Returns: A function which returns queried instances given a classifier and an unlabelled pool. """ + def query_strategy(classifier: BaseEstimator, X: modALinput) -> Tuple: utility = utility_measure(classifier, X) return selector(utility) diff --git a/modAL/utils/data.py b/modAL/utils/data.py index 3e707ff..e37a384 100644 --- a/modAL/utils/data.py +++ b/modAL/utils/data.py @@ -1,3 +1,4 @@ +import sys from typing import List, Sequence, Union import numpy as np @@ -6,7 +7,7 @@ try: import torch -except: +except ImportError: pass @@ -23,22 +24,34 @@ def data_vstack(blocks: Sequence[modALinput]) -> modALinput: Returns: New sequence of vertically stacked elements. """ + + if not blocks: + return blocks + + types = {type(block) for block in blocks} + if any([sp.issparse(b) for b in blocks]): return sp.vstack(blocks) - elif isinstance(blocks[0], pd.DataFrame): - return blocks[0].append(blocks[1:]) - elif isinstance(blocks[0], np.ndarray): + elif types - {pd.DataFrame, pd.Series} == set(): + def _block_to_df(block): + if isinstance(block, pd.DataFrame): + return block + elif isinstance(block, pd.Series): + # interpret series as a row + return block.to_frame().T + else: + raise TypeError(f"Expected DataFrame or Series but encountered {type(block)}") + + return pd.concat([_block_to_df(block) for block in blocks]) + elif types == {np.ndarray}: return np.concatenate(blocks) - elif isinstance(blocks[0], list): + elif types == {list}: return np.concatenate(blocks).tolist() - try: - if torch.is_tensor(blocks[0]): - return torch.cat(blocks) - except: - pass + if 'torch' in sys.modules and all(torch.is_tensor(block) for block in blocks): + return torch.cat(blocks) - raise TypeError("%s datatype is not supported" % type(blocks[0])) + raise TypeError("%s datatype(s) not supported" % types) def data_hstack(blocks: Sequence[modALinput]) -> modALinput: @@ -51,22 +64,25 @@ def data_hstack(blocks: Sequence[modALinput]) -> modALinput: Returns: New sequence of horizontally stacked elements. """ + + if not blocks: + return blocks + + types = {type(block) for block in blocks} + if any([sp.issparse(b) for b in blocks]): return sp.hstack(blocks) - elif isinstance(blocks[0], pd.DataFrame): + elif types == {pd.DataFrame}: pd.concat(blocks, axis=1) - elif isinstance(blocks[0], np.ndarray): + elif types == {np.ndarray}: return np.hstack(blocks) - elif isinstance(blocks[0], list): + elif types == {list}: return np.hstack(blocks).tolist() - try: - if torch.is_tensor(blocks[0]): - return torch.cat(blocks, dim=1) - except: - pass + if 'torch' in sys.modules and torch.is_tensor(blocks[0]): + return torch.cat(blocks, dim=1) - TypeError("%s datatype is not supported" % type(blocks[0])) + raise TypeError("%s datatype(s) not supported" % types) def add_row(X: modALinput, row: modALinput): @@ -87,7 +103,7 @@ def add_row(X: modALinput, row: modALinput): def retrieve_rows( - X: modALinput, I: Union[int, List[int], np.ndarray] + X: modALinput, I: Union[int, List[int], np.ndarray] # noqa: E741 ) -> Union[sp.csc_matrix, np.ndarray, pd.DataFrame]: """ Returns the rows I from the data set X @@ -100,30 +116,32 @@ def retrieve_rows( try: return X[I] - except: - if sp.issparse(X): - # Out of the sparse matrix formats (sp.csc_matrix, sp.csr_matrix, sp.bsr_matrix, - # sp.lil_matrix, sp.dok_matrix, sp.coo_matrix, sp.dia_matrix), only sp.bsr_matrix, sp.coo_matrix - # and sp.dia_matrix don't support indexing and need to be converted to a sparse format - # that does support indexing. It seems conversion to CSR is currently most efficient. - - sp_format = X.getformat() - return X.tocsr()[I].asformat(sp_format) - elif isinstance(X, pd.DataFrame): - return X.iloc[I] - elif isinstance(X, list): - return np.array(X)[I].tolist() - elif isinstance(X, dict): - X_return = {} - for key, value in X.items(): - X_return[key] = retrieve_rows(value, I) - return X_return + except (KeyError, IndexError, TypeError): + pass + + if sp.issparse(X): + # Out of the sparse matrix formats (sp.csc_matrix, sp.csr_matrix, sp.bsr_matrix, + # sp.lil_matrix, sp.dok_matrix, sp.coo_matrix, sp.dia_matrix), only sp.bsr_matrix, sp.coo_matrix + # and sp.dia_matrix don't support indexing and need to be converted to a sparse format + # that does support indexing. It seems conversion to CSR is currently most efficient. + + sp_format = X.getformat() + return X.tocsr()[I].asformat(sp_format) + elif isinstance(X, pd.DataFrame): + return X.iloc[I] + elif isinstance(X, list): + return np.array(X)[I].tolist() + elif isinstance(X, dict): + X_return = {} + for key, value in X.items(): + X_return[key] = retrieve_rows(value, I) + return X_return raise TypeError("%s datatype is not supported" % type(X)) def drop_rows( - X: modALinput, I: Union[int, List[int], np.ndarray] + X: modALinput, I: Union[int, List[int], np.ndarray] # noqa: E741 ) -> Union[sp.csc_matrix, np.ndarray, pd.DataFrame]: """ Returns X without the row(s) at index/indices I @@ -139,12 +157,6 @@ def drop_rows( elif isinstance(X, list): return np.delete(X, I, axis=0).tolist() - try: - if torch.is_tensor(blocks[0]): - return torch.cat(blocks) - except: - X[[True if row not in I else False for row in range(X.size(0))]] - raise TypeError("%s datatype is not supported" % type(X)) @@ -173,11 +185,9 @@ def data_shape(X: modALinput): """ Returns the shape of the data set X """ - try: - # scipy.sparse, torch, pandas and numpy all support .shape + if isinstance(X, list): + return np.array(X).shape + elif hasattr(X, "shape"): # scipy.sparse, torch, pandas and numpy all support .shape return X.shape - except: - if isinstance(X, list): - return np.array(X).shape raise TypeError("%s datatype is not supported" % type(X)) diff --git a/modAL/utils/selection.py b/modAL/utils/selection.py index 6c9c2d9..a869609 100644 --- a/modAL/utils/selection.py +++ b/modAL/utils/selection.py @@ -25,8 +25,7 @@ def shuffled_argmax(values: np.ndarray, n_instances: int = 1) -> np.ndarray: # getting the n_instances best instance # since mergesort is used, the shuffled order is preserved - sorted_query_idx = np.argsort(shuffled_values, kind='mergesort')[ - len(shuffled_values)-n_instances:] + sorted_query_idx = np.argsort(shuffled_values, kind='mergesort')[len(shuffled_values) - n_instances:] # inverting the shuffle query_idx = shuffled_idx[sorted_query_idx] @@ -64,7 +63,7 @@ def multi_argmax(values: np.ndarray, n_instances: int = 1) -> np.ndarray: """ assert n_instances <= values.shape[0], 'n_instances must be less or equal than the size of utility' - max_idx = np.argpartition(-values, n_instances-1, axis=0)[:n_instances] + max_idx = np.argpartition(-values, n_instances - 1, axis=0)[:n_instances] return max_idx, values[max_idx] @@ -99,5 +98,5 @@ def weighted_random(weights: np.ndarray, n_instances: int = 1) -> np.ndarray: assert weight_sum > 0, 'the sum of weights must be larger than zero' random_idx = np.random.choice( - range(len(weights)), size=n_instances, p=weights/weight_sum, replace=False) + range(len(weights)), size=n_instances, p=weights / weight_sum, replace=False) return random_idx diff --git a/modAL/utils/validation.py b/modAL/utils/validation.py index 93667db..a367f27 100644 --- a/modAL/utils/validation.py +++ b/modAL/utils/validation.py @@ -21,7 +21,7 @@ def check_class_labels(*args: BaseEstimator) -> bool: raise NotFittedError('Not all estimators are fitted. Fit all estimators before using this method.') for classifier_idx in range(len(args) - 1): - if not np.array_equal(classes_[classifier_idx], classes_[classifier_idx+1]): + if not np.array_equal(classes_[classifier_idx], classes_[classifier_idx + 1]): return False return True diff --git a/rtd_requirements.txt b/rtd_requirements.txt index db0bd81..8108132 100644 --- a/rtd_requirements.txt +++ b/rtd_requirements.txt @@ -1,7 +1,8 @@ -numpy==1.20.0 +numpy scipy scikit-learn ipykernel nbsphinx pandas skorch +torch \ No newline at end of file diff --git a/tests/core_tests.py b/tests/core_tests.py index e3113c4..b2d67f8 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -115,11 +115,8 @@ def dummy_function(X_in): else: true_result = n_functions*np.ones(shape=(n_samples, 1)) - try: - np.testing.assert_almost_equal( - linear_combination(X_in), true_result) - except: - linear_combination(X_in) + np.testing.assert_almost_equal( + linear_combination(X_in), true_result) def test_product(self): for n_dim in range(1, 5): @@ -476,15 +473,11 @@ def test_KL_max_disagreement(self): true_KL_disagreement = np.zeros(shape=(n_samples, )) - try: - np.testing.assert_array_almost_equal( - true_KL_disagreement, - modAL.disagreement.KL_max_disagreement( - committee, np.random.rand(n_samples, 1)) - ) - except: + np.testing.assert_array_almost_equal( + true_KL_disagreement, modAL.disagreement.KL_max_disagreement( committee, np.random.rand(n_samples, 1)) + ) # 2. unfitted committee committee = mock.MockCommittee(fitted=False) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..8183cba --- /dev/null +++ b/tox.ini @@ -0,0 +1,2 @@ +[flake8] +ignore = E501,W291