From 17fb95fd56a70357e295b057e9a43e9b10cde4cb Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Thu, 29 Feb 2024 13:28:33 +0100 Subject: [PATCH] Try to fix lint workflow --- .github/workflows/lint.yaml | 8 +++++--- .pre-commit-config.yaml | 12 +++++++----- dask_ml/_partial.py | 2 +- dask_ml/metrics/pairwise.py | 6 +++--- dask_ml/metrics/regression.py | 1 - dask_ml/model_selection/_hyperband.py | 1 - dask_ml/model_selection/_incremental.py | 1 - dask_ml/model_selection/_search.py | 5 ++--- dask_ml/preprocessing/_block_transformer.py | 2 +- dask_ml/preprocessing/data.py | 4 ---- docs/dimensions.py | 3 +-- tests/feature_extraction/test_text.py | 2 +- tests/model_selection/test_hyperband.py | 2 +- tests/model_selection/test_pytorch.py | 1 - tests/preprocessing/test_label.py | 1 - tests/test_pca.py | 2 -- 16 files changed, 22 insertions(+), 31 deletions(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 50744f2e6..7ec4823ee 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -5,6 +5,8 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - - uses: pre-commit/action@v3.0.0 \ No newline at end of file + - uses: actions/checkout@v4.1.1 + - uses: actions/setup-python@v5 + with: + python-version: '3.9' + - uses: pre-commit/action@v3.0.1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 37d43ad5f..e2319a077 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,17 +1,19 @@ repos: -- repo: https://github.com/python/black - rev: 22.3.0 +- repo: https://github.com/psf/black + rev: 23.12.1 hooks: - id: black language_version: python3 + args: + - --target-version=py39 - repo: https://github.com/pycqa/flake8 - rev: 3.7.9 + rev: 7.0.0 hooks: - id: flake8 language_version: python3 args: ["--ignore=E501,W503,E203,E741,E731"] -- repo: https://github.com/timothycrosley/isort - rev: 4.3.21 +- repo: https://github.com/pycqa/isort + rev: 5.13.2 hooks: - id: isort language_version: python3 diff --git a/dask_ml/_partial.py b/dask_ml/_partial.py index 943237336..4a126195e 100644 --- a/dask_ml/_partial.py +++ b/dask_ml/_partial.py @@ -29,7 +29,7 @@ def fit( shuffle_blocks=True, random_state=None, assume_equal_chunks=False, - **kwargs + **kwargs, ): """Fit scikit learn model against dask arrays diff --git a/dask_ml/metrics/pairwise.py b/dask_ml/metrics/pairwise.py index 7f28a6a36..ddbc3f877 100644 --- a/dask_ml/metrics/pairwise.py +++ b/dask_ml/metrics/pairwise.py @@ -51,7 +51,7 @@ def pairwise_distances( Y: ArrayLike, metric: Union[str, Callable[[ArrayLike, ArrayLike], float]] = "euclidean", n_jobs: Optional[int] = None, - **kwargs: Any + **kwargs: Any, ): if isinstance(Y, da.Array): raise TypeError("`Y` must be a numpy array") @@ -62,7 +62,7 @@ def pairwise_distances( dtype=float, chunks=chunks, metric=metric, - **kwargs + **kwargs, ) @@ -203,7 +203,7 @@ def pairwise_kernels( metric: Union[str, Callable[[ArrayLike, ArrayLike], float]] = "linear", filter_params: bool = False, n_jobs: Optional[int] = 1, - **kwds + **kwds, ): from sklearn.gaussian_process.kernels import Kernel as GPKernel diff --git a/dask_ml/metrics/regression.py b/dask_ml/metrics/regression.py index db95f2f0c..0c1b21b59 100644 --- a/dask_ml/metrics/regression.py +++ b/dask_ml/metrics/regression.py @@ -187,7 +187,6 @@ def mean_squared_log_error( multioutput: Optional[str] = "uniform_average", compute: bool = True, ) -> ArrayLike: - result = mean_squared_error( np.log1p(y_true), np.log1p(y_pred), diff --git a/dask_ml/model_selection/_hyperband.py b/dask_ml/model_selection/_hyperband.py index 90beda8b8..d41354815 100644 --- a/dask_ml/model_selection/_hyperband.py +++ b/dask_ml/model_selection/_hyperband.py @@ -530,7 +530,6 @@ def _get_meta( SHAs: Dict[int, SuccessiveHalvingSearchCV], key: Callable[[int, int], str], ) -> Tuple[List[Dict[str, Any]], Dict[str, List[Dict[str, Any]]]]: - meta_ = [] history_ = {} for bracket in brackets: diff --git a/dask_ml/model_selection/_incremental.py b/dask_ml/model_selection/_incremental.py index fb1c0b02f..2d1d7255e 100644 --- a/dask_ml/model_selection/_incremental.py +++ b/dask_ml/model_selection/_incremental.py @@ -1009,7 +1009,6 @@ def __init__( predict_proba_meta=None, transform_meta=None, ): - self.n_initial_parameters = n_initial_parameters self.decay_rate = decay_rate self.fits_per_score = fits_per_score diff --git a/dask_ml/model_selection/_search.py b/dask_ml/model_selection/_search.py index 7f1187309..dd3f702b2 100644 --- a/dask_ml/model_selection/_search.py +++ b/dask_ml/model_selection/_search.py @@ -80,7 +80,6 @@ _check_param_grid = None if SK_VERSION <= packaging.version.parse("0.21.dev0"): - _RETURN_TRAIN_SCORE_DEFAULT = "warn" def handle_deprecated_train_score(results, return_train_score): @@ -414,7 +413,7 @@ def do_fit_and_score( xtest = X_test + (n,) ytest = y_test + (n,) - for (name, m) in fit_ests: + for name, m in fit_ests: dsk[(score_name, m, n)] = ( score, (name, m, n), @@ -879,7 +878,7 @@ def _do_featureunion( fit_steps = [] tr_Xs = [] - for (step_name, step) in est.transformer_list: + for step_name, step in est.transformer_list: fits, out_Xs = _do_fit_step( dsk, next_token, diff --git a/dask_ml/preprocessing/_block_transformer.py b/dask_ml/preprocessing/_block_transformer.py index a578898ff..4d892587f 100644 --- a/dask_ml/preprocessing/_block_transformer.py +++ b/dask_ml/preprocessing/_block_transformer.py @@ -69,7 +69,7 @@ def __init__( func: Callable[..., Union[ArrayLike, DataFrameType]], *, validate: bool = False, - **kw_args: Any + **kw_args: Any, ): self.func: Callable[..., Union[ArrayLike, DataFrameType]] = func self.validate = validate diff --git a/dask_ml/preprocessing/data.py b/dask_ml/preprocessing/data.py index 9cccd6895..a8fb3bbe8 100644 --- a/dask_ml/preprocessing/data.py +++ b/dask_ml/preprocessing/data.py @@ -50,7 +50,6 @@ def _handle_zeros_in_scale(scale: NDArrayOrScalar, copy=True): class StandardScaler(DaskMLBaseMixin, sklearn.preprocessing.StandardScaler): - __doc__ = sklearn.preprocessing.StandardScaler.__doc__ def fit( @@ -120,7 +119,6 @@ def inverse_transform( class MinMaxScaler(sklearn.preprocessing.MinMaxScaler): - __doc__ = sklearn.preprocessing.MinMaxScaler.__doc__ def fit( @@ -202,7 +200,6 @@ def inverse_transform( class RobustScaler(sklearn.preprocessing.RobustScaler): - __doc__ = sklearn.preprocessing.RobustScaler.__doc__ def _check_array( @@ -407,7 +404,6 @@ def _transform_col( X_col[lower_bounds_idx] = lower_bound_y if not inverse: - if output_distribution == "normal": X_col = X_col.map_blocks(stats.norm.ppf) # find the value to clip the data to avoid mapping to diff --git a/docs/dimensions.py b/docs/dimensions.py index 7ff76d932..216f19c16 100644 --- a/docs/dimensions.py +++ b/docs/dimensions.py @@ -1,6 +1,5 @@ -import numpy as np - import matplotlib.pyplot as plt +import numpy as np def draw_brace(ax, xspan, text): diff --git a/tests/feature_extraction/test_text.py b/tests/feature_extraction/test_text.py index d3a95d61e..e833a30d3 100644 --- a/tests/feature_extraction/test_text.py +++ b/tests/feature_extraction/test_text.py @@ -33,7 +33,7 @@ ) def test_basic(vect, container): b = db.from_sequence(JUNK_FOOD_DOCS, npartitions=2) - if type(vect) == dask_ml.feature_extraction.text.FeatureHasher: + if type(vect) is dask_ml.feature_extraction.text.FeatureHasher: b = b.str.split() elif container == "series": b = b.to_dataframe(columns=["text"])["text"] diff --git a/tests/model_selection/test_hyperband.py b/tests/model_selection/test_hyperband.py index 2de19acfc..af8a8a30f 100644 --- a/tests/model_selection/test_hyperband.py +++ b/tests/model_selection/test_hyperband.py @@ -85,7 +85,7 @@ async def test_basic(c, s, a, b, array_type, library, max_iter): # validation/test set. assert abs(score - search.best_score_) < 0.1 - assert type(search.best_estimator_) == type(model) + assert type(search.best_estimator_) is type(model) assert isinstance(search.best_params_, dict) num_fit_models = len(set(search.cv_results_["model_id"])) diff --git a/tests/model_selection/test_pytorch.py b/tests/model_selection/test_pytorch.py index 8abe5695d..e88ad2230 100644 --- a/tests/model_selection/test_pytorch.py +++ b/tests/model_selection/test_pytorch.py @@ -27,7 +27,6 @@ def forward(self, x): @gen_cluster(client=True) async def test_pytorch(c, s, a, b): - n_features = 10 defaults = { "callbacks": False, diff --git a/tests/preprocessing/test_label.py b/tests/preprocessing/test_label.py index a0fd9bdf6..34741c37a 100644 --- a/tests/preprocessing/test_label.py +++ b/tests/preprocessing/test_label.py @@ -107,7 +107,6 @@ def test_fit_transform_categorical(self): ) @pytest.mark.parametrize("array", [y, s]) def test_inverse_transform(self, array): - a = dpp.LabelEncoder() assert_eq_ar(a.inverse_transform(a.fit_transform(array)), da.asarray(array)) diff --git a/tests/test_pca.py b/tests/test_pca.py index 80a47eb21..3d7d655fe 100644 --- a/tests/test_pca.py +++ b/tests/test_pca.py @@ -373,12 +373,10 @@ def test_pca_validation(): # But dask-ml needs tall and skinny for data in [X]: for n_components in [-1, 3]: - with pytest.raises(ValueError, match="n_components"): dd.PCA(n_components, svd_solver=solver).fit(data) if solver == "arpack": - n_components = smallest_d with pytest.raises(ValueError, match="n_components"): dd.PCA(n_components, svd_solver=solver).fit(data)